Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:28:04.650580: step: 2/466, loss: 10.935382843017578 2023-01-22 09:28:05.286307: step: 4/466, loss: 31.552471160888672 2023-01-22 09:28:05.949353: step: 6/466, loss: 24.21432113647461 2023-01-22 09:28:06.577418: step: 8/466, loss: 14.790350914001465 2023-01-22 09:28:07.201184: step: 10/466, loss: 6.093600749969482 2023-01-22 09:28:07.814424: step: 12/466, loss: 22.027292251586914 2023-01-22 09:28:08.453691: step: 14/466, loss: 13.000919342041016 2023-01-22 09:28:09.089380: step: 16/466, loss: 31.851593017578125 2023-01-22 09:28:09.655236: step: 18/466, loss: 14.411346435546875 2023-01-22 09:28:10.265246: step: 20/466, loss: 11.301730155944824 2023-01-22 09:28:10.858525: step: 22/466, loss: 30.992475509643555 2023-01-22 09:28:11.487763: step: 24/466, loss: 16.0606689453125 2023-01-22 09:28:12.084894: step: 26/466, loss: 10.805730819702148 2023-01-22 09:28:12.710489: step: 28/466, loss: 20.61066436767578 2023-01-22 09:28:13.305356: step: 30/466, loss: 6.085881233215332 2023-01-22 09:28:13.939488: step: 32/466, loss: 19.626184463500977 2023-01-22 09:28:14.557920: step: 34/466, loss: 4.410502910614014 2023-01-22 09:28:15.175454: step: 36/466, loss: 25.338882446289062 2023-01-22 09:28:15.805848: step: 38/466, loss: 5.67061185836792 2023-01-22 09:28:16.372257: step: 40/466, loss: 24.38402557373047 2023-01-22 09:28:16.972719: step: 42/466, loss: 30.825908660888672 2023-01-22 09:28:17.565040: step: 44/466, loss: 6.1968994140625 2023-01-22 09:28:18.239517: step: 46/466, loss: 22.731178283691406 2023-01-22 09:28:18.864962: step: 48/466, loss: 15.784555435180664 2023-01-22 09:28:19.515477: step: 50/466, loss: 16.66205596923828 2023-01-22 09:28:20.135485: step: 52/466, loss: 20.352027893066406 2023-01-22 09:28:20.758903: step: 54/466, loss: 6.7969512939453125 2023-01-22 09:28:21.376924: step: 56/466, loss: 22.42051124572754 2023-01-22 09:28:21.981759: step: 58/466, loss: 17.6397647857666 2023-01-22 09:28:22.603503: step: 60/466, loss: 12.36483383178711 2023-01-22 09:28:23.251501: step: 62/466, loss: 16.508319854736328 2023-01-22 09:28:23.839099: step: 64/466, loss: 17.55796241760254 2023-01-22 09:28:24.453922: step: 66/466, loss: 12.24570369720459 2023-01-22 09:28:25.110452: step: 68/466, loss: 22.549480438232422 2023-01-22 09:28:25.627932: step: 70/466, loss: 15.868330001831055 2023-01-22 09:28:26.232544: step: 72/466, loss: 16.54954719543457 2023-01-22 09:28:26.823645: step: 74/466, loss: 12.17561149597168 2023-01-22 09:28:27.404397: step: 76/466, loss: 18.420719146728516 2023-01-22 09:28:27.972138: step: 78/466, loss: 21.157407760620117 2023-01-22 09:28:28.664307: step: 80/466, loss: 6.287039756774902 2023-01-22 09:28:29.275690: step: 82/466, loss: 23.08069610595703 2023-01-22 09:28:29.878279: step: 84/466, loss: 6.357649326324463 2023-01-22 09:28:30.517570: step: 86/466, loss: 9.457338333129883 2023-01-22 09:28:31.153002: step: 88/466, loss: 22.535430908203125 2023-01-22 09:28:31.829389: step: 90/466, loss: 10.483683586120605 2023-01-22 09:28:32.473719: step: 92/466, loss: 5.847053527832031 2023-01-22 09:28:33.099102: step: 94/466, loss: 5.426416397094727 2023-01-22 09:28:33.709559: step: 96/466, loss: 22.074766159057617 2023-01-22 09:28:34.401160: step: 98/466, loss: 13.938610076904297 2023-01-22 09:28:35.041193: step: 100/466, loss: 13.14396858215332 2023-01-22 09:28:35.624400: step: 102/466, loss: 12.43580436706543 2023-01-22 09:28:36.257034: step: 104/466, loss: 20.18524932861328 2023-01-22 09:28:36.921561: step: 106/466, loss: 5.016257286071777 2023-01-22 09:28:37.586117: step: 108/466, loss: 11.029955863952637 2023-01-22 09:28:38.197642: step: 110/466, loss: 13.102413177490234 2023-01-22 09:28:38.801888: step: 112/466, loss: 19.9345760345459 2023-01-22 09:28:39.374708: step: 114/466, loss: 4.981045246124268 2023-01-22 09:28:40.039938: step: 116/466, loss: 12.654452323913574 2023-01-22 09:28:40.648230: step: 118/466, loss: 9.176351547241211 2023-01-22 09:28:41.269351: step: 120/466, loss: 17.193998336791992 2023-01-22 09:28:41.869015: step: 122/466, loss: 12.385316848754883 2023-01-22 09:28:42.588393: step: 124/466, loss: 18.726558685302734 2023-01-22 09:28:43.125836: step: 126/466, loss: 4.600466251373291 2023-01-22 09:28:43.766311: step: 128/466, loss: 8.098365783691406 2023-01-22 09:28:44.479425: step: 130/466, loss: 5.148943901062012 2023-01-22 09:28:45.081801: step: 132/466, loss: 17.139907836914062 2023-01-22 09:28:45.693241: step: 134/466, loss: 14.471826553344727 2023-01-22 09:28:46.248513: step: 136/466, loss: 13.262125968933105 2023-01-22 09:28:46.824053: step: 138/466, loss: 5.0157389640808105 2023-01-22 09:28:47.445635: step: 140/466, loss: 10.014802932739258 2023-01-22 09:28:48.098613: step: 142/466, loss: 6.947539329528809 2023-01-22 09:28:48.695585: step: 144/466, loss: 9.322587966918945 2023-01-22 09:28:49.364274: step: 146/466, loss: 14.274321556091309 2023-01-22 09:28:49.991854: step: 148/466, loss: 7.078306674957275 2023-01-22 09:28:50.572084: step: 150/466, loss: 11.2247314453125 2023-01-22 09:28:51.221819: step: 152/466, loss: 7.076643943786621 2023-01-22 09:28:51.794847: step: 154/466, loss: 5.292274475097656 2023-01-22 09:28:52.432426: step: 156/466, loss: 10.551542282104492 2023-01-22 09:28:53.087503: step: 158/466, loss: 10.318815231323242 2023-01-22 09:28:53.733641: step: 160/466, loss: 7.905094146728516 2023-01-22 09:28:54.384033: step: 162/466, loss: 10.10220718383789 2023-01-22 09:28:55.002498: step: 164/466, loss: 7.8116254806518555 2023-01-22 09:28:55.644325: step: 166/466, loss: 5.788525581359863 2023-01-22 09:28:56.237661: step: 168/466, loss: 3.7819089889526367 2023-01-22 09:28:57.012184: step: 170/466, loss: 13.363239288330078 2023-01-22 09:28:57.649501: step: 172/466, loss: 9.444230079650879 2023-01-22 09:28:58.314276: step: 174/466, loss: 3.878795862197876 2023-01-22 09:28:58.942817: step: 176/466, loss: 7.288130760192871 2023-01-22 09:28:59.605980: step: 178/466, loss: 9.1649169921875 2023-01-22 09:29:00.304455: step: 180/466, loss: 9.544353485107422 2023-01-22 09:29:00.939471: step: 182/466, loss: 12.113117218017578 2023-01-22 09:29:01.568305: step: 184/466, loss: 2.878976821899414 2023-01-22 09:29:02.268247: step: 186/466, loss: 18.907833099365234 2023-01-22 09:29:02.869776: step: 188/466, loss: 9.29400634765625 2023-01-22 09:29:03.569388: step: 190/466, loss: 3.236294746398926 2023-01-22 09:29:04.228572: step: 192/466, loss: 21.163219451904297 2023-01-22 09:29:04.868989: step: 194/466, loss: 6.056461334228516 2023-01-22 09:29:05.486141: step: 196/466, loss: 4.537060260772705 2023-01-22 09:29:06.077548: step: 198/466, loss: 5.748075485229492 2023-01-22 09:29:06.669594: step: 200/466, loss: 3.9136929512023926 2023-01-22 09:29:07.324044: step: 202/466, loss: 11.612344741821289 2023-01-22 09:29:07.938292: step: 204/466, loss: 5.7864670753479 2023-01-22 09:29:08.570762: step: 206/466, loss: 10.0962495803833 2023-01-22 09:29:09.245267: step: 208/466, loss: 17.319595336914062 2023-01-22 09:29:09.862649: step: 210/466, loss: 9.155757904052734 2023-01-22 09:29:10.568697: step: 212/466, loss: 8.002532958984375 2023-01-22 09:29:11.275895: step: 214/466, loss: 2.7576231956481934 2023-01-22 09:29:11.897266: step: 216/466, loss: 7.004976272583008 2023-01-22 09:29:12.528865: step: 218/466, loss: 9.138443946838379 2023-01-22 09:29:13.139949: step: 220/466, loss: 7.824585914611816 2023-01-22 09:29:13.770164: step: 222/466, loss: 7.4660797119140625 2023-01-22 09:29:14.531703: step: 224/466, loss: 8.690784454345703 2023-01-22 09:29:15.273181: step: 226/466, loss: 1.8159675598144531 2023-01-22 09:29:15.954749: step: 228/466, loss: 2.712833881378174 2023-01-22 09:29:16.579549: step: 230/466, loss: 4.209612846374512 2023-01-22 09:29:17.198007: step: 232/466, loss: 2.7250709533691406 2023-01-22 09:29:17.801684: step: 234/466, loss: 3.2030768394470215 2023-01-22 09:29:18.459870: step: 236/466, loss: 6.869441986083984 2023-01-22 09:29:19.104795: step: 238/466, loss: 8.67182731628418 2023-01-22 09:29:19.823066: step: 240/466, loss: 5.470909118652344 2023-01-22 09:29:20.577565: step: 242/466, loss: 8.2802734375 2023-01-22 09:29:21.220312: step: 244/466, loss: 3.2316181659698486 2023-01-22 09:29:21.862296: step: 246/466, loss: 2.4566733837127686 2023-01-22 09:29:22.578266: step: 248/466, loss: 15.329526901245117 2023-01-22 09:29:23.161164: step: 250/466, loss: 9.337640762329102 2023-01-22 09:29:23.790649: step: 252/466, loss: 3.938605308532715 2023-01-22 09:29:24.464798: step: 254/466, loss: 4.5049967765808105 2023-01-22 09:29:25.101410: step: 256/466, loss: 9.990443229675293 2023-01-22 09:29:25.768982: step: 258/466, loss: 6.6896772384643555 2023-01-22 09:29:26.461387: step: 260/466, loss: 6.534847259521484 2023-01-22 09:29:27.146341: step: 262/466, loss: 3.0652568340301514 2023-01-22 09:29:27.816264: step: 264/466, loss: 4.987476348876953 2023-01-22 09:29:28.464181: step: 266/466, loss: 12.270776748657227 2023-01-22 09:29:29.027738: step: 268/466, loss: 6.708323955535889 2023-01-22 09:29:29.755134: step: 270/466, loss: 12.853983879089355 2023-01-22 09:29:30.472388: step: 272/466, loss: 6.741026878356934 2023-01-22 09:29:31.162847: step: 274/466, loss: 2.9499881267547607 2023-01-22 09:29:31.791824: step: 276/466, loss: 2.5946052074432373 2023-01-22 09:29:32.453522: step: 278/466, loss: 4.536483287811279 2023-01-22 09:29:33.120003: step: 280/466, loss: 2.211632013320923 2023-01-22 09:29:33.806165: step: 282/466, loss: 3.691544532775879 2023-01-22 09:29:34.442564: step: 284/466, loss: 3.1353511810302734 2023-01-22 09:29:35.117857: step: 286/466, loss: 3.9749929904937744 2023-01-22 09:29:35.685923: step: 288/466, loss: 5.54394006729126 2023-01-22 09:29:36.235905: step: 290/466, loss: 4.538595199584961 2023-01-22 09:29:36.900338: step: 292/466, loss: 5.125032424926758 2023-01-22 09:29:37.506878: step: 294/466, loss: 5.607701778411865 2023-01-22 09:29:38.106862: step: 296/466, loss: 2.525693893432617 2023-01-22 09:29:38.727963: step: 298/466, loss: 2.7111496925354004 2023-01-22 09:29:39.346019: step: 300/466, loss: 4.847977161407471 2023-01-22 09:29:39.985446: step: 302/466, loss: 13.873931884765625 2023-01-22 09:29:40.670559: step: 304/466, loss: 6.348834991455078 2023-01-22 09:29:41.289531: step: 306/466, loss: 8.936687469482422 2023-01-22 09:29:41.956903: step: 308/466, loss: 13.306646347045898 2023-01-22 09:29:42.540683: step: 310/466, loss: 5.151156425476074 2023-01-22 09:29:43.106043: step: 312/466, loss: 3.3634378910064697 2023-01-22 09:29:43.677358: step: 314/466, loss: 8.005563735961914 2023-01-22 09:29:44.353452: step: 316/466, loss: 7.66864013671875 2023-01-22 09:29:44.970048: step: 318/466, loss: 6.213191509246826 2023-01-22 09:29:45.577648: step: 320/466, loss: 3.8438405990600586 2023-01-22 09:29:46.253873: step: 322/466, loss: 9.124351501464844 2023-01-22 09:29:46.920953: step: 324/466, loss: 6.281722068786621 2023-01-22 09:29:47.571658: step: 326/466, loss: 2.8622822761535645 2023-01-22 09:29:48.258903: step: 328/466, loss: 8.143562316894531 2023-01-22 09:29:48.879417: step: 330/466, loss: 14.289070129394531 2023-01-22 09:29:49.509507: step: 332/466, loss: 8.19924545288086 2023-01-22 09:29:50.191967: step: 334/466, loss: 3.265841484069824 2023-01-22 09:29:50.840137: step: 336/466, loss: 2.4021248817443848 2023-01-22 09:29:51.509599: step: 338/466, loss: 3.2878198623657227 2023-01-22 09:29:52.126213: step: 340/466, loss: 3.6344642639160156 2023-01-22 09:29:52.736639: step: 342/466, loss: 5.6455583572387695 2023-01-22 09:29:53.335307: step: 344/466, loss: 15.640310287475586 2023-01-22 09:29:53.942800: step: 346/466, loss: 2.7401907444000244 2023-01-22 09:29:54.570512: step: 348/466, loss: 6.943836212158203 2023-01-22 09:29:55.213980: step: 350/466, loss: 3.270993947982788 2023-01-22 09:29:55.850669: step: 352/466, loss: 2.140139579772949 2023-01-22 09:29:56.487914: step: 354/466, loss: 2.002262592315674 2023-01-22 09:29:57.069056: step: 356/466, loss: 6.698768615722656 2023-01-22 09:29:57.690339: step: 358/466, loss: 8.702178001403809 2023-01-22 09:29:58.300296: step: 360/466, loss: 3.5848991870880127 2023-01-22 09:29:58.891755: step: 362/466, loss: 4.4226460456848145 2023-01-22 09:29:59.564166: step: 364/466, loss: 9.540589332580566 2023-01-22 09:30:00.157556: step: 366/466, loss: 2.794017791748047 2023-01-22 09:30:00.761195: step: 368/466, loss: 3.7166731357574463 2023-01-22 09:30:01.462000: step: 370/466, loss: 2.8494551181793213 2023-01-22 09:30:02.142379: step: 372/466, loss: 2.1792683601379395 2023-01-22 09:30:02.804706: step: 374/466, loss: 6.519150257110596 2023-01-22 09:30:03.394635: step: 376/466, loss: 1.7491375207901 2023-01-22 09:30:03.993517: step: 378/466, loss: 4.340683460235596 2023-01-22 09:30:04.584934: step: 380/466, loss: 3.240265130996704 2023-01-22 09:30:05.211584: step: 382/466, loss: 7.557920932769775 2023-01-22 09:30:05.839555: step: 384/466, loss: 6.803190231323242 2023-01-22 09:30:06.569300: step: 386/466, loss: 5.763479232788086 2023-01-22 09:30:07.240013: step: 388/466, loss: 2.6579391956329346 2023-01-22 09:30:07.828967: step: 390/466, loss: 4.02760124206543 2023-01-22 09:30:08.428739: step: 392/466, loss: 4.680157661437988 2023-01-22 09:30:08.990072: step: 394/466, loss: 2.3998398780822754 2023-01-22 09:30:09.596599: step: 396/466, loss: 4.017712593078613 2023-01-22 09:30:10.315320: step: 398/466, loss: 4.725624084472656 2023-01-22 09:30:10.907262: step: 400/466, loss: 2.2863736152648926 2023-01-22 09:30:11.517705: step: 402/466, loss: 5.301486015319824 2023-01-22 09:30:12.130316: step: 404/466, loss: 4.332059383392334 2023-01-22 09:30:12.758347: step: 406/466, loss: 2.0066604614257812 2023-01-22 09:30:13.408411: step: 408/466, loss: 3.5898351669311523 2023-01-22 09:30:14.096247: step: 410/466, loss: 3.8044934272766113 2023-01-22 09:30:14.790025: step: 412/466, loss: 7.56033992767334 2023-01-22 09:30:15.418871: step: 414/466, loss: 1.6290899515151978 2023-01-22 09:30:16.002262: step: 416/466, loss: 1.7890315055847168 2023-01-22 09:30:16.631524: step: 418/466, loss: 1.864124059677124 2023-01-22 09:30:17.242548: step: 420/466, loss: 2.122128963470459 2023-01-22 09:30:17.801712: step: 422/466, loss: 0.8767684102058411 2023-01-22 09:30:18.455127: step: 424/466, loss: 2.351550817489624 2023-01-22 09:30:19.078234: step: 426/466, loss: 6.1453447341918945 2023-01-22 09:30:19.722653: step: 428/466, loss: 2.0417628288269043 2023-01-22 09:30:20.353146: step: 430/466, loss: 0.7644177675247192 2023-01-22 09:30:21.004400: step: 432/466, loss: 1.2050378322601318 2023-01-22 09:30:21.590594: step: 434/466, loss: 0.8422468304634094 2023-01-22 09:30:22.296461: step: 436/466, loss: 5.608556747436523 2023-01-22 09:30:22.969224: step: 438/466, loss: 0.5147002339363098 2023-01-22 09:30:23.560530: step: 440/466, loss: 1.6460902690887451 2023-01-22 09:30:24.195218: step: 442/466, loss: 0.38369235396385193 2023-01-22 09:30:24.845387: step: 444/466, loss: 0.7263200283050537 2023-01-22 09:30:25.463629: step: 446/466, loss: 2.806840181350708 2023-01-22 09:30:26.142922: step: 448/466, loss: 2.154893398284912 2023-01-22 09:30:26.769077: step: 450/466, loss: 1.0717921257019043 2023-01-22 09:30:27.414023: step: 452/466, loss: 1.0342295169830322 2023-01-22 09:30:28.069163: step: 454/466, loss: 1.136860966682434 2023-01-22 09:30:28.694517: step: 456/466, loss: 7.433987617492676 2023-01-22 09:30:29.276486: step: 458/466, loss: 0.8435848951339722 2023-01-22 09:30:29.821822: step: 460/466, loss: 2.5238540172576904 2023-01-22 09:30:30.619810: step: 462/466, loss: 1.891288161277771 2023-01-22 09:30:31.241476: step: 464/466, loss: 0.9611189365386963 2023-01-22 09:30:31.894917: step: 466/466, loss: 4.8004326820373535 2023-01-22 09:30:32.744094: step: 468/466, loss: 3.1122493743896484 2023-01-22 09:30:33.390968: step: 470/466, loss: 0.8799759745597839 2023-01-22 09:30:34.055741: step: 472/466, loss: 2.821756362915039 2023-01-22 09:30:34.668076: step: 474/466, loss: 10.722322463989258 2023-01-22 09:30:35.388257: step: 476/466, loss: 0.9301474094390869 2023-01-22 09:30:36.025608: step: 478/466, loss: 3.162686824798584 2023-01-22 09:30:36.710686: step: 480/466, loss: 3.3056721687316895 2023-01-22 09:30:37.318139: step: 482/466, loss: 2.354368209838867 2023-01-22 09:30:37.905139: step: 484/466, loss: 1.06724214553833 2023-01-22 09:30:38.650276: step: 486/466, loss: 2.0066750049591064 2023-01-22 09:30:39.336481: step: 488/466, loss: 15.266960144042969 2023-01-22 09:30:39.935914: step: 490/466, loss: 4.051244735717773 2023-01-22 09:30:40.574711: step: 492/466, loss: 0.7980690598487854 2023-01-22 09:30:41.208200: step: 494/466, loss: 1.2072261571884155 2023-01-22 09:30:41.856743: step: 496/466, loss: 1.5347435474395752 2023-01-22 09:30:42.553418: step: 498/466, loss: 0.9659013748168945 2023-01-22 09:30:43.219751: step: 500/466, loss: 1.3465356826782227 2023-01-22 09:30:43.863876: step: 502/466, loss: 0.8857125639915466 2023-01-22 09:30:44.467349: step: 504/466, loss: 0.8151245713233948 2023-01-22 09:30:45.138527: step: 506/466, loss: 0.33978378772735596 2023-01-22 09:30:45.821883: step: 508/466, loss: 0.4371962547302246 2023-01-22 09:30:46.443963: step: 510/466, loss: 7.064756870269775 2023-01-22 09:30:47.082440: step: 512/466, loss: 7.476099014282227 2023-01-22 09:30:47.743246: step: 514/466, loss: 0.9030598998069763 2023-01-22 09:30:48.390775: step: 516/466, loss: 0.8397070169448853 2023-01-22 09:30:49.036797: step: 518/466, loss: 2.821498394012451 2023-01-22 09:30:49.645511: step: 520/466, loss: 1.5774316787719727 2023-01-22 09:30:50.254188: step: 522/466, loss: 5.750486373901367 2023-01-22 09:30:50.917427: step: 524/466, loss: 4.969746112823486 2023-01-22 09:30:51.580980: step: 526/466, loss: 2.1585094928741455 2023-01-22 09:30:52.212472: step: 528/466, loss: 0.7462534308433533 2023-01-22 09:30:52.918032: step: 530/466, loss: 9.493864059448242 2023-01-22 09:30:53.624388: step: 532/466, loss: 4.18302059173584 2023-01-22 09:30:54.276160: step: 534/466, loss: 9.291149139404297 2023-01-22 09:30:54.918035: step: 536/466, loss: 3.3164713382720947 2023-01-22 09:30:55.588286: step: 538/466, loss: 2.596146821975708 2023-01-22 09:30:56.202848: step: 540/466, loss: 7.012548446655273 2023-01-22 09:30:56.839823: step: 542/466, loss: 3.4024271965026855 2023-01-22 09:30:57.493034: step: 544/466, loss: 0.7824108600616455 2023-01-22 09:30:58.094310: step: 546/466, loss: 3.5947446823120117 2023-01-22 09:30:58.653507: step: 548/466, loss: 2.312133550643921 2023-01-22 09:30:59.226124: step: 550/466, loss: 3.605940341949463 2023-01-22 09:30:59.846129: step: 552/466, loss: 5.734463691711426 2023-01-22 09:31:00.540990: step: 554/466, loss: 3.824420213699341 2023-01-22 09:31:01.233735: step: 556/466, loss: 1.526013970375061 2023-01-22 09:31:01.844321: step: 558/466, loss: 1.2279987335205078 2023-01-22 09:31:02.443846: step: 560/466, loss: 3.907745122909546 2023-01-22 09:31:03.107692: step: 562/466, loss: 1.75930655002594 2023-01-22 09:31:03.812848: step: 564/466, loss: 0.5211279988288879 2023-01-22 09:31:04.494301: step: 566/466, loss: 0.5466214418411255 2023-01-22 09:31:05.142411: step: 568/466, loss: 6.440335273742676 2023-01-22 09:31:05.803459: step: 570/466, loss: 4.154919624328613 2023-01-22 09:31:06.426232: step: 572/466, loss: 4.83548641204834 2023-01-22 09:31:07.144340: step: 574/466, loss: 0.8471634984016418 2023-01-22 09:31:07.758030: step: 576/466, loss: 0.60401451587677 2023-01-22 09:31:08.405328: step: 578/466, loss: 5.233648300170898 2023-01-22 09:31:09.070383: step: 580/466, loss: 0.6951800584793091 2023-01-22 09:31:09.701159: step: 582/466, loss: 3.380234718322754 2023-01-22 09:31:10.335631: step: 584/466, loss: 2.091862678527832 2023-01-22 09:31:10.940642: step: 586/466, loss: 1.276121735572815 2023-01-22 09:31:11.604493: step: 588/466, loss: 2.7723000049591064 2023-01-22 09:31:12.204308: step: 590/466, loss: 1.389462947845459 2023-01-22 09:31:12.877768: step: 592/466, loss: 6.692296981811523 2023-01-22 09:31:13.472393: step: 594/466, loss: 1.937554121017456 2023-01-22 09:31:14.128642: step: 596/466, loss: 2.5454559326171875 2023-01-22 09:31:14.744597: step: 598/466, loss: 1.0721949338912964 2023-01-22 09:31:15.455248: step: 600/466, loss: 2.6973354816436768 2023-01-22 09:31:16.107986: step: 602/466, loss: 1.4351885318756104 2023-01-22 09:31:16.675452: step: 604/466, loss: 1.5300734043121338 2023-01-22 09:31:17.242850: step: 606/466, loss: 2.501544713973999 2023-01-22 09:31:17.850540: step: 608/466, loss: 0.6491356492042542 2023-01-22 09:31:18.462141: step: 610/466, loss: 1.4302908182144165 2023-01-22 09:31:19.043610: step: 612/466, loss: 0.9263692498207092 2023-01-22 09:31:19.641941: step: 614/466, loss: 0.7574171423912048 2023-01-22 09:31:20.224101: step: 616/466, loss: 0.5629387497901917 2023-01-22 09:31:20.908764: step: 618/466, loss: 1.5841846466064453 2023-01-22 09:31:21.513426: step: 620/466, loss: 8.079695701599121 2023-01-22 09:31:22.128362: step: 622/466, loss: 0.7748921513557434 2023-01-22 09:31:22.722266: step: 624/466, loss: 3.2063374519348145 2023-01-22 09:31:23.325079: step: 626/466, loss: 1.6632599830627441 2023-01-22 09:31:23.943178: step: 628/466, loss: 2.4611549377441406 2023-01-22 09:31:24.570534: step: 630/466, loss: 1.1037203073501587 2023-01-22 09:31:25.167736: step: 632/466, loss: 6.284277439117432 2023-01-22 09:31:25.762509: step: 634/466, loss: 1.2190725803375244 2023-01-22 09:31:26.347167: step: 636/466, loss: 1.3618507385253906 2023-01-22 09:31:26.965557: step: 638/466, loss: 5.7003679275512695 2023-01-22 09:31:27.581962: step: 640/466, loss: 8.65969467163086 2023-01-22 09:31:28.224703: step: 642/466, loss: 3.9915173053741455 2023-01-22 09:31:28.835750: step: 644/466, loss: 2.6033904552459717 2023-01-22 09:31:29.444002: step: 646/466, loss: 0.6600139141082764 2023-01-22 09:31:30.034995: step: 648/466, loss: 1.091048240661621 2023-01-22 09:31:30.685217: step: 650/466, loss: 2.0598092079162598 2023-01-22 09:31:31.338641: step: 652/466, loss: 2.6983959674835205 2023-01-22 09:31:32.024643: step: 654/466, loss: 1.2380976676940918 2023-01-22 09:31:32.711339: step: 656/466, loss: 0.8623917698860168 2023-01-22 09:31:33.311148: step: 658/466, loss: 1.8455694913864136 2023-01-22 09:31:33.994015: step: 660/466, loss: 2.5191493034362793 2023-01-22 09:31:34.627725: step: 662/466, loss: 1.4284855127334595 2023-01-22 09:31:35.293087: step: 664/466, loss: 2.01560115814209 2023-01-22 09:31:35.961773: step: 666/466, loss: 4.184616565704346 2023-01-22 09:31:36.697752: step: 668/466, loss: 4.633384704589844 2023-01-22 09:31:37.313069: step: 670/466, loss: 0.761311948299408 2023-01-22 09:31:37.887086: step: 672/466, loss: 2.524230480194092 2023-01-22 09:31:38.455660: step: 674/466, loss: 1.7049751281738281 2023-01-22 09:31:39.053805: step: 676/466, loss: 4.10607385635376 2023-01-22 09:31:39.695859: step: 678/466, loss: 0.5962437391281128 2023-01-22 09:31:40.288284: step: 680/466, loss: 2.6895506381988525 2023-01-22 09:31:40.914301: step: 682/466, loss: 1.299211025238037 2023-01-22 09:31:41.560946: step: 684/466, loss: 2.7528464794158936 2023-01-22 09:31:42.195597: step: 686/466, loss: 1.287872552871704 2023-01-22 09:31:42.791329: step: 688/466, loss: 4.796496391296387 2023-01-22 09:31:43.388959: step: 690/466, loss: 0.9580941200256348 2023-01-22 09:31:44.023035: step: 692/466, loss: 2.782583713531494 2023-01-22 09:31:44.628065: step: 694/466, loss: 0.9516090154647827 2023-01-22 09:31:45.248640: step: 696/466, loss: 1.9994728565216064 2023-01-22 09:31:45.879171: step: 698/466, loss: 1.4923850297927856 2023-01-22 09:31:46.441347: step: 700/466, loss: 1.8313624858856201 2023-01-22 09:31:47.100093: step: 702/466, loss: 4.105555534362793 2023-01-22 09:31:47.719043: step: 704/466, loss: 16.962718963623047 2023-01-22 09:31:48.307408: step: 706/466, loss: 2.9927914142608643 2023-01-22 09:31:48.909164: step: 708/466, loss: 0.7770897746086121 2023-01-22 09:31:49.561490: step: 710/466, loss: 14.245176315307617 2023-01-22 09:31:50.195832: step: 712/466, loss: 1.8469178676605225 2023-01-22 09:31:50.786350: step: 714/466, loss: 1.0387372970581055 2023-01-22 09:31:51.384376: step: 716/466, loss: 3.556546688079834 2023-01-22 09:31:52.031816: step: 718/466, loss: 0.7969565987586975 2023-01-22 09:31:52.735979: step: 720/466, loss: 5.698383331298828 2023-01-22 09:31:53.291283: step: 722/466, loss: 1.0497674942016602 2023-01-22 09:31:53.873636: step: 724/466, loss: 0.8513200879096985 2023-01-22 09:31:54.459926: step: 726/466, loss: 1.0116870403289795 2023-01-22 09:31:55.043452: step: 728/466, loss: 0.7006529569625854 2023-01-22 09:31:55.684334: step: 730/466, loss: 2.557363986968994 2023-01-22 09:31:56.362857: step: 732/466, loss: 3.872546672821045 2023-01-22 09:31:56.919086: step: 734/466, loss: 3.9225711822509766 2023-01-22 09:31:57.521152: step: 736/466, loss: 5.718816757202148 2023-01-22 09:31:58.157146: step: 738/466, loss: 1.5927464962005615 2023-01-22 09:31:58.787870: step: 740/466, loss: 5.787430286407471 2023-01-22 09:31:59.425887: step: 742/466, loss: 0.9632762670516968 2023-01-22 09:32:00.015842: step: 744/466, loss: 0.8985154032707214 2023-01-22 09:32:00.618034: step: 746/466, loss: 6.514135360717773 2023-01-22 09:32:01.182814: step: 748/466, loss: 5.090004920959473 2023-01-22 09:32:01.801306: step: 750/466, loss: 0.6915642619132996 2023-01-22 09:32:02.432284: step: 752/466, loss: 1.69606614112854 2023-01-22 09:32:03.047428: step: 754/466, loss: 3.6167702674865723 2023-01-22 09:32:03.645944: step: 756/466, loss: 6.165261268615723 2023-01-22 09:32:04.246606: step: 758/466, loss: 1.6209237575531006 2023-01-22 09:32:04.875273: step: 760/466, loss: 1.159082055091858 2023-01-22 09:32:05.472831: step: 762/466, loss: 1.1850038766860962 2023-01-22 09:32:06.145284: step: 764/466, loss: 0.9388983249664307 2023-01-22 09:32:06.765959: step: 766/466, loss: 2.732558488845825 2023-01-22 09:32:07.490891: step: 768/466, loss: 2.2904887199401855 2023-01-22 09:32:08.046910: step: 770/466, loss: 2.290951728820801 2023-01-22 09:32:08.632817: step: 772/466, loss: 0.5040983557701111 2023-01-22 09:32:09.247640: step: 774/466, loss: 1.4131964445114136 2023-01-22 09:32:09.841543: step: 776/466, loss: 0.7500594854354858 2023-01-22 09:32:10.452267: step: 778/466, loss: 1.775069236755371 2023-01-22 09:32:11.041588: step: 780/466, loss: 0.8299316763877869 2023-01-22 09:32:11.659220: step: 782/466, loss: 8.539618492126465 2023-01-22 09:32:12.243527: step: 784/466, loss: 8.508955001831055 2023-01-22 09:32:12.814688: step: 786/466, loss: 1.5897914171218872 2023-01-22 09:32:13.385321: step: 788/466, loss: 2.956247568130493 2023-01-22 09:32:14.005421: step: 790/466, loss: 1.2754576206207275 2023-01-22 09:32:14.597435: step: 792/466, loss: 1.6307601928710938 2023-01-22 09:32:15.220156: step: 794/466, loss: 8.023615837097168 2023-01-22 09:32:15.843744: step: 796/466, loss: 3.913644552230835 2023-01-22 09:32:16.522531: step: 798/466, loss: 1.479268193244934 2023-01-22 09:32:17.132805: step: 800/466, loss: 3.459024667739868 2023-01-22 09:32:17.672539: step: 802/466, loss: 1.494737982749939 2023-01-22 09:32:18.306448: step: 804/466, loss: 2.402879238128662 2023-01-22 09:32:18.961088: step: 806/466, loss: 2.3793046474456787 2023-01-22 09:32:19.567105: step: 808/466, loss: 7.7648234367370605 2023-01-22 09:32:20.217728: step: 810/466, loss: 12.321928024291992 2023-01-22 09:32:20.821916: step: 812/466, loss: 8.237139701843262 2023-01-22 09:32:21.438184: step: 814/466, loss: 2.2108066082000732 2023-01-22 09:32:22.025528: step: 816/466, loss: 1.0587334632873535 2023-01-22 09:32:22.698137: step: 818/466, loss: 2.11734938621521 2023-01-22 09:32:23.330163: step: 820/466, loss: 1.61896550655365 2023-01-22 09:32:23.953500: step: 822/466, loss: 1.7837340831756592 2023-01-22 09:32:24.637444: step: 824/466, loss: 2.19197416305542 2023-01-22 09:32:25.216566: step: 826/466, loss: 1.5917332172393799 2023-01-22 09:32:25.786900: step: 828/466, loss: 3.1244866847991943 2023-01-22 09:32:26.492063: step: 830/466, loss: 3.1029326915740967 2023-01-22 09:32:27.116243: step: 832/466, loss: 3.0160324573516846 2023-01-22 09:32:27.726083: step: 834/466, loss: 2.6842033863067627 2023-01-22 09:32:28.389700: step: 836/466, loss: 1.0783379077911377 2023-01-22 09:32:29.054365: step: 838/466, loss: 0.7518795728683472 2023-01-22 09:32:29.681344: step: 840/466, loss: 2.020292043685913 2023-01-22 09:32:30.382176: step: 842/466, loss: 9.57175350189209 2023-01-22 09:32:30.956881: step: 844/466, loss: 3.6099765300750732 2023-01-22 09:32:31.545874: step: 846/466, loss: 0.6370089054107666 2023-01-22 09:32:32.196229: step: 848/466, loss: 1.0624032020568848 2023-01-22 09:32:32.847988: step: 850/466, loss: 0.9141412377357483 2023-01-22 09:32:33.515354: step: 852/466, loss: 0.9670308828353882 2023-01-22 09:32:34.082068: step: 854/466, loss: 0.788347065448761 2023-01-22 09:32:34.664010: step: 856/466, loss: 2.0098037719726562 2023-01-22 09:32:35.281348: step: 858/466, loss: 4.156428337097168 2023-01-22 09:32:35.827264: step: 860/466, loss: 0.8457652926445007 2023-01-22 09:32:36.417357: step: 862/466, loss: 8.276666641235352 2023-01-22 09:32:37.056951: step: 864/466, loss: 1.2429496049880981 2023-01-22 09:32:37.654221: step: 866/466, loss: 1.0977206230163574 2023-01-22 09:32:38.261872: step: 868/466, loss: 1.4717178344726562 2023-01-22 09:32:38.858634: step: 870/466, loss: 9.140847206115723 2023-01-22 09:32:39.527006: step: 872/466, loss: 1.118363618850708 2023-01-22 09:32:40.144339: step: 874/466, loss: 5.926425933837891 2023-01-22 09:32:40.739772: step: 876/466, loss: 2.4152932167053223 2023-01-22 09:32:41.397455: step: 878/466, loss: 0.7910025715827942 2023-01-22 09:32:42.046412: step: 880/466, loss: 1.3264044523239136 2023-01-22 09:32:42.626213: step: 882/466, loss: 4.652287006378174 2023-01-22 09:32:43.247347: step: 884/466, loss: 2.289107322692871 2023-01-22 09:32:43.795916: step: 886/466, loss: 5.126692295074463 2023-01-22 09:32:44.391599: step: 888/466, loss: 0.8533294796943665 2023-01-22 09:32:44.980494: step: 890/466, loss: 0.5715285539627075 2023-01-22 09:32:45.576097: step: 892/466, loss: 1.5395843982696533 2023-01-22 09:32:46.179569: step: 894/466, loss: 0.9893038272857666 2023-01-22 09:32:46.806704: step: 896/466, loss: 1.1434332132339478 2023-01-22 09:32:47.350815: step: 898/466, loss: 5.790560722351074 2023-01-22 09:32:48.031668: step: 900/466, loss: 8.075478553771973 2023-01-22 09:32:48.715025: step: 902/466, loss: 3.857839584350586 2023-01-22 09:32:49.339310: step: 904/466, loss: 1.1071827411651611 2023-01-22 09:32:49.926057: step: 906/466, loss: 6.996962547302246 2023-01-22 09:32:50.549424: step: 908/466, loss: 2.3261501789093018 2023-01-22 09:32:51.105297: step: 910/466, loss: 4.405646324157715 2023-01-22 09:32:51.678700: step: 912/466, loss: 3.5805516242980957 2023-01-22 09:32:52.391532: step: 914/466, loss: 4.537283897399902 2023-01-22 09:32:52.992204: step: 916/466, loss: 3.6143927574157715 2023-01-22 09:32:53.553865: step: 918/466, loss: 5.42225456237793 2023-01-22 09:32:54.178254: step: 920/466, loss: 2.3025240898132324 2023-01-22 09:32:54.820913: step: 922/466, loss: 0.8708323240280151 2023-01-22 09:32:55.524919: step: 924/466, loss: 3.0053529739379883 2023-01-22 09:32:56.158265: step: 926/466, loss: 1.0579087734222412 2023-01-22 09:32:56.762590: step: 928/466, loss: 5.881206512451172 2023-01-22 09:32:57.412654: step: 930/466, loss: 0.386121928691864 2023-01-22 09:32:57.994210: step: 932/466, loss: 1.6460617780685425 ================================================== Loss: 5.728 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20312209474306964, 'r': 0.10444085022706039, 'f1': 0.13795058619254666}, 'combined': 0.1016478003524028, 'epoch': 0} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.234680928208847, 'r': 0.03971833578792342, 'f1': 0.0679384906056471}, 'combined': 0.045057651800636415, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21351721439749607, 'r': 0.08597195967233774, 'f1': 0.12258535489667564}, 'combined': 0.0817235699311171, 'epoch': 0} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.2706356143856144, 'r': 0.03324818973981346, 'f1': 0.05922095310962947}, 'combined': 0.03864946413470555, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.19270833333333331, 'r': 0.09690059861373661, 'f1': 0.12895702306079665}, 'combined': 0.095020964360587, 'epoch': 0} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.23682669789227168, 'r': 0.03723306332842415, 'f1': 0.06434934775692015}, 'combined': 0.04267728763153253, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.18881578947368421, 'r': 0.205, 'f1': 0.1965753424657534}, 'combined': 0.13105022831050225, 'epoch': 0} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.10344827586206896, 'f1': 0.16666666666666663}, 'combined': 0.11111111111111108, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20312209474306964, 'r': 0.10444085022706039, 'f1': 0.13795058619254666}, 'combined': 0.1016478003524028, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.234680928208847, 'r': 0.03971833578792342, 'f1': 0.0679384906056471}, 'combined': 0.045057651800636415, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.18881578947368421, 'r': 0.205, 'f1': 0.1965753424657534}, 'combined': 0.13105022831050225, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21351721439749607, 'r': 0.08597195967233774, 'f1': 0.12258535489667564}, 'combined': 0.0817235699311171, 'epoch': 0} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.2706356143856144, 'r': 0.03324818973981346, 'f1': 0.05922095310962947}, 'combined': 0.03864946413470555, 'epoch': 0} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.19270833333333331, 'r': 0.09690059861373661, 'f1': 0.12895702306079665}, 'combined': 0.095020964360587, 'epoch': 0} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.23682669789227168, 'r': 0.03723306332842415, 'f1': 0.06434934775692015}, 'combined': 0.04267728763153253, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.10344827586206896, 'f1': 0.16666666666666663}, 'combined': 0.11111111111111108, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:35:43.101371: step: 2/466, loss: 12.579239845275879 2023-01-22 09:35:43.742925: step: 4/466, loss: 8.355171203613281 2023-01-22 09:35:44.265462: step: 6/466, loss: 1.0959560871124268 2023-01-22 09:35:44.921240: step: 8/466, loss: 2.282801866531372 2023-01-22 09:35:45.590944: step: 10/466, loss: 0.46709614992141724 2023-01-22 09:35:46.288388: step: 12/466, loss: 4.711222171783447 2023-01-22 09:35:46.882787: step: 14/466, loss: 2.075840950012207 2023-01-22 09:35:47.482599: step: 16/466, loss: 1.3335590362548828 2023-01-22 09:35:48.130940: step: 18/466, loss: 2.6483826637268066 2023-01-22 09:35:48.813220: step: 20/466, loss: 0.9723260402679443 2023-01-22 09:35:49.476191: step: 22/466, loss: 3.485568046569824 2023-01-22 09:35:50.107565: step: 24/466, loss: 1.1184146404266357 2023-01-22 09:35:50.791796: step: 26/466, loss: 0.5919270515441895 2023-01-22 09:35:51.350826: step: 28/466, loss: 0.5985679626464844 2023-01-22 09:35:51.901835: step: 30/466, loss: 1.5386550426483154 2023-01-22 09:35:52.520055: step: 32/466, loss: 0.29626911878585815 2023-01-22 09:35:53.118304: step: 34/466, loss: 1.6548181772232056 2023-01-22 09:35:53.740583: step: 36/466, loss: 1.5368632078170776 2023-01-22 09:35:54.311778: step: 38/466, loss: 1.086187481880188 2023-01-22 09:35:54.932945: step: 40/466, loss: 3.108809232711792 2023-01-22 09:35:55.538301: step: 42/466, loss: 1.6362186670303345 2023-01-22 09:35:56.173383: step: 44/466, loss: 2.8455889225006104 2023-01-22 09:35:56.795767: step: 46/466, loss: 2.073005437850952 2023-01-22 09:35:57.366294: step: 48/466, loss: 1.2606151103973389 2023-01-22 09:35:57.996072: step: 50/466, loss: 0.48470303416252136 2023-01-22 09:35:58.562674: step: 52/466, loss: 1.1830860376358032 2023-01-22 09:35:59.177410: step: 54/466, loss: 1.5276299715042114 2023-01-22 09:35:59.817579: step: 56/466, loss: 5.069943428039551 2023-01-22 09:36:00.438427: step: 58/466, loss: 1.8566813468933105 2023-01-22 09:36:01.028729: step: 60/466, loss: 2.134775161743164 2023-01-22 09:36:01.644402: step: 62/466, loss: 1.424320936203003 2023-01-22 09:36:02.279739: step: 64/466, loss: 0.26281335949897766 2023-01-22 09:36:02.975821: step: 66/466, loss: 4.2092084884643555 2023-01-22 09:36:03.629944: step: 68/466, loss: 2.038078546524048 2023-01-22 09:36:04.237647: step: 70/466, loss: 0.6230260133743286 2023-01-22 09:36:04.841733: step: 72/466, loss: 0.9377046823501587 2023-01-22 09:36:05.565595: step: 74/466, loss: 4.913173675537109 2023-01-22 09:36:06.234702: step: 76/466, loss: 8.410189628601074 2023-01-22 09:36:06.849467: step: 78/466, loss: 3.003692150115967 2023-01-22 09:36:07.448505: step: 80/466, loss: 0.8860955238342285 2023-01-22 09:36:08.017592: step: 82/466, loss: 1.2767869234085083 2023-01-22 09:36:08.616953: step: 84/466, loss: 3.6442768573760986 2023-01-22 09:36:09.243663: step: 86/466, loss: 3.817312479019165 2023-01-22 09:36:09.867491: step: 88/466, loss: 1.678422451019287 2023-01-22 09:36:10.509174: step: 90/466, loss: 5.551665306091309 2023-01-22 09:36:11.122444: step: 92/466, loss: 2.8305606842041016 2023-01-22 09:36:11.783235: step: 94/466, loss: 0.44949406385421753 2023-01-22 09:36:12.361433: step: 96/466, loss: 3.2628884315490723 2023-01-22 09:36:12.944239: step: 98/466, loss: 0.6608309745788574 2023-01-22 09:36:13.577366: step: 100/466, loss: 3.4487357139587402 2023-01-22 09:36:14.194374: step: 102/466, loss: 2.0214779376983643 2023-01-22 09:36:14.872391: step: 104/466, loss: 1.7519176006317139 2023-01-22 09:36:15.482693: step: 106/466, loss: 0.37205448746681213 2023-01-22 09:36:16.153436: step: 108/466, loss: 3.332947254180908 2023-01-22 09:36:16.743727: step: 110/466, loss: 2.115652084350586 2023-01-22 09:36:17.358685: step: 112/466, loss: 1.5125521421432495 2023-01-22 09:36:17.979166: step: 114/466, loss: 0.2838253676891327 2023-01-22 09:36:18.578331: step: 116/466, loss: 3.1312167644500732 2023-01-22 09:36:19.159433: step: 118/466, loss: 0.916276216506958 2023-01-22 09:36:19.714415: step: 120/466, loss: 0.8722048997879028 2023-01-22 09:36:20.322915: step: 122/466, loss: 1.2177882194519043 2023-01-22 09:36:20.984005: step: 124/466, loss: 1.0140763521194458 2023-01-22 09:36:21.628443: step: 126/466, loss: 1.6305534839630127 2023-01-22 09:36:22.232276: step: 128/466, loss: 0.47239357233047485 2023-01-22 09:36:22.789579: step: 130/466, loss: 2.2214860916137695 2023-01-22 09:36:23.423568: step: 132/466, loss: 1.5602493286132812 2023-01-22 09:36:24.019119: step: 134/466, loss: 0.5952809453010559 2023-01-22 09:36:24.668097: step: 136/466, loss: 0.4576283097267151 2023-01-22 09:36:25.224024: step: 138/466, loss: 1.1963032484054565 2023-01-22 09:36:25.819167: step: 140/466, loss: 2.7550652027130127 2023-01-22 09:36:26.461084: step: 142/466, loss: 0.2905564606189728 2023-01-22 09:36:27.153149: step: 144/466, loss: 0.7401619553565979 2023-01-22 09:36:27.776524: step: 146/466, loss: 0.9804503917694092 2023-01-22 09:36:28.441029: step: 148/466, loss: 2.015255928039551 2023-01-22 09:36:29.037478: step: 150/466, loss: 0.6618127822875977 2023-01-22 09:36:29.657608: step: 152/466, loss: 0.9437558054924011 2023-01-22 09:36:30.209550: step: 154/466, loss: 5.640690803527832 2023-01-22 09:36:30.920354: step: 156/466, loss: 0.5138984322547913 2023-01-22 09:36:31.547295: step: 158/466, loss: 0.4257822334766388 2023-01-22 09:36:32.129067: step: 160/466, loss: 0.8269299864768982 2023-01-22 09:36:32.718246: step: 162/466, loss: 1.4057587385177612 2023-01-22 09:36:33.331185: step: 164/466, loss: 0.7340868711471558 2023-01-22 09:36:33.948400: step: 166/466, loss: 0.9378079175949097 2023-01-22 09:36:34.581709: step: 168/466, loss: 1.642561912536621 2023-01-22 09:36:35.204809: step: 170/466, loss: 1.8315976858139038 2023-01-22 09:36:35.817756: step: 172/466, loss: 2.245410442352295 2023-01-22 09:36:36.511165: step: 174/466, loss: 1.2697768211364746 2023-01-22 09:36:37.091178: step: 176/466, loss: 2.777341365814209 2023-01-22 09:36:37.760528: step: 178/466, loss: 0.9618198871612549 2023-01-22 09:36:38.373102: step: 180/466, loss: 2.3667354583740234 2023-01-22 09:36:39.012915: step: 182/466, loss: 1.2746222019195557 2023-01-22 09:36:39.621166: step: 184/466, loss: 3.522149085998535 2023-01-22 09:36:40.216760: step: 186/466, loss: 1.1410295963287354 2023-01-22 09:36:40.838552: step: 188/466, loss: 0.7399966716766357 2023-01-22 09:36:41.512565: step: 190/466, loss: 0.5701156258583069 2023-01-22 09:36:42.072968: step: 192/466, loss: 1.5691550970077515 2023-01-22 09:36:42.767985: step: 194/466, loss: 1.9450173377990723 2023-01-22 09:36:43.442584: step: 196/466, loss: 1.6858083009719849 2023-01-22 09:36:44.075729: step: 198/466, loss: 0.5167574882507324 2023-01-22 09:36:44.714071: step: 200/466, loss: 0.9078482389450073 2023-01-22 09:36:45.316109: step: 202/466, loss: 0.27712491154670715 2023-01-22 09:36:45.941073: step: 204/466, loss: 0.9520632028579712 2023-01-22 09:36:46.524890: step: 206/466, loss: 1.923095464706421 2023-01-22 09:36:47.101445: step: 208/466, loss: 5.610950469970703 2023-01-22 09:36:47.721828: step: 210/466, loss: 6.252904891967773 2023-01-22 09:36:48.392398: step: 212/466, loss: 1.0028800964355469 2023-01-22 09:36:49.063516: step: 214/466, loss: 0.7181891202926636 2023-01-22 09:36:49.691637: step: 216/466, loss: 0.6957169771194458 2023-01-22 09:36:50.351834: step: 218/466, loss: 2.1483540534973145 2023-01-22 09:36:50.940602: step: 220/466, loss: 1.477760910987854 2023-01-22 09:36:51.586620: step: 222/466, loss: 0.40660589933395386 2023-01-22 09:36:52.181602: step: 224/466, loss: 1.5613031387329102 2023-01-22 09:36:52.763079: step: 226/466, loss: 0.7885868549346924 2023-01-22 09:36:53.392694: step: 228/466, loss: 2.1106314659118652 2023-01-22 09:36:54.019231: step: 230/466, loss: 3.05161452293396 2023-01-22 09:36:54.649367: step: 232/466, loss: 0.373366117477417 2023-01-22 09:36:55.250491: step: 234/466, loss: 0.3329656422138214 2023-01-22 09:36:55.850137: step: 236/466, loss: 0.6629565954208374 2023-01-22 09:36:56.468923: step: 238/466, loss: 6.061126232147217 2023-01-22 09:36:57.087341: step: 240/466, loss: 4.7828497886657715 2023-01-22 09:36:57.701432: step: 242/466, loss: 3.0968270301818848 2023-01-22 09:36:58.312205: step: 244/466, loss: 2.103395462036133 2023-01-22 09:36:58.898219: step: 246/466, loss: 0.6191222071647644 2023-01-22 09:36:59.571984: step: 248/466, loss: 2.9041786193847656 2023-01-22 09:37:00.220290: step: 250/466, loss: 1.4008110761642456 2023-01-22 09:37:00.827714: step: 252/466, loss: 0.6556980013847351 2023-01-22 09:37:01.459208: step: 254/466, loss: 4.224248886108398 2023-01-22 09:37:02.113797: step: 256/466, loss: 2.330866813659668 2023-01-22 09:37:02.685535: step: 258/466, loss: 0.3894461989402771 2023-01-22 09:37:03.440946: step: 260/466, loss: 2.0729668140411377 2023-01-22 09:37:04.125630: step: 262/466, loss: 2.676603317260742 2023-01-22 09:37:04.806691: step: 264/466, loss: 2.0413904190063477 2023-01-22 09:37:05.562275: step: 266/466, loss: 1.497300624847412 2023-01-22 09:37:06.157754: step: 268/466, loss: 1.5379443168640137 2023-01-22 09:37:06.758974: step: 270/466, loss: 1.128397822380066 2023-01-22 09:37:07.356196: step: 272/466, loss: 1.7599122524261475 2023-01-22 09:37:07.954558: step: 274/466, loss: 0.8250323534011841 2023-01-22 09:37:08.556453: step: 276/466, loss: 3.5199387073516846 2023-01-22 09:37:09.210001: step: 278/466, loss: 0.5130527019500732 2023-01-22 09:37:09.833055: step: 280/466, loss: 1.7161411046981812 2023-01-22 09:37:10.472257: step: 282/466, loss: 1.1118619441986084 2023-01-22 09:37:11.077029: step: 284/466, loss: 1.6944289207458496 2023-01-22 09:37:11.681253: step: 286/466, loss: 1.8150008916854858 2023-01-22 09:37:12.444077: step: 288/466, loss: 1.1598901748657227 2023-01-22 09:37:13.084056: step: 290/466, loss: 2.952678680419922 2023-01-22 09:37:13.696348: step: 292/466, loss: 0.9587304592132568 2023-01-22 09:37:14.321495: step: 294/466, loss: 1.411131739616394 2023-01-22 09:37:14.894373: step: 296/466, loss: 0.7512630820274353 2023-01-22 09:37:15.520123: step: 298/466, loss: 0.33326366543769836 2023-01-22 09:37:16.150206: step: 300/466, loss: 0.570182740688324 2023-01-22 09:37:16.804098: step: 302/466, loss: 1.506582260131836 2023-01-22 09:37:17.506842: step: 304/466, loss: 2.3775475025177 2023-01-22 09:37:18.170209: step: 306/466, loss: 1.159224033355713 2023-01-22 09:37:18.784638: step: 308/466, loss: 1.5621230602264404 2023-01-22 09:37:19.380979: step: 310/466, loss: 1.3572973012924194 2023-01-22 09:37:19.979239: step: 312/466, loss: 0.7192381620407104 2023-01-22 09:37:20.596958: step: 314/466, loss: 0.8887008428573608 2023-01-22 09:37:21.213510: step: 316/466, loss: 0.34289538860321045 2023-01-22 09:37:21.840762: step: 318/466, loss: 1.4671778678894043 2023-01-22 09:37:22.488901: step: 320/466, loss: 4.913384914398193 2023-01-22 09:37:23.089659: step: 322/466, loss: 1.5437592267990112 2023-01-22 09:37:23.674032: step: 324/466, loss: 1.5716824531555176 2023-01-22 09:37:24.332880: step: 326/466, loss: 1.6771984100341797 2023-01-22 09:37:24.925006: step: 328/466, loss: 0.4275144338607788 2023-01-22 09:37:25.550730: step: 330/466, loss: 1.4679495096206665 2023-01-22 09:37:26.174870: step: 332/466, loss: 0.2106197476387024 2023-01-22 09:37:26.838339: step: 334/466, loss: 4.643703460693359 2023-01-22 09:37:27.421686: step: 336/466, loss: 0.6623115539550781 2023-01-22 09:37:28.003837: step: 338/466, loss: 6.546868801116943 2023-01-22 09:37:28.681781: step: 340/466, loss: 11.798778533935547 2023-01-22 09:37:29.325930: step: 342/466, loss: 0.8140530586242676 2023-01-22 09:37:29.937122: step: 344/466, loss: 1.2153860330581665 2023-01-22 09:37:30.518726: step: 346/466, loss: 4.89198112487793 2023-01-22 09:37:31.131654: step: 348/466, loss: 1.0797613859176636 2023-01-22 09:37:31.732665: step: 350/466, loss: 0.4592924118041992 2023-01-22 09:37:32.386894: step: 352/466, loss: 0.5482463836669922 2023-01-22 09:37:33.069343: step: 354/466, loss: 1.0288336277008057 2023-01-22 09:37:33.648073: step: 356/466, loss: 4.854940891265869 2023-01-22 09:37:34.195385: step: 358/466, loss: 0.894655168056488 2023-01-22 09:37:34.844476: step: 360/466, loss: 2.249600410461426 2023-01-22 09:37:35.430413: step: 362/466, loss: 2.275702476501465 2023-01-22 09:37:36.078001: step: 364/466, loss: 3.057396411895752 2023-01-22 09:37:36.680732: step: 366/466, loss: 3.021085262298584 2023-01-22 09:37:37.364047: step: 368/466, loss: 0.44216188788414 2023-01-22 09:37:37.971897: step: 370/466, loss: 0.9607877135276794 2023-01-22 09:37:38.560450: step: 372/466, loss: 1.0053690671920776 2023-01-22 09:37:39.177810: step: 374/466, loss: 2.570640802383423 2023-01-22 09:37:39.862897: step: 376/466, loss: 2.2469892501831055 2023-01-22 09:37:40.467397: step: 378/466, loss: 5.98201322555542 2023-01-22 09:37:41.105483: step: 380/466, loss: 0.677047610282898 2023-01-22 09:37:41.689393: step: 382/466, loss: 0.7705219984054565 2023-01-22 09:37:42.290621: step: 384/466, loss: 1.9747231006622314 2023-01-22 09:37:42.949094: step: 386/466, loss: 0.9605329036712646 2023-01-22 09:37:43.525981: step: 388/466, loss: 1.0569020509719849 2023-01-22 09:37:44.074494: step: 390/466, loss: 1.4205042123794556 2023-01-22 09:37:44.747097: step: 392/466, loss: 0.48208555579185486 2023-01-22 09:37:45.485117: step: 394/466, loss: 1.7804467678070068 2023-01-22 09:37:46.104156: step: 396/466, loss: 2.304171085357666 2023-01-22 09:37:46.652421: step: 398/466, loss: 10.734625816345215 2023-01-22 09:37:47.243764: step: 400/466, loss: 0.8132683038711548 2023-01-22 09:37:47.866974: step: 402/466, loss: 1.5852059125900269 2023-01-22 09:37:48.500250: step: 404/466, loss: 1.0326261520385742 2023-01-22 09:37:49.086198: step: 406/466, loss: 0.21298131346702576 2023-01-22 09:37:49.669898: step: 408/466, loss: 0.6365259885787964 2023-01-22 09:37:50.256764: step: 410/466, loss: 0.5460081696510315 2023-01-22 09:37:50.849476: step: 412/466, loss: 2.189100742340088 2023-01-22 09:37:51.483999: step: 414/466, loss: 0.9356738924980164 2023-01-22 09:37:52.074896: step: 416/466, loss: 2.1108479499816895 2023-01-22 09:37:52.647917: step: 418/466, loss: 0.6288458108901978 2023-01-22 09:37:53.261746: step: 420/466, loss: 1.7310302257537842 2023-01-22 09:37:53.974505: step: 422/466, loss: 2.0583267211914062 2023-01-22 09:37:54.532918: step: 424/466, loss: 0.34806692600250244 2023-01-22 09:37:55.166863: step: 426/466, loss: 3.313180923461914 2023-01-22 09:37:55.806894: step: 428/466, loss: 1.883329153060913 2023-01-22 09:37:56.404702: step: 430/466, loss: 0.9804830551147461 2023-01-22 09:37:57.019691: step: 432/466, loss: 0.4481969475746155 2023-01-22 09:37:57.632940: step: 434/466, loss: 3.6360621452331543 2023-01-22 09:37:58.219515: step: 436/466, loss: 1.488958716392517 2023-01-22 09:37:58.789386: step: 438/466, loss: 1.733879566192627 2023-01-22 09:37:59.335208: step: 440/466, loss: 2.5470359325408936 2023-01-22 09:37:59.970658: step: 442/466, loss: 4.520044803619385 2023-01-22 09:38:00.592058: step: 444/466, loss: 1.3759305477142334 2023-01-22 09:38:01.174579: step: 446/466, loss: 1.4914339780807495 2023-01-22 09:38:01.788967: step: 448/466, loss: 2.6907448768615723 2023-01-22 09:38:02.432454: step: 450/466, loss: 0.8654686808586121 2023-01-22 09:38:02.974389: step: 452/466, loss: 2.585897922515869 2023-01-22 09:38:03.603289: step: 454/466, loss: 3.011698007583618 2023-01-22 09:38:04.204807: step: 456/466, loss: 2.0853371620178223 2023-01-22 09:38:04.891837: step: 458/466, loss: 1.8473291397094727 2023-01-22 09:38:05.530036: step: 460/466, loss: 1.509905457496643 2023-01-22 09:38:06.112221: step: 462/466, loss: 0.45855090022087097 2023-01-22 09:38:06.716408: step: 464/466, loss: 1.5549182891845703 2023-01-22 09:38:07.355539: step: 466/466, loss: 0.8358886241912842 2023-01-22 09:38:07.965700: step: 468/466, loss: 3.466428279876709 2023-01-22 09:38:08.532882: step: 470/466, loss: 1.2473845481872559 2023-01-22 09:38:09.246803: step: 472/466, loss: 2.0258431434631348 2023-01-22 09:38:09.915335: step: 474/466, loss: 1.6379798650741577 2023-01-22 09:38:10.537620: step: 476/466, loss: 3.311021566390991 2023-01-22 09:38:11.142628: step: 478/466, loss: 0.4525070786476135 2023-01-22 09:38:11.788364: step: 480/466, loss: 0.9990955591201782 2023-01-22 09:38:12.548970: step: 482/466, loss: 0.23777519166469574 2023-01-22 09:38:13.152845: step: 484/466, loss: 6.6074652671813965 2023-01-22 09:38:13.803499: step: 486/466, loss: 2.4265973567962646 2023-01-22 09:38:14.418469: step: 488/466, loss: 1.9913256168365479 2023-01-22 09:38:15.073615: step: 490/466, loss: 2.090097665786743 2023-01-22 09:38:15.698199: step: 492/466, loss: 0.9152706861495972 2023-01-22 09:38:16.384724: step: 494/466, loss: 0.4318042993545532 2023-01-22 09:38:17.040696: step: 496/466, loss: 0.695695161819458 2023-01-22 09:38:17.629892: step: 498/466, loss: 1.1006947755813599 2023-01-22 09:38:18.329069: step: 500/466, loss: 0.4782952070236206 2023-01-22 09:38:18.941189: step: 502/466, loss: 3.6143462657928467 2023-01-22 09:38:19.539442: step: 504/466, loss: 0.45223885774612427 2023-01-22 09:38:20.197274: step: 506/466, loss: 4.147829532623291 2023-01-22 09:38:20.779864: step: 508/466, loss: 2.7985970973968506 2023-01-22 09:38:21.435631: step: 510/466, loss: 0.9218372702598572 2023-01-22 09:38:22.020309: step: 512/466, loss: 3.021129608154297 2023-01-22 09:38:22.713139: step: 514/466, loss: 2.36238956451416 2023-01-22 09:38:23.338543: step: 516/466, loss: 1.4599518775939941 2023-01-22 09:38:23.947020: step: 518/466, loss: 1.1134390830993652 2023-01-22 09:38:24.589210: step: 520/466, loss: 1.2822983264923096 2023-01-22 09:38:25.242438: step: 522/466, loss: 2.5615310668945312 2023-01-22 09:38:25.905844: step: 524/466, loss: 0.5547291040420532 2023-01-22 09:38:26.502586: step: 526/466, loss: 3.790654182434082 2023-01-22 09:38:27.162860: step: 528/466, loss: 0.6939271688461304 2023-01-22 09:38:27.789935: step: 530/466, loss: 1.255786418914795 2023-01-22 09:38:28.407939: step: 532/466, loss: 1.297980546951294 2023-01-22 09:38:29.057252: step: 534/466, loss: 2.0212531089782715 2023-01-22 09:38:29.605141: step: 536/466, loss: 1.225157380104065 2023-01-22 09:38:30.210333: step: 538/466, loss: 0.2992454767227173 2023-01-22 09:38:30.840287: step: 540/466, loss: 3.0153403282165527 2023-01-22 09:38:31.451259: step: 542/466, loss: 0.43518757820129395 2023-01-22 09:38:32.179026: step: 544/466, loss: 1.9364233016967773 2023-01-22 09:38:32.800943: step: 546/466, loss: 2.9780986309051514 2023-01-22 09:38:33.417846: step: 548/466, loss: 0.9115053415298462 2023-01-22 09:38:34.075221: step: 550/466, loss: 0.8266890048980713 2023-01-22 09:38:34.649744: step: 552/466, loss: 1.7876451015472412 2023-01-22 09:38:35.236093: step: 554/466, loss: 0.3994894027709961 2023-01-22 09:38:35.841325: step: 556/466, loss: 1.1966663599014282 2023-01-22 09:38:36.475681: step: 558/466, loss: 8.233001708984375 2023-01-22 09:38:37.102212: step: 560/466, loss: 1.256848692893982 2023-01-22 09:38:37.687913: step: 562/466, loss: 1.3873143196105957 2023-01-22 09:38:38.304514: step: 564/466, loss: 1.9018683433532715 2023-01-22 09:38:38.917676: step: 566/466, loss: 0.48335394263267517 2023-01-22 09:38:39.534160: step: 568/466, loss: 1.3220398426055908 2023-01-22 09:38:40.184161: step: 570/466, loss: 1.0557098388671875 2023-01-22 09:38:40.786733: step: 572/466, loss: 0.586812436580658 2023-01-22 09:38:41.336133: step: 574/466, loss: 1.9707565307617188 2023-01-22 09:38:41.988459: step: 576/466, loss: 0.7529993057250977 2023-01-22 09:38:42.561403: step: 578/466, loss: 1.8851854801177979 2023-01-22 09:38:43.152950: step: 580/466, loss: 1.3797357082366943 2023-01-22 09:38:43.751004: step: 582/466, loss: 3.5268735885620117 2023-01-22 09:38:44.349273: step: 584/466, loss: 1.2795867919921875 2023-01-22 09:38:44.985754: step: 586/466, loss: 1.1228471994400024 2023-01-22 09:38:45.566924: step: 588/466, loss: 0.9561172127723694 2023-01-22 09:38:46.123281: step: 590/466, loss: 1.8342702388763428 2023-01-22 09:38:46.741458: step: 592/466, loss: 2.8268752098083496 2023-01-22 09:38:47.335915: step: 594/466, loss: 0.9906534552574158 2023-01-22 09:38:47.980582: step: 596/466, loss: 1.8398644924163818 2023-01-22 09:38:48.645259: step: 598/466, loss: 0.46265068650245667 2023-01-22 09:38:49.270557: step: 600/466, loss: 0.5323865413665771 2023-01-22 09:38:49.875849: step: 602/466, loss: 1.052520751953125 2023-01-22 09:38:50.464186: step: 604/466, loss: 9.658276557922363 2023-01-22 09:38:51.129082: step: 606/466, loss: 0.883283257484436 2023-01-22 09:38:51.720724: step: 608/466, loss: 0.7546254396438599 2023-01-22 09:38:52.380646: step: 610/466, loss: 6.600062847137451 2023-01-22 09:38:53.025841: step: 612/466, loss: 5.850398063659668 2023-01-22 09:38:53.670732: step: 614/466, loss: 0.40383315086364746 2023-01-22 09:38:54.275852: step: 616/466, loss: 2.252748966217041 2023-01-22 09:38:54.828764: step: 618/466, loss: 0.9612758755683899 2023-01-22 09:38:55.439177: step: 620/466, loss: 0.3715060353279114 2023-01-22 09:38:56.063920: step: 622/466, loss: 1.1357539892196655 2023-01-22 09:38:56.733041: step: 624/466, loss: 0.7216935157775879 2023-01-22 09:38:57.301700: step: 626/466, loss: 2.094508171081543 2023-01-22 09:38:57.936236: step: 628/466, loss: 3.520603656768799 2023-01-22 09:38:58.524918: step: 630/466, loss: 0.35452020168304443 2023-01-22 09:38:59.132387: step: 632/466, loss: 1.1755645275115967 2023-01-22 09:38:59.713176: step: 634/466, loss: 2.9755730628967285 2023-01-22 09:39:00.321079: step: 636/466, loss: 0.28536292910575867 2023-01-22 09:39:00.929943: step: 638/466, loss: 0.6651081442832947 2023-01-22 09:39:01.507729: step: 640/466, loss: 1.1705361604690552 2023-01-22 09:39:02.149915: step: 642/466, loss: 1.835726261138916 2023-01-22 09:39:02.777515: step: 644/466, loss: 1.3732147216796875 2023-01-22 09:39:03.455027: step: 646/466, loss: 0.21464434266090393 2023-01-22 09:39:04.017451: step: 648/466, loss: 0.518509030342102 2023-01-22 09:39:04.599266: step: 650/466, loss: 1.4306645393371582 2023-01-22 09:39:05.202703: step: 652/466, loss: 2.33333683013916 2023-01-22 09:39:05.757033: step: 654/466, loss: 2.096909761428833 2023-01-22 09:39:06.425942: step: 656/466, loss: 1.3629062175750732 2023-01-22 09:39:07.040691: step: 658/466, loss: 2.293306350708008 2023-01-22 09:39:07.663415: step: 660/466, loss: 1.3752634525299072 2023-01-22 09:39:08.280036: step: 662/466, loss: 0.5292986035346985 2023-01-22 09:39:08.954563: step: 664/466, loss: 0.8116971850395203 2023-01-22 09:39:09.559983: step: 666/466, loss: 1.6029118299484253 2023-01-22 09:39:10.148850: step: 668/466, loss: 0.9917951822280884 2023-01-22 09:39:10.816771: step: 670/466, loss: 10.244860649108887 2023-01-22 09:39:11.423897: step: 672/466, loss: 1.4981614351272583 2023-01-22 09:39:12.021111: step: 674/466, loss: 1.0083128213882446 2023-01-22 09:39:12.598519: step: 676/466, loss: 6.35294771194458 2023-01-22 09:39:13.209392: step: 678/466, loss: 3.928889751434326 2023-01-22 09:39:13.758758: step: 680/466, loss: 0.8590149283409119 2023-01-22 09:39:14.337704: step: 682/466, loss: 0.2028389275074005 2023-01-22 09:39:14.982895: step: 684/466, loss: 0.860236644744873 2023-01-22 09:39:15.503927: step: 686/466, loss: 2.617568254470825 2023-01-22 09:39:16.130184: step: 688/466, loss: 1.339379906654358 2023-01-22 09:39:16.724063: step: 690/466, loss: 2.5586915016174316 2023-01-22 09:39:17.347662: step: 692/466, loss: 3.329720973968506 2023-01-22 09:39:17.922303: step: 694/466, loss: 1.036975622177124 2023-01-22 09:39:18.622802: step: 696/466, loss: 1.7532050609588623 2023-01-22 09:39:19.275802: step: 698/466, loss: 1.2536813020706177 2023-01-22 09:39:19.894935: step: 700/466, loss: 1.6638786792755127 2023-01-22 09:39:20.473524: step: 702/466, loss: 0.9238472580909729 2023-01-22 09:39:21.132006: step: 704/466, loss: 0.3592261075973511 2023-01-22 09:39:21.732971: step: 706/466, loss: 3.991269588470459 2023-01-22 09:39:22.326140: step: 708/466, loss: 4.487711429595947 2023-01-22 09:39:22.884612: step: 710/466, loss: 1.7793514728546143 2023-01-22 09:39:23.513729: step: 712/466, loss: 4.636097431182861 2023-01-22 09:39:24.153525: step: 714/466, loss: 3.309096097946167 2023-01-22 09:39:24.757607: step: 716/466, loss: 0.39011961221694946 2023-01-22 09:39:25.404528: step: 718/466, loss: 2.96675968170166 2023-01-22 09:39:25.993749: step: 720/466, loss: 2.651958465576172 2023-01-22 09:39:26.647330: step: 722/466, loss: 2.880375623703003 2023-01-22 09:39:27.271288: step: 724/466, loss: 1.8882092237472534 2023-01-22 09:39:27.877488: step: 726/466, loss: 0.37530142068862915 2023-01-22 09:39:28.454703: step: 728/466, loss: 1.3665651082992554 2023-01-22 09:39:29.056233: step: 730/466, loss: 0.3963625431060791 2023-01-22 09:39:29.646441: step: 732/466, loss: 5.215664386749268 2023-01-22 09:39:30.238464: step: 734/466, loss: 1.9291287660598755 2023-01-22 09:39:30.875428: step: 736/466, loss: 0.7714915871620178 2023-01-22 09:39:31.462711: step: 738/466, loss: 1.3604142665863037 2023-01-22 09:39:32.122086: step: 740/466, loss: 0.4155275821685791 2023-01-22 09:39:32.734719: step: 742/466, loss: 1.0549376010894775 2023-01-22 09:39:33.431047: step: 744/466, loss: 0.4867190718650818 2023-01-22 09:39:34.109554: step: 746/466, loss: 1.5686765909194946 2023-01-22 09:39:34.675021: step: 748/466, loss: 1.4739316701889038 2023-01-22 09:39:35.294675: step: 750/466, loss: 0.34707027673721313 2023-01-22 09:39:35.907879: step: 752/466, loss: 5.242847442626953 2023-01-22 09:39:36.497875: step: 754/466, loss: 0.6374714970588684 2023-01-22 09:39:37.089887: step: 756/466, loss: 1.4829870462417603 2023-01-22 09:39:37.744847: step: 758/466, loss: 6.024981498718262 2023-01-22 09:39:38.321997: step: 760/466, loss: 1.0136044025421143 2023-01-22 09:39:38.920610: step: 762/466, loss: 0.5662339329719543 2023-01-22 09:39:39.552163: step: 764/466, loss: 2.2438929080963135 2023-01-22 09:39:40.214454: step: 766/466, loss: 0.7552876472473145 2023-01-22 09:39:40.879167: step: 768/466, loss: 1.6896439790725708 2023-01-22 09:39:41.450904: step: 770/466, loss: 10.491397857666016 2023-01-22 09:39:42.045510: step: 772/466, loss: 0.5391125679016113 2023-01-22 09:39:42.749309: step: 774/466, loss: 1.70233154296875 2023-01-22 09:39:43.344793: step: 776/466, loss: 0.6633647680282593 2023-01-22 09:39:43.966905: step: 778/466, loss: 0.354541152715683 2023-01-22 09:39:44.619594: step: 780/466, loss: 1.1705315113067627 2023-01-22 09:39:45.226721: step: 782/466, loss: 1.2291159629821777 2023-01-22 09:39:45.837533: step: 784/466, loss: 2.6389012336730957 2023-01-22 09:39:46.402589: step: 786/466, loss: 0.4652237892150879 2023-01-22 09:39:47.025175: step: 788/466, loss: 1.0216243267059326 2023-01-22 09:39:47.583027: step: 790/466, loss: 0.6143762469291687 2023-01-22 09:39:48.239441: step: 792/466, loss: 3.523286819458008 2023-01-22 09:39:48.910286: step: 794/466, loss: 2.3982529640197754 2023-01-22 09:39:49.570499: step: 796/466, loss: 0.6625871062278748 2023-01-22 09:39:50.165644: step: 798/466, loss: 0.9518342614173889 2023-01-22 09:39:50.790728: step: 800/466, loss: 1.6259984970092773 2023-01-22 09:39:51.472534: step: 802/466, loss: 0.7471064329147339 2023-01-22 09:39:52.128623: step: 804/466, loss: 1.2354010343551636 2023-01-22 09:39:52.961429: step: 806/466, loss: 0.4772886037826538 2023-01-22 09:39:53.609033: step: 808/466, loss: 1.1822104454040527 2023-01-22 09:39:54.246196: step: 810/466, loss: 0.5853030681610107 2023-01-22 09:39:54.856461: step: 812/466, loss: 1.3605884313583374 2023-01-22 09:39:55.415856: step: 814/466, loss: 3.5283548831939697 2023-01-22 09:39:56.029546: step: 816/466, loss: 3.4050121307373047 2023-01-22 09:39:56.648397: step: 818/466, loss: 4.525536060333252 2023-01-22 09:39:57.277708: step: 820/466, loss: 1.727461576461792 2023-01-22 09:39:57.867751: step: 822/466, loss: 1.5522887706756592 2023-01-22 09:39:58.584494: step: 824/466, loss: 2.8800466060638428 2023-01-22 09:39:59.146762: step: 826/466, loss: 4.962917327880859 2023-01-22 09:39:59.776506: step: 828/466, loss: 0.2558228075504303 2023-01-22 09:40:00.377256: step: 830/466, loss: 0.6200178265571594 2023-01-22 09:40:01.027281: step: 832/466, loss: 9.803146362304688 2023-01-22 09:40:01.647970: step: 834/466, loss: 0.5240775346755981 2023-01-22 09:40:02.294625: step: 836/466, loss: 0.3322848677635193 2023-01-22 09:40:02.937981: step: 838/466, loss: 0.4767006039619446 2023-01-22 09:40:03.531309: step: 840/466, loss: 0.3725050687789917 2023-01-22 09:40:04.132459: step: 842/466, loss: 1.8058209419250488 2023-01-22 09:40:04.673238: step: 844/466, loss: 1.364620566368103 2023-01-22 09:40:05.438357: step: 846/466, loss: 0.6291117668151855 2023-01-22 09:40:06.066007: step: 848/466, loss: 0.9351247549057007 2023-01-22 09:40:06.764263: step: 850/466, loss: 0.8235954642295837 2023-01-22 09:40:07.347961: step: 852/466, loss: 0.7105476260185242 2023-01-22 09:40:08.036968: step: 854/466, loss: 0.8148956894874573 2023-01-22 09:40:08.618452: step: 856/466, loss: 0.2897741496562958 2023-01-22 09:40:09.254243: step: 858/466, loss: 1.7938950061798096 2023-01-22 09:40:09.793943: step: 860/466, loss: 1.265045166015625 2023-01-22 09:40:10.385618: step: 862/466, loss: 0.6019369959831238 2023-01-22 09:40:10.930371: step: 864/466, loss: 0.36597803235054016 2023-01-22 09:40:11.624596: step: 866/466, loss: 1.755831241607666 2023-01-22 09:40:12.246124: step: 868/466, loss: 0.627671480178833 2023-01-22 09:40:12.866707: step: 870/466, loss: 1.4150893688201904 2023-01-22 09:40:13.499819: step: 872/466, loss: 5.4517316818237305 2023-01-22 09:40:14.127747: step: 874/466, loss: 9.735913276672363 2023-01-22 09:40:14.710222: step: 876/466, loss: 3.080693244934082 2023-01-22 09:40:15.393280: step: 878/466, loss: 1.8908751010894775 2023-01-22 09:40:16.008795: step: 880/466, loss: 1.8814504146575928 2023-01-22 09:40:16.624186: step: 882/466, loss: 0.810623049736023 2023-01-22 09:40:17.236248: step: 884/466, loss: 0.9066575765609741 2023-01-22 09:40:17.865989: step: 886/466, loss: 0.49293220043182373 2023-01-22 09:40:18.536915: step: 888/466, loss: 5.225826740264893 2023-01-22 09:40:19.131624: step: 890/466, loss: 0.3955407738685608 2023-01-22 09:40:19.780696: step: 892/466, loss: 1.4828016757965088 2023-01-22 09:40:20.394622: step: 894/466, loss: 2.075650930404663 2023-01-22 09:40:20.985591: step: 896/466, loss: 1.8842453956604004 2023-01-22 09:40:21.623210: step: 898/466, loss: 0.8249079585075378 2023-01-22 09:40:22.261887: step: 900/466, loss: 1.673976182937622 2023-01-22 09:40:22.869921: step: 902/466, loss: 1.0010340213775635 2023-01-22 09:40:23.446610: step: 904/466, loss: 0.965110182762146 2023-01-22 09:40:24.056205: step: 906/466, loss: 0.7587573528289795 2023-01-22 09:40:24.757926: step: 908/466, loss: 2.1170947551727295 2023-01-22 09:40:25.362403: step: 910/466, loss: 3.88934326171875 2023-01-22 09:40:25.975987: step: 912/466, loss: 1.153349757194519 2023-01-22 09:40:26.563742: step: 914/466, loss: 1.6431703567504883 2023-01-22 09:40:27.125047: step: 916/466, loss: 0.47427424788475037 2023-01-22 09:40:27.786984: step: 918/466, loss: 1.332406997680664 2023-01-22 09:40:28.386072: step: 920/466, loss: 0.9475909471511841 2023-01-22 09:40:29.003785: step: 922/466, loss: 1.633178949356079 2023-01-22 09:40:29.658364: step: 924/466, loss: 1.9771479368209839 2023-01-22 09:40:30.254794: step: 926/466, loss: 0.8867887854576111 2023-01-22 09:40:30.861229: step: 928/466, loss: 1.328560709953308 2023-01-22 09:40:31.433441: step: 930/466, loss: 2.47381591796875 2023-01-22 09:40:32.087210: step: 932/466, loss: 2.4950668811798096 ================================================== Loss: 1.902 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36978735830269605, 'r': 0.17929084038918597, 'f1': 0.24149378501400556}, 'combined': 0.1779427889576883, 'epoch': 1} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3743471859300341, 'r': 0.20038205649611118, 'f1': 0.2610358179915709}, 'combined': 0.1731222005332698, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36697700496192326, 'r': 0.14637457097725104, 'f1': 0.2092760758026103}, 'combined': 0.13951738386840684, 'epoch': 1} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3863158955721628, 'r': 0.18916845488860637, 'f1': 0.25397313065344956}, 'combined': 0.16575088526856707, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37588245137412396, 'r': 0.17726274695484254, 'f1': 0.2409130769424887}, 'combined': 0.1775148987997285, 'epoch': 1} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.37389101637702665, 'r': 0.1882325684858525, 'f1': 0.2504021117121472}, 'combined': 0.16606979429613905, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2675438596491228, 'r': 0.14523809523809522, 'f1': 0.1882716049382716}, 'combined': 0.12551440329218105, 'epoch': 1} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.07608695652173914, 'f1': 0.1346153846153846}, 'combined': 0.08974358974358973, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.034482758620689655, 'f1': 0.0606060606060606}, 'combined': 0.0404040404040404, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36978735830269605, 'r': 0.17929084038918597, 'f1': 0.24149378501400556}, 'combined': 0.1779427889576883, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3743471859300341, 'r': 0.20038205649611118, 'f1': 0.2610358179915709}, 'combined': 0.1731222005332698, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2675438596491228, 'r': 0.14523809523809522, 'f1': 0.1882716049382716}, 'combined': 0.12551440329218105, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36697700496192326, 'r': 0.14637457097725104, 'f1': 0.2092760758026103}, 'combined': 0.13951738386840684, 'epoch': 1} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3863158955721628, 'r': 0.18916845488860637, 'f1': 0.25397313065344956}, 'combined': 0.16575088526856707, 'epoch': 1} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.07608695652173914, 'f1': 0.1346153846153846}, 'combined': 0.08974358974358973, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37588245137412396, 'r': 0.17726274695484254, 'f1': 0.2409130769424887}, 'combined': 0.1775148987997285, 'epoch': 1} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.37389101637702665, 'r': 0.1882325684858525, 'f1': 0.2504021117121472}, 'combined': 0.16606979429613905, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.034482758620689655, 'f1': 0.0606060606060606}, 'combined': 0.0404040404040404, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:43:10.946328: step: 2/466, loss: 0.3859618306159973 2023-01-22 09:43:11.563654: step: 4/466, loss: 0.5589725971221924 2023-01-22 09:43:12.216715: step: 6/466, loss: 1.2912181615829468 2023-01-22 09:43:12.952003: step: 8/466, loss: 1.1314879655838013 2023-01-22 09:43:13.559836: step: 10/466, loss: 1.917891263961792 2023-01-22 09:43:14.171110: step: 12/466, loss: 1.408381700515747 2023-01-22 09:43:14.754075: step: 14/466, loss: 0.5122565031051636 2023-01-22 09:43:15.349446: step: 16/466, loss: 0.4245198369026184 2023-01-22 09:43:15.975564: step: 18/466, loss: 1.6285946369171143 2023-01-22 09:43:16.656162: step: 20/466, loss: 0.49963173270225525 2023-01-22 09:43:17.258116: step: 22/466, loss: 0.4725305736064911 2023-01-22 09:43:17.986639: step: 24/466, loss: 0.6637438535690308 2023-01-22 09:43:18.658343: step: 26/466, loss: 0.7057062983512878 2023-01-22 09:43:19.364614: step: 28/466, loss: 1.5215015411376953 2023-01-22 09:43:20.023887: step: 30/466, loss: 0.8151401877403259 2023-01-22 09:43:20.651927: step: 32/466, loss: 0.8009874820709229 2023-01-22 09:43:21.325970: step: 34/466, loss: 1.2140456438064575 2023-01-22 09:43:21.972117: step: 36/466, loss: 3.2386741638183594 2023-01-22 09:43:22.593362: step: 38/466, loss: 0.6042141318321228 2023-01-22 09:43:23.193872: step: 40/466, loss: 1.747312307357788 2023-01-22 09:43:23.785376: step: 42/466, loss: 1.4219690561294556 2023-01-22 09:43:24.370560: step: 44/466, loss: 1.022874355316162 2023-01-22 09:43:25.039591: step: 46/466, loss: 2.0148932933807373 2023-01-22 09:43:25.655835: step: 48/466, loss: 1.0092506408691406 2023-01-22 09:43:26.200994: step: 50/466, loss: 0.8231704831123352 2023-01-22 09:43:26.832397: step: 52/466, loss: 0.3929067552089691 2023-01-22 09:43:27.519403: step: 54/466, loss: 2.7094404697418213 2023-01-22 09:43:28.144625: step: 56/466, loss: 0.7950037121772766 2023-01-22 09:43:28.846814: step: 58/466, loss: 1.2933663129806519 2023-01-22 09:43:29.502295: step: 60/466, loss: 1.3285505771636963 2023-01-22 09:43:30.112848: step: 62/466, loss: 1.7081013917922974 2023-01-22 09:43:30.701022: step: 64/466, loss: 4.110572338104248 2023-01-22 09:43:31.343510: step: 66/466, loss: 0.8001346588134766 2023-01-22 09:43:31.992241: step: 68/466, loss: 0.3810636103153229 2023-01-22 09:43:32.676222: step: 70/466, loss: 0.4169490933418274 2023-01-22 09:43:33.355640: step: 72/466, loss: 1.4064998626708984 2023-01-22 09:43:33.966177: step: 74/466, loss: 1.4400849342346191 2023-01-22 09:43:34.584958: step: 76/466, loss: 3.868173837661743 2023-01-22 09:43:35.205840: step: 78/466, loss: 0.9741390943527222 2023-01-22 09:43:35.865359: step: 80/466, loss: 0.9136323928833008 2023-01-22 09:43:36.489046: step: 82/466, loss: 1.366288661956787 2023-01-22 09:43:37.187188: step: 84/466, loss: 2.6454687118530273 2023-01-22 09:43:37.841899: step: 86/466, loss: 0.6054922342300415 2023-01-22 09:43:38.530809: step: 88/466, loss: 2.3126707077026367 2023-01-22 09:43:39.197056: step: 90/466, loss: 3.112882137298584 2023-01-22 09:43:39.821975: step: 92/466, loss: 1.7007570266723633 2023-01-22 09:43:40.470782: step: 94/466, loss: 7.418228626251221 2023-01-22 09:43:41.032174: step: 96/466, loss: 2.902988910675049 2023-01-22 09:43:41.592324: step: 98/466, loss: 0.9322847127914429 2023-01-22 09:43:42.228005: step: 100/466, loss: 1.4995085000991821 2023-01-22 09:43:42.837930: step: 102/466, loss: 2.071957588195801 2023-01-22 09:43:43.404505: step: 104/466, loss: 0.431795597076416 2023-01-22 09:43:44.055658: step: 106/466, loss: 1.375353217124939 2023-01-22 09:43:44.688024: step: 108/466, loss: 0.3198004364967346 2023-01-22 09:43:45.300376: step: 110/466, loss: 0.9918296933174133 2023-01-22 09:43:45.891992: step: 112/466, loss: 0.3641217052936554 2023-01-22 09:43:46.500890: step: 114/466, loss: 1.6604777574539185 2023-01-22 09:43:47.149315: step: 116/466, loss: 0.3456338047981262 2023-01-22 09:43:47.833041: step: 118/466, loss: 7.719141483306885 2023-01-22 09:43:48.410587: step: 120/466, loss: 0.28431424498558044 2023-01-22 09:43:49.051417: step: 122/466, loss: 0.3917914628982544 2023-01-22 09:43:49.597380: step: 124/466, loss: 1.2277631759643555 2023-01-22 09:43:50.195521: step: 126/466, loss: 1.6502057313919067 2023-01-22 09:43:50.846127: step: 128/466, loss: 0.8259530067443848 2023-01-22 09:43:51.505750: step: 130/466, loss: 1.7057901620864868 2023-01-22 09:43:52.166494: step: 132/466, loss: 0.5677308440208435 2023-01-22 09:43:52.875021: step: 134/466, loss: 1.5732409954071045 2023-01-22 09:43:53.536938: step: 136/466, loss: 5.119172096252441 2023-01-22 09:43:54.108642: step: 138/466, loss: 3.7913308143615723 2023-01-22 09:43:54.799336: step: 140/466, loss: 0.9256842136383057 2023-01-22 09:43:55.448746: step: 142/466, loss: 0.32183602452278137 2023-01-22 09:43:56.105767: step: 144/466, loss: 1.5022695064544678 2023-01-22 09:43:56.715650: step: 146/466, loss: 0.835024893283844 2023-01-22 09:43:57.368317: step: 148/466, loss: 2.1166932582855225 2023-01-22 09:43:58.045689: step: 150/466, loss: 1.9419481754302979 2023-01-22 09:43:58.695649: step: 152/466, loss: 0.6141785979270935 2023-01-22 09:43:59.313453: step: 154/466, loss: 0.9153237342834473 2023-01-22 09:43:59.937846: step: 156/466, loss: 1.6009995937347412 2023-01-22 09:44:00.540139: step: 158/466, loss: 0.41681626439094543 2023-01-22 09:44:01.094777: step: 160/466, loss: 0.34275585412979126 2023-01-22 09:44:01.782375: step: 162/466, loss: 1.5234066247940063 2023-01-22 09:44:02.451462: step: 164/466, loss: 0.6240906715393066 2023-01-22 09:44:03.108248: step: 166/466, loss: 0.7960733771324158 2023-01-22 09:44:03.751476: step: 168/466, loss: 0.31994709372520447 2023-01-22 09:44:04.457602: step: 170/466, loss: 0.8821254968643188 2023-01-22 09:44:05.132551: step: 172/466, loss: 1.0889562368392944 2023-01-22 09:44:05.779100: step: 174/466, loss: 7.21901273727417 2023-01-22 09:44:06.420013: step: 176/466, loss: 1.2003504037857056 2023-01-22 09:44:07.051269: step: 178/466, loss: 0.48017793893814087 2023-01-22 09:44:07.690698: step: 180/466, loss: 1.6653739213943481 2023-01-22 09:44:08.357545: step: 182/466, loss: 0.6655115485191345 2023-01-22 09:44:09.060558: step: 184/466, loss: 0.6897805333137512 2023-01-22 09:44:09.683399: step: 186/466, loss: 0.3107963502407074 2023-01-22 09:44:10.384539: step: 188/466, loss: 0.5084014534950256 2023-01-22 09:44:10.988546: step: 190/466, loss: 5.032703876495361 2023-01-22 09:44:11.740472: step: 192/466, loss: 1.2486380338668823 2023-01-22 09:44:12.332287: step: 194/466, loss: 0.20551693439483643 2023-01-22 09:44:12.900030: step: 196/466, loss: 1.0858979225158691 2023-01-22 09:44:13.626658: step: 198/466, loss: 0.31415680050849915 2023-01-22 09:44:14.250379: step: 200/466, loss: 1.15254807472229 2023-01-22 09:44:14.827039: step: 202/466, loss: 2.475886344909668 2023-01-22 09:44:15.450767: step: 204/466, loss: 1.0149493217468262 2023-01-22 09:44:16.098521: step: 206/466, loss: 1.486482858657837 2023-01-22 09:44:16.747374: step: 208/466, loss: 0.2678473889827728 2023-01-22 09:44:17.308732: step: 210/466, loss: 0.3779557943344116 2023-01-22 09:44:17.942892: step: 212/466, loss: 1.080670952796936 2023-01-22 09:44:18.578811: step: 214/466, loss: 0.9954599142074585 2023-01-22 09:44:19.185921: step: 216/466, loss: 4.8555684089660645 2023-01-22 09:44:19.739049: step: 218/466, loss: 1.719361424446106 2023-01-22 09:44:20.341661: step: 220/466, loss: 0.8947339653968811 2023-01-22 09:44:20.972397: step: 222/466, loss: 1.861836314201355 2023-01-22 09:44:21.674216: step: 224/466, loss: 1.4084279537200928 2023-01-22 09:44:22.276325: step: 226/466, loss: 1.3126344680786133 2023-01-22 09:44:22.919159: step: 228/466, loss: 0.772314190864563 2023-01-22 09:44:23.541424: step: 230/466, loss: 0.6389649510383606 2023-01-22 09:44:24.098274: step: 232/466, loss: 0.7215180397033691 2023-01-22 09:44:24.741043: step: 234/466, loss: 0.9780803322792053 2023-01-22 09:44:25.293471: step: 236/466, loss: 6.474110126495361 2023-01-22 09:44:25.911607: step: 238/466, loss: 0.9038907885551453 2023-01-22 09:44:26.579833: step: 240/466, loss: 0.8878388404846191 2023-01-22 09:44:27.185971: step: 242/466, loss: 0.41521763801574707 2023-01-22 09:44:27.774886: step: 244/466, loss: 1.3140779733657837 2023-01-22 09:44:28.419115: step: 246/466, loss: 0.25495314598083496 2023-01-22 09:44:29.005160: step: 248/466, loss: 0.9745254516601562 2023-01-22 09:44:29.649729: step: 250/466, loss: 0.3536645174026489 2023-01-22 09:44:30.311197: step: 252/466, loss: 0.2425452321767807 2023-01-22 09:44:30.910102: step: 254/466, loss: 3.70131516456604 2023-01-22 09:44:31.552466: step: 256/466, loss: 3.986081600189209 2023-01-22 09:44:32.147163: step: 258/466, loss: 1.9067820310592651 2023-01-22 09:44:32.748983: step: 260/466, loss: 0.39824968576431274 2023-01-22 09:44:33.329937: step: 262/466, loss: 0.8292238712310791 2023-01-22 09:44:33.882317: step: 264/466, loss: 2.207139492034912 2023-01-22 09:44:34.467676: step: 266/466, loss: 1.1207127571105957 2023-01-22 09:44:34.991832: step: 268/466, loss: 0.8023765087127686 2023-01-22 09:44:35.596184: step: 270/466, loss: 2.832197427749634 2023-01-22 09:44:36.182059: step: 272/466, loss: 1.586511492729187 2023-01-22 09:44:36.747744: step: 274/466, loss: 0.8086563348770142 2023-01-22 09:44:37.328320: step: 276/466, loss: 1.5293811559677124 2023-01-22 09:44:37.907463: step: 278/466, loss: 1.3649612665176392 2023-01-22 09:44:38.487231: step: 280/466, loss: 1.25004243850708 2023-01-22 09:44:39.162577: step: 282/466, loss: 0.8976415395736694 2023-01-22 09:44:39.812723: step: 284/466, loss: 1.1052522659301758 2023-01-22 09:44:40.445477: step: 286/466, loss: 3.7007155418395996 2023-01-22 09:44:41.017296: step: 288/466, loss: 0.9899224042892456 2023-01-22 09:44:41.620692: step: 290/466, loss: 1.1725620031356812 2023-01-22 09:44:42.264098: step: 292/466, loss: 0.30295124650001526 2023-01-22 09:44:42.846459: step: 294/466, loss: 0.42656242847442627 2023-01-22 09:44:43.477822: step: 296/466, loss: 2.2831902503967285 2023-01-22 09:44:44.121429: step: 298/466, loss: 0.574013352394104 2023-01-22 09:44:44.726138: step: 300/466, loss: 5.317529678344727 2023-01-22 09:44:45.301261: step: 302/466, loss: 0.7478150129318237 2023-01-22 09:44:45.902171: step: 304/466, loss: 1.5402438640594482 2023-01-22 09:44:46.549700: step: 306/466, loss: 0.8089839220046997 2023-01-22 09:44:47.233391: step: 308/466, loss: 2.3785948753356934 2023-01-22 09:44:47.824149: step: 310/466, loss: 0.2538207173347473 2023-01-22 09:44:48.416518: step: 312/466, loss: 1.2879310846328735 2023-01-22 09:44:49.029649: step: 314/466, loss: 0.27034011483192444 2023-01-22 09:44:49.625886: step: 316/466, loss: 1.3283967971801758 2023-01-22 09:44:50.257204: step: 318/466, loss: 0.9468340873718262 2023-01-22 09:44:50.862244: step: 320/466, loss: 0.8612461686134338 2023-01-22 09:44:51.454017: step: 322/466, loss: 0.9248701333999634 2023-01-22 09:44:52.055840: step: 324/466, loss: 0.49967315793037415 2023-01-22 09:44:52.655306: step: 326/466, loss: 3.6828291416168213 2023-01-22 09:44:53.348463: step: 328/466, loss: 0.54094398021698 2023-01-22 09:44:53.926413: step: 330/466, loss: 0.9822638630867004 2023-01-22 09:44:54.498306: step: 332/466, loss: 0.935371458530426 2023-01-22 09:44:55.106678: step: 334/466, loss: 0.9043583869934082 2023-01-22 09:44:55.679656: step: 336/466, loss: 1.2238879203796387 2023-01-22 09:44:56.352140: step: 338/466, loss: 0.562913715839386 2023-01-22 09:44:56.957333: step: 340/466, loss: 1.2473548650741577 2023-01-22 09:44:57.585470: step: 342/466, loss: 2.6486573219299316 2023-01-22 09:44:58.215775: step: 344/466, loss: 1.0722802877426147 2023-01-22 09:44:58.941187: step: 346/466, loss: 1.7376291751861572 2023-01-22 09:44:59.594766: step: 348/466, loss: 2.0604095458984375 2023-01-22 09:45:00.247656: step: 350/466, loss: 1.029964566230774 2023-01-22 09:45:00.803971: step: 352/466, loss: 1.4632513523101807 2023-01-22 09:45:01.419585: step: 354/466, loss: 1.3040094375610352 2023-01-22 09:45:01.972433: step: 356/466, loss: 1.9913289546966553 2023-01-22 09:45:02.631176: step: 358/466, loss: 0.7091863751411438 2023-01-22 09:45:03.183420: step: 360/466, loss: 1.2750763893127441 2023-01-22 09:45:03.790382: step: 362/466, loss: 3.3938052654266357 2023-01-22 09:45:04.406513: step: 364/466, loss: 1.2354358434677124 2023-01-22 09:45:05.058453: step: 366/466, loss: 6.258620262145996 2023-01-22 09:45:05.619822: step: 368/466, loss: 0.35919690132141113 2023-01-22 09:45:06.262645: step: 370/466, loss: 1.1379178762435913 2023-01-22 09:45:06.876448: step: 372/466, loss: 0.9073055982589722 2023-01-22 09:45:07.465439: step: 374/466, loss: 1.434647798538208 2023-01-22 09:45:08.044177: step: 376/466, loss: 0.6871436238288879 2023-01-22 09:45:08.608913: step: 378/466, loss: 0.7046130299568176 2023-01-22 09:45:09.199202: step: 380/466, loss: 3.336244583129883 2023-01-22 09:45:09.822404: step: 382/466, loss: 0.2550167739391327 2023-01-22 09:45:10.498365: step: 384/466, loss: 0.6727685332298279 2023-01-22 09:45:11.120327: step: 386/466, loss: 2.2739858627319336 2023-01-22 09:45:11.691582: step: 388/466, loss: 5.542956352233887 2023-01-22 09:45:12.328009: step: 390/466, loss: 0.6057764291763306 2023-01-22 09:45:13.061990: step: 392/466, loss: 1.7758235931396484 2023-01-22 09:45:13.656694: step: 394/466, loss: 0.5540112257003784 2023-01-22 09:45:14.320873: step: 396/466, loss: 0.48421353101730347 2023-01-22 09:45:14.917640: step: 398/466, loss: 3.0981218814849854 2023-01-22 09:45:15.492950: step: 400/466, loss: 2.1436336040496826 2023-01-22 09:45:16.116454: step: 402/466, loss: 0.9988797903060913 2023-01-22 09:45:16.732618: step: 404/466, loss: 1.479414463043213 2023-01-22 09:45:17.334727: step: 406/466, loss: 1.2433871030807495 2023-01-22 09:45:17.985647: step: 408/466, loss: 0.9100664854049683 2023-01-22 09:45:18.627727: step: 410/466, loss: 4.007172584533691 2023-01-22 09:45:19.224422: step: 412/466, loss: 0.5805957317352295 2023-01-22 09:45:19.832389: step: 414/466, loss: 0.994168221950531 2023-01-22 09:45:20.521999: step: 416/466, loss: 1.7388842105865479 2023-01-22 09:45:21.145765: step: 418/466, loss: 1.2173644304275513 2023-01-22 09:45:21.761532: step: 420/466, loss: 7.888503074645996 2023-01-22 09:45:22.411433: step: 422/466, loss: 0.9034971594810486 2023-01-22 09:45:23.015485: step: 424/466, loss: 12.167634963989258 2023-01-22 09:45:23.566754: step: 426/466, loss: 1.0801763534545898 2023-01-22 09:45:24.155823: step: 428/466, loss: 2.7184035778045654 2023-01-22 09:45:24.772584: step: 430/466, loss: 0.380400151014328 2023-01-22 09:45:25.307603: step: 432/466, loss: 0.9667631983757019 2023-01-22 09:45:25.952803: step: 434/466, loss: 1.403149127960205 2023-01-22 09:45:26.553517: step: 436/466, loss: 2.0224409103393555 2023-01-22 09:45:27.296450: step: 438/466, loss: 1.8769458532333374 2023-01-22 09:45:27.972651: step: 440/466, loss: 4.415177822113037 2023-01-22 09:45:28.656125: step: 442/466, loss: 2.260067939758301 2023-01-22 09:45:29.227924: step: 444/466, loss: 3.9543333053588867 2023-01-22 09:45:29.868684: step: 446/466, loss: 2.0179100036621094 2023-01-22 09:45:30.510136: step: 448/466, loss: 1.4100654125213623 2023-01-22 09:45:31.110449: step: 450/466, loss: 5.165609836578369 2023-01-22 09:45:31.740955: step: 452/466, loss: 1.6865736246109009 2023-01-22 09:45:32.431219: step: 454/466, loss: 1.8559184074401855 2023-01-22 09:45:33.015975: step: 456/466, loss: 1.371927261352539 2023-01-22 09:45:33.645479: step: 458/466, loss: 0.27529627084732056 2023-01-22 09:45:34.237405: step: 460/466, loss: 1.0816210508346558 2023-01-22 09:45:34.832736: step: 462/466, loss: 1.4851192235946655 2023-01-22 09:45:35.399651: step: 464/466, loss: 1.0001541376113892 2023-01-22 09:45:36.055106: step: 466/466, loss: 0.3777729868888855 2023-01-22 09:45:36.630260: step: 468/466, loss: 0.24453957378864288 2023-01-22 09:45:37.213545: step: 470/466, loss: 0.7623027563095093 2023-01-22 09:45:37.833269: step: 472/466, loss: 0.84808748960495 2023-01-22 09:45:38.394202: step: 474/466, loss: 2.5589609146118164 2023-01-22 09:45:38.939585: step: 476/466, loss: 0.33705270290374756 2023-01-22 09:45:39.568799: step: 478/466, loss: 15.094965934753418 2023-01-22 09:45:40.155459: step: 480/466, loss: 0.3791036307811737 2023-01-22 09:45:40.789163: step: 482/466, loss: 0.7764727473258972 2023-01-22 09:45:41.345439: step: 484/466, loss: 0.8260162472724915 2023-01-22 09:45:41.942493: step: 486/466, loss: 0.6989418864250183 2023-01-22 09:45:42.549529: step: 488/466, loss: 0.3180275857448578 2023-01-22 09:45:43.197199: step: 490/466, loss: 0.5473271012306213 2023-01-22 09:45:43.866647: step: 492/466, loss: 0.29150083661079407 2023-01-22 09:45:44.425135: step: 494/466, loss: 1.8655388355255127 2023-01-22 09:45:45.194261: step: 496/466, loss: 0.3491297662258148 2023-01-22 09:45:45.904151: step: 498/466, loss: 1.1241211891174316 2023-01-22 09:45:46.536216: step: 500/466, loss: 0.9345641732215881 2023-01-22 09:45:47.202698: step: 502/466, loss: 1.3086748123168945 2023-01-22 09:45:47.764216: step: 504/466, loss: 1.6536461114883423 2023-01-22 09:45:48.415422: step: 506/466, loss: 0.5924420356750488 2023-01-22 09:45:49.014572: step: 508/466, loss: 0.4484773278236389 2023-01-22 09:45:49.660806: step: 510/466, loss: 5.096407890319824 2023-01-22 09:45:50.276543: step: 512/466, loss: 3.5445706844329834 2023-01-22 09:45:50.935743: step: 514/466, loss: 0.21641923487186432 2023-01-22 09:45:51.615144: step: 516/466, loss: 4.486607551574707 2023-01-22 09:45:52.256966: step: 518/466, loss: 3.2323899269104004 2023-01-22 09:45:52.878761: step: 520/466, loss: 1.0494511127471924 2023-01-22 09:45:53.511110: step: 522/466, loss: 0.6894415020942688 2023-01-22 09:45:54.104855: step: 524/466, loss: 1.5049724578857422 2023-01-22 09:45:54.667837: step: 526/466, loss: 2.985200881958008 2023-01-22 09:45:55.249473: step: 528/466, loss: 2.1904640197753906 2023-01-22 09:45:55.809371: step: 530/466, loss: 4.436951637268066 2023-01-22 09:45:56.494195: step: 532/466, loss: 1.0465539693832397 2023-01-22 09:45:57.179163: step: 534/466, loss: 1.1398470401763916 2023-01-22 09:45:57.749945: step: 536/466, loss: 0.6183001399040222 2023-01-22 09:45:58.317868: step: 538/466, loss: 2.022190809249878 2023-01-22 09:45:58.959792: step: 540/466, loss: 0.39715272188186646 2023-01-22 09:45:59.550298: step: 542/466, loss: 0.23415739834308624 2023-01-22 09:46:00.196956: step: 544/466, loss: 1.0342296361923218 2023-01-22 09:46:00.832355: step: 546/466, loss: 1.5827267169952393 2023-01-22 09:46:01.543417: step: 548/466, loss: 2.6225316524505615 2023-01-22 09:46:02.200776: step: 550/466, loss: 0.9604104161262512 2023-01-22 09:46:02.769154: step: 552/466, loss: 0.8755918145179749 2023-01-22 09:46:03.498672: step: 554/466, loss: 1.1196208000183105 2023-01-22 09:46:04.066137: step: 556/466, loss: 2.395808219909668 2023-01-22 09:46:04.668400: step: 558/466, loss: 0.9987582564353943 2023-01-22 09:46:05.311798: step: 560/466, loss: 1.783170461654663 2023-01-22 09:46:05.857435: step: 562/466, loss: 1.1715949773788452 2023-01-22 09:46:06.437866: step: 564/466, loss: 0.42941659688949585 2023-01-22 09:46:07.049901: step: 566/466, loss: 0.5479053258895874 2023-01-22 09:46:07.710248: step: 568/466, loss: 0.7223318815231323 2023-01-22 09:46:08.298807: step: 570/466, loss: 1.9938143491744995 2023-01-22 09:46:08.832741: step: 572/466, loss: 0.6955587267875671 2023-01-22 09:46:09.453734: step: 574/466, loss: 0.6885424852371216 2023-01-22 09:46:10.020837: step: 576/466, loss: 0.5971453189849854 2023-01-22 09:46:10.621059: step: 578/466, loss: 2.068782329559326 2023-01-22 09:46:11.203999: step: 580/466, loss: 1.2053582668304443 2023-01-22 09:46:11.852306: step: 582/466, loss: 0.5845661759376526 2023-01-22 09:46:12.447778: step: 584/466, loss: 0.4609079658985138 2023-01-22 09:46:13.109777: step: 586/466, loss: 1.3553452491760254 2023-01-22 09:46:13.727007: step: 588/466, loss: 7.491386890411377 2023-01-22 09:46:14.283014: step: 590/466, loss: 0.31566736102104187 2023-01-22 09:46:14.813059: step: 592/466, loss: 1.7004241943359375 2023-01-22 09:46:15.479038: step: 594/466, loss: 3.041285514831543 2023-01-22 09:46:16.099254: step: 596/466, loss: 2.2295432090759277 2023-01-22 09:46:16.787329: step: 598/466, loss: 1.2003366947174072 2023-01-22 09:46:17.399217: step: 600/466, loss: 1.229418396949768 2023-01-22 09:46:18.019758: step: 602/466, loss: 1.0585122108459473 2023-01-22 09:46:18.624571: step: 604/466, loss: 1.5848824977874756 2023-01-22 09:46:19.350134: step: 606/466, loss: 0.33494770526885986 2023-01-22 09:46:19.988316: step: 608/466, loss: 1.017104148864746 2023-01-22 09:46:20.611682: step: 610/466, loss: 1.0163648128509521 2023-01-22 09:46:21.399767: step: 612/466, loss: 4.332009315490723 2023-01-22 09:46:22.017984: step: 614/466, loss: 1.3501293659210205 2023-01-22 09:46:22.666285: step: 616/466, loss: 1.3002701997756958 2023-01-22 09:46:23.399320: step: 618/466, loss: 1.1786446571350098 2023-01-22 09:46:24.065902: step: 620/466, loss: 5.574863910675049 2023-01-22 09:46:24.707820: step: 622/466, loss: 8.472260475158691 2023-01-22 09:46:25.335164: step: 624/466, loss: 1.0419970750808716 2023-01-22 09:46:25.928993: step: 626/466, loss: 0.926811158657074 2023-01-22 09:46:26.567549: step: 628/466, loss: 1.934928059577942 2023-01-22 09:46:27.199677: step: 630/466, loss: 1.2481105327606201 2023-01-22 09:46:27.840146: step: 632/466, loss: 0.6394846439361572 2023-01-22 09:46:28.434425: step: 634/466, loss: 0.8781318068504333 2023-01-22 09:46:29.018071: step: 636/466, loss: 0.4937204122543335 2023-01-22 09:46:29.644485: step: 638/466, loss: 0.72397381067276 2023-01-22 09:46:30.285433: step: 640/466, loss: 1.065069317817688 2023-01-22 09:46:30.946622: step: 642/466, loss: 0.7574830055236816 2023-01-22 09:46:31.597690: step: 644/466, loss: 1.0235209465026855 2023-01-22 09:46:32.202842: step: 646/466, loss: 1.9580715894699097 2023-01-22 09:46:32.812482: step: 648/466, loss: 0.3159792721271515 2023-01-22 09:46:33.427001: step: 650/466, loss: 0.7083503603935242 2023-01-22 09:46:34.003534: step: 652/466, loss: 0.3604220151901245 2023-01-22 09:46:34.639773: step: 654/466, loss: 0.5020242929458618 2023-01-22 09:46:35.280392: step: 656/466, loss: 1.9850904941558838 2023-01-22 09:46:35.926957: step: 658/466, loss: 2.489419937133789 2023-01-22 09:46:36.541496: step: 660/466, loss: 2.914874315261841 2023-01-22 09:46:37.166328: step: 662/466, loss: 2.6481523513793945 2023-01-22 09:46:37.798451: step: 664/466, loss: 3.4673984050750732 2023-01-22 09:46:38.400025: step: 666/466, loss: 0.20957191288471222 2023-01-22 09:46:39.070048: step: 668/466, loss: 0.39392566680908203 2023-01-22 09:46:39.637149: step: 670/466, loss: 2.5256829261779785 2023-01-22 09:46:40.239515: step: 672/466, loss: 0.48323601484298706 2023-01-22 09:46:40.894031: step: 674/466, loss: 0.2096550613641739 2023-01-22 09:46:41.425835: step: 676/466, loss: 1.0257917642593384 2023-01-22 09:46:41.971721: step: 678/466, loss: 2.546658515930176 2023-01-22 09:46:42.590659: step: 680/466, loss: 0.5435642004013062 2023-01-22 09:46:43.258950: step: 682/466, loss: 1.7187285423278809 2023-01-22 09:46:43.903741: step: 684/466, loss: 0.7300114631652832 2023-01-22 09:46:44.528606: step: 686/466, loss: 0.38423314690589905 2023-01-22 09:46:45.114729: step: 688/466, loss: 1.514926552772522 2023-01-22 09:46:45.735272: step: 690/466, loss: 2.6025986671447754 2023-01-22 09:46:46.347096: step: 692/466, loss: 2.7757177352905273 2023-01-22 09:46:46.949921: step: 694/466, loss: 4.4856367111206055 2023-01-22 09:46:47.575848: step: 696/466, loss: 1.9674458503723145 2023-01-22 09:46:48.215636: step: 698/466, loss: 3.832584857940674 2023-01-22 09:46:48.824622: step: 700/466, loss: 1.8476121425628662 2023-01-22 09:46:49.474112: step: 702/466, loss: 1.2803583145141602 2023-01-22 09:46:50.093772: step: 704/466, loss: 2.006136894226074 2023-01-22 09:46:50.705822: step: 706/466, loss: 2.4802322387695312 2023-01-22 09:46:51.331795: step: 708/466, loss: 1.1326019763946533 2023-01-22 09:46:51.929110: step: 710/466, loss: 1.0300644636154175 2023-01-22 09:46:52.540187: step: 712/466, loss: 2.0992324352264404 2023-01-22 09:46:53.137456: step: 714/466, loss: 1.4960116147994995 2023-01-22 09:46:53.757437: step: 716/466, loss: 0.3980748653411865 2023-01-22 09:46:54.416224: step: 718/466, loss: 0.8751800060272217 2023-01-22 09:46:55.049107: step: 720/466, loss: 0.7033678293228149 2023-01-22 09:46:55.682412: step: 722/466, loss: 0.7334343791007996 2023-01-22 09:46:56.323394: step: 724/466, loss: 1.1421419382095337 2023-01-22 09:46:57.025430: step: 726/466, loss: 5.31184196472168 2023-01-22 09:46:57.642364: step: 728/466, loss: 4.327996730804443 2023-01-22 09:46:58.246631: step: 730/466, loss: 0.9183502793312073 2023-01-22 09:46:58.957671: step: 732/466, loss: 1.9118095636367798 2023-01-22 09:46:59.587450: step: 734/466, loss: 0.35955610871315 2023-01-22 09:47:00.162010: step: 736/466, loss: 0.5458019375801086 2023-01-22 09:47:00.833175: step: 738/466, loss: 1.2042368650436401 2023-01-22 09:47:01.461293: step: 740/466, loss: 0.546623706817627 2023-01-22 09:47:02.113426: step: 742/466, loss: 4.107133865356445 2023-01-22 09:47:02.731907: step: 744/466, loss: 1.4979407787322998 2023-01-22 09:47:03.342499: step: 746/466, loss: 1.1551812887191772 2023-01-22 09:47:03.928212: step: 748/466, loss: 0.409785658121109 2023-01-22 09:47:04.526087: step: 750/466, loss: 0.38589829206466675 2023-01-22 09:47:05.254260: step: 752/466, loss: 0.6685751080513 2023-01-22 09:47:05.833547: step: 754/466, loss: 0.9630231857299805 2023-01-22 09:47:06.426768: step: 756/466, loss: 1.2398910522460938 2023-01-22 09:47:07.026013: step: 758/466, loss: 1.371281623840332 2023-01-22 09:47:07.670952: step: 760/466, loss: 0.8932585716247559 2023-01-22 09:47:08.256757: step: 762/466, loss: 1.0760459899902344 2023-01-22 09:47:08.835261: step: 764/466, loss: 0.772979199886322 2023-01-22 09:47:09.418543: step: 766/466, loss: 0.7807055711746216 2023-01-22 09:47:10.007967: step: 768/466, loss: 3.313267946243286 2023-01-22 09:47:10.617507: step: 770/466, loss: 7.387088298797607 2023-01-22 09:47:11.181374: step: 772/466, loss: 0.481009304523468 2023-01-22 09:47:11.782105: step: 774/466, loss: 1.8092561960220337 2023-01-22 09:47:12.334490: step: 776/466, loss: 0.7750083208084106 2023-01-22 09:47:12.926890: step: 778/466, loss: 0.7640812993049622 2023-01-22 09:47:13.587922: step: 780/466, loss: 0.771941602230072 2023-01-22 09:47:14.231384: step: 782/466, loss: 4.130035400390625 2023-01-22 09:47:14.835295: step: 784/466, loss: 1.6280627250671387 2023-01-22 09:47:15.494038: step: 786/466, loss: 3.1433563232421875 2023-01-22 09:47:16.039722: step: 788/466, loss: 1.0050230026245117 2023-01-22 09:47:16.631001: step: 790/466, loss: 2.5180046558380127 2023-01-22 09:47:17.293600: step: 792/466, loss: 1.341071367263794 2023-01-22 09:47:17.909354: step: 794/466, loss: 0.5717883706092834 2023-01-22 09:47:18.461356: step: 796/466, loss: 0.6130303740501404 2023-01-22 09:47:19.107269: step: 798/466, loss: 1.025365948677063 2023-01-22 09:47:19.666353: step: 800/466, loss: 0.3749749958515167 2023-01-22 09:47:20.363745: step: 802/466, loss: 2.4972636699676514 2023-01-22 09:47:20.937264: step: 804/466, loss: 0.24893394112586975 2023-01-22 09:47:21.440191: step: 806/466, loss: 0.9291529059410095 2023-01-22 09:47:22.086371: step: 808/466, loss: 0.9736714363098145 2023-01-22 09:47:22.771494: step: 810/466, loss: 0.6771429777145386 2023-01-22 09:47:23.377635: step: 812/466, loss: 0.9184495806694031 2023-01-22 09:47:24.018588: step: 814/466, loss: 0.399580717086792 2023-01-22 09:47:24.626774: step: 816/466, loss: 0.5318378806114197 2023-01-22 09:47:25.245898: step: 818/466, loss: 2.599856376647949 2023-01-22 09:47:25.834175: step: 820/466, loss: 1.9299108982086182 2023-01-22 09:47:26.509577: step: 822/466, loss: 1.3706055879592896 2023-01-22 09:47:27.142229: step: 824/466, loss: 1.642850399017334 2023-01-22 09:47:27.742287: step: 826/466, loss: 0.41598743200302124 2023-01-22 09:47:28.372593: step: 828/466, loss: 0.9551495313644409 2023-01-22 09:47:28.939584: step: 830/466, loss: 0.5807445645332336 2023-01-22 09:47:29.552640: step: 832/466, loss: 0.9744365215301514 2023-01-22 09:47:30.148612: step: 834/466, loss: 1.759616494178772 2023-01-22 09:47:30.735913: step: 836/466, loss: 2.0120441913604736 2023-01-22 09:47:31.358107: step: 838/466, loss: 0.6406006217002869 2023-01-22 09:47:31.969007: step: 840/466, loss: 2.323092222213745 2023-01-22 09:47:32.634336: step: 842/466, loss: 0.7588452100753784 2023-01-22 09:47:33.245546: step: 844/466, loss: 0.23752079904079437 2023-01-22 09:47:33.917869: step: 846/466, loss: 2.2680630683898926 2023-01-22 09:47:34.524647: step: 848/466, loss: 0.8969069123268127 2023-01-22 09:47:35.173465: step: 850/466, loss: 1.1765252351760864 2023-01-22 09:47:35.825002: step: 852/466, loss: 0.780741274356842 2023-01-22 09:47:36.477516: step: 854/466, loss: 0.6287121176719666 2023-01-22 09:47:37.114218: step: 856/466, loss: 0.25774434208869934 2023-01-22 09:47:37.741724: step: 858/466, loss: 0.20518824458122253 2023-01-22 09:47:38.316516: step: 860/466, loss: 12.963945388793945 2023-01-22 09:47:38.855473: step: 862/466, loss: 0.8451552391052246 2023-01-22 09:47:39.469667: step: 864/466, loss: 1.062126636505127 2023-01-22 09:47:40.127941: step: 866/466, loss: 0.7250032424926758 2023-01-22 09:47:40.722726: step: 868/466, loss: 3.0983104705810547 2023-01-22 09:47:41.363701: step: 870/466, loss: 1.5424200296401978 2023-01-22 09:47:41.963224: step: 872/466, loss: 1.0159175395965576 2023-01-22 09:47:42.492859: step: 874/466, loss: 2.1191201210021973 2023-01-22 09:47:43.127619: step: 876/466, loss: 2.0893123149871826 2023-01-22 09:47:43.742891: step: 878/466, loss: 1.5631012916564941 2023-01-22 09:47:44.406224: step: 880/466, loss: 1.3018872737884521 2023-01-22 09:47:45.038988: step: 882/466, loss: 0.6196349263191223 2023-01-22 09:47:45.698298: step: 884/466, loss: 0.6660146117210388 2023-01-22 09:47:46.296886: step: 886/466, loss: 1.7531086206436157 2023-01-22 09:47:47.011846: step: 888/466, loss: 0.9928648471832275 2023-01-22 09:47:47.613879: step: 890/466, loss: 0.5851998329162598 2023-01-22 09:47:48.221479: step: 892/466, loss: 1.0412944555282593 2023-01-22 09:47:48.862094: step: 894/466, loss: 0.5938190221786499 2023-01-22 09:47:49.433000: step: 896/466, loss: 1.4201117753982544 2023-01-22 09:47:50.018798: step: 898/466, loss: 0.5297113656997681 2023-01-22 09:47:50.592639: step: 900/466, loss: 2.7390661239624023 2023-01-22 09:47:51.200832: step: 902/466, loss: 3.369328498840332 2023-01-22 09:47:51.891000: step: 904/466, loss: 1.364393949508667 2023-01-22 09:47:52.589235: step: 906/466, loss: 0.4259275496006012 2023-01-22 09:47:53.165788: step: 908/466, loss: 2.5091474056243896 2023-01-22 09:47:53.795043: step: 910/466, loss: 1.1209821701049805 2023-01-22 09:47:54.425778: step: 912/466, loss: 0.6636772155761719 2023-01-22 09:47:55.023134: step: 914/466, loss: 0.638691246509552 2023-01-22 09:47:55.644378: step: 916/466, loss: 1.4952242374420166 2023-01-22 09:47:56.246694: step: 918/466, loss: 0.9142274856567383 2023-01-22 09:47:56.884798: step: 920/466, loss: 0.5067021250724792 2023-01-22 09:47:57.433993: step: 922/466, loss: 0.7559120059013367 2023-01-22 09:47:58.001719: step: 924/466, loss: 0.6585716009140015 2023-01-22 09:47:58.564353: step: 926/466, loss: 1.6754124164581299 2023-01-22 09:47:59.220570: step: 928/466, loss: 4.6818013191223145 2023-01-22 09:47:59.842393: step: 930/466, loss: 1.664534091949463 2023-01-22 09:48:00.469287: step: 932/466, loss: 0.6084837913513184 ================================================== Loss: 1.577 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3169102404643449, 'r': 0.24128393308080806, 'f1': 0.27397401433691754}, 'combined': 0.2018755895114129, 'epoch': 2} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35101408931754324, 'r': 0.17808579985254785, 'f1': 0.23629044774013516}, 'combined': 0.1567107632680689, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.313062069707401, 'r': 0.1964775182284634, 'f1': 0.24143230462916873}, 'combined': 0.16095486975277914, 'epoch': 2} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3849366689172522, 'r': 0.17285584441880064, 'f1': 0.23857814998447097}, 'combined': 0.15570363472670734, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3117443943596856, 'r': 0.24325509559884556, 'f1': 0.27327380952380953}, 'combined': 0.20135964912280702, 'epoch': 2} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3574179967599348, 'r': 0.17855453943581878, 'f1': 0.23814132810056754}, 'combined': 0.1579382901392365, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24722222222222223, 'r': 0.2119047619047619, 'f1': 0.22820512820512817}, 'combined': 0.1521367521367521, 'epoch': 2} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36764705882352944, 'r': 0.2717391304347826, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.18181818181818182, 'r': 0.06896551724137931, 'f1': 0.1}, 'combined': 0.06666666666666667, 'epoch': 2} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3169102404643449, 'r': 0.24128393308080806, 'f1': 0.27397401433691754}, 'combined': 0.2018755895114129, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35101408931754324, 'r': 0.17808579985254785, 'f1': 0.23629044774013516}, 'combined': 0.1567107632680689, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24722222222222223, 'r': 0.2119047619047619, 'f1': 0.22820512820512817}, 'combined': 0.1521367521367521, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.313062069707401, 'r': 0.1964775182284634, 'f1': 0.24143230462916873}, 'combined': 0.16095486975277914, 'epoch': 2} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3849366689172522, 'r': 0.17285584441880064, 'f1': 0.23857814998447097}, 'combined': 0.15570363472670734, 'epoch': 2} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36764705882352944, 'r': 0.2717391304347826, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3117443943596856, 'r': 0.24325509559884556, 'f1': 0.27327380952380953}, 'combined': 0.20135964912280702, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3574179967599348, 'r': 0.17855453943581878, 'f1': 0.23814132810056754}, 'combined': 0.1579382901392365, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.18181818181818182, 'r': 0.06896551724137931, 'f1': 0.1}, 'combined': 0.06666666666666667, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:50:39.410041: step: 2/466, loss: 7.128997802734375 2023-01-22 09:50:40.100114: step: 4/466, loss: 1.2443701028823853 2023-01-22 09:50:40.721670: step: 6/466, loss: 1.0124177932739258 2023-01-22 09:50:41.349260: step: 8/466, loss: 0.5614454746246338 2023-01-22 09:50:41.929663: step: 10/466, loss: 3.858065128326416 2023-01-22 09:50:42.536223: step: 12/466, loss: 0.6932990550994873 2023-01-22 09:50:43.169460: step: 14/466, loss: 0.7984504699707031 2023-01-22 09:50:43.754104: step: 16/466, loss: 3.1545112133026123 2023-01-22 09:50:44.376406: step: 18/466, loss: 0.8147408962249756 2023-01-22 09:50:44.982142: step: 20/466, loss: 0.6810839176177979 2023-01-22 09:50:45.551906: step: 22/466, loss: 0.8591039180755615 2023-01-22 09:50:46.197473: step: 24/466, loss: 1.1684130430221558 2023-01-22 09:50:46.765937: step: 26/466, loss: 0.17121870815753937 2023-01-22 09:50:47.309297: step: 28/466, loss: 0.6259769201278687 2023-01-22 09:50:47.957461: step: 30/466, loss: 0.7976721525192261 2023-01-22 09:50:48.527202: step: 32/466, loss: 0.7649844288825989 2023-01-22 09:50:49.134783: step: 34/466, loss: 0.3217013478279114 2023-01-22 09:50:49.737773: step: 36/466, loss: 0.8182716965675354 2023-01-22 09:50:50.416048: step: 38/466, loss: 0.2067316174507141 2023-01-22 09:50:51.025491: step: 40/466, loss: 0.17226776480674744 2023-01-22 09:50:51.619346: step: 42/466, loss: 1.2925540208816528 2023-01-22 09:50:52.190658: step: 44/466, loss: 0.3089175224304199 2023-01-22 09:50:52.878890: step: 46/466, loss: 0.3989468812942505 2023-01-22 09:50:53.535555: step: 48/466, loss: 1.590653419494629 2023-01-22 09:50:54.129019: step: 50/466, loss: 0.7529211640357971 2023-01-22 09:50:54.840041: step: 52/466, loss: 0.47044286131858826 2023-01-22 09:50:55.487027: step: 54/466, loss: 1.4606329202651978 2023-01-22 09:50:56.137538: step: 56/466, loss: 0.7644850015640259 2023-01-22 09:50:56.746710: step: 58/466, loss: 2.350726366043091 2023-01-22 09:50:57.349139: step: 60/466, loss: 0.7856782674789429 2023-01-22 09:50:57.951221: step: 62/466, loss: 3.875983953475952 2023-01-22 09:50:58.544646: step: 64/466, loss: 1.41089928150177 2023-01-22 09:50:59.127329: step: 66/466, loss: 0.7020902633666992 2023-01-22 09:50:59.719784: step: 68/466, loss: 0.514349102973938 2023-01-22 09:51:00.381546: step: 70/466, loss: 0.5861013531684875 2023-01-22 09:51:00.961688: step: 72/466, loss: 0.3562849760055542 2023-01-22 09:51:01.531279: step: 74/466, loss: 0.2749972641468048 2023-01-22 09:51:02.103221: step: 76/466, loss: 0.5348482131958008 2023-01-22 09:51:02.692380: step: 78/466, loss: 0.22524146735668182 2023-01-22 09:51:03.279062: step: 80/466, loss: 0.5875887274742126 2023-01-22 09:51:03.853397: step: 82/466, loss: 0.7782057523727417 2023-01-22 09:51:04.522684: step: 84/466, loss: 0.8846481442451477 2023-01-22 09:51:05.105826: step: 86/466, loss: 0.38065099716186523 2023-01-22 09:51:05.751806: step: 88/466, loss: 0.8411879539489746 2023-01-22 09:51:06.376654: step: 90/466, loss: 0.7535284757614136 2023-01-22 09:51:06.975855: step: 92/466, loss: 0.22157704830169678 2023-01-22 09:51:07.527748: step: 94/466, loss: 1.2213329076766968 2023-01-22 09:51:08.226252: step: 96/466, loss: 6.678746223449707 2023-01-22 09:51:08.898794: step: 98/466, loss: 0.27789440751075745 2023-01-22 09:51:09.569848: step: 100/466, loss: 0.40511220693588257 2023-01-22 09:51:10.180612: step: 102/466, loss: 1.249924659729004 2023-01-22 09:51:10.768491: step: 104/466, loss: 0.6266840100288391 2023-01-22 09:51:11.396661: step: 106/466, loss: 4.088799953460693 2023-01-22 09:51:12.009819: step: 108/466, loss: 1.7367558479309082 2023-01-22 09:51:12.624218: step: 110/466, loss: 0.3987114131450653 2023-01-22 09:51:13.197869: step: 112/466, loss: 0.2352711260318756 2023-01-22 09:51:13.805950: step: 114/466, loss: 0.482532262802124 2023-01-22 09:51:14.452932: step: 116/466, loss: 0.9187915325164795 2023-01-22 09:51:15.070562: step: 118/466, loss: 0.2020147144794464 2023-01-22 09:51:15.634955: step: 120/466, loss: 0.2355295866727829 2023-01-22 09:51:16.222323: step: 122/466, loss: 1.4388062953948975 2023-01-22 09:51:16.810026: step: 124/466, loss: 1.1046127080917358 2023-01-22 09:51:17.407691: step: 126/466, loss: 0.25789308547973633 2023-01-22 09:51:18.004024: step: 128/466, loss: 1.6567487716674805 2023-01-22 09:51:18.553000: step: 130/466, loss: 1.1461472511291504 2023-01-22 09:51:19.125238: step: 132/466, loss: 1.9202277660369873 2023-01-22 09:51:19.813839: step: 134/466, loss: 0.8971623778343201 2023-01-22 09:51:20.426735: step: 136/466, loss: 1.5297441482543945 2023-01-22 09:51:21.015387: step: 138/466, loss: 1.31128990650177 2023-01-22 09:51:21.625372: step: 140/466, loss: 0.7695094347000122 2023-01-22 09:51:22.272159: step: 142/466, loss: 0.25040504336357117 2023-01-22 09:51:22.851257: step: 144/466, loss: 0.7494414448738098 2023-01-22 09:51:23.420651: step: 146/466, loss: 0.8621645569801331 2023-01-22 09:51:24.036780: step: 148/466, loss: 0.9328680634498596 2023-01-22 09:51:24.610925: step: 150/466, loss: 3.1788134574890137 2023-01-22 09:51:25.159987: step: 152/466, loss: 1.3003443479537964 2023-01-22 09:51:25.763268: step: 154/466, loss: 0.7661277055740356 2023-01-22 09:51:26.395758: step: 156/466, loss: 0.248875230550766 2023-01-22 09:51:26.966094: step: 158/466, loss: 0.33294370770454407 2023-01-22 09:51:27.621789: step: 160/466, loss: 1.2023553848266602 2023-01-22 09:51:28.322626: step: 162/466, loss: 1.5967686176300049 2023-01-22 09:51:28.983294: step: 164/466, loss: 0.39988163113594055 2023-01-22 09:51:29.636866: step: 166/466, loss: 0.21817664802074432 2023-01-22 09:51:30.217837: step: 168/466, loss: 0.42362329363822937 2023-01-22 09:51:30.828677: step: 170/466, loss: 0.38722550868988037 2023-01-22 09:51:31.455721: step: 172/466, loss: 3.7433886528015137 2023-01-22 09:51:32.144272: step: 174/466, loss: 0.8374991416931152 2023-01-22 09:51:32.806626: step: 176/466, loss: 0.6235227584838867 2023-01-22 09:51:33.404105: step: 178/466, loss: 1.054993987083435 2023-01-22 09:51:34.029761: step: 180/466, loss: 0.5051757097244263 2023-01-22 09:51:34.677713: step: 182/466, loss: 1.1793485879898071 2023-01-22 09:51:35.277550: step: 184/466, loss: 0.9896507263183594 2023-01-22 09:51:35.846794: step: 186/466, loss: 4.04372501373291 2023-01-22 09:51:36.474705: step: 188/466, loss: 0.5763915181159973 2023-01-22 09:51:37.086963: step: 190/466, loss: 2.586012125015259 2023-01-22 09:51:37.630325: step: 192/466, loss: 0.22528810799121857 2023-01-22 09:51:38.212314: step: 194/466, loss: 1.463167428970337 2023-01-22 09:51:38.834446: step: 196/466, loss: 0.7889247536659241 2023-01-22 09:51:39.508249: step: 198/466, loss: 1.4208873510360718 2023-01-22 09:51:40.092640: step: 200/466, loss: 5.841209888458252 2023-01-22 09:51:40.678845: step: 202/466, loss: 1.1238937377929688 2023-01-22 09:51:41.273953: step: 204/466, loss: 1.2800912857055664 2023-01-22 09:51:41.885072: step: 206/466, loss: 2.091311454772949 2023-01-22 09:51:42.463872: step: 208/466, loss: 0.321956992149353 2023-01-22 09:51:43.194958: step: 210/466, loss: 5.405332088470459 2023-01-22 09:51:43.841041: step: 212/466, loss: 12.03922176361084 2023-01-22 09:51:44.482720: step: 214/466, loss: 1.0027766227722168 2023-01-22 09:51:45.167548: step: 216/466, loss: 1.1555352210998535 2023-01-22 09:51:45.863887: step: 218/466, loss: 3.034679412841797 2023-01-22 09:51:46.568859: step: 220/466, loss: 0.25661519169807434 2023-01-22 09:51:47.175069: step: 222/466, loss: 0.4973966181278229 2023-01-22 09:51:47.772382: step: 224/466, loss: 2.389017105102539 2023-01-22 09:51:48.403483: step: 226/466, loss: 0.22257673740386963 2023-01-22 09:51:49.090955: step: 228/466, loss: 0.7281988859176636 2023-01-22 09:51:49.709300: step: 230/466, loss: 1.7831833362579346 2023-01-22 09:51:50.353914: step: 232/466, loss: 3.927642583847046 2023-01-22 09:51:50.974737: step: 234/466, loss: 0.6490916013717651 2023-01-22 09:51:51.539182: step: 236/466, loss: 0.5916441082954407 2023-01-22 09:51:52.101420: step: 238/466, loss: 0.864163339138031 2023-01-22 09:51:52.709764: step: 240/466, loss: 4.118500709533691 2023-01-22 09:51:53.372197: step: 242/466, loss: 1.7515130043029785 2023-01-22 09:51:53.946181: step: 244/466, loss: 0.7366144061088562 2023-01-22 09:51:54.535823: step: 246/466, loss: 1.149341106414795 2023-01-22 09:51:55.099472: step: 248/466, loss: 0.9426298141479492 2023-01-22 09:51:55.717939: step: 250/466, loss: 1.1655728816986084 2023-01-22 09:51:56.266826: step: 252/466, loss: 1.1931763887405396 2023-01-22 09:51:56.894934: step: 254/466, loss: 0.6886307597160339 2023-01-22 09:51:57.514187: step: 256/466, loss: 0.7341050505638123 2023-01-22 09:51:58.148836: step: 258/466, loss: 6.162553310394287 2023-01-22 09:51:58.790535: step: 260/466, loss: 0.9465752243995667 2023-01-22 09:51:59.389850: step: 262/466, loss: 1.0216237306594849 2023-01-22 09:52:00.004788: step: 264/466, loss: 0.5391034483909607 2023-01-22 09:52:00.641645: step: 266/466, loss: 0.802638053894043 2023-01-22 09:52:01.241969: step: 268/466, loss: 1.4949768781661987 2023-01-22 09:52:01.894391: step: 270/466, loss: 1.0837867259979248 2023-01-22 09:52:02.434786: step: 272/466, loss: 0.31959158182144165 2023-01-22 09:52:03.048631: step: 274/466, loss: 0.6207334995269775 2023-01-22 09:52:03.698786: step: 276/466, loss: 0.45914387702941895 2023-01-22 09:52:04.308060: step: 278/466, loss: 4.065018653869629 2023-01-22 09:52:04.937711: step: 280/466, loss: 0.20469020307064056 2023-01-22 09:52:05.526725: step: 282/466, loss: 0.5084442496299744 2023-01-22 09:52:06.138994: step: 284/466, loss: 0.7040725350379944 2023-01-22 09:52:06.697094: step: 286/466, loss: 0.399564266204834 2023-01-22 09:52:07.308613: step: 288/466, loss: 0.6310179233551025 2023-01-22 09:52:07.926441: step: 290/466, loss: 0.13815368711948395 2023-01-22 09:52:08.512122: step: 292/466, loss: 2.8727893829345703 2023-01-22 09:52:09.152494: step: 294/466, loss: 0.43193039298057556 2023-01-22 09:52:09.752030: step: 296/466, loss: 0.7842096090316772 2023-01-22 09:52:10.361111: step: 298/466, loss: 0.7666311264038086 2023-01-22 09:52:10.997702: step: 300/466, loss: 2.4203577041625977 2023-01-22 09:52:11.589178: step: 302/466, loss: 0.3907892405986786 2023-01-22 09:52:12.185473: step: 304/466, loss: 0.3148607611656189 2023-01-22 09:52:12.865676: step: 306/466, loss: 0.531349778175354 2023-01-22 09:52:13.464960: step: 308/466, loss: 1.8285367488861084 2023-01-22 09:52:14.091561: step: 310/466, loss: 1.708573341369629 2023-01-22 09:52:14.748203: step: 312/466, loss: 3.27811598777771 2023-01-22 09:52:15.325347: step: 314/466, loss: 0.8649401664733887 2023-01-22 09:52:15.964159: step: 316/466, loss: 0.32861870527267456 2023-01-22 09:52:16.615881: step: 318/466, loss: 1.1042927503585815 2023-01-22 09:52:17.206965: step: 320/466, loss: 0.801863431930542 2023-01-22 09:52:17.775191: step: 322/466, loss: 0.46133917570114136 2023-01-22 09:52:18.466158: step: 324/466, loss: 0.4726460874080658 2023-01-22 09:52:19.103329: step: 326/466, loss: 0.27592214941978455 2023-01-22 09:52:19.719377: step: 328/466, loss: 1.2176792621612549 2023-01-22 09:52:20.308551: step: 330/466, loss: 0.8818544149398804 2023-01-22 09:52:20.905779: step: 332/466, loss: 0.7360835075378418 2023-01-22 09:52:21.475608: step: 334/466, loss: 0.2759115993976593 2023-01-22 09:52:22.052171: step: 336/466, loss: 1.2839148044586182 2023-01-22 09:52:22.633898: step: 338/466, loss: 0.5772988796234131 2023-01-22 09:52:23.281530: step: 340/466, loss: 0.621418833732605 2023-01-22 09:52:23.953481: step: 342/466, loss: 8.68262767791748 2023-01-22 09:52:24.582737: step: 344/466, loss: 2.322258234024048 2023-01-22 09:52:25.246365: step: 346/466, loss: 2.3204495906829834 2023-01-22 09:52:25.864447: step: 348/466, loss: 3.272414207458496 2023-01-22 09:52:26.562792: step: 350/466, loss: 0.9026280641555786 2023-01-22 09:52:27.237651: step: 352/466, loss: 1.1732512712478638 2023-01-22 09:52:27.855450: step: 354/466, loss: 1.1399867534637451 2023-01-22 09:52:28.455410: step: 356/466, loss: 0.6772962212562561 2023-01-22 09:52:29.285159: step: 358/466, loss: 1.2160906791687012 2023-01-22 09:52:29.868938: step: 360/466, loss: 0.6079787015914917 2023-01-22 09:52:30.567114: step: 362/466, loss: 0.3634703457355499 2023-01-22 09:52:31.277768: step: 364/466, loss: 0.6174222230911255 2023-01-22 09:52:31.852796: step: 366/466, loss: 0.4810933470726013 2023-01-22 09:52:32.430833: step: 368/466, loss: 1.0697897672653198 2023-01-22 09:52:33.006734: step: 370/466, loss: 0.1810256391763687 2023-01-22 09:52:33.656235: step: 372/466, loss: 1.742691159248352 2023-01-22 09:52:34.325517: step: 374/466, loss: 0.844283401966095 2023-01-22 09:52:34.937804: step: 376/466, loss: 5.25944709777832 2023-01-22 09:52:35.583331: step: 378/466, loss: 1.2075788974761963 2023-01-22 09:52:36.160031: step: 380/466, loss: 1.401288390159607 2023-01-22 09:52:36.754055: step: 382/466, loss: 1.144996166229248 2023-01-22 09:52:37.359275: step: 384/466, loss: 0.42485713958740234 2023-01-22 09:52:37.993412: step: 386/466, loss: 1.235788106918335 2023-01-22 09:52:38.650495: step: 388/466, loss: 1.0391279458999634 2023-01-22 09:52:39.214551: step: 390/466, loss: 4.329006195068359 2023-01-22 09:52:39.861895: step: 392/466, loss: 0.8605351448059082 2023-01-22 09:52:40.455142: step: 394/466, loss: 0.8281611800193787 2023-01-22 09:52:41.061789: step: 396/466, loss: 1.492803931236267 2023-01-22 09:52:41.709431: step: 398/466, loss: 0.942059338092804 2023-01-22 09:52:42.329166: step: 400/466, loss: 0.6410725712776184 2023-01-22 09:52:42.898657: step: 402/466, loss: 1.1986944675445557 2023-01-22 09:52:43.470777: step: 404/466, loss: 1.9941927194595337 2023-01-22 09:52:44.027188: step: 406/466, loss: 0.5979148149490356 2023-01-22 09:52:44.646892: step: 408/466, loss: 0.42674484848976135 2023-01-22 09:52:45.272266: step: 410/466, loss: 0.6338918209075928 2023-01-22 09:52:45.887533: step: 412/466, loss: 0.2838844954967499 2023-01-22 09:52:46.451964: step: 414/466, loss: 1.0026823282241821 2023-01-22 09:52:47.045391: step: 416/466, loss: 1.5562171936035156 2023-01-22 09:52:47.701429: step: 418/466, loss: 0.7744855880737305 2023-01-22 09:52:48.322322: step: 420/466, loss: 0.2533835470676422 2023-01-22 09:52:48.884559: step: 422/466, loss: 0.6974008083343506 2023-01-22 09:52:49.549427: step: 424/466, loss: 1.380007028579712 2023-01-22 09:52:50.174749: step: 426/466, loss: 1.7630139589309692 2023-01-22 09:52:50.736269: step: 428/466, loss: 2.447899341583252 2023-01-22 09:52:51.308175: step: 430/466, loss: 2.01246976852417 2023-01-22 09:52:51.995563: step: 432/466, loss: 0.5250542163848877 2023-01-22 09:52:52.632882: step: 434/466, loss: 0.38290348649024963 2023-01-22 09:52:53.234714: step: 436/466, loss: 2.1723570823669434 2023-01-22 09:52:53.877956: step: 438/466, loss: 0.8501411080360413 2023-01-22 09:52:54.595159: step: 440/466, loss: 1.0419926643371582 2023-01-22 09:52:55.246388: step: 442/466, loss: 1.304261326789856 2023-01-22 09:52:55.762968: step: 444/466, loss: 0.49662578105926514 2023-01-22 09:52:56.385133: step: 446/466, loss: 0.7082849144935608 2023-01-22 09:52:56.968481: step: 448/466, loss: 0.22583594918251038 2023-01-22 09:52:57.547893: step: 450/466, loss: 0.28244996070861816 2023-01-22 09:52:58.139542: step: 452/466, loss: 2.694072723388672 2023-01-22 09:52:58.750853: step: 454/466, loss: 0.78786301612854 2023-01-22 09:52:59.335291: step: 456/466, loss: 1.4211052656173706 2023-01-22 09:53:00.004520: step: 458/466, loss: 1.5807013511657715 2023-01-22 09:53:00.625765: step: 460/466, loss: 0.27115046977996826 2023-01-22 09:53:01.272644: step: 462/466, loss: 1.418265700340271 2023-01-22 09:53:01.934370: step: 464/466, loss: 0.6221765279769897 2023-01-22 09:53:02.630799: step: 466/466, loss: 1.763245940208435 2023-01-22 09:53:03.216357: step: 468/466, loss: 2.8533806800842285 2023-01-22 09:53:03.863960: step: 470/466, loss: 0.33080437779426575 2023-01-22 09:53:04.471539: step: 472/466, loss: 0.6602237224578857 2023-01-22 09:53:05.080931: step: 474/466, loss: 0.256158709526062 2023-01-22 09:53:05.648675: step: 476/466, loss: 0.4613457918167114 2023-01-22 09:53:06.283222: step: 478/466, loss: 1.4783824682235718 2023-01-22 09:53:06.906142: step: 480/466, loss: 0.7388719320297241 2023-01-22 09:53:07.502784: step: 482/466, loss: 1.4873404502868652 2023-01-22 09:53:08.089687: step: 484/466, loss: 7.858376502990723 2023-01-22 09:53:08.698989: step: 486/466, loss: 0.8068593740463257 2023-01-22 09:53:09.271945: step: 488/466, loss: 1.6794354915618896 2023-01-22 09:53:09.890523: step: 490/466, loss: 2.4783554077148438 2023-01-22 09:53:10.490288: step: 492/466, loss: 0.7154982089996338 2023-01-22 09:53:11.034789: step: 494/466, loss: 0.3904711902141571 2023-01-22 09:53:11.590229: step: 496/466, loss: 0.9338459968566895 2023-01-22 09:53:12.182955: step: 498/466, loss: 0.49109259247779846 2023-01-22 09:53:12.882973: step: 500/466, loss: 1.4999065399169922 2023-01-22 09:53:13.472881: step: 502/466, loss: 0.929957926273346 2023-01-22 09:53:14.161809: step: 504/466, loss: 1.8211406469345093 2023-01-22 09:53:14.744763: step: 506/466, loss: 3.3952579498291016 2023-01-22 09:53:15.380422: step: 508/466, loss: 0.4057934880256653 2023-01-22 09:53:15.975379: step: 510/466, loss: 4.130643367767334 2023-01-22 09:53:16.535094: step: 512/466, loss: 1.2591372728347778 2023-01-22 09:53:17.148763: step: 514/466, loss: 0.8045276999473572 2023-01-22 09:53:17.754926: step: 516/466, loss: 1.412993311882019 2023-01-22 09:53:18.390777: step: 518/466, loss: 1.677248239517212 2023-01-22 09:53:18.997162: step: 520/466, loss: 2.805729389190674 2023-01-22 09:53:19.607852: step: 522/466, loss: 1.2893203496932983 2023-01-22 09:53:20.218129: step: 524/466, loss: 1.1867783069610596 2023-01-22 09:53:20.823594: step: 526/466, loss: 0.3541945219039917 2023-01-22 09:53:21.509021: step: 528/466, loss: 0.5837615132331848 2023-01-22 09:53:22.170096: step: 530/466, loss: 0.3464820981025696 2023-01-22 09:53:22.798943: step: 532/466, loss: 1.0019688606262207 2023-01-22 09:53:23.437935: step: 534/466, loss: 2.7500970363616943 2023-01-22 09:53:24.132852: step: 536/466, loss: 1.2855634689331055 2023-01-22 09:53:24.676939: step: 538/466, loss: 1.05565345287323 2023-01-22 09:53:25.289883: step: 540/466, loss: 1.951469898223877 2023-01-22 09:53:25.923067: step: 542/466, loss: 1.0896230936050415 2023-01-22 09:53:26.556679: step: 544/466, loss: 2.1885619163513184 2023-01-22 09:53:27.167751: step: 546/466, loss: 0.2746879756450653 2023-01-22 09:53:27.792843: step: 548/466, loss: 0.5674010515213013 2023-01-22 09:53:28.419880: step: 550/466, loss: 1.5061687231063843 2023-01-22 09:53:28.993256: step: 552/466, loss: 3.505814790725708 2023-01-22 09:53:29.612356: step: 554/466, loss: 0.5314408540725708 2023-01-22 09:53:30.190921: step: 556/466, loss: 0.5787690877914429 2023-01-22 09:53:30.785228: step: 558/466, loss: 1.5390510559082031 2023-01-22 09:53:31.382851: step: 560/466, loss: 2.987964153289795 2023-01-22 09:53:32.002234: step: 562/466, loss: 0.6447228193283081 2023-01-22 09:53:32.614583: step: 564/466, loss: 0.7427317500114441 2023-01-22 09:53:33.177142: step: 566/466, loss: 0.6331799030303955 2023-01-22 09:53:33.799042: step: 568/466, loss: 2.916682481765747 2023-01-22 09:53:34.437465: step: 570/466, loss: 1.3643766641616821 2023-01-22 09:53:34.997893: step: 572/466, loss: 0.6767655611038208 2023-01-22 09:53:35.573554: step: 574/466, loss: 0.1964035928249359 2023-01-22 09:53:36.198939: step: 576/466, loss: 0.30235856771469116 2023-01-22 09:53:36.733824: step: 578/466, loss: 1.5001364946365356 2023-01-22 09:53:37.396475: step: 580/466, loss: 2.098022937774658 2023-01-22 09:53:37.956410: step: 582/466, loss: 1.3338154554367065 2023-01-22 09:53:38.599402: step: 584/466, loss: 1.5174646377563477 2023-01-22 09:53:39.269759: step: 586/466, loss: 2.505064010620117 2023-01-22 09:53:39.904073: step: 588/466, loss: 1.6157104969024658 2023-01-22 09:53:40.652207: step: 590/466, loss: 0.6165828108787537 2023-01-22 09:53:41.276243: step: 592/466, loss: 0.4426193833351135 2023-01-22 09:53:41.908615: step: 594/466, loss: 0.5435011982917786 2023-01-22 09:53:42.453018: step: 596/466, loss: 1.061076045036316 2023-01-22 09:53:43.012251: step: 598/466, loss: 0.27508556842803955 2023-01-22 09:53:43.639673: step: 600/466, loss: 1.2685134410858154 2023-01-22 09:53:44.222293: step: 602/466, loss: 0.46343937516212463 2023-01-22 09:53:44.872858: step: 604/466, loss: 0.30765286087989807 2023-01-22 09:53:45.476904: step: 606/466, loss: 0.6040436029434204 2023-01-22 09:53:46.069226: step: 608/466, loss: 1.361868977546692 2023-01-22 09:53:46.721268: step: 610/466, loss: 1.4016616344451904 2023-01-22 09:53:47.418679: step: 612/466, loss: 0.5135511159896851 2023-01-22 09:53:48.069528: step: 614/466, loss: 0.5744441747665405 2023-01-22 09:53:48.706376: step: 616/466, loss: 2.2033591270446777 2023-01-22 09:53:49.306166: step: 618/466, loss: 0.8985404968261719 2023-01-22 09:53:49.935371: step: 620/466, loss: 0.9845884442329407 2023-01-22 09:53:50.583977: step: 622/466, loss: 1.1178152561187744 2023-01-22 09:53:51.246433: step: 624/466, loss: 0.418687105178833 2023-01-22 09:53:51.822603: step: 626/466, loss: 0.262470543384552 2023-01-22 09:53:52.396025: step: 628/466, loss: 0.7660725116729736 2023-01-22 09:53:53.006966: step: 630/466, loss: 0.33508577942848206 2023-01-22 09:53:53.641903: step: 632/466, loss: 0.1842721551656723 2023-01-22 09:53:54.282670: step: 634/466, loss: 1.1078568696975708 2023-01-22 09:53:54.914433: step: 636/466, loss: 0.8883263468742371 2023-01-22 09:53:55.538850: step: 638/466, loss: 1.2047715187072754 2023-01-22 09:53:56.108889: step: 640/466, loss: 0.20477718114852905 2023-01-22 09:53:56.737883: step: 642/466, loss: 1.3275189399719238 2023-01-22 09:53:57.295619: step: 644/466, loss: 0.3288031816482544 2023-01-22 09:53:57.850376: step: 646/466, loss: 0.4039969742298126 2023-01-22 09:53:58.423517: step: 648/466, loss: 5.4602766036987305 2023-01-22 09:53:59.061639: step: 650/466, loss: 1.0883431434631348 2023-01-22 09:53:59.653018: step: 652/466, loss: 0.9030593633651733 2023-01-22 09:54:00.240292: step: 654/466, loss: 2.0893757343292236 2023-01-22 09:54:00.824778: step: 656/466, loss: 10.055910110473633 2023-01-22 09:54:01.527522: step: 658/466, loss: 1.5657265186309814 2023-01-22 09:54:02.145767: step: 660/466, loss: 0.8252211809158325 2023-01-22 09:54:02.734850: step: 662/466, loss: 0.2250550538301468 2023-01-22 09:54:03.365954: step: 664/466, loss: 1.1556295156478882 2023-01-22 09:54:03.967420: step: 666/466, loss: 3.229569435119629 2023-01-22 09:54:04.631483: step: 668/466, loss: 0.4573105573654175 2023-01-22 09:54:05.204711: step: 670/466, loss: 0.7028440237045288 2023-01-22 09:54:05.844831: step: 672/466, loss: 0.8527745604515076 2023-01-22 09:54:06.448489: step: 674/466, loss: 0.24894112348556519 2023-01-22 09:54:07.093159: step: 676/466, loss: 1.8360977172851562 2023-01-22 09:54:07.720475: step: 678/466, loss: 0.7502386569976807 2023-01-22 09:54:08.322035: step: 680/466, loss: 0.5243229866027832 2023-01-22 09:54:08.965676: step: 682/466, loss: 0.5677071809768677 2023-01-22 09:54:09.534652: step: 684/466, loss: 0.9893231391906738 2023-01-22 09:54:10.190493: step: 686/466, loss: 0.9406887888908386 2023-01-22 09:54:10.799297: step: 688/466, loss: 1.7328908443450928 2023-01-22 09:54:11.480671: step: 690/466, loss: 0.5731059908866882 2023-01-22 09:54:12.096088: step: 692/466, loss: 3.6121439933776855 2023-01-22 09:54:12.700132: step: 694/466, loss: 0.5973498821258545 2023-01-22 09:54:13.306987: step: 696/466, loss: 1.0482425689697266 2023-01-22 09:54:13.914375: step: 698/466, loss: 0.5279887318611145 2023-01-22 09:54:14.468372: step: 700/466, loss: 1.2407571077346802 2023-01-22 09:54:15.068227: step: 702/466, loss: 0.5543396472930908 2023-01-22 09:54:15.720214: step: 704/466, loss: 0.9664677381515503 2023-01-22 09:54:16.317760: step: 706/466, loss: 1.2225338220596313 2023-01-22 09:54:16.982005: step: 708/466, loss: 1.5828288793563843 2023-01-22 09:54:17.611852: step: 710/466, loss: 0.4475824236869812 2023-01-22 09:54:18.221574: step: 712/466, loss: 0.6657191514968872 2023-01-22 09:54:18.883249: step: 714/466, loss: 2.9985835552215576 2023-01-22 09:54:19.556005: step: 716/466, loss: 0.8259701132774353 2023-01-22 09:54:20.241234: step: 718/466, loss: 0.8415499925613403 2023-01-22 09:54:20.846269: step: 720/466, loss: 0.903465986251831 2023-01-22 09:54:21.478353: step: 722/466, loss: 2.4392549991607666 2023-01-22 09:54:22.118411: step: 724/466, loss: 0.33140289783477783 2023-01-22 09:54:22.721001: step: 726/466, loss: 3.3589072227478027 2023-01-22 09:54:23.307563: step: 728/466, loss: 3.946051597595215 2023-01-22 09:54:24.022088: step: 730/466, loss: 1.526811957359314 2023-01-22 09:54:24.668027: step: 732/466, loss: 0.24000266194343567 2023-01-22 09:54:25.250175: step: 734/466, loss: 0.7835390567779541 2023-01-22 09:54:25.864640: step: 736/466, loss: 0.4948054254055023 2023-01-22 09:54:26.503617: step: 738/466, loss: 0.7074456810951233 2023-01-22 09:54:27.141693: step: 740/466, loss: 0.4379880428314209 2023-01-22 09:54:27.879053: step: 742/466, loss: 0.6280728578567505 2023-01-22 09:54:28.531442: step: 744/466, loss: 4.928013801574707 2023-01-22 09:54:29.206269: step: 746/466, loss: 2.8795113563537598 2023-01-22 09:54:29.842718: step: 748/466, loss: 0.34982365369796753 2023-01-22 09:54:30.462915: step: 750/466, loss: 0.8757614493370056 2023-01-22 09:54:31.069337: step: 752/466, loss: 0.3209124207496643 2023-01-22 09:54:31.725680: step: 754/466, loss: 1.0372759103775024 2023-01-22 09:54:32.456662: step: 756/466, loss: 0.6970776915550232 2023-01-22 09:54:33.132624: step: 758/466, loss: 0.7311455607414246 2023-01-22 09:54:33.762625: step: 760/466, loss: 2.0544915199279785 2023-01-22 09:54:34.395252: step: 762/466, loss: 0.8903125524520874 2023-01-22 09:54:35.032460: step: 764/466, loss: 1.341835856437683 2023-01-22 09:54:35.718560: step: 766/466, loss: 0.5011296272277832 2023-01-22 09:54:36.342100: step: 768/466, loss: 2.6994924545288086 2023-01-22 09:54:37.003397: step: 770/466, loss: 1.665336012840271 2023-01-22 09:54:37.616368: step: 772/466, loss: 1.8195743560791016 2023-01-22 09:54:38.228220: step: 774/466, loss: 1.832624912261963 2023-01-22 09:54:38.838360: step: 776/466, loss: 0.27986520528793335 2023-01-22 09:54:39.518825: step: 778/466, loss: 0.9908888339996338 2023-01-22 09:54:40.117071: step: 780/466, loss: 0.2665031850337982 2023-01-22 09:54:40.882640: step: 782/466, loss: 0.5017292499542236 2023-01-22 09:54:41.622974: step: 784/466, loss: 1.8286349773406982 2023-01-22 09:54:42.277614: step: 786/466, loss: 0.8838155269622803 2023-01-22 09:54:42.931595: step: 788/466, loss: 0.8231385946273804 2023-01-22 09:54:43.613679: step: 790/466, loss: 1.1516478061676025 2023-01-22 09:54:44.250349: step: 792/466, loss: 2.0130913257598877 2023-01-22 09:54:44.899780: step: 794/466, loss: 1.003012776374817 2023-01-22 09:54:45.520798: step: 796/466, loss: 1.2084956169128418 2023-01-22 09:54:46.187727: step: 798/466, loss: 1.0133253335952759 2023-01-22 09:54:46.832657: step: 800/466, loss: 2.3319003582000732 2023-01-22 09:54:47.464039: step: 802/466, loss: 2.432572364807129 2023-01-22 09:54:48.058189: step: 804/466, loss: 2.595862627029419 2023-01-22 09:54:48.721551: step: 806/466, loss: 0.9174851179122925 2023-01-22 09:54:49.372383: step: 808/466, loss: 0.8310629725456238 2023-01-22 09:54:50.049668: step: 810/466, loss: 0.9610335826873779 2023-01-22 09:54:50.650738: step: 812/466, loss: 0.6178631782531738 2023-01-22 09:54:51.243298: step: 814/466, loss: 0.8955137729644775 2023-01-22 09:54:51.870314: step: 816/466, loss: 0.37296637892723083 2023-01-22 09:54:52.481887: step: 818/466, loss: 6.648438453674316 2023-01-22 09:54:53.072708: step: 820/466, loss: 0.9201613068580627 2023-01-22 09:54:53.694919: step: 822/466, loss: 0.6568300724029541 2023-01-22 09:54:54.306758: step: 824/466, loss: 1.3470970392227173 2023-01-22 09:54:54.938703: step: 826/466, loss: 1.4039759635925293 2023-01-22 09:54:55.611870: step: 828/466, loss: 0.38886359333992004 2023-01-22 09:54:56.192420: step: 830/466, loss: 1.1843596696853638 2023-01-22 09:54:56.765363: step: 832/466, loss: 2.6176464557647705 2023-01-22 09:54:57.391695: step: 834/466, loss: 1.1597771644592285 2023-01-22 09:54:57.988625: step: 836/466, loss: 0.3963584899902344 2023-01-22 09:54:58.628532: step: 838/466, loss: 0.2508530020713806 2023-01-22 09:54:59.174787: step: 840/466, loss: 0.8605871200561523 2023-01-22 09:54:59.799969: step: 842/466, loss: 1.2558029890060425 2023-01-22 09:55:00.360034: step: 844/466, loss: 1.2138640880584717 2023-01-22 09:55:00.988122: step: 846/466, loss: 0.5845181345939636 2023-01-22 09:55:01.687507: step: 848/466, loss: 1.2057701349258423 2023-01-22 09:55:02.296807: step: 850/466, loss: 1.1409090757369995 2023-01-22 09:55:02.903182: step: 852/466, loss: 1.0597467422485352 2023-01-22 09:55:03.471603: step: 854/466, loss: 0.5218050479888916 2023-01-22 09:55:04.094390: step: 856/466, loss: 0.7265652418136597 2023-01-22 09:55:04.759456: step: 858/466, loss: 2.1826112270355225 2023-01-22 09:55:05.393754: step: 860/466, loss: 0.37034282088279724 2023-01-22 09:55:06.068146: step: 862/466, loss: 2.7625083923339844 2023-01-22 09:55:06.696019: step: 864/466, loss: 0.6565631628036499 2023-01-22 09:55:07.261607: step: 866/466, loss: 1.849884271621704 2023-01-22 09:55:07.911274: step: 868/466, loss: 0.7041193842887878 2023-01-22 09:55:08.531059: step: 870/466, loss: 0.3684704601764679 2023-01-22 09:55:09.190228: step: 872/466, loss: 0.36822181940078735 2023-01-22 09:55:09.810273: step: 874/466, loss: 0.5671539306640625 2023-01-22 09:55:10.457810: step: 876/466, loss: 0.28059229254722595 2023-01-22 09:55:11.101465: step: 878/466, loss: 0.19189174473285675 2023-01-22 09:55:11.667603: step: 880/466, loss: 0.7543321251869202 2023-01-22 09:55:12.236669: step: 882/466, loss: 1.4416909217834473 2023-01-22 09:55:12.814439: step: 884/466, loss: 0.3737231492996216 2023-01-22 09:55:13.500550: step: 886/466, loss: 0.37167057394981384 2023-01-22 09:55:14.159903: step: 888/466, loss: 0.3493468165397644 2023-01-22 09:55:14.753685: step: 890/466, loss: 0.610969603061676 2023-01-22 09:55:15.373889: step: 892/466, loss: 0.42475399374961853 2023-01-22 09:55:15.999490: step: 894/466, loss: 0.17129488289356232 2023-01-22 09:55:16.649607: step: 896/466, loss: 4.020137786865234 2023-01-22 09:55:17.225073: step: 898/466, loss: 0.6816555261611938 2023-01-22 09:55:17.806258: step: 900/466, loss: 1.2390077114105225 2023-01-22 09:55:18.384257: step: 902/466, loss: 0.4940222501754761 2023-01-22 09:55:19.048432: step: 904/466, loss: 0.15959131717681885 2023-01-22 09:55:19.691622: step: 906/466, loss: 2.1150925159454346 2023-01-22 09:55:20.258996: step: 908/466, loss: 1.8174481391906738 2023-01-22 09:55:20.817432: step: 910/466, loss: 3.087872266769409 2023-01-22 09:55:21.433002: step: 912/466, loss: 0.49890315532684326 2023-01-22 09:55:22.032742: step: 914/466, loss: 1.5742014646530151 2023-01-22 09:55:22.630379: step: 916/466, loss: 0.7117514610290527 2023-01-22 09:55:23.192939: step: 918/466, loss: 1.0011276006698608 2023-01-22 09:55:23.889645: step: 920/466, loss: 1.4318996667861938 2023-01-22 09:55:24.471883: step: 922/466, loss: 0.8991138935089111 2023-01-22 09:55:25.035951: step: 924/466, loss: 0.5053771734237671 2023-01-22 09:55:25.605662: step: 926/466, loss: 2.1333112716674805 2023-01-22 09:55:26.185904: step: 928/466, loss: 4.616252422332764 2023-01-22 09:55:26.794859: step: 930/466, loss: 4.1801652908325195 2023-01-22 09:55:27.398718: step: 932/466, loss: 3.0591838359832764 ================================================== Loss: 1.294 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2867901015361953, 'r': 0.31345559484790986, 'f1': 0.2995305502898431}, 'combined': 0.22070672126620017, 'epoch': 3} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3304165661141477, 'r': 0.2916111662941787, 'f1': 0.30980342254005405}, 'combined': 0.20546548230635706, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2838856993736952, 'r': 0.2575402462121212, 'f1': 0.27007199602780535}, 'combined': 0.18004799735187021, 'epoch': 3} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3435604264873021, 'r': 0.2753230360796342, 'f1': 0.305679842550543}, 'combined': 0.1994963182961438, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2858411526903795, 'r': 0.3156727720413679, 'f1': 0.30001722428458233}, 'combined': 0.22106532315706065, 'epoch': 3} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32636076268334185, 'r': 0.28224266998091435, 'f1': 0.30270264048138956}, 'combined': 0.20075615534517025, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23809523809523808, 'r': 0.38095238095238093, 'f1': 0.293040293040293}, 'combined': 0.19536019536019533, 'epoch': 3} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.29347826086956524, 'f1': 0.3461538461538462}, 'combined': 0.23076923076923078, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44642857142857145, 'r': 0.21551724137931033, 'f1': 0.2906976744186046}, 'combined': 0.19379844961240306, 'epoch': 3} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2867901015361953, 'r': 0.31345559484790986, 'f1': 0.2995305502898431}, 'combined': 0.22070672126620017, 'epoch': 3} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3304165661141477, 'r': 0.2916111662941787, 'f1': 0.30980342254005405}, 'combined': 0.20546548230635706, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23809523809523808, 'r': 0.38095238095238093, 'f1': 0.293040293040293}, 'combined': 0.19536019536019533, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2838856993736952, 'r': 0.2575402462121212, 'f1': 0.27007199602780535}, 'combined': 0.18004799735187021, 'epoch': 3} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3435604264873021, 'r': 0.2753230360796342, 'f1': 0.305679842550543}, 'combined': 0.1994963182961438, 'epoch': 3} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.29347826086956524, 'f1': 0.3461538461538462}, 'combined': 0.23076923076923078, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2858411526903795, 'r': 0.3156727720413679, 'f1': 0.30001722428458233}, 'combined': 0.22106532315706065, 'epoch': 3} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32636076268334185, 'r': 0.28224266998091435, 'f1': 0.30270264048138956}, 'combined': 0.20075615534517025, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44642857142857145, 'r': 0.21551724137931033, 'f1': 0.2906976744186046}, 'combined': 0.19379844961240306, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:58:22.950702: step: 2/466, loss: 0.16752889752388 2023-01-22 09:58:23.663519: step: 4/466, loss: 0.6875675916671753 2023-01-22 09:58:24.241915: step: 6/466, loss: 0.975544810295105 2023-01-22 09:58:24.832426: step: 8/466, loss: 0.9720039367675781 2023-01-22 09:58:25.404478: step: 10/466, loss: 0.6962733864784241 2023-01-22 09:58:26.014612: step: 12/466, loss: 0.2384537160396576 2023-01-22 09:58:26.681800: step: 14/466, loss: 0.6073007583618164 2023-01-22 09:58:27.275483: step: 16/466, loss: 0.2983320355415344 2023-01-22 09:58:27.933263: step: 18/466, loss: 0.5829412341117859 2023-01-22 09:58:28.541729: step: 20/466, loss: 0.6241064071655273 2023-01-22 09:58:29.229319: step: 22/466, loss: 2.6800174713134766 2023-01-22 09:58:29.925009: step: 24/466, loss: 0.8684090971946716 2023-01-22 09:58:30.517385: step: 26/466, loss: 0.8638682961463928 2023-01-22 09:58:31.156739: step: 28/466, loss: 1.8441447019577026 2023-01-22 09:58:31.739935: step: 30/466, loss: 0.3063080310821533 2023-01-22 09:58:32.385819: step: 32/466, loss: 1.5212368965148926 2023-01-22 09:58:32.997308: step: 34/466, loss: 1.479271650314331 2023-01-22 09:58:33.631955: step: 36/466, loss: 0.17555014789104462 2023-01-22 09:58:34.203298: step: 38/466, loss: 1.2827237844467163 2023-01-22 09:58:34.930263: step: 40/466, loss: 0.8869252800941467 2023-01-22 09:58:35.545779: step: 42/466, loss: 1.5399391651153564 2023-01-22 09:58:36.190996: step: 44/466, loss: 0.3328384459018707 2023-01-22 09:58:36.801276: step: 46/466, loss: 1.2157955169677734 2023-01-22 09:58:37.444685: step: 48/466, loss: 0.8616793751716614 2023-01-22 09:58:38.026858: step: 50/466, loss: 0.5039883852005005 2023-01-22 09:58:38.615877: step: 52/466, loss: 0.5184072256088257 2023-01-22 09:58:39.255883: step: 54/466, loss: 0.4360441267490387 2023-01-22 09:58:39.917741: step: 56/466, loss: 0.6127593517303467 2023-01-22 09:58:40.582476: step: 58/466, loss: 0.19667549431324005 2023-01-22 09:58:41.149618: step: 60/466, loss: 0.21572081744670868 2023-01-22 09:58:41.788600: step: 62/466, loss: 0.9960227012634277 2023-01-22 09:58:42.431515: step: 64/466, loss: 1.609165072441101 2023-01-22 09:58:43.023417: step: 66/466, loss: 0.8556930422782898 2023-01-22 09:58:43.537736: step: 68/466, loss: 1.347257137298584 2023-01-22 09:58:44.150289: step: 70/466, loss: 2.176039934158325 2023-01-22 09:58:44.788610: step: 72/466, loss: 1.7566003799438477 2023-01-22 09:58:45.373869: step: 74/466, loss: 0.6123693585395813 2023-01-22 09:58:46.057245: step: 76/466, loss: 0.9963200092315674 2023-01-22 09:58:46.701715: step: 78/466, loss: 0.6093348264694214 2023-01-22 09:58:47.318239: step: 80/466, loss: 0.748720109462738 2023-01-22 09:58:47.863875: step: 82/466, loss: 1.1287039518356323 2023-01-22 09:58:48.490998: step: 84/466, loss: 0.6180115342140198 2023-01-22 09:58:49.120229: step: 86/466, loss: 0.4055139422416687 2023-01-22 09:58:49.706980: step: 88/466, loss: 0.2868320345878601 2023-01-22 09:58:50.243126: step: 90/466, loss: 0.8599556088447571 2023-01-22 09:58:50.866821: step: 92/466, loss: 0.8055647611618042 2023-01-22 09:58:51.457575: step: 94/466, loss: 1.122262716293335 2023-01-22 09:58:52.065926: step: 96/466, loss: 3.749009609222412 2023-01-22 09:58:52.645742: step: 98/466, loss: 0.796328604221344 2023-01-22 09:58:53.302503: step: 100/466, loss: 0.7650906443595886 2023-01-22 09:58:53.917413: step: 102/466, loss: 0.49554768204689026 2023-01-22 09:58:54.491383: step: 104/466, loss: 0.6650872826576233 2023-01-22 09:58:55.097925: step: 106/466, loss: 0.3901982009410858 2023-01-22 09:58:55.740566: step: 108/466, loss: 0.28979477286338806 2023-01-22 09:58:56.362615: step: 110/466, loss: 0.5083305835723877 2023-01-22 09:58:57.004224: step: 112/466, loss: 2.831669330596924 2023-01-22 09:58:57.600854: step: 114/466, loss: 0.48879584670066833 2023-01-22 09:58:58.235915: step: 116/466, loss: 0.2785993814468384 2023-01-22 09:58:58.778653: step: 118/466, loss: 0.8611894845962524 2023-01-22 09:58:59.391211: step: 120/466, loss: 0.3948362171649933 2023-01-22 09:59:00.034484: step: 122/466, loss: 0.7170612812042236 2023-01-22 09:59:00.661478: step: 124/466, loss: 0.4916868507862091 2023-01-22 09:59:01.280392: step: 126/466, loss: 2.437495470046997 2023-01-22 09:59:01.867902: step: 128/466, loss: 6.306944847106934 2023-01-22 09:59:02.430065: step: 130/466, loss: 0.627547562122345 2023-01-22 09:59:02.983542: step: 132/466, loss: 0.506788969039917 2023-01-22 09:59:03.687160: step: 134/466, loss: 0.49227237701416016 2023-01-22 09:59:04.311447: step: 136/466, loss: 0.7065977454185486 2023-01-22 09:59:04.898909: step: 138/466, loss: 0.7459494471549988 2023-01-22 09:59:05.534123: step: 140/466, loss: 0.45485028624534607 2023-01-22 09:59:06.199162: step: 142/466, loss: 0.43375688791275024 2023-01-22 09:59:06.796450: step: 144/466, loss: 1.2365134954452515 2023-01-22 09:59:07.382264: step: 146/466, loss: 0.7009040713310242 2023-01-22 09:59:07.989238: step: 148/466, loss: 0.6090517044067383 2023-01-22 09:59:08.626853: step: 150/466, loss: 0.27691200375556946 2023-01-22 09:59:09.202957: step: 152/466, loss: 1.0735443830490112 2023-01-22 09:59:09.783661: step: 154/466, loss: 0.44410645961761475 2023-01-22 09:59:10.347999: step: 156/466, loss: 0.39195290207862854 2023-01-22 09:59:10.991308: step: 158/466, loss: 1.1745972633361816 2023-01-22 09:59:11.629308: step: 160/466, loss: 0.39246582984924316 2023-01-22 09:59:12.234211: step: 162/466, loss: 0.6164143085479736 2023-01-22 09:59:12.852492: step: 164/466, loss: 0.07469842582941055 2023-01-22 09:59:13.450619: step: 166/466, loss: 0.6378391981124878 2023-01-22 09:59:14.015507: step: 168/466, loss: 1.5078856945037842 2023-01-22 09:59:14.634384: step: 170/466, loss: 0.5053169131278992 2023-01-22 09:59:15.248205: step: 172/466, loss: 2.8393361568450928 2023-01-22 09:59:15.764848: step: 174/466, loss: 0.15515677630901337 2023-01-22 09:59:16.415200: step: 176/466, loss: 0.2646979093551636 2023-01-22 09:59:17.106966: step: 178/466, loss: 0.32472139596939087 2023-01-22 09:59:17.791036: step: 180/466, loss: 0.7028464674949646 2023-01-22 09:59:18.414024: step: 182/466, loss: 0.5240946412086487 2023-01-22 09:59:18.977895: step: 184/466, loss: 0.2585528790950775 2023-01-22 09:59:19.560847: step: 186/466, loss: 0.3905927836894989 2023-01-22 09:59:20.147240: step: 188/466, loss: 0.298913836479187 2023-01-22 09:59:20.727511: step: 190/466, loss: 0.22440190613269806 2023-01-22 09:59:21.370885: step: 192/466, loss: 0.4986995458602905 2023-01-22 09:59:21.932078: step: 194/466, loss: 2.0994832515716553 2023-01-22 09:59:22.505008: step: 196/466, loss: 1.9224767684936523 2023-01-22 09:59:23.133298: step: 198/466, loss: 0.8672666549682617 2023-01-22 09:59:23.819161: step: 200/466, loss: 3.985262393951416 2023-01-22 09:59:24.421580: step: 202/466, loss: 0.5150429606437683 2023-01-22 09:59:25.012049: step: 204/466, loss: 1.835949182510376 2023-01-22 09:59:25.661486: step: 206/466, loss: 1.2212414741516113 2023-01-22 09:59:26.283234: step: 208/466, loss: 1.9496268033981323 2023-01-22 09:59:26.942297: step: 210/466, loss: 0.5779752135276794 2023-01-22 09:59:27.500873: step: 212/466, loss: 0.5665013194084167 2023-01-22 09:59:28.084656: step: 214/466, loss: 1.4755817651748657 2023-01-22 09:59:28.735951: step: 216/466, loss: 1.2451655864715576 2023-01-22 09:59:29.345276: step: 218/466, loss: 0.9476553201675415 2023-01-22 09:59:30.000216: step: 220/466, loss: 0.3696775436401367 2023-01-22 09:59:30.574292: step: 222/466, loss: 2.8585143089294434 2023-01-22 09:59:31.174434: step: 224/466, loss: 0.5285619497299194 2023-01-22 09:59:31.784080: step: 226/466, loss: 1.3603274822235107 2023-01-22 09:59:32.373956: step: 228/466, loss: 0.6532608270645142 2023-01-22 09:59:32.991080: step: 230/466, loss: 0.1637951135635376 2023-01-22 09:59:33.556657: step: 232/466, loss: 0.4016510248184204 2023-01-22 09:59:34.215676: step: 234/466, loss: 0.8992332816123962 2023-01-22 09:59:34.830395: step: 236/466, loss: 0.87233966588974 2023-01-22 09:59:35.512172: step: 238/466, loss: 1.3923922777175903 2023-01-22 09:59:36.142199: step: 240/466, loss: 0.3028479218482971 2023-01-22 09:59:36.749696: step: 242/466, loss: 5.808621883392334 2023-01-22 09:59:37.339839: step: 244/466, loss: 0.9096077680587769 2023-01-22 09:59:38.022557: step: 246/466, loss: 2.775336742401123 2023-01-22 09:59:38.636868: step: 248/466, loss: 0.6801366806030273 2023-01-22 09:59:39.215845: step: 250/466, loss: 0.4254688024520874 2023-01-22 09:59:39.847037: step: 252/466, loss: 0.8711274862289429 2023-01-22 09:59:40.470561: step: 254/466, loss: 0.5454870462417603 2023-01-22 09:59:41.069163: step: 256/466, loss: 1.483837366104126 2023-01-22 09:59:41.727149: step: 258/466, loss: 0.21793553233146667 2023-01-22 09:59:42.352151: step: 260/466, loss: 1.0067871809005737 2023-01-22 09:59:42.983634: step: 262/466, loss: 0.9667119979858398 2023-01-22 09:59:43.658206: step: 264/466, loss: 0.24902909994125366 2023-01-22 09:59:44.278407: step: 266/466, loss: 0.7938571572303772 2023-01-22 09:59:44.909785: step: 268/466, loss: 0.6056684255599976 2023-01-22 09:59:45.511715: step: 270/466, loss: 1.96467924118042 2023-01-22 09:59:46.086375: step: 272/466, loss: 1.3282179832458496 2023-01-22 09:59:46.783337: step: 274/466, loss: 0.4778505563735962 2023-01-22 09:59:47.342880: step: 276/466, loss: 1.424825668334961 2023-01-22 09:59:48.025560: step: 278/466, loss: 0.8274783492088318 2023-01-22 09:59:48.635653: step: 280/466, loss: 1.7603840827941895 2023-01-22 09:59:49.199941: step: 282/466, loss: 0.4785130023956299 2023-01-22 09:59:49.840239: step: 284/466, loss: 2.4316341876983643 2023-01-22 09:59:50.410239: step: 286/466, loss: 0.45507317781448364 2023-01-22 09:59:51.024029: step: 288/466, loss: 1.6569828987121582 2023-01-22 09:59:51.625437: step: 290/466, loss: 1.6912707090377808 2023-01-22 09:59:52.215342: step: 292/466, loss: 0.99015212059021 2023-01-22 09:59:52.874310: step: 294/466, loss: 0.8423710465431213 2023-01-22 09:59:53.450944: step: 296/466, loss: 0.3664628565311432 2023-01-22 09:59:54.082783: step: 298/466, loss: 0.5789371728897095 2023-01-22 09:59:54.655601: step: 300/466, loss: 0.3125426173210144 2023-01-22 09:59:55.295829: step: 302/466, loss: 0.1801319122314453 2023-01-22 09:59:55.886660: step: 304/466, loss: 0.6854432821273804 2023-01-22 09:59:56.485472: step: 306/466, loss: 3.1088051795959473 2023-01-22 09:59:57.074180: step: 308/466, loss: 0.9818800687789917 2023-01-22 09:59:57.674543: step: 310/466, loss: 0.406833291053772 2023-01-22 09:59:58.320634: step: 312/466, loss: 1.8748011589050293 2023-01-22 09:59:58.903637: step: 314/466, loss: 1.398498296737671 2023-01-22 09:59:59.534410: step: 316/466, loss: 0.4191115200519562 2023-01-22 10:00:00.126971: step: 318/466, loss: 2.0664098262786865 2023-01-22 10:00:00.718178: step: 320/466, loss: 0.5372059345245361 2023-01-22 10:00:01.410105: step: 322/466, loss: 0.47565436363220215 2023-01-22 10:00:02.008361: step: 324/466, loss: 1.0751205682754517 2023-01-22 10:00:02.644331: step: 326/466, loss: 0.6654729843139648 2023-01-22 10:00:03.351475: step: 328/466, loss: 0.6316863894462585 2023-01-22 10:00:03.939222: step: 330/466, loss: 1.9781851768493652 2023-01-22 10:00:04.583319: step: 332/466, loss: 0.558565080165863 2023-01-22 10:00:05.250996: step: 334/466, loss: 0.36841893196105957 2023-01-22 10:00:05.822174: step: 336/466, loss: 0.37612828612327576 2023-01-22 10:00:06.375227: step: 338/466, loss: 0.2611583173274994 2023-01-22 10:00:06.938454: step: 340/466, loss: 0.8179485201835632 2023-01-22 10:00:07.547610: step: 342/466, loss: 0.5163973569869995 2023-01-22 10:00:08.160756: step: 344/466, loss: 0.3190925121307373 2023-01-22 10:00:08.733752: step: 346/466, loss: 1.341943621635437 2023-01-22 10:00:09.349997: step: 348/466, loss: 1.0306843519210815 2023-01-22 10:00:10.009581: step: 350/466, loss: 0.23381835222244263 2023-01-22 10:00:10.607395: step: 352/466, loss: 0.32457369565963745 2023-01-22 10:00:11.223529: step: 354/466, loss: 1.210745096206665 2023-01-22 10:00:11.746568: step: 356/466, loss: 1.895652413368225 2023-01-22 10:00:12.345538: step: 358/466, loss: 0.8307660222053528 2023-01-22 10:00:12.969675: step: 360/466, loss: 0.8706965446472168 2023-01-22 10:00:13.543536: step: 362/466, loss: 0.27401426434516907 2023-01-22 10:00:14.142821: step: 364/466, loss: 1.4053891897201538 2023-01-22 10:00:14.732301: step: 366/466, loss: 0.9843682646751404 2023-01-22 10:00:15.303799: step: 368/466, loss: 0.8457871675491333 2023-01-22 10:00:15.909147: step: 370/466, loss: 0.647186815738678 2023-01-22 10:00:16.512205: step: 372/466, loss: 0.32200056314468384 2023-01-22 10:00:17.001063: step: 374/466, loss: 0.6154910326004028 2023-01-22 10:00:17.589740: step: 376/466, loss: 0.2628232538700104 2023-01-22 10:00:18.184848: step: 378/466, loss: 1.383215069770813 2023-01-22 10:00:18.799241: step: 380/466, loss: 1.0806177854537964 2023-01-22 10:00:19.374344: step: 382/466, loss: 0.937576949596405 2023-01-22 10:00:19.974468: step: 384/466, loss: 0.881920576095581 2023-01-22 10:00:20.589665: step: 386/466, loss: 1.1035183668136597 2023-01-22 10:00:21.205797: step: 388/466, loss: 1.3505921363830566 2023-01-22 10:00:21.825032: step: 390/466, loss: 0.593630850315094 2023-01-22 10:00:22.461253: step: 392/466, loss: 1.1212100982666016 2023-01-22 10:00:23.079809: step: 394/466, loss: 1.7661340236663818 2023-01-22 10:00:23.732876: step: 396/466, loss: 0.6906561255455017 2023-01-22 10:00:24.439816: step: 398/466, loss: 1.5854777097702026 2023-01-22 10:00:25.027225: step: 400/466, loss: 1.0544898509979248 2023-01-22 10:00:25.631105: step: 402/466, loss: 0.3514784276485443 2023-01-22 10:00:26.228711: step: 404/466, loss: 1.0262101888656616 2023-01-22 10:00:26.863236: step: 406/466, loss: 2.7127935886383057 2023-01-22 10:00:27.532542: step: 408/466, loss: 9.940874099731445 2023-01-22 10:00:28.176816: step: 410/466, loss: 0.32826903462409973 2023-01-22 10:00:28.743790: step: 412/466, loss: 2.594214916229248 2023-01-22 10:00:29.357474: step: 414/466, loss: 1.0893781185150146 2023-01-22 10:00:29.923268: step: 416/466, loss: 0.3404262661933899 2023-01-22 10:00:30.532719: step: 418/466, loss: 0.8813122510910034 2023-01-22 10:00:31.190286: step: 420/466, loss: 0.8629094362258911 2023-01-22 10:00:31.795923: step: 422/466, loss: 1.0467783212661743 2023-01-22 10:00:32.376211: step: 424/466, loss: 0.9128414988517761 2023-01-22 10:00:32.935363: step: 426/466, loss: 0.3509741425514221 2023-01-22 10:00:33.579399: step: 428/466, loss: 0.632339358329773 2023-01-22 10:00:34.208506: step: 430/466, loss: 0.25753694772720337 2023-01-22 10:00:34.854510: step: 432/466, loss: 0.3298838436603546 2023-01-22 10:00:35.470207: step: 434/466, loss: 1.2893836498260498 2023-01-22 10:00:36.273007: step: 436/466, loss: 0.8060743808746338 2023-01-22 10:00:36.889190: step: 438/466, loss: 0.6938568353652954 2023-01-22 10:00:37.520361: step: 440/466, loss: 0.3745870292186737 2023-01-22 10:00:38.170461: step: 442/466, loss: 1.093895673751831 2023-01-22 10:00:38.774957: step: 444/466, loss: 1.6614004373550415 2023-01-22 10:00:39.436761: step: 446/466, loss: 0.6754403114318848 2023-01-22 10:00:40.081387: step: 448/466, loss: 0.3983674943447113 2023-01-22 10:00:40.664654: step: 450/466, loss: 1.8840715885162354 2023-01-22 10:00:41.352549: step: 452/466, loss: 0.5922427177429199 2023-01-22 10:00:41.982517: step: 454/466, loss: 0.6781620979309082 2023-01-22 10:00:42.638278: step: 456/466, loss: 0.6283656358718872 2023-01-22 10:00:43.170048: step: 458/466, loss: 0.5273997783660889 2023-01-22 10:00:43.800816: step: 460/466, loss: 5.435046195983887 2023-01-22 10:00:44.380713: step: 462/466, loss: 0.20654211938381195 2023-01-22 10:00:44.995075: step: 464/466, loss: 0.32080647349357605 2023-01-22 10:00:45.577196: step: 466/466, loss: 1.0139718055725098 2023-01-22 10:00:46.175205: step: 468/466, loss: 0.23736083507537842 2023-01-22 10:00:46.734496: step: 470/466, loss: 0.7319826483726501 2023-01-22 10:00:47.321278: step: 472/466, loss: 11.274197578430176 2023-01-22 10:00:47.974003: step: 474/466, loss: 1.1109938621520996 2023-01-22 10:00:48.596371: step: 476/466, loss: 0.3432893753051758 2023-01-22 10:00:49.243106: step: 478/466, loss: 0.4525010883808136 2023-01-22 10:00:49.868999: step: 480/466, loss: 0.7121342420578003 2023-01-22 10:00:50.485776: step: 482/466, loss: 1.3188581466674805 2023-01-22 10:00:51.072643: step: 484/466, loss: 0.17577265202999115 2023-01-22 10:00:51.597368: step: 486/466, loss: 0.8547149896621704 2023-01-22 10:00:52.158764: step: 488/466, loss: 1.0815746784210205 2023-01-22 10:00:52.817615: step: 490/466, loss: 0.526871383190155 2023-01-22 10:00:53.414905: step: 492/466, loss: 0.8402315378189087 2023-01-22 10:00:54.094541: step: 494/466, loss: 0.8059301972389221 2023-01-22 10:00:54.660951: step: 496/466, loss: 0.5090266466140747 2023-01-22 10:00:55.351782: step: 498/466, loss: 1.7350958585739136 2023-01-22 10:00:55.987540: step: 500/466, loss: 0.25774773955345154 2023-01-22 10:00:56.600478: step: 502/466, loss: 0.9254035353660583 2023-01-22 10:00:57.218409: step: 504/466, loss: 0.6830517649650574 2023-01-22 10:00:57.837159: step: 506/466, loss: 0.7184911370277405 2023-01-22 10:00:58.363120: step: 508/466, loss: 0.19448937475681305 2023-01-22 10:00:58.975648: step: 510/466, loss: 0.3182905316352844 2023-01-22 10:00:59.545610: step: 512/466, loss: 0.22594107687473297 2023-01-22 10:01:00.135184: step: 514/466, loss: 0.4143276810646057 2023-01-22 10:01:00.675965: step: 516/466, loss: 0.2853304147720337 2023-01-22 10:01:01.289429: step: 518/466, loss: 2.4298224449157715 2023-01-22 10:01:01.900808: step: 520/466, loss: 2.2207794189453125 2023-01-22 10:01:02.455445: step: 522/466, loss: 0.23604892194271088 2023-01-22 10:01:03.075650: step: 524/466, loss: 0.9321451187133789 2023-01-22 10:01:03.713200: step: 526/466, loss: 0.7915027141571045 2023-01-22 10:01:04.341450: step: 528/466, loss: 1.3687559366226196 2023-01-22 10:01:04.955830: step: 530/466, loss: 7.580524444580078 2023-01-22 10:01:05.564945: step: 532/466, loss: 0.8179179430007935 2023-01-22 10:01:06.142022: step: 534/466, loss: 0.5741764903068542 2023-01-22 10:01:06.754309: step: 536/466, loss: 0.5159574747085571 2023-01-22 10:01:07.328209: step: 538/466, loss: 0.7804008722305298 2023-01-22 10:01:07.930959: step: 540/466, loss: 2.4360361099243164 2023-01-22 10:01:08.580578: step: 542/466, loss: 0.4893640875816345 2023-01-22 10:01:09.167294: step: 544/466, loss: 0.6733390092849731 2023-01-22 10:01:09.803843: step: 546/466, loss: 1.9966200590133667 2023-01-22 10:01:10.420851: step: 548/466, loss: 0.23160187900066376 2023-01-22 10:01:10.990115: step: 550/466, loss: 0.6399521231651306 2023-01-22 10:01:11.631325: step: 552/466, loss: 1.6272296905517578 2023-01-22 10:01:12.264947: step: 554/466, loss: 0.7216408252716064 2023-01-22 10:01:12.794899: step: 556/466, loss: 1.3738418817520142 2023-01-22 10:01:13.384126: step: 558/466, loss: 0.693959653377533 2023-01-22 10:01:13.963707: step: 560/466, loss: 0.4637894034385681 2023-01-22 10:01:14.588404: step: 562/466, loss: 1.4166477918624878 2023-01-22 10:01:15.232919: step: 564/466, loss: 1.5565288066864014 2023-01-22 10:01:15.850204: step: 566/466, loss: 1.0820739269256592 2023-01-22 10:01:16.608943: step: 568/466, loss: 0.27220243215560913 2023-01-22 10:01:17.222790: step: 570/466, loss: 0.6370554566383362 2023-01-22 10:01:17.822854: step: 572/466, loss: 0.7111790776252747 2023-01-22 10:01:18.488635: step: 574/466, loss: 1.1463621854782104 2023-01-22 10:01:19.107657: step: 576/466, loss: 0.47016438841819763 2023-01-22 10:01:19.705433: step: 578/466, loss: 0.8937940001487732 2023-01-22 10:01:20.315691: step: 580/466, loss: 0.28812530636787415 2023-01-22 10:01:20.903775: step: 582/466, loss: 1.85710871219635 2023-01-22 10:01:21.555776: step: 584/466, loss: 2.4654958248138428 2023-01-22 10:01:22.152053: step: 586/466, loss: 0.30235031247138977 2023-01-22 10:01:22.785788: step: 588/466, loss: 1.227952480316162 2023-01-22 10:01:23.403289: step: 590/466, loss: 0.6962063312530518 2023-01-22 10:01:24.043628: step: 592/466, loss: 0.2512756586074829 2023-01-22 10:01:24.701824: step: 594/466, loss: 1.5459935665130615 2023-01-22 10:01:25.271843: step: 596/466, loss: 0.5050576329231262 2023-01-22 10:01:25.923928: step: 598/466, loss: 0.7506095170974731 2023-01-22 10:01:26.570887: step: 600/466, loss: 0.24673344194889069 2023-01-22 10:01:27.237233: step: 602/466, loss: 0.8932666778564453 2023-01-22 10:01:27.854452: step: 604/466, loss: 1.055237054824829 2023-01-22 10:01:28.493872: step: 606/466, loss: 0.6997503638267517 2023-01-22 10:01:29.137101: step: 608/466, loss: 0.6910296082496643 2023-01-22 10:01:29.796275: step: 610/466, loss: 1.1907850503921509 2023-01-22 10:01:30.504766: step: 612/466, loss: 0.7755650877952576 2023-01-22 10:01:31.157277: step: 614/466, loss: 1.2764533758163452 2023-01-22 10:01:31.840253: step: 616/466, loss: 0.49234524369239807 2023-01-22 10:01:32.477179: step: 618/466, loss: 0.468046635389328 2023-01-22 10:01:33.214865: step: 620/466, loss: 0.9081097841262817 2023-01-22 10:01:33.830004: step: 622/466, loss: 0.372394859790802 2023-01-22 10:01:34.388098: step: 624/466, loss: 0.43292921781539917 2023-01-22 10:01:34.990829: step: 626/466, loss: 0.9822397828102112 2023-01-22 10:01:35.620636: step: 628/466, loss: 0.6980036497116089 2023-01-22 10:01:36.218397: step: 630/466, loss: 0.6188594698905945 2023-01-22 10:01:36.859344: step: 632/466, loss: 0.5005221366882324 2023-01-22 10:01:37.430763: step: 634/466, loss: 2.710353136062622 2023-01-22 10:01:38.046677: step: 636/466, loss: 1.0915191173553467 2023-01-22 10:01:38.670247: step: 638/466, loss: 0.34767547249794006 2023-01-22 10:01:39.352040: step: 640/466, loss: 0.43298444151878357 2023-01-22 10:01:40.055744: step: 642/466, loss: 0.9110534191131592 2023-01-22 10:01:40.739545: step: 644/466, loss: 1.2311044931411743 2023-01-22 10:01:41.376682: step: 646/466, loss: 1.852325201034546 2023-01-22 10:01:42.026953: step: 648/466, loss: 0.2186703085899353 2023-01-22 10:01:42.567844: step: 650/466, loss: 0.21883513033390045 2023-01-22 10:01:43.148396: step: 652/466, loss: 4.912973880767822 2023-01-22 10:01:43.735922: step: 654/466, loss: 1.7887861728668213 2023-01-22 10:01:44.384497: step: 656/466, loss: 0.19818219542503357 2023-01-22 10:01:44.949460: step: 658/466, loss: 4.3705573081970215 2023-01-22 10:01:45.532105: step: 660/466, loss: 1.8388097286224365 2023-01-22 10:01:46.151757: step: 662/466, loss: 2.788428783416748 2023-01-22 10:01:46.771225: step: 664/466, loss: 2.1162989139556885 2023-01-22 10:01:47.327708: step: 666/466, loss: 1.3119508028030396 2023-01-22 10:01:47.920428: step: 668/466, loss: 0.6477605700492859 2023-01-22 10:01:48.537124: step: 670/466, loss: 0.364826500415802 2023-01-22 10:01:49.137087: step: 672/466, loss: 1.7788515090942383 2023-01-22 10:01:49.713616: step: 674/466, loss: 1.2706859111785889 2023-01-22 10:01:50.320146: step: 676/466, loss: 1.1993398666381836 2023-01-22 10:01:50.884990: step: 678/466, loss: 0.48671069741249084 2023-01-22 10:01:51.500897: step: 680/466, loss: 0.4129486382007599 2023-01-22 10:01:52.114245: step: 682/466, loss: 1.152281641960144 2023-01-22 10:01:52.780282: step: 684/466, loss: 0.6857094764709473 2023-01-22 10:01:53.312526: step: 686/466, loss: 0.5028449892997742 2023-01-22 10:01:53.875927: step: 688/466, loss: 5.923160552978516 2023-01-22 10:01:54.534476: step: 690/466, loss: 0.7625570297241211 2023-01-22 10:01:55.208054: step: 692/466, loss: 0.16711105406284332 2023-01-22 10:01:55.798408: step: 694/466, loss: 1.0117989778518677 2023-01-22 10:01:56.412435: step: 696/466, loss: 0.7212257385253906 2023-01-22 10:01:57.066211: step: 698/466, loss: 1.5466200113296509 2023-01-22 10:01:57.643374: step: 700/466, loss: 1.6017810106277466 2023-01-22 10:01:58.258087: step: 702/466, loss: 1.5867637395858765 2023-01-22 10:01:58.853672: step: 704/466, loss: 0.4246661067008972 2023-01-22 10:01:59.501757: step: 706/466, loss: 0.6573480367660522 2023-01-22 10:02:00.147097: step: 708/466, loss: 0.338861882686615 2023-01-22 10:02:00.735178: step: 710/466, loss: 0.6983233690261841 2023-01-22 10:02:01.318899: step: 712/466, loss: 1.945359230041504 2023-01-22 10:02:01.976114: step: 714/466, loss: 1.229771614074707 2023-01-22 10:02:02.638563: step: 716/466, loss: 0.4587058424949646 2023-01-22 10:02:03.311566: step: 718/466, loss: 0.6692253947257996 2023-01-22 10:02:03.992365: step: 720/466, loss: 0.5753758549690247 2023-01-22 10:02:04.584697: step: 722/466, loss: 0.7323040962219238 2023-01-22 10:02:05.196889: step: 724/466, loss: 0.8918372392654419 2023-01-22 10:02:05.757574: step: 726/466, loss: 0.4605655074119568 2023-01-22 10:02:06.350518: step: 728/466, loss: 1.4957081079483032 2023-01-22 10:02:06.940354: step: 730/466, loss: 7.627898693084717 2023-01-22 10:02:07.594906: step: 732/466, loss: 0.7290813326835632 2023-01-22 10:02:08.170837: step: 734/466, loss: 0.27680712938308716 2023-01-22 10:02:08.809681: step: 736/466, loss: 0.2125605195760727 2023-01-22 10:02:09.418184: step: 738/466, loss: 0.848861813545227 2023-01-22 10:02:10.038272: step: 740/466, loss: 1.3741085529327393 2023-01-22 10:02:10.674033: step: 742/466, loss: 2.952786445617676 2023-01-22 10:02:11.329439: step: 744/466, loss: 1.1536624431610107 2023-01-22 10:02:12.077372: step: 746/466, loss: 0.8832286596298218 2023-01-22 10:02:12.775002: step: 748/466, loss: 0.8762491345405579 2023-01-22 10:02:13.359651: step: 750/466, loss: 1.5069191455841064 2023-01-22 10:02:13.995337: step: 752/466, loss: 1.0268547534942627 2023-01-22 10:02:14.583065: step: 754/466, loss: 1.148740530014038 2023-01-22 10:02:15.213242: step: 756/466, loss: 0.22098587453365326 2023-01-22 10:02:15.825333: step: 758/466, loss: 0.5976026654243469 2023-01-22 10:02:16.461633: step: 760/466, loss: 0.5490186214447021 2023-01-22 10:02:17.074603: step: 762/466, loss: 1.9247955083847046 2023-01-22 10:02:17.701255: step: 764/466, loss: 0.8851991891860962 2023-01-22 10:02:18.325408: step: 766/466, loss: 0.32720571756362915 2023-01-22 10:02:19.005031: step: 768/466, loss: 5.309898853302002 2023-01-22 10:02:19.554266: step: 770/466, loss: 0.6763961315155029 2023-01-22 10:02:20.200388: step: 772/466, loss: 0.4193522036075592 2023-01-22 10:02:20.758822: step: 774/466, loss: 0.5671581029891968 2023-01-22 10:02:21.443700: step: 776/466, loss: 0.7532185316085815 2023-01-22 10:02:22.065773: step: 778/466, loss: 0.14647214114665985 2023-01-22 10:02:22.680904: step: 780/466, loss: 0.7904964089393616 2023-01-22 10:02:23.278219: step: 782/466, loss: 1.7479318380355835 2023-01-22 10:02:23.873063: step: 784/466, loss: 0.15508301556110382 2023-01-22 10:02:24.596733: step: 786/466, loss: 1.5002763271331787 2023-01-22 10:02:25.258672: step: 788/466, loss: 0.510749340057373 2023-01-22 10:02:25.855836: step: 790/466, loss: 0.6137053966522217 2023-01-22 10:02:26.428621: step: 792/466, loss: 0.31379973888397217 2023-01-22 10:02:27.023144: step: 794/466, loss: 0.44335412979125977 2023-01-22 10:02:27.579852: step: 796/466, loss: 0.6573041677474976 2023-01-22 10:02:28.217740: step: 798/466, loss: 2.469816207885742 2023-01-22 10:02:28.842821: step: 800/466, loss: 0.3082996904850006 2023-01-22 10:02:29.419570: step: 802/466, loss: 0.5501794815063477 2023-01-22 10:02:29.973238: step: 804/466, loss: 0.8786368370056152 2023-01-22 10:02:30.568446: step: 806/466, loss: 0.41694772243499756 2023-01-22 10:02:31.213598: step: 808/466, loss: 1.077990174293518 2023-01-22 10:02:31.827906: step: 810/466, loss: 2.110581398010254 2023-01-22 10:02:32.423905: step: 812/466, loss: 0.4858446717262268 2023-01-22 10:02:33.034376: step: 814/466, loss: 2.4747583866119385 2023-01-22 10:02:33.597568: step: 816/466, loss: 1.3693656921386719 2023-01-22 10:02:34.240136: step: 818/466, loss: 1.0355596542358398 2023-01-22 10:02:34.801488: step: 820/466, loss: 1.659318447113037 2023-01-22 10:02:35.476814: step: 822/466, loss: 3.9617958068847656 2023-01-22 10:02:36.115331: step: 824/466, loss: 1.1150134801864624 2023-01-22 10:02:36.646704: step: 826/466, loss: 0.36890584230422974 2023-01-22 10:02:37.252575: step: 828/466, loss: 1.1831920146942139 2023-01-22 10:02:37.905199: step: 830/466, loss: 0.43788182735443115 2023-01-22 10:02:38.485450: step: 832/466, loss: 1.0296648740768433 2023-01-22 10:02:39.043893: step: 834/466, loss: 1.4571318626403809 2023-01-22 10:02:39.690192: step: 836/466, loss: 0.3482404947280884 2023-01-22 10:02:40.286038: step: 838/466, loss: 0.7448530197143555 2023-01-22 10:02:40.873992: step: 840/466, loss: 1.256547212600708 2023-01-22 10:02:41.494723: step: 842/466, loss: 3.370251178741455 2023-01-22 10:02:42.141038: step: 844/466, loss: 0.5442215800285339 2023-01-22 10:02:42.742590: step: 846/466, loss: 2.169522285461426 2023-01-22 10:02:43.316641: step: 848/466, loss: 0.46707239747047424 2023-01-22 10:02:43.907606: step: 850/466, loss: 0.791730523109436 2023-01-22 10:02:44.512181: step: 852/466, loss: 0.681902289390564 2023-01-22 10:02:45.116446: step: 854/466, loss: 0.38136905431747437 2023-01-22 10:02:45.740736: step: 856/466, loss: 3.295001983642578 2023-01-22 10:02:46.331833: step: 858/466, loss: 0.9217782616615295 2023-01-22 10:02:46.931422: step: 860/466, loss: 0.49116799235343933 2023-01-22 10:02:47.540460: step: 862/466, loss: 0.8243554830551147 2023-01-22 10:02:48.124120: step: 864/466, loss: 0.4150761663913727 2023-01-22 10:02:48.765026: step: 866/466, loss: 0.5980117917060852 2023-01-22 10:02:49.338138: step: 868/466, loss: 1.8143088817596436 2023-01-22 10:02:49.933161: step: 870/466, loss: 1.0737305879592896 2023-01-22 10:02:50.568395: step: 872/466, loss: 0.9121313095092773 2023-01-22 10:02:51.164471: step: 874/466, loss: 0.8232439160346985 2023-01-22 10:02:51.772380: step: 876/466, loss: 0.5766512155532837 2023-01-22 10:02:52.366239: step: 878/466, loss: 0.4171861410140991 2023-01-22 10:02:52.957175: step: 880/466, loss: 3.5865650177001953 2023-01-22 10:02:53.562283: step: 882/466, loss: 1.2284045219421387 2023-01-22 10:02:54.162892: step: 884/466, loss: 0.30379223823547363 2023-01-22 10:02:54.790653: step: 886/466, loss: 1.3876835107803345 2023-01-22 10:02:55.410268: step: 888/466, loss: 2.0049948692321777 2023-01-22 10:02:55.991009: step: 890/466, loss: 1.9099504947662354 2023-01-22 10:02:56.596064: step: 892/466, loss: 0.6487614512443542 2023-01-22 10:02:57.201101: step: 894/466, loss: 0.7433854341506958 2023-01-22 10:02:57.826926: step: 896/466, loss: 0.6980950236320496 2023-01-22 10:02:58.365371: step: 898/466, loss: 0.7908098101615906 2023-01-22 10:02:58.918181: step: 900/466, loss: 0.328832745552063 2023-01-22 10:02:59.489872: step: 902/466, loss: 0.5036404728889465 2023-01-22 10:03:00.041592: step: 904/466, loss: 0.22693654894828796 2023-01-22 10:03:00.650254: step: 906/466, loss: 0.2672615945339203 2023-01-22 10:03:01.238659: step: 908/466, loss: 0.6582983732223511 2023-01-22 10:03:01.870741: step: 910/466, loss: 0.9059508442878723 2023-01-22 10:03:02.506514: step: 912/466, loss: 0.706932544708252 2023-01-22 10:03:03.150907: step: 914/466, loss: 0.40315255522727966 2023-01-22 10:03:03.778390: step: 916/466, loss: 0.9941682815551758 2023-01-22 10:03:04.363498: step: 918/466, loss: 0.32447823882102966 2023-01-22 10:03:04.966790: step: 920/466, loss: 0.8413281440734863 2023-01-22 10:03:05.539678: step: 922/466, loss: 0.7358835935592651 2023-01-22 10:03:06.118599: step: 924/466, loss: 0.6849548816680908 2023-01-22 10:03:06.768014: step: 926/466, loss: 0.35149043798446655 2023-01-22 10:03:07.394370: step: 928/466, loss: 0.7100387215614319 2023-01-22 10:03:08.011817: step: 930/466, loss: 0.3302081227302551 2023-01-22 10:03:08.585422: step: 932/466, loss: 2.2487170696258545 ================================================== Loss: 1.062 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33684598378776714, 'r': 0.2258399209486166, 'f1': 0.27039337474120084}, 'combined': 0.18026224982746722, 'epoch': 4} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3284236851202588, 'r': 0.22008652145721241, 'f1': 0.26355617655062763}, 'combined': 0.17200508364356748, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33960546593219226, 'r': 0.28483039078183864, 'f1': 0.30981551278024555}, 'combined': 0.22828511468018092, 'epoch': 4} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3078016825895471, 'r': 0.22492174900915823, 'f1': 0.2599145773942749}, 'combined': 0.17237857982625485, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.17391304347826086, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36538461538461536, 'r': 0.16379310344827586, 'f1': 0.2261904761904762}, 'combined': 0.15079365079365079, 'epoch': 4} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2838856993736952, 'r': 0.2575402462121212, 'f1': 0.27007199602780535}, 'combined': 0.18004799735187021, 'epoch': 3} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3435604264873021, 'r': 0.2753230360796342, 'f1': 0.305679842550543}, 'combined': 0.1994963182961438, 'epoch': 3} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.29347826086956524, 'f1': 0.3461538461538462}, 'combined': 0.23076923076923078, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2858411526903795, 'r': 0.3156727720413679, 'f1': 0.30001722428458233}, 'combined': 0.22106532315706065, 'epoch': 3} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32636076268334185, 'r': 0.28224266998091435, 'f1': 0.30270264048138956}, 'combined': 0.20075615534517025, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44642857142857145, 'r': 0.21551724137931033, 'f1': 0.2906976744186046}, 'combined': 0.19379844961240306, 'epoch': 3} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:05:40.098227: step: 2/466, loss: 0.8873512148857117 2023-01-22 10:05:40.713959: step: 4/466, loss: 0.43146419525146484 2023-01-22 10:05:41.321559: step: 6/466, loss: 1.6752448081970215 2023-01-22 10:05:41.918984: step: 8/466, loss: 0.789301335811615 2023-01-22 10:05:42.583895: step: 10/466, loss: 0.4882446527481079 2023-01-22 10:05:43.190076: step: 12/466, loss: 0.8184223771095276 2023-01-22 10:05:43.786628: step: 14/466, loss: 0.6357017159461975 2023-01-22 10:05:44.351979: step: 16/466, loss: 2.351536273956299 2023-01-22 10:05:44.978340: step: 18/466, loss: 1.0994080305099487 2023-01-22 10:05:45.604797: step: 20/466, loss: 0.4771495461463928 2023-01-22 10:05:46.205224: step: 22/466, loss: 0.36976194381713867 2023-01-22 10:05:46.807677: step: 24/466, loss: 0.6308794617652893 2023-01-22 10:05:47.409106: step: 26/466, loss: 0.7177110910415649 2023-01-22 10:05:47.941113: step: 28/466, loss: 0.39158812165260315 2023-01-22 10:05:48.486562: step: 30/466, loss: 1.6430250406265259 2023-01-22 10:05:49.099500: step: 32/466, loss: 0.548024594783783 2023-01-22 10:05:49.675474: step: 34/466, loss: 1.4155356884002686 2023-01-22 10:05:50.286003: step: 36/466, loss: 0.4548807740211487 2023-01-22 10:05:50.839243: step: 38/466, loss: 0.5886859893798828 2023-01-22 10:05:51.447672: step: 40/466, loss: 0.45883312821388245 2023-01-22 10:05:52.074399: step: 42/466, loss: 0.368941992521286 2023-01-22 10:05:52.638975: step: 44/466, loss: 1.1091209650039673 2023-01-22 10:05:53.223296: step: 46/466, loss: 1.2951784133911133 2023-01-22 10:05:53.911590: step: 48/466, loss: 0.3653731048107147 2023-01-22 10:05:54.575146: step: 50/466, loss: 0.40053850412368774 2023-01-22 10:05:55.197541: step: 52/466, loss: 1.1370720863342285 2023-01-22 10:05:55.778701: step: 54/466, loss: 0.22803129255771637 2023-01-22 10:05:56.361632: step: 56/466, loss: 1.3747471570968628 2023-01-22 10:05:56.990967: step: 58/466, loss: 0.8854600191116333 2023-01-22 10:05:57.595132: step: 60/466, loss: 2.8767919540405273 2023-01-22 10:05:58.244680: step: 62/466, loss: 3.0985217094421387 2023-01-22 10:05:58.925166: step: 64/466, loss: 0.42767399549484253 2023-01-22 10:05:59.583243: step: 66/466, loss: 2.14646315574646 2023-01-22 10:06:00.162493: step: 68/466, loss: 0.949621856212616 2023-01-22 10:06:00.756645: step: 70/466, loss: 0.5875003337860107 2023-01-22 10:06:01.344561: step: 72/466, loss: 0.40015918016433716 2023-01-22 10:06:01.996748: step: 74/466, loss: 0.5424792766571045 2023-01-22 10:06:02.727178: step: 76/466, loss: 0.7409729957580566 2023-01-22 10:06:03.365755: step: 78/466, loss: 1.1596415042877197 2023-01-22 10:06:03.948355: step: 80/466, loss: 1.3621959686279297 2023-01-22 10:06:04.531945: step: 82/466, loss: 0.28334301710128784 2023-01-22 10:06:05.096913: step: 84/466, loss: 0.5116409659385681 2023-01-22 10:06:05.744470: step: 86/466, loss: 0.8505147099494934 2023-01-22 10:06:06.394499: step: 88/466, loss: 1.3884097337722778 2023-01-22 10:06:07.007724: step: 90/466, loss: 0.4708605110645294 2023-01-22 10:06:07.587281: step: 92/466, loss: 0.49022626876831055 2023-01-22 10:06:08.197507: step: 94/466, loss: 0.6543488502502441 2023-01-22 10:06:08.797167: step: 96/466, loss: 0.6285028457641602 2023-01-22 10:06:09.413303: step: 98/466, loss: 1.4233053922653198 2023-01-22 10:06:10.003045: step: 100/466, loss: 0.39845722913742065 2023-01-22 10:06:10.608756: step: 102/466, loss: 0.5205832719802856 2023-01-22 10:06:11.183955: step: 104/466, loss: 0.7826010584831238 2023-01-22 10:06:11.734609: step: 106/466, loss: 0.6176843643188477 2023-01-22 10:06:12.306448: step: 108/466, loss: 1.3059556484222412 2023-01-22 10:06:12.915649: step: 110/466, loss: 1.2681266069412231 2023-01-22 10:06:13.570622: step: 112/466, loss: 0.3299587666988373 2023-01-22 10:06:14.240955: step: 114/466, loss: 0.4056156277656555 2023-01-22 10:06:14.863425: step: 116/466, loss: 3.232006549835205 2023-01-22 10:06:15.457291: step: 118/466, loss: 1.019670844078064 2023-01-22 10:06:16.048352: step: 120/466, loss: 0.3997257947921753 2023-01-22 10:06:16.661512: step: 122/466, loss: 0.600451648235321 2023-01-22 10:06:17.335319: step: 124/466, loss: 1.1474565267562866 2023-01-22 10:06:17.925370: step: 126/466, loss: 0.1493775099515915 2023-01-22 10:06:18.565739: step: 128/466, loss: 0.6986579895019531 2023-01-22 10:06:19.224957: step: 130/466, loss: 0.7964789271354675 2023-01-22 10:06:19.904237: step: 132/466, loss: 1.0484631061553955 2023-01-22 10:06:20.569633: step: 134/466, loss: 1.7508398294448853 2023-01-22 10:06:21.169346: step: 136/466, loss: 2.2924609184265137 2023-01-22 10:06:21.712224: step: 138/466, loss: 1.2998239994049072 2023-01-22 10:06:22.377677: step: 140/466, loss: 0.7389345765113831 2023-01-22 10:06:22.946740: step: 142/466, loss: 1.8558109998703003 2023-01-22 10:06:23.656219: step: 144/466, loss: 1.9583468437194824 2023-01-22 10:06:24.321637: step: 146/466, loss: 1.3969378471374512 2023-01-22 10:06:24.872146: step: 148/466, loss: 0.17234475910663605 2023-01-22 10:06:25.547829: step: 150/466, loss: 0.8840292096138 2023-01-22 10:06:26.107234: step: 152/466, loss: 0.41319066286087036 2023-01-22 10:06:26.830578: step: 154/466, loss: 0.38288575410842896 2023-01-22 10:06:27.475607: step: 156/466, loss: 0.6801028251647949 2023-01-22 10:06:28.128972: step: 158/466, loss: 1.5284907817840576 2023-01-22 10:06:28.808096: step: 160/466, loss: 0.5035023093223572 2023-01-22 10:06:29.479921: step: 162/466, loss: 1.1935200691223145 2023-01-22 10:06:30.047955: step: 164/466, loss: 0.1817062497138977 2023-01-22 10:06:30.675229: step: 166/466, loss: 0.8277509212493896 2023-01-22 10:06:31.192139: step: 168/466, loss: 0.4788461923599243 2023-01-22 10:06:31.810253: step: 170/466, loss: 1.3420499563217163 2023-01-22 10:06:32.400134: step: 172/466, loss: 0.43186071515083313 2023-01-22 10:06:33.000379: step: 174/466, loss: 1.1397451162338257 2023-01-22 10:06:33.622773: step: 176/466, loss: 1.153487205505371 2023-01-22 10:06:34.278981: step: 178/466, loss: 0.3397679328918457 2023-01-22 10:06:34.846412: step: 180/466, loss: 0.42831388115882874 2023-01-22 10:06:35.468990: step: 182/466, loss: 0.6156601309776306 2023-01-22 10:06:36.118772: step: 184/466, loss: 0.5617088079452515 2023-01-22 10:06:36.714200: step: 186/466, loss: 0.5770358443260193 2023-01-22 10:06:37.314703: step: 188/466, loss: 0.20702214539051056 2023-01-22 10:06:37.915081: step: 190/466, loss: 0.6894669532775879 2023-01-22 10:06:38.495769: step: 192/466, loss: 0.30901455879211426 2023-01-22 10:06:39.090924: step: 194/466, loss: 0.6040390729904175 2023-01-22 10:06:39.683482: step: 196/466, loss: 0.6449813842773438 2023-01-22 10:06:40.298086: step: 198/466, loss: 0.34480994939804077 2023-01-22 10:06:40.972396: step: 200/466, loss: 1.084185242652893 2023-01-22 10:06:41.557848: step: 202/466, loss: 1.1693246364593506 2023-01-22 10:06:42.104909: step: 204/466, loss: 1.1242389678955078 2023-01-22 10:06:42.693493: step: 206/466, loss: 0.888567328453064 2023-01-22 10:06:43.264704: step: 208/466, loss: 0.763310432434082 2023-01-22 10:06:43.820133: step: 210/466, loss: 0.8818193674087524 2023-01-22 10:06:44.398846: step: 212/466, loss: 3.0669779777526855 2023-01-22 10:06:45.029699: step: 214/466, loss: 1.6946501731872559 2023-01-22 10:06:45.612243: step: 216/466, loss: 1.333387017250061 2023-01-22 10:06:46.159051: step: 218/466, loss: 3.137331247329712 2023-01-22 10:06:46.836620: step: 220/466, loss: 0.2983590066432953 2023-01-22 10:06:47.431242: step: 222/466, loss: 1.8559598922729492 2023-01-22 10:06:48.066517: step: 224/466, loss: 0.50806725025177 2023-01-22 10:06:48.652052: step: 226/466, loss: 0.3796902298927307 2023-01-22 10:06:49.246369: step: 228/466, loss: 0.6262779235839844 2023-01-22 10:06:49.861583: step: 230/466, loss: 0.7788878679275513 2023-01-22 10:06:50.431850: step: 232/466, loss: 0.9816898703575134 2023-01-22 10:06:50.995588: step: 234/466, loss: 0.8697726726531982 2023-01-22 10:06:51.600116: step: 236/466, loss: 0.2544841170310974 2023-01-22 10:06:52.210339: step: 238/466, loss: 0.5122835040092468 2023-01-22 10:06:52.787670: step: 240/466, loss: 0.7203906178474426 2023-01-22 10:06:53.378674: step: 242/466, loss: 0.1544247716665268 2023-01-22 10:06:53.977021: step: 244/466, loss: 0.5812761783599854 2023-01-22 10:06:54.603550: step: 246/466, loss: 1.2550731897354126 2023-01-22 10:06:55.199973: step: 248/466, loss: 1.1732237339019775 2023-01-22 10:06:55.755480: step: 250/466, loss: 0.17169396579265594 2023-01-22 10:06:56.408799: step: 252/466, loss: 0.848768949508667 2023-01-22 10:06:57.101712: step: 254/466, loss: 0.3380299508571625 2023-01-22 10:06:57.741189: step: 256/466, loss: 0.5533966422080994 2023-01-22 10:06:58.353210: step: 258/466, loss: 0.82039475440979 2023-01-22 10:06:58.929632: step: 260/466, loss: 0.30277910828590393 2023-01-22 10:06:59.558832: step: 262/466, loss: 2.042152166366577 2023-01-22 10:07:00.160179: step: 264/466, loss: 1.1292691230773926 2023-01-22 10:07:00.763152: step: 266/466, loss: 1.9049155712127686 2023-01-22 10:07:01.430956: step: 268/466, loss: 0.306099534034729 2023-01-22 10:07:02.113778: step: 270/466, loss: 0.8231030106544495 2023-01-22 10:07:02.746915: step: 272/466, loss: 0.9825611710548401 2023-01-22 10:07:03.345034: step: 274/466, loss: 0.36353135108947754 2023-01-22 10:07:03.879395: step: 276/466, loss: 0.9937047958374023 2023-01-22 10:07:04.448815: step: 278/466, loss: 1.0498427152633667 2023-01-22 10:07:05.208361: step: 280/466, loss: 1.2113786935806274 2023-01-22 10:07:05.759326: step: 282/466, loss: 1.1858857870101929 2023-01-22 10:07:06.370908: step: 284/466, loss: 0.5884745717048645 2023-01-22 10:07:06.940455: step: 286/466, loss: 0.289907842874527 2023-01-22 10:07:07.603490: step: 288/466, loss: 0.32026946544647217 2023-01-22 10:07:08.278218: step: 290/466, loss: 1.020920991897583 2023-01-22 10:07:08.947379: step: 292/466, loss: 2.072936773300171 2023-01-22 10:07:09.536497: step: 294/466, loss: 0.4009312689304352 2023-01-22 10:07:10.103687: step: 296/466, loss: 1.1250522136688232 2023-01-22 10:07:10.802614: step: 298/466, loss: 0.9698195457458496 2023-01-22 10:07:11.410976: step: 300/466, loss: 1.2581521272659302 2023-01-22 10:07:12.007210: step: 302/466, loss: 0.6058701276779175 2023-01-22 10:07:12.633418: step: 304/466, loss: 1.154731035232544 2023-01-22 10:07:13.216662: step: 306/466, loss: 0.3935511112213135 2023-01-22 10:07:13.769550: step: 308/466, loss: 0.23435933887958527 2023-01-22 10:07:14.429450: step: 310/466, loss: 2.445216178894043 2023-01-22 10:07:15.003237: step: 312/466, loss: 1.0250365734100342 2023-01-22 10:07:15.707674: step: 314/466, loss: 1.1069316864013672 2023-01-22 10:07:16.352915: step: 316/466, loss: 1.266685128211975 2023-01-22 10:07:16.905127: step: 318/466, loss: 1.595887303352356 2023-01-22 10:07:17.481420: step: 320/466, loss: 0.12612585723400116 2023-01-22 10:07:18.100926: step: 322/466, loss: 2.205091714859009 2023-01-22 10:07:18.710691: step: 324/466, loss: 0.23279878497123718 2023-01-22 10:07:19.343843: step: 326/466, loss: 1.5497491359710693 2023-01-22 10:07:19.933182: step: 328/466, loss: 1.8650856018066406 2023-01-22 10:07:20.491625: step: 330/466, loss: 0.22802956402301788 2023-01-22 10:07:21.076810: step: 332/466, loss: 1.0268545150756836 2023-01-22 10:07:21.671797: step: 334/466, loss: 0.6684575080871582 2023-01-22 10:07:22.420288: step: 336/466, loss: 1.6692156791687012 2023-01-22 10:07:23.012211: step: 338/466, loss: 0.6034660339355469 2023-01-22 10:07:23.606201: step: 340/466, loss: 1.5330373048782349 2023-01-22 10:07:24.204877: step: 342/466, loss: 1.2503236532211304 2023-01-22 10:07:24.849801: step: 344/466, loss: 0.9118637442588806 2023-01-22 10:07:25.476053: step: 346/466, loss: 0.6592655181884766 2023-01-22 10:07:26.059444: step: 348/466, loss: 0.4565240144729614 2023-01-22 10:07:26.642686: step: 350/466, loss: 0.28018730878829956 2023-01-22 10:07:27.267877: step: 352/466, loss: 1.3790347576141357 2023-01-22 10:07:27.789132: step: 354/466, loss: 0.5521497130393982 2023-01-22 10:07:28.358074: step: 356/466, loss: 0.5413537621498108 2023-01-22 10:07:28.938111: step: 358/466, loss: 0.4369373023509979 2023-01-22 10:07:29.624873: step: 360/466, loss: 1.288124680519104 2023-01-22 10:07:30.268048: step: 362/466, loss: 0.7232743501663208 2023-01-22 10:07:30.878376: step: 364/466, loss: 0.384662389755249 2023-01-22 10:07:31.508678: step: 366/466, loss: 0.9474266767501831 2023-01-22 10:07:32.074425: step: 368/466, loss: 0.9133715033531189 2023-01-22 10:07:32.793661: step: 370/466, loss: 0.5375441908836365 2023-01-22 10:07:33.391876: step: 372/466, loss: 4.506320476531982 2023-01-22 10:07:33.999434: step: 374/466, loss: 0.3736286759376526 2023-01-22 10:07:34.601620: step: 376/466, loss: 0.54074627161026 2023-01-22 10:07:35.259656: step: 378/466, loss: 1.5236220359802246 2023-01-22 10:07:35.850183: step: 380/466, loss: 0.17001619935035706 2023-01-22 10:07:36.445530: step: 382/466, loss: 0.2629481256008148 2023-01-22 10:07:37.030007: step: 384/466, loss: 0.8746252059936523 2023-01-22 10:07:37.591285: step: 386/466, loss: 0.3887893855571747 2023-01-22 10:07:38.213483: step: 388/466, loss: 0.9724615812301636 2023-01-22 10:07:38.785341: step: 390/466, loss: 0.5996192693710327 2023-01-22 10:07:39.414636: step: 392/466, loss: 0.3284958004951477 2023-01-22 10:07:40.044422: step: 394/466, loss: 1.205241084098816 2023-01-22 10:07:40.622573: step: 396/466, loss: 1.0375643968582153 2023-01-22 10:07:41.218865: step: 398/466, loss: 0.7388166785240173 2023-01-22 10:07:41.814041: step: 400/466, loss: 0.4791616201400757 2023-01-22 10:07:42.431598: step: 402/466, loss: 0.34600746631622314 2023-01-22 10:07:43.069811: step: 404/466, loss: 0.4240439236164093 2023-01-22 10:07:43.674851: step: 406/466, loss: 0.9592070579528809 2023-01-22 10:07:44.265407: step: 408/466, loss: 0.41275113821029663 2023-01-22 10:07:44.934659: step: 410/466, loss: 0.3482060432434082 2023-01-22 10:07:45.537414: step: 412/466, loss: 0.1422082632780075 2023-01-22 10:07:46.198960: step: 414/466, loss: 0.5417606830596924 2023-01-22 10:07:46.857705: step: 416/466, loss: 0.3440185487270355 2023-01-22 10:07:47.505713: step: 418/466, loss: 1.4496549367904663 2023-01-22 10:07:48.149582: step: 420/466, loss: 0.3177294135093689 2023-01-22 10:07:48.701544: step: 422/466, loss: 0.39011937379837036 2023-01-22 10:07:49.295682: step: 424/466, loss: 1.9770435094833374 2023-01-22 10:07:49.981671: step: 426/466, loss: 1.8488770723342896 2023-01-22 10:07:50.598397: step: 428/466, loss: 0.6467912197113037 2023-01-22 10:07:51.237499: step: 430/466, loss: 1.8490506410598755 2023-01-22 10:07:51.833983: step: 432/466, loss: 1.3182984590530396 2023-01-22 10:07:52.467436: step: 434/466, loss: 1.0293097496032715 2023-01-22 10:07:53.067598: step: 436/466, loss: 0.565805196762085 2023-01-22 10:07:53.677504: step: 438/466, loss: 1.7070420980453491 2023-01-22 10:07:54.296391: step: 440/466, loss: 0.7296619415283203 2023-01-22 10:07:54.870931: step: 442/466, loss: 1.5177580118179321 2023-01-22 10:07:55.454061: step: 444/466, loss: 1.4175868034362793 2023-01-22 10:07:56.087667: step: 446/466, loss: 0.08863383531570435 2023-01-22 10:07:56.712895: step: 448/466, loss: 0.5503197908401489 2023-01-22 10:07:57.324578: step: 450/466, loss: 0.7010912299156189 2023-01-22 10:07:57.906284: step: 452/466, loss: 0.45214200019836426 2023-01-22 10:07:58.520659: step: 454/466, loss: 0.2602442502975464 2023-01-22 10:07:59.171812: step: 456/466, loss: 0.7292141914367676 2023-01-22 10:07:59.757836: step: 458/466, loss: 2.5713729858398438 2023-01-22 10:08:00.428239: step: 460/466, loss: 0.46030521392822266 2023-01-22 10:08:01.009660: step: 462/466, loss: 0.6398338079452515 2023-01-22 10:08:01.639595: step: 464/466, loss: 0.7439488768577576 2023-01-22 10:08:02.289717: step: 466/466, loss: 0.6633845567703247 2023-01-22 10:08:02.908849: step: 468/466, loss: 0.8546178936958313 2023-01-22 10:08:03.524046: step: 470/466, loss: 0.6649083495140076 2023-01-22 10:08:04.139592: step: 472/466, loss: 1.4693559408187866 2023-01-22 10:08:04.741135: step: 474/466, loss: 1.0182071924209595 2023-01-22 10:08:05.359546: step: 476/466, loss: 0.28218722343444824 2023-01-22 10:08:05.909874: step: 478/466, loss: 1.2137396335601807 2023-01-22 10:08:06.504762: step: 480/466, loss: 0.8096773624420166 2023-01-22 10:08:07.124484: step: 482/466, loss: 0.9805156588554382 2023-01-22 10:08:07.685841: step: 484/466, loss: 0.37933504581451416 2023-01-22 10:08:08.282765: step: 486/466, loss: 0.6712952852249146 2023-01-22 10:08:08.895054: step: 488/466, loss: 0.36372894048690796 2023-01-22 10:08:09.511469: step: 490/466, loss: 0.7804000377655029 2023-01-22 10:08:10.092382: step: 492/466, loss: 0.37860196828842163 2023-01-22 10:08:10.758964: step: 494/466, loss: 0.6025473475456238 2023-01-22 10:08:11.379430: step: 496/466, loss: 0.39121013879776 2023-01-22 10:08:11.966513: step: 498/466, loss: 1.1215208768844604 2023-01-22 10:08:12.568911: step: 500/466, loss: 0.7064223289489746 2023-01-22 10:08:13.101788: step: 502/466, loss: 0.488375186920166 2023-01-22 10:08:13.708707: step: 504/466, loss: 0.27472975850105286 2023-01-22 10:08:14.342732: step: 506/466, loss: 1.3731070756912231 2023-01-22 10:08:14.993439: step: 508/466, loss: 0.6567395925521851 2023-01-22 10:08:15.635185: step: 510/466, loss: 0.6438676118850708 2023-01-22 10:08:16.212787: step: 512/466, loss: 1.5238269567489624 2023-01-22 10:08:16.824233: step: 514/466, loss: 0.1650468409061432 2023-01-22 10:08:17.447725: step: 516/466, loss: 2.120837926864624 2023-01-22 10:08:18.129102: step: 518/466, loss: 0.48192933201789856 2023-01-22 10:08:18.761734: step: 520/466, loss: 0.34953781962394714 2023-01-22 10:08:19.415681: step: 522/466, loss: 1.1414295434951782 2023-01-22 10:08:20.027419: step: 524/466, loss: 0.1272927224636078 2023-01-22 10:08:20.664731: step: 526/466, loss: 1.9006937742233276 2023-01-22 10:08:21.302147: step: 528/466, loss: 0.30656200647354126 2023-01-22 10:08:21.927878: step: 530/466, loss: 0.8622303605079651 2023-01-22 10:08:22.515280: step: 532/466, loss: 0.9534271359443665 2023-01-22 10:08:23.131486: step: 534/466, loss: 0.41976526379585266 2023-01-22 10:08:23.750014: step: 536/466, loss: 1.968782663345337 2023-01-22 10:08:24.324269: step: 538/466, loss: 0.3606802523136139 2023-01-22 10:08:25.088113: step: 540/466, loss: 0.6844926476478577 2023-01-22 10:08:25.713063: step: 542/466, loss: 1.232601523399353 2023-01-22 10:08:26.333647: step: 544/466, loss: 0.5401339530944824 2023-01-22 10:08:26.970540: step: 546/466, loss: 3.593629837036133 2023-01-22 10:08:27.588206: step: 548/466, loss: 0.35622620582580566 2023-01-22 10:08:28.182897: step: 550/466, loss: 1.1047563552856445 2023-01-22 10:08:28.806995: step: 552/466, loss: 1.1900886297225952 2023-01-22 10:08:29.438462: step: 554/466, loss: 0.7149781584739685 2023-01-22 10:08:30.040716: step: 556/466, loss: 1.642871379852295 2023-01-22 10:08:30.739830: step: 558/466, loss: 1.541691780090332 2023-01-22 10:08:31.315534: step: 560/466, loss: 0.5821778774261475 2023-01-22 10:08:31.961139: step: 562/466, loss: 0.3414008915424347 2023-01-22 10:08:32.574923: step: 564/466, loss: 0.36129286885261536 2023-01-22 10:08:33.153000: step: 566/466, loss: 0.2286379039287567 2023-01-22 10:08:33.741976: step: 568/466, loss: 0.9652560353279114 2023-01-22 10:08:34.367860: step: 570/466, loss: 0.21559062600135803 2023-01-22 10:08:34.958492: step: 572/466, loss: 0.28920507431030273 2023-01-22 10:08:35.557724: step: 574/466, loss: 0.4134334623813629 2023-01-22 10:08:36.224101: step: 576/466, loss: 0.6740312576293945 2023-01-22 10:08:36.836438: step: 578/466, loss: 4.717375755310059 2023-01-22 10:08:37.462202: step: 580/466, loss: 2.156572103500366 2023-01-22 10:08:38.055271: step: 582/466, loss: 0.8814055919647217 2023-01-22 10:08:38.714942: step: 584/466, loss: 0.525405764579773 2023-01-22 10:08:39.362340: step: 586/466, loss: 1.29059636592865 2023-01-22 10:08:39.974637: step: 588/466, loss: 0.4213157296180725 2023-01-22 10:08:40.539726: step: 590/466, loss: 0.4958527684211731 2023-01-22 10:08:41.123062: step: 592/466, loss: 1.0746906995773315 2023-01-22 10:08:41.743289: step: 594/466, loss: 0.36387670040130615 2023-01-22 10:08:42.348138: step: 596/466, loss: 1.0891448259353638 2023-01-22 10:08:42.892648: step: 598/466, loss: 1.131120204925537 2023-01-22 10:08:43.511963: step: 600/466, loss: 9.013973236083984 2023-01-22 10:08:44.131067: step: 602/466, loss: 2.1736669540405273 2023-01-22 10:08:44.712657: step: 604/466, loss: 0.8762894868850708 2023-01-22 10:08:45.299771: step: 606/466, loss: 0.28769704699516296 2023-01-22 10:08:45.967060: step: 608/466, loss: 1.3017332553863525 2023-01-22 10:08:46.652541: step: 610/466, loss: 0.4562010169029236 2023-01-22 10:08:47.281716: step: 612/466, loss: 0.520621657371521 2023-01-22 10:08:47.860505: step: 614/466, loss: 0.8116568326950073 2023-01-22 10:08:48.474333: step: 616/466, loss: 1.0235605239868164 2023-01-22 10:08:49.110299: step: 618/466, loss: 0.8463140726089478 2023-01-22 10:08:49.784586: step: 620/466, loss: 1.0514177083969116 2023-01-22 10:08:50.427264: step: 622/466, loss: 0.5442951917648315 2023-01-22 10:08:51.021912: step: 624/466, loss: 0.7741603255271912 2023-01-22 10:08:51.627800: step: 626/466, loss: 1.048574686050415 2023-01-22 10:08:52.279435: step: 628/466, loss: 1.3407269716262817 2023-01-22 10:08:52.886561: step: 630/466, loss: 1.2626268863677979 2023-01-22 10:08:53.600911: step: 632/466, loss: 0.20626191794872284 2023-01-22 10:08:54.158475: step: 634/466, loss: 0.6527853012084961 2023-01-22 10:08:54.743188: step: 636/466, loss: 0.4264346957206726 2023-01-22 10:08:55.407244: step: 638/466, loss: 1.9168164730072021 2023-01-22 10:08:56.100609: step: 640/466, loss: 1.464083194732666 2023-01-22 10:08:56.748235: step: 642/466, loss: 1.5962927341461182 2023-01-22 10:08:57.414561: step: 644/466, loss: 0.33310598134994507 2023-01-22 10:08:57.992142: step: 646/466, loss: 0.2638262212276459 2023-01-22 10:08:58.597561: step: 648/466, loss: 0.21902170777320862 2023-01-22 10:08:59.207297: step: 650/466, loss: 1.34444260597229 2023-01-22 10:08:59.834776: step: 652/466, loss: 1.201189398765564 2023-01-22 10:09:00.412653: step: 654/466, loss: 0.8005489110946655 2023-01-22 10:09:01.073155: step: 656/466, loss: 0.2549632787704468 2023-01-22 10:09:01.679714: step: 658/466, loss: 0.41220033168792725 2023-01-22 10:09:02.292406: step: 660/466, loss: 0.7568885087966919 2023-01-22 10:09:02.872880: step: 662/466, loss: 0.1099766418337822 2023-01-22 10:09:03.475672: step: 664/466, loss: 1.3935626745224 2023-01-22 10:09:04.070861: step: 666/466, loss: 1.1161881685256958 2023-01-22 10:09:04.656599: step: 668/466, loss: 1.8901318311691284 2023-01-22 10:09:05.278863: step: 670/466, loss: 1.109795331954956 2023-01-22 10:09:05.936655: step: 672/466, loss: 1.0813663005828857 2023-01-22 10:09:06.564218: step: 674/466, loss: 0.41245943307876587 2023-01-22 10:09:07.157124: step: 676/466, loss: 0.43254128098487854 2023-01-22 10:09:07.776593: step: 678/466, loss: 0.5581113696098328 2023-01-22 10:09:08.368909: step: 680/466, loss: 0.5595136284828186 2023-01-22 10:09:08.936140: step: 682/466, loss: 1.6868298053741455 2023-01-22 10:09:09.462374: step: 684/466, loss: 0.2144726663827896 2023-01-22 10:09:10.122082: step: 686/466, loss: 0.49233365058898926 2023-01-22 10:09:10.663336: step: 688/466, loss: 0.44163215160369873 2023-01-22 10:09:11.300547: step: 690/466, loss: 0.9274957180023193 2023-01-22 10:09:11.913054: step: 692/466, loss: 0.9633333683013916 2023-01-22 10:09:12.563350: step: 694/466, loss: 0.6739097833633423 2023-01-22 10:09:13.132098: step: 696/466, loss: 1.2564454078674316 2023-01-22 10:09:13.709991: step: 698/466, loss: 0.9555618762969971 2023-01-22 10:09:14.309007: step: 700/466, loss: 0.7222802639007568 2023-01-22 10:09:14.962742: step: 702/466, loss: 0.45501071214675903 2023-01-22 10:09:15.585237: step: 704/466, loss: 0.9140257239341736 2023-01-22 10:09:16.162676: step: 706/466, loss: 0.3494584858417511 2023-01-22 10:09:16.726788: step: 708/466, loss: 0.22887399792671204 2023-01-22 10:09:17.345635: step: 710/466, loss: 1.0088324546813965 2023-01-22 10:09:17.969352: step: 712/466, loss: 1.3507208824157715 2023-01-22 10:09:18.601942: step: 714/466, loss: 0.5512253046035767 2023-01-22 10:09:19.251565: step: 716/466, loss: 1.1831029653549194 2023-01-22 10:09:19.819317: step: 718/466, loss: 0.8344932794570923 2023-01-22 10:09:20.378177: step: 720/466, loss: 1.2759710550308228 2023-01-22 10:09:21.025770: step: 722/466, loss: 0.49068954586982727 2023-01-22 10:09:21.623649: step: 724/466, loss: 1.6091653108596802 2023-01-22 10:09:22.197723: step: 726/466, loss: 0.5767663717269897 2023-01-22 10:09:22.909836: step: 728/466, loss: 0.449196994304657 2023-01-22 10:09:23.535018: step: 730/466, loss: 0.642565906047821 2023-01-22 10:09:24.147565: step: 732/466, loss: 1.0968046188354492 2023-01-22 10:09:24.742964: step: 734/466, loss: 1.483222484588623 2023-01-22 10:09:25.337721: step: 736/466, loss: 0.23038074374198914 2023-01-22 10:09:25.995127: step: 738/466, loss: 0.20710501074790955 2023-01-22 10:09:26.612641: step: 740/466, loss: 0.2826586067676544 2023-01-22 10:09:27.190857: step: 742/466, loss: 0.3242071270942688 2023-01-22 10:09:27.782850: step: 744/466, loss: 0.8751279711723328 2023-01-22 10:09:28.366950: step: 746/466, loss: 0.24087029695510864 2023-01-22 10:09:28.959104: step: 748/466, loss: 0.6206285953521729 2023-01-22 10:09:29.629922: step: 750/466, loss: 0.9701910614967346 2023-01-22 10:09:30.277884: step: 752/466, loss: 0.8841103315353394 2023-01-22 10:09:30.963395: step: 754/466, loss: 1.3417093753814697 2023-01-22 10:09:31.602429: step: 756/466, loss: 0.0679602101445198 2023-01-22 10:09:32.205940: step: 758/466, loss: 0.21013587713241577 2023-01-22 10:09:32.843139: step: 760/466, loss: 0.5504181385040283 2023-01-22 10:09:33.437485: step: 762/466, loss: 0.7431395649909973 2023-01-22 10:09:34.095833: step: 764/466, loss: 1.3952863216400146 2023-01-22 10:09:34.693240: step: 766/466, loss: 2.8130741119384766 2023-01-22 10:09:35.309069: step: 768/466, loss: 1.290252685546875 2023-01-22 10:09:35.908398: step: 770/466, loss: 0.8594147562980652 2023-01-22 10:09:36.523019: step: 772/466, loss: 0.5162642002105713 2023-01-22 10:09:37.131025: step: 774/466, loss: 0.8412430882453918 2023-01-22 10:09:37.693697: step: 776/466, loss: 0.4270803928375244 2023-01-22 10:09:38.318850: step: 778/466, loss: 0.45356687903404236 2023-01-22 10:09:38.916052: step: 780/466, loss: 1.001887321472168 2023-01-22 10:09:39.562857: step: 782/466, loss: 0.5256953239440918 2023-01-22 10:09:40.154886: step: 784/466, loss: 0.5954223275184631 2023-01-22 10:09:40.977988: step: 786/466, loss: 0.5046058893203735 2023-01-22 10:09:41.608784: step: 788/466, loss: 0.8506419658660889 2023-01-22 10:09:42.294577: step: 790/466, loss: 0.274544894695282 2023-01-22 10:09:42.896025: step: 792/466, loss: 0.777887225151062 2023-01-22 10:09:43.509289: step: 794/466, loss: 0.79243004322052 2023-01-22 10:09:44.130754: step: 796/466, loss: 1.4459573030471802 2023-01-22 10:09:44.722607: step: 798/466, loss: 1.8504751920700073 2023-01-22 10:09:45.336219: step: 800/466, loss: 0.5830101370811462 2023-01-22 10:09:45.921701: step: 802/466, loss: 0.15489044785499573 2023-01-22 10:09:46.533134: step: 804/466, loss: 2.4530303478240967 2023-01-22 10:09:47.104247: step: 806/466, loss: 1.168830394744873 2023-01-22 10:09:47.704410: step: 808/466, loss: 1.9327266216278076 2023-01-22 10:09:48.301032: step: 810/466, loss: 1.0205035209655762 2023-01-22 10:09:48.911324: step: 812/466, loss: 0.7465870380401611 2023-01-22 10:09:49.532883: step: 814/466, loss: 0.7815476655960083 2023-01-22 10:09:50.094994: step: 816/466, loss: 0.33895769715309143 2023-01-22 10:09:50.804205: step: 818/466, loss: 1.771838665008545 2023-01-22 10:09:51.427522: step: 820/466, loss: 0.6762361526489258 2023-01-22 10:09:52.095049: step: 822/466, loss: 1.0862176418304443 2023-01-22 10:09:52.753382: step: 824/466, loss: 0.6672700047492981 2023-01-22 10:09:53.352274: step: 826/466, loss: 1.169159173965454 2023-01-22 10:09:53.942831: step: 828/466, loss: 0.46546027064323425 2023-01-22 10:09:54.517593: step: 830/466, loss: 1.314602255821228 2023-01-22 10:09:55.061013: step: 832/466, loss: 0.10589657723903656 2023-01-22 10:09:55.673292: step: 834/466, loss: 0.5514154434204102 2023-01-22 10:09:56.268290: step: 836/466, loss: 0.8950437903404236 2023-01-22 10:09:56.859135: step: 838/466, loss: 0.37474846839904785 2023-01-22 10:09:57.518317: step: 840/466, loss: 0.20299357175827026 2023-01-22 10:09:58.172712: step: 842/466, loss: 1.3681626319885254 2023-01-22 10:09:58.794108: step: 844/466, loss: 0.5367602109909058 2023-01-22 10:09:59.410542: step: 846/466, loss: 0.258124440908432 2023-01-22 10:10:00.017275: step: 848/466, loss: 0.23170244693756104 2023-01-22 10:10:00.635459: step: 850/466, loss: 1.8249881267547607 2023-01-22 10:10:01.207991: step: 852/466, loss: 0.8979384899139404 2023-01-22 10:10:01.793812: step: 854/466, loss: 0.6674975156784058 2023-01-22 10:10:02.417205: step: 856/466, loss: 0.2780420482158661 2023-01-22 10:10:03.071152: step: 858/466, loss: 2.4918947219848633 2023-01-22 10:10:03.655236: step: 860/466, loss: 0.5463971495628357 2023-01-22 10:10:04.299071: step: 862/466, loss: 0.16769395768642426 2023-01-22 10:10:04.960372: step: 864/466, loss: 1.2891900539398193 2023-01-22 10:10:05.568959: step: 866/466, loss: 0.975051760673523 2023-01-22 10:10:06.109411: step: 868/466, loss: 0.39227622747421265 2023-01-22 10:10:06.686752: step: 870/466, loss: 0.5910021066665649 2023-01-22 10:10:07.303407: step: 872/466, loss: 0.2911394238471985 2023-01-22 10:10:07.915264: step: 874/466, loss: 0.8446894884109497 2023-01-22 10:10:08.522635: step: 876/466, loss: 1.073867917060852 2023-01-22 10:10:09.094356: step: 878/466, loss: 0.43695247173309326 2023-01-22 10:10:09.714872: step: 880/466, loss: 0.5594303607940674 2023-01-22 10:10:10.358799: step: 882/466, loss: 1.9501713514328003 2023-01-22 10:10:11.023760: step: 884/466, loss: 0.5615759491920471 2023-01-22 10:10:11.605638: step: 886/466, loss: 1.3543728590011597 2023-01-22 10:10:12.341900: step: 888/466, loss: 0.516370415687561 2023-01-22 10:10:12.904788: step: 890/466, loss: 1.3336546421051025 2023-01-22 10:10:13.452283: step: 892/466, loss: 1.3180499076843262 2023-01-22 10:10:14.040580: step: 894/466, loss: 0.891340434551239 2023-01-22 10:10:14.743978: step: 896/466, loss: 0.2793903648853302 2023-01-22 10:10:15.387674: step: 898/466, loss: 0.8381861448287964 2023-01-22 10:10:16.031415: step: 900/466, loss: 1.500229835510254 2023-01-22 10:10:16.697104: step: 902/466, loss: 0.8812735676765442 2023-01-22 10:10:17.317282: step: 904/466, loss: 1.176670789718628 2023-01-22 10:10:17.942777: step: 906/466, loss: 0.7041540145874023 2023-01-22 10:10:18.637063: step: 908/466, loss: 0.2905486226081848 2023-01-22 10:10:19.288574: step: 910/466, loss: 0.22905662655830383 2023-01-22 10:10:19.958692: step: 912/466, loss: 0.5883660316467285 2023-01-22 10:10:20.604424: step: 914/466, loss: 1.1307791471481323 2023-01-22 10:10:21.140784: step: 916/466, loss: 0.5354166030883789 2023-01-22 10:10:21.713573: step: 918/466, loss: 2.1675527095794678 2023-01-22 10:10:22.363440: step: 920/466, loss: 0.23495560884475708 2023-01-22 10:10:22.986918: step: 922/466, loss: 0.7527521252632141 2023-01-22 10:10:23.592687: step: 924/466, loss: 0.6191567182540894 2023-01-22 10:10:24.161802: step: 926/466, loss: 0.9814187288284302 2023-01-22 10:10:24.742021: step: 928/466, loss: 0.27741020917892456 2023-01-22 10:10:25.371140: step: 930/466, loss: 0.8087165355682373 2023-01-22 10:10:25.973300: step: 932/466, loss: 0.23670652508735657 ================================================== Loss: 0.900 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27295632870916964, 'r': 0.3184490501606979, 'f1': 0.29395296937910576}, 'combined': 0.21659692480565687, 'epoch': 5} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3325235315117566, 'r': 0.28653929206328554, 'f1': 0.3078235477411526}, 'combined': 0.20415240471952087, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2626251926040062, 'r': 0.26361810448530676, 'f1': 0.2631207118353345}, 'combined': 0.17541380789022298, 'epoch': 5} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35095243640044055, 'r': 0.27468485065454395, 'f1': 0.30816998786401}, 'combined': 0.20112146576388018, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26420930541646576, 'r': 0.3233681252250862, 'f1': 0.29081058360686074}, 'combined': 0.21428148265768684, 'epoch': 5} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.33122811499428156, 'r': 0.2798949266098864, 'f1': 0.3034055750674977}, 'combined': 0.20122235030383265, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2271505376344086, 'r': 0.40238095238095234, 'f1': 0.2903780068728522}, 'combined': 0.1935853379152348, 'epoch': 5} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4625, 'r': 0.40217391304347827, 'f1': 0.43023255813953487}, 'combined': 0.28682170542635654, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19642857142857142, 'r': 0.09482758620689655, 'f1': 0.12790697674418602}, 'combined': 0.08527131782945735, 'epoch': 5} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2626251926040062, 'r': 0.26361810448530676, 'f1': 0.2631207118353345}, 'combined': 0.17541380789022298, 'epoch': 5} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35095243640044055, 'r': 0.27468485065454395, 'f1': 0.30816998786401}, 'combined': 0.20112146576388018, 'epoch': 5} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4625, 'r': 0.40217391304347827, 'f1': 0.43023255813953487}, 'combined': 0.28682170542635654, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2858411526903795, 'r': 0.3156727720413679, 'f1': 0.30001722428458233}, 'combined': 0.22106532315706065, 'epoch': 3} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32636076268334185, 'r': 0.28224266998091435, 'f1': 0.30270264048138956}, 'combined': 0.20075615534517025, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44642857142857145, 'r': 0.21551724137931033, 'f1': 0.2906976744186046}, 'combined': 0.19379844961240306, 'epoch': 3} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:13:04.203837: step: 2/466, loss: 0.5851011276245117 2023-01-22 10:13:04.855026: step: 4/466, loss: 0.2616092264652252 2023-01-22 10:13:05.462128: step: 6/466, loss: 0.3854227364063263 2023-01-22 10:13:06.091356: step: 8/466, loss: 0.7984930276870728 2023-01-22 10:13:06.705769: step: 10/466, loss: 0.9049591422080994 2023-01-22 10:13:07.321636: step: 12/466, loss: 1.1948599815368652 2023-01-22 10:13:07.925945: step: 14/466, loss: 1.0925015211105347 2023-01-22 10:13:08.499620: step: 16/466, loss: 0.5179755687713623 2023-01-22 10:13:09.255391: step: 18/466, loss: 0.8919814825057983 2023-01-22 10:13:09.913951: step: 20/466, loss: 0.13820907473564148 2023-01-22 10:13:10.626752: step: 22/466, loss: 0.7515466213226318 2023-01-22 10:13:11.229014: step: 24/466, loss: 1.1870259046554565 2023-01-22 10:13:11.806571: step: 26/466, loss: 0.4848284423351288 2023-01-22 10:13:12.373543: step: 28/466, loss: 0.5582742094993591 2023-01-22 10:13:12.961476: step: 30/466, loss: 0.6025078892707825 2023-01-22 10:13:13.533210: step: 32/466, loss: 0.23402145504951477 2023-01-22 10:13:14.157959: step: 34/466, loss: 1.5965683460235596 2023-01-22 10:13:14.736848: step: 36/466, loss: 0.1658952534198761 2023-01-22 10:13:15.463560: step: 38/466, loss: 0.45339614152908325 2023-01-22 10:13:16.109834: step: 40/466, loss: 0.426493376493454 2023-01-22 10:13:16.681843: step: 42/466, loss: 0.28591638803482056 2023-01-22 10:13:17.277141: step: 44/466, loss: 0.6934127807617188 2023-01-22 10:13:17.910128: step: 46/466, loss: 0.9057992696762085 2023-01-22 10:13:18.531671: step: 48/466, loss: 0.4184841513633728 2023-01-22 10:13:19.144990: step: 50/466, loss: 0.4128725826740265 2023-01-22 10:13:19.711081: step: 52/466, loss: 0.13497361540794373 2023-01-22 10:13:20.300507: step: 54/466, loss: 0.19966351985931396 2023-01-22 10:13:20.917859: step: 56/466, loss: 1.5218298435211182 2023-01-22 10:13:21.522357: step: 58/466, loss: 0.5765039324760437 2023-01-22 10:13:22.182504: step: 60/466, loss: 0.15954983234405518 2023-01-22 10:13:22.854440: step: 62/466, loss: 0.40534988045692444 2023-01-22 10:13:23.455037: step: 64/466, loss: 0.40137627720832825 2023-01-22 10:13:24.234401: step: 66/466, loss: 0.6724600195884705 2023-01-22 10:13:24.796392: step: 68/466, loss: 1.581249713897705 2023-01-22 10:13:25.477855: step: 70/466, loss: 0.7606999278068542 2023-01-22 10:13:26.095906: step: 72/466, loss: 1.073913812637329 2023-01-22 10:13:26.672969: step: 74/466, loss: 0.4068748652935028 2023-01-22 10:13:27.269633: step: 76/466, loss: 1.0465961694717407 2023-01-22 10:13:27.862054: step: 78/466, loss: 0.5474420785903931 2023-01-22 10:13:28.451571: step: 80/466, loss: 0.6576740145683289 2023-01-22 10:13:29.119345: step: 82/466, loss: 1.0797858238220215 2023-01-22 10:13:29.750240: step: 84/466, loss: 0.3516871929168701 2023-01-22 10:13:30.381360: step: 86/466, loss: 0.5890368223190308 2023-01-22 10:13:31.096135: step: 88/466, loss: 0.5695680975914001 2023-01-22 10:13:31.710367: step: 90/466, loss: 0.40643540024757385 2023-01-22 10:13:32.362472: step: 92/466, loss: 0.9107625484466553 2023-01-22 10:13:33.068177: step: 94/466, loss: 0.2521580159664154 2023-01-22 10:13:33.662163: step: 96/466, loss: 0.7301395535469055 2023-01-22 10:13:34.211396: step: 98/466, loss: 0.05241236835718155 2023-01-22 10:13:34.833763: step: 100/466, loss: 0.3641345202922821 2023-01-22 10:13:35.451949: step: 102/466, loss: 0.5393410921096802 2023-01-22 10:13:36.050851: step: 104/466, loss: 0.15966859459877014 2023-01-22 10:13:36.666668: step: 106/466, loss: 0.26376670598983765 2023-01-22 10:13:37.294248: step: 108/466, loss: 0.6145482063293457 2023-01-22 10:13:37.913797: step: 110/466, loss: 0.4013631343841553 2023-01-22 10:13:38.504297: step: 112/466, loss: 1.1954171657562256 2023-01-22 10:13:39.068246: step: 114/466, loss: 0.47752130031585693 2023-01-22 10:13:39.616731: step: 116/466, loss: 0.4595664441585541 2023-01-22 10:13:40.221916: step: 118/466, loss: 0.306684285402298 2023-01-22 10:13:40.826008: step: 120/466, loss: 0.9856728315353394 2023-01-22 10:13:41.447099: step: 122/466, loss: 0.22228127717971802 2023-01-22 10:13:42.004383: step: 124/466, loss: 0.5959650874137878 2023-01-22 10:13:42.594038: step: 126/466, loss: 0.5348347425460815 2023-01-22 10:13:43.157751: step: 128/466, loss: 0.3972974121570587 2023-01-22 10:13:43.713429: step: 130/466, loss: 0.9095874428749084 2023-01-22 10:13:44.303033: step: 132/466, loss: 0.2588285207748413 2023-01-22 10:13:44.870409: step: 134/466, loss: 0.33528074622154236 2023-01-22 10:13:45.527047: step: 136/466, loss: 0.5507739782333374 2023-01-22 10:13:46.091490: step: 138/466, loss: 0.6516622304916382 2023-01-22 10:13:46.674262: step: 140/466, loss: 0.16563507914543152 2023-01-22 10:13:47.259002: step: 142/466, loss: 0.23387084901332855 2023-01-22 10:13:47.876311: step: 144/466, loss: 1.556229829788208 2023-01-22 10:13:48.489498: step: 146/466, loss: 0.5888187885284424 2023-01-22 10:13:49.153347: step: 148/466, loss: 0.7805293798446655 2023-01-22 10:13:49.740682: step: 150/466, loss: 0.41787880659103394 2023-01-22 10:13:50.297163: step: 152/466, loss: 2.2686965465545654 2023-01-22 10:13:50.880154: step: 154/466, loss: 0.3498953580856323 2023-01-22 10:13:51.413850: step: 156/466, loss: 0.6985567212104797 2023-01-22 10:13:52.037458: step: 158/466, loss: 0.9538766741752625 2023-01-22 10:13:52.616305: step: 160/466, loss: 0.19783622026443481 2023-01-22 10:13:53.269589: step: 162/466, loss: 0.3048805892467499 2023-01-22 10:13:53.865139: step: 164/466, loss: 0.9341416358947754 2023-01-22 10:13:54.418956: step: 166/466, loss: 0.19876980781555176 2023-01-22 10:13:55.029945: step: 168/466, loss: 0.32706427574157715 2023-01-22 10:13:55.755753: step: 170/466, loss: 0.6943373680114746 2023-01-22 10:13:56.362344: step: 172/466, loss: 0.7037789225578308 2023-01-22 10:13:56.968622: step: 174/466, loss: 0.9194547533988953 2023-01-22 10:13:57.661729: step: 176/466, loss: 0.30153122544288635 2023-01-22 10:13:58.291121: step: 178/466, loss: 0.5422283411026001 2023-01-22 10:13:58.985229: step: 180/466, loss: 0.7232992053031921 2023-01-22 10:13:59.627647: step: 182/466, loss: 0.6932333707809448 2023-01-22 10:14:00.211222: step: 184/466, loss: 2.055283546447754 2023-01-22 10:14:00.820905: step: 186/466, loss: 1.341884732246399 2023-01-22 10:14:01.455070: step: 188/466, loss: 0.784245491027832 2023-01-22 10:14:02.245158: step: 190/466, loss: 0.17669035494327545 2023-01-22 10:14:02.854961: step: 192/466, loss: 1.3504656553268433 2023-01-22 10:14:03.448309: step: 194/466, loss: 1.777635931968689 2023-01-22 10:14:04.155349: step: 196/466, loss: 0.5566887855529785 2023-01-22 10:14:04.695150: step: 198/466, loss: 0.2367226481437683 2023-01-22 10:14:05.346263: step: 200/466, loss: 0.521005392074585 2023-01-22 10:14:05.947596: step: 202/466, loss: 0.33490070700645447 2023-01-22 10:14:06.487233: step: 204/466, loss: 0.7058187127113342 2023-01-22 10:14:07.113791: step: 206/466, loss: 2.002868175506592 2023-01-22 10:14:07.713370: step: 208/466, loss: 0.5000832676887512 2023-01-22 10:14:08.265203: step: 210/466, loss: 1.2851827144622803 2023-01-22 10:14:08.797695: step: 212/466, loss: 0.3409268856048584 2023-01-22 10:14:09.446061: step: 214/466, loss: 0.396659791469574 2023-01-22 10:14:10.046304: step: 216/466, loss: 1.1128814220428467 2023-01-22 10:14:10.635705: step: 218/466, loss: 0.7367991805076599 2023-01-22 10:14:11.269166: step: 220/466, loss: 0.31557464599609375 2023-01-22 10:14:11.839035: step: 222/466, loss: 0.49025699496269226 2023-01-22 10:14:12.399121: step: 224/466, loss: 0.6997539401054382 2023-01-22 10:14:13.027635: step: 226/466, loss: 0.20695850253105164 2023-01-22 10:14:13.636545: step: 228/466, loss: 0.6655076742172241 2023-01-22 10:14:14.274729: step: 230/466, loss: 0.8669025301933289 2023-01-22 10:14:14.896726: step: 232/466, loss: 0.3725025951862335 2023-01-22 10:14:15.435556: step: 234/466, loss: 0.11006996780633926 2023-01-22 10:14:15.992988: step: 236/466, loss: 0.3412753939628601 2023-01-22 10:14:16.667698: step: 238/466, loss: 1.3536577224731445 2023-01-22 10:14:17.228313: step: 240/466, loss: 0.16854868829250336 2023-01-22 10:14:17.845686: step: 242/466, loss: 0.6572349667549133 2023-01-22 10:14:18.461562: step: 244/466, loss: 0.45324134826660156 2023-01-22 10:14:19.110168: step: 246/466, loss: 0.3042180836200714 2023-01-22 10:14:19.730981: step: 248/466, loss: 0.505139172077179 2023-01-22 10:14:20.361506: step: 250/466, loss: 1.6602253913879395 2023-01-22 10:14:20.951531: step: 252/466, loss: 0.3393111228942871 2023-01-22 10:14:21.617837: step: 254/466, loss: 1.0471343994140625 2023-01-22 10:14:22.201023: step: 256/466, loss: 0.30317452549934387 2023-01-22 10:14:22.764981: step: 258/466, loss: 0.6668992042541504 2023-01-22 10:14:23.355004: step: 260/466, loss: 1.2988413572311401 2023-01-22 10:14:23.970993: step: 262/466, loss: 0.2204177975654602 2023-01-22 10:14:24.545899: step: 264/466, loss: 0.7073379158973694 2023-01-22 10:14:25.137729: step: 266/466, loss: 0.2726287543773651 2023-01-22 10:14:25.742489: step: 268/466, loss: 1.0368794202804565 2023-01-22 10:14:26.373269: step: 270/466, loss: 0.19614382088184357 2023-01-22 10:14:27.004590: step: 272/466, loss: 0.4229815900325775 2023-01-22 10:14:27.639230: step: 274/466, loss: 0.25105732679367065 2023-01-22 10:14:28.251705: step: 276/466, loss: 1.1729516983032227 2023-01-22 10:14:28.879949: step: 278/466, loss: 1.3843333721160889 2023-01-22 10:14:29.569820: step: 280/466, loss: 0.3619017004966736 2023-01-22 10:14:30.141237: step: 282/466, loss: 0.12895332276821136 2023-01-22 10:14:30.791016: step: 284/466, loss: 1.8011032342910767 2023-01-22 10:14:31.368368: step: 286/466, loss: 1.437566876411438 2023-01-22 10:14:31.996629: step: 288/466, loss: 0.6030497550964355 2023-01-22 10:14:32.703715: step: 290/466, loss: 0.36808836460113525 2023-01-22 10:14:33.232192: step: 292/466, loss: 0.3335151970386505 2023-01-22 10:14:33.829169: step: 294/466, loss: 0.611156165599823 2023-01-22 10:14:34.457297: step: 296/466, loss: 1.2586387395858765 2023-01-22 10:14:35.092576: step: 298/466, loss: 0.2738572955131531 2023-01-22 10:14:35.687274: step: 300/466, loss: 0.6908833980560303 2023-01-22 10:14:36.324477: step: 302/466, loss: 0.679551362991333 2023-01-22 10:14:36.922979: step: 304/466, loss: 3.43265962600708 2023-01-22 10:14:37.565482: step: 306/466, loss: 0.3721560835838318 2023-01-22 10:14:38.205884: step: 308/466, loss: 1.1952612400054932 2023-01-22 10:14:38.867114: step: 310/466, loss: 0.8409422636032104 2023-01-22 10:14:39.487772: step: 312/466, loss: 0.679412305355072 2023-01-22 10:14:40.076254: step: 314/466, loss: 0.30780380964279175 2023-01-22 10:14:40.649743: step: 316/466, loss: 0.2822536528110504 2023-01-22 10:14:41.264119: step: 318/466, loss: 1.0847021341323853 2023-01-22 10:14:41.841130: step: 320/466, loss: 0.2368328869342804 2023-01-22 10:14:42.457833: step: 322/466, loss: 0.3256698548793793 2023-01-22 10:14:43.059855: step: 324/466, loss: 0.5243033766746521 2023-01-22 10:14:43.683672: step: 326/466, loss: 0.7073649168014526 2023-01-22 10:14:44.264875: step: 328/466, loss: 1.7504061460494995 2023-01-22 10:14:44.907597: step: 330/466, loss: 0.5639289617538452 2023-01-22 10:14:45.563696: step: 332/466, loss: 0.3306322693824768 2023-01-22 10:14:46.259510: step: 334/466, loss: 0.9812665581703186 2023-01-22 10:14:46.905456: step: 336/466, loss: 0.4654000401496887 2023-01-22 10:14:47.492839: step: 338/466, loss: 0.8619095683097839 2023-01-22 10:14:48.170449: step: 340/466, loss: 0.9946466684341431 2023-01-22 10:14:48.791301: step: 342/466, loss: 0.976168155670166 2023-01-22 10:14:49.373521: step: 344/466, loss: 0.26822784543037415 2023-01-22 10:14:49.969467: step: 346/466, loss: 0.49369752407073975 2023-01-22 10:14:50.576658: step: 348/466, loss: 0.46656692028045654 2023-01-22 10:14:51.154762: step: 350/466, loss: 0.21348896622657776 2023-01-22 10:14:51.793471: step: 352/466, loss: 0.9902009963989258 2023-01-22 10:14:52.389096: step: 354/466, loss: 0.3983117938041687 2023-01-22 10:14:52.987429: step: 356/466, loss: 0.5899996161460876 2023-01-22 10:14:53.652282: step: 358/466, loss: 0.2520958483219147 2023-01-22 10:14:54.280554: step: 360/466, loss: 0.7416293025016785 2023-01-22 10:14:54.842920: step: 362/466, loss: 0.40580204129219055 2023-01-22 10:14:55.418016: step: 364/466, loss: 0.4914165139198303 2023-01-22 10:14:56.085701: step: 366/466, loss: 1.9097177982330322 2023-01-22 10:14:56.664046: step: 368/466, loss: 0.5574102997779846 2023-01-22 10:14:57.255137: step: 370/466, loss: 0.2993851602077484 2023-01-22 10:14:57.909606: step: 372/466, loss: 0.2144942283630371 2023-01-22 10:14:58.569564: step: 374/466, loss: 0.3030288815498352 2023-01-22 10:14:59.155508: step: 376/466, loss: 0.6745043992996216 2023-01-22 10:14:59.758759: step: 378/466, loss: 0.5604280233383179 2023-01-22 10:15:00.334751: step: 380/466, loss: 0.49297454953193665 2023-01-22 10:15:00.979375: step: 382/466, loss: 0.7742137312889099 2023-01-22 10:15:01.606101: step: 384/466, loss: 1.0356718301773071 2023-01-22 10:15:02.250865: step: 386/466, loss: 0.24659501016139984 2023-01-22 10:15:02.876357: step: 388/466, loss: 0.9688482284545898 2023-01-22 10:15:03.528482: step: 390/466, loss: 0.47970694303512573 2023-01-22 10:15:04.150579: step: 392/466, loss: 0.6070094704627991 2023-01-22 10:15:04.847827: step: 394/466, loss: 0.27566829323768616 2023-01-22 10:15:05.424953: step: 396/466, loss: 1.622042179107666 2023-01-22 10:15:06.042040: step: 398/466, loss: 1.9418872594833374 2023-01-22 10:15:06.650832: step: 400/466, loss: 0.389572411775589 2023-01-22 10:15:07.260792: step: 402/466, loss: 2.1840286254882812 2023-01-22 10:15:07.857560: step: 404/466, loss: 0.16141338646411896 2023-01-22 10:15:08.465197: step: 406/466, loss: 0.3883560299873352 2023-01-22 10:15:09.014487: step: 408/466, loss: 0.753023624420166 2023-01-22 10:15:09.592913: step: 410/466, loss: 0.09870093315839767 2023-01-22 10:15:10.136719: step: 412/466, loss: 0.6118962168693542 2023-01-22 10:15:10.712841: step: 414/466, loss: 0.6355909705162048 2023-01-22 10:15:11.331251: step: 416/466, loss: 1.0369325876235962 2023-01-22 10:15:11.922733: step: 418/466, loss: 0.5004268884658813 2023-01-22 10:15:12.581412: step: 420/466, loss: 0.41801849007606506 2023-01-22 10:15:13.165122: step: 422/466, loss: 1.4625771045684814 2023-01-22 10:15:13.821186: step: 424/466, loss: 1.1229809522628784 2023-01-22 10:15:14.366802: step: 426/466, loss: 0.8910166025161743 2023-01-22 10:15:14.971903: step: 428/466, loss: 0.22723639011383057 2023-01-22 10:15:15.599710: step: 430/466, loss: 1.198258638381958 2023-01-22 10:15:16.234844: step: 432/466, loss: 1.2684011459350586 2023-01-22 10:15:16.871255: step: 434/466, loss: 0.47714242339134216 2023-01-22 10:15:17.497411: step: 436/466, loss: 0.28571125864982605 2023-01-22 10:15:18.119205: step: 438/466, loss: 1.4924527406692505 2023-01-22 10:15:18.774790: step: 440/466, loss: 3.09102463722229 2023-01-22 10:15:19.391693: step: 442/466, loss: 2.5959880352020264 2023-01-22 10:15:20.032446: step: 444/466, loss: 0.4722447693347931 2023-01-22 10:15:20.654232: step: 446/466, loss: 0.25088632106781006 2023-01-22 10:15:21.228551: step: 448/466, loss: 0.6037149429321289 2023-01-22 10:15:21.854739: step: 450/466, loss: 0.531111478805542 2023-01-22 10:15:22.484587: step: 452/466, loss: 0.4763965606689453 2023-01-22 10:15:23.133956: step: 454/466, loss: 0.38432395458221436 2023-01-22 10:15:23.723660: step: 456/466, loss: 0.7968324422836304 2023-01-22 10:15:24.359389: step: 458/466, loss: 2.104541778564453 2023-01-22 10:15:25.026617: step: 460/466, loss: 0.8049629330635071 2023-01-22 10:15:25.680890: step: 462/466, loss: 0.14413891732692719 2023-01-22 10:15:26.247319: step: 464/466, loss: 1.6799324750900269 2023-01-22 10:15:26.867994: step: 466/466, loss: 1.1222249269485474 2023-01-22 10:15:27.492555: step: 468/466, loss: 0.7136576771736145 2023-01-22 10:15:28.123889: step: 470/466, loss: 0.4166320264339447 2023-01-22 10:15:28.659899: step: 472/466, loss: 1.827630639076233 2023-01-22 10:15:29.232224: step: 474/466, loss: 0.5749404430389404 2023-01-22 10:15:29.858777: step: 476/466, loss: 0.9107766151428223 2023-01-22 10:15:30.434915: step: 478/466, loss: 1.3580448627471924 2023-01-22 10:15:31.060409: step: 480/466, loss: 0.20735961198806763 2023-01-22 10:15:31.640243: step: 482/466, loss: 0.33012843132019043 2023-01-22 10:15:32.305670: step: 484/466, loss: 0.29290837049484253 2023-01-22 10:15:32.954189: step: 486/466, loss: 0.10790805518627167 2023-01-22 10:15:33.560640: step: 488/466, loss: 1.6741561889648438 2023-01-22 10:15:34.171501: step: 490/466, loss: 2.871872663497925 2023-01-22 10:15:34.738506: step: 492/466, loss: 0.17955073714256287 2023-01-22 10:15:35.346393: step: 494/466, loss: 0.313848078250885 2023-01-22 10:15:35.975507: step: 496/466, loss: 0.4093271791934967 2023-01-22 10:15:36.675429: step: 498/466, loss: 0.2761945426464081 2023-01-22 10:15:37.348309: step: 500/466, loss: 0.9821873903274536 2023-01-22 10:15:38.009146: step: 502/466, loss: 0.5871090888977051 2023-01-22 10:15:38.591755: step: 504/466, loss: 0.8444579839706421 2023-01-22 10:15:39.154696: step: 506/466, loss: 1.1011338233947754 2023-01-22 10:15:39.795356: step: 508/466, loss: 0.659293532371521 2023-01-22 10:15:40.327926: step: 510/466, loss: 0.5439980030059814 2023-01-22 10:15:40.965760: step: 512/466, loss: 0.9268401861190796 2023-01-22 10:15:41.558344: step: 514/466, loss: 0.9230048656463623 2023-01-22 10:15:42.141651: step: 516/466, loss: 0.3950723707675934 2023-01-22 10:15:42.690807: step: 518/466, loss: 3.7305030822753906 2023-01-22 10:15:43.319646: step: 520/466, loss: 1.420408844947815 2023-01-22 10:15:43.953114: step: 522/466, loss: 0.12743352353572845 2023-01-22 10:15:44.529854: step: 524/466, loss: 0.30235588550567627 2023-01-22 10:15:45.149053: step: 526/466, loss: 0.5124424695968628 2023-01-22 10:15:45.729251: step: 528/466, loss: 1.0410754680633545 2023-01-22 10:15:46.369680: step: 530/466, loss: 1.2636866569519043 2023-01-22 10:15:47.003944: step: 532/466, loss: 0.19068653881549835 2023-01-22 10:15:47.625555: step: 534/466, loss: 1.2769172191619873 2023-01-22 10:15:48.267607: step: 536/466, loss: 1.0977855920791626 2023-01-22 10:15:48.876921: step: 538/466, loss: 0.31804510951042175 2023-01-22 10:15:49.538608: step: 540/466, loss: 1.8412132263183594 2023-01-22 10:15:50.125630: step: 542/466, loss: 0.6204238533973694 2023-01-22 10:15:50.668617: step: 544/466, loss: 1.0468567609786987 2023-01-22 10:15:51.200576: step: 546/466, loss: 0.43631964921951294 2023-01-22 10:15:51.848245: step: 548/466, loss: 0.2319594770669937 2023-01-22 10:15:52.522839: step: 550/466, loss: 1.136697769165039 2023-01-22 10:15:53.114061: step: 552/466, loss: 1.1643078327178955 2023-01-22 10:15:53.729199: step: 554/466, loss: 0.256322979927063 2023-01-22 10:15:54.294563: step: 556/466, loss: 0.3079525828361511 2023-01-22 10:15:54.902038: step: 558/466, loss: 0.3707126975059509 2023-01-22 10:15:55.485012: step: 560/466, loss: 0.6252129077911377 2023-01-22 10:15:56.069942: step: 562/466, loss: 3.6634650230407715 2023-01-22 10:15:56.673260: step: 564/466, loss: 0.16154566407203674 2023-01-22 10:15:57.276330: step: 566/466, loss: 0.3450652062892914 2023-01-22 10:15:57.879147: step: 568/466, loss: 1.1304841041564941 2023-01-22 10:15:58.516782: step: 570/466, loss: 0.6155608892440796 2023-01-22 10:15:59.105448: step: 572/466, loss: 0.22098317742347717 2023-01-22 10:15:59.714368: step: 574/466, loss: 0.6425347924232483 2023-01-22 10:16:00.311237: step: 576/466, loss: 0.8756067156791687 2023-01-22 10:16:00.923734: step: 578/466, loss: 0.604966938495636 2023-01-22 10:16:01.570616: step: 580/466, loss: 0.39261394739151 2023-01-22 10:16:02.207012: step: 582/466, loss: 0.5821620225906372 2023-01-22 10:16:02.789827: step: 584/466, loss: 0.46261516213417053 2023-01-22 10:16:03.398535: step: 586/466, loss: 0.4566130042076111 2023-01-22 10:16:04.033253: step: 588/466, loss: 0.9882059097290039 2023-01-22 10:16:04.689253: step: 590/466, loss: 0.4548759460449219 2023-01-22 10:16:05.305576: step: 592/466, loss: 0.44460439682006836 2023-01-22 10:16:06.035834: step: 594/466, loss: 0.1863112598657608 2023-01-22 10:16:06.658662: step: 596/466, loss: 0.49223336577415466 2023-01-22 10:16:07.226416: step: 598/466, loss: 0.948884129524231 2023-01-22 10:16:07.884638: step: 600/466, loss: 0.27376243472099304 2023-01-22 10:16:08.491782: step: 602/466, loss: 1.7389003038406372 2023-01-22 10:16:09.077700: step: 604/466, loss: 0.8352102041244507 2023-01-22 10:16:09.681995: step: 606/466, loss: 0.22336582839488983 2023-01-22 10:16:10.404541: step: 608/466, loss: 2.5565452575683594 2023-01-22 10:16:11.083124: step: 610/466, loss: 0.2957126796245575 2023-01-22 10:16:11.683286: step: 612/466, loss: 0.30108410120010376 2023-01-22 10:16:12.267572: step: 614/466, loss: 1.1393479108810425 2023-01-22 10:16:12.891593: step: 616/466, loss: 1.0807772874832153 2023-01-22 10:16:13.522560: step: 618/466, loss: 2.6531262397766113 2023-01-22 10:16:14.115339: step: 620/466, loss: 0.8356918096542358 2023-01-22 10:16:14.746676: step: 622/466, loss: 0.18988583981990814 2023-01-22 10:16:15.293171: step: 624/466, loss: 0.49750959873199463 2023-01-22 10:16:15.925132: step: 626/466, loss: 0.16339921951293945 2023-01-22 10:16:16.460026: step: 628/466, loss: 0.39217299222946167 2023-01-22 10:16:17.122039: step: 630/466, loss: 0.6528757810592651 2023-01-22 10:16:17.724934: step: 632/466, loss: 0.5883387923240662 2023-01-22 10:16:18.339968: step: 634/466, loss: 1.029714584350586 2023-01-22 10:16:18.920992: step: 636/466, loss: 0.3076227009296417 2023-01-22 10:16:19.508251: step: 638/466, loss: 0.39846861362457275 2023-01-22 10:16:20.148830: step: 640/466, loss: 0.7532325983047485 2023-01-22 10:16:20.716060: step: 642/466, loss: 0.3744526505470276 2023-01-22 10:16:21.318886: step: 644/466, loss: 0.7421638369560242 2023-01-22 10:16:21.961087: step: 646/466, loss: 0.5743134021759033 2023-01-22 10:16:22.507711: step: 648/466, loss: 0.40305227041244507 2023-01-22 10:16:23.112729: step: 650/466, loss: 0.5777412056922913 2023-01-22 10:16:23.768988: step: 652/466, loss: 1.007772445678711 2023-01-22 10:16:24.370337: step: 654/466, loss: 0.6450772881507874 2023-01-22 10:16:24.978017: step: 656/466, loss: 0.4225735664367676 2023-01-22 10:16:25.568846: step: 658/466, loss: 1.1234859228134155 2023-01-22 10:16:26.222065: step: 660/466, loss: 0.17602777481079102 2023-01-22 10:16:26.841273: step: 662/466, loss: 0.2158088982105255 2023-01-22 10:16:27.424124: step: 664/466, loss: 0.22115278244018555 2023-01-22 10:16:28.007279: step: 666/466, loss: 0.45712563395500183 2023-01-22 10:16:28.620290: step: 668/466, loss: 0.2096031755208969 2023-01-22 10:16:29.199491: step: 670/466, loss: 0.339317262172699 2023-01-22 10:16:29.781199: step: 672/466, loss: 0.2058376669883728 2023-01-22 10:16:30.331779: step: 674/466, loss: 0.09214982390403748 2023-01-22 10:16:30.986167: step: 676/466, loss: 1.1575394868850708 2023-01-22 10:16:31.637243: step: 678/466, loss: 0.8664871454238892 2023-01-22 10:16:32.297530: step: 680/466, loss: 1.3440394401550293 2023-01-22 10:16:32.839757: step: 682/466, loss: 0.9377317428588867 2023-01-22 10:16:33.476578: step: 684/466, loss: 0.37596309185028076 2023-01-22 10:16:34.089296: step: 686/466, loss: 0.8694080114364624 2023-01-22 10:16:34.696220: step: 688/466, loss: 0.4781648516654968 2023-01-22 10:16:35.304360: step: 690/466, loss: 0.9240776896476746 2023-01-22 10:16:35.954286: step: 692/466, loss: 0.46945273876190186 2023-01-22 10:16:36.515349: step: 694/466, loss: 0.32516801357269287 2023-01-22 10:16:37.132410: step: 696/466, loss: 0.5208209753036499 2023-01-22 10:16:37.726948: step: 698/466, loss: 0.23446114361286163 2023-01-22 10:16:38.341927: step: 700/466, loss: 0.661338210105896 2023-01-22 10:16:38.927641: step: 702/466, loss: 0.2834647595882416 2023-01-22 10:16:39.529654: step: 704/466, loss: 0.679925799369812 2023-01-22 10:16:40.091369: step: 706/466, loss: 0.9928429126739502 2023-01-22 10:16:40.652426: step: 708/466, loss: 0.15573279559612274 2023-01-22 10:16:41.276138: step: 710/466, loss: 0.8083428740501404 2023-01-22 10:16:41.861159: step: 712/466, loss: 1.1022380590438843 2023-01-22 10:16:42.534335: step: 714/466, loss: 2.9695441722869873 2023-01-22 10:16:43.157652: step: 716/466, loss: 0.32324570417404175 2023-01-22 10:16:43.794944: step: 718/466, loss: 0.26076310873031616 2023-01-22 10:16:44.426063: step: 720/466, loss: 0.4587477147579193 2023-01-22 10:16:44.980519: step: 722/466, loss: 1.2385427951812744 2023-01-22 10:16:45.612401: step: 724/466, loss: 0.2180221974849701 2023-01-22 10:16:46.229859: step: 726/466, loss: 0.4084261655807495 2023-01-22 10:16:46.875832: step: 728/466, loss: 1.120225429534912 2023-01-22 10:16:47.465092: step: 730/466, loss: 0.4246133863925934 2023-01-22 10:16:48.121978: step: 732/466, loss: 0.336835652589798 2023-01-22 10:16:48.696042: step: 734/466, loss: 0.4902934432029724 2023-01-22 10:16:49.243990: step: 736/466, loss: 1.239622950553894 2023-01-22 10:16:49.871771: step: 738/466, loss: 0.3462580442428589 2023-01-22 10:16:50.475840: step: 740/466, loss: 0.4404885768890381 2023-01-22 10:16:51.098623: step: 742/466, loss: 0.22442424297332764 2023-01-22 10:16:51.675922: step: 744/466, loss: 7.0663228034973145 2023-01-22 10:16:52.262149: step: 746/466, loss: 0.3398008942604065 2023-01-22 10:16:52.841760: step: 748/466, loss: 0.8893722295761108 2023-01-22 10:16:53.494049: step: 750/466, loss: 0.17342069745063782 2023-01-22 10:16:54.134111: step: 752/466, loss: 0.8895680904388428 2023-01-22 10:16:54.711873: step: 754/466, loss: 0.7023111581802368 2023-01-22 10:16:55.285554: step: 756/466, loss: 0.26367777585983276 2023-01-22 10:16:55.820356: step: 758/466, loss: 0.9022852182388306 2023-01-22 10:16:56.492322: step: 760/466, loss: 0.9276801943778992 2023-01-22 10:16:57.095438: step: 762/466, loss: 0.7745717167854309 2023-01-22 10:16:57.736416: step: 764/466, loss: 0.598774790763855 2023-01-22 10:16:58.287975: step: 766/466, loss: 0.44912904500961304 2023-01-22 10:16:58.966289: step: 768/466, loss: 0.24844907224178314 2023-01-22 10:16:59.613472: step: 770/466, loss: 0.272910475730896 2023-01-22 10:17:00.167099: step: 772/466, loss: 0.25028693675994873 2023-01-22 10:17:00.747198: step: 774/466, loss: 0.5121179223060608 2023-01-22 10:17:01.287393: step: 776/466, loss: 1.4639263153076172 2023-01-22 10:17:01.982010: step: 778/466, loss: 0.681140661239624 2023-01-22 10:17:02.659658: step: 780/466, loss: 0.8039424419403076 2023-01-22 10:17:03.306871: step: 782/466, loss: 0.35990825295448303 2023-01-22 10:17:03.863112: step: 784/466, loss: 0.34313514828681946 2023-01-22 10:17:04.418937: step: 786/466, loss: 0.5792441368103027 2023-01-22 10:17:05.210844: step: 788/466, loss: 0.6026006937026978 2023-01-22 10:17:05.796309: step: 790/466, loss: 7.832659721374512 2023-01-22 10:17:06.507208: step: 792/466, loss: 1.1409316062927246 2023-01-22 10:17:07.063011: step: 794/466, loss: 0.5021165013313293 2023-01-22 10:17:07.707126: step: 796/466, loss: 1.1863479614257812 2023-01-22 10:17:08.380018: step: 798/466, loss: 1.0290889739990234 2023-01-22 10:17:09.035207: step: 800/466, loss: 0.2763671875 2023-01-22 10:17:09.641975: step: 802/466, loss: 1.0786328315734863 2023-01-22 10:17:10.298823: step: 804/466, loss: 0.641802966594696 2023-01-22 10:17:10.902340: step: 806/466, loss: 0.19582396745681763 2023-01-22 10:17:11.543216: step: 808/466, loss: 0.8715490698814392 2023-01-22 10:17:12.053599: step: 810/466, loss: 0.2146126627922058 2023-01-22 10:17:12.719576: step: 812/466, loss: 0.2053159922361374 2023-01-22 10:17:13.319056: step: 814/466, loss: 3.9195573329925537 2023-01-22 10:17:14.023712: step: 816/466, loss: 0.8048123121261597 2023-01-22 10:17:14.584509: step: 818/466, loss: 2.6136231422424316 2023-01-22 10:17:15.196795: step: 820/466, loss: 0.85329270362854 2023-01-22 10:17:15.807066: step: 822/466, loss: 0.8017750978469849 2023-01-22 10:17:16.388383: step: 824/466, loss: 0.26032787561416626 2023-01-22 10:17:16.992517: step: 826/466, loss: 1.9249786138534546 2023-01-22 10:17:17.591072: step: 828/466, loss: 0.6099420189857483 2023-01-22 10:17:18.236074: step: 830/466, loss: 0.21212445199489594 2023-01-22 10:17:18.840101: step: 832/466, loss: 0.5787874460220337 2023-01-22 10:17:19.444890: step: 834/466, loss: 0.5170060992240906 2023-01-22 10:17:20.086239: step: 836/466, loss: 0.4283927083015442 2023-01-22 10:17:20.676684: step: 838/466, loss: 0.8669469356536865 2023-01-22 10:17:21.315172: step: 840/466, loss: 1.6597884893417358 2023-01-22 10:17:21.924133: step: 842/466, loss: 1.3498128652572632 2023-01-22 10:17:22.560476: step: 844/466, loss: 1.7638030052185059 2023-01-22 10:17:23.194515: step: 846/466, loss: 0.43602848052978516 2023-01-22 10:17:23.880919: step: 848/466, loss: 0.6229745149612427 2023-01-22 10:17:24.464737: step: 850/466, loss: 0.6166430115699768 2023-01-22 10:17:25.084386: step: 852/466, loss: 0.5322221517562866 2023-01-22 10:17:25.632976: step: 854/466, loss: 0.3037514388561249 2023-01-22 10:17:26.269950: step: 856/466, loss: 0.4232807755470276 2023-01-22 10:17:26.864378: step: 858/466, loss: 0.23372140526771545 2023-01-22 10:17:27.461527: step: 860/466, loss: 1.3362098932266235 2023-01-22 10:17:28.047506: step: 862/466, loss: 0.38267797231674194 2023-01-22 10:17:28.668275: step: 864/466, loss: 0.8275524377822876 2023-01-22 10:17:29.286190: step: 866/466, loss: 1.5383274555206299 2023-01-22 10:17:30.001403: step: 868/466, loss: 0.17373280227184296 2023-01-22 10:17:30.574303: step: 870/466, loss: 0.28224244713783264 2023-01-22 10:17:31.144230: step: 872/466, loss: 0.4439685344696045 2023-01-22 10:17:31.795846: step: 874/466, loss: 0.5664662718772888 2023-01-22 10:17:32.365064: step: 876/466, loss: 1.4287652969360352 2023-01-22 10:17:33.048055: step: 878/466, loss: 1.4793837070465088 2023-01-22 10:17:33.660078: step: 880/466, loss: 1.6305980682373047 2023-01-22 10:17:34.287059: step: 882/466, loss: 2.0438435077667236 2023-01-22 10:17:34.904370: step: 884/466, loss: 0.4367019534111023 2023-01-22 10:17:35.558003: step: 886/466, loss: 6.0026535987854 2023-01-22 10:17:36.135051: step: 888/466, loss: 0.6394232511520386 2023-01-22 10:17:36.728757: step: 890/466, loss: 0.775500476360321 2023-01-22 10:17:37.411473: step: 892/466, loss: 0.55797278881073 2023-01-22 10:17:38.028863: step: 894/466, loss: 0.5209435820579529 2023-01-22 10:17:38.641220: step: 896/466, loss: 3.8680386543273926 2023-01-22 10:17:39.224782: step: 898/466, loss: 0.9209475517272949 2023-01-22 10:17:39.830770: step: 900/466, loss: 0.7050312757492065 2023-01-22 10:17:40.421815: step: 902/466, loss: 0.5102740526199341 2023-01-22 10:17:41.051430: step: 904/466, loss: 0.529159665107727 2023-01-22 10:17:41.659221: step: 906/466, loss: 0.5836001038551331 2023-01-22 10:17:42.272622: step: 908/466, loss: 0.8525623679161072 2023-01-22 10:17:42.856525: step: 910/466, loss: 10.857308387756348 2023-01-22 10:17:43.479166: step: 912/466, loss: 0.52732914686203 2023-01-22 10:17:44.062307: step: 914/466, loss: 0.9026237726211548 2023-01-22 10:17:44.639066: step: 916/466, loss: 1.7058541774749756 2023-01-22 10:17:45.204018: step: 918/466, loss: 1.7477123737335205 2023-01-22 10:17:45.830802: step: 920/466, loss: 0.24393534660339355 2023-01-22 10:17:46.491206: step: 922/466, loss: 0.695246696472168 2023-01-22 10:17:47.133803: step: 924/466, loss: 0.23524464666843414 2023-01-22 10:17:47.783882: step: 926/466, loss: 0.31187599897384644 2023-01-22 10:17:48.441699: step: 928/466, loss: 0.3860665261745453 2023-01-22 10:17:49.022143: step: 930/466, loss: 0.6614386439323425 2023-01-22 10:17:49.734261: step: 932/466, loss: 2.147660255432129 ================================================== Loss: 0.800 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3041802176182446, 'r': 0.3422748938284991, 'f1': 0.32210512329931973}, 'combined': 0.2373406171679198, 'epoch': 6} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.36356442132171524, 'r': 0.27603386416773396, 'f1': 0.31380975955725765}, 'combined': 0.20812253483590143, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2905397067363531, 'r': 0.2707301812770563, 'f1': 0.2802853641456583}, 'combined': 0.18685690943043887, 'epoch': 6} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.39384742421285285, 'r': 0.2601903310691285, 'f1': 0.3133620066091889}, 'combined': 0.2045099411554706, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2808417739221707, 'r': 0.331467900151025, 'f1': 0.304061937997546}, 'combined': 0.22404563852450757, 'epoch': 6} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35675049365771727, 'r': 0.26856152580262266, 'f1': 0.3064372790730663}, 'combined': 0.20323301410027192, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2685185185185185, 'r': 0.3452380952380952, 'f1': 0.30208333333333326}, 'combined': 0.20138888888888884, 'epoch': 6} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4305555555555556, 'r': 0.33695652173913043, 'f1': 0.37804878048780494}, 'combined': 0.2520325203252033, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27941176470588236, 'r': 0.16379310344827586, 'f1': 0.20652173913043476}, 'combined': 0.13768115942028983, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2626251926040062, 'r': 0.26361810448530676, 'f1': 0.2631207118353345}, 'combined': 0.17541380789022298, 'epoch': 5} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35095243640044055, 'r': 0.27468485065454395, 'f1': 0.30816998786401}, 'combined': 0.20112146576388018, 'epoch': 5} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4625, 'r': 0.40217391304347827, 'f1': 0.43023255813953487}, 'combined': 0.28682170542635654, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2858411526903795, 'r': 0.3156727720413679, 'f1': 0.30001722428458233}, 'combined': 0.22106532315706065, 'epoch': 3} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32636076268334185, 'r': 0.28224266998091435, 'f1': 0.30270264048138956}, 'combined': 0.20075615534517025, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44642857142857145, 'r': 0.21551724137931033, 'f1': 0.2906976744186046}, 'combined': 0.19379844961240306, 'epoch': 3} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:20:14.150532: step: 2/466, loss: 0.5861604809761047 2023-01-22 10:20:14.765334: step: 4/466, loss: 0.4408992528915405 2023-01-22 10:20:15.359071: step: 6/466, loss: 0.28753623366355896 2023-01-22 10:20:15.996362: step: 8/466, loss: 0.32058003544807434 2023-01-22 10:20:16.614335: step: 10/466, loss: 0.3608631491661072 2023-01-22 10:20:17.179554: step: 12/466, loss: 0.8042821288108826 2023-01-22 10:20:17.755783: step: 14/466, loss: 0.22324897348880768 2023-01-22 10:20:18.369178: step: 16/466, loss: 0.26565486192703247 2023-01-22 10:20:18.978818: step: 18/466, loss: 0.21577036380767822 2023-01-22 10:20:19.631372: step: 20/466, loss: 0.7381148338317871 2023-01-22 10:20:20.223647: step: 22/466, loss: 0.22094513475894928 2023-01-22 10:20:20.887865: step: 24/466, loss: 0.5388932228088379 2023-01-22 10:20:21.551986: step: 26/466, loss: 0.2958400845527649 2023-01-22 10:20:22.214464: step: 28/466, loss: 2.005858898162842 2023-01-22 10:20:22.862408: step: 30/466, loss: 0.5006194114685059 2023-01-22 10:20:23.499633: step: 32/466, loss: 0.5860127210617065 2023-01-22 10:20:24.104810: step: 34/466, loss: 0.5371690392494202 2023-01-22 10:20:24.682648: step: 36/466, loss: 0.2739104628562927 2023-01-22 10:20:25.257484: step: 38/466, loss: 0.2741168737411499 2023-01-22 10:20:25.963468: step: 40/466, loss: 0.3612619936466217 2023-01-22 10:20:26.570239: step: 42/466, loss: 0.37896281480789185 2023-01-22 10:20:27.273606: step: 44/466, loss: 0.26077574491500854 2023-01-22 10:20:27.899810: step: 46/466, loss: 0.7730565071105957 2023-01-22 10:20:28.474791: step: 48/466, loss: 0.4118739664554596 2023-01-22 10:20:29.113572: step: 50/466, loss: 0.3541674017906189 2023-01-22 10:20:29.748974: step: 52/466, loss: 0.5203198790550232 2023-01-22 10:20:30.420555: step: 54/466, loss: 0.3168627619743347 2023-01-22 10:20:31.045467: step: 56/466, loss: 0.3129294812679291 2023-01-22 10:20:31.624278: step: 58/466, loss: 0.4802446663379669 2023-01-22 10:20:32.199889: step: 60/466, loss: 0.24453146755695343 2023-01-22 10:20:32.841339: step: 62/466, loss: 0.6423302292823792 2023-01-22 10:20:33.397093: step: 64/466, loss: 0.41287142038345337 2023-01-22 10:20:34.002159: step: 66/466, loss: 0.19917523860931396 2023-01-22 10:20:34.545414: step: 68/466, loss: 0.5443085432052612 2023-01-22 10:20:35.131584: step: 70/466, loss: 0.5711265206336975 2023-01-22 10:20:35.729018: step: 72/466, loss: 0.673648476600647 2023-01-22 10:20:36.334003: step: 74/466, loss: 0.23313216865062714 2023-01-22 10:20:36.959073: step: 76/466, loss: 2.2076218128204346 2023-01-22 10:20:37.599366: step: 78/466, loss: 0.2055993527173996 2023-01-22 10:20:38.256075: step: 80/466, loss: 0.3096136152744293 2023-01-22 10:20:38.831624: step: 82/466, loss: 0.21365565061569214 2023-01-22 10:20:39.408699: step: 84/466, loss: 0.1641816794872284 2023-01-22 10:20:39.979188: step: 86/466, loss: 0.8886588215827942 2023-01-22 10:20:40.591647: step: 88/466, loss: 0.29702797532081604 2023-01-22 10:20:41.192557: step: 90/466, loss: 0.40896323323249817 2023-01-22 10:20:41.830143: step: 92/466, loss: 0.31896260380744934 2023-01-22 10:20:42.487193: step: 94/466, loss: 1.0673526525497437 2023-01-22 10:20:43.071961: step: 96/466, loss: 0.3697822093963623 2023-01-22 10:20:43.680952: step: 98/466, loss: 0.2739983797073364 2023-01-22 10:20:44.262183: step: 100/466, loss: 0.6867333650588989 2023-01-22 10:20:44.905904: step: 102/466, loss: 0.7206767797470093 2023-01-22 10:20:45.554076: step: 104/466, loss: 1.2624249458312988 2023-01-22 10:20:46.156145: step: 106/466, loss: 0.3790944218635559 2023-01-22 10:20:46.722687: step: 108/466, loss: 0.05347498878836632 2023-01-22 10:20:47.370283: step: 110/466, loss: 1.041469931602478 2023-01-22 10:20:47.957079: step: 112/466, loss: 0.3049376904964447 2023-01-22 10:20:48.692625: step: 114/466, loss: 0.6779406070709229 2023-01-22 10:20:49.345433: step: 116/466, loss: 0.5164543390274048 2023-01-22 10:20:49.993828: step: 118/466, loss: 0.5160636305809021 2023-01-22 10:20:50.591483: step: 120/466, loss: 1.2827353477478027 2023-01-22 10:20:51.206470: step: 122/466, loss: 0.34198516607284546 2023-01-22 10:20:51.842698: step: 124/466, loss: 0.8944591879844666 2023-01-22 10:20:52.512148: step: 126/466, loss: 0.5568956136703491 2023-01-22 10:20:53.153042: step: 128/466, loss: 0.6809290647506714 2023-01-22 10:20:53.770241: step: 130/466, loss: 0.43843433260917664 2023-01-22 10:20:54.354392: step: 132/466, loss: 0.6921073794364929 2023-01-22 10:20:54.979901: step: 134/466, loss: 0.665354311466217 2023-01-22 10:20:55.640947: step: 136/466, loss: 0.23547694087028503 2023-01-22 10:20:56.277152: step: 138/466, loss: 0.5738288164138794 2023-01-22 10:20:56.853924: step: 140/466, loss: 0.1683214008808136 2023-01-22 10:20:57.534587: step: 142/466, loss: 0.5310552716255188 2023-01-22 10:20:58.156662: step: 144/466, loss: 0.7672767043113708 2023-01-22 10:20:58.743775: step: 146/466, loss: 0.8567443490028381 2023-01-22 10:20:59.281959: step: 148/466, loss: 0.34494611620903015 2023-01-22 10:20:59.850524: step: 150/466, loss: 0.6466522812843323 2023-01-22 10:21:00.488088: step: 152/466, loss: 0.6802599430084229 2023-01-22 10:21:01.108504: step: 154/466, loss: 0.4298156201839447 2023-01-22 10:21:01.884901: step: 156/466, loss: 0.4231945276260376 2023-01-22 10:21:02.527521: step: 158/466, loss: 1.2339304685592651 2023-01-22 10:21:03.168782: step: 160/466, loss: 0.2515791952610016 2023-01-22 10:21:03.774301: step: 162/466, loss: 0.16225486993789673 2023-01-22 10:21:04.318974: step: 164/466, loss: 1.2626041173934937 2023-01-22 10:21:04.932751: step: 166/466, loss: 0.9185038805007935 2023-01-22 10:21:05.548906: step: 168/466, loss: 0.8705633878707886 2023-01-22 10:21:06.188392: step: 170/466, loss: 0.5381495952606201 2023-01-22 10:21:06.769865: step: 172/466, loss: 0.6132254600524902 2023-01-22 10:21:07.368222: step: 174/466, loss: 0.2988283038139343 2023-01-22 10:21:08.049916: step: 176/466, loss: 0.2567707300186157 2023-01-22 10:21:08.689919: step: 178/466, loss: 0.30163484811782837 2023-01-22 10:21:09.330489: step: 180/466, loss: 0.5373499393463135 2023-01-22 10:21:09.995499: step: 182/466, loss: 0.3271122872829437 2023-01-22 10:21:10.610542: step: 184/466, loss: 0.9332323670387268 2023-01-22 10:21:11.212724: step: 186/466, loss: 0.2196381390094757 2023-01-22 10:21:11.806227: step: 188/466, loss: 0.23258720338344574 2023-01-22 10:21:12.513381: step: 190/466, loss: 0.4428851008415222 2023-01-22 10:21:13.104513: step: 192/466, loss: 0.18682517111301422 2023-01-22 10:21:13.696264: step: 194/466, loss: 0.07872353494167328 2023-01-22 10:21:14.343579: step: 196/466, loss: 0.6266285181045532 2023-01-22 10:21:14.981238: step: 198/466, loss: 0.2624841034412384 2023-01-22 10:21:15.584933: step: 200/466, loss: 0.17764367163181305 2023-01-22 10:21:16.148546: step: 202/466, loss: 0.27667200565338135 2023-01-22 10:21:16.757897: step: 204/466, loss: 0.8821558952331543 2023-01-22 10:21:17.303799: step: 206/466, loss: 0.6958469152450562 2023-01-22 10:21:17.889130: step: 208/466, loss: 0.25369971990585327 2023-01-22 10:21:18.458634: step: 210/466, loss: 0.27209770679473877 2023-01-22 10:21:19.026465: step: 212/466, loss: 0.34161660075187683 2023-01-22 10:21:19.635640: step: 214/466, loss: 1.7664700746536255 2023-01-22 10:21:20.278593: step: 216/466, loss: 1.5650590658187866 2023-01-22 10:21:20.940209: step: 218/466, loss: 0.4417518079280853 2023-01-22 10:21:21.568511: step: 220/466, loss: 0.36913424730300903 2023-01-22 10:21:22.170113: step: 222/466, loss: 3.28045916557312 2023-01-22 10:21:22.788780: step: 224/466, loss: 0.22563347220420837 2023-01-22 10:21:23.356897: step: 226/466, loss: 0.5703184008598328 2023-01-22 10:21:24.015321: step: 228/466, loss: 0.26921722292900085 2023-01-22 10:21:24.639129: step: 230/466, loss: 0.4393335282802582 2023-01-22 10:21:25.214774: step: 232/466, loss: 0.4259721040725708 2023-01-22 10:21:25.916504: step: 234/466, loss: 0.39365753531455994 2023-01-22 10:21:26.578391: step: 236/466, loss: 1.1271625757217407 2023-01-22 10:21:27.155682: step: 238/466, loss: 0.3785664141178131 2023-01-22 10:21:27.785349: step: 240/466, loss: 0.2514948546886444 2023-01-22 10:21:28.379842: step: 242/466, loss: 0.4495410919189453 2023-01-22 10:21:29.025048: step: 244/466, loss: 0.35675227642059326 2023-01-22 10:21:29.625549: step: 246/466, loss: 0.4869801104068756 2023-01-22 10:21:30.214993: step: 248/466, loss: 0.3292056918144226 2023-01-22 10:21:30.842138: step: 250/466, loss: 0.6296101212501526 2023-01-22 10:21:31.420418: step: 252/466, loss: 0.13931596279144287 2023-01-22 10:21:32.029687: step: 254/466, loss: 0.5509451627731323 2023-01-22 10:21:32.703505: step: 256/466, loss: 0.2474900782108307 2023-01-22 10:21:33.352071: step: 258/466, loss: 1.371412992477417 2023-01-22 10:21:33.924584: step: 260/466, loss: 0.63140869140625 2023-01-22 10:21:34.537226: step: 262/466, loss: 0.3595554530620575 2023-01-22 10:21:35.199853: step: 264/466, loss: 2.6054558753967285 2023-01-22 10:21:35.801635: step: 266/466, loss: 3.7890048027038574 2023-01-22 10:21:36.387860: step: 268/466, loss: 0.9801911115646362 2023-01-22 10:21:36.952229: step: 270/466, loss: 0.2202795296907425 2023-01-22 10:21:37.510633: step: 272/466, loss: 0.42003333568573 2023-01-22 10:21:38.157338: step: 274/466, loss: 1.0269274711608887 2023-01-22 10:21:38.812808: step: 276/466, loss: 0.3591936230659485 2023-01-22 10:21:39.406362: step: 278/466, loss: 0.44897300004959106 2023-01-22 10:21:39.971410: step: 280/466, loss: 0.24281011521816254 2023-01-22 10:21:40.618200: step: 282/466, loss: 0.893500566482544 2023-01-22 10:21:41.207490: step: 284/466, loss: 0.6132438778877258 2023-01-22 10:21:41.855353: step: 286/466, loss: 0.658779501914978 2023-01-22 10:21:42.438354: step: 288/466, loss: 0.4819200932979584 2023-01-22 10:21:43.012561: step: 290/466, loss: 2.5244646072387695 2023-01-22 10:21:43.594946: step: 292/466, loss: 1.0741807222366333 2023-01-22 10:21:44.340127: step: 294/466, loss: 0.8289614319801331 2023-01-22 10:21:44.977113: step: 296/466, loss: 0.5560529828071594 2023-01-22 10:21:45.577235: step: 298/466, loss: 0.3289271295070648 2023-01-22 10:21:46.167311: step: 300/466, loss: 0.3868092894554138 2023-01-22 10:21:46.788910: step: 302/466, loss: 0.20935015380382538 2023-01-22 10:21:47.456684: step: 304/466, loss: 0.40141284465789795 2023-01-22 10:21:48.079722: step: 306/466, loss: 0.3986812233924866 2023-01-22 10:21:48.742557: step: 308/466, loss: 0.2795734703540802 2023-01-22 10:21:49.304224: step: 310/466, loss: 0.24247369170188904 2023-01-22 10:21:49.892883: step: 312/466, loss: 0.23847003281116486 2023-01-22 10:21:50.472760: step: 314/466, loss: 0.14001093804836273 2023-01-22 10:21:51.079914: step: 316/466, loss: 0.5716454386711121 2023-01-22 10:21:51.706409: step: 318/466, loss: 0.2892405390739441 2023-01-22 10:21:52.361038: step: 320/466, loss: 0.36249661445617676 2023-01-22 10:21:52.938768: step: 322/466, loss: 0.31691575050354004 2023-01-22 10:21:53.605445: step: 324/466, loss: 0.4308742582798004 2023-01-22 10:21:54.266582: step: 326/466, loss: 0.8795764446258545 2023-01-22 10:21:54.812200: step: 328/466, loss: 0.5361948609352112 2023-01-22 10:21:55.408880: step: 330/466, loss: 0.13352420926094055 2023-01-22 10:21:55.962866: step: 332/466, loss: 0.5865218043327332 2023-01-22 10:21:56.585824: step: 334/466, loss: 0.9855297207832336 2023-01-22 10:21:57.258335: step: 336/466, loss: 1.1064568758010864 2023-01-22 10:21:57.859526: step: 338/466, loss: 0.8792426586151123 2023-01-22 10:21:58.560329: step: 340/466, loss: 0.3035004734992981 2023-01-22 10:21:59.095892: step: 342/466, loss: 0.8655188679695129 2023-01-22 10:21:59.676674: step: 344/466, loss: 0.3053174614906311 2023-01-22 10:22:00.253967: step: 346/466, loss: 0.4611417055130005 2023-01-22 10:22:00.917455: step: 348/466, loss: 0.6201977729797363 2023-01-22 10:22:01.487259: step: 350/466, loss: 0.7922503352165222 2023-01-22 10:22:02.043538: step: 352/466, loss: 0.11923874914646149 2023-01-22 10:22:02.674243: step: 354/466, loss: 0.3223556578159332 2023-01-22 10:22:03.288916: step: 356/466, loss: 0.7820574045181274 2023-01-22 10:22:03.890351: step: 358/466, loss: 1.522855520248413 2023-01-22 10:22:04.460023: step: 360/466, loss: 0.30281901359558105 2023-01-22 10:22:05.090651: step: 362/466, loss: 0.5263733863830566 2023-01-22 10:22:05.672220: step: 364/466, loss: 0.1544521301984787 2023-01-22 10:22:06.333530: step: 366/466, loss: 0.6664984822273254 2023-01-22 10:22:06.941759: step: 368/466, loss: 0.4794970452785492 2023-01-22 10:22:07.584900: step: 370/466, loss: 0.7100381851196289 2023-01-22 10:22:08.205378: step: 372/466, loss: 0.2704818844795227 2023-01-22 10:22:08.767712: step: 374/466, loss: 0.5666533708572388 2023-01-22 10:22:09.387448: step: 376/466, loss: 0.5648034811019897 2023-01-22 10:22:09.979863: step: 378/466, loss: 0.41750550270080566 2023-01-22 10:22:10.619844: step: 380/466, loss: 0.3844192624092102 2023-01-22 10:22:11.206410: step: 382/466, loss: 0.5948653221130371 2023-01-22 10:22:11.823718: step: 384/466, loss: 1.1000791788101196 2023-01-22 10:22:12.349366: step: 386/466, loss: 0.45296505093574524 2023-01-22 10:22:12.914504: step: 388/466, loss: 0.39085686206817627 2023-01-22 10:22:13.597115: step: 390/466, loss: 0.1594216376543045 2023-01-22 10:22:14.244660: step: 392/466, loss: 0.2634032368659973 2023-01-22 10:22:14.864898: step: 394/466, loss: 0.14748629927635193 2023-01-22 10:22:15.455719: step: 396/466, loss: 1.5020861625671387 2023-01-22 10:22:16.050894: step: 398/466, loss: 0.9530461430549622 2023-01-22 10:22:16.667103: step: 400/466, loss: 0.266817182302475 2023-01-22 10:22:17.283943: step: 402/466, loss: 0.5079426169395447 2023-01-22 10:22:17.888209: step: 404/466, loss: 0.17258180677890778 2023-01-22 10:22:18.517334: step: 406/466, loss: 0.5756434202194214 2023-01-22 10:22:19.099655: step: 408/466, loss: 0.5008427500724792 2023-01-22 10:22:19.705707: step: 410/466, loss: 0.6624125838279724 2023-01-22 10:22:20.324108: step: 412/466, loss: 0.2582664489746094 2023-01-22 10:22:20.978038: step: 414/466, loss: 0.2248954474925995 2023-01-22 10:22:21.571244: step: 416/466, loss: 0.43843311071395874 2023-01-22 10:22:22.176293: step: 418/466, loss: 0.2874451279640198 2023-01-22 10:22:22.785420: step: 420/466, loss: 0.22079920768737793 2023-01-22 10:22:23.514974: step: 422/466, loss: 0.2217000275850296 2023-01-22 10:22:24.115404: step: 424/466, loss: 0.150802880525589 2023-01-22 10:22:24.735107: step: 426/466, loss: 1.0697029829025269 2023-01-22 10:22:25.385687: step: 428/466, loss: 0.6755574345588684 2023-01-22 10:22:26.018156: step: 430/466, loss: 0.8953157663345337 2023-01-22 10:22:26.660415: step: 432/466, loss: 0.4585370719432831 2023-01-22 10:22:27.308776: step: 434/466, loss: 0.36771368980407715 2023-01-22 10:22:27.908956: step: 436/466, loss: 0.2886699140071869 2023-01-22 10:22:28.465781: step: 438/466, loss: 0.5681027770042419 2023-01-22 10:22:29.082304: step: 440/466, loss: 0.29305148124694824 2023-01-22 10:22:29.757399: step: 442/466, loss: 0.5841960310935974 2023-01-22 10:22:30.351521: step: 444/466, loss: 0.137999027967453 2023-01-22 10:22:30.985327: step: 446/466, loss: 0.45419827103614807 2023-01-22 10:22:31.494928: step: 448/466, loss: 0.15391451120376587 2023-01-22 10:22:32.115098: step: 450/466, loss: 0.40725621581077576 2023-01-22 10:22:32.688251: step: 452/466, loss: 0.42934536933898926 2023-01-22 10:22:33.276323: step: 454/466, loss: 0.15431423485279083 2023-01-22 10:22:33.930682: step: 456/466, loss: 0.24367600679397583 2023-01-22 10:22:34.553358: step: 458/466, loss: 0.578997015953064 2023-01-22 10:22:35.100577: step: 460/466, loss: 1.582404375076294 2023-01-22 10:22:35.691334: step: 462/466, loss: 0.2780749797821045 2023-01-22 10:22:36.259024: step: 464/466, loss: 0.6970811486244202 2023-01-22 10:22:36.830791: step: 466/466, loss: 0.45635974407196045 2023-01-22 10:22:37.425820: step: 468/466, loss: 0.15261076390743256 2023-01-22 10:22:38.036948: step: 470/466, loss: 0.24384662508964539 2023-01-22 10:22:38.743467: step: 472/466, loss: 0.4098736047744751 2023-01-22 10:22:39.343306: step: 474/466, loss: 0.30031678080558777 2023-01-22 10:22:39.914974: step: 476/466, loss: 0.3997558057308197 2023-01-22 10:22:40.563619: step: 478/466, loss: 0.06994882971048355 2023-01-22 10:22:41.175859: step: 480/466, loss: 0.2708031237125397 2023-01-22 10:22:41.752324: step: 482/466, loss: 0.17339487373828888 2023-01-22 10:22:42.334479: step: 484/466, loss: 2.775698661804199 2023-01-22 10:22:43.002870: step: 486/466, loss: 0.7528473138809204 2023-01-22 10:22:43.596167: step: 488/466, loss: 0.20074620842933655 2023-01-22 10:22:44.208780: step: 490/466, loss: 0.4729272723197937 2023-01-22 10:22:44.794271: step: 492/466, loss: 0.5164822936058044 2023-01-22 10:22:45.431168: step: 494/466, loss: 0.4550749957561493 2023-01-22 10:22:46.019449: step: 496/466, loss: 0.08575890213251114 2023-01-22 10:22:46.643233: step: 498/466, loss: 1.2750965356826782 2023-01-22 10:22:47.349223: step: 500/466, loss: 0.27038100361824036 2023-01-22 10:22:47.941492: step: 502/466, loss: 0.40854859352111816 2023-01-22 10:22:48.582711: step: 504/466, loss: 0.11246982216835022 2023-01-22 10:22:49.213542: step: 506/466, loss: 0.4013828933238983 2023-01-22 10:22:49.830078: step: 508/466, loss: 0.6209487915039062 2023-01-22 10:22:50.460147: step: 510/466, loss: 0.26867666840553284 2023-01-22 10:22:51.076232: step: 512/466, loss: 0.3805329203605652 2023-01-22 10:22:51.658993: step: 514/466, loss: 0.11256767809391022 2023-01-22 10:22:52.270636: step: 516/466, loss: 0.6399586796760559 2023-01-22 10:22:52.884606: step: 518/466, loss: 1.0372354984283447 2023-01-22 10:22:53.572282: step: 520/466, loss: 0.5978058576583862 2023-01-22 10:22:54.199752: step: 522/466, loss: 0.7899807095527649 2023-01-22 10:22:54.826345: step: 524/466, loss: 0.2514680027961731 2023-01-22 10:22:55.404884: step: 526/466, loss: 0.5277990102767944 2023-01-22 10:22:55.981472: step: 528/466, loss: 0.32197409868240356 2023-01-22 10:22:56.563368: step: 530/466, loss: 0.3707515001296997 2023-01-22 10:22:57.125516: step: 532/466, loss: 0.5824082493782043 2023-01-22 10:22:57.712506: step: 534/466, loss: 0.39320623874664307 2023-01-22 10:22:58.401058: step: 536/466, loss: 0.5981414318084717 2023-01-22 10:22:59.038918: step: 538/466, loss: 0.2387053370475769 2023-01-22 10:22:59.658484: step: 540/466, loss: 0.2407008707523346 2023-01-22 10:23:00.282261: step: 542/466, loss: 0.23002319037914276 2023-01-22 10:23:00.862067: step: 544/466, loss: 0.27901536226272583 2023-01-22 10:23:01.451489: step: 546/466, loss: 0.18295063078403473 2023-01-22 10:23:02.145786: step: 548/466, loss: 0.27758944034576416 2023-01-22 10:23:02.761429: step: 550/466, loss: 0.49734365940093994 2023-01-22 10:23:03.363202: step: 552/466, loss: 1.140255093574524 2023-01-22 10:23:04.084459: step: 554/466, loss: 0.8541272878646851 2023-01-22 10:23:04.654470: step: 556/466, loss: 0.22601443529129028 2023-01-22 10:23:05.256710: step: 558/466, loss: 0.5696001648902893 2023-01-22 10:23:05.907427: step: 560/466, loss: 0.7366502285003662 2023-01-22 10:23:06.561711: step: 562/466, loss: 0.5757856965065002 2023-01-22 10:23:07.114982: step: 564/466, loss: 0.40094703435897827 2023-01-22 10:23:07.758646: step: 566/466, loss: 0.6098746061325073 2023-01-22 10:23:08.332628: step: 568/466, loss: 0.22799567878246307 2023-01-22 10:23:08.999205: step: 570/466, loss: 0.5381388664245605 2023-01-22 10:23:09.566629: step: 572/466, loss: 0.2369133085012436 2023-01-22 10:23:10.193533: step: 574/466, loss: 0.991308867931366 2023-01-22 10:23:10.851888: step: 576/466, loss: 0.5809664130210876 2023-01-22 10:23:11.446313: step: 578/466, loss: 0.23986878991127014 2023-01-22 10:23:11.986339: step: 580/466, loss: 0.8582189083099365 2023-01-22 10:23:12.587073: step: 582/466, loss: 0.30137068033218384 2023-01-22 10:23:13.179529: step: 584/466, loss: 0.14077231287956238 2023-01-22 10:23:13.806999: step: 586/466, loss: 0.559489905834198 2023-01-22 10:23:14.482867: step: 588/466, loss: 0.7863790988922119 2023-01-22 10:23:15.098074: step: 590/466, loss: 0.2836732864379883 2023-01-22 10:23:15.659649: step: 592/466, loss: 0.4197135269641876 2023-01-22 10:23:16.248449: step: 594/466, loss: 0.6486166715621948 2023-01-22 10:23:16.839836: step: 596/466, loss: 0.5372748970985413 2023-01-22 10:23:17.436334: step: 598/466, loss: 0.2581697106361389 2023-01-22 10:23:17.974839: step: 600/466, loss: 1.1937419176101685 2023-01-22 10:23:18.583483: step: 602/466, loss: 0.4056375026702881 2023-01-22 10:23:19.176251: step: 604/466, loss: 0.9247855544090271 2023-01-22 10:23:19.766628: step: 606/466, loss: 0.3549393117427826 2023-01-22 10:23:20.334130: step: 608/466, loss: 0.21426716446876526 2023-01-22 10:23:20.932889: step: 610/466, loss: 3.929910898208618 2023-01-22 10:23:21.529155: step: 612/466, loss: 0.830359160900116 2023-01-22 10:23:22.123292: step: 614/466, loss: 0.7156874537467957 2023-01-22 10:23:22.704715: step: 616/466, loss: 1.1229567527770996 2023-01-22 10:23:23.385389: step: 618/466, loss: 0.7136696577072144 2023-01-22 10:23:23.980612: step: 620/466, loss: 0.21507291495800018 2023-01-22 10:23:24.583847: step: 622/466, loss: 0.10922008752822876 2023-01-22 10:23:25.235894: step: 624/466, loss: 0.4058763384819031 2023-01-22 10:23:25.854234: step: 626/466, loss: 0.9438013434410095 2023-01-22 10:23:26.500191: step: 628/466, loss: 0.3833908438682556 2023-01-22 10:23:27.113900: step: 630/466, loss: 0.4176103174686432 2023-01-22 10:23:27.728663: step: 632/466, loss: 1.2825418710708618 2023-01-22 10:23:28.348557: step: 634/466, loss: 0.18677985668182373 2023-01-22 10:23:28.931352: step: 636/466, loss: 0.7825669050216675 2023-01-22 10:23:29.637138: step: 638/466, loss: 0.3831029236316681 2023-01-22 10:23:30.199692: step: 640/466, loss: 0.5506783723831177 2023-01-22 10:23:30.848303: step: 642/466, loss: 0.6937105655670166 2023-01-22 10:23:31.521101: step: 644/466, loss: 0.5345378518104553 2023-01-22 10:23:32.146273: step: 646/466, loss: 1.5613038539886475 2023-01-22 10:23:32.728029: step: 648/466, loss: 0.5392831563949585 2023-01-22 10:23:33.367642: step: 650/466, loss: 0.566638708114624 2023-01-22 10:23:33.928310: step: 652/466, loss: 2.2288661003112793 2023-01-22 10:23:34.515344: step: 654/466, loss: 0.22525736689567566 2023-01-22 10:23:35.117304: step: 656/466, loss: 0.12807002663612366 2023-01-22 10:23:35.714124: step: 658/466, loss: 0.4514746069908142 2023-01-22 10:23:36.307930: step: 660/466, loss: 0.628321647644043 2023-01-22 10:23:36.918997: step: 662/466, loss: 0.8235511779785156 2023-01-22 10:23:37.524318: step: 664/466, loss: 0.4312689006328583 2023-01-22 10:23:38.155584: step: 666/466, loss: 0.9901965856552124 2023-01-22 10:23:38.858802: step: 668/466, loss: 0.3410632908344269 2023-01-22 10:23:39.400127: step: 670/466, loss: 0.22190023958683014 2023-01-22 10:23:40.007538: step: 672/466, loss: 1.326055884361267 2023-01-22 10:23:40.657943: step: 674/466, loss: 0.3149159848690033 2023-01-22 10:23:41.312575: step: 676/466, loss: 0.3897155523300171 2023-01-22 10:23:41.869894: step: 678/466, loss: 0.16535858809947968 2023-01-22 10:23:42.491792: step: 680/466, loss: 0.18679854273796082 2023-01-22 10:23:43.070030: step: 682/466, loss: 0.33889710903167725 2023-01-22 10:23:43.672307: step: 684/466, loss: 0.9268393516540527 2023-01-22 10:23:44.270071: step: 686/466, loss: 0.4853194057941437 2023-01-22 10:23:44.871086: step: 688/466, loss: 2.5176568031311035 2023-01-22 10:23:45.473372: step: 690/466, loss: 0.3999820947647095 2023-01-22 10:23:46.098202: step: 692/466, loss: 4.8594584465026855 2023-01-22 10:23:46.712866: step: 694/466, loss: 0.9282346367835999 2023-01-22 10:23:47.322018: step: 696/466, loss: 0.38179799914360046 2023-01-22 10:23:47.985627: step: 698/466, loss: 0.5187469720840454 2023-01-22 10:23:48.640135: step: 700/466, loss: 0.1862371265888214 2023-01-22 10:23:49.290482: step: 702/466, loss: 0.17429044842720032 2023-01-22 10:23:49.893351: step: 704/466, loss: 1.9816677570343018 2023-01-22 10:23:50.501631: step: 706/466, loss: 0.19061195850372314 2023-01-22 10:23:51.033142: step: 708/466, loss: 0.3051075339317322 2023-01-22 10:23:51.630330: step: 710/466, loss: 0.4641377031803131 2023-01-22 10:23:52.226916: step: 712/466, loss: 0.1860985904932022 2023-01-22 10:23:52.781728: step: 714/466, loss: 0.2122507393360138 2023-01-22 10:23:53.377804: step: 716/466, loss: 0.21075765788555145 2023-01-22 10:23:53.988322: step: 718/466, loss: 0.508497953414917 2023-01-22 10:23:54.554459: step: 720/466, loss: 0.5619471669197083 2023-01-22 10:23:55.133747: step: 722/466, loss: 0.2921290695667267 2023-01-22 10:23:55.708316: step: 724/466, loss: 0.7574358582496643 2023-01-22 10:23:56.282870: step: 726/466, loss: 0.5595558881759644 2023-01-22 10:23:56.912807: step: 728/466, loss: 0.09329615533351898 2023-01-22 10:23:57.522084: step: 730/466, loss: 0.9361396431922913 2023-01-22 10:23:58.193449: step: 732/466, loss: 0.3507343530654907 2023-01-22 10:23:58.735978: step: 734/466, loss: 0.19813771545886993 2023-01-22 10:23:59.285393: step: 736/466, loss: 2.7031946182250977 2023-01-22 10:23:59.889051: step: 738/466, loss: 0.14652515947818756 2023-01-22 10:24:00.512869: step: 740/466, loss: 0.3808930516242981 2023-01-22 10:24:01.190021: step: 742/466, loss: 0.34050026535987854 2023-01-22 10:24:01.808196: step: 744/466, loss: 0.5174801349639893 2023-01-22 10:24:02.434875: step: 746/466, loss: 1.3954707384109497 2023-01-22 10:24:03.001945: step: 748/466, loss: 0.35464537143707275 2023-01-22 10:24:03.621993: step: 750/466, loss: 0.18262244760990143 2023-01-22 10:24:04.192604: step: 752/466, loss: 0.5942860245704651 2023-01-22 10:24:04.840371: step: 754/466, loss: 2.3839564323425293 2023-01-22 10:24:05.419640: step: 756/466, loss: 0.12105849385261536 2023-01-22 10:24:06.085036: step: 758/466, loss: 1.135514497756958 2023-01-22 10:24:06.682719: step: 760/466, loss: 0.4444793462753296 2023-01-22 10:24:07.288284: step: 762/466, loss: 0.38591933250427246 2023-01-22 10:24:07.914744: step: 764/466, loss: 0.5965667963027954 2023-01-22 10:24:08.534239: step: 766/466, loss: 0.9795670509338379 2023-01-22 10:24:09.095557: step: 768/466, loss: 0.7009924650192261 2023-01-22 10:24:09.772507: step: 770/466, loss: 0.6074681878089905 2023-01-22 10:24:10.361894: step: 772/466, loss: 0.36849063634872437 2023-01-22 10:24:11.048206: step: 774/466, loss: 0.573112428188324 2023-01-22 10:24:11.639892: step: 776/466, loss: 0.6135465502738953 2023-01-22 10:24:12.255559: step: 778/466, loss: 0.7879122495651245 2023-01-22 10:24:12.816825: step: 780/466, loss: 0.8168390393257141 2023-01-22 10:24:13.413184: step: 782/466, loss: 0.3562544286251068 2023-01-22 10:24:14.038235: step: 784/466, loss: 0.30302950739860535 2023-01-22 10:24:14.646794: step: 786/466, loss: 4.270347595214844 2023-01-22 10:24:15.294447: step: 788/466, loss: 0.39814668893814087 2023-01-22 10:24:15.906011: step: 790/466, loss: 0.2591295838356018 2023-01-22 10:24:16.478467: step: 792/466, loss: 0.20328326523303986 2023-01-22 10:24:17.058446: step: 794/466, loss: 0.1836165338754654 2023-01-22 10:24:17.675817: step: 796/466, loss: 0.3204825222492218 2023-01-22 10:24:18.373744: step: 798/466, loss: 0.48555970191955566 2023-01-22 10:24:18.963660: step: 800/466, loss: 0.19230753183364868 2023-01-22 10:24:19.641506: step: 802/466, loss: 0.3014485836029053 2023-01-22 10:24:20.209181: step: 804/466, loss: 0.5951617956161499 2023-01-22 10:24:20.909721: step: 806/466, loss: 0.6485537886619568 2023-01-22 10:24:21.505582: step: 808/466, loss: 0.2819003462791443 2023-01-22 10:24:22.135579: step: 810/466, loss: 0.2669002115726471 2023-01-22 10:24:22.659495: step: 812/466, loss: 0.42355260252952576 2023-01-22 10:24:23.298032: step: 814/466, loss: 0.410382479429245 2023-01-22 10:24:23.906361: step: 816/466, loss: 0.4057581424713135 2023-01-22 10:24:24.500464: step: 818/466, loss: 0.784027099609375 2023-01-22 10:24:25.156555: step: 820/466, loss: 0.2816869914531708 2023-01-22 10:24:25.719805: step: 822/466, loss: 0.6319968700408936 2023-01-22 10:24:26.360919: step: 824/466, loss: 1.1715902090072632 2023-01-22 10:24:27.022157: step: 826/466, loss: 0.6217017769813538 2023-01-22 10:24:27.695271: step: 828/466, loss: 0.29474109411239624 2023-01-22 10:24:28.340380: step: 830/466, loss: 0.5678926706314087 2023-01-22 10:24:28.957638: step: 832/466, loss: 0.39161813259124756 2023-01-22 10:24:29.516179: step: 834/466, loss: 0.18232816457748413 2023-01-22 10:24:30.138487: step: 836/466, loss: 0.6148484349250793 2023-01-22 10:24:30.719043: step: 838/466, loss: 0.6800553798675537 2023-01-22 10:24:31.380726: step: 840/466, loss: 0.766160249710083 2023-01-22 10:24:32.032026: step: 842/466, loss: 0.4223789870738983 2023-01-22 10:24:32.682543: step: 844/466, loss: 0.2277809977531433 2023-01-22 10:24:33.324289: step: 846/466, loss: 0.9845823645591736 2023-01-22 10:24:33.928936: step: 848/466, loss: 0.36414122581481934 2023-01-22 10:24:34.577834: step: 850/466, loss: 0.36403149366378784 2023-01-22 10:24:35.195748: step: 852/466, loss: 0.5862483382225037 2023-01-22 10:24:35.835734: step: 854/466, loss: 0.3021498918533325 2023-01-22 10:24:36.365919: step: 856/466, loss: 0.458347886800766 2023-01-22 10:24:36.942177: step: 858/466, loss: 0.5069761872291565 2023-01-22 10:24:37.569618: step: 860/466, loss: 0.7813975214958191 2023-01-22 10:24:38.217601: step: 862/466, loss: 1.3759708404541016 2023-01-22 10:24:38.859743: step: 864/466, loss: 0.2643243372440338 2023-01-22 10:24:39.509903: step: 866/466, loss: 0.3489965796470642 2023-01-22 10:24:40.064063: step: 868/466, loss: 0.4561339020729065 2023-01-22 10:24:40.704999: step: 870/466, loss: 1.14317786693573 2023-01-22 10:24:41.276515: step: 872/466, loss: 1.186966061592102 2023-01-22 10:24:41.894532: step: 874/466, loss: 0.40301233530044556 2023-01-22 10:24:42.444363: step: 876/466, loss: 0.9938855767250061 2023-01-22 10:24:43.015992: step: 878/466, loss: 0.29392674565315247 2023-01-22 10:24:43.706705: step: 880/466, loss: 0.7371693253517151 2023-01-22 10:24:44.276919: step: 882/466, loss: 1.307347059249878 2023-01-22 10:24:44.853619: step: 884/466, loss: 0.2597961127758026 2023-01-22 10:24:45.428383: step: 886/466, loss: 4.855459213256836 2023-01-22 10:24:46.045085: step: 888/466, loss: 0.1865711212158203 2023-01-22 10:24:46.653559: step: 890/466, loss: 0.833149790763855 2023-01-22 10:24:47.248450: step: 892/466, loss: 0.24636347591876984 2023-01-22 10:24:47.899813: step: 894/466, loss: 1.0389864444732666 2023-01-22 10:24:48.482099: step: 896/466, loss: 1.0861743688583374 2023-01-22 10:24:49.083000: step: 898/466, loss: 0.5853481888771057 2023-01-22 10:24:49.692733: step: 900/466, loss: 1.08613121509552 2023-01-22 10:24:50.306827: step: 902/466, loss: 0.6622076034545898 2023-01-22 10:24:50.889951: step: 904/466, loss: 0.8512253761291504 2023-01-22 10:24:51.526069: step: 906/466, loss: 0.2479708194732666 2023-01-22 10:24:52.127036: step: 908/466, loss: 0.33753520250320435 2023-01-22 10:24:52.812351: step: 910/466, loss: 1.420801043510437 2023-01-22 10:24:53.380603: step: 912/466, loss: 0.2568238377571106 2023-01-22 10:24:53.984281: step: 914/466, loss: 0.15492361783981323 2023-01-22 10:24:54.595785: step: 916/466, loss: 1.5217208862304688 2023-01-22 10:24:55.183161: step: 918/466, loss: 0.9508876800537109 2023-01-22 10:24:55.740854: step: 920/466, loss: 0.3954949676990509 2023-01-22 10:24:56.320446: step: 922/466, loss: 1.1577404737472534 2023-01-22 10:24:56.863860: step: 924/466, loss: 0.8091952204704285 2023-01-22 10:24:57.483436: step: 926/466, loss: 0.7096596360206604 2023-01-22 10:24:58.034541: step: 928/466, loss: 0.18696229159832 2023-01-22 10:24:58.680206: step: 930/466, loss: 1.1425925493240356 2023-01-22 10:24:59.292911: step: 932/466, loss: 0.763130784034729 ================================================== Loss: 0.603 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2675956404067003, 'r': 0.3305593205023945, 'f1': 0.295763602554774}, 'combined': 0.21793107556667557, 'epoch': 7} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.30906237465222164, 'r': 0.3152009192264886, 'f1': 0.31210146598056754}, 'combined': 0.20698957329281162, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2620829608917208, 'r': 0.2690321303093043, 'f1': 0.26551208374450963}, 'combined': 0.17700805582967308, 'epoch': 7} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3128821070436849, 'r': 0.29664255131477396, 'f1': 0.30454599406738786}, 'combined': 0.1987563329702952, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2597295562742386, 'r': 0.32922076582768767, 'f1': 0.29037547044550854}, 'combined': 0.2139608729598484, 'epoch': 7} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3043484665238961, 'r': 0.3064474214654402, 'f1': 0.30539433754287854}, 'combined': 0.2025413223082303, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20903954802259886, 'r': 0.35238095238095235, 'f1': 0.2624113475177305}, 'combined': 0.17494089834515364, 'epoch': 7} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.23717948717948717, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.25862068965517243, 'f1': 0.28301886792452835}, 'combined': 0.18867924528301888, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2626251926040062, 'r': 0.26361810448530676, 'f1': 0.2631207118353345}, 'combined': 0.17541380789022298, 'epoch': 5} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35095243640044055, 'r': 0.27468485065454395, 'f1': 0.30816998786401}, 'combined': 0.20112146576388018, 'epoch': 5} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4625, 'r': 0.40217391304347827, 'f1': 0.43023255813953487}, 'combined': 0.28682170542635654, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2858411526903795, 'r': 0.3156727720413679, 'f1': 0.30001722428458233}, 'combined': 0.22106532315706065, 'epoch': 3} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32636076268334185, 'r': 0.28224266998091435, 'f1': 0.30270264048138956}, 'combined': 0.20075615534517025, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44642857142857145, 'r': 0.21551724137931033, 'f1': 0.2906976744186046}, 'combined': 0.19379844961240306, 'epoch': 3} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:27:27.974075: step: 2/466, loss: 0.42070913314819336 2023-01-22 10:27:28.540190: step: 4/466, loss: 0.19329315423965454 2023-01-22 10:27:29.186157: step: 6/466, loss: 0.1542825996875763 2023-01-22 10:27:29.887041: step: 8/466, loss: 0.3330560028553009 2023-01-22 10:27:30.530078: step: 10/466, loss: 0.8174936771392822 2023-01-22 10:27:31.164259: step: 12/466, loss: 0.6355933547019958 2023-01-22 10:27:31.793988: step: 14/466, loss: 0.23780372738838196 2023-01-22 10:27:32.427828: step: 16/466, loss: 0.6033594608306885 2023-01-22 10:27:33.001824: step: 18/466, loss: 0.5725092887878418 2023-01-22 10:27:33.599927: step: 20/466, loss: 0.22369495034217834 2023-01-22 10:27:34.211928: step: 22/466, loss: 0.3154262602329254 2023-01-22 10:27:34.839897: step: 24/466, loss: 0.09151893109083176 2023-01-22 10:27:35.426314: step: 26/466, loss: 0.16328245401382446 2023-01-22 10:27:36.015653: step: 28/466, loss: 0.7152175307273865 2023-01-22 10:27:36.676315: step: 30/466, loss: 0.16174262762069702 2023-01-22 10:27:37.340459: step: 32/466, loss: 0.533721387386322 2023-01-22 10:27:37.937288: step: 34/466, loss: 1.1466821432113647 2023-01-22 10:27:38.604221: step: 36/466, loss: 0.14627352356910706 2023-01-22 10:27:39.254189: step: 38/466, loss: 0.47448137402534485 2023-01-22 10:27:39.857034: step: 40/466, loss: 0.16674591600894928 2023-01-22 10:27:40.455953: step: 42/466, loss: 1.0415873527526855 2023-01-22 10:27:41.074162: step: 44/466, loss: 0.3185746371746063 2023-01-22 10:27:41.668349: step: 46/466, loss: 0.10683126002550125 2023-01-22 10:27:42.352521: step: 48/466, loss: 0.20796655118465424 2023-01-22 10:27:43.040014: step: 50/466, loss: 0.5955159664154053 2023-01-22 10:27:43.669149: step: 52/466, loss: 0.4141726493835449 2023-01-22 10:27:44.195120: step: 54/466, loss: 0.22971345484256744 2023-01-22 10:27:44.777914: step: 56/466, loss: 0.3233274817466736 2023-01-22 10:27:45.364460: step: 58/466, loss: 0.192288339138031 2023-01-22 10:27:45.991551: step: 60/466, loss: 0.7038285732269287 2023-01-22 10:27:46.612932: step: 62/466, loss: 0.7595869898796082 2023-01-22 10:27:47.199714: step: 64/466, loss: 0.31833571195602417 2023-01-22 10:27:47.821264: step: 66/466, loss: 0.17052781581878662 2023-01-22 10:27:48.405177: step: 68/466, loss: 0.7216683626174927 2023-01-22 10:27:49.006924: step: 70/466, loss: 0.5472569465637207 2023-01-22 10:27:49.668394: step: 72/466, loss: 0.13880658149719238 2023-01-22 10:27:50.274968: step: 74/466, loss: 0.20119856297969818 2023-01-22 10:27:50.853652: step: 76/466, loss: 0.7351464033126831 2023-01-22 10:27:51.425468: step: 78/466, loss: 0.9380894899368286 2023-01-22 10:27:51.994269: step: 80/466, loss: 1.0884746313095093 2023-01-22 10:27:52.614944: step: 82/466, loss: 0.4526834785938263 2023-01-22 10:27:53.297070: step: 84/466, loss: 0.873041033744812 2023-01-22 10:27:53.948913: step: 86/466, loss: 1.6015405654907227 2023-01-22 10:27:54.511766: step: 88/466, loss: 0.28300535678863525 2023-01-22 10:27:55.068436: step: 90/466, loss: 0.28813549876213074 2023-01-22 10:27:55.651979: step: 92/466, loss: 0.144855797290802 2023-01-22 10:27:56.224080: step: 94/466, loss: 0.41047242283821106 2023-01-22 10:27:56.837587: step: 96/466, loss: 0.2648795545101166 2023-01-22 10:27:57.397744: step: 98/466, loss: 0.4050430953502655 2023-01-22 10:27:57.931446: step: 100/466, loss: 0.9858799576759338 2023-01-22 10:27:58.546339: step: 102/466, loss: 0.17314811050891876 2023-01-22 10:27:59.098375: step: 104/466, loss: 0.42704564332962036 2023-01-22 10:27:59.727177: step: 106/466, loss: 0.16546908020973206 2023-01-22 10:28:00.384445: step: 108/466, loss: 0.18945223093032837 2023-01-22 10:28:00.998456: step: 110/466, loss: 0.5868598222732544 2023-01-22 10:28:01.561441: step: 112/466, loss: 0.3855229616165161 2023-01-22 10:28:02.214620: step: 114/466, loss: 0.535126268863678 2023-01-22 10:28:02.804023: step: 116/466, loss: 0.6311970353126526 2023-01-22 10:28:03.374307: step: 118/466, loss: 0.29702258110046387 2023-01-22 10:28:03.957324: step: 120/466, loss: 0.18481245636940002 2023-01-22 10:28:04.559689: step: 122/466, loss: 0.13972234725952148 2023-01-22 10:28:05.146670: step: 124/466, loss: 0.3186049461364746 2023-01-22 10:28:05.746110: step: 126/466, loss: 0.187290757894516 2023-01-22 10:28:06.341872: step: 128/466, loss: 0.2006399929523468 2023-01-22 10:28:07.003079: step: 130/466, loss: 0.27899008989334106 2023-01-22 10:28:07.566954: step: 132/466, loss: 0.23280629515647888 2023-01-22 10:28:08.168948: step: 134/466, loss: 0.274791955947876 2023-01-22 10:28:08.715343: step: 136/466, loss: 0.40975281596183777 2023-01-22 10:28:09.278656: step: 138/466, loss: 0.26108965277671814 2023-01-22 10:28:09.894474: step: 140/466, loss: 0.5550920367240906 2023-01-22 10:28:10.540426: step: 142/466, loss: 0.18534642457962036 2023-01-22 10:28:11.261302: step: 144/466, loss: 0.3210761547088623 2023-01-22 10:28:11.912955: step: 146/466, loss: 0.45470285415649414 2023-01-22 10:28:12.546615: step: 148/466, loss: 0.3254496455192566 2023-01-22 10:28:13.155514: step: 150/466, loss: 0.03482885658740997 2023-01-22 10:28:13.829083: step: 152/466, loss: 0.7344004511833191 2023-01-22 10:28:14.369857: step: 154/466, loss: 0.47011271119117737 2023-01-22 10:28:14.981483: step: 156/466, loss: 0.3940267562866211 2023-01-22 10:28:15.540268: step: 158/466, loss: 1.9927465915679932 2023-01-22 10:28:16.228169: step: 160/466, loss: 0.07561226189136505 2023-01-22 10:28:16.892292: step: 162/466, loss: 0.27004295587539673 2023-01-22 10:28:17.482808: step: 164/466, loss: 0.18644846975803375 2023-01-22 10:28:18.082434: step: 166/466, loss: 0.579391360282898 2023-01-22 10:28:18.671873: step: 168/466, loss: 0.7410764694213867 2023-01-22 10:28:19.350217: step: 170/466, loss: 0.36173009872436523 2023-01-22 10:28:20.016568: step: 172/466, loss: 0.3252776265144348 2023-01-22 10:28:20.649134: step: 174/466, loss: 0.43130847811698914 2023-01-22 10:28:21.258864: step: 176/466, loss: 0.2706472873687744 2023-01-22 10:28:21.848409: step: 178/466, loss: 0.4012158215045929 2023-01-22 10:28:22.510011: step: 180/466, loss: 0.23293635249137878 2023-01-22 10:28:23.221761: step: 182/466, loss: 0.3880685567855835 2023-01-22 10:28:23.838203: step: 184/466, loss: 0.219434455037117 2023-01-22 10:28:24.591639: step: 186/466, loss: 0.5739613175392151 2023-01-22 10:28:25.215875: step: 188/466, loss: 0.5995786190032959 2023-01-22 10:28:25.864050: step: 190/466, loss: 0.35044941306114197 2023-01-22 10:28:26.531181: step: 192/466, loss: 0.391764372587204 2023-01-22 10:28:27.138215: step: 194/466, loss: 0.25634679198265076 2023-01-22 10:28:27.760343: step: 196/466, loss: 0.3363511562347412 2023-01-22 10:28:28.333343: step: 198/466, loss: 0.32367298007011414 2023-01-22 10:28:29.003854: step: 200/466, loss: 0.3627299964427948 2023-01-22 10:28:29.640628: step: 202/466, loss: 0.49342265725135803 2023-01-22 10:28:30.166701: step: 204/466, loss: 0.25272437930107117 2023-01-22 10:28:30.753306: step: 206/466, loss: 0.1899859458208084 2023-01-22 10:28:31.337786: step: 208/466, loss: 0.25297975540161133 2023-01-22 10:28:31.924788: step: 210/466, loss: 0.18472492694854736 2023-01-22 10:28:32.499728: step: 212/466, loss: 0.3657456040382385 2023-01-22 10:28:33.081982: step: 214/466, loss: 0.1637917011976242 2023-01-22 10:28:33.686112: step: 216/466, loss: 0.12977047264575958 2023-01-22 10:28:34.283837: step: 218/466, loss: 0.3983894884586334 2023-01-22 10:28:34.937998: step: 220/466, loss: 0.4265328645706177 2023-01-22 10:28:35.565134: step: 222/466, loss: 0.6930554509162903 2023-01-22 10:28:36.165281: step: 224/466, loss: 0.3342722952365875 2023-01-22 10:28:36.786684: step: 226/466, loss: 2.727994918823242 2023-01-22 10:28:37.403144: step: 228/466, loss: 1.518387794494629 2023-01-22 10:28:38.036344: step: 230/466, loss: 0.3353487253189087 2023-01-22 10:28:38.568010: step: 232/466, loss: 0.5437520742416382 2023-01-22 10:28:39.164206: step: 234/466, loss: 0.1320791393518448 2023-01-22 10:28:39.827849: step: 236/466, loss: 0.6413620114326477 2023-01-22 10:28:40.448013: step: 238/466, loss: 0.3188782334327698 2023-01-22 10:28:41.157173: step: 240/466, loss: 0.3302933871746063 2023-01-22 10:28:41.760997: step: 242/466, loss: 0.28584909439086914 2023-01-22 10:28:42.369217: step: 244/466, loss: 0.5395722389221191 2023-01-22 10:28:42.963148: step: 246/466, loss: 0.22122004628181458 2023-01-22 10:28:43.592813: step: 248/466, loss: 0.6224589347839355 2023-01-22 10:28:44.165490: step: 250/466, loss: 0.5412746667861938 2023-01-22 10:28:44.795322: step: 252/466, loss: 0.7373511791229248 2023-01-22 10:28:45.617710: step: 254/466, loss: 0.23652246594429016 2023-01-22 10:28:46.327314: step: 256/466, loss: 0.2539837658405304 2023-01-22 10:28:46.883524: step: 258/466, loss: 0.260820597410202 2023-01-22 10:28:47.432457: step: 260/466, loss: 0.21884596347808838 2023-01-22 10:28:48.032872: step: 262/466, loss: 0.07499537616968155 2023-01-22 10:28:48.613268: step: 264/466, loss: 1.2581735849380493 2023-01-22 10:28:49.356005: step: 266/466, loss: 0.34262615442276 2023-01-22 10:28:49.977185: step: 268/466, loss: 0.10764749348163605 2023-01-22 10:28:50.585700: step: 270/466, loss: 0.19042930006980896 2023-01-22 10:28:51.236355: step: 272/466, loss: 0.10762417316436768 2023-01-22 10:28:51.861341: step: 274/466, loss: 0.1228412389755249 2023-01-22 10:28:52.488684: step: 276/466, loss: 0.3271018862724304 2023-01-22 10:28:53.269985: step: 278/466, loss: 1.02420973777771 2023-01-22 10:28:53.939285: step: 280/466, loss: 0.13888515532016754 2023-01-22 10:28:54.596793: step: 282/466, loss: 0.9880621433258057 2023-01-22 10:28:55.217064: step: 284/466, loss: 0.8876910209655762 2023-01-22 10:28:55.808397: step: 286/466, loss: 1.1690138578414917 2023-01-22 10:28:56.441920: step: 288/466, loss: 0.1375790536403656 2023-01-22 10:28:57.069105: step: 290/466, loss: 0.840218186378479 2023-01-22 10:28:57.704484: step: 292/466, loss: 0.4882718324661255 2023-01-22 10:28:58.257063: step: 294/466, loss: 0.31972554326057434 2023-01-22 10:28:58.932020: step: 296/466, loss: 0.33755213022232056 2023-01-22 10:28:59.515893: step: 298/466, loss: 0.2977468967437744 2023-01-22 10:29:00.103582: step: 300/466, loss: 0.18298953771591187 2023-01-22 10:29:00.700928: step: 302/466, loss: 0.28846272826194763 2023-01-22 10:29:01.334745: step: 304/466, loss: 0.3550736606121063 2023-01-22 10:29:01.875946: step: 306/466, loss: 0.9019066095352173 2023-01-22 10:29:02.495810: step: 308/466, loss: 0.147572323679924 2023-01-22 10:29:03.066722: step: 310/466, loss: 0.3051636815071106 2023-01-22 10:29:03.656732: step: 312/466, loss: 0.28385066986083984 2023-01-22 10:29:04.219431: step: 314/466, loss: 1.4107277393341064 2023-01-22 10:29:04.784542: step: 316/466, loss: 0.14702874422073364 2023-01-22 10:29:05.333206: step: 318/466, loss: 0.41051381826400757 2023-01-22 10:29:05.937348: step: 320/466, loss: 0.18689079582691193 2023-01-22 10:29:06.528071: step: 322/466, loss: 0.24710184335708618 2023-01-22 10:29:07.149882: step: 324/466, loss: 0.7665988206863403 2023-01-22 10:29:07.735564: step: 326/466, loss: 0.45251378417015076 2023-01-22 10:29:08.329932: step: 328/466, loss: 0.21326738595962524 2023-01-22 10:29:08.939988: step: 330/466, loss: 0.18569011986255646 2023-01-22 10:29:09.449787: step: 332/466, loss: 0.44794222712516785 2023-01-22 10:29:10.077215: step: 334/466, loss: 0.2406645119190216 2023-01-22 10:29:10.667545: step: 336/466, loss: 0.30867999792099 2023-01-22 10:29:11.227768: step: 338/466, loss: 0.4746020436286926 2023-01-22 10:29:11.869358: step: 340/466, loss: 0.5891717076301575 2023-01-22 10:29:12.462612: step: 342/466, loss: 0.18415430188179016 2023-01-22 10:29:13.018010: step: 344/466, loss: 0.33969199657440186 2023-01-22 10:29:13.603034: step: 346/466, loss: 0.2565991282463074 2023-01-22 10:29:14.203813: step: 348/466, loss: 0.45773324370384216 2023-01-22 10:29:14.935951: step: 350/466, loss: 0.4006710350513458 2023-01-22 10:29:15.519716: step: 352/466, loss: 0.4616091847419739 2023-01-22 10:29:16.156865: step: 354/466, loss: 0.1680757999420166 2023-01-22 10:29:16.750857: step: 356/466, loss: 0.30026933550834656 2023-01-22 10:29:17.399191: step: 358/466, loss: 0.1306033730506897 2023-01-22 10:29:18.028451: step: 360/466, loss: 0.22498393058776855 2023-01-22 10:29:18.640121: step: 362/466, loss: 0.17583592236042023 2023-01-22 10:29:19.215059: step: 364/466, loss: 1.0427978038787842 2023-01-22 10:29:19.823902: step: 366/466, loss: 0.34120306372642517 2023-01-22 10:29:20.381374: step: 368/466, loss: 0.25547441840171814 2023-01-22 10:29:21.030911: step: 370/466, loss: 1.1784662008285522 2023-01-22 10:29:21.668411: step: 372/466, loss: 0.5351514220237732 2023-01-22 10:29:22.258925: step: 374/466, loss: 0.24661028385162354 2023-01-22 10:29:22.842757: step: 376/466, loss: 0.17091259360313416 2023-01-22 10:29:23.428186: step: 378/466, loss: 1.1215652227401733 2023-01-22 10:29:24.039456: step: 380/466, loss: 0.5957236886024475 2023-01-22 10:29:24.640407: step: 382/466, loss: 0.22655294835567474 2023-01-22 10:29:25.266370: step: 384/466, loss: 0.11992353945970535 2023-01-22 10:29:25.868670: step: 386/466, loss: 0.2618737816810608 2023-01-22 10:29:26.498983: step: 388/466, loss: 0.15113231539726257 2023-01-22 10:29:27.181561: step: 390/466, loss: 0.31584858894348145 2023-01-22 10:29:27.844516: step: 392/466, loss: 0.05570532754063606 2023-01-22 10:29:28.398421: step: 394/466, loss: 0.7033010721206665 2023-01-22 10:29:29.048028: step: 396/466, loss: 0.21475452184677124 2023-01-22 10:29:29.649361: step: 398/466, loss: 0.4419141113758087 2023-01-22 10:29:30.231384: step: 400/466, loss: 0.24417926371097565 2023-01-22 10:29:30.873320: step: 402/466, loss: 0.38471463322639465 2023-01-22 10:29:31.496045: step: 404/466, loss: 0.40086495876312256 2023-01-22 10:29:32.126423: step: 406/466, loss: 1.5156762599945068 2023-01-22 10:29:32.694509: step: 408/466, loss: 0.17294296622276306 2023-01-22 10:29:33.287371: step: 410/466, loss: 0.3071759343147278 2023-01-22 10:29:33.885628: step: 412/466, loss: 0.26017171144485474 2023-01-22 10:29:34.481714: step: 414/466, loss: 0.47733762860298157 2023-01-22 10:29:35.067255: step: 416/466, loss: 0.24585777521133423 2023-01-22 10:29:35.699717: step: 418/466, loss: 0.5091809034347534 2023-01-22 10:29:36.313098: step: 420/466, loss: 0.8872667551040649 2023-01-22 10:29:36.901513: step: 422/466, loss: 0.4296327829360962 2023-01-22 10:29:37.536574: step: 424/466, loss: 0.690857470035553 2023-01-22 10:29:38.150680: step: 426/466, loss: 0.2613014876842499 2023-01-22 10:29:38.730568: step: 428/466, loss: 1.7435870170593262 2023-01-22 10:29:39.363540: step: 430/466, loss: 0.22666385769844055 2023-01-22 10:29:39.926348: step: 432/466, loss: 0.11715306341648102 2023-01-22 10:29:40.614444: step: 434/466, loss: 0.8825061917304993 2023-01-22 10:29:41.244910: step: 436/466, loss: 1.4174325466156006 2023-01-22 10:29:41.949300: step: 438/466, loss: 0.34822991490364075 2023-01-22 10:29:42.570652: step: 440/466, loss: 0.29355087876319885 2023-01-22 10:29:43.143503: step: 442/466, loss: 1.369311809539795 2023-01-22 10:29:43.717399: step: 444/466, loss: 0.38101184368133545 2023-01-22 10:29:44.324214: step: 446/466, loss: 0.3892883062362671 2023-01-22 10:29:44.952720: step: 448/466, loss: 0.26244592666625977 2023-01-22 10:29:45.526873: step: 450/466, loss: 0.25750574469566345 2023-01-22 10:29:46.147488: step: 452/466, loss: 0.8240104913711548 2023-01-22 10:29:46.773492: step: 454/466, loss: 0.48789867758750916 2023-01-22 10:29:47.401666: step: 456/466, loss: 0.4815549850463867 2023-01-22 10:29:48.008017: step: 458/466, loss: 0.16869626939296722 2023-01-22 10:29:48.627421: step: 460/466, loss: 0.27055051922798157 2023-01-22 10:29:49.209055: step: 462/466, loss: 0.9371935129165649 2023-01-22 10:29:49.853689: step: 464/466, loss: 0.9419511556625366 2023-01-22 10:29:50.440438: step: 466/466, loss: 0.7592545747756958 2023-01-22 10:29:51.037360: step: 468/466, loss: 0.13283738493919373 2023-01-22 10:29:51.666369: step: 470/466, loss: 0.5328441858291626 2023-01-22 10:29:52.201956: step: 472/466, loss: 0.3986574709415436 2023-01-22 10:29:52.829954: step: 474/466, loss: 0.17218521237373352 2023-01-22 10:29:53.508691: step: 476/466, loss: 0.2489137500524521 2023-01-22 10:29:54.113782: step: 478/466, loss: 3.4816160202026367 2023-01-22 10:29:54.705485: step: 480/466, loss: 0.5082486867904663 2023-01-22 10:29:55.332841: step: 482/466, loss: 0.4919622838497162 2023-01-22 10:29:55.995881: step: 484/466, loss: 0.2767554521560669 2023-01-22 10:29:56.593566: step: 486/466, loss: 0.37757161259651184 2023-01-22 10:29:57.161103: step: 488/466, loss: 0.5388099551200867 2023-01-22 10:29:57.732119: step: 490/466, loss: 0.651739776134491 2023-01-22 10:29:58.296758: step: 492/466, loss: 0.23953072726726532 2023-01-22 10:29:58.845363: step: 494/466, loss: 0.41552096605300903 2023-01-22 10:29:59.443576: step: 496/466, loss: 0.44315606355667114 2023-01-22 10:30:00.056250: step: 498/466, loss: 0.2738582491874695 2023-01-22 10:30:00.669208: step: 500/466, loss: 1.0400668382644653 2023-01-22 10:30:01.316511: step: 502/466, loss: 0.4278027415275574 2023-01-22 10:30:01.943172: step: 504/466, loss: 0.6743075847625732 2023-01-22 10:30:02.592506: step: 506/466, loss: 0.6389839053153992 2023-01-22 10:30:03.169175: step: 508/466, loss: 0.13873815536499023 2023-01-22 10:30:03.824362: step: 510/466, loss: 0.3239734470844269 2023-01-22 10:30:04.437167: step: 512/466, loss: 0.995073139667511 2023-01-22 10:30:05.024607: step: 514/466, loss: 1.3936810493469238 2023-01-22 10:30:05.618940: step: 516/466, loss: 0.37060728669166565 2023-01-22 10:30:06.293923: step: 518/466, loss: 0.6086036562919617 2023-01-22 10:30:06.861721: step: 520/466, loss: 0.6494529247283936 2023-01-22 10:30:07.493573: step: 522/466, loss: 0.5009200572967529 2023-01-22 10:30:08.127962: step: 524/466, loss: 0.1802213490009308 2023-01-22 10:30:08.840807: step: 526/466, loss: 0.5180705785751343 2023-01-22 10:30:09.519520: step: 528/466, loss: 0.8920317888259888 2023-01-22 10:30:10.096953: step: 530/466, loss: 0.3410802185535431 2023-01-22 10:30:10.715905: step: 532/466, loss: 0.26836782693862915 2023-01-22 10:30:11.302181: step: 534/466, loss: 0.1766878366470337 2023-01-22 10:30:11.905539: step: 536/466, loss: 0.09978803992271423 2023-01-22 10:30:12.541432: step: 538/466, loss: 0.40650302171707153 2023-01-22 10:30:13.187585: step: 540/466, loss: 0.1943265050649643 2023-01-22 10:30:13.766757: step: 542/466, loss: 0.22957094013690948 2023-01-22 10:30:14.342962: step: 544/466, loss: 0.3287438750267029 2023-01-22 10:30:14.908351: step: 546/466, loss: 0.714024543762207 2023-01-22 10:30:15.531254: step: 548/466, loss: 0.27795958518981934 2023-01-22 10:30:16.096176: step: 550/466, loss: 0.5719060301780701 2023-01-22 10:30:16.640914: step: 552/466, loss: 0.2060711830854416 2023-01-22 10:30:17.216161: step: 554/466, loss: 0.32010918855667114 2023-01-22 10:30:17.799989: step: 556/466, loss: 0.6014868021011353 2023-01-22 10:30:18.395049: step: 558/466, loss: 0.2549934685230255 2023-01-22 10:30:18.996731: step: 560/466, loss: 0.7524586915969849 2023-01-22 10:30:19.646287: step: 562/466, loss: 0.20897318422794342 2023-01-22 10:30:20.251419: step: 564/466, loss: 0.405833899974823 2023-01-22 10:30:20.828196: step: 566/466, loss: 0.2669678330421448 2023-01-22 10:30:21.474020: step: 568/466, loss: 0.43072205781936646 2023-01-22 10:30:22.194651: step: 570/466, loss: 0.33123594522476196 2023-01-22 10:30:22.777253: step: 572/466, loss: 0.24202190339565277 2023-01-22 10:30:23.391703: step: 574/466, loss: 0.9372299313545227 2023-01-22 10:30:24.036595: step: 576/466, loss: 0.5606487393379211 2023-01-22 10:30:24.591319: step: 578/466, loss: 0.19988267123699188 2023-01-22 10:30:25.162554: step: 580/466, loss: 0.758746862411499 2023-01-22 10:30:25.804512: step: 582/466, loss: 0.6153707504272461 2023-01-22 10:30:26.322778: step: 584/466, loss: 0.48051753640174866 2023-01-22 10:30:26.917342: step: 586/466, loss: 0.41417208313941956 2023-01-22 10:30:27.471770: step: 588/466, loss: 0.9258098006248474 2023-01-22 10:30:28.139447: step: 590/466, loss: 0.25086748600006104 2023-01-22 10:30:28.844272: step: 592/466, loss: 0.6945814490318298 2023-01-22 10:30:29.480120: step: 594/466, loss: 0.4970313012599945 2023-01-22 10:30:30.117433: step: 596/466, loss: 0.08689434826374054 2023-01-22 10:30:30.767539: step: 598/466, loss: 0.22101353108882904 2023-01-22 10:30:31.427449: step: 600/466, loss: 0.1183728501200676 2023-01-22 10:30:32.013232: step: 602/466, loss: 0.2400856614112854 2023-01-22 10:30:32.675086: step: 604/466, loss: 0.29236307740211487 2023-01-22 10:30:33.261044: step: 606/466, loss: 1.5404255390167236 2023-01-22 10:30:33.808455: step: 608/466, loss: 0.664776086807251 2023-01-22 10:30:34.421630: step: 610/466, loss: 0.2434491068124771 2023-01-22 10:30:35.054663: step: 612/466, loss: 1.052943468093872 2023-01-22 10:30:35.663055: step: 614/466, loss: 0.8199459314346313 2023-01-22 10:30:36.294206: step: 616/466, loss: 4.667254447937012 2023-01-22 10:30:36.906830: step: 618/466, loss: 0.6161572933197021 2023-01-22 10:30:37.575525: step: 620/466, loss: 1.5852956771850586 2023-01-22 10:30:38.145088: step: 622/466, loss: 0.8171479105949402 2023-01-22 10:30:38.795534: step: 624/466, loss: 1.4977972507476807 2023-01-22 10:30:39.409868: step: 626/466, loss: 0.8954432010650635 2023-01-22 10:30:40.056889: step: 628/466, loss: 0.19785064458847046 2023-01-22 10:30:40.662506: step: 630/466, loss: 0.23454849421977997 2023-01-22 10:30:41.230336: step: 632/466, loss: 0.9934785962104797 2023-01-22 10:30:41.857883: step: 634/466, loss: 0.4460870027542114 2023-01-22 10:30:42.523670: step: 636/466, loss: 0.10035587847232819 2023-01-22 10:30:43.109694: step: 638/466, loss: 0.11986562609672546 2023-01-22 10:30:43.647426: step: 640/466, loss: 0.7981387376785278 2023-01-22 10:30:44.273266: step: 642/466, loss: 2.019948959350586 2023-01-22 10:30:44.864071: step: 644/466, loss: 0.1733536273241043 2023-01-22 10:30:45.523571: step: 646/466, loss: 0.5257325172424316 2023-01-22 10:30:46.118743: step: 648/466, loss: 0.9252492785453796 2023-01-22 10:30:46.715903: step: 650/466, loss: 0.30040332674980164 2023-01-22 10:30:47.360229: step: 652/466, loss: 0.46063125133514404 2023-01-22 10:30:47.977689: step: 654/466, loss: 0.4684278070926666 2023-01-22 10:30:48.506128: step: 656/466, loss: 0.22722361981868744 2023-01-22 10:30:49.134495: step: 658/466, loss: 0.7254287600517273 2023-01-22 10:30:49.752016: step: 660/466, loss: 0.1062985435128212 2023-01-22 10:30:50.320681: step: 662/466, loss: 0.3585277199745178 2023-01-22 10:30:50.969766: step: 664/466, loss: 0.5264803767204285 2023-01-22 10:30:51.547403: step: 666/466, loss: 0.6416162252426147 2023-01-22 10:30:52.142890: step: 668/466, loss: 0.15391018986701965 2023-01-22 10:30:52.705859: step: 670/466, loss: 0.28397443890571594 2023-01-22 10:30:53.323225: step: 672/466, loss: 0.20207203924655914 2023-01-22 10:30:53.877316: step: 674/466, loss: 0.12301262468099594 2023-01-22 10:30:54.440720: step: 676/466, loss: 0.5341054201126099 2023-01-22 10:30:55.027227: step: 678/466, loss: 0.10569257289171219 2023-01-22 10:30:55.625652: step: 680/466, loss: 0.15092137455940247 2023-01-22 10:30:56.224624: step: 682/466, loss: 0.7261207699775696 2023-01-22 10:30:56.837736: step: 684/466, loss: 1.65827214717865 2023-01-22 10:30:57.509602: step: 686/466, loss: 1.0371516942977905 2023-01-22 10:30:58.122565: step: 688/466, loss: 0.4550298750400543 2023-01-22 10:30:58.686087: step: 690/466, loss: 0.414910227060318 2023-01-22 10:30:59.248710: step: 692/466, loss: 0.2945830523967743 2023-01-22 10:30:59.851050: step: 694/466, loss: 0.1816388964653015 2023-01-22 10:31:00.489173: step: 696/466, loss: 0.41306084394454956 2023-01-22 10:31:01.100889: step: 698/466, loss: 1.016797423362732 2023-01-22 10:31:01.688779: step: 700/466, loss: 0.17569591104984283 2023-01-22 10:31:02.324699: step: 702/466, loss: 1.9182236194610596 2023-01-22 10:31:02.999754: step: 704/466, loss: 0.42811667919158936 2023-01-22 10:31:03.576549: step: 706/466, loss: 0.6352326273918152 2023-01-22 10:31:04.165280: step: 708/466, loss: 0.9747324585914612 2023-01-22 10:31:04.739559: step: 710/466, loss: 0.729386031627655 2023-01-22 10:31:05.389983: step: 712/466, loss: 0.14309903979301453 2023-01-22 10:31:05.962865: step: 714/466, loss: 0.5412364602088928 2023-01-22 10:31:06.558061: step: 716/466, loss: 0.49542367458343506 2023-01-22 10:31:07.188335: step: 718/466, loss: 0.44659021496772766 2023-01-22 10:31:07.772778: step: 720/466, loss: 0.4688337743282318 2023-01-22 10:31:08.348528: step: 722/466, loss: 0.7991344928741455 2023-01-22 10:31:08.993628: step: 724/466, loss: 0.9413745999336243 2023-01-22 10:31:09.637588: step: 726/466, loss: 0.9676265716552734 2023-01-22 10:31:10.197530: step: 728/466, loss: 0.21356771886348724 2023-01-22 10:31:10.838869: step: 730/466, loss: 0.3693164587020874 2023-01-22 10:31:11.400198: step: 732/466, loss: 0.21786029636859894 2023-01-22 10:31:12.057886: step: 734/466, loss: 0.5728607773780823 2023-01-22 10:31:12.689207: step: 736/466, loss: 0.5436547994613647 2023-01-22 10:31:13.349884: step: 738/466, loss: 0.3133682906627655 2023-01-22 10:31:13.983893: step: 740/466, loss: 0.34399712085723877 2023-01-22 10:31:14.664536: step: 742/466, loss: 0.21830910444259644 2023-01-22 10:31:15.308637: step: 744/466, loss: 0.2679389417171478 2023-01-22 10:31:15.930872: step: 746/466, loss: 0.2695886194705963 2023-01-22 10:31:16.521942: step: 748/466, loss: 0.30671921372413635 2023-01-22 10:31:17.092292: step: 750/466, loss: 0.172052800655365 2023-01-22 10:31:17.749875: step: 752/466, loss: 0.648655354976654 2023-01-22 10:31:18.353194: step: 754/466, loss: 0.32802191376686096 2023-01-22 10:31:18.979234: step: 756/466, loss: 0.22855710983276367 2023-01-22 10:31:19.619436: step: 758/466, loss: 0.5866103172302246 2023-01-22 10:31:20.186576: step: 760/466, loss: 0.26203909516334534 2023-01-22 10:31:20.807773: step: 762/466, loss: 0.6300651431083679 2023-01-22 10:31:21.416571: step: 764/466, loss: 0.2713729739189148 2023-01-22 10:31:21.990981: step: 766/466, loss: 0.8633518815040588 2023-01-22 10:31:22.584603: step: 768/466, loss: 0.1878773421049118 2023-01-22 10:31:23.220690: step: 770/466, loss: 0.11146188527345657 2023-01-22 10:31:23.811367: step: 772/466, loss: 0.49176108837127686 2023-01-22 10:31:24.437715: step: 774/466, loss: 0.2051166445016861 2023-01-22 10:31:25.025413: step: 776/466, loss: 0.20102080702781677 2023-01-22 10:31:25.588511: step: 778/466, loss: 0.25653690099716187 2023-01-22 10:31:26.202319: step: 780/466, loss: 0.30637654662132263 2023-01-22 10:31:26.830647: step: 782/466, loss: 0.23440568149089813 2023-01-22 10:31:27.473792: step: 784/466, loss: 0.5576182007789612 2023-01-22 10:31:28.113540: step: 786/466, loss: 0.4898272454738617 2023-01-22 10:31:28.655135: step: 788/466, loss: 0.6654157042503357 2023-01-22 10:31:29.242360: step: 790/466, loss: 0.20696957409381866 2023-01-22 10:31:29.814489: step: 792/466, loss: 0.18953485786914825 2023-01-22 10:31:30.396285: step: 794/466, loss: 0.6122508645057678 2023-01-22 10:31:30.946487: step: 796/466, loss: 0.8150749206542969 2023-01-22 10:31:31.579977: step: 798/466, loss: 0.42300933599472046 2023-01-22 10:31:32.237624: step: 800/466, loss: 0.7897908091545105 2023-01-22 10:31:32.813465: step: 802/466, loss: 0.6928276419639587 2023-01-22 10:31:33.406322: step: 804/466, loss: 0.8072050213813782 2023-01-22 10:31:34.043348: step: 806/466, loss: 0.16678494215011597 2023-01-22 10:31:34.620269: step: 808/466, loss: 0.3240396976470947 2023-01-22 10:31:35.273051: step: 810/466, loss: 0.45988866686820984 2023-01-22 10:31:35.930982: step: 812/466, loss: 0.23593668639659882 2023-01-22 10:31:36.526615: step: 814/466, loss: 0.4983198642730713 2023-01-22 10:31:37.145129: step: 816/466, loss: 0.5303546190261841 2023-01-22 10:31:37.800554: step: 818/466, loss: 0.20623086392879486 2023-01-22 10:31:38.448902: step: 820/466, loss: 0.22160398960113525 2023-01-22 10:31:39.099621: step: 822/466, loss: 0.365662544965744 2023-01-22 10:31:39.713834: step: 824/466, loss: 0.38205280900001526 2023-01-22 10:31:40.366058: step: 826/466, loss: 0.1559482216835022 2023-01-22 10:31:41.003480: step: 828/466, loss: 0.6489866375923157 2023-01-22 10:31:41.558880: step: 830/466, loss: 0.1958453506231308 2023-01-22 10:31:42.120715: step: 832/466, loss: 0.19367656111717224 2023-01-22 10:31:42.772038: step: 834/466, loss: 0.2849229574203491 2023-01-22 10:31:43.333985: step: 836/466, loss: 0.2829507291316986 2023-01-22 10:31:43.902013: step: 838/466, loss: 3.710892915725708 2023-01-22 10:31:44.554740: step: 840/466, loss: 0.6591838598251343 2023-01-22 10:31:45.131012: step: 842/466, loss: 0.45559462904930115 2023-01-22 10:31:45.709368: step: 844/466, loss: 0.8332613706588745 2023-01-22 10:31:46.322834: step: 846/466, loss: 0.9570567011833191 2023-01-22 10:31:47.103998: step: 848/466, loss: 0.2521909773349762 2023-01-22 10:31:47.721555: step: 850/466, loss: 0.37644124031066895 2023-01-22 10:31:48.294601: step: 852/466, loss: 0.18477340042591095 2023-01-22 10:31:48.990995: step: 854/466, loss: 0.2751915752887726 2023-01-22 10:31:49.581088: step: 856/466, loss: 0.25587552785873413 2023-01-22 10:31:50.164249: step: 858/466, loss: 1.611753225326538 2023-01-22 10:31:50.781791: step: 860/466, loss: 0.37444859743118286 2023-01-22 10:31:51.369331: step: 862/466, loss: 1.0017908811569214 2023-01-22 10:31:51.940905: step: 864/466, loss: 0.2559552788734436 2023-01-22 10:31:52.611610: step: 866/466, loss: 0.16050826013088226 2023-01-22 10:31:53.207126: step: 868/466, loss: 0.8576302528381348 2023-01-22 10:31:53.862349: step: 870/466, loss: 0.5445796251296997 2023-01-22 10:31:54.422542: step: 872/466, loss: 0.9365764260292053 2023-01-22 10:31:55.100504: step: 874/466, loss: 0.14005893468856812 2023-01-22 10:31:55.656868: step: 876/466, loss: 0.3255828619003296 2023-01-22 10:31:56.232082: step: 878/466, loss: 0.8145939111709595 2023-01-22 10:31:56.852917: step: 880/466, loss: 0.5594971776008606 2023-01-22 10:31:57.458537: step: 882/466, loss: 1.537627935409546 2023-01-22 10:31:58.058342: step: 884/466, loss: 0.20095406472682953 2023-01-22 10:31:58.678807: step: 886/466, loss: 0.868279218673706 2023-01-22 10:31:59.200210: step: 888/466, loss: 0.13002410531044006 2023-01-22 10:31:59.812252: step: 890/466, loss: 0.2226947396993637 2023-01-22 10:32:00.441142: step: 892/466, loss: 0.6223090887069702 2023-01-22 10:32:01.083384: step: 894/466, loss: 0.18663303554058075 2023-01-22 10:32:01.748431: step: 896/466, loss: 1.025985598564148 2023-01-22 10:32:02.370346: step: 898/466, loss: 0.2230851948261261 2023-01-22 10:32:02.961646: step: 900/466, loss: 0.39694327116012573 2023-01-22 10:32:03.564037: step: 902/466, loss: 0.15065787732601166 2023-01-22 10:32:04.166938: step: 904/466, loss: 0.20817795395851135 2023-01-22 10:32:04.803853: step: 906/466, loss: 0.49367645382881165 2023-01-22 10:32:05.487288: step: 908/466, loss: 1.8523865938186646 2023-01-22 10:32:06.087553: step: 910/466, loss: 0.26639750599861145 2023-01-22 10:32:06.737560: step: 912/466, loss: 0.4446331858634949 2023-01-22 10:32:07.398557: step: 914/466, loss: 2.210296154022217 2023-01-22 10:32:08.049697: step: 916/466, loss: 0.5116863250732422 2023-01-22 10:32:08.669258: step: 918/466, loss: 0.4732198417186737 2023-01-22 10:32:09.300612: step: 920/466, loss: 0.7057874202728271 2023-01-22 10:32:09.892646: step: 922/466, loss: 0.28075242042541504 2023-01-22 10:32:10.514435: step: 924/466, loss: 0.23045767843723297 2023-01-22 10:32:11.121752: step: 926/466, loss: 3.6228244304656982 2023-01-22 10:32:11.729332: step: 928/466, loss: 0.7160196304321289 2023-01-22 10:32:12.408706: step: 930/466, loss: 0.2058020830154419 2023-01-22 10:32:13.061593: step: 932/466, loss: 0.09874732792377472 ================================================== Loss: 0.505 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28801988000813505, 'r': 0.32573026277959866, 'f1': 0.3057165600798727}, 'combined': 0.22526483374306408, 'epoch': 8} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35541684263457896, 'r': 0.3090314120055198, 'f1': 0.3306050334631405}, 'combined': 0.21926136934342996, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2879032258064516, 'r': 0.2704545454545455, 'f1': 0.2789062500000001}, 'combined': 0.18593750000000003, 'epoch': 8} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.37654638536275015, 'r': 0.2927250157134063, 'f1': 0.3293866924386776}, 'combined': 0.21496815717050533, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27745651415470496, 'r': 0.32010163302857797, 'f1': 0.2972573755172874}, 'combined': 0.21903175038115916, 'epoch': 8} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3585008886409438, 'r': 0.298595545344232, 'f1': 0.3258175294025394}, 'combined': 0.21608623711671002, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2579365079365079, 'r': 0.3095238095238095, 'f1': 0.28138528138528135}, 'combined': 0.18759018759018756, 'epoch': 8} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4868421052631579, 'r': 0.40217391304347827, 'f1': 0.44047619047619047}, 'combined': 0.2936507936507936, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.23275862068965517, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 8} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2879032258064516, 'r': 0.2704545454545455, 'f1': 0.2789062500000001}, 'combined': 0.18593750000000003, 'epoch': 8} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.37654638536275015, 'r': 0.2927250157134063, 'f1': 0.3293866924386776}, 'combined': 0.21496815717050533, 'epoch': 8} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4868421052631579, 'r': 0.40217391304347827, 'f1': 0.44047619047619047}, 'combined': 0.2936507936507936, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2858411526903795, 'r': 0.3156727720413679, 'f1': 0.30001722428458233}, 'combined': 0.22106532315706065, 'epoch': 3} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32636076268334185, 'r': 0.28224266998091435, 'f1': 0.30270264048138956}, 'combined': 0.20075615534517025, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44642857142857145, 'r': 0.21551724137931033, 'f1': 0.2906976744186046}, 'combined': 0.19379844961240306, 'epoch': 3} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:34:46.387363: step: 2/466, loss: 0.2886248230934143 2023-01-22 10:34:47.005477: step: 4/466, loss: 0.308744341135025 2023-01-22 10:34:47.627217: step: 6/466, loss: 0.18767115473747253 2023-01-22 10:34:48.272546: step: 8/466, loss: 0.4757394790649414 2023-01-22 10:34:48.834532: step: 10/466, loss: 0.18802697956562042 2023-01-22 10:34:49.410664: step: 12/466, loss: 0.6095316410064697 2023-01-22 10:34:50.007594: step: 14/466, loss: 0.48483580350875854 2023-01-22 10:34:50.633229: step: 16/466, loss: 0.46638408303260803 2023-01-22 10:34:51.239150: step: 18/466, loss: 0.15202441811561584 2023-01-22 10:34:51.814213: step: 20/466, loss: 0.24129228293895721 2023-01-22 10:34:52.433424: step: 22/466, loss: 0.48490291833877563 2023-01-22 10:34:53.038981: step: 24/466, loss: 0.0903395414352417 2023-01-22 10:34:53.657639: step: 26/466, loss: 0.06721008569002151 2023-01-22 10:34:54.279995: step: 28/466, loss: 0.1363254338502884 2023-01-22 10:34:54.881844: step: 30/466, loss: 0.20085632801055908 2023-01-22 10:34:55.493765: step: 32/466, loss: 0.4882054328918457 2023-01-22 10:34:56.088010: step: 34/466, loss: 0.4240463078022003 2023-01-22 10:34:56.712398: step: 36/466, loss: 0.14644275605678558 2023-01-22 10:34:57.335079: step: 38/466, loss: 0.1937483698129654 2023-01-22 10:34:57.969041: step: 40/466, loss: 0.295713871717453 2023-01-22 10:34:58.560609: step: 42/466, loss: 0.14528240263462067 2023-01-22 10:34:59.179584: step: 44/466, loss: 0.20442795753479004 2023-01-22 10:34:59.795649: step: 46/466, loss: 0.42661958932876587 2023-01-22 10:35:00.406579: step: 48/466, loss: 0.16562780737876892 2023-01-22 10:35:01.040115: step: 50/466, loss: 0.18909737467765808 2023-01-22 10:35:01.699651: step: 52/466, loss: 0.10642421245574951 2023-01-22 10:35:02.381106: step: 54/466, loss: 0.3187907338142395 2023-01-22 10:35:02.971794: step: 56/466, loss: 0.653616726398468 2023-01-22 10:35:03.590397: step: 58/466, loss: 0.15026280283927917 2023-01-22 10:35:04.148521: step: 60/466, loss: 0.4882783889770508 2023-01-22 10:35:04.748185: step: 62/466, loss: 0.13916371762752533 2023-01-22 10:35:05.323380: step: 64/466, loss: 0.8189008235931396 2023-01-22 10:35:05.951095: step: 66/466, loss: 0.14434003829956055 2023-01-22 10:35:06.561723: step: 68/466, loss: 0.18077050149440765 2023-01-22 10:35:07.155050: step: 70/466, loss: 0.4928813576698303 2023-01-22 10:35:07.815102: step: 72/466, loss: 0.1190536841750145 2023-01-22 10:35:08.421870: step: 74/466, loss: 0.7139197587966919 2023-01-22 10:35:09.013784: step: 76/466, loss: 0.3733008801937103 2023-01-22 10:35:09.611488: step: 78/466, loss: 0.1874418556690216 2023-01-22 10:35:10.231139: step: 80/466, loss: 0.22743932902812958 2023-01-22 10:35:10.804390: step: 82/466, loss: 0.057189784944057465 2023-01-22 10:35:11.385415: step: 84/466, loss: 0.7802344560623169 2023-01-22 10:35:11.984382: step: 86/466, loss: 0.25141051411628723 2023-01-22 10:35:12.574188: step: 88/466, loss: 0.21101033687591553 2023-01-22 10:35:13.115657: step: 90/466, loss: 0.2927389144897461 2023-01-22 10:35:13.759696: step: 92/466, loss: 0.903736412525177 2023-01-22 10:35:14.366778: step: 94/466, loss: 0.9050028920173645 2023-01-22 10:35:14.986167: step: 96/466, loss: 0.19579294323921204 2023-01-22 10:35:15.625009: step: 98/466, loss: 0.39611369371414185 2023-01-22 10:35:16.262959: step: 100/466, loss: 0.267816960811615 2023-01-22 10:35:16.911745: step: 102/466, loss: 0.3000200092792511 2023-01-22 10:35:17.483492: step: 104/466, loss: 0.20672659575939178 2023-01-22 10:35:18.085543: step: 106/466, loss: 3.0240981578826904 2023-01-22 10:35:18.750098: step: 108/466, loss: 0.2653675079345703 2023-01-22 10:35:19.337645: step: 110/466, loss: 0.2504380941390991 2023-01-22 10:35:19.930201: step: 112/466, loss: 0.1431732028722763 2023-01-22 10:35:20.610243: step: 114/466, loss: 0.2724539339542389 2023-01-22 10:35:21.189206: step: 116/466, loss: 1.323121428489685 2023-01-22 10:35:21.779796: step: 118/466, loss: 0.3038110136985779 2023-01-22 10:35:22.387569: step: 120/466, loss: 0.3248835504055023 2023-01-22 10:35:23.009724: step: 122/466, loss: 0.3454150855541229 2023-01-22 10:35:23.598082: step: 124/466, loss: 0.32745280861854553 2023-01-22 10:35:24.206573: step: 126/466, loss: 0.12488136440515518 2023-01-22 10:35:24.953032: step: 128/466, loss: 0.11687491834163666 2023-01-22 10:35:25.509023: step: 130/466, loss: 0.31433528661727905 2023-01-22 10:35:26.107449: step: 132/466, loss: 0.5409859418869019 2023-01-22 10:35:26.877660: step: 134/466, loss: 0.10108667612075806 2023-01-22 10:35:27.449696: step: 136/466, loss: 0.1774735450744629 2023-01-22 10:35:27.997094: step: 138/466, loss: 0.5564969778060913 2023-01-22 10:35:28.579356: step: 140/466, loss: 0.2754778563976288 2023-01-22 10:35:29.198071: step: 142/466, loss: 0.21344691514968872 2023-01-22 10:35:29.821477: step: 144/466, loss: 0.13812457025051117 2023-01-22 10:35:30.405369: step: 146/466, loss: 0.23207207024097443 2023-01-22 10:35:30.988046: step: 148/466, loss: 0.46367210149765015 2023-01-22 10:35:31.582648: step: 150/466, loss: 0.27947306632995605 2023-01-22 10:35:32.316274: step: 152/466, loss: 0.11693456768989563 2023-01-22 10:35:32.921087: step: 154/466, loss: 0.3103890120983124 2023-01-22 10:35:33.574267: step: 156/466, loss: 0.26553767919540405 2023-01-22 10:35:34.187021: step: 158/466, loss: 1.2345422506332397 2023-01-22 10:35:34.751134: step: 160/466, loss: 0.4150778651237488 2023-01-22 10:35:35.402300: step: 162/466, loss: 0.9702160954475403 2023-01-22 10:35:36.024668: step: 164/466, loss: 0.6917840838432312 2023-01-22 10:35:36.669797: step: 166/466, loss: 2.0591235160827637 2023-01-22 10:35:37.276024: step: 168/466, loss: 0.2591855525970459 2023-01-22 10:35:37.915036: step: 170/466, loss: 0.20238420367240906 2023-01-22 10:35:38.488446: step: 172/466, loss: 0.17404764890670776 2023-01-22 10:35:39.015855: step: 174/466, loss: 0.17147669196128845 2023-01-22 10:35:39.613494: step: 176/466, loss: 0.28134211897850037 2023-01-22 10:35:40.234535: step: 178/466, loss: 0.3062577247619629 2023-01-22 10:35:40.863308: step: 180/466, loss: 0.36133408546447754 2023-01-22 10:35:41.470535: step: 182/466, loss: 1.115287184715271 2023-01-22 10:35:42.035684: step: 184/466, loss: 0.14870578050613403 2023-01-22 10:35:42.659825: step: 186/466, loss: 0.2662959098815918 2023-01-22 10:35:43.291506: step: 188/466, loss: 0.8476355671882629 2023-01-22 10:35:43.902742: step: 190/466, loss: 0.7082141637802124 2023-01-22 10:35:44.536485: step: 192/466, loss: 0.25874102115631104 2023-01-22 10:35:45.086074: step: 194/466, loss: 0.2716241180896759 2023-01-22 10:35:45.722598: step: 196/466, loss: 0.3178020417690277 2023-01-22 10:35:46.339905: step: 198/466, loss: 1.2630382776260376 2023-01-22 10:35:46.936342: step: 200/466, loss: 0.14553770422935486 2023-01-22 10:35:47.571537: step: 202/466, loss: 0.9363977313041687 2023-01-22 10:35:48.236183: step: 204/466, loss: 4.289189338684082 2023-01-22 10:35:48.825383: step: 206/466, loss: 0.8392980694770813 2023-01-22 10:35:49.401494: step: 208/466, loss: 0.37578555941581726 2023-01-22 10:35:49.995317: step: 210/466, loss: 0.2799866199493408 2023-01-22 10:35:50.615570: step: 212/466, loss: 0.22827349603176117 2023-01-22 10:35:51.484080: step: 214/466, loss: 0.8818905353546143 2023-01-22 10:35:52.086906: step: 216/466, loss: 0.31714412569999695 2023-01-22 10:35:52.696892: step: 218/466, loss: 0.4018808603286743 2023-01-22 10:35:53.298611: step: 220/466, loss: 0.7286221981048584 2023-01-22 10:35:53.888607: step: 222/466, loss: 1.2584357261657715 2023-01-22 10:35:54.500051: step: 224/466, loss: 0.6334192752838135 2023-01-22 10:35:55.119621: step: 226/466, loss: 0.4385855197906494 2023-01-22 10:35:55.764119: step: 228/466, loss: 0.36628538370132446 2023-01-22 10:35:56.378168: step: 230/466, loss: 7.889693260192871 2023-01-22 10:35:57.005785: step: 232/466, loss: 0.21159392595291138 2023-01-22 10:35:57.660157: step: 234/466, loss: 0.7000142335891724 2023-01-22 10:35:58.224058: step: 236/466, loss: 0.11271099746227264 2023-01-22 10:35:58.887525: step: 238/466, loss: 0.3571082353591919 2023-01-22 10:35:59.514984: step: 240/466, loss: 0.4262726604938507 2023-01-22 10:36:00.129004: step: 242/466, loss: 0.6651432514190674 2023-01-22 10:36:00.744239: step: 244/466, loss: 0.6429592370986938 2023-01-22 10:36:01.348653: step: 246/466, loss: 0.3488086462020874 2023-01-22 10:36:01.975302: step: 248/466, loss: 0.30513519048690796 2023-01-22 10:36:02.658897: step: 250/466, loss: 0.1501394361257553 2023-01-22 10:36:03.285503: step: 252/466, loss: 0.1115301251411438 2023-01-22 10:36:03.823959: step: 254/466, loss: 0.2430560290813446 2023-01-22 10:36:04.451959: step: 256/466, loss: 0.46057721972465515 2023-01-22 10:36:05.059601: step: 258/466, loss: 0.1814437210559845 2023-01-22 10:36:05.734289: step: 260/466, loss: 0.4026636481285095 2023-01-22 10:36:06.331526: step: 262/466, loss: 0.26710110902786255 2023-01-22 10:36:06.957455: step: 264/466, loss: 0.40534016489982605 2023-01-22 10:36:07.538171: step: 266/466, loss: 0.25253409147262573 2023-01-22 10:36:08.245643: step: 268/466, loss: 0.2516902983188629 2023-01-22 10:36:08.868084: step: 270/466, loss: 0.2063271850347519 2023-01-22 10:36:09.457921: step: 272/466, loss: 0.14844533801078796 2023-01-22 10:36:10.044614: step: 274/466, loss: 0.2199673056602478 2023-01-22 10:36:10.643049: step: 276/466, loss: 0.05996764451265335 2023-01-22 10:36:11.182716: step: 278/466, loss: 0.143018439412117 2023-01-22 10:36:11.740159: step: 280/466, loss: 0.06843327730894089 2023-01-22 10:36:12.357672: step: 282/466, loss: 0.24048197269439697 2023-01-22 10:36:12.961396: step: 284/466, loss: 0.08511865139007568 2023-01-22 10:36:13.592646: step: 286/466, loss: 0.4760672450065613 2023-01-22 10:36:14.253259: step: 288/466, loss: 0.9917468428611755 2023-01-22 10:36:14.922596: step: 290/466, loss: 0.506651759147644 2023-01-22 10:36:15.511540: step: 292/466, loss: 0.24498611688613892 2023-01-22 10:36:16.080807: step: 294/466, loss: 0.097173772752285 2023-01-22 10:36:16.664567: step: 296/466, loss: 0.11477378755807877 2023-01-22 10:36:17.199484: step: 298/466, loss: 0.4592766761779785 2023-01-22 10:36:17.820603: step: 300/466, loss: 0.23259836435317993 2023-01-22 10:36:18.436892: step: 302/466, loss: 0.6135904788970947 2023-01-22 10:36:19.048398: step: 304/466, loss: 0.45602795481681824 2023-01-22 10:36:19.706533: step: 306/466, loss: 0.229086235165596 2023-01-22 10:36:20.370465: step: 308/466, loss: 0.7307414412498474 2023-01-22 10:36:20.940551: step: 310/466, loss: 0.1451161801815033 2023-01-22 10:36:21.561316: step: 312/466, loss: 0.38837188482284546 2023-01-22 10:36:22.129376: step: 314/466, loss: 0.1958981454372406 2023-01-22 10:36:22.813422: step: 316/466, loss: 0.10762417316436768 2023-01-22 10:36:23.392982: step: 318/466, loss: 0.1184818223118782 2023-01-22 10:36:23.953299: step: 320/466, loss: 0.17390476167201996 2023-01-22 10:36:24.536175: step: 322/466, loss: 0.35525721311569214 2023-01-22 10:36:25.228378: step: 324/466, loss: 0.4133126735687256 2023-01-22 10:36:25.872927: step: 326/466, loss: 0.2449815571308136 2023-01-22 10:36:26.486669: step: 328/466, loss: 0.1683533489704132 2023-01-22 10:36:26.993320: step: 330/466, loss: 0.15328174829483032 2023-01-22 10:36:27.590444: step: 332/466, loss: 0.18109317123889923 2023-01-22 10:36:28.300065: step: 334/466, loss: 0.13646559417247772 2023-01-22 10:36:28.907848: step: 336/466, loss: 0.35461995005607605 2023-01-22 10:36:29.481728: step: 338/466, loss: 0.2538391351699829 2023-01-22 10:36:30.153031: step: 340/466, loss: 0.42897504568099976 2023-01-22 10:36:30.786691: step: 342/466, loss: 0.2490566372871399 2023-01-22 10:36:31.416809: step: 344/466, loss: 0.3087506890296936 2023-01-22 10:36:32.046133: step: 346/466, loss: 0.3386785686016083 2023-01-22 10:36:32.716768: step: 348/466, loss: 0.1746833473443985 2023-01-22 10:36:33.316559: step: 350/466, loss: 0.48220208287239075 2023-01-22 10:36:33.985765: step: 352/466, loss: 0.5700064301490784 2023-01-22 10:36:34.597272: step: 354/466, loss: 0.2225196361541748 2023-01-22 10:36:35.205496: step: 356/466, loss: 0.21626007556915283 2023-01-22 10:36:35.783354: step: 358/466, loss: 0.3181474208831787 2023-01-22 10:36:36.361301: step: 360/466, loss: 0.2892976701259613 2023-01-22 10:36:37.017469: step: 362/466, loss: 0.6562973856925964 2023-01-22 10:36:37.693304: step: 364/466, loss: 0.42469480633735657 2023-01-22 10:36:38.260021: step: 366/466, loss: 0.16269342601299286 2023-01-22 10:36:38.847270: step: 368/466, loss: 0.11876245588064194 2023-01-22 10:36:39.450519: step: 370/466, loss: 0.19721461832523346 2023-01-22 10:36:40.092416: step: 372/466, loss: 0.11414781957864761 2023-01-22 10:36:40.673913: step: 374/466, loss: 0.6896754503250122 2023-01-22 10:36:41.254183: step: 376/466, loss: 0.6822637319564819 2023-01-22 10:36:41.917627: step: 378/466, loss: 0.5647622346878052 2023-01-22 10:36:42.507738: step: 380/466, loss: 0.4214338958263397 2023-01-22 10:36:43.143843: step: 382/466, loss: 0.21153606474399567 2023-01-22 10:36:43.793184: step: 384/466, loss: 0.2709487974643707 2023-01-22 10:36:44.379306: step: 386/466, loss: 0.23430021107196808 2023-01-22 10:36:45.002917: step: 388/466, loss: 0.1149580180644989 2023-01-22 10:36:45.652968: step: 390/466, loss: 0.9785158634185791 2023-01-22 10:36:46.273036: step: 392/466, loss: 0.2314041405916214 2023-01-22 10:36:46.870593: step: 394/466, loss: 0.1163083165884018 2023-01-22 10:36:47.472721: step: 396/466, loss: 0.6735597252845764 2023-01-22 10:36:48.128779: step: 398/466, loss: 0.682388186454773 2023-01-22 10:36:48.814919: step: 400/466, loss: 0.27005475759506226 2023-01-22 10:36:49.489543: step: 402/466, loss: 0.261234849691391 2023-01-22 10:36:50.090746: step: 404/466, loss: 0.35506269335746765 2023-01-22 10:36:50.707862: step: 406/466, loss: 0.2801685631275177 2023-01-22 10:36:51.315352: step: 408/466, loss: 0.5067627429962158 2023-01-22 10:36:51.935516: step: 410/466, loss: 0.4552246332168579 2023-01-22 10:36:52.528179: step: 412/466, loss: 0.30544689297676086 2023-01-22 10:36:53.129617: step: 414/466, loss: 0.3824828267097473 2023-01-22 10:36:53.683783: step: 416/466, loss: 0.49223142862319946 2023-01-22 10:36:54.274262: step: 418/466, loss: 0.18452343344688416 2023-01-22 10:36:54.898157: step: 420/466, loss: 0.1926453560590744 2023-01-22 10:36:55.530543: step: 422/466, loss: 0.2764644920825958 2023-01-22 10:36:56.090240: step: 424/466, loss: 0.30662691593170166 2023-01-22 10:36:56.708071: step: 426/466, loss: 0.17963269352912903 2023-01-22 10:36:57.338741: step: 428/466, loss: 0.2118184119462967 2023-01-22 10:36:57.965559: step: 430/466, loss: 0.1730094999074936 2023-01-22 10:36:58.553522: step: 432/466, loss: 0.4022391140460968 2023-01-22 10:36:59.112533: step: 434/466, loss: 0.28192245960235596 2023-01-22 10:36:59.693523: step: 436/466, loss: 0.3259207010269165 2023-01-22 10:37:00.275959: step: 438/466, loss: 0.2654353380203247 2023-01-22 10:37:00.889639: step: 440/466, loss: 1.4085147380828857 2023-01-22 10:37:01.574335: step: 442/466, loss: 0.8635183572769165 2023-01-22 10:37:02.202734: step: 444/466, loss: 0.18238022923469543 2023-01-22 10:37:02.803296: step: 446/466, loss: 0.9470014572143555 2023-01-22 10:37:03.438511: step: 448/466, loss: 0.6711329817771912 2023-01-22 10:37:04.037936: step: 450/466, loss: 0.10989248752593994 2023-01-22 10:37:04.701370: step: 452/466, loss: 0.22127914428710938 2023-01-22 10:37:05.260421: step: 454/466, loss: 0.154241144657135 2023-01-22 10:37:05.804879: step: 456/466, loss: 0.14610041677951813 2023-01-22 10:37:06.459738: step: 458/466, loss: 0.6313903331756592 2023-01-22 10:37:07.111690: step: 460/466, loss: 0.4210139513015747 2023-01-22 10:37:07.717973: step: 462/466, loss: 0.10614173859357834 2023-01-22 10:37:08.277058: step: 464/466, loss: 0.1791078895330429 2023-01-22 10:37:08.901381: step: 466/466, loss: 0.10792126506567001 2023-01-22 10:37:09.514924: step: 468/466, loss: 0.41749194264411926 2023-01-22 10:37:10.192505: step: 470/466, loss: 0.15609955787658691 2023-01-22 10:37:10.810447: step: 472/466, loss: 0.18218357861042023 2023-01-22 10:37:11.434419: step: 474/466, loss: 0.15988190472126007 2023-01-22 10:37:12.031704: step: 476/466, loss: 0.49651825428009033 2023-01-22 10:37:12.665562: step: 478/466, loss: 0.3248773515224457 2023-01-22 10:37:13.242924: step: 480/466, loss: 0.2903391420841217 2023-01-22 10:37:13.867155: step: 482/466, loss: 0.1469796597957611 2023-01-22 10:37:14.481587: step: 484/466, loss: 0.7293221950531006 2023-01-22 10:37:15.188070: step: 486/466, loss: 0.2348928600549698 2023-01-22 10:37:15.937202: step: 488/466, loss: 0.19862528145313263 2023-01-22 10:37:16.577883: step: 490/466, loss: 1.9945156574249268 2023-01-22 10:37:17.176575: step: 492/466, loss: 0.6894669532775879 2023-01-22 10:37:17.832877: step: 494/466, loss: 0.23232914507389069 2023-01-22 10:37:18.459496: step: 496/466, loss: 0.180666983127594 2023-01-22 10:37:19.074798: step: 498/466, loss: 0.4309394657611847 2023-01-22 10:37:19.765928: step: 500/466, loss: 1.1669100522994995 2023-01-22 10:37:20.349003: step: 502/466, loss: 0.6310187578201294 2023-01-22 10:37:20.886653: step: 504/466, loss: 0.1634405255317688 2023-01-22 10:37:21.463114: step: 506/466, loss: 0.30890509486198425 2023-01-22 10:37:22.102951: step: 508/466, loss: 0.15104559063911438 2023-01-22 10:37:22.707718: step: 510/466, loss: 0.44312986731529236 2023-01-22 10:37:23.261011: step: 512/466, loss: 1.0001057386398315 2023-01-22 10:37:23.888289: step: 514/466, loss: 0.25896045565605164 2023-01-22 10:37:24.497847: step: 516/466, loss: 0.11799373477697372 2023-01-22 10:37:25.155261: step: 518/466, loss: 0.290573388338089 2023-01-22 10:37:25.709322: step: 520/466, loss: 0.22119809687137604 2023-01-22 10:37:26.277730: step: 522/466, loss: 0.10746853798627853 2023-01-22 10:37:26.836368: step: 524/466, loss: 0.38003775477409363 2023-01-22 10:37:27.426901: step: 526/466, loss: 0.09424518793821335 2023-01-22 10:37:28.133276: step: 528/466, loss: 1.2450789213180542 2023-01-22 10:37:28.752097: step: 530/466, loss: 0.7346737384796143 2023-01-22 10:37:29.373013: step: 532/466, loss: 0.1465187668800354 2023-01-22 10:37:29.941668: step: 534/466, loss: 0.20528163015842438 2023-01-22 10:37:30.513035: step: 536/466, loss: 0.2840029001235962 2023-01-22 10:37:31.084125: step: 538/466, loss: 0.4026467204093933 2023-01-22 10:37:31.738098: step: 540/466, loss: 0.13014625012874603 2023-01-22 10:37:32.384672: step: 542/466, loss: 0.20283189415931702 2023-01-22 10:37:32.994037: step: 544/466, loss: 0.13066184520721436 2023-01-22 10:37:33.642800: step: 546/466, loss: 0.32642120122909546 2023-01-22 10:37:34.266381: step: 548/466, loss: 0.21554157137870789 2023-01-22 10:37:34.826908: step: 550/466, loss: 0.2458067387342453 2023-01-22 10:37:35.446267: step: 552/466, loss: 0.1621115505695343 2023-01-22 10:37:36.028154: step: 554/466, loss: 0.22945551574230194 2023-01-22 10:37:36.667035: step: 556/466, loss: 1.2747833728790283 2023-01-22 10:37:37.320556: step: 558/466, loss: 0.1876629889011383 2023-01-22 10:37:37.941705: step: 560/466, loss: 0.1095738410949707 2023-01-22 10:37:38.591577: step: 562/466, loss: 0.35169994831085205 2023-01-22 10:37:39.200453: step: 564/466, loss: 0.2967619001865387 2023-01-22 10:37:39.774913: step: 566/466, loss: 0.23424719274044037 2023-01-22 10:37:40.332642: step: 568/466, loss: 0.16903695464134216 2023-01-22 10:37:40.910504: step: 570/466, loss: 0.2578504979610443 2023-01-22 10:37:41.504834: step: 572/466, loss: 0.1446860134601593 2023-01-22 10:37:42.087644: step: 574/466, loss: 0.09747033566236496 2023-01-22 10:37:42.635731: step: 576/466, loss: 0.7096918225288391 2023-01-22 10:37:43.196447: step: 578/466, loss: 0.14603278040885925 2023-01-22 10:37:43.769287: step: 580/466, loss: 0.2655777037143707 2023-01-22 10:37:44.384644: step: 582/466, loss: 0.1466868370771408 2023-01-22 10:37:44.918482: step: 584/466, loss: 0.1403120905160904 2023-01-22 10:37:45.563817: step: 586/466, loss: 0.14304976165294647 2023-01-22 10:37:46.144677: step: 588/466, loss: 0.22928020358085632 2023-01-22 10:37:46.753894: step: 590/466, loss: 0.5682132840156555 2023-01-22 10:37:47.433384: step: 592/466, loss: 0.11868242174386978 2023-01-22 10:37:48.100257: step: 594/466, loss: 0.9860707521438599 2023-01-22 10:37:48.734909: step: 596/466, loss: 0.6569647192955017 2023-01-22 10:37:49.391161: step: 598/466, loss: 0.4909774363040924 2023-01-22 10:37:49.951043: step: 600/466, loss: 0.35181015729904175 2023-01-22 10:37:50.549139: step: 602/466, loss: 0.7969850301742554 2023-01-22 10:37:51.211309: step: 604/466, loss: 0.23063130676746368 2023-01-22 10:37:51.825483: step: 606/466, loss: 0.44911569356918335 2023-01-22 10:37:52.389901: step: 608/466, loss: 0.07528708875179291 2023-01-22 10:37:52.997452: step: 610/466, loss: 0.18028683960437775 2023-01-22 10:37:53.619669: step: 612/466, loss: 0.06006666645407677 2023-01-22 10:37:54.170477: step: 614/466, loss: 0.3530943989753723 2023-01-22 10:37:54.736911: step: 616/466, loss: 0.31971678137779236 2023-01-22 10:37:55.331184: step: 618/466, loss: 0.6333480477333069 2023-01-22 10:37:55.923814: step: 620/466, loss: 0.35352540016174316 2023-01-22 10:37:56.515283: step: 622/466, loss: 0.6284568309783936 2023-01-22 10:37:57.241596: step: 624/466, loss: 0.11288725584745407 2023-01-22 10:37:57.807134: step: 626/466, loss: 0.07124555110931396 2023-01-22 10:37:58.406425: step: 628/466, loss: 0.05913181230425835 2023-01-22 10:37:59.027493: step: 630/466, loss: 0.18568114936351776 2023-01-22 10:37:59.684894: step: 632/466, loss: 0.4419129490852356 2023-01-22 10:38:00.255850: step: 634/466, loss: 0.22381986677646637 2023-01-22 10:38:00.856390: step: 636/466, loss: 1.7939949035644531 2023-01-22 10:38:01.480721: step: 638/466, loss: 0.3469640910625458 2023-01-22 10:38:02.118857: step: 640/466, loss: 0.15622952580451965 2023-01-22 10:38:02.723156: step: 642/466, loss: 0.3314429223537445 2023-01-22 10:38:03.400268: step: 644/466, loss: 0.17653773725032806 2023-01-22 10:38:03.995299: step: 646/466, loss: 0.3294999897480011 2023-01-22 10:38:04.578918: step: 648/466, loss: 1.521822452545166 2023-01-22 10:38:05.208851: step: 650/466, loss: 0.4291199743747711 2023-01-22 10:38:05.875287: step: 652/466, loss: 0.26721054315567017 2023-01-22 10:38:06.505340: step: 654/466, loss: 0.2262919545173645 2023-01-22 10:38:07.078719: step: 656/466, loss: 0.16066038608551025 2023-01-22 10:38:07.580348: step: 658/466, loss: 0.22145728766918182 2023-01-22 10:38:08.227320: step: 660/466, loss: 0.4337965250015259 2023-01-22 10:38:08.884691: step: 662/466, loss: 0.25202491879463196 2023-01-22 10:38:09.546517: step: 664/466, loss: 0.131889209151268 2023-01-22 10:38:10.121690: step: 666/466, loss: 0.3332974314689636 2023-01-22 10:38:10.659329: step: 668/466, loss: 0.19245274364948273 2023-01-22 10:38:11.253459: step: 670/466, loss: 0.41981491446495056 2023-01-22 10:38:11.841531: step: 672/466, loss: 0.503174364566803 2023-01-22 10:38:12.424457: step: 674/466, loss: 1.4129886627197266 2023-01-22 10:38:13.075806: step: 676/466, loss: 0.10373762249946594 2023-01-22 10:38:13.703304: step: 678/466, loss: 0.2581503093242645 2023-01-22 10:38:14.326069: step: 680/466, loss: 0.7148119211196899 2023-01-22 10:38:14.963573: step: 682/466, loss: 0.43798357248306274 2023-01-22 10:38:15.526070: step: 684/466, loss: 0.36510202288627625 2023-01-22 10:38:16.133539: step: 686/466, loss: 1.9389971494674683 2023-01-22 10:38:16.709675: step: 688/466, loss: 0.08822230994701385 2023-01-22 10:38:17.237299: step: 690/466, loss: 0.20560045540332794 2023-01-22 10:38:17.795403: step: 692/466, loss: 0.2151784747838974 2023-01-22 10:38:18.358402: step: 694/466, loss: 0.12266770005226135 2023-01-22 10:38:19.028880: step: 696/466, loss: 0.15356658399105072 2023-01-22 10:38:19.655791: step: 698/466, loss: 0.7142479419708252 2023-01-22 10:38:20.327943: step: 700/466, loss: 0.2554255723953247 2023-01-22 10:38:20.921171: step: 702/466, loss: 0.15858277678489685 2023-01-22 10:38:21.540009: step: 704/466, loss: 0.4641050696372986 2023-01-22 10:38:22.137556: step: 706/466, loss: 0.9413942098617554 2023-01-22 10:38:22.732678: step: 708/466, loss: 0.16942070424556732 2023-01-22 10:38:23.382592: step: 710/466, loss: 0.13268499076366425 2023-01-22 10:38:23.990037: step: 712/466, loss: 0.365239679813385 2023-01-22 10:38:24.619904: step: 714/466, loss: 0.6016151309013367 2023-01-22 10:38:25.199477: step: 716/466, loss: 0.29063844680786133 2023-01-22 10:38:25.786187: step: 718/466, loss: 0.4106799364089966 2023-01-22 10:38:26.403388: step: 720/466, loss: 0.146284282207489 2023-01-22 10:38:27.093983: step: 722/466, loss: 0.17649759352207184 2023-01-22 10:38:27.649754: step: 724/466, loss: 0.2214135378599167 2023-01-22 10:38:28.250433: step: 726/466, loss: 0.9062934517860413 2023-01-22 10:38:28.925863: step: 728/466, loss: 0.8165644407272339 2023-01-22 10:38:29.511703: step: 730/466, loss: 0.12466230988502502 2023-01-22 10:38:30.101690: step: 732/466, loss: 0.22552810609340668 2023-01-22 10:38:30.780432: step: 734/466, loss: 0.45871126651763916 2023-01-22 10:38:31.385629: step: 736/466, loss: 0.779687225818634 2023-01-22 10:38:31.982050: step: 738/466, loss: 0.35549259185791016 2023-01-22 10:38:32.626585: step: 740/466, loss: 0.19906482100486755 2023-01-22 10:38:33.234042: step: 742/466, loss: 0.21490739285945892 2023-01-22 10:38:33.814886: step: 744/466, loss: 0.3891027569770813 2023-01-22 10:38:34.416299: step: 746/466, loss: 0.16956500709056854 2023-01-22 10:38:35.020601: step: 748/466, loss: 0.34901368618011475 2023-01-22 10:38:35.577108: step: 750/466, loss: 0.255240261554718 2023-01-22 10:38:36.177364: step: 752/466, loss: 0.5560647249221802 2023-01-22 10:38:36.735241: step: 754/466, loss: 0.3323211073875427 2023-01-22 10:38:37.393378: step: 756/466, loss: 0.24396903812885284 2023-01-22 10:38:38.054337: step: 758/466, loss: 0.49413108825683594 2023-01-22 10:38:38.642817: step: 760/466, loss: 0.35466375946998596 2023-01-22 10:38:39.311088: step: 762/466, loss: 0.18038402497768402 2023-01-22 10:38:39.946051: step: 764/466, loss: 0.29585492610931396 2023-01-22 10:38:40.568132: step: 766/466, loss: 0.48583555221557617 2023-01-22 10:38:41.143479: step: 768/466, loss: 0.2323603630065918 2023-01-22 10:38:41.701715: step: 770/466, loss: 0.22142218053340912 2023-01-22 10:38:42.274998: step: 772/466, loss: 0.45981019735336304 2023-01-22 10:38:42.834310: step: 774/466, loss: 0.70528644323349 2023-01-22 10:38:43.439884: step: 776/466, loss: 0.4367358684539795 2023-01-22 10:38:44.036879: step: 778/466, loss: 0.16971944272518158 2023-01-22 10:38:44.670920: step: 780/466, loss: 0.5198857188224792 2023-01-22 10:38:45.333428: step: 782/466, loss: 0.37577539682388306 2023-01-22 10:38:45.952877: step: 784/466, loss: 0.9556046724319458 2023-01-22 10:38:46.587483: step: 786/466, loss: 0.3785717785358429 2023-01-22 10:38:47.213351: step: 788/466, loss: 0.2823404371738434 2023-01-22 10:38:47.813519: step: 790/466, loss: 2.3219895362854004 2023-01-22 10:38:48.432013: step: 792/466, loss: 0.31830859184265137 2023-01-22 10:38:48.992953: step: 794/466, loss: 0.29214802384376526 2023-01-22 10:38:49.562195: step: 796/466, loss: 0.17592476308345795 2023-01-22 10:38:50.164849: step: 798/466, loss: 0.7485669851303101 2023-01-22 10:38:50.793135: step: 800/466, loss: 0.12755541503429413 2023-01-22 10:38:51.380660: step: 802/466, loss: 0.28463253378868103 2023-01-22 10:38:51.974431: step: 804/466, loss: 0.5147067308425903 2023-01-22 10:38:52.657452: step: 806/466, loss: 0.3051072061061859 2023-01-22 10:38:53.283077: step: 808/466, loss: 1.1821234226226807 2023-01-22 10:38:53.899831: step: 810/466, loss: 0.9276929497718811 2023-01-22 10:38:54.599768: step: 812/466, loss: 0.4206128418445587 2023-01-22 10:38:55.198081: step: 814/466, loss: 0.5495182275772095 2023-01-22 10:38:55.845037: step: 816/466, loss: 0.21994075179100037 2023-01-22 10:38:56.390093: step: 818/466, loss: 0.15149745345115662 2023-01-22 10:38:57.125902: step: 820/466, loss: 1.3844549655914307 2023-01-22 10:38:57.680200: step: 822/466, loss: 2.7750017642974854 2023-01-22 10:38:58.312043: step: 824/466, loss: 0.33474108576774597 2023-01-22 10:38:58.911635: step: 826/466, loss: 0.11480654776096344 2023-01-22 10:38:59.538166: step: 828/466, loss: 0.3044185936450958 2023-01-22 10:39:00.158902: step: 830/466, loss: 0.31660595536231995 2023-01-22 10:39:00.738844: step: 832/466, loss: 0.8432048559188843 2023-01-22 10:39:01.378223: step: 834/466, loss: 3.6031930446624756 2023-01-22 10:39:02.126445: step: 836/466, loss: 0.1622489094734192 2023-01-22 10:39:02.795548: step: 838/466, loss: 0.3535557985305786 2023-01-22 10:39:03.394705: step: 840/466, loss: 0.377373069524765 2023-01-22 10:39:04.034551: step: 842/466, loss: 0.31769198179244995 2023-01-22 10:39:04.635834: step: 844/466, loss: 0.19945383071899414 2023-01-22 10:39:05.225817: step: 846/466, loss: 0.18304133415222168 2023-01-22 10:39:05.848254: step: 848/466, loss: 0.07066892832517624 2023-01-22 10:39:06.448481: step: 850/466, loss: 0.3190172612667084 2023-01-22 10:39:07.168634: step: 852/466, loss: 0.19037355482578278 2023-01-22 10:39:07.805201: step: 854/466, loss: 1.0925954580307007 2023-01-22 10:39:08.473723: step: 856/466, loss: 0.07573944330215454 2023-01-22 10:39:09.069765: step: 858/466, loss: 0.2416946142911911 2023-01-22 10:39:09.698248: step: 860/466, loss: 0.1962304413318634 2023-01-22 10:39:10.308505: step: 862/466, loss: 0.5230595469474792 2023-01-22 10:39:10.915784: step: 864/466, loss: 1.2808942794799805 2023-01-22 10:39:11.468949: step: 866/466, loss: 0.1189044788479805 2023-01-22 10:39:12.101534: step: 868/466, loss: 0.35028746724128723 2023-01-22 10:39:12.702257: step: 870/466, loss: 0.15857447683811188 2023-01-22 10:39:13.352622: step: 872/466, loss: 0.30284059047698975 2023-01-22 10:39:13.922422: step: 874/466, loss: 0.15418531000614166 2023-01-22 10:39:14.465541: step: 876/466, loss: 0.22523635625839233 2023-01-22 10:39:15.033240: step: 878/466, loss: 0.6470255255699158 2023-01-22 10:39:15.658469: step: 880/466, loss: 0.13579440116882324 2023-01-22 10:39:16.258911: step: 882/466, loss: 0.27756914496421814 2023-01-22 10:39:16.852814: step: 884/466, loss: 0.2099146693944931 2023-01-22 10:39:17.553400: step: 886/466, loss: 0.8106641173362732 2023-01-22 10:39:18.085109: step: 888/466, loss: 0.37550458312034607 2023-01-22 10:39:18.771061: step: 890/466, loss: 0.31646865606307983 2023-01-22 10:39:19.417277: step: 892/466, loss: 0.46495404839515686 2023-01-22 10:39:20.012725: step: 894/466, loss: 0.48244810104370117 2023-01-22 10:39:20.606324: step: 896/466, loss: 0.21186278760433197 2023-01-22 10:39:21.242111: step: 898/466, loss: 0.17754632234573364 2023-01-22 10:39:21.887682: step: 900/466, loss: 0.16431452333927155 2023-01-22 10:39:22.609948: step: 902/466, loss: 0.7147395610809326 2023-01-22 10:39:23.206531: step: 904/466, loss: 0.5099537372589111 2023-01-22 10:39:23.807318: step: 906/466, loss: 0.5110349655151367 2023-01-22 10:39:24.370781: step: 908/466, loss: 1.6138478517532349 2023-01-22 10:39:24.941714: step: 910/466, loss: 0.21494626998901367 2023-01-22 10:39:25.541886: step: 912/466, loss: 0.9132940769195557 2023-01-22 10:39:26.117525: step: 914/466, loss: 0.1571851521730423 2023-01-22 10:39:26.744971: step: 916/466, loss: 0.20044052600860596 2023-01-22 10:39:27.389371: step: 918/466, loss: 0.1431189775466919 2023-01-22 10:39:27.966219: step: 920/466, loss: 0.4292384088039398 2023-01-22 10:39:28.659152: step: 922/466, loss: 0.15444906055927277 2023-01-22 10:39:29.286366: step: 924/466, loss: 0.9784746170043945 2023-01-22 10:39:29.870396: step: 926/466, loss: 0.6480464339256287 2023-01-22 10:39:30.532127: step: 928/466, loss: 0.6187877655029297 2023-01-22 10:39:31.172094: step: 930/466, loss: 0.22937346994876862 2023-01-22 10:39:31.808718: step: 932/466, loss: 0.43938004970550537 ================================================== Loss: 0.429 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28702471875376667, 'r': 0.35074747415071295, 'f1': 0.3157026795515384}, 'combined': 0.23262302703797566, 'epoch': 9} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3328731910807604, 'r': 0.2982889634360061, 'f1': 0.3146335641722256}, 'combined': 0.20866889230075064, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28435316502936847, 'r': 0.292969927606016, 'f1': 0.288597242119359}, 'combined': 0.192398161412906, 'epoch': 9} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3439593580923461, 'r': 0.2802300917445002, 'f1': 0.3088413701955131}, 'combined': 0.20155963107496644, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2814701218024629, 'r': 0.3551757703958972, 'f1': 0.3140564278500635}, 'combined': 0.23140999946846783, 'epoch': 9} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3308734183854636, 'r': 0.29166091118926246, 'f1': 0.3100321961699194}, 'combined': 0.2056172078225372, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2212854349951124, 'r': 0.39199134199134195, 'f1': 0.28288034989065913}, 'combined': 0.1885868999271061, 'epoch': 9} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3409090909090909, 'r': 0.32608695652173914, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2413793103448276, 'f1': 0.28}, 'combined': 0.18666666666666668, 'epoch': 9} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2879032258064516, 'r': 0.2704545454545455, 'f1': 0.2789062500000001}, 'combined': 0.18593750000000003, 'epoch': 8} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.37654638536275015, 'r': 0.2927250157134063, 'f1': 0.3293866924386776}, 'combined': 0.21496815717050533, 'epoch': 8} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4868421052631579, 'r': 0.40217391304347827, 'f1': 0.44047619047619047}, 'combined': 0.2936507936507936, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2814701218024629, 'r': 0.3551757703958972, 'f1': 0.3140564278500635}, 'combined': 0.23140999946846783, 'epoch': 9} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3308734183854636, 'r': 0.29166091118926246, 'f1': 0.3100321961699194}, 'combined': 0.2056172078225372, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2413793103448276, 'f1': 0.28}, 'combined': 0.18666666666666668, 'epoch': 9} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:42:04.326953: step: 2/466, loss: 0.3424968421459198 2023-01-22 10:42:04.950844: step: 4/466, loss: 0.15314273536205292 2023-01-22 10:42:05.519058: step: 6/466, loss: 0.16525211930274963 2023-01-22 10:42:06.189350: step: 8/466, loss: 0.08957090228796005 2023-01-22 10:42:06.798983: step: 10/466, loss: 0.1828995943069458 2023-01-22 10:42:07.405442: step: 12/466, loss: 0.5272262096405029 2023-01-22 10:42:07.943817: step: 14/466, loss: 0.3937443494796753 2023-01-22 10:42:08.533568: step: 16/466, loss: 0.22258463501930237 2023-01-22 10:42:09.127892: step: 18/466, loss: 0.35958412289619446 2023-01-22 10:42:09.747719: step: 20/466, loss: 1.7579293251037598 2023-01-22 10:42:10.305733: step: 22/466, loss: 0.6147506833076477 2023-01-22 10:42:10.889071: step: 24/466, loss: 0.561853289604187 2023-01-22 10:42:11.510810: step: 26/466, loss: 0.08265829086303711 2023-01-22 10:42:12.147303: step: 28/466, loss: 0.1963891237974167 2023-01-22 10:42:12.809009: step: 30/466, loss: 0.15613555908203125 2023-01-22 10:42:13.408267: step: 32/466, loss: 0.14221565425395966 2023-01-22 10:42:14.034332: step: 34/466, loss: 0.11288049817085266 2023-01-22 10:42:14.641461: step: 36/466, loss: 0.23729105293750763 2023-01-22 10:42:15.283093: step: 38/466, loss: 0.41222819685935974 2023-01-22 10:42:15.873498: step: 40/466, loss: 0.1543423980474472 2023-01-22 10:42:16.435667: step: 42/466, loss: 0.10196036100387573 2023-01-22 10:42:17.038913: step: 44/466, loss: 0.1147770956158638 2023-01-22 10:42:17.653362: step: 46/466, loss: 0.09728725999593735 2023-01-22 10:42:18.261234: step: 48/466, loss: 0.08855711668729782 2023-01-22 10:42:18.927272: step: 50/466, loss: 0.10126923024654388 2023-01-22 10:42:19.541475: step: 52/466, loss: 0.44507497549057007 2023-01-22 10:42:20.127449: step: 54/466, loss: 0.31747809052467346 2023-01-22 10:42:20.677847: step: 56/466, loss: 0.2689839005470276 2023-01-22 10:42:21.271592: step: 58/466, loss: 0.41009822487831116 2023-01-22 10:42:21.879586: step: 60/466, loss: 0.09176215529441833 2023-01-22 10:42:22.476956: step: 62/466, loss: 0.24102617800235748 2023-01-22 10:42:23.080071: step: 64/466, loss: 0.08780214190483093 2023-01-22 10:42:23.681593: step: 66/466, loss: 0.13865149021148682 2023-01-22 10:42:24.307016: step: 68/466, loss: 0.13049353659152985 2023-01-22 10:42:24.921455: step: 70/466, loss: 0.292896032333374 2023-01-22 10:42:25.548637: step: 72/466, loss: 0.2684376835823059 2023-01-22 10:42:26.242145: step: 74/466, loss: 0.2586406171321869 2023-01-22 10:42:26.834593: step: 76/466, loss: 0.14662769436836243 2023-01-22 10:42:27.481475: step: 78/466, loss: 0.18522438406944275 2023-01-22 10:42:28.067258: step: 80/466, loss: 0.02016591839492321 2023-01-22 10:42:28.692926: step: 82/466, loss: 0.37746357917785645 2023-01-22 10:42:29.344697: step: 84/466, loss: 0.519001305103302 2023-01-22 10:42:29.988609: step: 86/466, loss: 0.126045823097229 2023-01-22 10:42:30.636752: step: 88/466, loss: 0.21811991930007935 2023-01-22 10:42:31.198327: step: 90/466, loss: 0.7372027635574341 2023-01-22 10:42:31.756765: step: 92/466, loss: 0.10083332657814026 2023-01-22 10:42:32.407590: step: 94/466, loss: 0.1068217009305954 2023-01-22 10:42:33.104843: step: 96/466, loss: 0.04024646803736687 2023-01-22 10:42:33.708058: step: 98/466, loss: 0.40056195855140686 2023-01-22 10:42:34.315301: step: 100/466, loss: 0.15211622416973114 2023-01-22 10:42:34.938805: step: 102/466, loss: 0.3278835415840149 2023-01-22 10:42:35.540318: step: 104/466, loss: 0.08469592779874802 2023-01-22 10:42:36.149502: step: 106/466, loss: 0.2144700288772583 2023-01-22 10:42:36.774252: step: 108/466, loss: 0.16949813067913055 2023-01-22 10:42:37.371191: step: 110/466, loss: 0.9435144662857056 2023-01-22 10:42:37.936607: step: 112/466, loss: 0.19128726422786713 2023-01-22 10:42:38.560878: step: 114/466, loss: 0.6171504259109497 2023-01-22 10:42:39.127426: step: 116/466, loss: 0.17125974595546722 2023-01-22 10:42:39.762741: step: 118/466, loss: 0.1719478815793991 2023-01-22 10:42:40.349195: step: 120/466, loss: 0.15581001341342926 2023-01-22 10:42:40.997856: step: 122/466, loss: 0.1323140412569046 2023-01-22 10:42:41.658955: step: 124/466, loss: 0.03781604394316673 2023-01-22 10:42:42.206571: step: 126/466, loss: 0.14860422909259796 2023-01-22 10:42:42.853719: step: 128/466, loss: 0.15132874250411987 2023-01-22 10:42:43.450050: step: 130/466, loss: 0.282259076833725 2023-01-22 10:42:43.990107: step: 132/466, loss: 0.6195818781852722 2023-01-22 10:42:44.532329: step: 134/466, loss: 0.14780747890472412 2023-01-22 10:42:45.174141: step: 136/466, loss: 0.19852077960968018 2023-01-22 10:42:45.756199: step: 138/466, loss: 3.3535280227661133 2023-01-22 10:42:46.380744: step: 140/466, loss: 0.3124248683452606 2023-01-22 10:42:46.966087: step: 142/466, loss: 0.50434410572052 2023-01-22 10:42:47.623102: step: 144/466, loss: 0.19816236197948456 2023-01-22 10:42:48.266679: step: 146/466, loss: 0.1461830735206604 2023-01-22 10:42:48.887471: step: 148/466, loss: 0.4960712194442749 2023-01-22 10:42:49.516616: step: 150/466, loss: 0.12675248086452484 2023-01-22 10:42:50.120655: step: 152/466, loss: 0.0832248106598854 2023-01-22 10:42:50.700586: step: 154/466, loss: 0.7244929075241089 2023-01-22 10:42:51.302689: step: 156/466, loss: 0.20706027746200562 2023-01-22 10:42:51.980940: step: 158/466, loss: 0.06518436223268509 2023-01-22 10:42:52.587633: step: 160/466, loss: 0.2813121974468231 2023-01-22 10:42:53.214583: step: 162/466, loss: 0.25889384746551514 2023-01-22 10:42:53.790580: step: 164/466, loss: 0.26677221059799194 2023-01-22 10:42:54.397000: step: 166/466, loss: 0.1657583862543106 2023-01-22 10:42:55.050400: step: 168/466, loss: 0.38243743777275085 2023-01-22 10:42:55.651681: step: 170/466, loss: 0.33843281865119934 2023-01-22 10:42:56.289419: step: 172/466, loss: 0.28743040561676025 2023-01-22 10:42:56.850819: step: 174/466, loss: 0.2466418594121933 2023-01-22 10:42:57.467316: step: 176/466, loss: 0.2965812087059021 2023-01-22 10:42:58.039481: step: 178/466, loss: 0.13026443123817444 2023-01-22 10:42:58.595347: step: 180/466, loss: 0.45092785358428955 2023-01-22 10:42:59.204908: step: 182/466, loss: 0.1328914910554886 2023-01-22 10:42:59.785868: step: 184/466, loss: 0.41911742091178894 2023-01-22 10:43:00.358966: step: 186/466, loss: 0.11339353024959564 2023-01-22 10:43:00.951107: step: 188/466, loss: 0.5373882055282593 2023-01-22 10:43:01.557867: step: 190/466, loss: 0.23385509848594666 2023-01-22 10:43:02.210441: step: 192/466, loss: 0.21484020352363586 2023-01-22 10:43:02.821448: step: 194/466, loss: 0.15819811820983887 2023-01-22 10:43:03.434491: step: 196/466, loss: 0.40069326758384705 2023-01-22 10:43:04.005238: step: 198/466, loss: 0.09636548161506653 2023-01-22 10:43:04.658577: step: 200/466, loss: 0.19350042939186096 2023-01-22 10:43:05.237599: step: 202/466, loss: 0.11633653938770294 2023-01-22 10:43:05.817679: step: 204/466, loss: 0.12276885658502579 2023-01-22 10:43:06.426167: step: 206/466, loss: 0.1648620367050171 2023-01-22 10:43:06.968452: step: 208/466, loss: 0.23498764634132385 2023-01-22 10:43:07.609209: step: 210/466, loss: 0.23746566474437714 2023-01-22 10:43:08.236885: step: 212/466, loss: 0.5610257387161255 2023-01-22 10:43:08.796492: step: 214/466, loss: 0.20346219837665558 2023-01-22 10:43:09.392182: step: 216/466, loss: 0.8227632641792297 2023-01-22 10:43:10.006669: step: 218/466, loss: 0.09616819769144058 2023-01-22 10:43:10.707732: step: 220/466, loss: 0.23829485476016998 2023-01-22 10:43:11.482842: step: 222/466, loss: 0.08007515221834183 2023-01-22 10:43:12.083613: step: 224/466, loss: 0.16075478494167328 2023-01-22 10:43:12.729219: step: 226/466, loss: 0.19215460121631622 2023-01-22 10:43:13.455668: step: 228/466, loss: 0.20689161121845245 2023-01-22 10:43:14.116472: step: 230/466, loss: 0.36305904388427734 2023-01-22 10:43:14.786335: step: 232/466, loss: 0.47161346673965454 2023-01-22 10:43:15.367503: step: 234/466, loss: 0.11965955048799515 2023-01-22 10:43:16.017504: step: 236/466, loss: 0.15132421255111694 2023-01-22 10:43:16.578546: step: 238/466, loss: 0.20713631808757782 2023-01-22 10:43:17.163161: step: 240/466, loss: 0.10391706228256226 2023-01-22 10:43:17.760729: step: 242/466, loss: 0.16056931018829346 2023-01-22 10:43:18.378911: step: 244/466, loss: 0.09839039295911789 2023-01-22 10:43:18.969372: step: 246/466, loss: 0.12909168004989624 2023-01-22 10:43:19.542536: step: 248/466, loss: 0.15248292684555054 2023-01-22 10:43:20.142418: step: 250/466, loss: 0.06441118568181992 2023-01-22 10:43:20.754421: step: 252/466, loss: 0.3634990453720093 2023-01-22 10:43:21.381896: step: 254/466, loss: 0.12455514073371887 2023-01-22 10:43:22.079142: step: 256/466, loss: 0.4242327809333801 2023-01-22 10:43:22.712058: step: 258/466, loss: 0.2633454501628876 2023-01-22 10:43:23.303287: step: 260/466, loss: 0.10316567122936249 2023-01-22 10:43:23.851829: step: 262/466, loss: 0.4540386199951172 2023-01-22 10:43:24.412347: step: 264/466, loss: 0.08635711669921875 2023-01-22 10:43:25.061896: step: 266/466, loss: 0.6375790238380432 2023-01-22 10:43:25.679169: step: 268/466, loss: 0.2528059780597687 2023-01-22 10:43:26.268929: step: 270/466, loss: 0.12142959237098694 2023-01-22 10:43:26.841223: step: 272/466, loss: 0.9206447005271912 2023-01-22 10:43:27.444493: step: 274/466, loss: 0.41452789306640625 2023-01-22 10:43:28.076295: step: 276/466, loss: 0.17503076791763306 2023-01-22 10:43:28.671115: step: 278/466, loss: 0.2776041328907013 2023-01-22 10:43:29.254685: step: 280/466, loss: 0.10080447047948837 2023-01-22 10:43:29.812654: step: 282/466, loss: 0.48633429408073425 2023-01-22 10:43:30.380556: step: 284/466, loss: 0.12377966940402985 2023-01-22 10:43:31.047881: step: 286/466, loss: 0.19372420012950897 2023-01-22 10:43:31.650987: step: 288/466, loss: 0.8181790709495544 2023-01-22 10:43:32.258599: step: 290/466, loss: 0.2616439461708069 2023-01-22 10:43:32.938393: step: 292/466, loss: 0.558844804763794 2023-01-22 10:43:33.546630: step: 294/466, loss: 0.4766530990600586 2023-01-22 10:43:34.123048: step: 296/466, loss: 0.22030682861804962 2023-01-22 10:43:34.729980: step: 298/466, loss: 0.2769697904586792 2023-01-22 10:43:35.251755: step: 300/466, loss: 0.1449180245399475 2023-01-22 10:43:35.893966: step: 302/466, loss: 0.11370931565761566 2023-01-22 10:43:36.538894: step: 304/466, loss: 0.5098021626472473 2023-01-22 10:43:37.223790: step: 306/466, loss: 0.9076952338218689 2023-01-22 10:43:37.850784: step: 308/466, loss: 0.2744629979133606 2023-01-22 10:43:38.444907: step: 310/466, loss: 0.1508442908525467 2023-01-22 10:43:39.052652: step: 312/466, loss: 0.5850167274475098 2023-01-22 10:43:39.678952: step: 314/466, loss: 0.17930437624454498 2023-01-22 10:43:40.263747: step: 316/466, loss: 1.0136152505874634 2023-01-22 10:43:40.918677: step: 318/466, loss: 0.31405845284461975 2023-01-22 10:43:41.472067: step: 320/466, loss: 0.17222459614276886 2023-01-22 10:43:42.111687: step: 322/466, loss: 0.4613257944583893 2023-01-22 10:43:42.751422: step: 324/466, loss: 0.677399754524231 2023-01-22 10:43:43.379630: step: 326/466, loss: 0.26653000712394714 2023-01-22 10:43:44.033070: step: 328/466, loss: 0.6625899076461792 2023-01-22 10:43:44.633334: step: 330/466, loss: 0.14762520790100098 2023-01-22 10:43:45.319500: step: 332/466, loss: 0.2265157699584961 2023-01-22 10:43:45.924472: step: 334/466, loss: 0.18762357532978058 2023-01-22 10:43:46.553755: step: 336/466, loss: 0.1634010672569275 2023-01-22 10:43:47.113873: step: 338/466, loss: 0.17027297616004944 2023-01-22 10:43:47.670788: step: 340/466, loss: 0.23633556067943573 2023-01-22 10:43:48.271326: step: 342/466, loss: 0.1393851935863495 2023-01-22 10:43:48.928279: step: 344/466, loss: 0.276138037443161 2023-01-22 10:43:49.514546: step: 346/466, loss: 0.22308233380317688 2023-01-22 10:43:50.089750: step: 348/466, loss: 0.0407722033560276 2023-01-22 10:43:50.690289: step: 350/466, loss: 0.15903085470199585 2023-01-22 10:43:51.397169: step: 352/466, loss: 0.141182079911232 2023-01-22 10:43:52.076182: step: 354/466, loss: 0.11356158554553986 2023-01-22 10:43:52.683395: step: 356/466, loss: 0.11174963414669037 2023-01-22 10:43:53.223769: step: 358/466, loss: 0.1275566965341568 2023-01-22 10:43:53.835031: step: 360/466, loss: 0.33931320905685425 2023-01-22 10:43:54.433987: step: 362/466, loss: 0.3848154842853546 2023-01-22 10:43:55.052751: step: 364/466, loss: 0.1476249396800995 2023-01-22 10:43:55.643078: step: 366/466, loss: 0.6467593908309937 2023-01-22 10:43:56.249160: step: 368/466, loss: 0.48849937319755554 2023-01-22 10:43:56.884239: step: 370/466, loss: 0.2312539666891098 2023-01-22 10:43:57.478448: step: 372/466, loss: 0.40469416975975037 2023-01-22 10:43:58.117585: step: 374/466, loss: 0.8057838082313538 2023-01-22 10:43:58.758923: step: 376/466, loss: 0.1687404364347458 2023-01-22 10:43:59.426165: step: 378/466, loss: 0.36339056491851807 2023-01-22 10:44:00.011558: step: 380/466, loss: 0.15135253965854645 2023-01-22 10:44:00.676297: step: 382/466, loss: 0.7757834792137146 2023-01-22 10:44:01.326578: step: 384/466, loss: 0.19134391844272614 2023-01-22 10:44:01.922711: step: 386/466, loss: 0.2958052158355713 2023-01-22 10:44:02.535290: step: 388/466, loss: 0.2449759989976883 2023-01-22 10:44:03.140766: step: 390/466, loss: 0.19907286763191223 2023-01-22 10:44:03.734216: step: 392/466, loss: 0.28933587670326233 2023-01-22 10:44:04.333198: step: 394/466, loss: 0.3133198320865631 2023-01-22 10:44:04.895944: step: 396/466, loss: 0.30926313996315 2023-01-22 10:44:05.568896: step: 398/466, loss: 0.3633095324039459 2023-01-22 10:44:06.117648: step: 400/466, loss: 0.20471030473709106 2023-01-22 10:44:06.697429: step: 402/466, loss: 0.2586519420146942 2023-01-22 10:44:07.332900: step: 404/466, loss: 0.24502499401569366 2023-01-22 10:44:07.951597: step: 406/466, loss: 0.23827120661735535 2023-01-22 10:44:08.514851: step: 408/466, loss: 0.23574049770832062 2023-01-22 10:44:09.144239: step: 410/466, loss: 0.14450325071811676 2023-01-22 10:44:09.748428: step: 412/466, loss: 0.8013572096824646 2023-01-22 10:44:10.396661: step: 414/466, loss: 0.1458197832107544 2023-01-22 10:44:10.968948: step: 416/466, loss: 0.5983514785766602 2023-01-22 10:44:11.576837: step: 418/466, loss: 0.5537610650062561 2023-01-22 10:44:12.163375: step: 420/466, loss: 0.3009760081768036 2023-01-22 10:44:12.788170: step: 422/466, loss: 0.911166787147522 2023-01-22 10:44:13.451846: step: 424/466, loss: 0.15086902678012848 2023-01-22 10:44:14.040088: step: 426/466, loss: 0.1715857833623886 2023-01-22 10:44:14.642444: step: 428/466, loss: 0.861497700214386 2023-01-22 10:44:15.289509: step: 430/466, loss: 0.25514018535614014 2023-01-22 10:44:15.874519: step: 432/466, loss: 0.2692078649997711 2023-01-22 10:44:16.491189: step: 434/466, loss: 0.556423544883728 2023-01-22 10:44:17.165853: step: 436/466, loss: 0.17007210850715637 2023-01-22 10:44:17.826811: step: 438/466, loss: 0.30793359875679016 2023-01-22 10:44:18.450811: step: 440/466, loss: 0.22319169342517853 2023-01-22 10:44:19.089318: step: 442/466, loss: 0.20929493010044098 2023-01-22 10:44:19.789362: step: 444/466, loss: 0.15269042551517487 2023-01-22 10:44:20.425456: step: 446/466, loss: 0.2763131856918335 2023-01-22 10:44:21.068260: step: 448/466, loss: 0.23531506955623627 2023-01-22 10:44:21.680036: step: 450/466, loss: 0.07401667535305023 2023-01-22 10:44:22.300860: step: 452/466, loss: 0.6907615661621094 2023-01-22 10:44:22.908503: step: 454/466, loss: 0.1963689923286438 2023-01-22 10:44:23.503338: step: 456/466, loss: 0.18842215836048126 2023-01-22 10:44:24.098172: step: 458/466, loss: 0.1701459288597107 2023-01-22 10:44:24.756049: step: 460/466, loss: 0.2941122353076935 2023-01-22 10:44:25.393160: step: 462/466, loss: 0.3442278802394867 2023-01-22 10:44:26.056105: step: 464/466, loss: 0.14135023951530457 2023-01-22 10:44:26.692860: step: 466/466, loss: 0.39910465478897095 2023-01-22 10:44:27.305804: step: 468/466, loss: 0.13927897810935974 2023-01-22 10:44:27.951065: step: 470/466, loss: 0.34257107973098755 2023-01-22 10:44:28.567387: step: 472/466, loss: 0.17023462057113647 2023-01-22 10:44:29.165595: step: 474/466, loss: 0.15634143352508545 2023-01-22 10:44:29.773246: step: 476/466, loss: 0.7175696492195129 2023-01-22 10:44:30.416936: step: 478/466, loss: 0.8162303566932678 2023-01-22 10:44:30.998275: step: 480/466, loss: 0.13726294040679932 2023-01-22 10:44:31.619724: step: 482/466, loss: 0.24070516228675842 2023-01-22 10:44:32.233898: step: 484/466, loss: 0.12580016255378723 2023-01-22 10:44:32.910939: step: 486/466, loss: 0.10945683717727661 2023-01-22 10:44:33.517411: step: 488/466, loss: 0.4962387979030609 2023-01-22 10:44:34.157303: step: 490/466, loss: 0.2147718220949173 2023-01-22 10:44:34.739232: step: 492/466, loss: 0.11244887858629227 2023-01-22 10:44:35.306319: step: 494/466, loss: 0.500853419303894 2023-01-22 10:44:35.926415: step: 496/466, loss: 0.3162543475627899 2023-01-22 10:44:36.549977: step: 498/466, loss: 0.13642749190330505 2023-01-22 10:44:37.181004: step: 500/466, loss: 0.19492042064666748 2023-01-22 10:44:37.813587: step: 502/466, loss: 0.4172359108924866 2023-01-22 10:44:38.420354: step: 504/466, loss: 0.13469327986240387 2023-01-22 10:44:39.022253: step: 506/466, loss: 0.36627480387687683 2023-01-22 10:44:39.626558: step: 508/466, loss: 0.19249746203422546 2023-01-22 10:44:40.254749: step: 510/466, loss: 0.24976111948490143 2023-01-22 10:44:40.854507: step: 512/466, loss: 0.6852096319198608 2023-01-22 10:44:41.433413: step: 514/466, loss: 0.2618003189563751 2023-01-22 10:44:42.031005: step: 516/466, loss: 0.189588725566864 2023-01-22 10:44:42.714978: step: 518/466, loss: 0.22583279013633728 2023-01-22 10:44:43.367208: step: 520/466, loss: 0.17520493268966675 2023-01-22 10:44:43.942013: step: 522/466, loss: 0.17015887796878815 2023-01-22 10:44:44.561548: step: 524/466, loss: 0.08240482211112976 2023-01-22 10:44:45.188606: step: 526/466, loss: 0.24475426971912384 2023-01-22 10:44:45.749661: step: 528/466, loss: 0.470389723777771 2023-01-22 10:44:46.293443: step: 530/466, loss: 0.0590042769908905 2023-01-22 10:44:46.896844: step: 532/466, loss: 0.37143149971961975 2023-01-22 10:44:47.535632: step: 534/466, loss: 0.14989838004112244 2023-01-22 10:44:48.156299: step: 536/466, loss: 0.2862009108066559 2023-01-22 10:44:48.794502: step: 538/466, loss: 0.26238441467285156 2023-01-22 10:44:49.404583: step: 540/466, loss: 0.19798694550991058 2023-01-22 10:44:49.973057: step: 542/466, loss: 0.1776721179485321 2023-01-22 10:44:50.594834: step: 544/466, loss: 0.13989011943340302 2023-01-22 10:44:51.234254: step: 546/466, loss: 0.26300859451293945 2023-01-22 10:44:51.787960: step: 548/466, loss: 0.1900997906923294 2023-01-22 10:44:52.378002: step: 550/466, loss: 0.19733339548110962 2023-01-22 10:44:52.978217: step: 552/466, loss: 0.10670574754476547 2023-01-22 10:44:53.568064: step: 554/466, loss: 0.23030467331409454 2023-01-22 10:44:54.178568: step: 556/466, loss: 0.15442197024822235 2023-01-22 10:44:54.813791: step: 558/466, loss: 0.46626031398773193 2023-01-22 10:44:55.389133: step: 560/466, loss: 0.17873597145080566 2023-01-22 10:44:56.029367: step: 562/466, loss: 0.08750782907009125 2023-01-22 10:44:56.622442: step: 564/466, loss: 0.7056400775909424 2023-01-22 10:44:57.234778: step: 566/466, loss: 0.1467796415090561 2023-01-22 10:44:57.838537: step: 568/466, loss: 0.1509588658809662 2023-01-22 10:44:58.463771: step: 570/466, loss: 0.2639663517475128 2023-01-22 10:44:59.127171: step: 572/466, loss: 0.17098958790302277 2023-01-22 10:44:59.670340: step: 574/466, loss: 0.06614339351654053 2023-01-22 10:45:00.303173: step: 576/466, loss: 0.330814003944397 2023-01-22 10:45:00.877209: step: 578/466, loss: 0.7567887902259827 2023-01-22 10:45:01.501723: step: 580/466, loss: 0.45156949758529663 2023-01-22 10:45:02.136633: step: 582/466, loss: 0.09206489473581314 2023-01-22 10:45:02.704342: step: 584/466, loss: 0.23979876935482025 2023-01-22 10:45:03.272882: step: 586/466, loss: 0.21902264654636383 2023-01-22 10:45:03.849066: step: 588/466, loss: 0.1813296377658844 2023-01-22 10:45:04.471063: step: 590/466, loss: 0.4988095462322235 2023-01-22 10:45:05.023223: step: 592/466, loss: 0.3031708300113678 2023-01-22 10:45:05.651535: step: 594/466, loss: 0.27922341227531433 2023-01-22 10:45:06.344629: step: 596/466, loss: 0.3294333815574646 2023-01-22 10:45:07.007619: step: 598/466, loss: 0.16292539238929749 2023-01-22 10:45:07.607271: step: 600/466, loss: 0.26730597019195557 2023-01-22 10:45:08.263529: step: 602/466, loss: 0.23192861676216125 2023-01-22 10:45:08.840101: step: 604/466, loss: 0.17797262966632843 2023-01-22 10:45:09.465099: step: 606/466, loss: 0.1849815845489502 2023-01-22 10:45:10.092017: step: 608/466, loss: 0.849368691444397 2023-01-22 10:45:10.760685: step: 610/466, loss: 0.17126432061195374 2023-01-22 10:45:11.333126: step: 612/466, loss: 0.1651676893234253 2023-01-22 10:45:11.935738: step: 614/466, loss: 0.15718361735343933 2023-01-22 10:45:12.543903: step: 616/466, loss: 0.7476538419723511 2023-01-22 10:45:13.216309: step: 618/466, loss: 0.5841106176376343 2023-01-22 10:45:13.767450: step: 620/466, loss: 0.3979680836200714 2023-01-22 10:45:14.354507: step: 622/466, loss: 0.2936483919620514 2023-01-22 10:45:14.973900: step: 624/466, loss: 0.26303133368492126 2023-01-22 10:45:15.560232: step: 626/466, loss: 1.160338044166565 2023-01-22 10:45:16.124765: step: 628/466, loss: 0.100038543343544 2023-01-22 10:45:16.828178: step: 630/466, loss: 0.5164868235588074 2023-01-22 10:45:17.440536: step: 632/466, loss: 0.19388951361179352 2023-01-22 10:45:17.970751: step: 634/466, loss: 0.5035961270332336 2023-01-22 10:45:18.585214: step: 636/466, loss: 0.18198414146900177 2023-01-22 10:45:19.198733: step: 638/466, loss: 0.29664215445518494 2023-01-22 10:45:19.808729: step: 640/466, loss: 0.2572523355484009 2023-01-22 10:45:20.358053: step: 642/466, loss: 0.09871802479028702 2023-01-22 10:45:20.974862: step: 644/466, loss: 0.39587247371673584 2023-01-22 10:45:21.567898: step: 646/466, loss: 0.1277804970741272 2023-01-22 10:45:22.102621: step: 648/466, loss: 0.12480014562606812 2023-01-22 10:45:22.701770: step: 650/466, loss: 0.34665799140930176 2023-01-22 10:45:23.331179: step: 652/466, loss: 0.12297838926315308 2023-01-22 10:45:23.938281: step: 654/466, loss: 0.417325496673584 2023-01-22 10:45:24.595009: step: 656/466, loss: 0.5616034865379333 2023-01-22 10:45:25.127337: step: 658/466, loss: 0.7238473296165466 2023-01-22 10:45:25.697073: step: 660/466, loss: 0.3585425913333893 2023-01-22 10:45:26.286551: step: 662/466, loss: 0.3884541988372803 2023-01-22 10:45:26.865126: step: 664/466, loss: 0.6369414925575256 2023-01-22 10:45:27.417627: step: 666/466, loss: 0.25105607509613037 2023-01-22 10:45:28.075943: step: 668/466, loss: 0.4268193244934082 2023-01-22 10:45:28.735962: step: 670/466, loss: 0.2666698098182678 2023-01-22 10:45:29.307627: step: 672/466, loss: 0.31034159660339355 2023-01-22 10:45:29.891031: step: 674/466, loss: 0.13177955150604248 2023-01-22 10:45:30.513494: step: 676/466, loss: 0.8183972835540771 2023-01-22 10:45:31.093052: step: 678/466, loss: 0.41874760389328003 2023-01-22 10:45:31.747696: step: 680/466, loss: 0.21547198295593262 2023-01-22 10:45:32.423255: step: 682/466, loss: 0.08435779809951782 2023-01-22 10:45:33.001357: step: 684/466, loss: 0.22993096709251404 2023-01-22 10:45:33.665557: step: 686/466, loss: 0.1858258694410324 2023-01-22 10:45:34.272594: step: 688/466, loss: 0.35966718196868896 2023-01-22 10:45:34.815600: step: 690/466, loss: 0.3706780672073364 2023-01-22 10:45:35.369996: step: 692/466, loss: 0.36994704604148865 2023-01-22 10:45:36.003313: step: 694/466, loss: 0.564039409160614 2023-01-22 10:45:36.612490: step: 696/466, loss: 0.06898679584264755 2023-01-22 10:45:37.202606: step: 698/466, loss: 0.46038195490837097 2023-01-22 10:45:37.885515: step: 700/466, loss: 0.28017866611480713 2023-01-22 10:45:38.537170: step: 702/466, loss: 0.24359598755836487 2023-01-22 10:45:39.154316: step: 704/466, loss: 0.26845529675483704 2023-01-22 10:45:39.824327: step: 706/466, loss: 0.18026471138000488 2023-01-22 10:45:40.405467: step: 708/466, loss: 0.323679655790329 2023-01-22 10:45:40.995814: step: 710/466, loss: 0.11869863420724869 2023-01-22 10:45:41.663201: step: 712/466, loss: 0.30867138504981995 2023-01-22 10:45:42.260024: step: 714/466, loss: 1.2142316102981567 2023-01-22 10:45:42.858565: step: 716/466, loss: 0.23127204179763794 2023-01-22 10:45:43.434447: step: 718/466, loss: 0.05211823433637619 2023-01-22 10:45:44.015949: step: 720/466, loss: 0.15031933784484863 2023-01-22 10:45:44.639295: step: 722/466, loss: 0.10426419228315353 2023-01-22 10:45:45.262565: step: 724/466, loss: 0.4317232370376587 2023-01-22 10:45:45.899606: step: 726/466, loss: 0.6303207278251648 2023-01-22 10:45:46.526007: step: 728/466, loss: 0.12916883826255798 2023-01-22 10:45:47.070681: step: 730/466, loss: 0.22627326846122742 2023-01-22 10:45:47.734680: step: 732/466, loss: 0.5379096865653992 2023-01-22 10:45:48.365317: step: 734/466, loss: 0.4592354893684387 2023-01-22 10:45:48.967618: step: 736/466, loss: 0.1455874741077423 2023-01-22 10:45:49.574236: step: 738/466, loss: 0.13084712624549866 2023-01-22 10:45:50.228787: step: 740/466, loss: 0.682135283946991 2023-01-22 10:45:50.835061: step: 742/466, loss: 0.8221052885055542 2023-01-22 10:45:51.507595: step: 744/466, loss: 0.20233270525932312 2023-01-22 10:45:52.111558: step: 746/466, loss: 0.045890893787145615 2023-01-22 10:45:52.721945: step: 748/466, loss: 0.12642249464988708 2023-01-22 10:45:53.358012: step: 750/466, loss: 0.601008951663971 2023-01-22 10:45:54.138590: step: 752/466, loss: 2.653719902038574 2023-01-22 10:45:54.711252: step: 754/466, loss: 0.1881476491689682 2023-01-22 10:45:55.310267: step: 756/466, loss: 0.21659430861473083 2023-01-22 10:45:55.924847: step: 758/466, loss: 0.1342061311006546 2023-01-22 10:45:56.541118: step: 760/466, loss: 0.44529831409454346 2023-01-22 10:45:57.159309: step: 762/466, loss: 0.4104353189468384 2023-01-22 10:45:57.859062: step: 764/466, loss: 0.3914719223976135 2023-01-22 10:45:58.493254: step: 766/466, loss: 3.8568103313446045 2023-01-22 10:45:59.083307: step: 768/466, loss: 0.1950238198041916 2023-01-22 10:45:59.668294: step: 770/466, loss: 0.4235544502735138 2023-01-22 10:46:00.288479: step: 772/466, loss: 0.15523557364940643 2023-01-22 10:46:00.914881: step: 774/466, loss: 0.24269673228263855 2023-01-22 10:46:01.613599: step: 776/466, loss: 0.2621227204799652 2023-01-22 10:46:02.309026: step: 778/466, loss: 0.2298140972852707 2023-01-22 10:46:02.894079: step: 780/466, loss: 0.18909944593906403 2023-01-22 10:46:03.539436: step: 782/466, loss: 0.4999868869781494 2023-01-22 10:46:04.136856: step: 784/466, loss: 0.34763866662979126 2023-01-22 10:46:04.765700: step: 786/466, loss: 3.5766942501068115 2023-01-22 10:46:05.435379: step: 788/466, loss: 2.156630039215088 2023-01-22 10:46:06.049553: step: 790/466, loss: 0.24687305092811584 2023-01-22 10:46:06.613552: step: 792/466, loss: 0.39704298973083496 2023-01-22 10:46:07.199751: step: 794/466, loss: 0.35833752155303955 2023-01-22 10:46:07.859011: step: 796/466, loss: 0.3989448547363281 2023-01-22 10:46:08.437819: step: 798/466, loss: 0.1479235291481018 2023-01-22 10:46:09.019752: step: 800/466, loss: 0.1703123152256012 2023-01-22 10:46:09.607199: step: 802/466, loss: 2.342402935028076 2023-01-22 10:46:10.200081: step: 804/466, loss: 0.1217118352651596 2023-01-22 10:46:10.761038: step: 806/466, loss: 0.44758445024490356 2023-01-22 10:46:11.332670: step: 808/466, loss: 0.7267645001411438 2023-01-22 10:46:11.917773: step: 810/466, loss: 0.1048843264579773 2023-01-22 10:46:12.501293: step: 812/466, loss: 0.1265694946050644 2023-01-22 10:46:13.112813: step: 814/466, loss: 0.14260146021842957 2023-01-22 10:46:13.752230: step: 816/466, loss: 0.1159842237830162 2023-01-22 10:46:14.354649: step: 818/466, loss: 0.24127842485904694 2023-01-22 10:46:14.966150: step: 820/466, loss: 0.3379100561141968 2023-01-22 10:46:15.539358: step: 822/466, loss: 0.41297850012779236 2023-01-22 10:46:16.091258: step: 824/466, loss: 0.4508364796638489 2023-01-22 10:46:16.690084: step: 826/466, loss: 0.1604514718055725 2023-01-22 10:46:17.261140: step: 828/466, loss: 0.16290849447250366 2023-01-22 10:46:17.831215: step: 830/466, loss: 0.1882670521736145 2023-01-22 10:46:18.477371: step: 832/466, loss: 0.9084370732307434 2023-01-22 10:46:19.148408: step: 834/466, loss: 0.17885644733905792 2023-01-22 10:46:19.786793: step: 836/466, loss: 0.6350449323654175 2023-01-22 10:46:20.405403: step: 838/466, loss: 0.25155529379844666 2023-01-22 10:46:20.962993: step: 840/466, loss: 0.1859191209077835 2023-01-22 10:46:21.649549: step: 842/466, loss: 0.6225964426994324 2023-01-22 10:46:22.227238: step: 844/466, loss: 0.32836204767227173 2023-01-22 10:46:22.843818: step: 846/466, loss: 0.12162549793720245 2023-01-22 10:46:23.466784: step: 848/466, loss: 0.3245471119880676 2023-01-22 10:46:24.037188: step: 850/466, loss: 0.17035870254039764 2023-01-22 10:46:24.598794: step: 852/466, loss: 0.14016790688037872 2023-01-22 10:46:25.196623: step: 854/466, loss: 1.0957573652267456 2023-01-22 10:46:25.789505: step: 856/466, loss: 0.17246706783771515 2023-01-22 10:46:26.344656: step: 858/466, loss: 0.09959319233894348 2023-01-22 10:46:26.946639: step: 860/466, loss: 0.06413392722606659 2023-01-22 10:46:27.549213: step: 862/466, loss: 0.11291803419589996 2023-01-22 10:46:28.170956: step: 864/466, loss: 0.1998555213212967 2023-01-22 10:46:28.787463: step: 866/466, loss: 0.8061385750770569 2023-01-22 10:46:29.408720: step: 868/466, loss: 0.21117821335792542 2023-01-22 10:46:30.063641: step: 870/466, loss: 0.386350154876709 2023-01-22 10:46:30.663784: step: 872/466, loss: 0.38534924387931824 2023-01-22 10:46:31.278169: step: 874/466, loss: 0.43389642238616943 2023-01-22 10:46:31.864809: step: 876/466, loss: 0.18496811389923096 2023-01-22 10:46:32.456027: step: 878/466, loss: 0.6521280407905579 2023-01-22 10:46:33.028213: step: 880/466, loss: 0.9200221300125122 2023-01-22 10:46:33.627667: step: 882/466, loss: 0.18344150483608246 2023-01-22 10:46:34.202461: step: 884/466, loss: 0.2574477791786194 2023-01-22 10:46:34.881982: step: 886/466, loss: 0.7089791893959045 2023-01-22 10:46:35.645101: step: 888/466, loss: 1.1452258825302124 2023-01-22 10:46:36.315473: step: 890/466, loss: 0.13744662702083588 2023-01-22 10:46:36.907749: step: 892/466, loss: 0.14688539505004883 2023-01-22 10:46:37.453925: step: 894/466, loss: 0.09006017446517944 2023-01-22 10:46:38.097309: step: 896/466, loss: 0.11454999446868896 2023-01-22 10:46:38.782080: step: 898/466, loss: 0.44436606764793396 2023-01-22 10:46:39.363846: step: 900/466, loss: 0.15809577703475952 2023-01-22 10:46:39.975243: step: 902/466, loss: 0.25707101821899414 2023-01-22 10:46:40.624792: step: 904/466, loss: 0.31899547576904297 2023-01-22 10:46:41.212314: step: 906/466, loss: 0.07329529523849487 2023-01-22 10:46:41.808763: step: 908/466, loss: 0.4771485924720764 2023-01-22 10:46:42.391080: step: 910/466, loss: 0.32161709666252136 2023-01-22 10:46:42.972470: step: 912/466, loss: 0.20154333114624023 2023-01-22 10:46:43.545975: step: 914/466, loss: 0.19467943906784058 2023-01-22 10:46:44.104413: step: 916/466, loss: 0.11051704734563828 2023-01-22 10:46:44.762681: step: 918/466, loss: 1.6850031614303589 2023-01-22 10:46:45.461074: step: 920/466, loss: 0.3364313244819641 2023-01-22 10:46:46.116394: step: 922/466, loss: 1.0680686235427856 2023-01-22 10:46:46.740750: step: 924/466, loss: 0.15946362912654877 2023-01-22 10:46:47.335682: step: 926/466, loss: 0.0858464390039444 2023-01-22 10:46:47.951220: step: 928/466, loss: 0.32690322399139404 2023-01-22 10:46:48.569345: step: 930/466, loss: 0.2521716356277466 2023-01-22 10:46:49.208216: step: 932/466, loss: 0.3197533190250397 ================================================== Loss: 0.339 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2855706521739131, 'r': 0.37389705882352947, 'f1': 0.3238188167625309}, 'combined': 0.23860333866712802, 'epoch': 10} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3331597360778062, 'r': 0.3305726458062788, 'f1': 0.33186114897312874}, 'combined': 0.22009444076974338, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2791124217992775, 'r': 0.3076580103923854, 'f1': 0.2926908639408639}, 'combined': 0.1951272426272426, 'epoch': 10} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.34934765710323107, 'r': 0.3114339578827254, 'f1': 0.32930311940058665}, 'combined': 0.21491361476669862, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27268278779354727, 'r': 0.3678889224311425, 'f1': 0.3132107627160131}, 'combined': 0.2307868777907465, 'epoch': 10} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3260683107941324, 'r': 0.3204269905381785, 'f1': 0.3232230375760335}, 'combined': 0.21436553787426052, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25595238095238093, 'r': 0.4095238095238095, 'f1': 0.315018315018315}, 'combined': 0.21001221001221, 'epoch': 10} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.4782608695652174, 'f1': 0.4680851063829787}, 'combined': 0.31205673758865243, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29347826086956524, 'r': 0.23275862068965517, 'f1': 0.25961538461538464}, 'combined': 0.17307692307692307, 'epoch': 10} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2791124217992775, 'r': 0.3076580103923854, 'f1': 0.2926908639408639}, 'combined': 0.1951272426272426, 'epoch': 10} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.34934765710323107, 'r': 0.3114339578827254, 'f1': 0.32930311940058665}, 'combined': 0.21491361476669862, 'epoch': 10} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.4782608695652174, 'f1': 0.4680851063829787}, 'combined': 0.31205673758865243, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2814701218024629, 'r': 0.3551757703958972, 'f1': 0.3140564278500635}, 'combined': 0.23140999946846783, 'epoch': 9} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3308734183854636, 'r': 0.29166091118926246, 'f1': 0.3100321961699194}, 'combined': 0.2056172078225372, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2413793103448276, 'f1': 0.28}, 'combined': 0.18666666666666668, 'epoch': 9} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:49:21.325681: step: 2/466, loss: 0.18607491254806519 2023-01-22 10:49:21.906991: step: 4/466, loss: 0.3782379627227783 2023-01-22 10:49:22.476501: step: 6/466, loss: 0.14562508463859558 2023-01-22 10:49:23.134304: step: 8/466, loss: 0.5758311748504639 2023-01-22 10:49:23.746979: step: 10/466, loss: 0.08559700101613998 2023-01-22 10:49:24.402221: step: 12/466, loss: 0.03899945691227913 2023-01-22 10:49:25.032482: step: 14/466, loss: 0.2517959773540497 2023-01-22 10:49:25.564165: step: 16/466, loss: 0.15470734238624573 2023-01-22 10:49:26.216590: step: 18/466, loss: 0.5453633666038513 2023-01-22 10:49:26.805024: step: 20/466, loss: 0.17590969800949097 2023-01-22 10:49:27.428294: step: 22/466, loss: 0.45994845032691956 2023-01-22 10:49:28.104355: step: 24/466, loss: 0.26488208770751953 2023-01-22 10:49:28.676174: step: 26/466, loss: 0.09462356567382812 2023-01-22 10:49:29.290658: step: 28/466, loss: 0.14928802847862244 2023-01-22 10:49:29.971000: step: 30/466, loss: 0.8031451106071472 2023-01-22 10:49:30.630508: step: 32/466, loss: 0.08310721814632416 2023-01-22 10:49:31.227999: step: 34/466, loss: 0.38305965065956116 2023-01-22 10:49:31.820333: step: 36/466, loss: 0.706479549407959 2023-01-22 10:49:32.462998: step: 38/466, loss: 0.14212380349636078 2023-01-22 10:49:33.026542: step: 40/466, loss: 0.206035777926445 2023-01-22 10:49:33.591878: step: 42/466, loss: 0.17984972894191742 2023-01-22 10:49:34.236679: step: 44/466, loss: 0.12307525426149368 2023-01-22 10:49:34.873797: step: 46/466, loss: 0.1807154268026352 2023-01-22 10:49:35.469577: step: 48/466, loss: 0.17570000886917114 2023-01-22 10:49:36.204851: step: 50/466, loss: 0.25609198212623596 2023-01-22 10:49:36.756681: step: 52/466, loss: 0.06491687893867493 2023-01-22 10:49:37.427616: step: 54/466, loss: 0.07231342792510986 2023-01-22 10:49:38.168472: step: 56/466, loss: 0.05617355927824974 2023-01-22 10:49:38.806719: step: 58/466, loss: 0.029409393668174744 2023-01-22 10:49:39.589512: step: 60/466, loss: 0.06893029063940048 2023-01-22 10:49:40.214374: step: 62/466, loss: 0.07814190536737442 2023-01-22 10:49:40.766438: step: 64/466, loss: 0.017962148413062096 2023-01-22 10:49:41.381153: step: 66/466, loss: 0.09650110453367233 2023-01-22 10:49:42.062886: step: 68/466, loss: 0.053260087966918945 2023-01-22 10:49:42.774153: step: 70/466, loss: 0.08349565416574478 2023-01-22 10:49:43.408948: step: 72/466, loss: 0.2979294955730438 2023-01-22 10:49:44.023992: step: 74/466, loss: 0.17924754321575165 2023-01-22 10:49:44.632267: step: 76/466, loss: 0.20314748585224152 2023-01-22 10:49:45.265997: step: 78/466, loss: 0.28504741191864014 2023-01-22 10:49:45.960830: step: 80/466, loss: 0.5961211919784546 2023-01-22 10:49:46.631685: step: 82/466, loss: 0.5081316828727722 2023-01-22 10:49:47.242301: step: 84/466, loss: 0.17852233350276947 2023-01-22 10:49:47.926296: step: 86/466, loss: 0.2734845280647278 2023-01-22 10:49:48.601932: step: 88/466, loss: 0.16375912725925446 2023-01-22 10:49:49.253506: step: 90/466, loss: 0.24265673756599426 2023-01-22 10:49:49.891973: step: 92/466, loss: 0.3444412052631378 2023-01-22 10:49:50.612586: step: 94/466, loss: 1.2213103771209717 2023-01-22 10:49:51.243252: step: 96/466, loss: 0.18616735935211182 2023-01-22 10:49:51.852559: step: 98/466, loss: 0.20802858471870422 2023-01-22 10:49:52.607447: step: 100/466, loss: 0.22155752778053284 2023-01-22 10:49:53.215602: step: 102/466, loss: 0.068345807492733 2023-01-22 10:49:53.795746: step: 104/466, loss: 0.6814802289009094 2023-01-22 10:49:54.413591: step: 106/466, loss: 0.013646832667291164 2023-01-22 10:49:55.052559: step: 108/466, loss: 0.13286662101745605 2023-01-22 10:49:55.640127: step: 110/466, loss: 0.0960453674197197 2023-01-22 10:49:56.245098: step: 112/466, loss: 0.10478698462247849 2023-01-22 10:49:56.984389: step: 114/466, loss: 0.3638004958629608 2023-01-22 10:49:57.641945: step: 116/466, loss: 0.05827972665429115 2023-01-22 10:49:58.301195: step: 118/466, loss: 0.32893651723861694 2023-01-22 10:49:58.954126: step: 120/466, loss: 0.3385923206806183 2023-01-22 10:49:59.545277: step: 122/466, loss: 0.24091915786266327 2023-01-22 10:50:00.159630: step: 124/466, loss: 0.1859840601682663 2023-01-22 10:50:00.832033: step: 126/466, loss: 0.28287869691848755 2023-01-22 10:50:01.491442: step: 128/466, loss: 0.14036421477794647 2023-01-22 10:50:02.099597: step: 130/466, loss: 0.2465360164642334 2023-01-22 10:50:02.714299: step: 132/466, loss: 0.16373713314533234 2023-01-22 10:50:03.387918: step: 134/466, loss: 0.7166844606399536 2023-01-22 10:50:03.994594: step: 136/466, loss: 0.166682630777359 2023-01-22 10:50:04.589832: step: 138/466, loss: 0.384368360042572 2023-01-22 10:50:05.251291: step: 140/466, loss: 0.5179634094238281 2023-01-22 10:50:05.847491: step: 142/466, loss: 0.04584791511297226 2023-01-22 10:50:06.476633: step: 144/466, loss: 0.7276697158813477 2023-01-22 10:50:07.107861: step: 146/466, loss: 0.41061267256736755 2023-01-22 10:50:07.770846: step: 148/466, loss: 0.2997644245624542 2023-01-22 10:50:08.364963: step: 150/466, loss: 0.21162858605384827 2023-01-22 10:50:09.031861: step: 152/466, loss: 0.09424500912427902 2023-01-22 10:50:09.707506: step: 154/466, loss: 0.16237930953502655 2023-01-22 10:50:10.383313: step: 156/466, loss: 0.21634267270565033 2023-01-22 10:50:11.062820: step: 158/466, loss: 0.3554103672504425 2023-01-22 10:50:11.807727: step: 160/466, loss: 0.6150345802307129 2023-01-22 10:50:12.561098: step: 162/466, loss: 0.14475339651107788 2023-01-22 10:50:13.235942: step: 164/466, loss: 0.19218337535858154 2023-01-22 10:50:13.890258: step: 166/466, loss: 0.06557288765907288 2023-01-22 10:50:14.575445: step: 168/466, loss: 0.5332788228988647 2023-01-22 10:50:15.143202: step: 170/466, loss: 0.20994780957698822 2023-01-22 10:50:15.780653: step: 172/466, loss: 1.137925148010254 2023-01-22 10:50:16.450015: step: 174/466, loss: 0.21695555746555328 2023-01-22 10:50:17.126111: step: 176/466, loss: 0.09937255829572678 2023-01-22 10:50:17.742650: step: 178/466, loss: 0.17750242352485657 2023-01-22 10:50:18.303780: step: 180/466, loss: 0.19674468040466309 2023-01-22 10:50:18.962428: step: 182/466, loss: 0.09151066094636917 2023-01-22 10:50:19.672159: step: 184/466, loss: 1.1057778596878052 2023-01-22 10:50:20.350546: step: 186/466, loss: 0.14974889159202576 2023-01-22 10:50:21.038442: step: 188/466, loss: 0.23337318003177643 2023-01-22 10:50:21.686215: step: 190/466, loss: 0.1663864701986313 2023-01-22 10:50:22.372905: step: 192/466, loss: 0.1742500364780426 2023-01-22 10:50:23.040242: step: 194/466, loss: 1.0310101509094238 2023-01-22 10:50:23.699576: step: 196/466, loss: 0.28460630774497986 2023-01-22 10:50:24.352321: step: 198/466, loss: 0.2701140344142914 2023-01-22 10:50:25.042613: step: 200/466, loss: 0.10820496082305908 2023-01-22 10:50:25.750084: step: 202/466, loss: 0.10222582519054413 2023-01-22 10:50:26.416548: step: 204/466, loss: 0.10434279590845108 2023-01-22 10:50:27.020136: step: 206/466, loss: 0.6771631836891174 2023-01-22 10:50:27.663567: step: 208/466, loss: 0.08052944391965866 2023-01-22 10:50:28.320551: step: 210/466, loss: 0.2689838111400604 2023-01-22 10:50:28.925110: step: 212/466, loss: 0.25654590129852295 2023-01-22 10:50:29.579539: step: 214/466, loss: 0.5604700446128845 2023-01-22 10:50:30.220109: step: 216/466, loss: 0.1821504384279251 2023-01-22 10:50:30.865073: step: 218/466, loss: 0.12607960402965546 2023-01-22 10:50:31.501334: step: 220/466, loss: 0.3847530484199524 2023-01-22 10:50:32.207530: step: 222/466, loss: 0.1250201016664505 2023-01-22 10:50:32.824694: step: 224/466, loss: 0.23013734817504883 2023-01-22 10:50:33.449969: step: 226/466, loss: 0.12877550721168518 2023-01-22 10:50:34.081077: step: 228/466, loss: 0.1783233880996704 2023-01-22 10:50:34.819494: step: 230/466, loss: 0.22882427275180817 2023-01-22 10:50:35.488842: step: 232/466, loss: 0.13324867188930511 2023-01-22 10:50:36.063053: step: 234/466, loss: 0.06851288676261902 2023-01-22 10:50:36.634935: step: 236/466, loss: 0.08244407176971436 2023-01-22 10:50:37.293771: step: 238/466, loss: 1.0778659582138062 2023-01-22 10:50:37.958747: step: 240/466, loss: 0.1519305258989334 2023-01-22 10:50:38.608470: step: 242/466, loss: 2.159055233001709 2023-01-22 10:50:39.212436: step: 244/466, loss: 0.11591868102550507 2023-01-22 10:50:39.842234: step: 246/466, loss: 0.16447295248508453 2023-01-22 10:50:40.523729: step: 248/466, loss: 0.2301783561706543 2023-01-22 10:50:41.114126: step: 250/466, loss: 0.1749679446220398 2023-01-22 10:50:41.791334: step: 252/466, loss: 0.21180734038352966 2023-01-22 10:50:42.414352: step: 254/466, loss: 0.006519475486129522 2023-01-22 10:50:43.053385: step: 256/466, loss: 0.18454588949680328 2023-01-22 10:50:43.665613: step: 258/466, loss: 0.08782033622264862 2023-01-22 10:50:44.314273: step: 260/466, loss: 0.10792701691389084 2023-01-22 10:50:45.000562: step: 262/466, loss: 0.3083246946334839 2023-01-22 10:50:45.817351: step: 264/466, loss: 1.006682276725769 2023-01-22 10:50:46.513639: step: 266/466, loss: 0.1580200344324112 2023-01-22 10:50:47.199912: step: 268/466, loss: 0.08529648929834366 2023-01-22 10:50:47.802689: step: 270/466, loss: 1.1525237560272217 2023-01-22 10:50:48.516956: step: 272/466, loss: 0.21152204275131226 2023-01-22 10:50:49.182426: step: 274/466, loss: 0.10467851907014847 2023-01-22 10:50:49.842149: step: 276/466, loss: 0.2370503544807434 2023-01-22 10:50:50.496199: step: 278/466, loss: 0.37224963307380676 2023-01-22 10:50:51.092120: step: 280/466, loss: 0.27512431144714355 2023-01-22 10:50:51.703735: step: 282/466, loss: 1.7817201614379883 2023-01-22 10:50:52.338279: step: 284/466, loss: 0.4905145764350891 2023-01-22 10:50:53.037047: step: 286/466, loss: 0.37921059131622314 2023-01-22 10:50:53.683950: step: 288/466, loss: 0.3173060119152069 2023-01-22 10:50:54.450584: step: 290/466, loss: 0.45783019065856934 2023-01-22 10:50:55.114005: step: 292/466, loss: 0.7370656728744507 2023-01-22 10:50:55.855961: step: 294/466, loss: 0.23975718021392822 2023-01-22 10:50:56.491234: step: 296/466, loss: 0.2448563575744629 2023-01-22 10:50:57.103272: step: 298/466, loss: 0.2936834394931793 2023-01-22 10:50:57.733627: step: 300/466, loss: 0.10825812071561813 2023-01-22 10:50:58.386778: step: 302/466, loss: 0.15110787749290466 2023-01-22 10:50:59.035768: step: 304/466, loss: 0.10494061559438705 2023-01-22 10:50:59.663512: step: 306/466, loss: 0.38257136940956116 2023-01-22 10:51:00.361879: step: 308/466, loss: 0.10423853993415833 2023-01-22 10:51:01.065028: step: 310/466, loss: 0.4629186987876892 2023-01-22 10:51:01.710649: step: 312/466, loss: 0.2475818246603012 2023-01-22 10:51:02.448611: step: 314/466, loss: 0.8074089288711548 2023-01-22 10:51:03.098654: step: 316/466, loss: 0.18972155451774597 2023-01-22 10:51:03.727954: step: 318/466, loss: 0.21986006200313568 2023-01-22 10:51:04.444972: step: 320/466, loss: 0.5212770700454712 2023-01-22 10:51:05.203484: step: 322/466, loss: 0.43529248237609863 2023-01-22 10:51:05.837963: step: 324/466, loss: 0.3243367075920105 2023-01-22 10:51:06.622421: step: 326/466, loss: 0.287121057510376 2023-01-22 10:51:07.292635: step: 328/466, loss: 0.061606720089912415 2023-01-22 10:51:08.086301: step: 330/466, loss: 0.29698190093040466 2023-01-22 10:51:08.695026: step: 332/466, loss: 0.18679064512252808 2023-01-22 10:51:09.356428: step: 334/466, loss: 0.20813262462615967 2023-01-22 10:51:09.998945: step: 336/466, loss: 0.1052006259560585 2023-01-22 10:51:10.605283: step: 338/466, loss: 0.12170988321304321 2023-01-22 10:51:11.280639: step: 340/466, loss: 0.11190930753946304 2023-01-22 10:51:11.937819: step: 342/466, loss: 1.2435797452926636 2023-01-22 10:51:12.555698: step: 344/466, loss: 0.06316729635000229 2023-01-22 10:51:13.208061: step: 346/466, loss: 0.2365454137325287 2023-01-22 10:51:13.859548: step: 348/466, loss: 0.37936925888061523 2023-01-22 10:51:14.543509: step: 350/466, loss: 0.11781468242406845 2023-01-22 10:51:15.175598: step: 352/466, loss: 0.11133911460638046 2023-01-22 10:51:15.810087: step: 354/466, loss: 0.2571800947189331 2023-01-22 10:51:16.366794: step: 356/466, loss: 0.2111179381608963 2023-01-22 10:51:17.044609: step: 358/466, loss: 0.17491096258163452 2023-01-22 10:51:17.706112: step: 360/466, loss: 0.16860328614711761 2023-01-22 10:51:18.363845: step: 362/466, loss: 4.821085453033447 2023-01-22 10:51:19.023732: step: 364/466, loss: 0.13677658140659332 2023-01-22 10:51:19.687725: step: 366/466, loss: 0.3939335346221924 2023-01-22 10:51:20.337037: step: 368/466, loss: 0.37653470039367676 2023-01-22 10:51:20.978189: step: 370/466, loss: 0.18542204797267914 2023-01-22 10:51:21.596427: step: 372/466, loss: 0.07824349403381348 2023-01-22 10:51:22.264914: step: 374/466, loss: 0.12440678477287292 2023-01-22 10:51:22.996303: step: 376/466, loss: 0.2704429030418396 2023-01-22 10:51:23.681073: step: 378/466, loss: 0.1222703754901886 2023-01-22 10:51:24.302476: step: 380/466, loss: 0.1519077718257904 2023-01-22 10:51:24.977357: step: 382/466, loss: 0.17306558787822723 2023-01-22 10:51:25.591316: step: 384/466, loss: 0.09035259485244751 2023-01-22 10:51:26.315376: step: 386/466, loss: 0.337478905916214 2023-01-22 10:51:26.949365: step: 388/466, loss: 0.7239857316017151 2023-01-22 10:51:27.567197: step: 390/466, loss: 0.15432657301425934 2023-01-22 10:51:28.193108: step: 392/466, loss: 0.12917044758796692 2023-01-22 10:51:28.878335: step: 394/466, loss: 0.12573650479316711 2023-01-22 10:51:29.482492: step: 396/466, loss: 0.39603760838508606 2023-01-22 10:51:30.164348: step: 398/466, loss: 0.22024846076965332 2023-01-22 10:51:30.820780: step: 400/466, loss: 0.2544387876987457 2023-01-22 10:51:31.470026: step: 402/466, loss: 0.24689553678035736 2023-01-22 10:51:32.234442: step: 404/466, loss: 0.19383157789707184 2023-01-22 10:51:32.911040: step: 406/466, loss: 0.7465018630027771 2023-01-22 10:51:33.520383: step: 408/466, loss: 0.25976499915122986 2023-01-22 10:51:34.122717: step: 410/466, loss: 0.2334873080253601 2023-01-22 10:51:34.852197: step: 412/466, loss: 0.29507511854171753 2023-01-22 10:51:35.577105: step: 414/466, loss: 0.06479204446077347 2023-01-22 10:51:36.332290: step: 416/466, loss: 0.26891064643859863 2023-01-22 10:51:36.936144: step: 418/466, loss: 0.2599930763244629 2023-01-22 10:51:37.619952: step: 420/466, loss: 0.7796530723571777 2023-01-22 10:51:38.295366: step: 422/466, loss: 0.10785645991563797 2023-01-22 10:51:38.917836: step: 424/466, loss: 0.10304387658834457 2023-01-22 10:51:39.591851: step: 426/466, loss: 0.0517401397228241 2023-01-22 10:51:40.297361: step: 428/466, loss: 0.19476906955242157 2023-01-22 10:51:40.970191: step: 430/466, loss: 0.630233883857727 2023-01-22 10:51:41.614781: step: 432/466, loss: 0.28799697756767273 2023-01-22 10:51:42.304592: step: 434/466, loss: 0.2140231430530548 2023-01-22 10:51:42.899542: step: 436/466, loss: 0.13422894477844238 2023-01-22 10:51:43.575859: step: 438/466, loss: 0.6948972940444946 2023-01-22 10:51:44.226527: step: 440/466, loss: 0.3033457100391388 2023-01-22 10:51:44.882915: step: 442/466, loss: 0.17105747759342194 2023-01-22 10:51:45.504836: step: 444/466, loss: 0.15871189534664154 2023-01-22 10:51:46.144731: step: 446/466, loss: 0.08819045126438141 2023-01-22 10:51:46.785359: step: 448/466, loss: 0.15792372822761536 2023-01-22 10:51:47.438851: step: 450/466, loss: 0.18644051253795624 2023-01-22 10:51:48.057268: step: 452/466, loss: 0.8649181723594666 2023-01-22 10:51:48.700087: step: 454/466, loss: 0.1666182279586792 2023-01-22 10:51:49.330623: step: 456/466, loss: 0.09834443777799606 2023-01-22 10:51:50.004450: step: 458/466, loss: 0.15646080672740936 2023-01-22 10:51:50.716789: step: 460/466, loss: 0.42353373765945435 2023-01-22 10:51:51.398961: step: 462/466, loss: 0.549027681350708 2023-01-22 10:51:52.184150: step: 464/466, loss: 0.2670876979827881 2023-01-22 10:51:52.880993: step: 466/466, loss: 0.18293240666389465 2023-01-22 10:51:53.527015: step: 468/466, loss: 0.056671373546123505 2023-01-22 10:51:54.163522: step: 470/466, loss: 0.16536259651184082 2023-01-22 10:51:54.895783: step: 472/466, loss: 0.8619071245193481 2023-01-22 10:51:55.479897: step: 474/466, loss: 0.08908943831920624 2023-01-22 10:51:56.205483: step: 476/466, loss: 1.3924860954284668 2023-01-22 10:51:56.787770: step: 478/466, loss: 0.24757857620716095 2023-01-22 10:51:57.445409: step: 480/466, loss: 0.1696261763572693 2023-01-22 10:51:58.127347: step: 482/466, loss: 0.12697002291679382 2023-01-22 10:51:58.797080: step: 484/466, loss: 0.1525171548128128 2023-01-22 10:51:59.402295: step: 486/466, loss: 0.22188422083854675 2023-01-22 10:52:00.124230: step: 488/466, loss: 0.5944315195083618 2023-01-22 10:52:00.772288: step: 490/466, loss: 0.5309344530105591 2023-01-22 10:52:01.439452: step: 492/466, loss: 0.24633949995040894 2023-01-22 10:52:02.199555: step: 494/466, loss: 0.2065393030643463 2023-01-22 10:52:02.900579: step: 496/466, loss: 0.2309531420469284 2023-01-22 10:52:03.545255: step: 498/466, loss: 0.07951924949884415 2023-01-22 10:52:04.235103: step: 500/466, loss: 0.1381409466266632 2023-01-22 10:52:04.902552: step: 502/466, loss: 0.22923199832439423 2023-01-22 10:52:05.466614: step: 504/466, loss: 0.1416565328836441 2023-01-22 10:52:06.090334: step: 506/466, loss: 0.09851235151290894 2023-01-22 10:52:06.732107: step: 508/466, loss: 0.3562050461769104 2023-01-22 10:52:07.384661: step: 510/466, loss: 0.08076535165309906 2023-01-22 10:52:08.001666: step: 512/466, loss: 0.4545714855194092 2023-01-22 10:52:08.610651: step: 514/466, loss: 0.36821848154067993 2023-01-22 10:52:09.282636: step: 516/466, loss: 0.16712959110736847 2023-01-22 10:52:09.875746: step: 518/466, loss: 0.12090260535478592 2023-01-22 10:52:10.446271: step: 520/466, loss: 0.15361355245113373 2023-01-22 10:52:11.081165: step: 522/466, loss: 0.2245166003704071 2023-01-22 10:52:11.756993: step: 524/466, loss: 0.09379874914884567 2023-01-22 10:52:12.379060: step: 526/466, loss: 0.06804991513490677 2023-01-22 10:52:13.071938: step: 528/466, loss: 0.33074793219566345 2023-01-22 10:52:13.708808: step: 530/466, loss: 0.1294187307357788 2023-01-22 10:52:14.407034: step: 532/466, loss: 0.2864542603492737 2023-01-22 10:52:15.184029: step: 534/466, loss: 0.18013721704483032 2023-01-22 10:52:15.819745: step: 536/466, loss: 0.3223685920238495 2023-01-22 10:52:16.461185: step: 538/466, loss: 0.11612385511398315 2023-01-22 10:52:17.147569: step: 540/466, loss: 0.051373809576034546 2023-01-22 10:52:17.772671: step: 542/466, loss: 0.036936067044734955 2023-01-22 10:52:18.684187: step: 544/466, loss: 0.5212790966033936 2023-01-22 10:52:19.358555: step: 546/466, loss: 0.08071798831224442 2023-01-22 10:52:19.999816: step: 548/466, loss: 0.07064666599035263 2023-01-22 10:52:20.596438: step: 550/466, loss: 0.17133018374443054 2023-01-22 10:52:21.233617: step: 552/466, loss: 0.47561606764793396 2023-01-22 10:52:21.942918: step: 554/466, loss: 0.069338358938694 2023-01-22 10:52:22.624904: step: 556/466, loss: 0.06468157470226288 2023-01-22 10:52:23.325509: step: 558/466, loss: 0.4185435473918915 2023-01-22 10:52:24.012913: step: 560/466, loss: 0.32739049196243286 2023-01-22 10:52:24.647153: step: 562/466, loss: 0.44227561354637146 2023-01-22 10:52:25.222753: step: 564/466, loss: 0.1070290356874466 2023-01-22 10:52:25.841130: step: 566/466, loss: 0.22124671936035156 2023-01-22 10:52:26.445258: step: 568/466, loss: 0.8226217031478882 2023-01-22 10:52:27.092262: step: 570/466, loss: 0.1698789745569229 2023-01-22 10:52:27.750474: step: 572/466, loss: 0.3633151650428772 2023-01-22 10:52:28.401459: step: 574/466, loss: 0.32821306586265564 2023-01-22 10:52:28.997311: step: 576/466, loss: 0.1997094601392746 2023-01-22 10:52:29.643653: step: 578/466, loss: 0.26783257722854614 2023-01-22 10:52:30.303945: step: 580/466, loss: 0.4049939215183258 2023-01-22 10:52:30.970305: step: 582/466, loss: 0.7677994966506958 2023-01-22 10:52:31.625282: step: 584/466, loss: 0.1088162437081337 2023-01-22 10:52:32.367821: step: 586/466, loss: 0.3080524802207947 2023-01-22 10:52:32.962592: step: 588/466, loss: 0.029904013499617577 2023-01-22 10:52:33.612513: step: 590/466, loss: 0.343360960483551 2023-01-22 10:52:34.343136: step: 592/466, loss: 0.48682013154029846 2023-01-22 10:52:35.061222: step: 594/466, loss: 0.665473997592926 2023-01-22 10:52:35.734212: step: 596/466, loss: 0.3582887053489685 2023-01-22 10:52:36.368832: step: 598/466, loss: 0.18016378581523895 2023-01-22 10:52:37.111585: step: 600/466, loss: 0.1603153645992279 2023-01-22 10:52:37.779636: step: 602/466, loss: 0.33286023139953613 2023-01-22 10:52:38.430499: step: 604/466, loss: 0.1129535436630249 2023-01-22 10:52:39.115516: step: 606/466, loss: 0.9827211499214172 2023-01-22 10:52:39.790660: step: 608/466, loss: 0.22517018020153046 2023-01-22 10:52:40.462187: step: 610/466, loss: 0.10864400118589401 2023-01-22 10:52:41.082144: step: 612/466, loss: 0.25312262773513794 2023-01-22 10:52:41.757317: step: 614/466, loss: 0.5937185287475586 2023-01-22 10:52:42.386624: step: 616/466, loss: 0.3340551555156708 2023-01-22 10:52:43.049877: step: 618/466, loss: 0.22389067709445953 2023-01-22 10:52:43.696333: step: 620/466, loss: 0.15252818167209625 2023-01-22 10:52:44.385633: step: 622/466, loss: 0.34194090962409973 2023-01-22 10:52:45.077131: step: 624/466, loss: 1.6129209995269775 2023-01-22 10:52:45.744483: step: 626/466, loss: 0.15540900826454163 2023-01-22 10:52:46.451295: step: 628/466, loss: 0.6548113226890564 2023-01-22 10:52:47.176624: step: 630/466, loss: 0.2582647502422333 2023-01-22 10:52:47.812444: step: 632/466, loss: 0.5408102869987488 2023-01-22 10:52:48.461895: step: 634/466, loss: 0.06651590019464493 2023-01-22 10:52:49.165100: step: 636/466, loss: 0.1293291449546814 2023-01-22 10:52:49.812299: step: 638/466, loss: 0.09778794646263123 2023-01-22 10:52:50.429371: step: 640/466, loss: 0.5053603053092957 2023-01-22 10:52:51.038508: step: 642/466, loss: 0.12509065866470337 2023-01-22 10:52:51.633814: step: 644/466, loss: 0.22191186249256134 2023-01-22 10:52:52.271586: step: 646/466, loss: 0.15831364691257477 2023-01-22 10:52:52.935047: step: 648/466, loss: 0.28588342666625977 2023-01-22 10:52:53.617406: step: 650/466, loss: 0.11837802082300186 2023-01-22 10:52:54.283298: step: 652/466, loss: 0.15723967552185059 2023-01-22 10:52:54.949750: step: 654/466, loss: 0.1029759868979454 2023-01-22 10:52:55.593618: step: 656/466, loss: 0.20298771560192108 2023-01-22 10:52:56.317375: step: 658/466, loss: 0.13640204071998596 2023-01-22 10:52:56.919315: step: 660/466, loss: 0.24446871876716614 2023-01-22 10:52:57.584731: step: 662/466, loss: 0.413612425327301 2023-01-22 10:52:58.222537: step: 664/466, loss: 0.04400372877717018 2023-01-22 10:52:58.933172: step: 666/466, loss: 0.38971996307373047 2023-01-22 10:52:59.675222: step: 668/466, loss: 0.6782568097114563 2023-01-22 10:53:00.308598: step: 670/466, loss: 0.25608938932418823 2023-01-22 10:53:00.984944: step: 672/466, loss: 0.3360452353954315 2023-01-22 10:53:01.649095: step: 674/466, loss: 0.13316825032234192 2023-01-22 10:53:02.325992: step: 676/466, loss: 0.28985121846199036 2023-01-22 10:53:02.972563: step: 678/466, loss: 0.7279584407806396 2023-01-22 10:53:03.581525: step: 680/466, loss: 0.2239963263273239 2023-01-22 10:53:04.261201: step: 682/466, loss: 0.20093773305416107 2023-01-22 10:53:04.945326: step: 684/466, loss: 1.3977488279342651 2023-01-22 10:53:05.612835: step: 686/466, loss: 0.36743053793907166 2023-01-22 10:53:06.309467: step: 688/466, loss: 12.828628540039062 2023-01-22 10:53:06.991230: step: 690/466, loss: 0.10988299548625946 2023-01-22 10:53:07.670915: step: 692/466, loss: 0.7742509245872498 2023-01-22 10:53:08.272577: step: 694/466, loss: 0.5149338841438293 2023-01-22 10:53:08.892830: step: 696/466, loss: 0.5421413779258728 2023-01-22 10:53:09.552062: step: 698/466, loss: 0.1817217469215393 2023-01-22 10:53:10.246977: step: 700/466, loss: 0.11957786977291107 2023-01-22 10:53:10.966685: step: 702/466, loss: 0.10088533908128738 2023-01-22 10:53:11.692167: step: 704/466, loss: 0.44104164838790894 2023-01-22 10:53:12.415570: step: 706/466, loss: 0.39744728803634644 2023-01-22 10:53:13.144134: step: 708/466, loss: 0.25253307819366455 2023-01-22 10:53:13.821878: step: 710/466, loss: 0.20075541734695435 2023-01-22 10:53:14.481754: step: 712/466, loss: 0.25348201394081116 2023-01-22 10:53:15.132685: step: 714/466, loss: 0.14546802639961243 2023-01-22 10:53:15.828782: step: 716/466, loss: 0.5452193021774292 2023-01-22 10:53:16.504569: step: 718/466, loss: 0.6032496690750122 2023-01-22 10:53:17.171678: step: 720/466, loss: 0.2527567148208618 2023-01-22 10:53:17.806001: step: 722/466, loss: 0.23223836719989777 2023-01-22 10:53:18.434365: step: 724/466, loss: 0.28572478890419006 2023-01-22 10:53:19.076657: step: 726/466, loss: 0.03468862920999527 2023-01-22 10:53:19.757777: step: 728/466, loss: 0.1436808854341507 2023-01-22 10:53:20.462190: step: 730/466, loss: 0.7947600483894348 2023-01-22 10:53:21.080774: step: 732/466, loss: 0.6669540405273438 2023-01-22 10:53:21.839744: step: 734/466, loss: 0.9614865779876709 2023-01-22 10:53:22.491896: step: 736/466, loss: 0.21250706911087036 2023-01-22 10:53:23.188492: step: 738/466, loss: 0.4781217873096466 2023-01-22 10:53:23.836468: step: 740/466, loss: 0.10923323035240173 2023-01-22 10:53:24.504043: step: 742/466, loss: 0.5838552713394165 2023-01-22 10:53:25.142044: step: 744/466, loss: 0.36028003692626953 2023-01-22 10:53:25.749358: step: 746/466, loss: 0.34653040766716003 2023-01-22 10:53:26.442088: step: 748/466, loss: 0.1995936930179596 2023-01-22 10:53:27.091216: step: 750/466, loss: 0.47797736525535583 2023-01-22 10:53:27.719678: step: 752/466, loss: 0.22129611670970917 2023-01-22 10:53:28.328530: step: 754/466, loss: 0.15425199270248413 2023-01-22 10:53:28.996645: step: 756/466, loss: 0.34954312443733215 2023-01-22 10:53:29.705416: step: 758/466, loss: 0.22375522553920746 2023-01-22 10:53:30.360668: step: 760/466, loss: 0.2790171504020691 2023-01-22 10:53:31.049368: step: 762/466, loss: 0.17393219470977783 2023-01-22 10:53:31.696937: step: 764/466, loss: 0.27619192004203796 2023-01-22 10:53:32.368267: step: 766/466, loss: 0.0823623314499855 2023-01-22 10:53:33.035648: step: 768/466, loss: 0.14845037460327148 2023-01-22 10:53:33.698608: step: 770/466, loss: 0.06040605902671814 2023-01-22 10:53:34.300026: step: 772/466, loss: 0.11510689556598663 2023-01-22 10:53:34.941521: step: 774/466, loss: 0.5105097889900208 2023-01-22 10:53:35.653813: step: 776/466, loss: 0.4596703052520752 2023-01-22 10:53:36.305778: step: 778/466, loss: 0.08241189271211624 2023-01-22 10:53:36.960674: step: 780/466, loss: 0.2193770706653595 2023-01-22 10:53:37.709475: step: 782/466, loss: 0.13047076761722565 2023-01-22 10:53:38.388277: step: 784/466, loss: 0.1318654865026474 2023-01-22 10:53:39.080452: step: 786/466, loss: 0.4017501175403595 2023-01-22 10:53:39.709428: step: 788/466, loss: 0.5616803169250488 2023-01-22 10:53:40.392888: step: 790/466, loss: 0.286371648311615 2023-01-22 10:53:41.054473: step: 792/466, loss: 0.263804167509079 2023-01-22 10:53:41.663723: step: 794/466, loss: 0.1747511327266693 2023-01-22 10:53:42.346350: step: 796/466, loss: 0.38937389850616455 2023-01-22 10:53:42.952564: step: 798/466, loss: 0.3421323001384735 2023-01-22 10:53:43.635121: step: 800/466, loss: 0.09835729748010635 2023-01-22 10:53:44.254891: step: 802/466, loss: 0.18446092307567596 2023-01-22 10:53:44.861454: step: 804/466, loss: 0.20434127748012543 2023-01-22 10:53:45.503339: step: 806/466, loss: 0.727473258972168 2023-01-22 10:53:46.206934: step: 808/466, loss: 0.09985418617725372 2023-01-22 10:53:46.937294: step: 810/466, loss: 0.7417232990264893 2023-01-22 10:53:47.583543: step: 812/466, loss: 0.1557294726371765 2023-01-22 10:53:48.198245: step: 814/466, loss: 0.11794015765190125 2023-01-22 10:53:48.855295: step: 816/466, loss: 0.24623864889144897 2023-01-22 10:53:49.520987: step: 818/466, loss: 0.07437814772129059 2023-01-22 10:53:50.158940: step: 820/466, loss: 0.034862954169511795 2023-01-22 10:53:50.835167: step: 822/466, loss: 0.32118910551071167 2023-01-22 10:53:51.480918: step: 824/466, loss: 0.19968284666538239 2023-01-22 10:53:52.143603: step: 826/466, loss: 0.30660173296928406 2023-01-22 10:53:52.834454: step: 828/466, loss: 0.2359127253293991 2023-01-22 10:53:53.482801: step: 830/466, loss: 0.15513497591018677 2023-01-22 10:53:54.155943: step: 832/466, loss: 0.10971243679523468 2023-01-22 10:53:54.894983: step: 834/466, loss: 0.30356305837631226 2023-01-22 10:53:55.606910: step: 836/466, loss: 0.42627593874931335 2023-01-22 10:53:56.265574: step: 838/466, loss: 0.2816768288612366 2023-01-22 10:53:56.940184: step: 840/466, loss: 0.2914235293865204 2023-01-22 10:53:57.547383: step: 842/466, loss: 0.13204285502433777 2023-01-22 10:53:58.119391: step: 844/466, loss: 0.8028279542922974 2023-01-22 10:53:58.777774: step: 846/466, loss: 0.050989869982004166 2023-01-22 10:53:59.436031: step: 848/466, loss: 0.06458903849124908 2023-01-22 10:54:00.059750: step: 850/466, loss: 0.5054102540016174 2023-01-22 10:54:00.655072: step: 852/466, loss: 0.22338427603244781 2023-01-22 10:54:01.292325: step: 854/466, loss: 0.49166548252105713 2023-01-22 10:54:01.964311: step: 856/466, loss: 0.2450941950082779 2023-01-22 10:54:02.686641: step: 858/466, loss: 0.4752826988697052 2023-01-22 10:54:03.296783: step: 860/466, loss: 0.07163707166910172 2023-01-22 10:54:03.951141: step: 862/466, loss: 0.754447340965271 2023-01-22 10:54:04.650555: step: 864/466, loss: 0.21170610189437866 2023-01-22 10:54:05.320614: step: 866/466, loss: 0.2848890423774719 2023-01-22 10:54:05.940505: step: 868/466, loss: 0.20099715888500214 2023-01-22 10:54:06.580226: step: 870/466, loss: 0.15913009643554688 2023-01-22 10:54:07.314675: step: 872/466, loss: 0.08308277279138565 2023-01-22 10:54:08.020050: step: 874/466, loss: 1.4316819906234741 2023-01-22 10:54:08.651762: step: 876/466, loss: 0.19593428075313568 2023-01-22 10:54:09.291605: step: 878/466, loss: 0.1286579817533493 2023-01-22 10:54:09.912470: step: 880/466, loss: 0.08160890638828278 2023-01-22 10:54:10.574818: step: 882/466, loss: 0.30313640832901 2023-01-22 10:54:11.166224: step: 884/466, loss: 0.11539170145988464 2023-01-22 10:54:11.823886: step: 886/466, loss: 0.7092605829238892 2023-01-22 10:54:12.475039: step: 888/466, loss: 0.2879091501235962 2023-01-22 10:54:13.124699: step: 890/466, loss: 0.05659577250480652 2023-01-22 10:54:13.759717: step: 892/466, loss: 0.2133093923330307 2023-01-22 10:54:14.399918: step: 894/466, loss: 0.19630642235279083 2023-01-22 10:54:15.019289: step: 896/466, loss: 0.28548285365104675 2023-01-22 10:54:15.649459: step: 898/466, loss: 0.13361498713493347 2023-01-22 10:54:16.270609: step: 900/466, loss: 0.3782631754875183 2023-01-22 10:54:16.879758: step: 902/466, loss: 0.4256414473056793 2023-01-22 10:54:17.569431: step: 904/466, loss: 0.1539887934923172 2023-01-22 10:54:18.224513: step: 906/466, loss: 0.32209718227386475 2023-01-22 10:54:18.879387: step: 908/466, loss: 0.17927029728889465 2023-01-22 10:54:19.504932: step: 910/466, loss: 0.23833033442497253 2023-01-22 10:54:20.049305: step: 912/466, loss: 0.3317146599292755 2023-01-22 10:54:20.688849: step: 914/466, loss: 0.3678411543369293 2023-01-22 10:54:21.336700: step: 916/466, loss: 0.0920814499258995 2023-01-22 10:54:21.984642: step: 918/466, loss: 0.1649651676416397 2023-01-22 10:54:22.609572: step: 920/466, loss: 0.44486376643180847 2023-01-22 10:54:23.314920: step: 922/466, loss: 0.2081485092639923 2023-01-22 10:54:23.995039: step: 924/466, loss: 0.22808825969696045 2023-01-22 10:54:24.647281: step: 926/466, loss: 0.2163880616426468 2023-01-22 10:54:25.395682: step: 928/466, loss: 0.24781255424022675 2023-01-22 10:54:26.069860: step: 930/466, loss: 0.6487457752227783 2023-01-22 10:54:26.704724: step: 932/466, loss: 0.12162760645151138 ================================================== Loss: 0.332 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3007789855072464, 'r': 0.32817441492726124, 'f1': 0.3138800665456745}, 'combined': 0.2312800490336549, 'epoch': 11} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3433426555648645, 'r': 0.32019595968408715, 'f1': 0.3313655861846948}, 'combined': 0.21976577736601519, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2991128177966102, 'r': 0.26738873106060607, 'f1': 0.2823625}, 'combined': 0.18824166666666667, 'epoch': 11} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.34760027882270855, 'r': 0.29854500137846485, 'f1': 0.32121050451059796}, 'combined': 0.20963211873323231, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.282426489470981, 'r': 0.3161890489333563, 'f1': 0.2983556468896666}, 'combined': 0.21984100297133327, 'epoch': 11} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.332615194721464, 'r': 0.306454449069214, 'f1': 0.3189993680369596}, 'combined': 0.21156434771363122, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24305555555555552, 'r': 0.29166666666666663, 'f1': 0.2651515151515152}, 'combined': 0.1767676767676768, 'epoch': 11} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.425, 'r': 0.3695652173913043, 'f1': 0.3953488372093023}, 'combined': 0.2635658914728682, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2875, 'r': 0.19827586206896552, 'f1': 0.23469387755102042}, 'combined': 0.1564625850340136, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2791124217992775, 'r': 0.3076580103923854, 'f1': 0.2926908639408639}, 'combined': 0.1951272426272426, 'epoch': 10} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.34934765710323107, 'r': 0.3114339578827254, 'f1': 0.32930311940058665}, 'combined': 0.21491361476669862, 'epoch': 10} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.4782608695652174, 'f1': 0.4680851063829787}, 'combined': 0.31205673758865243, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2814701218024629, 'r': 0.3551757703958972, 'f1': 0.3140564278500635}, 'combined': 0.23140999946846783, 'epoch': 9} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3308734183854636, 'r': 0.29166091118926246, 'f1': 0.3100321961699194}, 'combined': 0.2056172078225372, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2413793103448276, 'f1': 0.28}, 'combined': 0.18666666666666668, 'epoch': 9} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:57:11.178117: step: 2/466, loss: 0.13710524141788483 2023-01-22 10:57:11.809893: step: 4/466, loss: 0.06199297681450844 2023-01-22 10:57:12.426608: step: 6/466, loss: 4.465801239013672 2023-01-22 10:57:13.084833: step: 8/466, loss: 0.15292063355445862 2023-01-22 10:57:13.725366: step: 10/466, loss: 0.15151306986808777 2023-01-22 10:57:14.400903: step: 12/466, loss: 0.11174985766410828 2023-01-22 10:57:15.047065: step: 14/466, loss: 0.13583029806613922 2023-01-22 10:57:15.718294: step: 16/466, loss: 1.3309175968170166 2023-01-22 10:57:16.359278: step: 18/466, loss: 0.11795716732740402 2023-01-22 10:57:16.996854: step: 20/466, loss: 0.2237384021282196 2023-01-22 10:57:17.710504: step: 22/466, loss: 0.18687458336353302 2023-01-22 10:57:18.369339: step: 24/466, loss: 0.13026343286037445 2023-01-22 10:57:19.047452: step: 26/466, loss: 0.13793495297431946 2023-01-22 10:57:19.725049: step: 28/466, loss: 0.19610273838043213 2023-01-22 10:57:20.329223: step: 30/466, loss: 0.11369558423757553 2023-01-22 10:57:20.985350: step: 32/466, loss: 0.12538641691207886 2023-01-22 10:57:21.687306: step: 34/466, loss: 0.08715818077325821 2023-01-22 10:57:22.378758: step: 36/466, loss: 0.08705645054578781 2023-01-22 10:57:23.004524: step: 38/466, loss: 0.08952294290065765 2023-01-22 10:57:23.708806: step: 40/466, loss: 0.16769270598888397 2023-01-22 10:57:24.372795: step: 42/466, loss: 0.32103365659713745 2023-01-22 10:57:25.004203: step: 44/466, loss: 0.06588691473007202 2023-01-22 10:57:25.652106: step: 46/466, loss: 0.2702665328979492 2023-01-22 10:57:26.289710: step: 48/466, loss: 0.8478837609291077 2023-01-22 10:57:26.972471: step: 50/466, loss: 2.1849398612976074 2023-01-22 10:57:27.573532: step: 52/466, loss: 0.07038564234972 2023-01-22 10:57:28.238106: step: 54/466, loss: 0.10733061283826828 2023-01-22 10:57:28.938414: step: 56/466, loss: 0.07326068729162216 2023-01-22 10:57:29.602142: step: 58/466, loss: 0.14899058640003204 2023-01-22 10:57:30.303077: step: 60/466, loss: 0.15643711388111115 2023-01-22 10:57:30.886095: step: 62/466, loss: 0.0960053876042366 2023-01-22 10:57:31.532698: step: 64/466, loss: 0.12543736398220062 2023-01-22 10:57:32.210499: step: 66/466, loss: 0.3649769425392151 2023-01-22 10:57:32.855117: step: 68/466, loss: 0.13690850138664246 2023-01-22 10:57:33.532508: step: 70/466, loss: 0.2428659349679947 2023-01-22 10:57:34.183738: step: 72/466, loss: 0.20831294357776642 2023-01-22 10:57:34.839582: step: 74/466, loss: 0.18887560069561005 2023-01-22 10:57:35.496419: step: 76/466, loss: 0.09079209715127945 2023-01-22 10:57:36.179701: step: 78/466, loss: 0.23012274503707886 2023-01-22 10:57:36.828015: step: 80/466, loss: 0.4214737117290497 2023-01-22 10:57:37.472104: step: 82/466, loss: 0.08949767053127289 2023-01-22 10:57:38.164060: step: 84/466, loss: 0.13404501974582672 2023-01-22 10:57:38.850934: step: 86/466, loss: 0.20653145015239716 2023-01-22 10:57:39.527802: step: 88/466, loss: 0.062412939965724945 2023-01-22 10:57:40.182743: step: 90/466, loss: 0.24332228302955627 2023-01-22 10:57:40.856654: step: 92/466, loss: 0.6783890724182129 2023-01-22 10:57:41.587956: step: 94/466, loss: 0.222461998462677 2023-01-22 10:57:42.286704: step: 96/466, loss: 0.2090371549129486 2023-01-22 10:57:42.933325: step: 98/466, loss: 0.17527589201927185 2023-01-22 10:57:43.593440: step: 100/466, loss: 0.18766961991786957 2023-01-22 10:57:44.246597: step: 102/466, loss: 0.07617698609828949 2023-01-22 10:57:44.882151: step: 104/466, loss: 0.09402088820934296 2023-01-22 10:57:45.548666: step: 106/466, loss: 0.0528925284743309 2023-01-22 10:57:46.251776: step: 108/466, loss: 0.10213060677051544 2023-01-22 10:57:46.924693: step: 110/466, loss: 0.511379599571228 2023-01-22 10:57:47.588141: step: 112/466, loss: 0.22382397949695587 2023-01-22 10:57:48.254855: step: 114/466, loss: 0.13445906341075897 2023-01-22 10:57:48.861197: step: 116/466, loss: 0.3029036521911621 2023-01-22 10:57:49.570557: step: 118/466, loss: 0.3222705125808716 2023-01-22 10:57:50.239514: step: 120/466, loss: 0.15039843320846558 2023-01-22 10:57:50.849691: step: 122/466, loss: 0.1167134940624237 2023-01-22 10:57:51.445546: step: 124/466, loss: 0.19666531682014465 2023-01-22 10:57:52.103530: step: 126/466, loss: 0.05981430411338806 2023-01-22 10:57:52.777336: step: 128/466, loss: 0.0997176542878151 2023-01-22 10:57:53.442323: step: 130/466, loss: 0.11331747472286224 2023-01-22 10:57:54.090957: step: 132/466, loss: 0.15961550176143646 2023-01-22 10:57:54.774990: step: 134/466, loss: 0.6935784816741943 2023-01-22 10:57:55.452051: step: 136/466, loss: 0.06055481359362602 2023-01-22 10:57:56.158812: step: 138/466, loss: 0.17825272679328918 2023-01-22 10:57:56.817232: step: 140/466, loss: 0.4913868308067322 2023-01-22 10:57:57.509839: step: 142/466, loss: 0.8437122702598572 2023-01-22 10:57:58.142133: step: 144/466, loss: 0.14781421422958374 2023-01-22 10:57:58.794220: step: 146/466, loss: 0.09563115239143372 2023-01-22 10:57:59.529689: step: 148/466, loss: 0.17472043633460999 2023-01-22 10:58:00.175569: step: 150/466, loss: 0.3626697063446045 2023-01-22 10:58:00.947121: step: 152/466, loss: 0.19643613696098328 2023-01-22 10:58:01.590313: step: 154/466, loss: 0.4196276366710663 2023-01-22 10:58:02.284712: step: 156/466, loss: 0.16844557225704193 2023-01-22 10:58:02.967666: step: 158/466, loss: 3.0080199241638184 2023-01-22 10:58:03.580652: step: 160/466, loss: 0.034158289432525635 2023-01-22 10:58:04.197175: step: 162/466, loss: 0.21539528667926788 2023-01-22 10:58:04.901231: step: 164/466, loss: 0.15623827278614044 2023-01-22 10:58:05.551858: step: 166/466, loss: 0.13679563999176025 2023-01-22 10:58:06.246368: step: 168/466, loss: 0.1737860143184662 2023-01-22 10:58:06.889078: step: 170/466, loss: 2.247715711593628 2023-01-22 10:58:07.587459: step: 172/466, loss: 0.19016523659229279 2023-01-22 10:58:08.368720: step: 174/466, loss: 0.13316655158996582 2023-01-22 10:58:09.039397: step: 176/466, loss: 0.3083028495311737 2023-01-22 10:58:09.730984: step: 178/466, loss: 0.07834689319133759 2023-01-22 10:58:10.437185: step: 180/466, loss: 0.31468725204467773 2023-01-22 10:58:11.112386: step: 182/466, loss: 0.2045125812292099 2023-01-22 10:58:11.836131: step: 184/466, loss: 0.31081730127334595 2023-01-22 10:58:12.489043: step: 186/466, loss: 0.34246203303337097 2023-01-22 10:58:13.166899: step: 188/466, loss: 0.09704740345478058 2023-01-22 10:58:13.812875: step: 190/466, loss: 0.03455285355448723 2023-01-22 10:58:14.509945: step: 192/466, loss: 0.13362093269824982 2023-01-22 10:58:15.100181: step: 194/466, loss: 0.08739200234413147 2023-01-22 10:58:15.755867: step: 196/466, loss: 0.6040061712265015 2023-01-22 10:58:16.519610: step: 198/466, loss: 0.09349583834409714 2023-01-22 10:58:17.200660: step: 200/466, loss: 0.3042532205581665 2023-01-22 10:58:17.867566: step: 202/466, loss: 0.11280182003974915 2023-01-22 10:58:18.525966: step: 204/466, loss: 0.21730904281139374 2023-01-22 10:58:19.190789: step: 206/466, loss: 0.5868622064590454 2023-01-22 10:58:19.845972: step: 208/466, loss: 0.1880406141281128 2023-01-22 10:58:20.532602: step: 210/466, loss: 0.08322806656360626 2023-01-22 10:58:21.182023: step: 212/466, loss: 0.08090437948703766 2023-01-22 10:58:21.793335: step: 214/466, loss: 0.4925406873226166 2023-01-22 10:58:22.371611: step: 216/466, loss: 0.14719174802303314 2023-01-22 10:58:23.043088: step: 218/466, loss: 0.07024139910936356 2023-01-22 10:58:23.701051: step: 220/466, loss: 0.3707457184791565 2023-01-22 10:58:24.321021: step: 222/466, loss: 0.10386056452989578 2023-01-22 10:58:24.923947: step: 224/466, loss: 0.10939282923936844 2023-01-22 10:58:25.555906: step: 226/466, loss: 0.23045538365840912 2023-01-22 10:58:26.230040: step: 228/466, loss: 0.4715212285518646 2023-01-22 10:58:26.822007: step: 230/466, loss: 0.06053485721349716 2023-01-22 10:58:27.470713: step: 232/466, loss: 0.08302391320466995 2023-01-22 10:58:28.164843: step: 234/466, loss: 0.2692738473415375 2023-01-22 10:58:28.795827: step: 236/466, loss: 0.07110875099897385 2023-01-22 10:58:29.423684: step: 238/466, loss: 0.10884547978639603 2023-01-22 10:58:30.110369: step: 240/466, loss: 0.18593835830688477 2023-01-22 10:58:30.815510: step: 242/466, loss: 0.24864543974399567 2023-01-22 10:58:31.469674: step: 244/466, loss: 2.9477343559265137 2023-01-22 10:58:32.203541: step: 246/466, loss: 0.1442636251449585 2023-01-22 10:58:32.834318: step: 248/466, loss: 0.107085682451725 2023-01-22 10:58:33.470732: step: 250/466, loss: 0.7243697047233582 2023-01-22 10:58:34.120838: step: 252/466, loss: 0.26127350330352783 2023-01-22 10:58:34.861246: step: 254/466, loss: 0.15399640798568726 2023-01-22 10:58:35.560762: step: 256/466, loss: 0.1712428778409958 2023-01-22 10:58:36.188035: step: 258/466, loss: 0.15944349765777588 2023-01-22 10:58:36.865149: step: 260/466, loss: 0.2489410638809204 2023-01-22 10:58:37.541698: step: 262/466, loss: 0.05128001049160957 2023-01-22 10:58:38.136426: step: 264/466, loss: 0.0540747307240963 2023-01-22 10:58:38.798071: step: 266/466, loss: 0.3385080099105835 2023-01-22 10:58:39.524519: step: 268/466, loss: 0.21575245261192322 2023-01-22 10:58:40.195266: step: 270/466, loss: 0.12338259816169739 2023-01-22 10:58:40.854068: step: 272/466, loss: 0.19334723055362701 2023-01-22 10:58:41.504647: step: 274/466, loss: 0.15544076263904572 2023-01-22 10:58:42.114224: step: 276/466, loss: 0.13702447712421417 2023-01-22 10:58:42.794718: step: 278/466, loss: 0.18537229299545288 2023-01-22 10:58:43.379033: step: 280/466, loss: 0.02131054364144802 2023-01-22 10:58:44.121651: step: 282/466, loss: 0.16477516293525696 2023-01-22 10:58:44.830317: step: 284/466, loss: 0.33140814304351807 2023-01-22 10:58:45.588745: step: 286/466, loss: 0.1358073502779007 2023-01-22 10:58:46.285046: step: 288/466, loss: 0.1653151959180832 2023-01-22 10:58:46.935725: step: 290/466, loss: 0.08968791365623474 2023-01-22 10:58:47.659059: step: 292/466, loss: 0.20049548149108887 2023-01-22 10:58:48.309057: step: 294/466, loss: 0.21416182816028595 2023-01-22 10:58:48.919592: step: 296/466, loss: 0.11540967226028442 2023-01-22 10:58:49.569603: step: 298/466, loss: 0.2275293916463852 2023-01-22 10:58:50.173860: step: 300/466, loss: 0.1523159146308899 2023-01-22 10:58:50.819613: step: 302/466, loss: 0.38231658935546875 2023-01-22 10:58:51.459072: step: 304/466, loss: 0.6548060774803162 2023-01-22 10:58:52.068737: step: 306/466, loss: 0.11360199004411697 2023-01-22 10:58:52.768825: step: 308/466, loss: 0.6178106665611267 2023-01-22 10:58:53.414590: step: 310/466, loss: 0.9341347813606262 2023-01-22 10:58:54.120812: step: 312/466, loss: 0.08135673403739929 2023-01-22 10:58:54.771160: step: 314/466, loss: 0.09795916080474854 2023-01-22 10:58:55.408121: step: 316/466, loss: 0.11982527375221252 2023-01-22 10:58:56.004654: step: 318/466, loss: 0.12153121083974838 2023-01-22 10:58:56.618213: step: 320/466, loss: 0.22064079344272614 2023-01-22 10:58:57.237040: step: 322/466, loss: 0.5312843918800354 2023-01-22 10:58:57.814551: step: 324/466, loss: 0.11995311826467514 2023-01-22 10:58:58.451606: step: 326/466, loss: 0.34171998500823975 2023-01-22 10:58:59.094083: step: 328/466, loss: 0.2656693756580353 2023-01-22 10:58:59.784771: step: 330/466, loss: 0.1589646339416504 2023-01-22 10:59:00.446837: step: 332/466, loss: 0.1326562464237213 2023-01-22 10:59:01.107559: step: 334/466, loss: 0.36644625663757324 2023-01-22 10:59:01.855350: step: 336/466, loss: 0.11936230212450027 2023-01-22 10:59:02.529263: step: 338/466, loss: 0.17160767316818237 2023-01-22 10:59:03.139402: step: 340/466, loss: 0.20013047754764557 2023-01-22 10:59:03.795821: step: 342/466, loss: 0.08777187019586563 2023-01-22 10:59:04.444245: step: 344/466, loss: 0.3543819189071655 2023-01-22 10:59:05.074280: step: 346/466, loss: 0.09497516602277756 2023-01-22 10:59:05.669746: step: 348/466, loss: 0.1696675419807434 2023-01-22 10:59:06.303778: step: 350/466, loss: 0.0606960654258728 2023-01-22 10:59:07.022602: step: 352/466, loss: 0.128300741314888 2023-01-22 10:59:07.693339: step: 354/466, loss: 0.1335906833410263 2023-01-22 10:59:08.344093: step: 356/466, loss: 0.291808545589447 2023-01-22 10:59:09.065043: step: 358/466, loss: 0.620805561542511 2023-01-22 10:59:09.709338: step: 360/466, loss: 0.23039770126342773 2023-01-22 10:59:10.379696: step: 362/466, loss: 0.0966557189822197 2023-01-22 10:59:11.053770: step: 364/466, loss: 0.321336954832077 2023-01-22 10:59:11.711744: step: 366/466, loss: 0.17469625174999237 2023-01-22 10:59:12.348790: step: 368/466, loss: 0.2617659866809845 2023-01-22 10:59:12.971364: step: 370/466, loss: 0.3682243525981903 2023-01-22 10:59:13.601398: step: 372/466, loss: 0.19373668730258942 2023-01-22 10:59:14.191527: step: 374/466, loss: 0.4200045168399811 2023-01-22 10:59:14.848819: step: 376/466, loss: 0.11585645377635956 2023-01-22 10:59:15.570150: step: 378/466, loss: 0.5483799576759338 2023-01-22 10:59:16.257918: step: 380/466, loss: 1.5028091669082642 2023-01-22 10:59:16.955867: step: 382/466, loss: 0.1518053114414215 2023-01-22 10:59:17.643462: step: 384/466, loss: 0.17489144206047058 2023-01-22 10:59:18.321484: step: 386/466, loss: 0.4138447940349579 2023-01-22 10:59:18.950905: step: 388/466, loss: 0.10435126721858978 2023-01-22 10:59:19.562894: step: 390/466, loss: 0.06659137457609177 2023-01-22 10:59:20.184569: step: 392/466, loss: 0.3992398679256439 2023-01-22 10:59:20.793356: step: 394/466, loss: 0.25137820839881897 2023-01-22 10:59:21.459004: step: 396/466, loss: 0.491972416639328 2023-01-22 10:59:22.122773: step: 398/466, loss: 0.28629037737846375 2023-01-22 10:59:22.743289: step: 400/466, loss: 0.24112729728221893 2023-01-22 10:59:23.358936: step: 402/466, loss: 0.21412010490894318 2023-01-22 10:59:24.058127: step: 404/466, loss: 0.14537203311920166 2023-01-22 10:59:24.722843: step: 406/466, loss: 0.2578541040420532 2023-01-22 10:59:25.356281: step: 408/466, loss: 0.23483307659626007 2023-01-22 10:59:26.043630: step: 410/466, loss: 0.21885579824447632 2023-01-22 10:59:26.698064: step: 412/466, loss: 0.05923938378691673 2023-01-22 10:59:27.337003: step: 414/466, loss: 0.1962558776140213 2023-01-22 10:59:28.031052: step: 416/466, loss: 0.09239842742681503 2023-01-22 10:59:28.670882: step: 418/466, loss: 0.3583330810070038 2023-01-22 10:59:29.324300: step: 420/466, loss: 0.3155226409435272 2023-01-22 10:59:29.964767: step: 422/466, loss: 0.1832641214132309 2023-01-22 10:59:30.625545: step: 424/466, loss: 0.27545690536499023 2023-01-22 10:59:31.308853: step: 426/466, loss: 0.19648611545562744 2023-01-22 10:59:32.048679: step: 428/466, loss: 0.15953874588012695 2023-01-22 10:59:32.766384: step: 430/466, loss: 0.10485388338565826 2023-01-22 10:59:33.388942: step: 432/466, loss: 0.10260970890522003 2023-01-22 10:59:33.989539: step: 434/466, loss: 0.3881319761276245 2023-01-22 10:59:34.678683: step: 436/466, loss: 0.09818845987319946 2023-01-22 10:59:35.272289: step: 438/466, loss: 0.25729766488075256 2023-01-22 10:59:35.967002: step: 440/466, loss: 1.216381311416626 2023-01-22 10:59:36.649522: step: 442/466, loss: 0.02328294888138771 2023-01-22 10:59:37.386237: step: 444/466, loss: 0.3791840076446533 2023-01-22 10:59:37.983357: step: 446/466, loss: 0.08898995816707611 2023-01-22 10:59:38.677685: step: 448/466, loss: 0.0699378028512001 2023-01-22 10:59:39.307981: step: 450/466, loss: 0.2938188314437866 2023-01-22 10:59:40.043933: step: 452/466, loss: 0.09860727190971375 2023-01-22 10:59:40.741132: step: 454/466, loss: 0.15588293969631195 2023-01-22 10:59:41.395292: step: 456/466, loss: 0.286975622177124 2023-01-22 10:59:42.044862: step: 458/466, loss: 0.12239721417427063 2023-01-22 10:59:42.729715: step: 460/466, loss: 0.061540111899375916 2023-01-22 10:59:43.407828: step: 462/466, loss: 0.19804197549819946 2023-01-22 10:59:44.051452: step: 464/466, loss: 0.04648022726178169 2023-01-22 10:59:44.703877: step: 466/466, loss: 0.33823028206825256 2023-01-22 10:59:45.362748: step: 468/466, loss: 1.070517897605896 2023-01-22 10:59:46.019255: step: 470/466, loss: 0.30970969796180725 2023-01-22 10:59:46.660984: step: 472/466, loss: 0.20133820176124573 2023-01-22 10:59:47.393423: step: 474/466, loss: 0.1282711923122406 2023-01-22 10:59:48.029930: step: 476/466, loss: 0.2471041977405548 2023-01-22 10:59:48.633827: step: 478/466, loss: 0.17406708002090454 2023-01-22 10:59:49.229494: step: 480/466, loss: 0.11726734787225723 2023-01-22 10:59:49.944514: step: 482/466, loss: 0.2776939868927002 2023-01-22 10:59:50.613728: step: 484/466, loss: 0.16853603720664978 2023-01-22 10:59:51.281993: step: 486/466, loss: 0.39846131205558777 2023-01-22 10:59:51.946669: step: 488/466, loss: 0.059623707085847855 2023-01-22 10:59:52.604659: step: 490/466, loss: 0.09924140572547913 2023-01-22 10:59:53.285515: step: 492/466, loss: 0.09131093323230743 2023-01-22 10:59:53.920111: step: 494/466, loss: 0.0609959177672863 2023-01-22 10:59:54.526162: step: 496/466, loss: 0.16200877726078033 2023-01-22 10:59:55.173912: step: 498/466, loss: 0.14968974888324738 2023-01-22 10:59:55.997014: step: 500/466, loss: 0.06152166798710823 2023-01-22 10:59:56.665719: step: 502/466, loss: 0.21100901067256927 2023-01-22 10:59:57.317044: step: 504/466, loss: 0.16417381167411804 2023-01-22 10:59:58.018016: step: 506/466, loss: 0.8375449180603027 2023-01-22 10:59:58.638428: step: 508/466, loss: 0.09576047211885452 2023-01-22 10:59:59.282150: step: 510/466, loss: 0.7462514638900757 2023-01-22 10:59:59.958896: step: 512/466, loss: 0.08928915858268738 2023-01-22 11:00:00.684451: step: 514/466, loss: 0.08065564185380936 2023-01-22 11:00:01.343773: step: 516/466, loss: 0.24708355963230133 2023-01-22 11:00:02.002737: step: 518/466, loss: 0.6688655018806458 2023-01-22 11:00:02.669626: step: 520/466, loss: 0.1237562745809555 2023-01-22 11:00:03.344171: step: 522/466, loss: 0.870952844619751 2023-01-22 11:00:04.077595: step: 524/466, loss: 0.19325175881385803 2023-01-22 11:00:04.860868: step: 526/466, loss: 0.11155366152524948 2023-01-22 11:00:05.474320: step: 528/466, loss: 0.49736759066581726 2023-01-22 11:00:06.115246: step: 530/466, loss: 0.2320837527513504 2023-01-22 11:00:06.822951: step: 532/466, loss: 0.9562352895736694 2023-01-22 11:00:07.445461: step: 534/466, loss: 0.19843395054340363 2023-01-22 11:00:08.131702: step: 536/466, loss: 0.1723707616329193 2023-01-22 11:00:08.846889: step: 538/466, loss: 0.2933112680912018 2023-01-22 11:00:09.534533: step: 540/466, loss: 0.16960805654525757 2023-01-22 11:00:10.172347: step: 542/466, loss: 0.07599765062332153 2023-01-22 11:00:10.826878: step: 544/466, loss: 0.13477495312690735 2023-01-22 11:00:11.553332: step: 546/466, loss: 0.1572151482105255 2023-01-22 11:00:12.265937: step: 548/466, loss: 0.3136172294616699 2023-01-22 11:00:12.901616: step: 550/466, loss: 0.6616876125335693 2023-01-22 11:00:13.491840: step: 552/466, loss: 0.18141965568065643 2023-01-22 11:00:14.147458: step: 554/466, loss: 0.14190468192100525 2023-01-22 11:00:14.792641: step: 556/466, loss: 0.4681580364704132 2023-01-22 11:00:15.440925: step: 558/466, loss: 0.26399046182632446 2023-01-22 11:00:16.006076: step: 560/466, loss: 0.10197646170854568 2023-01-22 11:00:16.667167: step: 562/466, loss: 0.14712463319301605 2023-01-22 11:00:17.366800: step: 564/466, loss: 0.8725566267967224 2023-01-22 11:00:18.041826: step: 566/466, loss: 0.23474819958209991 2023-01-22 11:00:18.690127: step: 568/466, loss: 0.07910080999135971 2023-01-22 11:00:19.351365: step: 570/466, loss: 0.2522532343864441 2023-01-22 11:00:20.010434: step: 572/466, loss: 0.08765573054552078 2023-01-22 11:00:20.670170: step: 574/466, loss: 0.1364997774362564 2023-01-22 11:00:21.308224: step: 576/466, loss: 0.1621016561985016 2023-01-22 11:00:21.944588: step: 578/466, loss: 0.33065852522850037 2023-01-22 11:00:22.551244: step: 580/466, loss: 0.29974791407585144 2023-01-22 11:00:23.207300: step: 582/466, loss: 0.24464833736419678 2023-01-22 11:00:23.845753: step: 584/466, loss: 0.41071781516075134 2023-01-22 11:00:24.505397: step: 586/466, loss: 0.05391302332282066 2023-01-22 11:00:25.153317: step: 588/466, loss: 0.09712765365839005 2023-01-22 11:00:25.812429: step: 590/466, loss: 0.09114282578229904 2023-01-22 11:00:26.448414: step: 592/466, loss: 0.12766587734222412 2023-01-22 11:00:27.094439: step: 594/466, loss: 0.41201990842819214 2023-01-22 11:00:27.711173: step: 596/466, loss: 1.335031509399414 2023-01-22 11:00:28.401488: step: 598/466, loss: 0.4613915681838989 2023-01-22 11:00:29.060547: step: 600/466, loss: 0.20050835609436035 2023-01-22 11:00:29.627320: step: 602/466, loss: 0.22310605645179749 2023-01-22 11:00:30.280883: step: 604/466, loss: 0.4295780658721924 2023-01-22 11:00:30.892170: step: 606/466, loss: 0.08041802048683167 2023-01-22 11:00:31.571031: step: 608/466, loss: 0.09882882237434387 2023-01-22 11:00:32.319230: step: 610/466, loss: 0.07247479259967804 2023-01-22 11:00:33.030250: step: 612/466, loss: 0.13833218812942505 2023-01-22 11:00:33.699341: step: 614/466, loss: 0.27434587478637695 2023-01-22 11:00:34.370117: step: 616/466, loss: 0.5944318771362305 2023-01-22 11:00:35.031583: step: 618/466, loss: 0.17174427211284637 2023-01-22 11:00:35.684829: step: 620/466, loss: 0.14457230269908905 2023-01-22 11:00:36.249697: step: 622/466, loss: 0.07766750454902649 2023-01-22 11:00:36.906579: step: 624/466, loss: 0.10745465010404587 2023-01-22 11:00:37.589557: step: 626/466, loss: 0.4946940541267395 2023-01-22 11:00:38.217392: step: 628/466, loss: 0.15756690502166748 2023-01-22 11:00:38.881944: step: 630/466, loss: 0.09950481355190277 2023-01-22 11:00:39.554486: step: 632/466, loss: 0.18794941902160645 2023-01-22 11:00:40.169733: step: 634/466, loss: 0.10358545929193497 2023-01-22 11:00:40.832383: step: 636/466, loss: 0.16333678364753723 2023-01-22 11:00:41.465879: step: 638/466, loss: 0.5597032308578491 2023-01-22 11:00:42.115084: step: 640/466, loss: 0.18828891217708588 2023-01-22 11:00:42.790591: step: 642/466, loss: 0.12340757995843887 2023-01-22 11:00:43.437659: step: 644/466, loss: 0.17228133976459503 2023-01-22 11:00:44.082139: step: 646/466, loss: 0.12330249696969986 2023-01-22 11:00:44.773169: step: 648/466, loss: 0.36004215478897095 2023-01-22 11:00:45.391942: step: 650/466, loss: 0.32791563868522644 2023-01-22 11:00:46.186404: step: 652/466, loss: 0.36322280764579773 2023-01-22 11:00:46.882979: step: 654/466, loss: 0.09934289753437042 2023-01-22 11:00:47.553450: step: 656/466, loss: 0.12922798097133636 2023-01-22 11:00:48.162395: step: 658/466, loss: 0.14867113530635834 2023-01-22 11:00:48.916668: step: 660/466, loss: 0.1516675502061844 2023-01-22 11:00:49.587849: step: 662/466, loss: 0.2672687768936157 2023-01-22 11:00:50.417104: step: 664/466, loss: 0.10394330322742462 2023-01-22 11:00:51.091340: step: 666/466, loss: 0.36707255244255066 2023-01-22 11:00:51.697899: step: 668/466, loss: 0.15134350955486298 2023-01-22 11:00:52.389172: step: 670/466, loss: 0.16136594116687775 2023-01-22 11:00:53.080881: step: 672/466, loss: 0.052661944180727005 2023-01-22 11:00:53.765883: step: 674/466, loss: 0.24297872185707092 2023-01-22 11:00:54.420222: step: 676/466, loss: 0.17728376388549805 2023-01-22 11:00:55.035463: step: 678/466, loss: 0.22896040976047516 2023-01-22 11:00:55.695140: step: 680/466, loss: 0.8838499188423157 2023-01-22 11:00:56.331967: step: 682/466, loss: 0.19494293630123138 2023-01-22 11:00:56.964951: step: 684/466, loss: 0.10196879506111145 2023-01-22 11:00:57.652485: step: 686/466, loss: 0.21828074753284454 2023-01-22 11:00:58.272593: step: 688/466, loss: 0.3562825918197632 2023-01-22 11:00:58.991455: step: 690/466, loss: 0.1883367896080017 2023-01-22 11:00:59.614900: step: 692/466, loss: 0.12125629931688309 2023-01-22 11:01:00.241377: step: 694/466, loss: 0.18721602857112885 2023-01-22 11:01:00.924829: step: 696/466, loss: 0.16550004482269287 2023-01-22 11:01:01.599298: step: 698/466, loss: 0.25382041931152344 2023-01-22 11:01:02.302343: step: 700/466, loss: 0.6556934714317322 2023-01-22 11:01:02.914531: step: 702/466, loss: 0.07627542316913605 2023-01-22 11:01:03.601521: step: 704/466, loss: 4.631105422973633 2023-01-22 11:01:04.264405: step: 706/466, loss: 0.10441194474697113 2023-01-22 11:01:04.984690: step: 708/466, loss: 0.26737096905708313 2023-01-22 11:01:05.647329: step: 710/466, loss: 0.14205802977085114 2023-01-22 11:01:06.272097: step: 712/466, loss: 0.07517994195222855 2023-01-22 11:01:06.927621: step: 714/466, loss: 0.14049071073532104 2023-01-22 11:01:07.472659: step: 716/466, loss: 2.2365288734436035 2023-01-22 11:01:08.124885: step: 718/466, loss: 0.07635090500116348 2023-01-22 11:01:08.765455: step: 720/466, loss: 0.17170868813991547 2023-01-22 11:01:09.395683: step: 722/466, loss: 0.8543318510055542 2023-01-22 11:01:10.043730: step: 724/466, loss: 0.4963378310203552 2023-01-22 11:01:10.644496: step: 726/466, loss: 0.05161907896399498 2023-01-22 11:01:11.348245: step: 728/466, loss: 0.2685733735561371 2023-01-22 11:01:12.005301: step: 730/466, loss: 0.1145210936665535 2023-01-22 11:01:12.713330: step: 732/466, loss: 0.1569463610649109 2023-01-22 11:01:13.336610: step: 734/466, loss: 0.5626563429832458 2023-01-22 11:01:13.980422: step: 736/466, loss: 0.23395347595214844 2023-01-22 11:01:14.640658: step: 738/466, loss: 0.1768762171268463 2023-01-22 11:01:15.265643: step: 740/466, loss: 0.21685035526752472 2023-01-22 11:01:15.903321: step: 742/466, loss: 0.30971992015838623 2023-01-22 11:01:16.531088: step: 744/466, loss: 0.7027494311332703 2023-01-22 11:01:17.219864: step: 746/466, loss: 0.4980536997318268 2023-01-22 11:01:17.877877: step: 748/466, loss: 0.8457667827606201 2023-01-22 11:01:18.509048: step: 750/466, loss: 0.17517438530921936 2023-01-22 11:01:19.086725: step: 752/466, loss: 0.3755433261394501 2023-01-22 11:01:19.732923: step: 754/466, loss: 0.1993408501148224 2023-01-22 11:01:20.384668: step: 756/466, loss: 0.373582661151886 2023-01-22 11:01:21.055285: step: 758/466, loss: 0.16973261535167694 2023-01-22 11:01:21.713441: step: 760/466, loss: 0.18853074312210083 2023-01-22 11:01:22.430876: step: 762/466, loss: 0.40229472517967224 2023-01-22 11:01:23.120943: step: 764/466, loss: 0.16277316212654114 2023-01-22 11:01:23.781600: step: 766/466, loss: 0.18618665635585785 2023-01-22 11:01:24.468335: step: 768/466, loss: 0.5071804523468018 2023-01-22 11:01:25.175116: step: 770/466, loss: 0.25484156608581543 2023-01-22 11:01:25.833938: step: 772/466, loss: 0.4185568392276764 2023-01-22 11:01:26.463615: step: 774/466, loss: 0.1737195998430252 2023-01-22 11:01:27.144223: step: 776/466, loss: 0.3032733201980591 2023-01-22 11:01:27.831643: step: 778/466, loss: 0.6785239577293396 2023-01-22 11:01:28.526691: step: 780/466, loss: 0.09707774966955185 2023-01-22 11:01:29.155034: step: 782/466, loss: 0.1329166740179062 2023-01-22 11:01:29.882568: step: 784/466, loss: 0.09093427658081055 2023-01-22 11:01:30.557909: step: 786/466, loss: 0.14941225945949554 2023-01-22 11:01:31.245076: step: 788/466, loss: 0.05523379519581795 2023-01-22 11:01:31.884915: step: 790/466, loss: 0.1023048460483551 2023-01-22 11:01:32.611228: step: 792/466, loss: 0.157408207654953 2023-01-22 11:01:33.228772: step: 794/466, loss: 0.1586690992116928 2023-01-22 11:01:33.866316: step: 796/466, loss: 0.12543946504592896 2023-01-22 11:01:34.577674: step: 798/466, loss: 0.4799301028251648 2023-01-22 11:01:35.213496: step: 800/466, loss: 0.08886481821537018 2023-01-22 11:01:35.847828: step: 802/466, loss: 0.16303367912769318 2023-01-22 11:01:36.598221: step: 804/466, loss: 0.8298637270927429 2023-01-22 11:01:37.246127: step: 806/466, loss: 0.23885048925876617 2023-01-22 11:01:37.853906: step: 808/466, loss: 0.07132650911808014 2023-01-22 11:01:38.538947: step: 810/466, loss: 0.32651305198669434 2023-01-22 11:01:39.244561: step: 812/466, loss: 0.39218130707740784 2023-01-22 11:01:39.869640: step: 814/466, loss: 0.1230127364397049 2023-01-22 11:01:40.594458: step: 816/466, loss: 0.23656906187534332 2023-01-22 11:01:41.315140: step: 818/466, loss: 0.27691248059272766 2023-01-22 11:01:41.958549: step: 820/466, loss: 0.07484827935695648 2023-01-22 11:01:42.641324: step: 822/466, loss: 0.18714210391044617 2023-01-22 11:01:43.233403: step: 824/466, loss: 0.05669151246547699 2023-01-22 11:01:43.941886: step: 826/466, loss: 0.615753710269928 2023-01-22 11:01:44.606842: step: 828/466, loss: 0.18706496059894562 2023-01-22 11:01:45.260729: step: 830/466, loss: 0.13120721280574799 2023-01-22 11:01:45.998828: step: 832/466, loss: 0.17712688446044922 2023-01-22 11:01:46.635484: step: 834/466, loss: 0.1563715785741806 2023-01-22 11:01:47.330451: step: 836/466, loss: 0.0996440052986145 2023-01-22 11:01:48.011585: step: 838/466, loss: 0.1388978511095047 2023-01-22 11:01:48.709212: step: 840/466, loss: 0.48037368059158325 2023-01-22 11:01:49.360656: step: 842/466, loss: 0.26534557342529297 2023-01-22 11:01:50.044544: step: 844/466, loss: 0.1332325041294098 2023-01-22 11:01:50.684676: step: 846/466, loss: 0.11895450204610825 2023-01-22 11:01:51.341547: step: 848/466, loss: 0.16911429166793823 2023-01-22 11:01:51.984310: step: 850/466, loss: 0.16510894894599915 2023-01-22 11:01:52.635381: step: 852/466, loss: 0.21481208503246307 2023-01-22 11:01:53.260622: step: 854/466, loss: 0.17886097729206085 2023-01-22 11:01:53.883717: step: 856/466, loss: 0.25372764468193054 2023-01-22 11:01:54.522606: step: 858/466, loss: 0.4483509659767151 2023-01-22 11:01:55.159107: step: 860/466, loss: 0.14233942329883575 2023-01-22 11:01:55.711061: step: 862/466, loss: 0.1988704800605774 2023-01-22 11:01:56.353760: step: 864/466, loss: 0.19561944901943207 2023-01-22 11:01:57.051307: step: 866/466, loss: 0.4714718163013458 2023-01-22 11:01:57.711503: step: 868/466, loss: 0.22195981442928314 2023-01-22 11:01:58.354612: step: 870/466, loss: 0.13857711851596832 2023-01-22 11:01:58.962120: step: 872/466, loss: 1.053472876548767 2023-01-22 11:01:59.571233: step: 874/466, loss: 0.19076867401599884 2023-01-22 11:02:00.255052: step: 876/466, loss: 0.1434697061777115 2023-01-22 11:02:00.910037: step: 878/466, loss: 0.09571454674005508 2023-01-22 11:02:01.560297: step: 880/466, loss: 0.2282378375530243 2023-01-22 11:02:02.230809: step: 882/466, loss: 0.059323765337467194 2023-01-22 11:02:02.863720: step: 884/466, loss: 0.05962638556957245 2023-01-22 11:02:03.457505: step: 886/466, loss: 0.04837535694241524 2023-01-22 11:02:04.146645: step: 888/466, loss: 0.09476480633020401 2023-01-22 11:02:04.835079: step: 890/466, loss: 0.14488112926483154 2023-01-22 11:02:05.473737: step: 892/466, loss: 0.449963241815567 2023-01-22 11:02:06.182628: step: 894/466, loss: 0.2974949777126312 2023-01-22 11:02:06.841083: step: 896/466, loss: 0.8873539566993713 2023-01-22 11:02:07.533125: step: 898/466, loss: 1.1094868183135986 2023-01-22 11:02:08.171974: step: 900/466, loss: 1.1049017906188965 2023-01-22 11:02:08.788649: step: 902/466, loss: 0.2315780371427536 2023-01-22 11:02:09.433724: step: 904/466, loss: 1.0686709880828857 2023-01-22 11:02:10.116037: step: 906/466, loss: 0.27643969655036926 2023-01-22 11:02:10.856209: step: 908/466, loss: 0.40389660000801086 2023-01-22 11:02:11.426147: step: 910/466, loss: 0.2511165142059326 2023-01-22 11:02:12.052026: step: 912/466, loss: 0.08071059733629227 2023-01-22 11:02:12.705664: step: 914/466, loss: 0.34828317165374756 2023-01-22 11:02:13.374003: step: 916/466, loss: 0.15177913010120392 2023-01-22 11:02:14.027638: step: 918/466, loss: 0.2506985664367676 2023-01-22 11:02:14.630412: step: 920/466, loss: 0.40900301933288574 2023-01-22 11:02:15.291409: step: 922/466, loss: 0.2533995509147644 2023-01-22 11:02:16.062323: step: 924/466, loss: 0.17880086600780487 2023-01-22 11:02:16.669328: step: 926/466, loss: 0.23364755511283875 2023-01-22 11:02:17.276890: step: 928/466, loss: 0.056145116686820984 2023-01-22 11:02:17.920870: step: 930/466, loss: 0.06831599771976471 2023-01-22 11:02:18.731465: step: 932/466, loss: 0.7609554529190063 ================================================== Loss: 0.293 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2995272812011052, 'r': 0.33476578487182346, 'f1': 0.31616768571227777}, 'combined': 0.23296566315641518, 'epoch': 12} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34775749397833156, 'r': 0.306579640326619, 'f1': 0.3258728928414315}, 'combined': 0.21612295483784055, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29399730392156864, 'r': 0.27840653780451574, 'f1': 0.2859895952544442}, 'combined': 0.19065973016962945, 'epoch': 12} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35941700967573137, 'r': 0.293560133226937, 'f1': 0.3231675301080553}, 'combined': 0.21090933543894133, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2848071123859959, 'r': 0.3339863291357599, 'f1': 0.30744243747518857}, 'combined': 0.22653653287645473, 'epoch': 12} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3423216636072038, 'r': 0.2983889091907754, 'f1': 0.31884907829773607}, 'combined': 0.2114646736896902, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27721088435374147, 'r': 0.38809523809523805, 'f1': 0.3234126984126983}, 'combined': 0.21560846560846553, 'epoch': 12} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.43478260869565216, 'f1': 0.425531914893617}, 'combined': 0.2836879432624113, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3026315789473684, 'r': 0.19827586206896552, 'f1': 0.23958333333333337}, 'combined': 0.15972222222222224, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2791124217992775, 'r': 0.3076580103923854, 'f1': 0.2926908639408639}, 'combined': 0.1951272426272426, 'epoch': 10} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.34934765710323107, 'r': 0.3114339578827254, 'f1': 0.32930311940058665}, 'combined': 0.21491361476669862, 'epoch': 10} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.4782608695652174, 'f1': 0.4680851063829787}, 'combined': 0.31205673758865243, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2814701218024629, 'r': 0.3551757703958972, 'f1': 0.3140564278500635}, 'combined': 0.23140999946846783, 'epoch': 9} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3308734183854636, 'r': 0.29166091118926246, 'f1': 0.3100321961699194}, 'combined': 0.2056172078225372, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2413793103448276, 'f1': 0.28}, 'combined': 0.18666666666666668, 'epoch': 9} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:05:02.559923: step: 2/466, loss: 0.525618314743042 2023-01-22 11:05:03.198191: step: 4/466, loss: 0.12188282608985901 2023-01-22 11:05:03.808757: step: 6/466, loss: 0.18711575865745544 2023-01-22 11:05:04.485229: step: 8/466, loss: 0.07466843724250793 2023-01-22 11:05:05.119763: step: 10/466, loss: 0.2744048237800598 2023-01-22 11:05:05.747246: step: 12/466, loss: 1.0381118059158325 2023-01-22 11:05:06.421994: step: 14/466, loss: 0.10302115976810455 2023-01-22 11:05:07.032703: step: 16/466, loss: 0.1746019423007965 2023-01-22 11:05:07.749220: step: 18/466, loss: 0.2092004269361496 2023-01-22 11:05:08.393077: step: 20/466, loss: 0.7763059139251709 2023-01-22 11:05:09.043077: step: 22/466, loss: 0.1145629957318306 2023-01-22 11:05:09.720854: step: 24/466, loss: 0.32337671518325806 2023-01-22 11:05:10.393630: step: 26/466, loss: 1.5217716693878174 2023-01-22 11:05:11.003840: step: 28/466, loss: 0.07874564826488495 2023-01-22 11:05:11.685842: step: 30/466, loss: 0.1454867422580719 2023-01-22 11:05:12.279112: step: 32/466, loss: 0.6862086653709412 2023-01-22 11:05:12.985937: step: 34/466, loss: 0.3887907862663269 2023-01-22 11:05:13.601914: step: 36/466, loss: 0.04609563574194908 2023-01-22 11:05:14.196226: step: 38/466, loss: 0.06378234922885895 2023-01-22 11:05:14.829367: step: 40/466, loss: 0.07670899480581284 2023-01-22 11:05:15.477663: step: 42/466, loss: 0.030169617384672165 2023-01-22 11:05:16.104525: step: 44/466, loss: 0.19305665791034698 2023-01-22 11:05:16.818622: step: 46/466, loss: 0.04287484288215637 2023-01-22 11:05:17.538251: step: 48/466, loss: 0.40193885564804077 2023-01-22 11:05:18.202990: step: 50/466, loss: 0.06653300672769547 2023-01-22 11:05:18.895163: step: 52/466, loss: 0.18866319954395294 2023-01-22 11:05:19.551311: step: 54/466, loss: 0.16975773870944977 2023-01-22 11:05:20.162381: step: 56/466, loss: 0.06851798295974731 2023-01-22 11:05:20.833236: step: 58/466, loss: 0.1637471318244934 2023-01-22 11:05:21.491703: step: 60/466, loss: 0.0365801677107811 2023-01-22 11:05:22.127552: step: 62/466, loss: 0.04194226488471031 2023-01-22 11:05:22.918874: step: 64/466, loss: 0.029038073495030403 2023-01-22 11:05:23.559937: step: 66/466, loss: 0.11975918710231781 2023-01-22 11:05:24.213125: step: 68/466, loss: 0.1207805871963501 2023-01-22 11:05:24.869007: step: 70/466, loss: 0.2956823408603668 2023-01-22 11:05:25.478403: step: 72/466, loss: 0.10991843044757843 2023-01-22 11:05:26.180760: step: 74/466, loss: 0.8458861708641052 2023-01-22 11:05:26.833264: step: 76/466, loss: 0.12933480739593506 2023-01-22 11:05:27.478545: step: 78/466, loss: 0.33300691843032837 2023-01-22 11:05:28.103876: step: 80/466, loss: 0.2356657087802887 2023-01-22 11:05:28.770893: step: 82/466, loss: 0.21511326730251312 2023-01-22 11:05:29.449987: step: 84/466, loss: 0.20533886551856995 2023-01-22 11:05:30.140217: step: 86/466, loss: 0.09611569344997406 2023-01-22 11:05:30.795521: step: 88/466, loss: 0.37041494250297546 2023-01-22 11:05:31.443778: step: 90/466, loss: 0.17537567019462585 2023-01-22 11:05:32.154511: step: 92/466, loss: 0.13145779073238373 2023-01-22 11:05:32.783389: step: 94/466, loss: 0.13477493822574615 2023-01-22 11:05:33.469218: step: 96/466, loss: 0.515109121799469 2023-01-22 11:05:34.055130: step: 98/466, loss: 0.6469271779060364 2023-01-22 11:05:34.788669: step: 100/466, loss: 0.39150166511535645 2023-01-22 11:05:35.506744: step: 102/466, loss: 0.3539683520793915 2023-01-22 11:05:36.131322: step: 104/466, loss: 0.07924119383096695 2023-01-22 11:05:36.806007: step: 106/466, loss: 0.6557603478431702 2023-01-22 11:05:37.470705: step: 108/466, loss: 0.16338536143302917 2023-01-22 11:05:38.073517: step: 110/466, loss: 0.05812246352434158 2023-01-22 11:05:38.767939: step: 112/466, loss: 0.08773166686296463 2023-01-22 11:05:39.474891: step: 114/466, loss: 0.17465516924858093 2023-01-22 11:05:40.155511: step: 116/466, loss: 0.20950469374656677 2023-01-22 11:05:40.784204: step: 118/466, loss: 0.17099221050739288 2023-01-22 11:05:41.474456: step: 120/466, loss: 0.20525000989437103 2023-01-22 11:05:42.163405: step: 122/466, loss: 0.1835314929485321 2023-01-22 11:05:42.876227: step: 124/466, loss: 0.22093580663204193 2023-01-22 11:05:43.512169: step: 126/466, loss: 0.08336763083934784 2023-01-22 11:05:44.173185: step: 128/466, loss: 0.21546803414821625 2023-01-22 11:05:44.898771: step: 130/466, loss: 0.09574634581804276 2023-01-22 11:05:45.512079: step: 132/466, loss: 0.07790052890777588 2023-01-22 11:05:46.256899: step: 134/466, loss: 0.21874640882015228 2023-01-22 11:05:46.891990: step: 136/466, loss: 0.12523621320724487 2023-01-22 11:05:47.530990: step: 138/466, loss: 0.054812103509902954 2023-01-22 11:05:48.168383: step: 140/466, loss: 0.09920776635408401 2023-01-22 11:05:48.831875: step: 142/466, loss: 0.18406184017658234 2023-01-22 11:05:49.435124: step: 144/466, loss: 0.17157801985740662 2023-01-22 11:05:50.114767: step: 146/466, loss: 0.12519454956054688 2023-01-22 11:05:50.804000: step: 148/466, loss: 0.09784115105867386 2023-01-22 11:05:51.526141: step: 150/466, loss: 0.25946009159088135 2023-01-22 11:05:52.201095: step: 152/466, loss: 1.248733639717102 2023-01-22 11:05:52.912920: step: 154/466, loss: 0.13074356317520142 2023-01-22 11:05:53.594441: step: 156/466, loss: 0.3054160177707672 2023-01-22 11:05:54.219876: step: 158/466, loss: 0.18431904911994934 2023-01-22 11:05:54.861780: step: 160/466, loss: 0.17071475088596344 2023-01-22 11:05:55.534456: step: 162/466, loss: 0.38476401567459106 2023-01-22 11:05:56.216115: step: 164/466, loss: 0.1319192796945572 2023-01-22 11:05:56.830938: step: 166/466, loss: 0.5609253644943237 2023-01-22 11:05:57.554661: step: 168/466, loss: 0.11155123263597488 2023-01-22 11:05:58.292655: step: 170/466, loss: 0.07531193643808365 2023-01-22 11:05:59.023674: step: 172/466, loss: 0.12356498837471008 2023-01-22 11:05:59.694717: step: 174/466, loss: 0.09486281126737595 2023-01-22 11:06:00.326017: step: 176/466, loss: 0.21441110968589783 2023-01-22 11:06:01.010047: step: 178/466, loss: 0.1300574094057083 2023-01-22 11:06:01.679490: step: 180/466, loss: 0.16298283636569977 2023-01-22 11:06:02.393088: step: 182/466, loss: 0.3968439996242523 2023-01-22 11:06:03.010036: step: 184/466, loss: 0.17258816957473755 2023-01-22 11:06:03.699483: step: 186/466, loss: 0.10586294531822205 2023-01-22 11:06:04.322882: step: 188/466, loss: 0.07512211799621582 2023-01-22 11:06:04.978342: step: 190/466, loss: 0.109731025993824 2023-01-22 11:06:05.616494: step: 192/466, loss: 0.08937923610210419 2023-01-22 11:06:06.316121: step: 194/466, loss: 0.10278814285993576 2023-01-22 11:06:07.072114: step: 196/466, loss: 0.19093210995197296 2023-01-22 11:06:07.763546: step: 198/466, loss: 0.09084939956665039 2023-01-22 11:06:08.395286: step: 200/466, loss: 0.174184188246727 2023-01-22 11:06:09.098972: step: 202/466, loss: 0.21613729000091553 2023-01-22 11:06:09.702192: step: 204/466, loss: 0.17608675360679626 2023-01-22 11:06:10.349130: step: 206/466, loss: 0.1232207715511322 2023-01-22 11:06:10.972779: step: 208/466, loss: 0.126413956284523 2023-01-22 11:06:11.635259: step: 210/466, loss: 0.0623779371380806 2023-01-22 11:06:12.302242: step: 212/466, loss: 0.20897889137268066 2023-01-22 11:06:12.946518: step: 214/466, loss: 0.04284898564219475 2023-01-22 11:06:13.579384: step: 216/466, loss: 0.07657509297132492 2023-01-22 11:06:14.246569: step: 218/466, loss: 0.1104479730129242 2023-01-22 11:06:14.878847: step: 220/466, loss: 0.134963259100914 2023-01-22 11:06:15.555532: step: 222/466, loss: 0.15580850839614868 2023-01-22 11:06:16.219160: step: 224/466, loss: 0.12098768353462219 2023-01-22 11:06:16.859865: step: 226/466, loss: 0.15912289917469025 2023-01-22 11:06:17.466762: step: 228/466, loss: 0.13218237459659576 2023-01-22 11:06:18.096597: step: 230/466, loss: 0.5889711380004883 2023-01-22 11:06:18.757269: step: 232/466, loss: 0.08734430372714996 2023-01-22 11:06:19.422461: step: 234/466, loss: 0.07764595746994019 2023-01-22 11:06:20.151640: step: 236/466, loss: 0.03637455403804779 2023-01-22 11:06:20.805116: step: 238/466, loss: 0.10069497674703598 2023-01-22 11:06:21.440393: step: 240/466, loss: 0.17100438475608826 2023-01-22 11:06:22.052025: step: 242/466, loss: 0.20142343640327454 2023-01-22 11:06:22.779697: step: 244/466, loss: 0.0414368100464344 2023-01-22 11:06:23.509796: step: 246/466, loss: 0.45548686385154724 2023-01-22 11:06:24.254084: step: 248/466, loss: 0.2700558006763458 2023-01-22 11:06:24.869903: step: 250/466, loss: 0.045008108019828796 2023-01-22 11:06:25.529816: step: 252/466, loss: 0.1975070983171463 2023-01-22 11:06:26.185426: step: 254/466, loss: 0.2863794267177582 2023-01-22 11:06:26.875917: step: 256/466, loss: 0.2864835560321808 2023-01-22 11:06:27.617798: step: 258/466, loss: 0.0764208734035492 2023-01-22 11:06:28.288652: step: 260/466, loss: 0.13654173910617828 2023-01-22 11:06:28.918102: step: 262/466, loss: 0.5410063862800598 2023-01-22 11:06:29.600560: step: 264/466, loss: 0.40156546235084534 2023-01-22 11:06:30.201727: step: 266/466, loss: 0.08285211026668549 2023-01-22 11:06:30.849984: step: 268/466, loss: 0.11998789012432098 2023-01-22 11:06:31.516730: step: 270/466, loss: 0.37964218854904175 2023-01-22 11:06:32.154365: step: 272/466, loss: 0.0892152190208435 2023-01-22 11:06:32.757072: step: 274/466, loss: 0.15910491347312927 2023-01-22 11:06:33.417899: step: 276/466, loss: 0.24213582277297974 2023-01-22 11:06:34.004666: step: 278/466, loss: 0.0799097791314125 2023-01-22 11:06:34.638930: step: 280/466, loss: 0.14344455301761627 2023-01-22 11:06:35.223473: step: 282/466, loss: 0.1222216784954071 2023-01-22 11:06:35.879457: step: 284/466, loss: 0.20590557157993317 2023-01-22 11:06:36.544240: step: 286/466, loss: 0.0666474997997284 2023-01-22 11:06:37.253287: step: 288/466, loss: 0.09086759388446808 2023-01-22 11:06:37.892422: step: 290/466, loss: 0.14365486800670624 2023-01-22 11:06:38.534975: step: 292/466, loss: 0.2211560308933258 2023-01-22 11:06:39.154527: step: 294/466, loss: 0.14865663647651672 2023-01-22 11:06:39.809827: step: 296/466, loss: 0.16953952610492706 2023-01-22 11:06:40.442351: step: 298/466, loss: 0.15864787995815277 2023-01-22 11:06:41.157950: step: 300/466, loss: 0.8891596794128418 2023-01-22 11:06:41.852124: step: 302/466, loss: 0.06364311277866364 2023-01-22 11:06:42.582567: step: 304/466, loss: 0.08998765796422958 2023-01-22 11:06:43.229313: step: 306/466, loss: 0.03975175321102142 2023-01-22 11:06:43.921022: step: 308/466, loss: 0.38749098777770996 2023-01-22 11:06:44.573484: step: 310/466, loss: 0.09961052238941193 2023-01-22 11:06:45.287812: step: 312/466, loss: 0.22312025725841522 2023-01-22 11:06:45.937394: step: 314/466, loss: 0.18734197318553925 2023-01-22 11:06:46.557414: step: 316/466, loss: 0.22206509113311768 2023-01-22 11:06:47.248051: step: 318/466, loss: 0.16230708360671997 2023-01-22 11:06:47.918738: step: 320/466, loss: 0.6054675579071045 2023-01-22 11:06:48.647988: step: 322/466, loss: 0.10808802396059036 2023-01-22 11:06:49.269504: step: 324/466, loss: 0.23263190686702728 2023-01-22 11:06:49.919866: step: 326/466, loss: 0.1171720027923584 2023-01-22 11:06:50.587937: step: 328/466, loss: 0.22656656801700592 2023-01-22 11:06:51.223404: step: 330/466, loss: 0.09640899300575256 2023-01-22 11:06:51.938165: step: 332/466, loss: 0.10093360394239426 2023-01-22 11:06:52.564788: step: 334/466, loss: 0.05876747891306877 2023-01-22 11:06:53.314849: step: 336/466, loss: 0.35687941312789917 2023-01-22 11:06:53.962960: step: 338/466, loss: 0.23797641694545746 2023-01-22 11:06:54.599312: step: 340/466, loss: 0.36994874477386475 2023-01-22 11:06:55.242771: step: 342/466, loss: 0.05669309198856354 2023-01-22 11:06:55.894193: step: 344/466, loss: 0.160336434841156 2023-01-22 11:06:56.546571: step: 346/466, loss: 0.07873602211475372 2023-01-22 11:06:57.151073: step: 348/466, loss: 0.3927478492259979 2023-01-22 11:06:57.811594: step: 350/466, loss: 0.07420491427183151 2023-01-22 11:06:58.539248: step: 352/466, loss: 0.26476380228996277 2023-01-22 11:06:59.166207: step: 354/466, loss: 0.11637300252914429 2023-01-22 11:06:59.811410: step: 356/466, loss: 0.44961050152778625 2023-01-22 11:07:00.428819: step: 358/466, loss: 0.18461167812347412 2023-01-22 11:07:01.038559: step: 360/466, loss: 0.08700277656316757 2023-01-22 11:07:01.728400: step: 362/466, loss: 0.20201227068901062 2023-01-22 11:07:02.449734: step: 364/466, loss: 0.40742918848991394 2023-01-22 11:07:03.108859: step: 366/466, loss: 0.24917148053646088 2023-01-22 11:07:03.755169: step: 368/466, loss: 0.21557331085205078 2023-01-22 11:07:04.533169: step: 370/466, loss: 2.6784920692443848 2023-01-22 11:07:05.199733: step: 372/466, loss: 0.12315894663333893 2023-01-22 11:07:05.807864: step: 374/466, loss: 0.22482457756996155 2023-01-22 11:07:06.475774: step: 376/466, loss: 0.13915163278579712 2023-01-22 11:07:07.038175: step: 378/466, loss: 0.052151940762996674 2023-01-22 11:07:07.717135: step: 380/466, loss: 0.16188733279705048 2023-01-22 11:07:08.318349: step: 382/466, loss: 0.28139740228652954 2023-01-22 11:07:08.922252: step: 384/466, loss: 0.0629613995552063 2023-01-22 11:07:09.673621: step: 386/466, loss: 0.038815055042505264 2023-01-22 11:07:10.381210: step: 388/466, loss: 0.1372925341129303 2023-01-22 11:07:11.068384: step: 390/466, loss: 0.23523753881454468 2023-01-22 11:07:11.726710: step: 392/466, loss: 0.5332252383232117 2023-01-22 11:07:12.366424: step: 394/466, loss: 0.14214055240154266 2023-01-22 11:07:13.037268: step: 396/466, loss: 0.13956071436405182 2023-01-22 11:07:13.718395: step: 398/466, loss: 0.17934565246105194 2023-01-22 11:07:14.338470: step: 400/466, loss: 0.09901084750890732 2023-01-22 11:07:14.944097: step: 402/466, loss: 0.18380653858184814 2023-01-22 11:07:15.625455: step: 404/466, loss: 0.1347212940454483 2023-01-22 11:07:16.324451: step: 406/466, loss: 0.2966226041316986 2023-01-22 11:07:16.943853: step: 408/466, loss: 0.1731652170419693 2023-01-22 11:07:17.537574: step: 410/466, loss: 0.07733267545700073 2023-01-22 11:07:18.230924: step: 412/466, loss: 0.09514249116182327 2023-01-22 11:07:18.898631: step: 414/466, loss: 0.04069305956363678 2023-01-22 11:07:19.640863: step: 416/466, loss: 0.18317970633506775 2023-01-22 11:07:20.218689: step: 418/466, loss: 0.12108591943979263 2023-01-22 11:07:20.887755: step: 420/466, loss: 0.2907571494579315 2023-01-22 11:07:21.553977: step: 422/466, loss: 0.0754459872841835 2023-01-22 11:07:22.260835: step: 424/466, loss: 0.05278822034597397 2023-01-22 11:07:22.994896: step: 426/466, loss: 0.023496627807617188 2023-01-22 11:07:23.699441: step: 428/466, loss: 0.3592928349971771 2023-01-22 11:07:24.337484: step: 430/466, loss: 0.29169031977653503 2023-01-22 11:07:25.046785: step: 432/466, loss: 0.1340051293373108 2023-01-22 11:07:25.743424: step: 434/466, loss: 0.2072197049856186 2023-01-22 11:07:26.355289: step: 436/466, loss: 0.10318450629711151 2023-01-22 11:07:27.009719: step: 438/466, loss: 0.2741301357746124 2023-01-22 11:07:27.688236: step: 440/466, loss: 0.12780462205410004 2023-01-22 11:07:28.323030: step: 442/466, loss: 0.28163811564445496 2023-01-22 11:07:29.036006: step: 444/466, loss: 0.16987621784210205 2023-01-22 11:07:29.684818: step: 446/466, loss: 0.18827319145202637 2023-01-22 11:07:30.391156: step: 448/466, loss: 0.15258075296878815 2023-01-22 11:07:31.091479: step: 450/466, loss: 0.6299476027488708 2023-01-22 11:07:31.715562: step: 452/466, loss: 0.2035852074623108 2023-01-22 11:07:32.462345: step: 454/466, loss: 0.46440279483795166 2023-01-22 11:07:33.106170: step: 456/466, loss: 0.20179222524166107 2023-01-22 11:07:33.745946: step: 458/466, loss: 0.08790719509124756 2023-01-22 11:07:34.358367: step: 460/466, loss: 0.1740276962518692 2023-01-22 11:07:34.999406: step: 462/466, loss: 0.7034498453140259 2023-01-22 11:07:35.709235: step: 464/466, loss: 2.465857744216919 2023-01-22 11:07:36.426962: step: 466/466, loss: 0.46868717670440674 2023-01-22 11:07:37.078744: step: 468/466, loss: 0.13198892772197723 2023-01-22 11:07:37.735582: step: 470/466, loss: 0.33430054783821106 2023-01-22 11:07:38.391164: step: 472/466, loss: 0.1354030817747116 2023-01-22 11:07:39.079203: step: 474/466, loss: 0.10925836861133575 2023-01-22 11:07:39.741757: step: 476/466, loss: 0.11237549036741257 2023-01-22 11:07:40.348417: step: 478/466, loss: 0.12133002281188965 2023-01-22 11:07:41.010247: step: 480/466, loss: 0.05736667290329933 2023-01-22 11:07:41.686111: step: 482/466, loss: 0.12249401956796646 2023-01-22 11:07:42.430279: step: 484/466, loss: 0.11551576852798462 2023-01-22 11:07:43.119168: step: 486/466, loss: 0.13422928750514984 2023-01-22 11:07:43.721771: step: 488/466, loss: 0.22650648653507233 2023-01-22 11:07:44.333992: step: 490/466, loss: 0.1355985850095749 2023-01-22 11:07:45.021339: step: 492/466, loss: 0.038527145981788635 2023-01-22 11:07:45.635676: step: 494/466, loss: 0.09556832164525986 2023-01-22 11:07:46.292877: step: 496/466, loss: 0.13957932591438293 2023-01-22 11:07:46.940926: step: 498/466, loss: 0.13868115842342377 2023-01-22 11:07:47.620563: step: 500/466, loss: 0.15585461258888245 2023-01-22 11:07:48.235348: step: 502/466, loss: 0.07137840986251831 2023-01-22 11:07:48.919276: step: 504/466, loss: 0.3687639534473419 2023-01-22 11:07:49.511933: step: 506/466, loss: 0.04002955183386803 2023-01-22 11:07:50.160822: step: 508/466, loss: 0.1983628123998642 2023-01-22 11:07:50.808812: step: 510/466, loss: 0.22286492586135864 2023-01-22 11:07:51.501409: step: 512/466, loss: 0.08817946165800095 2023-01-22 11:07:52.180612: step: 514/466, loss: 0.10085298866033554 2023-01-22 11:07:52.783353: step: 516/466, loss: 0.6722241044044495 2023-01-22 11:07:53.442475: step: 518/466, loss: 0.42127126455307007 2023-01-22 11:07:54.083048: step: 520/466, loss: 0.09877334535121918 2023-01-22 11:07:54.710350: step: 522/466, loss: 0.18479159474372864 2023-01-22 11:07:55.352420: step: 524/466, loss: 0.10049667209386826 2023-01-22 11:07:56.172317: step: 526/466, loss: 0.21246574819087982 2023-01-22 11:07:56.778659: step: 528/466, loss: 1.5300737619400024 2023-01-22 11:07:57.446928: step: 530/466, loss: 0.1098194494843483 2023-01-22 11:07:58.081656: step: 532/466, loss: 0.031503114849328995 2023-01-22 11:07:58.758779: step: 534/466, loss: 0.09223125874996185 2023-01-22 11:07:59.485138: step: 536/466, loss: 0.09262816607952118 2023-01-22 11:08:00.102322: step: 538/466, loss: 0.08239345997571945 2023-01-22 11:08:00.772290: step: 540/466, loss: 0.12973906099796295 2023-01-22 11:08:01.416892: step: 542/466, loss: 0.5507596731185913 2023-01-22 11:08:02.127661: step: 544/466, loss: 0.34149739146232605 2023-01-22 11:08:02.790663: step: 546/466, loss: 0.0915873795747757 2023-01-22 11:08:03.431182: step: 548/466, loss: 1.4637705087661743 2023-01-22 11:08:04.081774: step: 550/466, loss: 0.38938653469085693 2023-01-22 11:08:04.819698: step: 552/466, loss: 0.10289011150598526 2023-01-22 11:08:05.478141: step: 554/466, loss: 0.15459519624710083 2023-01-22 11:08:06.150508: step: 556/466, loss: 0.09873844683170319 2023-01-22 11:08:06.806415: step: 558/466, loss: 0.17922475934028625 2023-01-22 11:08:07.430169: step: 560/466, loss: 0.14410601556301117 2023-01-22 11:08:08.098983: step: 562/466, loss: 0.12939496338367462 2023-01-22 11:08:08.806204: step: 564/466, loss: 0.25387799739837646 2023-01-22 11:08:09.464861: step: 566/466, loss: 0.12458079308271408 2023-01-22 11:08:10.110916: step: 568/466, loss: 0.2783191204071045 2023-01-22 11:08:10.711520: step: 570/466, loss: 0.09612763673067093 2023-01-22 11:08:11.363550: step: 572/466, loss: 0.27933767437934875 2023-01-22 11:08:11.995775: step: 574/466, loss: 0.061390191316604614 2023-01-22 11:08:12.634622: step: 576/466, loss: 0.7752988934516907 2023-01-22 11:08:13.302608: step: 578/466, loss: 0.5396843552589417 2023-01-22 11:08:13.929015: step: 580/466, loss: 1.0238251686096191 2023-01-22 11:08:14.562578: step: 582/466, loss: 0.13761739432811737 2023-01-22 11:08:15.195181: step: 584/466, loss: 0.643787145614624 2023-01-22 11:08:15.818536: step: 586/466, loss: 0.8737914562225342 2023-01-22 11:08:16.492082: step: 588/466, loss: 0.6515952944755554 2023-01-22 11:08:17.172114: step: 590/466, loss: 0.20468327403068542 2023-01-22 11:08:17.795025: step: 592/466, loss: 0.17673346400260925 2023-01-22 11:08:18.472933: step: 594/466, loss: 0.2418610006570816 2023-01-22 11:08:19.034585: step: 596/466, loss: 0.10020948201417923 2023-01-22 11:08:19.670274: step: 598/466, loss: 0.060589466243982315 2023-01-22 11:08:20.284246: step: 600/466, loss: 0.08437406271696091 2023-01-22 11:08:20.897682: step: 602/466, loss: 0.11464756727218628 2023-01-22 11:08:21.578111: step: 604/466, loss: 0.0527980662882328 2023-01-22 11:08:22.218164: step: 606/466, loss: 0.19469307363033295 2023-01-22 11:08:22.864039: step: 608/466, loss: 0.22520983219146729 2023-01-22 11:08:23.516300: step: 610/466, loss: 0.4616248905658722 2023-01-22 11:08:24.140790: step: 612/466, loss: 0.6428051590919495 2023-01-22 11:08:24.741755: step: 614/466, loss: 0.11791159957647324 2023-01-22 11:08:25.430140: step: 616/466, loss: 0.10004676878452301 2023-01-22 11:08:26.126594: step: 618/466, loss: 0.140882670879364 2023-01-22 11:08:26.821648: step: 620/466, loss: 0.2986718714237213 2023-01-22 11:08:27.493228: step: 622/466, loss: 0.2703951299190521 2023-01-22 11:08:28.105413: step: 624/466, loss: 0.09847074747085571 2023-01-22 11:08:28.694698: step: 626/466, loss: 0.08627253025770187 2023-01-22 11:08:29.378879: step: 628/466, loss: 0.15860922634601593 2023-01-22 11:08:30.111182: step: 630/466, loss: 0.15982860326766968 2023-01-22 11:08:30.889481: step: 632/466, loss: 0.5689049363136292 2023-01-22 11:08:31.565589: step: 634/466, loss: 0.24592043459415436 2023-01-22 11:08:32.293366: step: 636/466, loss: 0.14072726666927338 2023-01-22 11:08:32.954296: step: 638/466, loss: 0.1406276971101761 2023-01-22 11:08:33.600547: step: 640/466, loss: 0.03222530707716942 2023-01-22 11:08:34.305303: step: 642/466, loss: 0.38642606139183044 2023-01-22 11:08:34.946466: step: 644/466, loss: 0.09393725544214249 2023-01-22 11:08:35.614862: step: 646/466, loss: 0.08864431083202362 2023-01-22 11:08:36.232197: step: 648/466, loss: 0.07840286195278168 2023-01-22 11:08:36.840402: step: 650/466, loss: 0.2724342346191406 2023-01-22 11:08:37.518066: step: 652/466, loss: 0.22781580686569214 2023-01-22 11:08:38.152137: step: 654/466, loss: 0.1687709242105484 2023-01-22 11:08:38.935885: step: 656/466, loss: 0.15484493970870972 2023-01-22 11:08:39.742700: step: 658/466, loss: 0.06312824785709381 2023-01-22 11:08:40.374558: step: 660/466, loss: 0.140718013048172 2023-01-22 11:08:41.099118: step: 662/466, loss: 0.10476689040660858 2023-01-22 11:08:41.676107: step: 664/466, loss: 0.07167988270521164 2023-01-22 11:08:42.310399: step: 666/466, loss: 0.12937051057815552 2023-01-22 11:08:42.946633: step: 668/466, loss: 0.05053913593292236 2023-01-22 11:08:43.619944: step: 670/466, loss: 0.3197830319404602 2023-01-22 11:08:44.253881: step: 672/466, loss: 0.2779462933540344 2023-01-22 11:08:44.972375: step: 674/466, loss: 0.18884597718715668 2023-01-22 11:08:45.642752: step: 676/466, loss: 0.3712058961391449 2023-01-22 11:08:46.360586: step: 678/466, loss: 0.2407611608505249 2023-01-22 11:08:47.001053: step: 680/466, loss: 0.394243448972702 2023-01-22 11:08:47.677072: step: 682/466, loss: 0.07410983741283417 2023-01-22 11:08:48.330532: step: 684/466, loss: 0.4667331576347351 2023-01-22 11:08:48.940159: step: 686/466, loss: 0.5091947317123413 2023-01-22 11:08:49.586758: step: 688/466, loss: 0.395091712474823 2023-01-22 11:08:50.212310: step: 690/466, loss: 0.37954455614089966 2023-01-22 11:08:50.931583: step: 692/466, loss: 0.7210215926170349 2023-01-22 11:08:51.564162: step: 694/466, loss: 0.08687297999858856 2023-01-22 11:08:52.333042: step: 696/466, loss: 0.3269064724445343 2023-01-22 11:08:52.924978: step: 698/466, loss: 0.25779739022254944 2023-01-22 11:08:53.538114: step: 700/466, loss: 0.19471396505832672 2023-01-22 11:08:54.230505: step: 702/466, loss: 0.17518281936645508 2023-01-22 11:08:54.990339: step: 704/466, loss: 0.1451595425605774 2023-01-22 11:08:55.675766: step: 706/466, loss: 0.043420497328042984 2023-01-22 11:08:56.340647: step: 708/466, loss: 0.0574161633849144 2023-01-22 11:08:57.093301: step: 710/466, loss: 0.08920268714427948 2023-01-22 11:08:57.765567: step: 712/466, loss: 0.10907725244760513 2023-01-22 11:08:58.431797: step: 714/466, loss: 0.12948867678642273 2023-01-22 11:08:59.097813: step: 716/466, loss: 0.16905492544174194 2023-01-22 11:08:59.698113: step: 718/466, loss: 0.2499549686908722 2023-01-22 11:09:00.441829: step: 720/466, loss: 0.03271065279841423 2023-01-22 11:09:01.126637: step: 722/466, loss: 0.02659328654408455 2023-01-22 11:09:01.780269: step: 724/466, loss: 0.23334236443042755 2023-01-22 11:09:02.527048: step: 726/466, loss: 0.3844182789325714 2023-01-22 11:09:03.250005: step: 728/466, loss: 0.33857661485671997 2023-01-22 11:09:03.901342: step: 730/466, loss: 0.10223531723022461 2023-01-22 11:09:04.638403: step: 732/466, loss: 0.4171455204486847 2023-01-22 11:09:05.311603: step: 734/466, loss: 0.08916039019823074 2023-01-22 11:09:05.999776: step: 736/466, loss: 0.37796342372894287 2023-01-22 11:09:06.663488: step: 738/466, loss: 0.05818198248744011 2023-01-22 11:09:07.276983: step: 740/466, loss: 0.14418166875839233 2023-01-22 11:09:07.933380: step: 742/466, loss: 0.9279890060424805 2023-01-22 11:09:08.601349: step: 744/466, loss: 0.10512785613536835 2023-01-22 11:09:09.297266: step: 746/466, loss: 0.21932624280452728 2023-01-22 11:09:09.944379: step: 748/466, loss: 0.31123635172843933 2023-01-22 11:09:10.696069: step: 750/466, loss: 0.2003611922264099 2023-01-22 11:09:11.343121: step: 752/466, loss: 0.05662640929222107 2023-01-22 11:09:11.968049: step: 754/466, loss: 0.06766672432422638 2023-01-22 11:09:12.597552: step: 756/466, loss: 0.02892901934683323 2023-01-22 11:09:13.225756: step: 758/466, loss: 0.11633181571960449 2023-01-22 11:09:13.898912: step: 760/466, loss: 0.11391647905111313 2023-01-22 11:09:14.581011: step: 762/466, loss: 0.06218302249908447 2023-01-22 11:09:15.255714: step: 764/466, loss: 0.14876174926757812 2023-01-22 11:09:15.893749: step: 766/466, loss: 0.18223460018634796 2023-01-22 11:09:16.501253: step: 768/466, loss: 0.1018591970205307 2023-01-22 11:09:17.120040: step: 770/466, loss: 0.051450781524181366 2023-01-22 11:09:17.750450: step: 772/466, loss: 0.11895598471164703 2023-01-22 11:09:18.399889: step: 774/466, loss: 0.3299162983894348 2023-01-22 11:09:19.044750: step: 776/466, loss: 0.03409243002533913 2023-01-22 11:09:19.747449: step: 778/466, loss: 0.2987508773803711 2023-01-22 11:09:20.472295: step: 780/466, loss: 0.4159797430038452 2023-01-22 11:09:21.040788: step: 782/466, loss: 0.0329253189265728 2023-01-22 11:09:21.750525: step: 784/466, loss: 0.07173468172550201 2023-01-22 11:09:22.428248: step: 786/466, loss: 0.17268122732639313 2023-01-22 11:09:23.143694: step: 788/466, loss: 0.37131261825561523 2023-01-22 11:09:23.806842: step: 790/466, loss: 0.1132279559969902 2023-01-22 11:09:24.440482: step: 792/466, loss: 0.17152756452560425 2023-01-22 11:09:25.069214: step: 794/466, loss: 0.19634950160980225 2023-01-22 11:09:25.711384: step: 796/466, loss: 0.2054695188999176 2023-01-22 11:09:26.384742: step: 798/466, loss: 0.2601601481437683 2023-01-22 11:09:27.050735: step: 800/466, loss: 0.7241315245628357 2023-01-22 11:09:27.703735: step: 802/466, loss: 0.8270736336708069 2023-01-22 11:09:28.370878: step: 804/466, loss: 0.03443297743797302 2023-01-22 11:09:29.026541: step: 806/466, loss: 0.06627636402845383 2023-01-22 11:09:29.745072: step: 808/466, loss: 0.10838421434164047 2023-01-22 11:09:30.389890: step: 810/466, loss: 0.09377229958772659 2023-01-22 11:09:31.048472: step: 812/466, loss: 0.11635808646678925 2023-01-22 11:09:31.738923: step: 814/466, loss: 0.0770280733704567 2023-01-22 11:09:32.444328: step: 816/466, loss: 0.12906278669834137 2023-01-22 11:09:33.094508: step: 818/466, loss: 0.41343000531196594 2023-01-22 11:09:33.718272: step: 820/466, loss: 0.04330282285809517 2023-01-22 11:09:34.359078: step: 822/466, loss: 0.08779174834489822 2023-01-22 11:09:34.982231: step: 824/466, loss: 0.13243816792964935 2023-01-22 11:09:35.605731: step: 826/466, loss: 0.156992107629776 2023-01-22 11:09:36.254698: step: 828/466, loss: 0.08866634219884872 2023-01-22 11:09:36.877213: step: 830/466, loss: 0.5653623342514038 2023-01-22 11:09:37.514628: step: 832/466, loss: 0.18135926127433777 2023-01-22 11:09:38.168239: step: 834/466, loss: 0.8506476283073425 2023-01-22 11:09:38.815208: step: 836/466, loss: 0.10663268715143204 2023-01-22 11:09:39.540844: step: 838/466, loss: 0.37173619866371155 2023-01-22 11:09:40.184949: step: 840/466, loss: 0.8415031433105469 2023-01-22 11:09:40.832553: step: 842/466, loss: 0.4686752259731293 2023-01-22 11:09:41.496959: step: 844/466, loss: 0.2503049075603485 2023-01-22 11:09:42.188435: step: 846/466, loss: 1.3546899557113647 2023-01-22 11:09:42.831162: step: 848/466, loss: 0.08763039857149124 2023-01-22 11:09:43.448124: step: 850/466, loss: 0.044537756592035294 2023-01-22 11:09:44.029361: step: 852/466, loss: 0.12424295395612717 2023-01-22 11:09:44.701646: step: 854/466, loss: 0.17801827192306519 2023-01-22 11:09:45.373019: step: 856/466, loss: 0.08622211962938309 2023-01-22 11:09:46.023755: step: 858/466, loss: 0.10934218764305115 2023-01-22 11:09:46.678782: step: 860/466, loss: 0.5443745851516724 2023-01-22 11:09:47.325487: step: 862/466, loss: 0.32259345054626465 2023-01-22 11:09:47.999378: step: 864/466, loss: 0.3073665201663971 2023-01-22 11:09:48.666590: step: 866/466, loss: 0.0844200998544693 2023-01-22 11:09:49.342261: step: 868/466, loss: 0.15249481797218323 2023-01-22 11:09:50.012801: step: 870/466, loss: 0.09784666448831558 2023-01-22 11:09:50.627093: step: 872/466, loss: 0.26929613947868347 2023-01-22 11:09:51.256001: step: 874/466, loss: 0.7266647219657898 2023-01-22 11:09:51.862977: step: 876/466, loss: 0.11726915836334229 2023-01-22 11:09:52.562196: step: 878/466, loss: 1.9974907636642456 2023-01-22 11:09:53.267256: step: 880/466, loss: 0.37205514311790466 2023-01-22 11:09:53.943932: step: 882/466, loss: 0.10242167860269547 2023-01-22 11:09:54.641936: step: 884/466, loss: 0.12692664563655853 2023-01-22 11:09:55.283743: step: 886/466, loss: 0.23743067681789398 2023-01-22 11:09:55.953046: step: 888/466, loss: 0.3533828556537628 2023-01-22 11:09:56.616002: step: 890/466, loss: 0.11567486077547073 2023-01-22 11:09:57.242355: step: 892/466, loss: 0.1629101186990738 2023-01-22 11:09:57.929921: step: 894/466, loss: 0.260605126619339 2023-01-22 11:09:58.563185: step: 896/466, loss: 0.10653205215930939 2023-01-22 11:09:59.212329: step: 898/466, loss: 0.13470444083213806 2023-01-22 11:09:59.930329: step: 900/466, loss: 0.10960997641086578 2023-01-22 11:10:00.627142: step: 902/466, loss: 0.39571037888526917 2023-01-22 11:10:01.295860: step: 904/466, loss: 0.05582544207572937 2023-01-22 11:10:01.968534: step: 906/466, loss: 0.13240954279899597 2023-01-22 11:10:02.667507: step: 908/466, loss: 0.3220178484916687 2023-01-22 11:10:03.244587: step: 910/466, loss: 0.09687075018882751 2023-01-22 11:10:03.919375: step: 912/466, loss: 0.19923625886440277 2023-01-22 11:10:04.626619: step: 914/466, loss: 0.09295041859149933 2023-01-22 11:10:05.312656: step: 916/466, loss: 0.07397592067718506 2023-01-22 11:10:05.979698: step: 918/466, loss: 0.12189310789108276 2023-01-22 11:10:06.606324: step: 920/466, loss: 0.14278768002986908 2023-01-22 11:10:07.199257: step: 922/466, loss: 0.07068316638469696 2023-01-22 11:10:07.810630: step: 924/466, loss: 0.08041635900735855 2023-01-22 11:10:08.452358: step: 926/466, loss: 0.1307588815689087 2023-01-22 11:10:09.038226: step: 928/466, loss: 0.054248761385679245 2023-01-22 11:10:09.663799: step: 930/466, loss: 0.41642510890960693 2023-01-22 11:10:10.301225: step: 932/466, loss: 0.646379828453064 ================================================== Loss: 0.237 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3049076832433865, 'r': 0.3430934652055564, 'f1': 0.3228754574345147}, 'combined': 0.2379082317938529, 'epoch': 13} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.364065493787582, 'r': 0.3356967540119263, 'f1': 0.3493060818772747}, 'combined': 0.23166413720358112, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.295810434857045, 'r': 0.2773222826784797, 'f1': 0.28626816276488226}, 'combined': 0.19084544184325483, 'epoch': 13} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.37835971362439375, 'r': 0.31317046426400036, 'f1': 0.3426924549738706}, 'combined': 0.2236519179829471, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2681159420289855, 'r': 0.35238095238095235, 'f1': 0.30452674897119336}, 'combined': 0.20301783264746223, 'epoch': 13} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4125, 'r': 0.358695652173913, 'f1': 0.3837209302325581}, 'combined': 0.25581395348837205, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2791124217992775, 'r': 0.3076580103923854, 'f1': 0.2926908639408639}, 'combined': 0.1951272426272426, 'epoch': 10} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.34934765710323107, 'r': 0.3114339578827254, 'f1': 0.32930311940058665}, 'combined': 0.21491361476669862, 'epoch': 10} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.4782608695652174, 'f1': 0.4680851063829787}, 'combined': 0.31205673758865243, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:12:59.431830: step: 2/466, loss: 0.26537057757377625 2023-01-22 11:13:00.015414: step: 4/466, loss: 0.025286676362156868 2023-01-22 11:13:00.663791: step: 6/466, loss: 0.08132816851139069 2023-01-22 11:13:01.353329: step: 8/466, loss: 0.09927152842283249 2023-01-22 11:13:02.022980: step: 10/466, loss: 0.47495347261428833 2023-01-22 11:13:02.712743: step: 12/466, loss: 0.23650674521923065 2023-01-22 11:13:03.334668: step: 14/466, loss: 0.10547780245542526 2023-01-22 11:13:04.007832: step: 16/466, loss: 0.10848968476057053 2023-01-22 11:13:04.693837: step: 18/466, loss: 0.05141892284154892 2023-01-22 11:13:05.304298: step: 20/466, loss: 0.07002759724855423 2023-01-22 11:13:05.931464: step: 22/466, loss: 0.12314854562282562 2023-01-22 11:13:06.641561: step: 24/466, loss: 0.09595341980457306 2023-01-22 11:13:07.256534: step: 26/466, loss: 0.0839044377207756 2023-01-22 11:13:08.118448: step: 28/466, loss: 0.13150641322135925 2023-01-22 11:13:08.750934: step: 30/466, loss: 0.03693179786205292 2023-01-22 11:13:09.401232: step: 32/466, loss: 0.16620874404907227 2023-01-22 11:13:10.113003: step: 34/466, loss: 0.37741395831108093 2023-01-22 11:13:10.774743: step: 36/466, loss: 0.07734955102205276 2023-01-22 11:13:11.412830: step: 38/466, loss: 2.731236219406128 2023-01-22 11:13:12.060046: step: 40/466, loss: 0.16634321212768555 2023-01-22 11:13:12.711163: step: 42/466, loss: 0.047279294580221176 2023-01-22 11:13:13.388330: step: 44/466, loss: 0.19378140568733215 2023-01-22 11:13:13.990930: step: 46/466, loss: 0.14623573422431946 2023-01-22 11:13:14.648646: step: 48/466, loss: 0.12817901372909546 2023-01-22 11:13:15.304562: step: 50/466, loss: 0.27838876843452454 2023-01-22 11:13:16.063287: step: 52/466, loss: 0.12414537370204926 2023-01-22 11:13:16.701455: step: 54/466, loss: 0.13180899620056152 2023-01-22 11:13:17.351369: step: 56/466, loss: 0.13430505990982056 2023-01-22 11:13:17.939821: step: 58/466, loss: 0.09954774379730225 2023-01-22 11:13:18.576755: step: 60/466, loss: 0.1269797831773758 2023-01-22 11:13:19.288199: step: 62/466, loss: 0.2481815665960312 2023-01-22 11:13:19.959489: step: 64/466, loss: 0.07848747819662094 2023-01-22 11:13:20.666976: step: 66/466, loss: 0.061888862401247025 2023-01-22 11:13:21.296824: step: 68/466, loss: 0.5187919735908508 2023-01-22 11:13:21.943373: step: 70/466, loss: 0.41317012906074524 2023-01-22 11:13:22.610295: step: 72/466, loss: 0.08287671208381653 2023-01-22 11:13:23.212993: step: 74/466, loss: 0.11693020164966583 2023-01-22 11:13:23.846458: step: 76/466, loss: 0.18891726434230804 2023-01-22 11:13:24.544265: step: 78/466, loss: 0.14649678766727448 2023-01-22 11:13:25.185650: step: 80/466, loss: 0.09510315954685211 2023-01-22 11:13:25.838114: step: 82/466, loss: 0.041796330362558365 2023-01-22 11:13:26.483987: step: 84/466, loss: 0.12924985587596893 2023-01-22 11:13:27.090507: step: 86/466, loss: 0.07326295226812363 2023-01-22 11:13:27.701938: step: 88/466, loss: 0.13155905902385712 2023-01-22 11:13:28.373760: step: 90/466, loss: 0.8130422234535217 2023-01-22 11:13:29.064805: step: 92/466, loss: 0.18054792284965515 2023-01-22 11:13:29.663232: step: 94/466, loss: 0.08204445242881775 2023-01-22 11:13:30.330890: step: 96/466, loss: 0.33193227648735046 2023-01-22 11:13:30.971410: step: 98/466, loss: 0.13546620309352875 2023-01-22 11:13:31.686362: step: 100/466, loss: 0.03974077105522156 2023-01-22 11:13:32.331794: step: 102/466, loss: 0.06648655980825424 2023-01-22 11:13:33.049245: step: 104/466, loss: 0.11741594225168228 2023-01-22 11:13:33.679760: step: 106/466, loss: 0.1027723178267479 2023-01-22 11:13:34.264546: step: 108/466, loss: 0.12174259126186371 2023-01-22 11:13:34.939584: step: 110/466, loss: 0.06153004989027977 2023-01-22 11:13:35.599195: step: 112/466, loss: 0.040703579783439636 2023-01-22 11:13:36.236321: step: 114/466, loss: 1.0315808057785034 2023-01-22 11:13:36.800679: step: 116/466, loss: 0.07071185111999512 2023-01-22 11:13:37.500299: step: 118/466, loss: 0.4233225882053375 2023-01-22 11:13:38.166304: step: 120/466, loss: 0.2131342887878418 2023-01-22 11:13:38.834341: step: 122/466, loss: 0.06186900660395622 2023-01-22 11:13:39.417233: step: 124/466, loss: 0.10383732616901398 2023-01-22 11:13:40.083695: step: 126/466, loss: 0.09479478001594543 2023-01-22 11:13:40.709617: step: 128/466, loss: 0.17489869892597198 2023-01-22 11:13:41.333759: step: 130/466, loss: 0.16707611083984375 2023-01-22 11:13:41.930333: step: 132/466, loss: 0.17206239700317383 2023-01-22 11:13:42.592132: step: 134/466, loss: 0.20160230994224548 2023-01-22 11:13:43.294135: step: 136/466, loss: 0.20505794882774353 2023-01-22 11:13:43.974626: step: 138/466, loss: 0.16573834419250488 2023-01-22 11:13:44.583534: step: 140/466, loss: 0.1187601089477539 2023-01-22 11:13:45.200737: step: 142/466, loss: 0.024643516167998314 2023-01-22 11:13:45.924173: step: 144/466, loss: 0.12837985157966614 2023-01-22 11:13:46.677540: step: 146/466, loss: 0.43892914056777954 2023-01-22 11:13:47.381273: step: 148/466, loss: 0.04670446738600731 2023-01-22 11:13:48.063825: step: 150/466, loss: 0.1311047524213791 2023-01-22 11:13:48.826306: step: 152/466, loss: 0.11167477816343307 2023-01-22 11:13:49.575319: step: 154/466, loss: 0.12165019661188126 2023-01-22 11:13:50.219156: step: 156/466, loss: 0.2770508825778961 2023-01-22 11:13:50.888067: step: 158/466, loss: 0.11379371583461761 2023-01-22 11:13:51.538573: step: 160/466, loss: 0.09370989352464676 2023-01-22 11:13:52.156627: step: 162/466, loss: 0.209278404712677 2023-01-22 11:13:52.821136: step: 164/466, loss: 0.05772625654935837 2023-01-22 11:13:53.452595: step: 166/466, loss: 0.06810871511697769 2023-01-22 11:13:54.074699: step: 168/466, loss: 0.05928172543644905 2023-01-22 11:13:54.757574: step: 170/466, loss: 0.19251835346221924 2023-01-22 11:13:55.387393: step: 172/466, loss: 0.1081613078713417 2023-01-22 11:13:56.043582: step: 174/466, loss: 0.04214688017964363 2023-01-22 11:13:56.636032: step: 176/466, loss: 0.10552027821540833 2023-01-22 11:13:57.370222: step: 178/466, loss: 0.082865409553051 2023-01-22 11:13:58.034794: step: 180/466, loss: 0.4372321367263794 2023-01-22 11:13:58.667458: step: 182/466, loss: 0.0875171348452568 2023-01-22 11:13:59.335951: step: 184/466, loss: 0.13187700510025024 2023-01-22 11:14:00.046968: step: 186/466, loss: 0.09077508002519608 2023-01-22 11:14:00.607279: step: 188/466, loss: 0.15599395334720612 2023-01-22 11:14:01.209637: step: 190/466, loss: 0.07060946524143219 2023-01-22 11:14:01.847796: step: 192/466, loss: 0.061473701149225235 2023-01-22 11:14:02.493350: step: 194/466, loss: 0.24900802969932556 2023-01-22 11:14:03.183231: step: 196/466, loss: 0.614746630191803 2023-01-22 11:14:03.869114: step: 198/466, loss: 0.5936498641967773 2023-01-22 11:14:04.530393: step: 200/466, loss: 0.17943210899829865 2023-01-22 11:14:05.173176: step: 202/466, loss: 0.14334438741207123 2023-01-22 11:14:05.843636: step: 204/466, loss: 0.13771358132362366 2023-01-22 11:14:06.480178: step: 206/466, loss: 0.10882329195737839 2023-01-22 11:14:07.203540: step: 208/466, loss: 0.171889528632164 2023-01-22 11:14:07.923871: step: 210/466, loss: 0.17607173323631287 2023-01-22 11:14:08.604846: step: 212/466, loss: 0.13134627044200897 2023-01-22 11:14:09.291209: step: 214/466, loss: 0.15773671865463257 2023-01-22 11:14:09.994910: step: 216/466, loss: 0.09228464215993881 2023-01-22 11:14:10.696994: step: 218/466, loss: 0.029966870322823524 2023-01-22 11:14:11.300041: step: 220/466, loss: 0.07456782460212708 2023-01-22 11:14:11.991466: step: 222/466, loss: 0.011664489284157753 2023-01-22 11:14:12.649902: step: 224/466, loss: 0.1483556181192398 2023-01-22 11:14:13.293008: step: 226/466, loss: 0.2248881608247757 2023-01-22 11:14:13.972109: step: 228/466, loss: 0.17643052339553833 2023-01-22 11:14:14.664121: step: 230/466, loss: 0.05628127232193947 2023-01-22 11:14:15.342695: step: 232/466, loss: 0.21768799424171448 2023-01-22 11:14:16.013991: step: 234/466, loss: 0.5761271119117737 2023-01-22 11:14:16.641583: step: 236/466, loss: 0.3103395700454712 2023-01-22 11:14:17.308705: step: 238/466, loss: 0.2983532249927521 2023-01-22 11:14:17.959935: step: 240/466, loss: 0.16951322555541992 2023-01-22 11:14:18.564368: step: 242/466, loss: 0.07378947734832764 2023-01-22 11:14:19.263956: step: 244/466, loss: 0.11983276158571243 2023-01-22 11:14:19.933680: step: 246/466, loss: 0.48832592368125916 2023-01-22 11:14:20.657010: step: 248/466, loss: 0.07185845077037811 2023-01-22 11:14:21.263654: step: 250/466, loss: 0.03485824167728424 2023-01-22 11:14:21.918946: step: 252/466, loss: 0.12447050213813782 2023-01-22 11:14:22.552479: step: 254/466, loss: 0.1706133782863617 2023-01-22 11:14:23.175463: step: 256/466, loss: 0.11184553056955338 2023-01-22 11:14:23.907268: step: 258/466, loss: 0.11569743603467941 2023-01-22 11:14:24.530247: step: 260/466, loss: 0.13082577288150787 2023-01-22 11:14:25.196561: step: 262/466, loss: 0.16038110852241516 2023-01-22 11:14:25.877278: step: 264/466, loss: 0.13148073852062225 2023-01-22 11:14:26.526149: step: 266/466, loss: 0.21504873037338257 2023-01-22 11:14:27.231952: step: 268/466, loss: 0.44771113991737366 2023-01-22 11:14:27.849935: step: 270/466, loss: 0.14703847467899323 2023-01-22 11:14:28.524218: step: 272/466, loss: 0.10682922601699829 2023-01-22 11:14:29.183173: step: 274/466, loss: 0.07483697682619095 2023-01-22 11:14:29.818526: step: 276/466, loss: 0.04568616673350334 2023-01-22 11:14:30.473025: step: 278/466, loss: 0.2048133760690689 2023-01-22 11:14:31.156857: step: 280/466, loss: 0.3653821051120758 2023-01-22 11:14:31.865414: step: 282/466, loss: 0.3007429242134094 2023-01-22 11:14:32.600686: step: 284/466, loss: 0.117209292948246 2023-01-22 11:14:33.249635: step: 286/466, loss: 0.06718248873949051 2023-01-22 11:14:33.857594: step: 288/466, loss: 0.3619995713233948 2023-01-22 11:14:34.493499: step: 290/466, loss: 0.34004920721054077 2023-01-22 11:14:35.173520: step: 292/466, loss: 0.08876284211874008 2023-01-22 11:14:35.832054: step: 294/466, loss: 0.056431349366903305 2023-01-22 11:14:36.527810: step: 296/466, loss: 0.13510681688785553 2023-01-22 11:14:37.209811: step: 298/466, loss: 0.18540455400943756 2023-01-22 11:14:37.809005: step: 300/466, loss: 0.1090221256017685 2023-01-22 11:14:38.489064: step: 302/466, loss: 0.08833901584148407 2023-01-22 11:14:39.144342: step: 304/466, loss: 0.18773992359638214 2023-01-22 11:14:39.811449: step: 306/466, loss: 0.35183510184288025 2023-01-22 11:14:40.467387: step: 308/466, loss: 0.07946083694696426 2023-01-22 11:14:41.115204: step: 310/466, loss: 0.06417113542556763 2023-01-22 11:14:41.750561: step: 312/466, loss: 0.026588531211018562 2023-01-22 11:14:42.373629: step: 314/466, loss: 0.16041052341461182 2023-01-22 11:14:43.031940: step: 316/466, loss: 0.07452167570590973 2023-01-22 11:14:43.687234: step: 318/466, loss: 0.09353282302618027 2023-01-22 11:14:44.404624: step: 320/466, loss: 9.388075828552246 2023-01-22 11:14:45.016366: step: 322/466, loss: 0.10507649928331375 2023-01-22 11:14:45.676288: step: 324/466, loss: 0.05475550517439842 2023-01-22 11:14:46.338490: step: 326/466, loss: 0.07265757769346237 2023-01-22 11:14:46.942832: step: 328/466, loss: 0.0750664621591568 2023-01-22 11:14:47.559197: step: 330/466, loss: 0.09978318214416504 2023-01-22 11:14:48.227598: step: 332/466, loss: 0.30219554901123047 2023-01-22 11:14:48.893036: step: 334/466, loss: 0.023919468745589256 2023-01-22 11:14:49.573772: step: 336/466, loss: 0.11267466098070145 2023-01-22 11:14:50.268261: step: 338/466, loss: 0.02998601458966732 2023-01-22 11:14:50.911006: step: 340/466, loss: 0.24879337847232819 2023-01-22 11:14:51.604635: step: 342/466, loss: 0.23057223856449127 2023-01-22 11:14:52.232965: step: 344/466, loss: 0.08246339857578278 2023-01-22 11:14:52.845164: step: 346/466, loss: 0.14387105405330658 2023-01-22 11:14:53.455591: step: 348/466, loss: 0.405474454164505 2023-01-22 11:14:54.084933: step: 350/466, loss: 0.05964050069451332 2023-01-22 11:14:54.740708: step: 352/466, loss: 0.03839050233364105 2023-01-22 11:14:55.396582: step: 354/466, loss: 0.13237349689006805 2023-01-22 11:14:56.045059: step: 356/466, loss: 0.289070188999176 2023-01-22 11:14:56.742856: step: 358/466, loss: 0.08378314971923828 2023-01-22 11:14:57.334653: step: 360/466, loss: 0.19309566915035248 2023-01-22 11:14:57.965727: step: 362/466, loss: 0.1765211969614029 2023-01-22 11:14:58.649275: step: 364/466, loss: 0.08976773172616959 2023-01-22 11:14:59.285472: step: 366/466, loss: 5.7123122215271 2023-01-22 11:14:59.929652: step: 368/466, loss: 0.3234618902206421 2023-01-22 11:15:00.566038: step: 370/466, loss: 0.09172488003969193 2023-01-22 11:15:01.270498: step: 372/466, loss: 0.03426278382539749 2023-01-22 11:15:01.865096: step: 374/466, loss: 0.1049998477101326 2023-01-22 11:15:02.554021: step: 376/466, loss: 0.050934456288814545 2023-01-22 11:15:03.200231: step: 378/466, loss: 2.1087403297424316 2023-01-22 11:15:03.796702: step: 380/466, loss: 0.07639701664447784 2023-01-22 11:15:04.415987: step: 382/466, loss: 0.22721798717975616 2023-01-22 11:15:05.114930: step: 384/466, loss: 0.0736975371837616 2023-01-22 11:15:05.765762: step: 386/466, loss: 0.16866913437843323 2023-01-22 11:15:06.385385: step: 388/466, loss: 0.4156855642795563 2023-01-22 11:15:06.968370: step: 390/466, loss: 0.46621739864349365 2023-01-22 11:15:07.584769: step: 392/466, loss: 0.06919531524181366 2023-01-22 11:15:08.137289: step: 394/466, loss: 0.18814410269260406 2023-01-22 11:15:08.750815: step: 396/466, loss: 0.30297258496284485 2023-01-22 11:15:09.386775: step: 398/466, loss: 0.2292100340127945 2023-01-22 11:15:09.995997: step: 400/466, loss: 0.7158749103546143 2023-01-22 11:15:10.686452: step: 402/466, loss: 0.0643678605556488 2023-01-22 11:15:11.332344: step: 404/466, loss: 0.27010399103164673 2023-01-22 11:15:12.023548: step: 406/466, loss: 0.33736926317214966 2023-01-22 11:15:12.661087: step: 408/466, loss: 0.1557852327823639 2023-01-22 11:15:13.263386: step: 410/466, loss: 0.09922328591346741 2023-01-22 11:15:13.885083: step: 412/466, loss: 0.13485293090343475 2023-01-22 11:15:14.461076: step: 414/466, loss: 0.11178892850875854 2023-01-22 11:15:15.027247: step: 416/466, loss: 0.07302606105804443 2023-01-22 11:15:15.701692: step: 418/466, loss: 0.0684945359826088 2023-01-22 11:15:16.322289: step: 420/466, loss: 0.5240396857261658 2023-01-22 11:15:16.955485: step: 422/466, loss: 0.5128722786903381 2023-01-22 11:15:17.582522: step: 424/466, loss: 0.21964208781719208 2023-01-22 11:15:18.349735: step: 426/466, loss: 0.02849763073027134 2023-01-22 11:15:19.051153: step: 428/466, loss: 0.10740663856267929 2023-01-22 11:15:19.668449: step: 430/466, loss: 0.1858994960784912 2023-01-22 11:15:20.278197: step: 432/466, loss: 0.08060059696435928 2023-01-22 11:15:20.935246: step: 434/466, loss: 0.19610847532749176 2023-01-22 11:15:21.552190: step: 436/466, loss: 0.15405970811843872 2023-01-22 11:15:22.195563: step: 438/466, loss: 0.13509126007556915 2023-01-22 11:15:22.809192: step: 440/466, loss: 0.10840291529893875 2023-01-22 11:15:23.415062: step: 442/466, loss: 0.12234757095575333 2023-01-22 11:15:24.070442: step: 444/466, loss: 0.0750017836689949 2023-01-22 11:15:24.759676: step: 446/466, loss: 0.1562875509262085 2023-01-22 11:15:25.360804: step: 448/466, loss: 0.06718388199806213 2023-01-22 11:15:25.951011: step: 450/466, loss: 0.1260349452495575 2023-01-22 11:15:26.605470: step: 452/466, loss: 0.3128783106803894 2023-01-22 11:15:27.216002: step: 454/466, loss: 0.11964062601327896 2023-01-22 11:15:27.914135: step: 456/466, loss: 0.1366245299577713 2023-01-22 11:15:28.617467: step: 458/466, loss: 0.02635299414396286 2023-01-22 11:15:29.216095: step: 460/466, loss: 0.12284113466739655 2023-01-22 11:15:29.808021: step: 462/466, loss: 0.1805114895105362 2023-01-22 11:15:30.393367: step: 464/466, loss: 0.15980634093284607 2023-01-22 11:15:31.063585: step: 466/466, loss: 0.12533527612686157 2023-01-22 11:15:31.668151: step: 468/466, loss: 0.15628713369369507 2023-01-22 11:15:32.346252: step: 470/466, loss: 0.15992864966392517 2023-01-22 11:15:33.051634: step: 472/466, loss: 0.12801966071128845 2023-01-22 11:15:33.627895: step: 474/466, loss: 0.06098826974630356 2023-01-22 11:15:34.254056: step: 476/466, loss: 0.8425703048706055 2023-01-22 11:15:34.882932: step: 478/466, loss: 0.04198523610830307 2023-01-22 11:15:35.555897: step: 480/466, loss: 0.13882498443126678 2023-01-22 11:15:36.235640: step: 482/466, loss: 0.30974912643432617 2023-01-22 11:15:36.945845: step: 484/466, loss: 0.03182601556181908 2023-01-22 11:15:37.609359: step: 486/466, loss: 0.11850722134113312 2023-01-22 11:15:38.262189: step: 488/466, loss: 0.06199447438120842 2023-01-22 11:15:38.898780: step: 490/466, loss: 0.03679699823260307 2023-01-22 11:15:39.534869: step: 492/466, loss: 0.06447263062000275 2023-01-22 11:15:40.191731: step: 494/466, loss: 0.09972032904624939 2023-01-22 11:15:40.853339: step: 496/466, loss: 1.3976175785064697 2023-01-22 11:15:41.545739: step: 498/466, loss: 6.323942184448242 2023-01-22 11:15:42.178832: step: 500/466, loss: 0.15918752551078796 2023-01-22 11:15:42.796371: step: 502/466, loss: 0.36978256702423096 2023-01-22 11:15:43.504042: step: 504/466, loss: 0.09340979158878326 2023-01-22 11:15:44.142723: step: 506/466, loss: 0.0978708490729332 2023-01-22 11:15:44.843476: step: 508/466, loss: 0.09205763787031174 2023-01-22 11:15:45.467405: step: 510/466, loss: 0.06624890118837357 2023-01-22 11:15:46.164341: step: 512/466, loss: 0.17666137218475342 2023-01-22 11:15:46.877833: step: 514/466, loss: 0.18705052137374878 2023-01-22 11:15:47.569676: step: 516/466, loss: 0.13752776384353638 2023-01-22 11:15:48.221887: step: 518/466, loss: 0.08362399786710739 2023-01-22 11:15:48.883971: step: 520/466, loss: 0.2757173180580139 2023-01-22 11:15:49.575601: step: 522/466, loss: 0.06851635128259659 2023-01-22 11:15:50.172954: step: 524/466, loss: 0.08473610132932663 2023-01-22 11:15:50.859632: step: 526/466, loss: 0.232706218957901 2023-01-22 11:15:51.527437: step: 528/466, loss: 1.5634676218032837 2023-01-22 11:15:52.232184: step: 530/466, loss: 0.26936009526252747 2023-01-22 11:15:52.895267: step: 532/466, loss: 0.5476840734481812 2023-01-22 11:15:53.473989: step: 534/466, loss: 0.11797524243593216 2023-01-22 11:15:54.146779: step: 536/466, loss: 0.2392173558473587 2023-01-22 11:15:54.861187: step: 538/466, loss: 0.08022218942642212 2023-01-22 11:15:55.622465: step: 540/466, loss: 0.5519012808799744 2023-01-22 11:15:56.270831: step: 542/466, loss: 0.29034101963043213 2023-01-22 11:15:56.918268: step: 544/466, loss: 0.1117844432592392 2023-01-22 11:15:57.582437: step: 546/466, loss: 0.1492680013179779 2023-01-22 11:15:58.291633: step: 548/466, loss: 0.14945177733898163 2023-01-22 11:15:59.017823: step: 550/466, loss: 0.15347805619239807 2023-01-22 11:15:59.693827: step: 552/466, loss: 0.5507827997207642 2023-01-22 11:16:00.334296: step: 554/466, loss: 0.5270827412605286 2023-01-22 11:16:00.949380: step: 556/466, loss: 0.08873269706964493 2023-01-22 11:16:01.565183: step: 558/466, loss: 8.167985916137695 2023-01-22 11:16:02.286932: step: 560/466, loss: 0.10504305362701416 2023-01-22 11:16:02.929222: step: 562/466, loss: 3.3082435131073 2023-01-22 11:16:03.568964: step: 564/466, loss: 0.15133020281791687 2023-01-22 11:16:04.200466: step: 566/466, loss: 0.10324478894472122 2023-01-22 11:16:04.765894: step: 568/466, loss: 0.23461833596229553 2023-01-22 11:16:05.393679: step: 570/466, loss: 0.06707794219255447 2023-01-22 11:16:06.089042: step: 572/466, loss: 0.08997345715761185 2023-01-22 11:16:06.825585: step: 574/466, loss: 0.25757864117622375 2023-01-22 11:16:07.505830: step: 576/466, loss: 0.07078008353710175 2023-01-22 11:16:08.100763: step: 578/466, loss: 0.24525022506713867 2023-01-22 11:16:08.694082: step: 580/466, loss: 0.11206243187189102 2023-01-22 11:16:09.356292: step: 582/466, loss: 0.06284279376268387 2023-01-22 11:16:10.068147: step: 584/466, loss: 0.48521706461906433 2023-01-22 11:16:10.724578: step: 586/466, loss: 0.09278231859207153 2023-01-22 11:16:11.365353: step: 588/466, loss: 0.10632549226284027 2023-01-22 11:16:11.993476: step: 590/466, loss: 0.17148663103580475 2023-01-22 11:16:12.669924: step: 592/466, loss: 0.10256657749414444 2023-01-22 11:16:13.252327: step: 594/466, loss: 0.15673145651817322 2023-01-22 11:16:13.900737: step: 596/466, loss: 0.0729982927441597 2023-01-22 11:16:14.533281: step: 598/466, loss: 0.20437881350517273 2023-01-22 11:16:15.163537: step: 600/466, loss: 0.10806822776794434 2023-01-22 11:16:15.788418: step: 602/466, loss: 0.1564791202545166 2023-01-22 11:16:16.394356: step: 604/466, loss: 0.07354568690061569 2023-01-22 11:16:17.086333: step: 606/466, loss: 0.29064008593559265 2023-01-22 11:16:17.752445: step: 608/466, loss: 0.06247774884104729 2023-01-22 11:16:18.355402: step: 610/466, loss: 0.09393714368343353 2023-01-22 11:16:19.095700: step: 612/466, loss: 0.43984925746917725 2023-01-22 11:16:19.680043: step: 614/466, loss: 0.04968178644776344 2023-01-22 11:16:20.313362: step: 616/466, loss: 0.09494192153215408 2023-01-22 11:16:21.048193: step: 618/466, loss: 0.1882258951663971 2023-01-22 11:16:21.779819: step: 620/466, loss: 0.11165252327919006 2023-01-22 11:16:22.394907: step: 622/466, loss: 0.08681537210941315 2023-01-22 11:16:23.017643: step: 624/466, loss: 0.1627599149942398 2023-01-22 11:16:23.746503: step: 626/466, loss: 0.1943708062171936 2023-01-22 11:16:24.426680: step: 628/466, loss: 0.1491440385580063 2023-01-22 11:16:25.121192: step: 630/466, loss: 0.16645674407482147 2023-01-22 11:16:25.750656: step: 632/466, loss: 0.013026190921664238 2023-01-22 11:16:26.425371: step: 634/466, loss: 0.24470369517803192 2023-01-22 11:16:27.133795: step: 636/466, loss: 0.5334924459457397 2023-01-22 11:16:27.768044: step: 638/466, loss: 1.54473876953125 2023-01-22 11:16:28.463013: step: 640/466, loss: 0.21863879263401031 2023-01-22 11:16:29.084317: step: 642/466, loss: 0.538988471031189 2023-01-22 11:16:29.760493: step: 644/466, loss: 0.190269336104393 2023-01-22 11:16:30.368723: step: 646/466, loss: 0.26166078448295593 2023-01-22 11:16:31.016207: step: 648/466, loss: 0.058466147631406784 2023-01-22 11:16:31.682726: step: 650/466, loss: 0.5121904015541077 2023-01-22 11:16:32.347733: step: 652/466, loss: 0.04038256034255028 2023-01-22 11:16:33.031119: step: 654/466, loss: 0.10419758409261703 2023-01-22 11:16:33.644297: step: 656/466, loss: 0.0707746148109436 2023-01-22 11:16:34.250742: step: 658/466, loss: 0.12626321613788605 2023-01-22 11:16:34.959988: step: 660/466, loss: 0.2745856046676636 2023-01-22 11:16:35.639899: step: 662/466, loss: 0.05933433026075363 2023-01-22 11:16:36.277475: step: 664/466, loss: 0.22782056033611298 2023-01-22 11:16:36.976290: step: 666/466, loss: 0.058970000594854355 2023-01-22 11:16:37.665674: step: 668/466, loss: 0.6271088123321533 2023-01-22 11:16:38.308800: step: 670/466, loss: 0.05891264230012894 2023-01-22 11:16:38.960821: step: 672/466, loss: 0.20704032480716705 2023-01-22 11:16:39.610086: step: 674/466, loss: 0.10041045397520065 2023-01-22 11:16:40.229613: step: 676/466, loss: 0.13243088126182556 2023-01-22 11:16:40.799446: step: 678/466, loss: 0.09281554818153381 2023-01-22 11:16:41.553849: step: 680/466, loss: 0.1242906004190445 2023-01-22 11:16:42.223226: step: 682/466, loss: 0.024335691705346107 2023-01-22 11:16:43.013390: step: 684/466, loss: 0.11704385280609131 2023-01-22 11:16:43.656818: step: 686/466, loss: 0.03983628377318382 2023-01-22 11:16:44.307464: step: 688/466, loss: 0.13050445914268494 2023-01-22 11:16:44.987635: step: 690/466, loss: 0.21144269406795502 2023-01-22 11:16:45.686671: step: 692/466, loss: 2.6851344108581543 2023-01-22 11:16:46.387990: step: 694/466, loss: 0.06771936267614365 2023-01-22 11:16:47.057105: step: 696/466, loss: 0.15440306067466736 2023-01-22 11:16:47.708560: step: 698/466, loss: 0.11375857144594193 2023-01-22 11:16:48.387360: step: 700/466, loss: 0.16460925340652466 2023-01-22 11:16:49.013569: step: 702/466, loss: 0.16876919567584991 2023-01-22 11:16:49.657104: step: 704/466, loss: 0.054315388202667236 2023-01-22 11:16:50.327176: step: 706/466, loss: 0.13007497787475586 2023-01-22 11:16:51.040833: step: 708/466, loss: 0.4762824475765228 2023-01-22 11:16:51.692791: step: 710/466, loss: 0.3519395589828491 2023-01-22 11:16:52.417052: step: 712/466, loss: 0.06498491764068604 2023-01-22 11:16:53.014698: step: 714/466, loss: 0.10465317219495773 2023-01-22 11:16:53.706095: step: 716/466, loss: 0.10128390043973923 2023-01-22 11:16:54.418779: step: 718/466, loss: 0.27678248286247253 2023-01-22 11:16:55.016524: step: 720/466, loss: 0.1347552388906479 2023-01-22 11:16:55.727620: step: 722/466, loss: 0.3467319905757904 2023-01-22 11:16:56.386048: step: 724/466, loss: 0.6930481791496277 2023-01-22 11:16:57.002101: step: 726/466, loss: 0.07042728364467621 2023-01-22 11:16:57.669219: step: 728/466, loss: 0.3940177857875824 2023-01-22 11:16:58.355460: step: 730/466, loss: 0.2059410810470581 2023-01-22 11:16:58.979011: step: 732/466, loss: 0.0927201509475708 2023-01-22 11:16:59.648385: step: 734/466, loss: 0.49056684970855713 2023-01-22 11:17:00.353767: step: 736/466, loss: 0.2960192561149597 2023-01-22 11:17:01.000892: step: 738/466, loss: 0.22611430287361145 2023-01-22 11:17:01.681528: step: 740/466, loss: 0.054231416434049606 2023-01-22 11:17:02.401313: step: 742/466, loss: 0.16621573269367218 2023-01-22 11:17:03.075255: step: 744/466, loss: 0.10313557088375092 2023-01-22 11:17:03.676955: step: 746/466, loss: 0.17337164282798767 2023-01-22 11:17:04.314665: step: 748/466, loss: 0.04055658355355263 2023-01-22 11:17:05.045038: step: 750/466, loss: 0.11937404423952103 2023-01-22 11:17:05.657400: step: 752/466, loss: 0.10778648406267166 2023-01-22 11:17:06.293142: step: 754/466, loss: 0.10828586667776108 2023-01-22 11:17:07.014816: step: 756/466, loss: 0.22498983144760132 2023-01-22 11:17:07.680347: step: 758/466, loss: 0.22420690953731537 2023-01-22 11:17:08.389791: step: 760/466, loss: 0.13945281505584717 2023-01-22 11:17:09.134153: step: 762/466, loss: 0.16044212877750397 2023-01-22 11:17:09.854155: step: 764/466, loss: 0.20257921516895294 2023-01-22 11:17:10.502272: step: 766/466, loss: 0.16559775173664093 2023-01-22 11:17:11.154838: step: 768/466, loss: 0.13508321344852448 2023-01-22 11:17:11.854881: step: 770/466, loss: 0.22314563393592834 2023-01-22 11:17:12.550534: step: 772/466, loss: 1.148383617401123 2023-01-22 11:17:13.173230: step: 774/466, loss: 0.13575758039951324 2023-01-22 11:17:13.849855: step: 776/466, loss: 0.15329691767692566 2023-01-22 11:17:14.508297: step: 778/466, loss: 0.15127770602703094 2023-01-22 11:17:15.160068: step: 780/466, loss: 0.26028984785079956 2023-01-22 11:17:15.825741: step: 782/466, loss: 0.16642004251480103 2023-01-22 11:17:16.485776: step: 784/466, loss: 0.0878530815243721 2023-01-22 11:17:17.161106: step: 786/466, loss: 0.13579607009887695 2023-01-22 11:17:17.841538: step: 788/466, loss: 0.1477285623550415 2023-01-22 11:17:18.554645: step: 790/466, loss: 0.3247869908809662 2023-01-22 11:17:19.218122: step: 792/466, loss: 0.11100728064775467 2023-01-22 11:17:19.961494: step: 794/466, loss: 0.4956207275390625 2023-01-22 11:17:20.619420: step: 796/466, loss: 0.18071597814559937 2023-01-22 11:17:21.226836: step: 798/466, loss: 0.08764049410820007 2023-01-22 11:17:21.912375: step: 800/466, loss: 0.09589579701423645 2023-01-22 11:17:22.557798: step: 802/466, loss: 0.2966838479042053 2023-01-22 11:17:23.164764: step: 804/466, loss: 0.10895594954490662 2023-01-22 11:17:23.831587: step: 806/466, loss: 0.13455626368522644 2023-01-22 11:17:24.471717: step: 808/466, loss: 0.027952078729867935 2023-01-22 11:17:25.156171: step: 810/466, loss: 0.16448421776294708 2023-01-22 11:17:25.768752: step: 812/466, loss: 0.4903505742549896 2023-01-22 11:17:26.417712: step: 814/466, loss: 0.04348333925008774 2023-01-22 11:17:27.116886: step: 816/466, loss: 0.08801747858524323 2023-01-22 11:17:27.787438: step: 818/466, loss: 0.499617338180542 2023-01-22 11:17:28.446319: step: 820/466, loss: 0.11657022684812546 2023-01-22 11:17:29.158598: step: 822/466, loss: 0.09049463272094727 2023-01-22 11:17:29.773584: step: 824/466, loss: 0.10426073521375656 2023-01-22 11:17:30.476508: step: 826/466, loss: 0.07946509122848511 2023-01-22 11:17:31.070376: step: 828/466, loss: 0.30058789253234863 2023-01-22 11:17:31.764352: step: 830/466, loss: 0.22830384969711304 2023-01-22 11:17:32.497202: step: 832/466, loss: 0.16114521026611328 2023-01-22 11:17:33.181528: step: 834/466, loss: 0.11515027284622192 2023-01-22 11:17:33.822919: step: 836/466, loss: 0.2690531611442566 2023-01-22 11:17:34.533736: step: 838/466, loss: 0.1222851350903511 2023-01-22 11:17:35.169037: step: 840/466, loss: 0.10726886987686157 2023-01-22 11:17:35.812560: step: 842/466, loss: 0.16912440955638885 2023-01-22 11:17:36.510547: step: 844/466, loss: 0.09197650849819183 2023-01-22 11:17:37.212202: step: 846/466, loss: 0.10261756926774979 2023-01-22 11:17:37.821582: step: 848/466, loss: 0.20265816152095795 2023-01-22 11:17:38.477727: step: 850/466, loss: 0.10778229683637619 2023-01-22 11:17:39.144895: step: 852/466, loss: 0.05235063657164574 2023-01-22 11:17:39.822278: step: 854/466, loss: 0.3888838589191437 2023-01-22 11:17:40.502140: step: 856/466, loss: 0.20329241454601288 2023-01-22 11:17:41.185376: step: 858/466, loss: 0.09305126219987869 2023-01-22 11:17:41.960553: step: 860/466, loss: 0.13486655056476593 2023-01-22 11:17:42.607614: step: 862/466, loss: 0.19808350503444672 2023-01-22 11:17:43.276903: step: 864/466, loss: 0.18498556315898895 2023-01-22 11:17:43.911411: step: 866/466, loss: 0.10210976004600525 2023-01-22 11:17:44.566858: step: 868/466, loss: 0.5273813605308533 2023-01-22 11:17:45.257285: step: 870/466, loss: 0.12989220023155212 2023-01-22 11:17:45.869446: step: 872/466, loss: 0.2621801793575287 2023-01-22 11:17:46.575571: step: 874/466, loss: 0.8328834176063538 2023-01-22 11:17:47.286087: step: 876/466, loss: 0.35489052534103394 2023-01-22 11:17:47.911762: step: 878/466, loss: 0.40134337544441223 2023-01-22 11:17:48.550838: step: 880/466, loss: 0.15391066670417786 2023-01-22 11:17:49.169772: step: 882/466, loss: 0.11246410757303238 2023-01-22 11:17:49.920469: step: 884/466, loss: 0.13483500480651855 2023-01-22 11:17:50.600001: step: 886/466, loss: 0.10536754876375198 2023-01-22 11:17:51.238014: step: 888/466, loss: 0.03027348406612873 2023-01-22 11:17:51.906585: step: 890/466, loss: 0.08882451057434082 2023-01-22 11:17:52.528656: step: 892/466, loss: 0.43937447667121887 2023-01-22 11:17:53.220389: step: 894/466, loss: 0.3225778639316559 2023-01-22 11:17:53.996494: step: 896/466, loss: 0.09378521889448166 2023-01-22 11:17:54.619672: step: 898/466, loss: 0.15521328151226044 2023-01-22 11:17:55.306962: step: 900/466, loss: 0.06254531443119049 2023-01-22 11:17:55.986611: step: 902/466, loss: 0.07430645078420639 2023-01-22 11:17:56.559589: step: 904/466, loss: 0.615398108959198 2023-01-22 11:17:57.178430: step: 906/466, loss: 0.3123369812965393 2023-01-22 11:17:57.888205: step: 908/466, loss: 0.4180082082748413 2023-01-22 11:17:58.525689: step: 910/466, loss: 1.0601695775985718 2023-01-22 11:17:59.174096: step: 912/466, loss: 0.09402686357498169 2023-01-22 11:17:59.848119: step: 914/466, loss: 0.029817946255207062 2023-01-22 11:18:00.501372: step: 916/466, loss: 0.24438625574111938 2023-01-22 11:18:01.214258: step: 918/466, loss: 0.07211785018444061 2023-01-22 11:18:01.857014: step: 920/466, loss: 0.06276479363441467 2023-01-22 11:18:02.531538: step: 922/466, loss: 0.11545450985431671 2023-01-22 11:18:03.143872: step: 924/466, loss: 0.0142361456528306 2023-01-22 11:18:03.775548: step: 926/466, loss: 0.1268867552280426 2023-01-22 11:18:04.403531: step: 928/466, loss: 0.1386023461818695 2023-01-22 11:18:05.078271: step: 930/466, loss: 0.8721864223480225 2023-01-22 11:18:05.742601: step: 932/466, loss: 0.12333185970783234 ================================================== Loss: 0.274 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3168323463056966, 'r': 0.35530913978494627, 'f1': 0.3349694394752534}, 'combined': 0.24681958698176568, 'epoch': 14} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.37749269564554716, 'r': 0.3272038763294448, 'f1': 0.350553921257496}, 'combined': 0.23249171979771754, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31126262626262624, 'r': 0.2918087121212121, 'f1': 0.3012218963831867}, 'combined': 0.2008145975887911, 'epoch': 14} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3859772726500204, 'r': 0.29957440867737106, 'f1': 0.337330988651794}, 'combined': 0.2201528557516971, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3058790125305776, 'r': 0.35231225921453624, 'f1': 0.3274577788466677}, 'combined': 0.2412846791501762, 'epoch': 14} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3673238837172359, 'r': 0.30922511904977334, 'f1': 0.33577988056366753}, 'combined': 0.22269339229092974, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26686507936507936, 'r': 0.3202380952380952, 'f1': 0.2911255411255411}, 'combined': 0.19408369408369408, 'epoch': 14} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3804347826086957, 'f1': 0.3888888888888889}, 'combined': 0.25925925925925924, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27380952380952384, 'r': 0.19827586206896552, 'f1': 0.23000000000000004}, 'combined': 0.15333333333333335, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2791124217992775, 'r': 0.3076580103923854, 'f1': 0.2926908639408639}, 'combined': 0.1951272426272426, 'epoch': 10} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.34934765710323107, 'r': 0.3114339578827254, 'f1': 0.32930311940058665}, 'combined': 0.21491361476669862, 'epoch': 10} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.4782608695652174, 'f1': 0.4680851063829787}, 'combined': 0.31205673758865243, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:20:50.311722: step: 2/466, loss: 0.435541033744812 2023-01-22 11:20:50.951184: step: 4/466, loss: 0.03687071055173874 2023-01-22 11:20:51.551513: step: 6/466, loss: 0.327267587184906 2023-01-22 11:20:52.218630: step: 8/466, loss: 0.3533381223678589 2023-01-22 11:20:52.882764: step: 10/466, loss: 0.03968863934278488 2023-01-22 11:20:53.498267: step: 12/466, loss: 0.07497228682041168 2023-01-22 11:20:54.220366: step: 14/466, loss: 0.05809565261006355 2023-01-22 11:20:55.009807: step: 16/466, loss: 0.18471305072307587 2023-01-22 11:20:55.685095: step: 18/466, loss: 1.8211289644241333 2023-01-22 11:20:56.302551: step: 20/466, loss: 0.02774050645530224 2023-01-22 11:20:56.914819: step: 22/466, loss: 0.13910217583179474 2023-01-22 11:20:57.511744: step: 24/466, loss: 0.2877402901649475 2023-01-22 11:20:58.141280: step: 26/466, loss: 0.046039097011089325 2023-01-22 11:20:58.820352: step: 28/466, loss: 0.05408158898353577 2023-01-22 11:20:59.498432: step: 30/466, loss: 0.20368832349777222 2023-01-22 11:21:00.166576: step: 32/466, loss: 0.05387134850025177 2023-01-22 11:21:00.811078: step: 34/466, loss: 0.06808287650346756 2023-01-22 11:21:01.442759: step: 36/466, loss: 0.22698763012886047 2023-01-22 11:21:02.095742: step: 38/466, loss: 0.13414643704891205 2023-01-22 11:21:02.707316: step: 40/466, loss: 0.017022065818309784 2023-01-22 11:21:03.353391: step: 42/466, loss: 0.11883985996246338 2023-01-22 11:21:04.014662: step: 44/466, loss: 0.1732538789510727 2023-01-22 11:21:04.648622: step: 46/466, loss: 0.13217847049236298 2023-01-22 11:21:05.317095: step: 48/466, loss: 0.10473383218050003 2023-01-22 11:21:05.946503: step: 50/466, loss: 0.07943100482225418 2023-01-22 11:21:06.613713: step: 52/466, loss: 0.1259610503911972 2023-01-22 11:21:07.259302: step: 54/466, loss: 0.0526583306491375 2023-01-22 11:21:07.903926: step: 56/466, loss: 0.04471132531762123 2023-01-22 11:21:08.565139: step: 58/466, loss: 0.33938050270080566 2023-01-22 11:21:09.229923: step: 60/466, loss: 0.1816110759973526 2023-01-22 11:21:09.860423: step: 62/466, loss: 0.058411043137311935 2023-01-22 11:21:10.447772: step: 64/466, loss: 0.08439125120639801 2023-01-22 11:21:11.116163: step: 66/466, loss: 0.017518168315291405 2023-01-22 11:21:11.778050: step: 68/466, loss: 0.05559399351477623 2023-01-22 11:21:12.497795: step: 70/466, loss: 0.8502869606018066 2023-01-22 11:21:13.176299: step: 72/466, loss: 0.1861373633146286 2023-01-22 11:21:13.859537: step: 74/466, loss: 0.8699095249176025 2023-01-22 11:21:14.534529: step: 76/466, loss: 0.059313587844371796 2023-01-22 11:21:15.245242: step: 78/466, loss: 0.17662163078784943 2023-01-22 11:21:15.881800: step: 80/466, loss: 0.031233610585331917 2023-01-22 11:21:16.533769: step: 82/466, loss: 0.1937914341688156 2023-01-22 11:21:17.235682: step: 84/466, loss: 0.10098868608474731 2023-01-22 11:21:17.867029: step: 86/466, loss: 0.07239064574241638 2023-01-22 11:21:18.529000: step: 88/466, loss: 1.1043068170547485 2023-01-22 11:21:19.197279: step: 90/466, loss: 0.11017554253339767 2023-01-22 11:21:19.854564: step: 92/466, loss: 0.053812894970178604 2023-01-22 11:21:20.491638: step: 94/466, loss: 0.060267288237810135 2023-01-22 11:21:21.135767: step: 96/466, loss: 0.268286794424057 2023-01-22 11:21:21.848343: step: 98/466, loss: 0.08513689786195755 2023-01-22 11:21:22.477959: step: 100/466, loss: 0.1262291818857193 2023-01-22 11:21:23.063330: step: 102/466, loss: 0.06185045465826988 2023-01-22 11:21:23.745247: step: 104/466, loss: 0.14014405012130737 2023-01-22 11:21:24.407680: step: 106/466, loss: 0.5968447327613831 2023-01-22 11:21:25.118484: step: 108/466, loss: 0.10632877796888351 2023-01-22 11:21:25.822101: step: 110/466, loss: 0.11529248207807541 2023-01-22 11:21:26.417926: step: 112/466, loss: 0.021922091022133827 2023-01-22 11:21:27.070410: step: 114/466, loss: 0.30956166982650757 2023-01-22 11:21:27.743805: step: 116/466, loss: 0.07864983379840851 2023-01-22 11:21:28.406944: step: 118/466, loss: 0.08320973068475723 2023-01-22 11:21:29.114399: step: 120/466, loss: 0.09572584927082062 2023-01-22 11:21:29.792715: step: 122/466, loss: 0.05570907145738602 2023-01-22 11:21:30.467359: step: 124/466, loss: 0.250662624835968 2023-01-22 11:21:31.166962: step: 126/466, loss: 0.0643407478928566 2023-01-22 11:21:31.810326: step: 128/466, loss: 0.10747619718313217 2023-01-22 11:21:32.490567: step: 130/466, loss: 0.06883567571640015 2023-01-22 11:21:33.150314: step: 132/466, loss: 0.06860269606113434 2023-01-22 11:21:33.806215: step: 134/466, loss: 0.09205099195241928 2023-01-22 11:21:34.548725: step: 136/466, loss: 0.30829140543937683 2023-01-22 11:21:35.145199: step: 138/466, loss: 1.0106439590454102 2023-01-22 11:21:35.837188: step: 140/466, loss: 0.09725064039230347 2023-01-22 11:21:36.499141: step: 142/466, loss: 0.08082885295152664 2023-01-22 11:21:37.155621: step: 144/466, loss: 0.14608044922351837 2023-01-22 11:21:37.893316: step: 146/466, loss: 0.2036202996969223 2023-01-22 11:21:38.535952: step: 148/466, loss: 0.03470924124121666 2023-01-22 11:21:39.157317: step: 150/466, loss: 0.13530747592449188 2023-01-22 11:21:39.794154: step: 152/466, loss: 0.15470536053180695 2023-01-22 11:21:40.486019: step: 154/466, loss: 0.2205692082643509 2023-01-22 11:21:41.206160: step: 156/466, loss: 1.0972814559936523 2023-01-22 11:21:41.849650: step: 158/466, loss: 0.16078124940395355 2023-01-22 11:21:42.525143: step: 160/466, loss: 0.15565645694732666 2023-01-22 11:21:43.155043: step: 162/466, loss: 0.03340250253677368 2023-01-22 11:21:43.875641: step: 164/466, loss: 0.08437010645866394 2023-01-22 11:21:44.539864: step: 166/466, loss: 0.3232613801956177 2023-01-22 11:21:45.164010: step: 168/466, loss: 0.4667429029941559 2023-01-22 11:21:45.840607: step: 170/466, loss: 0.09632215648889542 2023-01-22 11:21:46.505894: step: 172/466, loss: 0.038387689739465714 2023-01-22 11:21:47.130234: step: 174/466, loss: 0.19090475142002106 2023-01-22 11:21:47.802806: step: 176/466, loss: 0.08887051045894623 2023-01-22 11:21:48.515851: step: 178/466, loss: 0.45436954498291016 2023-01-22 11:21:49.144386: step: 180/466, loss: 0.7342172265052795 2023-01-22 11:21:49.800487: step: 182/466, loss: 0.08003537356853485 2023-01-22 11:21:50.431684: step: 184/466, loss: 0.10012319684028625 2023-01-22 11:21:51.092826: step: 186/466, loss: 0.06299827247858047 2023-01-22 11:21:51.806453: step: 188/466, loss: 0.19645850360393524 2023-01-22 11:21:52.511946: step: 190/466, loss: 0.08092363178730011 2023-01-22 11:21:53.196706: step: 192/466, loss: 0.19707578420639038 2023-01-22 11:21:53.845647: step: 194/466, loss: 0.06013033911585808 2023-01-22 11:21:54.496994: step: 196/466, loss: 0.08408872038125992 2023-01-22 11:21:55.118433: step: 198/466, loss: 0.3458172678947449 2023-01-22 11:21:55.849659: step: 200/466, loss: 1.0924654006958008 2023-01-22 11:21:56.496800: step: 202/466, loss: 0.33933231234550476 2023-01-22 11:21:57.191602: step: 204/466, loss: 0.0806460827589035 2023-01-22 11:21:57.897450: step: 206/466, loss: 0.10306204855442047 2023-01-22 11:21:58.586831: step: 208/466, loss: 0.15154699981212616 2023-01-22 11:21:59.349723: step: 210/466, loss: 0.11981084942817688 2023-01-22 11:22:00.011009: step: 212/466, loss: 0.11672055721282959 2023-01-22 11:22:00.674912: step: 214/466, loss: 0.24000589549541473 2023-01-22 11:22:01.357813: step: 216/466, loss: 0.03687505051493645 2023-01-22 11:22:01.975359: step: 218/466, loss: 0.40529903769493103 2023-01-22 11:22:02.685501: step: 220/466, loss: 0.21455496549606323 2023-01-22 11:22:03.360974: step: 222/466, loss: 0.09716780483722687 2023-01-22 11:22:04.029362: step: 224/466, loss: 0.07382888346910477 2023-01-22 11:22:04.690415: step: 226/466, loss: 0.14204277098178864 2023-01-22 11:22:05.299604: step: 228/466, loss: 0.056718990206718445 2023-01-22 11:22:05.927759: step: 230/466, loss: 0.03239859640598297 2023-01-22 11:22:06.577291: step: 232/466, loss: 0.028303178027272224 2023-01-22 11:22:07.286947: step: 234/466, loss: 0.11211122572422028 2023-01-22 11:22:07.981100: step: 236/466, loss: 1.3105813264846802 2023-01-22 11:22:08.666927: step: 238/466, loss: 0.0535355880856514 2023-01-22 11:22:09.257296: step: 240/466, loss: 0.21491867303848267 2023-01-22 11:22:09.890532: step: 242/466, loss: 0.05640966817736626 2023-01-22 11:22:10.594869: step: 244/466, loss: 0.027308456599712372 2023-01-22 11:22:11.213421: step: 246/466, loss: 0.13510803878307343 2023-01-22 11:22:11.827125: step: 248/466, loss: 1.0216366052627563 2023-01-22 11:22:12.446433: step: 250/466, loss: 0.15312233567237854 2023-01-22 11:22:13.115941: step: 252/466, loss: 0.22686350345611572 2023-01-22 11:22:13.752233: step: 254/466, loss: 0.14514611661434174 2023-01-22 11:22:14.494944: step: 256/466, loss: 0.0583498515188694 2023-01-22 11:22:15.184741: step: 258/466, loss: 0.05628177151083946 2023-01-22 11:22:15.865299: step: 260/466, loss: 0.11870299279689789 2023-01-22 11:22:16.546040: step: 262/466, loss: 0.07947038859128952 2023-01-22 11:22:17.222699: step: 264/466, loss: 0.03605325520038605 2023-01-22 11:22:17.896148: step: 266/466, loss: 0.11093839257955551 2023-01-22 11:22:18.541071: step: 268/466, loss: 0.26418861746788025 2023-01-22 11:22:19.236170: step: 270/466, loss: 0.03737075999379158 2023-01-22 11:22:19.879118: step: 272/466, loss: 0.16607210040092468 2023-01-22 11:22:20.509303: step: 274/466, loss: 0.16027934849262238 2023-01-22 11:22:21.162688: step: 276/466, loss: 0.09926323592662811 2023-01-22 11:22:21.776414: step: 278/466, loss: 0.16546255350112915 2023-01-22 11:22:22.471333: step: 280/466, loss: 0.02960674650967121 2023-01-22 11:22:23.258560: step: 282/466, loss: 0.07635494321584702 2023-01-22 11:22:23.887944: step: 284/466, loss: 0.2444678395986557 2023-01-22 11:22:24.521885: step: 286/466, loss: 0.3181353211402893 2023-01-22 11:22:25.181943: step: 288/466, loss: 0.06947898864746094 2023-01-22 11:22:25.820651: step: 290/466, loss: 0.08936656266450882 2023-01-22 11:22:26.436676: step: 292/466, loss: 0.18234677612781525 2023-01-22 11:22:27.085636: step: 294/466, loss: 0.17520812153816223 2023-01-22 11:22:27.743783: step: 296/466, loss: 0.27067720890045166 2023-01-22 11:22:28.392816: step: 298/466, loss: 0.31783533096313477 2023-01-22 11:22:29.017074: step: 300/466, loss: 0.3244658410549164 2023-01-22 11:22:29.678228: step: 302/466, loss: 0.05142327770590782 2023-01-22 11:22:30.325413: step: 304/466, loss: 0.35001641511917114 2023-01-22 11:22:30.989579: step: 306/466, loss: 0.10582118481397629 2023-01-22 11:22:31.629138: step: 308/466, loss: 0.329561322927475 2023-01-22 11:22:32.352051: step: 310/466, loss: 0.1648482382297516 2023-01-22 11:22:33.062900: step: 312/466, loss: 0.0752813071012497 2023-01-22 11:22:33.761464: step: 314/466, loss: 0.05904865264892578 2023-01-22 11:22:34.359910: step: 316/466, loss: 0.037242304533720016 2023-01-22 11:22:34.989239: step: 318/466, loss: 0.12373863905668259 2023-01-22 11:22:35.667128: step: 320/466, loss: 0.17016130685806274 2023-01-22 11:22:36.292469: step: 322/466, loss: 0.10103777050971985 2023-01-22 11:22:36.948052: step: 324/466, loss: 0.33198267221450806 2023-01-22 11:22:37.636051: step: 326/466, loss: 0.10659413784742355 2023-01-22 11:22:38.296930: step: 328/466, loss: 0.041212305426597595 2023-01-22 11:22:38.904045: step: 330/466, loss: 0.13769038021564484 2023-01-22 11:22:39.508908: step: 332/466, loss: 0.1511014699935913 2023-01-22 11:22:40.145314: step: 334/466, loss: 0.02524626813828945 2023-01-22 11:22:40.781898: step: 336/466, loss: 0.17625188827514648 2023-01-22 11:22:41.404173: step: 338/466, loss: 0.05785483866930008 2023-01-22 11:22:42.019997: step: 340/466, loss: 0.036739859730005264 2023-01-22 11:22:42.725185: step: 342/466, loss: 0.05239155888557434 2023-01-22 11:22:43.408504: step: 344/466, loss: 0.15496668219566345 2023-01-22 11:22:44.054968: step: 346/466, loss: 0.19038543105125427 2023-01-22 11:22:44.682629: step: 348/466, loss: 0.08914028108119965 2023-01-22 11:22:45.370262: step: 350/466, loss: 0.1829203963279724 2023-01-22 11:22:46.003078: step: 352/466, loss: 0.0923493281006813 2023-01-22 11:22:46.732881: step: 354/466, loss: 0.11299723386764526 2023-01-22 11:22:47.347026: step: 356/466, loss: 0.0236994419246912 2023-01-22 11:22:47.979316: step: 358/466, loss: 0.40911865234375 2023-01-22 11:22:48.668967: step: 360/466, loss: 0.6588344573974609 2023-01-22 11:22:49.382043: step: 362/466, loss: 0.12048368155956268 2023-01-22 11:22:50.064453: step: 364/466, loss: 0.2735106348991394 2023-01-22 11:22:50.751664: step: 366/466, loss: 0.18617478013038635 2023-01-22 11:22:51.373381: step: 368/466, loss: 0.048725713044404984 2023-01-22 11:22:52.098190: step: 370/466, loss: 0.11221233755350113 2023-01-22 11:22:52.819410: step: 372/466, loss: 0.04269242659211159 2023-01-22 11:22:53.452673: step: 374/466, loss: 0.05626671761274338 2023-01-22 11:22:54.119826: step: 376/466, loss: 0.07424618303775787 2023-01-22 11:22:54.749717: step: 378/466, loss: 0.07768986374139786 2023-01-22 11:22:55.538314: step: 380/466, loss: 0.04393366351723671 2023-01-22 11:22:56.234680: step: 382/466, loss: 0.09401178359985352 2023-01-22 11:22:56.885723: step: 384/466, loss: 0.04772252216935158 2023-01-22 11:22:57.553856: step: 386/466, loss: 0.1341552436351776 2023-01-22 11:22:58.308007: step: 388/466, loss: 0.06578285992145538 2023-01-22 11:22:59.006258: step: 390/466, loss: 0.04696401581168175 2023-01-22 11:22:59.762176: step: 392/466, loss: 0.15234379470348358 2023-01-22 11:23:00.507708: step: 394/466, loss: 0.06454946845769882 2023-01-22 11:23:01.226099: step: 396/466, loss: 0.032967355102300644 2023-01-22 11:23:01.826370: step: 398/466, loss: 0.12278541922569275 2023-01-22 11:23:02.499829: step: 400/466, loss: 0.3646247088909149 2023-01-22 11:23:03.183914: step: 402/466, loss: 0.39636847376823425 2023-01-22 11:23:03.839981: step: 404/466, loss: 0.2703320384025574 2023-01-22 11:23:04.445192: step: 406/466, loss: 0.17627400159835815 2023-01-22 11:23:05.135608: step: 408/466, loss: 0.35617542266845703 2023-01-22 11:23:05.760927: step: 410/466, loss: 0.11145827919244766 2023-01-22 11:23:06.428069: step: 412/466, loss: 0.030111731961369514 2023-01-22 11:23:07.052800: step: 414/466, loss: 0.028507882729172707 2023-01-22 11:23:07.753956: step: 416/466, loss: 0.03310058265924454 2023-01-22 11:23:08.410766: step: 418/466, loss: 0.11942702531814575 2023-01-22 11:23:09.011081: step: 420/466, loss: 0.07002561539411545 2023-01-22 11:23:09.714438: step: 422/466, loss: 0.1809057742357254 2023-01-22 11:23:10.313290: step: 424/466, loss: 0.048225123435258865 2023-01-22 11:23:10.976985: step: 426/466, loss: 0.272867888212204 2023-01-22 11:23:11.592574: step: 428/466, loss: 0.049407534301280975 2023-01-22 11:23:12.322226: step: 430/466, loss: 0.06726373732089996 2023-01-22 11:23:13.090526: step: 432/466, loss: 0.19764664769172668 2023-01-22 11:23:13.754621: step: 434/466, loss: 0.05364568531513214 2023-01-22 11:23:14.468108: step: 436/466, loss: 0.027864878997206688 2023-01-22 11:23:15.138882: step: 438/466, loss: 0.04443821683526039 2023-01-22 11:23:15.807705: step: 440/466, loss: 13.03366470336914 2023-01-22 11:23:16.445075: step: 442/466, loss: 0.06950782239437103 2023-01-22 11:23:17.090590: step: 444/466, loss: 0.48722684383392334 2023-01-22 11:23:17.729667: step: 446/466, loss: 0.1150822639465332 2023-01-22 11:23:18.440087: step: 448/466, loss: 0.33581680059432983 2023-01-22 11:23:19.063399: step: 450/466, loss: 0.09013377875089645 2023-01-22 11:23:19.724680: step: 452/466, loss: 0.031149625778198242 2023-01-22 11:23:20.415088: step: 454/466, loss: 0.2107708752155304 2023-01-22 11:23:21.122482: step: 456/466, loss: 0.06587281823158264 2023-01-22 11:23:21.671857: step: 458/466, loss: 0.03040958382189274 2023-01-22 11:23:22.320063: step: 460/466, loss: 0.6418898701667786 2023-01-22 11:23:22.966223: step: 462/466, loss: 0.10450328886508942 2023-01-22 11:23:23.617075: step: 464/466, loss: 0.20812921226024628 2023-01-22 11:23:24.236467: step: 466/466, loss: 0.015257641673088074 2023-01-22 11:23:24.851208: step: 468/466, loss: 0.03937196731567383 2023-01-22 11:23:25.443498: step: 470/466, loss: 0.07607219368219376 2023-01-22 11:23:26.089596: step: 472/466, loss: 0.10207568109035492 2023-01-22 11:23:26.717869: step: 474/466, loss: 0.09679700434207916 2023-01-22 11:23:27.386188: step: 476/466, loss: 0.1701655387878418 2023-01-22 11:23:28.064652: step: 478/466, loss: 0.10100958496332169 2023-01-22 11:23:28.707646: step: 480/466, loss: 0.04442289471626282 2023-01-22 11:23:29.358871: step: 482/466, loss: 0.21667301654815674 2023-01-22 11:23:30.034605: step: 484/466, loss: 0.059234533458948135 2023-01-22 11:23:30.769403: step: 486/466, loss: 0.3126879930496216 2023-01-22 11:23:31.358922: step: 488/466, loss: 0.02259223349392414 2023-01-22 11:23:31.917649: step: 490/466, loss: 0.06512950360774994 2023-01-22 11:23:32.547568: step: 492/466, loss: 0.06362253427505493 2023-01-22 11:23:33.156365: step: 494/466, loss: 0.016480471938848495 2023-01-22 11:23:33.778455: step: 496/466, loss: 0.07561665773391724 2023-01-22 11:23:34.401747: step: 498/466, loss: 0.03929918259382248 2023-01-22 11:23:35.056922: step: 500/466, loss: 0.19528630375862122 2023-01-22 11:23:35.741361: step: 502/466, loss: 0.10272553563117981 2023-01-22 11:23:36.409643: step: 504/466, loss: 0.2748989462852478 2023-01-22 11:23:36.969468: step: 506/466, loss: 0.04124134033918381 2023-01-22 11:23:37.659775: step: 508/466, loss: 0.0517859160900116 2023-01-22 11:23:38.285582: step: 510/466, loss: 0.09681697189807892 2023-01-22 11:23:38.936173: step: 512/466, loss: 0.10827016830444336 2023-01-22 11:23:39.648050: step: 514/466, loss: 0.9610345959663391 2023-01-22 11:23:40.273729: step: 516/466, loss: 0.2481914609670639 2023-01-22 11:23:40.968120: step: 518/466, loss: 0.1883612424135208 2023-01-22 11:23:41.637098: step: 520/466, loss: 0.08209282904863358 2023-01-22 11:23:42.312372: step: 522/466, loss: 0.08709144592285156 2023-01-22 11:23:42.958654: step: 524/466, loss: 0.07596763968467712 2023-01-22 11:23:43.604084: step: 526/466, loss: 0.35548317432403564 2023-01-22 11:23:44.232401: step: 528/466, loss: 0.03592758625745773 2023-01-22 11:23:44.871534: step: 530/466, loss: 0.07017184793949127 2023-01-22 11:23:45.497350: step: 532/466, loss: 0.06274805963039398 2023-01-22 11:23:46.147936: step: 534/466, loss: 0.019063686951994896 2023-01-22 11:23:46.782088: step: 536/466, loss: 0.08310821652412415 2023-01-22 11:23:47.426779: step: 538/466, loss: 0.11306414008140564 2023-01-22 11:23:48.090789: step: 540/466, loss: 0.11510761827230453 2023-01-22 11:23:48.757630: step: 542/466, loss: 0.12245524674654007 2023-01-22 11:23:49.449029: step: 544/466, loss: 0.08942381292581558 2023-01-22 11:23:50.092953: step: 546/466, loss: 0.9175434708595276 2023-01-22 11:23:50.778431: step: 548/466, loss: 0.03244870528578758 2023-01-22 11:23:51.463959: step: 550/466, loss: 0.03925330191850662 2023-01-22 11:23:52.090350: step: 552/466, loss: 0.0484529547393322 2023-01-22 11:23:52.741670: step: 554/466, loss: 0.2532956898212433 2023-01-22 11:23:53.350868: step: 556/466, loss: 0.044494111090898514 2023-01-22 11:23:54.009844: step: 558/466, loss: 0.42364785075187683 2023-01-22 11:23:54.734983: step: 560/466, loss: 0.19850437343120575 2023-01-22 11:23:55.364873: step: 562/466, loss: 0.1446579545736313 2023-01-22 11:23:56.148974: step: 564/466, loss: 0.14363546669483185 2023-01-22 11:23:56.784740: step: 566/466, loss: 0.10299845039844513 2023-01-22 11:23:57.456793: step: 568/466, loss: 0.1500239372253418 2023-01-22 11:23:58.112928: step: 570/466, loss: 0.06343446671962738 2023-01-22 11:23:58.845531: step: 572/466, loss: 0.13200996816158295 2023-01-22 11:23:59.542753: step: 574/466, loss: 0.0875290259718895 2023-01-22 11:24:00.168813: step: 576/466, loss: 0.01447082869708538 2023-01-22 11:24:00.877398: step: 578/466, loss: 0.0954570472240448 2023-01-22 11:24:01.481777: step: 580/466, loss: 0.08869298547506332 2023-01-22 11:24:02.138888: step: 582/466, loss: 0.0968579426407814 2023-01-22 11:24:02.845527: step: 584/466, loss: 0.3615601062774658 2023-01-22 11:24:03.523205: step: 586/466, loss: 0.1191064789891243 2023-01-22 11:24:04.176568: step: 588/466, loss: 0.059348151087760925 2023-01-22 11:24:05.035613: step: 590/466, loss: 0.11688205599784851 2023-01-22 11:24:05.654666: step: 592/466, loss: 0.0417327918112278 2023-01-22 11:24:06.255463: step: 594/466, loss: 0.023670373484492302 2023-01-22 11:24:06.929868: step: 596/466, loss: 0.32675257325172424 2023-01-22 11:24:07.623443: step: 598/466, loss: 0.1834038347005844 2023-01-22 11:24:08.295753: step: 600/466, loss: 0.08239725977182388 2023-01-22 11:24:08.988440: step: 602/466, loss: 0.112456314265728 2023-01-22 11:24:09.601036: step: 604/466, loss: 0.10530654340982437 2023-01-22 11:24:10.279347: step: 606/466, loss: 0.09844251722097397 2023-01-22 11:24:10.933119: step: 608/466, loss: 0.2826271951198578 2023-01-22 11:24:11.599849: step: 610/466, loss: 0.20284876227378845 2023-01-22 11:24:12.279453: step: 612/466, loss: 0.0901341587305069 2023-01-22 11:24:12.976879: step: 614/466, loss: 0.18216392397880554 2023-01-22 11:24:13.648904: step: 616/466, loss: 0.12945237755775452 2023-01-22 11:24:14.308855: step: 618/466, loss: 0.04815996438264847 2023-01-22 11:24:14.935349: step: 620/466, loss: 0.09071626514196396 2023-01-22 11:24:15.608209: step: 622/466, loss: 0.15761756896972656 2023-01-22 11:24:16.242956: step: 624/466, loss: 0.061441466212272644 2023-01-22 11:24:16.869469: step: 626/466, loss: 0.32481932640075684 2023-01-22 11:24:17.483572: step: 628/466, loss: 0.05131962150335312 2023-01-22 11:24:18.100729: step: 630/466, loss: 0.12522053718566895 2023-01-22 11:24:18.715744: step: 632/466, loss: 0.08636586368083954 2023-01-22 11:24:19.406391: step: 634/466, loss: 0.05276182293891907 2023-01-22 11:24:20.042921: step: 636/466, loss: 0.12636514008045197 2023-01-22 11:24:20.678262: step: 638/466, loss: 0.6053150296211243 2023-01-22 11:24:21.361020: step: 640/466, loss: 0.11609852313995361 2023-01-22 11:24:21.996378: step: 642/466, loss: 0.02773885801434517 2023-01-22 11:24:22.641533: step: 644/466, loss: 0.08347609639167786 2023-01-22 11:24:23.284824: step: 646/466, loss: 0.13086169958114624 2023-01-22 11:24:23.944993: step: 648/466, loss: 0.22208954393863678 2023-01-22 11:24:24.658098: step: 650/466, loss: 0.0469534769654274 2023-01-22 11:24:25.314374: step: 652/466, loss: 0.09385628253221512 2023-01-22 11:24:25.974663: step: 654/466, loss: 0.05745408311486244 2023-01-22 11:24:26.685950: step: 656/466, loss: 0.20931097865104675 2023-01-22 11:24:27.372766: step: 658/466, loss: 0.5002903342247009 2023-01-22 11:24:28.054038: step: 660/466, loss: 0.08020833879709244 2023-01-22 11:24:28.737715: step: 662/466, loss: 0.3154780864715576 2023-01-22 11:24:29.400060: step: 664/466, loss: 0.4076489210128784 2023-01-22 11:24:30.092687: step: 666/466, loss: 0.16374658048152924 2023-01-22 11:24:30.685466: step: 668/466, loss: 0.04053359851241112 2023-01-22 11:24:31.338345: step: 670/466, loss: 0.18959200382232666 2023-01-22 11:24:31.925756: step: 672/466, loss: 0.03634379059076309 2023-01-22 11:24:32.650112: step: 674/466, loss: 0.11083564907312393 2023-01-22 11:24:33.276554: step: 676/466, loss: 0.11092586815357208 2023-01-22 11:24:33.947747: step: 678/466, loss: 0.16896851360797882 2023-01-22 11:24:34.576466: step: 680/466, loss: 1.5057514905929565 2023-01-22 11:24:35.166865: step: 682/466, loss: 0.20026735961437225 2023-01-22 11:24:35.807899: step: 684/466, loss: 0.21140651404857635 2023-01-22 11:24:36.647243: step: 686/466, loss: 0.02491314709186554 2023-01-22 11:24:37.341934: step: 688/466, loss: 0.5144820809364319 2023-01-22 11:24:38.054021: step: 690/466, loss: 0.06804531812667847 2023-01-22 11:24:38.696082: step: 692/466, loss: 0.07824739068746567 2023-01-22 11:24:39.318725: step: 694/466, loss: 0.041952550411224365 2023-01-22 11:24:39.998264: step: 696/466, loss: 0.08526717871427536 2023-01-22 11:24:40.690265: step: 698/466, loss: 0.05078260973095894 2023-01-22 11:24:41.286968: step: 700/466, loss: 0.1528579443693161 2023-01-22 11:24:41.873710: step: 702/466, loss: 0.03180788829922676 2023-01-22 11:24:42.473118: step: 704/466, loss: 0.08808571100234985 2023-01-22 11:24:43.173023: step: 706/466, loss: 0.046116508543491364 2023-01-22 11:24:43.801820: step: 708/466, loss: 0.058172132819890976 2023-01-22 11:24:44.477118: step: 710/466, loss: 0.07205815613269806 2023-01-22 11:24:45.160097: step: 712/466, loss: 0.09958315640687943 2023-01-22 11:24:45.819254: step: 714/466, loss: 0.08324968814849854 2023-01-22 11:24:46.385968: step: 716/466, loss: 0.06993231922388077 2023-01-22 11:24:47.104427: step: 718/466, loss: 0.15261013805866241 2023-01-22 11:24:47.742192: step: 720/466, loss: 0.11847439408302307 2023-01-22 11:24:48.450610: step: 722/466, loss: 0.6557598114013672 2023-01-22 11:24:49.076110: step: 724/466, loss: 0.13594911992549896 2023-01-22 11:24:49.687944: step: 726/466, loss: 0.12737493216991425 2023-01-22 11:24:50.468133: step: 728/466, loss: 0.09064139425754547 2023-01-22 11:24:51.144230: step: 730/466, loss: 0.09410417824983597 2023-01-22 11:24:51.838929: step: 732/466, loss: 0.046360161155462265 2023-01-22 11:24:52.539370: step: 734/466, loss: 0.2701513171195984 2023-01-22 11:24:53.210265: step: 736/466, loss: 0.13428397476673126 2023-01-22 11:24:53.937202: step: 738/466, loss: 0.13104593753814697 2023-01-22 11:24:54.569446: step: 740/466, loss: 0.12228985875844955 2023-01-22 11:24:55.221254: step: 742/466, loss: 0.018590405583381653 2023-01-22 11:24:55.787225: step: 744/466, loss: 0.07382070273160934 2023-01-22 11:24:56.436144: step: 746/466, loss: 0.11061914265155792 2023-01-22 11:24:57.108280: step: 748/466, loss: 0.07724656909704208 2023-01-22 11:24:57.745349: step: 750/466, loss: 0.022709783166646957 2023-01-22 11:24:58.433829: step: 752/466, loss: 0.05855545401573181 2023-01-22 11:24:59.121739: step: 754/466, loss: 0.04040146619081497 2023-01-22 11:24:59.790918: step: 756/466, loss: 0.06897459924221039 2023-01-22 11:25:00.509465: step: 758/466, loss: 0.030555035918951035 2023-01-22 11:25:01.174711: step: 760/466, loss: 0.037560224533081055 2023-01-22 11:25:01.860828: step: 762/466, loss: 0.06906607002019882 2023-01-22 11:25:02.603979: step: 764/466, loss: 0.06891036033630371 2023-01-22 11:25:03.256691: step: 766/466, loss: 0.17637237906455994 2023-01-22 11:25:03.891617: step: 768/466, loss: 0.09575371444225311 2023-01-22 11:25:04.571615: step: 770/466, loss: 0.3711543381214142 2023-01-22 11:25:05.190890: step: 772/466, loss: 0.824753999710083 2023-01-22 11:25:05.794624: step: 774/466, loss: 0.8033647537231445 2023-01-22 11:25:06.374005: step: 776/466, loss: 0.06524951756000519 2023-01-22 11:25:06.957379: step: 778/466, loss: 0.2661142647266388 2023-01-22 11:25:07.599991: step: 780/466, loss: 0.09411405026912689 2023-01-22 11:25:08.186627: step: 782/466, loss: 0.1399596482515335 2023-01-22 11:25:08.853199: step: 784/466, loss: 0.19879567623138428 2023-01-22 11:25:09.534440: step: 786/466, loss: 0.2200627326965332 2023-01-22 11:25:10.127799: step: 788/466, loss: 0.14337505400180817 2023-01-22 11:25:10.778806: step: 790/466, loss: 0.14063367247581482 2023-01-22 11:25:11.439288: step: 792/466, loss: 0.6075316071510315 2023-01-22 11:25:12.119048: step: 794/466, loss: 0.08412862569093704 2023-01-22 11:25:12.735455: step: 796/466, loss: 0.17682573199272156 2023-01-22 11:25:13.410690: step: 798/466, loss: 0.02219691127538681 2023-01-22 11:25:14.104188: step: 800/466, loss: 0.09762563556432724 2023-01-22 11:25:14.785456: step: 802/466, loss: 0.11324504017829895 2023-01-22 11:25:15.484557: step: 804/466, loss: 0.15628759562969208 2023-01-22 11:25:16.177357: step: 806/466, loss: 0.38020768761634827 2023-01-22 11:25:16.868079: step: 808/466, loss: 0.18933938443660736 2023-01-22 11:25:17.492239: step: 810/466, loss: 0.2434525489807129 2023-01-22 11:25:18.162338: step: 812/466, loss: 0.10575389862060547 2023-01-22 11:25:18.815268: step: 814/466, loss: 0.11092200130224228 2023-01-22 11:25:19.511343: step: 816/466, loss: 0.08927656710147858 2023-01-22 11:25:20.117823: step: 818/466, loss: 0.1141166090965271 2023-01-22 11:25:20.743713: step: 820/466, loss: 0.15775534510612488 2023-01-22 11:25:21.346576: step: 822/466, loss: 0.04831439256668091 2023-01-22 11:25:22.010100: step: 824/466, loss: 0.3275830149650574 2023-01-22 11:25:22.718731: step: 826/466, loss: 0.08251907676458359 2023-01-22 11:25:23.355274: step: 828/466, loss: 0.1518041044473648 2023-01-22 11:25:23.966054: step: 830/466, loss: 0.0283180084079504 2023-01-22 11:25:24.571220: step: 832/466, loss: 0.18164034187793732 2023-01-22 11:25:25.204993: step: 834/466, loss: 0.054658275097608566 2023-01-22 11:25:25.840210: step: 836/466, loss: 0.1343078762292862 2023-01-22 11:25:26.568067: step: 838/466, loss: 0.13170497119426727 2023-01-22 11:25:27.206945: step: 840/466, loss: 0.03123355470597744 2023-01-22 11:25:27.940671: step: 842/466, loss: 0.23193548619747162 2023-01-22 11:25:28.652522: step: 844/466, loss: 0.6250280141830444 2023-01-22 11:25:29.241256: step: 846/466, loss: 0.1379401683807373 2023-01-22 11:25:29.925044: step: 848/466, loss: 0.42695051431655884 2023-01-22 11:25:30.542082: step: 850/466, loss: 0.10238040238618851 2023-01-22 11:25:31.155357: step: 852/466, loss: 0.11693931370973587 2023-01-22 11:25:31.808948: step: 854/466, loss: 0.19939444959163666 2023-01-22 11:25:32.462920: step: 856/466, loss: 0.10059074312448502 2023-01-22 11:25:33.101771: step: 858/466, loss: 0.05090232193470001 2023-01-22 11:25:33.733183: step: 860/466, loss: 0.15322493016719818 2023-01-22 11:25:34.374266: step: 862/466, loss: 0.022669047117233276 2023-01-22 11:25:35.017675: step: 864/466, loss: 0.08482439815998077 2023-01-22 11:25:35.707888: step: 866/466, loss: 0.12503689527511597 2023-01-22 11:25:36.425476: step: 868/466, loss: 0.020080553367733955 2023-01-22 11:25:37.007694: step: 870/466, loss: 0.1167760118842125 2023-01-22 11:25:37.647468: step: 872/466, loss: 0.05984390527009964 2023-01-22 11:25:38.299747: step: 874/466, loss: 0.0961577519774437 2023-01-22 11:25:38.905701: step: 876/466, loss: 0.51129549741745 2023-01-22 11:25:39.633103: step: 878/466, loss: 0.15473543107509613 2023-01-22 11:25:40.281339: step: 880/466, loss: 0.19731540977954865 2023-01-22 11:25:40.989429: step: 882/466, loss: 0.11906936019659042 2023-01-22 11:25:41.663750: step: 884/466, loss: 0.08546533435583115 2023-01-22 11:25:42.295234: step: 886/466, loss: 0.17211639881134033 2023-01-22 11:25:42.954269: step: 888/466, loss: 0.1469283550977707 2023-01-22 11:25:43.598815: step: 890/466, loss: 0.07431633025407791 2023-01-22 11:25:44.277369: step: 892/466, loss: 0.1898496001958847 2023-01-22 11:25:44.952246: step: 894/466, loss: 0.20680542290210724 2023-01-22 11:25:45.552103: step: 896/466, loss: 0.04137062653899193 2023-01-22 11:25:46.166158: step: 898/466, loss: 0.17267300188541412 2023-01-22 11:25:46.801780: step: 900/466, loss: 0.12561747431755066 2023-01-22 11:25:47.423217: step: 902/466, loss: 0.29961609840393066 2023-01-22 11:25:48.066397: step: 904/466, loss: 0.538173258304596 2023-01-22 11:25:48.727239: step: 906/466, loss: 0.14885465800762177 2023-01-22 11:25:49.382243: step: 908/466, loss: 0.11128829419612885 2023-01-22 11:25:50.073570: step: 910/466, loss: 0.10712137818336487 2023-01-22 11:25:50.757958: step: 912/466, loss: 0.10897429287433624 2023-01-22 11:25:51.388388: step: 914/466, loss: 0.08950541168451309 2023-01-22 11:25:52.046830: step: 916/466, loss: 0.047900937497615814 2023-01-22 11:25:52.734558: step: 918/466, loss: 0.2691085636615753 2023-01-22 11:25:53.401384: step: 920/466, loss: 0.12888646125793457 2023-01-22 11:25:54.104756: step: 922/466, loss: 0.10967149585485458 2023-01-22 11:25:54.764702: step: 924/466, loss: 0.1109466403722763 2023-01-22 11:25:55.440422: step: 926/466, loss: 0.10536369681358337 2023-01-22 11:25:56.143272: step: 928/466, loss: 0.1543402373790741 2023-01-22 11:25:56.869762: step: 930/466, loss: 0.04843086004257202 2023-01-22 11:25:57.503570: step: 932/466, loss: 0.15260881185531616 ================================================== Loss: 0.197 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31796133445002794, 'r': 0.3601952877925364, 'f1': 0.33776319691577705}, 'combined': 0.24887814509583572, 'epoch': 15} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3567484519390349, 'r': 0.3462467659079292, 'f1': 0.3514191692650775}, 'combined': 0.23306556303590628, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3013073400127695, 'r': 0.3527640014950262, 'f1': 0.3250115887899979}, 'combined': 0.2394822233189458, 'epoch': 15} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.338029880336409, 'r': 0.32837188375536874, 'f1': 0.3331308965634176}, 'combined': 0.2209365531612303, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24691358024691357, 'r': 0.38095238095238093, 'f1': 0.299625468164794}, 'combined': 0.19975031210986266, 'epoch': 15} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3055555555555556, 'r': 0.1896551724137931, 'f1': 0.23404255319148937}, 'combined': 0.15602836879432624, 'epoch': 15} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:28:51.950126: step: 2/466, loss: 0.042849015444517136 2023-01-22 11:28:52.566551: step: 4/466, loss: 0.09236946702003479 2023-01-22 11:28:53.242438: step: 6/466, loss: 0.1932559758424759 2023-01-22 11:28:53.858925: step: 8/466, loss: 0.061434224247932434 2023-01-22 11:28:54.457338: step: 10/466, loss: 0.05620969086885452 2023-01-22 11:28:55.046482: step: 12/466, loss: 0.09529339522123337 2023-01-22 11:28:55.730286: step: 14/466, loss: 0.155241459608078 2023-01-22 11:28:56.312140: step: 16/466, loss: 0.014359625056385994 2023-01-22 11:28:56.917509: step: 18/466, loss: 0.05823211371898651 2023-01-22 11:28:57.584631: step: 20/466, loss: 0.5219416618347168 2023-01-22 11:28:58.235277: step: 22/466, loss: 0.161069855093956 2023-01-22 11:28:58.917143: step: 24/466, loss: 0.06981996446847916 2023-01-22 11:28:59.577313: step: 26/466, loss: 0.05380038544535637 2023-01-22 11:29:00.232784: step: 28/466, loss: 0.0803898423910141 2023-01-22 11:29:00.879555: step: 30/466, loss: 0.14585107564926147 2023-01-22 11:29:01.563111: step: 32/466, loss: 0.0993504524230957 2023-01-22 11:29:02.262745: step: 34/466, loss: 0.015155520290136337 2023-01-22 11:29:02.955891: step: 36/466, loss: 0.06372464448213577 2023-01-22 11:29:03.605999: step: 38/466, loss: 0.05958646535873413 2023-01-22 11:29:04.263754: step: 40/466, loss: 0.9672218561172485 2023-01-22 11:29:04.954615: step: 42/466, loss: 0.13717980682849884 2023-01-22 11:29:05.577008: step: 44/466, loss: 0.12620197236537933 2023-01-22 11:29:06.277718: step: 46/466, loss: 0.0906166285276413 2023-01-22 11:29:06.927175: step: 48/466, loss: 0.09270259737968445 2023-01-22 11:29:07.565712: step: 50/466, loss: 0.2560976445674896 2023-01-22 11:29:08.354972: step: 52/466, loss: 0.09398878365755081 2023-01-22 11:29:08.994755: step: 54/466, loss: 0.040162697434425354 2023-01-22 11:29:09.667555: step: 56/466, loss: 0.05243329331278801 2023-01-22 11:29:10.310835: step: 58/466, loss: 0.05652332305908203 2023-01-22 11:29:11.006700: step: 60/466, loss: 0.026979945600032806 2023-01-22 11:29:11.699797: step: 62/466, loss: 0.13747258484363556 2023-01-22 11:29:12.381389: step: 64/466, loss: 0.11705562472343445 2023-01-22 11:29:13.025552: step: 66/466, loss: 0.1764337122440338 2023-01-22 11:29:13.695915: step: 68/466, loss: 0.18013818562030792 2023-01-22 11:29:14.363031: step: 70/466, loss: 0.08881554007530212 2023-01-22 11:29:15.020006: step: 72/466, loss: 0.12398698180913925 2023-01-22 11:29:15.674469: step: 74/466, loss: 0.14571566879749298 2023-01-22 11:29:16.325310: step: 76/466, loss: 0.19627001881599426 2023-01-22 11:29:17.008279: step: 78/466, loss: 0.041960377246141434 2023-01-22 11:29:17.669228: step: 80/466, loss: 0.1169004812836647 2023-01-22 11:29:18.431977: step: 82/466, loss: 0.16998115181922913 2023-01-22 11:29:19.068378: step: 84/466, loss: 0.49829229712486267 2023-01-22 11:29:19.677824: step: 86/466, loss: 0.06498653441667557 2023-01-22 11:29:20.410649: step: 88/466, loss: 0.07490881532430649 2023-01-22 11:29:21.076411: step: 90/466, loss: 0.26454323530197144 2023-01-22 11:29:21.741472: step: 92/466, loss: 0.05853681638836861 2023-01-22 11:29:22.370799: step: 94/466, loss: 0.16520006954669952 2023-01-22 11:29:23.007859: step: 96/466, loss: 0.020829584449529648 2023-01-22 11:29:23.635723: step: 98/466, loss: 0.1657995879650116 2023-01-22 11:29:24.309532: step: 100/466, loss: 0.09411315619945526 2023-01-22 11:29:25.014600: step: 102/466, loss: 0.1425132304430008 2023-01-22 11:29:25.742977: step: 104/466, loss: 0.13217833638191223 2023-01-22 11:29:26.320039: step: 106/466, loss: 0.5152426958084106 2023-01-22 11:29:26.948927: step: 108/466, loss: 0.20067660510540009 2023-01-22 11:29:27.630895: step: 110/466, loss: 0.6288007497787476 2023-01-22 11:29:28.265889: step: 112/466, loss: 0.10686130076646805 2023-01-22 11:29:28.899173: step: 114/466, loss: 0.08602900803089142 2023-01-22 11:29:29.600549: step: 116/466, loss: 0.30152466893196106 2023-01-22 11:29:30.227959: step: 118/466, loss: 0.020262155681848526 2023-01-22 11:29:30.905277: step: 120/466, loss: 0.5782296657562256 2023-01-22 11:29:31.554845: step: 122/466, loss: 0.02323436550796032 2023-01-22 11:29:32.215011: step: 124/466, loss: 0.0832483321428299 2023-01-22 11:29:32.833154: step: 126/466, loss: 0.08001965284347534 2023-01-22 11:29:33.537664: step: 128/466, loss: 0.0427028127014637 2023-01-22 11:29:34.244927: step: 130/466, loss: 0.2891668975353241 2023-01-22 11:29:34.891689: step: 132/466, loss: 0.1530880630016327 2023-01-22 11:29:35.506556: step: 134/466, loss: 0.027556931599974632 2023-01-22 11:29:36.088686: step: 136/466, loss: 0.04821633920073509 2023-01-22 11:29:36.766335: step: 138/466, loss: 0.07989934831857681 2023-01-22 11:29:37.460815: step: 140/466, loss: 0.11616438627243042 2023-01-22 11:29:38.059587: step: 142/466, loss: 0.10473637282848358 2023-01-22 11:29:38.776694: step: 144/466, loss: 0.19119058549404144 2023-01-22 11:29:39.391160: step: 146/466, loss: 0.08404102921485901 2023-01-22 11:29:40.049668: step: 148/466, loss: 0.23165667057037354 2023-01-22 11:29:40.685116: step: 150/466, loss: 0.2223658710718155 2023-01-22 11:29:41.360994: step: 152/466, loss: 0.06914431601762772 2023-01-22 11:29:41.991034: step: 154/466, loss: 0.20164556801319122 2023-01-22 11:29:42.684573: step: 156/466, loss: 0.24763233959674835 2023-01-22 11:29:43.372394: step: 158/466, loss: 0.16237004101276398 2023-01-22 11:29:44.020642: step: 160/466, loss: 0.0988822802901268 2023-01-22 11:29:44.717392: step: 162/466, loss: 0.46181219816207886 2023-01-22 11:29:45.354359: step: 164/466, loss: 0.11193667352199554 2023-01-22 11:29:46.023514: step: 166/466, loss: 0.09703291952610016 2023-01-22 11:29:46.688036: step: 168/466, loss: 0.07440285384654999 2023-01-22 11:29:47.258634: step: 170/466, loss: 0.08722874522209167 2023-01-22 11:29:47.954896: step: 172/466, loss: 0.21509423851966858 2023-01-22 11:29:48.667595: step: 174/466, loss: 0.17768056690692902 2023-01-22 11:29:49.367793: step: 176/466, loss: 0.08026005327701569 2023-01-22 11:29:50.032989: step: 178/466, loss: 0.09798018634319305 2023-01-22 11:29:50.697140: step: 180/466, loss: 0.0405559204518795 2023-01-22 11:29:51.360370: step: 182/466, loss: 0.06418140977621078 2023-01-22 11:29:52.060009: step: 184/466, loss: 0.045770592987537384 2023-01-22 11:29:52.756130: step: 186/466, loss: 0.21991638839244843 2023-01-22 11:29:53.394315: step: 188/466, loss: 0.0976586565375328 2023-01-22 11:29:54.064804: step: 190/466, loss: 0.03503083065152168 2023-01-22 11:29:54.663099: step: 192/466, loss: 0.11509787291288376 2023-01-22 11:29:55.361995: step: 194/466, loss: 0.29007306694984436 2023-01-22 11:29:56.001570: step: 196/466, loss: 0.030970891937613487 2023-01-22 11:29:56.722206: step: 198/466, loss: 0.0502634197473526 2023-01-22 11:29:57.314082: step: 200/466, loss: 0.04383686184883118 2023-01-22 11:29:57.936052: step: 202/466, loss: 0.05791878700256348 2023-01-22 11:29:58.743574: step: 204/466, loss: 0.08554225414991379 2023-01-22 11:29:59.373529: step: 206/466, loss: 0.037981659173965454 2023-01-22 11:30:00.004243: step: 208/466, loss: 0.08121279627084732 2023-01-22 11:30:00.696800: step: 210/466, loss: 0.1800466924905777 2023-01-22 11:30:01.323636: step: 212/466, loss: 0.12716861069202423 2023-01-22 11:30:02.012785: step: 214/466, loss: 0.13399292528629303 2023-01-22 11:30:02.704203: step: 216/466, loss: 0.08899815380573273 2023-01-22 11:30:03.321862: step: 218/466, loss: 0.358853816986084 2023-01-22 11:30:03.982771: step: 220/466, loss: 0.09514690935611725 2023-01-22 11:30:04.655919: step: 222/466, loss: 0.05393754318356514 2023-01-22 11:30:05.318332: step: 224/466, loss: 1.1265686750411987 2023-01-22 11:30:06.053465: step: 226/466, loss: 0.05097786709666252 2023-01-22 11:30:06.675279: step: 228/466, loss: 0.10566449910402298 2023-01-22 11:30:07.360326: step: 230/466, loss: 0.05373445525765419 2023-01-22 11:30:07.993859: step: 232/466, loss: 0.022259365767240524 2023-01-22 11:30:08.641699: step: 234/466, loss: 0.0640915185213089 2023-01-22 11:30:09.384038: step: 236/466, loss: 0.1120535135269165 2023-01-22 11:30:10.092765: step: 238/466, loss: 0.07983346283435822 2023-01-22 11:30:10.747331: step: 240/466, loss: 0.17747534811496735 2023-01-22 11:30:11.413178: step: 242/466, loss: 0.20360440015792847 2023-01-22 11:30:12.059069: step: 244/466, loss: 0.1407889872789383 2023-01-22 11:30:12.705853: step: 246/466, loss: 0.11615476757287979 2023-01-22 11:30:13.370196: step: 248/466, loss: 0.1729411482810974 2023-01-22 11:30:14.034040: step: 250/466, loss: 0.05504698306322098 2023-01-22 11:30:14.715322: step: 252/466, loss: 0.1191413551568985 2023-01-22 11:30:15.435061: step: 254/466, loss: 0.04059406742453575 2023-01-22 11:30:16.106650: step: 256/466, loss: 0.03980935364961624 2023-01-22 11:30:16.759061: step: 258/466, loss: 0.10433091968297958 2023-01-22 11:30:17.360675: step: 260/466, loss: 0.06259889155626297 2023-01-22 11:30:18.061667: step: 262/466, loss: 0.09888036549091339 2023-01-22 11:30:18.687117: step: 264/466, loss: 0.03456645458936691 2023-01-22 11:30:19.345063: step: 266/466, loss: 0.18702808022499084 2023-01-22 11:30:20.025203: step: 268/466, loss: 0.0509725883603096 2023-01-22 11:30:20.728378: step: 270/466, loss: 0.17532788217067719 2023-01-22 11:30:21.417316: step: 272/466, loss: 0.11292461305856705 2023-01-22 11:30:22.053636: step: 274/466, loss: 1.1103038787841797 2023-01-22 11:30:22.716967: step: 276/466, loss: 0.10820992290973663 2023-01-22 11:30:23.442800: step: 278/466, loss: 0.07610810548067093 2023-01-22 11:30:24.070726: step: 280/466, loss: 0.08254982531070709 2023-01-22 11:30:24.678250: step: 282/466, loss: 0.10432609170675278 2023-01-22 11:30:25.319617: step: 284/466, loss: 0.04582104831933975 2023-01-22 11:30:25.951349: step: 286/466, loss: 0.03947072848677635 2023-01-22 11:30:26.575627: step: 288/466, loss: 0.0196113009005785 2023-01-22 11:30:27.325097: step: 290/466, loss: 0.0797518640756607 2023-01-22 11:30:27.954980: step: 292/466, loss: 0.08071209490299225 2023-01-22 11:30:28.592237: step: 294/466, loss: 0.066269151866436 2023-01-22 11:30:29.265425: step: 296/466, loss: 0.20743513107299805 2023-01-22 11:30:29.951263: step: 298/466, loss: 0.24499349296092987 2023-01-22 11:30:30.622625: step: 300/466, loss: 0.05682981014251709 2023-01-22 11:30:31.278370: step: 302/466, loss: 0.08268841356039047 2023-01-22 11:30:32.085902: step: 304/466, loss: 0.06325706094503403 2023-01-22 11:30:32.799101: step: 306/466, loss: 0.04648834466934204 2023-01-22 11:30:33.467096: step: 308/466, loss: 0.04719547927379608 2023-01-22 11:30:34.071383: step: 310/466, loss: 0.12633663415908813 2023-01-22 11:30:34.769411: step: 312/466, loss: 0.07563754171133041 2023-01-22 11:30:35.417284: step: 314/466, loss: 0.12214470654726028 2023-01-22 11:30:36.130705: step: 316/466, loss: 0.043213777244091034 2023-01-22 11:30:36.809265: step: 318/466, loss: 0.13378891348838806 2023-01-22 11:30:37.490223: step: 320/466, loss: 0.2778260111808777 2023-01-22 11:30:38.096176: step: 322/466, loss: 0.16735616326332092 2023-01-22 11:30:38.718098: step: 324/466, loss: 0.10614963620901108 2023-01-22 11:30:39.350395: step: 326/466, loss: 0.03816225752234459 2023-01-22 11:30:39.991234: step: 328/466, loss: 0.029803283512592316 2023-01-22 11:30:40.640621: step: 330/466, loss: 0.027446869760751724 2023-01-22 11:30:41.340980: step: 332/466, loss: 0.04507140442728996 2023-01-22 11:30:41.987914: step: 334/466, loss: 0.36620408296585083 2023-01-22 11:30:42.609665: step: 336/466, loss: 0.06710328906774521 2023-01-22 11:30:43.240959: step: 338/466, loss: 0.021497823297977448 2023-01-22 11:30:43.906271: step: 340/466, loss: 0.14197732508182526 2023-01-22 11:30:44.671440: step: 342/466, loss: 0.10229825973510742 2023-01-22 11:30:45.344276: step: 344/466, loss: 0.3133872449398041 2023-01-22 11:30:45.971744: step: 346/466, loss: 0.0795479416847229 2023-01-22 11:30:46.633015: step: 348/466, loss: 0.09182026982307434 2023-01-22 11:30:47.353890: step: 350/466, loss: 0.17283767461776733 2023-01-22 11:30:48.013645: step: 352/466, loss: 0.417004257440567 2023-01-22 11:30:48.766017: step: 354/466, loss: 0.24089911580085754 2023-01-22 11:30:49.418935: step: 356/466, loss: 0.07301607728004456 2023-01-22 11:30:50.174082: step: 358/466, loss: 0.07322098314762115 2023-01-22 11:30:50.890726: step: 360/466, loss: 0.13493238389492035 2023-01-22 11:30:51.560340: step: 362/466, loss: 0.029753653332591057 2023-01-22 11:30:52.231184: step: 364/466, loss: 0.11101134121417999 2023-01-22 11:30:52.887126: step: 366/466, loss: 0.1328093707561493 2023-01-22 11:30:53.520484: step: 368/466, loss: 0.06907472759485245 2023-01-22 11:30:54.175569: step: 370/466, loss: 0.07155998796224594 2023-01-22 11:30:54.791891: step: 372/466, loss: 0.03286347910761833 2023-01-22 11:30:55.404144: step: 374/466, loss: 0.030025159940123558 2023-01-22 11:30:56.003364: step: 376/466, loss: 0.057663481682538986 2023-01-22 11:30:56.664670: step: 378/466, loss: 0.053767379373311996 2023-01-22 11:30:57.283985: step: 380/466, loss: 0.016332991421222687 2023-01-22 11:30:57.943180: step: 382/466, loss: 0.13445210456848145 2023-01-22 11:30:58.604305: step: 384/466, loss: 0.038524363189935684 2023-01-22 11:30:59.237250: step: 386/466, loss: 0.17334969341754913 2023-01-22 11:30:59.919473: step: 388/466, loss: 0.1045505702495575 2023-01-22 11:31:00.670183: step: 390/466, loss: 0.15772441029548645 2023-01-22 11:31:01.365838: step: 392/466, loss: 0.10239655524492264 2023-01-22 11:31:01.997567: step: 394/466, loss: 0.07589149475097656 2023-01-22 11:31:02.748172: step: 396/466, loss: 0.16218271851539612 2023-01-22 11:31:03.332979: step: 398/466, loss: 0.08361567556858063 2023-01-22 11:31:04.004648: step: 400/466, loss: 0.08070149272680283 2023-01-22 11:31:04.692936: step: 402/466, loss: 0.12400388717651367 2023-01-22 11:31:05.439844: step: 404/466, loss: 0.49983540177345276 2023-01-22 11:31:06.139574: step: 406/466, loss: 0.21592941880226135 2023-01-22 11:31:06.816526: step: 408/466, loss: 0.29184338450431824 2023-01-22 11:31:07.436692: step: 410/466, loss: 0.10343955457210541 2023-01-22 11:31:08.038065: step: 412/466, loss: 0.06982731074094772 2023-01-22 11:31:08.688045: step: 414/466, loss: 0.04141250625252724 2023-01-22 11:31:09.385316: step: 416/466, loss: 0.5886058807373047 2023-01-22 11:31:10.049032: step: 418/466, loss: 0.09144656360149384 2023-01-22 11:31:10.741765: step: 420/466, loss: 0.0958983525633812 2023-01-22 11:31:11.411938: step: 422/466, loss: 0.059212010353803635 2023-01-22 11:31:12.099295: step: 424/466, loss: 0.04879840463399887 2023-01-22 11:31:12.712561: step: 426/466, loss: 0.08247572183609009 2023-01-22 11:31:13.350547: step: 428/466, loss: 0.09836183488368988 2023-01-22 11:31:14.001504: step: 430/466, loss: 0.09267791360616684 2023-01-22 11:31:14.721927: step: 432/466, loss: 0.0319414921104908 2023-01-22 11:31:15.401533: step: 434/466, loss: 0.07540285587310791 2023-01-22 11:31:16.081856: step: 436/466, loss: 0.035845641046762466 2023-01-22 11:31:16.730905: step: 438/466, loss: 0.14549075067043304 2023-01-22 11:31:17.297357: step: 440/466, loss: 0.08438480645418167 2023-01-22 11:31:18.034271: step: 442/466, loss: 0.4051809012889862 2023-01-22 11:31:18.715672: step: 444/466, loss: 0.2547866702079773 2023-01-22 11:31:19.353970: step: 446/466, loss: 0.06865016371011734 2023-01-22 11:31:19.982348: step: 448/466, loss: 0.0883961096405983 2023-01-22 11:31:20.636019: step: 450/466, loss: 0.266781747341156 2023-01-22 11:31:21.342695: step: 452/466, loss: 0.14414082467556 2023-01-22 11:31:21.989219: step: 454/466, loss: 0.5005567073822021 2023-01-22 11:31:22.649992: step: 456/466, loss: 0.07571188360452652 2023-01-22 11:31:23.338135: step: 458/466, loss: 0.04615286737680435 2023-01-22 11:31:23.969441: step: 460/466, loss: 0.17617224156856537 2023-01-22 11:31:24.582539: step: 462/466, loss: 0.10700923204421997 2023-01-22 11:31:25.259210: step: 464/466, loss: 0.3710244297981262 2023-01-22 11:31:25.902029: step: 466/466, loss: 0.1079602986574173 2023-01-22 11:31:26.552813: step: 468/466, loss: 0.12475752085447311 2023-01-22 11:31:27.131223: step: 470/466, loss: 0.0040720487013459206 2023-01-22 11:31:27.786225: step: 472/466, loss: 0.08998247236013412 2023-01-22 11:31:28.408387: step: 474/466, loss: 0.10712777078151703 2023-01-22 11:31:29.096365: step: 476/466, loss: 0.14514942467212677 2023-01-22 11:31:29.815226: step: 478/466, loss: 0.1995212584733963 2023-01-22 11:31:30.571281: step: 480/466, loss: 0.1564503163099289 2023-01-22 11:31:31.303150: step: 482/466, loss: 0.2669309675693512 2023-01-22 11:31:31.920317: step: 484/466, loss: 0.019181542098522186 2023-01-22 11:31:32.633727: step: 486/466, loss: 0.019846079871058464 2023-01-22 11:31:33.333313: step: 488/466, loss: 0.35277754068374634 2023-01-22 11:31:33.938169: step: 490/466, loss: 0.04823637753725052 2023-01-22 11:31:34.622274: step: 492/466, loss: 0.027920136228203773 2023-01-22 11:31:35.277130: step: 494/466, loss: 0.03421937674283981 2023-01-22 11:31:35.971021: step: 496/466, loss: 0.08403817564249039 2023-01-22 11:31:36.681544: step: 498/466, loss: 0.08438766747713089 2023-01-22 11:31:37.445456: step: 500/466, loss: 0.10292106866836548 2023-01-22 11:31:38.146627: step: 502/466, loss: 0.10465852916240692 2023-01-22 11:31:38.797035: step: 504/466, loss: 0.13783647119998932 2023-01-22 11:31:39.500802: step: 506/466, loss: 0.04002920910716057 2023-01-22 11:31:40.180876: step: 508/466, loss: 0.38707074522972107 2023-01-22 11:31:40.746680: step: 510/466, loss: 0.07652793079614639 2023-01-22 11:31:41.521855: step: 512/466, loss: 0.020424529910087585 2023-01-22 11:31:42.281508: step: 514/466, loss: 0.17076388001441956 2023-01-22 11:31:42.937062: step: 516/466, loss: 0.049553290009498596 2023-01-22 11:31:43.538113: step: 518/466, loss: 0.19098210334777832 2023-01-22 11:31:44.151301: step: 520/466, loss: 0.1638951152563095 2023-01-22 11:31:44.838260: step: 522/466, loss: 0.026427242904901505 2023-01-22 11:31:45.481851: step: 524/466, loss: 0.08538713306188583 2023-01-22 11:31:46.128356: step: 526/466, loss: 0.0711318626999855 2023-01-22 11:31:46.829862: step: 528/466, loss: 0.1881960779428482 2023-01-22 11:31:47.503242: step: 530/466, loss: 0.035001594573259354 2023-01-22 11:31:48.203755: step: 532/466, loss: 0.17813636362552643 2023-01-22 11:31:48.851769: step: 534/466, loss: 0.9085713028907776 2023-01-22 11:31:49.516460: step: 536/466, loss: 0.1646883636713028 2023-01-22 11:31:50.126913: step: 538/466, loss: 0.09630478918552399 2023-01-22 11:31:50.792057: step: 540/466, loss: 0.09880103170871735 2023-01-22 11:31:51.484029: step: 542/466, loss: 0.05040838569402695 2023-01-22 11:31:52.174911: step: 544/466, loss: 0.09186426550149918 2023-01-22 11:31:52.769677: step: 546/466, loss: 0.35977739095687866 2023-01-22 11:31:53.449641: step: 548/466, loss: 0.07499703764915466 2023-01-22 11:31:54.128779: step: 550/466, loss: 0.1100747287273407 2023-01-22 11:31:54.834443: step: 552/466, loss: 0.3778783977031708 2023-01-22 11:31:55.528291: step: 554/466, loss: 0.07648572325706482 2023-01-22 11:31:56.221401: step: 556/466, loss: 0.06567910313606262 2023-01-22 11:31:56.891341: step: 558/466, loss: 0.0745609775185585 2023-01-22 11:31:57.593906: step: 560/466, loss: 0.12958188354969025 2023-01-22 11:31:58.234130: step: 562/466, loss: 0.032594673335552216 2023-01-22 11:31:58.836232: step: 564/466, loss: 0.06931976974010468 2023-01-22 11:31:59.563952: step: 566/466, loss: 0.18030980229377747 2023-01-22 11:32:00.235980: step: 568/466, loss: 0.060430657118558884 2023-01-22 11:32:00.899964: step: 570/466, loss: 0.08576898276805878 2023-01-22 11:32:01.631851: step: 572/466, loss: 0.07026806473731995 2023-01-22 11:32:02.364215: step: 574/466, loss: 0.12407351285219193 2023-01-22 11:32:03.008855: step: 576/466, loss: 0.1357271671295166 2023-01-22 11:32:03.704324: step: 578/466, loss: 0.0655779093503952 2023-01-22 11:32:04.383306: step: 580/466, loss: 0.05485844612121582 2023-01-22 11:32:05.099253: step: 582/466, loss: 0.14106249809265137 2023-01-22 11:32:05.695934: step: 584/466, loss: 0.34478509426116943 2023-01-22 11:32:06.349921: step: 586/466, loss: 0.15412241220474243 2023-01-22 11:32:06.940960: step: 588/466, loss: 0.04285565763711929 2023-01-22 11:32:07.602756: step: 590/466, loss: 0.054340530186891556 2023-01-22 11:32:08.261173: step: 592/466, loss: 0.17163047194480896 2023-01-22 11:32:08.883631: step: 594/466, loss: 0.1610175222158432 2023-01-22 11:32:09.557870: step: 596/466, loss: 0.10305967926979065 2023-01-22 11:32:10.149785: step: 598/466, loss: 0.10210266709327698 2023-01-22 11:32:10.789610: step: 600/466, loss: 0.02406475506722927 2023-01-22 11:32:11.394070: step: 602/466, loss: 0.17943254113197327 2023-01-22 11:32:12.104254: step: 604/466, loss: 0.06392550468444824 2023-01-22 11:32:12.779842: step: 606/466, loss: 0.0971219390630722 2023-01-22 11:32:13.491886: step: 608/466, loss: 0.026507209986448288 2023-01-22 11:32:14.126928: step: 610/466, loss: 0.12073804438114166 2023-01-22 11:32:14.799693: step: 612/466, loss: 0.038244716823101044 2023-01-22 11:32:15.443695: step: 614/466, loss: 0.047044411301612854 2023-01-22 11:32:16.084797: step: 616/466, loss: 0.06327566504478455 2023-01-22 11:32:16.811464: step: 618/466, loss: 0.053750984370708466 2023-01-22 11:32:17.429998: step: 620/466, loss: 0.045614615082740784 2023-01-22 11:32:18.053323: step: 622/466, loss: 0.05823136493563652 2023-01-22 11:32:18.745989: step: 624/466, loss: 0.3171159327030182 2023-01-22 11:32:19.437142: step: 626/466, loss: 0.11763111501932144 2023-01-22 11:32:20.143554: step: 628/466, loss: 0.07963328063488007 2023-01-22 11:32:20.797641: step: 630/466, loss: 0.10646232962608337 2023-01-22 11:32:21.510840: step: 632/466, loss: 0.07549462467432022 2023-01-22 11:32:22.182435: step: 634/466, loss: 0.043111998587846756 2023-01-22 11:32:22.769047: step: 636/466, loss: 0.27588483691215515 2023-01-22 11:32:23.465780: step: 638/466, loss: 0.18270190060138702 2023-01-22 11:32:24.112299: step: 640/466, loss: 0.10143808275461197 2023-01-22 11:32:24.787316: step: 642/466, loss: 0.08115211129188538 2023-01-22 11:32:25.475021: step: 644/466, loss: 0.09656275063753128 2023-01-22 11:32:26.109276: step: 646/466, loss: 0.027801234275102615 2023-01-22 11:32:26.743877: step: 648/466, loss: 0.045937929302453995 2023-01-22 11:32:27.409676: step: 650/466, loss: 0.10616622865200043 2023-01-22 11:32:28.121794: step: 652/466, loss: 0.11118504405021667 2023-01-22 11:32:28.782901: step: 654/466, loss: 0.20897029340267181 2023-01-22 11:32:29.434671: step: 656/466, loss: 0.061517294496297836 2023-01-22 11:32:30.282041: step: 658/466, loss: 0.13016831874847412 2023-01-22 11:32:30.842823: step: 660/466, loss: 0.06911101192235947 2023-01-22 11:32:31.431180: step: 662/466, loss: 0.07121779024600983 2023-01-22 11:32:32.021511: step: 664/466, loss: 0.033248383551836014 2023-01-22 11:32:32.699138: step: 666/466, loss: 0.22419223189353943 2023-01-22 11:32:33.320831: step: 668/466, loss: 0.08588635176420212 2023-01-22 11:32:33.898895: step: 670/466, loss: 0.14215855300426483 2023-01-22 11:32:34.593823: step: 672/466, loss: 0.021613050252199173 2023-01-22 11:32:35.218535: step: 674/466, loss: 0.10710838437080383 2023-01-22 11:32:35.837649: step: 676/466, loss: 0.1299968957901001 2023-01-22 11:32:36.516305: step: 678/466, loss: 0.16836518049240112 2023-01-22 11:32:37.162492: step: 680/466, loss: 0.6163982152938843 2023-01-22 11:32:37.824887: step: 682/466, loss: 0.838560938835144 2023-01-22 11:32:38.492419: step: 684/466, loss: 0.3202309310436249 2023-01-22 11:32:39.132074: step: 686/466, loss: 0.13837195932865143 2023-01-22 11:32:39.769869: step: 688/466, loss: 0.07165122032165527 2023-01-22 11:32:40.436353: step: 690/466, loss: 0.059542424976825714 2023-01-22 11:32:41.055383: step: 692/466, loss: 0.0504116453230381 2023-01-22 11:32:41.703417: step: 694/466, loss: 0.046665359288454056 2023-01-22 11:32:42.403572: step: 696/466, loss: 0.3874886929988861 2023-01-22 11:32:43.091007: step: 698/466, loss: 0.131962850689888 2023-01-22 11:32:43.794037: step: 700/466, loss: 0.13335564732551575 2023-01-22 11:32:44.458740: step: 702/466, loss: 0.08524461090564728 2023-01-22 11:32:45.138782: step: 704/466, loss: 0.32217660546302795 2023-01-22 11:32:45.766364: step: 706/466, loss: 0.1130739077925682 2023-01-22 11:32:46.449351: step: 708/466, loss: 0.4742449223995209 2023-01-22 11:32:47.094867: step: 710/466, loss: 0.1473226696252823 2023-01-22 11:32:47.731049: step: 712/466, loss: 0.4161403775215149 2023-01-22 11:32:48.395324: step: 714/466, loss: 0.11619622260332108 2023-01-22 11:32:49.031657: step: 716/466, loss: 0.1965903788805008 2023-01-22 11:32:49.681815: step: 718/466, loss: 0.07091796398162842 2023-01-22 11:32:50.327858: step: 720/466, loss: 0.12858054041862488 2023-01-22 11:32:51.054711: step: 722/466, loss: 0.07139798253774643 2023-01-22 11:32:51.731550: step: 724/466, loss: 0.08102741092443466 2023-01-22 11:32:52.387706: step: 726/466, loss: 0.039550743997097015 2023-01-22 11:32:53.052588: step: 728/466, loss: 0.16001680493354797 2023-01-22 11:32:53.687349: step: 730/466, loss: 0.06432478129863739 2023-01-22 11:32:54.389552: step: 732/466, loss: 0.051167555153369904 2023-01-22 11:32:55.013106: step: 734/466, loss: 0.16745160520076752 2023-01-22 11:32:55.639132: step: 736/466, loss: 0.07915138453245163 2023-01-22 11:32:56.245215: step: 738/466, loss: 0.1392790526151657 2023-01-22 11:32:56.906511: step: 740/466, loss: 0.16193625330924988 2023-01-22 11:32:57.615734: step: 742/466, loss: 0.09272979944944382 2023-01-22 11:32:58.234949: step: 744/466, loss: 0.09982388466596603 2023-01-22 11:32:58.927362: step: 746/466, loss: 0.23354339599609375 2023-01-22 11:32:59.607931: step: 748/466, loss: 0.4393465518951416 2023-01-22 11:33:00.283909: step: 750/466, loss: 0.09297937154769897 2023-01-22 11:33:00.974654: step: 752/466, loss: 0.17270395159721375 2023-01-22 11:33:01.601354: step: 754/466, loss: 0.015132890082895756 2023-01-22 11:33:02.286875: step: 756/466, loss: 0.07725682109594345 2023-01-22 11:33:02.916698: step: 758/466, loss: 0.2666139602661133 2023-01-22 11:33:03.546939: step: 760/466, loss: 0.3584570288658142 2023-01-22 11:33:04.209144: step: 762/466, loss: 0.05754183977842331 2023-01-22 11:33:04.855210: step: 764/466, loss: 0.27439069747924805 2023-01-22 11:33:05.497350: step: 766/466, loss: 0.02167350985109806 2023-01-22 11:33:06.125634: step: 768/466, loss: 0.05525706335902214 2023-01-22 11:33:06.839260: step: 770/466, loss: 0.1161939725279808 2023-01-22 11:33:07.510291: step: 772/466, loss: 0.11963807046413422 2023-01-22 11:33:08.150711: step: 774/466, loss: 0.07343064248561859 2023-01-22 11:33:08.796043: step: 776/466, loss: 0.2884886562824249 2023-01-22 11:33:09.491914: step: 778/466, loss: 0.07670705765485764 2023-01-22 11:33:10.162366: step: 780/466, loss: 0.1908833086490631 2023-01-22 11:33:10.841885: step: 782/466, loss: 0.08754006773233414 2023-01-22 11:33:11.493828: step: 784/466, loss: 0.0543396957218647 2023-01-22 11:33:12.217404: step: 786/466, loss: 0.04662584885954857 2023-01-22 11:33:12.895817: step: 788/466, loss: 0.06784288585186005 2023-01-22 11:33:13.605549: step: 790/466, loss: 0.0806589275598526 2023-01-22 11:33:14.267791: step: 792/466, loss: 0.1591053009033203 2023-01-22 11:33:14.917689: step: 794/466, loss: 0.1348680704832077 2023-01-22 11:33:15.539551: step: 796/466, loss: 0.03892558068037033 2023-01-22 11:33:16.162181: step: 798/466, loss: 0.09071938693523407 2023-01-22 11:33:16.826284: step: 800/466, loss: 0.07300712913274765 2023-01-22 11:33:17.508475: step: 802/466, loss: 0.2393036037683487 2023-01-22 11:33:18.205527: step: 804/466, loss: 0.20589615404605865 2023-01-22 11:33:18.834406: step: 806/466, loss: 0.35018736124038696 2023-01-22 11:33:19.538739: step: 808/466, loss: 0.27487683296203613 2023-01-22 11:33:20.219512: step: 810/466, loss: 0.08876249939203262 2023-01-22 11:33:20.867106: step: 812/466, loss: 0.12461288273334503 2023-01-22 11:33:21.449140: step: 814/466, loss: 0.02857905998826027 2023-01-22 11:33:22.104067: step: 816/466, loss: 0.14356930553913116 2023-01-22 11:33:22.746957: step: 818/466, loss: 0.09146396070718765 2023-01-22 11:33:23.369270: step: 820/466, loss: 0.8136057257652283 2023-01-22 11:33:24.028277: step: 822/466, loss: 0.18825575709342957 2023-01-22 11:33:24.682164: step: 824/466, loss: 0.13904358446598053 2023-01-22 11:33:25.293533: step: 826/466, loss: 0.03442108631134033 2023-01-22 11:33:25.915702: step: 828/466, loss: 0.03802469000220299 2023-01-22 11:33:26.527234: step: 830/466, loss: 0.07993285357952118 2023-01-22 11:33:27.207913: step: 832/466, loss: 0.021052174270153046 2023-01-22 11:33:27.885277: step: 834/466, loss: 0.03310456871986389 2023-01-22 11:33:28.514771: step: 836/466, loss: 0.08458700776100159 2023-01-22 11:33:29.240468: step: 838/466, loss: 0.44769522547721863 2023-01-22 11:33:29.907907: step: 840/466, loss: 0.0225103460252285 2023-01-22 11:33:30.549703: step: 842/466, loss: 0.19166380167007446 2023-01-22 11:33:31.165900: step: 844/466, loss: 0.028776248916983604 2023-01-22 11:33:31.883341: step: 846/466, loss: 0.07082303613424301 2023-01-22 11:33:32.520389: step: 848/466, loss: 0.048280857503414154 2023-01-22 11:33:33.110223: step: 850/466, loss: 0.15884102880954742 2023-01-22 11:33:33.692183: step: 852/466, loss: 0.16965623199939728 2023-01-22 11:33:34.355565: step: 854/466, loss: 0.045534998178482056 2023-01-22 11:33:34.992064: step: 856/466, loss: 0.03119780868291855 2023-01-22 11:33:35.611667: step: 858/466, loss: 0.12831765413284302 2023-01-22 11:33:36.235870: step: 860/466, loss: 0.06411344558000565 2023-01-22 11:33:36.856642: step: 862/466, loss: 0.09839695692062378 2023-01-22 11:33:37.511128: step: 864/466, loss: 0.03153237700462341 2023-01-22 11:33:38.157497: step: 866/466, loss: 0.2929447889328003 2023-01-22 11:33:38.807637: step: 868/466, loss: 0.21719765663146973 2023-01-22 11:33:39.502922: step: 870/466, loss: 0.15172508358955383 2023-01-22 11:33:40.106356: step: 872/466, loss: 0.08611549437046051 2023-01-22 11:33:40.790478: step: 874/466, loss: 0.11833415180444717 2023-01-22 11:33:41.445486: step: 876/466, loss: 0.3285122513771057 2023-01-22 11:33:42.122876: step: 878/466, loss: 0.09364908188581467 2023-01-22 11:33:42.772661: step: 880/466, loss: 0.01854356937110424 2023-01-22 11:33:43.470307: step: 882/466, loss: 0.19637906551361084 2023-01-22 11:33:44.161281: step: 884/466, loss: 0.1268959641456604 2023-01-22 11:33:44.815633: step: 886/466, loss: 0.08831460028886795 2023-01-22 11:33:45.408709: step: 888/466, loss: 0.01859605871140957 2023-01-22 11:33:46.127122: step: 890/466, loss: 0.05948847532272339 2023-01-22 11:33:46.770347: step: 892/466, loss: 0.03629082813858986 2023-01-22 11:33:47.412419: step: 894/466, loss: 0.06120298430323601 2023-01-22 11:33:48.071659: step: 896/466, loss: 0.04455656558275223 2023-01-22 11:33:48.732294: step: 898/466, loss: 0.10806294530630112 2023-01-22 11:33:49.315125: step: 900/466, loss: 0.18784120678901672 2023-01-22 11:33:49.963963: step: 902/466, loss: 0.018527891486883163 2023-01-22 11:33:50.608003: step: 904/466, loss: 0.026689549908041954 2023-01-22 11:33:51.221253: step: 906/466, loss: 0.10876591503620148 2023-01-22 11:33:51.888055: step: 908/466, loss: 0.1081932783126831 2023-01-22 11:33:52.569121: step: 910/466, loss: 0.0986640676856041 2023-01-22 11:33:53.210331: step: 912/466, loss: 0.015939170494675636 2023-01-22 11:33:53.835137: step: 914/466, loss: 0.07669097930192947 2023-01-22 11:33:54.441029: step: 916/466, loss: 0.05065422132611275 2023-01-22 11:33:55.123324: step: 918/466, loss: 0.11322743445634842 2023-01-22 11:33:55.838379: step: 920/466, loss: 0.05013665556907654 2023-01-22 11:33:56.533203: step: 922/466, loss: 0.040548644959926605 2023-01-22 11:33:57.146441: step: 924/466, loss: 0.16020895540714264 2023-01-22 11:33:57.827880: step: 926/466, loss: 0.18104447424411774 2023-01-22 11:33:58.522256: step: 928/466, loss: 0.09960732609033585 2023-01-22 11:33:59.149735: step: 930/466, loss: 0.1459933966398239 2023-01-22 11:33:59.795348: step: 932/466, loss: 0.11824838817119598 ================================================== Loss: 0.136 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28712468617373554, 'r': 0.33125580492150136, 'f1': 0.30761552280816074}, 'combined': 0.2266640694375921, 'epoch': 16} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3428321755461481, 'r': 0.3336226457004543, 'f1': 0.3381647194890859}, 'combined': 0.22427504712229526, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28334745762711866, 'r': 0.28173752889060094, 'f1': 0.28254019991308127}, 'combined': 0.1883601332753875, 'epoch': 16} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35900473036139513, 'r': 0.3148673522563578, 'f1': 0.335490582893075}, 'combined': 0.2189517488354805, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28644572309519495, 'r': 0.33753850861881607, 'f1': 0.30990033805246703}, 'combined': 0.22834761751234411, 'epoch': 16} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3295124397384556, 'r': 0.3186619434559068, 'f1': 0.32399637246530083}, 'combined': 0.2148784231894223, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22756410256410253, 'r': 0.33809523809523806, 'f1': 0.2720306513409961}, 'combined': 0.18135376756066404, 'epoch': 16} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.4673913043478261, 'f1': 0.4673913043478261}, 'combined': 0.3115942028985507, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3055555555555556, 'r': 0.1896551724137931, 'f1': 0.23404255319148937}, 'combined': 0.15602836879432624, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:36:44.857273: step: 2/466, loss: 0.07163936644792557 2023-01-22 11:36:45.554729: step: 4/466, loss: 0.20703236758708954 2023-01-22 11:36:46.180982: step: 6/466, loss: 0.2051185816526413 2023-01-22 11:36:46.768058: step: 8/466, loss: 0.0900440439581871 2023-01-22 11:36:47.404228: step: 10/466, loss: 0.04501933604478836 2023-01-22 11:36:48.026312: step: 12/466, loss: 0.18890134990215302 2023-01-22 11:36:48.692111: step: 14/466, loss: 0.09076967090368271 2023-01-22 11:36:49.340489: step: 16/466, loss: 0.1052854135632515 2023-01-22 11:36:50.054002: step: 18/466, loss: 0.12530001997947693 2023-01-22 11:36:50.722729: step: 20/466, loss: 0.1331702023744583 2023-01-22 11:36:51.390759: step: 22/466, loss: 0.09598618745803833 2023-01-22 11:36:52.067929: step: 24/466, loss: 0.10314838588237762 2023-01-22 11:36:52.711438: step: 26/466, loss: 0.10385505855083466 2023-01-22 11:36:53.375700: step: 28/466, loss: 1.707896113395691 2023-01-22 11:36:54.095206: step: 30/466, loss: 0.18123123049736023 2023-01-22 11:36:54.763517: step: 32/466, loss: 0.15528570115566254 2023-01-22 11:36:55.410259: step: 34/466, loss: 0.07825500518083572 2023-01-22 11:36:56.019708: step: 36/466, loss: 0.08328621834516525 2023-01-22 11:36:56.757675: step: 38/466, loss: 0.06680682301521301 2023-01-22 11:36:57.424494: step: 40/466, loss: 0.11616786569356918 2023-01-22 11:36:58.087579: step: 42/466, loss: 0.3848291039466858 2023-01-22 11:36:58.727137: step: 44/466, loss: 0.08743570744991302 2023-01-22 11:36:59.308544: step: 46/466, loss: 0.06026541069149971 2023-01-22 11:36:59.952806: step: 48/466, loss: 0.051632121205329895 2023-01-22 11:37:00.649611: step: 50/466, loss: 0.6892358660697937 2023-01-22 11:37:01.279703: step: 52/466, loss: 0.0357104130089283 2023-01-22 11:37:01.952610: step: 54/466, loss: 0.016136176884174347 2023-01-22 11:37:02.672229: step: 56/466, loss: 0.03522597998380661 2023-01-22 11:37:03.332952: step: 58/466, loss: 0.028929902240633965 2023-01-22 11:37:04.042553: step: 60/466, loss: 0.08128506690263748 2023-01-22 11:37:04.829729: step: 62/466, loss: 0.30395564436912537 2023-01-22 11:37:05.526534: step: 64/466, loss: 0.04575144499540329 2023-01-22 11:37:06.145281: step: 66/466, loss: 0.054458633065223694 2023-01-22 11:37:06.859206: step: 68/466, loss: 0.08864127844572067 2023-01-22 11:37:07.459458: step: 70/466, loss: 1.7842373847961426 2023-01-22 11:37:08.126186: step: 72/466, loss: 0.12249581515789032 2023-01-22 11:37:08.849439: step: 74/466, loss: 0.05205175280570984 2023-01-22 11:37:09.474472: step: 76/466, loss: 0.09778035432100296 2023-01-22 11:37:10.129634: step: 78/466, loss: 0.15009304881095886 2023-01-22 11:37:10.758619: step: 80/466, loss: 0.015185577794909477 2023-01-22 11:37:11.427385: step: 82/466, loss: 0.10033207386732101 2023-01-22 11:37:12.163645: step: 84/466, loss: 0.07721716165542603 2023-01-22 11:37:12.874048: step: 86/466, loss: 0.061948612332344055 2023-01-22 11:37:13.528647: step: 88/466, loss: 0.2933771312236786 2023-01-22 11:37:14.224438: step: 90/466, loss: 0.17141754925251007 2023-01-22 11:37:14.910556: step: 92/466, loss: 0.10136251151561737 2023-01-22 11:37:15.578272: step: 94/466, loss: 0.18460702896118164 2023-01-22 11:37:16.251666: step: 96/466, loss: 0.012896597385406494 2023-01-22 11:37:16.900225: step: 98/466, loss: 0.175594300031662 2023-01-22 11:37:17.593677: step: 100/466, loss: 0.1161227747797966 2023-01-22 11:37:18.262652: step: 102/466, loss: 0.08087904751300812 2023-01-22 11:37:18.925224: step: 104/466, loss: 0.3065102994441986 2023-01-22 11:37:19.677834: step: 106/466, loss: 0.22393816709518433 2023-01-22 11:37:20.308303: step: 108/466, loss: 0.2180919647216797 2023-01-22 11:37:20.992563: step: 110/466, loss: 0.11499159038066864 2023-01-22 11:37:21.700533: step: 112/466, loss: 0.19297310709953308 2023-01-22 11:37:22.413326: step: 114/466, loss: 0.09911413490772247 2023-01-22 11:37:23.048117: step: 116/466, loss: 0.32927078008651733 2023-01-22 11:37:23.646391: step: 118/466, loss: 0.06469939649105072 2023-01-22 11:37:24.277137: step: 120/466, loss: 0.044414736330509186 2023-01-22 11:37:24.915197: step: 122/466, loss: 0.046586912125349045 2023-01-22 11:37:25.550329: step: 124/466, loss: 0.06715916842222214 2023-01-22 11:37:26.193242: step: 126/466, loss: 0.15492646396160126 2023-01-22 11:37:26.919190: step: 128/466, loss: 0.04904414340853691 2023-01-22 11:37:27.555304: step: 130/466, loss: 0.13736887276172638 2023-01-22 11:37:28.169661: step: 132/466, loss: 0.06990758329629898 2023-01-22 11:37:28.771646: step: 134/466, loss: 0.07760658860206604 2023-01-22 11:37:29.429951: step: 136/466, loss: 0.029811248183250427 2023-01-22 11:37:30.070034: step: 138/466, loss: 0.06430856883525848 2023-01-22 11:37:30.693452: step: 140/466, loss: 0.15281669795513153 2023-01-22 11:37:31.295781: step: 142/466, loss: 0.06193069368600845 2023-01-22 11:37:31.977775: step: 144/466, loss: 0.05268199369311333 2023-01-22 11:37:32.679747: step: 146/466, loss: 0.13049347698688507 2023-01-22 11:37:33.390300: step: 148/466, loss: 0.07088293880224228 2023-01-22 11:37:33.946935: step: 150/466, loss: 0.016721755266189575 2023-01-22 11:37:34.552010: step: 152/466, loss: 0.061871930956840515 2023-01-22 11:37:35.193002: step: 154/466, loss: 0.05041418969631195 2023-01-22 11:37:35.817053: step: 156/466, loss: 0.06532739847898483 2023-01-22 11:37:36.418867: step: 158/466, loss: 0.03481756150722504 2023-01-22 11:37:37.057826: step: 160/466, loss: 0.08428133279085159 2023-01-22 11:37:37.645967: step: 162/466, loss: 0.041844822466373444 2023-01-22 11:37:38.328235: step: 164/466, loss: 0.1911388486623764 2023-01-22 11:37:38.951282: step: 166/466, loss: 0.11913547664880753 2023-01-22 11:37:39.558160: step: 168/466, loss: 0.046030666679143906 2023-01-22 11:37:40.212006: step: 170/466, loss: 0.0535283088684082 2023-01-22 11:37:40.879736: step: 172/466, loss: 0.04303639754652977 2023-01-22 11:37:41.526601: step: 174/466, loss: 0.0598825141787529 2023-01-22 11:37:42.277310: step: 176/466, loss: 0.9581116437911987 2023-01-22 11:37:42.999592: step: 178/466, loss: 0.09876693040132523 2023-01-22 11:37:43.753331: step: 180/466, loss: 1.1125866174697876 2023-01-22 11:37:44.324385: step: 182/466, loss: 0.03268519416451454 2023-01-22 11:37:44.975725: step: 184/466, loss: 0.10946287959814072 2023-01-22 11:37:45.613061: step: 186/466, loss: 0.07927299290895462 2023-01-22 11:37:46.222609: step: 188/466, loss: 0.11638938635587692 2023-01-22 11:37:46.837289: step: 190/466, loss: 2.2971489429473877 2023-01-22 11:37:47.432573: step: 192/466, loss: 0.1603151112794876 2023-01-22 11:37:48.089452: step: 194/466, loss: 0.0545412041246891 2023-01-22 11:37:48.703879: step: 196/466, loss: 0.033792734146118164 2023-01-22 11:37:49.326620: step: 198/466, loss: 0.03249639272689819 2023-01-22 11:37:50.105535: step: 200/466, loss: 0.30851632356643677 2023-01-22 11:37:50.749020: step: 202/466, loss: 0.048087891191244125 2023-01-22 11:37:51.413942: step: 204/466, loss: 0.031088771298527718 2023-01-22 11:37:52.104082: step: 206/466, loss: 0.08168289065361023 2023-01-22 11:37:52.699726: step: 208/466, loss: 0.06491156667470932 2023-01-22 11:37:53.308152: step: 210/466, loss: 0.07623785734176636 2023-01-22 11:37:53.913058: step: 212/466, loss: 0.07565370202064514 2023-01-22 11:37:54.616784: step: 214/466, loss: 0.2032523900270462 2023-01-22 11:37:55.264751: step: 216/466, loss: 0.09473734349012375 2023-01-22 11:37:55.951869: step: 218/466, loss: 0.4610787332057953 2023-01-22 11:37:56.577449: step: 220/466, loss: 0.00763842323794961 2023-01-22 11:37:57.287764: step: 222/466, loss: 0.32014867663383484 2023-01-22 11:37:57.936627: step: 224/466, loss: 0.07198350131511688 2023-01-22 11:37:58.619791: step: 226/466, loss: 1.0742568969726562 2023-01-22 11:37:59.176663: step: 228/466, loss: 0.05047997459769249 2023-01-22 11:37:59.814808: step: 230/466, loss: 0.04707179218530655 2023-01-22 11:38:00.542699: step: 232/466, loss: 0.1336919367313385 2023-01-22 11:38:01.246464: step: 234/466, loss: 0.09445635974407196 2023-01-22 11:38:01.895689: step: 236/466, loss: 0.36126482486724854 2023-01-22 11:38:02.594747: step: 238/466, loss: 0.060997847467660904 2023-01-22 11:38:03.227664: step: 240/466, loss: 0.04949461296200752 2023-01-22 11:38:03.919801: step: 242/466, loss: 0.025348510593175888 2023-01-22 11:38:04.633031: step: 244/466, loss: 0.01648002117872238 2023-01-22 11:38:05.324120: step: 246/466, loss: 0.09901939332485199 2023-01-22 11:38:06.037742: step: 248/466, loss: 0.10556934028863907 2023-01-22 11:38:06.672789: step: 250/466, loss: 0.06789106130599976 2023-01-22 11:38:07.309972: step: 252/466, loss: 0.07301247864961624 2023-01-22 11:38:07.981761: step: 254/466, loss: 0.06608757376670837 2023-01-22 11:38:08.583499: step: 256/466, loss: 0.03352828323841095 2023-01-22 11:38:09.251533: step: 258/466, loss: 0.15243376791477203 2023-01-22 11:38:09.903858: step: 260/466, loss: 0.5134772658348083 2023-01-22 11:38:10.618434: step: 262/466, loss: 0.06259741634130478 2023-01-22 11:38:11.260510: step: 264/466, loss: 0.08475439995527267 2023-01-22 11:38:11.903324: step: 266/466, loss: 0.053116872906684875 2023-01-22 11:38:12.524795: step: 268/466, loss: 0.040617186576128006 2023-01-22 11:38:13.171587: step: 270/466, loss: 0.04007381200790405 2023-01-22 11:38:13.847612: step: 272/466, loss: 0.09571406245231628 2023-01-22 11:38:14.533724: step: 274/466, loss: 0.12687426805496216 2023-01-22 11:38:15.202963: step: 276/466, loss: 0.0580701120197773 2023-01-22 11:38:15.779133: step: 278/466, loss: 0.013055864721536636 2023-01-22 11:38:16.380204: step: 280/466, loss: 0.04769768565893173 2023-01-22 11:38:16.998322: step: 282/466, loss: 0.0945633053779602 2023-01-22 11:38:17.583559: step: 284/466, loss: 0.015341498889029026 2023-01-22 11:38:18.201995: step: 286/466, loss: 0.06844498962163925 2023-01-22 11:38:18.907503: step: 288/466, loss: 0.12033065408468246 2023-01-22 11:38:19.546138: step: 290/466, loss: 0.1408940702676773 2023-01-22 11:38:20.246810: step: 292/466, loss: 0.201069638133049 2023-01-22 11:38:20.809444: step: 294/466, loss: 0.04977531358599663 2023-01-22 11:38:21.520441: step: 296/466, loss: 0.026707181707024574 2023-01-22 11:38:22.161616: step: 298/466, loss: 0.1375470608472824 2023-01-22 11:38:22.830861: step: 300/466, loss: 0.1574658751487732 2023-01-22 11:38:23.545680: step: 302/466, loss: 0.13280268013477325 2023-01-22 11:38:24.183740: step: 304/466, loss: 0.021194612607359886 2023-01-22 11:38:24.871935: step: 306/466, loss: 0.0552968792617321 2023-01-22 11:38:25.489623: step: 308/466, loss: 0.04537500813603401 2023-01-22 11:38:26.133788: step: 310/466, loss: 0.11480645090341568 2023-01-22 11:38:26.797217: step: 312/466, loss: 0.08807799220085144 2023-01-22 11:38:27.506656: step: 314/466, loss: 0.21497578918933868 2023-01-22 11:38:28.205486: step: 316/466, loss: 0.011518558487296104 2023-01-22 11:38:28.865360: step: 318/466, loss: 0.023418935015797615 2023-01-22 11:38:29.536280: step: 320/466, loss: 0.03557395190000534 2023-01-22 11:38:30.175624: step: 322/466, loss: 0.2847992181777954 2023-01-22 11:38:30.857724: step: 324/466, loss: 0.0652703270316124 2023-01-22 11:38:31.485767: step: 326/466, loss: 0.10121436417102814 2023-01-22 11:38:32.235711: step: 328/466, loss: 0.11288412660360336 2023-01-22 11:38:32.885336: step: 330/466, loss: 0.08179786056280136 2023-01-22 11:38:33.498924: step: 332/466, loss: 0.10343725234270096 2023-01-22 11:38:34.249637: step: 334/466, loss: 0.24230018258094788 2023-01-22 11:38:34.908336: step: 336/466, loss: 0.015430030412971973 2023-01-22 11:38:35.649938: step: 338/466, loss: 0.13451127707958221 2023-01-22 11:38:36.294575: step: 340/466, loss: 0.04132866859436035 2023-01-22 11:38:36.909351: step: 342/466, loss: 0.08746642619371414 2023-01-22 11:38:37.605228: step: 344/466, loss: 0.10066674649715424 2023-01-22 11:38:38.235923: step: 346/466, loss: 0.09099888056516647 2023-01-22 11:38:38.930346: step: 348/466, loss: 0.04106014594435692 2023-01-22 11:38:39.572792: step: 350/466, loss: 0.10349002480506897 2023-01-22 11:38:40.210348: step: 352/466, loss: 0.0962907001376152 2023-01-22 11:38:40.862111: step: 354/466, loss: 0.25616008043289185 2023-01-22 11:38:41.519141: step: 356/466, loss: 0.06833707541227341 2023-01-22 11:38:42.131181: step: 358/466, loss: 0.27315330505371094 2023-01-22 11:38:42.749379: step: 360/466, loss: 0.17183949053287506 2023-01-22 11:38:43.464425: step: 362/466, loss: 0.03962790593504906 2023-01-22 11:38:44.111678: step: 364/466, loss: 0.06679973006248474 2023-01-22 11:38:44.782286: step: 366/466, loss: 0.12388741225004196 2023-01-22 11:38:45.422320: step: 368/466, loss: 0.009611066430807114 2023-01-22 11:38:46.070431: step: 370/466, loss: 0.0658520832657814 2023-01-22 11:38:46.763513: step: 372/466, loss: 0.02278760075569153 2023-01-22 11:38:47.379601: step: 374/466, loss: 0.00905968714505434 2023-01-22 11:38:48.003126: step: 376/466, loss: 0.05500665307044983 2023-01-22 11:38:48.684283: step: 378/466, loss: 0.04999172315001488 2023-01-22 11:38:49.337027: step: 380/466, loss: 0.12665079534053802 2023-01-22 11:38:49.978393: step: 382/466, loss: 0.04226607829332352 2023-01-22 11:38:50.547251: step: 384/466, loss: 0.08261600881814957 2023-01-22 11:38:51.183933: step: 386/466, loss: 0.014171634800732136 2023-01-22 11:38:51.854636: step: 388/466, loss: 0.2714396119117737 2023-01-22 11:38:52.490075: step: 390/466, loss: 0.11800781637430191 2023-01-22 11:38:53.119506: step: 392/466, loss: 0.04906920716166496 2023-01-22 11:38:53.816913: step: 394/466, loss: 0.15990416705608368 2023-01-22 11:38:54.374383: step: 396/466, loss: 1.1693564653396606 2023-01-22 11:38:55.066883: step: 398/466, loss: 0.1673891246318817 2023-01-22 11:38:55.726619: step: 400/466, loss: 0.03268442302942276 2023-01-22 11:38:56.370511: step: 402/466, loss: 0.11904057115316391 2023-01-22 11:38:57.003105: step: 404/466, loss: 0.2910826802253723 2023-01-22 11:38:57.624410: step: 406/466, loss: 0.26489701867103577 2023-01-22 11:38:58.261240: step: 408/466, loss: 0.10169929265975952 2023-01-22 11:38:58.861932: step: 410/466, loss: 0.03802566975355148 2023-01-22 11:38:59.557554: step: 412/466, loss: 0.09995359182357788 2023-01-22 11:39:00.169276: step: 414/466, loss: 0.05769165977835655 2023-01-22 11:39:00.798751: step: 416/466, loss: 0.06827948987483978 2023-01-22 11:39:01.401342: step: 418/466, loss: 0.04335479065775871 2023-01-22 11:39:02.048146: step: 420/466, loss: 0.0710509717464447 2023-01-22 11:39:02.723313: step: 422/466, loss: 0.15698561072349548 2023-01-22 11:39:03.375678: step: 424/466, loss: 0.15473562479019165 2023-01-22 11:39:04.013062: step: 426/466, loss: 0.08751913905143738 2023-01-22 11:39:04.627512: step: 428/466, loss: 0.11267465353012085 2023-01-22 11:39:05.268259: step: 430/466, loss: 0.6198447346687317 2023-01-22 11:39:05.962171: step: 432/466, loss: 0.05800934135913849 2023-01-22 11:39:06.688583: step: 434/466, loss: 0.03908151388168335 2023-01-22 11:39:07.336463: step: 436/466, loss: 0.09468743205070496 2023-01-22 11:39:08.003435: step: 438/466, loss: 0.08381114900112152 2023-01-22 11:39:08.809028: step: 440/466, loss: 0.04809914156794548 2023-01-22 11:39:09.482384: step: 442/466, loss: 0.04134088754653931 2023-01-22 11:39:10.110485: step: 444/466, loss: 0.15189415216445923 2023-01-22 11:39:10.860408: step: 446/466, loss: 0.19982358813285828 2023-01-22 11:39:11.579829: step: 448/466, loss: 0.169953390955925 2023-01-22 11:39:12.213498: step: 450/466, loss: 0.08291981369256973 2023-01-22 11:39:12.865900: step: 452/466, loss: 0.29688578844070435 2023-01-22 11:39:13.559078: step: 454/466, loss: 0.14033854007720947 2023-01-22 11:39:14.247438: step: 456/466, loss: 0.07793901115655899 2023-01-22 11:39:14.974538: step: 458/466, loss: 0.02118169143795967 2023-01-22 11:39:15.578479: step: 460/466, loss: 0.05224119871854782 2023-01-22 11:39:16.174005: step: 462/466, loss: 0.05721522122621536 2023-01-22 11:39:16.833188: step: 464/466, loss: 0.601054310798645 2023-01-22 11:39:17.444630: step: 466/466, loss: 0.019567223265767097 2023-01-22 11:39:18.121357: step: 468/466, loss: 0.43480831384658813 2023-01-22 11:39:18.752299: step: 470/466, loss: 0.05632892996072769 2023-01-22 11:39:19.391100: step: 472/466, loss: 0.12266655266284943 2023-01-22 11:39:20.014763: step: 474/466, loss: 0.24110047519207 2023-01-22 11:39:20.629231: step: 476/466, loss: 0.041576821357011795 2023-01-22 11:39:21.275362: step: 478/466, loss: 0.5691177248954773 2023-01-22 11:39:21.952106: step: 480/466, loss: 0.2592619061470032 2023-01-22 11:39:22.603620: step: 482/466, loss: 0.0854816809296608 2023-01-22 11:39:23.229327: step: 484/466, loss: 0.06306815147399902 2023-01-22 11:39:23.847163: step: 486/466, loss: 0.014913514256477356 2023-01-22 11:39:24.493160: step: 488/466, loss: 0.027327340096235275 2023-01-22 11:39:25.098329: step: 490/466, loss: 0.19861024618148804 2023-01-22 11:39:25.792218: step: 492/466, loss: 0.21431532502174377 2023-01-22 11:39:26.478979: step: 494/466, loss: 0.15162241458892822 2023-01-22 11:39:27.111400: step: 496/466, loss: 0.05793420597910881 2023-01-22 11:39:27.780481: step: 498/466, loss: 0.03180902451276779 2023-01-22 11:39:28.402171: step: 500/466, loss: 0.07200276851654053 2023-01-22 11:39:28.965148: step: 502/466, loss: 0.0146143464371562 2023-01-22 11:39:29.600706: step: 504/466, loss: 0.16181471943855286 2023-01-22 11:39:30.216176: step: 506/466, loss: 0.09121792018413544 2023-01-22 11:39:30.819012: step: 508/466, loss: 0.015691015869379044 2023-01-22 11:39:31.499922: step: 510/466, loss: 0.06310312449932098 2023-01-22 11:39:32.115728: step: 512/466, loss: 0.03279956057667732 2023-01-22 11:39:32.731287: step: 514/466, loss: 0.06740190088748932 2023-01-22 11:39:33.419289: step: 516/466, loss: 0.09652914851903915 2023-01-22 11:39:34.083872: step: 518/466, loss: 0.0983295813202858 2023-01-22 11:39:34.706883: step: 520/466, loss: 0.07624398916959763 2023-01-22 11:39:35.370713: step: 522/466, loss: 0.08277488499879837 2023-01-22 11:39:35.980130: step: 524/466, loss: 0.047908440232276917 2023-01-22 11:39:36.632202: step: 526/466, loss: 0.16350354254245758 2023-01-22 11:39:37.324087: step: 528/466, loss: 0.06512527912855148 2023-01-22 11:39:37.991263: step: 530/466, loss: 0.18338938057422638 2023-01-22 11:39:38.676151: step: 532/466, loss: 0.10968554019927979 2023-01-22 11:39:39.331330: step: 534/466, loss: 0.02084297500550747 2023-01-22 11:39:40.027165: step: 536/466, loss: 0.02822844497859478 2023-01-22 11:39:40.697358: step: 538/466, loss: 0.15922009944915771 2023-01-22 11:39:41.352960: step: 540/466, loss: 0.06503274291753769 2023-01-22 11:39:42.017119: step: 542/466, loss: 0.054418645799160004 2023-01-22 11:39:42.740967: step: 544/466, loss: 0.0865282341837883 2023-01-22 11:39:43.357119: step: 546/466, loss: 0.026990242302417755 2023-01-22 11:39:43.900761: step: 548/466, loss: 0.040507011115550995 2023-01-22 11:39:44.583678: step: 550/466, loss: 0.05850210040807724 2023-01-22 11:39:45.259124: step: 552/466, loss: 0.06116797775030136 2023-01-22 11:39:45.907756: step: 554/466, loss: 0.04400629177689552 2023-01-22 11:39:46.564746: step: 556/466, loss: 0.20492783188819885 2023-01-22 11:39:47.244368: step: 558/466, loss: 0.1363212764263153 2023-01-22 11:39:47.921363: step: 560/466, loss: 0.07164261490106583 2023-01-22 11:39:48.578002: step: 562/466, loss: 0.05946716293692589 2023-01-22 11:39:49.219357: step: 564/466, loss: 0.07661996781826019 2023-01-22 11:39:49.949133: step: 566/466, loss: 0.045083437114953995 2023-01-22 11:39:50.553049: step: 568/466, loss: 0.0828741118311882 2023-01-22 11:39:51.275809: step: 570/466, loss: 0.054826319217681885 2023-01-22 11:39:51.938512: step: 572/466, loss: 0.16736988723278046 2023-01-22 11:39:52.609143: step: 574/466, loss: 0.11668187379837036 2023-01-22 11:39:53.245096: step: 576/466, loss: 0.10664176940917969 2023-01-22 11:39:53.912532: step: 578/466, loss: 0.06612854450941086 2023-01-22 11:39:54.614060: step: 580/466, loss: 0.10616856813430786 2023-01-22 11:39:55.221583: step: 582/466, loss: 0.12009678035974503 2023-01-22 11:39:55.891499: step: 584/466, loss: 0.5012925863265991 2023-01-22 11:39:56.561813: step: 586/466, loss: 0.1577220857143402 2023-01-22 11:39:57.177085: step: 588/466, loss: 0.058271583169698715 2023-01-22 11:39:57.813915: step: 590/466, loss: 0.37642702460289 2023-01-22 11:39:58.477875: step: 592/466, loss: 0.08568401634693146 2023-01-22 11:39:59.104902: step: 594/466, loss: 0.13801072537899017 2023-01-22 11:39:59.732183: step: 596/466, loss: 0.08078078180551529 2023-01-22 11:40:00.400913: step: 598/466, loss: 0.04879489168524742 2023-01-22 11:40:01.155367: step: 600/466, loss: 0.09097907692193985 2023-01-22 11:40:01.850652: step: 602/466, loss: 0.18482044339179993 2023-01-22 11:40:02.565881: step: 604/466, loss: 0.09671653807163239 2023-01-22 11:40:03.147962: step: 606/466, loss: 0.5252463221549988 2023-01-22 11:40:03.754436: step: 608/466, loss: 0.2623167932033539 2023-01-22 11:40:04.378581: step: 610/466, loss: 0.09503703564405441 2023-01-22 11:40:05.151366: step: 612/466, loss: 1.5838477611541748 2023-01-22 11:40:05.813841: step: 614/466, loss: 0.0861937403678894 2023-01-22 11:40:06.457682: step: 616/466, loss: 0.1169193685054779 2023-01-22 11:40:07.315832: step: 618/466, loss: 0.1271267980337143 2023-01-22 11:40:08.003371: step: 620/466, loss: 0.6097317337989807 2023-01-22 11:40:08.692443: step: 622/466, loss: 0.21033012866973877 2023-01-22 11:40:09.365144: step: 624/466, loss: 0.15617677569389343 2023-01-22 11:40:09.982254: step: 626/466, loss: 0.06437318027019501 2023-01-22 11:40:10.712776: step: 628/466, loss: 0.05594280734658241 2023-01-22 11:40:11.398475: step: 630/466, loss: 0.04285358265042305 2023-01-22 11:40:12.031516: step: 632/466, loss: 0.08711235225200653 2023-01-22 11:40:12.724338: step: 634/466, loss: 0.03910278156399727 2023-01-22 11:40:13.385414: step: 636/466, loss: 0.0911153107881546 2023-01-22 11:40:14.044708: step: 638/466, loss: 0.04268583655357361 2023-01-22 11:40:14.753809: step: 640/466, loss: 0.035510484129190445 2023-01-22 11:40:15.396636: step: 642/466, loss: 0.05552142485976219 2023-01-22 11:40:16.141624: step: 644/466, loss: 0.21432751417160034 2023-01-22 11:40:16.834744: step: 646/466, loss: 0.10881240665912628 2023-01-22 11:40:17.491150: step: 648/466, loss: 0.05536452680826187 2023-01-22 11:40:18.265108: step: 650/466, loss: 0.06242331117391586 2023-01-22 11:40:19.033603: step: 652/466, loss: 0.1637096405029297 2023-01-22 11:40:19.695495: step: 654/466, loss: 0.05477649345993996 2023-01-22 11:40:20.381152: step: 656/466, loss: 0.07378639280796051 2023-01-22 11:40:21.066575: step: 658/466, loss: 0.08187831938266754 2023-01-22 11:40:21.766968: step: 660/466, loss: 0.03793375566601753 2023-01-22 11:40:22.426124: step: 662/466, loss: 0.044803231954574585 2023-01-22 11:40:23.127957: step: 664/466, loss: 0.2506631314754486 2023-01-22 11:40:23.786626: step: 666/466, loss: 0.06307424604892731 2023-01-22 11:40:24.493553: step: 668/466, loss: 0.08123359084129333 2023-01-22 11:40:25.104566: step: 670/466, loss: 0.058123879134655 2023-01-22 11:40:25.780300: step: 672/466, loss: 0.07231193035840988 2023-01-22 11:40:26.439251: step: 674/466, loss: 0.04953724145889282 2023-01-22 11:40:27.185153: step: 676/466, loss: 0.1181645542383194 2023-01-22 11:40:27.867337: step: 678/466, loss: 0.08696312457323074 2023-01-22 11:40:28.547173: step: 680/466, loss: 0.0802014023065567 2023-01-22 11:40:29.140912: step: 682/466, loss: 0.04924570769071579 2023-01-22 11:40:29.791501: step: 684/466, loss: 0.06582105904817581 2023-01-22 11:40:30.489718: step: 686/466, loss: 0.1181468814611435 2023-01-22 11:40:31.243685: step: 688/466, loss: 0.12264467030763626 2023-01-22 11:40:31.872901: step: 690/466, loss: 0.08274734765291214 2023-01-22 11:40:32.532254: step: 692/466, loss: 0.07421544939279556 2023-01-22 11:40:33.214019: step: 694/466, loss: 0.011576401069760323 2023-01-22 11:40:33.844199: step: 696/466, loss: 0.015556792728602886 2023-01-22 11:40:34.532587: step: 698/466, loss: 1.4480268955230713 2023-01-22 11:40:35.195850: step: 700/466, loss: 0.28690746426582336 2023-01-22 11:40:35.835863: step: 702/466, loss: 0.0877830758690834 2023-01-22 11:40:36.516119: step: 704/466, loss: 0.25909364223480225 2023-01-22 11:40:37.218690: step: 706/466, loss: 0.8405012488365173 2023-01-22 11:40:37.864210: step: 708/466, loss: 0.15286941826343536 2023-01-22 11:40:38.523452: step: 710/466, loss: 0.15358571708202362 2023-01-22 11:40:39.154398: step: 712/466, loss: 0.11230973899364471 2023-01-22 11:40:39.777477: step: 714/466, loss: 0.0365796834230423 2023-01-22 11:40:40.419372: step: 716/466, loss: 0.3047601878643036 2023-01-22 11:40:41.023760: step: 718/466, loss: 0.11558191478252411 2023-01-22 11:40:41.627366: step: 720/466, loss: 0.09022244065999985 2023-01-22 11:40:42.273000: step: 722/466, loss: 0.018942786380648613 2023-01-22 11:40:42.845419: step: 724/466, loss: 0.21641039848327637 2023-01-22 11:40:43.563637: step: 726/466, loss: 0.6364198923110962 2023-01-22 11:40:44.276321: step: 728/466, loss: 0.14027713239192963 2023-01-22 11:40:44.918704: step: 730/466, loss: 0.08480183780193329 2023-01-22 11:40:45.630239: step: 732/466, loss: 0.040003206580877304 2023-01-22 11:40:46.329998: step: 734/466, loss: 0.10973091423511505 2023-01-22 11:40:47.014624: step: 736/466, loss: 0.46130046248435974 2023-01-22 11:40:47.688647: step: 738/466, loss: 20.934114456176758 2023-01-22 11:40:48.299906: step: 740/466, loss: 0.019502513110637665 2023-01-22 11:40:48.971479: step: 742/466, loss: 0.09954078495502472 2023-01-22 11:40:49.593485: step: 744/466, loss: 0.5703690648078918 2023-01-22 11:40:50.258844: step: 746/466, loss: 0.011088987812399864 2023-01-22 11:40:50.927646: step: 748/466, loss: 0.060354653745889664 2023-01-22 11:40:51.581684: step: 750/466, loss: 0.04820029065012932 2023-01-22 11:40:52.208962: step: 752/466, loss: 0.07377003133296967 2023-01-22 11:40:52.841288: step: 754/466, loss: 0.2375066727399826 2023-01-22 11:40:53.447759: step: 756/466, loss: 0.2742730677127838 2023-01-22 11:40:54.137953: step: 758/466, loss: 0.07631280273199081 2023-01-22 11:40:54.784968: step: 760/466, loss: 0.0527004674077034 2023-01-22 11:40:55.464790: step: 762/466, loss: 0.034588977694511414 2023-01-22 11:40:56.032405: step: 764/466, loss: 0.12235993146896362 2023-01-22 11:40:56.713850: step: 766/466, loss: 0.034359920769929886 2023-01-22 11:40:57.345102: step: 768/466, loss: 0.06783917546272278 2023-01-22 11:40:58.018364: step: 770/466, loss: 0.17482250928878784 2023-01-22 11:40:58.625011: step: 772/466, loss: 0.06787631660699844 2023-01-22 11:40:59.234344: step: 774/466, loss: 0.05822568014264107 2023-01-22 11:40:59.807249: step: 776/466, loss: 0.1922196000814438 2023-01-22 11:41:00.432826: step: 778/466, loss: 0.033735353499650955 2023-01-22 11:41:01.105592: step: 780/466, loss: 0.024258237332105637 2023-01-22 11:41:01.756371: step: 782/466, loss: 0.08711081743240356 2023-01-22 11:41:02.494476: step: 784/466, loss: 0.3389933109283447 2023-01-22 11:41:03.141997: step: 786/466, loss: 0.14080190658569336 2023-01-22 11:41:03.747929: step: 788/466, loss: 0.11569789797067642 2023-01-22 11:41:04.375282: step: 790/466, loss: 0.7507767081260681 2023-01-22 11:41:05.046664: step: 792/466, loss: 0.052032433450222015 2023-01-22 11:41:05.708346: step: 794/466, loss: 0.04452061280608177 2023-01-22 11:41:06.422489: step: 796/466, loss: 0.049668360501527786 2023-01-22 11:41:07.098589: step: 798/466, loss: 0.13247907161712646 2023-01-22 11:41:07.696650: step: 800/466, loss: 0.05487549304962158 2023-01-22 11:41:08.358804: step: 802/466, loss: 0.0959228128194809 2023-01-22 11:41:09.004947: step: 804/466, loss: 0.03185079246759415 2023-01-22 11:41:09.608214: step: 806/466, loss: 0.07701914012432098 2023-01-22 11:41:10.255953: step: 808/466, loss: 0.17329666018486023 2023-01-22 11:41:10.913361: step: 810/466, loss: 0.05172254145145416 2023-01-22 11:41:11.549794: step: 812/466, loss: 0.0889836996793747 2023-01-22 11:41:12.250451: step: 814/466, loss: 0.36501458287239075 2023-01-22 11:41:12.953744: step: 816/466, loss: 0.09125789999961853 2023-01-22 11:41:13.637245: step: 818/466, loss: 0.02714724838733673 2023-01-22 11:41:14.287471: step: 820/466, loss: 0.04561009258031845 2023-01-22 11:41:14.983271: step: 822/466, loss: 0.09402687847614288 2023-01-22 11:41:15.630455: step: 824/466, loss: 0.19560956954956055 2023-01-22 11:41:16.327392: step: 826/466, loss: 0.0626026913523674 2023-01-22 11:41:16.941148: step: 828/466, loss: 0.094856858253479 2023-01-22 11:41:17.619071: step: 830/466, loss: 0.033885981887578964 2023-01-22 11:41:18.277963: step: 832/466, loss: 0.03236625716090202 2023-01-22 11:41:18.956699: step: 834/466, loss: 0.10357360541820526 2023-01-22 11:41:19.578913: step: 836/466, loss: 0.06699132919311523 2023-01-22 11:41:20.321619: step: 838/466, loss: 0.1298193782567978 2023-01-22 11:41:21.005729: step: 840/466, loss: 0.052056584507226944 2023-01-22 11:41:21.667348: step: 842/466, loss: 0.03698112443089485 2023-01-22 11:41:22.301145: step: 844/466, loss: 0.21999523043632507 2023-01-22 11:41:23.031364: step: 846/466, loss: 0.07879167795181274 2023-01-22 11:41:23.707821: step: 848/466, loss: 0.1226952001452446 2023-01-22 11:41:24.401261: step: 850/466, loss: 0.15505649149417877 2023-01-22 11:41:25.038610: step: 852/466, loss: 0.045842863619327545 2023-01-22 11:41:25.677145: step: 854/466, loss: 0.44745418429374695 2023-01-22 11:41:26.356309: step: 856/466, loss: 0.03154830262064934 2023-01-22 11:41:27.054938: step: 858/466, loss: 0.24629947543144226 2023-01-22 11:41:27.690693: step: 860/466, loss: 0.052139271050691605 2023-01-22 11:41:28.372469: step: 862/466, loss: 0.5984631180763245 2023-01-22 11:41:28.996773: step: 864/466, loss: 0.06645335257053375 2023-01-22 11:41:29.662256: step: 866/466, loss: 0.026118503883481026 2023-01-22 11:41:30.306561: step: 868/466, loss: 0.3702729046344757 2023-01-22 11:41:30.955704: step: 870/466, loss: 0.0635104700922966 2023-01-22 11:41:31.532272: step: 872/466, loss: 0.07143677026033401 2023-01-22 11:41:32.201485: step: 874/466, loss: 0.020376041531562805 2023-01-22 11:41:32.981994: step: 876/466, loss: 0.07505100220441818 2023-01-22 11:41:33.639836: step: 878/466, loss: 0.1263282299041748 2023-01-22 11:41:34.219628: step: 880/466, loss: 0.04056801274418831 2023-01-22 11:41:34.901606: step: 882/466, loss: 0.07290873676538467 2023-01-22 11:41:35.608877: step: 884/466, loss: 0.45427680015563965 2023-01-22 11:41:36.206774: step: 886/466, loss: 0.016614042222499847 2023-01-22 11:41:36.881616: step: 888/466, loss: 0.0716577097773552 2023-01-22 11:41:37.466734: step: 890/466, loss: 0.07818274945020676 2023-01-22 11:41:38.100634: step: 892/466, loss: 0.08689560741186142 2023-01-22 11:41:38.777536: step: 894/466, loss: 0.11718635261058807 2023-01-22 11:41:39.411868: step: 896/466, loss: 0.07583006471395493 2023-01-22 11:41:40.052857: step: 898/466, loss: 0.06984055042266846 2023-01-22 11:41:40.699582: step: 900/466, loss: 0.0533699207007885 2023-01-22 11:41:41.364114: step: 902/466, loss: 0.06694526225328445 2023-01-22 11:41:42.183907: step: 904/466, loss: 2.499326229095459 2023-01-22 11:41:42.862166: step: 906/466, loss: 0.08951568603515625 2023-01-22 11:41:43.521131: step: 908/466, loss: 0.16057872772216797 2023-01-22 11:41:44.161862: step: 910/466, loss: 0.05228939652442932 2023-01-22 11:41:44.853903: step: 912/466, loss: 0.48351049423217773 2023-01-22 11:41:45.455576: step: 914/466, loss: 0.1175408884882927 2023-01-22 11:41:46.186706: step: 916/466, loss: 0.06571335345506668 2023-01-22 11:41:46.868000: step: 918/466, loss: 0.08429094403982162 2023-01-22 11:41:47.473378: step: 920/466, loss: 0.022826338186860085 2023-01-22 11:41:48.121191: step: 922/466, loss: 1.0452570915222168 2023-01-22 11:41:48.818644: step: 924/466, loss: 0.05522065982222557 2023-01-22 11:41:49.442734: step: 926/466, loss: 0.16498221457004547 2023-01-22 11:41:50.143650: step: 928/466, loss: 0.053920526057481766 2023-01-22 11:41:50.840634: step: 930/466, loss: 0.38070955872535706 2023-01-22 11:41:51.508015: step: 932/466, loss: 0.1380595862865448 ================================================== Loss: 0.197 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3092103494623656, 'r': 0.3637768817204301, 'f1': 0.3342814588782331}, 'combined': 0.24631265391027699, 'epoch': 17} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.33481831533137546, 'r': 0.34958971159599495, 'f1': 0.3420446099068727}, 'combined': 0.22684823869471346, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3102125, 'r': 0.2937618371212121, 'f1': 0.3017631322957199}, 'combined': 0.2011754215304799, 'epoch': 17} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35101196017854525, 'r': 0.33551273076806404, 'f1': 0.34308738737238953}, 'combined': 0.22390966333777, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2988762216983449, 'r': 0.3499177016847795, 'f1': 0.32238921116762026}, 'combined': 0.23754994507087807, 'epoch': 17} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32986914780508886, 'r': 0.3387151197617997, 'f1': 0.33423361369580923}, 'combined': 0.22166788887597708, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21515151515151512, 'r': 0.33809523809523806, 'f1': 0.2629629629629629}, 'combined': 0.1753086419753086, 'epoch': 17} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42045454545454547, 'r': 0.40217391304347827, 'f1': 0.41111111111111115}, 'combined': 0.2740740740740741, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.1896551724137931, 'f1': 0.2156862745098039}, 'combined': 0.1437908496732026, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:44:37.813506: step: 2/466, loss: 0.31189316511154175 2023-01-22 11:44:38.420451: step: 4/466, loss: 0.10597145557403564 2023-01-22 11:44:39.104739: step: 6/466, loss: 0.07821610569953918 2023-01-22 11:44:39.782539: step: 8/466, loss: 0.5892221927642822 2023-01-22 11:44:40.400516: step: 10/466, loss: 0.25590741634368896 2023-01-22 11:44:41.071215: step: 12/466, loss: 0.12411262094974518 2023-01-22 11:44:41.693220: step: 14/466, loss: 0.08438217639923096 2023-01-22 11:44:42.317352: step: 16/466, loss: 0.06738931685686111 2023-01-22 11:44:42.979664: step: 18/466, loss: 0.033883992582559586 2023-01-22 11:44:43.584414: step: 20/466, loss: 0.0135149285197258 2023-01-22 11:44:44.217296: step: 22/466, loss: 0.05257147178053856 2023-01-22 11:44:44.946812: step: 24/466, loss: 0.04349208250641823 2023-01-22 11:44:45.552217: step: 26/466, loss: 0.08731899410486221 2023-01-22 11:44:46.293920: step: 28/466, loss: 0.037038665264844894 2023-01-22 11:44:46.982358: step: 30/466, loss: 0.07792191207408905 2023-01-22 11:44:47.591865: step: 32/466, loss: 0.11329524219036102 2023-01-22 11:44:48.311890: step: 34/466, loss: 0.2606694996356964 2023-01-22 11:44:48.930993: step: 36/466, loss: 0.06266503036022186 2023-01-22 11:44:49.614866: step: 38/466, loss: 0.12956149876117706 2023-01-22 11:44:50.282194: step: 40/466, loss: 0.03676142543554306 2023-01-22 11:44:50.897501: step: 42/466, loss: 0.0485200434923172 2023-01-22 11:44:51.596206: step: 44/466, loss: 0.06625945121049881 2023-01-22 11:44:52.315695: step: 46/466, loss: 0.12503652274608612 2023-01-22 11:44:52.942055: step: 48/466, loss: 0.08727210015058517 2023-01-22 11:44:53.534228: step: 50/466, loss: 0.02814129739999771 2023-01-22 11:44:54.164123: step: 52/466, loss: 0.0902329534292221 2023-01-22 11:44:54.808377: step: 54/466, loss: 0.09235873818397522 2023-01-22 11:44:55.415188: step: 56/466, loss: 0.27931010723114014 2023-01-22 11:44:56.166835: step: 58/466, loss: 0.0059571899473667145 2023-01-22 11:44:56.875018: step: 60/466, loss: 0.02129332534968853 2023-01-22 11:44:57.516822: step: 62/466, loss: 0.456002801656723 2023-01-22 11:44:58.114322: step: 64/466, loss: 0.06236730515956879 2023-01-22 11:44:58.787721: step: 66/466, loss: 0.06540032476186752 2023-01-22 11:44:59.431392: step: 68/466, loss: 0.03372754901647568 2023-01-22 11:45:00.058950: step: 70/466, loss: 0.04925305396318436 2023-01-22 11:45:00.663248: step: 72/466, loss: 0.1637629121541977 2023-01-22 11:45:01.345483: step: 74/466, loss: 0.08615628629922867 2023-01-22 11:45:02.003315: step: 76/466, loss: 0.027274945750832558 2023-01-22 11:45:02.716294: step: 78/466, loss: 0.19199179112911224 2023-01-22 11:45:03.393097: step: 80/466, loss: 0.06197204068303108 2023-01-22 11:45:04.023163: step: 82/466, loss: 0.028553558513522148 2023-01-22 11:45:04.671403: step: 84/466, loss: 0.06519344449043274 2023-01-22 11:45:05.349257: step: 86/466, loss: 0.09060356020927429 2023-01-22 11:45:05.940436: step: 88/466, loss: 0.09861177951097488 2023-01-22 11:45:06.636772: step: 90/466, loss: 0.15060625970363617 2023-01-22 11:45:07.248559: step: 92/466, loss: 0.041267793625593185 2023-01-22 11:45:07.870770: step: 94/466, loss: 0.0036512063816189766 2023-01-22 11:45:08.567998: step: 96/466, loss: 0.1721828132867813 2023-01-22 11:45:09.281001: step: 98/466, loss: 0.06883466988801956 2023-01-22 11:45:09.925851: step: 100/466, loss: 0.07559314370155334 2023-01-22 11:45:10.597911: step: 102/466, loss: 0.08263487368822098 2023-01-22 11:45:11.223778: step: 104/466, loss: 0.07542078197002411 2023-01-22 11:45:11.943531: step: 106/466, loss: 0.04282296076416969 2023-01-22 11:45:12.633627: step: 108/466, loss: 0.2059038281440735 2023-01-22 11:45:13.332161: step: 110/466, loss: 0.647688090801239 2023-01-22 11:45:14.053506: step: 112/466, loss: 0.08664006739854813 2023-01-22 11:45:14.740519: step: 114/466, loss: 0.04270491376519203 2023-01-22 11:45:15.414957: step: 116/466, loss: 0.07413546741008759 2023-01-22 11:45:16.017023: step: 118/466, loss: 0.04129385948181152 2023-01-22 11:45:16.688761: step: 120/466, loss: 0.036656226962804794 2023-01-22 11:45:17.268963: step: 122/466, loss: 0.03237105906009674 2023-01-22 11:45:17.916038: step: 124/466, loss: 0.08183992654085159 2023-01-22 11:45:18.571606: step: 126/466, loss: 0.08177828043699265 2023-01-22 11:45:19.227951: step: 128/466, loss: 0.1501251608133316 2023-01-22 11:45:19.833234: step: 130/466, loss: 0.059020865708589554 2023-01-22 11:45:20.555715: step: 132/466, loss: 0.01964469440281391 2023-01-22 11:45:21.204999: step: 134/466, loss: 0.08319169282913208 2023-01-22 11:45:21.858836: step: 136/466, loss: 0.020794998854398727 2023-01-22 11:45:22.491252: step: 138/466, loss: 0.06269271671772003 2023-01-22 11:45:23.090685: step: 140/466, loss: 0.044970426708459854 2023-01-22 11:45:23.732943: step: 142/466, loss: 0.08268948644399643 2023-01-22 11:45:24.345317: step: 144/466, loss: 0.011812067590653896 2023-01-22 11:45:25.013681: step: 146/466, loss: 0.5511666536331177 2023-01-22 11:45:25.684615: step: 148/466, loss: 1.1024284362792969 2023-01-22 11:45:26.334759: step: 150/466, loss: 0.07916638255119324 2023-01-22 11:45:27.006948: step: 152/466, loss: 0.0712054893374443 2023-01-22 11:45:27.613851: step: 154/466, loss: 0.051965199410915375 2023-01-22 11:45:28.359539: step: 156/466, loss: 0.1934525966644287 2023-01-22 11:45:28.994529: step: 158/466, loss: 0.15321888029575348 2023-01-22 11:45:29.638351: step: 160/466, loss: 0.40636488795280457 2023-01-22 11:45:30.316414: step: 162/466, loss: 0.02754190005362034 2023-01-22 11:45:30.870972: step: 164/466, loss: 0.022459637373685837 2023-01-22 11:45:31.572496: step: 166/466, loss: 0.04907315969467163 2023-01-22 11:45:32.208575: step: 168/466, loss: 0.032183870673179626 2023-01-22 11:45:32.860919: step: 170/466, loss: 0.1283939629793167 2023-01-22 11:45:33.577946: step: 172/466, loss: 0.02123296447098255 2023-01-22 11:45:34.242395: step: 174/466, loss: 0.09502942860126495 2023-01-22 11:45:34.865146: step: 176/466, loss: 0.06678149104118347 2023-01-22 11:45:35.453238: step: 178/466, loss: 0.05468068644404411 2023-01-22 11:45:36.100784: step: 180/466, loss: 0.0322553776204586 2023-01-22 11:45:36.746439: step: 182/466, loss: 0.030692176893353462 2023-01-22 11:45:37.380553: step: 184/466, loss: 0.06752853840589523 2023-01-22 11:45:38.021962: step: 186/466, loss: 0.565311849117279 2023-01-22 11:45:38.764748: step: 188/466, loss: 0.07549922913312912 2023-01-22 11:45:39.373150: step: 190/466, loss: 0.02192053571343422 2023-01-22 11:45:40.050506: step: 192/466, loss: 0.06956149637699127 2023-01-22 11:45:40.713286: step: 194/466, loss: 0.4531049132347107 2023-01-22 11:45:41.388937: step: 196/466, loss: 0.07766777276992798 2023-01-22 11:45:42.045255: step: 198/466, loss: 0.12816458940505981 2023-01-22 11:45:42.684069: step: 200/466, loss: 0.04339540749788284 2023-01-22 11:45:43.303002: step: 202/466, loss: 0.111495740711689 2023-01-22 11:45:43.984487: step: 204/466, loss: 0.1359895020723343 2023-01-22 11:45:44.609093: step: 206/466, loss: 0.44120514392852783 2023-01-22 11:45:45.240635: step: 208/466, loss: 0.035208724439144135 2023-01-22 11:45:45.874860: step: 210/466, loss: 0.07476918399333954 2023-01-22 11:45:46.525537: step: 212/466, loss: 0.059200409799814224 2023-01-22 11:45:47.183749: step: 214/466, loss: 0.08655449748039246 2023-01-22 11:45:47.887925: step: 216/466, loss: 0.03006293624639511 2023-01-22 11:45:48.596690: step: 218/466, loss: 0.12334776669740677 2023-01-22 11:45:49.295144: step: 220/466, loss: 0.0786009430885315 2023-01-22 11:45:49.974077: step: 222/466, loss: 0.16112715005874634 2023-01-22 11:45:50.631485: step: 224/466, loss: 0.028480958193540573 2023-01-22 11:45:51.199506: step: 226/466, loss: 0.05188245326280594 2023-01-22 11:45:51.822913: step: 228/466, loss: 1.4788625240325928 2023-01-22 11:45:52.565928: step: 230/466, loss: 0.06716261804103851 2023-01-22 11:45:53.218335: step: 232/466, loss: 0.021676335483789444 2023-01-22 11:45:53.884583: step: 234/466, loss: 0.06132663041353226 2023-01-22 11:45:54.538743: step: 236/466, loss: 0.050321124494075775 2023-01-22 11:45:55.323595: step: 238/466, loss: 0.272202730178833 2023-01-22 11:45:55.977123: step: 240/466, loss: 0.06891126185655594 2023-01-22 11:45:56.663388: step: 242/466, loss: 0.18587826192378998 2023-01-22 11:45:57.312947: step: 244/466, loss: 0.10435792803764343 2023-01-22 11:45:58.004731: step: 246/466, loss: 0.08475508540868759 2023-01-22 11:45:58.680048: step: 248/466, loss: 0.04646135866641998 2023-01-22 11:45:59.338816: step: 250/466, loss: 0.044781383126974106 2023-01-22 11:46:00.027178: step: 252/466, loss: 0.10665663331747055 2023-01-22 11:46:00.660662: step: 254/466, loss: 0.06658080965280533 2023-01-22 11:46:01.319080: step: 256/466, loss: 0.17637376487255096 2023-01-22 11:46:02.020420: step: 258/466, loss: 0.0785864070057869 2023-01-22 11:46:02.726227: step: 260/466, loss: 0.006488861050456762 2023-01-22 11:46:03.420164: step: 262/466, loss: 0.025803137570619583 2023-01-22 11:46:04.129661: step: 264/466, loss: 0.08928970247507095 2023-01-22 11:46:04.823042: step: 266/466, loss: 0.06495597213506699 2023-01-22 11:46:05.473036: step: 268/466, loss: 0.040781501680612564 2023-01-22 11:46:06.083034: step: 270/466, loss: 0.07361089438199997 2023-01-22 11:46:06.756556: step: 272/466, loss: 0.050119150429964066 2023-01-22 11:46:07.364451: step: 274/466, loss: 0.03163013607263565 2023-01-22 11:46:08.041374: step: 276/466, loss: 0.05836133658885956 2023-01-22 11:46:08.674300: step: 278/466, loss: 0.19847925007343292 2023-01-22 11:46:09.322684: step: 280/466, loss: 0.05794009938836098 2023-01-22 11:46:09.974539: step: 282/466, loss: 0.01365175936371088 2023-01-22 11:46:10.648253: step: 284/466, loss: 0.07203265279531479 2023-01-22 11:46:11.333138: step: 286/466, loss: 0.0678028017282486 2023-01-22 11:46:11.996456: step: 288/466, loss: 0.3244488537311554 2023-01-22 11:46:12.628692: step: 290/466, loss: 0.017647601664066315 2023-01-22 11:46:13.265948: step: 292/466, loss: 0.1130477637052536 2023-01-22 11:46:13.852350: step: 294/466, loss: 0.10335894674062729 2023-01-22 11:46:14.524857: step: 296/466, loss: 0.013636148534715176 2023-01-22 11:46:15.251705: step: 298/466, loss: 0.054267518222332 2023-01-22 11:46:15.915427: step: 300/466, loss: 0.1287168264389038 2023-01-22 11:46:16.578564: step: 302/466, loss: 0.07640498131513596 2023-01-22 11:46:17.187919: step: 304/466, loss: 0.07450221478939056 2023-01-22 11:46:17.824849: step: 306/466, loss: 0.12391683459281921 2023-01-22 11:46:18.482460: step: 308/466, loss: 0.04375586658716202 2023-01-22 11:46:19.176333: step: 310/466, loss: 0.04667707532644272 2023-01-22 11:46:19.856489: step: 312/466, loss: 0.06447603553533554 2023-01-22 11:46:20.492924: step: 314/466, loss: 0.061549168080091476 2023-01-22 11:46:21.089814: step: 316/466, loss: 0.12271782755851746 2023-01-22 11:46:21.715074: step: 318/466, loss: 0.035681240260601044 2023-01-22 11:46:22.372508: step: 320/466, loss: 0.06072334572672844 2023-01-22 11:46:22.994419: step: 322/466, loss: 0.11734100431203842 2023-01-22 11:46:23.595117: step: 324/466, loss: 0.10797211527824402 2023-01-22 11:46:24.219111: step: 326/466, loss: 0.0348459854722023 2023-01-22 11:46:24.908899: step: 328/466, loss: 0.31300246715545654 2023-01-22 11:46:25.556268: step: 330/466, loss: 0.05565841868519783 2023-01-22 11:46:26.158531: step: 332/466, loss: 0.04864946007728577 2023-01-22 11:46:26.802130: step: 334/466, loss: 0.05759395658969879 2023-01-22 11:46:27.486415: step: 336/466, loss: 0.01965009979903698 2023-01-22 11:46:28.128329: step: 338/466, loss: 0.8483911156654358 2023-01-22 11:46:28.832698: step: 340/466, loss: 0.09508222341537476 2023-01-22 11:46:29.417908: step: 342/466, loss: 0.119389608502388 2023-01-22 11:46:30.167677: step: 344/466, loss: 0.049336206167936325 2023-01-22 11:46:30.875438: step: 346/466, loss: 0.08939649164676666 2023-01-22 11:46:31.436134: step: 348/466, loss: 0.025346161797642708 2023-01-22 11:46:32.151887: step: 350/466, loss: 0.13140961527824402 2023-01-22 11:46:32.847995: step: 352/466, loss: 0.14738011360168457 2023-01-22 11:46:33.506677: step: 354/466, loss: 0.08162593841552734 2023-01-22 11:46:34.117333: step: 356/466, loss: 0.04835676774382591 2023-01-22 11:46:34.835781: step: 358/466, loss: 0.24017801880836487 2023-01-22 11:46:35.504265: step: 360/466, loss: 0.014580821618437767 2023-01-22 11:46:36.174896: step: 362/466, loss: 0.1441817432641983 2023-01-22 11:46:36.824353: step: 364/466, loss: 0.05539591610431671 2023-01-22 11:46:37.456261: step: 366/466, loss: 0.059686385095119476 2023-01-22 11:46:38.129907: step: 368/466, loss: 0.03324635699391365 2023-01-22 11:46:38.798892: step: 370/466, loss: 0.10210835933685303 2023-01-22 11:46:39.477612: step: 372/466, loss: 0.06760048866271973 2023-01-22 11:46:40.135910: step: 374/466, loss: 0.05021263659000397 2023-01-22 11:46:40.782644: step: 376/466, loss: 0.07459504157304764 2023-01-22 11:46:41.416094: step: 378/466, loss: 0.08886837959289551 2023-01-22 11:46:42.019059: step: 380/466, loss: 0.08208400011062622 2023-01-22 11:46:42.681449: step: 382/466, loss: 0.01850113831460476 2023-01-22 11:46:43.381746: step: 384/466, loss: 0.12809403240680695 2023-01-22 11:46:44.046640: step: 386/466, loss: 0.14002923667430878 2023-01-22 11:46:44.714348: step: 388/466, loss: 0.14379382133483887 2023-01-22 11:46:45.359638: step: 390/466, loss: 0.10631173849105835 2023-01-22 11:46:46.004384: step: 392/466, loss: 0.038909073919057846 2023-01-22 11:46:46.659615: step: 394/466, loss: 0.19458021223545074 2023-01-22 11:46:47.334338: step: 396/466, loss: 0.04370439797639847 2023-01-22 11:46:47.978936: step: 398/466, loss: 0.09685692936182022 2023-01-22 11:46:48.719531: step: 400/466, loss: 0.07326396554708481 2023-01-22 11:46:49.381190: step: 402/466, loss: 0.033047426491975784 2023-01-22 11:46:50.050288: step: 404/466, loss: 0.014400389045476913 2023-01-22 11:46:50.684018: step: 406/466, loss: 0.07890332490205765 2023-01-22 11:46:51.405088: step: 408/466, loss: 0.023150041699409485 2023-01-22 11:46:51.956994: step: 410/466, loss: 0.04116898775100708 2023-01-22 11:46:52.635855: step: 412/466, loss: 0.19657215476036072 2023-01-22 11:46:53.281309: step: 414/466, loss: 0.0530715212225914 2023-01-22 11:46:53.891047: step: 416/466, loss: 0.14518778026103973 2023-01-22 11:46:54.570408: step: 418/466, loss: 0.02331777848303318 2023-01-22 11:46:55.204873: step: 420/466, loss: 0.01835496909916401 2023-01-22 11:46:55.820804: step: 422/466, loss: 0.04725594073534012 2023-01-22 11:46:56.466398: step: 424/466, loss: 0.09757312387228012 2023-01-22 11:46:57.120277: step: 426/466, loss: 0.07201900333166122 2023-01-22 11:46:57.756792: step: 428/466, loss: 0.032236743718385696 2023-01-22 11:46:58.490558: step: 430/466, loss: 0.0317675843834877 2023-01-22 11:46:59.132979: step: 432/466, loss: 0.8498877286911011 2023-01-22 11:46:59.772252: step: 434/466, loss: 0.037114985287189484 2023-01-22 11:47:00.440657: step: 436/466, loss: 0.2792483866214752 2023-01-22 11:47:01.171436: step: 438/466, loss: 0.06097016483545303 2023-01-22 11:47:01.787409: step: 440/466, loss: 0.008344702422618866 2023-01-22 11:47:02.435014: step: 442/466, loss: 0.18476438522338867 2023-01-22 11:47:03.042285: step: 444/466, loss: 0.07133772224187851 2023-01-22 11:47:03.622929: step: 446/466, loss: 0.038877032697200775 2023-01-22 11:47:04.208750: step: 448/466, loss: 0.03844582289457321 2023-01-22 11:47:04.887434: step: 450/466, loss: 0.023474067449569702 2023-01-22 11:47:05.593834: step: 452/466, loss: 0.12620507180690765 2023-01-22 11:47:06.164208: step: 454/466, loss: 0.026539389044046402 2023-01-22 11:47:06.768285: step: 456/466, loss: 0.7999367713928223 2023-01-22 11:47:07.452436: step: 458/466, loss: 0.07577664405107498 2023-01-22 11:47:08.147657: step: 460/466, loss: 0.1380268633365631 2023-01-22 11:47:08.915745: step: 462/466, loss: 0.19326843321323395 2023-01-22 11:47:09.562333: step: 464/466, loss: 0.11803829669952393 2023-01-22 11:47:10.274950: step: 466/466, loss: 0.17432992160320282 2023-01-22 11:47:10.954526: step: 468/466, loss: 0.13016678392887115 2023-01-22 11:47:11.595678: step: 470/466, loss: 0.06393105536699295 2023-01-22 11:47:12.207489: step: 472/466, loss: 0.026901068165898323 2023-01-22 11:47:12.898843: step: 474/466, loss: 0.029669426381587982 2023-01-22 11:47:13.589204: step: 476/466, loss: 0.11381050199270248 2023-01-22 11:47:14.205535: step: 478/466, loss: 0.05609027296304703 2023-01-22 11:47:14.882004: step: 480/466, loss: 0.028202557936310768 2023-01-22 11:47:15.588047: step: 482/466, loss: 0.5612933039665222 2023-01-22 11:47:16.220636: step: 484/466, loss: 0.12710697948932648 2023-01-22 11:47:16.988899: step: 486/466, loss: 0.34370070695877075 2023-01-22 11:47:17.634169: step: 488/466, loss: 0.3669477105140686 2023-01-22 11:47:18.278517: step: 490/466, loss: 0.08090394735336304 2023-01-22 11:47:18.947666: step: 492/466, loss: 0.1269034892320633 2023-01-22 11:47:19.620967: step: 494/466, loss: 1.480870246887207 2023-01-22 11:47:20.328920: step: 496/466, loss: 0.15957190096378326 2023-01-22 11:47:21.013228: step: 498/466, loss: 0.03893940895795822 2023-01-22 11:47:21.699349: step: 500/466, loss: 0.027543164789676666 2023-01-22 11:47:22.385449: step: 502/466, loss: 0.11042570322751999 2023-01-22 11:47:23.010249: step: 504/466, loss: 0.054097600281238556 2023-01-22 11:47:23.654414: step: 506/466, loss: 0.09026054292917252 2023-01-22 11:47:24.379537: step: 508/466, loss: 0.070386603474617 2023-01-22 11:47:25.045598: step: 510/466, loss: 0.05811745673418045 2023-01-22 11:47:25.754585: step: 512/466, loss: 0.15301252901554108 2023-01-22 11:47:26.383936: step: 514/466, loss: 0.07264979183673859 2023-01-22 11:47:27.086930: step: 516/466, loss: 0.10656112432479858 2023-01-22 11:47:27.831251: step: 518/466, loss: 0.10963854938745499 2023-01-22 11:47:28.522850: step: 520/466, loss: 0.07999063283205032 2023-01-22 11:47:29.165828: step: 522/466, loss: 0.0714545026421547 2023-01-22 11:47:29.853151: step: 524/466, loss: 0.1233542338013649 2023-01-22 11:47:30.546834: step: 526/466, loss: 0.08987359702587128 2023-01-22 11:47:31.225406: step: 528/466, loss: 0.026457447558641434 2023-01-22 11:47:31.898331: step: 530/466, loss: 0.09527673572301865 2023-01-22 11:47:32.584480: step: 532/466, loss: 0.17456305027008057 2023-01-22 11:47:33.227174: step: 534/466, loss: 0.04650742560625076 2023-01-22 11:47:33.919497: step: 536/466, loss: 0.14098072052001953 2023-01-22 11:47:34.595211: step: 538/466, loss: 0.8169363141059875 2023-01-22 11:47:35.249774: step: 540/466, loss: 0.07249260693788528 2023-01-22 11:47:35.836273: step: 542/466, loss: 0.031110601499676704 2023-01-22 11:47:36.493610: step: 544/466, loss: 0.09898725897073746 2023-01-22 11:47:37.177188: step: 546/466, loss: 0.06797879934310913 2023-01-22 11:47:37.780871: step: 548/466, loss: 0.04290494695305824 2023-01-22 11:47:38.484842: step: 550/466, loss: 0.18823488056659698 2023-01-22 11:47:39.192808: step: 552/466, loss: 0.1126476526260376 2023-01-22 11:47:40.050643: step: 554/466, loss: 0.08767277002334595 2023-01-22 11:47:40.673104: step: 556/466, loss: 0.04120711237192154 2023-01-22 11:47:41.331469: step: 558/466, loss: 0.21468394994735718 2023-01-22 11:47:42.018100: step: 560/466, loss: 0.008046263828873634 2023-01-22 11:47:42.714595: step: 562/466, loss: 0.10749727487564087 2023-01-22 11:47:43.376740: step: 564/466, loss: 0.022814007475972176 2023-01-22 11:47:44.134736: step: 566/466, loss: 0.05688408017158508 2023-01-22 11:47:44.802452: step: 568/466, loss: 0.06673478335142136 2023-01-22 11:47:45.550947: step: 570/466, loss: 0.0634184256196022 2023-01-22 11:47:46.201356: step: 572/466, loss: 0.04535965248942375 2023-01-22 11:47:46.895255: step: 574/466, loss: 0.060279201716184616 2023-01-22 11:47:47.550468: step: 576/466, loss: 0.04101169854402542 2023-01-22 11:47:48.245948: step: 578/466, loss: 0.06607304513454437 2023-01-22 11:47:48.910435: step: 580/466, loss: 0.10051118582487106 2023-01-22 11:47:49.595235: step: 582/466, loss: 0.06458373367786407 2023-01-22 11:47:50.218585: step: 584/466, loss: 0.4127489924430847 2023-01-22 11:47:50.860808: step: 586/466, loss: 0.03470811992883682 2023-01-22 11:47:51.549863: step: 588/466, loss: 0.15565644204616547 2023-01-22 11:47:52.198637: step: 590/466, loss: 0.07380617409944534 2023-01-22 11:47:52.874583: step: 592/466, loss: 0.04714164882898331 2023-01-22 11:47:53.527801: step: 594/466, loss: 0.4443550109863281 2023-01-22 11:47:54.265462: step: 596/466, loss: 0.4796164035797119 2023-01-22 11:47:55.015542: step: 598/466, loss: 0.590786337852478 2023-01-22 11:47:55.620507: step: 600/466, loss: 0.0254229623824358 2023-01-22 11:47:56.237810: step: 602/466, loss: 0.0715954527258873 2023-01-22 11:47:56.891389: step: 604/466, loss: 0.12897107005119324 2023-01-22 11:47:57.566526: step: 606/466, loss: 0.15412917733192444 2023-01-22 11:47:58.241419: step: 608/466, loss: 0.1416398286819458 2023-01-22 11:47:58.832549: step: 610/466, loss: 0.06602641195058823 2023-01-22 11:47:59.433740: step: 612/466, loss: 0.12513640522956848 2023-01-22 11:48:00.037591: step: 614/466, loss: 0.7022066712379456 2023-01-22 11:48:00.725743: step: 616/466, loss: 0.03784637153148651 2023-01-22 11:48:01.356396: step: 618/466, loss: 0.5480793118476868 2023-01-22 11:48:01.985973: step: 620/466, loss: 0.022540397942066193 2023-01-22 11:48:02.626933: step: 622/466, loss: 0.06010816618800163 2023-01-22 11:48:03.289409: step: 624/466, loss: 0.11559788137674332 2023-01-22 11:48:03.958492: step: 626/466, loss: 0.09054840356111526 2023-01-22 11:48:04.633958: step: 628/466, loss: 0.22871863842010498 2023-01-22 11:48:05.284889: step: 630/466, loss: 0.04240802302956581 2023-01-22 11:48:05.877702: step: 632/466, loss: 0.6050617694854736 2023-01-22 11:48:06.553719: step: 634/466, loss: 0.07030927389860153 2023-01-22 11:48:07.192511: step: 636/466, loss: 0.13568347692489624 2023-01-22 11:48:07.824802: step: 638/466, loss: 0.02763962559401989 2023-01-22 11:48:08.468827: step: 640/466, loss: 0.10188723355531693 2023-01-22 11:48:09.155321: step: 642/466, loss: 0.10485479235649109 2023-01-22 11:48:09.805885: step: 644/466, loss: 0.06153221055865288 2023-01-22 11:48:10.411517: step: 646/466, loss: 0.09548943489789963 2023-01-22 11:48:11.029297: step: 648/466, loss: 0.07203955948352814 2023-01-22 11:48:11.664336: step: 650/466, loss: 0.04134916141629219 2023-01-22 11:48:12.351120: step: 652/466, loss: 0.4011443257331848 2023-01-22 11:48:13.020280: step: 654/466, loss: 0.08159706741571426 2023-01-22 11:48:13.686942: step: 656/466, loss: 0.02823558636009693 2023-01-22 11:48:14.372068: step: 658/466, loss: 0.06172872334718704 2023-01-22 11:48:14.974544: step: 660/466, loss: 0.5977843403816223 2023-01-22 11:48:15.623384: step: 662/466, loss: 0.048087794333696365 2023-01-22 11:48:16.336845: step: 664/466, loss: 0.07502186298370361 2023-01-22 11:48:16.941732: step: 666/466, loss: 0.020353632047772408 2023-01-22 11:48:17.517994: step: 668/466, loss: 0.04688615724444389 2023-01-22 11:48:18.132263: step: 670/466, loss: 0.06386066973209381 2023-01-22 11:48:18.875178: step: 672/466, loss: 0.148604616522789 2023-01-22 11:48:19.567029: step: 674/466, loss: 0.1732548624277115 2023-01-22 11:48:20.253969: step: 676/466, loss: 0.0486491434276104 2023-01-22 11:48:20.916060: step: 678/466, loss: 0.09224563091993332 2023-01-22 11:48:21.565098: step: 680/466, loss: 0.060158032923936844 2023-01-22 11:48:22.235232: step: 682/466, loss: 0.02874758094549179 2023-01-22 11:48:22.975283: step: 684/466, loss: 0.049542300403118134 2023-01-22 11:48:23.640200: step: 686/466, loss: 0.04817377030849457 2023-01-22 11:48:24.300196: step: 688/466, loss: 0.07989070564508438 2023-01-22 11:48:24.946538: step: 690/466, loss: 0.0017653441755101085 2023-01-22 11:48:25.610419: step: 692/466, loss: 0.15680000185966492 2023-01-22 11:48:26.358780: step: 694/466, loss: 0.06483715027570724 2023-01-22 11:48:26.965842: step: 696/466, loss: 0.06312517821788788 2023-01-22 11:48:27.665117: step: 698/466, loss: 0.0584401860833168 2023-01-22 11:48:28.319108: step: 700/466, loss: 0.1653444766998291 2023-01-22 11:48:28.950477: step: 702/466, loss: 0.1201152354478836 2023-01-22 11:48:29.561168: step: 704/466, loss: 0.07937215268611908 2023-01-22 11:48:30.178336: step: 706/466, loss: 0.12155706435441971 2023-01-22 11:48:30.826395: step: 708/466, loss: 0.08106189966201782 2023-01-22 11:48:31.461596: step: 710/466, loss: 0.06951658427715302 2023-01-22 11:48:32.108862: step: 712/466, loss: 0.08725471794605255 2023-01-22 11:48:32.821926: step: 714/466, loss: 0.02670646458864212 2023-01-22 11:48:33.520515: step: 716/466, loss: 0.06493691354990005 2023-01-22 11:48:34.184862: step: 718/466, loss: 0.028881005942821503 2023-01-22 11:48:34.865453: step: 720/466, loss: 0.09733739495277405 2023-01-22 11:48:35.470082: step: 722/466, loss: 0.061924561858177185 2023-01-22 11:48:36.091383: step: 724/466, loss: 0.01758013479411602 2023-01-22 11:48:36.762911: step: 726/466, loss: 0.08518119156360626 2023-01-22 11:48:37.402377: step: 728/466, loss: 0.0513693243265152 2023-01-22 11:48:38.065354: step: 730/466, loss: 0.5823181867599487 2023-01-22 11:48:38.759230: step: 732/466, loss: 0.9185676574707031 2023-01-22 11:48:39.367252: step: 734/466, loss: 0.06561222672462463 2023-01-22 11:48:40.015701: step: 736/466, loss: 0.09126259386539459 2023-01-22 11:48:40.700012: step: 738/466, loss: 0.08993934094905853 2023-01-22 11:48:41.363803: step: 740/466, loss: 0.15887029469013214 2023-01-22 11:48:42.036998: step: 742/466, loss: 0.5598637461662292 2023-01-22 11:48:42.722602: step: 744/466, loss: 0.17869073152542114 2023-01-22 11:48:43.391254: step: 746/466, loss: 0.09603740274906158 2023-01-22 11:48:44.048510: step: 748/466, loss: 0.07360593974590302 2023-01-22 11:48:44.756910: step: 750/466, loss: 0.1839761584997177 2023-01-22 11:48:45.431585: step: 752/466, loss: 0.13834035396575928 2023-01-22 11:48:46.086276: step: 754/466, loss: 0.008991479873657227 2023-01-22 11:48:46.715038: step: 756/466, loss: 0.06636402010917664 2023-01-22 11:48:47.344357: step: 758/466, loss: 0.08498692512512207 2023-01-22 11:48:47.987161: step: 760/466, loss: 0.05186803638935089 2023-01-22 11:48:48.696310: step: 762/466, loss: 0.04967869818210602 2023-01-22 11:48:49.320423: step: 764/466, loss: 0.3186541795730591 2023-01-22 11:48:50.080478: step: 766/466, loss: 1.305495023727417 2023-01-22 11:48:50.807172: step: 768/466, loss: 0.054354071617126465 2023-01-22 11:48:51.490868: step: 770/466, loss: 0.07185588777065277 2023-01-22 11:48:52.151100: step: 772/466, loss: 0.05645868182182312 2023-01-22 11:48:52.830398: step: 774/466, loss: 0.1832362711429596 2023-01-22 11:48:53.644224: step: 776/466, loss: 0.1475657969713211 2023-01-22 11:48:54.320551: step: 778/466, loss: 0.8674372434616089 2023-01-22 11:48:54.914345: step: 780/466, loss: 0.08966337889432907 2023-01-22 11:48:55.601586: step: 782/466, loss: 0.008127622306346893 2023-01-22 11:48:56.240659: step: 784/466, loss: 0.06214836984872818 2023-01-22 11:48:56.900073: step: 786/466, loss: 0.1667788326740265 2023-01-22 11:48:57.547674: step: 788/466, loss: 0.08831219375133514 2023-01-22 11:48:58.141176: step: 790/466, loss: 0.0821632519364357 2023-01-22 11:48:58.850358: step: 792/466, loss: 0.08928848803043365 2023-01-22 11:48:59.494853: step: 794/466, loss: 0.08582484722137451 2023-01-22 11:49:00.139837: step: 796/466, loss: 0.014395495876669884 2023-01-22 11:49:00.866847: step: 798/466, loss: 0.03297445923089981 2023-01-22 11:49:01.540808: step: 800/466, loss: 0.011172035709023476 2023-01-22 11:49:02.193424: step: 802/466, loss: 0.016700677573680878 2023-01-22 11:49:02.845740: step: 804/466, loss: 0.1435922384262085 2023-01-22 11:49:03.534310: step: 806/466, loss: 0.5995723605155945 2023-01-22 11:49:04.191751: step: 808/466, loss: 0.04722990840673447 2023-01-22 11:49:04.819348: step: 810/466, loss: 0.4910792112350464 2023-01-22 11:49:05.513324: step: 812/466, loss: 0.0818682312965393 2023-01-22 11:49:06.212444: step: 814/466, loss: 0.026851100847125053 2023-01-22 11:49:06.786658: step: 816/466, loss: 0.08519230037927628 2023-01-22 11:49:07.424341: step: 818/466, loss: 0.636756420135498 2023-01-22 11:49:08.055983: step: 820/466, loss: 0.024506378918886185 2023-01-22 11:49:08.709171: step: 822/466, loss: 0.1749979555606842 2023-01-22 11:49:09.349183: step: 824/466, loss: 0.04244179651141167 2023-01-22 11:49:09.971927: step: 826/466, loss: 0.03763969615101814 2023-01-22 11:49:10.660817: step: 828/466, loss: 0.017545204609632492 2023-01-22 11:49:11.350698: step: 830/466, loss: 0.15983814001083374 2023-01-22 11:49:12.008549: step: 832/466, loss: 0.21922515332698822 2023-01-22 11:49:12.619461: step: 834/466, loss: 0.10022667050361633 2023-01-22 11:49:13.227805: step: 836/466, loss: 0.03854656592011452 2023-01-22 11:49:13.879842: step: 838/466, loss: 0.039883919060230255 2023-01-22 11:49:14.512031: step: 840/466, loss: 0.18616662919521332 2023-01-22 11:49:15.184704: step: 842/466, loss: 0.09625918418169022 2023-01-22 11:49:15.907508: step: 844/466, loss: 0.020380394533276558 2023-01-22 11:49:16.559278: step: 846/466, loss: 0.13634014129638672 2023-01-22 11:49:17.225447: step: 848/466, loss: 0.056695401668548584 2023-01-22 11:49:17.804808: step: 850/466, loss: 0.023742130026221275 2023-01-22 11:49:18.461557: step: 852/466, loss: 0.3502342402935028 2023-01-22 11:49:19.197917: step: 854/466, loss: 0.053310394287109375 2023-01-22 11:49:19.837522: step: 856/466, loss: 0.13849897682666779 2023-01-22 11:49:20.505869: step: 858/466, loss: 0.049945440143346786 2023-01-22 11:49:21.163720: step: 860/466, loss: 0.07457361370325089 2023-01-22 11:49:21.809643: step: 862/466, loss: 1.0580438375473022 2023-01-22 11:49:22.444575: step: 864/466, loss: 1.2639542818069458 2023-01-22 11:49:23.076788: step: 866/466, loss: 0.008224474266171455 2023-01-22 11:49:23.691584: step: 868/466, loss: 1.7443006038665771 2023-01-22 11:49:24.343671: step: 870/466, loss: 0.2736111581325531 2023-01-22 11:49:25.029363: step: 872/466, loss: 0.06253460794687271 2023-01-22 11:49:25.705454: step: 874/466, loss: 0.26736292243003845 2023-01-22 11:49:26.392880: step: 876/466, loss: 0.19355060160160065 2023-01-22 11:49:27.044356: step: 878/466, loss: 0.2999248504638672 2023-01-22 11:49:27.639188: step: 880/466, loss: 0.06770453602075577 2023-01-22 11:49:28.301666: step: 882/466, loss: 0.046020377427339554 2023-01-22 11:49:29.004646: step: 884/466, loss: 0.09162288904190063 2023-01-22 11:49:29.672464: step: 886/466, loss: 0.07737147808074951 2023-01-22 11:49:30.340209: step: 888/466, loss: 0.7829618453979492 2023-01-22 11:49:30.993016: step: 890/466, loss: 0.032086726278066635 2023-01-22 11:49:31.624336: step: 892/466, loss: 0.05165988206863403 2023-01-22 11:49:32.334391: step: 894/466, loss: 0.1862795352935791 2023-01-22 11:49:33.054323: step: 896/466, loss: 0.11243996024131775 2023-01-22 11:49:33.684214: step: 898/466, loss: 0.39769259095191956 2023-01-22 11:49:34.429336: step: 900/466, loss: 0.025845997035503387 2023-01-22 11:49:35.077321: step: 902/466, loss: 0.024150336161255836 2023-01-22 11:49:35.793356: step: 904/466, loss: 0.2105114609003067 2023-01-22 11:49:36.465716: step: 906/466, loss: 0.27665263414382935 2023-01-22 11:49:37.144306: step: 908/466, loss: 0.03600461781024933 2023-01-22 11:49:37.813955: step: 910/466, loss: 0.09567281603813171 2023-01-22 11:49:38.485950: step: 912/466, loss: 0.2852194607257843 2023-01-22 11:49:39.095461: step: 914/466, loss: 0.2959142327308655 2023-01-22 11:49:39.736938: step: 916/466, loss: 0.04316151514649391 2023-01-22 11:49:40.474039: step: 918/466, loss: 0.14325612783432007 2023-01-22 11:49:41.162578: step: 920/466, loss: 0.10374636948108673 2023-01-22 11:49:41.792482: step: 922/466, loss: 0.18825989961624146 2023-01-22 11:49:42.438826: step: 924/466, loss: 0.6862626671791077 2023-01-22 11:49:43.144990: step: 926/466, loss: 0.13701371848583221 2023-01-22 11:49:43.795378: step: 928/466, loss: 0.023627731949090958 2023-01-22 11:49:44.440022: step: 930/466, loss: 0.04441440850496292 2023-01-22 11:49:45.104821: step: 932/466, loss: 0.13912048935890198 ================================================== Loss: 0.142 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.313068515258216, 'r': 0.33742488931056297, 'f1': 0.3247907153729072}, 'combined': 0.23931947448530003, 'epoch': 18} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3333451975797733, 'r': 0.3301732277066094, 'f1': 0.33175163079429854}, 'combined': 0.2200218069516591, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3053661616161616, 'r': 0.2862807765151515, 'f1': 0.2955156402737048}, 'combined': 0.19701042684913653, 'epoch': 18} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3436798231178743, 'r': 0.31211131250219437, 'f1': 0.32713574443917554}, 'combined': 0.2134991174234619, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29691894271664004, 'r': 0.3380481321252069, 'f1': 0.3161514918012139}, 'combined': 0.23295373080089443, 'epoch': 18} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3211611416822526, 'r': 0.3092251614814429, 'f1': 0.3150801513289559}, 'combined': 0.20896507445650958, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22076023391812863, 'r': 0.35952380952380947, 'f1': 0.2735507246376811}, 'combined': 0.18236714975845406, 'epoch': 18} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4625, 'r': 0.40217391304347827, 'f1': 0.43023255813953487}, 'combined': 0.28682170542635654, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38333333333333336, 'r': 0.19827586206896552, 'f1': 0.2613636363636364}, 'combined': 0.17424242424242425, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:52:30.029419: step: 2/466, loss: 0.17019319534301758 2023-01-22 11:52:31.389650: step: 4/466, loss: 0.09898035228252411 2023-01-22 11:52:32.018243: step: 6/466, loss: 0.06031796336174011 2023-01-22 11:52:32.664720: step: 8/466, loss: 0.04319017007946968 2023-01-22 11:52:33.384892: step: 10/466, loss: 0.08148910105228424 2023-01-22 11:52:34.061704: step: 12/466, loss: 0.03979048877954483 2023-01-22 11:52:34.698769: step: 14/466, loss: 0.07453230023384094 2023-01-22 11:52:35.310533: step: 16/466, loss: 0.06262771040201187 2023-01-22 11:52:35.952195: step: 18/466, loss: 0.4737427234649658 2023-01-22 11:52:36.571887: step: 20/466, loss: 0.0073438892140984535 2023-01-22 11:52:37.219167: step: 22/466, loss: 0.01852789707481861 2023-01-22 11:52:37.847206: step: 24/466, loss: 0.2755466401576996 2023-01-22 11:52:38.490423: step: 26/466, loss: 0.0645308718085289 2023-01-22 11:52:39.249521: step: 28/466, loss: 0.076554074883461 2023-01-22 11:52:39.932983: step: 30/466, loss: 0.12746798992156982 2023-01-22 11:52:40.615575: step: 32/466, loss: 0.11316990852355957 2023-01-22 11:52:41.297104: step: 34/466, loss: 0.0632438138127327 2023-01-22 11:52:41.900598: step: 36/466, loss: 0.047239162027835846 2023-01-22 11:52:42.560285: step: 38/466, loss: 0.03865513578057289 2023-01-22 11:52:43.251315: step: 40/466, loss: 0.009294724091887474 2023-01-22 11:52:43.937735: step: 42/466, loss: 0.03557030111551285 2023-01-22 11:52:44.620746: step: 44/466, loss: 0.06436656415462494 2023-01-22 11:52:45.337195: step: 46/466, loss: 0.11278537660837173 2023-01-22 11:52:46.017798: step: 48/466, loss: 0.03214911371469498 2023-01-22 11:52:46.725067: step: 50/466, loss: 0.03796916455030441 2023-01-22 11:52:47.431253: step: 52/466, loss: 0.12940087914466858 2023-01-22 11:52:48.169212: step: 54/466, loss: 0.05129818990826607 2023-01-22 11:52:48.858150: step: 56/466, loss: 0.1207818016409874 2023-01-22 11:52:49.448431: step: 58/466, loss: 0.03639287129044533 2023-01-22 11:52:50.064769: step: 60/466, loss: 0.04632697254419327 2023-01-22 11:52:50.704012: step: 62/466, loss: 0.007665011566132307 2023-01-22 11:52:51.326001: step: 64/466, loss: 0.022454943507909775 2023-01-22 11:52:52.051874: step: 66/466, loss: 0.030852628871798515 2023-01-22 11:52:52.867603: step: 68/466, loss: 0.04700777307152748 2023-01-22 11:52:53.579325: step: 70/466, loss: 0.1398026943206787 2023-01-22 11:52:54.193704: step: 72/466, loss: 0.06548305600881577 2023-01-22 11:52:54.840107: step: 74/466, loss: 0.038578152656555176 2023-01-22 11:52:55.547044: step: 76/466, loss: 0.016550444066524506 2023-01-22 11:52:56.207885: step: 78/466, loss: 0.1139058768749237 2023-01-22 11:52:56.847293: step: 80/466, loss: 0.03092702478170395 2023-01-22 11:52:57.484747: step: 82/466, loss: 0.2325407713651657 2023-01-22 11:52:58.101723: step: 84/466, loss: 0.034473005682229996 2023-01-22 11:52:58.782602: step: 86/466, loss: 0.22252222895622253 2023-01-22 11:52:59.415280: step: 88/466, loss: 0.09144359081983566 2023-01-22 11:53:00.011762: step: 90/466, loss: 0.05625465139746666 2023-01-22 11:53:00.663018: step: 92/466, loss: 0.01845523715019226 2023-01-22 11:53:01.336588: step: 94/466, loss: 0.010869793593883514 2023-01-22 11:53:02.001131: step: 96/466, loss: 0.05374189093708992 2023-01-22 11:53:02.790498: step: 98/466, loss: 0.1615215241909027 2023-01-22 11:53:03.446656: step: 100/466, loss: 0.13167911767959595 2023-01-22 11:53:04.109813: step: 102/466, loss: 0.03532380983233452 2023-01-22 11:53:04.715444: step: 104/466, loss: 0.05657459422945976 2023-01-22 11:53:05.416409: step: 106/466, loss: 0.011587120592594147 2023-01-22 11:53:06.013652: step: 108/466, loss: 0.00539175933226943 2023-01-22 11:53:06.616802: step: 110/466, loss: 0.01461577508598566 2023-01-22 11:53:07.298010: step: 112/466, loss: 0.04137485846877098 2023-01-22 11:53:08.000117: step: 114/466, loss: 0.10069483518600464 2023-01-22 11:53:08.671894: step: 116/466, loss: 0.03732612356543541 2023-01-22 11:53:09.328743: step: 118/466, loss: 0.06704573333263397 2023-01-22 11:53:10.087668: step: 120/466, loss: 0.17603015899658203 2023-01-22 11:53:10.735293: step: 122/466, loss: 0.00975867360830307 2023-01-22 11:53:11.431371: step: 124/466, loss: 1.9634912014007568 2023-01-22 11:53:12.063290: step: 126/466, loss: 0.033877596259117126 2023-01-22 11:53:12.692727: step: 128/466, loss: 0.04435870796442032 2023-01-22 11:53:13.416533: step: 130/466, loss: 0.06567829847335815 2023-01-22 11:53:14.097850: step: 132/466, loss: 0.06342989206314087 2023-01-22 11:53:14.849840: step: 134/466, loss: 0.10938435792922974 2023-01-22 11:53:15.466551: step: 136/466, loss: 0.023159049451351166 2023-01-22 11:53:16.167529: step: 138/466, loss: 0.028895560652017593 2023-01-22 11:53:16.882934: step: 140/466, loss: 0.07450679689645767 2023-01-22 11:53:17.517630: step: 142/466, loss: 0.09156231582164764 2023-01-22 11:53:18.148754: step: 144/466, loss: 0.048212744295597076 2023-01-22 11:53:18.769339: step: 146/466, loss: 0.07371757179498672 2023-01-22 11:53:19.402121: step: 148/466, loss: 0.04905620589852333 2023-01-22 11:53:20.059328: step: 150/466, loss: 0.10677918791770935 2023-01-22 11:53:20.759397: step: 152/466, loss: 0.009451477788388729 2023-01-22 11:53:21.399748: step: 154/466, loss: 0.014463577419519424 2023-01-22 11:53:22.064823: step: 156/466, loss: 0.0741467997431755 2023-01-22 11:53:22.762561: step: 158/466, loss: 0.08025062084197998 2023-01-22 11:53:23.451033: step: 160/466, loss: 0.05890921503305435 2023-01-22 11:53:24.134275: step: 162/466, loss: 0.05542481318116188 2023-01-22 11:53:24.843367: step: 164/466, loss: 0.019434651359915733 2023-01-22 11:53:25.406152: step: 166/466, loss: 0.858182430267334 2023-01-22 11:53:26.000383: step: 168/466, loss: 0.0657401755452156 2023-01-22 11:53:26.673361: step: 170/466, loss: 0.04019244760274887 2023-01-22 11:53:27.317863: step: 172/466, loss: 0.06402216851711273 2023-01-22 11:53:28.036367: step: 174/466, loss: 0.08312556147575378 2023-01-22 11:53:28.681154: step: 176/466, loss: 0.05441839620471001 2023-01-22 11:53:29.431002: step: 178/466, loss: 0.03143666684627533 2023-01-22 11:53:30.089931: step: 180/466, loss: 0.12197541445493698 2023-01-22 11:53:30.763180: step: 182/466, loss: 0.2967766523361206 2023-01-22 11:53:31.393182: step: 184/466, loss: 0.03947914019227028 2023-01-22 11:53:32.021077: step: 186/466, loss: 0.05565882474184036 2023-01-22 11:53:32.672522: step: 188/466, loss: 0.12150771170854568 2023-01-22 11:53:33.357119: step: 190/466, loss: 0.741322934627533 2023-01-22 11:53:34.032652: step: 192/466, loss: 0.06171559914946556 2023-01-22 11:53:34.651337: step: 194/466, loss: 0.028971601277589798 2023-01-22 11:53:35.288838: step: 196/466, loss: 0.5960246324539185 2023-01-22 11:53:35.935020: step: 198/466, loss: 0.10869164764881134 2023-01-22 11:53:36.606043: step: 200/466, loss: 0.03132522851228714 2023-01-22 11:53:37.269411: step: 202/466, loss: 0.11369634419679642 2023-01-22 11:53:37.901083: step: 204/466, loss: 0.04653134569525719 2023-01-22 11:53:38.493554: step: 206/466, loss: 0.24414774775505066 2023-01-22 11:53:39.129492: step: 208/466, loss: 0.09183460474014282 2023-01-22 11:53:39.768677: step: 210/466, loss: 0.03295493870973587 2023-01-22 11:53:40.409662: step: 212/466, loss: 0.05806458741426468 2023-01-22 11:53:41.103466: step: 214/466, loss: 0.18339110910892487 2023-01-22 11:53:41.722062: step: 216/466, loss: 0.021839501336216927 2023-01-22 11:53:42.312136: step: 218/466, loss: 0.014637252315878868 2023-01-22 11:53:42.972359: step: 220/466, loss: 0.06076514720916748 2023-01-22 11:53:43.678993: step: 222/466, loss: 0.06928149610757828 2023-01-22 11:53:44.343322: step: 224/466, loss: 0.06949815899133682 2023-01-22 11:53:45.080102: step: 226/466, loss: 0.03996966779232025 2023-01-22 11:53:45.681274: step: 228/466, loss: 0.11684077978134155 2023-01-22 11:53:46.302233: step: 230/466, loss: 0.02931184135377407 2023-01-22 11:53:46.927160: step: 232/466, loss: 0.03776870295405388 2023-01-22 11:53:47.585470: step: 234/466, loss: 1.6303871870040894 2023-01-22 11:53:48.311892: step: 236/466, loss: 0.02073729783296585 2023-01-22 11:53:48.981759: step: 238/466, loss: 0.11177118867635727 2023-01-22 11:53:49.668278: step: 240/466, loss: 0.02529243938624859 2023-01-22 11:53:50.316844: step: 242/466, loss: 0.726740837097168 2023-01-22 11:53:50.968168: step: 244/466, loss: 0.10609099268913269 2023-01-22 11:53:51.549146: step: 246/466, loss: 0.1528254896402359 2023-01-22 11:53:52.222117: step: 248/466, loss: 0.13189128041267395 2023-01-22 11:53:52.887137: step: 250/466, loss: 0.10527793318033218 2023-01-22 11:53:53.569369: step: 252/466, loss: 0.2536887228488922 2023-01-22 11:53:54.180932: step: 254/466, loss: 0.030160672962665558 2023-01-22 11:53:54.796020: step: 256/466, loss: 0.3606482148170471 2023-01-22 11:53:55.426171: step: 258/466, loss: 0.11497034132480621 2023-01-22 11:53:56.073123: step: 260/466, loss: 0.18138667941093445 2023-01-22 11:53:56.662069: step: 262/466, loss: 0.043892133980989456 2023-01-22 11:53:57.285003: step: 264/466, loss: 0.04916343465447426 2023-01-22 11:53:57.880899: step: 266/466, loss: 0.041603077203035355 2023-01-22 11:53:58.536407: step: 268/466, loss: 0.14506392180919647 2023-01-22 11:53:59.148177: step: 270/466, loss: 0.01465784665197134 2023-01-22 11:53:59.803400: step: 272/466, loss: 0.03732382133603096 2023-01-22 11:54:00.436707: step: 274/466, loss: 0.04902615398168564 2023-01-22 11:54:01.069996: step: 276/466, loss: 0.024088485166430473 2023-01-22 11:54:01.766243: step: 278/466, loss: 0.04977373778820038 2023-01-22 11:54:02.510009: step: 280/466, loss: 0.019392477348446846 2023-01-22 11:54:03.147852: step: 282/466, loss: 0.30683591961860657 2023-01-22 11:54:03.805344: step: 284/466, loss: 0.21062332391738892 2023-01-22 11:54:04.511370: step: 286/466, loss: 0.13876795768737793 2023-01-22 11:54:05.096922: step: 288/466, loss: 0.06014120206236839 2023-01-22 11:54:05.750401: step: 290/466, loss: 0.07535697519779205 2023-01-22 11:54:06.369623: step: 292/466, loss: 0.059721991419792175 2023-01-22 11:54:07.003160: step: 294/466, loss: 0.09441936016082764 2023-01-22 11:54:07.718144: step: 296/466, loss: 0.1585637778043747 2023-01-22 11:54:08.273184: step: 298/466, loss: 0.09514472633600235 2023-01-22 11:54:08.894252: step: 300/466, loss: 0.18422214686870575 2023-01-22 11:54:09.488130: step: 302/466, loss: 0.036070097237825394 2023-01-22 11:54:10.127431: step: 304/466, loss: 0.033678553998470306 2023-01-22 11:54:10.827715: step: 306/466, loss: 0.09714172780513763 2023-01-22 11:54:11.544299: step: 308/466, loss: 0.13208194077014923 2023-01-22 11:54:12.203593: step: 310/466, loss: 0.0807897076010704 2023-01-22 11:54:12.934273: step: 312/466, loss: 0.07150286436080933 2023-01-22 11:54:13.593513: step: 314/466, loss: 0.07468264549970627 2023-01-22 11:54:14.344687: step: 316/466, loss: 0.05557527393102646 2023-01-22 11:54:15.038819: step: 318/466, loss: 0.023244045674800873 2023-01-22 11:54:15.702097: step: 320/466, loss: 0.23286482691764832 2023-01-22 11:54:16.409975: step: 322/466, loss: 0.01101283635944128 2023-01-22 11:54:17.067013: step: 324/466, loss: 0.21222420036792755 2023-01-22 11:54:17.633557: step: 326/466, loss: 0.00755928223952651 2023-01-22 11:54:18.245263: step: 328/466, loss: 0.439331591129303 2023-01-22 11:54:18.888441: step: 330/466, loss: 0.06044579669833183 2023-01-22 11:54:19.536118: step: 332/466, loss: 0.07248007506132126 2023-01-22 11:54:20.188650: step: 334/466, loss: 0.1341545581817627 2023-01-22 11:54:20.874589: step: 336/466, loss: 0.06600915640592575 2023-01-22 11:54:21.523655: step: 338/466, loss: 0.09433504194021225 2023-01-22 11:54:22.184688: step: 340/466, loss: 1.2103420495986938 2023-01-22 11:54:22.951401: step: 342/466, loss: 0.09330989420413971 2023-01-22 11:54:23.622641: step: 344/466, loss: 0.10772915929555893 2023-01-22 11:54:24.289880: step: 346/466, loss: 0.08495800942182541 2023-01-22 11:54:24.968721: step: 348/466, loss: 0.07539297640323639 2023-01-22 11:54:25.574172: step: 350/466, loss: 0.028574472293257713 2023-01-22 11:54:26.257624: step: 352/466, loss: 0.15439830720424652 2023-01-22 11:54:27.011590: step: 354/466, loss: 0.055617015808820724 2023-01-22 11:54:27.678930: step: 356/466, loss: 0.2645198404788971 2023-01-22 11:54:28.276313: step: 358/466, loss: 0.033317677676677704 2023-01-22 11:54:28.855229: step: 360/466, loss: 0.025735756382346153 2023-01-22 11:54:29.547140: step: 362/466, loss: 0.05049290880560875 2023-01-22 11:54:30.208874: step: 364/466, loss: 0.0639977902173996 2023-01-22 11:54:30.856480: step: 366/466, loss: 0.04859790951013565 2023-01-22 11:54:31.468105: step: 368/466, loss: 0.07877586036920547 2023-01-22 11:54:32.155426: step: 370/466, loss: 0.030141178518533707 2023-01-22 11:54:32.824258: step: 372/466, loss: 0.24391008913516998 2023-01-22 11:54:33.435371: step: 374/466, loss: 0.025568336248397827 2023-01-22 11:54:34.069571: step: 376/466, loss: 0.13599267601966858 2023-01-22 11:54:34.736459: step: 378/466, loss: 0.0934305340051651 2023-01-22 11:54:35.365016: step: 380/466, loss: 0.030407529324293137 2023-01-22 11:54:36.068708: step: 382/466, loss: 0.05794856324791908 2023-01-22 11:54:36.724509: step: 384/466, loss: 0.052790604531764984 2023-01-22 11:54:37.341858: step: 386/466, loss: 0.13118720054626465 2023-01-22 11:54:37.954176: step: 388/466, loss: 0.1165083646774292 2023-01-22 11:54:38.679198: step: 390/466, loss: 0.10365615785121918 2023-01-22 11:54:39.354583: step: 392/466, loss: 0.2166069895029068 2023-01-22 11:54:40.071188: step: 394/466, loss: 0.031927574425935745 2023-01-22 11:54:40.780232: step: 396/466, loss: 0.12831361591815948 2023-01-22 11:54:41.425098: step: 398/466, loss: 0.06301099061965942 2023-01-22 11:54:42.035578: step: 400/466, loss: 0.6753872632980347 2023-01-22 11:54:42.722049: step: 402/466, loss: 0.061331309378147125 2023-01-22 11:54:43.366227: step: 404/466, loss: 0.02498142421245575 2023-01-22 11:54:44.060151: step: 406/466, loss: 0.059494998306035995 2023-01-22 11:54:44.719413: step: 408/466, loss: 0.08644162863492966 2023-01-22 11:54:45.353323: step: 410/466, loss: 0.031151285395026207 2023-01-22 11:54:46.002866: step: 412/466, loss: 0.0479624904692173 2023-01-22 11:54:46.703491: step: 414/466, loss: 0.005968266166746616 2023-01-22 11:54:47.277739: step: 416/466, loss: 0.057921234518289566 2023-01-22 11:54:47.932620: step: 418/466, loss: 0.08367381989955902 2023-01-22 11:54:48.634460: step: 420/466, loss: 0.01985708251595497 2023-01-22 11:54:49.330732: step: 422/466, loss: 0.47170504927635193 2023-01-22 11:54:49.992403: step: 424/466, loss: 0.6092000007629395 2023-01-22 11:54:50.680391: step: 426/466, loss: 0.03696879372000694 2023-01-22 11:54:51.356902: step: 428/466, loss: 0.024741677567362785 2023-01-22 11:54:52.038930: step: 430/466, loss: 0.02118833363056183 2023-01-22 11:54:52.771384: step: 432/466, loss: 0.40384843945503235 2023-01-22 11:54:53.369360: step: 434/466, loss: 0.04938744008541107 2023-01-22 11:54:54.071462: step: 436/466, loss: 0.06573593616485596 2023-01-22 11:54:54.774422: step: 438/466, loss: 0.05361180007457733 2023-01-22 11:54:55.461620: step: 440/466, loss: 0.20754894614219666 2023-01-22 11:54:56.092406: step: 442/466, loss: 0.04232131689786911 2023-01-22 11:54:56.760186: step: 444/466, loss: 0.0521935373544693 2023-01-22 11:54:57.422594: step: 446/466, loss: 0.04974536970257759 2023-01-22 11:54:58.012758: step: 448/466, loss: 0.3302471339702606 2023-01-22 11:54:58.670132: step: 450/466, loss: 0.01604943722486496 2023-01-22 11:54:59.351282: step: 452/466, loss: 0.09881814569234848 2023-01-22 11:55:00.069452: step: 454/466, loss: 0.07670079916715622 2023-01-22 11:55:00.705691: step: 456/466, loss: 0.06403877586126328 2023-01-22 11:55:01.349280: step: 458/466, loss: 0.04350895807147026 2023-01-22 11:55:01.994584: step: 460/466, loss: 0.06272173672914505 2023-01-22 11:55:02.708842: step: 462/466, loss: 0.030401557683944702 2023-01-22 11:55:03.375714: step: 464/466, loss: 0.47371336817741394 2023-01-22 11:55:04.032633: step: 466/466, loss: 0.07108789682388306 2023-01-22 11:55:04.767431: step: 468/466, loss: 0.07831962406635284 2023-01-22 11:55:05.422480: step: 470/466, loss: 0.195390522480011 2023-01-22 11:55:06.084804: step: 472/466, loss: 0.11402513086795807 2023-01-22 11:55:06.760761: step: 474/466, loss: 0.04407782852649689 2023-01-22 11:55:07.371677: step: 476/466, loss: 0.11348390579223633 2023-01-22 11:55:08.055255: step: 478/466, loss: 16.509958267211914 2023-01-22 11:55:08.710330: step: 480/466, loss: 0.03846314549446106 2023-01-22 11:55:09.320428: step: 482/466, loss: 0.14290757477283478 2023-01-22 11:55:09.953533: step: 484/466, loss: 0.0507693774998188 2023-01-22 11:55:10.638864: step: 486/466, loss: 0.08279737830162048 2023-01-22 11:55:11.267590: step: 488/466, loss: 0.03770788013935089 2023-01-22 11:55:11.905389: step: 490/466, loss: 0.04785335063934326 2023-01-22 11:55:12.586094: step: 492/466, loss: 0.11262853443622589 2023-01-22 11:55:13.237033: step: 494/466, loss: 0.0479293055832386 2023-01-22 11:55:13.883350: step: 496/466, loss: 0.09707148373126984 2023-01-22 11:55:14.524357: step: 498/466, loss: 0.08645107597112656 2023-01-22 11:55:15.187487: step: 500/466, loss: 0.07878920435905457 2023-01-22 11:55:15.836203: step: 502/466, loss: 0.25702032446861267 2023-01-22 11:55:16.494424: step: 504/466, loss: 0.026958482339978218 2023-01-22 11:55:17.127632: step: 506/466, loss: 0.10854596644639969 2023-01-22 11:55:17.706421: step: 508/466, loss: 0.07018983364105225 2023-01-22 11:55:18.341457: step: 510/466, loss: 0.033980175852775574 2023-01-22 11:55:18.967046: step: 512/466, loss: 0.03171353414654732 2023-01-22 11:55:19.649799: step: 514/466, loss: 0.15424910187721252 2023-01-22 11:55:20.306259: step: 516/466, loss: 0.05672196298837662 2023-01-22 11:55:20.970081: step: 518/466, loss: 0.24705109000205994 2023-01-22 11:55:21.566700: step: 520/466, loss: 0.04120907559990883 2023-01-22 11:55:22.210675: step: 522/466, loss: 0.045313362032175064 2023-01-22 11:55:22.910584: step: 524/466, loss: 0.01784730888903141 2023-01-22 11:55:23.558310: step: 526/466, loss: 0.047152016311883926 2023-01-22 11:55:24.222995: step: 528/466, loss: 0.06034964695572853 2023-01-22 11:55:24.883754: step: 530/466, loss: 0.07564128190279007 2023-01-22 11:55:25.547264: step: 532/466, loss: 0.49377718567848206 2023-01-22 11:55:26.202575: step: 534/466, loss: 0.030665753409266472 2023-01-22 11:55:26.842183: step: 536/466, loss: 0.03836410865187645 2023-01-22 11:55:27.493503: step: 538/466, loss: 0.12349491566419601 2023-01-22 11:55:28.121076: step: 540/466, loss: 0.07908549159765244 2023-01-22 11:55:28.723033: step: 542/466, loss: 0.017840398475527763 2023-01-22 11:55:29.319925: step: 544/466, loss: 0.12404599785804749 2023-01-22 11:55:29.982864: step: 546/466, loss: 0.05739966034889221 2023-01-22 11:55:30.651511: step: 548/466, loss: 0.13945209980010986 2023-01-22 11:55:31.260387: step: 550/466, loss: 0.25514689087867737 2023-01-22 11:55:31.879613: step: 552/466, loss: 0.0977272093296051 2023-01-22 11:55:32.617894: step: 554/466, loss: 0.03294859081506729 2023-01-22 11:55:33.288627: step: 556/466, loss: 0.06530392915010452 2023-01-22 11:55:33.942546: step: 558/466, loss: 0.02967679314315319 2023-01-22 11:55:34.633467: step: 560/466, loss: 0.16877122223377228 2023-01-22 11:55:35.372572: step: 562/466, loss: 0.06818056851625443 2023-01-22 11:55:36.051246: step: 564/466, loss: 0.06080375611782074 2023-01-22 11:55:36.716163: step: 566/466, loss: 0.03576469048857689 2023-01-22 11:55:37.367548: step: 568/466, loss: 0.0933336690068245 2023-01-22 11:55:38.039029: step: 570/466, loss: 0.0925021767616272 2023-01-22 11:55:38.666520: step: 572/466, loss: 0.13887295126914978 2023-01-22 11:55:39.317974: step: 574/466, loss: 0.06011636182665825 2023-01-22 11:55:39.969517: step: 576/466, loss: 0.18684709072113037 2023-01-22 11:55:40.668595: step: 578/466, loss: 0.03664421662688255 2023-01-22 11:55:41.278056: step: 580/466, loss: 0.07297038286924362 2023-01-22 11:55:41.968211: step: 582/466, loss: 0.07130319625139236 2023-01-22 11:55:42.627492: step: 584/466, loss: 0.15805700421333313 2023-01-22 11:55:43.262976: step: 586/466, loss: 0.23254035413265228 2023-01-22 11:55:43.943724: step: 588/466, loss: 0.009973529726266861 2023-01-22 11:55:44.604782: step: 590/466, loss: 0.07603029161691666 2023-01-22 11:55:45.336021: step: 592/466, loss: 0.06145777925848961 2023-01-22 11:55:45.977472: step: 594/466, loss: 0.7084916830062866 2023-01-22 11:55:46.641993: step: 596/466, loss: 0.08905018866062164 2023-01-22 11:55:47.296006: step: 598/466, loss: 0.0640096366405487 2023-01-22 11:55:48.109500: step: 600/466, loss: 0.017966795712709427 2023-01-22 11:55:48.737966: step: 602/466, loss: 0.07718763500452042 2023-01-22 11:55:49.401675: step: 604/466, loss: 0.09043219685554504 2023-01-22 11:55:50.037927: step: 606/466, loss: 0.04277624562382698 2023-01-22 11:55:50.686816: step: 608/466, loss: 0.09092157334089279 2023-01-22 11:55:51.370982: step: 610/466, loss: 0.04313157871365547 2023-01-22 11:55:52.026417: step: 612/466, loss: 0.02429984137415886 2023-01-22 11:55:52.800734: step: 614/466, loss: 0.3619789779186249 2023-01-22 11:55:53.470486: step: 616/466, loss: 0.025472547858953476 2023-01-22 11:55:54.126244: step: 618/466, loss: 0.04127861186861992 2023-01-22 11:55:54.808733: step: 620/466, loss: 0.05021412670612335 2023-01-22 11:55:55.534140: step: 622/466, loss: 0.2635730504989624 2023-01-22 11:55:56.144350: step: 624/466, loss: 0.09003724902868271 2023-01-22 11:55:56.858447: step: 626/466, loss: 0.29988300800323486 2023-01-22 11:55:57.531969: step: 628/466, loss: 0.040291618555784225 2023-01-22 11:55:58.156450: step: 630/466, loss: 0.06659505516290665 2023-01-22 11:55:58.802670: step: 632/466, loss: 0.11002293229103088 2023-01-22 11:55:59.420082: step: 634/466, loss: 0.03860000893473625 2023-01-22 11:56:00.067392: step: 636/466, loss: 0.11201709508895874 2023-01-22 11:56:00.740482: step: 638/466, loss: 0.09177178889513016 2023-01-22 11:56:01.429472: step: 640/466, loss: 0.12213198840618134 2023-01-22 11:56:02.153953: step: 642/466, loss: 0.5728474855422974 2023-01-22 11:56:02.893968: step: 644/466, loss: 0.04759373143315315 2023-01-22 11:56:03.585864: step: 646/466, loss: 0.0527830645442009 2023-01-22 11:56:04.291121: step: 648/466, loss: 0.06714562326669693 2023-01-22 11:56:05.032725: step: 650/466, loss: 0.15858644247055054 2023-01-22 11:56:05.697162: step: 652/466, loss: 0.07921741157770157 2023-01-22 11:56:06.318546: step: 654/466, loss: 0.04882184788584709 2023-01-22 11:56:06.982640: step: 656/466, loss: 0.044943083077669144 2023-01-22 11:56:07.665122: step: 658/466, loss: 0.11141613125801086 2023-01-22 11:56:08.260225: step: 660/466, loss: 0.09633949398994446 2023-01-22 11:56:08.959397: step: 662/466, loss: 0.026888061314821243 2023-01-22 11:56:09.604865: step: 664/466, loss: 0.013612605631351471 2023-01-22 11:56:10.256626: step: 666/466, loss: 0.09860095381736755 2023-01-22 11:56:10.904822: step: 668/466, loss: 0.05068560689687729 2023-01-22 11:56:11.500347: step: 670/466, loss: 0.07181273400783539 2023-01-22 11:56:12.129721: step: 672/466, loss: 0.14695997536182404 2023-01-22 11:56:12.758406: step: 674/466, loss: 0.014467950910329819 2023-01-22 11:56:13.545383: step: 676/466, loss: 0.06014389172196388 2023-01-22 11:56:14.227985: step: 678/466, loss: 0.1156170442700386 2023-01-22 11:56:14.917996: step: 680/466, loss: 0.024952847510576248 2023-01-22 11:56:15.570130: step: 682/466, loss: 0.2912246286869049 2023-01-22 11:56:16.203012: step: 684/466, loss: 0.04628412798047066 2023-01-22 11:56:16.834096: step: 686/466, loss: 0.03355022892355919 2023-01-22 11:56:17.457478: step: 688/466, loss: 0.07397707551717758 2023-01-22 11:56:18.116699: step: 690/466, loss: 0.05694981664419174 2023-01-22 11:56:18.804832: step: 692/466, loss: 0.12282607704401016 2023-01-22 11:56:19.490757: step: 694/466, loss: 0.0603046678006649 2023-01-22 11:56:20.195177: step: 696/466, loss: 0.02642957493662834 2023-01-22 11:56:20.772969: step: 698/466, loss: 0.21970698237419128 2023-01-22 11:56:21.412016: step: 700/466, loss: 0.11033938080072403 2023-01-22 11:56:22.054234: step: 702/466, loss: 0.034184087067842484 2023-01-22 11:56:22.685724: step: 704/466, loss: 0.01633790135383606 2023-01-22 11:56:23.245989: step: 706/466, loss: 0.3997347056865692 2023-01-22 11:56:23.938372: step: 708/466, loss: 0.10520128160715103 2023-01-22 11:56:24.568767: step: 710/466, loss: 0.11116138100624084 2023-01-22 11:56:25.223563: step: 712/466, loss: 0.15260502696037292 2023-01-22 11:56:25.918421: step: 714/466, loss: 0.04590679332613945 2023-01-22 11:56:26.624636: step: 716/466, loss: 0.02621178887784481 2023-01-22 11:56:27.312327: step: 718/466, loss: 0.038498785346746445 2023-01-22 11:56:27.929648: step: 720/466, loss: 0.015885451808571815 2023-01-22 11:56:28.564723: step: 722/466, loss: 0.057695887982845306 2023-01-22 11:56:29.207928: step: 724/466, loss: 0.04524630308151245 2023-01-22 11:56:29.799399: step: 726/466, loss: 0.00518582109361887 2023-01-22 11:56:30.441428: step: 728/466, loss: 0.035789694637060165 2023-01-22 11:56:31.107081: step: 730/466, loss: 0.11010007560253143 2023-01-22 11:56:31.956938: step: 732/466, loss: 0.06833881139755249 2023-01-22 11:56:32.648336: step: 734/466, loss: 0.07563214004039764 2023-01-22 11:56:33.320382: step: 736/466, loss: 0.04967145249247551 2023-01-22 11:56:33.945270: step: 738/466, loss: 0.029912220314145088 2023-01-22 11:56:34.627430: step: 740/466, loss: 0.11592493206262589 2023-01-22 11:56:35.298325: step: 742/466, loss: 0.03424540534615517 2023-01-22 11:56:35.958141: step: 744/466, loss: 0.016852907836437225 2023-01-22 11:56:36.621633: step: 746/466, loss: 0.033034175634384155 2023-01-22 11:56:37.378665: step: 748/466, loss: 0.12496069073677063 2023-01-22 11:56:38.032610: step: 750/466, loss: 0.14536382257938385 2023-01-22 11:56:38.692516: step: 752/466, loss: 1.701302170753479 2023-01-22 11:56:39.281133: step: 754/466, loss: 0.8775402903556824 2023-01-22 11:56:39.969204: step: 756/466, loss: 0.030453836545348167 2023-01-22 11:56:40.712635: step: 758/466, loss: 0.0717419981956482 2023-01-22 11:56:41.319912: step: 760/466, loss: 0.04080541059374809 2023-01-22 11:56:41.993465: step: 762/466, loss: 0.06974674761295319 2023-01-22 11:56:42.607299: step: 764/466, loss: 0.04770641773939133 2023-01-22 11:56:43.267280: step: 766/466, loss: 0.0678180605173111 2023-01-22 11:56:43.926663: step: 768/466, loss: 0.06281007081270218 2023-01-22 11:56:44.620935: step: 770/466, loss: 0.2280104011297226 2023-01-22 11:56:45.270634: step: 772/466, loss: 0.02376980520784855 2023-01-22 11:56:45.928347: step: 774/466, loss: 0.06682606041431427 2023-01-22 11:56:46.558347: step: 776/466, loss: 0.09349855035543442 2023-01-22 11:56:47.200471: step: 778/466, loss: 0.08626078814268112 2023-01-22 11:56:47.824703: step: 780/466, loss: 0.12150692194700241 2023-01-22 11:56:48.434622: step: 782/466, loss: 0.13246864080429077 2023-01-22 11:56:49.116008: step: 784/466, loss: 0.04481721296906471 2023-01-22 11:56:49.751997: step: 786/466, loss: 0.027929870411753654 2023-01-22 11:56:50.465481: step: 788/466, loss: 0.21095824241638184 2023-01-22 11:56:51.111614: step: 790/466, loss: 0.029106542468070984 2023-01-22 11:56:51.822070: step: 792/466, loss: 0.10976512730121613 2023-01-22 11:56:52.504301: step: 794/466, loss: 0.08589852601289749 2023-01-22 11:56:53.206591: step: 796/466, loss: 0.06154797598719597 2023-01-22 11:56:53.873131: step: 798/466, loss: 2.7724342346191406 2023-01-22 11:56:54.583412: step: 800/466, loss: 0.07345017045736313 2023-01-22 11:56:55.207212: step: 802/466, loss: 0.0817703828215599 2023-01-22 11:56:55.783347: step: 804/466, loss: 0.22079487144947052 2023-01-22 11:56:56.572482: step: 806/466, loss: 0.04317941144108772 2023-01-22 11:56:57.235531: step: 808/466, loss: 0.06339377909898758 2023-01-22 11:56:57.839711: step: 810/466, loss: 0.010256998240947723 2023-01-22 11:56:58.520679: step: 812/466, loss: 0.1356644630432129 2023-01-22 11:56:59.256182: step: 814/466, loss: 0.08809737861156464 2023-01-22 11:56:59.902142: step: 816/466, loss: 0.2747352421283722 2023-01-22 11:57:00.534884: step: 818/466, loss: 0.032459042966365814 2023-01-22 11:57:01.178273: step: 820/466, loss: 0.04946858808398247 2023-01-22 11:57:01.880148: step: 822/466, loss: 0.38934946060180664 2023-01-22 11:57:02.667701: step: 824/466, loss: 0.06598320603370667 2023-01-22 11:57:03.314143: step: 826/466, loss: 0.014138715341687202 2023-01-22 11:57:04.093588: step: 828/466, loss: 0.19181425869464874 2023-01-22 11:57:04.728686: step: 830/466, loss: 0.02072925865650177 2023-01-22 11:57:05.405329: step: 832/466, loss: 0.13066735863685608 2023-01-22 11:57:06.029668: step: 834/466, loss: 0.2115868330001831 2023-01-22 11:57:06.686120: step: 836/466, loss: 0.03603954613208771 2023-01-22 11:57:07.303430: step: 838/466, loss: 0.07363086193799973 2023-01-22 11:57:07.945734: step: 840/466, loss: 0.023128263652324677 2023-01-22 11:57:08.692770: step: 842/466, loss: 0.042165763676166534 2023-01-22 11:57:09.364449: step: 844/466, loss: 0.08984938263893127 2023-01-22 11:57:10.021301: step: 846/466, loss: 0.07657621055841446 2023-01-22 11:57:10.648448: step: 848/466, loss: 0.020295914262533188 2023-01-22 11:57:11.298233: step: 850/466, loss: 0.08770626783370972 2023-01-22 11:57:11.969072: step: 852/466, loss: 0.04018435627222061 2023-01-22 11:57:12.622688: step: 854/466, loss: 0.08496487140655518 2023-01-22 11:57:13.262017: step: 856/466, loss: 0.02933735027909279 2023-01-22 11:57:13.938113: step: 858/466, loss: 0.09103970974683762 2023-01-22 11:57:14.633402: step: 860/466, loss: 0.048141077160835266 2023-01-22 11:57:15.281941: step: 862/466, loss: 0.5548475980758667 2023-01-22 11:57:15.956937: step: 864/466, loss: 0.17042019963264465 2023-01-22 11:57:16.612038: step: 866/466, loss: 0.17668190598487854 2023-01-22 11:57:17.344264: step: 868/466, loss: 0.13648608326911926 2023-01-22 11:57:17.954683: step: 870/466, loss: 0.028354184702038765 2023-01-22 11:57:18.639705: step: 872/466, loss: 0.08296729624271393 2023-01-22 11:57:19.296001: step: 874/466, loss: 0.03876206651329994 2023-01-22 11:57:19.933635: step: 876/466, loss: 0.0596102811396122 2023-01-22 11:57:20.593553: step: 878/466, loss: 0.06662774085998535 2023-01-22 11:57:21.260135: step: 880/466, loss: 0.10815246403217316 2023-01-22 11:57:21.909612: step: 882/466, loss: 0.07196032255887985 2023-01-22 11:57:22.649515: step: 884/466, loss: 0.40313446521759033 2023-01-22 11:57:23.344099: step: 886/466, loss: 0.07323966175317764 2023-01-22 11:57:23.999190: step: 888/466, loss: 0.06867456436157227 2023-01-22 11:57:24.685400: step: 890/466, loss: 0.0214415080845356 2023-01-22 11:57:25.328146: step: 892/466, loss: 0.06691908836364746 2023-01-22 11:57:26.061969: step: 894/466, loss: 0.02709440514445305 2023-01-22 11:57:26.706421: step: 896/466, loss: 0.02463386207818985 2023-01-22 11:57:27.342990: step: 898/466, loss: 0.08717241138219833 2023-01-22 11:57:28.029233: step: 900/466, loss: 0.08618682622909546 2023-01-22 11:57:28.689158: step: 902/466, loss: 0.06320453435182571 2023-01-22 11:57:29.330553: step: 904/466, loss: 0.0032288103830069304 2023-01-22 11:57:29.959196: step: 906/466, loss: 0.04616895318031311 2023-01-22 11:57:30.663819: step: 908/466, loss: 0.08718512952327728 2023-01-22 11:57:31.260751: step: 910/466, loss: 7.17824125289917 2023-01-22 11:57:31.835149: step: 912/466, loss: 0.016759289428591728 2023-01-22 11:57:32.470669: step: 914/466, loss: 0.029550690203905106 2023-01-22 11:57:33.069583: step: 916/466, loss: 0.03661734610795975 2023-01-22 11:57:33.755575: step: 918/466, loss: 0.11967132240533829 2023-01-22 11:57:34.345965: step: 920/466, loss: 0.03834659978747368 2023-01-22 11:57:35.028810: step: 922/466, loss: 0.06053052097558975 2023-01-22 11:57:35.699337: step: 924/466, loss: 0.06139500066637993 2023-01-22 11:57:36.335685: step: 926/466, loss: 0.05111083388328552 2023-01-22 11:57:37.012710: step: 928/466, loss: 0.2747494578361511 2023-01-22 11:57:37.660231: step: 930/466, loss: 0.051467474550008774 2023-01-22 11:57:38.309336: step: 932/466, loss: 0.041985541582107544 ================================================== Loss: 0.170 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2992154947916667, 'r': 0.3633736559139786, 'f1': 0.32818837475007145}, 'combined': 0.24182301297373685, 'epoch': 19} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3576100433256908, 'r': 0.3287655211840696, 'f1': 0.3425816953684882}, 'combined': 0.22720444045163982, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29039265799256503, 'r': 0.29589251893939394, 'f1': 0.2931167917448405}, 'combined': 0.19541119449656033, 'epoch': 19} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.368158333198803, 'r': 0.30722398169016224, 'f1': 0.3349423475393927}, 'combined': 0.21859395313097207, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2775741007358654, 'r': 0.3492061267322178, 'f1': 0.30929685510567867}, 'combined': 0.22790294586734217, 'epoch': 19} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.341322536583974, 'r': 0.3091668265687163, 'f1': 0.3244499032562402}, 'combined': 0.2151792104497344, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21695402298850572, 'r': 0.35952380952380947, 'f1': 0.2706093189964157}, 'combined': 0.18040621266427714, 'epoch': 19} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4473684210526316, 'r': 0.3695652173913043, 'f1': 0.40476190476190477}, 'combined': 0.2698412698412698, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.296875, 'r': 0.16379310344827586, 'f1': 0.21111111111111108}, 'combined': 0.14074074074074072, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:00:22.156168: step: 2/466, loss: 0.02529050223529339 2023-01-22 12:00:22.842922: step: 4/466, loss: 0.1284036636352539 2023-01-22 12:00:23.486605: step: 6/466, loss: 0.8496418595314026 2023-01-22 12:00:24.126027: step: 8/466, loss: 0.25180578231811523 2023-01-22 12:00:24.829442: step: 10/466, loss: 0.040862686932086945 2023-01-22 12:00:25.509931: step: 12/466, loss: 0.0089346868917346 2023-01-22 12:00:26.143680: step: 14/466, loss: 0.07790698856115341 2023-01-22 12:00:26.827475: step: 16/466, loss: 0.02893769182264805 2023-01-22 12:00:27.508346: step: 18/466, loss: 0.032509054988622665 2023-01-22 12:00:28.117040: step: 20/466, loss: 0.0163698922842741 2023-01-22 12:00:28.727467: step: 22/466, loss: 0.06177888438105583 2023-01-22 12:00:29.363547: step: 24/466, loss: 0.005556738469749689 2023-01-22 12:00:30.085948: step: 26/466, loss: 0.028866780921816826 2023-01-22 12:00:30.806057: step: 28/466, loss: 0.022818591445684433 2023-01-22 12:00:31.457479: step: 30/466, loss: 0.014123926870524883 2023-01-22 12:00:32.128134: step: 32/466, loss: 0.034859590232372284 2023-01-22 12:00:32.768690: step: 34/466, loss: 0.06828051805496216 2023-01-22 12:00:33.465004: step: 36/466, loss: 0.20173564553260803 2023-01-22 12:00:34.177465: step: 38/466, loss: 0.03694624453783035 2023-01-22 12:00:34.798876: step: 40/466, loss: 0.016344420611858368 2023-01-22 12:00:35.487506: step: 42/466, loss: 0.07911890000104904 2023-01-22 12:00:36.078281: step: 44/466, loss: 0.05435716360807419 2023-01-22 12:00:36.716819: step: 46/466, loss: 0.14925718307495117 2023-01-22 12:00:37.381969: step: 48/466, loss: 0.06081797555088997 2023-01-22 12:00:38.042525: step: 50/466, loss: 0.014909287914633751 2023-01-22 12:00:38.724896: step: 52/466, loss: 0.14070676267147064 2023-01-22 12:00:39.371086: step: 54/466, loss: 0.10343374311923981 2023-01-22 12:00:40.037649: step: 56/466, loss: 0.07278351485729218 2023-01-22 12:00:40.686871: step: 58/466, loss: 0.05475543811917305 2023-01-22 12:00:41.351858: step: 60/466, loss: 0.09487111866474152 2023-01-22 12:00:41.946436: step: 62/466, loss: 0.04730089753866196 2023-01-22 12:00:42.646073: step: 64/466, loss: 0.08981947600841522 2023-01-22 12:00:43.293075: step: 66/466, loss: 0.07103098928928375 2023-01-22 12:00:43.987393: step: 68/466, loss: 0.7478634119033813 2023-01-22 12:00:44.706756: step: 70/466, loss: 0.06133272126317024 2023-01-22 12:00:45.379333: step: 72/466, loss: 0.022983666509389877 2023-01-22 12:00:46.055832: step: 74/466, loss: 0.18768425285816193 2023-01-22 12:00:46.710508: step: 76/466, loss: 0.01486103143543005 2023-01-22 12:00:47.385230: step: 78/466, loss: 0.07413389533758163 2023-01-22 12:00:48.057893: step: 80/466, loss: 0.027376830577850342 2023-01-22 12:00:48.758687: step: 82/466, loss: 0.027573490515351295 2023-01-22 12:00:49.389062: step: 84/466, loss: 0.05019203945994377 2023-01-22 12:00:50.029713: step: 86/466, loss: 0.33560293912887573 2023-01-22 12:00:50.706855: step: 88/466, loss: 0.08343034237623215 2023-01-22 12:00:51.344338: step: 90/466, loss: 0.16450229287147522 2023-01-22 12:00:52.021177: step: 92/466, loss: 0.07126929610967636 2023-01-22 12:00:52.679731: step: 94/466, loss: 0.07928193360567093 2023-01-22 12:00:53.359326: step: 96/466, loss: 0.32826054096221924 2023-01-22 12:00:54.013208: step: 98/466, loss: 0.18427452445030212 2023-01-22 12:00:54.686980: step: 100/466, loss: 0.0352289155125618 2023-01-22 12:00:55.362776: step: 102/466, loss: 0.17166413366794586 2023-01-22 12:00:55.993691: step: 104/466, loss: 0.07920347154140472 2023-01-22 12:00:56.738809: step: 106/466, loss: 0.01411384716629982 2023-01-22 12:00:57.378189: step: 108/466, loss: 0.1047717034816742 2023-01-22 12:00:58.047704: step: 110/466, loss: 0.18175145983695984 2023-01-22 12:00:58.705393: step: 112/466, loss: 1.1283595561981201 2023-01-22 12:00:59.336672: step: 114/466, loss: 0.101852186024189 2023-01-22 12:01:00.027078: step: 116/466, loss: 0.03352248668670654 2023-01-22 12:01:00.692154: step: 118/466, loss: 0.05666612461209297 2023-01-22 12:01:01.421274: step: 120/466, loss: 0.01730651780962944 2023-01-22 12:01:02.044838: step: 122/466, loss: 0.030800916254520416 2023-01-22 12:01:02.750202: step: 124/466, loss: 0.03704928979277611 2023-01-22 12:01:03.420714: step: 126/466, loss: 0.042389120906591415 2023-01-22 12:01:04.076876: step: 128/466, loss: 0.2540122866630554 2023-01-22 12:01:04.739230: step: 130/466, loss: 0.015463070943951607 2023-01-22 12:01:05.419018: step: 132/466, loss: 0.015473777428269386 2023-01-22 12:01:06.246856: step: 134/466, loss: 0.5191654562950134 2023-01-22 12:01:06.935796: step: 136/466, loss: 0.04981977865099907 2023-01-22 12:01:07.598133: step: 138/466, loss: 0.029307564720511436 2023-01-22 12:01:08.295075: step: 140/466, loss: 0.07894758880138397 2023-01-22 12:01:08.979803: step: 142/466, loss: 0.2544078826904297 2023-01-22 12:01:09.592924: step: 144/466, loss: 0.04332369193434715 2023-01-22 12:01:10.260279: step: 146/466, loss: 0.014201727695763111 2023-01-22 12:01:10.900698: step: 148/466, loss: 0.04584435373544693 2023-01-22 12:01:11.561459: step: 150/466, loss: 0.12334337830543518 2023-01-22 12:01:12.149649: step: 152/466, loss: 0.055985480546951294 2023-01-22 12:01:12.829882: step: 154/466, loss: 0.02131708897650242 2023-01-22 12:01:13.566661: step: 156/466, loss: 0.056722573935985565 2023-01-22 12:01:14.157260: step: 158/466, loss: 0.47319892048835754 2023-01-22 12:01:14.806701: step: 160/466, loss: 0.02541445940732956 2023-01-22 12:01:15.460095: step: 162/466, loss: 0.19760848581790924 2023-01-22 12:01:16.055312: step: 164/466, loss: 0.010579260066151619 2023-01-22 12:01:16.687118: step: 166/466, loss: 0.048830416053533554 2023-01-22 12:01:17.290397: step: 168/466, loss: 0.07627157866954803 2023-01-22 12:01:17.859210: step: 170/466, loss: 0.039207588881254196 2023-01-22 12:01:18.540508: step: 172/466, loss: 0.03126634284853935 2023-01-22 12:01:19.162403: step: 174/466, loss: 0.0609285943210125 2023-01-22 12:01:19.832054: step: 176/466, loss: 0.06033097207546234 2023-01-22 12:01:20.433142: step: 178/466, loss: 0.013422602787613869 2023-01-22 12:01:21.099251: step: 180/466, loss: 0.04581700637936592 2023-01-22 12:01:21.740222: step: 182/466, loss: 0.020022494718432426 2023-01-22 12:01:22.402203: step: 184/466, loss: 0.09475758671760559 2023-01-22 12:01:23.112104: step: 186/466, loss: 0.14202749729156494 2023-01-22 12:01:23.767633: step: 188/466, loss: 0.3445826470851898 2023-01-22 12:01:24.443859: step: 190/466, loss: 0.035508062690496445 2023-01-22 12:01:25.077415: step: 192/466, loss: 0.008882682770490646 2023-01-22 12:01:25.708294: step: 194/466, loss: 0.10349462926387787 2023-01-22 12:01:26.411199: step: 196/466, loss: 0.07579652965068817 2023-01-22 12:01:27.084233: step: 198/466, loss: 0.105499267578125 2023-01-22 12:01:27.750383: step: 200/466, loss: 0.05291704088449478 2023-01-22 12:01:28.310877: step: 202/466, loss: 0.01687799021601677 2023-01-22 12:01:28.978276: step: 204/466, loss: 0.045372042804956436 2023-01-22 12:01:29.604596: step: 206/466, loss: 0.011181673035025597 2023-01-22 12:01:30.220524: step: 208/466, loss: 0.031141316518187523 2023-01-22 12:01:30.883233: step: 210/466, loss: 0.012476320378482342 2023-01-22 12:01:31.561699: step: 212/466, loss: 0.06704815477132797 2023-01-22 12:01:32.228366: step: 214/466, loss: 1.2883732318878174 2023-01-22 12:01:32.867116: step: 216/466, loss: 0.03643377497792244 2023-01-22 12:01:33.526431: step: 218/466, loss: 0.04377938434481621 2023-01-22 12:01:34.188973: step: 220/466, loss: 0.06631084531545639 2023-01-22 12:01:34.814637: step: 222/466, loss: 0.106280118227005 2023-01-22 12:01:35.495108: step: 224/466, loss: 0.10199912637472153 2023-01-22 12:01:36.196689: step: 226/466, loss: 0.08121784776449203 2023-01-22 12:01:36.838547: step: 228/466, loss: 0.058072999119758606 2023-01-22 12:01:37.458605: step: 230/466, loss: 0.023868972435593605 2023-01-22 12:01:38.061688: step: 232/466, loss: 0.06327088177204132 2023-01-22 12:01:38.822114: step: 234/466, loss: 0.012144657783210278 2023-01-22 12:01:39.457820: step: 236/466, loss: 0.036326225847005844 2023-01-22 12:01:40.064237: step: 238/466, loss: 0.14834503829479218 2023-01-22 12:01:40.723749: step: 240/466, loss: 0.05553761124610901 2023-01-22 12:01:41.390706: step: 242/466, loss: 0.0351240448653698 2023-01-22 12:01:42.105975: step: 244/466, loss: 0.06827200949192047 2023-01-22 12:01:42.795542: step: 246/466, loss: 0.0644996166229248 2023-01-22 12:01:43.430305: step: 248/466, loss: 0.04270177707076073 2023-01-22 12:01:44.069076: step: 250/466, loss: 0.030522502958774567 2023-01-22 12:01:44.751804: step: 252/466, loss: 0.028394509106874466 2023-01-22 12:01:45.443773: step: 254/466, loss: 0.46309852600097656 2023-01-22 12:01:46.175327: step: 256/466, loss: 0.05730801448225975 2023-01-22 12:01:46.854926: step: 258/466, loss: 0.08036874979734421 2023-01-22 12:01:47.521814: step: 260/466, loss: 0.08377359062433243 2023-01-22 12:01:48.230107: step: 262/466, loss: 0.14369578659534454 2023-01-22 12:01:48.836613: step: 264/466, loss: 0.1626148521900177 2023-01-22 12:01:49.463968: step: 266/466, loss: 0.07974758744239807 2023-01-22 12:01:50.191625: step: 268/466, loss: 0.018431322649121284 2023-01-22 12:01:50.827900: step: 270/466, loss: 0.040509093552827835 2023-01-22 12:01:51.440489: step: 272/466, loss: 0.009697719477117062 2023-01-22 12:01:52.051287: step: 274/466, loss: 0.08021612465381622 2023-01-22 12:01:52.665841: step: 276/466, loss: 0.06953954696655273 2023-01-22 12:01:53.352759: step: 278/466, loss: 0.037968482822179794 2023-01-22 12:01:53.984919: step: 280/466, loss: 0.038745686411857605 2023-01-22 12:01:54.600932: step: 282/466, loss: 0.10231854021549225 2023-01-22 12:01:55.169538: step: 284/466, loss: 0.12913811206817627 2023-01-22 12:01:55.810188: step: 286/466, loss: 0.030376749113202095 2023-01-22 12:01:56.447273: step: 288/466, loss: 0.03901521489024162 2023-01-22 12:01:57.112872: step: 290/466, loss: 0.17596976459026337 2023-01-22 12:01:57.783243: step: 292/466, loss: 0.07888085395097733 2023-01-22 12:01:58.380048: step: 294/466, loss: 0.054244693368673325 2023-01-22 12:01:59.054522: step: 296/466, loss: 0.04164930060505867 2023-01-22 12:01:59.786113: step: 298/466, loss: 0.06038874015212059 2023-01-22 12:02:00.398846: step: 300/466, loss: 0.08669485151767731 2023-01-22 12:02:01.033940: step: 302/466, loss: 0.005868879612535238 2023-01-22 12:02:01.653824: step: 304/466, loss: 0.0111148776486516 2023-01-22 12:02:02.355818: step: 306/466, loss: 0.10046995431184769 2023-01-22 12:02:03.093541: step: 308/466, loss: 0.1179303228855133 2023-01-22 12:02:03.737124: step: 310/466, loss: 0.06580962985754013 2023-01-22 12:02:04.369336: step: 312/466, loss: 0.1103833019733429 2023-01-22 12:02:04.977421: step: 314/466, loss: 0.01620732806622982 2023-01-22 12:02:05.708318: step: 316/466, loss: 0.1872664988040924 2023-01-22 12:02:06.286001: step: 318/466, loss: 0.017346568405628204 2023-01-22 12:02:06.872862: step: 320/466, loss: 0.09177451580762863 2023-01-22 12:02:07.506395: step: 322/466, loss: 0.02192498929798603 2023-01-22 12:02:08.131790: step: 324/466, loss: 0.052837517112493515 2023-01-22 12:02:08.768550: step: 326/466, loss: 0.12525829672813416 2023-01-22 12:02:09.379057: step: 328/466, loss: 0.15684545040130615 2023-01-22 12:02:09.995595: step: 330/466, loss: 0.07931232452392578 2023-01-22 12:02:10.632363: step: 332/466, loss: 0.07490004599094391 2023-01-22 12:02:11.321727: step: 334/466, loss: 0.3396855592727661 2023-01-22 12:02:11.970068: step: 336/466, loss: 0.05998348817229271 2023-01-22 12:02:12.630612: step: 338/466, loss: 0.03129202499985695 2023-01-22 12:02:13.302125: step: 340/466, loss: 0.022101087495684624 2023-01-22 12:02:13.980890: step: 342/466, loss: 0.07139355689287186 2023-01-22 12:02:14.628776: step: 344/466, loss: 0.21404370665550232 2023-01-22 12:02:15.309385: step: 346/466, loss: 0.16895224153995514 2023-01-22 12:02:15.964100: step: 348/466, loss: 0.08540564775466919 2023-01-22 12:02:16.635263: step: 350/466, loss: 0.08686162531375885 2023-01-22 12:02:17.326005: step: 352/466, loss: 0.13402172923088074 2023-01-22 12:02:18.010645: step: 354/466, loss: 0.031210817396640778 2023-01-22 12:02:18.769173: step: 356/466, loss: 0.03649180755019188 2023-01-22 12:02:19.463467: step: 358/466, loss: 0.030150389298796654 2023-01-22 12:02:20.164811: step: 360/466, loss: 0.060796499252319336 2023-01-22 12:02:20.892035: step: 362/466, loss: 0.040671683847904205 2023-01-22 12:02:21.569023: step: 364/466, loss: 0.16725201904773712 2023-01-22 12:02:22.245546: step: 366/466, loss: 0.15237775444984436 2023-01-22 12:02:22.932616: step: 368/466, loss: 0.04570477083325386 2023-01-22 12:02:23.596957: step: 370/466, loss: 0.18501229584217072 2023-01-22 12:02:24.233949: step: 372/466, loss: 0.018478266894817352 2023-01-22 12:02:24.890282: step: 374/466, loss: 0.450448215007782 2023-01-22 12:02:25.516523: step: 376/466, loss: 0.41951102018356323 2023-01-22 12:02:26.194769: step: 378/466, loss: 0.039621587842702866 2023-01-22 12:02:26.791667: step: 380/466, loss: 0.026672614738345146 2023-01-22 12:02:27.452386: step: 382/466, loss: 0.06711740791797638 2023-01-22 12:02:28.248147: step: 384/466, loss: 0.10505777597427368 2023-01-22 12:02:28.901217: step: 386/466, loss: 0.06020501255989075 2023-01-22 12:02:29.569082: step: 388/466, loss: 0.09692052751779556 2023-01-22 12:02:30.200695: step: 390/466, loss: 1.6137539148330688 2023-01-22 12:02:30.831465: step: 392/466, loss: 0.03121868334710598 2023-01-22 12:02:31.487929: step: 394/466, loss: 0.02551921457052231 2023-01-22 12:02:32.111801: step: 396/466, loss: 0.03320877254009247 2023-01-22 12:02:32.723477: step: 398/466, loss: 0.04377833381295204 2023-01-22 12:02:33.409131: step: 400/466, loss: 0.0744377076625824 2023-01-22 12:02:34.093622: step: 402/466, loss: 0.07616252452135086 2023-01-22 12:02:34.688382: step: 404/466, loss: 0.04090946167707443 2023-01-22 12:02:35.373630: step: 406/466, loss: 0.20691360533237457 2023-01-22 12:02:36.012889: step: 408/466, loss: 0.01815725676715374 2023-01-22 12:02:36.726492: step: 410/466, loss: 0.05874943360686302 2023-01-22 12:02:37.440181: step: 412/466, loss: 0.0488753616809845 2023-01-22 12:02:38.158689: step: 414/466, loss: 0.03152293711900711 2023-01-22 12:02:38.883499: step: 416/466, loss: 0.02398141287267208 2023-01-22 12:02:39.586175: step: 418/466, loss: 0.019400104880332947 2023-01-22 12:02:40.238448: step: 420/466, loss: 0.04799790680408478 2023-01-22 12:02:40.867769: step: 422/466, loss: 0.047729700803756714 2023-01-22 12:02:41.526787: step: 424/466, loss: 0.07025714963674545 2023-01-22 12:02:42.196616: step: 426/466, loss: 0.2905091345310211 2023-01-22 12:02:42.905705: step: 428/466, loss: 0.030055930837988853 2023-01-22 12:02:43.590795: step: 430/466, loss: 0.09775189310312271 2023-01-22 12:02:44.222215: step: 432/466, loss: 0.06666316837072372 2023-01-22 12:02:44.846369: step: 434/466, loss: 0.024716192856431007 2023-01-22 12:02:45.470364: step: 436/466, loss: 0.9140031337738037 2023-01-22 12:02:46.159466: step: 438/466, loss: 0.1764558106660843 2023-01-22 12:02:46.775206: step: 440/466, loss: 0.026693593710660934 2023-01-22 12:02:47.448961: step: 442/466, loss: 0.044917576014995575 2023-01-22 12:02:48.071906: step: 444/466, loss: 0.05101257562637329 2023-01-22 12:02:48.776830: step: 446/466, loss: 0.07329098135232925 2023-01-22 12:02:49.456454: step: 448/466, loss: 0.12456972151994705 2023-01-22 12:02:50.054379: step: 450/466, loss: 0.046114739030599594 2023-01-22 12:02:50.717734: step: 452/466, loss: 0.12324994802474976 2023-01-22 12:02:51.337999: step: 454/466, loss: 0.03191633149981499 2023-01-22 12:02:51.968840: step: 456/466, loss: 0.08683544397354126 2023-01-22 12:02:52.561666: step: 458/466, loss: 0.07412993907928467 2023-01-22 12:02:53.230560: step: 460/466, loss: 0.0910847932100296 2023-01-22 12:02:53.950775: step: 462/466, loss: 0.061169225722551346 2023-01-22 12:02:54.573922: step: 464/466, loss: 0.01881217025220394 2023-01-22 12:02:55.216839: step: 466/466, loss: 0.010416771285235882 2023-01-22 12:02:55.913984: step: 468/466, loss: 0.3848955035209656 2023-01-22 12:02:56.548579: step: 470/466, loss: 0.0624094232916832 2023-01-22 12:02:57.163377: step: 472/466, loss: 0.09354517608880997 2023-01-22 12:02:57.822144: step: 474/466, loss: 0.1539187729358673 2023-01-22 12:02:58.503044: step: 476/466, loss: 0.04534471780061722 2023-01-22 12:02:59.088285: step: 478/466, loss: 0.019748156890273094 2023-01-22 12:02:59.757305: step: 480/466, loss: 0.27361956238746643 2023-01-22 12:03:00.428039: step: 482/466, loss: 0.0669504776597023 2023-01-22 12:03:01.123406: step: 484/466, loss: 0.025343511253595352 2023-01-22 12:03:01.754777: step: 486/466, loss: 0.11701531708240509 2023-01-22 12:03:02.427028: step: 488/466, loss: 0.22605279088020325 2023-01-22 12:03:03.167983: step: 490/466, loss: 0.5436602830886841 2023-01-22 12:03:03.921604: step: 492/466, loss: 0.024394290521740913 2023-01-22 12:03:04.576696: step: 494/466, loss: 0.042096640914678574 2023-01-22 12:03:05.251960: step: 496/466, loss: 0.022823672741651535 2023-01-22 12:03:05.941265: step: 498/466, loss: 0.12584716081619263 2023-01-22 12:03:06.649962: step: 500/466, loss: 1.1072688102722168 2023-01-22 12:03:07.312374: step: 502/466, loss: 0.04686303064227104 2023-01-22 12:03:08.018401: step: 504/466, loss: 0.014092482626438141 2023-01-22 12:03:08.663074: step: 506/466, loss: 0.012901059351861477 2023-01-22 12:03:09.343565: step: 508/466, loss: 0.06189880520105362 2023-01-22 12:03:09.967310: step: 510/466, loss: 0.1094948947429657 2023-01-22 12:03:10.690160: step: 512/466, loss: 0.011595910415053368 2023-01-22 12:03:11.351099: step: 514/466, loss: 0.08042123168706894 2023-01-22 12:03:12.011380: step: 516/466, loss: 0.06183750927448273 2023-01-22 12:03:12.619560: step: 518/466, loss: 0.11537908762693405 2023-01-22 12:03:13.247745: step: 520/466, loss: 0.14726556837558746 2023-01-22 12:03:13.917875: step: 522/466, loss: 0.27260708808898926 2023-01-22 12:03:14.662850: step: 524/466, loss: 0.12424908578395844 2023-01-22 12:03:15.272101: step: 526/466, loss: 0.019538627937436104 2023-01-22 12:03:15.886412: step: 528/466, loss: 0.46317681670188904 2023-01-22 12:03:16.500736: step: 530/466, loss: 0.060504645109176636 2023-01-22 12:03:17.104797: step: 532/466, loss: 0.07724351435899734 2023-01-22 12:03:17.679754: step: 534/466, loss: 0.12051215767860413 2023-01-22 12:03:18.343078: step: 536/466, loss: 0.07808685302734375 2023-01-22 12:03:19.059044: step: 538/466, loss: 0.01689128950238228 2023-01-22 12:03:19.721775: step: 540/466, loss: 0.014444484375417233 2023-01-22 12:03:20.368934: step: 542/466, loss: 0.07736461609601974 2023-01-22 12:03:20.974136: step: 544/466, loss: 0.06107090041041374 2023-01-22 12:03:21.586907: step: 546/466, loss: 0.035207852721214294 2023-01-22 12:03:22.265515: step: 548/466, loss: 0.0847686156630516 2023-01-22 12:03:22.893021: step: 550/466, loss: 0.030109670013189316 2023-01-22 12:03:23.515947: step: 552/466, loss: 0.004358192905783653 2023-01-22 12:03:24.164467: step: 554/466, loss: 0.06824992597103119 2023-01-22 12:03:24.869473: step: 556/466, loss: 0.047165438532829285 2023-01-22 12:03:25.530712: step: 558/466, loss: 0.05250353738665581 2023-01-22 12:03:26.128099: step: 560/466, loss: 0.018119188025593758 2023-01-22 12:03:26.764380: step: 562/466, loss: 0.913058340549469 2023-01-22 12:03:27.396195: step: 564/466, loss: 0.05874692276120186 2023-01-22 12:03:28.114018: step: 566/466, loss: 0.03652816265821457 2023-01-22 12:03:28.745082: step: 568/466, loss: 0.0494910329580307 2023-01-22 12:03:29.392067: step: 570/466, loss: 0.23736171424388885 2023-01-22 12:03:30.138938: step: 572/466, loss: 0.3850165009498596 2023-01-22 12:03:30.724914: step: 574/466, loss: 0.11515842378139496 2023-01-22 12:03:31.360958: step: 576/466, loss: 0.18601633608341217 2023-01-22 12:03:32.093114: step: 578/466, loss: 0.2169901430606842 2023-01-22 12:03:32.770166: step: 580/466, loss: 0.05301076918840408 2023-01-22 12:03:33.388476: step: 582/466, loss: 0.02497091516852379 2023-01-22 12:03:34.065895: step: 584/466, loss: 0.15598836541175842 2023-01-22 12:03:34.640337: step: 586/466, loss: 0.02245033159852028 2023-01-22 12:03:35.323665: step: 588/466, loss: 0.29864266514778137 2023-01-22 12:03:35.962179: step: 590/466, loss: 0.1642669439315796 2023-01-22 12:03:36.566008: step: 592/466, loss: 0.06498217582702637 2023-01-22 12:03:37.221149: step: 594/466, loss: 0.08255688846111298 2023-01-22 12:03:37.816514: step: 596/466, loss: 0.009655744768679142 2023-01-22 12:03:38.434177: step: 598/466, loss: 0.018490519374608994 2023-01-22 12:03:39.092426: step: 600/466, loss: 0.03158906102180481 2023-01-22 12:03:39.756099: step: 602/466, loss: 0.030988484621047974 2023-01-22 12:03:40.369356: step: 604/466, loss: 0.005555752664804459 2023-01-22 12:03:41.117531: step: 606/466, loss: 0.11046244949102402 2023-01-22 12:03:41.809037: step: 608/466, loss: 0.09846675395965576 2023-01-22 12:03:42.432521: step: 610/466, loss: 0.5371130108833313 2023-01-22 12:03:43.022862: step: 612/466, loss: 0.23331308364868164 2023-01-22 12:03:43.724618: step: 614/466, loss: 0.05774373188614845 2023-01-22 12:03:44.397411: step: 616/466, loss: 0.09073471277952194 2023-01-22 12:03:44.989006: step: 618/466, loss: 0.11869775503873825 2023-01-22 12:03:45.660334: step: 620/466, loss: 0.03296220675110817 2023-01-22 12:03:46.249869: step: 622/466, loss: 0.009494036436080933 2023-01-22 12:03:46.906981: step: 624/466, loss: 0.31830641627311707 2023-01-22 12:03:47.563474: step: 626/466, loss: 0.021785251796245575 2023-01-22 12:03:48.191682: step: 628/466, loss: 0.28550639748573303 2023-01-22 12:03:48.806334: step: 630/466, loss: 0.020771397277712822 2023-01-22 12:03:49.363642: step: 632/466, loss: 0.023239050060510635 2023-01-22 12:03:50.003302: step: 634/466, loss: 0.11806279420852661 2023-01-22 12:03:50.602069: step: 636/466, loss: 0.010297476314008236 2023-01-22 12:03:51.253577: step: 638/466, loss: 0.05225253477692604 2023-01-22 12:03:51.855132: step: 640/466, loss: 0.013791006058454514 2023-01-22 12:03:52.526924: step: 642/466, loss: 0.161981463432312 2023-01-22 12:03:53.177620: step: 644/466, loss: 0.026967084035277367 2023-01-22 12:03:53.810653: step: 646/466, loss: 0.04840588569641113 2023-01-22 12:03:54.455538: step: 648/466, loss: 0.004754234105348587 2023-01-22 12:03:55.119848: step: 650/466, loss: 0.07378221303224564 2023-01-22 12:03:55.698991: step: 652/466, loss: 0.25798919796943665 2023-01-22 12:03:56.290367: step: 654/466, loss: 0.06274838000535965 2023-01-22 12:03:56.938082: step: 656/466, loss: 0.10202678292989731 2023-01-22 12:03:57.600677: step: 658/466, loss: 0.052663031965494156 2023-01-22 12:03:58.201705: step: 660/466, loss: 0.044118110090494156 2023-01-22 12:03:58.818363: step: 662/466, loss: 0.03136580064892769 2023-01-22 12:03:59.442187: step: 664/466, loss: 0.09924820810556412 2023-01-22 12:04:00.127869: step: 666/466, loss: 0.09181737154722214 2023-01-22 12:04:00.727029: step: 668/466, loss: 0.04943827539682388 2023-01-22 12:04:01.327983: step: 670/466, loss: 0.14395646750926971 2023-01-22 12:04:01.892174: step: 672/466, loss: 0.075625479221344 2023-01-22 12:04:02.565603: step: 674/466, loss: 0.030542463064193726 2023-01-22 12:04:03.214831: step: 676/466, loss: 0.05961842089891434 2023-01-22 12:04:03.911232: step: 678/466, loss: 0.201963409781456 2023-01-22 12:04:04.570608: step: 680/466, loss: 0.16936613619327545 2023-01-22 12:04:05.227489: step: 682/466, loss: 0.06179400533437729 2023-01-22 12:04:06.003211: step: 684/466, loss: 0.7320647239685059 2023-01-22 12:04:06.584565: step: 686/466, loss: 0.13582631945610046 2023-01-22 12:04:07.174330: step: 688/466, loss: 0.027148069813847542 2023-01-22 12:04:07.827105: step: 690/466, loss: 0.042925890535116196 2023-01-22 12:04:08.490238: step: 692/466, loss: 0.2105482518672943 2023-01-22 12:04:09.121658: step: 694/466, loss: 0.09240254014730453 2023-01-22 12:04:09.764262: step: 696/466, loss: 0.07900448888540268 2023-01-22 12:04:10.418157: step: 698/466, loss: 0.041338928043842316 2023-01-22 12:04:11.026087: step: 700/466, loss: 0.04051181301474571 2023-01-22 12:04:11.656469: step: 702/466, loss: 0.6643872857093811 2023-01-22 12:04:12.320188: step: 704/466, loss: 0.08506612479686737 2023-01-22 12:04:12.976483: step: 706/466, loss: 0.026076463982462883 2023-01-22 12:04:13.604245: step: 708/466, loss: 0.05837669596076012 2023-01-22 12:04:14.220922: step: 710/466, loss: 0.1373455673456192 2023-01-22 12:04:14.905637: step: 712/466, loss: 0.023741627112030983 2023-01-22 12:04:15.516731: step: 714/466, loss: 0.07516603916883469 2023-01-22 12:04:16.170649: step: 716/466, loss: 0.10175301879644394 2023-01-22 12:04:16.852879: step: 718/466, loss: 0.5919366478919983 2023-01-22 12:04:17.482851: step: 720/466, loss: 0.3820701539516449 2023-01-22 12:04:18.098931: step: 722/466, loss: 0.004326160065829754 2023-01-22 12:04:18.695661: step: 724/466, loss: 4.940408229827881 2023-01-22 12:04:19.287248: step: 726/466, loss: 0.07517941296100616 2023-01-22 12:04:19.911261: step: 728/466, loss: 0.0352647639811039 2023-01-22 12:04:20.546503: step: 730/466, loss: 0.04286986216902733 2023-01-22 12:04:21.151133: step: 732/466, loss: 0.05781130865216255 2023-01-22 12:04:21.846999: step: 734/466, loss: 0.9605228304862976 2023-01-22 12:04:22.509302: step: 736/466, loss: 0.0912160873413086 2023-01-22 12:04:23.216880: step: 738/466, loss: 0.11297646909952164 2023-01-22 12:04:23.809558: step: 740/466, loss: 0.0951039046049118 2023-01-22 12:04:24.349790: step: 742/466, loss: 0.021256210282444954 2023-01-22 12:04:24.946644: step: 744/466, loss: 0.02005433849990368 2023-01-22 12:04:25.630442: step: 746/466, loss: 0.07645412534475327 2023-01-22 12:04:26.284783: step: 748/466, loss: 0.03697463497519493 2023-01-22 12:04:26.817859: step: 750/466, loss: 0.04665801301598549 2023-01-22 12:04:27.451780: step: 752/466, loss: 0.04964252933859825 2023-01-22 12:04:28.101170: step: 754/466, loss: 0.07287842780351639 2023-01-22 12:04:28.712731: step: 756/466, loss: 0.025916436687111855 2023-01-22 12:04:29.361600: step: 758/466, loss: 0.22002416849136353 2023-01-22 12:04:30.001913: step: 760/466, loss: 0.019609622657299042 2023-01-22 12:04:30.619051: step: 762/466, loss: 0.1243818998336792 2023-01-22 12:04:31.267586: step: 764/466, loss: 0.0855434387922287 2023-01-22 12:04:31.905844: step: 766/466, loss: 0.029025837779045105 2023-01-22 12:04:32.511756: step: 768/466, loss: 0.0746544748544693 2023-01-22 12:04:33.204639: step: 770/466, loss: 0.07845261693000793 2023-01-22 12:04:33.827018: step: 772/466, loss: 0.025406787171959877 2023-01-22 12:04:34.453956: step: 774/466, loss: 0.20520764589309692 2023-01-22 12:04:35.103530: step: 776/466, loss: 0.06779426336288452 2023-01-22 12:04:35.808225: step: 778/466, loss: 0.03305256366729736 2023-01-22 12:04:36.416085: step: 780/466, loss: 0.15617133677005768 2023-01-22 12:04:37.058671: step: 782/466, loss: 0.025844769552350044 2023-01-22 12:04:37.699845: step: 784/466, loss: 0.12392221391201019 2023-01-22 12:04:38.338925: step: 786/466, loss: 0.032041821628808975 2023-01-22 12:04:39.044581: step: 788/466, loss: 0.06751325726509094 2023-01-22 12:04:39.695426: step: 790/466, loss: 0.11863002926111221 2023-01-22 12:04:40.277939: step: 792/466, loss: 0.055254314094781876 2023-01-22 12:04:40.907004: step: 794/466, loss: 0.3632022738456726 2023-01-22 12:04:41.654601: step: 796/466, loss: 0.13491855561733246 2023-01-22 12:04:42.302065: step: 798/466, loss: 0.0824522003531456 2023-01-22 12:04:42.916880: step: 800/466, loss: 0.14025896787643433 2023-01-22 12:04:43.543731: step: 802/466, loss: 0.00802691001445055 2023-01-22 12:04:44.239644: step: 804/466, loss: 0.10936498641967773 2023-01-22 12:04:44.854788: step: 806/466, loss: 0.05121378228068352 2023-01-22 12:04:45.492651: step: 808/466, loss: 0.09438611567020416 2023-01-22 12:04:46.139933: step: 810/466, loss: 0.008763416670262814 2023-01-22 12:04:46.807454: step: 812/466, loss: 0.08204493671655655 2023-01-22 12:04:47.387916: step: 814/466, loss: 0.04841059818863869 2023-01-22 12:04:48.096719: step: 816/466, loss: 0.21222209930419922 2023-01-22 12:04:48.721721: step: 818/466, loss: 0.0064653055742383 2023-01-22 12:04:49.339395: step: 820/466, loss: 0.08223865181207657 2023-01-22 12:04:49.999744: step: 822/466, loss: 0.08027202636003494 2023-01-22 12:04:50.629905: step: 824/466, loss: 0.23100262880325317 2023-01-22 12:04:51.254622: step: 826/466, loss: 0.11842045187950134 2023-01-22 12:04:51.909118: step: 828/466, loss: 0.05246425047516823 2023-01-22 12:04:52.586948: step: 830/466, loss: 0.06855402141809464 2023-01-22 12:04:53.215848: step: 832/466, loss: 0.048354990780353546 2023-01-22 12:04:53.833104: step: 834/466, loss: 0.0696854218840599 2023-01-22 12:04:54.549501: step: 836/466, loss: 0.08798477798700333 2023-01-22 12:04:55.141153: step: 838/466, loss: 0.05054626613855362 2023-01-22 12:04:55.773489: step: 840/466, loss: 0.081448994576931 2023-01-22 12:04:56.374193: step: 842/466, loss: 0.07047303020954132 2023-01-22 12:04:57.010138: step: 844/466, loss: 0.06402552872896194 2023-01-22 12:04:57.636909: step: 846/466, loss: 0.01810120977461338 2023-01-22 12:04:58.228809: step: 848/466, loss: 0.037891753017902374 2023-01-22 12:04:58.955508: step: 850/466, loss: 0.18829333782196045 2023-01-22 12:04:59.624866: step: 852/466, loss: 0.026354145258665085 2023-01-22 12:05:00.235284: step: 854/466, loss: 0.07193075120449066 2023-01-22 12:05:00.954681: step: 856/466, loss: 0.11727716773748398 2023-01-22 12:05:01.756944: step: 858/466, loss: 0.15801364183425903 2023-01-22 12:05:02.476966: step: 860/466, loss: 0.021523352712392807 2023-01-22 12:05:03.112405: step: 862/466, loss: 1.1879605054855347 2023-01-22 12:05:03.748882: step: 864/466, loss: 0.02499072439968586 2023-01-22 12:05:04.411380: step: 866/466, loss: 0.09587027877569199 2023-01-22 12:05:05.049177: step: 868/466, loss: 0.1396237313747406 2023-01-22 12:05:05.698933: step: 870/466, loss: 0.06507892161607742 2023-01-22 12:05:06.395971: step: 872/466, loss: 0.044944878667593 2023-01-22 12:05:07.011080: step: 874/466, loss: 0.03046913631260395 2023-01-22 12:05:07.636659: step: 876/466, loss: 0.03433838114142418 2023-01-22 12:05:08.324079: step: 878/466, loss: 0.06141744181513786 2023-01-22 12:05:08.998795: step: 880/466, loss: 0.2057872861623764 2023-01-22 12:05:09.637565: step: 882/466, loss: 0.04921253025531769 2023-01-22 12:05:10.314892: step: 884/466, loss: 0.01852300949394703 2023-01-22 12:05:10.953635: step: 886/466, loss: 0.015369892120361328 2023-01-22 12:05:11.581254: step: 888/466, loss: 0.14897996187210083 2023-01-22 12:05:12.244773: step: 890/466, loss: 0.027026517316699028 2023-01-22 12:05:12.930148: step: 892/466, loss: 3.0559287071228027 2023-01-22 12:05:13.559468: step: 894/466, loss: 0.029394425451755524 2023-01-22 12:05:14.212251: step: 896/466, loss: 0.05040713772177696 2023-01-22 12:05:14.912208: step: 898/466, loss: 0.4347133934497833 2023-01-22 12:05:15.596873: step: 900/466, loss: 0.5011553764343262 2023-01-22 12:05:16.191241: step: 902/466, loss: 0.0415460541844368 2023-01-22 12:05:16.854441: step: 904/466, loss: 0.43558353185653687 2023-01-22 12:05:17.501724: step: 906/466, loss: 0.2618334889411926 2023-01-22 12:05:18.022537: step: 908/466, loss: 0.005394692067056894 2023-01-22 12:05:18.703282: step: 910/466, loss: 0.06840041279792786 2023-01-22 12:05:19.319070: step: 912/466, loss: 0.05254624783992767 2023-01-22 12:05:19.953724: step: 914/466, loss: 0.11503753811120987 2023-01-22 12:05:20.571668: step: 916/466, loss: 0.05245266482234001 2023-01-22 12:05:21.206659: step: 918/466, loss: 0.5177193284034729 2023-01-22 12:05:21.819741: step: 920/466, loss: 0.07801266014575958 2023-01-22 12:05:22.488674: step: 922/466, loss: 0.03294721990823746 2023-01-22 12:05:23.125553: step: 924/466, loss: 0.004950059577822685 2023-01-22 12:05:23.748087: step: 926/466, loss: 0.23353128135204315 2023-01-22 12:05:24.381422: step: 928/466, loss: 0.034723926335573196 2023-01-22 12:05:25.074358: step: 930/466, loss: 0.3066639006137848 2023-01-22 12:05:25.688153: step: 932/466, loss: 0.013994027860462666 ================================================== Loss: 0.132 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3159173976608187, 'r': 0.341694339025933, 'f1': 0.3283006684898207}, 'combined': 0.24190575572934156, 'epoch': 20} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35282005484400175, 'r': 0.325585896230718, 'f1': 0.33865632688680003}, 'combined': 0.2246010872617119, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3094669117647059, 'r': 0.27898910984848485, 'f1': 0.29343874501992034}, 'combined': 0.1956258300132802, 'epoch': 20} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.36503163800381416, 'r': 0.30746165696681743, 'f1': 0.33378245732804074}, 'combined': 0.2178369721509318, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30319557450581547, 'r': 0.33426305272842277, 'f1': 0.31797225412974517}, 'combined': 0.23429534514823327, 'epoch': 20} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3506096078931504, 'r': 0.3105399384196475, 'f1': 0.329360540748111}, 'combined': 0.2184360062992653, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2624113475177305, 'r': 0.35238095238095235, 'f1': 0.30081300813008127}, 'combined': 0.20054200542005418, 'epoch': 20} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40217391304347827, 'r': 0.40217391304347827, 'f1': 0.40217391304347827}, 'combined': 0.2681159420289855, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27941176470588236, 'r': 0.16379310344827586, 'f1': 0.20652173913043476}, 'combined': 0.13768115942028983, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:08:11.124102: step: 2/466, loss: 0.07089442014694214 2023-01-22 12:08:11.808568: step: 4/466, loss: 0.049593016505241394 2023-01-22 12:08:12.534614: step: 6/466, loss: 0.028266672044992447 2023-01-22 12:08:13.139331: step: 8/466, loss: 0.007336086593568325 2023-01-22 12:08:13.799676: step: 10/466, loss: 0.060494426637887955 2023-01-22 12:08:14.392026: step: 12/466, loss: 0.009333517402410507 2023-01-22 12:08:15.064060: step: 14/466, loss: 0.09427694231271744 2023-01-22 12:08:15.702818: step: 16/466, loss: 0.10178826749324799 2023-01-22 12:08:16.362128: step: 18/466, loss: 0.07092513889074326 2023-01-22 12:08:16.925445: step: 20/466, loss: 0.07888146489858627 2023-01-22 12:08:17.592716: step: 22/466, loss: 0.036193717271089554 2023-01-22 12:08:18.221315: step: 24/466, loss: 0.06915474683046341 2023-01-22 12:08:18.829106: step: 26/466, loss: 0.05310487374663353 2023-01-22 12:08:19.479241: step: 28/466, loss: 0.2157112956047058 2023-01-22 12:08:20.112898: step: 30/466, loss: 0.011305051855742931 2023-01-22 12:08:20.757319: step: 32/466, loss: 0.023503711447119713 2023-01-22 12:08:21.407085: step: 34/466, loss: 0.027887945994734764 2023-01-22 12:08:22.146680: step: 36/466, loss: 0.005748997442424297 2023-01-22 12:08:22.788369: step: 38/466, loss: 0.11313948035240173 2023-01-22 12:08:23.425115: step: 40/466, loss: 0.05925760790705681 2023-01-22 12:08:24.012296: step: 42/466, loss: 0.007985766977071762 2023-01-22 12:08:24.667024: step: 44/466, loss: 0.030068177729845047 2023-01-22 12:08:25.300998: step: 46/466, loss: 0.05711689963936806 2023-01-22 12:08:25.949830: step: 48/466, loss: 0.053210336714982986 2023-01-22 12:08:26.566320: step: 50/466, loss: 0.44939592480659485 2023-01-22 12:08:27.184174: step: 52/466, loss: 0.03848563879728317 2023-01-22 12:08:27.880521: step: 54/466, loss: 0.22953787446022034 2023-01-22 12:08:28.541740: step: 56/466, loss: 0.10415433347225189 2023-01-22 12:08:29.155309: step: 58/466, loss: 0.08039028197526932 2023-01-22 12:08:29.973774: step: 60/466, loss: 0.03800737485289574 2023-01-22 12:08:30.628362: step: 62/466, loss: 0.1007322296500206 2023-01-22 12:08:31.237686: step: 64/466, loss: 0.07855730503797531 2023-01-22 12:08:31.879290: step: 66/466, loss: 0.006035930011421442 2023-01-22 12:08:32.589406: step: 68/466, loss: 0.07759585231542587 2023-01-22 12:08:33.283521: step: 70/466, loss: 0.05112384259700775 2023-01-22 12:08:34.020372: step: 72/466, loss: 0.12357943505048752 2023-01-22 12:08:34.754866: step: 74/466, loss: 0.11555702984333038 2023-01-22 12:08:35.563261: step: 76/466, loss: 0.028289109468460083 2023-01-22 12:08:36.220031: step: 78/466, loss: 0.08214019984006882 2023-01-22 12:08:36.832773: step: 80/466, loss: 0.02809653989970684 2023-01-22 12:08:37.484695: step: 82/466, loss: 0.06252384185791016 2023-01-22 12:08:38.138106: step: 84/466, loss: 0.06915662437677383 2023-01-22 12:08:38.806987: step: 86/466, loss: 0.0634181797504425 2023-01-22 12:08:39.468706: step: 88/466, loss: 0.1411423683166504 2023-01-22 12:08:40.164636: step: 90/466, loss: 0.17460677027702332 2023-01-22 12:08:40.901343: step: 92/466, loss: 0.04477472975850105 2023-01-22 12:08:41.620450: step: 94/466, loss: 0.014824727550148964 2023-01-22 12:08:42.271763: step: 96/466, loss: 0.022244907915592194 2023-01-22 12:08:42.969163: step: 98/466, loss: 0.6839988231658936 2023-01-22 12:08:43.656334: step: 100/466, loss: 0.021492348983883858 2023-01-22 12:08:44.314650: step: 102/466, loss: 0.01563192345201969 2023-01-22 12:08:45.011933: step: 104/466, loss: 0.0054557230323553085 2023-01-22 12:08:45.631670: step: 106/466, loss: 0.19327561557292938 2023-01-22 12:08:46.256202: step: 108/466, loss: 0.029385611414909363 2023-01-22 12:08:46.901095: step: 110/466, loss: 0.11822323501110077 2023-01-22 12:08:47.587761: step: 112/466, loss: 0.043833471834659576 2023-01-22 12:08:48.230995: step: 114/466, loss: 1.1973426342010498 2023-01-22 12:08:48.910871: step: 116/466, loss: 0.03044801950454712 2023-01-22 12:08:49.597764: step: 118/466, loss: 0.06850605458021164 2023-01-22 12:08:50.292703: step: 120/466, loss: 0.027155661955475807 2023-01-22 12:08:50.905580: step: 122/466, loss: 0.010388410650193691 2023-01-22 12:08:51.535591: step: 124/466, loss: 0.05787854641675949 2023-01-22 12:08:52.240268: step: 126/466, loss: 0.07108601182699203 2023-01-22 12:08:52.893432: step: 128/466, loss: 0.023687263950705528 2023-01-22 12:08:53.544802: step: 130/466, loss: 0.007384110242128372 2023-01-22 12:08:54.186705: step: 132/466, loss: 0.06754773110151291 2023-01-22 12:08:54.831354: step: 134/466, loss: 0.05037299543619156 2023-01-22 12:08:55.538019: step: 136/466, loss: 0.007863358594477177 2023-01-22 12:08:56.184614: step: 138/466, loss: 0.03649885579943657 2023-01-22 12:08:56.817689: step: 140/466, loss: 0.03751469403505325 2023-01-22 12:08:57.509381: step: 142/466, loss: 0.037405870854854584 2023-01-22 12:08:58.205075: step: 144/466, loss: 0.05105443298816681 2023-01-22 12:08:58.915935: step: 146/466, loss: 0.027803178876638412 2023-01-22 12:08:59.588419: step: 148/466, loss: 0.027827255427837372 2023-01-22 12:09:00.355367: step: 150/466, loss: 0.09103554487228394 2023-01-22 12:09:01.035297: step: 152/466, loss: 0.011072168126702309 2023-01-22 12:09:01.691107: step: 154/466, loss: 0.06728797405958176 2023-01-22 12:09:02.445489: step: 156/466, loss: 0.040998075157403946 2023-01-22 12:09:03.107943: step: 158/466, loss: 0.08982980996370316 2023-01-22 12:09:03.733555: step: 160/466, loss: 0.03985963389277458 2023-01-22 12:09:04.402541: step: 162/466, loss: 0.23042425513267517 2023-01-22 12:09:05.011462: step: 164/466, loss: 0.06946532428264618 2023-01-22 12:09:05.672761: step: 166/466, loss: 0.05300728231668472 2023-01-22 12:09:06.277014: step: 168/466, loss: 0.019600747153162956 2023-01-22 12:09:06.915812: step: 170/466, loss: 0.7505860924720764 2023-01-22 12:09:07.659438: step: 172/466, loss: 0.08120985329151154 2023-01-22 12:09:08.334155: step: 174/466, loss: 0.03269527107477188 2023-01-22 12:09:08.970025: step: 176/466, loss: 0.027445124462246895 2023-01-22 12:09:09.609700: step: 178/466, loss: 0.05066234618425369 2023-01-22 12:09:10.206598: step: 180/466, loss: 0.06630782783031464 2023-01-22 12:09:10.858757: step: 182/466, loss: 0.03700609877705574 2023-01-22 12:09:11.507792: step: 184/466, loss: 0.006005365867167711 2023-01-22 12:09:12.201182: step: 186/466, loss: 0.08509300649166107 2023-01-22 12:09:12.892572: step: 188/466, loss: 0.18691766262054443 2023-01-22 12:09:13.545771: step: 190/466, loss: 0.017493173480033875 2023-01-22 12:09:14.176203: step: 192/466, loss: 0.029288295656442642 2023-01-22 12:09:14.864325: step: 194/466, loss: 0.017930535599589348 2023-01-22 12:09:15.498074: step: 196/466, loss: 0.1339683085680008 2023-01-22 12:09:16.161656: step: 198/466, loss: 0.07602230459451675 2023-01-22 12:09:16.879339: step: 200/466, loss: 0.026082264259457588 2023-01-22 12:09:17.633632: step: 202/466, loss: 0.29196175932884216 2023-01-22 12:09:18.250181: step: 204/466, loss: 0.004092944320291281 2023-01-22 12:09:18.920154: step: 206/466, loss: 0.0425824411213398 2023-01-22 12:09:19.685825: step: 208/466, loss: 1.332690954208374 2023-01-22 12:09:20.336638: step: 210/466, loss: 0.09265276789665222 2023-01-22 12:09:20.996218: step: 212/466, loss: 0.07450100779533386 2023-01-22 12:09:21.641140: step: 214/466, loss: 0.062187325209379196 2023-01-22 12:09:22.296799: step: 216/466, loss: 0.1990901678800583 2023-01-22 12:09:22.964995: step: 218/466, loss: 0.11643319576978683 2023-01-22 12:09:23.622108: step: 220/466, loss: 0.04903949797153473 2023-01-22 12:09:24.237114: step: 222/466, loss: 0.1236814484000206 2023-01-22 12:09:24.922826: step: 224/466, loss: 0.03793327882885933 2023-01-22 12:09:25.573079: step: 226/466, loss: 0.006792228668928146 2023-01-22 12:09:26.252947: step: 228/466, loss: 0.7302740812301636 2023-01-22 12:09:26.888219: step: 230/466, loss: 0.13557493686676025 2023-01-22 12:09:27.560847: step: 232/466, loss: 0.03244762122631073 2023-01-22 12:09:28.191223: step: 234/466, loss: 0.049592722207307816 2023-01-22 12:09:28.853266: step: 236/466, loss: 0.10927261412143707 2023-01-22 12:09:29.601577: step: 238/466, loss: 0.03153478354215622 2023-01-22 12:09:30.194525: step: 240/466, loss: 0.007875959388911724 2023-01-22 12:09:30.830289: step: 242/466, loss: 0.8636252880096436 2023-01-22 12:09:31.488899: step: 244/466, loss: 0.03750202804803848 2023-01-22 12:09:32.154052: step: 246/466, loss: 0.14788737893104553 2023-01-22 12:09:32.819383: step: 248/466, loss: 0.050005968660116196 2023-01-22 12:09:33.489133: step: 250/466, loss: 0.027507677674293518 2023-01-22 12:09:34.093097: step: 252/466, loss: 0.04816051572561264 2023-01-22 12:09:34.765376: step: 254/466, loss: 0.06385090202093124 2023-01-22 12:09:35.416777: step: 256/466, loss: 0.04586299508810043 2023-01-22 12:09:36.090904: step: 258/466, loss: 0.08451353013515472 2023-01-22 12:09:36.696146: step: 260/466, loss: 0.031098363921046257 2023-01-22 12:09:37.347221: step: 262/466, loss: 0.03151926398277283 2023-01-22 12:09:37.982939: step: 264/466, loss: 0.054376523941755295 2023-01-22 12:09:38.598138: step: 266/466, loss: 0.07591578364372253 2023-01-22 12:09:39.209856: step: 268/466, loss: 0.06133175268769264 2023-01-22 12:09:39.880031: step: 270/466, loss: 0.061705656349658966 2023-01-22 12:09:40.539208: step: 272/466, loss: 0.05282587185502052 2023-01-22 12:09:41.181050: step: 274/466, loss: 0.016464704647660255 2023-01-22 12:09:41.805872: step: 276/466, loss: 0.019573258236050606 2023-01-22 12:09:42.480201: step: 278/466, loss: 0.07507047802209854 2023-01-22 12:09:43.152189: step: 280/466, loss: 0.026676513254642487 2023-01-22 12:09:43.863555: step: 282/466, loss: 1.1871224641799927 2023-01-22 12:09:44.501074: step: 284/466, loss: 0.037493109703063965 2023-01-22 12:09:45.203688: step: 286/466, loss: 0.028032071888446808 2023-01-22 12:09:45.883317: step: 288/466, loss: 0.03383786231279373 2023-01-22 12:09:46.594450: step: 290/466, loss: 0.10688777267932892 2023-01-22 12:09:47.360705: step: 292/466, loss: 0.08672936260700226 2023-01-22 12:09:48.092878: step: 294/466, loss: 0.0877775326371193 2023-01-22 12:09:48.733059: step: 296/466, loss: 0.019870324060320854 2023-01-22 12:09:49.391161: step: 298/466, loss: 0.06824889779090881 2023-01-22 12:09:50.013983: step: 300/466, loss: 0.01863786205649376 2023-01-22 12:09:50.633698: step: 302/466, loss: 0.0038263502065092325 2023-01-22 12:09:51.313063: step: 304/466, loss: 0.019151929765939713 2023-01-22 12:09:51.974236: step: 306/466, loss: 0.04331501945853233 2023-01-22 12:09:52.571906: step: 308/466, loss: 0.06901475042104721 2023-01-22 12:09:53.256518: step: 310/466, loss: 0.07544735819101334 2023-01-22 12:09:53.907078: step: 312/466, loss: 0.02430512011051178 2023-01-22 12:09:54.561522: step: 314/466, loss: 0.0816473513841629 2023-01-22 12:09:55.207544: step: 316/466, loss: 0.04463319852948189 2023-01-22 12:09:55.848201: step: 318/466, loss: 0.010686423629522324 2023-01-22 12:09:56.556821: step: 320/466, loss: 0.047023750841617584 2023-01-22 12:09:57.227476: step: 322/466, loss: 0.06095031648874283 2023-01-22 12:09:57.824802: step: 324/466, loss: 0.019171956926584244 2023-01-22 12:09:58.468865: step: 326/466, loss: 0.10878366231918335 2023-01-22 12:09:59.206923: step: 328/466, loss: 0.10855834186077118 2023-01-22 12:09:59.891342: step: 330/466, loss: 0.09366338700056076 2023-01-22 12:10:00.519974: step: 332/466, loss: 0.021269308403134346 2023-01-22 12:10:01.164577: step: 334/466, loss: 0.0992373451590538 2023-01-22 12:10:01.832506: step: 336/466, loss: 0.15862494707107544 2023-01-22 12:10:02.406527: step: 338/466, loss: 0.027936633676290512 2023-01-22 12:10:03.066569: step: 340/466, loss: 0.027892297133803368 2023-01-22 12:10:03.653745: step: 342/466, loss: 0.054857946932315826 2023-01-22 12:10:04.348700: step: 344/466, loss: 0.019250625744462013 2023-01-22 12:10:04.981394: step: 346/466, loss: 0.058253031224012375 2023-01-22 12:10:05.759438: step: 348/466, loss: 0.10461164265871048 2023-01-22 12:10:06.311509: step: 350/466, loss: 0.008837548084557056 2023-01-22 12:10:06.923823: step: 352/466, loss: 0.05633135139942169 2023-01-22 12:10:07.505329: step: 354/466, loss: 0.35336071252822876 2023-01-22 12:10:08.133034: step: 356/466, loss: 0.14346835017204285 2023-01-22 12:10:08.737173: step: 358/466, loss: 0.07668203860521317 2023-01-22 12:10:09.352727: step: 360/466, loss: 0.08573367446660995 2023-01-22 12:10:09.991164: step: 362/466, loss: 0.013371109962463379 2023-01-22 12:10:10.610029: step: 364/466, loss: 0.014967229217290878 2023-01-22 12:10:11.387568: step: 366/466, loss: 0.035412389785051346 2023-01-22 12:10:11.995838: step: 368/466, loss: 0.026346392929553986 2023-01-22 12:10:12.659795: step: 370/466, loss: 0.06741994619369507 2023-01-22 12:10:13.362039: step: 372/466, loss: 0.15302880108356476 2023-01-22 12:10:14.012617: step: 374/466, loss: 0.04921845719218254 2023-01-22 12:10:14.630397: step: 376/466, loss: 0.03806721791625023 2023-01-22 12:10:15.238809: step: 378/466, loss: 0.08171114325523376 2023-01-22 12:10:15.893789: step: 380/466, loss: 0.01198134571313858 2023-01-22 12:10:16.499253: step: 382/466, loss: 0.23268190026283264 2023-01-22 12:10:17.092313: step: 384/466, loss: 0.25207993388175964 2023-01-22 12:10:17.732869: step: 386/466, loss: 0.2673199474811554 2023-01-22 12:10:18.434433: step: 388/466, loss: 0.07382744550704956 2023-01-22 12:10:19.061658: step: 390/466, loss: 0.05229777470231056 2023-01-22 12:10:19.660534: step: 392/466, loss: 0.04268745332956314 2023-01-22 12:10:20.270135: step: 394/466, loss: 0.0031622499227523804 2023-01-22 12:10:20.913238: step: 396/466, loss: 0.19509929418563843 2023-01-22 12:10:21.626524: step: 398/466, loss: 0.08037510514259338 2023-01-22 12:10:22.318102: step: 400/466, loss: 0.05764692276716232 2023-01-22 12:10:22.974995: step: 402/466, loss: 0.05277779698371887 2023-01-22 12:10:23.695573: step: 404/466, loss: 0.049481362104415894 2023-01-22 12:10:24.343988: step: 406/466, loss: 0.1518610268831253 2023-01-22 12:10:24.987340: step: 408/466, loss: 0.0408242866396904 2023-01-22 12:10:25.615364: step: 410/466, loss: 0.07179911434650421 2023-01-22 12:10:26.305884: step: 412/466, loss: 0.08425852656364441 2023-01-22 12:10:27.019746: step: 414/466, loss: 0.05670277029275894 2023-01-22 12:10:27.615494: step: 416/466, loss: 0.014492363668978214 2023-01-22 12:10:28.298422: step: 418/466, loss: 0.10018357634544373 2023-01-22 12:10:28.877375: step: 420/466, loss: 0.005106528755277395 2023-01-22 12:10:29.571711: step: 422/466, loss: 0.03475326672196388 2023-01-22 12:10:30.225678: step: 424/466, loss: 0.05975669249892235 2023-01-22 12:10:30.899943: step: 426/466, loss: 1.164226770401001 2023-01-22 12:10:31.559001: step: 428/466, loss: 0.03273681923747063 2023-01-22 12:10:32.226432: step: 430/466, loss: 0.020399469882249832 2023-01-22 12:10:32.871023: step: 432/466, loss: 0.12875859439373016 2023-01-22 12:10:33.557840: step: 434/466, loss: 0.057574886828660965 2023-01-22 12:10:34.233808: step: 436/466, loss: 0.04370862618088722 2023-01-22 12:10:34.869013: step: 438/466, loss: 0.07838702201843262 2023-01-22 12:10:35.500630: step: 440/466, loss: 0.3533000349998474 2023-01-22 12:10:36.154367: step: 442/466, loss: 0.025857489556074142 2023-01-22 12:10:36.808557: step: 444/466, loss: 0.02611837536096573 2023-01-22 12:10:37.468241: step: 446/466, loss: 0.034644901752471924 2023-01-22 12:10:38.131704: step: 448/466, loss: 0.18575358390808105 2023-01-22 12:10:38.808594: step: 450/466, loss: 0.016335871070623398 2023-01-22 12:10:39.458997: step: 452/466, loss: 0.16690199077129364 2023-01-22 12:10:40.067215: step: 454/466, loss: 0.04394973814487457 2023-01-22 12:10:40.707292: step: 456/466, loss: 0.41671910881996155 2023-01-22 12:10:41.319446: step: 458/466, loss: 4.167999267578125 2023-01-22 12:10:41.971680: step: 460/466, loss: 1.054667592048645 2023-01-22 12:10:42.719808: step: 462/466, loss: 0.10038013756275177 2023-01-22 12:10:43.431146: step: 464/466, loss: 0.034050267189741135 2023-01-22 12:10:44.123793: step: 466/466, loss: 0.040084537118673325 2023-01-22 12:10:44.828203: step: 468/466, loss: 0.08686064928770065 2023-01-22 12:10:45.490996: step: 470/466, loss: 0.03887888044118881 2023-01-22 12:10:46.062224: step: 472/466, loss: 0.019564831629395485 2023-01-22 12:10:46.701790: step: 474/466, loss: 0.011036333627998829 2023-01-22 12:10:47.359117: step: 476/466, loss: 0.13818205893039703 2023-01-22 12:10:48.004471: step: 478/466, loss: 0.0896473228931427 2023-01-22 12:10:48.722857: step: 480/466, loss: 0.04010609909892082 2023-01-22 12:10:49.358060: step: 482/466, loss: 0.05827804282307625 2023-01-22 12:10:49.981535: step: 484/466, loss: 0.033501170575618744 2023-01-22 12:10:50.663367: step: 486/466, loss: 0.12091982364654541 2023-01-22 12:10:51.355383: step: 488/466, loss: 0.059742338955402374 2023-01-22 12:10:52.048511: step: 490/466, loss: 0.0497790165245533 2023-01-22 12:10:52.749430: step: 492/466, loss: 0.022443275898694992 2023-01-22 12:10:53.418058: step: 494/466, loss: 0.07436135411262512 2023-01-22 12:10:54.012945: step: 496/466, loss: 0.05396753177046776 2023-01-22 12:10:54.701781: step: 498/466, loss: 0.06043091043829918 2023-01-22 12:10:55.299758: step: 500/466, loss: 0.006692246999591589 2023-01-22 12:10:55.991211: step: 502/466, loss: 0.056607022881507874 2023-01-22 12:10:56.655582: step: 504/466, loss: 0.03901538997888565 2023-01-22 12:10:57.362414: step: 506/466, loss: 0.047609083354473114 2023-01-22 12:10:57.923303: step: 508/466, loss: 0.008187373168766499 2023-01-22 12:10:58.537302: step: 510/466, loss: 0.038169074803590775 2023-01-22 12:10:59.196107: step: 512/466, loss: 0.10957618057727814 2023-01-22 12:10:59.886095: step: 514/466, loss: 0.06552216410636902 2023-01-22 12:11:00.620704: step: 516/466, loss: 0.09400929510593414 2023-01-22 12:11:01.310684: step: 518/466, loss: 0.03930743411183357 2023-01-22 12:11:01.999311: step: 520/466, loss: 0.011088813655078411 2023-01-22 12:11:02.730240: step: 522/466, loss: 0.043089017271995544 2023-01-22 12:11:03.378071: step: 524/466, loss: 0.01242264173924923 2023-01-22 12:11:04.041733: step: 526/466, loss: 0.0019793591927736998 2023-01-22 12:11:04.701130: step: 528/466, loss: 0.057496801018714905 2023-01-22 12:11:05.360628: step: 530/466, loss: 0.018461046740412712 2023-01-22 12:11:06.044765: step: 532/466, loss: 0.05408164858818054 2023-01-22 12:11:06.689485: step: 534/466, loss: 0.024738499894738197 2023-01-22 12:11:07.361049: step: 536/466, loss: 0.04198291152715683 2023-01-22 12:11:08.017972: step: 538/466, loss: 0.7320752143859863 2023-01-22 12:11:08.734465: step: 540/466, loss: 0.132931649684906 2023-01-22 12:11:09.416607: step: 542/466, loss: 0.0771997720003128 2023-01-22 12:11:10.067139: step: 544/466, loss: 0.30851155519485474 2023-01-22 12:11:10.730477: step: 546/466, loss: 0.016442397609353065 2023-01-22 12:11:11.380797: step: 548/466, loss: 0.07672475278377533 2023-01-22 12:11:12.063478: step: 550/466, loss: 0.03206007182598114 2023-01-22 12:11:12.718041: step: 552/466, loss: 0.08088162541389465 2023-01-22 12:11:13.449218: step: 554/466, loss: 0.33736979961395264 2023-01-22 12:11:14.092752: step: 556/466, loss: 0.049401722848415375 2023-01-22 12:11:14.795871: step: 558/466, loss: 0.033894944936037064 2023-01-22 12:11:15.435610: step: 560/466, loss: 0.6094706654548645 2023-01-22 12:11:16.065623: step: 562/466, loss: 0.030762692913413048 2023-01-22 12:11:16.727543: step: 564/466, loss: 0.022721601650118828 2023-01-22 12:11:17.356531: step: 566/466, loss: 0.01707230508327484 2023-01-22 12:11:18.020462: step: 568/466, loss: 0.02287483401596546 2023-01-22 12:11:18.663910: step: 570/466, loss: 0.045053161680698395 2023-01-22 12:11:19.265806: step: 572/466, loss: 0.04668237641453743 2023-01-22 12:11:19.902329: step: 574/466, loss: 0.017005762085318565 2023-01-22 12:11:20.551502: step: 576/466, loss: 0.034409213811159134 2023-01-22 12:11:21.189974: step: 578/466, loss: 0.08835047483444214 2023-01-22 12:11:21.862298: step: 580/466, loss: 0.05900361016392708 2023-01-22 12:11:22.464178: step: 582/466, loss: 0.07710152119398117 2023-01-22 12:11:23.096688: step: 584/466, loss: 0.033414438366889954 2023-01-22 12:11:23.808958: step: 586/466, loss: 0.04823828116059303 2023-01-22 12:11:24.527267: step: 588/466, loss: 0.04212420806288719 2023-01-22 12:11:25.131600: step: 590/466, loss: 0.015205265022814274 2023-01-22 12:11:25.766146: step: 592/466, loss: 0.019105862826108932 2023-01-22 12:11:26.394129: step: 594/466, loss: 0.04656066745519638 2023-01-22 12:11:27.038549: step: 596/466, loss: 0.5374192595481873 2023-01-22 12:11:27.706797: step: 598/466, loss: 0.12201011925935745 2023-01-22 12:11:28.348179: step: 600/466, loss: 0.04644192382693291 2023-01-22 12:11:29.052270: step: 602/466, loss: 0.08558553457260132 2023-01-22 12:11:29.695393: step: 604/466, loss: 0.004719720687717199 2023-01-22 12:11:30.435912: step: 606/466, loss: 0.05577418953180313 2023-01-22 12:11:31.038987: step: 608/466, loss: 0.03790593519806862 2023-01-22 12:11:31.699768: step: 610/466, loss: 0.15378184616565704 2023-01-22 12:11:32.296301: step: 612/466, loss: 0.09049602597951889 2023-01-22 12:11:32.993354: step: 614/466, loss: 0.056327223777770996 2023-01-22 12:11:33.589843: step: 616/466, loss: 0.04873852804303169 2023-01-22 12:11:34.249251: step: 618/466, loss: 0.021303944289684296 2023-01-22 12:11:34.928901: step: 620/466, loss: 0.03830740973353386 2023-01-22 12:11:35.539731: step: 622/466, loss: 0.08673488348722458 2023-01-22 12:11:36.193240: step: 624/466, loss: 0.010257110930979252 2023-01-22 12:11:36.854328: step: 626/466, loss: 0.005480485036969185 2023-01-22 12:11:37.481518: step: 628/466, loss: 0.09009231626987457 2023-01-22 12:11:38.132650: step: 630/466, loss: 0.12214872241020203 2023-01-22 12:11:38.825212: step: 632/466, loss: 0.048097286373376846 2023-01-22 12:11:39.443727: step: 634/466, loss: 0.18809495866298676 2023-01-22 12:11:40.075042: step: 636/466, loss: 0.040272731333971024 2023-01-22 12:11:40.795967: step: 638/466, loss: 0.009438310749828815 2023-01-22 12:11:41.473778: step: 640/466, loss: 0.052058763802051544 2023-01-22 12:11:42.169667: step: 642/466, loss: 0.04133598506450653 2023-01-22 12:11:42.809737: step: 644/466, loss: 0.028369462117552757 2023-01-22 12:11:43.416192: step: 646/466, loss: 0.00392342172563076 2023-01-22 12:11:44.217668: step: 648/466, loss: 0.47618475556373596 2023-01-22 12:11:44.896407: step: 650/466, loss: 0.06533084064722061 2023-01-22 12:11:45.545859: step: 652/466, loss: 0.022673049941658974 2023-01-22 12:11:46.250014: step: 654/466, loss: 0.06585227698087692 2023-01-22 12:11:46.939320: step: 656/466, loss: 0.1225774809718132 2023-01-22 12:11:47.565254: step: 658/466, loss: 0.12389501929283142 2023-01-22 12:11:48.220958: step: 660/466, loss: 0.10344074666500092 2023-01-22 12:11:48.851543: step: 662/466, loss: 0.06762241572141647 2023-01-22 12:11:49.519700: step: 664/466, loss: 0.018936727195978165 2023-01-22 12:11:50.144059: step: 666/466, loss: 0.1071561723947525 2023-01-22 12:11:50.838673: step: 668/466, loss: 0.07230392098426819 2023-01-22 12:11:51.505621: step: 670/466, loss: 0.034910738468170166 2023-01-22 12:11:52.115022: step: 672/466, loss: 0.009688897989690304 2023-01-22 12:11:52.715463: step: 674/466, loss: 0.03755712881684303 2023-01-22 12:11:53.394128: step: 676/466, loss: 0.12783585488796234 2023-01-22 12:11:54.202639: step: 678/466, loss: 0.01756904274225235 2023-01-22 12:11:54.904086: step: 680/466, loss: 0.09806942939758301 2023-01-22 12:11:55.507934: step: 682/466, loss: 0.01094213966280222 2023-01-22 12:11:56.129097: step: 684/466, loss: 0.08024577796459198 2023-01-22 12:11:56.734411: step: 686/466, loss: 0.08542653173208237 2023-01-22 12:11:57.397947: step: 688/466, loss: 0.0802675113081932 2023-01-22 12:11:58.133785: step: 690/466, loss: 0.07116401195526123 2023-01-22 12:11:58.755022: step: 692/466, loss: 0.00978310126811266 2023-01-22 12:11:59.526980: step: 694/466, loss: 0.062231533229351044 2023-01-22 12:12:00.182195: step: 696/466, loss: 0.009687211364507675 2023-01-22 12:12:00.851081: step: 698/466, loss: 0.017184313386678696 2023-01-22 12:12:01.473183: step: 700/466, loss: 0.10960377752780914 2023-01-22 12:12:02.104065: step: 702/466, loss: 0.11471197009086609 2023-01-22 12:12:02.736784: step: 704/466, loss: 0.11303263157606125 2023-01-22 12:12:03.350253: step: 706/466, loss: 0.06571970880031586 2023-01-22 12:12:04.009273: step: 708/466, loss: 0.02506648376584053 2023-01-22 12:12:04.663550: step: 710/466, loss: 0.05620276555418968 2023-01-22 12:12:05.374075: step: 712/466, loss: 0.1362437605857849 2023-01-22 12:12:06.082044: step: 714/466, loss: 0.10670512914657593 2023-01-22 12:12:06.708591: step: 716/466, loss: 0.012019413523375988 2023-01-22 12:12:07.318639: step: 718/466, loss: 0.5341221690177917 2023-01-22 12:12:07.984810: step: 720/466, loss: 0.14672306180000305 2023-01-22 12:12:08.652312: step: 722/466, loss: 0.11437015235424042 2023-01-22 12:12:09.340298: step: 724/466, loss: 0.09794958680868149 2023-01-22 12:12:09.976071: step: 726/466, loss: 0.01830868236720562 2023-01-22 12:12:10.620055: step: 728/466, loss: 0.04564938694238663 2023-01-22 12:12:11.295368: step: 730/466, loss: 0.05757681280374527 2023-01-22 12:12:11.937924: step: 732/466, loss: 0.06785449385643005 2023-01-22 12:12:12.586846: step: 734/466, loss: 0.027226001024246216 2023-01-22 12:12:13.256488: step: 736/466, loss: 0.06051834672689438 2023-01-22 12:12:13.991980: step: 738/466, loss: 0.8186269402503967 2023-01-22 12:12:14.692887: step: 740/466, loss: 0.22527964413166046 2023-01-22 12:12:15.498106: step: 742/466, loss: 0.024167144671082497 2023-01-22 12:12:16.235778: step: 744/466, loss: 0.06646440923213959 2023-01-22 12:12:16.890708: step: 746/466, loss: 0.049378346651792526 2023-01-22 12:12:17.479288: step: 748/466, loss: 0.01376586128026247 2023-01-22 12:12:18.105673: step: 750/466, loss: 0.08511679619550705 2023-01-22 12:12:18.736021: step: 752/466, loss: 0.028222622349858284 2023-01-22 12:12:19.362344: step: 754/466, loss: 0.12172418087720871 2023-01-22 12:12:20.040708: step: 756/466, loss: 0.021877678111195564 2023-01-22 12:12:20.689896: step: 758/466, loss: 0.05017302185297012 2023-01-22 12:12:21.323185: step: 760/466, loss: 0.050847191363573074 2023-01-22 12:12:22.026321: step: 762/466, loss: 0.05814317241311073 2023-01-22 12:12:22.671940: step: 764/466, loss: 0.12317948043346405 2023-01-22 12:12:23.322121: step: 766/466, loss: 0.013819770887494087 2023-01-22 12:12:24.021716: step: 768/466, loss: 0.10745225101709366 2023-01-22 12:12:24.706601: step: 770/466, loss: 0.5924587845802307 2023-01-22 12:12:25.320162: step: 772/466, loss: 0.10218442976474762 2023-01-22 12:12:25.955868: step: 774/466, loss: 0.06877769529819489 2023-01-22 12:12:26.619087: step: 776/466, loss: 0.039890218526124954 2023-01-22 12:12:27.274157: step: 778/466, loss: 0.0665740892291069 2023-01-22 12:12:27.942065: step: 780/466, loss: 0.037569571286439896 2023-01-22 12:12:28.575757: step: 782/466, loss: 0.021711068227887154 2023-01-22 12:12:29.197514: step: 784/466, loss: 0.021156296133995056 2023-01-22 12:12:29.811368: step: 786/466, loss: 0.05858328565955162 2023-01-22 12:12:30.484690: step: 788/466, loss: 0.016213275492191315 2023-01-22 12:12:31.116304: step: 790/466, loss: 0.011195183731615543 2023-01-22 12:12:31.822187: step: 792/466, loss: 0.01327480748295784 2023-01-22 12:12:32.478138: step: 794/466, loss: 0.015499280765652657 2023-01-22 12:12:33.165833: step: 796/466, loss: 0.007978331297636032 2023-01-22 12:12:33.812736: step: 798/466, loss: 0.31933045387268066 2023-01-22 12:12:34.498538: step: 800/466, loss: 0.13486042618751526 2023-01-22 12:12:35.182971: step: 802/466, loss: 0.028332777321338654 2023-01-22 12:12:35.875194: step: 804/466, loss: 0.036711569875478745 2023-01-22 12:12:36.598208: step: 806/466, loss: 0.02059587650001049 2023-01-22 12:12:37.242772: step: 808/466, loss: 0.023733140900731087 2023-01-22 12:12:37.948581: step: 810/466, loss: 0.055078815668821335 2023-01-22 12:12:38.561154: step: 812/466, loss: 0.015577802434563637 2023-01-22 12:12:39.207306: step: 814/466, loss: 0.01860414631664753 2023-01-22 12:12:39.820349: step: 816/466, loss: 0.005331545602530241 2023-01-22 12:12:40.499268: step: 818/466, loss: 0.060170210897922516 2023-01-22 12:12:41.106793: step: 820/466, loss: 0.0259061511605978 2023-01-22 12:12:41.773663: step: 822/466, loss: 0.05004158988595009 2023-01-22 12:12:42.441239: step: 824/466, loss: 0.10845375061035156 2023-01-22 12:12:43.117690: step: 826/466, loss: 0.08583880960941315 2023-01-22 12:12:43.858012: step: 828/466, loss: 0.0069772228598594666 2023-01-22 12:12:44.471257: step: 830/466, loss: 0.0416720025241375 2023-01-22 12:12:45.177812: step: 832/466, loss: 0.04423435404896736 2023-01-22 12:12:45.806891: step: 834/466, loss: 0.10517225414514542 2023-01-22 12:12:46.441382: step: 836/466, loss: 0.03952678292989731 2023-01-22 12:12:47.117958: step: 838/466, loss: 0.13019147515296936 2023-01-22 12:12:47.766153: step: 840/466, loss: 1.2260103225708008 2023-01-22 12:12:48.422719: step: 842/466, loss: 0.016969038173556328 2023-01-22 12:12:49.066916: step: 844/466, loss: 0.06362573057413101 2023-01-22 12:12:49.708898: step: 846/466, loss: 0.12209760397672653 2023-01-22 12:12:50.326151: step: 848/466, loss: 0.1008475199341774 2023-01-22 12:12:50.972426: step: 850/466, loss: 0.00492806127294898 2023-01-22 12:12:51.705840: step: 852/466, loss: 0.039185430854558945 2023-01-22 12:12:52.327069: step: 854/466, loss: 0.061721861362457275 2023-01-22 12:12:52.931826: step: 856/466, loss: 0.02105090022087097 2023-01-22 12:12:53.615618: step: 858/466, loss: 0.04675479978322983 2023-01-22 12:12:54.288589: step: 860/466, loss: 0.019012775272130966 2023-01-22 12:12:54.919795: step: 862/466, loss: 0.22230355441570282 2023-01-22 12:12:55.566536: step: 864/466, loss: 0.08994642645120621 2023-01-22 12:12:56.224792: step: 866/466, loss: 0.03868407756090164 2023-01-22 12:12:56.913573: step: 868/466, loss: 0.09276425838470459 2023-01-22 12:12:57.587868: step: 870/466, loss: 0.06973981112241745 2023-01-22 12:12:58.214004: step: 872/466, loss: 0.01598411612212658 2023-01-22 12:12:58.882826: step: 874/466, loss: 0.10209723562002182 2023-01-22 12:12:59.529432: step: 876/466, loss: 0.012818582355976105 2023-01-22 12:13:00.172665: step: 878/466, loss: 0.024183141067624092 2023-01-22 12:13:00.864647: step: 880/466, loss: 0.052464935928583145 2023-01-22 12:13:01.566220: step: 882/466, loss: 0.06050201132893562 2023-01-22 12:13:02.235870: step: 884/466, loss: 0.7219332456588745 2023-01-22 12:13:02.907392: step: 886/466, loss: 0.05493218079209328 2023-01-22 12:13:03.573893: step: 888/466, loss: 0.01993950642645359 2023-01-22 12:13:04.275753: step: 890/466, loss: 0.2340915948152542 2023-01-22 12:13:04.933218: step: 892/466, loss: 0.0463443323969841 2023-01-22 12:13:05.558964: step: 894/466, loss: 0.19318298995494843 2023-01-22 12:13:06.251743: step: 896/466, loss: 0.051702868193387985 2023-01-22 12:13:06.861840: step: 898/466, loss: 0.0025893133133649826 2023-01-22 12:13:07.493665: step: 900/466, loss: 0.05517101660370827 2023-01-22 12:13:08.101219: step: 902/466, loss: 0.024286819621920586 2023-01-22 12:13:08.762482: step: 904/466, loss: 0.10800661891698837 2023-01-22 12:13:09.394730: step: 906/466, loss: 0.04178786650300026 2023-01-22 12:13:10.071447: step: 908/466, loss: 0.7517957091331482 2023-01-22 12:13:10.780925: step: 910/466, loss: 0.07966821640729904 2023-01-22 12:13:11.375317: step: 912/466, loss: 0.03132426366209984 2023-01-22 12:13:12.123712: step: 914/466, loss: 0.09199076145887375 2023-01-22 12:13:12.795777: step: 916/466, loss: 0.07831721752882004 2023-01-22 12:13:13.495897: step: 918/466, loss: 0.02353138104081154 2023-01-22 12:13:14.147405: step: 920/466, loss: 0.007074636872857809 2023-01-22 12:13:14.710416: step: 922/466, loss: 0.025794483721256256 2023-01-22 12:13:15.388749: step: 924/466, loss: 0.06823277473449707 2023-01-22 12:13:16.032832: step: 926/466, loss: 0.04336352273821831 2023-01-22 12:13:16.704438: step: 928/466, loss: 0.09034590423107147 2023-01-22 12:13:17.362216: step: 930/466, loss: 0.03232569992542267 2023-01-22 12:13:18.005016: step: 932/466, loss: 0.10629475116729736 ================================================== Loss: 0.106 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3089220248166949, 'r': 0.3464381720430108, 'f1': 0.3266062909958259}, 'combined': 0.2406572670495559, 'epoch': 21} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34468035545814396, 'r': 0.3273417079155834, 'f1': 0.3357873569632275}, 'combined': 0.22269835073208866, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30450349301397206, 'r': 0.2889322916666667, 'f1': 0.29651360544217686}, 'combined': 0.19767573696145124, 'epoch': 21} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35644369333713294, 'r': 0.3073323005809768, 'f1': 0.3300711725178662}, 'combined': 0.21541487048534425, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2881215452571316, 'r': 0.3356862026335461, 'f1': 0.31009049743712325}, 'combined': 0.22848773495366975, 'epoch': 21} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32986997749788943, 'r': 0.3115597619212503, 'f1': 0.32045352854166065}, 'combined': 0.21252876504317386, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26215277777777773, 'r': 0.35952380952380947, 'f1': 0.3032128514056224}, 'combined': 0.20214190093708156, 'epoch': 21} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.32608695652173914, 'f1': 0.3488372093023256}, 'combined': 0.23255813953488372, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34210526315789475, 'r': 0.22413793103448276, 'f1': 0.2708333333333333}, 'combined': 0.18055555555555552, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2886258254463912, 'r': 0.3357260550258782, 'f1': 0.31039935262918916}, 'combined': 0.22871531246361304, 'epoch': 13} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35413085903566327, 'r': 0.32071071736043616, 'f1': 0.336593256293779}, 'combined': 0.2232328331896565, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 13} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:16:02.543875: step: 2/466, loss: 0.05701306089758873 2023-01-22 12:16:03.090340: step: 4/466, loss: 0.016013167798519135 2023-01-22 12:16:03.747274: step: 6/466, loss: 0.05998304486274719 2023-01-22 12:16:04.405488: step: 8/466, loss: 0.009235535748302937 2023-01-22 12:16:05.128053: step: 10/466, loss: 0.0133706359192729 2023-01-22 12:16:05.815666: step: 12/466, loss: 0.012997171841561794 2023-01-22 12:16:06.478258: step: 14/466, loss: 0.05838117375969887 2023-01-22 12:16:07.123681: step: 16/466, loss: 0.02190295420587063 2023-01-22 12:16:07.781372: step: 18/466, loss: 0.009717869572341442 2023-01-22 12:16:08.445848: step: 20/466, loss: 0.0579771064221859 2023-01-22 12:16:09.215642: step: 22/466, loss: 0.2816099226474762 2023-01-22 12:16:09.860095: step: 24/466, loss: 0.016298677772283554 2023-01-22 12:16:10.531701: step: 26/466, loss: 0.01881585642695427 2023-01-22 12:16:11.203522: step: 28/466, loss: 0.02248998172581196 2023-01-22 12:16:11.850768: step: 30/466, loss: 0.11135374009609222 2023-01-22 12:16:12.447063: step: 32/466, loss: 0.009332059882581234 2023-01-22 12:16:13.064134: step: 34/466, loss: 0.04593450576066971 2023-01-22 12:16:13.713579: step: 36/466, loss: 1.151442050933838 2023-01-22 12:16:14.349907: step: 38/466, loss: 0.027117688208818436 2023-01-22 12:16:15.054291: step: 40/466, loss: 0.4642413556575775 2023-01-22 12:16:15.776153: step: 42/466, loss: 0.012373731471598148 2023-01-22 12:16:16.438227: step: 44/466, loss: 0.017485400661826134 2023-01-22 12:16:17.030845: step: 46/466, loss: 0.05597267299890518 2023-01-22 12:16:17.693941: step: 48/466, loss: 0.020483272150158882 2023-01-22 12:16:18.424585: step: 50/466, loss: 0.09144927561283112 2023-01-22 12:16:19.056721: step: 52/466, loss: 0.0037049695383757353 2023-01-22 12:16:19.769233: step: 54/466, loss: 0.12996885180473328 2023-01-22 12:16:20.450217: step: 56/466, loss: 0.06591861695051193 2023-01-22 12:16:21.102308: step: 58/466, loss: 0.04005799442529678 2023-01-22 12:16:21.754171: step: 60/466, loss: 0.08128900080919266 2023-01-22 12:16:22.400924: step: 62/466, loss: 0.06788836419582367 2023-01-22 12:16:23.009776: step: 64/466, loss: 0.03723767027258873 2023-01-22 12:16:23.630011: step: 66/466, loss: 0.05795411020517349 2023-01-22 12:16:24.268329: step: 68/466, loss: 0.006802109070122242 2023-01-22 12:16:24.942939: step: 70/466, loss: 0.023723842576146126 2023-01-22 12:16:25.636006: step: 72/466, loss: 0.04105640947818756 2023-01-22 12:16:26.272226: step: 74/466, loss: 0.014921381138265133 2023-01-22 12:16:26.946574: step: 76/466, loss: 0.010845566168427467 2023-01-22 12:16:27.589380: step: 78/466, loss: 0.007722970098257065 2023-01-22 12:16:28.211260: step: 80/466, loss: 0.058121874928474426 2023-01-22 12:16:28.792811: step: 82/466, loss: 0.03176690265536308 2023-01-22 12:16:29.470878: step: 84/466, loss: 0.0331866480410099 2023-01-22 12:16:30.079956: step: 86/466, loss: 0.016345029696822166 2023-01-22 12:16:30.709528: step: 88/466, loss: 0.034479204565286636 2023-01-22 12:16:31.346223: step: 90/466, loss: 0.01930215209722519 2023-01-22 12:16:32.028516: step: 92/466, loss: 0.20928475260734558 2023-01-22 12:16:32.742306: step: 94/466, loss: 0.05373154953122139 2023-01-22 12:16:33.397124: step: 96/466, loss: 0.1054593026638031 2023-01-22 12:16:34.016613: step: 98/466, loss: 0.03443080559372902 2023-01-22 12:16:34.631574: step: 100/466, loss: 0.024371333420276642 2023-01-22 12:16:35.264153: step: 102/466, loss: 0.0245809443295002 2023-01-22 12:16:35.881382: step: 104/466, loss: 0.025508206337690353 2023-01-22 12:16:36.513760: step: 106/466, loss: 0.013353685848414898 2023-01-22 12:16:37.208444: step: 108/466, loss: 0.02436508983373642 2023-01-22 12:16:37.844225: step: 110/466, loss: 0.011672325432300568 2023-01-22 12:16:38.488149: step: 112/466, loss: 0.02600080519914627 2023-01-22 12:16:39.152880: step: 114/466, loss: 0.005994630511850119 2023-01-22 12:16:39.768308: step: 116/466, loss: 0.04980537295341492 2023-01-22 12:16:40.452326: step: 118/466, loss: 0.7140741944313049 2023-01-22 12:16:41.098617: step: 120/466, loss: 0.4516701400279999 2023-01-22 12:16:41.745160: step: 122/466, loss: 0.06617661565542221 2023-01-22 12:16:42.424103: step: 124/466, loss: 0.05022461339831352 2023-01-22 12:16:43.008467: step: 126/466, loss: 0.004974848125129938 2023-01-22 12:16:43.635510: step: 128/466, loss: 0.07024489343166351 2023-01-22 12:16:44.268648: step: 130/466, loss: 0.06471412628889084 2023-01-22 12:16:44.900399: step: 132/466, loss: 0.019128937274217606 2023-01-22 12:16:45.612578: step: 134/466, loss: 0.06269881129264832 2023-01-22 12:16:46.267304: step: 136/466, loss: 0.006089359056204557 2023-01-22 12:16:46.883141: step: 138/466, loss: 0.03454560786485672 2023-01-22 12:16:47.584162: step: 140/466, loss: 0.043931517750024796 2023-01-22 12:16:48.287522: step: 142/466, loss: 0.04773161932826042 2023-01-22 12:16:48.919096: step: 144/466, loss: 0.00738488556817174 2023-01-22 12:16:49.532480: step: 146/466, loss: 0.010454999282956123 2023-01-22 12:16:50.224948: step: 148/466, loss: 0.06720507889986038 2023-01-22 12:16:50.909425: step: 150/466, loss: 0.09316574037075043 2023-01-22 12:16:51.520240: step: 152/466, loss: 0.21962876617908478 2023-01-22 12:16:52.206770: step: 154/466, loss: 0.031010324135422707 2023-01-22 12:16:52.847268: step: 156/466, loss: 0.04724857956171036 2023-01-22 12:16:53.504466: step: 158/466, loss: 0.09203273057937622 2023-01-22 12:16:54.129123: step: 160/466, loss: 0.19428269565105438 2023-01-22 12:16:54.805535: step: 162/466, loss: 0.033569034188985825 2023-01-22 12:16:55.431911: step: 164/466, loss: 0.04066508635878563 2023-01-22 12:16:56.031944: step: 166/466, loss: 0.09030517190694809 2023-01-22 12:16:56.685756: step: 168/466, loss: 0.052252087742090225 2023-01-22 12:16:57.231783: step: 170/466, loss: 0.035676948726177216 2023-01-22 12:16:57.874465: step: 172/466, loss: 0.006707613822072744 2023-01-22 12:16:58.494891: step: 174/466, loss: 0.027036748826503754 2023-01-22 12:16:59.224064: step: 176/466, loss: 0.0769827589392662 2023-01-22 12:16:59.825068: step: 178/466, loss: 0.15034034848213196 2023-01-22 12:17:00.493890: step: 180/466, loss: 0.04687143862247467 2023-01-22 12:17:01.166561: step: 182/466, loss: 0.026359152048826218 2023-01-22 12:17:01.797653: step: 184/466, loss: 0.032875049859285355 2023-01-22 12:17:02.478163: step: 186/466, loss: 0.035999175161123276 2023-01-22 12:17:03.147563: step: 188/466, loss: 0.09714668989181519 2023-01-22 12:17:03.823273: step: 190/466, loss: 0.004884875845164061 2023-01-22 12:17:04.400000: step: 192/466, loss: 0.031640052795410156 2023-01-22 12:17:05.110457: step: 194/466, loss: 0.012053578160703182 2023-01-22 12:17:05.835877: step: 196/466, loss: 0.07030040770769119 2023-01-22 12:17:06.518988: step: 198/466, loss: 0.07704166322946548 2023-01-22 12:17:07.169276: step: 200/466, loss: 3.1605722904205322 2023-01-22 12:17:07.827613: step: 202/466, loss: 0.13462023437023163 2023-01-22 12:17:08.430687: step: 204/466, loss: 0.08356402069330215 2023-01-22 12:17:09.073359: step: 206/466, loss: 0.03021739050745964 2023-01-22 12:17:09.749731: step: 208/466, loss: 0.018619155511260033 2023-01-22 12:17:10.378708: step: 210/466, loss: 0.9524333477020264 2023-01-22 12:17:10.973526: step: 212/466, loss: 0.04453596845269203 2023-01-22 12:17:11.583184: step: 214/466, loss: 0.22760796546936035 2023-01-22 12:17:12.231946: step: 216/466, loss: 0.043370675295591354 2023-01-22 12:17:12.914310: step: 218/466, loss: 0.009173259139060974 2023-01-22 12:17:13.643479: step: 220/466, loss: 0.09860237687826157 2023-01-22 12:17:14.249624: step: 222/466, loss: 0.035534873604774475 2023-01-22 12:17:14.842458: step: 224/466, loss: 0.04748576879501343 2023-01-22 12:17:15.465219: step: 226/466, loss: 0.0008534886292181909 2023-01-22 12:17:16.120244: step: 228/466, loss: 0.2527320086956024 2023-01-22 12:17:16.717677: step: 230/466, loss: 0.09271737933158875 2023-01-22 12:17:17.336614: step: 232/466, loss: 0.00955366250127554 2023-01-22 12:17:18.051346: step: 234/466, loss: 0.006326592527329922 2023-01-22 12:17:18.698008: step: 236/466, loss: 0.03701920807361603 2023-01-22 12:17:19.420231: step: 238/466, loss: 0.017830880358815193 2023-01-22 12:17:20.046269: step: 240/466, loss: 0.6824085712432861 2023-01-22 12:17:20.669182: step: 242/466, loss: 0.10842016339302063 2023-01-22 12:17:21.331556: step: 244/466, loss: 0.012845429591834545 2023-01-22 12:17:21.974927: step: 246/466, loss: 0.014873155392706394 2023-01-22 12:17:22.634075: step: 248/466, loss: 0.015622666105628014 2023-01-22 12:17:23.231003: step: 250/466, loss: 0.06831051409244537 2023-01-22 12:17:23.909284: step: 252/466, loss: 0.0325254425406456 2023-01-22 12:17:24.539057: step: 254/466, loss: 0.05407670885324478 2023-01-22 12:17:25.187209: step: 256/466, loss: 0.2906532883644104 2023-01-22 12:17:25.874531: step: 258/466, loss: 0.0017354099545627832 2023-01-22 12:17:26.486221: step: 260/466, loss: 0.09595493227243423 2023-01-22 12:17:27.153980: step: 262/466, loss: 0.215519517660141 2023-01-22 12:17:27.821836: step: 264/466, loss: 0.009827272966504097 2023-01-22 12:17:28.461906: step: 266/466, loss: 0.04185410216450691 2023-01-22 12:17:29.111913: step: 268/466, loss: 0.023078553378582 2023-01-22 12:17:29.771079: step: 270/466, loss: 0.07125691324472427 2023-01-22 12:17:30.408599: step: 272/466, loss: 0.06753233820199966 2023-01-22 12:17:31.053537: step: 274/466, loss: 0.013809312134981155 2023-01-22 12:17:31.712092: step: 276/466, loss: 0.009868262335658073 2023-01-22 12:17:32.448372: step: 278/466, loss: 0.0939086303114891 2023-01-22 12:17:33.067418: step: 280/466, loss: 0.07660525292158127 2023-01-22 12:17:33.742370: step: 282/466, loss: 0.023387039080262184 2023-01-22 12:17:34.447341: step: 284/466, loss: 0.11623013019561768 2023-01-22 12:17:35.120225: step: 286/466, loss: 0.08008614182472229 2023-01-22 12:17:35.814559: step: 288/466, loss: 0.04176018759608269 2023-01-22 12:17:36.533060: step: 290/466, loss: 0.6140563488006592 2023-01-22 12:17:37.200970: step: 292/466, loss: 0.06483432650566101 2023-01-22 12:17:37.779504: step: 294/466, loss: 0.28950560092926025 2023-01-22 12:17:38.466853: step: 296/466, loss: 0.07724431157112122 2023-01-22 12:17:39.104992: step: 298/466, loss: 0.05625889077782631 2023-01-22 12:17:39.690405: step: 300/466, loss: 0.1193554475903511 2023-01-22 12:17:40.359569: step: 302/466, loss: 0.0603012852370739 2023-01-22 12:17:41.059435: step: 304/466, loss: 0.07429218292236328 2023-01-22 12:17:41.731329: step: 306/466, loss: 0.055066242814064026 2023-01-22 12:17:42.386968: step: 308/466, loss: 0.17455391585826874 2023-01-22 12:17:43.017066: step: 310/466, loss: 0.02669745311141014 2023-01-22 12:17:43.672600: step: 312/466, loss: 0.010350657626986504 2023-01-22 12:17:44.305818: step: 314/466, loss: 0.05983854457736015 2023-01-22 12:17:44.984485: step: 316/466, loss: 0.07685940712690353 2023-01-22 12:17:45.672395: step: 318/466, loss: 0.025662999600172043 2023-01-22 12:17:46.397830: step: 320/466, loss: 0.09239023178815842 2023-01-22 12:17:47.033543: step: 322/466, loss: 0.0246170312166214 2023-01-22 12:17:47.828489: step: 324/466, loss: 0.07709164917469025 2023-01-22 12:17:48.559154: step: 326/466, loss: 0.034793369472026825 2023-01-22 12:17:49.206592: step: 328/466, loss: 0.0283452607691288 2023-01-22 12:17:49.914317: step: 330/466, loss: 0.02577410452067852 2023-01-22 12:17:50.567178: step: 332/466, loss: 0.034362152218818665 2023-01-22 12:17:51.192362: step: 334/466, loss: 0.08701495826244354 2023-01-22 12:17:51.848595: step: 336/466, loss: 0.07708227634429932 2023-01-22 12:17:52.518665: step: 338/466, loss: 0.04221782088279724 2023-01-22 12:17:53.212073: step: 340/466, loss: 0.08996445685625076 2023-01-22 12:17:53.831411: step: 342/466, loss: 0.02697903849184513 2023-01-22 12:17:54.431408: step: 344/466, loss: 0.062445010989904404 2023-01-22 12:17:55.046788: step: 346/466, loss: 0.05373561754822731 2023-01-22 12:17:55.722348: step: 348/466, loss: 0.05835549533367157 2023-01-22 12:17:56.408368: step: 350/466, loss: 0.6431697607040405 2023-01-22 12:17:57.073617: step: 352/466, loss: 0.30082228779792786 2023-01-22 12:17:57.734114: step: 354/466, loss: 0.11711085587739944 2023-01-22 12:17:58.392287: step: 356/466, loss: 0.012538755312561989 2023-01-22 12:17:59.026750: step: 358/466, loss: 0.06648360192775726 2023-01-22 12:17:59.702146: step: 360/466, loss: 0.022442931309342384 2023-01-22 12:18:00.391713: step: 362/466, loss: 0.09700530767440796 2023-01-22 12:18:01.044317: step: 364/466, loss: 0.08707442879676819 2023-01-22 12:18:01.754294: step: 366/466, loss: 0.06549425423145294 2023-01-22 12:18:02.421043: step: 368/466, loss: 0.00937668140977621 2023-01-22 12:18:03.114786: step: 370/466, loss: 0.02462584339082241 2023-01-22 12:18:03.767169: step: 372/466, loss: 0.012783851474523544 2023-01-22 12:18:04.409282: step: 374/466, loss: 0.08071441948413849 2023-01-22 12:18:05.079815: step: 376/466, loss: 0.12172964215278625 2023-01-22 12:18:05.793105: step: 378/466, loss: 0.023829028010368347 2023-01-22 12:18:06.395211: step: 380/466, loss: 0.16985544562339783 2023-01-22 12:18:07.009308: step: 382/466, loss: 0.03699149936437607 2023-01-22 12:18:07.699625: step: 384/466, loss: 0.011300604790449142 2023-01-22 12:18:08.352456: step: 386/466, loss: 0.0649348646402359 2023-01-22 12:18:09.002348: step: 388/466, loss: 0.031760040670633316 2023-01-22 12:18:09.653602: step: 390/466, loss: 0.044917892664670944 2023-01-22 12:18:10.264238: step: 392/466, loss: 0.2522069811820984 2023-01-22 12:18:10.828554: step: 394/466, loss: 0.00824504904448986 2023-01-22 12:18:11.464660: step: 396/466, loss: 0.037625525146722794 2023-01-22 12:18:12.121111: step: 398/466, loss: 0.11952449381351471 2023-01-22 12:18:12.732140: step: 400/466, loss: 0.5322363376617432 2023-01-22 12:18:13.396698: step: 402/466, loss: 0.026489701122045517 2023-01-22 12:18:14.041803: step: 404/466, loss: 0.053119905292987823 2023-01-22 12:18:14.705179: step: 406/466, loss: 0.04038258269429207 2023-01-22 12:18:15.471647: step: 408/466, loss: 0.1848820447921753 2023-01-22 12:18:16.095221: step: 410/466, loss: 0.025337228551506996 2023-01-22 12:18:16.812510: step: 412/466, loss: 0.02773207239806652 2023-01-22 12:18:17.456111: step: 414/466, loss: 0.03828001767396927 2023-01-22 12:18:18.131976: step: 416/466, loss: 0.35797908902168274 2023-01-22 12:18:18.775230: step: 418/466, loss: 0.03279887139797211 2023-01-22 12:18:19.390017: step: 420/466, loss: 0.00925395917147398 2023-01-22 12:18:19.965593: step: 422/466, loss: 0.21304737031459808 2023-01-22 12:18:20.630103: step: 424/466, loss: 0.028571486473083496 2023-01-22 12:18:21.313638: step: 426/466, loss: 0.2181033194065094 2023-01-22 12:18:22.006529: step: 428/466, loss: 0.07693160325288773 2023-01-22 12:18:22.720471: step: 430/466, loss: 0.14517450332641602 2023-01-22 12:18:23.385947: step: 432/466, loss: 0.016695642843842506 2023-01-22 12:18:24.032685: step: 434/466, loss: 0.1899147927761078 2023-01-22 12:18:24.668906: step: 436/466, loss: 0.009916950948536396 2023-01-22 12:18:25.295431: step: 438/466, loss: 0.00741297984495759 2023-01-22 12:18:25.969802: step: 440/466, loss: 0.0704260990023613 2023-01-22 12:18:26.681460: step: 442/466, loss: 0.011597951874136925 2023-01-22 12:18:27.318346: step: 444/466, loss: 0.013319095596671104 2023-01-22 12:18:27.974223: step: 446/466, loss: 0.054979003965854645 2023-01-22 12:18:28.626383: step: 448/466, loss: 0.023092715069651604 2023-01-22 12:18:29.292295: step: 450/466, loss: 0.045721933245658875 2023-01-22 12:18:29.959385: step: 452/466, loss: 0.004618957173079252 2023-01-22 12:18:30.623302: step: 454/466, loss: 0.0299563929438591 2023-01-22 12:18:31.288672: step: 456/466, loss: 0.0010994257172569633 2023-01-22 12:18:31.904660: step: 458/466, loss: 0.016170697286725044 2023-01-22 12:18:32.604112: step: 460/466, loss: 0.04268626496195793 2023-01-22 12:18:33.352701: step: 462/466, loss: 0.05014938861131668 2023-01-22 12:18:34.042121: step: 464/466, loss: 0.04969983547925949 2023-01-22 12:18:34.923084: step: 466/466, loss: 0.04016483575105667 2023-01-22 12:18:35.517664: step: 468/466, loss: 0.014492525719106197 2023-01-22 12:18:36.168497: step: 470/466, loss: 0.8588945865631104 2023-01-22 12:18:36.887008: step: 472/466, loss: 0.049487411975860596 2023-01-22 12:18:37.546276: step: 474/466, loss: 0.06922387331724167 2023-01-22 12:18:38.257737: step: 476/466, loss: 0.040300093591213226 2023-01-22 12:18:38.876198: step: 478/466, loss: 0.17167863249778748 2023-01-22 12:18:39.484129: step: 480/466, loss: 0.03799637407064438 2023-01-22 12:18:40.167404: step: 482/466, loss: 0.08162984997034073 2023-01-22 12:18:40.819555: step: 484/466, loss: 0.04913508519530296 2023-01-22 12:18:41.477544: step: 486/466, loss: 0.06300981342792511 2023-01-22 12:18:42.127117: step: 488/466, loss: 0.09749052673578262 2023-01-22 12:18:42.779335: step: 490/466, loss: 0.06170743703842163 2023-01-22 12:18:43.475088: step: 492/466, loss: 0.23926964402198792 2023-01-22 12:18:44.111207: step: 494/466, loss: 0.04832770302891731 2023-01-22 12:18:44.762087: step: 496/466, loss: 0.019094914197921753 2023-01-22 12:18:45.406287: step: 498/466, loss: 0.06809645146131516 2023-01-22 12:18:46.040696: step: 500/466, loss: 0.07340987026691437 2023-01-22 12:18:46.683772: step: 502/466, loss: 0.08297903835773468 2023-01-22 12:18:47.358703: step: 504/466, loss: 0.04602425917983055 2023-01-22 12:18:47.946778: step: 506/466, loss: 0.047050878405570984 2023-01-22 12:18:48.569601: step: 508/466, loss: 0.056306980550289154 2023-01-22 12:18:49.220762: step: 510/466, loss: 0.02863304875791073 2023-01-22 12:18:49.876502: step: 512/466, loss: 0.023716315627098083 2023-01-22 12:18:50.533042: step: 514/466, loss: 0.059006400406360626 2023-01-22 12:18:51.234727: step: 516/466, loss: 0.1620517373085022 2023-01-22 12:18:51.814589: step: 518/466, loss: 0.0029416207689791918 2023-01-22 12:18:52.414816: step: 520/466, loss: 0.07356319576501846 2023-01-22 12:18:53.138923: step: 522/466, loss: 0.018130777403712273 2023-01-22 12:18:53.767503: step: 524/466, loss: 0.013137644156813622 2023-01-22 12:18:54.480367: step: 526/466, loss: 0.05470186471939087 2023-01-22 12:18:55.087779: step: 528/466, loss: 0.0040150294080376625 2023-01-22 12:18:55.786771: step: 530/466, loss: 0.016491297632455826 2023-01-22 12:18:56.503471: step: 532/466, loss: 0.06494159996509552 2023-01-22 12:18:57.178861: step: 534/466, loss: 0.06707453727722168 2023-01-22 12:18:57.872863: step: 536/466, loss: 0.015546813607215881 2023-01-22 12:18:58.531372: step: 538/466, loss: 0.024059467017650604 2023-01-22 12:18:59.156760: step: 540/466, loss: 0.054490331560373306 2023-01-22 12:18:59.791614: step: 542/466, loss: 0.0058572967536747456 2023-01-22 12:19:00.461975: step: 544/466, loss: 0.3280733823776245 2023-01-22 12:19:01.055581: step: 546/466, loss: 0.019469719380140305 2023-01-22 12:19:01.731960: step: 548/466, loss: 0.05076098069548607 2023-01-22 12:19:02.474236: step: 550/466, loss: 0.07283362746238708 2023-01-22 12:19:03.081446: step: 552/466, loss: 0.26903781294822693 2023-01-22 12:19:03.686948: step: 554/466, loss: 0.06180036813020706 2023-01-22 12:19:04.297069: step: 556/466, loss: 0.00028102347278036177 2023-01-22 12:19:04.977855: step: 558/466, loss: 0.03176679462194443 2023-01-22 12:19:05.611195: step: 560/466, loss: 0.04618803411722183 2023-01-22 12:19:06.239977: step: 562/466, loss: 0.005040397401899099 2023-01-22 12:19:06.998586: step: 564/466, loss: 0.022054478526115417 2023-01-22 12:19:07.637951: step: 566/466, loss: 0.006742571480572224 2023-01-22 12:19:08.318531: step: 568/466, loss: 0.09823835641145706 2023-01-22 12:19:08.984687: step: 570/466, loss: 0.01592562161386013 2023-01-22 12:19:09.650098: step: 572/466, loss: 0.060743462294340134 2023-01-22 12:19:10.364032: step: 574/466, loss: 0.06120866909623146 2023-01-22 12:19:11.086820: step: 576/466, loss: 0.018308117985725403 2023-01-22 12:19:11.771482: step: 578/466, loss: 0.0007237203535623848 2023-01-22 12:19:12.446643: step: 580/466, loss: 0.02220681495964527 2023-01-22 12:19:13.054336: step: 582/466, loss: 0.016686266288161278 2023-01-22 12:19:13.678820: step: 584/466, loss: 0.024640217423439026 2023-01-22 12:19:14.274304: step: 586/466, loss: 0.07240679115056992 2023-01-22 12:19:14.950319: step: 588/466, loss: 0.03808869048953056 2023-01-22 12:19:15.569866: step: 590/466, loss: 0.03586426377296448 2023-01-22 12:19:16.205512: step: 592/466, loss: 0.024462319910526276 2023-01-22 12:19:16.855849: step: 594/466, loss: 0.05637772008776665 2023-01-22 12:19:17.507918: step: 596/466, loss: 0.04977262765169144 2023-01-22 12:19:18.130311: step: 598/466, loss: 0.015603912062942982 2023-01-22 12:19:18.834087: step: 600/466, loss: 0.0405561625957489 2023-01-22 12:19:19.489346: step: 602/466, loss: 0.09790562093257904 2023-01-22 12:19:20.153755: step: 604/466, loss: 0.033606477081775665 2023-01-22 12:19:20.866088: step: 606/466, loss: 0.008672555908560753 2023-01-22 12:19:21.463008: step: 608/466, loss: 0.12055826932191849 2023-01-22 12:19:22.145252: step: 610/466, loss: 0.03952990099787712 2023-01-22 12:19:22.798972: step: 612/466, loss: 0.056830912828445435 2023-01-22 12:19:23.519656: step: 614/466, loss: 0.0020802398212254047 2023-01-22 12:19:24.174037: step: 616/466, loss: 0.054080478847026825 2023-01-22 12:19:24.840837: step: 618/466, loss: 0.7020652890205383 2023-01-22 12:19:25.468979: step: 620/466, loss: 0.07433077692985535 2023-01-22 12:19:26.152226: step: 622/466, loss: 0.015092556364834309 2023-01-22 12:19:26.871951: step: 624/466, loss: 0.08288074284791946 2023-01-22 12:19:27.487520: step: 626/466, loss: 0.02156096138060093 2023-01-22 12:19:28.135711: step: 628/466, loss: 0.01666255295276642 2023-01-22 12:19:28.786287: step: 630/466, loss: 0.08199740201234818 2023-01-22 12:19:29.450821: step: 632/466, loss: 0.01296154409646988 2023-01-22 12:19:30.076797: step: 634/466, loss: 0.05086514353752136 2023-01-22 12:19:30.789293: step: 636/466, loss: 0.06781143695116043 2023-01-22 12:19:31.485171: step: 638/466, loss: 0.16483181715011597 2023-01-22 12:19:32.156715: step: 640/466, loss: 0.025504454970359802 2023-01-22 12:19:32.801258: step: 642/466, loss: 0.4947722852230072 2023-01-22 12:19:33.512739: step: 644/466, loss: 0.03764424845576286 2023-01-22 12:19:34.159729: step: 646/466, loss: 0.009929514490067959 2023-01-22 12:19:34.797000: step: 648/466, loss: 0.06044996529817581 2023-01-22 12:19:35.459542: step: 650/466, loss: 0.1053556576371193 2023-01-22 12:19:36.131682: step: 652/466, loss: 0.025075990706682205 2023-01-22 12:19:36.789527: step: 654/466, loss: 0.022899439558386803 2023-01-22 12:19:37.580463: step: 656/466, loss: 0.32105860114097595 2023-01-22 12:19:38.230158: step: 658/466, loss: 0.021026339381933212 2023-01-22 12:19:38.858771: step: 660/466, loss: 0.08893311023712158 2023-01-22 12:19:39.550733: step: 662/466, loss: 0.004652900155633688 2023-01-22 12:19:40.163661: step: 664/466, loss: 0.006031044293195009 2023-01-22 12:19:40.813057: step: 666/466, loss: 0.07278808206319809 2023-01-22 12:19:41.430799: step: 668/466, loss: 0.0484389029443264 2023-01-22 12:19:42.079398: step: 670/466, loss: 0.19756385684013367 2023-01-22 12:19:42.747855: step: 672/466, loss: 0.12207819521427155 2023-01-22 12:19:43.441153: step: 674/466, loss: 0.051613833755254745 2023-01-22 12:19:44.112580: step: 676/466, loss: 0.08182787150144577 2023-01-22 12:19:44.765018: step: 678/466, loss: 0.055441003292798996 2023-01-22 12:19:45.443825: step: 680/466, loss: 0.07255645096302032 2023-01-22 12:19:46.154589: step: 682/466, loss: 0.1373283565044403 2023-01-22 12:19:46.891262: step: 684/466, loss: 0.08799053728580475 2023-01-22 12:19:47.596691: step: 686/466, loss: 0.09948298335075378 2023-01-22 12:19:48.271512: step: 688/466, loss: 0.3227572739124298 2023-01-22 12:19:48.967950: step: 690/466, loss: 0.13032247126102448 2023-01-22 12:19:49.628181: step: 692/466, loss: 0.05579046905040741 2023-01-22 12:19:50.322939: step: 694/466, loss: 0.08206340670585632 2023-01-22 12:19:50.931295: step: 696/466, loss: 0.08887199312448502 2023-01-22 12:19:51.580628: step: 698/466, loss: 0.056815605610609055 2023-01-22 12:19:52.239445: step: 700/466, loss: 0.1173422560095787 2023-01-22 12:19:52.937195: step: 702/466, loss: 0.025796858593821526 2023-01-22 12:19:53.610063: step: 704/466, loss: 0.1318012923002243 2023-01-22 12:19:54.290790: step: 706/466, loss: 0.06997198611497879 2023-01-22 12:19:54.915384: step: 708/466, loss: 0.0759538933634758 2023-01-22 12:19:55.593826: step: 710/466, loss: 0.12641946971416473 2023-01-22 12:19:56.272180: step: 712/466, loss: 0.07710997015237808 2023-01-22 12:19:56.838962: step: 714/466, loss: 0.05794893577694893 2023-01-22 12:19:57.440018: step: 716/466, loss: 0.037828873842954636 2023-01-22 12:19:58.078691: step: 718/466, loss: 0.06943363696336746 2023-01-22 12:19:58.810846: step: 720/466, loss: 0.07901345193386078 2023-01-22 12:19:59.366264: step: 722/466, loss: 0.015312760137021542 2023-01-22 12:19:59.969215: step: 724/466, loss: 0.053611285984516144 2023-01-22 12:20:00.627128: step: 726/466, loss: 0.016187705099582672 2023-01-22 12:20:01.278519: step: 728/466, loss: 0.08087359368801117 2023-01-22 12:20:01.946695: step: 730/466, loss: 0.07364023476839066 2023-01-22 12:20:02.649790: step: 732/466, loss: 1.0714809894561768 2023-01-22 12:20:03.247270: step: 734/466, loss: 0.0519305095076561 2023-01-22 12:20:03.938328: step: 736/466, loss: 0.12604323029518127 2023-01-22 12:20:04.570581: step: 738/466, loss: 0.0702681615948677 2023-01-22 12:20:05.217133: step: 740/466, loss: 0.058377061039209366 2023-01-22 12:20:05.852573: step: 742/466, loss: 0.08331472426652908 2023-01-22 12:20:06.513444: step: 744/466, loss: 0.1307016909122467 2023-01-22 12:20:07.157888: step: 746/466, loss: 0.057829976081848145 2023-01-22 12:20:07.822004: step: 748/466, loss: 0.0034869564697146416 2023-01-22 12:20:08.491676: step: 750/466, loss: 0.028278179466724396 2023-01-22 12:20:09.129732: step: 752/466, loss: 0.5153988599777222 2023-01-22 12:20:09.786512: step: 754/466, loss: 0.03906656429171562 2023-01-22 12:20:10.509493: step: 756/466, loss: 0.21079841256141663 2023-01-22 12:20:11.123494: step: 758/466, loss: 0.026310265064239502 2023-01-22 12:20:11.845753: step: 760/466, loss: 0.0046156407333910465 2023-01-22 12:20:12.524824: step: 762/466, loss: 0.06732354313135147 2023-01-22 12:20:13.169521: step: 764/466, loss: 0.021007949486374855 2023-01-22 12:20:13.798792: step: 766/466, loss: 0.035399969667196274 2023-01-22 12:20:14.466452: step: 768/466, loss: 0.05831155180931091 2023-01-22 12:20:15.090077: step: 770/466, loss: 0.1032584011554718 2023-01-22 12:20:15.707179: step: 772/466, loss: 0.020311955362558365 2023-01-22 12:20:16.346699: step: 774/466, loss: 0.28223592042922974 2023-01-22 12:20:17.018965: step: 776/466, loss: 0.04397254437208176 2023-01-22 12:20:17.688675: step: 778/466, loss: 0.06936588138341904 2023-01-22 12:20:18.344557: step: 780/466, loss: 0.2023026943206787 2023-01-22 12:20:18.961591: step: 782/466, loss: 0.013379652053117752 2023-01-22 12:20:19.582839: step: 784/466, loss: 0.07581201940774918 2023-01-22 12:20:20.247447: step: 786/466, loss: 0.1080794408917427 2023-01-22 12:20:20.991305: step: 788/466, loss: 0.010013054125010967 2023-01-22 12:20:21.635308: step: 790/466, loss: 0.1666671633720398 2023-01-22 12:20:22.310191: step: 792/466, loss: 0.043947044759988785 2023-01-22 12:20:22.903530: step: 794/466, loss: 0.03906881436705589 2023-01-22 12:20:23.519614: step: 796/466, loss: 0.03650645911693573 2023-01-22 12:20:24.195382: step: 798/466, loss: 0.01717812567949295 2023-01-22 12:20:24.847610: step: 800/466, loss: 0.05117480456829071 2023-01-22 12:20:25.559780: step: 802/466, loss: 0.0996786504983902 2023-01-22 12:20:26.203660: step: 804/466, loss: 0.015471360646188259 2023-01-22 12:20:26.800385: step: 806/466, loss: 0.033040329813957214 2023-01-22 12:20:27.470319: step: 808/466, loss: 0.0883571058511734 2023-01-22 12:20:28.131960: step: 810/466, loss: 0.07848524302244186 2023-01-22 12:20:28.791733: step: 812/466, loss: 0.029181169345974922 2023-01-22 12:20:29.601085: step: 814/466, loss: 0.031819898635149 2023-01-22 12:20:30.192004: step: 816/466, loss: 0.02793828584253788 2023-01-22 12:20:30.848238: step: 818/466, loss: 0.08263324946165085 2023-01-22 12:20:31.471921: step: 820/466, loss: 0.029203617945313454 2023-01-22 12:20:32.127490: step: 822/466, loss: 0.0037075963336974382 2023-01-22 12:20:32.757534: step: 824/466, loss: 0.1230461373925209 2023-01-22 12:20:33.346007: step: 826/466, loss: 0.05489637702703476 2023-01-22 12:20:34.044866: step: 828/466, loss: 0.041764117777347565 2023-01-22 12:20:34.695576: step: 830/466, loss: 0.035227157175540924 2023-01-22 12:20:35.373436: step: 832/466, loss: 0.178544819355011 2023-01-22 12:20:35.978902: step: 834/466, loss: 0.07546160370111465 2023-01-22 12:20:36.639734: step: 836/466, loss: 0.036592237651348114 2023-01-22 12:20:37.239230: step: 838/466, loss: 0.0232541523873806 2023-01-22 12:20:37.939842: step: 840/466, loss: 0.13159701228141785 2023-01-22 12:20:38.585934: step: 842/466, loss: 0.04711227864027023 2023-01-22 12:20:39.174933: step: 844/466, loss: 0.014443104155361652 2023-01-22 12:20:39.799080: step: 846/466, loss: 0.03396781533956528 2023-01-22 12:20:40.613344: step: 848/466, loss: 0.0871630385518074 2023-01-22 12:20:41.284323: step: 850/466, loss: 0.2766623795032501 2023-01-22 12:20:41.905696: step: 852/466, loss: 0.03323986008763313 2023-01-22 12:20:42.548479: step: 854/466, loss: 0.02848413586616516 2023-01-22 12:20:43.255033: step: 856/466, loss: 0.06942589581012726 2023-01-22 12:20:43.903474: step: 858/466, loss: 0.02229192480444908 2023-01-22 12:20:44.625471: step: 860/466, loss: 0.04600051790475845 2023-01-22 12:20:45.313781: step: 862/466, loss: 0.02044217474758625 2023-01-22 12:20:45.947827: step: 864/466, loss: 0.0533686988055706 2023-01-22 12:20:46.621273: step: 866/466, loss: 0.050430938601493835 2023-01-22 12:20:47.274844: step: 868/466, loss: 0.05311398208141327 2023-01-22 12:20:47.908856: step: 870/466, loss: 0.08430203050374985 2023-01-22 12:20:48.550671: step: 872/466, loss: 0.0548601895570755 2023-01-22 12:20:49.234515: step: 874/466, loss: 0.0949767678976059 2023-01-22 12:20:49.931415: step: 876/466, loss: 0.1755812168121338 2023-01-22 12:20:50.653024: step: 878/466, loss: 0.04888708516955376 2023-01-22 12:20:51.275085: step: 880/466, loss: 0.012096043676137924 2023-01-22 12:20:51.908853: step: 882/466, loss: 0.10377335548400879 2023-01-22 12:20:52.572531: step: 884/466, loss: 0.04046103358268738 2023-01-22 12:20:53.232587: step: 886/466, loss: 0.023146986961364746 2023-01-22 12:20:53.838319: step: 888/466, loss: 0.038587309420108795 2023-01-22 12:20:54.510013: step: 890/466, loss: 0.03209630399942398 2023-01-22 12:20:55.247386: step: 892/466, loss: 0.05548754334449768 2023-01-22 12:20:55.985685: step: 894/466, loss: 0.033855512738227844 2023-01-22 12:20:56.678911: step: 896/466, loss: 0.015545746311545372 2023-01-22 12:20:57.266517: step: 898/466, loss: 0.02351602539420128 2023-01-22 12:20:57.879279: step: 900/466, loss: 0.03280020132660866 2023-01-22 12:20:58.510209: step: 902/466, loss: 0.003082484472543001 2023-01-22 12:20:59.156953: step: 904/466, loss: 0.023638760671019554 2023-01-22 12:20:59.785694: step: 906/466, loss: 0.06338606029748917 2023-01-22 12:21:00.502796: step: 908/466, loss: 0.04905043542385101 2023-01-22 12:21:01.134932: step: 910/466, loss: 0.016157738864421844 2023-01-22 12:21:01.814079: step: 912/466, loss: 0.06345370411872864 2023-01-22 12:21:02.575527: step: 914/466, loss: 0.04141675680875778 2023-01-22 12:21:03.258185: step: 916/466, loss: 0.08778607845306396 2023-01-22 12:21:03.849412: step: 918/466, loss: 0.05597427487373352 2023-01-22 12:21:04.447237: step: 920/466, loss: 0.02496947906911373 2023-01-22 12:21:05.111444: step: 922/466, loss: 0.06652742624282837 2023-01-22 12:21:05.750459: step: 924/466, loss: 0.026141630485653877 2023-01-22 12:21:06.566627: step: 926/466, loss: 0.0483468696475029 2023-01-22 12:21:07.218186: step: 928/466, loss: 0.04227868467569351 2023-01-22 12:21:07.880268: step: 930/466, loss: 0.040610309690237045 2023-01-22 12:21:08.604392: step: 932/466, loss: 0.014453450217843056 ================================================== Loss: 0.087 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3065358654690001, 'r': 0.3414355844977287, 'f1': 0.3230458761764866}, 'combined': 0.23803380349846381, 'epoch': 22} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.33399658023630646, 'r': 0.3267734708021303, 'f1': 0.33034554634653485}, 'combined': 0.21908927426091426, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29626014198782963, 'r': 0.2766216856060606, 'f1': 0.28610430950048965}, 'combined': 0.19073620633365976, 'epoch': 22} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3571320583413024, 'r': 0.3156626512202153, 'f1': 0.3351193187747272}, 'combined': 0.2187094501477167, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28715858837127495, 'r': 0.32857045310792937, 'f1': 0.30647190935907753}, 'combined': 0.22582140689616237, 'epoch': 22} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32987866090159834, 'r': 0.3190348986920302, 'f1': 0.3243661766824863}, 'combined': 0.21512368194486134, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2725694444444444, 'r': 0.37380952380952376, 'f1': 0.31526104417670675}, 'combined': 0.21017402945113783, 'epoch': 22} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40789473684210525, 'r': 0.33695652173913043, 'f1': 0.36904761904761907}, 'combined': 0.24603174603174605, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 22} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28715858837127495, 'r': 0.32857045310792937, 'f1': 0.30647190935907753}, 'combined': 0.22582140689616237, 'epoch': 22} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32987866090159834, 'r': 0.3190348986920302, 'f1': 0.3243661766824863}, 'combined': 0.21512368194486134, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 22} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:23:59.860860: step: 2/466, loss: 0.014219677075743675 2023-01-22 12:24:00.468639: step: 4/466, loss: 0.02799920178949833 2023-01-22 12:24:01.145829: step: 6/466, loss: 0.09419617056846619 2023-01-22 12:24:01.799118: step: 8/466, loss: 0.31039220094680786 2023-01-22 12:24:02.511830: step: 10/466, loss: 0.2978193163871765 2023-01-22 12:24:03.168486: step: 12/466, loss: 0.007043609861284494 2023-01-22 12:24:03.790842: step: 14/466, loss: 0.027942348271608353 2023-01-22 12:24:04.433498: step: 16/466, loss: 0.11210431903600693 2023-01-22 12:24:05.252812: step: 18/466, loss: 0.10927058011293411 2023-01-22 12:24:05.965065: step: 20/466, loss: 0.021317631006240845 2023-01-22 12:24:06.696003: step: 22/466, loss: 0.038125600665807724 2023-01-22 12:24:07.358957: step: 24/466, loss: 0.0418558269739151 2023-01-22 12:24:08.008536: step: 26/466, loss: 0.018245242536067963 2023-01-22 12:24:08.635619: step: 28/466, loss: 0.047045011073350906 2023-01-22 12:24:09.307749: step: 30/466, loss: 0.03388955444097519 2023-01-22 12:24:09.927008: step: 32/466, loss: 0.03465335816144943 2023-01-22 12:24:10.489272: step: 34/466, loss: 0.017350606620311737 2023-01-22 12:24:11.185900: step: 36/466, loss: 0.014976875856518745 2023-01-22 12:24:11.859372: step: 38/466, loss: 0.033230796456336975 2023-01-22 12:24:12.521070: step: 40/466, loss: 0.020534897223114967 2023-01-22 12:24:13.153501: step: 42/466, loss: 0.021572591736912727 2023-01-22 12:24:13.824927: step: 44/466, loss: 0.06211506202816963 2023-01-22 12:24:14.471946: step: 46/466, loss: 0.01666572503745556 2023-01-22 12:24:15.172216: step: 48/466, loss: 0.027486102655529976 2023-01-22 12:24:15.818044: step: 50/466, loss: 0.10002031922340393 2023-01-22 12:24:16.470853: step: 52/466, loss: 0.019476555287837982 2023-01-22 12:24:17.229811: step: 54/466, loss: 0.00325954332947731 2023-01-22 12:24:17.879541: step: 56/466, loss: 0.030487297102808952 2023-01-22 12:24:18.570852: step: 58/466, loss: 0.0035734514240175486 2023-01-22 12:24:19.269742: step: 60/466, loss: 0.029996968805789948 2023-01-22 12:24:19.860970: step: 62/466, loss: 0.0012635894818231463 2023-01-22 12:24:20.487541: step: 64/466, loss: 0.11661484092473984 2023-01-22 12:24:21.155010: step: 66/466, loss: 0.20790398120880127 2023-01-22 12:24:21.839942: step: 68/466, loss: 0.045636314898729324 2023-01-22 12:24:22.558942: step: 70/466, loss: 0.0090608149766922 2023-01-22 12:24:23.250067: step: 72/466, loss: 0.03447333350777626 2023-01-22 12:24:23.896325: step: 74/466, loss: 0.04142190143465996 2023-01-22 12:24:24.540519: step: 76/466, loss: 0.08950883895158768 2023-01-22 12:24:25.153444: step: 78/466, loss: 0.03892095386981964 2023-01-22 12:24:25.820195: step: 80/466, loss: 0.03963964059948921 2023-01-22 12:24:26.537088: step: 82/466, loss: 0.08380357921123505 2023-01-22 12:24:27.160953: step: 84/466, loss: 0.04118441417813301 2023-01-22 12:24:27.821488: step: 86/466, loss: 0.024973010644316673 2023-01-22 12:24:28.406706: step: 88/466, loss: 0.0016479233745485544 2023-01-22 12:24:29.032001: step: 90/466, loss: 0.006236465647816658 2023-01-22 12:24:29.711929: step: 92/466, loss: 0.08336075395345688 2023-01-22 12:24:30.372295: step: 94/466, loss: 0.015114004723727703 2023-01-22 12:24:31.062838: step: 96/466, loss: 0.010505511425435543 2023-01-22 12:24:31.759332: step: 98/466, loss: 0.2507909834384918 2023-01-22 12:24:32.457421: step: 100/466, loss: 0.260568231344223 2023-01-22 12:24:33.148071: step: 102/466, loss: 0.044688452035188675 2023-01-22 12:24:33.827804: step: 104/466, loss: 0.004466955550014973 2023-01-22 12:24:34.490219: step: 106/466, loss: 0.06955769658088684 2023-01-22 12:24:35.191186: step: 108/466, loss: 0.06379435211420059 2023-01-22 12:24:35.840193: step: 110/466, loss: 0.015097280032932758 2023-01-22 12:24:36.428103: step: 112/466, loss: 0.08070417493581772 2023-01-22 12:24:37.053333: step: 114/466, loss: 0.061258550733327866 2023-01-22 12:24:37.730526: step: 116/466, loss: 0.008601571433246136 2023-01-22 12:24:38.386663: step: 118/466, loss: 0.021000182256102562 2023-01-22 12:24:39.082471: step: 120/466, loss: 0.010197958908975124 2023-01-22 12:24:39.739696: step: 122/466, loss: 0.011712507344782352 2023-01-22 12:24:40.381877: step: 124/466, loss: 0.02409764751791954 2023-01-22 12:24:40.983632: step: 126/466, loss: 0.10284940153360367 2023-01-22 12:24:41.598583: step: 128/466, loss: 0.013877983205020428 2023-01-22 12:24:42.280632: step: 130/466, loss: 0.06758103519678116 2023-01-22 12:24:42.968376: step: 132/466, loss: 0.04407202824950218 2023-01-22 12:24:43.632308: step: 134/466, loss: 0.1829264909029007 2023-01-22 12:24:44.257520: step: 136/466, loss: 0.034951724112033844 2023-01-22 12:24:44.870412: step: 138/466, loss: 0.08043573796749115 2023-01-22 12:24:45.505253: step: 140/466, loss: 0.0018060511210933328 2023-01-22 12:24:46.198899: step: 142/466, loss: 0.08757258951663971 2023-01-22 12:24:46.836338: step: 144/466, loss: 0.014110724441707134 2023-01-22 12:24:47.435135: step: 146/466, loss: 0.0018658102490007877 2023-01-22 12:24:48.089236: step: 148/466, loss: 0.04405156150460243 2023-01-22 12:24:48.703723: step: 150/466, loss: 0.006086637265980244 2023-01-22 12:24:49.416903: step: 152/466, loss: 0.13918232917785645 2023-01-22 12:24:50.023965: step: 154/466, loss: 0.02179715596139431 2023-01-22 12:24:50.649750: step: 156/466, loss: 0.1636473536491394 2023-01-22 12:24:51.451952: step: 158/466, loss: 0.04377252236008644 2023-01-22 12:24:52.088582: step: 160/466, loss: 0.030937492847442627 2023-01-22 12:24:52.733895: step: 162/466, loss: 0.06732071191072464 2023-01-22 12:24:53.399173: step: 164/466, loss: 0.1363304704427719 2023-01-22 12:24:54.089628: step: 166/466, loss: 0.08036568760871887 2023-01-22 12:24:54.701776: step: 168/466, loss: 0.08385451883077621 2023-01-22 12:24:55.370178: step: 170/466, loss: 0.012089313007891178 2023-01-22 12:24:56.008174: step: 172/466, loss: 0.048308826982975006 2023-01-22 12:24:56.769738: step: 174/466, loss: 0.03280562534928322 2023-01-22 12:24:57.433361: step: 176/466, loss: 0.023207364603877068 2023-01-22 12:24:58.038011: step: 178/466, loss: 0.0603955052793026 2023-01-22 12:24:58.673158: step: 180/466, loss: 1.5096756219863892 2023-01-22 12:24:59.382158: step: 182/466, loss: 0.02950737066566944 2023-01-22 12:24:59.966266: step: 184/466, loss: 0.026478124782443047 2023-01-22 12:25:00.578226: step: 186/466, loss: 0.016791829839348793 2023-01-22 12:25:01.284731: step: 188/466, loss: 0.01706968992948532 2023-01-22 12:25:01.977622: step: 190/466, loss: 0.014634549617767334 2023-01-22 12:25:02.679920: step: 192/466, loss: 0.014598404057323933 2023-01-22 12:25:03.350765: step: 194/466, loss: 0.016471099108457565 2023-01-22 12:25:04.029597: step: 196/466, loss: 0.011771813035011292 2023-01-22 12:25:04.742869: step: 198/466, loss: 0.03157324343919754 2023-01-22 12:25:05.364311: step: 200/466, loss: 0.0009993526618927717 2023-01-22 12:25:06.039285: step: 202/466, loss: 0.038105957210063934 2023-01-22 12:25:06.748117: step: 204/466, loss: 0.04690699279308319 2023-01-22 12:25:07.333171: step: 206/466, loss: 0.02774890325963497 2023-01-22 12:25:07.968088: step: 208/466, loss: 0.02330356277525425 2023-01-22 12:25:08.696808: step: 210/466, loss: 0.04613875970244408 2023-01-22 12:25:09.398714: step: 212/466, loss: 0.006724870763719082 2023-01-22 12:25:10.091332: step: 214/466, loss: 0.035854555666446686 2023-01-22 12:25:10.739031: step: 216/466, loss: 0.018583467230200768 2023-01-22 12:25:11.364196: step: 218/466, loss: 0.03919846564531326 2023-01-22 12:25:11.992447: step: 220/466, loss: 0.1321801096200943 2023-01-22 12:25:12.562025: step: 222/466, loss: 0.03498874232172966 2023-01-22 12:25:13.198510: step: 224/466, loss: 0.052615873515605927 2023-01-22 12:25:13.830620: step: 226/466, loss: 0.026402872055768967 2023-01-22 12:25:14.489292: step: 228/466, loss: 0.026835087686777115 2023-01-22 12:25:15.175212: step: 230/466, loss: 0.03692265599966049 2023-01-22 12:25:15.852520: step: 232/466, loss: 0.007541773375123739 2023-01-22 12:25:16.502567: step: 234/466, loss: 0.009179790504276752 2023-01-22 12:25:17.170289: step: 236/466, loss: 0.0473739430308342 2023-01-22 12:25:17.778145: step: 238/466, loss: 0.0576513297855854 2023-01-22 12:25:18.515540: step: 240/466, loss: 0.11710616946220398 2023-01-22 12:25:19.111950: step: 242/466, loss: 0.02180691994726658 2023-01-22 12:25:19.786691: step: 244/466, loss: 0.03474956750869751 2023-01-22 12:25:20.443151: step: 246/466, loss: 0.268618106842041 2023-01-22 12:25:21.066413: step: 248/466, loss: 0.04940341040492058 2023-01-22 12:25:21.743288: step: 250/466, loss: 0.019864708185195923 2023-01-22 12:25:22.365926: step: 252/466, loss: 0.4139997959136963 2023-01-22 12:25:23.026511: step: 254/466, loss: 0.22472988069057465 2023-01-22 12:25:23.698389: step: 256/466, loss: 0.08591841161251068 2023-01-22 12:25:24.429637: step: 258/466, loss: 0.04147052392363548 2023-01-22 12:25:25.116772: step: 260/466, loss: 0.041704703122377396 2023-01-22 12:25:25.730676: step: 262/466, loss: 0.07820732891559601 2023-01-22 12:25:26.354711: step: 264/466, loss: 0.18900932371616364 2023-01-22 12:25:27.003331: step: 266/466, loss: 0.006983160972595215 2023-01-22 12:25:27.625241: step: 268/466, loss: 0.007102139759808779 2023-01-22 12:25:28.288178: step: 270/466, loss: 0.03518517315387726 2023-01-22 12:25:28.843898: step: 272/466, loss: 0.0104660140350461 2023-01-22 12:25:29.492859: step: 274/466, loss: 0.08754755556583405 2023-01-22 12:25:30.190018: step: 276/466, loss: 0.03776015713810921 2023-01-22 12:25:30.769714: step: 278/466, loss: 0.06886523216962814 2023-01-22 12:25:31.403083: step: 280/466, loss: 0.054589878767728806 2023-01-22 12:25:32.093547: step: 282/466, loss: 0.03363099694252014 2023-01-22 12:25:32.716607: step: 284/466, loss: 0.022089608013629913 2023-01-22 12:25:33.321304: step: 286/466, loss: 0.03461292013525963 2023-01-22 12:25:33.952043: step: 288/466, loss: 0.1107168048620224 2023-01-22 12:25:34.623451: step: 290/466, loss: 0.06371128559112549 2023-01-22 12:25:35.259389: step: 292/466, loss: 0.014528674073517323 2023-01-22 12:25:35.921050: step: 294/466, loss: 0.12037225812673569 2023-01-22 12:25:36.557318: step: 296/466, loss: 0.036495864391326904 2023-01-22 12:25:37.185754: step: 298/466, loss: 0.032784491777420044 2023-01-22 12:25:37.808917: step: 300/466, loss: 0.03633768483996391 2023-01-22 12:25:38.577280: step: 302/466, loss: 0.005397193133831024 2023-01-22 12:25:39.238055: step: 304/466, loss: 0.19300426542758942 2023-01-22 12:25:39.899734: step: 306/466, loss: 0.0667954683303833 2023-01-22 12:25:40.553477: step: 308/466, loss: 0.3774685561656952 2023-01-22 12:25:41.254264: step: 310/466, loss: 0.03482966870069504 2023-01-22 12:25:41.899859: step: 312/466, loss: 0.055863045156002045 2023-01-22 12:25:42.487631: step: 314/466, loss: 0.034730829298496246 2023-01-22 12:25:43.128260: step: 316/466, loss: 0.004963377956300974 2023-01-22 12:25:43.770264: step: 318/466, loss: 0.06198538467288017 2023-01-22 12:25:44.467560: step: 320/466, loss: 0.4823494255542755 2023-01-22 12:25:45.186940: step: 322/466, loss: 0.02422151528298855 2023-01-22 12:25:45.842811: step: 324/466, loss: 0.01642017997801304 2023-01-22 12:25:46.489678: step: 326/466, loss: 0.2166920006275177 2023-01-22 12:25:47.138045: step: 328/466, loss: 0.10384485125541687 2023-01-22 12:25:47.877834: step: 330/466, loss: 0.0906425192952156 2023-01-22 12:25:48.583707: step: 332/466, loss: 0.04465954750776291 2023-01-22 12:25:49.232616: step: 334/466, loss: 0.02751227281987667 2023-01-22 12:25:49.950497: step: 336/466, loss: 0.019835565239191055 2023-01-22 12:25:50.616241: step: 338/466, loss: 0.059981536120176315 2023-01-22 12:25:51.178905: step: 340/466, loss: 0.06736723333597183 2023-01-22 12:25:51.796817: step: 342/466, loss: 0.21253731846809387 2023-01-22 12:25:52.473751: step: 344/466, loss: 0.022313419729471207 2023-01-22 12:25:53.100797: step: 346/466, loss: 0.03751381114125252 2023-01-22 12:25:53.766750: step: 348/466, loss: 0.030055083334445953 2023-01-22 12:25:54.460718: step: 350/466, loss: 0.022654645144939423 2023-01-22 12:25:55.103614: step: 352/466, loss: 0.1595204770565033 2023-01-22 12:25:55.815543: step: 354/466, loss: 0.016580000519752502 2023-01-22 12:25:56.460846: step: 356/466, loss: 0.07218082994222641 2023-01-22 12:25:57.106053: step: 358/466, loss: 0.6492629647254944 2023-01-22 12:25:57.761080: step: 360/466, loss: 0.1036364808678627 2023-01-22 12:25:58.453263: step: 362/466, loss: 0.14433376491069794 2023-01-22 12:25:59.071431: step: 364/466, loss: 0.04885255917906761 2023-01-22 12:25:59.736167: step: 366/466, loss: 0.014400053769350052 2023-01-22 12:26:00.392139: step: 368/466, loss: 0.04731348156929016 2023-01-22 12:26:01.064887: step: 370/466, loss: 0.04398616775870323 2023-01-22 12:26:01.709223: step: 372/466, loss: 8.719284057617188 2023-01-22 12:26:02.400841: step: 374/466, loss: 0.04839807003736496 2023-01-22 12:26:03.050707: step: 376/466, loss: 0.015130757354199886 2023-01-22 12:26:03.769567: step: 378/466, loss: 4.531932830810547 2023-01-22 12:26:04.440461: step: 380/466, loss: 0.002152061089873314 2023-01-22 12:26:05.072850: step: 382/466, loss: 0.07399486005306244 2023-01-22 12:26:05.713436: step: 384/466, loss: 0.026347309350967407 2023-01-22 12:26:06.347569: step: 386/466, loss: 0.009930618107318878 2023-01-22 12:26:07.052152: step: 388/466, loss: 0.029909221455454826 2023-01-22 12:26:07.622103: step: 390/466, loss: 0.04094990715384483 2023-01-22 12:26:08.322951: step: 392/466, loss: 0.06808948516845703 2023-01-22 12:26:08.944024: step: 394/466, loss: 0.05577511712908745 2023-01-22 12:26:09.584964: step: 396/466, loss: 0.026923881843686104 2023-01-22 12:26:10.229697: step: 398/466, loss: 0.10875479131937027 2023-01-22 12:26:10.841462: step: 400/466, loss: 0.026609651744365692 2023-01-22 12:26:11.478246: step: 402/466, loss: 0.028742486611008644 2023-01-22 12:26:11.997559: step: 404/466, loss: 0.006182553246617317 2023-01-22 12:26:12.574513: step: 406/466, loss: 0.5232219696044922 2023-01-22 12:26:13.244565: step: 408/466, loss: 0.03529982641339302 2023-01-22 12:26:13.866612: step: 410/466, loss: 0.025698933750391006 2023-01-22 12:26:14.490174: step: 412/466, loss: 0.062040552496910095 2023-01-22 12:26:15.125473: step: 414/466, loss: 0.015775354579091072 2023-01-22 12:26:15.740615: step: 416/466, loss: 0.03277741000056267 2023-01-22 12:26:16.427088: step: 418/466, loss: 0.04921555519104004 2023-01-22 12:26:17.082316: step: 420/466, loss: 0.006552983541041613 2023-01-22 12:26:17.753638: step: 422/466, loss: 0.010042181238532066 2023-01-22 12:26:18.385098: step: 424/466, loss: 0.010497386567294598 2023-01-22 12:26:19.008004: step: 426/466, loss: 0.015600942075252533 2023-01-22 12:26:19.642861: step: 428/466, loss: 0.09118566662073135 2023-01-22 12:26:20.280565: step: 430/466, loss: 0.08123798668384552 2023-01-22 12:26:21.009219: step: 432/466, loss: 0.0365171805024147 2023-01-22 12:26:21.698861: step: 434/466, loss: 0.06810249388217926 2023-01-22 12:26:22.321851: step: 436/466, loss: 0.04731258004903793 2023-01-22 12:26:23.075562: step: 438/466, loss: 0.04645892605185509 2023-01-22 12:26:23.824815: step: 440/466, loss: 0.03728519007563591 2023-01-22 12:26:24.457648: step: 442/466, loss: 0.016282543540000916 2023-01-22 12:26:25.124247: step: 444/466, loss: 0.022868501022458076 2023-01-22 12:26:25.724471: step: 446/466, loss: 0.027541210874915123 2023-01-22 12:26:26.388040: step: 448/466, loss: 0.00614620978012681 2023-01-22 12:26:27.039316: step: 450/466, loss: 0.01625632867217064 2023-01-22 12:26:27.736506: step: 452/466, loss: 0.012641062028706074 2023-01-22 12:26:28.350709: step: 454/466, loss: 0.015201380476355553 2023-01-22 12:26:29.038354: step: 456/466, loss: 0.04819103702902794 2023-01-22 12:26:29.668474: step: 458/466, loss: 0.005609402433037758 2023-01-22 12:26:30.302007: step: 460/466, loss: 0.016664976254105568 2023-01-22 12:26:30.991877: step: 462/466, loss: 0.019776416942477226 2023-01-22 12:26:31.630331: step: 464/466, loss: 0.009411384351551533 2023-01-22 12:26:32.317605: step: 466/466, loss: 0.02541434019804001 2023-01-22 12:26:32.917653: step: 468/466, loss: 0.016636408865451813 2023-01-22 12:26:33.613946: step: 470/466, loss: 0.03303654119372368 2023-01-22 12:26:34.185075: step: 472/466, loss: 0.054419100284576416 2023-01-22 12:26:34.825131: step: 474/466, loss: 0.09407573938369751 2023-01-22 12:26:35.492002: step: 476/466, loss: 0.034601762890815735 2023-01-22 12:26:36.152484: step: 478/466, loss: 0.02005459927022457 2023-01-22 12:26:36.796263: step: 480/466, loss: 0.02703125588595867 2023-01-22 12:26:37.436514: step: 482/466, loss: 0.0024854594375938177 2023-01-22 12:26:38.050475: step: 484/466, loss: 0.04000410437583923 2023-01-22 12:26:38.689949: step: 486/466, loss: 0.22972926497459412 2023-01-22 12:26:39.377000: step: 488/466, loss: 0.1464962661266327 2023-01-22 12:26:40.003149: step: 490/466, loss: 0.21413233876228333 2023-01-22 12:26:40.648271: step: 492/466, loss: 0.017804233357310295 2023-01-22 12:26:41.355047: step: 494/466, loss: 0.018891816958785057 2023-01-22 12:26:42.056435: step: 496/466, loss: 0.05741456523537636 2023-01-22 12:26:42.658203: step: 498/466, loss: 0.06254232674837112 2023-01-22 12:26:43.288946: step: 500/466, loss: 0.0880371630191803 2023-01-22 12:26:43.929911: step: 502/466, loss: 0.026517130434513092 2023-01-22 12:26:44.544469: step: 504/466, loss: 0.015300702303647995 2023-01-22 12:26:45.140593: step: 506/466, loss: 0.019848767668008804 2023-01-22 12:26:45.851449: step: 508/466, loss: 0.5972859263420105 2023-01-22 12:26:46.455419: step: 510/466, loss: 0.002276431303471327 2023-01-22 12:26:47.195555: step: 512/466, loss: 0.02450762875378132 2023-01-22 12:26:47.777547: step: 514/466, loss: 0.06573548167943954 2023-01-22 12:26:48.434767: step: 516/466, loss: 0.10455503314733505 2023-01-22 12:26:49.060898: step: 518/466, loss: 0.04081840068101883 2023-01-22 12:26:49.742155: step: 520/466, loss: 0.0435999259352684 2023-01-22 12:26:50.385029: step: 522/466, loss: 0.019436439499258995 2023-01-22 12:26:51.020775: step: 524/466, loss: 0.07334206253290176 2023-01-22 12:26:51.698775: step: 526/466, loss: 0.2929591238498688 2023-01-22 12:26:52.353298: step: 528/466, loss: 0.02410922572016716 2023-01-22 12:26:53.055861: step: 530/466, loss: 0.08006177097558975 2023-01-22 12:26:53.636264: step: 532/466, loss: 0.07058415561914444 2023-01-22 12:26:54.243737: step: 534/466, loss: 0.06360165774822235 2023-01-22 12:26:54.870220: step: 536/466, loss: 0.012963922694325447 2023-01-22 12:26:55.578505: step: 538/466, loss: 0.005889566615223885 2023-01-22 12:26:56.256418: step: 540/466, loss: 0.0956955999135971 2023-01-22 12:26:56.953199: step: 542/466, loss: 0.014393622055649757 2023-01-22 12:26:57.615410: step: 544/466, loss: 0.2035215198993683 2023-01-22 12:26:58.256649: step: 546/466, loss: 0.061057738959789276 2023-01-22 12:26:58.942857: step: 548/466, loss: 0.049713004380464554 2023-01-22 12:26:59.650294: step: 550/466, loss: 0.005676799453794956 2023-01-22 12:27:00.259403: step: 552/466, loss: 0.015204534865915775 2023-01-22 12:27:00.923918: step: 554/466, loss: 0.06786204129457474 2023-01-22 12:27:01.589791: step: 556/466, loss: 0.055277228355407715 2023-01-22 12:27:02.229236: step: 558/466, loss: 0.2268158197402954 2023-01-22 12:27:02.908049: step: 560/466, loss: 0.03270851820707321 2023-01-22 12:27:03.642825: step: 562/466, loss: 0.08453086018562317 2023-01-22 12:27:04.332947: step: 564/466, loss: 0.15126405656337738 2023-01-22 12:27:04.992805: step: 566/466, loss: 0.18492238223552704 2023-01-22 12:27:05.635897: step: 568/466, loss: 0.015301107428967953 2023-01-22 12:27:06.292789: step: 570/466, loss: 0.016525086015462875 2023-01-22 12:27:06.916292: step: 572/466, loss: 0.07693120837211609 2023-01-22 12:27:07.514712: step: 574/466, loss: 0.011329410597682 2023-01-22 12:27:08.187459: step: 576/466, loss: 0.14618626236915588 2023-01-22 12:27:08.826377: step: 578/466, loss: 0.23490644991397858 2023-01-22 12:27:09.525758: step: 580/466, loss: 0.0417061522603035 2023-01-22 12:27:10.150736: step: 582/466, loss: 0.03330326825380325 2023-01-22 12:27:10.806999: step: 584/466, loss: 0.013184239156544209 2023-01-22 12:27:11.413418: step: 586/466, loss: 0.01833726279437542 2023-01-22 12:27:12.074948: step: 588/466, loss: 0.1240232065320015 2023-01-22 12:27:12.742575: step: 590/466, loss: 0.16520902514457703 2023-01-22 12:27:13.337250: step: 592/466, loss: 0.05892754718661308 2023-01-22 12:27:13.962408: step: 594/466, loss: 0.025030579417943954 2023-01-22 12:27:14.570788: step: 596/466, loss: 0.04172952473163605 2023-01-22 12:27:15.250440: step: 598/466, loss: 0.017421064898371696 2023-01-22 12:27:15.890797: step: 600/466, loss: 0.0441550612449646 2023-01-22 12:27:16.537082: step: 602/466, loss: 0.009792739525437355 2023-01-22 12:27:17.100776: step: 604/466, loss: 0.034416936337947845 2023-01-22 12:27:17.688693: step: 606/466, loss: 0.00828731432557106 2023-01-22 12:27:18.401040: step: 608/466, loss: 0.03894294798374176 2023-01-22 12:27:19.141533: step: 610/466, loss: 0.5128212571144104 2023-01-22 12:27:19.780314: step: 612/466, loss: 0.014220272190868855 2023-01-22 12:27:20.426180: step: 614/466, loss: 0.05291266366839409 2023-01-22 12:27:21.063141: step: 616/466, loss: 0.08025780320167542 2023-01-22 12:27:21.741566: step: 618/466, loss: 0.014249353669583797 2023-01-22 12:27:22.492797: step: 620/466, loss: 0.09287619590759277 2023-01-22 12:27:23.119471: step: 622/466, loss: 0.022892482578754425 2023-01-22 12:27:23.763595: step: 624/466, loss: 0.029883043840527534 2023-01-22 12:27:24.399266: step: 626/466, loss: 0.04735163226723671 2023-01-22 12:27:25.014560: step: 628/466, loss: 0.051800090819597244 2023-01-22 12:27:25.642646: step: 630/466, loss: 0.13179709017276764 2023-01-22 12:27:26.351650: step: 632/466, loss: 0.01729048229753971 2023-01-22 12:27:27.075033: step: 634/466, loss: 0.07633604854345322 2023-01-22 12:27:27.852151: step: 636/466, loss: 0.023014308884739876 2023-01-22 12:27:28.476329: step: 638/466, loss: 0.0821717232465744 2023-01-22 12:27:29.165149: step: 640/466, loss: 0.01899549923837185 2023-01-22 12:27:29.813099: step: 642/466, loss: 0.007269714493304491 2023-01-22 12:27:30.489865: step: 644/466, loss: 0.027033589780330658 2023-01-22 12:27:31.134143: step: 646/466, loss: 0.061697300523519516 2023-01-22 12:27:31.811827: step: 648/466, loss: 0.06357747316360474 2023-01-22 12:27:32.465927: step: 650/466, loss: 0.3793969452381134 2023-01-22 12:27:33.106519: step: 652/466, loss: 0.04672146216034889 2023-01-22 12:27:33.732142: step: 654/466, loss: 0.00910738855600357 2023-01-22 12:27:34.374319: step: 656/466, loss: 0.008787152357399464 2023-01-22 12:27:35.043696: step: 658/466, loss: 0.027202855795621872 2023-01-22 12:27:35.746320: step: 660/466, loss: 0.032332953065633774 2023-01-22 12:27:36.453581: step: 662/466, loss: 0.0030818090308457613 2023-01-22 12:27:37.116736: step: 664/466, loss: 0.04429381713271141 2023-01-22 12:27:37.737075: step: 666/466, loss: 0.24332015216350555 2023-01-22 12:27:38.467578: step: 668/466, loss: 0.07040964066982269 2023-01-22 12:27:39.075820: step: 670/466, loss: 0.012174397706985474 2023-01-22 12:27:39.782492: step: 672/466, loss: 0.5175571441650391 2023-01-22 12:27:40.408961: step: 674/466, loss: 0.047638919204473495 2023-01-22 12:27:41.054077: step: 676/466, loss: 0.781222939491272 2023-01-22 12:27:41.680662: step: 678/466, loss: 0.08504980802536011 2023-01-22 12:27:42.364394: step: 680/466, loss: 0.001895928755402565 2023-01-22 12:27:43.002025: step: 682/466, loss: 0.15976615250110626 2023-01-22 12:27:43.605906: step: 684/466, loss: 0.013469494879245758 2023-01-22 12:27:44.262823: step: 686/466, loss: 0.02041921205818653 2023-01-22 12:27:44.913963: step: 688/466, loss: 0.008028162643313408 2023-01-22 12:27:45.630633: step: 690/466, loss: 0.17626157402992249 2023-01-22 12:27:46.316350: step: 692/466, loss: 0.8268342614173889 2023-01-22 12:27:47.003238: step: 694/466, loss: 0.08224974572658539 2023-01-22 12:27:47.660630: step: 696/466, loss: 0.03389971703290939 2023-01-22 12:27:48.345761: step: 698/466, loss: 0.031209105625748634 2023-01-22 12:27:49.020580: step: 700/466, loss: 0.0534808486700058 2023-01-22 12:27:49.649868: step: 702/466, loss: 0.12038559466600418 2023-01-22 12:27:50.297649: step: 704/466, loss: 0.04523187503218651 2023-01-22 12:27:50.978133: step: 706/466, loss: 0.026461729779839516 2023-01-22 12:27:51.708654: step: 708/466, loss: 0.14966435730457306 2023-01-22 12:27:52.344428: step: 710/466, loss: 0.025082498788833618 2023-01-22 12:27:53.064259: step: 712/466, loss: 0.0047665368765592575 2023-01-22 12:27:53.708050: step: 714/466, loss: 0.025602247565984726 2023-01-22 12:27:54.351398: step: 716/466, loss: 0.05528547987341881 2023-01-22 12:27:55.026545: step: 718/466, loss: 0.030770858749747276 2023-01-22 12:27:55.632129: step: 720/466, loss: 0.08510482311248779 2023-01-22 12:27:56.244584: step: 722/466, loss: 0.01946280337870121 2023-01-22 12:27:56.930132: step: 724/466, loss: 0.030918046832084656 2023-01-22 12:27:57.551315: step: 726/466, loss: 0.026320137083530426 2023-01-22 12:27:58.180934: step: 728/466, loss: 0.035525575280189514 2023-01-22 12:27:58.863846: step: 730/466, loss: 0.23102110624313354 2023-01-22 12:27:59.520535: step: 732/466, loss: 0.05604511499404907 2023-01-22 12:28:00.163768: step: 734/466, loss: 0.1168517991900444 2023-01-22 12:28:00.774424: step: 736/466, loss: 0.02180151827633381 2023-01-22 12:28:01.425156: step: 738/466, loss: 0.008714184165000916 2023-01-22 12:28:02.099313: step: 740/466, loss: 0.0680922344326973 2023-01-22 12:28:02.786229: step: 742/466, loss: 0.02514813095331192 2023-01-22 12:28:03.408009: step: 744/466, loss: 0.017655352130532265 2023-01-22 12:28:04.015101: step: 746/466, loss: 0.06468646228313446 2023-01-22 12:28:04.607179: step: 748/466, loss: 0.009327889420092106 2023-01-22 12:28:05.207817: step: 750/466, loss: 0.018987977877259254 2023-01-22 12:28:05.859708: step: 752/466, loss: 0.017294151708483696 2023-01-22 12:28:06.558029: step: 754/466, loss: 0.01210116222500801 2023-01-22 12:28:07.237158: step: 756/466, loss: 0.5217264294624329 2023-01-22 12:28:07.901645: step: 758/466, loss: 0.12612611055374146 2023-01-22 12:28:08.566430: step: 760/466, loss: 0.6621501445770264 2023-01-22 12:28:09.131857: step: 762/466, loss: 0.0022763311862945557 2023-01-22 12:28:09.741963: step: 764/466, loss: 0.012427791953086853 2023-01-22 12:28:10.393240: step: 766/466, loss: 0.06161829084157944 2023-01-22 12:28:11.092168: step: 768/466, loss: 0.0627053752541542 2023-01-22 12:28:11.775161: step: 770/466, loss: 0.0074045998044312 2023-01-22 12:28:12.425325: step: 772/466, loss: 0.05282897502183914 2023-01-22 12:28:13.064646: step: 774/466, loss: 0.01831836998462677 2023-01-22 12:28:13.711817: step: 776/466, loss: 0.026106664910912514 2023-01-22 12:28:14.376401: step: 778/466, loss: 0.048861004412174225 2023-01-22 12:28:14.983830: step: 780/466, loss: 0.006538981571793556 2023-01-22 12:28:15.665983: step: 782/466, loss: 0.04800443723797798 2023-01-22 12:28:16.329755: step: 784/466, loss: 0.03181067481637001 2023-01-22 12:28:16.991079: step: 786/466, loss: 0.012432624585926533 2023-01-22 12:28:17.675061: step: 788/466, loss: 0.1364171802997589 2023-01-22 12:28:18.284966: step: 790/466, loss: 0.011952829547226429 2023-01-22 12:28:18.878525: step: 792/466, loss: 0.3651338517665863 2023-01-22 12:28:19.541868: step: 794/466, loss: 0.14958646893501282 2023-01-22 12:28:20.198116: step: 796/466, loss: 0.013606944121420383 2023-01-22 12:28:20.856538: step: 798/466, loss: 0.024553818628191948 2023-01-22 12:28:21.488850: step: 800/466, loss: 0.018948564305901527 2023-01-22 12:28:22.157924: step: 802/466, loss: 0.052254047244787216 2023-01-22 12:28:22.761434: step: 804/466, loss: 0.08240722119808197 2023-01-22 12:28:23.367629: step: 806/466, loss: 0.050371814519166946 2023-01-22 12:28:24.037037: step: 808/466, loss: 0.0409635566174984 2023-01-22 12:28:24.693418: step: 810/466, loss: 0.060565996915102005 2023-01-22 12:28:25.345409: step: 812/466, loss: 0.031367238610982895 2023-01-22 12:28:25.974112: step: 814/466, loss: 0.08814281970262527 2023-01-22 12:28:26.676351: step: 816/466, loss: 0.04216299206018448 2023-01-22 12:28:27.425413: step: 818/466, loss: 0.021370599046349525 2023-01-22 12:28:28.030571: step: 820/466, loss: 0.021927153691649437 2023-01-22 12:28:28.672862: step: 822/466, loss: 0.03202419355511665 2023-01-22 12:28:29.309235: step: 824/466, loss: 0.8685194253921509 2023-01-22 12:28:29.937598: step: 826/466, loss: 0.10557854175567627 2023-01-22 12:28:30.529701: step: 828/466, loss: 0.027995627373456955 2023-01-22 12:28:31.172235: step: 830/466, loss: 0.008571329526603222 2023-01-22 12:28:31.875787: step: 832/466, loss: 0.017078906297683716 2023-01-22 12:28:32.531688: step: 834/466, loss: 0.06253548711538315 2023-01-22 12:28:33.204465: step: 836/466, loss: 0.03214738890528679 2023-01-22 12:28:34.020863: step: 838/466, loss: 0.11375249922275543 2023-01-22 12:28:34.681706: step: 840/466, loss: 0.010107310488820076 2023-01-22 12:28:35.315744: step: 842/466, loss: 0.0403822585940361 2023-01-22 12:28:35.950907: step: 844/466, loss: 0.07216506451368332 2023-01-22 12:28:36.559085: step: 846/466, loss: 0.03277384862303734 2023-01-22 12:28:37.233940: step: 848/466, loss: 1.960360050201416 2023-01-22 12:28:37.802881: step: 850/466, loss: 0.028355680406093597 2023-01-22 12:28:38.382397: step: 852/466, loss: 0.049147844314575195 2023-01-22 12:28:39.069795: step: 854/466, loss: 0.08806189894676208 2023-01-22 12:28:39.697802: step: 856/466, loss: 0.19922910630702972 2023-01-22 12:28:40.336359: step: 858/466, loss: 0.017885398119688034 2023-01-22 12:28:40.959973: step: 860/466, loss: 0.02041354402899742 2023-01-22 12:28:41.664574: step: 862/466, loss: 0.04174542427062988 2023-01-22 12:28:42.332389: step: 864/466, loss: 0.011507490649819374 2023-01-22 12:28:43.082515: step: 866/466, loss: 0.00754587771371007 2023-01-22 12:28:43.763688: step: 868/466, loss: 0.13759632408618927 2023-01-22 12:28:44.452990: step: 870/466, loss: 0.008561786264181137 2023-01-22 12:28:45.110337: step: 872/466, loss: 0.019189957529306412 2023-01-22 12:28:45.768369: step: 874/466, loss: 0.08911080658435822 2023-01-22 12:28:46.433045: step: 876/466, loss: 0.03029986470937729 2023-01-22 12:28:47.076399: step: 878/466, loss: 0.014277573674917221 2023-01-22 12:28:47.733891: step: 880/466, loss: 0.001170774339698255 2023-01-22 12:28:48.381529: step: 882/466, loss: 0.04861774295568466 2023-01-22 12:28:49.022117: step: 884/466, loss: 0.04374024271965027 2023-01-22 12:28:49.647459: step: 886/466, loss: 0.02988075092434883 2023-01-22 12:28:50.236333: step: 888/466, loss: 0.03934662044048309 2023-01-22 12:28:50.898426: step: 890/466, loss: 0.06640966981649399 2023-01-22 12:28:51.603491: step: 892/466, loss: 0.03840658813714981 2023-01-22 12:28:52.232288: step: 894/466, loss: 0.017629683017730713 2023-01-22 12:28:52.936863: step: 896/466, loss: 0.0430067740380764 2023-01-22 12:28:53.610071: step: 898/466, loss: 0.025082241743803024 2023-01-22 12:28:54.307048: step: 900/466, loss: 0.06866186112165451 2023-01-22 12:28:55.030925: step: 902/466, loss: 0.08462058007717133 2023-01-22 12:28:55.677529: step: 904/466, loss: 0.041680388152599335 2023-01-22 12:28:56.313893: step: 906/466, loss: 0.04381396621465683 2023-01-22 12:28:56.974843: step: 908/466, loss: 0.0076791406609117985 2023-01-22 12:28:57.627224: step: 910/466, loss: 0.0775487944483757 2023-01-22 12:28:58.284402: step: 912/466, loss: 0.019085828214883804 2023-01-22 12:28:58.921464: step: 914/466, loss: 0.050074320286512375 2023-01-22 12:28:59.587538: step: 916/466, loss: 0.0503820925951004 2023-01-22 12:29:00.283270: step: 918/466, loss: 0.1022348552942276 2023-01-22 12:29:00.930801: step: 920/466, loss: 0.0878489688038826 2023-01-22 12:29:01.701022: step: 922/466, loss: 0.08654739707708359 2023-01-22 12:29:02.342758: step: 924/466, loss: 0.01694355346262455 2023-01-22 12:29:02.966648: step: 926/466, loss: 0.17691993713378906 2023-01-22 12:29:03.627023: step: 928/466, loss: 0.07006848603487015 2023-01-22 12:29:04.220363: step: 930/466, loss: 0.008420702069997787 2023-01-22 12:29:04.750189: step: 932/466, loss: 0.44087299704551697 ================================================== Loss: 0.106 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3076343201754386, 'r': 0.3549177735610374, 'f1': 0.3295888399412629}, 'combined': 0.24285493469356215, 'epoch': 23} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3498015091350006, 'r': 0.32468600285627647, 'f1': 0.3367761501138229}, 'combined': 0.2233541306454369, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29927884615384615, 'r': 0.2947443181818182, 'f1': 0.2969942748091603}, 'combined': 0.19799618320610685, 'epoch': 23} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.36184074843658587, 'r': 0.30696888450556115, 'f1': 0.33215386096522975}, 'combined': 0.21677409873520254, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2874809714095428, 'r': 0.3474864872635271, 'f1': 0.31464841716130376}, 'combined': 0.2318462021188554, 'epoch': 23} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3397515828631042, 'r': 0.3070434946603249, 'f1': 0.3225705233196853}, 'combined': 0.21393278230528348, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23809523809523808, 'r': 0.38095238095238093, 'f1': 0.293040293040293}, 'combined': 0.19536019536019533, 'epoch': 23} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4625, 'r': 0.40217391304347827, 'f1': 0.43023255813953487}, 'combined': 0.28682170542635654, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4117647058823529, 'r': 0.2413793103448276, 'f1': 0.3043478260869565}, 'combined': 0.20289855072463764, 'epoch': 23} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2874809714095428, 'r': 0.3474864872635271, 'f1': 0.31464841716130376}, 'combined': 0.2318462021188554, 'epoch': 23} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3397515828631042, 'r': 0.3070434946603249, 'f1': 0.3225705233196853}, 'combined': 0.21393278230528348, 'epoch': 23} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4117647058823529, 'r': 0.2413793103448276, 'f1': 0.3043478260869565}, 'combined': 0.20289855072463764, 'epoch': 23} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:31:37.109949: step: 2/466, loss: 0.5174586772918701 2023-01-22 12:31:37.672666: step: 4/466, loss: 0.009579693898558617 2023-01-22 12:31:38.336963: step: 6/466, loss: 0.09315939247608185 2023-01-22 12:31:38.889385: step: 8/466, loss: 0.03629219904541969 2023-01-22 12:31:39.471687: step: 10/466, loss: 0.016620714217424393 2023-01-22 12:31:40.044723: step: 12/466, loss: 0.031349532306194305 2023-01-22 12:31:40.657238: step: 14/466, loss: 0.25620338320732117 2023-01-22 12:31:41.272616: step: 16/466, loss: 0.04357297345995903 2023-01-22 12:31:41.898798: step: 18/466, loss: 0.004880046471953392 2023-01-22 12:31:42.503828: step: 20/466, loss: 0.017147121950984 2023-01-22 12:31:43.122582: step: 22/466, loss: 0.11786799132823944 2023-01-22 12:31:43.718377: step: 24/466, loss: 0.021733110770583153 2023-01-22 12:31:44.361071: step: 26/466, loss: 0.031022317707538605 2023-01-22 12:31:45.028982: step: 28/466, loss: 0.04381708428263664 2023-01-22 12:31:45.633205: step: 30/466, loss: 0.061711300164461136 2023-01-22 12:31:46.218607: step: 32/466, loss: 0.015330356545746326 2023-01-22 12:31:46.829765: step: 34/466, loss: 0.022332649677991867 2023-01-22 12:31:47.443078: step: 36/466, loss: 0.048555828630924225 2023-01-22 12:31:48.009185: step: 38/466, loss: 0.022368174046278 2023-01-22 12:31:48.713185: step: 40/466, loss: 0.042115744203329086 2023-01-22 12:31:49.287093: step: 42/466, loss: 0.05961944907903671 2023-01-22 12:31:49.911853: step: 44/466, loss: 0.014870070852339268 2023-01-22 12:31:50.504717: step: 46/466, loss: 0.3316933214664459 2023-01-22 12:31:51.105106: step: 48/466, loss: 0.022402647882699966 2023-01-22 12:31:51.762888: step: 50/466, loss: 0.20861859619617462 2023-01-22 12:31:52.367615: step: 52/466, loss: 0.03810880705714226 2023-01-22 12:31:53.013424: step: 54/466, loss: 0.03817550092935562 2023-01-22 12:31:53.585727: step: 56/466, loss: 0.002065233187749982 2023-01-22 12:31:54.254825: step: 58/466, loss: 0.011738712899386883 2023-01-22 12:31:54.823726: step: 60/466, loss: 0.007858922705054283 2023-01-22 12:31:55.455380: step: 62/466, loss: 0.014686870388686657 2023-01-22 12:31:56.080837: step: 64/466, loss: 0.007629875093698502 2023-01-22 12:31:56.677623: step: 66/466, loss: 0.015685701742768288 2023-01-22 12:31:57.293707: step: 68/466, loss: 0.0027154877316206694 2023-01-22 12:31:57.869316: step: 70/466, loss: 0.0007991743623279035 2023-01-22 12:31:58.456348: step: 72/466, loss: 0.05554645135998726 2023-01-22 12:31:59.118805: step: 74/466, loss: 0.023773424327373505 2023-01-22 12:31:59.694558: step: 76/466, loss: 0.043621812015771866 2023-01-22 12:32:00.325539: step: 78/466, loss: 0.08062449097633362 2023-01-22 12:32:00.893472: step: 80/466, loss: 0.11252401769161224 2023-01-22 12:32:01.508169: step: 82/466, loss: 0.15902875363826752 2023-01-22 12:32:02.145469: step: 84/466, loss: 0.05257668346166611 2023-01-22 12:32:02.764844: step: 86/466, loss: 0.022970149293541908 2023-01-22 12:32:03.319381: step: 88/466, loss: 0.04428599402308464 2023-01-22 12:32:03.960957: step: 90/466, loss: 0.21043623983860016 2023-01-22 12:32:04.602394: step: 92/466, loss: 0.05659813806414604 2023-01-22 12:32:05.176483: step: 94/466, loss: 0.02158990129828453 2023-01-22 12:32:05.734063: step: 96/466, loss: 0.0067445761524140835 2023-01-22 12:32:06.317120: step: 98/466, loss: 0.06192212179303169 2023-01-22 12:32:06.987266: step: 100/466, loss: 0.04253120347857475 2023-01-22 12:32:07.556875: step: 102/466, loss: 0.06492670625448227 2023-01-22 12:32:08.163478: step: 104/466, loss: 0.04266819730401039 2023-01-22 12:32:08.724792: step: 106/466, loss: 0.036097414791584015 2023-01-22 12:32:09.422028: step: 108/466, loss: 0.2894755005836487 2023-01-22 12:32:10.051215: step: 110/466, loss: 0.05016065761446953 2023-01-22 12:32:10.750516: step: 112/466, loss: 0.043521471321582794 2023-01-22 12:32:11.411535: step: 114/466, loss: 0.10526257008314133 2023-01-22 12:32:12.030436: step: 116/466, loss: 0.0045556980185210705 2023-01-22 12:32:12.648316: step: 118/466, loss: 4.002925872802734 2023-01-22 12:32:13.236307: step: 120/466, loss: 0.016178200021386147 2023-01-22 12:32:13.857946: step: 122/466, loss: 0.03486013412475586 2023-01-22 12:32:14.538904: step: 124/466, loss: 0.11223459243774414 2023-01-22 12:32:15.111941: step: 126/466, loss: 0.01602235622704029 2023-01-22 12:32:15.717642: step: 128/466, loss: 0.03682239353656769 2023-01-22 12:32:16.344946: step: 130/466, loss: 0.03416857123374939 2023-01-22 12:32:16.967878: step: 132/466, loss: 0.08398226648569107 2023-01-22 12:32:17.532632: step: 134/466, loss: 0.0524989515542984 2023-01-22 12:32:18.164866: step: 136/466, loss: 0.01495880726724863 2023-01-22 12:32:18.735057: step: 138/466, loss: 0.0672888457775116 2023-01-22 12:32:19.393722: step: 140/466, loss: 0.03272339329123497 2023-01-22 12:32:19.994472: step: 142/466, loss: 0.010015328414738178 2023-01-22 12:32:20.598439: step: 144/466, loss: 0.023423034697771072 2023-01-22 12:32:21.193817: step: 146/466, loss: 0.012548690661787987 2023-01-22 12:32:21.832179: step: 148/466, loss: 0.009754863567650318 2023-01-22 12:32:22.407537: step: 150/466, loss: 0.0527384988963604 2023-01-22 12:32:22.991130: step: 152/466, loss: 0.00016302397125400603 2023-01-22 12:32:23.546974: step: 154/466, loss: 0.040271829813718796 2023-01-22 12:32:24.141861: step: 156/466, loss: 0.023221759125590324 2023-01-22 12:32:24.768069: step: 158/466, loss: 0.005486933048814535 2023-01-22 12:32:25.396994: step: 160/466, loss: 0.14957301318645477 2023-01-22 12:32:25.996123: step: 162/466, loss: 0.040905553847551346 2023-01-22 12:32:26.631326: step: 164/466, loss: 0.14041171967983246 2023-01-22 12:32:27.224594: step: 166/466, loss: 0.045817721635103226 2023-01-22 12:32:27.825980: step: 168/466, loss: 0.020683517679572105 2023-01-22 12:32:28.339595: step: 170/466, loss: 0.12141299247741699 2023-01-22 12:32:28.954198: step: 172/466, loss: 0.03760865703225136 2023-01-22 12:32:29.586579: step: 174/466, loss: 0.030975865200161934 2023-01-22 12:32:30.140025: step: 176/466, loss: 0.02207639068365097 2023-01-22 12:32:30.790940: step: 178/466, loss: 0.017290756106376648 2023-01-22 12:32:31.372721: step: 180/466, loss: 0.0924760028719902 2023-01-22 12:32:31.958626: step: 182/466, loss: 0.02489081583917141 2023-01-22 12:32:32.602159: step: 184/466, loss: 0.010242631658911705 2023-01-22 12:32:33.196151: step: 186/466, loss: 0.023558583110570908 2023-01-22 12:32:33.812487: step: 188/466, loss: 0.07605066150426865 2023-01-22 12:32:34.375049: step: 190/466, loss: 0.0016990734729915857 2023-01-22 12:32:34.979041: step: 192/466, loss: 0.005150969605892897 2023-01-22 12:32:35.555085: step: 194/466, loss: 0.012659971602261066 2023-01-22 12:32:36.177397: step: 196/466, loss: 0.014583379961550236 2023-01-22 12:32:36.813615: step: 198/466, loss: 0.010530868545174599 2023-01-22 12:32:37.380188: step: 200/466, loss: 0.04471125826239586 2023-01-22 12:32:38.051404: step: 202/466, loss: 0.04739345610141754 2023-01-22 12:32:38.682597: step: 204/466, loss: 0.07824733853340149 2023-01-22 12:32:39.252666: step: 206/466, loss: 0.0018017380498349667 2023-01-22 12:32:39.794506: step: 208/466, loss: 0.07637045532464981 2023-01-22 12:32:40.429597: step: 210/466, loss: 0.07172827422618866 2023-01-22 12:32:41.054389: step: 212/466, loss: 0.10092227905988693 2023-01-22 12:32:41.705354: step: 214/466, loss: 0.023817284032702446 2023-01-22 12:32:42.317788: step: 216/466, loss: 0.29648497700691223 2023-01-22 12:32:42.879712: step: 218/466, loss: 0.010222791694104671 2023-01-22 12:32:43.579098: step: 220/466, loss: 0.01697523146867752 2023-01-22 12:32:44.158036: step: 222/466, loss: 0.03982623293995857 2023-01-22 12:32:44.735739: step: 224/466, loss: 0.16172361373901367 2023-01-22 12:32:45.365594: step: 226/466, loss: 0.020176347345113754 2023-01-22 12:32:45.983032: step: 228/466, loss: 0.03942863643169403 2023-01-22 12:32:46.580677: step: 230/466, loss: 0.0117865651845932 2023-01-22 12:32:47.244296: step: 232/466, loss: 0.09619100391864777 2023-01-22 12:32:47.846054: step: 234/466, loss: 0.10768011957406998 2023-01-22 12:32:48.467043: step: 236/466, loss: 0.011421790346503258 2023-01-22 12:32:49.104731: step: 238/466, loss: 0.07146967202425003 2023-01-22 12:32:49.693456: step: 240/466, loss: 0.014844970777630806 2023-01-22 12:32:50.230561: step: 242/466, loss: 0.002789780031889677 2023-01-22 12:32:50.847484: step: 244/466, loss: 0.7855795621871948 2023-01-22 12:32:51.521726: step: 246/466, loss: 0.05135297030210495 2023-01-22 12:32:52.147018: step: 248/466, loss: 0.012107757851481438 2023-01-22 12:32:52.766805: step: 250/466, loss: 0.02014041505753994 2023-01-22 12:32:53.348284: step: 252/466, loss: 0.02659180946648121 2023-01-22 12:32:53.993625: step: 254/466, loss: 0.016569100320339203 2023-01-22 12:32:54.595106: step: 256/466, loss: 0.08627676963806152 2023-01-22 12:32:55.207723: step: 258/466, loss: 0.008308811113238335 2023-01-22 12:32:55.805452: step: 260/466, loss: 0.036741409450769424 2023-01-22 12:32:56.537076: step: 262/466, loss: 0.026793222874403 2023-01-22 12:32:57.119076: step: 264/466, loss: 0.04340960457921028 2023-01-22 12:32:57.678439: step: 266/466, loss: 0.015985840931534767 2023-01-22 12:32:58.258020: step: 268/466, loss: 0.02499624900519848 2023-01-22 12:32:58.945352: step: 270/466, loss: 0.6226954460144043 2023-01-22 12:32:59.559071: step: 272/466, loss: 0.052729010581970215 2023-01-22 12:33:00.164159: step: 274/466, loss: 0.023654548451304436 2023-01-22 12:33:00.698628: step: 276/466, loss: 0.0032235553953796625 2023-01-22 12:33:01.306526: step: 278/466, loss: 0.0302759800106287 2023-01-22 12:33:01.885223: step: 280/466, loss: 0.008658492006361485 2023-01-22 12:33:02.547477: step: 282/466, loss: 0.03539307788014412 2023-01-22 12:33:03.183254: step: 284/466, loss: 0.033288102596998215 2023-01-22 12:33:03.805225: step: 286/466, loss: 0.06405381113290787 2023-01-22 12:33:04.382588: step: 288/466, loss: 0.018247967585921288 2023-01-22 12:33:05.019666: step: 290/466, loss: 0.02976909652352333 2023-01-22 12:33:05.664804: step: 292/466, loss: 0.06236724928021431 2023-01-22 12:33:06.250426: step: 294/466, loss: 0.031794726848602295 2023-01-22 12:33:06.842098: step: 296/466, loss: 0.02337266318500042 2023-01-22 12:33:07.417076: step: 298/466, loss: 0.010144537314772606 2023-01-22 12:33:08.062208: step: 300/466, loss: 0.03884013369679451 2023-01-22 12:33:08.642608: step: 302/466, loss: 0.0067479852586984634 2023-01-22 12:33:09.181758: step: 304/466, loss: 0.0077685341238975525 2023-01-22 12:33:09.795226: step: 306/466, loss: 0.014038033783435822 2023-01-22 12:33:10.376182: step: 308/466, loss: 0.024625875055789948 2023-01-22 12:33:11.030363: step: 310/466, loss: 0.023192688822746277 2023-01-22 12:33:11.615160: step: 312/466, loss: 0.003318582195788622 2023-01-22 12:33:12.236629: step: 314/466, loss: 0.06751104444265366 2023-01-22 12:33:12.921338: step: 316/466, loss: 0.08357201516628265 2023-01-22 12:33:13.515048: step: 318/466, loss: 0.03922853246331215 2023-01-22 12:33:14.102629: step: 320/466, loss: 0.04735429957509041 2023-01-22 12:33:14.732386: step: 322/466, loss: 0.03607185557484627 2023-01-22 12:33:15.324616: step: 324/466, loss: 2.4579479694366455 2023-01-22 12:33:15.929298: step: 326/466, loss: 0.0066762263886630535 2023-01-22 12:33:16.616863: step: 328/466, loss: 0.020376691594719887 2023-01-22 12:33:17.196550: step: 330/466, loss: 0.0028863598126918077 2023-01-22 12:33:17.830820: step: 332/466, loss: 0.061129070818424225 2023-01-22 12:33:18.518023: step: 334/466, loss: 0.003399621695280075 2023-01-22 12:33:19.183714: step: 336/466, loss: 0.0011069603497162461 2023-01-22 12:33:19.736182: step: 338/466, loss: 0.008519783616065979 2023-01-22 12:33:20.337250: step: 340/466, loss: 0.01564904674887657 2023-01-22 12:33:20.942580: step: 342/466, loss: 0.059392645955085754 2023-01-22 12:33:21.585616: step: 344/466, loss: 0.006279794033616781 2023-01-22 12:33:22.213687: step: 346/466, loss: 0.09346354007720947 2023-01-22 12:33:22.804591: step: 348/466, loss: 0.03669194132089615 2023-01-22 12:33:23.445287: step: 350/466, loss: 0.04633399099111557 2023-01-22 12:33:24.034936: step: 352/466, loss: 0.014510966837406158 2023-01-22 12:33:24.741519: step: 354/466, loss: 0.04897434264421463 2023-01-22 12:33:25.352196: step: 356/466, loss: 0.011935423128306866 2023-01-22 12:33:26.029016: step: 358/466, loss: 0.02206057496368885 2023-01-22 12:33:26.621195: step: 360/466, loss: 0.17159996926784515 2023-01-22 12:33:27.225455: step: 362/466, loss: 0.014811763539910316 2023-01-22 12:33:27.841929: step: 364/466, loss: 0.014396698214113712 2023-01-22 12:33:28.461771: step: 366/466, loss: 0.02253626473248005 2023-01-22 12:33:29.078913: step: 368/466, loss: 0.03557388857007027 2023-01-22 12:33:29.699785: step: 370/466, loss: 0.014918249100446701 2023-01-22 12:33:30.223575: step: 372/466, loss: 0.7161222100257874 2023-01-22 12:33:30.864903: step: 374/466, loss: 0.0009834831580519676 2023-01-22 12:33:31.538125: step: 376/466, loss: 0.5186393857002258 2023-01-22 12:33:32.117405: step: 378/466, loss: 0.09099865704774857 2023-01-22 12:33:32.690739: step: 380/466, loss: 0.03353620693087578 2023-01-22 12:33:33.272960: step: 382/466, loss: 0.011328631080687046 2023-01-22 12:33:33.852917: step: 384/466, loss: 0.02683149091899395 2023-01-22 12:33:34.521918: step: 386/466, loss: 0.15679757297039032 2023-01-22 12:33:35.084630: step: 388/466, loss: 0.19017009437084198 2023-01-22 12:33:35.707042: step: 390/466, loss: 0.03918571397662163 2023-01-22 12:33:36.296326: step: 392/466, loss: 0.006072483025491238 2023-01-22 12:33:36.913398: step: 394/466, loss: 0.01014915481209755 2023-01-22 12:33:37.544820: step: 396/466, loss: 0.07179990410804749 2023-01-22 12:33:38.146698: step: 398/466, loss: 0.11546458303928375 2023-01-22 12:33:38.767548: step: 400/466, loss: 0.031725261360406876 2023-01-22 12:33:39.379100: step: 402/466, loss: 0.03558645024895668 2023-01-22 12:33:39.970117: step: 404/466, loss: 0.48780667781829834 2023-01-22 12:33:40.521361: step: 406/466, loss: 0.04735840857028961 2023-01-22 12:33:41.109070: step: 408/466, loss: 0.00975461583584547 2023-01-22 12:33:41.690955: step: 410/466, loss: 0.012872268445789814 2023-01-22 12:33:42.275387: step: 412/466, loss: 0.019132103770971298 2023-01-22 12:33:42.931520: step: 414/466, loss: 0.35264527797698975 2023-01-22 12:33:43.510463: step: 416/466, loss: 0.008495563641190529 2023-01-22 12:33:44.109631: step: 418/466, loss: 0.056623030453920364 2023-01-22 12:33:44.705905: step: 420/466, loss: 0.08087506145238876 2023-01-22 12:33:45.304232: step: 422/466, loss: 0.9835139513015747 2023-01-22 12:33:45.907519: step: 424/466, loss: 0.013827823102474213 2023-01-22 12:33:46.498484: step: 426/466, loss: 0.027738556265830994 2023-01-22 12:33:47.074615: step: 428/466, loss: 0.02511964738368988 2023-01-22 12:33:47.709788: step: 430/466, loss: 0.051109958440065384 2023-01-22 12:33:48.315017: step: 432/466, loss: 0.024527449160814285 2023-01-22 12:33:48.931361: step: 434/466, loss: 0.04220099374651909 2023-01-22 12:33:49.576267: step: 436/466, loss: 0.3841703534126282 2023-01-22 12:33:50.156606: step: 438/466, loss: 0.03868027776479721 2023-01-22 12:33:50.746727: step: 440/466, loss: 0.009918006137013435 2023-01-22 12:33:51.355467: step: 442/466, loss: 0.02802487462759018 2023-01-22 12:33:51.980758: step: 444/466, loss: 0.03066260740160942 2023-01-22 12:33:52.504791: step: 446/466, loss: 0.12128327786922455 2023-01-22 12:33:53.153609: step: 448/466, loss: 0.013228870928287506 2023-01-22 12:33:53.767997: step: 450/466, loss: 0.0864589735865593 2023-01-22 12:33:54.349224: step: 452/466, loss: 0.22337065637111664 2023-01-22 12:33:54.955204: step: 454/466, loss: 0.031695976853370667 2023-01-22 12:33:55.553829: step: 456/466, loss: 0.07854624837636948 2023-01-22 12:33:56.128880: step: 458/466, loss: 0.0038878133054822683 2023-01-22 12:33:56.779126: step: 460/466, loss: 0.009189965203404427 2023-01-22 12:33:57.428347: step: 462/466, loss: 0.03963831812143326 2023-01-22 12:33:58.074448: step: 464/466, loss: 0.003491988405585289 2023-01-22 12:33:58.713933: step: 466/466, loss: 0.0387866348028183 2023-01-22 12:33:59.370844: step: 468/466, loss: 0.09318447858095169 2023-01-22 12:34:00.019212: step: 470/466, loss: 0.06316616386175156 2023-01-22 12:34:00.738504: step: 472/466, loss: 0.03835407271981239 2023-01-22 12:34:01.360114: step: 474/466, loss: 0.010601839981973171 2023-01-22 12:34:02.020754: step: 476/466, loss: 0.04524579644203186 2023-01-22 12:34:02.626451: step: 478/466, loss: 0.041294775903224945 2023-01-22 12:34:03.252976: step: 480/466, loss: 0.050560399889945984 2023-01-22 12:34:03.829414: step: 482/466, loss: 0.05223943665623665 2023-01-22 12:34:04.358080: step: 484/466, loss: 0.019072677940130234 2023-01-22 12:34:05.082687: step: 486/466, loss: 0.04862317070364952 2023-01-22 12:34:05.691698: step: 488/466, loss: 0.03726345673203468 2023-01-22 12:34:06.401574: step: 490/466, loss: 0.2885895073413849 2023-01-22 12:34:07.040779: step: 492/466, loss: 0.02119053155183792 2023-01-22 12:34:07.691140: step: 494/466, loss: 0.04115508496761322 2023-01-22 12:34:08.321597: step: 496/466, loss: 0.041004154831171036 2023-01-22 12:34:08.879881: step: 498/466, loss: 0.03753751143813133 2023-01-22 12:34:09.411959: step: 500/466, loss: 0.00984681211411953 2023-01-22 12:34:10.071626: step: 502/466, loss: 0.0046401964500546455 2023-01-22 12:34:10.658622: step: 504/466, loss: 0.04446402192115784 2023-01-22 12:34:11.211319: step: 506/466, loss: 0.045399025082588196 2023-01-22 12:34:11.851608: step: 508/466, loss: 0.379410058259964 2023-01-22 12:34:12.441720: step: 510/466, loss: 0.04710298031568527 2023-01-22 12:34:13.048097: step: 512/466, loss: 0.0644146129488945 2023-01-22 12:34:13.602108: step: 514/466, loss: 0.08703358471393585 2023-01-22 12:34:14.187151: step: 516/466, loss: 0.0953550636768341 2023-01-22 12:34:14.811120: step: 518/466, loss: 0.01842649094760418 2023-01-22 12:34:15.404414: step: 520/466, loss: 0.002994328737258911 2023-01-22 12:34:15.975659: step: 522/466, loss: 0.01781976781785488 2023-01-22 12:34:16.561723: step: 524/466, loss: 0.03385910764336586 2023-01-22 12:34:17.204985: step: 526/466, loss: 0.02381124719977379 2023-01-22 12:34:17.860140: step: 528/466, loss: 0.008905943483114243 2023-01-22 12:34:18.472186: step: 530/466, loss: 0.013084372505545616 2023-01-22 12:34:19.098065: step: 532/466, loss: 0.06574474275112152 2023-01-22 12:34:19.751633: step: 534/466, loss: 0.20575186610221863 2023-01-22 12:34:20.357889: step: 536/466, loss: 0.18175360560417175 2023-01-22 12:34:20.955147: step: 538/466, loss: 0.3304038345813751 2023-01-22 12:34:21.632840: step: 540/466, loss: 0.06652913987636566 2023-01-22 12:34:22.279900: step: 542/466, loss: 0.04313105344772339 2023-01-22 12:34:22.885285: step: 544/466, loss: 0.023333299905061722 2023-01-22 12:34:23.549064: step: 546/466, loss: 0.006351171061396599 2023-01-22 12:34:24.196810: step: 548/466, loss: 0.02981061115860939 2023-01-22 12:34:24.808707: step: 550/466, loss: 0.09407296776771545 2023-01-22 12:34:25.447574: step: 552/466, loss: 0.0001765866472851485 2023-01-22 12:34:26.068034: step: 554/466, loss: 0.037890564650297165 2023-01-22 12:34:26.630103: step: 556/466, loss: 0.04096215218305588 2023-01-22 12:34:27.241415: step: 558/466, loss: 0.07184533774852753 2023-01-22 12:34:27.941698: step: 560/466, loss: 0.01907883584499359 2023-01-22 12:34:28.632651: step: 562/466, loss: 0.03673747554421425 2023-01-22 12:34:29.257994: step: 564/466, loss: 0.007795785088092089 2023-01-22 12:34:29.880848: step: 566/466, loss: 0.21403954923152924 2023-01-22 12:34:30.437035: step: 568/466, loss: 0.003437581704929471 2023-01-22 12:34:31.018276: step: 570/466, loss: 0.048479288816452026 2023-01-22 12:34:31.598291: step: 572/466, loss: 0.042687129229307175 2023-01-22 12:34:32.208207: step: 574/466, loss: 0.06387472152709961 2023-01-22 12:34:32.792804: step: 576/466, loss: 0.005016942508518696 2023-01-22 12:34:33.489382: step: 578/466, loss: 0.010053220205008984 2023-01-22 12:34:34.103280: step: 580/466, loss: 0.003696274943649769 2023-01-22 12:34:34.660680: step: 582/466, loss: 0.024698616936802864 2023-01-22 12:34:35.228182: step: 584/466, loss: 0.025689121335744858 2023-01-22 12:34:35.782084: step: 586/466, loss: 0.159027099609375 2023-01-22 12:34:36.475166: step: 588/466, loss: 0.03976619243621826 2023-01-22 12:34:37.097784: step: 590/466, loss: 0.008023654110729694 2023-01-22 12:34:37.776516: step: 592/466, loss: 0.052323099225759506 2023-01-22 12:34:38.408423: step: 594/466, loss: 0.03495736047625542 2023-01-22 12:34:39.063447: step: 596/466, loss: 0.07690518349409103 2023-01-22 12:34:39.691044: step: 598/466, loss: 0.02207178808748722 2023-01-22 12:34:40.304378: step: 600/466, loss: 0.05315900593996048 2023-01-22 12:34:40.890822: step: 602/466, loss: 0.027270402759313583 2023-01-22 12:34:41.539622: step: 604/466, loss: 0.037802521139383316 2023-01-22 12:34:42.333339: step: 606/466, loss: 0.020048823207616806 2023-01-22 12:34:42.902933: step: 608/466, loss: 0.06595315784215927 2023-01-22 12:34:43.474275: step: 610/466, loss: 0.07635252922773361 2023-01-22 12:34:44.099945: step: 612/466, loss: 0.02917221561074257 2023-01-22 12:34:44.723508: step: 614/466, loss: 0.049803707748651505 2023-01-22 12:34:45.367695: step: 616/466, loss: 0.04995589330792427 2023-01-22 12:34:45.987448: step: 618/466, loss: 0.015341007150709629 2023-01-22 12:34:46.615673: step: 620/466, loss: 0.013207652606070042 2023-01-22 12:34:47.213099: step: 622/466, loss: 0.9710530042648315 2023-01-22 12:34:47.837010: step: 624/466, loss: 0.02297407202422619 2023-01-22 12:34:48.456960: step: 626/466, loss: 0.1126004084944725 2023-01-22 12:34:49.068096: step: 628/466, loss: 0.07128822803497314 2023-01-22 12:34:49.695820: step: 630/466, loss: 0.057949699461460114 2023-01-22 12:34:50.254678: step: 632/466, loss: 0.035416506230831146 2023-01-22 12:34:50.840560: step: 634/466, loss: 0.024575524032115936 2023-01-22 12:34:51.387694: step: 636/466, loss: 0.09601810574531555 2023-01-22 12:34:52.045732: step: 638/466, loss: 0.03678590804338455 2023-01-22 12:34:52.626471: step: 640/466, loss: 0.05857367068529129 2023-01-22 12:34:53.237694: step: 642/466, loss: 0.009968073107302189 2023-01-22 12:34:53.826217: step: 644/466, loss: 0.05171883851289749 2023-01-22 12:34:54.512416: step: 646/466, loss: 0.10127750039100647 2023-01-22 12:34:55.126762: step: 648/466, loss: 0.08442340046167374 2023-01-22 12:34:55.705835: step: 650/466, loss: 0.21667565405368805 2023-01-22 12:34:56.330839: step: 652/466, loss: 0.034365225583314896 2023-01-22 12:34:56.940515: step: 654/466, loss: 0.011747542768716812 2023-01-22 12:34:57.550527: step: 656/466, loss: 0.0651884526014328 2023-01-22 12:34:58.179934: step: 658/466, loss: 0.02953972853720188 2023-01-22 12:34:58.849854: step: 660/466, loss: 0.05717940255999565 2023-01-22 12:34:59.488971: step: 662/466, loss: 0.0911126658320427 2023-01-22 12:35:00.073012: step: 664/466, loss: 0.02355436235666275 2023-01-22 12:35:00.667962: step: 666/466, loss: 0.16068434715270996 2023-01-22 12:35:01.273213: step: 668/466, loss: 0.011923057027161121 2023-01-22 12:35:01.931537: step: 670/466, loss: 0.01504041999578476 2023-01-22 12:35:02.604525: step: 672/466, loss: 0.10375463217496872 2023-01-22 12:35:03.241979: step: 674/466, loss: 0.060116566717624664 2023-01-22 12:35:03.839466: step: 676/466, loss: 0.023999834433197975 2023-01-22 12:35:04.440018: step: 678/466, loss: 0.013309174217283726 2023-01-22 12:35:05.032945: step: 680/466, loss: 0.011735780164599419 2023-01-22 12:35:05.650951: step: 682/466, loss: 0.20573440194129944 2023-01-22 12:35:06.198244: step: 684/466, loss: 0.03826363757252693 2023-01-22 12:35:06.804953: step: 686/466, loss: 0.038021162152290344 2023-01-22 12:35:07.395226: step: 688/466, loss: 0.05251982808113098 2023-01-22 12:35:07.997979: step: 690/466, loss: 0.13504108786582947 2023-01-22 12:35:08.597404: step: 692/466, loss: 0.09346406161785126 2023-01-22 12:35:09.264925: step: 694/466, loss: 0.007996978238224983 2023-01-22 12:35:09.839203: step: 696/466, loss: 0.007372912019491196 2023-01-22 12:35:10.417049: step: 698/466, loss: 0.05871805176138878 2023-01-22 12:35:11.020297: step: 700/466, loss: 0.026169583201408386 2023-01-22 12:35:11.598036: step: 702/466, loss: 0.08200190961360931 2023-01-22 12:35:12.195312: step: 704/466, loss: 0.08218079805374146 2023-01-22 12:35:12.766604: step: 706/466, loss: 0.002647866029292345 2023-01-22 12:35:13.380573: step: 708/466, loss: 0.03334326297044754 2023-01-22 12:35:13.987461: step: 710/466, loss: 0.015425094403326511 2023-01-22 12:35:14.601888: step: 712/466, loss: 0.033472705632448196 2023-01-22 12:35:15.158395: step: 714/466, loss: 0.09041909128427505 2023-01-22 12:35:15.782748: step: 716/466, loss: 0.021868692710995674 2023-01-22 12:35:16.358121: step: 718/466, loss: 0.00036438918323256075 2023-01-22 12:35:16.987554: step: 720/466, loss: 0.03918217122554779 2023-01-22 12:35:17.591852: step: 722/466, loss: 0.014676067046821117 2023-01-22 12:35:18.143995: step: 724/466, loss: 0.0330355279147625 2023-01-22 12:35:18.664360: step: 726/466, loss: 0.10043784976005554 2023-01-22 12:35:19.264524: step: 728/466, loss: 0.016035128384828568 2023-01-22 12:35:19.878529: step: 730/466, loss: 0.13090096414089203 2023-01-22 12:35:20.488520: step: 732/466, loss: 0.016727568581700325 2023-01-22 12:35:21.093159: step: 734/466, loss: 0.016407720744609833 2023-01-22 12:35:21.721004: step: 736/466, loss: 0.3815709948539734 2023-01-22 12:35:22.348825: step: 738/466, loss: 0.013494301587343216 2023-01-22 12:35:23.055209: step: 740/466, loss: 0.033299703150987625 2023-01-22 12:35:23.686703: step: 742/466, loss: 0.004631971009075642 2023-01-22 12:35:24.223128: step: 744/466, loss: 0.0062673985958099365 2023-01-22 12:35:24.818336: step: 746/466, loss: 0.01861630566418171 2023-01-22 12:35:25.414595: step: 748/466, loss: 0.016963746398687363 2023-01-22 12:35:26.062794: step: 750/466, loss: 0.016340335831046104 2023-01-22 12:35:26.654696: step: 752/466, loss: 0.03189510852098465 2023-01-22 12:35:27.262268: step: 754/466, loss: 0.030143678188323975 2023-01-22 12:35:27.876087: step: 756/466, loss: 0.10187117755413055 2023-01-22 12:35:28.490680: step: 758/466, loss: 0.019785935059189796 2023-01-22 12:35:29.093338: step: 760/466, loss: 0.026150960475206375 2023-01-22 12:35:29.640765: step: 762/466, loss: 0.008185453712940216 2023-01-22 12:35:30.284351: step: 764/466, loss: 0.0060665239579975605 2023-01-22 12:35:30.864338: step: 766/466, loss: 0.03670055419206619 2023-01-22 12:35:31.473173: step: 768/466, loss: 0.004702350124716759 2023-01-22 12:35:32.105584: step: 770/466, loss: 0.023463036864995956 2023-01-22 12:35:32.697317: step: 772/466, loss: 0.08310149610042572 2023-01-22 12:35:33.229733: step: 774/466, loss: 0.018694311380386353 2023-01-22 12:35:33.839040: step: 776/466, loss: 0.1447852998971939 2023-01-22 12:35:34.404622: step: 778/466, loss: 0.025596708059310913 2023-01-22 12:35:35.104939: step: 780/466, loss: 0.4690394997596741 2023-01-22 12:35:35.645093: step: 782/466, loss: 0.012877174653112888 2023-01-22 12:35:36.247924: step: 784/466, loss: 0.10288326442241669 2023-01-22 12:35:36.820482: step: 786/466, loss: 0.03267328813672066 2023-01-22 12:35:37.460389: step: 788/466, loss: 0.08561872690916061 2023-01-22 12:35:38.096854: step: 790/466, loss: 0.015591911971569061 2023-01-22 12:35:38.718851: step: 792/466, loss: 0.021638771519064903 2023-01-22 12:35:39.313648: step: 794/466, loss: 0.02019706554710865 2023-01-22 12:35:39.861631: step: 796/466, loss: 0.01928093656897545 2023-01-22 12:35:40.443601: step: 798/466, loss: 0.04225790873169899 2023-01-22 12:35:41.015291: step: 800/466, loss: 0.00393377710133791 2023-01-22 12:35:41.585967: step: 802/466, loss: 0.06843379884958267 2023-01-22 12:35:42.228429: step: 804/466, loss: 0.01800605095922947 2023-01-22 12:35:42.812063: step: 806/466, loss: 0.01616629585623741 2023-01-22 12:35:43.434900: step: 808/466, loss: 0.024410026147961617 2023-01-22 12:35:44.141919: step: 810/466, loss: 0.004984854720532894 2023-01-22 12:35:44.705979: step: 812/466, loss: 0.020824674516916275 2023-01-22 12:35:45.391239: step: 814/466, loss: 0.021463720127940178 2023-01-22 12:35:45.962417: step: 816/466, loss: 0.07688427716493607 2023-01-22 12:35:46.509388: step: 818/466, loss: 0.033285610377788544 2023-01-22 12:35:47.125595: step: 820/466, loss: 0.039045482873916626 2023-01-22 12:35:47.765193: step: 822/466, loss: 0.007764583453536034 2023-01-22 12:35:48.378861: step: 824/466, loss: 0.10974142700433731 2023-01-22 12:35:48.976508: step: 826/466, loss: 0.05139411613345146 2023-01-22 12:35:49.606700: step: 828/466, loss: 0.41900506615638733 2023-01-22 12:35:50.212470: step: 830/466, loss: 0.05137622356414795 2023-01-22 12:35:50.769866: step: 832/466, loss: 0.0016587182180956006 2023-01-22 12:35:51.371623: step: 834/466, loss: 0.028809627518057823 2023-01-22 12:35:51.967343: step: 836/466, loss: 0.07262729853391647 2023-01-22 12:35:52.545147: step: 838/466, loss: 0.028702951967716217 2023-01-22 12:35:53.171727: step: 840/466, loss: 0.02230178751051426 2023-01-22 12:35:53.847369: step: 842/466, loss: 0.03495894372463226 2023-01-22 12:35:54.394394: step: 844/466, loss: 0.008132087998092175 2023-01-22 12:35:55.028945: step: 846/466, loss: 0.04570523276925087 2023-01-22 12:35:55.621802: step: 848/466, loss: 0.024685293436050415 2023-01-22 12:35:56.269478: step: 850/466, loss: 0.016668280586600304 2023-01-22 12:35:56.836374: step: 852/466, loss: 0.08117682486772537 2023-01-22 12:35:57.440471: step: 854/466, loss: 0.08451499789953232 2023-01-22 12:35:58.013568: step: 856/466, loss: 0.0015832686331123114 2023-01-22 12:35:58.573247: step: 858/466, loss: 0.008496706373989582 2023-01-22 12:35:59.170410: step: 860/466, loss: 0.07007251679897308 2023-01-22 12:35:59.727714: step: 862/466, loss: 0.047850530594587326 2023-01-22 12:36:00.355238: step: 864/466, loss: 0.04546159505844116 2023-01-22 12:36:00.975077: step: 866/466, loss: 0.020945146679878235 2023-01-22 12:36:01.606098: step: 868/466, loss: 0.06443048268556595 2023-01-22 12:36:02.222456: step: 870/466, loss: 0.043095044791698456 2023-01-22 12:36:02.827280: step: 872/466, loss: 0.024777084589004517 2023-01-22 12:36:03.535622: step: 874/466, loss: 0.04186893627047539 2023-01-22 12:36:04.067484: step: 876/466, loss: 0.7887666821479797 2023-01-22 12:36:04.703597: step: 878/466, loss: 0.018086452037096024 2023-01-22 12:36:05.303582: step: 880/466, loss: 0.06803685426712036 2023-01-22 12:36:05.873994: step: 882/466, loss: 0.08622019737958908 2023-01-22 12:36:06.437955: step: 884/466, loss: 0.030558787286281586 2023-01-22 12:36:07.081558: step: 886/466, loss: 0.021821726113557816 2023-01-22 12:36:07.676930: step: 888/466, loss: 0.09157668799161911 2023-01-22 12:36:08.232503: step: 890/466, loss: 0.005649706348776817 2023-01-22 12:36:08.838720: step: 892/466, loss: 0.3497958183288574 2023-01-22 12:36:09.432891: step: 894/466, loss: 0.04933559149503708 2023-01-22 12:36:10.020088: step: 896/466, loss: 0.029646283015608788 2023-01-22 12:36:10.618439: step: 898/466, loss: 0.006502463947981596 2023-01-22 12:36:11.152635: step: 900/466, loss: 0.013586016371846199 2023-01-22 12:36:11.752659: step: 902/466, loss: 0.023692291229963303 2023-01-22 12:36:12.381229: step: 904/466, loss: 0.010394468903541565 2023-01-22 12:36:12.973571: step: 906/466, loss: 7.299670696258545 2023-01-22 12:36:13.714368: step: 908/466, loss: 0.09700772166252136 2023-01-22 12:36:14.234285: step: 910/466, loss: 0.048023246228694916 2023-01-22 12:36:14.826937: step: 912/466, loss: 0.01920267567038536 2023-01-22 12:36:15.391714: step: 914/466, loss: 0.005680992268025875 2023-01-22 12:36:16.015340: step: 916/466, loss: 0.015114396810531616 2023-01-22 12:36:16.637476: step: 918/466, loss: 0.012126674875617027 2023-01-22 12:36:17.221842: step: 920/466, loss: 0.006995673291385174 2023-01-22 12:36:17.797177: step: 922/466, loss: 0.001748422160744667 2023-01-22 12:36:18.437132: step: 924/466, loss: 0.03409386798739433 2023-01-22 12:36:19.051175: step: 926/466, loss: 0.05706127732992172 2023-01-22 12:36:19.645204: step: 928/466, loss: 0.016984621062874794 2023-01-22 12:36:20.232432: step: 930/466, loss: 0.07859759777784348 2023-01-22 12:36:20.848386: step: 932/466, loss: 0.11002255231142044 ================================================== Loss: 0.093 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3099011299435029, 'r': 0.34694813409234665, 'f1': 0.32737988660101464}, 'combined': 0.24122728486390552, 'epoch': 24} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3519081431890309, 'r': 0.3357599773110927, 'f1': 0.3436444606063908}, 'combined': 0.22790927957315035, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3072564612326044, 'r': 0.29270833333333335, 'f1': 0.2998060135790495}, 'combined': 0.19987067571936634, 'epoch': 24} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35723608166617044, 'r': 0.30863463901314725, 'f1': 0.3311616675231724}, 'combined': 0.21612656196249141, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28873012916045704, 'r': 0.3342037548157093, 'f1': 0.3098071746488633}, 'combined': 0.22827897079389928, 'epoch': 24} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34429836347837417, 'r': 0.32522035892199674, 'f1': 0.33448754635343386}, 'combined': 0.22183630017222553, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23363095238095236, 'r': 0.37380952380952376, 'f1': 0.2875457875457875}, 'combined': 0.19169719169719168, 'epoch': 24} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.32608695652173914, 'f1': 0.35714285714285715}, 'combined': 0.23809523809523808, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.25862068965517243, 'f1': 0.3061224489795919}, 'combined': 0.20408163265306126, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2874809714095428, 'r': 0.3474864872635271, 'f1': 0.31464841716130376}, 'combined': 0.2318462021188554, 'epoch': 23} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3397515828631042, 'r': 0.3070434946603249, 'f1': 0.3225705233196853}, 'combined': 0.21393278230528348, 'epoch': 23} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4117647058823529, 'r': 0.2413793103448276, 'f1': 0.3043478260869565}, 'combined': 0.20289855072463764, 'epoch': 23} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:38:46.471567: step: 2/466, loss: 1.2913615703582764 2023-01-22 12:38:47.119545: step: 4/466, loss: 0.221303328871727 2023-01-22 12:38:47.720321: step: 6/466, loss: 0.07709360867738724 2023-01-22 12:38:48.335330: step: 8/466, loss: 0.07634203135967255 2023-01-22 12:38:48.902045: step: 10/466, loss: 0.808277428150177 2023-01-22 12:38:49.542189: step: 12/466, loss: 0.02934054285287857 2023-01-22 12:38:50.161259: step: 14/466, loss: 0.044773418456315994 2023-01-22 12:38:50.740709: step: 16/466, loss: 0.0567832812666893 2023-01-22 12:38:51.323614: step: 18/466, loss: 0.04091653227806091 2023-01-22 12:38:51.948829: step: 20/466, loss: 0.012444142252206802 2023-01-22 12:38:52.494672: step: 22/466, loss: 0.004964071791619062 2023-01-22 12:38:53.104547: step: 24/466, loss: 0.0060002775862813 2023-01-22 12:38:53.735043: step: 26/466, loss: 0.7530148029327393 2023-01-22 12:38:54.374053: step: 28/466, loss: 0.010613384656608105 2023-01-22 12:38:54.941327: step: 30/466, loss: 0.0024078087881207466 2023-01-22 12:38:55.608246: step: 32/466, loss: 0.007756354752928019 2023-01-22 12:38:56.236875: step: 34/466, loss: 0.0068458979949355125 2023-01-22 12:38:56.879230: step: 36/466, loss: 0.04409642145037651 2023-01-22 12:38:57.479312: step: 38/466, loss: 0.0186622217297554 2023-01-22 12:38:58.073840: step: 40/466, loss: 0.0013015653239563107 2023-01-22 12:38:58.685697: step: 42/466, loss: 0.2598473131656647 2023-01-22 12:38:59.306709: step: 44/466, loss: 0.006549442186951637 2023-01-22 12:38:59.944516: step: 46/466, loss: 0.002103047212585807 2023-01-22 12:39:00.590000: step: 48/466, loss: 0.004661684390157461 2023-01-22 12:39:01.242369: step: 50/466, loss: 0.011915341019630432 2023-01-22 12:39:01.860870: step: 52/466, loss: 0.007606809958815575 2023-01-22 12:39:02.455030: step: 54/466, loss: 0.044846631586551666 2023-01-22 12:39:03.032687: step: 56/466, loss: 0.017281727865338326 2023-01-22 12:39:03.633260: step: 58/466, loss: 0.024489082396030426 2023-01-22 12:39:04.270153: step: 60/466, loss: 0.04151010513305664 2023-01-22 12:39:04.910196: step: 62/466, loss: 0.0005681780166924 2023-01-22 12:39:05.496597: step: 64/466, loss: 0.06110693886876106 2023-01-22 12:39:06.146304: step: 66/466, loss: 0.1623503714799881 2023-01-22 12:39:06.744143: step: 68/466, loss: 0.012861186638474464 2023-01-22 12:39:07.373488: step: 70/466, loss: 0.018734971061348915 2023-01-22 12:39:08.004460: step: 72/466, loss: 0.04459035396575928 2023-01-22 12:39:08.604693: step: 74/466, loss: 0.05913710221648216 2023-01-22 12:39:09.161937: step: 76/466, loss: 0.01718541607260704 2023-01-22 12:39:09.701197: step: 78/466, loss: 0.0338447242975235 2023-01-22 12:39:10.294962: step: 80/466, loss: 0.009336519986391068 2023-01-22 12:39:10.896368: step: 82/466, loss: 0.012285180389881134 2023-01-22 12:39:11.491227: step: 84/466, loss: 0.044400352984666824 2023-01-22 12:39:12.148042: step: 86/466, loss: 0.0017768596298992634 2023-01-22 12:39:12.772016: step: 88/466, loss: 0.03392492234706879 2023-01-22 12:39:13.458296: step: 90/466, loss: 0.08839622884988785 2023-01-22 12:39:14.012761: step: 92/466, loss: 0.28095516562461853 2023-01-22 12:39:14.655352: step: 94/466, loss: 0.04762560501694679 2023-01-22 12:39:15.389366: step: 96/466, loss: 0.03430235758423805 2023-01-22 12:39:16.012254: step: 98/466, loss: 0.011945521458983421 2023-01-22 12:39:16.663801: step: 100/466, loss: 0.022856013849377632 2023-01-22 12:39:17.294272: step: 102/466, loss: 0.002989865140989423 2023-01-22 12:39:17.917451: step: 104/466, loss: 0.03537214174866676 2023-01-22 12:39:18.534924: step: 106/466, loss: 0.02911374531686306 2023-01-22 12:39:19.229386: step: 108/466, loss: 0.5798623561859131 2023-01-22 12:39:19.892129: step: 110/466, loss: 0.02080407552421093 2023-01-22 12:39:20.521609: step: 112/466, loss: 0.032129302620887756 2023-01-22 12:39:21.116081: step: 114/466, loss: 0.014343895949423313 2023-01-22 12:39:21.767833: step: 116/466, loss: 0.031172694638371468 2023-01-22 12:39:22.318199: step: 118/466, loss: 0.01042020134627819 2023-01-22 12:39:23.027488: step: 120/466, loss: 0.006632381118834019 2023-01-22 12:39:23.680879: step: 122/466, loss: 0.038836054503917694 2023-01-22 12:39:24.244426: step: 124/466, loss: 0.008173519745469093 2023-01-22 12:39:24.826553: step: 126/466, loss: 0.016281452029943466 2023-01-22 12:39:25.468886: step: 128/466, loss: 0.0038052171003073454 2023-01-22 12:39:26.089087: step: 130/466, loss: 0.11949094384908676 2023-01-22 12:39:26.666809: step: 132/466, loss: 0.05344999581575394 2023-01-22 12:39:27.276190: step: 134/466, loss: 0.024968191981315613 2023-01-22 12:39:27.913693: step: 136/466, loss: 0.03751816600561142 2023-01-22 12:39:28.487689: step: 138/466, loss: 0.0008284052019007504 2023-01-22 12:39:29.123759: step: 140/466, loss: 0.11725995689630508 2023-01-22 12:39:29.745042: step: 142/466, loss: 0.0842091366648674 2023-01-22 12:39:30.351342: step: 144/466, loss: 0.0696955993771553 2023-01-22 12:39:30.961315: step: 146/466, loss: 0.00623112078756094 2023-01-22 12:39:31.583986: step: 148/466, loss: 0.5660732984542847 2023-01-22 12:39:32.163839: step: 150/466, loss: 0.02277527004480362 2023-01-22 12:39:32.740074: step: 152/466, loss: 0.014372216537594795 2023-01-22 12:39:33.368626: step: 154/466, loss: 0.04187621921300888 2023-01-22 12:39:33.967446: step: 156/466, loss: 3.977751839556731e-05 2023-01-22 12:39:34.612664: step: 158/466, loss: 0.10576049983501434 2023-01-22 12:39:35.197445: step: 160/466, loss: 0.026855265721678734 2023-01-22 12:39:35.819387: step: 162/466, loss: 0.05360988527536392 2023-01-22 12:39:36.388467: step: 164/466, loss: 0.002422438934445381 2023-01-22 12:39:37.011229: step: 166/466, loss: 0.046311236917972565 2023-01-22 12:39:37.647680: step: 168/466, loss: 0.004069761373102665 2023-01-22 12:39:38.236117: step: 170/466, loss: 0.002492958679795265 2023-01-22 12:39:38.760349: step: 172/466, loss: 0.04683367908000946 2023-01-22 12:39:39.419815: step: 174/466, loss: 0.0008087892201729119 2023-01-22 12:39:40.024525: step: 176/466, loss: 0.00553395040333271 2023-01-22 12:39:40.536942: step: 178/466, loss: 0.005937342531979084 2023-01-22 12:39:41.137043: step: 180/466, loss: 0.026208385825157166 2023-01-22 12:39:41.817485: step: 182/466, loss: 0.04297297075390816 2023-01-22 12:39:42.498509: step: 184/466, loss: 0.019133813679218292 2023-01-22 12:39:43.097871: step: 186/466, loss: 0.24795441329479218 2023-01-22 12:39:43.670374: step: 188/466, loss: 0.05760320648550987 2023-01-22 12:39:44.267182: step: 190/466, loss: 0.7904965281486511 2023-01-22 12:39:44.858440: step: 192/466, loss: 0.033945102244615555 2023-01-22 12:39:45.448722: step: 194/466, loss: 0.01732119917869568 2023-01-22 12:39:46.048016: step: 196/466, loss: 0.0867454931139946 2023-01-22 12:39:46.745275: step: 198/466, loss: 0.14356158673763275 2023-01-22 12:39:47.309650: step: 200/466, loss: 0.00874892994761467 2023-01-22 12:39:47.957369: step: 202/466, loss: 0.09583394229412079 2023-01-22 12:39:48.527873: step: 204/466, loss: 0.06581012904644012 2023-01-22 12:39:49.113748: step: 206/466, loss: 0.0020562163554131985 2023-01-22 12:39:49.771909: step: 208/466, loss: 0.026082372292876244 2023-01-22 12:39:50.392621: step: 210/466, loss: 0.0012509813532233238 2023-01-22 12:39:51.005805: step: 212/466, loss: 0.019496295601129532 2023-01-22 12:39:51.606215: step: 214/466, loss: 0.4355490207672119 2023-01-22 12:39:52.099525: step: 216/466, loss: 0.003379611298441887 2023-01-22 12:39:52.659243: step: 218/466, loss: 0.04011888802051544 2023-01-22 12:39:53.243945: step: 220/466, loss: 0.020177897065877914 2023-01-22 12:39:53.826237: step: 222/466, loss: 0.05331127345561981 2023-01-22 12:39:54.388519: step: 224/466, loss: 0.0049538989551365376 2023-01-22 12:39:54.971778: step: 226/466, loss: 0.02534342184662819 2023-01-22 12:39:55.596539: step: 228/466, loss: 0.02678047865629196 2023-01-22 12:39:56.211441: step: 230/466, loss: 0.08728460222482681 2023-01-22 12:39:56.850844: step: 232/466, loss: 0.12621189653873444 2023-01-22 12:39:57.462484: step: 234/466, loss: 0.08374364674091339 2023-01-22 12:39:58.086734: step: 236/466, loss: 0.19564181566238403 2023-01-22 12:39:58.635556: step: 238/466, loss: 0.00403230544179678 2023-01-22 12:39:59.195561: step: 240/466, loss: 0.02667354792356491 2023-01-22 12:39:59.769499: step: 242/466, loss: 0.032107461243867874 2023-01-22 12:40:00.383555: step: 244/466, loss: 0.018708113580942154 2023-01-22 12:40:01.027842: step: 246/466, loss: 0.10217863321304321 2023-01-22 12:40:01.611222: step: 248/466, loss: 0.01089020911604166 2023-01-22 12:40:02.346703: step: 250/466, loss: 0.03414095938205719 2023-01-22 12:40:02.903513: step: 252/466, loss: 0.007417671848088503 2023-01-22 12:40:03.415559: step: 254/466, loss: 0.014984002336859703 2023-01-22 12:40:03.984879: step: 256/466, loss: 0.010449710302054882 2023-01-22 12:40:04.557313: step: 258/466, loss: 0.039140958338975906 2023-01-22 12:40:05.148339: step: 260/466, loss: 0.005764668807387352 2023-01-22 12:40:05.710818: step: 262/466, loss: 0.010224856436252594 2023-01-22 12:40:06.300333: step: 264/466, loss: 0.004748664330691099 2023-01-22 12:40:06.937294: step: 266/466, loss: 0.06514088809490204 2023-01-22 12:40:07.532706: step: 268/466, loss: 0.018356163054704666 2023-01-22 12:40:08.156341: step: 270/466, loss: 0.037056829780340195 2023-01-22 12:40:08.819342: step: 272/466, loss: 0.03318117931485176 2023-01-22 12:40:09.423211: step: 274/466, loss: 0.02692977897822857 2023-01-22 12:40:10.053772: step: 276/466, loss: 0.1166185736656189 2023-01-22 12:40:10.645076: step: 278/466, loss: 0.05034998804330826 2023-01-22 12:40:11.350197: step: 280/466, loss: 0.07291010767221451 2023-01-22 12:40:11.911916: step: 282/466, loss: 0.040174517780542374 2023-01-22 12:40:12.484144: step: 284/466, loss: 0.03222532942891121 2023-01-22 12:40:13.088362: step: 286/466, loss: 0.41899573802948 2023-01-22 12:40:13.682098: step: 288/466, loss: 0.01620565727353096 2023-01-22 12:40:14.237857: step: 290/466, loss: 0.0240196343511343 2023-01-22 12:40:14.847215: step: 292/466, loss: 0.012082924135029316 2023-01-22 12:40:15.430822: step: 294/466, loss: 0.11501817405223846 2023-01-22 12:40:15.997072: step: 296/466, loss: 0.004925146698951721 2023-01-22 12:40:16.672026: step: 298/466, loss: 0.36350271105766296 2023-01-22 12:40:17.308833: step: 300/466, loss: 0.05301080644130707 2023-01-22 12:40:17.946210: step: 302/466, loss: 0.0018715260084718466 2023-01-22 12:40:18.562665: step: 304/466, loss: 0.030930697917938232 2023-01-22 12:40:19.176408: step: 306/466, loss: 0.02174968086183071 2023-01-22 12:40:19.748682: step: 308/466, loss: 0.0800512433052063 2023-01-22 12:40:20.296370: step: 310/466, loss: 0.03545597568154335 2023-01-22 12:40:20.920379: step: 312/466, loss: 0.05008088797330856 2023-01-22 12:40:21.566372: step: 314/466, loss: 0.14534986019134521 2023-01-22 12:40:22.164567: step: 316/466, loss: 0.03142093867063522 2023-01-22 12:40:22.802708: step: 318/466, loss: 0.02881583571434021 2023-01-22 12:40:23.510829: step: 320/466, loss: 0.01100649032741785 2023-01-22 12:40:24.177922: step: 322/466, loss: 0.028189118951559067 2023-01-22 12:40:24.825452: step: 324/466, loss: 0.010489918291568756 2023-01-22 12:40:25.450814: step: 326/466, loss: 0.04265473783016205 2023-01-22 12:40:26.273713: step: 328/466, loss: 0.1682056188583374 2023-01-22 12:40:26.820628: step: 330/466, loss: 0.019547518342733383 2023-01-22 12:40:27.346258: step: 332/466, loss: 0.021306661888957024 2023-01-22 12:40:28.004810: step: 334/466, loss: 0.04452720656991005 2023-01-22 12:40:28.640088: step: 336/466, loss: 0.0865069106221199 2023-01-22 12:40:29.245614: step: 338/466, loss: 0.040249284356832504 2023-01-22 12:40:29.835105: step: 340/466, loss: 0.01776963099837303 2023-01-22 12:40:30.502720: step: 342/466, loss: 0.06774012744426727 2023-01-22 12:40:31.133552: step: 344/466, loss: 0.0254667978733778 2023-01-22 12:40:31.783784: step: 346/466, loss: 0.015283987857401371 2023-01-22 12:40:32.360605: step: 348/466, loss: 0.038537781685590744 2023-01-22 12:40:32.995800: step: 350/466, loss: 0.09057411551475525 2023-01-22 12:40:33.590965: step: 352/466, loss: 0.051457326859235764 2023-01-22 12:40:34.169930: step: 354/466, loss: 0.04070008918642998 2023-01-22 12:40:34.826539: step: 356/466, loss: 0.05365927889943123 2023-01-22 12:40:35.427430: step: 358/466, loss: 0.07253986597061157 2023-01-22 12:40:36.052957: step: 360/466, loss: 0.001430719392374158 2023-01-22 12:40:36.698850: step: 362/466, loss: 0.03338882699608803 2023-01-22 12:40:37.293924: step: 364/466, loss: 0.02356225810945034 2023-01-22 12:40:37.947412: step: 366/466, loss: 0.029113180935382843 2023-01-22 12:40:38.550557: step: 368/466, loss: 0.021528951823711395 2023-01-22 12:40:39.144487: step: 370/466, loss: 0.011167202144861221 2023-01-22 12:40:39.779299: step: 372/466, loss: 0.0653170719742775 2023-01-22 12:40:40.444176: step: 374/466, loss: 0.20392701029777527 2023-01-22 12:40:41.055618: step: 376/466, loss: 0.01070206705480814 2023-01-22 12:40:41.644981: step: 378/466, loss: 0.03856460750102997 2023-01-22 12:40:42.220061: step: 380/466, loss: 0.02206994593143463 2023-01-22 12:40:42.856470: step: 382/466, loss: 0.17710959911346436 2023-01-22 12:40:43.492124: step: 384/466, loss: 0.34115350246429443 2023-01-22 12:40:44.069367: step: 386/466, loss: 0.030216675251722336 2023-01-22 12:40:44.726295: step: 388/466, loss: 0.01600075513124466 2023-01-22 12:40:45.349556: step: 390/466, loss: 0.08036049455404282 2023-01-22 12:40:45.960629: step: 392/466, loss: 0.7863232493400574 2023-01-22 12:40:46.613841: step: 394/466, loss: 0.09372567385435104 2023-01-22 12:40:47.285518: step: 396/466, loss: 0.2711730897426605 2023-01-22 12:40:47.912779: step: 398/466, loss: 0.031892675906419754 2023-01-22 12:40:48.439207: step: 400/466, loss: 0.040052060037851334 2023-01-22 12:40:49.031368: step: 402/466, loss: 0.029966307803988457 2023-01-22 12:40:49.602385: step: 404/466, loss: 0.00928488653153181 2023-01-22 12:40:50.303341: step: 406/466, loss: 0.005141105968505144 2023-01-22 12:40:50.912473: step: 408/466, loss: 0.025026477873325348 2023-01-22 12:40:51.548923: step: 410/466, loss: 0.020624669268727303 2023-01-22 12:40:52.163445: step: 412/466, loss: 0.015934638679027557 2023-01-22 12:40:52.742120: step: 414/466, loss: 0.2350529432296753 2023-01-22 12:40:53.332668: step: 416/466, loss: 0.03973572701215744 2023-01-22 12:40:53.974634: step: 418/466, loss: 0.028968505561351776 2023-01-22 12:40:54.553867: step: 420/466, loss: 0.020778411999344826 2023-01-22 12:40:55.116464: step: 422/466, loss: 0.3516223430633545 2023-01-22 12:40:55.731467: step: 424/466, loss: 0.13670171797275543 2023-01-22 12:40:56.343549: step: 426/466, loss: 0.0782453641295433 2023-01-22 12:40:57.009752: step: 428/466, loss: 0.007612935733050108 2023-01-22 12:40:57.650026: step: 430/466, loss: 0.009548550471663475 2023-01-22 12:40:58.245584: step: 432/466, loss: 0.10455300658941269 2023-01-22 12:40:58.810702: step: 434/466, loss: 0.005702846217900515 2023-01-22 12:40:59.398191: step: 436/466, loss: 0.00872819498181343 2023-01-22 12:40:59.913303: step: 438/466, loss: 0.0432143472135067 2023-01-22 12:41:00.551175: step: 440/466, loss: 0.0244740080088377 2023-01-22 12:41:01.215185: step: 442/466, loss: 0.036844585090875626 2023-01-22 12:41:01.860101: step: 444/466, loss: 0.038751665502786636 2023-01-22 12:41:02.421642: step: 446/466, loss: 0.028784453868865967 2023-01-22 12:41:03.117545: step: 448/466, loss: 0.051399070769548416 2023-01-22 12:41:03.677255: step: 450/466, loss: 0.014154715463519096 2023-01-22 12:41:04.253151: step: 452/466, loss: 0.008030824363231659 2023-01-22 12:41:04.782830: step: 454/466, loss: 0.05862676724791527 2023-01-22 12:41:05.456109: step: 456/466, loss: 0.006854075472801924 2023-01-22 12:41:06.054916: step: 458/466, loss: 0.0058186049573123455 2023-01-22 12:41:06.663952: step: 460/466, loss: 0.07675496488809586 2023-01-22 12:41:07.196211: step: 462/466, loss: 0.02009126916527748 2023-01-22 12:41:07.856935: step: 464/466, loss: 0.013251720927655697 2023-01-22 12:41:08.446238: step: 466/466, loss: 0.019120588898658752 2023-01-22 12:41:09.095578: step: 468/466, loss: 0.03249607980251312 2023-01-22 12:41:09.650589: step: 470/466, loss: 0.030159030109643936 2023-01-22 12:41:10.262747: step: 472/466, loss: 0.36210766434669495 2023-01-22 12:41:10.877938: step: 474/466, loss: 0.01699150539934635 2023-01-22 12:41:11.523160: step: 476/466, loss: 0.0033404179848730564 2023-01-22 12:41:12.067873: step: 478/466, loss: 0.01301879994571209 2023-01-22 12:41:12.613261: step: 480/466, loss: 0.0035054609179496765 2023-01-22 12:41:13.202222: step: 482/466, loss: 0.021764883771538734 2023-01-22 12:41:13.802952: step: 484/466, loss: 0.015501120127737522 2023-01-22 12:41:14.402307: step: 486/466, loss: 0.026269903406500816 2023-01-22 12:41:14.988865: step: 488/466, loss: 0.0158759243786335 2023-01-22 12:41:15.624980: step: 490/466, loss: 0.0009323122794739902 2023-01-22 12:41:16.255676: step: 492/466, loss: 0.01813668943941593 2023-01-22 12:41:16.841643: step: 494/466, loss: 0.2687680423259735 2023-01-22 12:41:17.385816: step: 496/466, loss: 0.022268308326601982 2023-01-22 12:41:17.974814: step: 498/466, loss: 0.020895710214972496 2023-01-22 12:41:18.606259: step: 500/466, loss: 0.03320271894335747 2023-01-22 12:41:19.228175: step: 502/466, loss: 0.08355792611837387 2023-01-22 12:41:19.812114: step: 504/466, loss: 0.16657261550426483 2023-01-22 12:41:20.359417: step: 506/466, loss: 0.02144947461783886 2023-01-22 12:41:21.018650: step: 508/466, loss: 0.02311338298022747 2023-01-22 12:41:21.593607: step: 510/466, loss: 0.008023801259696484 2023-01-22 12:41:22.186552: step: 512/466, loss: 0.022611264139413834 2023-01-22 12:41:22.823234: step: 514/466, loss: 0.013177191838622093 2023-01-22 12:41:23.438040: step: 516/466, loss: 0.003986111376434565 2023-01-22 12:41:24.027704: step: 518/466, loss: 0.16800203919410706 2023-01-22 12:41:24.590492: step: 520/466, loss: 0.0060579185374081135 2023-01-22 12:41:25.220680: step: 522/466, loss: 0.029098467901349068 2023-01-22 12:41:25.827293: step: 524/466, loss: 0.029289137572050095 2023-01-22 12:41:26.426530: step: 526/466, loss: 0.07186546176671982 2023-01-22 12:41:27.004826: step: 528/466, loss: 0.074771448969841 2023-01-22 12:41:27.557314: step: 530/466, loss: 0.005138975568115711 2023-01-22 12:41:28.165064: step: 532/466, loss: 0.03942608833312988 2023-01-22 12:41:28.778316: step: 534/466, loss: 0.005945540964603424 2023-01-22 12:41:29.394145: step: 536/466, loss: 0.028450069949030876 2023-01-22 12:41:30.000760: step: 538/466, loss: 0.010474931448698044 2023-01-22 12:41:30.642593: step: 540/466, loss: 0.011315548792481422 2023-01-22 12:41:31.292687: step: 542/466, loss: 0.07123514264822006 2023-01-22 12:41:31.906773: step: 544/466, loss: 0.020416492596268654 2023-01-22 12:41:32.536457: step: 546/466, loss: 0.04518275335431099 2023-01-22 12:41:33.141918: step: 548/466, loss: 0.05349896848201752 2023-01-22 12:41:33.805300: step: 550/466, loss: 0.03100154921412468 2023-01-22 12:41:34.410051: step: 552/466, loss: 0.02530047297477722 2023-01-22 12:41:35.007501: step: 554/466, loss: 0.009493943303823471 2023-01-22 12:41:35.577971: step: 556/466, loss: 0.18496611714363098 2023-01-22 12:41:36.199071: step: 558/466, loss: 0.027933640405535698 2023-01-22 12:41:36.834828: step: 560/466, loss: 0.04296899959445 2023-01-22 12:41:37.427088: step: 562/466, loss: 0.038775667548179626 2023-01-22 12:41:37.991983: step: 564/466, loss: 0.004564823117107153 2023-01-22 12:41:38.586173: step: 566/466, loss: 0.03634239733219147 2023-01-22 12:41:39.199581: step: 568/466, loss: 0.027375012636184692 2023-01-22 12:41:39.810520: step: 570/466, loss: 0.008710439316928387 2023-01-22 12:41:40.419679: step: 572/466, loss: 0.02768733724951744 2023-01-22 12:41:41.090385: step: 574/466, loss: 0.05412431061267853 2023-01-22 12:41:41.684559: step: 576/466, loss: 0.008412402123212814 2023-01-22 12:41:42.313649: step: 578/466, loss: 0.06098160147666931 2023-01-22 12:41:42.902491: step: 580/466, loss: 0.07021938264369965 2023-01-22 12:41:43.490510: step: 582/466, loss: 0.06752918660640717 2023-01-22 12:41:44.102452: step: 584/466, loss: 0.05675654858350754 2023-01-22 12:41:44.692234: step: 586/466, loss: 0.025131892412900925 2023-01-22 12:41:45.240334: step: 588/466, loss: 0.026210255920886993 2023-01-22 12:41:45.877439: step: 590/466, loss: 0.025238752365112305 2023-01-22 12:41:46.483501: step: 592/466, loss: 0.0021176671143621206 2023-01-22 12:41:47.075226: step: 594/466, loss: 0.06994631141424179 2023-01-22 12:41:47.728400: step: 596/466, loss: 0.13406339287757874 2023-01-22 12:41:48.246551: step: 598/466, loss: 0.04605163633823395 2023-01-22 12:41:48.863334: step: 600/466, loss: 0.04097013548016548 2023-01-22 12:41:49.428264: step: 602/466, loss: 0.039376720786094666 2023-01-22 12:41:50.036672: step: 604/466, loss: 0.05856088921427727 2023-01-22 12:41:50.667819: step: 606/466, loss: 0.004100484307855368 2023-01-22 12:41:51.244606: step: 608/466, loss: 0.00830951239913702 2023-01-22 12:41:51.876964: step: 610/466, loss: 0.03391300514340401 2023-01-22 12:41:52.428652: step: 612/466, loss: 0.04908227175474167 2023-01-22 12:41:52.977245: step: 614/466, loss: 0.006420582998543978 2023-01-22 12:41:53.557588: step: 616/466, loss: 0.05737851932644844 2023-01-22 12:41:54.153129: step: 618/466, loss: 0.004047843161970377 2023-01-22 12:41:54.732233: step: 620/466, loss: 0.018269887194037437 2023-01-22 12:41:55.293430: step: 622/466, loss: 0.015939846634864807 2023-01-22 12:41:55.863429: step: 624/466, loss: 0.11771312355995178 2023-01-22 12:41:56.503727: step: 626/466, loss: 0.23366926610469818 2023-01-22 12:41:57.161749: step: 628/466, loss: 0.14914771914482117 2023-01-22 12:41:57.839302: step: 630/466, loss: 0.056283481419086456 2023-01-22 12:41:58.469806: step: 632/466, loss: 0.055580224841833115 2023-01-22 12:41:59.132052: step: 634/466, loss: 0.00767617579549551 2023-01-22 12:41:59.683265: step: 636/466, loss: 0.004347812384366989 2023-01-22 12:42:00.332288: step: 638/466, loss: 0.018882201984524727 2023-01-22 12:42:00.910654: step: 640/466, loss: 0.010219431482255459 2023-01-22 12:42:01.464617: step: 642/466, loss: 0.056204766035079956 2023-01-22 12:42:02.073609: step: 644/466, loss: 0.02552511729300022 2023-01-22 12:42:02.639229: step: 646/466, loss: 0.15252268314361572 2023-01-22 12:42:03.235397: step: 648/466, loss: 0.2137022316455841 2023-01-22 12:42:03.835423: step: 650/466, loss: 0.05027611553668976 2023-01-22 12:42:04.483272: step: 652/466, loss: 0.011146695353090763 2023-01-22 12:42:05.100032: step: 654/466, loss: 0.0017963236896321177 2023-01-22 12:42:05.662546: step: 656/466, loss: 0.008355779573321342 2023-01-22 12:42:06.306798: step: 658/466, loss: 0.011687744408845901 2023-01-22 12:42:06.937084: step: 660/466, loss: 0.017062479630112648 2023-01-22 12:42:07.543531: step: 662/466, loss: 0.027931544929742813 2023-01-22 12:42:08.162468: step: 664/466, loss: 0.0728331133723259 2023-01-22 12:42:08.779581: step: 666/466, loss: 0.040246982127428055 2023-01-22 12:42:09.406224: step: 668/466, loss: 0.0050750491209328175 2023-01-22 12:42:09.988898: step: 670/466, loss: 0.0024562934413552284 2023-01-22 12:42:10.632420: step: 672/466, loss: 0.0371859148144722 2023-01-22 12:42:11.275368: step: 674/466, loss: 0.014029420912265778 2023-01-22 12:42:11.911463: step: 676/466, loss: 0.008747943677008152 2023-01-22 12:42:12.492672: step: 678/466, loss: 0.08006829023361206 2023-01-22 12:42:13.025594: step: 680/466, loss: 0.0167327169328928 2023-01-22 12:42:13.657324: step: 682/466, loss: 0.05692826956510544 2023-01-22 12:42:14.241531: step: 684/466, loss: 0.007294619921594858 2023-01-22 12:42:14.860455: step: 686/466, loss: 0.0033462075516581535 2023-01-22 12:42:15.553307: step: 688/466, loss: 0.023898642510175705 2023-01-22 12:42:16.163001: step: 690/466, loss: 0.013447328470647335 2023-01-22 12:42:16.763686: step: 692/466, loss: 0.02081284299492836 2023-01-22 12:42:17.340375: step: 694/466, loss: 0.5867592692375183 2023-01-22 12:42:17.944476: step: 696/466, loss: 0.05716710537672043 2023-01-22 12:42:18.567317: step: 698/466, loss: 0.014385833404958248 2023-01-22 12:42:19.200982: step: 700/466, loss: 0.14949959516525269 2023-01-22 12:42:19.758909: step: 702/466, loss: 0.0021207034587860107 2023-01-22 12:42:20.329835: step: 704/466, loss: 0.013642613776028156 2023-01-22 12:42:20.915504: step: 706/466, loss: 0.050527218729257584 2023-01-22 12:42:21.562686: step: 708/466, loss: 0.09773019701242447 2023-01-22 12:42:22.174338: step: 710/466, loss: 0.007971648126840591 2023-01-22 12:42:22.857718: step: 712/466, loss: 0.04106210917234421 2023-01-22 12:42:23.500973: step: 714/466, loss: 0.033593036234378815 2023-01-22 12:42:24.092989: step: 716/466, loss: 0.005900632124394178 2023-01-22 12:42:24.672553: step: 718/466, loss: 0.025367496535182 2023-01-22 12:42:25.260323: step: 720/466, loss: 0.13995912671089172 2023-01-22 12:42:25.841216: step: 722/466, loss: 0.07217904180288315 2023-01-22 12:42:26.526784: step: 724/466, loss: 0.04147706553339958 2023-01-22 12:42:27.082143: step: 726/466, loss: 0.012126031331717968 2023-01-22 12:42:27.713831: step: 728/466, loss: 0.002041777828708291 2023-01-22 12:42:28.303428: step: 730/466, loss: 0.096912682056427 2023-01-22 12:42:28.925767: step: 732/466, loss: 0.13700343668460846 2023-01-22 12:42:29.486044: step: 734/466, loss: 0.015220297500491142 2023-01-22 12:42:30.103086: step: 736/466, loss: 0.003426916664466262 2023-01-22 12:42:30.742764: step: 738/466, loss: 0.02217986062169075 2023-01-22 12:42:31.361740: step: 740/466, loss: 0.013844664208590984 2023-01-22 12:42:31.967324: step: 742/466, loss: 0.015300248749554157 2023-01-22 12:42:32.598862: step: 744/466, loss: 1.4703454971313477 2023-01-22 12:42:33.203671: step: 746/466, loss: 0.04135194048285484 2023-01-22 12:42:33.837573: step: 748/466, loss: 0.027552183717489243 2023-01-22 12:42:34.439197: step: 750/466, loss: 0.28672534227371216 2023-01-22 12:42:35.074806: step: 752/466, loss: 0.011509292759001255 2023-01-22 12:42:35.640111: step: 754/466, loss: 0.02878592722117901 2023-01-22 12:42:36.292551: step: 756/466, loss: 0.01280699297785759 2023-01-22 12:42:36.946717: step: 758/466, loss: 0.009594520553946495 2023-01-22 12:42:37.613334: step: 760/466, loss: 0.03990183770656586 2023-01-22 12:42:38.218254: step: 762/466, loss: 0.04524315893650055 2023-01-22 12:42:38.832600: step: 764/466, loss: 0.08061192184686661 2023-01-22 12:42:39.403560: step: 766/466, loss: 0.02775973081588745 2023-01-22 12:42:39.979861: step: 768/466, loss: 0.02547493390738964 2023-01-22 12:42:40.562389: step: 770/466, loss: 0.06113841384649277 2023-01-22 12:42:41.209454: step: 772/466, loss: 0.05478772148489952 2023-01-22 12:42:41.874003: step: 774/466, loss: 0.021239129826426506 2023-01-22 12:42:42.535978: step: 776/466, loss: 0.002169389743357897 2023-01-22 12:42:43.146373: step: 778/466, loss: 0.07251054793596268 2023-01-22 12:42:43.797894: step: 780/466, loss: 0.02980934828519821 2023-01-22 12:42:44.419751: step: 782/466, loss: 0.007325939834117889 2023-01-22 12:42:44.975702: step: 784/466, loss: 0.02687842957675457 2023-01-22 12:42:45.642188: step: 786/466, loss: 0.022026900202035904 2023-01-22 12:42:46.259587: step: 788/466, loss: 0.03064543381333351 2023-01-22 12:42:46.874351: step: 790/466, loss: 0.051075346767902374 2023-01-22 12:42:47.499522: step: 792/466, loss: 0.014683965593576431 2023-01-22 12:42:48.049760: step: 794/466, loss: 0.0023933525662869215 2023-01-22 12:42:48.678282: step: 796/466, loss: 0.0172136090695858 2023-01-22 12:42:49.301736: step: 798/466, loss: 0.021011296659708023 2023-01-22 12:42:49.926451: step: 800/466, loss: 0.06336275488138199 2023-01-22 12:42:50.539613: step: 802/466, loss: 0.16190217435359955 2023-01-22 12:42:51.158281: step: 804/466, loss: 0.03494153544306755 2023-01-22 12:42:51.723498: step: 806/466, loss: 0.04694707319140434 2023-01-22 12:42:52.317192: step: 808/466, loss: 0.018555352464318275 2023-01-22 12:42:52.916058: step: 810/466, loss: 0.026234649121761322 2023-01-22 12:42:53.512371: step: 812/466, loss: 0.029778089374303818 2023-01-22 12:42:54.129948: step: 814/466, loss: 0.01820109598338604 2023-01-22 12:42:54.681410: step: 816/466, loss: 0.0023990797344595194 2023-01-22 12:42:55.309211: step: 818/466, loss: 0.024855811148881912 2023-01-22 12:42:55.891957: step: 820/466, loss: 0.03238849341869354 2023-01-22 12:42:56.493981: step: 822/466, loss: 0.04709932580590248 2023-01-22 12:42:57.072769: step: 824/466, loss: 0.019101126119494438 2023-01-22 12:42:57.682595: step: 826/466, loss: 0.03390325605869293 2023-01-22 12:42:58.300724: step: 828/466, loss: 0.08838388323783875 2023-01-22 12:42:58.887013: step: 830/466, loss: 0.04158073291182518 2023-01-22 12:42:59.492310: step: 832/466, loss: 0.48680511116981506 2023-01-22 12:43:00.143636: step: 834/466, loss: 0.011362655088305473 2023-01-22 12:43:00.745744: step: 836/466, loss: 0.043221790343523026 2023-01-22 12:43:01.380498: step: 838/466, loss: 0.029000479727983475 2023-01-22 12:43:02.172886: step: 840/466, loss: 0.1502661556005478 2023-01-22 12:43:02.797910: step: 842/466, loss: 0.09018251299858093 2023-01-22 12:43:03.457186: step: 844/466, loss: 0.03327755257487297 2023-01-22 12:43:04.085199: step: 846/466, loss: 0.034349311143159866 2023-01-22 12:43:04.730009: step: 848/466, loss: 0.5977291464805603 2023-01-22 12:43:05.345675: step: 850/466, loss: 0.0321338064968586 2023-01-22 12:43:05.922631: step: 852/466, loss: 0.09711363166570663 2023-01-22 12:43:06.515201: step: 854/466, loss: 0.017812050879001617 2023-01-22 12:43:07.053868: step: 856/466, loss: 0.013304860331118107 2023-01-22 12:43:07.636004: step: 858/466, loss: 0.017864806577563286 2023-01-22 12:43:08.312176: step: 860/466, loss: 0.025627808645367622 2023-01-22 12:43:08.860096: step: 862/466, loss: 0.04636122286319733 2023-01-22 12:43:09.454019: step: 864/466, loss: 0.2754199802875519 2023-01-22 12:43:10.108785: step: 866/466, loss: 0.02035515569150448 2023-01-22 12:43:10.689158: step: 868/466, loss: 0.019799085333943367 2023-01-22 12:43:11.311805: step: 870/466, loss: 0.014882412739098072 2023-01-22 12:43:11.967271: step: 872/466, loss: 0.13393734395503998 2023-01-22 12:43:12.595052: step: 874/466, loss: 0.057462695986032486 2023-01-22 12:43:13.218574: step: 876/466, loss: 0.018368752673268318 2023-01-22 12:43:13.771119: step: 878/466, loss: 0.04251131787896156 2023-01-22 12:43:14.368741: step: 880/466, loss: 0.03908228501677513 2023-01-22 12:43:14.984517: step: 882/466, loss: 0.0012620191555470228 2023-01-22 12:43:15.659236: step: 884/466, loss: 0.07473523169755936 2023-01-22 12:43:16.302884: step: 886/466, loss: 0.04367680475115776 2023-01-22 12:43:16.907816: step: 888/466, loss: 0.005734715610742569 2023-01-22 12:43:17.495519: step: 890/466, loss: 0.018414320424199104 2023-01-22 12:43:18.122788: step: 892/466, loss: 0.028936324641108513 2023-01-22 12:43:18.708510: step: 894/466, loss: 0.022568896412849426 2023-01-22 12:43:19.264709: step: 896/466, loss: 0.013084076344966888 2023-01-22 12:43:19.841520: step: 898/466, loss: 0.0422712080180645 2023-01-22 12:43:20.448660: step: 900/466, loss: 0.034558240324258804 2023-01-22 12:43:21.025789: step: 902/466, loss: 0.03358054161071777 2023-01-22 12:43:21.580828: step: 904/466, loss: 0.06838488578796387 2023-01-22 12:43:22.181010: step: 906/466, loss: 0.03336134925484657 2023-01-22 12:43:22.783867: step: 908/466, loss: 0.024106288328766823 2023-01-22 12:43:23.397814: step: 910/466, loss: 0.1128292828798294 2023-01-22 12:43:24.031363: step: 912/466, loss: 0.26297375559806824 2023-01-22 12:43:24.607375: step: 914/466, loss: 0.0409359335899353 2023-01-22 12:43:25.195778: step: 916/466, loss: 0.6932467222213745 2023-01-22 12:43:25.788552: step: 918/466, loss: 0.15438520908355713 2023-01-22 12:43:26.448313: step: 920/466, loss: 0.011742820963263512 2023-01-22 12:43:27.044650: step: 922/466, loss: 0.021806012839078903 2023-01-22 12:43:27.707531: step: 924/466, loss: 0.19510570168495178 2023-01-22 12:43:28.395686: step: 926/466, loss: 0.01388524565845728 2023-01-22 12:43:29.002929: step: 928/466, loss: 0.008038608357310295 2023-01-22 12:43:29.591228: step: 930/466, loss: 0.019992290064692497 2023-01-22 12:43:30.225813: step: 932/466, loss: 0.21347476541996002 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31683838933204417, 'r': 0.3553159166892564, 'f1': 0.3349758284351308}, 'combined': 0.24682429463641214, 'epoch': 25} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.36118400462970596, 'r': 0.3337127925230386, 'f1': 0.34690539168836987}, 'combined': 0.2300719696171572, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.297234623015873, 'r': 0.28372395833333336, 'f1': 0.2903221899224806}, 'combined': 0.19354812661498705, 'epoch': 25} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.36873138012842605, 'r': 0.31332614684488014, 'f1': 0.33877841087429617}, 'combined': 0.2210974892021722, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29902726030799803, 'r': 0.34612263527111725, 'f1': 0.32085598731377096}, 'combined': 0.23642020117856807, 'epoch': 25} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34863148296768576, 'r': 0.32025801162659273, 'f1': 0.3338429633833164}, 'combined': 0.22140880473090413, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23809523809523808, 'r': 0.38095238095238093, 'f1': 0.293040293040293}, 'combined': 0.19536019536019533, 'epoch': 25} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44047619047619047, 'r': 0.40217391304347827, 'f1': 0.4204545454545454}, 'combined': 0.2803030303030303, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38235294117647056, 'r': 0.22413793103448276, 'f1': 0.28260869565217395}, 'combined': 0.18840579710144928, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2874809714095428, 'r': 0.3474864872635271, 'f1': 0.31464841716130376}, 'combined': 0.2318462021188554, 'epoch': 23} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3397515828631042, 'r': 0.3070434946603249, 'f1': 0.3225705233196853}, 'combined': 0.21393278230528348, 'epoch': 23} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4117647058823529, 'r': 0.2413793103448276, 'f1': 0.3043478260869565}, 'combined': 0.20289855072463764, 'epoch': 23} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:45:55.942259: step: 2/466, loss: 0.045414477586746216 2023-01-22 12:45:56.531181: step: 4/466, loss: 0.03918834775686264 2023-01-22 12:45:57.163837: step: 6/466, loss: 0.33598366379737854 2023-01-22 12:45:57.729474: step: 8/466, loss: 0.3022470772266388 2023-01-22 12:45:58.312393: step: 10/466, loss: 0.012483332306146622 2023-01-22 12:45:58.878864: step: 12/466, loss: 0.024693988263607025 2023-01-22 12:45:59.524307: step: 14/466, loss: 0.024169620126485825 2023-01-22 12:46:00.189241: step: 16/466, loss: 0.09472887217998505 2023-01-22 12:46:00.774953: step: 18/466, loss: 0.008662912994623184 2023-01-22 12:46:01.348238: step: 20/466, loss: 0.008214856497943401 2023-01-22 12:46:01.938529: step: 22/466, loss: 0.022339563816785812 2023-01-22 12:46:02.522579: step: 24/466, loss: 0.03240164369344711 2023-01-22 12:46:03.119362: step: 26/466, loss: 0.04471060633659363 2023-01-22 12:46:03.769079: step: 28/466, loss: 0.009255090728402138 2023-01-22 12:46:04.433999: step: 30/466, loss: 0.020481910556554794 2023-01-22 12:46:05.015946: step: 32/466, loss: 0.06160318851470947 2023-01-22 12:46:05.624439: step: 34/466, loss: 0.03593392297625542 2023-01-22 12:46:06.261706: step: 36/466, loss: 0.021053927019238472 2023-01-22 12:46:06.825944: step: 38/466, loss: 0.014367097057402134 2023-01-22 12:46:07.398148: step: 40/466, loss: 0.004586027003824711 2023-01-22 12:46:07.967371: step: 42/466, loss: 0.009572326205670834 2023-01-22 12:46:08.582805: step: 44/466, loss: 0.013665699400007725 2023-01-22 12:46:09.105593: step: 46/466, loss: 0.0024054343812167645 2023-01-22 12:46:09.799338: step: 48/466, loss: 0.06742464751005173 2023-01-22 12:46:10.402945: step: 50/466, loss: 2.3411319255828857 2023-01-22 12:46:11.040113: step: 52/466, loss: 0.15515947341918945 2023-01-22 12:46:11.670010: step: 54/466, loss: 0.018642693758010864 2023-01-22 12:46:12.325999: step: 56/466, loss: 0.007206358015537262 2023-01-22 12:46:12.893704: step: 58/466, loss: 0.025296475738286972 2023-01-22 12:46:13.512536: step: 60/466, loss: 0.02713521011173725 2023-01-22 12:46:14.086353: step: 62/466, loss: 0.002757714129984379 2023-01-22 12:46:14.707646: step: 64/466, loss: 0.0047846343368291855 2023-01-22 12:46:15.478757: step: 66/466, loss: 0.04622029885649681 2023-01-22 12:46:16.069834: step: 68/466, loss: 0.028843289241194725 2023-01-22 12:46:16.717448: step: 70/466, loss: 0.07008807361125946 2023-01-22 12:46:17.273779: step: 72/466, loss: 0.013329028151929379 2023-01-22 12:46:17.837265: step: 74/466, loss: 0.06537533551454544 2023-01-22 12:46:18.464594: step: 76/466, loss: 0.11617883294820786 2023-01-22 12:46:19.104034: step: 78/466, loss: 0.0019994976464658976 2023-01-22 12:46:19.771353: step: 80/466, loss: 0.010550200939178467 2023-01-22 12:46:20.407378: step: 82/466, loss: 0.025649353861808777 2023-01-22 12:46:21.020596: step: 84/466, loss: 0.003584762569516897 2023-01-22 12:46:21.638401: step: 86/466, loss: 0.007658570073544979 2023-01-22 12:46:22.181728: step: 88/466, loss: 0.014431743882596493 2023-01-22 12:46:22.804226: step: 90/466, loss: 0.035823170095682144 2023-01-22 12:46:23.428239: step: 92/466, loss: 0.026814866811037064 2023-01-22 12:46:24.016082: step: 94/466, loss: 0.047312382608652115 2023-01-22 12:46:24.549750: step: 96/466, loss: 0.028254160657525063 2023-01-22 12:46:25.160938: step: 98/466, loss: 0.042886678129434586 2023-01-22 12:46:25.792037: step: 100/466, loss: 0.03463972359895706 2023-01-22 12:46:26.391274: step: 102/466, loss: 0.016591209918260574 2023-01-22 12:46:26.970346: step: 104/466, loss: 0.010787018574774265 2023-01-22 12:46:27.561368: step: 106/466, loss: 0.024628739804029465 2023-01-22 12:46:28.133489: step: 108/466, loss: 0.0013052689610049129 2023-01-22 12:46:28.781391: step: 110/466, loss: 0.008944116532802582 2023-01-22 12:46:29.370574: step: 112/466, loss: 0.01800631731748581 2023-01-22 12:46:30.001002: step: 114/466, loss: 0.01092657633125782 2023-01-22 12:46:30.596623: step: 116/466, loss: 0.056671883910894394 2023-01-22 12:46:31.184830: step: 118/466, loss: 0.05620972812175751 2023-01-22 12:46:31.776621: step: 120/466, loss: 0.09269988536834717 2023-01-22 12:46:32.410754: step: 122/466, loss: 0.03398977220058441 2023-01-22 12:46:33.012945: step: 124/466, loss: 0.005538527388125658 2023-01-22 12:46:33.647907: step: 126/466, loss: 0.019748879596590996 2023-01-22 12:46:34.207608: step: 128/466, loss: 0.012078657746315002 2023-01-22 12:46:34.811133: step: 130/466, loss: 0.022150034084916115 2023-01-22 12:46:35.408732: step: 132/466, loss: 0.02786128781735897 2023-01-22 12:46:36.008534: step: 134/466, loss: 0.040518466383218765 2023-01-22 12:46:36.662949: step: 136/466, loss: 0.011007298715412617 2023-01-22 12:46:37.312047: step: 138/466, loss: 0.01236250065267086 2023-01-22 12:46:37.908207: step: 140/466, loss: 0.00545964390039444 2023-01-22 12:46:38.555216: step: 142/466, loss: 0.013405879959464073 2023-01-22 12:46:39.125947: step: 144/466, loss: 0.019116604700684547 2023-01-22 12:46:39.725394: step: 146/466, loss: 0.024427859112620354 2023-01-22 12:46:40.339072: step: 148/466, loss: 0.03811703994870186 2023-01-22 12:46:40.884914: step: 150/466, loss: 0.00464991107583046 2023-01-22 12:46:41.456621: step: 152/466, loss: 4.555197715759277 2023-01-22 12:46:42.049087: step: 154/466, loss: 0.017821406945586205 2023-01-22 12:46:42.667240: step: 156/466, loss: 0.1571727991104126 2023-01-22 12:46:43.248970: step: 158/466, loss: 0.04710790142416954 2023-01-22 12:46:43.874329: step: 160/466, loss: 0.033908210694789886 2023-01-22 12:46:44.462586: step: 162/466, loss: 0.022364942356944084 2023-01-22 12:46:45.108641: step: 164/466, loss: 0.19430869817733765 2023-01-22 12:46:45.712007: step: 166/466, loss: 0.003512404393404722 2023-01-22 12:46:46.277014: step: 168/466, loss: 0.043155040591955185 2023-01-22 12:46:46.832514: step: 170/466, loss: 0.00416438328102231 2023-01-22 12:46:47.472245: step: 172/466, loss: 0.03939838707447052 2023-01-22 12:46:48.022139: step: 174/466, loss: 0.03198012337088585 2023-01-22 12:46:48.597756: step: 176/466, loss: 0.003328982973471284 2023-01-22 12:46:49.166575: step: 178/466, loss: 0.03389064222574234 2023-01-22 12:46:49.735956: step: 180/466, loss: 0.009292037226259708 2023-01-22 12:46:50.342325: step: 182/466, loss: 0.0263590719550848 2023-01-22 12:46:51.000749: step: 184/466, loss: 0.01964017190039158 2023-01-22 12:46:51.601446: step: 186/466, loss: 0.05610666796565056 2023-01-22 12:46:52.269235: step: 188/466, loss: 0.06703130900859833 2023-01-22 12:46:52.891993: step: 190/466, loss: 0.007344461977481842 2023-01-22 12:46:53.461439: step: 192/466, loss: 0.004412634763866663 2023-01-22 12:46:54.098413: step: 194/466, loss: 0.011533022858202457 2023-01-22 12:46:54.695413: step: 196/466, loss: 0.08746032416820526 2023-01-22 12:46:55.317224: step: 198/466, loss: 0.00856334250420332 2023-01-22 12:46:55.933613: step: 200/466, loss: 0.038558684289455414 2023-01-22 12:46:56.539229: step: 202/466, loss: 0.028453713282942772 2023-01-22 12:46:57.179124: step: 204/466, loss: 0.38487887382507324 2023-01-22 12:46:57.842856: step: 206/466, loss: 0.008262191899120808 2023-01-22 12:46:58.440170: step: 208/466, loss: 0.0030298414640128613 2023-01-22 12:46:59.064126: step: 210/466, loss: 0.05420486629009247 2023-01-22 12:46:59.598337: step: 212/466, loss: 0.016683876514434814 2023-01-22 12:47:00.228955: step: 214/466, loss: 0.05660427734255791 2023-01-22 12:47:00.812061: step: 216/466, loss: 0.08958832919597626 2023-01-22 12:47:01.416578: step: 218/466, loss: 0.012682387605309486 2023-01-22 12:47:02.026306: step: 220/466, loss: 0.016953550279140472 2023-01-22 12:47:02.670826: step: 222/466, loss: 0.017252590507268906 2023-01-22 12:47:03.236381: step: 224/466, loss: 0.011150977574288845 2023-01-22 12:47:03.815845: step: 226/466, loss: 0.006136606447398663 2023-01-22 12:47:04.355996: step: 228/466, loss: 0.008039235137403011 2023-01-22 12:47:04.957476: step: 230/466, loss: 0.0220362339168787 2023-01-22 12:47:05.647944: step: 232/466, loss: 0.014643709175288677 2023-01-22 12:47:06.246962: step: 234/466, loss: 0.22404278814792633 2023-01-22 12:47:06.888987: step: 236/466, loss: 0.04199331998825073 2023-01-22 12:47:07.441957: step: 238/466, loss: 0.0051811812445521355 2023-01-22 12:47:08.105884: step: 240/466, loss: 0.02466060034930706 2023-01-22 12:47:08.734230: step: 242/466, loss: 0.11676938086748123 2023-01-22 12:47:09.263318: step: 244/466, loss: 1.998118204937782e-05 2023-01-22 12:47:09.918386: step: 246/466, loss: 0.4189917743206024 2023-01-22 12:47:10.531420: step: 248/466, loss: 0.025706855580210686 2023-01-22 12:47:11.106777: step: 250/466, loss: 0.3023975193500519 2023-01-22 12:47:11.792658: step: 252/466, loss: 0.04882144555449486 2023-01-22 12:47:12.400823: step: 254/466, loss: 0.008105840533971786 2023-01-22 12:47:13.051868: step: 256/466, loss: 5.065145523985848e-05 2023-01-22 12:47:13.718692: step: 258/466, loss: 0.057812340557575226 2023-01-22 12:47:14.315420: step: 260/466, loss: 0.14420419931411743 2023-01-22 12:47:14.847495: step: 262/466, loss: 0.001418450498022139 2023-01-22 12:47:15.454571: step: 264/466, loss: 0.03700948879122734 2023-01-22 12:47:16.092718: step: 266/466, loss: 0.009782583452761173 2023-01-22 12:47:16.670046: step: 268/466, loss: 0.06596330553293228 2023-01-22 12:47:17.266733: step: 270/466, loss: 0.0538744255900383 2023-01-22 12:47:17.914668: step: 272/466, loss: 0.04572300612926483 2023-01-22 12:47:18.518837: step: 274/466, loss: 0.027452930808067322 2023-01-22 12:47:19.134440: step: 276/466, loss: 0.0159525778144598 2023-01-22 12:47:19.722070: step: 278/466, loss: 0.001892161089926958 2023-01-22 12:47:20.369246: step: 280/466, loss: 0.029880749061703682 2023-01-22 12:47:20.969700: step: 282/466, loss: 0.004739616997539997 2023-01-22 12:47:21.632672: step: 284/466, loss: 0.033471375703811646 2023-01-22 12:47:22.240289: step: 286/466, loss: 0.05095507577061653 2023-01-22 12:47:22.877741: step: 288/466, loss: 0.041758857667446136 2023-01-22 12:47:23.491126: step: 290/466, loss: 0.015397449024021626 2023-01-22 12:47:24.112781: step: 292/466, loss: 0.013284178450703621 2023-01-22 12:47:24.796989: step: 294/466, loss: 0.21159973740577698 2023-01-22 12:47:25.424504: step: 296/466, loss: 0.018651289865374565 2023-01-22 12:47:26.074235: step: 298/466, loss: 0.040743838995695114 2023-01-22 12:47:26.654743: step: 300/466, loss: 0.029271796345710754 2023-01-22 12:47:27.328416: step: 302/466, loss: 0.04050165042281151 2023-01-22 12:47:27.874970: step: 304/466, loss: 0.08837659657001495 2023-01-22 12:47:28.489477: step: 306/466, loss: 0.12287881225347519 2023-01-22 12:47:29.065906: step: 308/466, loss: 1.0752689838409424 2023-01-22 12:47:29.695343: step: 310/466, loss: 0.06996869295835495 2023-01-22 12:47:30.332511: step: 312/466, loss: 0.016347158700227737 2023-01-22 12:47:30.994185: step: 314/466, loss: 0.0714387372136116 2023-01-22 12:47:31.580274: step: 316/466, loss: 0.04747457429766655 2023-01-22 12:47:32.202053: step: 318/466, loss: 0.025179000571370125 2023-01-22 12:47:32.820148: step: 320/466, loss: 0.11100359261035919 2023-01-22 12:47:33.462954: step: 322/466, loss: 0.039913929998874664 2023-01-22 12:47:34.097588: step: 324/466, loss: 0.07095092535018921 2023-01-22 12:47:34.694997: step: 326/466, loss: 0.009094692766666412 2023-01-22 12:47:35.371103: step: 328/466, loss: 0.015984689816832542 2023-01-22 12:47:35.980803: step: 330/466, loss: 0.029985295608639717 2023-01-22 12:47:36.553579: step: 332/466, loss: 0.018034322187304497 2023-01-22 12:47:37.115751: step: 334/466, loss: 0.052487436681985855 2023-01-22 12:47:37.702671: step: 336/466, loss: 0.011984365992248058 2023-01-22 12:47:38.313687: step: 338/466, loss: 0.028638869524002075 2023-01-22 12:47:38.890036: step: 340/466, loss: 0.4480482339859009 2023-01-22 12:47:39.442061: step: 342/466, loss: 0.005989938974380493 2023-01-22 12:47:39.964677: step: 344/466, loss: 0.03386138007044792 2023-01-22 12:47:40.619154: step: 346/466, loss: 0.007416254375129938 2023-01-22 12:47:41.290643: step: 348/466, loss: 0.10737764090299606 2023-01-22 12:47:41.874917: step: 350/466, loss: 0.02148372493684292 2023-01-22 12:47:42.508959: step: 352/466, loss: 0.00944596342742443 2023-01-22 12:47:43.146071: step: 354/466, loss: 0.03134218230843544 2023-01-22 12:47:43.716304: step: 356/466, loss: 0.023554576560854912 2023-01-22 12:47:44.297050: step: 358/466, loss: 0.006292336154729128 2023-01-22 12:47:44.841775: step: 360/466, loss: 0.011793393641710281 2023-01-22 12:47:45.446271: step: 362/466, loss: 0.048451006412506104 2023-01-22 12:47:46.037324: step: 364/466, loss: 0.13643038272857666 2023-01-22 12:47:46.607681: step: 366/466, loss: 0.0035364192444831133 2023-01-22 12:47:47.221939: step: 368/466, loss: 0.02475653775036335 2023-01-22 12:47:47.827696: step: 370/466, loss: 0.03772176057100296 2023-01-22 12:47:48.478661: step: 372/466, loss: 0.06777799874544144 2023-01-22 12:47:49.091335: step: 374/466, loss: 0.12316180020570755 2023-01-22 12:47:49.636042: step: 376/466, loss: 0.06544934213161469 2023-01-22 12:47:50.274002: step: 378/466, loss: 0.024594111368060112 2023-01-22 12:47:50.844992: step: 380/466, loss: 0.015402672812342644 2023-01-22 12:47:51.466020: step: 382/466, loss: 0.021741464734077454 2023-01-22 12:47:52.055791: step: 384/466, loss: 0.03798363730311394 2023-01-22 12:47:52.626406: step: 386/466, loss: 0.00046246900456026196 2023-01-22 12:47:53.254075: step: 388/466, loss: 0.020787667483091354 2023-01-22 12:47:53.867910: step: 390/466, loss: 0.020255569368600845 2023-01-22 12:47:54.640436: step: 392/466, loss: 0.3557094633579254 2023-01-22 12:47:55.203734: step: 394/466, loss: 0.010126069188117981 2023-01-22 12:47:55.832664: step: 396/466, loss: 0.005736412014812231 2023-01-22 12:47:56.432570: step: 398/466, loss: 0.032208964228630066 2023-01-22 12:47:57.143062: step: 400/466, loss: 0.3010401725769043 2023-01-22 12:47:57.762371: step: 402/466, loss: 0.28091520071029663 2023-01-22 12:47:58.337853: step: 404/466, loss: 0.0113126365467906 2023-01-22 12:47:58.934042: step: 406/466, loss: 0.031300827860832214 2023-01-22 12:47:59.551186: step: 408/466, loss: 0.8436365127563477 2023-01-22 12:48:00.184659: step: 410/466, loss: 0.022062180563807487 2023-01-22 12:48:00.806272: step: 412/466, loss: 0.01742429845035076 2023-01-22 12:48:01.389430: step: 414/466, loss: 0.09323088824748993 2023-01-22 12:48:02.022220: step: 416/466, loss: 0.02583315037190914 2023-01-22 12:48:02.649356: step: 418/466, loss: 0.02988622523844242 2023-01-22 12:48:03.268729: step: 420/466, loss: 0.0024579628370702267 2023-01-22 12:48:03.900160: step: 422/466, loss: 0.004086425062268972 2023-01-22 12:48:04.594389: step: 424/466, loss: 0.03260628879070282 2023-01-22 12:48:05.202392: step: 426/466, loss: 0.049904391169548035 2023-01-22 12:48:05.796004: step: 428/466, loss: 0.0036581484600901604 2023-01-22 12:48:06.418801: step: 430/466, loss: 0.06395833939313889 2023-01-22 12:48:07.086916: step: 432/466, loss: 0.048287052661180496 2023-01-22 12:48:07.647892: step: 434/466, loss: 0.0005487494054250419 2023-01-22 12:48:08.239917: step: 436/466, loss: 0.3673534691333771 2023-01-22 12:48:08.805830: step: 438/466, loss: 0.005553035531193018 2023-01-22 12:48:09.369861: step: 440/466, loss: 0.03513813391327858 2023-01-22 12:48:09.975978: step: 442/466, loss: 0.0010807143989950418 2023-01-22 12:48:10.585636: step: 444/466, loss: 0.00991811603307724 2023-01-22 12:48:11.255503: step: 446/466, loss: 0.35704296827316284 2023-01-22 12:48:11.872916: step: 448/466, loss: 0.0004759306611958891 2023-01-22 12:48:12.456635: step: 450/466, loss: 0.013852309435606003 2023-01-22 12:48:13.165796: step: 452/466, loss: 0.055704496800899506 2023-01-22 12:48:13.708793: step: 454/466, loss: 0.05234242230653763 2023-01-22 12:48:14.370509: step: 456/466, loss: 0.029207957908511162 2023-01-22 12:48:15.007771: step: 458/466, loss: 0.010664550587534904 2023-01-22 12:48:15.590110: step: 460/466, loss: 0.09975343197584152 2023-01-22 12:48:16.346621: step: 462/466, loss: 0.17967922985553741 2023-01-22 12:48:17.015594: step: 464/466, loss: 0.001165557187050581 2023-01-22 12:48:17.633318: step: 466/466, loss: 0.01934705674648285 2023-01-22 12:48:18.218755: step: 468/466, loss: 0.009555082768201828 2023-01-22 12:48:18.884253: step: 470/466, loss: 0.004604933317750692 2023-01-22 12:48:19.449673: step: 472/466, loss: 0.014080885797739029 2023-01-22 12:48:20.072432: step: 474/466, loss: 0.042255330830812454 2023-01-22 12:48:20.699220: step: 476/466, loss: 0.02824373170733452 2023-01-22 12:48:21.314243: step: 478/466, loss: 0.014276370406150818 2023-01-22 12:48:21.888212: step: 480/466, loss: 0.033427610993385315 2023-01-22 12:48:22.481412: step: 482/466, loss: 0.006971611641347408 2023-01-22 12:48:23.064037: step: 484/466, loss: 0.03468838706612587 2023-01-22 12:48:23.657868: step: 486/466, loss: 0.008514195680618286 2023-01-22 12:48:24.235327: step: 488/466, loss: 0.21864424645900726 2023-01-22 12:48:24.890199: step: 490/466, loss: 0.2849474847316742 2023-01-22 12:48:25.584436: step: 492/466, loss: 0.005178901366889477 2023-01-22 12:48:26.219690: step: 494/466, loss: 0.010462756268680096 2023-01-22 12:48:26.807266: step: 496/466, loss: 0.10011930763721466 2023-01-22 12:48:27.464615: step: 498/466, loss: 0.034411054104566574 2023-01-22 12:48:28.030309: step: 500/466, loss: 0.002200616290792823 2023-01-22 12:48:28.684822: step: 502/466, loss: 0.0178882647305727 2023-01-22 12:48:29.344816: step: 504/466, loss: 0.07477042824029922 2023-01-22 12:48:30.043891: step: 506/466, loss: 0.007386498153209686 2023-01-22 12:48:30.622935: step: 508/466, loss: 0.052657391875982285 2023-01-22 12:48:31.252946: step: 510/466, loss: 3.171741247177124 2023-01-22 12:48:31.896758: step: 512/466, loss: 0.038998816162347794 2023-01-22 12:48:32.545049: step: 514/466, loss: 0.010146564804017544 2023-01-22 12:48:33.106699: step: 516/466, loss: 0.00945010595023632 2023-01-22 12:48:33.853720: step: 518/466, loss: 0.09256458282470703 2023-01-22 12:48:34.509005: step: 520/466, loss: 0.0418822318315506 2023-01-22 12:48:35.033984: step: 522/466, loss: 0.027969488874077797 2023-01-22 12:48:35.669479: step: 524/466, loss: 0.012059665285050869 2023-01-22 12:48:36.272240: step: 526/466, loss: 0.01905934512615204 2023-01-22 12:48:36.857502: step: 528/466, loss: 0.05664241686463356 2023-01-22 12:48:37.490025: step: 530/466, loss: 0.017221873626112938 2023-01-22 12:48:38.080014: step: 532/466, loss: 0.03145075589418411 2023-01-22 12:48:38.699054: step: 534/466, loss: 0.011874757707118988 2023-01-22 12:48:39.352812: step: 536/466, loss: 0.19178661704063416 2023-01-22 12:48:39.926368: step: 538/466, loss: 0.01207797322422266 2023-01-22 12:48:40.584936: step: 540/466, loss: 0.03249429538846016 2023-01-22 12:48:41.180960: step: 542/466, loss: 0.03537470102310181 2023-01-22 12:48:41.786094: step: 544/466, loss: 0.04688917100429535 2023-01-22 12:48:42.409457: step: 546/466, loss: 0.008935499005019665 2023-01-22 12:48:43.003579: step: 548/466, loss: 0.005889051128178835 2023-01-22 12:48:43.599928: step: 550/466, loss: 0.04385385289788246 2023-01-22 12:48:44.220273: step: 552/466, loss: 0.0009785328293219209 2023-01-22 12:48:44.838917: step: 554/466, loss: 0.008239595219492912 2023-01-22 12:48:45.448407: step: 556/466, loss: 0.6651448607444763 2023-01-22 12:48:46.102673: step: 558/466, loss: 0.11339122802019119 2023-01-22 12:48:46.669772: step: 560/466, loss: 0.015921475365757942 2023-01-22 12:48:47.264709: step: 562/466, loss: 0.03143639117479324 2023-01-22 12:48:47.966289: step: 564/466, loss: 0.05940140783786774 2023-01-22 12:48:48.581744: step: 566/466, loss: 0.08028946816921234 2023-01-22 12:48:49.208451: step: 568/466, loss: 0.010268310084939003 2023-01-22 12:48:49.857902: step: 570/466, loss: 0.019668761640787125 2023-01-22 12:48:50.461032: step: 572/466, loss: 0.01937008835375309 2023-01-22 12:48:51.096360: step: 574/466, loss: 0.03339478373527527 2023-01-22 12:48:51.713336: step: 576/466, loss: 0.04097174480557442 2023-01-22 12:48:52.330751: step: 578/466, loss: 0.052022628486156464 2023-01-22 12:48:52.887170: step: 580/466, loss: 0.0010472792200744152 2023-01-22 12:48:53.504781: step: 582/466, loss: 0.08285340666770935 2023-01-22 12:48:54.153344: step: 584/466, loss: 0.05154658481478691 2023-01-22 12:48:54.752249: step: 586/466, loss: 0.013209663331508636 2023-01-22 12:48:55.406666: step: 588/466, loss: 0.06628485023975372 2023-01-22 12:48:56.036629: step: 590/466, loss: 0.007210195530205965 2023-01-22 12:48:56.587122: step: 592/466, loss: 0.002422439167276025 2023-01-22 12:48:57.197242: step: 594/466, loss: 0.008276514708995819 2023-01-22 12:48:57.805454: step: 596/466, loss: 0.028346292674541473 2023-01-22 12:48:58.346651: step: 598/466, loss: 0.01611250266432762 2023-01-22 12:48:58.995371: step: 600/466, loss: 0.007911566644906998 2023-01-22 12:48:59.608575: step: 602/466, loss: 0.0665612444281578 2023-01-22 12:49:00.232209: step: 604/466, loss: 0.029158849269151688 2023-01-22 12:49:00.791974: step: 606/466, loss: 0.05460003763437271 2023-01-22 12:49:01.409791: step: 608/466, loss: 0.003954010549932718 2023-01-22 12:49:01.994201: step: 610/466, loss: 0.03006788343191147 2023-01-22 12:49:02.671120: step: 612/466, loss: 0.026150286197662354 2023-01-22 12:49:03.279221: step: 614/466, loss: 0.028111020103096962 2023-01-22 12:49:03.874048: step: 616/466, loss: 0.036183565855026245 2023-01-22 12:49:04.526489: step: 618/466, loss: 0.02873288281261921 2023-01-22 12:49:05.108479: step: 620/466, loss: 0.006896775681525469 2023-01-22 12:49:05.771522: step: 622/466, loss: 0.03590833768248558 2023-01-22 12:49:06.436452: step: 624/466, loss: 0.0337134450674057 2023-01-22 12:49:07.081323: step: 626/466, loss: 0.05777391046285629 2023-01-22 12:49:07.666375: step: 628/466, loss: 0.0067988913506269455 2023-01-22 12:49:08.258760: step: 630/466, loss: 0.0537460558116436 2023-01-22 12:49:08.887058: step: 632/466, loss: 0.012341397814452648 2023-01-22 12:49:09.428401: step: 634/466, loss: 0.08001233637332916 2023-01-22 12:49:09.996168: step: 636/466, loss: 0.1843031942844391 2023-01-22 12:49:10.562190: step: 638/466, loss: 0.038092005997896194 2023-01-22 12:49:11.179281: step: 640/466, loss: 0.07952667027711868 2023-01-22 12:49:11.774211: step: 642/466, loss: 0.0035366981755942106 2023-01-22 12:49:12.428635: step: 644/466, loss: 0.5054967403411865 2023-01-22 12:49:13.043405: step: 646/466, loss: 0.010712129063904285 2023-01-22 12:49:13.788003: step: 648/466, loss: 0.3804076611995697 2023-01-22 12:49:14.363792: step: 650/466, loss: 0.09094398468732834 2023-01-22 12:49:15.055467: step: 652/466, loss: 0.13170231878757477 2023-01-22 12:49:15.644473: step: 654/466, loss: 0.012623382732272148 2023-01-22 12:49:16.230371: step: 656/466, loss: 0.012589056976139545 2023-01-22 12:49:16.832478: step: 658/466, loss: 0.011988475918769836 2023-01-22 12:49:17.422262: step: 660/466, loss: 0.0673658475279808 2023-01-22 12:49:18.032905: step: 662/466, loss: 0.019919512793421745 2023-01-22 12:49:18.603597: step: 664/466, loss: 0.06564678996801376 2023-01-22 12:49:19.193465: step: 666/466, loss: 0.001861299155279994 2023-01-22 12:49:19.926021: step: 668/466, loss: 0.007484862580895424 2023-01-22 12:49:20.521098: step: 670/466, loss: 0.029324056580662727 2023-01-22 12:49:21.168699: step: 672/466, loss: 0.00355354230850935 2023-01-22 12:49:21.826794: step: 674/466, loss: 0.009839470498263836 2023-01-22 12:49:22.407711: step: 676/466, loss: 0.10285638272762299 2023-01-22 12:49:22.989753: step: 678/466, loss: 0.02608523704111576 2023-01-22 12:49:23.585891: step: 680/466, loss: 0.029285017400979996 2023-01-22 12:49:24.228196: step: 682/466, loss: 0.042839415371418 2023-01-22 12:49:24.861260: step: 684/466, loss: 0.052892055362463 2023-01-22 12:49:25.485960: step: 686/466, loss: 0.024776723235845566 2023-01-22 12:49:26.151567: step: 688/466, loss: 0.07299697399139404 2023-01-22 12:49:26.725883: step: 690/466, loss: 0.03305630758404732 2023-01-22 12:49:27.385679: step: 692/466, loss: 0.022931385785341263 2023-01-22 12:49:28.001004: step: 694/466, loss: 0.23795311152935028 2023-01-22 12:49:28.615959: step: 696/466, loss: 0.0009287637658417225 2023-01-22 12:49:29.243778: step: 698/466, loss: 0.06150897219777107 2023-01-22 12:49:29.791582: step: 700/466, loss: 0.025871066376566887 2023-01-22 12:49:30.393214: step: 702/466, loss: 0.038701131939888 2023-01-22 12:49:30.968491: step: 704/466, loss: 0.004738318733870983 2023-01-22 12:49:31.558148: step: 706/466, loss: 0.017376113682985306 2023-01-22 12:49:32.184710: step: 708/466, loss: 0.010453774593770504 2023-01-22 12:49:32.830366: step: 710/466, loss: 0.003291316330432892 2023-01-22 12:49:33.375881: step: 712/466, loss: 0.2283499538898468 2023-01-22 12:49:33.947374: step: 714/466, loss: 0.030491085723042488 2023-01-22 12:49:34.546777: step: 716/466, loss: 0.044772472232580185 2023-01-22 12:49:35.144316: step: 718/466, loss: 0.024380743503570557 2023-01-22 12:49:35.783513: step: 720/466, loss: 0.1536007523536682 2023-01-22 12:49:36.404479: step: 722/466, loss: 0.23740418255329132 2023-01-22 12:49:37.003863: step: 724/466, loss: 0.01061465684324503 2023-01-22 12:49:37.605887: step: 726/466, loss: 0.024473462253808975 2023-01-22 12:49:38.209195: step: 728/466, loss: 0.00921687949448824 2023-01-22 12:49:38.810008: step: 730/466, loss: 0.026211457327008247 2023-01-22 12:49:39.471397: step: 732/466, loss: 0.0797419473528862 2023-01-22 12:49:39.971398: step: 734/466, loss: 0.0002902036940213293 2023-01-22 12:49:40.618297: step: 736/466, loss: 0.08485154062509537 2023-01-22 12:49:41.208503: step: 738/466, loss: 0.010234514251351357 2023-01-22 12:49:41.831689: step: 740/466, loss: 0.021577196195721626 2023-01-22 12:49:42.438348: step: 742/466, loss: 0.06836622208356857 2023-01-22 12:49:43.042933: step: 744/466, loss: 0.045496616512537 2023-01-22 12:49:43.762137: step: 746/466, loss: 0.027603916823863983 2023-01-22 12:49:44.332194: step: 748/466, loss: 0.006657042074948549 2023-01-22 12:49:44.928426: step: 750/466, loss: 0.004206167533993721 2023-01-22 12:49:45.503498: step: 752/466, loss: 0.00930891465395689 2023-01-22 12:49:46.205126: step: 754/466, loss: 0.015324637293815613 2023-01-22 12:49:46.797789: step: 756/466, loss: 0.005557236261665821 2023-01-22 12:49:47.378866: step: 758/466, loss: 0.0005388028803281486 2023-01-22 12:49:48.033110: step: 760/466, loss: 0.007462238892912865 2023-01-22 12:49:48.727528: step: 762/466, loss: 0.05765986815094948 2023-01-22 12:49:49.335498: step: 764/466, loss: 0.00529530318453908 2023-01-22 12:49:49.994335: step: 766/466, loss: 0.22407777607440948 2023-01-22 12:49:50.594273: step: 768/466, loss: 0.01470536831766367 2023-01-22 12:49:51.225321: step: 770/466, loss: 0.004962172359228134 2023-01-22 12:49:51.807621: step: 772/466, loss: 0.02007342502474785 2023-01-22 12:49:52.372467: step: 774/466, loss: 0.01086547039449215 2023-01-22 12:49:52.938639: step: 776/466, loss: 0.01764582097530365 2023-01-22 12:49:53.548385: step: 778/466, loss: 0.03523987904191017 2023-01-22 12:49:54.165178: step: 780/466, loss: 0.0007762151653878391 2023-01-22 12:49:54.781401: step: 782/466, loss: 0.01184089295566082 2023-01-22 12:49:55.332493: step: 784/466, loss: 0.005018068943172693 2023-01-22 12:49:55.915619: step: 786/466, loss: 0.04472871869802475 2023-01-22 12:49:56.467365: step: 788/466, loss: 0.015511106699705124 2023-01-22 12:49:57.068434: step: 790/466, loss: 0.2705400884151459 2023-01-22 12:49:57.653350: step: 792/466, loss: 0.026682354509830475 2023-01-22 12:49:58.239911: step: 794/466, loss: 0.08655679225921631 2023-01-22 12:49:58.881889: step: 796/466, loss: 0.026306811720132828 2023-01-22 12:49:59.443587: step: 798/466, loss: 0.0008450224995613098 2023-01-22 12:50:00.088484: step: 800/466, loss: 0.03993954509496689 2023-01-22 12:50:00.732807: step: 802/466, loss: 0.02803313173353672 2023-01-22 12:50:01.385529: step: 804/466, loss: 0.04920896887779236 2023-01-22 12:50:02.047501: step: 806/466, loss: 0.00908689759671688 2023-01-22 12:50:02.610335: step: 808/466, loss: 0.007737348787486553 2023-01-22 12:50:03.189540: step: 810/466, loss: 0.01188454870134592 2023-01-22 12:50:03.782244: step: 812/466, loss: 0.10305050015449524 2023-01-22 12:50:04.414560: step: 814/466, loss: 0.05134795978665352 2023-01-22 12:50:05.043391: step: 816/466, loss: 0.0744282677769661 2023-01-22 12:50:05.616029: step: 818/466, loss: 0.008248677477240562 2023-01-22 12:50:06.235501: step: 820/466, loss: 0.03369758278131485 2023-01-22 12:50:06.823901: step: 822/466, loss: 0.03700336441397667 2023-01-22 12:50:07.414069: step: 824/466, loss: 0.026925429701805115 2023-01-22 12:50:08.047232: step: 826/466, loss: 0.05135258287191391 2023-01-22 12:50:08.644581: step: 828/466, loss: 0.01281293947249651 2023-01-22 12:50:09.279880: step: 830/466, loss: 0.01877225935459137 2023-01-22 12:50:09.803666: step: 832/466, loss: 0.050698645412921906 2023-01-22 12:50:10.418566: step: 834/466, loss: 0.006537098903208971 2023-01-22 12:50:11.010219: step: 836/466, loss: 0.004928469192236662 2023-01-22 12:50:11.652040: step: 838/466, loss: 0.6613799333572388 2023-01-22 12:50:12.258410: step: 840/466, loss: 0.016151078045368195 2023-01-22 12:50:13.035169: step: 842/466, loss: 0.03449910506606102 2023-01-22 12:50:13.639414: step: 844/466, loss: 0.042612750083208084 2023-01-22 12:50:14.229840: step: 846/466, loss: 0.012053466401994228 2023-01-22 12:50:14.834873: step: 848/466, loss: 0.016944939270615578 2023-01-22 12:50:15.401445: step: 850/466, loss: 0.009946225211024284 2023-01-22 12:50:15.991776: step: 852/466, loss: 0.03996839001774788 2023-01-22 12:50:16.564251: step: 854/466, loss: 0.25412529706954956 2023-01-22 12:50:17.146100: step: 856/466, loss: 0.003932584077119827 2023-01-22 12:50:17.717297: step: 858/466, loss: 0.019082466140389442 2023-01-22 12:50:18.338628: step: 860/466, loss: 0.028855543583631516 2023-01-22 12:50:18.907574: step: 862/466, loss: 0.012767578475177288 2023-01-22 12:50:19.531774: step: 864/466, loss: 0.09636927396059036 2023-01-22 12:50:20.119130: step: 866/466, loss: 0.02893782965838909 2023-01-22 12:50:20.707998: step: 868/466, loss: 0.017977142706513405 2023-01-22 12:50:21.316574: step: 870/466, loss: 0.08191262930631638 2023-01-22 12:50:21.924080: step: 872/466, loss: 0.05199733003973961 2023-01-22 12:50:22.539937: step: 874/466, loss: 0.007144299801439047 2023-01-22 12:50:23.124307: step: 876/466, loss: 0.027346298098564148 2023-01-22 12:50:23.786640: step: 878/466, loss: 0.25393012166023254 2023-01-22 12:50:24.403790: step: 880/466, loss: 0.009548576548695564 2023-01-22 12:50:25.038096: step: 882/466, loss: 0.03719523549079895 2023-01-22 12:50:25.644365: step: 884/466, loss: 0.18985582888126373 2023-01-22 12:50:26.281814: step: 886/466, loss: 0.051307398825883865 2023-01-22 12:50:26.824967: step: 888/466, loss: 0.04601292312145233 2023-01-22 12:50:27.426377: step: 890/466, loss: 0.06417310982942581 2023-01-22 12:50:28.020626: step: 892/466, loss: 0.04209336265921593 2023-01-22 12:50:28.557906: step: 894/466, loss: 0.029217995703220367 2023-01-22 12:50:29.163585: step: 896/466, loss: 0.08725843578577042 2023-01-22 12:50:29.836039: step: 898/466, loss: 0.013245921581983566 2023-01-22 12:50:30.427367: step: 900/466, loss: 0.006073274649679661 2023-01-22 12:50:31.065732: step: 902/466, loss: 0.016061050817370415 2023-01-22 12:50:31.687129: step: 904/466, loss: 0.034913044422864914 2023-01-22 12:50:32.372173: step: 906/466, loss: 0.0108271399512887 2023-01-22 12:50:32.996190: step: 908/466, loss: 0.005191510077565908 2023-01-22 12:50:33.544310: step: 910/466, loss: 0.020119434222579002 2023-01-22 12:50:34.199545: step: 912/466, loss: 0.058973293751478195 2023-01-22 12:50:34.792426: step: 914/466, loss: 0.051008958369493484 2023-01-22 12:50:35.346153: step: 916/466, loss: 0.028807258233428 2023-01-22 12:50:36.007829: step: 918/466, loss: 0.09778264164924622 2023-01-22 12:50:36.638217: step: 920/466, loss: 0.04884487763047218 2023-01-22 12:50:37.250459: step: 922/466, loss: 0.029354941099882126 2023-01-22 12:50:37.777770: step: 924/466, loss: 0.007687765639275312 2023-01-22 12:50:38.463997: step: 926/466, loss: 0.05332213640213013 2023-01-22 12:50:39.056175: step: 928/466, loss: 0.05926041677594185 2023-01-22 12:50:39.607672: step: 930/466, loss: 0.026867201551795006 2023-01-22 12:50:40.255774: step: 932/466, loss: 0.0791272446513176 ================================================== Loss: 0.076 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30933745714576255, 'r': 0.34983894584226655, 'f1': 0.32834394382702486}, 'combined': 0.24193764281991303, 'epoch': 26} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3320947980741736, 'r': 0.34072063698519106, 'f1': 0.3363524236905091}, 'combined': 0.22307311001235836, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2956985294117647, 'r': 0.28561789772727275, 'f1': 0.290570809248555}, 'combined': 0.19371387283237, 'epoch': 26} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3540539468829152, 'r': 0.32403899290841204, 'f1': 0.33838218229640854}, 'combined': 0.22083889791976133, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2299679487179487, 'r': 0.3416666666666666, 'f1': 0.27490421455938696}, 'combined': 0.18326947637292462, 'epoch': 26} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.358695652173913, 'f1': 0.375}, 'combined': 0.25, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:53:15.987574: step: 2/466, loss: 0.027833836153149605 2023-01-22 12:53:16.584267: step: 4/466, loss: 0.024089179933071136 2023-01-22 12:53:17.155710: step: 6/466, loss: 0.05892466381192207 2023-01-22 12:53:17.788313: step: 8/466, loss: 0.0033265934325754642 2023-01-22 12:53:18.413879: step: 10/466, loss: 0.008479108102619648 2023-01-22 12:53:19.051586: step: 12/466, loss: 0.017342019826173782 2023-01-22 12:53:19.590408: step: 14/466, loss: 0.014171866700053215 2023-01-22 12:53:20.199035: step: 16/466, loss: 0.0018908302299678326 2023-01-22 12:53:20.811781: step: 18/466, loss: 0.022276606410741806 2023-01-22 12:53:21.354144: step: 20/466, loss: 0.0065987976267933846 2023-01-22 12:53:21.969625: step: 22/466, loss: 0.6846793293952942 2023-01-22 12:53:22.627324: step: 24/466, loss: 0.03842934966087341 2023-01-22 12:53:23.207413: step: 26/466, loss: 0.053983282297849655 2023-01-22 12:53:23.817574: step: 28/466, loss: 0.014167653396725655 2023-01-22 12:53:24.431048: step: 30/466, loss: 0.008245084434747696 2023-01-22 12:53:25.038878: step: 32/466, loss: 0.017242398113012314 2023-01-22 12:53:25.706239: step: 34/466, loss: 0.3137844204902649 2023-01-22 12:53:26.316475: step: 36/466, loss: 0.005847569555044174 2023-01-22 12:53:26.909606: step: 38/466, loss: 0.036376770585775375 2023-01-22 12:53:27.535706: step: 40/466, loss: 0.04871950298547745 2023-01-22 12:53:28.149727: step: 42/466, loss: 0.0542115792632103 2023-01-22 12:53:28.796258: step: 44/466, loss: 0.014360906556248665 2023-01-22 12:53:29.415201: step: 46/466, loss: 0.011436935514211655 2023-01-22 12:53:30.059344: step: 48/466, loss: 0.004590773489326239 2023-01-22 12:53:30.639262: step: 50/466, loss: 0.00757006648927927 2023-01-22 12:53:31.250859: step: 52/466, loss: 0.043378762900829315 2023-01-22 12:53:31.839838: step: 54/466, loss: 0.016078811138868332 2023-01-22 12:53:32.429398: step: 56/466, loss: 0.021416189149022102 2023-01-22 12:53:32.958204: step: 58/466, loss: 0.013789871707558632 2023-01-22 12:53:33.504867: step: 60/466, loss: 0.010896018706262112 2023-01-22 12:53:34.209967: step: 62/466, loss: 0.05236181616783142 2023-01-22 12:53:34.746919: step: 64/466, loss: 0.04595043882727623 2023-01-22 12:53:35.356805: step: 66/466, loss: 0.0866723507642746 2023-01-22 12:53:36.015905: step: 68/466, loss: 0.114671491086483 2023-01-22 12:53:36.687580: step: 70/466, loss: 0.004479016177356243 2023-01-22 12:53:37.305055: step: 72/466, loss: 0.018120339140295982 2023-01-22 12:53:37.929574: step: 74/466, loss: 0.06656885892152786 2023-01-22 12:53:38.554576: step: 76/466, loss: 0.024030594155192375 2023-01-22 12:53:39.187649: step: 78/466, loss: 0.2569972574710846 2023-01-22 12:53:39.789157: step: 80/466, loss: 0.0023115696385502815 2023-01-22 12:53:40.306926: step: 82/466, loss: 0.016187287867069244 2023-01-22 12:53:40.927359: step: 84/466, loss: 0.003180457279086113 2023-01-22 12:53:41.559198: step: 86/466, loss: 0.08133621513843536 2023-01-22 12:53:42.115580: step: 88/466, loss: 0.008258305490016937 2023-01-22 12:53:42.729010: step: 90/466, loss: 0.003522202605381608 2023-01-22 12:53:43.371965: step: 92/466, loss: 1.0791115760803223 2023-01-22 12:53:43.936212: step: 94/466, loss: 0.024251636117696762 2023-01-22 12:53:44.530347: step: 96/466, loss: 0.024751055985689163 2023-01-22 12:53:45.157738: step: 98/466, loss: 0.11981932818889618 2023-01-22 12:53:45.729586: step: 100/466, loss: 0.03855177015066147 2023-01-22 12:53:46.314993: step: 102/466, loss: 0.0010817792499437928 2023-01-22 12:53:46.928474: step: 104/466, loss: 0.05485154315829277 2023-01-22 12:53:47.506634: step: 106/466, loss: 0.08621357381343842 2023-01-22 12:53:48.103911: step: 108/466, loss: 0.0025955094024538994 2023-01-22 12:53:48.684107: step: 110/466, loss: 0.033904727548360825 2023-01-22 12:53:49.317740: step: 112/466, loss: 0.0053804488852620125 2023-01-22 12:53:49.857762: step: 114/466, loss: 0.03207462280988693 2023-01-22 12:53:50.505597: step: 116/466, loss: 0.00749216740950942 2023-01-22 12:53:51.097150: step: 118/466, loss: 0.001963667571544647 2023-01-22 12:53:51.665379: step: 120/466, loss: 0.012088139541447163 2023-01-22 12:53:52.246170: step: 122/466, loss: 0.009510976262390614 2023-01-22 12:53:52.926448: step: 124/466, loss: 0.0329180583357811 2023-01-22 12:53:53.593712: step: 126/466, loss: 0.051859792321920395 2023-01-22 12:53:54.132256: step: 128/466, loss: 0.023622572422027588 2023-01-22 12:53:54.713176: step: 130/466, loss: 0.053788863122463226 2023-01-22 12:53:55.335655: step: 132/466, loss: 0.06380758434534073 2023-01-22 12:53:55.900397: step: 134/466, loss: 0.0036389161832630634 2023-01-22 12:53:56.535426: step: 136/466, loss: 0.003958689048886299 2023-01-22 12:53:57.120644: step: 138/466, loss: 0.008076903410255909 2023-01-22 12:53:57.720845: step: 140/466, loss: 0.04739199951291084 2023-01-22 12:53:58.303455: step: 142/466, loss: 0.021099107339978218 2023-01-22 12:53:58.898951: step: 144/466, loss: 0.005976251792162657 2023-01-22 12:53:59.524437: step: 146/466, loss: 0.006119987461715937 2023-01-22 12:54:00.091730: step: 148/466, loss: 0.0069958982057869434 2023-01-22 12:54:00.763168: step: 150/466, loss: 0.030476871877908707 2023-01-22 12:54:01.423788: step: 152/466, loss: 0.01349339447915554 2023-01-22 12:54:02.099329: step: 154/466, loss: 0.025189753621816635 2023-01-22 12:54:02.651791: step: 156/466, loss: 0.03439941629767418 2023-01-22 12:54:03.229733: step: 158/466, loss: 0.005432324483990669 2023-01-22 12:54:03.875854: step: 160/466, loss: 0.17197854816913605 2023-01-22 12:54:04.450311: step: 162/466, loss: 0.00020336541638243943 2023-01-22 12:54:05.076070: step: 164/466, loss: 0.06483791023492813 2023-01-22 12:54:05.700161: step: 166/466, loss: 0.056566525250673294 2023-01-22 12:54:06.302230: step: 168/466, loss: 0.017517898231744766 2023-01-22 12:54:06.921560: step: 170/466, loss: 0.007111198268830776 2023-01-22 12:54:07.567859: step: 172/466, loss: 0.0664910301566124 2023-01-22 12:54:08.165277: step: 174/466, loss: 0.3708658814430237 2023-01-22 12:54:08.787104: step: 176/466, loss: 0.019381744787096977 2023-01-22 12:54:09.398147: step: 178/466, loss: 0.023170998319983482 2023-01-22 12:54:10.045564: step: 180/466, loss: 0.006079450715333223 2023-01-22 12:54:10.661740: step: 182/466, loss: 0.07094259560108185 2023-01-22 12:54:11.235513: step: 184/466, loss: 0.005931614898145199 2023-01-22 12:54:11.798393: step: 186/466, loss: 0.01944388635456562 2023-01-22 12:54:12.489023: step: 188/466, loss: 0.06885656714439392 2023-01-22 12:54:13.088478: step: 190/466, loss: 0.0036963620223104954 2023-01-22 12:54:13.643488: step: 192/466, loss: 0.0355551652610302 2023-01-22 12:54:14.247813: step: 194/466, loss: 0.0029540781397372484 2023-01-22 12:54:14.834591: step: 196/466, loss: 0.0021283773239701986 2023-01-22 12:54:15.463079: step: 198/466, loss: 0.03791055455803871 2023-01-22 12:54:16.023401: step: 200/466, loss: 0.0039059543050825596 2023-01-22 12:54:16.698791: step: 202/466, loss: 0.04408857598900795 2023-01-22 12:54:17.328046: step: 204/466, loss: 0.025127647444605827 2023-01-22 12:54:17.951429: step: 206/466, loss: 0.00040855578845366836 2023-01-22 12:54:18.502327: step: 208/466, loss: 0.021951226517558098 2023-01-22 12:54:19.068807: step: 210/466, loss: 0.10126690566539764 2023-01-22 12:54:19.709804: step: 212/466, loss: 0.01390159223228693 2023-01-22 12:54:20.363109: step: 214/466, loss: 0.0047782729379832745 2023-01-22 12:54:21.007899: step: 216/466, loss: 0.12035967409610748 2023-01-22 12:54:21.647181: step: 218/466, loss: 0.027765575796365738 2023-01-22 12:54:22.255760: step: 220/466, loss: 0.0063645802438259125 2023-01-22 12:54:22.961885: step: 222/466, loss: 0.0021806962322443724 2023-01-22 12:54:23.556373: step: 224/466, loss: 0.030791504308581352 2023-01-22 12:54:24.115638: step: 226/466, loss: 0.013874297961592674 2023-01-22 12:54:24.722491: step: 228/466, loss: 0.012175521813333035 2023-01-22 12:54:25.365888: step: 230/466, loss: 0.01178658939898014 2023-01-22 12:54:25.952773: step: 232/466, loss: 0.01597941480576992 2023-01-22 12:54:26.570154: step: 234/466, loss: 0.009201081469655037 2023-01-22 12:54:27.188692: step: 236/466, loss: 0.08578303456306458 2023-01-22 12:54:27.818239: step: 238/466, loss: 0.00881474930793047 2023-01-22 12:54:28.478820: step: 240/466, loss: 0.0507836751639843 2023-01-22 12:54:29.111861: step: 242/466, loss: 0.01997714675962925 2023-01-22 12:54:29.717197: step: 244/466, loss: 0.014055879786610603 2023-01-22 12:54:30.314482: step: 246/466, loss: 0.013484900817275047 2023-01-22 12:54:30.960168: step: 248/466, loss: 0.053369153290987015 2023-01-22 12:54:31.629117: step: 250/466, loss: 0.0779917910695076 2023-01-22 12:54:32.181163: step: 252/466, loss: 0.018295586109161377 2023-01-22 12:54:32.832863: step: 254/466, loss: 0.013621524907648563 2023-01-22 12:54:33.445762: step: 256/466, loss: 0.017526626586914062 2023-01-22 12:54:34.074593: step: 258/466, loss: 0.008595292456448078 2023-01-22 12:54:34.722601: step: 260/466, loss: 0.012955628335475922 2023-01-22 12:54:35.311951: step: 262/466, loss: 0.00752677395939827 2023-01-22 12:54:35.875806: step: 264/466, loss: 0.028774773702025414 2023-01-22 12:54:36.470982: step: 266/466, loss: 0.015406875871121883 2023-01-22 12:54:37.048142: step: 268/466, loss: 0.0460406057536602 2023-01-22 12:54:37.630217: step: 270/466, loss: 0.09528829902410507 2023-01-22 12:54:38.261261: step: 272/466, loss: 0.016132591292262077 2023-01-22 12:54:38.934346: step: 274/466, loss: 0.008536629378795624 2023-01-22 12:54:39.538622: step: 276/466, loss: 0.00013562050298787653 2023-01-22 12:54:40.121362: step: 278/466, loss: 0.009571983478963375 2023-01-22 12:54:40.788086: step: 280/466, loss: 0.008088672533631325 2023-01-22 12:54:41.390921: step: 282/466, loss: 0.046699605882167816 2023-01-22 12:54:41.910634: step: 284/466, loss: 0.11790218204259872 2023-01-22 12:54:42.506307: step: 286/466, loss: 0.9116804599761963 2023-01-22 12:54:43.087013: step: 288/466, loss: 0.029373178258538246 2023-01-22 12:54:43.704018: step: 290/466, loss: 0.025495873764157295 2023-01-22 12:54:44.335084: step: 292/466, loss: 0.0008974373922683299 2023-01-22 12:54:44.948011: step: 294/466, loss: 0.041956234723329544 2023-01-22 12:54:45.579622: step: 296/466, loss: 0.01854517310857773 2023-01-22 12:54:46.191099: step: 298/466, loss: 0.009173722006380558 2023-01-22 12:54:46.769118: step: 300/466, loss: 0.043664708733558655 2023-01-22 12:54:47.312484: step: 302/466, loss: 0.03538736328482628 2023-01-22 12:54:47.895280: step: 304/466, loss: 0.051593050360679626 2023-01-22 12:54:48.471027: step: 306/466, loss: 0.12222576141357422 2023-01-22 12:54:49.122392: step: 308/466, loss: 0.203830286860466 2023-01-22 12:54:49.743720: step: 310/466, loss: 0.029027223587036133 2023-01-22 12:54:50.411704: step: 312/466, loss: 0.1208413764834404 2023-01-22 12:54:50.999426: step: 314/466, loss: 0.013764460571110249 2023-01-22 12:54:51.558046: step: 316/466, loss: 0.014483177103102207 2023-01-22 12:54:52.139728: step: 318/466, loss: 0.007903095334768295 2023-01-22 12:54:52.730717: step: 320/466, loss: 0.07149031013250351 2023-01-22 12:54:53.308937: step: 322/466, loss: 0.010006767697632313 2023-01-22 12:54:53.931967: step: 324/466, loss: 0.004422049969434738 2023-01-22 12:54:54.551141: step: 326/466, loss: 0.015813073143363 2023-01-22 12:54:55.163748: step: 328/466, loss: 0.0066657704301178455 2023-01-22 12:54:55.777490: step: 330/466, loss: 0.04342114180326462 2023-01-22 12:54:56.385201: step: 332/466, loss: 0.02800775319337845 2023-01-22 12:54:56.907168: step: 334/466, loss: 0.029935946688055992 2023-01-22 12:54:57.511365: step: 336/466, loss: 0.028563717380166054 2023-01-22 12:54:58.103698: step: 338/466, loss: 0.0034600680228322744 2023-01-22 12:54:58.714542: step: 340/466, loss: 0.006282786373049021 2023-01-22 12:54:59.324165: step: 342/466, loss: 0.014332322403788567 2023-01-22 12:54:59.967663: step: 344/466, loss: 0.04797680303454399 2023-01-22 12:55:00.548734: step: 346/466, loss: 0.033090557903051376 2023-01-22 12:55:01.204941: step: 348/466, loss: 0.042578570544719696 2023-01-22 12:55:01.823401: step: 350/466, loss: 0.11458615958690643 2023-01-22 12:55:02.499694: step: 352/466, loss: 0.002273987978696823 2023-01-22 12:55:03.096661: step: 354/466, loss: 0.041761383414268494 2023-01-22 12:55:03.739592: step: 356/466, loss: 0.0080189760774374 2023-01-22 12:55:04.338950: step: 358/466, loss: 0.25496694445610046 2023-01-22 12:55:04.942071: step: 360/466, loss: 0.03920642286539078 2023-01-22 12:55:05.507400: step: 362/466, loss: 0.007292563561350107 2023-01-22 12:55:06.067350: step: 364/466, loss: 0.004238718654960394 2023-01-22 12:55:06.695182: step: 366/466, loss: 0.002630181610584259 2023-01-22 12:55:07.316403: step: 368/466, loss: 0.01754770055413246 2023-01-22 12:55:08.006352: step: 370/466, loss: 0.24520449340343475 2023-01-22 12:55:08.588196: step: 372/466, loss: 0.012534240260720253 2023-01-22 12:55:09.185446: step: 374/466, loss: 0.0020073133055120707 2023-01-22 12:55:09.812690: step: 376/466, loss: 0.0053610121831297874 2023-01-22 12:55:10.396699: step: 378/466, loss: 0.002231658436357975 2023-01-22 12:55:10.996627: step: 380/466, loss: 0.06281581521034241 2023-01-22 12:55:11.631971: step: 382/466, loss: 0.10772094130516052 2023-01-22 12:55:12.239372: step: 384/466, loss: 0.03544430807232857 2023-01-22 12:55:12.895929: step: 386/466, loss: 0.018868377432227135 2023-01-22 12:55:13.509685: step: 388/466, loss: 0.007571527734398842 2023-01-22 12:55:14.104031: step: 390/466, loss: 0.09080404788255692 2023-01-22 12:55:14.737276: step: 392/466, loss: 0.11332575976848602 2023-01-22 12:55:15.327010: step: 394/466, loss: 0.004821168724447489 2023-01-22 12:55:15.974468: step: 396/466, loss: 0.021221790462732315 2023-01-22 12:55:16.521161: step: 398/466, loss: 0.05074651166796684 2023-01-22 12:55:17.092189: step: 400/466, loss: 0.009762517176568508 2023-01-22 12:55:17.739243: step: 402/466, loss: 0.02435818873345852 2023-01-22 12:55:18.356023: step: 404/466, loss: 0.003784589236602187 2023-01-22 12:55:18.951831: step: 406/466, loss: 0.019062306731939316 2023-01-22 12:55:19.578950: step: 408/466, loss: 0.0393337719142437 2023-01-22 12:55:20.172662: step: 410/466, loss: 0.011388128623366356 2023-01-22 12:55:20.753916: step: 412/466, loss: 0.012343622744083405 2023-01-22 12:55:21.336922: step: 414/466, loss: 0.014441592618823051 2023-01-22 12:55:21.885172: step: 416/466, loss: 0.010982821695506573 2023-01-22 12:55:22.544880: step: 418/466, loss: 0.051883138716220856 2023-01-22 12:55:23.238376: step: 420/466, loss: 0.01821015402674675 2023-01-22 12:55:23.850159: step: 422/466, loss: 0.23361898958683014 2023-01-22 12:55:24.434654: step: 424/466, loss: 0.009966660290956497 2023-01-22 12:55:24.965969: step: 426/466, loss: 0.07146193832159042 2023-01-22 12:55:25.562401: step: 428/466, loss: 0.003511168761178851 2023-01-22 12:55:26.122884: step: 430/466, loss: 0.0016422433545812964 2023-01-22 12:55:26.741800: step: 432/466, loss: 0.0014138933038339019 2023-01-22 12:55:27.423768: step: 434/466, loss: 0.013912991620600224 2023-01-22 12:55:28.088875: step: 436/466, loss: 0.055950235575437546 2023-01-22 12:55:28.708374: step: 438/466, loss: 0.08086015284061432 2023-01-22 12:55:29.342742: step: 440/466, loss: 0.019065426662564278 2023-01-22 12:55:29.939405: step: 442/466, loss: 0.012725220061838627 2023-01-22 12:55:30.556944: step: 444/466, loss: 0.019271058961749077 2023-01-22 12:55:31.153425: step: 446/466, loss: 0.022563360631465912 2023-01-22 12:55:31.742245: step: 448/466, loss: 0.09099145978689194 2023-01-22 12:55:32.393909: step: 450/466, loss: 0.06450436264276505 2023-01-22 12:55:33.005722: step: 452/466, loss: 0.0597335547208786 2023-01-22 12:55:33.565373: step: 454/466, loss: 0.0012813321081921458 2023-01-22 12:55:34.184199: step: 456/466, loss: 0.023356245830655098 2023-01-22 12:55:34.682342: step: 458/466, loss: 0.0005833054892718792 2023-01-22 12:55:35.278002: step: 460/466, loss: 0.07915735244750977 2023-01-22 12:55:35.882935: step: 462/466, loss: 0.0856717973947525 2023-01-22 12:55:36.535672: step: 464/466, loss: 0.018680641427636147 2023-01-22 12:55:37.129591: step: 466/466, loss: 0.04496914893388748 2023-01-22 12:55:37.772505: step: 468/466, loss: 0.009340988472104073 2023-01-22 12:55:38.418439: step: 470/466, loss: 0.028811069205403328 2023-01-22 12:55:39.060283: step: 472/466, loss: 0.02744913473725319 2023-01-22 12:55:39.742028: step: 474/466, loss: 0.013957190327346325 2023-01-22 12:55:40.361498: step: 476/466, loss: 0.004813686013221741 2023-01-22 12:55:40.931645: step: 478/466, loss: 0.04070420563220978 2023-01-22 12:55:41.561850: step: 480/466, loss: 0.014992502517998219 2023-01-22 12:55:42.180780: step: 482/466, loss: 0.008838329464197159 2023-01-22 12:55:42.786768: step: 484/466, loss: 0.021108128130435944 2023-01-22 12:55:43.451009: step: 486/466, loss: 0.00671556917950511 2023-01-22 12:55:44.051576: step: 488/466, loss: 0.010902033187448978 2023-01-22 12:55:44.631318: step: 490/466, loss: 0.03226817771792412 2023-01-22 12:55:45.175880: step: 492/466, loss: 0.03758244588971138 2023-01-22 12:55:45.786280: step: 494/466, loss: 0.0034771913196891546 2023-01-22 12:55:46.387677: step: 496/466, loss: 0.025463111698627472 2023-01-22 12:55:46.996792: step: 498/466, loss: 0.0301282349973917 2023-01-22 12:55:47.583933: step: 500/466, loss: 0.010275552049279213 2023-01-22 12:55:48.227080: step: 502/466, loss: 0.0036540618166327477 2023-01-22 12:55:48.887011: step: 504/466, loss: 0.01159196998924017 2023-01-22 12:55:49.563771: step: 506/466, loss: 0.01329745166003704 2023-01-22 12:55:50.175224: step: 508/466, loss: 0.04020538181066513 2023-01-22 12:55:50.740029: step: 510/466, loss: 0.009330449625849724 2023-01-22 12:55:51.382005: step: 512/466, loss: 0.04804263263940811 2023-01-22 12:55:51.989473: step: 514/466, loss: 0.01904565468430519 2023-01-22 12:55:52.587676: step: 516/466, loss: 0.0011679809540510178 2023-01-22 12:55:53.211568: step: 518/466, loss: 0.051677506417036057 2023-01-22 12:55:53.872590: step: 520/466, loss: 0.022494550794363022 2023-01-22 12:55:54.480230: step: 522/466, loss: 0.056992609053850174 2023-01-22 12:55:55.143257: step: 524/466, loss: 0.007421521935611963 2023-01-22 12:55:55.748506: step: 526/466, loss: 0.0017072822665795684 2023-01-22 12:55:56.460692: step: 528/466, loss: 0.11647544056177139 2023-01-22 12:55:57.105793: step: 530/466, loss: 0.25799375772476196 2023-01-22 12:55:57.725872: step: 532/466, loss: 0.007462191861122847 2023-01-22 12:55:58.274050: step: 534/466, loss: 0.0788254365324974 2023-01-22 12:55:58.856726: step: 536/466, loss: 0.0102780656889081 2023-01-22 12:55:59.400304: step: 538/466, loss: 0.15707173943519592 2023-01-22 12:56:00.054765: step: 540/466, loss: 0.03727540746331215 2023-01-22 12:56:00.624245: step: 542/466, loss: 0.0025355357211083174 2023-01-22 12:56:01.223324: step: 544/466, loss: 0.006033079698681831 2023-01-22 12:56:01.857377: step: 546/466, loss: 0.026804547756910324 2023-01-22 12:56:02.410050: step: 548/466, loss: 0.004583065398037434 2023-01-22 12:56:03.102503: step: 550/466, loss: 0.15192300081253052 2023-01-22 12:56:03.719750: step: 552/466, loss: 0.015816085040569305 2023-01-22 12:56:04.312911: step: 554/466, loss: 0.2682707905769348 2023-01-22 12:56:04.908353: step: 556/466, loss: 0.018862945958971977 2023-01-22 12:56:05.484212: step: 558/466, loss: 0.009017648175358772 2023-01-22 12:56:06.095904: step: 560/466, loss: 0.017621343955397606 2023-01-22 12:56:06.943108: step: 562/466, loss: 0.01245852280408144 2023-01-22 12:56:07.601826: step: 564/466, loss: 0.004409522283822298 2023-01-22 12:56:08.230453: step: 566/466, loss: 0.020851565524935722 2023-01-22 12:56:08.863999: step: 568/466, loss: 0.004684189334511757 2023-01-22 12:56:09.454689: step: 570/466, loss: 0.0760229304432869 2023-01-22 12:56:10.023636: step: 572/466, loss: 0.0607258677482605 2023-01-22 12:56:10.589489: step: 574/466, loss: 0.027867596596479416 2023-01-22 12:56:11.214195: step: 576/466, loss: 0.013862524181604385 2023-01-22 12:56:11.861833: step: 578/466, loss: 0.07694022357463837 2023-01-22 12:56:12.451795: step: 580/466, loss: 0.0014871252933517098 2023-01-22 12:56:13.049931: step: 582/466, loss: 0.03727436065673828 2023-01-22 12:56:13.626036: step: 584/466, loss: 0.0019148996798321605 2023-01-22 12:56:14.267110: step: 586/466, loss: 0.020923055708408356 2023-01-22 12:56:14.879709: step: 588/466, loss: 1.0160681009292603 2023-01-22 12:56:15.490350: step: 590/466, loss: 0.033878784626722336 2023-01-22 12:56:16.077442: step: 592/466, loss: 0.06347450613975525 2023-01-22 12:56:16.744565: step: 594/466, loss: 0.08106108009815216 2023-01-22 12:56:17.320337: step: 596/466, loss: 0.00410859240218997 2023-01-22 12:56:17.905987: step: 598/466, loss: 0.06994364410638809 2023-01-22 12:56:18.602236: step: 600/466, loss: 0.2086619883775711 2023-01-22 12:56:19.302562: step: 602/466, loss: 0.015858981758356094 2023-01-22 12:56:19.926447: step: 604/466, loss: 0.01849283091723919 2023-01-22 12:56:20.494797: step: 606/466, loss: 0.025155507028102875 2023-01-22 12:56:21.171769: step: 608/466, loss: 0.0005380866350606084 2023-01-22 12:56:21.731733: step: 610/466, loss: 0.005587004590779543 2023-01-22 12:56:22.353731: step: 612/466, loss: 0.12961412966251373 2023-01-22 12:56:22.939988: step: 614/466, loss: 0.020661529153585434 2023-01-22 12:56:23.600587: step: 616/466, loss: 0.022744232788681984 2023-01-22 12:56:24.169482: step: 618/466, loss: 0.023633470758795738 2023-01-22 12:56:24.778026: step: 620/466, loss: 0.02143573947250843 2023-01-22 12:56:25.385088: step: 622/466, loss: 0.013075039722025394 2023-01-22 12:56:25.987362: step: 624/466, loss: 0.026958230882883072 2023-01-22 12:56:26.634580: step: 626/466, loss: 0.1400860995054245 2023-01-22 12:56:27.284258: step: 628/466, loss: 0.009638143703341484 2023-01-22 12:56:27.989088: step: 630/466, loss: 0.031049687415361404 2023-01-22 12:56:28.562748: step: 632/466, loss: 0.2267194539308548 2023-01-22 12:56:29.236661: step: 634/466, loss: 0.03791540116071701 2023-01-22 12:56:29.834595: step: 636/466, loss: 0.02210753597319126 2023-01-22 12:56:30.452638: step: 638/466, loss: 0.03216801956295967 2023-01-22 12:56:31.015106: step: 640/466, loss: 0.12850381433963776 2023-01-22 12:56:31.597354: step: 642/466, loss: 0.0019831915851682425 2023-01-22 12:56:32.221586: step: 644/466, loss: 0.014089684002101421 2023-01-22 12:56:32.748509: step: 646/466, loss: 0.0263113621622324 2023-01-22 12:56:33.360955: step: 648/466, loss: 0.27425116300582886 2023-01-22 12:56:33.902669: step: 650/466, loss: 0.13408559560775757 2023-01-22 12:56:34.474387: step: 652/466, loss: 0.0064635686576366425 2023-01-22 12:56:35.043682: step: 654/466, loss: 0.023577343672513962 2023-01-22 12:56:35.694146: step: 656/466, loss: 0.023118676617741585 2023-01-22 12:56:36.328254: step: 658/466, loss: 0.02726762555539608 2023-01-22 12:56:36.968957: step: 660/466, loss: 0.0005869396263733506 2023-01-22 12:56:37.549639: step: 662/466, loss: 0.05336834862828255 2023-01-22 12:56:38.276027: step: 664/466, loss: 0.02326621487736702 2023-01-22 12:56:38.852711: step: 666/466, loss: 0.003729963907971978 2023-01-22 12:56:39.544899: step: 668/466, loss: 0.0011509779142215848 2023-01-22 12:56:40.189230: step: 670/466, loss: 0.013985698111355305 2023-01-22 12:56:40.736878: step: 672/466, loss: 0.0028651151806116104 2023-01-22 12:56:41.327623: step: 674/466, loss: 0.003631117520853877 2023-01-22 12:56:41.910005: step: 676/466, loss: 0.29877933859825134 2023-01-22 12:56:42.506414: step: 678/466, loss: 0.018547525629401207 2023-01-22 12:56:43.090721: step: 680/466, loss: 0.019268687814474106 2023-01-22 12:56:43.680491: step: 682/466, loss: 0.01983795315027237 2023-01-22 12:56:44.304203: step: 684/466, loss: 0.006188856437802315 2023-01-22 12:56:44.946775: step: 686/466, loss: 0.08563053607940674 2023-01-22 12:56:45.525041: step: 688/466, loss: 0.0037638735957443714 2023-01-22 12:56:46.178336: step: 690/466, loss: 0.015350519679486752 2023-01-22 12:56:46.784274: step: 692/466, loss: 0.00996602326631546 2023-01-22 12:56:47.419165: step: 694/466, loss: 0.00022550724679604173 2023-01-22 12:56:48.054245: step: 696/466, loss: 0.04517294839024544 2023-01-22 12:56:48.662702: step: 698/466, loss: 0.1106954887509346 2023-01-22 12:56:49.280041: step: 700/466, loss: 0.021119993180036545 2023-01-22 12:56:50.044474: step: 702/466, loss: 0.058732982724905014 2023-01-22 12:56:50.616979: step: 704/466, loss: 0.005045011639595032 2023-01-22 12:56:51.246468: step: 706/466, loss: 0.003177064238116145 2023-01-22 12:56:51.816607: step: 708/466, loss: 0.03055475652217865 2023-01-22 12:56:52.427383: step: 710/466, loss: 0.1264713704586029 2023-01-22 12:56:53.039339: step: 712/466, loss: 0.0012993437703698874 2023-01-22 12:56:53.555367: step: 714/466, loss: 0.0006832975777797401 2023-01-22 12:56:54.162093: step: 716/466, loss: 0.03061027266085148 2023-01-22 12:56:54.776561: step: 718/466, loss: 0.0036416640505194664 2023-01-22 12:56:55.357658: step: 720/466, loss: 0.0023828730918467045 2023-01-22 12:56:55.940056: step: 722/466, loss: 0.05810290575027466 2023-01-22 12:56:56.608759: step: 724/466, loss: 0.026543136686086655 2023-01-22 12:56:57.231981: step: 726/466, loss: 0.0126041816547513 2023-01-22 12:56:57.828949: step: 728/466, loss: 0.05292503908276558 2023-01-22 12:56:58.469224: step: 730/466, loss: 0.06300801038742065 2023-01-22 12:56:59.028255: step: 732/466, loss: 0.011260651051998138 2023-01-22 12:56:59.659950: step: 734/466, loss: 0.0007103653624653816 2023-01-22 12:57:00.241431: step: 736/466, loss: 0.00010583127732388675 2023-01-22 12:57:00.808244: step: 738/466, loss: 0.022099090740084648 2023-01-22 12:57:01.451876: step: 740/466, loss: 0.0035885353572666645 2023-01-22 12:57:01.967646: step: 742/466, loss: 0.6832292675971985 2023-01-22 12:57:02.582799: step: 744/466, loss: 0.09095166623592377 2023-01-22 12:57:03.110001: step: 746/466, loss: 0.007521773222833872 2023-01-22 12:57:03.716855: step: 748/466, loss: 0.10964198410511017 2023-01-22 12:57:04.318166: step: 750/466, loss: 0.00558021105825901 2023-01-22 12:57:04.872059: step: 752/466, loss: 0.009862803854048252 2023-01-22 12:57:05.484715: step: 754/466, loss: 0.05558396503329277 2023-01-22 12:57:06.063887: step: 756/466, loss: 0.015214133076369762 2023-01-22 12:57:06.630795: step: 758/466, loss: 0.04548295959830284 2023-01-22 12:57:07.226020: step: 760/466, loss: 0.03597099334001541 2023-01-22 12:57:07.783082: step: 762/466, loss: 0.0132424496114254 2023-01-22 12:57:08.435612: step: 764/466, loss: 0.0013959844363853335 2023-01-22 12:57:09.023128: step: 766/466, loss: 0.010815260000526905 2023-01-22 12:57:09.672060: step: 768/466, loss: 0.11648006737232208 2023-01-22 12:57:10.262526: step: 770/466, loss: 0.0757567435503006 2023-01-22 12:57:10.840431: step: 772/466, loss: 0.0016913153231143951 2023-01-22 12:57:11.456828: step: 774/466, loss: 0.11876269429922104 2023-01-22 12:57:12.058106: step: 776/466, loss: 0.014088819734752178 2023-01-22 12:57:12.753355: step: 778/466, loss: 0.05380717292428017 2023-01-22 12:57:13.307854: step: 780/466, loss: 0.017499873414635658 2023-01-22 12:57:13.986041: step: 782/466, loss: 0.0022974826861172915 2023-01-22 12:57:14.538661: step: 784/466, loss: 0.0068471673876047134 2023-01-22 12:57:15.153509: step: 786/466, loss: 0.01211138442158699 2023-01-22 12:57:15.773489: step: 788/466, loss: 0.09165941178798676 2023-01-22 12:57:16.357098: step: 790/466, loss: 0.02449076995253563 2023-01-22 12:57:16.916040: step: 792/466, loss: 0.045832838863134384 2023-01-22 12:57:17.537245: step: 794/466, loss: 0.041032642126083374 2023-01-22 12:57:18.150669: step: 796/466, loss: 0.01552598550915718 2023-01-22 12:57:18.759712: step: 798/466, loss: 0.01997545175254345 2023-01-22 12:57:19.372918: step: 800/466, loss: 0.010445799678564072 2023-01-22 12:57:19.966219: step: 802/466, loss: 0.00863324198871851 2023-01-22 12:57:20.573602: step: 804/466, loss: 0.006967922672629356 2023-01-22 12:57:21.195798: step: 806/466, loss: 0.05250157043337822 2023-01-22 12:57:21.889808: step: 808/466, loss: 0.03862131014466286 2023-01-22 12:57:22.503636: step: 810/466, loss: 0.05220973864197731 2023-01-22 12:57:23.105475: step: 812/466, loss: 0.07533097267150879 2023-01-22 12:57:23.700072: step: 814/466, loss: 0.03043753281235695 2023-01-22 12:57:24.288049: step: 816/466, loss: 0.00482554966583848 2023-01-22 12:57:24.921569: step: 818/466, loss: 0.0547814704477787 2023-01-22 12:57:25.519755: step: 820/466, loss: 0.020278315991163254 2023-01-22 12:57:26.113066: step: 822/466, loss: 0.0074560982175171375 2023-01-22 12:57:26.813503: step: 824/466, loss: 0.01077156700193882 2023-01-22 12:57:27.512574: step: 826/466, loss: 0.024306146427989006 2023-01-22 12:57:28.096477: step: 828/466, loss: 0.034570641815662384 2023-01-22 12:57:28.700656: step: 830/466, loss: 0.06997299194335938 2023-01-22 12:57:29.356650: step: 832/466, loss: 0.00283684185706079 2023-01-22 12:57:30.014081: step: 834/466, loss: 0.005547116976231337 2023-01-22 12:57:30.623295: step: 836/466, loss: 0.027647120878100395 2023-01-22 12:57:31.286298: step: 838/466, loss: 0.06419383734464645 2023-01-22 12:57:31.921687: step: 840/466, loss: 0.0660874992609024 2023-01-22 12:57:32.561076: step: 842/466, loss: 0.056007083505392075 2023-01-22 12:57:33.136087: step: 844/466, loss: 0.01848028413951397 2023-01-22 12:57:33.782304: step: 846/466, loss: 0.06765052676200867 2023-01-22 12:57:34.424379: step: 848/466, loss: 0.032074473798274994 2023-01-22 12:57:35.103336: step: 850/466, loss: 0.002927222289144993 2023-01-22 12:57:35.733924: step: 852/466, loss: 0.062171820551157 2023-01-22 12:57:36.323572: step: 854/466, loss: 0.011124547570943832 2023-01-22 12:57:36.910487: step: 856/466, loss: 0.2015541046857834 2023-01-22 12:57:37.558457: step: 858/466, loss: 0.07552161812782288 2023-01-22 12:57:38.157144: step: 860/466, loss: 0.17228293418884277 2023-01-22 12:57:38.728774: step: 862/466, loss: 0.01148252934217453 2023-01-22 12:57:39.285332: step: 864/466, loss: 0.10406894981861115 2023-01-22 12:57:39.885674: step: 866/466, loss: 0.9883565306663513 2023-01-22 12:57:40.498548: step: 868/466, loss: 0.03360062092542648 2023-01-22 12:57:41.132526: step: 870/466, loss: 0.044887661933898926 2023-01-22 12:57:41.740909: step: 872/466, loss: 0.0762392058968544 2023-01-22 12:57:42.342388: step: 874/466, loss: 0.019538970664143562 2023-01-22 12:57:42.983213: step: 876/466, loss: 0.14621831476688385 2023-01-22 12:57:43.635176: step: 878/466, loss: 0.027906371280550957 2023-01-22 12:57:44.214328: step: 880/466, loss: 0.06606736779212952 2023-01-22 12:57:44.865217: step: 882/466, loss: 0.05360845848917961 2023-01-22 12:57:45.484063: step: 884/466, loss: 0.019824877381324768 2023-01-22 12:57:46.101448: step: 886/466, loss: 0.021310580894351006 2023-01-22 12:57:46.705729: step: 888/466, loss: 0.027834007516503334 2023-01-22 12:57:47.335570: step: 890/466, loss: 0.04889824241399765 2023-01-22 12:57:47.922915: step: 892/466, loss: 0.02395694889128208 2023-01-22 12:57:48.557194: step: 894/466, loss: 0.05169637128710747 2023-01-22 12:57:49.196557: step: 896/466, loss: 0.025937411934137344 2023-01-22 12:57:49.795545: step: 898/466, loss: 0.004313454497605562 2023-01-22 12:57:50.365023: step: 900/466, loss: 0.04247196391224861 2023-01-22 12:57:50.992823: step: 902/466, loss: 0.008535345084965229 2023-01-22 12:57:51.616909: step: 904/466, loss: 0.011454445309937 2023-01-22 12:57:52.191059: step: 906/466, loss: 0.01287270337343216 2023-01-22 12:57:52.764768: step: 908/466, loss: 0.02286466211080551 2023-01-22 12:57:53.360477: step: 910/466, loss: 0.029042303562164307 2023-01-22 12:57:53.993189: step: 912/466, loss: 0.006412671413272619 2023-01-22 12:57:54.596750: step: 914/466, loss: 0.06761791557073593 2023-01-22 12:57:55.194266: step: 916/466, loss: 0.08030974119901657 2023-01-22 12:57:55.835875: step: 918/466, loss: 0.002412214642390609 2023-01-22 12:57:56.441574: step: 920/466, loss: 0.031146619468927383 2023-01-22 12:57:57.027850: step: 922/466, loss: 0.08206193894147873 2023-01-22 12:57:57.709154: step: 924/466, loss: 0.07852324843406677 2023-01-22 12:57:58.323353: step: 926/466, loss: 0.008640158921480179 2023-01-22 12:57:58.896023: step: 928/466, loss: 0.07596441358327866 2023-01-22 12:57:59.508774: step: 930/466, loss: 0.008638971485197544 2023-01-22 12:58:00.098462: step: 932/466, loss: 0.02195640094578266 ================================================== Loss: 0.050 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30118051274966173, 'r': 0.3491865147818659, 'f1': 0.32341176325139426}, 'combined': 0.23830340450102733, 'epoch': 27} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34963686007645167, 'r': 0.33628267444853166, 'f1': 0.3428297707652287}, 'combined': 0.2273689671396335, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2880684922152417, 'r': 0.2853405708874459, 'f1': 0.28669804268044047}, 'combined': 0.19113202845362698, 'epoch': 27} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3591011684353882, 'r': 0.31082651005942535, 'f1': 0.333224515200853}, 'combined': 0.21747284149950405, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2824114647187955, 'r': 0.3376076333450497, 'f1': 0.3075526754932432}, 'combined': 0.22661776088975816, 'epoch': 27} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3338887653506016, 'r': 0.315660869148619, 'f1': 0.32451905834967154}, 'combined': 0.21522507496765778, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23742138364779872, 'r': 0.35952380952380947, 'f1': 0.2859848484848484}, 'combined': 0.19065656565656558, 'epoch': 27} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3541666666666667, 'r': 0.3695652173913043, 'f1': 0.3617021276595745}, 'combined': 0.24113475177304966, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3875, 'r': 0.2672413793103448, 'f1': 0.3163265306122449}, 'combined': 0.2108843537414966, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:00:26.362209: step: 2/466, loss: 0.014356344006955624 2023-01-22 13:00:26.986210: step: 4/466, loss: 0.04025975614786148 2023-01-22 13:00:27.614964: step: 6/466, loss: 0.007678386755287647 2023-01-22 13:00:28.238361: step: 8/466, loss: 0.006299751810729504 2023-01-22 13:00:28.859918: step: 10/466, loss: 0.02206443063914776 2023-01-22 13:00:29.477230: step: 12/466, loss: 0.009057655930519104 2023-01-22 13:00:30.061616: step: 14/466, loss: 0.0005861087120138109 2023-01-22 13:00:30.631977: step: 16/466, loss: 0.020142707973718643 2023-01-22 13:00:31.192640: step: 18/466, loss: 0.01774671860039234 2023-01-22 13:00:31.748215: step: 20/466, loss: 0.3696327209472656 2023-01-22 13:00:32.442158: step: 22/466, loss: 0.04497640207409859 2023-01-22 13:00:33.113122: step: 24/466, loss: 0.0403880812227726 2023-01-22 13:00:33.721654: step: 26/466, loss: 0.028408147394657135 2023-01-22 13:00:34.315614: step: 28/466, loss: 0.013730722479522228 2023-01-22 13:00:34.942983: step: 30/466, loss: 0.220284104347229 2023-01-22 13:00:35.527714: step: 32/466, loss: 0.0032014932949095964 2023-01-22 13:00:36.137283: step: 34/466, loss: 0.020034316927194595 2023-01-22 13:00:36.731190: step: 36/466, loss: 0.010054860264062881 2023-01-22 13:00:37.269118: step: 38/466, loss: 0.000760409515351057 2023-01-22 13:00:37.865682: step: 40/466, loss: 0.002009687013924122 2023-01-22 13:00:38.444121: step: 42/466, loss: 0.058458112180233 2023-01-22 13:00:38.992905: step: 44/466, loss: 0.004694878589361906 2023-01-22 13:00:39.560668: step: 46/466, loss: 0.04860213026404381 2023-01-22 13:00:40.227591: step: 48/466, loss: 0.019404586404561996 2023-01-22 13:00:40.791138: step: 50/466, loss: 1.8576234579086304 2023-01-22 13:00:41.333017: step: 52/466, loss: 0.0023364226799458265 2023-01-22 13:00:41.969666: step: 54/466, loss: 0.03962698578834534 2023-01-22 13:00:42.569304: step: 56/466, loss: 0.014290332794189453 2023-01-22 13:00:43.245221: step: 58/466, loss: 0.5267027616500854 2023-01-22 13:00:43.852332: step: 60/466, loss: 0.03794585168361664 2023-01-22 13:00:44.424520: step: 62/466, loss: 0.4988641142845154 2023-01-22 13:00:45.152494: step: 64/466, loss: 0.03297088295221329 2023-01-22 13:00:45.747204: step: 66/466, loss: 0.0036860518157482147 2023-01-22 13:00:46.384921: step: 68/466, loss: 0.05012097209692001 2023-01-22 13:00:46.978482: step: 70/466, loss: 0.007256614975631237 2023-01-22 13:00:47.499327: step: 72/466, loss: 0.012991432100534439 2023-01-22 13:00:48.098845: step: 74/466, loss: 0.7627309560775757 2023-01-22 13:00:48.786590: step: 76/466, loss: 0.06482483446598053 2023-01-22 13:00:49.312602: step: 78/466, loss: 0.02229459211230278 2023-01-22 13:00:49.937445: step: 80/466, loss: 0.011697539128363132 2023-01-22 13:00:50.509012: step: 82/466, loss: 0.046592358499765396 2023-01-22 13:00:51.093578: step: 84/466, loss: 0.019263656809926033 2023-01-22 13:00:51.714185: step: 86/466, loss: 0.015519551932811737 2023-01-22 13:00:52.284643: step: 88/466, loss: 0.010792257264256477 2023-01-22 13:00:52.870193: step: 90/466, loss: 0.047918882220983505 2023-01-22 13:00:53.514248: step: 92/466, loss: 0.035363685339689255 2023-01-22 13:00:54.108728: step: 94/466, loss: 0.005803626496344805 2023-01-22 13:00:54.718579: step: 96/466, loss: 0.0007789679802954197 2023-01-22 13:00:55.274991: step: 98/466, loss: 0.05806687846779823 2023-01-22 13:00:55.843309: step: 100/466, loss: 0.015525509603321552 2023-01-22 13:00:56.466680: step: 102/466, loss: 0.046151284128427505 2023-01-22 13:00:57.046078: step: 104/466, loss: 0.04594738781452179 2023-01-22 13:00:57.607736: step: 106/466, loss: 0.015765078365802765 2023-01-22 13:00:58.209653: step: 108/466, loss: 0.0011960859410464764 2023-01-22 13:00:58.871843: step: 110/466, loss: 0.014489513821899891 2023-01-22 13:00:59.544738: step: 112/466, loss: 0.10112947225570679 2023-01-22 13:01:00.213906: step: 114/466, loss: 0.03327832370996475 2023-01-22 13:01:00.904454: step: 116/466, loss: 0.022093651816248894 2023-01-22 13:01:01.525453: step: 118/466, loss: 0.050986457616090775 2023-01-22 13:01:02.192229: step: 120/466, loss: 0.053221650421619415 2023-01-22 13:01:02.755199: step: 122/466, loss: 0.03191741928458214 2023-01-22 13:01:03.355949: step: 124/466, loss: 0.01965634897351265 2023-01-22 13:01:03.947529: step: 126/466, loss: 0.02230476588010788 2023-01-22 13:01:04.546279: step: 128/466, loss: 0.005988352932035923 2023-01-22 13:01:05.114363: step: 130/466, loss: 0.0023371069692075253 2023-01-22 13:01:05.672065: step: 132/466, loss: 0.007428319193422794 2023-01-22 13:01:06.256315: step: 134/466, loss: 0.03449222072958946 2023-01-22 13:01:06.851388: step: 136/466, loss: 0.014719021506607533 2023-01-22 13:01:07.436941: step: 138/466, loss: 0.03975159302353859 2023-01-22 13:01:07.973344: step: 140/466, loss: 0.005082350689917803 2023-01-22 13:01:08.574928: step: 142/466, loss: 0.006860900670289993 2023-01-22 13:01:09.194608: step: 144/466, loss: 0.024904923513531685 2023-01-22 13:01:09.832986: step: 146/466, loss: 0.020591579377651215 2023-01-22 13:01:10.425278: step: 148/466, loss: 0.07017152011394501 2023-01-22 13:01:11.096406: step: 150/466, loss: 0.016340402886271477 2023-01-22 13:01:11.755064: step: 152/466, loss: 0.009680577553808689 2023-01-22 13:01:12.340707: step: 154/466, loss: 0.11456497013568878 2023-01-22 13:01:13.042670: step: 156/466, loss: 0.0032149055041372776 2023-01-22 13:01:13.646164: step: 158/466, loss: 0.002328538103029132 2023-01-22 13:01:14.282981: step: 160/466, loss: 0.04624420031905174 2023-01-22 13:01:14.817079: step: 162/466, loss: 0.0006145125371403992 2023-01-22 13:01:15.406696: step: 164/466, loss: 0.017932584509253502 2023-01-22 13:01:16.075706: step: 166/466, loss: 0.10638228803873062 2023-01-22 13:01:16.701365: step: 168/466, loss: 0.03491752967238426 2023-01-22 13:01:17.321373: step: 170/466, loss: 0.008974886499345303 2023-01-22 13:01:17.918365: step: 172/466, loss: 0.009993337094783783 2023-01-22 13:01:18.514795: step: 174/466, loss: 0.009299321100115776 2023-01-22 13:01:19.133601: step: 176/466, loss: 0.06450952589511871 2023-01-22 13:01:19.759610: step: 178/466, loss: 0.01272459328174591 2023-01-22 13:01:20.321975: step: 180/466, loss: 0.004704466089606285 2023-01-22 13:01:20.914894: step: 182/466, loss: 0.06017916649580002 2023-01-22 13:01:21.503766: step: 184/466, loss: 0.005661633796989918 2023-01-22 13:01:22.097330: step: 186/466, loss: 0.03322940692305565 2023-01-22 13:01:22.659013: step: 188/466, loss: 0.0073445746675133705 2023-01-22 13:01:23.247721: step: 190/466, loss: 0.013768017292022705 2023-01-22 13:01:23.882054: step: 192/466, loss: 0.01368219219148159 2023-01-22 13:01:24.453239: step: 194/466, loss: 0.00118536117952317 2023-01-22 13:01:25.050796: step: 196/466, loss: 0.021496085450053215 2023-01-22 13:01:25.659477: step: 198/466, loss: 0.04687000811100006 2023-01-22 13:01:26.296133: step: 200/466, loss: 0.05527500808238983 2023-01-22 13:01:26.910836: step: 202/466, loss: 0.021703608334064484 2023-01-22 13:01:27.556206: step: 204/466, loss: 0.03900735452771187 2023-01-22 13:01:28.162906: step: 206/466, loss: 0.015040869824588299 2023-01-22 13:01:28.811661: step: 208/466, loss: 0.006747271865606308 2023-01-22 13:01:29.445668: step: 210/466, loss: 0.05444791540503502 2023-01-22 13:01:30.062040: step: 212/466, loss: 0.01580660231411457 2023-01-22 13:01:30.641298: step: 214/466, loss: 0.0037571711000055075 2023-01-22 13:01:31.221640: step: 216/466, loss: 0.002167017897590995 2023-01-22 13:01:31.860977: step: 218/466, loss: 0.02093181200325489 2023-01-22 13:01:32.520870: step: 220/466, loss: 0.27529504895210266 2023-01-22 13:01:33.121427: step: 222/466, loss: 0.017809255048632622 2023-01-22 13:01:33.691748: step: 224/466, loss: 0.025749122723937035 2023-01-22 13:01:34.298542: step: 226/466, loss: 0.0021865046583116055 2023-01-22 13:01:34.880912: step: 228/466, loss: 0.004157186485826969 2023-01-22 13:01:35.556153: step: 230/466, loss: 0.051419783383607864 2023-01-22 13:01:36.157971: step: 232/466, loss: 0.5104377865791321 2023-01-22 13:01:36.830598: step: 234/466, loss: 0.014660846441984177 2023-01-22 13:01:37.447319: step: 236/466, loss: 0.016716402024030685 2023-01-22 13:01:38.003810: step: 238/466, loss: 0.00012194723240099847 2023-01-22 13:01:38.584394: step: 240/466, loss: 0.027347303926944733 2023-01-22 13:01:39.218246: step: 242/466, loss: 0.00809621810913086 2023-01-22 13:01:39.736415: step: 244/466, loss: 0.004353704862296581 2023-01-22 13:01:40.391583: step: 246/466, loss: 0.002220169873908162 2023-01-22 13:01:41.028274: step: 248/466, loss: 0.065149687230587 2023-01-22 13:01:41.634450: step: 250/466, loss: 0.0572744645178318 2023-01-22 13:01:42.275672: step: 252/466, loss: 0.008158638142049313 2023-01-22 13:01:42.937230: step: 254/466, loss: 0.00939987227320671 2023-01-22 13:01:43.539628: step: 256/466, loss: 0.021884001791477203 2023-01-22 13:01:44.076359: step: 258/466, loss: 0.006775291170924902 2023-01-22 13:01:44.673701: step: 260/466, loss: 0.08223210275173187 2023-01-22 13:01:45.253016: step: 262/466, loss: 0.011056998744606972 2023-01-22 13:01:45.856923: step: 264/466, loss: 0.011703469790518284 2023-01-22 13:01:46.501579: step: 266/466, loss: 0.01117135863751173 2023-01-22 13:01:47.124012: step: 268/466, loss: 0.031074855476617813 2023-01-22 13:01:47.735709: step: 270/466, loss: 0.023963337764143944 2023-01-22 13:01:48.395918: step: 272/466, loss: 0.008821115829050541 2023-01-22 13:01:48.999797: step: 274/466, loss: 0.03331343084573746 2023-01-22 13:01:49.588551: step: 276/466, loss: 0.0015307868598029017 2023-01-22 13:01:50.192991: step: 278/466, loss: 0.0017039136728271842 2023-01-22 13:01:50.831906: step: 280/466, loss: 0.08409566432237625 2023-01-22 13:01:51.420919: step: 282/466, loss: 0.016561800613999367 2023-01-22 13:01:52.015261: step: 284/466, loss: 0.010610692203044891 2023-01-22 13:01:52.567955: step: 286/466, loss: 0.001059519941918552 2023-01-22 13:01:53.114727: step: 288/466, loss: 0.03135376423597336 2023-01-22 13:01:53.700459: step: 290/466, loss: 0.012570686638355255 2023-01-22 13:01:54.250542: step: 292/466, loss: 0.05125534161925316 2023-01-22 13:01:54.971565: step: 294/466, loss: 0.04573095589876175 2023-01-22 13:01:55.589275: step: 296/466, loss: 0.0014932537451386452 2023-01-22 13:01:56.192770: step: 298/466, loss: 0.011165394447743893 2023-01-22 13:01:56.779748: step: 300/466, loss: 0.11039699614048004 2023-01-22 13:01:57.446176: step: 302/466, loss: 0.017728859558701515 2023-01-22 13:01:57.986079: step: 304/466, loss: 0.013740737922489643 2023-01-22 13:01:58.665686: step: 306/466, loss: 0.5609971284866333 2023-01-22 13:01:59.308277: step: 308/466, loss: 0.0003966326476074755 2023-01-22 13:01:59.867456: step: 310/466, loss: 0.11472980678081512 2023-01-22 13:02:00.448817: step: 312/466, loss: 0.02893056534230709 2023-01-22 13:02:01.002021: step: 314/466, loss: 0.001872989465482533 2023-01-22 13:02:01.634445: step: 316/466, loss: 3.7685582637786865 2023-01-22 13:02:02.252072: step: 318/466, loss: 0.05967605859041214 2023-01-22 13:02:02.871490: step: 320/466, loss: 0.022798847407102585 2023-01-22 13:02:03.502146: step: 322/466, loss: 0.057037223130464554 2023-01-22 13:02:04.039940: step: 324/466, loss: 0.011289509013295174 2023-01-22 13:02:04.588971: step: 326/466, loss: 0.13259345293045044 2023-01-22 13:02:05.223825: step: 328/466, loss: 0.0009833957301452756 2023-01-22 13:02:05.885338: step: 330/466, loss: 0.05492498725652695 2023-01-22 13:02:06.513125: step: 332/466, loss: 0.46318674087524414 2023-01-22 13:02:07.151066: step: 334/466, loss: 0.0014214838156476617 2023-01-22 13:02:07.724545: step: 336/466, loss: 0.02357303723692894 2023-01-22 13:02:08.267839: step: 338/466, loss: 0.006936745252460241 2023-01-22 13:02:08.866419: step: 340/466, loss: 0.07935026288032532 2023-01-22 13:02:09.468582: step: 342/466, loss: 0.0024470698554068804 2023-01-22 13:02:10.012326: step: 344/466, loss: 0.0981178805232048 2023-01-22 13:02:10.721763: step: 346/466, loss: 0.004784220829606056 2023-01-22 13:02:11.259754: step: 348/466, loss: 0.0003342062991578132 2023-01-22 13:02:11.835482: step: 350/466, loss: 0.0058012730441987514 2023-01-22 13:02:12.468720: step: 352/466, loss: 0.015377351082861423 2023-01-22 13:02:13.097014: step: 354/466, loss: 0.010469252243638039 2023-01-22 13:02:13.770225: step: 356/466, loss: 0.006357523147016764 2023-01-22 13:02:14.382155: step: 358/466, loss: 0.021507292985916138 2023-01-22 13:02:14.933420: step: 360/466, loss: 0.002699486678466201 2023-01-22 13:02:15.565413: step: 362/466, loss: 0.0266110859811306 2023-01-22 13:02:16.149790: step: 364/466, loss: 0.0008565335301682353 2023-01-22 13:02:16.786163: step: 366/466, loss: 0.012085853144526482 2023-01-22 13:02:17.368590: step: 368/466, loss: 0.0008071648189797997 2023-01-22 13:02:17.979684: step: 370/466, loss: 0.0019020200707018375 2023-01-22 13:02:18.626662: step: 372/466, loss: 0.014376146718859673 2023-01-22 13:02:19.228627: step: 374/466, loss: 0.2126854807138443 2023-01-22 13:02:19.812306: step: 376/466, loss: 0.004843392875045538 2023-01-22 13:02:20.464931: step: 378/466, loss: 0.020051002502441406 2023-01-22 13:02:21.141206: step: 380/466, loss: 0.022910239174962044 2023-01-22 13:02:21.735097: step: 382/466, loss: 0.02626357600092888 2023-01-22 13:02:22.313120: step: 384/466, loss: 0.01931600458920002 2023-01-22 13:02:22.888439: step: 386/466, loss: 0.0068773808889091015 2023-01-22 13:02:23.500639: step: 388/466, loss: 0.021012280136346817 2023-01-22 13:02:24.046830: step: 390/466, loss: 0.004145463462918997 2023-01-22 13:02:24.676400: step: 392/466, loss: 0.08756530284881592 2023-01-22 13:02:25.328625: step: 394/466, loss: 0.05061415582895279 2023-01-22 13:02:25.926842: step: 396/466, loss: 0.005754661746323109 2023-01-22 13:02:26.506051: step: 398/466, loss: 0.0050090826116502285 2023-01-22 13:02:27.122636: step: 400/466, loss: 0.03864866867661476 2023-01-22 13:02:27.694236: step: 402/466, loss: 0.005267775617539883 2023-01-22 13:02:28.330978: step: 404/466, loss: 0.028270401060581207 2023-01-22 13:02:28.985143: step: 406/466, loss: 0.2942807674407959 2023-01-22 13:02:29.641165: step: 408/466, loss: 0.1558419167995453 2023-01-22 13:02:30.293050: step: 410/466, loss: 0.0996171310544014 2023-01-22 13:02:30.912017: step: 412/466, loss: 0.01766573078930378 2023-01-22 13:02:31.697226: step: 414/466, loss: 0.0002705434162635356 2023-01-22 13:02:32.333765: step: 416/466, loss: 0.00040582052315585315 2023-01-22 13:02:33.053540: step: 418/466, loss: 0.021263068541884422 2023-01-22 13:02:33.612947: step: 420/466, loss: 0.11323844641447067 2023-01-22 13:02:34.219259: step: 422/466, loss: 0.01785622164607048 2023-01-22 13:02:34.903853: step: 424/466, loss: 0.00032024685060605407 2023-01-22 13:02:35.430973: step: 426/466, loss: 0.0005431880126707256 2023-01-22 13:02:36.037527: step: 428/466, loss: 0.024054253473877907 2023-01-22 13:02:36.695469: step: 430/466, loss: 0.002253231592476368 2023-01-22 13:02:37.287608: step: 432/466, loss: 0.020636066794395447 2023-01-22 13:02:37.864636: step: 434/466, loss: 0.00567647023126483 2023-01-22 13:02:38.493588: step: 436/466, loss: 0.13762988150119781 2023-01-22 13:02:39.116672: step: 438/466, loss: 0.06241016462445259 2023-01-22 13:02:39.733320: step: 440/466, loss: 0.0031003092881292105 2023-01-22 13:02:40.381700: step: 442/466, loss: 0.00021963377366773784 2023-01-22 13:02:41.034140: step: 444/466, loss: 0.21239763498306274 2023-01-22 13:02:41.697421: step: 446/466, loss: 0.024081414565443993 2023-01-22 13:02:42.331169: step: 448/466, loss: 0.01938778907060623 2023-01-22 13:02:42.920334: step: 450/466, loss: 0.004417332820594311 2023-01-22 13:02:43.567482: step: 452/466, loss: 0.00482999486848712 2023-01-22 13:02:44.190421: step: 454/466, loss: 0.016177594661712646 2023-01-22 13:02:44.764903: step: 456/466, loss: 0.01952294632792473 2023-01-22 13:02:45.332925: step: 458/466, loss: 0.035597063601017 2023-01-22 13:02:45.920711: step: 460/466, loss: 0.021008197218179703 2023-01-22 13:02:46.472518: step: 462/466, loss: 0.031786419451236725 2023-01-22 13:02:47.094387: step: 464/466, loss: 0.04171544685959816 2023-01-22 13:02:47.764438: step: 466/466, loss: 0.024207210168242455 2023-01-22 13:02:48.397457: step: 468/466, loss: 0.060788240283727646 2023-01-22 13:02:48.928349: step: 470/466, loss: 0.009493236429989338 2023-01-22 13:02:49.561771: step: 472/466, loss: 0.04282115772366524 2023-01-22 13:02:50.112631: step: 474/466, loss: 0.20088478922843933 2023-01-22 13:02:50.734677: step: 476/466, loss: 0.01010823342949152 2023-01-22 13:02:51.293139: step: 478/466, loss: 0.02495218999683857 2023-01-22 13:02:51.785426: step: 480/466, loss: 0.005852373316884041 2023-01-22 13:02:52.435380: step: 482/466, loss: 0.0020090469624847174 2023-01-22 13:02:53.031495: step: 484/466, loss: 0.003161755623295903 2023-01-22 13:02:53.613770: step: 486/466, loss: 0.024762656539678574 2023-01-22 13:02:54.223591: step: 488/466, loss: 0.04741264879703522 2023-01-22 13:02:54.868988: step: 490/466, loss: 0.02133660390973091 2023-01-22 13:02:55.496887: step: 492/466, loss: 0.013741686008870602 2023-01-22 13:02:56.066453: step: 494/466, loss: 0.010217435657978058 2023-01-22 13:02:56.762953: step: 496/466, loss: 0.0017479138914495707 2023-01-22 13:02:57.324171: step: 498/466, loss: 0.01496767345815897 2023-01-22 13:02:57.998990: step: 500/466, loss: 0.015532799065113068 2023-01-22 13:02:58.620252: step: 502/466, loss: 0.012100692838430405 2023-01-22 13:02:59.163822: step: 504/466, loss: 0.0010967212729156017 2023-01-22 13:02:59.737731: step: 506/466, loss: 0.014203607104718685 2023-01-22 13:03:00.349316: step: 508/466, loss: 0.010653822682797909 2023-01-22 13:03:00.920731: step: 510/466, loss: 0.013628825545310974 2023-01-22 13:03:01.536067: step: 512/466, loss: 0.060214050114154816 2023-01-22 13:03:02.146676: step: 514/466, loss: 0.006166969425976276 2023-01-22 13:03:02.677850: step: 516/466, loss: 0.00669153593480587 2023-01-22 13:03:03.240728: step: 518/466, loss: 0.04255734756588936 2023-01-22 13:03:03.864754: step: 520/466, loss: 0.06647557765245438 2023-01-22 13:03:04.527342: step: 522/466, loss: 0.005385304801166058 2023-01-22 13:03:05.128958: step: 524/466, loss: 1.0983834266662598 2023-01-22 13:03:05.741166: step: 526/466, loss: 0.039968132972717285 2023-01-22 13:03:06.369424: step: 528/466, loss: 0.03974110633134842 2023-01-22 13:03:06.955301: step: 530/466, loss: 0.01054446306079626 2023-01-22 13:03:07.555038: step: 532/466, loss: 0.018481917679309845 2023-01-22 13:03:08.120441: step: 534/466, loss: 0.018977802246809006 2023-01-22 13:03:08.687059: step: 536/466, loss: 0.009774553589522839 2023-01-22 13:03:09.303135: step: 538/466, loss: 0.001242406782694161 2023-01-22 13:03:09.882738: step: 540/466, loss: 0.000893936085049063 2023-01-22 13:03:10.492404: step: 542/466, loss: 0.012543848715722561 2023-01-22 13:03:11.133448: step: 544/466, loss: 0.0074572633020579815 2023-01-22 13:03:11.708710: step: 546/466, loss: 0.42661380767822266 2023-01-22 13:03:12.352855: step: 548/466, loss: 0.02388249896466732 2023-01-22 13:03:13.004869: step: 550/466, loss: 0.0061629582196474075 2023-01-22 13:03:13.567078: step: 552/466, loss: 0.011627927422523499 2023-01-22 13:03:14.150008: step: 554/466, loss: 0.06222614645957947 2023-01-22 13:03:14.797421: step: 556/466, loss: 0.018478328362107277 2023-01-22 13:03:15.421988: step: 558/466, loss: 0.055090226233005524 2023-01-22 13:03:16.054841: step: 560/466, loss: 0.022607017308473587 2023-01-22 13:03:16.658978: step: 562/466, loss: 0.001342977280728519 2023-01-22 13:03:17.248274: step: 564/466, loss: 0.009456566534936428 2023-01-22 13:03:17.825810: step: 566/466, loss: 0.03890547528862953 2023-01-22 13:03:18.479484: step: 568/466, loss: 0.008460449986159801 2023-01-22 13:03:19.085603: step: 570/466, loss: 0.0013223905116319656 2023-01-22 13:03:19.705539: step: 572/466, loss: 0.0388493649661541 2023-01-22 13:03:20.336199: step: 574/466, loss: 0.06376676261425018 2023-01-22 13:03:20.972285: step: 576/466, loss: 0.02470981702208519 2023-01-22 13:03:21.548912: step: 578/466, loss: 0.24580951035022736 2023-01-22 13:03:22.162580: step: 580/466, loss: 0.07626300305128098 2023-01-22 13:03:22.772277: step: 582/466, loss: 0.025210030376911163 2023-01-22 13:03:23.426006: step: 584/466, loss: 0.13735957443714142 2023-01-22 13:03:24.067765: step: 586/466, loss: 0.004883863963186741 2023-01-22 13:03:24.760938: step: 588/466, loss: 0.023837409913539886 2023-01-22 13:03:25.376998: step: 590/466, loss: 0.02724401466548443 2023-01-22 13:03:26.007059: step: 592/466, loss: 0.019942408427596092 2023-01-22 13:03:26.570143: step: 594/466, loss: 0.3258499801158905 2023-01-22 13:03:27.175548: step: 596/466, loss: 0.205764502286911 2023-01-22 13:03:27.795222: step: 598/466, loss: 0.017428552731871605 2023-01-22 13:03:28.461147: step: 600/466, loss: 0.019880976527929306 2023-01-22 13:03:29.011515: step: 602/466, loss: 0.0377473384141922 2023-01-22 13:03:29.655008: step: 604/466, loss: 7.953244494274259e-05 2023-01-22 13:03:30.230507: step: 606/466, loss: 0.07877268642187119 2023-01-22 13:03:30.821446: step: 608/466, loss: 0.009737065061926842 2023-01-22 13:03:31.469600: step: 610/466, loss: 0.004096858203411102 2023-01-22 13:03:32.123002: step: 612/466, loss: 0.001593309105373919 2023-01-22 13:03:32.753793: step: 614/466, loss: 0.005396370310336351 2023-01-22 13:03:33.384386: step: 616/466, loss: 0.0022211617324501276 2023-01-22 13:03:33.964773: step: 618/466, loss: 0.061748407781124115 2023-01-22 13:03:34.560923: step: 620/466, loss: 0.021056359633803368 2023-01-22 13:03:35.187840: step: 622/466, loss: 0.029573313891887665 2023-01-22 13:03:35.829443: step: 624/466, loss: 0.01509782113134861 2023-01-22 13:03:36.416417: step: 626/466, loss: 0.22090202569961548 2023-01-22 13:03:37.084999: step: 628/466, loss: 0.027898678556084633 2023-01-22 13:03:37.701025: step: 630/466, loss: 0.0019739815033972263 2023-01-22 13:03:38.307850: step: 632/466, loss: 0.012427431531250477 2023-01-22 13:03:38.918199: step: 634/466, loss: 0.005324557889252901 2023-01-22 13:03:39.583388: step: 636/466, loss: 0.5416416525840759 2023-01-22 13:03:40.191178: step: 638/466, loss: 0.02002260461449623 2023-01-22 13:03:40.737510: step: 640/466, loss: 0.0034390026703476906 2023-01-22 13:03:41.298247: step: 642/466, loss: 0.003750969422981143 2023-01-22 13:03:41.918004: step: 644/466, loss: 0.02836640365421772 2023-01-22 13:03:42.518509: step: 646/466, loss: 0.01306913048028946 2023-01-22 13:03:43.126146: step: 648/466, loss: 0.039452120661735535 2023-01-22 13:03:43.740905: step: 650/466, loss: 0.43644657731056213 2023-01-22 13:03:44.358524: step: 652/466, loss: 0.054574090987443924 2023-01-22 13:03:44.959513: step: 654/466, loss: 0.005646299570798874 2023-01-22 13:03:45.516672: step: 656/466, loss: 0.0025711979251354933 2023-01-22 13:03:46.162795: step: 658/466, loss: 0.13658203184604645 2023-01-22 13:03:46.748204: step: 660/466, loss: 0.011020969599485397 2023-01-22 13:03:47.337274: step: 662/466, loss: 0.12328854203224182 2023-01-22 13:03:47.933033: step: 664/466, loss: 0.2930150330066681 2023-01-22 13:03:48.566082: step: 666/466, loss: 0.006133504211902618 2023-01-22 13:03:49.180244: step: 668/466, loss: 0.005903858691453934 2023-01-22 13:03:49.795602: step: 670/466, loss: 0.018333403393626213 2023-01-22 13:03:50.503761: step: 672/466, loss: 0.0354229100048542 2023-01-22 13:03:51.083503: step: 674/466, loss: 0.025511808693408966 2023-01-22 13:03:51.686086: step: 676/466, loss: 0.01756656914949417 2023-01-22 13:03:52.231534: step: 678/466, loss: 0.030399370938539505 2023-01-22 13:03:52.848110: step: 680/466, loss: 0.04207872599363327 2023-01-22 13:03:53.414867: step: 682/466, loss: 0.026421351358294487 2023-01-22 13:03:54.094068: step: 684/466, loss: 0.006762126926332712 2023-01-22 13:03:54.706018: step: 686/466, loss: 0.020466167479753494 2023-01-22 13:03:55.387941: step: 688/466, loss: 0.00631640525534749 2023-01-22 13:03:56.001353: step: 690/466, loss: 0.04035324230790138 2023-01-22 13:03:56.553559: step: 692/466, loss: 0.009602941572666168 2023-01-22 13:03:57.120743: step: 694/466, loss: 0.03543735668063164 2023-01-22 13:03:57.693737: step: 696/466, loss: 0.12897473573684692 2023-01-22 13:03:58.371665: step: 698/466, loss: 0.002430747961625457 2023-01-22 13:03:58.964766: step: 700/466, loss: 0.10553860664367676 2023-01-22 13:03:59.535382: step: 702/466, loss: 0.2577919661998749 2023-01-22 13:04:00.132259: step: 704/466, loss: 0.04251670464873314 2023-01-22 13:04:00.734543: step: 706/466, loss: 0.019595172256231308 2023-01-22 13:04:01.390148: step: 708/466, loss: 0.008916772902011871 2023-01-22 13:04:01.968235: step: 710/466, loss: 0.04072237387299538 2023-01-22 13:04:02.653739: step: 712/466, loss: 0.08060120791196823 2023-01-22 13:04:03.207343: step: 714/466, loss: 0.015646446496248245 2023-01-22 13:04:03.925392: step: 716/466, loss: 0.04056132212281227 2023-01-22 13:04:04.539499: step: 718/466, loss: 0.056185342371463776 2023-01-22 13:04:05.302266: step: 720/466, loss: 0.01655575819313526 2023-01-22 13:04:05.895356: step: 722/466, loss: 0.04714022949337959 2023-01-22 13:04:06.431488: step: 724/466, loss: 0.008735047653317451 2023-01-22 13:04:06.999913: step: 726/466, loss: 0.05232086777687073 2023-01-22 13:04:07.632921: step: 728/466, loss: 0.013638571836054325 2023-01-22 13:04:08.269564: step: 730/466, loss: 0.020393308252096176 2023-01-22 13:04:08.862892: step: 732/466, loss: 0.037196703255176544 2023-01-22 13:04:09.424822: step: 734/466, loss: 0.0359979048371315 2023-01-22 13:04:09.995596: step: 736/466, loss: 0.036491766571998596 2023-01-22 13:04:10.622801: step: 738/466, loss: 1.5769621133804321 2023-01-22 13:04:11.289323: step: 740/466, loss: 0.013950319960713387 2023-01-22 13:04:11.909983: step: 742/466, loss: 0.13955432176589966 2023-01-22 13:04:12.514256: step: 744/466, loss: 0.014387154020369053 2023-01-22 13:04:13.158739: step: 746/466, loss: 0.025880424305796623 2023-01-22 13:04:13.739649: step: 748/466, loss: 0.008674003183841705 2023-01-22 13:04:14.300932: step: 750/466, loss: 0.002916875295341015 2023-01-22 13:04:14.960652: step: 752/466, loss: 0.015369150787591934 2023-01-22 13:04:15.628677: step: 754/466, loss: 0.09049924463033676 2023-01-22 13:04:16.235888: step: 756/466, loss: 0.009892329573631287 2023-01-22 13:04:16.870394: step: 758/466, loss: 0.02250661514699459 2023-01-22 13:04:17.461302: step: 760/466, loss: 0.019506581127643585 2023-01-22 13:04:18.029991: step: 762/466, loss: 0.015529129654169083 2023-01-22 13:04:18.674725: step: 764/466, loss: 0.004296987317502499 2023-01-22 13:04:19.310134: step: 766/466, loss: 0.05676477029919624 2023-01-22 13:04:19.926991: step: 768/466, loss: 0.01909170299768448 2023-01-22 13:04:20.568303: step: 770/466, loss: 0.07450731098651886 2023-01-22 13:04:21.156034: step: 772/466, loss: 0.03332969546318054 2023-01-22 13:04:21.809165: step: 774/466, loss: 0.048512302339076996 2023-01-22 13:04:22.385764: step: 776/466, loss: 0.009806359186768532 2023-01-22 13:04:23.000962: step: 778/466, loss: 0.10658646374940872 2023-01-22 13:04:23.566296: step: 780/466, loss: 0.02136125974357128 2023-01-22 13:04:24.241125: step: 782/466, loss: 0.02680668793618679 2023-01-22 13:04:24.840683: step: 784/466, loss: 0.01709206961095333 2023-01-22 13:04:25.520492: step: 786/466, loss: 0.0015258606290444732 2023-01-22 13:04:26.143732: step: 788/466, loss: 0.0022918814793229103 2023-01-22 13:04:26.716527: step: 790/466, loss: 0.04231948405504227 2023-01-22 13:04:27.278322: step: 792/466, loss: 0.0018199823098257184 2023-01-22 13:04:27.951511: step: 794/466, loss: 0.041555311530828476 2023-01-22 13:04:28.623878: step: 796/466, loss: 0.009676672518253326 2023-01-22 13:04:29.226336: step: 798/466, loss: 0.0991070494055748 2023-01-22 13:04:29.860357: step: 800/466, loss: 0.25320005416870117 2023-01-22 13:04:30.476454: step: 802/466, loss: 0.015998797491192818 2023-01-22 13:04:31.142412: step: 804/466, loss: 0.011345572769641876 2023-01-22 13:04:31.737863: step: 806/466, loss: 0.005489513278007507 2023-01-22 13:04:32.373774: step: 808/466, loss: 0.08279027789831161 2023-01-22 13:04:32.930218: step: 810/466, loss: 0.03269160911440849 2023-01-22 13:04:33.449642: step: 812/466, loss: 0.006571331061422825 2023-01-22 13:04:34.051036: step: 814/466, loss: 0.0014252661494538188 2023-01-22 13:04:34.680380: step: 816/466, loss: 0.012872443534433842 2023-01-22 13:04:35.286568: step: 818/466, loss: 0.403464138507843 2023-01-22 13:04:36.005792: step: 820/466, loss: 0.011302152648568153 2023-01-22 13:04:36.647762: step: 822/466, loss: 0.06163051724433899 2023-01-22 13:04:37.247402: step: 824/466, loss: 0.014406262896955013 2023-01-22 13:04:37.867376: step: 826/466, loss: 0.013885182328522205 2023-01-22 13:04:38.501189: step: 828/466, loss: 0.003926701378077269 2023-01-22 13:04:39.122544: step: 830/466, loss: 0.07288981229066849 2023-01-22 13:04:39.743514: step: 832/466, loss: 0.0074288509786129 2023-01-22 13:04:40.398617: step: 834/466, loss: 0.0028718686662614346 2023-01-22 13:04:41.010297: step: 836/466, loss: 0.0031497676391154528 2023-01-22 13:04:41.604482: step: 838/466, loss: 0.03677372261881828 2023-01-22 13:04:42.214244: step: 840/466, loss: 0.01764160580933094 2023-01-22 13:04:42.859727: step: 842/466, loss: 0.00905714463442564 2023-01-22 13:04:43.479467: step: 844/466, loss: 0.052184417843818665 2023-01-22 13:04:44.053425: step: 846/466, loss: 0.00874386541545391 2023-01-22 13:04:44.640971: step: 848/466, loss: 0.0046443939208984375 2023-01-22 13:04:45.238110: step: 850/466, loss: 0.01964038610458374 2023-01-22 13:04:45.876224: step: 852/466, loss: 0.04825638607144356 2023-01-22 13:04:46.434345: step: 854/466, loss: 0.007597595453262329 2023-01-22 13:04:47.039073: step: 856/466, loss: 0.03477509319782257 2023-01-22 13:04:47.634623: step: 858/466, loss: 0.0387188121676445 2023-01-22 13:04:48.204242: step: 860/466, loss: 0.714797854423523 2023-01-22 13:04:48.845878: step: 862/466, loss: 0.0008584187598899007 2023-01-22 13:04:49.460411: step: 864/466, loss: 0.12720586359500885 2023-01-22 13:04:50.105682: step: 866/466, loss: 0.03522484004497528 2023-01-22 13:04:50.733729: step: 868/466, loss: 0.0659174770116806 2023-01-22 13:04:51.271043: step: 870/466, loss: 0.0002512026985641569 2023-01-22 13:04:51.863861: step: 872/466, loss: 0.016449326649308205 2023-01-22 13:04:52.442404: step: 874/466, loss: 0.0061976853758096695 2023-01-22 13:04:53.001230: step: 876/466, loss: 0.01034632883965969 2023-01-22 13:04:53.600324: step: 878/466, loss: 0.024085231125354767 2023-01-22 13:04:54.239909: step: 880/466, loss: 0.04048647731542587 2023-01-22 13:04:54.822134: step: 882/466, loss: 0.00844142772257328 2023-01-22 13:04:55.486466: step: 884/466, loss: 0.01513928547501564 2023-01-22 13:04:56.116201: step: 886/466, loss: 0.3922380208969116 2023-01-22 13:04:56.715378: step: 888/466, loss: 0.058890506625175476 2023-01-22 13:04:57.340374: step: 890/466, loss: 0.011841786094009876 2023-01-22 13:04:57.985149: step: 892/466, loss: 0.00801936350762844 2023-01-22 13:04:58.545895: step: 894/466, loss: 0.021403668448328972 2023-01-22 13:04:59.147151: step: 896/466, loss: 0.009452925063669682 2023-01-22 13:04:59.768388: step: 898/466, loss: 0.005652129650115967 2023-01-22 13:05:00.351289: step: 900/466, loss: 0.058477722108364105 2023-01-22 13:05:01.006977: step: 902/466, loss: 0.018946265801787376 2023-01-22 13:05:01.626431: step: 904/466, loss: 0.07883594185113907 2023-01-22 13:05:02.252253: step: 906/466, loss: 0.011059698648750782 2023-01-22 13:05:02.837956: step: 908/466, loss: 0.0018178574973717332 2023-01-22 13:05:03.429384: step: 910/466, loss: 0.012226266786456108 2023-01-22 13:05:04.152961: step: 912/466, loss: 0.023213017731904984 2023-01-22 13:05:04.743413: step: 914/466, loss: 0.04142146185040474 2023-01-22 13:05:05.282566: step: 916/466, loss: 0.18669646978378296 2023-01-22 13:05:05.938015: step: 918/466, loss: 0.002439426491037011 2023-01-22 13:05:06.562277: step: 920/466, loss: 0.0272270068526268 2023-01-22 13:05:07.076416: step: 922/466, loss: 0.13135747611522675 2023-01-22 13:05:07.732200: step: 924/466, loss: 0.0030987828504294157 2023-01-22 13:05:08.312041: step: 926/466, loss: 0.017529543489217758 2023-01-22 13:05:08.873782: step: 928/466, loss: 0.02338702231645584 2023-01-22 13:05:09.466124: step: 930/466, loss: 0.009364855475723743 2023-01-22 13:05:10.077230: step: 932/466, loss: 0.07453788071870804 ================================================== Loss: 0.064 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2981798373522139, 'r': 0.3457075533628134, 'f1': 0.3201895968755758}, 'combined': 0.2359291766451611, 'epoch': 28} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3377330832269227, 'r': 0.33159779365273123, 'f1': 0.33463731947844366}, 'combined': 0.22193563157119578, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2909959676097433, 'r': 0.28493355161787365, 'f1': 0.28793285216121967}, 'combined': 0.19195523477414644, 'epoch': 28} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3466204119107148, 'r': 0.30404247203932944, 'f1': 0.3239383388732395}, 'combined': 0.21141238958042993, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28872658286961156, 'r': 0.34406127901729805, 'f1': 0.31397453513786333}, 'combined': 0.23134965747000455, 'epoch': 28} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.33022509810092404, 'r': 0.3190842859677614, 'f1': 0.324559115335444}, 'combined': 0.21525164125874005, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24255952380952378, 'r': 0.38809523809523805, 'f1': 0.29853479853479853}, 'combined': 0.199023199023199, 'epoch': 28} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4625, 'r': 0.40217391304347827, 'f1': 0.43023255813953487}, 'combined': 0.28682170542635654, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29347826086956524, 'r': 0.23275862068965517, 'f1': 0.25961538461538464}, 'combined': 0.17307692307692307, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:07:35.961570: step: 2/466, loss: 0.04386208578944206 2023-01-22 13:07:36.555436: step: 4/466, loss: 0.012072478421032429 2023-01-22 13:07:37.240389: step: 6/466, loss: 0.005538816098123789 2023-01-22 13:07:37.784487: step: 8/466, loss: 0.002927206689491868 2023-01-22 13:07:38.430672: step: 10/466, loss: 0.0018422355642542243 2023-01-22 13:07:39.004841: step: 12/466, loss: 0.006833975203335285 2023-01-22 13:07:39.599603: step: 14/466, loss: 0.7979661822319031 2023-01-22 13:07:40.314073: step: 16/466, loss: 2.504417657852173 2023-01-22 13:07:40.992204: step: 18/466, loss: 0.0023924887645989656 2023-01-22 13:07:41.549484: step: 20/466, loss: 0.03579749912023544 2023-01-22 13:07:42.230683: step: 22/466, loss: 0.1898466944694519 2023-01-22 13:07:42.816707: step: 24/466, loss: 0.008246527053415775 2023-01-22 13:07:43.381417: step: 26/466, loss: 0.014673544093966484 2023-01-22 13:07:43.959963: step: 28/466, loss: 0.012497391551733017 2023-01-22 13:07:44.566139: step: 30/466, loss: 0.02695520780980587 2023-01-22 13:07:45.089057: step: 32/466, loss: 0.004595303442329168 2023-01-22 13:07:45.658098: step: 34/466, loss: 0.005384698044508696 2023-01-22 13:07:46.303074: step: 36/466, loss: 0.029108962044119835 2023-01-22 13:07:46.877851: step: 38/466, loss: 0.00017507233133073896 2023-01-22 13:07:47.502152: step: 40/466, loss: 0.0008183540776371956 2023-01-22 13:07:48.064554: step: 42/466, loss: 0.2459452599287033 2023-01-22 13:07:48.601521: step: 44/466, loss: 0.021580185741186142 2023-01-22 13:07:49.220193: step: 46/466, loss: 0.05248536914587021 2023-01-22 13:07:49.784563: step: 48/466, loss: 0.034506577998399734 2023-01-22 13:07:50.370949: step: 50/466, loss: 0.0018523776670917869 2023-01-22 13:07:50.953239: step: 52/466, loss: 0.04379947483539581 2023-01-22 13:07:51.565906: step: 54/466, loss: 0.07555657625198364 2023-01-22 13:07:52.141101: step: 56/466, loss: 0.05783117190003395 2023-01-22 13:07:52.732138: step: 58/466, loss: 0.446468323469162 2023-01-22 13:07:53.362866: step: 60/466, loss: 0.0647222027182579 2023-01-22 13:07:53.992556: step: 62/466, loss: 0.019346090033650398 2023-01-22 13:07:54.659209: step: 64/466, loss: 0.07000437378883362 2023-01-22 13:07:55.253762: step: 66/466, loss: 0.07137604802846909 2023-01-22 13:07:55.823578: step: 68/466, loss: 0.005074875894933939 2023-01-22 13:07:56.439483: step: 70/466, loss: 0.01667775772511959 2023-01-22 13:07:57.034976: step: 72/466, loss: 0.04960961639881134 2023-01-22 13:07:57.581393: step: 74/466, loss: 0.03634113445878029 2023-01-22 13:07:58.188068: step: 76/466, loss: 0.009740367531776428 2023-01-22 13:07:58.750436: step: 78/466, loss: 0.029107673093676567 2023-01-22 13:07:59.328436: step: 80/466, loss: 0.044322799891233444 2023-01-22 13:07:59.893618: step: 82/466, loss: 0.0037913850974291563 2023-01-22 13:08:00.536403: step: 84/466, loss: 0.0012504170881584287 2023-01-22 13:08:01.116632: step: 86/466, loss: 0.01192387379705906 2023-01-22 13:08:01.696434: step: 88/466, loss: 0.013240060769021511 2023-01-22 13:08:02.305421: step: 90/466, loss: 0.01812347024679184 2023-01-22 13:08:02.929023: step: 92/466, loss: 0.01432541012763977 2023-01-22 13:08:03.535684: step: 94/466, loss: 0.005916904658079147 2023-01-22 13:08:04.129337: step: 96/466, loss: 0.19921573996543884 2023-01-22 13:08:04.657252: step: 98/466, loss: 0.0003574567672330886 2023-01-22 13:08:05.290418: step: 100/466, loss: 0.0010081242071464658 2023-01-22 13:08:05.853510: step: 102/466, loss: 0.00035429472336545587 2023-01-22 13:08:06.483088: step: 104/466, loss: 0.004386215470731258 2023-01-22 13:08:07.095328: step: 106/466, loss: 0.08126308768987656 2023-01-22 13:08:07.671302: step: 108/466, loss: 0.30261746048927307 2023-01-22 13:08:08.280980: step: 110/466, loss: 0.0006659993669018149 2023-01-22 13:08:08.868696: step: 112/466, loss: 0.7788109183311462 2023-01-22 13:08:09.455287: step: 114/466, loss: 0.002139084041118622 2023-01-22 13:08:10.137599: step: 116/466, loss: 0.04724963381886482 2023-01-22 13:08:10.760958: step: 118/466, loss: 0.08716720342636108 2023-01-22 13:08:11.406777: step: 120/466, loss: 0.007712248712778091 2023-01-22 13:08:11.988527: step: 122/466, loss: 0.019319266080856323 2023-01-22 13:08:12.552410: step: 124/466, loss: 0.002920225029811263 2023-01-22 13:08:13.184436: step: 126/466, loss: 0.044984228909015656 2023-01-22 13:08:13.742379: step: 128/466, loss: 0.242207333445549 2023-01-22 13:08:14.293657: step: 130/466, loss: 0.1114872545003891 2023-01-22 13:08:14.938537: step: 132/466, loss: 0.015735222026705742 2023-01-22 13:08:15.600453: step: 134/466, loss: 0.04876323789358139 2023-01-22 13:08:16.244239: step: 136/466, loss: 0.003588942578062415 2023-01-22 13:08:16.863452: step: 138/466, loss: 0.015752285718917847 2023-01-22 13:08:17.485234: step: 140/466, loss: 0.02088429592549801 2023-01-22 13:08:18.106060: step: 142/466, loss: 0.05881446972489357 2023-01-22 13:08:18.667681: step: 144/466, loss: 0.034169264137744904 2023-01-22 13:08:19.263290: step: 146/466, loss: 0.016726719215512276 2023-01-22 13:08:19.870319: step: 148/466, loss: 0.15403063595294952 2023-01-22 13:08:20.475525: step: 150/466, loss: 0.0009215656900778413 2023-01-22 13:08:21.120513: step: 152/466, loss: 0.002376297488808632 2023-01-22 13:08:21.696371: step: 154/466, loss: 0.0004850882978644222 2023-01-22 13:08:22.278569: step: 156/466, loss: 0.0015028626658022404 2023-01-22 13:08:22.862231: step: 158/466, loss: 0.006702759303152561 2023-01-22 13:08:23.481781: step: 160/466, loss: 0.013435509987175465 2023-01-22 13:08:24.019790: step: 162/466, loss: 0.08299136906862259 2023-01-22 13:08:24.590222: step: 164/466, loss: 0.008563477545976639 2023-01-22 13:08:25.195100: step: 166/466, loss: 0.018727490678429604 2023-01-22 13:08:25.890569: step: 168/466, loss: 0.00214596395380795 2023-01-22 13:08:26.506893: step: 170/466, loss: 0.005763393361121416 2023-01-22 13:08:27.106154: step: 172/466, loss: 0.011483149603009224 2023-01-22 13:08:27.701865: step: 174/466, loss: 0.0037359632551670074 2023-01-22 13:08:28.379704: step: 176/466, loss: 0.0036448759492486715 2023-01-22 13:08:28.966133: step: 178/466, loss: 0.000318721286021173 2023-01-22 13:08:29.544322: step: 180/466, loss: 0.01972508803009987 2023-01-22 13:08:30.204617: step: 182/466, loss: 0.03576623275876045 2023-01-22 13:08:30.805563: step: 184/466, loss: 0.0046725524589419365 2023-01-22 13:08:31.408878: step: 186/466, loss: 0.017895927652716637 2023-01-22 13:08:32.033341: step: 188/466, loss: 0.01983766257762909 2023-01-22 13:08:32.744019: step: 190/466, loss: 0.006723640486598015 2023-01-22 13:08:33.325993: step: 192/466, loss: 0.006670257076621056 2023-01-22 13:08:33.890195: step: 194/466, loss: 0.008300933986902237 2023-01-22 13:08:34.514354: step: 196/466, loss: 0.010337584652006626 2023-01-22 13:08:35.137784: step: 198/466, loss: 0.00903736799955368 2023-01-22 13:08:35.697044: step: 200/466, loss: 0.016851140186190605 2023-01-22 13:08:36.319552: step: 202/466, loss: 0.03792807459831238 2023-01-22 13:08:37.026272: step: 204/466, loss: 0.02895973064005375 2023-01-22 13:08:37.665276: step: 206/466, loss: 0.007502868305891752 2023-01-22 13:08:38.263851: step: 208/466, loss: 0.031056923791766167 2023-01-22 13:08:38.857963: step: 210/466, loss: 0.013935143128037453 2023-01-22 13:08:39.442919: step: 212/466, loss: 0.002155495807528496 2023-01-22 13:08:40.063554: step: 214/466, loss: 0.01674588769674301 2023-01-22 13:08:40.694049: step: 216/466, loss: 0.0019156233174726367 2023-01-22 13:08:41.269691: step: 218/466, loss: 0.08050195872783661 2023-01-22 13:08:41.795172: step: 220/466, loss: 0.0023728234227746725 2023-01-22 13:08:42.357117: step: 222/466, loss: 0.11334630846977234 2023-01-22 13:08:42.963195: step: 224/466, loss: 0.0033581804018467665 2023-01-22 13:08:43.559736: step: 226/466, loss: 0.004865583032369614 2023-01-22 13:08:44.198613: step: 228/466, loss: 0.1188046932220459 2023-01-22 13:08:44.806833: step: 230/466, loss: 0.017414169386029243 2023-01-22 13:08:45.398183: step: 232/466, loss: 0.0009787804447114468 2023-01-22 13:08:45.937840: step: 234/466, loss: 0.0036582364700734615 2023-01-22 13:08:46.501809: step: 236/466, loss: 0.007959416136145592 2023-01-22 13:08:47.070989: step: 238/466, loss: 0.02049945294857025 2023-01-22 13:08:47.643874: step: 240/466, loss: 0.0003932398685719818 2023-01-22 13:08:48.259642: step: 242/466, loss: 0.015090403147041798 2023-01-22 13:08:48.843073: step: 244/466, loss: 0.011627012863755226 2023-01-22 13:08:49.415756: step: 246/466, loss: 0.053877320140600204 2023-01-22 13:08:50.024836: step: 248/466, loss: 0.019046692177653313 2023-01-22 13:08:50.597631: step: 250/466, loss: 0.0024457424879074097 2023-01-22 13:08:51.160330: step: 252/466, loss: 0.01016867347061634 2023-01-22 13:08:51.752974: step: 254/466, loss: 0.001433478551916778 2023-01-22 13:08:52.374412: step: 256/466, loss: 0.023510020226240158 2023-01-22 13:08:52.936924: step: 258/466, loss: 0.0041503384709358215 2023-01-22 13:08:53.494038: step: 260/466, loss: 0.0030759386718273163 2023-01-22 13:08:54.016656: step: 262/466, loss: 0.05536990612745285 2023-01-22 13:08:54.814505: step: 264/466, loss: 0.0028465837240219116 2023-01-22 13:08:55.442958: step: 266/466, loss: 0.0016013040440157056 2023-01-22 13:08:56.011707: step: 268/466, loss: 0.032356321811676025 2023-01-22 13:08:56.577829: step: 270/466, loss: 0.029682258144021034 2023-01-22 13:08:57.188023: step: 272/466, loss: 0.0367642343044281 2023-01-22 13:08:57.860648: step: 274/466, loss: 0.06313731521368027 2023-01-22 13:08:58.410802: step: 276/466, loss: 0.0310556311160326 2023-01-22 13:08:58.960263: step: 278/466, loss: 0.0056461249478161335 2023-01-22 13:08:59.569077: step: 280/466, loss: 0.00848975870758295 2023-01-22 13:09:00.157535: step: 282/466, loss: 0.28914815187454224 2023-01-22 13:09:00.761670: step: 284/466, loss: 0.014305856078863144 2023-01-22 13:09:01.338732: step: 286/466, loss: 0.01340281218290329 2023-01-22 13:09:01.919788: step: 288/466, loss: 0.32226353883743286 2023-01-22 13:09:02.624009: step: 290/466, loss: 0.026695629581809044 2023-01-22 13:09:03.218956: step: 292/466, loss: 0.1655883491039276 2023-01-22 13:09:03.766746: step: 294/466, loss: 0.0001594788918737322 2023-01-22 13:09:04.380928: step: 296/466, loss: 0.08155865222215652 2023-01-22 13:09:05.033914: step: 298/466, loss: 0.008540213108062744 2023-01-22 13:09:05.717679: step: 300/466, loss: 0.0696592628955841 2023-01-22 13:09:06.369968: step: 302/466, loss: 0.002189208986237645 2023-01-22 13:09:06.976053: step: 304/466, loss: 0.33625826239585876 2023-01-22 13:09:07.517232: step: 306/466, loss: 0.009354050271213055 2023-01-22 13:09:08.218513: step: 308/466, loss: 0.022616852074861526 2023-01-22 13:09:08.803740: step: 310/466, loss: 0.0198007021099329 2023-01-22 13:09:09.436686: step: 312/466, loss: 0.012987097725272179 2023-01-22 13:09:10.029458: step: 314/466, loss: 0.01676531694829464 2023-01-22 13:09:10.611929: step: 316/466, loss: 0.0025543151423335075 2023-01-22 13:09:11.210449: step: 318/466, loss: 0.3158953785896301 2023-01-22 13:09:11.956394: step: 320/466, loss: 0.1547013521194458 2023-01-22 13:09:12.570037: step: 322/466, loss: 0.010139084421098232 2023-01-22 13:09:13.145482: step: 324/466, loss: 0.014096586033701897 2023-01-22 13:09:13.770227: step: 326/466, loss: 0.012834510765969753 2023-01-22 13:09:14.334458: step: 328/466, loss: 0.02946978434920311 2023-01-22 13:09:14.919587: step: 330/466, loss: 0.00866196770220995 2023-01-22 13:09:15.502970: step: 332/466, loss: 0.03772924840450287 2023-01-22 13:09:16.147088: step: 334/466, loss: 0.015611520037055016 2023-01-22 13:09:16.787632: step: 336/466, loss: 0.010216974653303623 2023-01-22 13:09:17.409054: step: 338/466, loss: 0.010417732410132885 2023-01-22 13:09:17.984576: step: 340/466, loss: 0.00566525012254715 2023-01-22 13:09:18.618136: step: 342/466, loss: 0.0067802900448441505 2023-01-22 13:09:19.251776: step: 344/466, loss: 0.030475255101919174 2023-01-22 13:09:19.879551: step: 346/466, loss: 0.05227546766400337 2023-01-22 13:09:20.494238: step: 348/466, loss: 0.6098231673240662 2023-01-22 13:09:21.078214: step: 350/466, loss: 0.011939155869185925 2023-01-22 13:09:21.737515: step: 352/466, loss: 0.010138709098100662 2023-01-22 13:09:22.392958: step: 354/466, loss: 0.3563244640827179 2023-01-22 13:09:22.979429: step: 356/466, loss: 0.021631816402077675 2023-01-22 13:09:23.657275: step: 358/466, loss: 0.028252137824892998 2023-01-22 13:09:24.278975: step: 360/466, loss: 0.02579502761363983 2023-01-22 13:09:24.979988: step: 362/466, loss: 1.4603418111801147 2023-01-22 13:09:25.512831: step: 364/466, loss: 0.005669127218425274 2023-01-22 13:09:26.108662: step: 366/466, loss: 0.026713905856013298 2023-01-22 13:09:26.723701: step: 368/466, loss: 0.002620895393192768 2023-01-22 13:09:27.313207: step: 370/466, loss: 0.02652410790324211 2023-01-22 13:09:27.941118: step: 372/466, loss: 0.04273194819688797 2023-01-22 13:09:28.658210: step: 374/466, loss: 0.023813379928469658 2023-01-22 13:09:29.257194: step: 376/466, loss: 0.02738260105252266 2023-01-22 13:09:29.901198: step: 378/466, loss: 0.02079232968389988 2023-01-22 13:09:30.470456: step: 380/466, loss: 0.0023200856521725655 2023-01-22 13:09:31.086619: step: 382/466, loss: 0.009450546465814114 2023-01-22 13:09:31.635427: step: 384/466, loss: 0.0009971472900360823 2023-01-22 13:09:32.364606: step: 386/466, loss: 0.06446226686239243 2023-01-22 13:09:32.929077: step: 388/466, loss: 0.03794258087873459 2023-01-22 13:09:33.588581: step: 390/466, loss: 0.0006017423584125936 2023-01-22 13:09:34.172888: step: 392/466, loss: 0.00589497247710824 2023-01-22 13:09:34.724339: step: 394/466, loss: 0.00412049749866128 2023-01-22 13:09:35.373182: step: 396/466, loss: 0.010548084042966366 2023-01-22 13:09:35.974940: step: 398/466, loss: 0.1052076667547226 2023-01-22 13:09:36.581606: step: 400/466, loss: 0.005869503598660231 2023-01-22 13:09:37.180729: step: 402/466, loss: 0.004526306409388781 2023-01-22 13:09:37.779885: step: 404/466, loss: 0.06651441007852554 2023-01-22 13:09:38.370512: step: 406/466, loss: 0.00849270448088646 2023-01-22 13:09:38.922659: step: 408/466, loss: 0.0012003247393295169 2023-01-22 13:09:39.560354: step: 410/466, loss: 0.03083435632288456 2023-01-22 13:09:40.212226: step: 412/466, loss: 0.04964280128479004 2023-01-22 13:09:40.821770: step: 414/466, loss: 0.014443082734942436 2023-01-22 13:09:41.427925: step: 416/466, loss: 0.0007269966299645603 2023-01-22 13:09:42.072678: step: 418/466, loss: 0.017137521877884865 2023-01-22 13:09:42.682543: step: 420/466, loss: 0.015335258096456528 2023-01-22 13:09:43.309802: step: 422/466, loss: 0.008419616147875786 2023-01-22 13:09:43.912181: step: 424/466, loss: 0.0051277210004627705 2023-01-22 13:09:44.582438: step: 426/466, loss: 0.013941477052867413 2023-01-22 13:09:45.185146: step: 428/466, loss: 0.017742734402418137 2023-01-22 13:09:45.866041: step: 430/466, loss: 0.005647761281579733 2023-01-22 13:09:46.474025: step: 432/466, loss: 0.004662233404815197 2023-01-22 13:09:47.141409: step: 434/466, loss: 1.107483148574829 2023-01-22 13:09:47.762730: step: 436/466, loss: 0.014265783131122589 2023-01-22 13:09:48.412336: step: 438/466, loss: 1.2633209228515625 2023-01-22 13:09:49.017215: step: 440/466, loss: 0.0201712679117918 2023-01-22 13:09:49.548645: step: 442/466, loss: 0.014451676979660988 2023-01-22 13:09:50.158614: step: 444/466, loss: 0.13905183970928192 2023-01-22 13:09:50.812397: step: 446/466, loss: 0.034704096615314484 2023-01-22 13:09:51.364696: step: 448/466, loss: 0.012170841917395592 2023-01-22 13:09:51.947852: step: 450/466, loss: 0.08967079222202301 2023-01-22 13:09:52.583078: step: 452/466, loss: 0.008854460902512074 2023-01-22 13:09:53.171007: step: 454/466, loss: 0.006327802315354347 2023-01-22 13:09:53.799453: step: 456/466, loss: 0.31326547265052795 2023-01-22 13:09:54.408576: step: 458/466, loss: 0.001508050598204136 2023-01-22 13:09:54.994678: step: 460/466, loss: 0.008478164672851562 2023-01-22 13:09:55.549429: step: 462/466, loss: 0.015076616778969765 2023-01-22 13:09:56.154495: step: 464/466, loss: 0.018953507766127586 2023-01-22 13:09:56.748165: step: 466/466, loss: 0.9736811518669128 2023-01-22 13:09:57.381993: step: 468/466, loss: 0.029113473370671272 2023-01-22 13:09:58.025689: step: 470/466, loss: 0.031205737963318825 2023-01-22 13:09:58.598237: step: 472/466, loss: 0.0006522354669868946 2023-01-22 13:09:59.193659: step: 474/466, loss: 0.0013683760771527886 2023-01-22 13:09:59.791568: step: 476/466, loss: 0.0010390589013695717 2023-01-22 13:10:00.341754: step: 478/466, loss: 0.029676884412765503 2023-01-22 13:10:00.952970: step: 480/466, loss: 0.11172933876514435 2023-01-22 13:10:01.601938: step: 482/466, loss: 0.1738106608390808 2023-01-22 13:10:02.231443: step: 484/466, loss: 0.013176539912819862 2023-01-22 13:10:02.904478: step: 486/466, loss: 0.03815144672989845 2023-01-22 13:10:03.521685: step: 488/466, loss: 0.15745548903942108 2023-01-22 13:10:04.084395: step: 490/466, loss: 0.8841054439544678 2023-01-22 13:10:04.730266: step: 492/466, loss: 0.17422613501548767 2023-01-22 13:10:05.330243: step: 494/466, loss: 0.0311062540858984 2023-01-22 13:10:05.970373: step: 496/466, loss: 0.47466355562210083 2023-01-22 13:10:06.627077: step: 498/466, loss: 0.004679899197071791 2023-01-22 13:10:07.242047: step: 500/466, loss: 0.0400448776781559 2023-01-22 13:10:07.832554: step: 502/466, loss: 0.08364969491958618 2023-01-22 13:10:08.447109: step: 504/466, loss: 0.015947094187140465 2023-01-22 13:10:09.074354: step: 506/466, loss: 0.04322103038430214 2023-01-22 13:10:09.663855: step: 508/466, loss: 0.09971942752599716 2023-01-22 13:10:10.291449: step: 510/466, loss: 0.33148303627967834 2023-01-22 13:10:10.990949: step: 512/466, loss: 0.014690735377371311 2023-01-22 13:10:11.600183: step: 514/466, loss: 0.012732322327792645 2023-01-22 13:10:12.271198: step: 516/466, loss: 0.012350421398878098 2023-01-22 13:10:12.855224: step: 518/466, loss: 0.031112931668758392 2023-01-22 13:10:13.489169: step: 520/466, loss: 0.004019064828753471 2023-01-22 13:10:14.046754: step: 522/466, loss: 0.0008966823806986213 2023-01-22 13:10:14.602859: step: 524/466, loss: 0.03462705761194229 2023-01-22 13:10:15.237551: step: 526/466, loss: 0.0080671152099967 2023-01-22 13:10:15.834003: step: 528/466, loss: 0.005421999841928482 2023-01-22 13:10:16.475743: step: 530/466, loss: 0.01156659610569477 2023-01-22 13:10:17.141500: step: 532/466, loss: 0.002874686848372221 2023-01-22 13:10:17.694303: step: 534/466, loss: 0.004489653278142214 2023-01-22 13:10:18.263104: step: 536/466, loss: 0.024734828621149063 2023-01-22 13:10:18.833832: step: 538/466, loss: 0.009555038064718246 2023-01-22 13:10:19.432675: step: 540/466, loss: 0.02235630340874195 2023-01-22 13:10:20.118397: step: 542/466, loss: 0.03142331540584564 2023-01-22 13:10:20.733955: step: 544/466, loss: 0.010514368303120136 2023-01-22 13:10:21.412165: step: 546/466, loss: 0.006072200834751129 2023-01-22 13:10:22.000193: step: 548/466, loss: 0.2278628945350647 2023-01-22 13:10:22.567980: step: 550/466, loss: 0.005550784058868885 2023-01-22 13:10:23.178413: step: 552/466, loss: 0.029451590031385422 2023-01-22 13:10:23.831036: step: 554/466, loss: 0.022609373554587364 2023-01-22 13:10:24.426700: step: 556/466, loss: 0.029526591300964355 2023-01-22 13:10:24.996017: step: 558/466, loss: 0.009895593859255314 2023-01-22 13:10:25.647452: step: 560/466, loss: 0.009355615824460983 2023-01-22 13:10:26.255649: step: 562/466, loss: 3.3665573596954346 2023-01-22 13:10:26.855821: step: 564/466, loss: 0.090004563331604 2023-01-22 13:10:27.494803: step: 566/466, loss: 0.0005413692560978234 2023-01-22 13:10:28.119809: step: 568/466, loss: 0.05639300495386124 2023-01-22 13:10:28.770931: step: 570/466, loss: 0.011769573204219341 2023-01-22 13:10:29.390838: step: 572/466, loss: 0.017560189589858055 2023-01-22 13:10:30.018883: step: 574/466, loss: 0.01766313426196575 2023-01-22 13:10:30.568019: step: 576/466, loss: 0.0012013798113912344 2023-01-22 13:10:31.122936: step: 578/466, loss: 0.005746868904680014 2023-01-22 13:10:31.709120: step: 580/466, loss: 0.0014637453714385629 2023-01-22 13:10:32.362005: step: 582/466, loss: 0.07036874443292618 2023-01-22 13:10:33.017080: step: 584/466, loss: 0.03070271760225296 2023-01-22 13:10:33.623880: step: 586/466, loss: 0.005935574881732464 2023-01-22 13:10:34.300789: step: 588/466, loss: 0.04731940105557442 2023-01-22 13:10:34.876289: step: 590/466, loss: 0.1229914054274559 2023-01-22 13:10:35.515985: step: 592/466, loss: 0.028384150937199593 2023-01-22 13:10:36.102566: step: 594/466, loss: 0.0725310817360878 2023-01-22 13:10:36.738626: step: 596/466, loss: 0.0028359137941151857 2023-01-22 13:10:37.392522: step: 598/466, loss: 0.03283404931426048 2023-01-22 13:10:38.004879: step: 600/466, loss: 0.007365007419139147 2023-01-22 13:10:38.598105: step: 602/466, loss: 0.0025631687603890896 2023-01-22 13:10:39.225069: step: 604/466, loss: 0.011097467504441738 2023-01-22 13:10:39.849383: step: 606/466, loss: 0.0015583861386403441 2023-01-22 13:10:40.430889: step: 608/466, loss: 0.1234859973192215 2023-01-22 13:10:41.056355: step: 610/466, loss: 0.1035982072353363 2023-01-22 13:10:41.594023: step: 612/466, loss: 0.006027908064424992 2023-01-22 13:10:42.147977: step: 614/466, loss: 0.030623938888311386 2023-01-22 13:10:42.751188: step: 616/466, loss: 0.040002647787332535 2023-01-22 13:10:43.337357: step: 618/466, loss: 0.0009975489228963852 2023-01-22 13:10:43.947259: step: 620/466, loss: 0.023999236524105072 2023-01-22 13:10:44.508215: step: 622/466, loss: 0.576716423034668 2023-01-22 13:10:45.159416: step: 624/466, loss: 0.006064989138394594 2023-01-22 13:10:45.902824: step: 626/466, loss: 0.019019240513443947 2023-01-22 13:10:46.553672: step: 628/466, loss: 0.001003078417852521 2023-01-22 13:10:47.162975: step: 630/466, loss: 0.001638995367102325 2023-01-22 13:10:47.704180: step: 632/466, loss: 0.004462275188416243 2023-01-22 13:10:48.336378: step: 634/466, loss: 0.006321469321846962 2023-01-22 13:10:48.941841: step: 636/466, loss: 0.0014737026067450643 2023-01-22 13:10:49.528126: step: 638/466, loss: 0.005010698921978474 2023-01-22 13:10:50.133693: step: 640/466, loss: 0.06774980574846268 2023-01-22 13:10:50.736872: step: 642/466, loss: 0.001580861397087574 2023-01-22 13:10:51.332712: step: 644/466, loss: 0.00344419595785439 2023-01-22 13:10:52.008598: step: 646/466, loss: 0.013089598156511784 2023-01-22 13:10:52.598071: step: 648/466, loss: 0.0016473623691126704 2023-01-22 13:10:53.160549: step: 650/466, loss: 0.03453054651618004 2023-01-22 13:10:53.719288: step: 652/466, loss: 0.2897597551345825 2023-01-22 13:10:54.287821: step: 654/466, loss: 0.03459963575005531 2023-01-22 13:10:54.923758: step: 656/466, loss: 0.01754223369061947 2023-01-22 13:10:55.600757: step: 658/466, loss: 0.067201629281044 2023-01-22 13:10:56.161329: step: 660/466, loss: 0.002362264320254326 2023-01-22 13:10:56.710121: step: 662/466, loss: 0.0005576476105488837 2023-01-22 13:10:57.292600: step: 664/466, loss: 0.005788153037428856 2023-01-22 13:10:57.920765: step: 666/466, loss: 0.014405912719666958 2023-01-22 13:10:58.549429: step: 668/466, loss: 0.03309021145105362 2023-01-22 13:10:59.117006: step: 670/466, loss: 0.06431547552347183 2023-01-22 13:10:59.733264: step: 672/466, loss: 0.013910328038036823 2023-01-22 13:11:00.335518: step: 674/466, loss: 0.22224929928779602 2023-01-22 13:11:00.944798: step: 676/466, loss: 0.034740518778562546 2023-01-22 13:11:01.561784: step: 678/466, loss: 0.013087316416203976 2023-01-22 13:11:02.148459: step: 680/466, loss: 0.012118241749703884 2023-01-22 13:11:02.809436: step: 682/466, loss: 0.03825824707746506 2023-01-22 13:11:03.412427: step: 684/466, loss: 0.010267302393913269 2023-01-22 13:11:03.981424: step: 686/466, loss: 0.008821537718176842 2023-01-22 13:11:04.579446: step: 688/466, loss: 0.01823040284216404 2023-01-22 13:11:05.184639: step: 690/466, loss: 0.033738717436790466 2023-01-22 13:11:05.777770: step: 692/466, loss: 0.018728850409388542 2023-01-22 13:11:06.400252: step: 694/466, loss: 0.007886269129812717 2023-01-22 13:11:07.004216: step: 696/466, loss: 0.4238188564777374 2023-01-22 13:11:07.642020: step: 698/466, loss: 0.015290306881070137 2023-01-22 13:11:08.266445: step: 700/466, loss: 0.1405331939458847 2023-01-22 13:11:08.836261: step: 702/466, loss: 0.0031958790495991707 2023-01-22 13:11:09.562478: step: 704/466, loss: 0.009226196445524693 2023-01-22 13:11:10.152674: step: 706/466, loss: 0.012337690219283104 2023-01-22 13:11:10.800893: step: 708/466, loss: 0.025385482236742973 2023-01-22 13:11:11.420634: step: 710/466, loss: 0.0626063346862793 2023-01-22 13:11:12.052824: step: 712/466, loss: 0.0034278137609362602 2023-01-22 13:11:12.631958: step: 714/466, loss: 0.06579583138227463 2023-01-22 13:11:13.247770: step: 716/466, loss: 0.1320769339799881 2023-01-22 13:11:13.837202: step: 718/466, loss: 0.007669322192668915 2023-01-22 13:11:14.432566: step: 720/466, loss: 0.009058769792318344 2023-01-22 13:11:15.074747: step: 722/466, loss: 0.008587099611759186 2023-01-22 13:11:15.766553: step: 724/466, loss: 0.011049827560782433 2023-01-22 13:11:16.494487: step: 726/466, loss: 0.04374136030673981 2023-01-22 13:11:17.103114: step: 728/466, loss: 0.0014486410655081272 2023-01-22 13:11:17.725336: step: 730/466, loss: 0.15509715676307678 2023-01-22 13:11:18.303057: step: 732/466, loss: 0.0023542954586446285 2023-01-22 13:11:18.900963: step: 734/466, loss: 0.020629791542887688 2023-01-22 13:11:19.477155: step: 736/466, loss: 0.009806080721318722 2023-01-22 13:11:20.073479: step: 738/466, loss: 0.012316007167100906 2023-01-22 13:11:20.729507: step: 740/466, loss: 0.005239745602011681 2023-01-22 13:11:21.297042: step: 742/466, loss: 0.0018927620258182287 2023-01-22 13:11:21.893984: step: 744/466, loss: 0.041778482496738434 2023-01-22 13:11:22.432204: step: 746/466, loss: 0.002031937940046191 2023-01-22 13:11:23.023462: step: 748/466, loss: 0.031134361401200294 2023-01-22 13:11:23.585007: step: 750/466, loss: 0.01668274775147438 2023-01-22 13:11:24.174605: step: 752/466, loss: 0.008619182743132114 2023-01-22 13:11:24.785473: step: 754/466, loss: 0.050235576927661896 2023-01-22 13:11:25.366430: step: 756/466, loss: 0.006068137940019369 2023-01-22 13:11:26.019509: step: 758/466, loss: 0.018372712656855583 2023-01-22 13:11:26.636660: step: 760/466, loss: 0.010020049288868904 2023-01-22 13:11:27.233539: step: 762/466, loss: 0.005020998418331146 2023-01-22 13:11:27.821978: step: 764/466, loss: 9.63665297604166e-06 2023-01-22 13:11:28.456711: step: 766/466, loss: 0.20721535384655 2023-01-22 13:11:29.105529: step: 768/466, loss: 0.02705824188888073 2023-01-22 13:11:29.766226: step: 770/466, loss: 0.020132362842559814 2023-01-22 13:11:30.339277: step: 772/466, loss: 0.00651069451123476 2023-01-22 13:11:30.975524: step: 774/466, loss: 0.00996352918446064 2023-01-22 13:11:31.573714: step: 776/466, loss: 0.05295846238732338 2023-01-22 13:11:32.194153: step: 778/466, loss: 0.028427729383111 2023-01-22 13:11:32.816595: step: 780/466, loss: 0.046110689640045166 2023-01-22 13:11:33.447616: step: 782/466, loss: 0.052261386066675186 2023-01-22 13:11:34.038972: step: 784/466, loss: 0.04086603596806526 2023-01-22 13:11:34.677511: step: 786/466, loss: 0.002186446450650692 2023-01-22 13:11:35.338156: step: 788/466, loss: 0.004350494593381882 2023-01-22 13:11:35.996784: step: 790/466, loss: 0.010115685872733593 2023-01-22 13:11:36.568235: step: 792/466, loss: 0.0673961415886879 2023-01-22 13:11:37.138844: step: 794/466, loss: 0.002036201534792781 2023-01-22 13:11:37.716459: step: 796/466, loss: 0.002516336739063263 2023-01-22 13:11:38.264953: step: 798/466, loss: 0.00342946476303041 2023-01-22 13:11:38.901978: step: 800/466, loss: 0.0020202852319926023 2023-01-22 13:11:39.441063: step: 802/466, loss: 0.017617767676711082 2023-01-22 13:11:40.032534: step: 804/466, loss: 3.9753360748291016 2023-01-22 13:11:40.639564: step: 806/466, loss: 0.011270052753388882 2023-01-22 13:11:41.234268: step: 808/466, loss: 0.009572568349540234 2023-01-22 13:11:41.946708: step: 810/466, loss: 0.005419893190264702 2023-01-22 13:11:42.581289: step: 812/466, loss: 0.03984509035944939 2023-01-22 13:11:43.113653: step: 814/466, loss: 0.05492782965302467 2023-01-22 13:11:43.735458: step: 816/466, loss: 0.010290661826729774 2023-01-22 13:11:44.357603: step: 818/466, loss: 0.0068497927859425545 2023-01-22 13:11:44.987418: step: 820/466, loss: 0.0007102004019543529 2023-01-22 13:11:45.587747: step: 822/466, loss: 0.012335572391748428 2023-01-22 13:11:46.219904: step: 824/466, loss: 0.026347529143095016 2023-01-22 13:11:46.807124: step: 826/466, loss: 0.002580393571406603 2023-01-22 13:11:47.475360: step: 828/466, loss: 0.03455239161849022 2023-01-22 13:11:48.084660: step: 830/466, loss: 0.05823826044797897 2023-01-22 13:11:48.664350: step: 832/466, loss: 0.021898532286286354 2023-01-22 13:11:49.266777: step: 834/466, loss: 0.020318279042840004 2023-01-22 13:11:49.854950: step: 836/466, loss: 0.005964603740721941 2023-01-22 13:11:50.418896: step: 838/466, loss: 0.0307414922863245 2023-01-22 13:11:51.065125: step: 840/466, loss: 0.00021112307149451226 2023-01-22 13:11:51.672506: step: 842/466, loss: 0.05015534535050392 2023-01-22 13:11:52.269242: step: 844/466, loss: 0.006480558775365353 2023-01-22 13:11:52.915019: step: 846/466, loss: 0.010075699537992477 2023-01-22 13:11:53.479869: step: 848/466, loss: 0.14495618641376495 2023-01-22 13:11:54.140290: step: 850/466, loss: 0.04412548616528511 2023-01-22 13:11:54.736905: step: 852/466, loss: 0.00012186765525257215 2023-01-22 13:11:55.321737: step: 854/466, loss: 0.013047690503299236 2023-01-22 13:11:55.933501: step: 856/466, loss: 0.004875212907791138 2023-01-22 13:11:56.570315: step: 858/466, loss: 0.04846766218543053 2023-01-22 13:11:57.194101: step: 860/466, loss: 0.004972430877387524 2023-01-22 13:11:57.763547: step: 862/466, loss: 0.014919591136276722 2023-01-22 13:11:58.456207: step: 864/466, loss: 0.04054275527596474 2023-01-22 13:11:59.082037: step: 866/466, loss: 0.004106948152184486 2023-01-22 13:11:59.704847: step: 868/466, loss: 0.012273982167243958 2023-01-22 13:12:00.330743: step: 870/466, loss: 0.020487243309617043 2023-01-22 13:12:00.957186: step: 872/466, loss: 0.018841352313756943 2023-01-22 13:12:01.542382: step: 874/466, loss: 0.01061300653964281 2023-01-22 13:12:02.148386: step: 876/466, loss: 0.043411578983068466 2023-01-22 13:12:02.763163: step: 878/466, loss: 0.029093991965055466 2023-01-22 13:12:03.403907: step: 880/466, loss: 0.058155424892902374 2023-01-22 13:12:03.997186: step: 882/466, loss: 0.034859657287597656 2023-01-22 13:12:04.645917: step: 884/466, loss: 0.13316437602043152 2023-01-22 13:12:05.211704: step: 886/466, loss: 0.10845877230167389 2023-01-22 13:12:05.801605: step: 888/466, loss: 0.005917152855545282 2023-01-22 13:12:06.373502: step: 890/466, loss: 0.054976899176836014 2023-01-22 13:12:06.965773: step: 892/466, loss: 0.08583804219961166 2023-01-22 13:12:07.588001: step: 894/466, loss: 0.0240468867123127 2023-01-22 13:12:08.219687: step: 896/466, loss: 0.02332800254225731 2023-01-22 13:12:08.797451: step: 898/466, loss: 0.019618580117821693 2023-01-22 13:12:09.414730: step: 900/466, loss: 0.16611427068710327 2023-01-22 13:12:09.962297: step: 902/466, loss: 0.018733682110905647 2023-01-22 13:12:10.531610: step: 904/466, loss: 0.011458895169198513 2023-01-22 13:12:11.176633: step: 906/466, loss: 0.014567876234650612 2023-01-22 13:12:11.806547: step: 908/466, loss: 0.00459162425249815 2023-01-22 13:12:12.471247: step: 910/466, loss: 0.1225447803735733 2023-01-22 13:12:13.038242: step: 912/466, loss: 0.007581851910799742 2023-01-22 13:12:13.633993: step: 914/466, loss: 0.03084220364689827 2023-01-22 13:12:14.290115: step: 916/466, loss: 0.004150428809225559 2023-01-22 13:12:14.891579: step: 918/466, loss: 0.07581573724746704 2023-01-22 13:12:15.553476: step: 920/466, loss: 0.033521562814712524 2023-01-22 13:12:16.173355: step: 922/466, loss: 0.005024290177971125 2023-01-22 13:12:16.791408: step: 924/466, loss: 0.014391470700502396 2023-01-22 13:12:17.363064: step: 926/466, loss: 0.04559220001101494 2023-01-22 13:12:17.980971: step: 928/466, loss: 0.011902103200554848 2023-01-22 13:12:18.566786: step: 930/466, loss: 0.03757358342409134 2023-01-22 13:12:19.145552: step: 932/466, loss: 0.033605653792619705 ================================================== Loss: 0.076 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2953890797037903, 'r': 0.3486375855327468, 'f1': 0.31981202363056144}, 'combined': 0.23565096478041367, 'epoch': 29} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3522830160052356, 'r': 0.31641641040677326, 'f1': 0.33338783605586386}, 'combined': 0.22110695862772314, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2855033081285444, 'r': 0.2860440340909091, 'f1': 0.2857734153263955}, 'combined': 0.190515610217597, 'epoch': 29} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.36652793315073945, 'r': 0.29322234652059154, 'f1': 0.3258026072451017}, 'combined': 0.21262906999154002, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2796542241298433, 'r': 0.34280195215916276, 'f1': 0.3080249425198274}, 'combined': 0.22696574711987283, 'epoch': 29} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.33731886952800344, 'r': 0.3010731677762942, 'f1': 0.3181670655804428}, 'combined': 0.21101235437459415, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2150537634408602, 'r': 0.38095238095238093, 'f1': 0.274914089347079}, 'combined': 0.18327605956471932, 'epoch': 29} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.425, 'r': 0.3695652173913043, 'f1': 0.3953488372093023}, 'combined': 0.2635658914728682, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34210526315789475, 'r': 0.22413793103448276, 'f1': 0.2708333333333333}, 'combined': 0.18055555555555552, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:14:45.287185: step: 2/466, loss: 0.00028478680178523064 2023-01-22 13:14:45.882501: step: 4/466, loss: 0.014078116975724697 2023-01-22 13:14:46.519966: step: 6/466, loss: 0.012460625730454922 2023-01-22 13:14:47.135333: step: 8/466, loss: 0.05085223913192749 2023-01-22 13:14:47.656810: step: 10/466, loss: 0.009307858534157276 2023-01-22 13:14:48.331296: step: 12/466, loss: 0.004645699169486761 2023-01-22 13:14:48.921815: step: 14/466, loss: 0.00043225885019637644 2023-01-22 13:14:49.561000: step: 16/466, loss: 0.009154175408184528 2023-01-22 13:14:50.203864: step: 18/466, loss: 0.004041125066578388 2023-01-22 13:14:50.801548: step: 20/466, loss: 0.14259779453277588 2023-01-22 13:14:51.367009: step: 22/466, loss: 0.004271865822374821 2023-01-22 13:14:51.977537: step: 24/466, loss: 0.018932662904262543 2023-01-22 13:14:52.648331: step: 26/466, loss: 0.008465741761028767 2023-01-22 13:14:53.252103: step: 28/466, loss: 0.015602695755660534 2023-01-22 13:14:53.870330: step: 30/466, loss: 0.758287787437439 2023-01-22 13:14:54.441358: step: 32/466, loss: 0.016388943418860435 2023-01-22 13:14:55.048984: step: 34/466, loss: 0.07368203997612 2023-01-22 13:14:55.608387: step: 36/466, loss: 0.0026354417204856873 2023-01-22 13:14:56.212746: step: 38/466, loss: 0.004369013477116823 2023-01-22 13:14:56.850878: step: 40/466, loss: 0.0012482845922932029 2023-01-22 13:14:57.421836: step: 42/466, loss: 0.0878681018948555 2023-01-22 13:14:58.126816: step: 44/466, loss: 0.08650446683168411 2023-01-22 13:14:58.661787: step: 46/466, loss: 0.0037090929690748453 2023-01-22 13:14:59.315359: step: 48/466, loss: 0.04962155595421791 2023-01-22 13:14:59.938809: step: 50/466, loss: 0.008082413114607334 2023-01-22 13:15:00.564384: step: 52/466, loss: 0.02043752558529377 2023-01-22 13:15:01.165003: step: 54/466, loss: 0.021009817719459534 2023-01-22 13:15:01.741105: step: 56/466, loss: 0.0005981952417641878 2023-01-22 13:15:02.354697: step: 58/466, loss: 0.012455707415938377 2023-01-22 13:15:02.992898: step: 60/466, loss: 0.010042482987046242 2023-01-22 13:15:03.645894: step: 62/466, loss: 0.0007668372127227485 2023-01-22 13:15:04.218915: step: 64/466, loss: 0.06405071169137955 2023-01-22 13:15:04.720116: step: 66/466, loss: 0.026616506278514862 2023-01-22 13:15:05.399052: step: 68/466, loss: 0.11562003195285797 2023-01-22 13:15:06.032274: step: 70/466, loss: 0.0031265579164028168 2023-01-22 13:15:06.660918: step: 72/466, loss: 0.07842712104320526 2023-01-22 13:15:07.340170: step: 74/466, loss: 0.02148415334522724 2023-01-22 13:15:07.873022: step: 76/466, loss: 0.01080042589455843 2023-01-22 13:15:08.503749: step: 78/466, loss: 0.018368011340498924 2023-01-22 13:15:09.106745: step: 80/466, loss: 0.00820663571357727 2023-01-22 13:15:09.700050: step: 82/466, loss: 0.003451654454693198 2023-01-22 13:15:10.273299: step: 84/466, loss: 0.011481214314699173 2023-01-22 13:15:10.858254: step: 86/466, loss: 0.00872575119137764 2023-01-22 13:15:11.495754: step: 88/466, loss: 0.003943064250051975 2023-01-22 13:15:12.142291: step: 90/466, loss: 0.001124784117564559 2023-01-22 13:15:12.734189: step: 92/466, loss: 0.011985089629888535 2023-01-22 13:15:13.420065: step: 94/466, loss: 0.005329095292836428 2023-01-22 13:15:14.028841: step: 96/466, loss: 0.36632513999938965 2023-01-22 13:15:14.610233: step: 98/466, loss: 0.03597888723015785 2023-01-22 13:15:15.178887: step: 100/466, loss: 0.014806758612394333 2023-01-22 13:15:15.833497: step: 102/466, loss: 0.0015192838618531823 2023-01-22 13:15:16.553708: step: 104/466, loss: 0.0349656417965889 2023-01-22 13:15:17.146438: step: 106/466, loss: 0.05895746126770973 2023-01-22 13:15:17.745667: step: 108/466, loss: 0.004985973238945007 2023-01-22 13:15:18.355410: step: 110/466, loss: 0.019262507557868958 2023-01-22 13:15:18.946587: step: 112/466, loss: 0.004171041306108236 2023-01-22 13:15:19.560072: step: 114/466, loss: 0.010270554572343826 2023-01-22 13:15:20.169333: step: 116/466, loss: 0.008071281015872955 2023-01-22 13:15:20.788191: step: 118/466, loss: 0.012917655520141125 2023-01-22 13:15:21.410369: step: 120/466, loss: 0.01800263673067093 2023-01-22 13:15:22.049171: step: 122/466, loss: 0.004215335939079523 2023-01-22 13:15:22.627084: step: 124/466, loss: 0.017569512128829956 2023-01-22 13:15:23.213073: step: 126/466, loss: 0.01752289943397045 2023-01-22 13:15:23.798952: step: 128/466, loss: 0.010893118567764759 2023-01-22 13:15:24.350299: step: 130/466, loss: 0.0005880341632291675 2023-01-22 13:15:24.919662: step: 132/466, loss: 0.003655801061540842 2023-01-22 13:15:25.539043: step: 134/466, loss: 0.03518220782279968 2023-01-22 13:15:26.139259: step: 136/466, loss: 0.05233493447303772 2023-01-22 13:15:26.817004: step: 138/466, loss: 0.021850325167179108 2023-01-22 13:15:27.499903: step: 140/466, loss: 0.027908580377697945 2023-01-22 13:15:28.074006: step: 142/466, loss: 0.006345653906464577 2023-01-22 13:15:28.729371: step: 144/466, loss: 0.0006852993974462152 2023-01-22 13:15:29.343939: step: 146/466, loss: 0.007213433738797903 2023-01-22 13:15:29.915782: step: 148/466, loss: 0.0066226390190422535 2023-01-22 13:15:30.514566: step: 150/466, loss: 0.015624548308551311 2023-01-22 13:15:31.161810: step: 152/466, loss: 0.02410743571817875 2023-01-22 13:15:31.729431: step: 154/466, loss: 0.0009644230012781918 2023-01-22 13:15:32.414659: step: 156/466, loss: 0.010475953109562397 2023-01-22 13:15:33.061942: step: 158/466, loss: 0.002348509384319186 2023-01-22 13:15:33.680920: step: 160/466, loss: 0.008804123848676682 2023-01-22 13:15:34.261977: step: 162/466, loss: 0.01287774182856083 2023-01-22 13:15:34.898034: step: 164/466, loss: 0.16314955055713654 2023-01-22 13:15:35.478133: step: 166/466, loss: 0.005296224262565374 2023-01-22 13:15:36.052714: step: 168/466, loss: 0.012881525792181492 2023-01-22 13:15:36.614121: step: 170/466, loss: 0.004743877798318863 2023-01-22 13:15:37.173875: step: 172/466, loss: 0.017502468079328537 2023-01-22 13:15:37.816681: step: 174/466, loss: 0.01408388838171959 2023-01-22 13:15:38.352367: step: 176/466, loss: 0.0014216086128726602 2023-01-22 13:15:38.972323: step: 178/466, loss: 0.008555619977414608 2023-01-22 13:15:39.561081: step: 180/466, loss: 0.003428714582696557 2023-01-22 13:15:40.175666: step: 182/466, loss: 0.005940432660281658 2023-01-22 13:15:40.813746: step: 184/466, loss: 0.011507420800626278 2023-01-22 13:15:41.478022: step: 186/466, loss: 0.05055483058094978 2023-01-22 13:15:42.118298: step: 188/466, loss: 0.3473048806190491 2023-01-22 13:15:42.766043: step: 190/466, loss: 0.006810220889747143 2023-01-22 13:15:43.397238: step: 192/466, loss: 0.046747103333473206 2023-01-22 13:15:43.949362: step: 194/466, loss: 0.0039004923310130835 2023-01-22 13:15:44.502431: step: 196/466, loss: 0.0035725836642086506 2023-01-22 13:15:45.067013: step: 198/466, loss: 0.14844007790088654 2023-01-22 13:15:45.668935: step: 200/466, loss: 0.033517882227897644 2023-01-22 13:15:46.253600: step: 202/466, loss: 0.01868906244635582 2023-01-22 13:15:46.834390: step: 204/466, loss: 0.025287127122282982 2023-01-22 13:15:47.453451: step: 206/466, loss: 0.010811547748744488 2023-01-22 13:15:48.044447: step: 208/466, loss: 0.07126505672931671 2023-01-22 13:15:48.719627: step: 210/466, loss: 0.0052129654213786125 2023-01-22 13:15:49.334521: step: 212/466, loss: 0.040356654673814774 2023-01-22 13:15:49.961072: step: 214/466, loss: 0.03850207477807999 2023-01-22 13:15:50.547434: step: 216/466, loss: 0.003108978969976306 2023-01-22 13:15:51.190419: step: 218/466, loss: 0.022466685622930527 2023-01-22 13:15:51.844355: step: 220/466, loss: 0.0003463841858319938 2023-01-22 13:15:52.493015: step: 222/466, loss: 0.016880718991160393 2023-01-22 13:15:53.076437: step: 224/466, loss: 0.015215063467621803 2023-01-22 13:15:53.665299: step: 226/466, loss: 0.004265309311449528 2023-01-22 13:15:54.229830: step: 228/466, loss: 0.0458204559981823 2023-01-22 13:15:54.824882: step: 230/466, loss: 0.018718402832746506 2023-01-22 13:15:55.478109: step: 232/466, loss: 0.00042664248030632734 2023-01-22 13:15:56.092512: step: 234/466, loss: 0.017034878954291344 2023-01-22 13:15:56.701646: step: 236/466, loss: 0.3019435703754425 2023-01-22 13:15:57.307574: step: 238/466, loss: 0.2951653301715851 2023-01-22 13:15:57.928888: step: 240/466, loss: 0.00019873416749760509 2023-01-22 13:15:58.530488: step: 242/466, loss: 0.023621132597327232 2023-01-22 13:15:59.150186: step: 244/466, loss: 0.07778826355934143 2023-01-22 13:15:59.759714: step: 246/466, loss: 0.1465650498867035 2023-01-22 13:16:00.332489: step: 248/466, loss: 0.013963623903691769 2023-01-22 13:16:00.912160: step: 250/466, loss: 0.16531668603420258 2023-01-22 13:16:01.575475: step: 252/466, loss: 0.02791769616305828 2023-01-22 13:16:02.191017: step: 254/466, loss: 0.027266457676887512 2023-01-22 13:16:02.803655: step: 256/466, loss: 0.004218158777803183 2023-01-22 13:16:03.417560: step: 258/466, loss: 0.011412985622882843 2023-01-22 13:16:04.017004: step: 260/466, loss: 0.00474912254139781 2023-01-22 13:16:04.600883: step: 262/466, loss: 0.004200744442641735 2023-01-22 13:16:05.187326: step: 264/466, loss: 0.007517603226006031 2023-01-22 13:16:05.728079: step: 266/466, loss: 0.017428897321224213 2023-01-22 13:16:06.368055: step: 268/466, loss: 0.02294786460697651 2023-01-22 13:16:06.959427: step: 270/466, loss: 0.12526442110538483 2023-01-22 13:16:07.576830: step: 272/466, loss: 0.08035369962453842 2023-01-22 13:16:08.129872: step: 274/466, loss: 0.006867168005555868 2023-01-22 13:16:08.755114: step: 276/466, loss: 0.026198022067546844 2023-01-22 13:16:09.361314: step: 278/466, loss: 0.00045783095993101597 2023-01-22 13:16:09.948672: step: 280/466, loss: 0.00981088075786829 2023-01-22 13:16:10.608936: step: 282/466, loss: 0.021191062405705452 2023-01-22 13:16:11.102766: step: 284/466, loss: 0.0037732140626758337 2023-01-22 13:16:11.714025: step: 286/466, loss: 0.0004485807439778 2023-01-22 13:16:12.311778: step: 288/466, loss: 0.008615471422672272 2023-01-22 13:16:12.915635: step: 290/466, loss: 0.6578670740127563 2023-01-22 13:16:13.536888: step: 292/466, loss: 0.0301654115319252 2023-01-22 13:16:14.191575: step: 294/466, loss: 0.01989324577152729 2023-01-22 13:16:14.791833: step: 296/466, loss: 0.0056756469421088696 2023-01-22 13:16:15.403494: step: 298/466, loss: 0.030090264976024628 2023-01-22 13:16:16.022022: step: 300/466, loss: 0.04090450331568718 2023-01-22 13:16:16.629229: step: 302/466, loss: 0.012088625691831112 2023-01-22 13:16:17.237256: step: 304/466, loss: 0.7122946977615356 2023-01-22 13:16:17.869137: step: 306/466, loss: 0.01763085089623928 2023-01-22 13:16:18.481094: step: 308/466, loss: 0.016639074310660362 2023-01-22 13:16:19.213788: step: 310/466, loss: 0.05521659180521965 2023-01-22 13:16:19.810857: step: 312/466, loss: 0.025561662390828133 2023-01-22 13:16:20.383603: step: 314/466, loss: 0.056694965809583664 2023-01-22 13:16:20.978234: step: 316/466, loss: 0.0177704319357872 2023-01-22 13:16:21.613225: step: 318/466, loss: 0.1333787739276886 2023-01-22 13:16:22.230496: step: 320/466, loss: 0.023962095379829407 2023-01-22 13:16:22.820407: step: 322/466, loss: 0.0034161526709795 2023-01-22 13:16:23.406496: step: 324/466, loss: 0.00201751128770411 2023-01-22 13:16:24.002709: step: 326/466, loss: 0.019854096695780754 2023-01-22 13:16:24.553679: step: 328/466, loss: 0.0136026656255126 2023-01-22 13:16:25.149220: step: 330/466, loss: 0.06052771210670471 2023-01-22 13:16:25.745222: step: 332/466, loss: 0.1464303433895111 2023-01-22 13:16:26.361368: step: 334/466, loss: 0.03735050559043884 2023-01-22 13:16:26.951317: step: 336/466, loss: 0.004379080608487129 2023-01-22 13:16:27.562524: step: 338/466, loss: 0.0054306890815496445 2023-01-22 13:16:28.139724: step: 340/466, loss: 0.00680329417809844 2023-01-22 13:16:28.709657: step: 342/466, loss: 0.005904025863856077 2023-01-22 13:16:29.327966: step: 344/466, loss: 0.01370063703507185 2023-01-22 13:16:29.923515: step: 346/466, loss: 0.027154013514518738 2023-01-22 13:16:30.521637: step: 348/466, loss: 0.0003415009123273194 2023-01-22 13:16:31.078854: step: 350/466, loss: 0.010796810500323772 2023-01-22 13:16:31.718770: step: 352/466, loss: 0.007304156664758921 2023-01-22 13:16:32.387009: step: 354/466, loss: 0.009061838500201702 2023-01-22 13:16:32.988390: step: 356/466, loss: 0.034129682928323746 2023-01-22 13:16:33.561469: step: 358/466, loss: 0.0017467025900259614 2023-01-22 13:16:34.073479: step: 360/466, loss: 0.0001253996742889285 2023-01-22 13:16:34.672599: step: 362/466, loss: 0.008916638791561127 2023-01-22 13:16:35.231124: step: 364/466, loss: 0.023510871455073357 2023-01-22 13:16:35.800327: step: 366/466, loss: 0.009778701700270176 2023-01-22 13:16:36.362538: step: 368/466, loss: 0.00026640386204235256 2023-01-22 13:16:36.928748: step: 370/466, loss: 0.00020242726895958185 2023-01-22 13:16:37.632968: step: 372/466, loss: 0.005702751688659191 2023-01-22 13:16:38.249416: step: 374/466, loss: 0.088680200278759 2023-01-22 13:16:38.798734: step: 376/466, loss: 0.13011717796325684 2023-01-22 13:16:39.424103: step: 378/466, loss: 0.033206891268491745 2023-01-22 13:16:40.023032: step: 380/466, loss: 0.008764302358031273 2023-01-22 13:16:40.738572: step: 382/466, loss: 0.0035456165205687284 2023-01-22 13:16:41.353278: step: 384/466, loss: 0.035435259342193604 2023-01-22 13:16:41.978338: step: 386/466, loss: 0.0048704869113862514 2023-01-22 13:16:42.611791: step: 388/466, loss: 0.03243507817387581 2023-01-22 13:16:43.184775: step: 390/466, loss: 0.009492951445281506 2023-01-22 13:16:43.798954: step: 392/466, loss: 0.04516708850860596 2023-01-22 13:16:44.439602: step: 394/466, loss: 0.0015181071357801557 2023-01-22 13:16:45.072483: step: 396/466, loss: 0.005739421583712101 2023-01-22 13:16:45.726400: step: 398/466, loss: 0.023896964266896248 2023-01-22 13:16:46.418001: step: 400/466, loss: 0.0031137680634856224 2023-01-22 13:16:46.995233: step: 402/466, loss: 0.0023499038070440292 2023-01-22 13:16:47.589714: step: 404/466, loss: 0.022085856646299362 2023-01-22 13:16:48.230025: step: 406/466, loss: 0.002061337698251009 2023-01-22 13:16:48.857577: step: 408/466, loss: 0.016577694565057755 2023-01-22 13:16:49.420816: step: 410/466, loss: 0.05804067105054855 2023-01-22 13:16:50.081097: step: 412/466, loss: 0.002286148490384221 2023-01-22 13:16:50.612114: step: 414/466, loss: 0.02970394864678383 2023-01-22 13:16:51.199168: step: 416/466, loss: 0.0006221303483471274 2023-01-22 13:16:51.865913: step: 418/466, loss: 0.07081273198127747 2023-01-22 13:16:52.420677: step: 420/466, loss: 0.018962359055876732 2023-01-22 13:16:53.034855: step: 422/466, loss: 0.007997725158929825 2023-01-22 13:16:53.641098: step: 424/466, loss: 0.021207401528954506 2023-01-22 13:16:54.206295: step: 426/466, loss: 0.004502940457314253 2023-01-22 13:16:54.865985: step: 428/466, loss: 0.0023897087667137384 2023-01-22 13:16:55.489069: step: 430/466, loss: 0.027787597849965096 2023-01-22 13:16:56.124701: step: 432/466, loss: 0.012907175347208977 2023-01-22 13:16:56.731458: step: 434/466, loss: 0.005142767447978258 2023-01-22 13:16:57.310941: step: 436/466, loss: 0.010803007520735264 2023-01-22 13:16:57.886603: step: 438/466, loss: 0.0029781744815409184 2023-01-22 13:16:58.494216: step: 440/466, loss: 0.002498141722753644 2023-01-22 13:16:59.050595: step: 442/466, loss: 0.006571412086486816 2023-01-22 13:16:59.696828: step: 444/466, loss: 0.010829522274434566 2023-01-22 13:17:00.234127: step: 446/466, loss: 0.0059796967543661594 2023-01-22 13:17:00.852850: step: 448/466, loss: 0.0032199707347899675 2023-01-22 13:17:01.618226: step: 450/466, loss: 0.10496566444635391 2023-01-22 13:17:02.218053: step: 452/466, loss: 0.022163324058055878 2023-01-22 13:17:02.818097: step: 454/466, loss: 0.012960226275026798 2023-01-22 13:17:03.376604: step: 456/466, loss: 0.02838488295674324 2023-01-22 13:17:03.956970: step: 458/466, loss: 0.0011124319862574339 2023-01-22 13:17:04.551510: step: 460/466, loss: 0.0037779128178954124 2023-01-22 13:17:05.183493: step: 462/466, loss: 0.005112520884722471 2023-01-22 13:17:05.767222: step: 464/466, loss: 0.018566833809018135 2023-01-22 13:17:06.329379: step: 466/466, loss: 0.0032261114101856947 2023-01-22 13:17:07.045120: step: 468/466, loss: 0.027344532310962677 2023-01-22 13:17:07.676251: step: 470/466, loss: 0.038745488971471786 2023-01-22 13:17:08.268741: step: 472/466, loss: 0.001235641655512154 2023-01-22 13:17:08.813967: step: 474/466, loss: 0.0025396200362592936 2023-01-22 13:17:09.350182: step: 476/466, loss: 0.0005299833719618618 2023-01-22 13:17:09.939962: step: 478/466, loss: 0.013481545262038708 2023-01-22 13:17:10.584862: step: 480/466, loss: 0.04830469191074371 2023-01-22 13:17:11.200360: step: 482/466, loss: 0.02216290310025215 2023-01-22 13:17:11.756538: step: 484/466, loss: 0.007065951824188232 2023-01-22 13:17:12.331571: step: 486/466, loss: 0.08484815806150436 2023-01-22 13:17:13.003988: step: 488/466, loss: 0.035927895456552505 2023-01-22 13:17:13.621052: step: 490/466, loss: 0.010688919574022293 2023-01-22 13:17:14.238139: step: 492/466, loss: 0.0003162022912874818 2023-01-22 13:17:14.806026: step: 494/466, loss: 0.008528665639460087 2023-01-22 13:17:15.415967: step: 496/466, loss: 0.014202555641531944 2023-01-22 13:17:16.011538: step: 498/466, loss: 0.0950498878955841 2023-01-22 13:17:16.596494: step: 500/466, loss: 0.0037851862143725157 2023-01-22 13:17:17.155407: step: 502/466, loss: 0.002583063906058669 2023-01-22 13:17:17.762248: step: 504/466, loss: 0.09229692816734314 2023-01-22 13:17:18.437469: step: 506/466, loss: 0.03515050932765007 2023-01-22 13:17:19.108663: step: 508/466, loss: 0.2170080989599228 2023-01-22 13:17:19.686666: step: 510/466, loss: 0.023440072312951088 2023-01-22 13:17:20.306936: step: 512/466, loss: 0.012734951451420784 2023-01-22 13:17:20.919562: step: 514/466, loss: 0.0022743670269846916 2023-01-22 13:17:21.539767: step: 516/466, loss: 0.014371275901794434 2023-01-22 13:17:22.072161: step: 518/466, loss: 0.017327800393104553 2023-01-22 13:17:22.678177: step: 520/466, loss: 0.06441115587949753 2023-01-22 13:17:23.383614: step: 522/466, loss: 0.036276645958423615 2023-01-22 13:17:23.944172: step: 524/466, loss: 0.0009572524577379227 2023-01-22 13:17:24.555534: step: 526/466, loss: 0.010352073237299919 2023-01-22 13:17:25.162136: step: 528/466, loss: 0.0002700319164432585 2023-01-22 13:17:25.764854: step: 530/466, loss: 0.04756855592131615 2023-01-22 13:17:26.324014: step: 532/466, loss: 0.03081398457288742 2023-01-22 13:17:26.902382: step: 534/466, loss: 0.013193175196647644 2023-01-22 13:17:27.515064: step: 536/466, loss: 0.08822406083345413 2023-01-22 13:17:28.080385: step: 538/466, loss: 0.02872275933623314 2023-01-22 13:17:28.667244: step: 540/466, loss: 0.0002939131227321923 2023-01-22 13:17:29.277458: step: 542/466, loss: 0.05053912475705147 2023-01-22 13:17:29.942425: step: 544/466, loss: 0.03908180445432663 2023-01-22 13:17:30.492466: step: 546/466, loss: 0.0028365193866193295 2023-01-22 13:17:31.047460: step: 548/466, loss: 1.3741116523742676 2023-01-22 13:17:31.651750: step: 550/466, loss: 0.013899214565753937 2023-01-22 13:17:32.308302: step: 552/466, loss: 0.011286936700344086 2023-01-22 13:17:32.824079: step: 554/466, loss: 0.009468899108469486 2023-01-22 13:17:33.394938: step: 556/466, loss: 0.003917319234460592 2023-01-22 13:17:33.998546: step: 558/466, loss: 0.010787371546030045 2023-01-22 13:17:34.666409: step: 560/466, loss: 0.009908036328852177 2023-01-22 13:17:35.230968: step: 562/466, loss: 0.012839104980230331 2023-01-22 13:17:35.814763: step: 564/466, loss: 0.0020545353181660175 2023-01-22 13:17:36.524640: step: 566/466, loss: 0.02453666739165783 2023-01-22 13:17:37.217901: step: 568/466, loss: 0.032193608582019806 2023-01-22 13:17:37.790213: step: 570/466, loss: 0.08889345824718475 2023-01-22 13:17:38.402981: step: 572/466, loss: 0.3024088740348816 2023-01-22 13:17:38.971687: step: 574/466, loss: 0.0006328593008220196 2023-01-22 13:17:39.559696: step: 576/466, loss: 0.0075796786695718765 2023-01-22 13:17:40.181223: step: 578/466, loss: 0.012202093377709389 2023-01-22 13:17:40.785550: step: 580/466, loss: 0.2748091220855713 2023-01-22 13:17:41.388215: step: 582/466, loss: 0.08011313527822495 2023-01-22 13:17:41.973550: step: 584/466, loss: 0.007275673560798168 2023-01-22 13:17:42.561166: step: 586/466, loss: 0.023380331695079803 2023-01-22 13:17:43.165573: step: 588/466, loss: 0.03591989725828171 2023-01-22 13:17:43.731427: step: 590/466, loss: 0.0058691492304205894 2023-01-22 13:17:44.332038: step: 592/466, loss: 0.004461975302547216 2023-01-22 13:17:45.055512: step: 594/466, loss: 0.02557777799665928 2023-01-22 13:17:45.713259: step: 596/466, loss: 0.0014842869713902473 2023-01-22 13:17:46.359494: step: 598/466, loss: 0.014528783038258553 2023-01-22 13:17:46.967188: step: 600/466, loss: 0.0010791391832754016 2023-01-22 13:17:47.611862: step: 602/466, loss: 0.07167264074087143 2023-01-22 13:17:48.134542: step: 604/466, loss: 0.0077194636687636375 2023-01-22 13:17:48.746807: step: 606/466, loss: 0.18285511434078217 2023-01-22 13:17:49.329440: step: 608/466, loss: 0.0013037014286965132 2023-01-22 13:17:49.899690: step: 610/466, loss: 0.2825278043746948 2023-01-22 13:17:50.441128: step: 612/466, loss: 0.028179537504911423 2023-01-22 13:17:51.002474: step: 614/466, loss: 0.036346279084682465 2023-01-22 13:17:51.577206: step: 616/466, loss: 0.017021380364894867 2023-01-22 13:17:52.188734: step: 618/466, loss: 0.02601482905447483 2023-01-22 13:17:52.805826: step: 620/466, loss: 0.037656866014003754 2023-01-22 13:17:53.383893: step: 622/466, loss: 0.02671785093843937 2023-01-22 13:17:53.968243: step: 624/466, loss: 0.000846438982989639 2023-01-22 13:17:54.631623: step: 626/466, loss: 0.0006022404413670301 2023-01-22 13:17:55.255743: step: 628/466, loss: 0.0027155440766364336 2023-01-22 13:17:55.900261: step: 630/466, loss: 0.03708767890930176 2023-01-22 13:17:56.534073: step: 632/466, loss: 0.007931017316877842 2023-01-22 13:17:57.100770: step: 634/466, loss: 0.006869449745863676 2023-01-22 13:17:57.712046: step: 636/466, loss: 0.049254097044467926 2023-01-22 13:17:58.306006: step: 638/466, loss: 0.12904299795627594 2023-01-22 13:17:58.988605: step: 640/466, loss: 0.020783551037311554 2023-01-22 13:17:59.569607: step: 642/466, loss: 0.005933032371103764 2023-01-22 13:18:00.200336: step: 644/466, loss: 0.06857472658157349 2023-01-22 13:18:00.811391: step: 646/466, loss: 0.004435302224010229 2023-01-22 13:18:01.456760: step: 648/466, loss: 0.717083752155304 2023-01-22 13:18:02.245554: step: 650/466, loss: 0.08154986053705215 2023-01-22 13:18:02.822280: step: 652/466, loss: 0.015391502529382706 2023-01-22 13:18:03.402670: step: 654/466, loss: 0.05044484883546829 2023-01-22 13:18:03.973457: step: 656/466, loss: 0.013044895604252815 2023-01-22 13:18:04.606242: step: 658/466, loss: 0.09905446320772171 2023-01-22 13:18:05.262290: step: 660/466, loss: 0.010607562959194183 2023-01-22 13:18:05.854231: step: 662/466, loss: 0.005811905954033136 2023-01-22 13:18:06.448824: step: 664/466, loss: 0.012407036498188972 2023-01-22 13:18:07.025810: step: 666/466, loss: 0.08473341166973114 2023-01-22 13:18:07.629192: step: 668/466, loss: 0.018354739993810654 2023-01-22 13:18:08.253865: step: 670/466, loss: 0.005217993166297674 2023-01-22 13:18:08.816392: step: 672/466, loss: 0.10953141003847122 2023-01-22 13:18:09.557446: step: 674/466, loss: 0.05236465856432915 2023-01-22 13:18:10.044327: step: 676/466, loss: 0.01544020976871252 2023-01-22 13:18:10.677602: step: 678/466, loss: 0.0276241023093462 2023-01-22 13:18:11.270346: step: 680/466, loss: 0.05030062049627304 2023-01-22 13:18:11.806742: step: 682/466, loss: 0.026244675740599632 2023-01-22 13:18:12.394174: step: 684/466, loss: 0.07645662128925323 2023-01-22 13:18:13.018972: step: 686/466, loss: 0.010076702572405338 2023-01-22 13:18:13.671561: step: 688/466, loss: 0.013777351938188076 2023-01-22 13:18:14.297275: step: 690/466, loss: 0.001946625066921115 2023-01-22 13:18:14.948826: step: 692/466, loss: 0.013552557677030563 2023-01-22 13:18:15.528591: step: 694/466, loss: 0.04471652954816818 2023-01-22 13:18:16.110805: step: 696/466, loss: 0.01028574537485838 2023-01-22 13:18:16.708262: step: 698/466, loss: 0.00933043658733368 2023-01-22 13:18:17.332498: step: 700/466, loss: 0.046223234385252 2023-01-22 13:18:18.035692: step: 702/466, loss: 0.08472418040037155 2023-01-22 13:18:18.674312: step: 704/466, loss: 0.04190603643655777 2023-01-22 13:18:19.283178: step: 706/466, loss: 0.2162904441356659 2023-01-22 13:18:19.852244: step: 708/466, loss: 0.01777159608900547 2023-01-22 13:18:20.446013: step: 710/466, loss: 0.018709806725382805 2023-01-22 13:18:21.052730: step: 712/466, loss: 0.04080433398485184 2023-01-22 13:18:21.635220: step: 714/466, loss: 0.024690769612789154 2023-01-22 13:18:22.285332: step: 716/466, loss: 0.026956753805279732 2023-01-22 13:18:22.979240: step: 718/466, loss: 0.23514507710933685 2023-01-22 13:18:23.592015: step: 720/466, loss: 0.005163929425179958 2023-01-22 13:18:24.250836: step: 722/466, loss: 0.16484905779361725 2023-01-22 13:18:24.823017: step: 724/466, loss: 0.04459630697965622 2023-01-22 13:18:25.359342: step: 726/466, loss: 0.0003833868831861764 2023-01-22 13:18:25.980449: step: 728/466, loss: 0.0007012172718532383 2023-01-22 13:18:26.570004: step: 730/466, loss: 0.009721334092319012 2023-01-22 13:18:27.281291: step: 732/466, loss: 0.006624647881835699 2023-01-22 13:18:27.864979: step: 734/466, loss: 0.0274251289665699 2023-01-22 13:18:28.437345: step: 736/466, loss: 0.015381962060928345 2023-01-22 13:18:29.104708: step: 738/466, loss: 0.18253710865974426 2023-01-22 13:18:29.708444: step: 740/466, loss: 0.9736369848251343 2023-01-22 13:18:30.298542: step: 742/466, loss: 0.005604333244264126 2023-01-22 13:18:30.926125: step: 744/466, loss: 0.006971567869186401 2023-01-22 13:18:31.529071: step: 746/466, loss: 0.0017240039305761456 2023-01-22 13:18:32.147192: step: 748/466, loss: 0.028396597132086754 2023-01-22 13:18:32.703627: step: 750/466, loss: 0.0009606159874238074 2023-01-22 13:18:33.324530: step: 752/466, loss: 0.12095136940479279 2023-01-22 13:18:34.152484: step: 754/466, loss: 0.01669827103614807 2023-01-22 13:18:34.705553: step: 756/466, loss: 0.0016191216418519616 2023-01-22 13:18:35.379420: step: 758/466, loss: 0.024371657520532608 2023-01-22 13:18:35.932552: step: 760/466, loss: 0.004538081120699644 2023-01-22 13:18:36.551102: step: 762/466, loss: 0.006123277824372053 2023-01-22 13:18:37.158969: step: 764/466, loss: 0.133865624666214 2023-01-22 13:18:37.831566: step: 766/466, loss: 0.02243422344326973 2023-01-22 13:18:38.502911: step: 768/466, loss: 0.048773620277643204 2023-01-22 13:18:39.099293: step: 770/466, loss: 0.004993142560124397 2023-01-22 13:18:39.688570: step: 772/466, loss: 0.005450873170047998 2023-01-22 13:18:40.349619: step: 774/466, loss: 0.039966829121112823 2023-01-22 13:18:40.932795: step: 776/466, loss: 0.000651595531962812 2023-01-22 13:18:41.570416: step: 778/466, loss: 0.02831016108393669 2023-01-22 13:18:42.154499: step: 780/466, loss: 0.3915857970714569 2023-01-22 13:18:42.721313: step: 782/466, loss: 0.01388892438262701 2023-01-22 13:18:43.332372: step: 784/466, loss: 0.07443975657224655 2023-01-22 13:18:43.944356: step: 786/466, loss: 0.03358267992734909 2023-01-22 13:18:44.538997: step: 788/466, loss: 0.08037069439888 2023-01-22 13:18:45.166139: step: 790/466, loss: 0.03405177965760231 2023-01-22 13:18:45.740973: step: 792/466, loss: 0.010468493215739727 2023-01-22 13:18:46.359072: step: 794/466, loss: 0.0027855599764734507 2023-01-22 13:18:46.982085: step: 796/466, loss: 0.007384442258626223 2023-01-22 13:18:47.606122: step: 798/466, loss: 0.017550082877278328 2023-01-22 13:18:48.257164: step: 800/466, loss: 0.021345291286706924 2023-01-22 13:18:48.830698: step: 802/466, loss: 0.014420836232602596 2023-01-22 13:18:49.403747: step: 804/466, loss: 0.0008694904972799122 2023-01-22 13:18:50.025506: step: 806/466, loss: 0.21264083683490753 2023-01-22 13:18:50.623866: step: 808/466, loss: 0.030158042907714844 2023-01-22 13:18:51.200934: step: 810/466, loss: 0.004542697686702013 2023-01-22 13:18:51.792745: step: 812/466, loss: 0.012855260632932186 2023-01-22 13:18:52.460471: step: 814/466, loss: 0.013972077518701553 2023-01-22 13:18:53.050345: step: 816/466, loss: 0.00239029573276639 2023-01-22 13:18:53.685443: step: 818/466, loss: 0.0032751683611422777 2023-01-22 13:18:54.249612: step: 820/466, loss: 0.002388720866292715 2023-01-22 13:18:54.881202: step: 822/466, loss: 0.006094999145716429 2023-01-22 13:18:55.499617: step: 824/466, loss: 0.034857261925935745 2023-01-22 13:18:56.102434: step: 826/466, loss: 0.026030782610177994 2023-01-22 13:18:56.654368: step: 828/466, loss: 0.004401189275085926 2023-01-22 13:18:57.236474: step: 830/466, loss: 0.007346487138420343 2023-01-22 13:18:57.852470: step: 832/466, loss: 0.011273558251559734 2023-01-22 13:18:58.448479: step: 834/466, loss: 0.00036735390312969685 2023-01-22 13:18:58.993281: step: 836/466, loss: 0.013762393966317177 2023-01-22 13:18:59.571951: step: 838/466, loss: 0.010150541551411152 2023-01-22 13:19:00.186983: step: 840/466, loss: 0.014261798933148384 2023-01-22 13:19:00.733032: step: 842/466, loss: 0.0029600472189486027 2023-01-22 13:19:01.300523: step: 844/466, loss: 0.005396983586251736 2023-01-22 13:19:01.817829: step: 846/466, loss: 4.712427471531555e-05 2023-01-22 13:19:02.490641: step: 848/466, loss: 0.023324286565184593 2023-01-22 13:19:03.108200: step: 850/466, loss: 0.22444498538970947 2023-01-22 13:19:03.693420: step: 852/466, loss: 0.009761333465576172 2023-01-22 13:19:04.347665: step: 854/466, loss: 0.14486908912658691 2023-01-22 13:19:05.015183: step: 856/466, loss: 0.004524150863289833 2023-01-22 13:19:05.663252: step: 858/466, loss: 0.005788067821413279 2023-01-22 13:19:06.250621: step: 860/466, loss: 0.005012852605432272 2023-01-22 13:19:06.885812: step: 862/466, loss: 0.024343110620975494 2023-01-22 13:19:07.502710: step: 864/466, loss: 0.0030281671788543463 2023-01-22 13:19:08.105124: step: 866/466, loss: 0.005659808404743671 2023-01-22 13:19:08.757831: step: 868/466, loss: 0.0009139773319475353 2023-01-22 13:19:09.357157: step: 870/466, loss: 0.012200686149299145 2023-01-22 13:19:09.915419: step: 872/466, loss: 0.010772855952382088 2023-01-22 13:19:10.548948: step: 874/466, loss: 0.002077052602544427 2023-01-22 13:19:11.201259: step: 876/466, loss: 0.1010916456580162 2023-01-22 13:19:11.782413: step: 878/466, loss: 0.18734891712665558 2023-01-22 13:19:12.362860: step: 880/466, loss: 0.020228328183293343 2023-01-22 13:19:13.018814: step: 882/466, loss: 0.03781450167298317 2023-01-22 13:19:13.616457: step: 884/466, loss: 0.010309301316738129 2023-01-22 13:19:14.220932: step: 886/466, loss: 0.001091604819521308 2023-01-22 13:19:14.878820: step: 888/466, loss: 0.005849903449416161 2023-01-22 13:19:15.539867: step: 890/466, loss: 0.01180856954306364 2023-01-22 13:19:16.113537: step: 892/466, loss: 0.012540485709905624 2023-01-22 13:19:16.698588: step: 894/466, loss: 0.07636093348264694 2023-01-22 13:19:17.289254: step: 896/466, loss: 0.022477617487311363 2023-01-22 13:19:17.897875: step: 898/466, loss: 0.003358501475304365 2023-01-22 13:19:18.447154: step: 900/466, loss: 0.006681774277240038 2023-01-22 13:19:19.060725: step: 902/466, loss: 0.008023912087082863 2023-01-22 13:19:19.653019: step: 904/466, loss: 0.008172152563929558 2023-01-22 13:19:20.236894: step: 906/466, loss: 0.012225516140460968 2023-01-22 13:19:20.854973: step: 908/466, loss: 0.0005171762895770371 2023-01-22 13:19:21.519807: step: 910/466, loss: 0.03175482898950577 2023-01-22 13:19:22.107120: step: 912/466, loss: 0.011137586086988449 2023-01-22 13:19:22.698789: step: 914/466, loss: 0.010821109637618065 2023-01-22 13:19:23.200051: step: 916/466, loss: 0.03788109868764877 2023-01-22 13:19:23.834511: step: 918/466, loss: 0.0038046720437705517 2023-01-22 13:19:24.481772: step: 920/466, loss: 0.1045636311173439 2023-01-22 13:19:25.108985: step: 922/466, loss: 0.023890666663646698 2023-01-22 13:19:25.645308: step: 924/466, loss: 0.005773146171122789 2023-01-22 13:19:26.238989: step: 926/466, loss: 0.000679883174598217 2023-01-22 13:19:26.908923: step: 928/466, loss: 0.01151447743177414 2023-01-22 13:19:27.420401: step: 930/466, loss: 0.006437981501221657 2023-01-22 13:19:28.026412: step: 932/466, loss: 0.003340943017974496 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3174403540614478, 'r': 0.3469556810994192, 'f1': 0.3315424187477678}, 'combined': 0.24429441381414468, 'epoch': 30} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.346054197985152, 'r': 0.32536291578326687, 'f1': 0.3353897318569808}, 'combined': 0.22243464081706496, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3151041666666667, 'r': 0.2936197916666667, 'f1': 0.30398284313725493}, 'combined': 0.20265522875816994, 'epoch': 30} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35794270249428406, 'r': 0.3088924449105053, 'f1': 0.3316135837819485}, 'combined': 0.21642149678400846, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3054007110910096, 'r': 0.3494433183830717, 'f1': 0.32594093590775003}, 'combined': 0.24016700540571054, 'epoch': 30} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.33891459173366717, 'r': 0.31367939269548933, 'f1': 0.3258090814418077}, 'combined': 0.2160806343241004, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22756410256410253, 'r': 0.33809523809523806, 'f1': 0.2720306513409961}, 'combined': 0.18135376756066404, 'epoch': 30} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42045454545454547, 'r': 0.40217391304347827, 'f1': 0.41111111111111115}, 'combined': 0.2740740740740741, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34210526315789475, 'r': 0.22413793103448276, 'f1': 0.2708333333333333}, 'combined': 0.18055555555555552, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:21:53.374411: step: 2/466, loss: 0.03907116875052452 2023-01-22 13:21:53.953625: step: 4/466, loss: 0.022938139736652374 2023-01-22 13:21:54.589785: step: 6/466, loss: 0.0043020425364375114 2023-01-22 13:21:55.225954: step: 8/466, loss: 0.017011329531669617 2023-01-22 13:21:55.835756: step: 10/466, loss: 0.00695255259051919 2023-01-22 13:21:56.536679: step: 12/466, loss: 0.6651405692100525 2023-01-22 13:21:57.160030: step: 14/466, loss: 0.02912992797791958 2023-01-22 13:21:57.728039: step: 16/466, loss: 0.044540539383888245 2023-01-22 13:21:58.330100: step: 18/466, loss: 0.03385673463344574 2023-01-22 13:21:58.958455: step: 20/466, loss: 0.002722368575632572 2023-01-22 13:21:59.575198: step: 22/466, loss: 0.00797630287706852 2023-01-22 13:22:00.150844: step: 24/466, loss: 0.013233490288257599 2023-01-22 13:22:00.770015: step: 26/466, loss: 0.11989618092775345 2023-01-22 13:22:01.408014: step: 28/466, loss: 0.00013836105063091964 2023-01-22 13:22:02.003046: step: 30/466, loss: 0.04954499006271362 2023-01-22 13:22:02.604129: step: 32/466, loss: 0.0007336020935326815 2023-01-22 13:22:03.249120: step: 34/466, loss: 0.0631098598241806 2023-01-22 13:22:03.860329: step: 36/466, loss: 0.016741055995225906 2023-01-22 13:22:04.476304: step: 38/466, loss: 0.00044407794484868646 2023-01-22 13:22:05.099504: step: 40/466, loss: 0.0063453433103859425 2023-01-22 13:22:05.752652: step: 42/466, loss: 0.061242565512657166 2023-01-22 13:22:06.364210: step: 44/466, loss: 0.03584650903940201 2023-01-22 13:22:07.041784: step: 46/466, loss: 0.00015868403716012836 2023-01-22 13:22:07.637578: step: 48/466, loss: 0.1516694277524948 2023-01-22 13:22:08.246331: step: 50/466, loss: 0.0011374569730833173 2023-01-22 13:22:08.813745: step: 52/466, loss: 0.025352753698825836 2023-01-22 13:22:09.422281: step: 54/466, loss: 0.004081857856363058 2023-01-22 13:22:10.068294: step: 56/466, loss: 0.011429132893681526 2023-01-22 13:22:10.568279: step: 58/466, loss: 0.0034054499119520187 2023-01-22 13:22:11.190677: step: 60/466, loss: 0.005714302882552147 2023-01-22 13:22:11.889161: step: 62/466, loss: 0.0326569564640522 2023-01-22 13:22:12.520638: step: 64/466, loss: 0.024123404175043106 2023-01-22 13:22:13.150884: step: 66/466, loss: 0.0016146288253366947 2023-01-22 13:22:13.774315: step: 68/466, loss: 0.038427501916885376 2023-01-22 13:22:14.394636: step: 70/466, loss: 0.03747810795903206 2023-01-22 13:22:15.011696: step: 72/466, loss: 0.0019054411677643657 2023-01-22 13:22:15.595760: step: 74/466, loss: 0.002627598587423563 2023-01-22 13:22:16.281490: step: 76/466, loss: 0.011531215161085129 2023-01-22 13:22:16.872107: step: 78/466, loss: 0.017680393531918526 2023-01-22 13:22:17.488850: step: 80/466, loss: 0.019427277147769928 2023-01-22 13:22:18.088084: step: 82/466, loss: 0.029101530089974403 2023-01-22 13:22:18.692332: step: 84/466, loss: 0.03290451690554619 2023-01-22 13:22:19.392169: step: 86/466, loss: 5.953895379207097e-05 2023-01-22 13:22:19.930966: step: 88/466, loss: 0.00045504237641580403 2023-01-22 13:22:20.628241: step: 90/466, loss: 0.019244128838181496 2023-01-22 13:22:21.256254: step: 92/466, loss: 0.037002597004175186 2023-01-22 13:22:21.935491: step: 94/466, loss: 0.0021970639936625957 2023-01-22 13:22:22.666773: step: 96/466, loss: 0.020365744829177856 2023-01-22 13:22:23.270280: step: 98/466, loss: 0.005219418555498123 2023-01-22 13:22:23.890464: step: 100/466, loss: 0.020970579236745834 2023-01-22 13:22:24.472756: step: 102/466, loss: 0.02584446780383587 2023-01-22 13:22:25.098273: step: 104/466, loss: 0.02854795567691326 2023-01-22 13:22:25.697638: step: 106/466, loss: 0.0016252042260020971 2023-01-22 13:22:26.315166: step: 108/466, loss: 0.642196774482727 2023-01-22 13:22:26.922599: step: 110/466, loss: 0.006834291387349367 2023-01-22 13:22:27.584535: step: 112/466, loss: 0.27037227153778076 2023-01-22 13:22:28.126724: step: 114/466, loss: 0.011913014575839043 2023-01-22 13:22:28.721616: step: 116/466, loss: 0.03166844695806503 2023-01-22 13:22:29.414858: step: 118/466, loss: 0.014308612793684006 2023-01-22 13:22:30.031446: step: 120/466, loss: 0.03876231610774994 2023-01-22 13:22:30.656992: step: 122/466, loss: 0.02030685916543007 2023-01-22 13:22:31.379004: step: 124/466, loss: 0.004717486910521984 2023-01-22 13:22:31.935138: step: 126/466, loss: 0.03216750919818878 2023-01-22 13:22:32.534990: step: 128/466, loss: 0.011396355926990509 2023-01-22 13:22:33.249062: step: 130/466, loss: 0.034590549767017365 2023-01-22 13:22:33.871913: step: 132/466, loss: 0.0032397115137428045 2023-01-22 13:22:34.505757: step: 134/466, loss: 0.06025297939777374 2023-01-22 13:22:35.128646: step: 136/466, loss: 0.006710045505315065 2023-01-22 13:22:35.732289: step: 138/466, loss: 0.009684402495622635 2023-01-22 13:22:36.371615: step: 140/466, loss: 0.04758431017398834 2023-01-22 13:22:36.936161: step: 142/466, loss: 0.0214069951325655 2023-01-22 13:22:37.496933: step: 144/466, loss: 0.0012129333335906267 2023-01-22 13:22:38.113375: step: 146/466, loss: 0.004989032633602619 2023-01-22 13:22:38.754708: step: 148/466, loss: 0.005683788564056158 2023-01-22 13:22:39.385469: step: 150/466, loss: 0.007793087977916002 2023-01-22 13:22:40.035927: step: 152/466, loss: 0.010247008875012398 2023-01-22 13:22:40.589817: step: 154/466, loss: 0.0003937317233067006 2023-01-22 13:22:41.276334: step: 156/466, loss: 0.004915554076433182 2023-01-22 13:22:41.920315: step: 158/466, loss: 0.005361888092011213 2023-01-22 13:22:42.521581: step: 160/466, loss: 0.014959918335080147 2023-01-22 13:22:43.077631: step: 162/466, loss: 0.008497266098856926 2023-01-22 13:22:43.650812: step: 164/466, loss: 0.051364511251449585 2023-01-22 13:22:44.248434: step: 166/466, loss: 0.007937022484838963 2023-01-22 13:22:44.821430: step: 168/466, loss: 0.013537651859223843 2023-01-22 13:22:45.424815: step: 170/466, loss: 0.004952648654580116 2023-01-22 13:22:46.066352: step: 172/466, loss: 0.015321213752031326 2023-01-22 13:22:46.671241: step: 174/466, loss: 0.0075547955930233 2023-01-22 13:22:47.294186: step: 176/466, loss: 0.08551667630672455 2023-01-22 13:22:47.867948: step: 178/466, loss: 0.05356985330581665 2023-01-22 13:22:48.471483: step: 180/466, loss: 0.007350060157477856 2023-01-22 13:22:49.055399: step: 182/466, loss: 0.10914287716150284 2023-01-22 13:22:49.672255: step: 184/466, loss: 0.02786598540842533 2023-01-22 13:22:50.265980: step: 186/466, loss: 0.0018171078991144896 2023-01-22 13:22:50.881516: step: 188/466, loss: 0.008747768588364124 2023-01-22 13:22:51.533345: step: 190/466, loss: 0.019645584747195244 2023-01-22 13:22:52.134722: step: 192/466, loss: 0.0018772552721202374 2023-01-22 13:22:52.749405: step: 194/466, loss: 0.026600424200296402 2023-01-22 13:22:53.326143: step: 196/466, loss: 0.008282824419438839 2023-01-22 13:22:53.979014: step: 198/466, loss: 0.011049061082303524 2023-01-22 13:22:54.685078: step: 200/466, loss: 0.0026763228233903646 2023-01-22 13:22:55.300524: step: 202/466, loss: 0.006706198211759329 2023-01-22 13:22:55.902007: step: 204/466, loss: 0.11584700644016266 2023-01-22 13:22:56.459282: step: 206/466, loss: 0.030152572318911552 2023-01-22 13:22:57.142169: step: 208/466, loss: 0.020291537046432495 2023-01-22 13:22:57.736377: step: 210/466, loss: 0.0031722483690828085 2023-01-22 13:22:58.343749: step: 212/466, loss: 0.03458666428923607 2023-01-22 13:22:58.981170: step: 214/466, loss: 0.03625361621379852 2023-01-22 13:22:59.600837: step: 216/466, loss: 0.0018723373068496585 2023-01-22 13:23:00.142390: step: 218/466, loss: 0.10305580496788025 2023-01-22 13:23:00.688421: step: 220/466, loss: 0.0006719183875247836 2023-01-22 13:23:01.283388: step: 222/466, loss: 0.013313040137290955 2023-01-22 13:23:01.893012: step: 224/466, loss: 0.0028870024252682924 2023-01-22 13:23:02.492409: step: 226/466, loss: 0.0007040125783532858 2023-01-22 13:23:03.120874: step: 228/466, loss: 0.16122116148471832 2023-01-22 13:23:03.694966: step: 230/466, loss: 0.04338943958282471 2023-01-22 13:23:04.303746: step: 232/466, loss: 0.0360637828707695 2023-01-22 13:23:05.046379: step: 234/466, loss: 0.07026376575231552 2023-01-22 13:23:05.694703: step: 236/466, loss: 0.05887288227677345 2023-01-22 13:23:06.273211: step: 238/466, loss: 0.006853953935205936 2023-01-22 13:23:06.903123: step: 240/466, loss: 0.0164225772023201 2023-01-22 13:23:07.548235: step: 242/466, loss: 0.052106983959674835 2023-01-22 13:23:08.209548: step: 244/466, loss: 0.16110815107822418 2023-01-22 13:23:08.841510: step: 246/466, loss: 0.02118993178009987 2023-01-22 13:23:09.467834: step: 248/466, loss: 0.007692660205066204 2023-01-22 13:23:10.081812: step: 250/466, loss: 0.036612603813409805 2023-01-22 13:23:10.734195: step: 252/466, loss: 0.1068938821554184 2023-01-22 13:23:11.354529: step: 254/466, loss: 0.04812590405344963 2023-01-22 13:23:11.950486: step: 256/466, loss: 0.06173084303736687 2023-01-22 13:23:12.594530: step: 258/466, loss: 0.02443801425397396 2023-01-22 13:23:13.169032: step: 260/466, loss: 0.0062148310244083405 2023-01-22 13:23:13.721698: step: 262/466, loss: 0.022380737587809563 2023-01-22 13:23:14.267222: step: 264/466, loss: 0.012677857652306557 2023-01-22 13:23:14.842937: step: 266/466, loss: 0.003117139218375087 2023-01-22 13:23:15.434916: step: 268/466, loss: 0.014295085333287716 2023-01-22 13:23:15.990143: step: 270/466, loss: 0.022981129586696625 2023-01-22 13:23:16.642825: step: 272/466, loss: 0.04769422858953476 2023-01-22 13:23:17.311701: step: 274/466, loss: 0.04571692645549774 2023-01-22 13:23:17.919511: step: 276/466, loss: 0.015166614204645157 2023-01-22 13:23:18.477957: step: 278/466, loss: 9.916900307871401e-05 2023-01-22 13:23:19.106784: step: 280/466, loss: 0.30812007188796997 2023-01-22 13:23:19.696303: step: 282/466, loss: 0.0048457966186106205 2023-01-22 13:23:20.298057: step: 284/466, loss: 0.03308245912194252 2023-01-22 13:23:20.895517: step: 286/466, loss: 0.043762821704149246 2023-01-22 13:23:21.469964: step: 288/466, loss: 2.7630398273468018 2023-01-22 13:23:22.048290: step: 290/466, loss: 7.269015789031982 2023-01-22 13:23:22.692682: step: 292/466, loss: 0.004670634400099516 2023-01-22 13:23:23.312316: step: 294/466, loss: 0.005938251968473196 2023-01-22 13:23:23.889810: step: 296/466, loss: 0.01262521743774414 2023-01-22 13:23:24.453678: step: 298/466, loss: 0.00548663130030036 2023-01-22 13:23:25.044014: step: 300/466, loss: 0.08520643413066864 2023-01-22 13:23:25.805927: step: 302/466, loss: 0.050616439431905746 2023-01-22 13:23:26.370702: step: 304/466, loss: 0.015373089350759983 2023-01-22 13:23:26.937202: step: 306/466, loss: 0.013275873847305775 2023-01-22 13:23:27.543356: step: 308/466, loss: 0.0582607239484787 2023-01-22 13:23:28.176913: step: 310/466, loss: 0.37358009815216064 2023-01-22 13:23:28.763099: step: 312/466, loss: 0.2164192646741867 2023-01-22 13:23:29.423902: step: 314/466, loss: 0.048840828239917755 2023-01-22 13:23:30.065800: step: 316/466, loss: 0.0010105979163199663 2023-01-22 13:23:30.651117: step: 318/466, loss: 0.014398284256458282 2023-01-22 13:23:31.294120: step: 320/466, loss: 0.004466744605451822 2023-01-22 13:23:31.897846: step: 322/466, loss: 0.0074524241499602795 2023-01-22 13:23:32.459179: step: 324/466, loss: 0.02958526834845543 2023-01-22 13:23:33.103023: step: 326/466, loss: 0.01925458014011383 2023-01-22 13:23:33.771137: step: 328/466, loss: 0.022651994600892067 2023-01-22 13:23:34.395081: step: 330/466, loss: 0.0065770456567406654 2023-01-22 13:23:35.006528: step: 332/466, loss: 0.03941261023283005 2023-01-22 13:23:35.599283: step: 334/466, loss: 0.016719456762075424 2023-01-22 13:23:36.243002: step: 336/466, loss: 0.04962924122810364 2023-01-22 13:23:36.849256: step: 338/466, loss: 0.0021794959902763367 2023-01-22 13:23:37.518008: step: 340/466, loss: 0.00030944435275159776 2023-01-22 13:23:38.094156: step: 342/466, loss: 0.010837533511221409 2023-01-22 13:23:38.668101: step: 344/466, loss: 0.021002069115638733 2023-01-22 13:23:39.198487: step: 346/466, loss: 0.0004114443436264992 2023-01-22 13:23:39.849556: step: 348/466, loss: 4.3047621147707105e-05 2023-01-22 13:23:40.466776: step: 350/466, loss: 0.0005217364523559809 2023-01-22 13:23:41.034851: step: 352/466, loss: 0.013475162908434868 2023-01-22 13:23:41.614271: step: 354/466, loss: 0.06028321385383606 2023-01-22 13:23:42.223971: step: 356/466, loss: 0.001066471915692091 2023-01-22 13:23:42.855281: step: 358/466, loss: 0.02661680243909359 2023-01-22 13:23:43.453807: step: 360/466, loss: 0.1952916979789734 2023-01-22 13:23:44.096800: step: 362/466, loss: 0.0018600921612232924 2023-01-22 13:23:44.726022: step: 364/466, loss: 0.012644550763070583 2023-01-22 13:23:45.326499: step: 366/466, loss: 0.026175061240792274 2023-01-22 13:23:45.914066: step: 368/466, loss: 0.010393361561000347 2023-01-22 13:23:46.578306: step: 370/466, loss: 0.020793933421373367 2023-01-22 13:23:47.225120: step: 372/466, loss: 0.0021076411940157413 2023-01-22 13:23:47.756776: step: 374/466, loss: 0.019097154960036278 2023-01-22 13:23:48.350189: step: 376/466, loss: 0.002498042769730091 2023-01-22 13:23:48.968896: step: 378/466, loss: 0.010241997428238392 2023-01-22 13:23:49.617942: step: 380/466, loss: 0.008600963279604912 2023-01-22 13:23:50.223784: step: 382/466, loss: 0.018622539937496185 2023-01-22 13:23:50.839846: step: 384/466, loss: 0.07526848465204239 2023-01-22 13:23:51.481494: step: 386/466, loss: 0.019352687522768974 2023-01-22 13:23:52.060438: step: 388/466, loss: 0.061983659863471985 2023-01-22 13:23:52.670904: step: 390/466, loss: 0.011880343779921532 2023-01-22 13:23:53.283230: step: 392/466, loss: 0.22809994220733643 2023-01-22 13:23:53.904725: step: 394/466, loss: 0.04433238133788109 2023-01-22 13:23:54.517079: step: 396/466, loss: 0.018859324976801872 2023-01-22 13:23:55.129222: step: 398/466, loss: 0.0173000730574131 2023-01-22 13:23:55.716295: step: 400/466, loss: 0.006490845233201981 2023-01-22 13:23:56.316817: step: 402/466, loss: 0.012548327445983887 2023-01-22 13:23:56.929548: step: 404/466, loss: 0.01079154945909977 2023-01-22 13:23:57.532596: step: 406/466, loss: 0.009839864447712898 2023-01-22 13:23:58.135861: step: 408/466, loss: 0.04299784451723099 2023-01-22 13:23:58.767296: step: 410/466, loss: 0.033929768949747086 2023-01-22 13:23:59.423047: step: 412/466, loss: 0.015043784864246845 2023-01-22 13:24:00.015611: step: 414/466, loss: 0.003470017807558179 2023-01-22 13:24:00.625934: step: 416/466, loss: 0.000816081534139812 2023-01-22 13:24:01.202598: step: 418/466, loss: 0.014361785724759102 2023-01-22 13:24:01.767961: step: 420/466, loss: 0.025000043213367462 2023-01-22 13:24:02.414065: step: 422/466, loss: 0.024604009464383125 2023-01-22 13:24:02.981545: step: 424/466, loss: 0.0064855716191232204 2023-01-22 13:24:03.554896: step: 426/466, loss: 0.003926909063011408 2023-01-22 13:24:04.158102: step: 428/466, loss: 0.0009603975340723991 2023-01-22 13:24:04.735577: step: 430/466, loss: 0.002520288573578 2023-01-22 13:24:05.378397: step: 432/466, loss: 0.014697364531457424 2023-01-22 13:24:06.010845: step: 434/466, loss: 0.015412437729537487 2023-01-22 13:24:06.621346: step: 436/466, loss: 0.00022073285072110593 2023-01-22 13:24:07.161323: step: 438/466, loss: 0.0008744518272578716 2023-01-22 13:24:07.728948: step: 440/466, loss: 0.02448434755206108 2023-01-22 13:24:08.313620: step: 442/466, loss: 0.01597929373383522 2023-01-22 13:24:08.930459: step: 444/466, loss: 0.009523588232696056 2023-01-22 13:24:09.486346: step: 446/466, loss: 0.0016114782774820924 2023-01-22 13:24:10.146657: step: 448/466, loss: 0.0013937718467786908 2023-01-22 13:24:10.739093: step: 450/466, loss: 0.0294453427195549 2023-01-22 13:24:11.382343: step: 452/466, loss: 0.02636844851076603 2023-01-22 13:24:12.034918: step: 454/466, loss: 0.027769722044467926 2023-01-22 13:24:12.649223: step: 456/466, loss: 0.06924347579479218 2023-01-22 13:24:13.260589: step: 458/466, loss: 0.009108162485063076 2023-01-22 13:24:13.854831: step: 460/466, loss: 0.040876325219869614 2023-01-22 13:24:14.417565: step: 462/466, loss: 0.01908290758728981 2023-01-22 13:24:15.042818: step: 464/466, loss: 0.0011091256747022271 2023-01-22 13:24:15.601166: step: 466/466, loss: 0.012499526143074036 2023-01-22 13:24:16.201933: step: 468/466, loss: 0.04883186146616936 2023-01-22 13:24:16.822117: step: 470/466, loss: 0.0036303414963185787 2023-01-22 13:24:17.391716: step: 472/466, loss: 0.13335932791233063 2023-01-22 13:24:18.021761: step: 474/466, loss: 0.0019364000763744116 2023-01-22 13:24:18.596020: step: 476/466, loss: 7.695942622376606e-05 2023-01-22 13:24:19.225363: step: 478/466, loss: 0.0039782109670341015 2023-01-22 13:24:19.840201: step: 480/466, loss: 0.019232138991355896 2023-01-22 13:24:20.478439: step: 482/466, loss: 0.01933993399143219 2023-01-22 13:24:21.054192: step: 484/466, loss: 0.10488546639680862 2023-01-22 13:24:21.619331: step: 486/466, loss: 0.0014955231454223394 2023-01-22 13:24:22.276005: step: 488/466, loss: 0.013511566445231438 2023-01-22 13:24:22.855444: step: 490/466, loss: 0.021985583007335663 2023-01-22 13:24:23.428683: step: 492/466, loss: 0.011495518498122692 2023-01-22 13:24:24.185891: step: 494/466, loss: 0.023479973897337914 2023-01-22 13:24:24.793364: step: 496/466, loss: 0.02006375789642334 2023-01-22 13:24:25.409475: step: 498/466, loss: 0.008442089892923832 2023-01-22 13:24:26.052983: step: 500/466, loss: 0.0020238172728568316 2023-01-22 13:24:26.616372: step: 502/466, loss: 0.001612320076674223 2023-01-22 13:24:27.168436: step: 504/466, loss: 0.009541473351418972 2023-01-22 13:24:27.785684: step: 506/466, loss: 0.8183833360671997 2023-01-22 13:24:28.420566: step: 508/466, loss: 0.08022051304578781 2023-01-22 13:24:29.022456: step: 510/466, loss: 0.015515362843871117 2023-01-22 13:24:29.664850: step: 512/466, loss: 0.02318497560918331 2023-01-22 13:24:30.262570: step: 514/466, loss: 0.004739983938634396 2023-01-22 13:24:30.850908: step: 516/466, loss: 0.06561258435249329 2023-01-22 13:24:31.516708: step: 518/466, loss: 0.00429729325696826 2023-01-22 13:24:32.146797: step: 520/466, loss: 0.003679410321637988 2023-01-22 13:24:32.783901: step: 522/466, loss: 0.0016411789692938328 2023-01-22 13:24:33.456266: step: 524/466, loss: 0.04868793487548828 2023-01-22 13:24:33.987053: step: 526/466, loss: 0.013301272876560688 2023-01-22 13:24:34.637340: step: 528/466, loss: 0.6133697032928467 2023-01-22 13:24:35.239318: step: 530/466, loss: 0.0038876847829669714 2023-01-22 13:24:35.890212: step: 532/466, loss: 0.0029869128484278917 2023-01-22 13:24:36.530653: step: 534/466, loss: 0.017007049173116684 2023-01-22 13:24:37.134558: step: 536/466, loss: 0.027911217883229256 2023-01-22 13:24:37.812943: step: 538/466, loss: 0.011974958702921867 2023-01-22 13:24:38.423420: step: 540/466, loss: 0.012257498688995838 2023-01-22 13:24:39.020924: step: 542/466, loss: 0.0002447470906190574 2023-01-22 13:24:39.639316: step: 544/466, loss: 0.0006711081368848681 2023-01-22 13:24:40.258317: step: 546/466, loss: 0.04007217660546303 2023-01-22 13:24:40.846332: step: 548/466, loss: 0.0033877098467200994 2023-01-22 13:24:41.424346: step: 550/466, loss: 0.0033911005593836308 2023-01-22 13:24:42.075061: step: 552/466, loss: 0.03495388478040695 2023-01-22 13:24:42.673679: step: 554/466, loss: 0.01888686791062355 2023-01-22 13:24:43.282054: step: 556/466, loss: 0.010386298410594463 2023-01-22 13:24:43.949075: step: 558/466, loss: 0.9091969132423401 2023-01-22 13:24:44.597903: step: 560/466, loss: 0.004457194823771715 2023-01-22 13:24:45.145321: step: 562/466, loss: 0.03882373869419098 2023-01-22 13:24:45.726604: step: 564/466, loss: 0.005371954757720232 2023-01-22 13:24:46.301110: step: 566/466, loss: 0.024788759648799896 2023-01-22 13:24:46.875524: step: 568/466, loss: 0.008069412782788277 2023-01-22 13:24:47.428468: step: 570/466, loss: 0.028956662863492966 2023-01-22 13:24:48.099696: step: 572/466, loss: 0.0024390460457652807 2023-01-22 13:24:48.771685: step: 574/466, loss: 0.00887396652251482 2023-01-22 13:24:49.380951: step: 576/466, loss: 0.008376321755349636 2023-01-22 13:24:50.014721: step: 578/466, loss: 0.0015364962164312601 2023-01-22 13:24:50.649050: step: 580/466, loss: 0.029758483171463013 2023-01-22 13:24:51.287228: step: 582/466, loss: 0.010353045538067818 2023-01-22 13:24:51.908460: step: 584/466, loss: 0.0016091320430859923 2023-01-22 13:24:52.535566: step: 586/466, loss: 0.03720836341381073 2023-01-22 13:24:53.202673: step: 588/466, loss: 0.010439724661409855 2023-01-22 13:24:53.919683: step: 590/466, loss: 0.02553858608007431 2023-01-22 13:24:54.511909: step: 592/466, loss: 0.007521891500800848 2023-01-22 13:24:55.113255: step: 594/466, loss: 9.809506445890293e-05 2023-01-22 13:24:55.797103: step: 596/466, loss: 0.013808910734951496 2023-01-22 13:24:56.485745: step: 598/466, loss: 0.027568509802222252 2023-01-22 13:24:57.092148: step: 600/466, loss: 0.007159669417887926 2023-01-22 13:24:57.701252: step: 602/466, loss: 0.17362605035305023 2023-01-22 13:24:58.352492: step: 604/466, loss: 0.036958254873752594 2023-01-22 13:24:59.014077: step: 606/466, loss: 0.0023265182971954346 2023-01-22 13:24:59.610679: step: 608/466, loss: 0.03575790673494339 2023-01-22 13:25:00.202578: step: 610/466, loss: 0.0038715917617082596 2023-01-22 13:25:00.891361: step: 612/466, loss: 0.02022351138293743 2023-01-22 13:25:01.525559: step: 614/466, loss: 0.02672039344906807 2023-01-22 13:25:02.136764: step: 616/466, loss: 0.005389675032347441 2023-01-22 13:25:02.752516: step: 618/466, loss: 0.0008047828450798988 2023-01-22 13:25:03.378417: step: 620/466, loss: 0.006716866511851549 2023-01-22 13:25:03.931643: step: 622/466, loss: 0.0020203841850161552 2023-01-22 13:25:04.577576: step: 624/466, loss: 0.031006475910544395 2023-01-22 13:25:05.213746: step: 626/466, loss: 0.010505175217986107 2023-01-22 13:25:05.893083: step: 628/466, loss: 0.007487665396183729 2023-01-22 13:25:06.510163: step: 630/466, loss: 0.021982848644256592 2023-01-22 13:25:07.066048: step: 632/466, loss: 0.003571385983377695 2023-01-22 13:25:07.694130: step: 634/466, loss: 0.011617590673267841 2023-01-22 13:25:08.281203: step: 636/466, loss: 0.013700786046683788 2023-01-22 13:25:08.887780: step: 638/466, loss: 0.08118680864572525 2023-01-22 13:25:09.553562: step: 640/466, loss: 0.03135356307029724 2023-01-22 13:25:10.135732: step: 642/466, loss: 0.0007427539676427841 2023-01-22 13:25:10.710112: step: 644/466, loss: 0.03151165693998337 2023-01-22 13:25:11.349991: step: 646/466, loss: 0.10037717968225479 2023-01-22 13:25:12.001000: step: 648/466, loss: 0.0019138669595122337 2023-01-22 13:25:12.578698: step: 650/466, loss: 0.00436336500570178 2023-01-22 13:25:13.221462: step: 652/466, loss: 0.009080728515982628 2023-01-22 13:25:13.814332: step: 654/466, loss: 0.007250132970511913 2023-01-22 13:25:14.358846: step: 656/466, loss: 0.010160627774894238 2023-01-22 13:25:14.912115: step: 658/466, loss: 0.005207338836044073 2023-01-22 13:25:15.476101: step: 660/466, loss: 0.008738711476325989 2023-01-22 13:25:16.091786: step: 662/466, loss: 0.012235710397362709 2023-01-22 13:25:16.720327: step: 664/466, loss: 0.015246798284351826 2023-01-22 13:25:17.323374: step: 666/466, loss: 0.05778932943940163 2023-01-22 13:25:17.952856: step: 668/466, loss: 0.08114214986562729 2023-01-22 13:25:18.557243: step: 670/466, loss: 0.0023649095091968775 2023-01-22 13:25:19.184298: step: 672/466, loss: 0.026188427582383156 2023-01-22 13:25:19.875446: step: 674/466, loss: 0.002187538892030716 2023-01-22 13:25:20.429843: step: 676/466, loss: 0.030874181538820267 2023-01-22 13:25:21.053693: step: 678/466, loss: 0.07868461310863495 2023-01-22 13:25:21.666207: step: 680/466, loss: 0.004194202832877636 2023-01-22 13:25:22.215674: step: 682/466, loss: 0.0002074727526633069 2023-01-22 13:25:22.779073: step: 684/466, loss: 0.002662382321432233 2023-01-22 13:25:23.368283: step: 686/466, loss: 0.0001038382833939977 2023-01-22 13:25:23.972951: step: 688/466, loss: 0.021273411810398102 2023-01-22 13:25:24.593687: step: 690/466, loss: 0.008990940637886524 2023-01-22 13:25:25.250263: step: 692/466, loss: 0.044320378452539444 2023-01-22 13:25:25.878955: step: 694/466, loss: 0.017037227749824524 2023-01-22 13:25:26.500596: step: 696/466, loss: 0.04047022759914398 2023-01-22 13:25:27.115615: step: 698/466, loss: 0.01361551322042942 2023-01-22 13:25:27.676912: step: 700/466, loss: 0.006879597902297974 2023-01-22 13:25:28.259746: step: 702/466, loss: 0.060287315398454666 2023-01-22 13:25:28.854644: step: 704/466, loss: 0.003986467607319355 2023-01-22 13:25:29.437767: step: 706/466, loss: 0.08152619749307632 2023-01-22 13:25:30.140692: step: 708/466, loss: 0.001346687087789178 2023-01-22 13:25:30.802498: step: 710/466, loss: 0.08224773406982422 2023-01-22 13:25:31.415136: step: 712/466, loss: 0.018333574756979942 2023-01-22 13:25:32.044240: step: 714/466, loss: 0.0008959770784713328 2023-01-22 13:25:32.694874: step: 716/466, loss: 0.020969970151782036 2023-01-22 13:25:33.286600: step: 718/466, loss: 0.02299494855105877 2023-01-22 13:25:33.951459: step: 720/466, loss: 0.047333959490060806 2023-01-22 13:25:34.567980: step: 722/466, loss: 0.005843609105795622 2023-01-22 13:25:35.164255: step: 724/466, loss: 0.0007942087249830365 2023-01-22 13:25:35.708590: step: 726/466, loss: 0.00327915302477777 2023-01-22 13:25:36.384277: step: 728/466, loss: 0.1284780651330948 2023-01-22 13:25:36.955205: step: 730/466, loss: 0.0004653561918530613 2023-01-22 13:25:37.547295: step: 732/466, loss: 0.011936572380363941 2023-01-22 13:25:38.143167: step: 734/466, loss: 0.0001334332046099007 2023-01-22 13:25:38.711435: step: 736/466, loss: 0.031935177743434906 2023-01-22 13:25:39.273742: step: 738/466, loss: 0.034123290330171585 2023-01-22 13:25:39.845511: step: 740/466, loss: 0.0009675708715803921 2023-01-22 13:25:40.473550: step: 742/466, loss: 0.005988074000924826 2023-01-22 13:25:41.062516: step: 744/466, loss: 0.020195962861180305 2023-01-22 13:25:41.739783: step: 746/466, loss: 0.030538450926542282 2023-01-22 13:25:42.358894: step: 748/466, loss: 0.004954514559358358 2023-01-22 13:25:42.977186: step: 750/466, loss: 0.024093136191368103 2023-01-22 13:25:43.525721: step: 752/466, loss: 0.006772663444280624 2023-01-22 13:25:44.125188: step: 754/466, loss: 0.0009768909076228738 2023-01-22 13:25:44.719211: step: 756/466, loss: 0.01045869942754507 2023-01-22 13:25:45.362589: step: 758/466, loss: 0.003131929552182555 2023-01-22 13:25:45.945711: step: 760/466, loss: 0.009658971801400185 2023-01-22 13:25:46.541489: step: 762/466, loss: 0.0002491218037903309 2023-01-22 13:25:47.091408: step: 764/466, loss: 0.0007519605569541454 2023-01-22 13:25:47.674632: step: 766/466, loss: 0.010694680735468864 2023-01-22 13:25:48.210109: step: 768/466, loss: 0.0009108647354878485 2023-01-22 13:25:48.804785: step: 770/466, loss: 0.03541029989719391 2023-01-22 13:25:49.479319: step: 772/466, loss: 0.030286915600299835 2023-01-22 13:25:50.038909: step: 774/466, loss: 0.007199748884886503 2023-01-22 13:25:50.638157: step: 776/466, loss: 0.03642946481704712 2023-01-22 13:25:51.274925: step: 778/466, loss: 0.04434438794851303 2023-01-22 13:25:51.888000: step: 780/466, loss: 0.0012827562168240547 2023-01-22 13:25:52.539043: step: 782/466, loss: 0.009099474176764488 2023-01-22 13:25:53.151551: step: 784/466, loss: 0.0005583069869317114 2023-01-22 13:25:53.711322: step: 786/466, loss: 0.0026499121449887753 2023-01-22 13:25:54.326008: step: 788/466, loss: 0.00255676475353539 2023-01-22 13:25:54.881809: step: 790/466, loss: 0.03963962942361832 2023-01-22 13:25:55.463355: step: 792/466, loss: 0.015734344720840454 2023-01-22 13:25:56.108675: step: 794/466, loss: 0.009534102864563465 2023-01-22 13:25:56.694076: step: 796/466, loss: 0.09624442458152771 2023-01-22 13:25:57.326890: step: 798/466, loss: 0.003762713400647044 2023-01-22 13:25:57.934483: step: 800/466, loss: 0.0015033320523798466 2023-01-22 13:25:58.587502: step: 802/466, loss: 0.04189210757613182 2023-01-22 13:25:59.355337: step: 804/466, loss: 0.10371461510658264 2023-01-22 13:26:00.017545: step: 806/466, loss: 0.48933953046798706 2023-01-22 13:26:00.636373: step: 808/466, loss: 0.004488999489694834 2023-01-22 13:26:01.275372: step: 810/466, loss: 0.0008561794529668987 2023-01-22 13:26:01.938729: step: 812/466, loss: 0.007319875992834568 2023-01-22 13:26:02.519339: step: 814/466, loss: 0.19539205729961395 2023-01-22 13:26:03.102541: step: 816/466, loss: 0.009182372130453587 2023-01-22 13:26:03.738457: step: 818/466, loss: 0.045163724571466446 2023-01-22 13:26:04.413744: step: 820/466, loss: 0.009169608354568481 2023-01-22 13:26:05.072139: step: 822/466, loss: 0.0011140016140416265 2023-01-22 13:26:05.658929: step: 824/466, loss: 0.0025377203710377216 2023-01-22 13:26:06.231027: step: 826/466, loss: 0.028506889939308167 2023-01-22 13:26:06.858707: step: 828/466, loss: 0.03266207501292229 2023-01-22 13:26:07.500196: step: 830/466, loss: 0.022404903545975685 2023-01-22 13:26:08.110213: step: 832/466, loss: 0.02750786393880844 2023-01-22 13:26:08.721220: step: 834/466, loss: 0.011269154027104378 2023-01-22 13:26:09.324334: step: 836/466, loss: 0.022157708182930946 2023-01-22 13:26:09.914225: step: 838/466, loss: 0.0012200011406093836 2023-01-22 13:26:10.532695: step: 840/466, loss: 0.023260338231921196 2023-01-22 13:26:11.256396: step: 842/466, loss: 0.013728282414376736 2023-01-22 13:26:11.872218: step: 844/466, loss: 0.027568018063902855 2023-01-22 13:26:12.448552: step: 846/466, loss: 0.02342831902205944 2023-01-22 13:26:13.049537: step: 848/466, loss: 0.031135616824030876 2023-01-22 13:26:13.705852: step: 850/466, loss: 0.16939984261989594 2023-01-22 13:26:14.314166: step: 852/466, loss: 0.03348062187433243 2023-01-22 13:26:14.919622: step: 854/466, loss: 0.0009071927634067833 2023-01-22 13:26:15.499472: step: 856/466, loss: 0.009717311710119247 2023-01-22 13:26:16.088079: step: 858/466, loss: 0.01987823285162449 2023-01-22 13:26:16.681309: step: 860/466, loss: 0.021753111854195595 2023-01-22 13:26:17.227257: step: 862/466, loss: 0.025307273492217064 2023-01-22 13:26:17.812202: step: 864/466, loss: 0.002120056189596653 2023-01-22 13:26:18.453161: step: 866/466, loss: 0.13705721497535706 2023-01-22 13:26:18.978367: step: 868/466, loss: 1.5340710878372192 2023-01-22 13:26:19.617325: step: 870/466, loss: 0.005955138243734837 2023-01-22 13:26:20.278789: step: 872/466, loss: 0.03116076998412609 2023-01-22 13:26:20.853157: step: 874/466, loss: 0.042830005288124084 2023-01-22 13:26:21.499355: step: 876/466, loss: 0.07013010233640671 2023-01-22 13:26:22.079706: step: 878/466, loss: 0.005209819413721561 2023-01-22 13:26:22.714704: step: 880/466, loss: 0.025534560903906822 2023-01-22 13:26:23.264761: step: 882/466, loss: 0.00187689031008631 2023-01-22 13:26:23.888308: step: 884/466, loss: 0.042018599808216095 2023-01-22 13:26:24.512804: step: 886/466, loss: 0.039473798125982285 2023-01-22 13:26:25.087728: step: 888/466, loss: 0.17161712050437927 2023-01-22 13:26:25.704421: step: 890/466, loss: 0.0070687225088477135 2023-01-22 13:26:26.269160: step: 892/466, loss: 0.007750903721898794 2023-01-22 13:26:26.908453: step: 894/466, loss: 0.003680052701383829 2023-01-22 13:26:27.439228: step: 896/466, loss: 0.04857083782553673 2023-01-22 13:26:27.981301: step: 898/466, loss: 0.0007663737633265555 2023-01-22 13:26:28.514271: step: 900/466, loss: 0.04972157999873161 2023-01-22 13:26:29.145774: step: 902/466, loss: 0.022673940286040306 2023-01-22 13:26:29.797902: step: 904/466, loss: 0.02935558557510376 2023-01-22 13:26:30.384483: step: 906/466, loss: 0.02141900546848774 2023-01-22 13:26:30.999781: step: 908/466, loss: 0.026830747723579407 2023-01-22 13:26:31.672792: step: 910/466, loss: 0.00036158942384645343 2023-01-22 13:26:32.369615: step: 912/466, loss: 0.047231242060661316 2023-01-22 13:26:32.993299: step: 914/466, loss: 0.07010781764984131 2023-01-22 13:26:33.588789: step: 916/466, loss: 0.0036849970929324627 2023-01-22 13:26:34.206638: step: 918/466, loss: 0.0026618526317179203 2023-01-22 13:26:34.839950: step: 920/466, loss: 0.027415724471211433 2023-01-22 13:26:35.399895: step: 922/466, loss: 0.012835756875574589 2023-01-22 13:26:35.989647: step: 924/466, loss: 0.025382524356245995 2023-01-22 13:26:36.616872: step: 926/466, loss: 0.005560525692999363 2023-01-22 13:26:37.191155: step: 928/466, loss: 0.008374533616006374 2023-01-22 13:26:37.733319: step: 930/466, loss: 0.008973834104835987 2023-01-22 13:26:38.320417: step: 932/466, loss: 0.01936868205666542 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3251245924857036, 'r': 0.3498019809096659, 'f1': 0.337012146141488}, 'combined': 0.24832473926214904, 'epoch': 31} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3489896192258383, 'r': 0.3215647009619771, 'f1': 0.33471633592266914}, 'combined': 0.221988036259594, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3149923312883436, 'r': 0.2917258522727273, 'f1': 0.30291297935103245}, 'combined': 0.20194198623402163, 'epoch': 31} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.36712059340607706, 'r': 0.30556363997411595, 'f1': 0.33352559570759643}, 'combined': 0.21766933614601028, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30581924038363434, 'r': 0.34469948536599393, 'f1': 0.3240974643851539}, 'combined': 0.23880865796800813, 'epoch': 31} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3437417301201588, 'r': 0.3133326611936573, 'f1': 0.327833537528391}, 'combined': 0.21742327877530593, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27685185185185185, 'r': 0.3559523809523809, 'f1': 0.31145833333333334}, 'combined': 0.20763888888888887, 'epoch': 31} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40476190476190477, 'r': 0.3695652173913043, 'f1': 0.38636363636363635}, 'combined': 0.25757575757575757, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.325, 'r': 0.22413793103448276, 'f1': 0.2653061224489796}, 'combined': 0.17687074829931973, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:29:06.244785: step: 2/466, loss: 0.004893600009381771 2023-01-22 13:29:06.909798: step: 4/466, loss: 0.001807203982025385 2023-01-22 13:29:07.478732: step: 6/466, loss: 0.004189473111182451 2023-01-22 13:29:08.150658: step: 8/466, loss: 0.009971344843506813 2023-01-22 13:29:08.743723: step: 10/466, loss: 0.0013811824610456824 2023-01-22 13:29:09.305409: step: 12/466, loss: 0.026940230280160904 2023-01-22 13:29:10.011075: step: 14/466, loss: 0.00019078326295129955 2023-01-22 13:29:10.640038: step: 16/466, loss: 0.03184819594025612 2023-01-22 13:29:11.277644: step: 18/466, loss: 2.7896217943634838e-05 2023-01-22 13:29:11.864903: step: 20/466, loss: 0.0034164085518568754 2023-01-22 13:29:12.445808: step: 22/466, loss: 0.0020141061395406723 2023-01-22 13:29:13.045585: step: 24/466, loss: 0.014625578187406063 2023-01-22 13:29:13.588545: step: 26/466, loss: 0.2044469118118286 2023-01-22 13:29:14.177568: step: 28/466, loss: 0.0008368648122996092 2023-01-22 13:29:14.745109: step: 30/466, loss: 0.0176450964063406 2023-01-22 13:29:15.333311: step: 32/466, loss: 0.04258988797664642 2023-01-22 13:29:15.954588: step: 34/466, loss: 0.06267206370830536 2023-01-22 13:29:16.546653: step: 36/466, loss: 0.016352983191609383 2023-01-22 13:29:17.232118: step: 38/466, loss: 0.0013128308346495032 2023-01-22 13:29:17.797177: step: 40/466, loss: 0.002571600489318371 2023-01-22 13:29:18.411211: step: 42/466, loss: 0.012269257567822933 2023-01-22 13:29:18.979794: step: 44/466, loss: 0.001995067112147808 2023-01-22 13:29:19.600601: step: 46/466, loss: 0.046191588044166565 2023-01-22 13:29:20.226828: step: 48/466, loss: 0.009372851811349392 2023-01-22 13:29:20.855931: step: 50/466, loss: 0.017919622361660004 2023-01-22 13:29:21.433942: step: 52/466, loss: 0.007651403080672026 2023-01-22 13:29:22.003526: step: 54/466, loss: 0.010477669537067413 2023-01-22 13:29:22.603400: step: 56/466, loss: 0.01425440888851881 2023-01-22 13:29:23.252580: step: 58/466, loss: 0.00841886643320322 2023-01-22 13:29:23.905681: step: 60/466, loss: 0.012636151164770126 2023-01-22 13:29:24.465357: step: 62/466, loss: 0.02314668707549572 2023-01-22 13:29:25.060592: step: 64/466, loss: 0.045987606048583984 2023-01-22 13:29:25.713273: step: 66/466, loss: 0.26990488171577454 2023-01-22 13:29:26.319299: step: 68/466, loss: 0.005559887737035751 2023-01-22 13:29:27.039993: step: 70/466, loss: 0.0006924280896782875 2023-01-22 13:29:27.599033: step: 72/466, loss: 0.0032412128057330847 2023-01-22 13:29:28.214321: step: 74/466, loss: 0.0180651992559433 2023-01-22 13:29:28.780548: step: 76/466, loss: 0.0009903390891849995 2023-01-22 13:29:29.415994: step: 78/466, loss: 0.02404508925974369 2023-01-22 13:29:30.066127: step: 80/466, loss: 0.0448649562895298 2023-01-22 13:29:30.614681: step: 82/466, loss: 0.004130546469241381 2023-01-22 13:29:31.142795: step: 84/466, loss: 0.005723233800381422 2023-01-22 13:29:31.771987: step: 86/466, loss: 0.003985012881457806 2023-01-22 13:29:32.373578: step: 88/466, loss: 0.020042594522237778 2023-01-22 13:29:33.020021: step: 90/466, loss: 0.0017711673863232136 2023-01-22 13:29:33.660915: step: 92/466, loss: 0.0028102535288780928 2023-01-22 13:29:34.255546: step: 94/466, loss: 0.03830303996801376 2023-01-22 13:29:34.861712: step: 96/466, loss: 0.0036949864588677883 2023-01-22 13:29:35.475426: step: 98/466, loss: 0.001504071638919413 2023-01-22 13:29:36.114410: step: 100/466, loss: 0.0048750219866633415 2023-01-22 13:29:36.799069: step: 102/466, loss: 0.03824969753623009 2023-01-22 13:29:37.325458: step: 104/466, loss: 0.004800750873982906 2023-01-22 13:29:37.872153: step: 106/466, loss: 0.0020471615716814995 2023-01-22 13:29:38.411133: step: 108/466, loss: 0.000646074942778796 2023-01-22 13:29:39.009005: step: 110/466, loss: 0.0010992448078468442 2023-01-22 13:29:39.686469: step: 112/466, loss: 0.006446210667490959 2023-01-22 13:29:40.211487: step: 114/466, loss: 0.0009745300631038845 2023-01-22 13:29:40.835527: step: 116/466, loss: 0.013410649262368679 2023-01-22 13:29:41.443119: step: 118/466, loss: 0.0012341059045866132 2023-01-22 13:29:42.054487: step: 120/466, loss: 0.001941015711054206 2023-01-22 13:29:42.652884: step: 122/466, loss: 0.20783285796642303 2023-01-22 13:29:43.329042: step: 124/466, loss: 0.025142325088381767 2023-01-22 13:29:43.877819: step: 126/466, loss: 0.020406607538461685 2023-01-22 13:29:44.508858: step: 128/466, loss: 0.003357550362125039 2023-01-22 13:29:45.083732: step: 130/466, loss: 0.003221118589863181 2023-01-22 13:29:45.669532: step: 132/466, loss: 0.4911470115184784 2023-01-22 13:29:46.276232: step: 134/466, loss: 0.011972006410360336 2023-01-22 13:29:46.898990: step: 136/466, loss: 0.008492819964885712 2023-01-22 13:29:47.511037: step: 138/466, loss: 0.006594249978661537 2023-01-22 13:29:48.119712: step: 140/466, loss: 0.007354431785643101 2023-01-22 13:29:48.673383: step: 142/466, loss: 0.016762271523475647 2023-01-22 13:29:49.254709: step: 144/466, loss: 0.007904180325567722 2023-01-22 13:29:49.826913: step: 146/466, loss: 0.003073585219681263 2023-01-22 13:29:50.466892: step: 148/466, loss: 0.014073972590267658 2023-01-22 13:29:51.065365: step: 150/466, loss: 0.04962771013379097 2023-01-22 13:29:51.660517: step: 152/466, loss: 0.050608739256858826 2023-01-22 13:29:52.257666: step: 154/466, loss: 0.006356202531605959 2023-01-22 13:29:52.813925: step: 156/466, loss: 0.014832945540547371 2023-01-22 13:29:53.426455: step: 158/466, loss: 0.03682565316557884 2023-01-22 13:29:54.012966: step: 160/466, loss: 1.9299652194604278e-05 2023-01-22 13:29:54.651991: step: 162/466, loss: 0.003554818918928504 2023-01-22 13:29:55.285171: step: 164/466, loss: 0.4816342294216156 2023-01-22 13:29:55.903765: step: 166/466, loss: 0.019198158755898476 2023-01-22 13:29:56.487032: step: 168/466, loss: 0.004678398370742798 2023-01-22 13:29:57.112990: step: 170/466, loss: 0.0006657781777903438 2023-01-22 13:29:57.747305: step: 172/466, loss: 0.04507363960146904 2023-01-22 13:29:58.370526: step: 174/466, loss: 0.06059860438108444 2023-01-22 13:29:58.963749: step: 176/466, loss: 0.023954475298523903 2023-01-22 13:29:59.576210: step: 178/466, loss: 0.006275218445807695 2023-01-22 13:30:00.116776: step: 180/466, loss: 0.004957010503858328 2023-01-22 13:30:00.756730: step: 182/466, loss: 0.008655861020088196 2023-01-22 13:30:01.436341: step: 184/466, loss: 0.07140230387449265 2023-01-22 13:30:02.132585: step: 186/466, loss: 0.00874532200396061 2023-01-22 13:30:02.750751: step: 188/466, loss: 0.017771543934941292 2023-01-22 13:30:03.361998: step: 190/466, loss: 0.05333786457777023 2023-01-22 13:30:03.991300: step: 192/466, loss: 0.0453433096408844 2023-01-22 13:30:04.620280: step: 194/466, loss: 0.03081073798239231 2023-01-22 13:30:05.300976: step: 196/466, loss: 0.018321197479963303 2023-01-22 13:30:05.873527: step: 198/466, loss: 0.005789279937744141 2023-01-22 13:30:06.443895: step: 200/466, loss: 0.00897742249071598 2023-01-22 13:30:07.087507: step: 202/466, loss: 0.050437066704034805 2023-01-22 13:30:07.674749: step: 204/466, loss: 0.0827391967177391 2023-01-22 13:30:08.303547: step: 206/466, loss: 4.914351666229777e-05 2023-01-22 13:30:08.924436: step: 208/466, loss: 0.04163723438978195 2023-01-22 13:30:09.527881: step: 210/466, loss: 0.04904909431934357 2023-01-22 13:30:10.097696: step: 212/466, loss: 0.0018292181193828583 2023-01-22 13:30:10.763395: step: 214/466, loss: 0.016725745052099228 2023-01-22 13:30:11.354716: step: 216/466, loss: 0.08441532403230667 2023-01-22 13:30:11.950316: step: 218/466, loss: 0.014585371129214764 2023-01-22 13:30:12.525819: step: 220/466, loss: 0.03478140011429787 2023-01-22 13:30:13.165501: step: 222/466, loss: 0.02361155115067959 2023-01-22 13:30:13.714365: step: 224/466, loss: 0.0034576638136059046 2023-01-22 13:30:14.354801: step: 226/466, loss: 0.0021823784336447716 2023-01-22 13:30:15.035486: step: 228/466, loss: 0.002427509054541588 2023-01-22 13:30:15.646652: step: 230/466, loss: 0.012929845601320267 2023-01-22 13:30:16.245031: step: 232/466, loss: 0.05927010253071785 2023-01-22 13:30:16.838428: step: 234/466, loss: 0.005471110809594393 2023-01-22 13:30:17.499233: step: 236/466, loss: 0.002051880117505789 2023-01-22 13:30:18.189006: step: 238/466, loss: 0.02066088654100895 2023-01-22 13:30:18.856547: step: 240/466, loss: 0.012557800859212875 2023-01-22 13:30:19.396683: step: 242/466, loss: 0.0020497667137533426 2023-01-22 13:30:19.968593: step: 244/466, loss: 0.0003299048694316298 2023-01-22 13:30:20.631016: step: 246/466, loss: 0.0010864927899092436 2023-01-22 13:30:21.339984: step: 248/466, loss: 0.007769247982650995 2023-01-22 13:30:21.989954: step: 250/466, loss: 0.02015012316405773 2023-01-22 13:30:22.599839: step: 252/466, loss: 0.01395625900477171 2023-01-22 13:30:23.175791: step: 254/466, loss: 5.754221638198942e-05 2023-01-22 13:30:23.814876: step: 256/466, loss: 0.008632590994238853 2023-01-22 13:30:24.433353: step: 258/466, loss: 0.009349017404019833 2023-01-22 13:30:25.010898: step: 260/466, loss: 0.6971507668495178 2023-01-22 13:30:25.659572: step: 262/466, loss: 0.05355757847428322 2023-01-22 13:30:26.264398: step: 264/466, loss: 0.002188299084082246 2023-01-22 13:30:26.928964: step: 266/466, loss: 0.10608571767807007 2023-01-22 13:30:27.582340: step: 268/466, loss: 0.062313344329595566 2023-01-22 13:30:28.175555: step: 270/466, loss: 0.007442169357091188 2023-01-22 13:30:28.804478: step: 272/466, loss: 0.0062944344244897366 2023-01-22 13:30:29.379432: step: 274/466, loss: 0.04566106200218201 2023-01-22 13:30:29.986204: step: 276/466, loss: 0.02992089092731476 2023-01-22 13:30:30.551457: step: 278/466, loss: 0.0064385076984763145 2023-01-22 13:30:31.127013: step: 280/466, loss: 0.0003736176004167646 2023-01-22 13:30:31.723713: step: 282/466, loss: 0.0007755811675451696 2023-01-22 13:30:32.354148: step: 284/466, loss: 0.011125924997031689 2023-01-22 13:30:32.913149: step: 286/466, loss: 0.03555982559919357 2023-01-22 13:30:33.530641: step: 288/466, loss: 0.004205263219773769 2023-01-22 13:30:34.098313: step: 290/466, loss: 0.008824083022773266 2023-01-22 13:30:34.714971: step: 292/466, loss: 0.04853668063879013 2023-01-22 13:30:35.265902: step: 294/466, loss: 0.006314811296761036 2023-01-22 13:30:35.888218: step: 296/466, loss: 0.14895592629909515 2023-01-22 13:30:36.496781: step: 298/466, loss: 0.09949561953544617 2023-01-22 13:30:37.117568: step: 300/466, loss: 0.061377447098493576 2023-01-22 13:30:37.712414: step: 302/466, loss: 0.016520462930202484 2023-01-22 13:30:38.346718: step: 304/466, loss: 0.012629845179617405 2023-01-22 13:30:38.977375: step: 306/466, loss: 0.037604060024023056 2023-01-22 13:30:39.579288: step: 308/466, loss: 0.00816622469574213 2023-01-22 13:30:40.197217: step: 310/466, loss: 0.015745321288704872 2023-01-22 13:30:40.795807: step: 312/466, loss: 0.00892604049295187 2023-01-22 13:30:41.442832: step: 314/466, loss: 0.002678923076018691 2023-01-22 13:30:42.020874: step: 316/466, loss: 0.018849464133381844 2023-01-22 13:30:42.643099: step: 318/466, loss: 0.011159980669617653 2023-01-22 13:30:43.226425: step: 320/466, loss: 0.03757862374186516 2023-01-22 13:30:43.868956: step: 322/466, loss: 0.0057309032417833805 2023-01-22 13:30:44.432641: step: 324/466, loss: 0.03270480036735535 2023-01-22 13:30:45.054039: step: 326/466, loss: 0.011551638133823872 2023-01-22 13:30:45.706493: step: 328/466, loss: 0.012251759879291058 2023-01-22 13:30:46.340834: step: 330/466, loss: 0.05296879634261131 2023-01-22 13:30:46.945348: step: 332/466, loss: 0.0004901235224679112 2023-01-22 13:30:47.516920: step: 334/466, loss: 0.014748001471161842 2023-01-22 13:30:48.138693: step: 336/466, loss: 0.004974644631147385 2023-01-22 13:30:48.750905: step: 338/466, loss: 0.376626580953598 2023-01-22 13:30:49.419648: step: 340/466, loss: 0.04346088692545891 2023-01-22 13:30:50.041443: step: 342/466, loss: 1.6282410797430202e-05 2023-01-22 13:30:50.634455: step: 344/466, loss: 0.022755125537514687 2023-01-22 13:30:51.275442: step: 346/466, loss: 0.031009746715426445 2023-01-22 13:30:51.908253: step: 348/466, loss: 0.0008659077575430274 2023-01-22 13:30:52.548640: step: 350/466, loss: 0.02779753878712654 2023-01-22 13:30:53.116893: step: 352/466, loss: 0.0033039574045687914 2023-01-22 13:30:53.753054: step: 354/466, loss: 0.01538103073835373 2023-01-22 13:30:54.393538: step: 356/466, loss: 0.013847199268639088 2023-01-22 13:30:54.976933: step: 358/466, loss: 0.007189096882939339 2023-01-22 13:30:55.593136: step: 360/466, loss: 0.0009842516155913472 2023-01-22 13:30:56.204003: step: 362/466, loss: 0.011768832802772522 2023-01-22 13:30:56.837431: step: 364/466, loss: 0.09084676206111908 2023-01-22 13:30:57.486879: step: 366/466, loss: 0.016181915998458862 2023-01-22 13:30:58.044733: step: 368/466, loss: 0.006875579711049795 2023-01-22 13:30:58.671466: step: 370/466, loss: 0.010697558522224426 2023-01-22 13:30:59.268941: step: 372/466, loss: 0.01979762688279152 2023-01-22 13:30:59.919507: step: 374/466, loss: 0.04282763600349426 2023-01-22 13:31:00.506851: step: 376/466, loss: 0.07851805537939072 2023-01-22 13:31:01.167772: step: 378/466, loss: 0.0012981746112927794 2023-01-22 13:31:01.796191: step: 380/466, loss: 0.08073896914720535 2023-01-22 13:31:02.354952: step: 382/466, loss: 0.004018519539386034 2023-01-22 13:31:02.948122: step: 384/466, loss: 0.23132452368736267 2023-01-22 13:31:03.572132: step: 386/466, loss: 0.013552346266806126 2023-01-22 13:31:04.135411: step: 388/466, loss: 0.007649657316505909 2023-01-22 13:31:04.782095: step: 390/466, loss: 0.035173993557691574 2023-01-22 13:31:05.419913: step: 392/466, loss: 0.004693764727562666 2023-01-22 13:31:06.023377: step: 394/466, loss: 0.012217522598803043 2023-01-22 13:31:06.615823: step: 396/466, loss: 0.01885879971086979 2023-01-22 13:31:07.183247: step: 398/466, loss: 0.007570372894406319 2023-01-22 13:31:07.818915: step: 400/466, loss: 0.03087976947426796 2023-01-22 13:31:08.389135: step: 402/466, loss: 0.015562249347567558 2023-01-22 13:31:09.049115: step: 404/466, loss: 0.05325576290488243 2023-01-22 13:31:09.689411: step: 406/466, loss: 0.013357589021325111 2023-01-22 13:31:10.328740: step: 408/466, loss: 0.004710727371275425 2023-01-22 13:31:10.944913: step: 410/466, loss: 0.0020447219721972942 2023-01-22 13:31:11.479095: step: 412/466, loss: 0.00359279103577137 2023-01-22 13:31:12.070111: step: 414/466, loss: 0.101906418800354 2023-01-22 13:31:12.667277: step: 416/466, loss: 0.026152092963457108 2023-01-22 13:31:13.248302: step: 418/466, loss: 0.00011928620369872078 2023-01-22 13:31:13.842361: step: 420/466, loss: 0.006412764545530081 2023-01-22 13:31:14.384266: step: 422/466, loss: 0.00213825237005949 2023-01-22 13:31:15.080606: step: 424/466, loss: 0.04086513817310333 2023-01-22 13:31:15.742406: step: 426/466, loss: 2.8988325595855713 2023-01-22 13:31:16.325634: step: 428/466, loss: 0.0008585536852478981 2023-01-22 13:31:17.015597: step: 430/466, loss: 0.07981168478727341 2023-01-22 13:31:17.596288: step: 432/466, loss: 0.012860963121056557 2023-01-22 13:31:18.282389: step: 434/466, loss: 0.004198842216283083 2023-01-22 13:31:18.870690: step: 436/466, loss: 0.005549263209104538 2023-01-22 13:31:19.413591: step: 438/466, loss: 0.0016691813943907619 2023-01-22 13:31:20.027034: step: 440/466, loss: 0.015377065166831017 2023-01-22 13:31:20.647591: step: 442/466, loss: 0.004243817180395126 2023-01-22 13:31:21.302933: step: 444/466, loss: 0.7251068353652954 2023-01-22 13:31:21.897182: step: 446/466, loss: 0.041107308119535446 2023-01-22 13:31:22.553177: step: 448/466, loss: 0.030324382707476616 2023-01-22 13:31:23.164479: step: 450/466, loss: 0.012087621726095676 2023-01-22 13:31:23.742003: step: 452/466, loss: 0.024647416546940804 2023-01-22 13:31:24.372065: step: 454/466, loss: 0.001115889404900372 2023-01-22 13:31:25.026836: step: 456/466, loss: 0.017052508890628815 2023-01-22 13:31:25.653749: step: 458/466, loss: 0.02020523138344288 2023-01-22 13:31:26.310058: step: 460/466, loss: 0.0014908250886946917 2023-01-22 13:31:26.930217: step: 462/466, loss: 0.023595234379172325 2023-01-22 13:31:27.615837: step: 464/466, loss: 0.3415941894054413 2023-01-22 13:31:28.203810: step: 466/466, loss: 0.00020736547594424337 2023-01-22 13:31:28.898601: step: 468/466, loss: 0.0034636743366718292 2023-01-22 13:31:29.514930: step: 470/466, loss: 0.0024635677691549063 2023-01-22 13:31:30.060829: step: 472/466, loss: 0.006891455966979265 2023-01-22 13:31:30.652674: step: 474/466, loss: 0.006211124360561371 2023-01-22 13:31:31.186924: step: 476/466, loss: 0.004383048042654991 2023-01-22 13:31:31.768476: step: 478/466, loss: 0.004917972721159458 2023-01-22 13:31:32.450894: step: 480/466, loss: 0.001569868065416813 2023-01-22 13:31:33.087834: step: 482/466, loss: 0.0012542768381536007 2023-01-22 13:31:33.721586: step: 484/466, loss: 0.03853151574730873 2023-01-22 13:31:34.308806: step: 486/466, loss: 0.0009523625485599041 2023-01-22 13:31:34.927046: step: 488/466, loss: 0.0006722973193973303 2023-01-22 13:31:35.607675: step: 490/466, loss: 0.008869620971381664 2023-01-22 13:31:36.207559: step: 492/466, loss: 0.017616745084524155 2023-01-22 13:31:36.906110: step: 494/466, loss: 0.004762888886034489 2023-01-22 13:31:37.492087: step: 496/466, loss: 0.2257559895515442 2023-01-22 13:31:38.068523: step: 498/466, loss: 0.01182840671390295 2023-01-22 13:31:38.667551: step: 500/466, loss: 0.009502295404672623 2023-01-22 13:31:39.256160: step: 502/466, loss: 0.01936890184879303 2023-01-22 13:31:39.914496: step: 504/466, loss: 0.020262565463781357 2023-01-22 13:31:40.583413: step: 506/466, loss: 0.013965466059744358 2023-01-22 13:31:41.208016: step: 508/466, loss: 0.005476214457303286 2023-01-22 13:31:41.778498: step: 510/466, loss: 0.004246894735842943 2023-01-22 13:31:42.345652: step: 512/466, loss: 0.023494044318795204 2023-01-22 13:31:42.969413: step: 514/466, loss: 0.00397317111492157 2023-01-22 13:31:43.564746: step: 516/466, loss: 0.01444228459149599 2023-01-22 13:31:44.157821: step: 518/466, loss: 0.001565203652717173 2023-01-22 13:31:44.733053: step: 520/466, loss: 0.0030631034169346094 2023-01-22 13:31:45.333003: step: 522/466, loss: 0.0035917942877858877 2023-01-22 13:31:45.902285: step: 524/466, loss: 0.011667449027299881 2023-01-22 13:31:46.497080: step: 526/466, loss: 0.005897277966141701 2023-01-22 13:31:47.127710: step: 528/466, loss: 0.007870707660913467 2023-01-22 13:31:47.746657: step: 530/466, loss: 0.05467689782381058 2023-01-22 13:31:48.366925: step: 532/466, loss: 0.009704221971333027 2023-01-22 13:31:48.981297: step: 534/466, loss: 0.011946790851652622 2023-01-22 13:31:49.578330: step: 536/466, loss: 0.03798848018050194 2023-01-22 13:31:50.167560: step: 538/466, loss: 0.010317761451005936 2023-01-22 13:31:50.763235: step: 540/466, loss: 1.3332763046491891e-05 2023-01-22 13:31:51.325628: step: 542/466, loss: 0.019306158646941185 2023-01-22 13:31:51.911718: step: 544/466, loss: 0.003761201398447156 2023-01-22 13:31:52.519426: step: 546/466, loss: 0.3841245472431183 2023-01-22 13:31:53.071736: step: 548/466, loss: 0.02378680743277073 2023-01-22 13:31:53.632309: step: 550/466, loss: 0.014144516550004482 2023-01-22 13:31:54.270805: step: 552/466, loss: 0.0009709474397823215 2023-01-22 13:31:54.868840: step: 554/466, loss: 0.012108071707189083 2023-01-22 13:31:55.519462: step: 556/466, loss: 0.0035140831023454666 2023-01-22 13:31:56.151697: step: 558/466, loss: 0.012012338265776634 2023-01-22 13:31:56.716936: step: 560/466, loss: 3.552379846572876 2023-01-22 13:31:57.320653: step: 562/466, loss: 0.014286902733147144 2023-01-22 13:31:57.965758: step: 564/466, loss: 0.03940424695611 2023-01-22 13:31:58.563277: step: 566/466, loss: 0.030426137149333954 2023-01-22 13:31:59.189177: step: 568/466, loss: 0.012256816029548645 2023-01-22 13:31:59.796459: step: 570/466, loss: 0.05975402146577835 2023-01-22 13:32:00.364444: step: 572/466, loss: 0.08712486922740936 2023-01-22 13:32:01.012784: step: 574/466, loss: 0.02631019987165928 2023-01-22 13:32:01.590320: step: 576/466, loss: 0.0014418819919228554 2023-01-22 13:32:02.225993: step: 578/466, loss: 0.04173552244901657 2023-01-22 13:32:02.834929: step: 580/466, loss: 0.012788847088813782 2023-01-22 13:32:03.437848: step: 582/466, loss: 0.03496984392404556 2023-01-22 13:32:04.011974: step: 584/466, loss: 0.00038491602754220366 2023-01-22 13:32:04.669886: step: 586/466, loss: 0.012075595557689667 2023-01-22 13:32:05.308052: step: 588/466, loss: 0.02586449310183525 2023-01-22 13:32:05.997006: step: 590/466, loss: 0.050056036561727524 2023-01-22 13:32:06.598461: step: 592/466, loss: 0.07017027586698532 2023-01-22 13:32:07.180463: step: 594/466, loss: 0.02162768505513668 2023-01-22 13:32:07.811448: step: 596/466, loss: 0.001000135438516736 2023-01-22 13:32:08.419156: step: 598/466, loss: 0.05198930576443672 2023-01-22 13:32:08.998984: step: 600/466, loss: 0.0026975013315677643 2023-01-22 13:32:09.633364: step: 602/466, loss: 0.9263908863067627 2023-01-22 13:32:10.253032: step: 604/466, loss: 0.004877468105405569 2023-01-22 13:32:10.887786: step: 606/466, loss: 0.023529309779405594 2023-01-22 13:32:11.483830: step: 608/466, loss: 0.022583313286304474 2023-01-22 13:32:12.066873: step: 610/466, loss: 0.02809302881360054 2023-01-22 13:32:12.630890: step: 612/466, loss: 0.0007098540663719177 2023-01-22 13:32:13.244713: step: 614/466, loss: 0.012303457595407963 2023-01-22 13:32:13.826955: step: 616/466, loss: 0.005014235619455576 2023-01-22 13:32:14.381220: step: 618/466, loss: 0.006878813728690147 2023-01-22 13:32:14.977391: step: 620/466, loss: 0.03495979681611061 2023-01-22 13:32:15.602113: step: 622/466, loss: 0.010468830354511738 2023-01-22 13:32:16.302520: step: 624/466, loss: 0.12259616702795029 2023-01-22 13:32:16.939677: step: 626/466, loss: 0.0879226103425026 2023-01-22 13:32:17.559600: step: 628/466, loss: 0.02980031818151474 2023-01-22 13:32:18.170279: step: 630/466, loss: 0.00035697617568075657 2023-01-22 13:32:18.765003: step: 632/466, loss: 0.007286733016371727 2023-01-22 13:32:19.348152: step: 634/466, loss: 0.00031953808502294123 2023-01-22 13:32:20.038832: step: 636/466, loss: 5.5171603889903054e-05 2023-01-22 13:32:20.648064: step: 638/466, loss: 0.01621876284480095 2023-01-22 13:32:21.248031: step: 640/466, loss: 0.0016461930936202407 2023-01-22 13:32:21.825678: step: 642/466, loss: 0.20600132644176483 2023-01-22 13:32:22.435299: step: 644/466, loss: 0.0797785222530365 2023-01-22 13:32:23.080614: step: 646/466, loss: 2.0617271729861386e-05 2023-01-22 13:32:23.717638: step: 648/466, loss: 0.026157179847359657 2023-01-22 13:32:24.341302: step: 650/466, loss: 0.01519196480512619 2023-01-22 13:32:24.941550: step: 652/466, loss: 0.12516631186008453 2023-01-22 13:32:25.623762: step: 654/466, loss: 0.01664874143898487 2023-01-22 13:32:26.218847: step: 656/466, loss: 0.009894109331071377 2023-01-22 13:32:26.875208: step: 658/466, loss: 0.03975345939397812 2023-01-22 13:32:27.486510: step: 660/466, loss: 0.0023406550753861666 2023-01-22 13:32:28.103969: step: 662/466, loss: 0.014231977052986622 2023-01-22 13:32:28.689127: step: 664/466, loss: 3.681071029859595e-05 2023-01-22 13:32:29.255200: step: 666/466, loss: 0.0002408704167464748 2023-01-22 13:32:29.918547: step: 668/466, loss: 0.008114716969430447 2023-01-22 13:32:30.575828: step: 670/466, loss: 0.02963598445057869 2023-01-22 13:32:31.149859: step: 672/466, loss: 0.16881270706653595 2023-01-22 13:32:31.759093: step: 674/466, loss: 0.011266290210187435 2023-01-22 13:32:32.401777: step: 676/466, loss: 0.02551671490073204 2023-01-22 13:32:33.048668: step: 678/466, loss: 0.00021820540132466704 2023-01-22 13:32:33.609239: step: 680/466, loss: 0.01580444909632206 2023-01-22 13:32:34.247608: step: 682/466, loss: 0.0003003499296028167 2023-01-22 13:32:34.894301: step: 684/466, loss: 0.20750835537910461 2023-01-22 13:32:35.478310: step: 686/466, loss: 0.0017898082733154297 2023-01-22 13:32:36.115611: step: 688/466, loss: 0.02732234075665474 2023-01-22 13:32:36.732697: step: 690/466, loss: 0.0007180742104537785 2023-01-22 13:32:37.316545: step: 692/466, loss: 0.5810558795928955 2023-01-22 13:32:37.998648: step: 694/466, loss: 0.00484499940648675 2023-01-22 13:32:38.619379: step: 696/466, loss: 0.05192787945270538 2023-01-22 13:32:39.230722: step: 698/466, loss: 0.010271672159433365 2023-01-22 13:32:39.782469: step: 700/466, loss: 0.028488852083683014 2023-01-22 13:32:40.402376: step: 702/466, loss: 0.008909497410058975 2023-01-22 13:32:41.028982: step: 704/466, loss: 0.012639102526009083 2023-01-22 13:32:41.619582: step: 706/466, loss: 0.022058192640542984 2023-01-22 13:32:42.267244: step: 708/466, loss: 0.0082834642380476 2023-01-22 13:32:42.908534: step: 710/466, loss: 0.08404853194952011 2023-01-22 13:32:43.481380: step: 712/466, loss: 0.0002973505179397762 2023-01-22 13:32:44.060208: step: 714/466, loss: 0.01604515127837658 2023-01-22 13:32:44.646955: step: 716/466, loss: 0.0721011534333229 2023-01-22 13:32:45.271300: step: 718/466, loss: 0.021386979147791862 2023-01-22 13:32:45.832124: step: 720/466, loss: 0.0023640303406864405 2023-01-22 13:32:46.476691: step: 722/466, loss: 0.04245742782950401 2023-01-22 13:32:47.063875: step: 724/466, loss: 0.005683106370270252 2023-01-22 13:32:47.672412: step: 726/466, loss: 0.01418995764106512 2023-01-22 13:32:48.274755: step: 728/466, loss: 0.032957032322883606 2023-01-22 13:32:48.906783: step: 730/466, loss: 0.008482889272272587 2023-01-22 13:32:49.489941: step: 732/466, loss: 0.0015429266495630145 2023-01-22 13:32:50.096014: step: 734/466, loss: 0.0006709589506499469 2023-01-22 13:32:50.753152: step: 736/466, loss: 0.0624673031270504 2023-01-22 13:32:51.372277: step: 738/466, loss: 0.037133317440748215 2023-01-22 13:32:51.963532: step: 740/466, loss: 0.0023529180325567722 2023-01-22 13:32:52.574799: step: 742/466, loss: 0.0028179381042718887 2023-01-22 13:32:53.222764: step: 744/466, loss: 0.006925663445144892 2023-01-22 13:32:53.849862: step: 746/466, loss: 0.009411841630935669 2023-01-22 13:32:54.388905: step: 748/466, loss: 0.056623708456754684 2023-01-22 13:32:55.020341: step: 750/466, loss: 0.021879877895116806 2023-01-22 13:32:55.577733: step: 752/466, loss: 0.018209518864750862 2023-01-22 13:32:56.187896: step: 754/466, loss: 0.0008968443726189435 2023-01-22 13:32:56.858253: step: 756/466, loss: 0.001655777683481574 2023-01-22 13:32:57.501452: step: 758/466, loss: 0.006058151368051767 2023-01-22 13:32:58.130518: step: 760/466, loss: 0.004729055799543858 2023-01-22 13:32:58.696100: step: 762/466, loss: 5.468863673740998e-05 2023-01-22 13:32:59.287651: step: 764/466, loss: 0.01741127111017704 2023-01-22 13:32:59.968150: step: 766/466, loss: 0.01633036509156227 2023-01-22 13:33:00.566413: step: 768/466, loss: 0.002862333320081234 2023-01-22 13:33:01.142309: step: 770/466, loss: 0.03288822993636131 2023-01-22 13:33:01.734280: step: 772/466, loss: 0.04681827872991562 2023-01-22 13:33:02.437309: step: 774/466, loss: 0.03713144734501839 2023-01-22 13:33:03.077455: step: 776/466, loss: 0.01241319626569748 2023-01-22 13:33:03.707013: step: 778/466, loss: 0.008597268722951412 2023-01-22 13:33:04.344955: step: 780/466, loss: 0.017979905009269714 2023-01-22 13:33:04.874558: step: 782/466, loss: 0.0002149198844563216 2023-01-22 13:33:05.511885: step: 784/466, loss: 0.05203290283679962 2023-01-22 13:33:06.118129: step: 786/466, loss: 0.013367298059165478 2023-01-22 13:33:06.856464: step: 788/466, loss: 0.014973390847444534 2023-01-22 13:33:07.513031: step: 790/466, loss: 0.03173128515481949 2023-01-22 13:33:08.147700: step: 792/466, loss: 0.08076297491788864 2023-01-22 13:33:08.703464: step: 794/466, loss: 0.001049180282279849 2023-01-22 13:33:09.283728: step: 796/466, loss: 0.0002666466752998531 2023-01-22 13:33:09.813485: step: 798/466, loss: 0.01713642291724682 2023-01-22 13:33:10.454308: step: 800/466, loss: 0.0040335580706596375 2023-01-22 13:33:11.086710: step: 802/466, loss: 0.0013026159722357988 2023-01-22 13:33:11.679132: step: 804/466, loss: 0.0008358605555258691 2023-01-22 13:33:12.258681: step: 806/466, loss: 0.007629586383700371 2023-01-22 13:33:12.875352: step: 808/466, loss: 0.0026855634059756994 2023-01-22 13:33:13.446565: step: 810/466, loss: 0.0005732735153287649 2023-01-22 13:33:14.059648: step: 812/466, loss: 0.018595751374959946 2023-01-22 13:33:14.712342: step: 814/466, loss: 0.0013379593146964908 2023-01-22 13:33:15.354797: step: 816/466, loss: 0.11120335757732391 2023-01-22 13:33:15.937103: step: 818/466, loss: 0.04682184010744095 2023-01-22 13:33:16.543040: step: 820/466, loss: 0.04700683429837227 2023-01-22 13:33:17.152429: step: 822/466, loss: 0.0036845568101853132 2023-01-22 13:33:17.774664: step: 824/466, loss: 0.010761707089841366 2023-01-22 13:33:18.404869: step: 826/466, loss: 0.03720209747552872 2023-01-22 13:33:19.032692: step: 828/466, loss: 0.006173505913466215 2023-01-22 13:33:19.584908: step: 830/466, loss: 0.2876453399658203 2023-01-22 13:33:20.281892: step: 832/466, loss: 0.0029347799718379974 2023-01-22 13:33:20.871983: step: 834/466, loss: 0.00524116912856698 2023-01-22 13:33:21.489526: step: 836/466, loss: 0.03424202650785446 2023-01-22 13:33:22.069463: step: 838/466, loss: 0.03777669370174408 2023-01-22 13:33:22.716732: step: 840/466, loss: 0.06569282710552216 2023-01-22 13:33:23.364651: step: 842/466, loss: 0.02280786633491516 2023-01-22 13:33:23.962278: step: 844/466, loss: 0.031920842826366425 2023-01-22 13:33:24.653658: step: 846/466, loss: 0.021082449704408646 2023-01-22 13:33:25.390054: step: 848/466, loss: 0.012781715020537376 2023-01-22 13:33:25.965907: step: 850/466, loss: 0.03208983317017555 2023-01-22 13:33:26.647712: step: 852/466, loss: 0.0037364468444138765 2023-01-22 13:33:27.253008: step: 854/466, loss: 0.06761616468429565 2023-01-22 13:33:27.951114: step: 856/466, loss: 0.0003608867700677365 2023-01-22 13:33:28.597680: step: 858/466, loss: 0.004010418429970741 2023-01-22 13:33:29.176220: step: 860/466, loss: 0.005447422154247761 2023-01-22 13:33:29.799779: step: 862/466, loss: 0.012670483440160751 2023-01-22 13:33:30.453718: step: 864/466, loss: 0.002907310612499714 2023-01-22 13:33:31.048612: step: 866/466, loss: 0.010033238679170609 2023-01-22 13:33:31.579894: step: 868/466, loss: 5.622063667942712e-07 2023-01-22 13:33:32.197529: step: 870/466, loss: 0.9328770637512207 2023-01-22 13:33:32.768243: step: 872/466, loss: 0.12100088596343994 2023-01-22 13:33:33.376099: step: 874/466, loss: 0.08131342381238937 2023-01-22 13:33:34.023094: step: 876/466, loss: 0.03969806432723999 2023-01-22 13:33:34.677088: step: 878/466, loss: 0.044402651488780975 2023-01-22 13:33:35.266806: step: 880/466, loss: 0.021927524358034134 2023-01-22 13:33:35.870188: step: 882/466, loss: 0.03587482497096062 2023-01-22 13:33:36.494518: step: 884/466, loss: 0.03218105062842369 2023-01-22 13:33:37.132787: step: 886/466, loss: 0.009517614729702473 2023-01-22 13:33:37.733991: step: 888/466, loss: 0.06283598393201828 2023-01-22 13:33:38.368238: step: 890/466, loss: 0.057447660714387894 2023-01-22 13:33:39.012694: step: 892/466, loss: 0.01942952163517475 2023-01-22 13:33:39.640804: step: 894/466, loss: 0.01550187449902296 2023-01-22 13:33:40.212056: step: 896/466, loss: 0.004953037016093731 2023-01-22 13:33:40.803082: step: 898/466, loss: 0.0468306839466095 2023-01-22 13:33:41.382087: step: 900/466, loss: 0.0006143661448732018 2023-01-22 13:33:41.982243: step: 902/466, loss: 0.0217197947204113 2023-01-22 13:33:42.636861: step: 904/466, loss: 0.014523173682391644 2023-01-22 13:33:43.305321: step: 906/466, loss: 0.00903339497745037 2023-01-22 13:33:43.855410: step: 908/466, loss: 0.0004052472941111773 2023-01-22 13:33:44.467285: step: 910/466, loss: 0.008048434741795063 2023-01-22 13:33:45.049798: step: 912/466, loss: 0.06616722792387009 2023-01-22 13:33:45.771693: step: 914/466, loss: 7.085939432727173e-05 2023-01-22 13:33:46.396983: step: 916/466, loss: 0.6689620018005371 2023-01-22 13:33:47.071375: step: 918/466, loss: 0.019276319071650505 2023-01-22 13:33:47.657940: step: 920/466, loss: 0.002870056079700589 2023-01-22 13:33:48.290322: step: 922/466, loss: 0.14844343066215515 2023-01-22 13:33:48.922841: step: 924/466, loss: 0.07893482595682144 2023-01-22 13:33:49.569087: step: 926/466, loss: 0.025511648505926132 2023-01-22 13:33:50.192922: step: 928/466, loss: 0.0012719589285552502 2023-01-22 13:33:50.823178: step: 930/466, loss: 0.03931349515914917 2023-01-22 13:33:51.465764: step: 932/466, loss: 0.007727402728050947 ================================================== Loss: 0.052 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3111490553200398, 'r': 0.341850669886723, 'f1': 0.3257781248287578}, 'combined': 0.24004703934750574, 'epoch': 32} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35916819345881906, 'r': 0.31867221327699213, 'f1': 0.33771053484225355}, 'combined': 0.2239738262166241, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29905940594059405, 'r': 0.286032196969697, 'f1': 0.29240077444336887}, 'combined': 0.19493384962891258, 'epoch': 32} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3776836596850576, 'r': 0.2978268026979224, 'f1': 0.333035009993607}, 'combined': 0.21734916441688035, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29357672469374596, 'r': 0.34037073773791043, 'f1': 0.31524671140224747}, 'combined': 0.23228705050691917, 'epoch': 32} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34878737772732915, 'r': 0.30371424738789116, 'f1': 0.3246940447271567}, 'combined': 0.21534112810920233, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2494172494172494, 'r': 0.37056277056277054, 'f1': 0.29815395332636707}, 'combined': 0.19876930221757805, 'epoch': 32} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.4673913043478261, 'f1': 0.4673913043478261}, 'combined': 0.3115942028985507, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3194444444444444, 'r': 0.19827586206896552, 'f1': 0.24468085106382978}, 'combined': 0.1631205673758865, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:36:18.801947: step: 2/466, loss: 0.013426671735942364 2023-01-22 13:36:19.402917: step: 4/466, loss: 0.04405581206083298 2023-01-22 13:36:19.980507: step: 6/466, loss: 0.036784835159778595 2023-01-22 13:36:20.600579: step: 8/466, loss: 0.002065311186015606 2023-01-22 13:36:21.196097: step: 10/466, loss: 0.001026434125378728 2023-01-22 13:36:21.807030: step: 12/466, loss: 0.000631114817224443 2023-01-22 13:36:22.382596: step: 14/466, loss: 0.0009648153209127486 2023-01-22 13:36:22.912910: step: 16/466, loss: 0.004327333532273769 2023-01-22 13:36:23.487487: step: 18/466, loss: 0.024526774883270264 2023-01-22 13:36:24.154545: step: 20/466, loss: 0.0209369957447052 2023-01-22 13:36:24.792508: step: 22/466, loss: 0.009978730231523514 2023-01-22 13:36:25.432282: step: 24/466, loss: 0.02210579626262188 2023-01-22 13:36:26.080243: step: 26/466, loss: 0.008876738138496876 2023-01-22 13:36:26.672163: step: 28/466, loss: 0.1963396519422531 2023-01-22 13:36:27.285500: step: 30/466, loss: 0.055339012295007706 2023-01-22 13:36:27.858108: step: 32/466, loss: 0.0009447642951272428 2023-01-22 13:36:28.477618: step: 34/466, loss: 0.0005788745475001633 2023-01-22 13:36:29.045508: step: 36/466, loss: 0.00714969402179122 2023-01-22 13:36:29.648699: step: 38/466, loss: 0.029303908348083496 2023-01-22 13:36:30.249187: step: 40/466, loss: 0.006237163674086332 2023-01-22 13:36:30.896270: step: 42/466, loss: 0.000784186355303973 2023-01-22 13:36:31.493266: step: 44/466, loss: 0.005701386835426092 2023-01-22 13:36:32.172183: step: 46/466, loss: 0.15250776708126068 2023-01-22 13:36:32.823773: step: 48/466, loss: 0.0013273664517328143 2023-01-22 13:36:33.442484: step: 50/466, loss: 0.021707775071263313 2023-01-22 13:36:34.134988: step: 52/466, loss: 0.0027274428866803646 2023-01-22 13:36:34.698564: step: 54/466, loss: 0.020889142528176308 2023-01-22 13:36:35.354286: step: 56/466, loss: 0.019990235567092896 2023-01-22 13:36:35.981734: step: 58/466, loss: 0.00216134125366807 2023-01-22 13:36:36.628008: step: 60/466, loss: 0.011055981740355492 2023-01-22 13:36:37.278778: step: 62/466, loss: 0.001772857503965497 2023-01-22 13:36:37.938219: step: 64/466, loss: 0.007328768260776997 2023-01-22 13:36:38.622956: step: 66/466, loss: 0.0024733725003898144 2023-01-22 13:36:39.174854: step: 68/466, loss: 0.022164082154631615 2023-01-22 13:36:39.785335: step: 70/466, loss: 0.16365617513656616 2023-01-22 13:36:40.393507: step: 72/466, loss: 0.04055704176425934 2023-01-22 13:36:41.026849: step: 74/466, loss: 0.5103712677955627 2023-01-22 13:36:41.615886: step: 76/466, loss: 0.01298505999147892 2023-01-22 13:36:42.241910: step: 78/466, loss: 0.018091892823576927 2023-01-22 13:36:42.907511: step: 80/466, loss: 0.002492034574970603 2023-01-22 13:36:43.487481: step: 82/466, loss: 0.030289528891444206 2023-01-22 13:36:44.161729: step: 84/466, loss: 0.0030104555189609528 2023-01-22 13:36:44.792809: step: 86/466, loss: 0.014978118240833282 2023-01-22 13:36:45.389207: step: 88/466, loss: 0.004207504913210869 2023-01-22 13:36:46.004379: step: 90/466, loss: 0.011144906282424927 2023-01-22 13:36:46.598565: step: 92/466, loss: 0.016953200101852417 2023-01-22 13:36:47.154314: step: 94/466, loss: 0.060297973453998566 2023-01-22 13:36:47.758961: step: 96/466, loss: 0.03718177229166031 2023-01-22 13:36:48.397316: step: 98/466, loss: 0.006500702351331711 2023-01-22 13:36:49.020348: step: 100/466, loss: 0.06412974745035172 2023-01-22 13:36:49.605919: step: 102/466, loss: 0.005766607355326414 2023-01-22 13:36:50.195793: step: 104/466, loss: 0.016049914062023163 2023-01-22 13:36:50.801196: step: 106/466, loss: 0.01654273457825184 2023-01-22 13:36:51.372890: step: 108/466, loss: 0.038459960371255875 2023-01-22 13:36:51.906937: step: 110/466, loss: 0.009186459705233574 2023-01-22 13:36:52.470675: step: 112/466, loss: 0.00904849823564291 2023-01-22 13:36:53.029873: step: 114/466, loss: 0.0003890866064466536 2023-01-22 13:36:53.625969: step: 116/466, loss: 0.01985808089375496 2023-01-22 13:36:54.222235: step: 118/466, loss: 0.014591362327337265 2023-01-22 13:36:54.836009: step: 120/466, loss: 0.00038054390461184084 2023-01-22 13:36:55.395445: step: 122/466, loss: 0.0006946328212507069 2023-01-22 13:36:56.014021: step: 124/466, loss: 0.016799703240394592 2023-01-22 13:36:56.648368: step: 126/466, loss: 0.0006378726684488356 2023-01-22 13:36:57.300147: step: 128/466, loss: 0.005696744192391634 2023-01-22 13:36:57.898353: step: 130/466, loss: 0.010593943297863007 2023-01-22 13:36:58.496591: step: 132/466, loss: 0.02038961462676525 2023-01-22 13:36:59.101729: step: 134/466, loss: 0.004535729065537453 2023-01-22 13:36:59.732379: step: 136/466, loss: 0.003972996957600117 2023-01-22 13:37:00.374510: step: 138/466, loss: 0.07120930403470993 2023-01-22 13:37:01.112778: step: 140/466, loss: 0.024490805342793465 2023-01-22 13:37:01.730336: step: 142/466, loss: 0.005146074574440718 2023-01-22 13:37:02.397068: step: 144/466, loss: 0.00045003503328189254 2023-01-22 13:37:02.991813: step: 146/466, loss: 0.037370651960372925 2023-01-22 13:37:03.554540: step: 148/466, loss: 0.0008118917467072606 2023-01-22 13:37:04.187960: step: 150/466, loss: 0.002586959395557642 2023-01-22 13:37:04.847466: step: 152/466, loss: 0.04539839178323746 2023-01-22 13:37:05.441650: step: 154/466, loss: 0.0006347663584165275 2023-01-22 13:37:06.024760: step: 156/466, loss: 0.03967674821615219 2023-01-22 13:37:06.667551: step: 158/466, loss: 0.005713362712413073 2023-01-22 13:37:07.334535: step: 160/466, loss: 0.07420878112316132 2023-01-22 13:37:07.980208: step: 162/466, loss: 0.002022551139816642 2023-01-22 13:37:08.647069: step: 164/466, loss: 0.018466947600245476 2023-01-22 13:37:09.302583: step: 166/466, loss: 0.008294089697301388 2023-01-22 13:37:09.963712: step: 168/466, loss: 0.09669829159975052 2023-01-22 13:37:10.585281: step: 170/466, loss: 0.008423964492976665 2023-01-22 13:37:11.153912: step: 172/466, loss: 0.0061074914410710335 2023-01-22 13:37:11.767314: step: 174/466, loss: 0.03758373484015465 2023-01-22 13:37:12.363716: step: 176/466, loss: 0.08949467539787292 2023-01-22 13:37:12.945731: step: 178/466, loss: 0.06499304622411728 2023-01-22 13:37:13.573882: step: 180/466, loss: 0.03493135794997215 2023-01-22 13:37:14.197502: step: 182/466, loss: 0.0016586108831688762 2023-01-22 13:37:14.814640: step: 184/466, loss: 0.003063349286094308 2023-01-22 13:37:15.496570: step: 186/466, loss: 0.005292469635605812 2023-01-22 13:37:16.090459: step: 188/466, loss: 0.004043189808726311 2023-01-22 13:37:16.821156: step: 190/466, loss: 0.06299145519733429 2023-01-22 13:37:17.447993: step: 192/466, loss: 0.02014276757836342 2023-01-22 13:37:18.058742: step: 194/466, loss: 0.008015135303139687 2023-01-22 13:37:18.694617: step: 196/466, loss: 0.012555583380162716 2023-01-22 13:37:19.328119: step: 198/466, loss: 0.006674039643257856 2023-01-22 13:37:19.982815: step: 200/466, loss: 0.004354489967226982 2023-01-22 13:37:20.591125: step: 202/466, loss: 0.0257272869348526 2023-01-22 13:37:21.175220: step: 204/466, loss: 0.06419980525970459 2023-01-22 13:37:21.845365: step: 206/466, loss: 0.05442401394248009 2023-01-22 13:37:22.469104: step: 208/466, loss: 0.08057260513305664 2023-01-22 13:37:23.028873: step: 210/466, loss: 3.373867988586426 2023-01-22 13:37:23.618004: step: 212/466, loss: 0.001134110032580793 2023-01-22 13:37:24.187304: step: 214/466, loss: 0.0033750978764146566 2023-01-22 13:37:24.801642: step: 216/466, loss: 0.024464482441544533 2023-01-22 13:37:25.419117: step: 218/466, loss: 0.006701041478663683 2023-01-22 13:37:26.017385: step: 220/466, loss: 0.004200145602226257 2023-01-22 13:37:26.604609: step: 222/466, loss: 0.01735462062060833 2023-01-22 13:37:27.247861: step: 224/466, loss: 0.004717234987765551 2023-01-22 13:37:27.935045: step: 226/466, loss: 0.010514490306377411 2023-01-22 13:37:28.497395: step: 228/466, loss: 0.08764377236366272 2023-01-22 13:37:29.151003: step: 230/466, loss: 0.0010582390241324902 2023-01-22 13:37:29.792558: step: 232/466, loss: 0.006454814225435257 2023-01-22 13:37:30.415423: step: 234/466, loss: 0.00016704069275874645 2023-01-22 13:37:31.029083: step: 236/466, loss: 0.0017406251281499863 2023-01-22 13:37:31.665347: step: 238/466, loss: 0.04129519686102867 2023-01-22 13:37:32.215749: step: 240/466, loss: 0.02657444216310978 2023-01-22 13:37:32.818963: step: 242/466, loss: 0.00041153430356644094 2023-01-22 13:37:33.449512: step: 244/466, loss: 0.02432902529835701 2023-01-22 13:37:34.048447: step: 246/466, loss: 0.00379131268709898 2023-01-22 13:37:34.602080: step: 248/466, loss: 0.013274705968797207 2023-01-22 13:37:35.214134: step: 250/466, loss: 0.0012937224237248302 2023-01-22 13:37:35.790984: step: 252/466, loss: 0.0030212379060685635 2023-01-22 13:37:36.381885: step: 254/466, loss: 0.000778071815147996 2023-01-22 13:37:37.015790: step: 256/466, loss: 0.023154953494668007 2023-01-22 13:37:37.643872: step: 258/466, loss: 0.004224831238389015 2023-01-22 13:37:38.239017: step: 260/466, loss: 0.0016274972585961223 2023-01-22 13:37:38.970268: step: 262/466, loss: 0.02594602108001709 2023-01-22 13:37:39.598627: step: 264/466, loss: 0.013618562370538712 2023-01-22 13:37:40.184388: step: 266/466, loss: 0.003424879163503647 2023-01-22 13:37:40.836928: step: 268/466, loss: 0.01895812712609768 2023-01-22 13:37:41.418471: step: 270/466, loss: 0.00664788531139493 2023-01-22 13:37:42.024239: step: 272/466, loss: 0.08949951827526093 2023-01-22 13:37:42.658586: step: 274/466, loss: 0.04765995219349861 2023-01-22 13:37:43.267838: step: 276/466, loss: 0.0042534093372523785 2023-01-22 13:37:43.928501: step: 278/466, loss: 0.043888408690690994 2023-01-22 13:37:44.523788: step: 280/466, loss: 0.0014408992137759924 2023-01-22 13:37:45.068920: step: 282/466, loss: 0.001083766925148666 2023-01-22 13:37:45.714346: step: 284/466, loss: 3.8808677196502686 2023-01-22 13:37:46.298955: step: 286/466, loss: 0.0072978041134774685 2023-01-22 13:37:46.909681: step: 288/466, loss: 0.023800894618034363 2023-01-22 13:37:47.535161: step: 290/466, loss: 0.0016516759060323238 2023-01-22 13:37:48.163259: step: 292/466, loss: 0.015784960240125656 2023-01-22 13:37:48.883341: step: 294/466, loss: 0.007960588671267033 2023-01-22 13:37:49.460487: step: 296/466, loss: 0.009296424686908722 2023-01-22 13:37:50.063713: step: 298/466, loss: 0.04585607349872589 2023-01-22 13:37:50.725262: step: 300/466, loss: 0.5705857276916504 2023-01-22 13:37:51.254891: step: 302/466, loss: 0.03182634338736534 2023-01-22 13:37:51.839743: step: 304/466, loss: 0.00026920222444459796 2023-01-22 13:37:52.424742: step: 306/466, loss: 0.019237732514739037 2023-01-22 13:37:52.971345: step: 308/466, loss: 0.0022195458877831697 2023-01-22 13:37:53.561340: step: 310/466, loss: 0.00019793420506175607 2023-01-22 13:37:54.111924: step: 312/466, loss: 0.13449236750602722 2023-01-22 13:37:54.667554: step: 314/466, loss: 0.09944283962249756 2023-01-22 13:37:55.232239: step: 316/466, loss: 0.009415938518941402 2023-01-22 13:37:55.794639: step: 318/466, loss: 0.017806226387619972 2023-01-22 13:37:56.432840: step: 320/466, loss: 0.022105487063527107 2023-01-22 13:37:56.974642: step: 322/466, loss: 0.004306672140955925 2023-01-22 13:37:57.561607: step: 324/466, loss: 0.047689154744148254 2023-01-22 13:37:58.169891: step: 326/466, loss: 0.13233791291713715 2023-01-22 13:37:58.747469: step: 328/466, loss: 0.03858339786529541 2023-01-22 13:37:59.369496: step: 330/466, loss: 0.004022891633212566 2023-01-22 13:37:59.960107: step: 332/466, loss: 0.028247395530343056 2023-01-22 13:38:00.583414: step: 334/466, loss: 0.01629316434264183 2023-01-22 13:38:01.153204: step: 336/466, loss: 0.020957961678504944 2023-01-22 13:38:01.768632: step: 338/466, loss: 0.02816147543489933 2023-01-22 13:38:02.383984: step: 340/466, loss: 0.10556326806545258 2023-01-22 13:38:02.980781: step: 342/466, loss: 0.0038239702116698027 2023-01-22 13:38:03.748919: step: 344/466, loss: 0.012613574974238873 2023-01-22 13:38:04.439300: step: 346/466, loss: 0.27932924032211304 2023-01-22 13:38:05.028656: step: 348/466, loss: 0.015454914420843124 2023-01-22 13:38:05.571760: step: 350/466, loss: 0.0010309700155630708 2023-01-22 13:38:06.194409: step: 352/466, loss: 0.04021390900015831 2023-01-22 13:38:06.856406: step: 354/466, loss: 0.05277429148554802 2023-01-22 13:38:07.441946: step: 356/466, loss: 0.0029488904401659966 2023-01-22 13:38:08.016215: step: 358/466, loss: 0.008632347919046879 2023-01-22 13:38:08.674666: step: 360/466, loss: 0.0010874831350520253 2023-01-22 13:38:09.267732: step: 362/466, loss: 0.07463423907756805 2023-01-22 13:38:09.829547: step: 364/466, loss: 0.008078490383923054 2023-01-22 13:38:10.405254: step: 366/466, loss: 3.8495003536809236e-05 2023-01-22 13:38:11.054515: step: 368/466, loss: 0.00020329591643530875 2023-01-22 13:38:11.675927: step: 370/466, loss: 0.0014669331721961498 2023-01-22 13:38:12.276636: step: 372/466, loss: 0.019579097628593445 2023-01-22 13:38:12.854663: step: 374/466, loss: 0.01790340431034565 2023-01-22 13:38:13.430567: step: 376/466, loss: 0.0022823030594736338 2023-01-22 13:38:14.059870: step: 378/466, loss: 0.10092777758836746 2023-01-22 13:38:14.592364: step: 380/466, loss: 0.0015150151448324323 2023-01-22 13:38:15.197797: step: 382/466, loss: 0.0008342181099578738 2023-01-22 13:38:15.811344: step: 384/466, loss: 0.020368829369544983 2023-01-22 13:38:16.401072: step: 386/466, loss: 0.009976758621633053 2023-01-22 13:38:16.933760: step: 388/466, loss: 0.005474635865539312 2023-01-22 13:38:17.513439: step: 390/466, loss: 0.00407253485172987 2023-01-22 13:38:18.097621: step: 392/466, loss: 0.005135375075042248 2023-01-22 13:38:18.716191: step: 394/466, loss: 0.000495641550514847 2023-01-22 13:38:19.276786: step: 396/466, loss: 0.019225360825657845 2023-01-22 13:38:20.034305: step: 398/466, loss: 0.010420488193631172 2023-01-22 13:38:20.627434: step: 400/466, loss: 0.7432917952537537 2023-01-22 13:38:21.259616: step: 402/466, loss: 0.08139077574014664 2023-01-22 13:38:21.865311: step: 404/466, loss: 0.025644859299063683 2023-01-22 13:38:22.478593: step: 406/466, loss: 0.01703513041138649 2023-01-22 13:38:23.085060: step: 408/466, loss: 0.0009812447242438793 2023-01-22 13:38:23.778961: step: 410/466, loss: 0.0013426410732790828 2023-01-22 13:38:24.396476: step: 412/466, loss: 0.02057889848947525 2023-01-22 13:38:24.962064: step: 414/466, loss: 0.03548679128289223 2023-01-22 13:38:25.569419: step: 416/466, loss: 0.01182898785918951 2023-01-22 13:38:26.168297: step: 418/466, loss: 0.013406840153038502 2023-01-22 13:38:26.772560: step: 420/466, loss: 0.007967946119606495 2023-01-22 13:38:27.401023: step: 422/466, loss: 0.017121534794569016 2023-01-22 13:38:27.971594: step: 424/466, loss: 0.005006145685911179 2023-01-22 13:38:28.606613: step: 426/466, loss: 0.008276369422674179 2023-01-22 13:38:29.210345: step: 428/466, loss: 0.00829931627959013 2023-01-22 13:38:29.826393: step: 430/466, loss: 0.012971614487469196 2023-01-22 13:38:30.428073: step: 432/466, loss: 0.0019958114717155695 2023-01-22 13:38:31.027975: step: 434/466, loss: 0.0078291529789567 2023-01-22 13:38:31.679470: step: 436/466, loss: 0.003694443963468075 2023-01-22 13:38:32.319224: step: 438/466, loss: 0.0021362758707255125 2023-01-22 13:38:32.962968: step: 440/466, loss: 0.006038742605596781 2023-01-22 13:38:33.656649: step: 442/466, loss: 0.0014593410305678844 2023-01-22 13:38:34.269994: step: 444/466, loss: 0.016899054870009422 2023-01-22 13:38:34.913011: step: 446/466, loss: 0.023046938702464104 2023-01-22 13:38:35.546165: step: 448/466, loss: 0.00014703207125421613 2023-01-22 13:38:36.147927: step: 450/466, loss: 0.13239708542823792 2023-01-22 13:38:36.803321: step: 452/466, loss: 0.014334054663777351 2023-01-22 13:38:37.438521: step: 454/466, loss: 0.001111467950977385 2023-01-22 13:38:38.041574: step: 456/466, loss: 0.022834081202745438 2023-01-22 13:38:38.660303: step: 458/466, loss: 0.2740824818611145 2023-01-22 13:38:39.285300: step: 460/466, loss: 0.0005852937465533614 2023-01-22 13:38:39.931564: step: 462/466, loss: 0.0012171048438176513 2023-01-22 13:38:40.496355: step: 464/466, loss: 0.012282396666705608 2023-01-22 13:38:41.096620: step: 466/466, loss: 0.008685320615768433 2023-01-22 13:38:41.718648: step: 468/466, loss: 0.0013729456113651395 2023-01-22 13:38:42.321281: step: 470/466, loss: 0.006827492732554674 2023-01-22 13:38:42.908228: step: 472/466, loss: 0.08489412069320679 2023-01-22 13:38:43.521292: step: 474/466, loss: 0.049523983150720596 2023-01-22 13:38:44.115409: step: 476/466, loss: 0.009977877140045166 2023-01-22 13:38:44.707107: step: 478/466, loss: 0.0059689804911613464 2023-01-22 13:38:45.316292: step: 480/466, loss: 0.11533409357070923 2023-01-22 13:38:45.982999: step: 482/466, loss: 0.013530323281884193 2023-01-22 13:38:46.585190: step: 484/466, loss: 0.27607443928718567 2023-01-22 13:38:47.138359: step: 486/466, loss: 0.0014335147570818663 2023-01-22 13:38:47.760276: step: 488/466, loss: 0.038051776587963104 2023-01-22 13:38:48.302822: step: 490/466, loss: 0.019769888371229172 2023-01-22 13:38:48.902752: step: 492/466, loss: 0.054358504712581635 2023-01-22 13:38:49.540907: step: 494/466, loss: 0.0036494287196546793 2023-01-22 13:38:50.097797: step: 496/466, loss: 0.745313823223114 2023-01-22 13:38:50.715952: step: 498/466, loss: 0.034797411412000656 2023-01-22 13:38:51.259190: step: 500/466, loss: 0.025983678176999092 2023-01-22 13:38:51.820413: step: 502/466, loss: 0.0233176089823246 2023-01-22 13:38:52.448690: step: 504/466, loss: 0.018735304474830627 2023-01-22 13:38:53.057705: step: 506/466, loss: 0.029395515099167824 2023-01-22 13:38:53.603872: step: 508/466, loss: 0.0024682055227458477 2023-01-22 13:38:54.267333: step: 510/466, loss: 0.005346355494111776 2023-01-22 13:38:54.874672: step: 512/466, loss: 0.00011466677096905187 2023-01-22 13:38:55.452959: step: 514/466, loss: 0.02455970272421837 2023-01-22 13:38:56.060101: step: 516/466, loss: 0.4798331558704376 2023-01-22 13:38:56.703238: step: 518/466, loss: 0.001988023519515991 2023-01-22 13:38:57.360428: step: 520/466, loss: 0.055366151034832 2023-01-22 13:38:57.939719: step: 522/466, loss: 0.06646957248449326 2023-01-22 13:38:58.500641: step: 524/466, loss: 0.018799712881445885 2023-01-22 13:38:59.074038: step: 526/466, loss: 0.0010934981983155012 2023-01-22 13:38:59.720537: step: 528/466, loss: 0.005480987951159477 2023-01-22 13:39:00.375390: step: 530/466, loss: 0.015281077474355698 2023-01-22 13:39:00.979998: step: 532/466, loss: 0.01949336566030979 2023-01-22 13:39:01.508850: step: 534/466, loss: 0.0007982194656506181 2023-01-22 13:39:02.142946: step: 536/466, loss: 0.010515277273952961 2023-01-22 13:39:02.761052: step: 538/466, loss: 0.024209585040807724 2023-01-22 13:39:03.406449: step: 540/466, loss: 0.007699800655245781 2023-01-22 13:39:04.042660: step: 542/466, loss: 0.0018632489955052733 2023-01-22 13:39:04.641055: step: 544/466, loss: 0.0011185059556737542 2023-01-22 13:39:05.218561: step: 546/466, loss: 0.0012074961559846997 2023-01-22 13:39:05.863234: step: 548/466, loss: 0.003445641603320837 2023-01-22 13:39:06.427938: step: 550/466, loss: 0.01648532971739769 2023-01-22 13:39:06.955523: step: 552/466, loss: 0.03117789328098297 2023-01-22 13:39:07.598047: step: 554/466, loss: 0.029531968757510185 2023-01-22 13:39:08.195040: step: 556/466, loss: 0.007115309592336416 2023-01-22 13:39:08.848910: step: 558/466, loss: 0.028260016813874245 2023-01-22 13:39:09.468241: step: 560/466, loss: 0.024944953620433807 2023-01-22 13:39:10.200362: step: 562/466, loss: 0.0020174370147287846 2023-01-22 13:39:10.838769: step: 564/466, loss: 0.01885637827217579 2023-01-22 13:39:11.448782: step: 566/466, loss: 0.0009125018259510398 2023-01-22 13:39:12.119910: step: 568/466, loss: 0.016139905899763107 2023-01-22 13:39:12.753076: step: 570/466, loss: 0.06416390091180801 2023-01-22 13:39:13.422830: step: 572/466, loss: 0.15958631038665771 2023-01-22 13:39:14.040477: step: 574/466, loss: 0.016670292243361473 2023-01-22 13:39:14.638832: step: 576/466, loss: 0.0015069821383804083 2023-01-22 13:39:15.223229: step: 578/466, loss: 0.001389741781167686 2023-01-22 13:39:15.938769: step: 580/466, loss: 0.043244875967502594 2023-01-22 13:39:16.565205: step: 582/466, loss: 0.013401959091424942 2023-01-22 13:39:17.155648: step: 584/466, loss: 0.002430893713608384 2023-01-22 13:39:17.803111: step: 586/466, loss: 0.00083020085003227 2023-01-22 13:39:18.440727: step: 588/466, loss: 0.007434462197124958 2023-01-22 13:39:19.044399: step: 590/466, loss: 0.06438363343477249 2023-01-22 13:39:19.577337: step: 592/466, loss: 0.0016319775022566319 2023-01-22 13:39:20.194695: step: 594/466, loss: 0.00047406292287632823 2023-01-22 13:39:20.823364: step: 596/466, loss: 0.022736098617315292 2023-01-22 13:39:21.470466: step: 598/466, loss: 0.3369652032852173 2023-01-22 13:39:22.058366: step: 600/466, loss: 0.00040172869921661913 2023-01-22 13:39:22.640780: step: 602/466, loss: 0.005799479782581329 2023-01-22 13:39:23.261382: step: 604/466, loss: 0.07075946778059006 2023-01-22 13:39:23.831221: step: 606/466, loss: 0.0024011246860027313 2023-01-22 13:39:24.395879: step: 608/466, loss: 0.5161957144737244 2023-01-22 13:39:25.033964: step: 610/466, loss: 0.002114727860316634 2023-01-22 13:39:25.692160: step: 612/466, loss: 0.0044073727913200855 2023-01-22 13:39:26.280508: step: 614/466, loss: 0.099338598549366 2023-01-22 13:39:26.863548: step: 616/466, loss: 0.015629105269908905 2023-01-22 13:39:27.515503: step: 618/466, loss: 0.3922637403011322 2023-01-22 13:39:28.103115: step: 620/466, loss: 8.24608578113839e-05 2023-01-22 13:39:28.751784: step: 622/466, loss: 0.022870955988764763 2023-01-22 13:39:29.360735: step: 624/466, loss: 0.00864611566066742 2023-01-22 13:39:30.027651: step: 626/466, loss: 0.00594196654856205 2023-01-22 13:39:30.649930: step: 628/466, loss: 0.012344695627689362 2023-01-22 13:39:31.211602: step: 630/466, loss: 0.0002703698119148612 2023-01-22 13:39:31.788629: step: 632/466, loss: 3.023464887519367e-05 2023-01-22 13:39:32.391407: step: 634/466, loss: 0.0016201818361878395 2023-01-22 13:39:32.990439: step: 636/466, loss: 0.03301899880170822 2023-01-22 13:39:33.647323: step: 638/466, loss: 0.04122118651866913 2023-01-22 13:39:34.220573: step: 640/466, loss: 7.819570600986481e-05 2023-01-22 13:39:34.780898: step: 642/466, loss: 0.001188569818623364 2023-01-22 13:39:35.361830: step: 644/466, loss: 0.0031083908397704363 2023-01-22 13:39:35.961446: step: 646/466, loss: 0.01917244680225849 2023-01-22 13:39:36.493195: step: 648/466, loss: 0.142510324716568 2023-01-22 13:39:37.111444: step: 650/466, loss: 0.019693298265337944 2023-01-22 13:39:37.729825: step: 652/466, loss: 0.08926106989383698 2023-01-22 13:39:38.343866: step: 654/466, loss: 0.060860756784677505 2023-01-22 13:39:38.935270: step: 656/466, loss: 0.002574354875832796 2023-01-22 13:39:39.587075: step: 658/466, loss: 0.022529149428009987 2023-01-22 13:39:40.214878: step: 660/466, loss: 0.00436887051910162 2023-01-22 13:39:40.806996: step: 662/466, loss: 0.023581165820360184 2023-01-22 13:39:41.443027: step: 664/466, loss: 0.10075651109218597 2023-01-22 13:39:42.002351: step: 666/466, loss: 0.005409528501331806 2023-01-22 13:39:42.611870: step: 668/466, loss: 0.0017528823809698224 2023-01-22 13:39:43.257381: step: 670/466, loss: 0.08418670296669006 2023-01-22 13:39:43.892157: step: 672/466, loss: 0.001370617770589888 2023-01-22 13:39:44.522638: step: 674/466, loss: 0.0022740717977285385 2023-01-22 13:39:45.137137: step: 676/466, loss: 0.003733733668923378 2023-01-22 13:39:45.734741: step: 678/466, loss: 0.0049021136946976185 2023-01-22 13:39:46.304676: step: 680/466, loss: 0.05791711434721947 2023-01-22 13:39:46.893383: step: 682/466, loss: 1.726668357849121 2023-01-22 13:39:47.508703: step: 684/466, loss: 0.10720763355493546 2023-01-22 13:39:48.155565: step: 686/466, loss: 0.040691643953323364 2023-01-22 13:39:48.804492: step: 688/466, loss: 0.0018499793950468302 2023-01-22 13:39:49.411929: step: 690/466, loss: 0.012728684581816196 2023-01-22 13:39:50.019082: step: 692/466, loss: 0.0239124558866024 2023-01-22 13:39:50.615558: step: 694/466, loss: 0.005421977955847979 2023-01-22 13:39:51.167973: step: 696/466, loss: 0.03604663908481598 2023-01-22 13:39:51.788380: step: 698/466, loss: 0.021979086101055145 2023-01-22 13:39:52.471040: step: 700/466, loss: 0.007704432122409344 2023-01-22 13:39:53.094049: step: 702/466, loss: 0.058133162558078766 2023-01-22 13:39:53.739509: step: 704/466, loss: 0.004512380808591843 2023-01-22 13:39:54.336290: step: 706/466, loss: 0.005167886149138212 2023-01-22 13:39:54.972658: step: 708/466, loss: 0.013947400264441967 2023-01-22 13:39:55.588387: step: 710/466, loss: 0.006945465691387653 2023-01-22 13:39:56.233683: step: 712/466, loss: 0.0446675680577755 2023-01-22 13:39:56.839136: step: 714/466, loss: 0.015970544889569283 2023-01-22 13:39:57.460346: step: 716/466, loss: 0.0590476393699646 2023-01-22 13:39:58.095110: step: 718/466, loss: 0.014604576863348484 2023-01-22 13:39:58.732791: step: 720/466, loss: 0.006326437462121248 2023-01-22 13:39:59.317821: step: 722/466, loss: 0.006244446150958538 2023-01-22 13:40:00.005424: step: 724/466, loss: 0.22038207948207855 2023-01-22 13:40:00.618765: step: 726/466, loss: 0.008666588924825191 2023-01-22 13:40:01.285236: step: 728/466, loss: 0.0798700824379921 2023-01-22 13:40:01.865834: step: 730/466, loss: 0.054656002670526505 2023-01-22 13:40:02.495118: step: 732/466, loss: 0.004361420404165983 2023-01-22 13:40:03.105352: step: 734/466, loss: 0.01673595793545246 2023-01-22 13:40:03.752622: step: 736/466, loss: 0.002686643274500966 2023-01-22 13:40:04.376018: step: 738/466, loss: 0.012211292050778866 2023-01-22 13:40:05.001888: step: 740/466, loss: 0.007673958782106638 2023-01-22 13:40:05.565839: step: 742/466, loss: 0.0045503550209105015 2023-01-22 13:40:06.164270: step: 744/466, loss: 0.002969563938677311 2023-01-22 13:40:06.768894: step: 746/466, loss: 0.08212627470493317 2023-01-22 13:40:07.356945: step: 748/466, loss: 0.000188513717148453 2023-01-22 13:40:07.928537: step: 750/466, loss: 0.053561802953481674 2023-01-22 13:40:08.567993: step: 752/466, loss: 0.028425009921193123 2023-01-22 13:40:09.191368: step: 754/466, loss: 0.08109253644943237 2023-01-22 13:40:09.943016: step: 756/466, loss: 0.004710098262876272 2023-01-22 13:40:10.604249: step: 758/466, loss: 0.013896183110773563 2023-01-22 13:40:11.161614: step: 760/466, loss: 0.018331099301576614 2023-01-22 13:40:11.690000: step: 762/466, loss: 0.017580410465598106 2023-01-22 13:40:12.266590: step: 764/466, loss: 0.030865274369716644 2023-01-22 13:40:12.898584: step: 766/466, loss: 0.05202435702085495 2023-01-22 13:40:13.482065: step: 768/466, loss: 0.0060366494581103325 2023-01-22 13:40:14.187317: step: 770/466, loss: 0.019957980141043663 2023-01-22 13:40:14.786286: step: 772/466, loss: 0.023260585963726044 2023-01-22 13:40:15.395566: step: 774/466, loss: 0.009710898622870445 2023-01-22 13:40:16.082924: step: 776/466, loss: 0.002994952257722616 2023-01-22 13:40:16.695280: step: 778/466, loss: 0.0006601830828003585 2023-01-22 13:40:17.343626: step: 780/466, loss: 0.0024687948171049356 2023-01-22 13:40:17.921056: step: 782/466, loss: 0.0004755732079502195 2023-01-22 13:40:18.503164: step: 784/466, loss: 0.06548666208982468 2023-01-22 13:40:19.098905: step: 786/466, loss: 0.014849133789539337 2023-01-22 13:40:19.706178: step: 788/466, loss: 0.012060455977916718 2023-01-22 13:40:20.359037: step: 790/466, loss: 0.016499169170856476 2023-01-22 13:40:20.913812: step: 792/466, loss: 0.01723051816225052 2023-01-22 13:40:21.584352: step: 794/466, loss: 0.0010327841155231 2023-01-22 13:40:22.183390: step: 796/466, loss: 0.0008903178968466818 2023-01-22 13:40:22.754327: step: 798/466, loss: 0.07221655547618866 2023-01-22 13:40:23.309984: step: 800/466, loss: 0.003996188286691904 2023-01-22 13:40:23.908067: step: 802/466, loss: 0.04473995044827461 2023-01-22 13:40:24.517231: step: 804/466, loss: 0.027255544438958168 2023-01-22 13:40:25.160675: step: 806/466, loss: 1.667365550994873 2023-01-22 13:40:25.850462: step: 808/466, loss: 0.0021742419339716434 2023-01-22 13:40:26.520905: step: 810/466, loss: 0.008183703757822514 2023-01-22 13:40:27.120341: step: 812/466, loss: 0.0016619794769212604 2023-01-22 13:40:27.729495: step: 814/466, loss: 0.011446056887507439 2023-01-22 13:40:28.304475: step: 816/466, loss: 0.001039049937389791 2023-01-22 13:40:28.892079: step: 818/466, loss: 0.0022509461268782616 2023-01-22 13:40:29.494315: step: 820/466, loss: 0.010372995398938656 2023-01-22 13:40:30.094467: step: 822/466, loss: 3.292357723694295e-05 2023-01-22 13:40:30.734528: step: 824/466, loss: 0.002079638186842203 2023-01-22 13:40:31.380428: step: 826/466, loss: 0.015205027535557747 2023-01-22 13:40:31.982551: step: 828/466, loss: 0.022317998111248016 2023-01-22 13:40:32.685705: step: 830/466, loss: 0.022038595750927925 2023-01-22 13:40:33.297052: step: 832/466, loss: 0.015924014151096344 2023-01-22 13:40:33.887808: step: 834/466, loss: 0.001443645916879177 2023-01-22 13:40:34.518070: step: 836/466, loss: 0.04264770820736885 2023-01-22 13:40:35.168847: step: 838/466, loss: 0.002594459569081664 2023-01-22 13:40:35.779128: step: 840/466, loss: 0.014926884323358536 2023-01-22 13:40:36.423435: step: 842/466, loss: 7.93904036981985e-05 2023-01-22 13:40:37.007558: step: 844/466, loss: 0.00013029819820076227 2023-01-22 13:40:37.709837: step: 846/466, loss: 0.0015288868453353643 2023-01-22 13:40:38.350193: step: 848/466, loss: 0.001493810210376978 2023-01-22 13:40:38.960620: step: 850/466, loss: 0.01249898225069046 2023-01-22 13:40:39.543731: step: 852/466, loss: 0.025699099525809288 2023-01-22 13:40:40.188880: step: 854/466, loss: 0.01823694258928299 2023-01-22 13:40:40.805899: step: 856/466, loss: 0.06470238417387009 2023-01-22 13:40:41.399317: step: 858/466, loss: 0.0032758882734924555 2023-01-22 13:40:41.998152: step: 860/466, loss: 0.013614756055176258 2023-01-22 13:40:42.573206: step: 862/466, loss: 0.026815857738256454 2023-01-22 13:40:43.171132: step: 864/466, loss: 0.000988278421573341 2023-01-22 13:40:43.813595: step: 866/466, loss: 0.05569792911410332 2023-01-22 13:40:44.445502: step: 868/466, loss: 0.005735119339078665 2023-01-22 13:40:45.068759: step: 870/466, loss: 3.250784720876254e-05 2023-01-22 13:40:45.603399: step: 872/466, loss: 0.00030958899878896773 2023-01-22 13:40:46.174980: step: 874/466, loss: 0.02781083807349205 2023-01-22 13:40:46.748688: step: 876/466, loss: 0.02429971843957901 2023-01-22 13:40:47.365387: step: 878/466, loss: 0.012174009345471859 2023-01-22 13:40:47.953391: step: 880/466, loss: 0.0009367524180561304 2023-01-22 13:40:48.515432: step: 882/466, loss: 0.01255623809993267 2023-01-22 13:40:49.108960: step: 884/466, loss: 0.0006068717339076102 2023-01-22 13:40:49.765309: step: 886/466, loss: 0.009702799841761589 2023-01-22 13:40:50.423113: step: 888/466, loss: 0.099673792719841 2023-01-22 13:40:51.051758: step: 890/466, loss: 0.04090374335646629 2023-01-22 13:40:51.664745: step: 892/466, loss: 0.0034436695277690887 2023-01-22 13:40:52.309816: step: 894/466, loss: 0.003322442527860403 2023-01-22 13:40:52.958596: step: 896/466, loss: 0.009773043915629387 2023-01-22 13:40:53.547315: step: 898/466, loss: 0.0044140624813735485 2023-01-22 13:40:54.149361: step: 900/466, loss: 0.1074460819363594 2023-01-22 13:40:54.766522: step: 902/466, loss: 0.027551736682653427 2023-01-22 13:40:55.384857: step: 904/466, loss: 0.0009217429906129837 2023-01-22 13:40:56.028712: step: 906/466, loss: 0.002145333681255579 2023-01-22 13:40:56.608999: step: 908/466, loss: 0.019214188680052757 2023-01-22 13:40:57.228947: step: 910/466, loss: 0.0341348797082901 2023-01-22 13:40:57.823577: step: 912/466, loss: 1.8653154373168945 2023-01-22 13:40:58.455830: step: 914/466, loss: 0.009925548918545246 2023-01-22 13:40:59.072418: step: 916/466, loss: 0.06136850267648697 2023-01-22 13:40:59.700723: step: 918/466, loss: 0.02847776934504509 2023-01-22 13:41:00.379446: step: 920/466, loss: 0.002534595550969243 2023-01-22 13:41:01.044655: step: 922/466, loss: 0.00827273353934288 2023-01-22 13:41:01.631575: step: 924/466, loss: 0.03495006263256073 2023-01-22 13:41:02.228635: step: 926/466, loss: 0.006841725669801235 2023-01-22 13:41:02.816629: step: 928/466, loss: 0.0022770666982978582 2023-01-22 13:41:03.482523: step: 930/466, loss: 0.03333687782287598 2023-01-22 13:41:04.044461: step: 932/466, loss: 0.02715749852359295 ================================================== Loss: 0.059 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30221052436125967, 'r': 0.33719124919244914, 'f1': 0.3187440149316963}, 'combined': 0.23486401100230253, 'epoch': 33} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34795941519317186, 'r': 0.31901305136427716, 'f1': 0.3328581024180613}, 'combined': 0.2207556326917712, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2991213985594238, 'r': 0.27759372214795014, 'f1': 0.2879557667860858}, 'combined': 0.19197051119072386, 'epoch': 33} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.36756129786836267, 'r': 0.2977469663565054, 'f1': 0.3289911377183045}, 'combined': 0.21471000566878817, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29276759733092517, 'r': 0.3355438876430338, 'f1': 0.3126996088202985}, 'combined': 0.23041023807811467, 'epoch': 33} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34200683577365326, 'r': 0.3022937369923105, 'f1': 0.3209263776348908}, 'combined': 0.21284236444179283, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23742138364779872, 'r': 0.35952380952380947, 'f1': 0.2859848484848484}, 'combined': 0.19065656565656558, 'epoch': 33} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.43478260869565216, 'r': 0.43478260869565216, 'f1': 0.43478260869565216}, 'combined': 0.28985507246376807, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3382352941176471, 'r': 0.19827586206896552, 'f1': 0.25}, 'combined': 0.16666666666666666, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:43:30.616079: step: 2/466, loss: 0.00145103526301682 2023-01-22 13:43:31.270123: step: 4/466, loss: 0.01543799601495266 2023-01-22 13:43:31.864880: step: 6/466, loss: 0.00019660727411974221 2023-01-22 13:43:32.409994: step: 8/466, loss: 0.01038823090493679 2023-01-22 13:43:33.038951: step: 10/466, loss: 0.1893748939037323 2023-01-22 13:43:33.678019: step: 12/466, loss: 0.002862396417185664 2023-01-22 13:43:34.332824: step: 14/466, loss: 0.4766947627067566 2023-01-22 13:43:34.943698: step: 16/466, loss: 0.0016948574921116233 2023-01-22 13:43:35.612349: step: 18/466, loss: 0.00034551628050394356 2023-01-22 13:43:36.204706: step: 20/466, loss: 0.00022335999528877437 2023-01-22 13:43:36.819335: step: 22/466, loss: 0.882878303527832 2023-01-22 13:43:37.405629: step: 24/466, loss: 0.007838007993996143 2023-01-22 13:43:38.137307: step: 26/466, loss: 0.031117921695113182 2023-01-22 13:43:38.711413: step: 28/466, loss: 0.017217734828591347 2023-01-22 13:43:39.374501: step: 30/466, loss: 0.025149572640657425 2023-01-22 13:43:40.039738: step: 32/466, loss: 0.0020521816331893206 2023-01-22 13:43:40.676821: step: 34/466, loss: 0.011958999559283257 2023-01-22 13:43:41.263379: step: 36/466, loss: 0.008193464949727058 2023-01-22 13:43:41.851548: step: 38/466, loss: 0.00041370323742739856 2023-01-22 13:43:42.454908: step: 40/466, loss: 0.0024150144308805466 2023-01-22 13:43:43.043967: step: 42/466, loss: 0.010512173175811768 2023-01-22 13:43:43.672408: step: 44/466, loss: 0.00981095340102911 2023-01-22 13:43:44.323502: step: 46/466, loss: 0.00645467359572649 2023-01-22 13:43:44.997342: step: 48/466, loss: 0.003874067682772875 2023-01-22 13:43:45.664894: step: 50/466, loss: 0.00272333063185215 2023-01-22 13:43:46.288808: step: 52/466, loss: 0.003950105048716068 2023-01-22 13:43:46.915317: step: 54/466, loss: 0.014292075298726559 2023-01-22 13:43:47.451544: step: 56/466, loss: 0.026993967592716217 2023-01-22 13:43:48.069190: step: 58/466, loss: 0.015481133945286274 2023-01-22 13:43:48.717679: step: 60/466, loss: 0.0004928237176500261 2023-01-22 13:43:49.312913: step: 62/466, loss: 0.014285244047641754 2023-01-22 13:43:49.880052: step: 64/466, loss: 0.0006637139013037086 2023-01-22 13:43:50.471517: step: 66/466, loss: 0.02772347629070282 2023-01-22 13:43:51.114416: step: 68/466, loss: 0.002016782760620117 2023-01-22 13:43:51.774238: step: 70/466, loss: 0.0017903584521263838 2023-01-22 13:43:52.343177: step: 72/466, loss: 0.018268978223204613 2023-01-22 13:43:52.946596: step: 74/466, loss: 0.005079879891127348 2023-01-22 13:43:53.522786: step: 76/466, loss: 0.003510237904265523 2023-01-22 13:43:54.182364: step: 78/466, loss: 0.00917114969342947 2023-01-22 13:43:54.750863: step: 80/466, loss: 0.006327086128294468 2023-01-22 13:43:55.378279: step: 82/466, loss: 0.06136607751250267 2023-01-22 13:43:55.967660: step: 84/466, loss: 0.0007208603201434016 2023-01-22 13:43:56.631119: step: 86/466, loss: 0.002733423840254545 2023-01-22 13:43:57.254437: step: 88/466, loss: 0.013530532829463482 2023-01-22 13:43:57.895469: step: 90/466, loss: 0.006512346677482128 2023-01-22 13:43:58.573055: step: 92/466, loss: 0.005329577252268791 2023-01-22 13:43:59.269472: step: 94/466, loss: 6.773492813110352 2023-01-22 13:43:59.933717: step: 96/466, loss: 0.009055419825017452 2023-01-22 13:44:00.569226: step: 98/466, loss: 0.02896817773580551 2023-01-22 13:44:01.116614: step: 100/466, loss: 0.01143586728721857 2023-01-22 13:44:01.648604: step: 102/466, loss: 0.00017800797650124878 2023-01-22 13:44:02.304620: step: 104/466, loss: 0.023297762498259544 2023-01-22 13:44:02.911727: step: 106/466, loss: 0.003763154149055481 2023-01-22 13:44:03.538105: step: 108/466, loss: 0.0016384078189730644 2023-01-22 13:44:04.161835: step: 110/466, loss: 0.08477050065994263 2023-01-22 13:44:04.743219: step: 112/466, loss: 0.04829823970794678 2023-01-22 13:44:05.379648: step: 114/466, loss: 0.01076584868133068 2023-01-22 13:44:06.000159: step: 116/466, loss: 0.002037909347563982 2023-01-22 13:44:06.593888: step: 118/466, loss: 0.005181357264518738 2023-01-22 13:44:07.212946: step: 120/466, loss: 0.001771796029061079 2023-01-22 13:44:07.799507: step: 122/466, loss: 0.014115221798419952 2023-01-22 13:44:08.398017: step: 124/466, loss: 0.00441603921353817 2023-01-22 13:44:08.966171: step: 126/466, loss: 0.008045581169426441 2023-01-22 13:44:09.566960: step: 128/466, loss: 0.017257267609238625 2023-01-22 13:44:10.241853: step: 130/466, loss: 5.825229891343042e-05 2023-01-22 13:44:10.871655: step: 132/466, loss: 0.003156565362587571 2023-01-22 13:44:11.468107: step: 134/466, loss: 0.03854076936841011 2023-01-22 13:44:12.039862: step: 136/466, loss: 0.16053859889507294 2023-01-22 13:44:12.626331: step: 138/466, loss: 0.015324209816753864 2023-01-22 13:44:13.254427: step: 140/466, loss: 0.005850520916283131 2023-01-22 13:44:13.896571: step: 142/466, loss: 0.0016848534578457475 2023-01-22 13:44:14.533559: step: 144/466, loss: 0.03472881019115448 2023-01-22 13:44:15.204855: step: 146/466, loss: 0.01919623836874962 2023-01-22 13:44:15.872027: step: 148/466, loss: 0.14697599411010742 2023-01-22 13:44:16.460315: step: 150/466, loss: 0.014312896877527237 2023-01-22 13:44:17.086023: step: 152/466, loss: 0.00022975423780735582 2023-01-22 13:44:17.761712: step: 154/466, loss: 0.00428403215482831 2023-01-22 13:44:18.329234: step: 156/466, loss: 0.000444417935796082 2023-01-22 13:44:18.946023: step: 158/466, loss: 0.010773024521768093 2023-01-22 13:44:19.569558: step: 160/466, loss: 0.010588569566607475 2023-01-22 13:44:20.239636: step: 162/466, loss: 0.43219447135925293 2023-01-22 13:44:20.782087: step: 164/466, loss: 0.0008868477889336646 2023-01-22 13:44:21.383118: step: 166/466, loss: 0.002060934202745557 2023-01-22 13:44:21.977466: step: 168/466, loss: 0.0030699800699949265 2023-01-22 13:44:22.548417: step: 170/466, loss: 0.00046602555084973574 2023-01-22 13:44:23.177814: step: 172/466, loss: 0.0474085696041584 2023-01-22 13:44:23.778240: step: 174/466, loss: 0.00043793785152956843 2023-01-22 13:44:24.352475: step: 176/466, loss: 0.0032128584571182728 2023-01-22 13:44:24.960469: step: 178/466, loss: 0.017221592366695404 2023-01-22 13:44:25.549059: step: 180/466, loss: 0.0053990427404642105 2023-01-22 13:44:26.118041: step: 182/466, loss: 0.007466496899724007 2023-01-22 13:44:26.705591: step: 184/466, loss: 0.0009559531463310122 2023-01-22 13:44:27.318068: step: 186/466, loss: 0.20325548946857452 2023-01-22 13:44:27.975946: step: 188/466, loss: 0.02338382601737976 2023-01-22 13:44:28.625819: step: 190/466, loss: 0.0074676163494586945 2023-01-22 13:44:29.232485: step: 192/466, loss: 0.005346193909645081 2023-01-22 13:44:29.775118: step: 194/466, loss: 0.01663224585354328 2023-01-22 13:44:30.443411: step: 196/466, loss: 0.0007311832159757614 2023-01-22 13:44:31.089197: step: 198/466, loss: 0.0018670763820409775 2023-01-22 13:44:31.724333: step: 200/466, loss: 0.01166330836713314 2023-01-22 13:44:32.363620: step: 202/466, loss: 0.03932376578450203 2023-01-22 13:44:32.970322: step: 204/466, loss: 0.005174162797629833 2023-01-22 13:44:33.561580: step: 206/466, loss: 0.005198253784328699 2023-01-22 13:44:34.117624: step: 208/466, loss: 0.012226833030581474 2023-01-22 13:44:34.696108: step: 210/466, loss: 0.005281386431306601 2023-01-22 13:44:35.320847: step: 212/466, loss: 0.0019306482281535864 2023-01-22 13:44:35.940914: step: 214/466, loss: 0.00234972289763391 2023-01-22 13:44:36.516483: step: 216/466, loss: 0.009221348911523819 2023-01-22 13:44:37.113751: step: 218/466, loss: 0.0305698923766613 2023-01-22 13:44:37.769753: step: 220/466, loss: 0.0838170200586319 2023-01-22 13:44:38.418198: step: 222/466, loss: 0.002633826807141304 2023-01-22 13:44:39.100050: step: 224/466, loss: 0.0003977204905822873 2023-01-22 13:44:39.705410: step: 226/466, loss: 0.0055332910269498825 2023-01-22 13:44:40.392734: step: 228/466, loss: 0.049719490110874176 2023-01-22 13:44:41.062460: step: 230/466, loss: 0.014783482067286968 2023-01-22 13:44:41.657891: step: 232/466, loss: 0.004010343458503485 2023-01-22 13:44:42.274646: step: 234/466, loss: 0.1441631317138672 2023-01-22 13:44:42.923267: step: 236/466, loss: 0.16137421131134033 2023-01-22 13:44:43.656276: step: 238/466, loss: 0.5159417390823364 2023-01-22 13:44:44.321670: step: 240/466, loss: 0.000661507889162749 2023-01-22 13:44:44.937889: step: 242/466, loss: 0.022038301452994347 2023-01-22 13:44:45.483814: step: 244/466, loss: 0.0033813118934631348 2023-01-22 13:44:46.124502: step: 246/466, loss: 0.00786846037954092 2023-01-22 13:44:46.669138: step: 248/466, loss: 0.21639883518218994 2023-01-22 13:44:47.216973: step: 250/466, loss: 0.0043896762654185295 2023-01-22 13:44:47.783436: step: 252/466, loss: 0.004003674257546663 2023-01-22 13:44:48.457652: step: 254/466, loss: 0.03909432142972946 2023-01-22 13:44:49.045622: step: 256/466, loss: 0.3959011435508728 2023-01-22 13:44:49.723361: step: 258/466, loss: 0.01426810584962368 2023-01-22 13:44:50.317901: step: 260/466, loss: 0.009774630889296532 2023-01-22 13:44:50.898644: step: 262/466, loss: 0.004245213698595762 2023-01-22 13:44:51.499487: step: 264/466, loss: 0.004529725294560194 2023-01-22 13:44:52.061242: step: 266/466, loss: 0.0024402092676609755 2023-01-22 13:44:52.684329: step: 268/466, loss: 0.002173554850742221 2023-01-22 13:44:53.324948: step: 270/466, loss: 0.0045709493570029736 2023-01-22 13:44:53.895637: step: 272/466, loss: 0.07321876287460327 2023-01-22 13:44:54.477102: step: 274/466, loss: 0.06183570250868797 2023-01-22 13:44:55.069467: step: 276/466, loss: 0.0011557862162590027 2023-01-22 13:44:55.645316: step: 278/466, loss: 0.062261614948511124 2023-01-22 13:44:56.282451: step: 280/466, loss: 0.011697226203978062 2023-01-22 13:44:56.954797: step: 282/466, loss: 0.006745433434844017 2023-01-22 13:44:57.582806: step: 284/466, loss: 0.0017828167183324695 2023-01-22 13:44:58.206113: step: 286/466, loss: 0.052710626274347305 2023-01-22 13:44:58.782973: step: 288/466, loss: 0.004695044830441475 2023-01-22 13:44:59.431190: step: 290/466, loss: 0.02131335251033306 2023-01-22 13:45:00.058828: step: 292/466, loss: 0.007738930638879538 2023-01-22 13:45:00.678024: step: 294/466, loss: 0.01593361236155033 2023-01-22 13:45:01.238394: step: 296/466, loss: 0.00036103566526435316 2023-01-22 13:45:01.847510: step: 298/466, loss: 0.009133227169513702 2023-01-22 13:45:02.429157: step: 300/466, loss: 0.0015408407198265195 2023-01-22 13:45:03.036289: step: 302/466, loss: 0.3041478991508484 2023-01-22 13:45:03.611961: step: 304/466, loss: 0.04272019490599632 2023-01-22 13:45:04.269469: step: 306/466, loss: 0.027257483452558517 2023-01-22 13:45:04.902742: step: 308/466, loss: 0.10972411930561066 2023-01-22 13:45:05.483698: step: 310/466, loss: 0.0012863239971920848 2023-01-22 13:45:06.079057: step: 312/466, loss: 0.023251373320817947 2023-01-22 13:45:06.736535: step: 314/466, loss: 0.010081824846565723 2023-01-22 13:45:07.357750: step: 316/466, loss: 0.007842368446290493 2023-01-22 13:45:07.986373: step: 318/466, loss: 0.006509221158921719 2023-01-22 13:45:08.607087: step: 320/466, loss: 0.014905976131558418 2023-01-22 13:45:09.217826: step: 322/466, loss: 0.000455966976005584 2023-01-22 13:45:09.870696: step: 324/466, loss: 0.012475952506065369 2023-01-22 13:45:10.567980: step: 326/466, loss: 0.0011488832533359528 2023-01-22 13:45:11.218843: step: 328/466, loss: 0.0370166040956974 2023-01-22 13:45:11.864930: step: 330/466, loss: 0.050278399139642715 2023-01-22 13:45:12.498170: step: 332/466, loss: 0.02249862253665924 2023-01-22 13:45:13.124233: step: 334/466, loss: 0.0007561338716186583 2023-01-22 13:45:13.692409: step: 336/466, loss: 0.009127049706876278 2023-01-22 13:45:14.275936: step: 338/466, loss: 0.028104346245527267 2023-01-22 13:45:14.900229: step: 340/466, loss: 0.018344346433877945 2023-01-22 13:45:15.538814: step: 342/466, loss: 0.015621643513441086 2023-01-22 13:45:16.194954: step: 344/466, loss: 0.01132090762257576 2023-01-22 13:45:16.800838: step: 346/466, loss: 0.007212344091385603 2023-01-22 13:45:17.388735: step: 348/466, loss: 0.003566417610272765 2023-01-22 13:45:18.010216: step: 350/466, loss: 8.620372682344168e-05 2023-01-22 13:45:18.576060: step: 352/466, loss: 0.014750273898243904 2023-01-22 13:45:19.209451: step: 354/466, loss: 0.010027170181274414 2023-01-22 13:45:19.901317: step: 356/466, loss: 0.026798786595463753 2023-01-22 13:45:20.573519: step: 358/466, loss: 0.022073006257414818 2023-01-22 13:45:21.194159: step: 360/466, loss: 0.008595152758061886 2023-01-22 13:45:21.798357: step: 362/466, loss: 0.01112151425331831 2023-01-22 13:45:22.418637: step: 364/466, loss: 0.005875526927411556 2023-01-22 13:45:23.086235: step: 366/466, loss: 0.00016584506374783814 2023-01-22 13:45:23.649027: step: 368/466, loss: 0.010205023922026157 2023-01-22 13:45:24.244288: step: 370/466, loss: 0.009187527000904083 2023-01-22 13:45:24.838690: step: 372/466, loss: 0.02374931424856186 2023-01-22 13:45:25.447691: step: 374/466, loss: 0.008131437003612518 2023-01-22 13:45:26.009075: step: 376/466, loss: 0.05454736202955246 2023-01-22 13:45:26.595234: step: 378/466, loss: 0.03893023729324341 2023-01-22 13:45:27.229637: step: 380/466, loss: 0.014377097599208355 2023-01-22 13:45:27.852906: step: 382/466, loss: 0.03035423718392849 2023-01-22 13:45:28.490319: step: 384/466, loss: 0.0008646403439342976 2023-01-22 13:45:29.099777: step: 386/466, loss: 0.005868476815521717 2023-01-22 13:45:29.733735: step: 388/466, loss: 0.004623404238373041 2023-01-22 13:45:30.294047: step: 390/466, loss: 0.07496129721403122 2023-01-22 13:45:30.889596: step: 392/466, loss: 0.0005022316472604871 2023-01-22 13:45:31.586032: step: 394/466, loss: 0.0347086526453495 2023-01-22 13:45:32.192165: step: 396/466, loss: 0.04865318536758423 2023-01-22 13:45:32.809865: step: 398/466, loss: 0.009207713417708874 2023-01-22 13:45:33.379133: step: 400/466, loss: 0.003410718170925975 2023-01-22 13:45:33.980921: step: 402/466, loss: 0.07742343842983246 2023-01-22 13:45:34.594738: step: 404/466, loss: 0.019158687442541122 2023-01-22 13:45:35.175510: step: 406/466, loss: 0.03728799149394035 2023-01-22 13:45:35.759828: step: 408/466, loss: 0.009705687873065472 2023-01-22 13:45:36.278081: step: 410/466, loss: 0.00032010520226322114 2023-01-22 13:45:36.868224: step: 412/466, loss: 0.014273714274168015 2023-01-22 13:45:37.412069: step: 414/466, loss: 0.010331586003303528 2023-01-22 13:45:38.107021: step: 416/466, loss: 0.014624751172959805 2023-01-22 13:45:38.731128: step: 418/466, loss: 0.005155010148882866 2023-01-22 13:45:39.360209: step: 420/466, loss: 0.02165280096232891 2023-01-22 13:45:39.962438: step: 422/466, loss: 0.003787653986364603 2023-01-22 13:45:40.519520: step: 424/466, loss: 0.002895970596000552 2023-01-22 13:45:41.115015: step: 426/466, loss: 0.0736176073551178 2023-01-22 13:45:41.966332: step: 428/466, loss: 0.023823287338018417 2023-01-22 13:45:42.587079: step: 430/466, loss: 0.02936473861336708 2023-01-22 13:45:43.230391: step: 432/466, loss: 0.007644005585461855 2023-01-22 13:45:43.843085: step: 434/466, loss: 0.027653008699417114 2023-01-22 13:45:44.472096: step: 436/466, loss: 0.016150671988725662 2023-01-22 13:45:45.105004: step: 438/466, loss: 0.01745207980275154 2023-01-22 13:45:45.711990: step: 440/466, loss: 0.016127359122037888 2023-01-22 13:45:46.353118: step: 442/466, loss: 0.06432020664215088 2023-01-22 13:45:46.940620: step: 444/466, loss: 0.022350141778588295 2023-01-22 13:45:47.543833: step: 446/466, loss: 0.013962076045572758 2023-01-22 13:45:48.190639: step: 448/466, loss: 0.007573869079351425 2023-01-22 13:45:48.764846: step: 450/466, loss: 0.00517288688570261 2023-01-22 13:45:49.371082: step: 452/466, loss: 0.055208589881658554 2023-01-22 13:45:50.062212: step: 454/466, loss: 0.01886332593858242 2023-01-22 13:45:50.622884: step: 456/466, loss: 0.00902930460870266 2023-01-22 13:45:51.195097: step: 458/466, loss: 0.017374033108353615 2023-01-22 13:45:51.838611: step: 460/466, loss: 0.0030088305938988924 2023-01-22 13:45:52.435883: step: 462/466, loss: 0.007050098851323128 2023-01-22 13:45:53.030044: step: 464/466, loss: 0.0005313614383339882 2023-01-22 13:45:53.624064: step: 466/466, loss: 0.03915034234523773 2023-01-22 13:45:54.197124: step: 468/466, loss: 0.00026846598484553397 2023-01-22 13:45:54.808110: step: 470/466, loss: 0.027079172432422638 2023-01-22 13:45:55.419302: step: 472/466, loss: 0.0015112333931028843 2023-01-22 13:45:55.959425: step: 474/466, loss: 3.813156217802316e-05 2023-01-22 13:45:56.529287: step: 476/466, loss: 0.0012540343450382352 2023-01-22 13:45:57.137392: step: 478/466, loss: 0.01737871579825878 2023-01-22 13:45:57.801217: step: 480/466, loss: 0.0008481427212245762 2023-01-22 13:45:58.368948: step: 482/466, loss: 0.00066763861104846 2023-01-22 13:45:59.016737: step: 484/466, loss: 0.033831000328063965 2023-01-22 13:45:59.594777: step: 486/466, loss: 7.685121818212792e-05 2023-01-22 13:46:00.176577: step: 488/466, loss: 0.013425313867628574 2023-01-22 13:46:00.728818: step: 490/466, loss: 0.04113367199897766 2023-01-22 13:46:01.366022: step: 492/466, loss: 0.004991469904780388 2023-01-22 13:46:01.950388: step: 494/466, loss: 0.04344819113612175 2023-01-22 13:46:02.566263: step: 496/466, loss: 0.0021680507343262434 2023-01-22 13:46:03.175152: step: 498/466, loss: 0.027491370216012 2023-01-22 13:46:03.800726: step: 500/466, loss: 0.02037181705236435 2023-01-22 13:46:04.418834: step: 502/466, loss: 0.016716280952095985 2023-01-22 13:46:05.000533: step: 504/466, loss: 0.0015023309970274568 2023-01-22 13:46:05.621745: step: 506/466, loss: 0.003428276162594557 2023-01-22 13:46:06.224055: step: 508/466, loss: 0.004894760437309742 2023-01-22 13:46:06.817657: step: 510/466, loss: 0.03600066527724266 2023-01-22 13:46:07.403815: step: 512/466, loss: 0.0008272746345028281 2023-01-22 13:46:07.974231: step: 514/466, loss: 0.004729779902845621 2023-01-22 13:46:08.500185: step: 516/466, loss: 0.01697508431971073 2023-01-22 13:46:09.081487: step: 518/466, loss: 0.033606935292482376 2023-01-22 13:46:09.679806: step: 520/466, loss: 0.011284686625003815 2023-01-22 13:46:10.290460: step: 522/466, loss: 0.0027831706684082747 2023-01-22 13:46:10.903593: step: 524/466, loss: 0.0012507356004789472 2023-01-22 13:46:11.536030: step: 526/466, loss: 0.006484354380518198 2023-01-22 13:46:12.131993: step: 528/466, loss: 0.018996044993400574 2023-01-22 13:46:12.747920: step: 530/466, loss: 0.084006167948246 2023-01-22 13:46:13.382053: step: 532/466, loss: 0.0024460619315505028 2023-01-22 13:46:13.991497: step: 534/466, loss: 0.0011832149466499686 2023-01-22 13:46:14.697683: step: 536/466, loss: 0.0038138527888804674 2023-01-22 13:46:15.272633: step: 538/466, loss: 0.003790761809796095 2023-01-22 13:46:15.850500: step: 540/466, loss: 0.0055133383721113205 2023-01-22 13:46:16.459775: step: 542/466, loss: 0.004894603043794632 2023-01-22 13:46:17.036443: step: 544/466, loss: 0.012636066414415836 2023-01-22 13:46:17.621982: step: 546/466, loss: 0.0008947293972596526 2023-01-22 13:46:18.196717: step: 548/466, loss: 0.02004299685359001 2023-01-22 13:46:18.834801: step: 550/466, loss: 0.1391642540693283 2023-01-22 13:46:19.387837: step: 552/466, loss: 0.016957959160208702 2023-01-22 13:46:19.986360: step: 554/466, loss: 0.004670858848839998 2023-01-22 13:46:20.587826: step: 556/466, loss: 0.0005273279966786504 2023-01-22 13:46:21.217177: step: 558/466, loss: 0.0011098579270765185 2023-01-22 13:46:21.858764: step: 560/466, loss: 0.0007891767891123891 2023-01-22 13:46:22.505015: step: 562/466, loss: 0.011333691887557507 2023-01-22 13:46:23.105492: step: 564/466, loss: 0.026420513167977333 2023-01-22 13:46:23.714115: step: 566/466, loss: 0.010849075391888618 2023-01-22 13:46:24.351735: step: 568/466, loss: 0.0009764400310814381 2023-01-22 13:46:24.996765: step: 570/466, loss: 0.004660797771066427 2023-01-22 13:46:25.655261: step: 572/466, loss: 0.003154938342049718 2023-01-22 13:46:26.303584: step: 574/466, loss: 0.0002601823944132775 2023-01-22 13:46:26.958322: step: 576/466, loss: 0.0025972893927246332 2023-01-22 13:46:27.564411: step: 578/466, loss: 0.0016454608412459493 2023-01-22 13:46:28.129086: step: 580/466, loss: 0.001574407215230167 2023-01-22 13:46:28.690847: step: 582/466, loss: 0.025794681161642075 2023-01-22 13:46:29.302819: step: 584/466, loss: 0.005013021640479565 2023-01-22 13:46:29.949925: step: 586/466, loss: 0.28778019547462463 2023-01-22 13:46:30.558930: step: 588/466, loss: 0.0031442195177078247 2023-01-22 13:46:31.176307: step: 590/466, loss: 0.009091082960367203 2023-01-22 13:46:31.813118: step: 592/466, loss: 0.03503568843007088 2023-01-22 13:46:32.482712: step: 594/466, loss: 0.0561491958796978 2023-01-22 13:46:33.089859: step: 596/466, loss: 0.03535356745123863 2023-01-22 13:46:33.705853: step: 598/466, loss: 0.027905019000172615 2023-01-22 13:46:34.270415: step: 600/466, loss: 0.0008747755200602114 2023-01-22 13:46:34.853358: step: 602/466, loss: 0.01476990431547165 2023-01-22 13:46:35.521829: step: 604/466, loss: 0.02200404927134514 2023-01-22 13:46:36.130845: step: 606/466, loss: 0.005571092013269663 2023-01-22 13:46:36.757101: step: 608/466, loss: 0.03939218446612358 2023-01-22 13:46:37.384386: step: 610/466, loss: 0.02648027054965496 2023-01-22 13:46:37.982137: step: 612/466, loss: 0.0020359500776976347 2023-01-22 13:46:38.525253: step: 614/466, loss: 0.0784219428896904 2023-01-22 13:46:39.081011: step: 616/466, loss: 0.00010506340186111629 2023-01-22 13:46:39.728653: step: 618/466, loss: 0.01642722263932228 2023-01-22 13:46:40.417368: step: 620/466, loss: 0.00024249528360087425 2023-01-22 13:46:41.038792: step: 622/466, loss: 0.007444320246577263 2023-01-22 13:46:41.603983: step: 624/466, loss: 0.07463310658931732 2023-01-22 13:46:42.173696: step: 626/466, loss: 0.0032307819928973913 2023-01-22 13:46:42.798791: step: 628/466, loss: 3.353673219680786 2023-01-22 13:46:43.476002: step: 630/466, loss: 0.02865377441048622 2023-01-22 13:46:44.028500: step: 632/466, loss: 0.013395837508141994 2023-01-22 13:46:44.673992: step: 634/466, loss: 0.021773796528577805 2023-01-22 13:46:45.233215: step: 636/466, loss: 2.937152657978004e-06 2023-01-22 13:46:45.848084: step: 638/466, loss: 0.023525815457105637 2023-01-22 13:46:46.420882: step: 640/466, loss: 0.005066219717264175 2023-01-22 13:46:47.018354: step: 642/466, loss: 0.0014348170952871442 2023-01-22 13:46:47.633890: step: 644/466, loss: 0.20574060082435608 2023-01-22 13:46:48.233829: step: 646/466, loss: 0.005184273701161146 2023-01-22 13:46:48.860593: step: 648/466, loss: 0.011186490766704082 2023-01-22 13:46:49.495451: step: 650/466, loss: 0.011190414428710938 2023-01-22 13:46:50.102921: step: 652/466, loss: 0.052119433879852295 2023-01-22 13:46:50.684811: step: 654/466, loss: 0.12704919278621674 2023-01-22 13:46:51.349235: step: 656/466, loss: 0.05566718056797981 2023-01-22 13:46:51.918241: step: 658/466, loss: 0.013254563324153423 2023-01-22 13:46:52.527981: step: 660/466, loss: 0.03673689812421799 2023-01-22 13:46:53.141658: step: 662/466, loss: 0.046389028429985046 2023-01-22 13:46:53.742661: step: 664/466, loss: 0.06802118569612503 2023-01-22 13:46:54.332139: step: 666/466, loss: 0.012471283785998821 2023-01-22 13:46:54.863377: step: 668/466, loss: 0.043097637593746185 2023-01-22 13:46:55.466404: step: 670/466, loss: 0.14850647747516632 2023-01-22 13:46:56.111132: step: 672/466, loss: 0.08933252096176147 2023-01-22 13:46:56.791442: step: 674/466, loss: 0.023991243913769722 2023-01-22 13:46:57.372317: step: 676/466, loss: 0.01252694707363844 2023-01-22 13:46:57.981010: step: 678/466, loss: 0.001300438423641026 2023-01-22 13:46:58.560516: step: 680/466, loss: 0.011431179940700531 2023-01-22 13:46:59.208233: step: 682/466, loss: 0.030292831361293793 2023-01-22 13:46:59.902553: step: 684/466, loss: 0.0014602902811020613 2023-01-22 13:47:00.553258: step: 686/466, loss: 0.03479599207639694 2023-01-22 13:47:01.244957: step: 688/466, loss: 0.021553900092840195 2023-01-22 13:47:01.808266: step: 690/466, loss: 0.009053482674062252 2023-01-22 13:47:02.426367: step: 692/466, loss: 0.007322824560105801 2023-01-22 13:47:03.005004: step: 694/466, loss: 0.007402040995657444 2023-01-22 13:47:03.612870: step: 696/466, loss: 0.00453294487670064 2023-01-22 13:47:04.300526: step: 698/466, loss: 0.004316447302699089 2023-01-22 13:47:05.001342: step: 700/466, loss: 0.01291638519614935 2023-01-22 13:47:05.624745: step: 702/466, loss: 1.0230196714401245 2023-01-22 13:47:06.280038: step: 704/466, loss: 0.01189468428492546 2023-01-22 13:47:06.878105: step: 706/466, loss: 0.004292115103453398 2023-01-22 13:47:07.463766: step: 708/466, loss: 0.01580323837697506 2023-01-22 13:47:08.087205: step: 710/466, loss: 0.008795082569122314 2023-01-22 13:47:08.745243: step: 712/466, loss: 0.0017119685653597116 2023-01-22 13:47:09.361406: step: 714/466, loss: 0.07867859303951263 2023-01-22 13:47:09.967527: step: 716/466, loss: 0.06292006373405457 2023-01-22 13:47:10.582195: step: 718/466, loss: 0.01796099729835987 2023-01-22 13:47:11.202334: step: 720/466, loss: 0.011473596096038818 2023-01-22 13:47:11.789797: step: 722/466, loss: 0.004189202096313238 2023-01-22 13:47:12.439684: step: 724/466, loss: 0.006237534806132317 2023-01-22 13:47:13.065817: step: 726/466, loss: 0.014844987541437149 2023-01-22 13:47:13.651641: step: 728/466, loss: 0.004478269722312689 2023-01-22 13:47:14.193596: step: 730/466, loss: 0.0007747714407742023 2023-01-22 13:47:14.786847: step: 732/466, loss: 0.020828496664762497 2023-01-22 13:47:15.383319: step: 734/466, loss: 0.0076482766307890415 2023-01-22 13:47:15.975951: step: 736/466, loss: 0.006298763677477837 2023-01-22 13:47:16.558620: step: 738/466, loss: 0.00467700744047761 2023-01-22 13:47:17.199768: step: 740/466, loss: 0.0018371690530329943 2023-01-22 13:47:17.794569: step: 742/466, loss: 0.01124525535851717 2023-01-22 13:47:18.437252: step: 744/466, loss: 0.0011776175815612078 2023-01-22 13:47:18.953368: step: 746/466, loss: 0.0012847530888393521 2023-01-22 13:47:19.496639: step: 748/466, loss: 0.030908742919564247 2023-01-22 13:47:20.086316: step: 750/466, loss: 0.03198450431227684 2023-01-22 13:47:20.704805: step: 752/466, loss: 0.003392572049051523 2023-01-22 13:47:21.342097: step: 754/466, loss: 0.008934213779866695 2023-01-22 13:47:21.942678: step: 756/466, loss: 0.0018937125569209456 2023-01-22 13:47:22.594386: step: 758/466, loss: 0.0012889329809695482 2023-01-22 13:47:23.219448: step: 760/466, loss: 0.0003134472935926169 2023-01-22 13:47:23.818228: step: 762/466, loss: 0.0072999438270926476 2023-01-22 13:47:24.468028: step: 764/466, loss: 0.0006366133457049727 2023-01-22 13:47:25.061579: step: 766/466, loss: 0.10606065392494202 2023-01-22 13:47:25.712578: step: 768/466, loss: 0.2799156904220581 2023-01-22 13:47:26.305269: step: 770/466, loss: 0.037246257066726685 2023-01-22 13:47:26.869049: step: 772/466, loss: 0.0026118732057511806 2023-01-22 13:47:27.411606: step: 774/466, loss: 0.0012117475271224976 2023-01-22 13:47:28.006689: step: 776/466, loss: 0.001340881921350956 2023-01-22 13:47:28.618470: step: 778/466, loss: 0.0028708649333566427 2023-01-22 13:47:29.199273: step: 780/466, loss: 0.0027547755744308233 2023-01-22 13:47:29.813847: step: 782/466, loss: 0.019336048513650894 2023-01-22 13:47:30.481287: step: 784/466, loss: 0.0019793594256043434 2023-01-22 13:47:31.113617: step: 786/466, loss: 0.0011085917940363288 2023-01-22 13:47:31.692217: step: 788/466, loss: 0.0020195310935378075 2023-01-22 13:47:32.292796: step: 790/466, loss: 0.005421169102191925 2023-01-22 13:47:32.911809: step: 792/466, loss: 0.1041063740849495 2023-01-22 13:47:33.534941: step: 794/466, loss: 1.3896070413466077e-05 2023-01-22 13:47:34.137867: step: 796/466, loss: 0.0036372009199112654 2023-01-22 13:47:34.759240: step: 798/466, loss: 0.0005540642305277288 2023-01-22 13:47:35.302037: step: 800/466, loss: 0.0004064899403601885 2023-01-22 13:47:35.920593: step: 802/466, loss: 0.0005614913534373045 2023-01-22 13:47:36.500142: step: 804/466, loss: 0.061234891414642334 2023-01-22 13:47:37.081034: step: 806/466, loss: 0.00150016276165843 2023-01-22 13:47:37.692579: step: 808/466, loss: 0.12199815362691879 2023-01-22 13:47:38.350599: step: 810/466, loss: 0.021360736340284348 2023-01-22 13:47:39.079894: step: 812/466, loss: 0.013401246629655361 2023-01-22 13:47:39.694597: step: 814/466, loss: 0.35365334153175354 2023-01-22 13:47:40.323990: step: 816/466, loss: 0.3945031464099884 2023-01-22 13:47:40.894562: step: 818/466, loss: 0.008412548340857029 2023-01-22 13:47:41.492621: step: 820/466, loss: 0.4514215886592865 2023-01-22 13:47:42.132964: step: 822/466, loss: 0.0017607983900234103 2023-01-22 13:47:42.726550: step: 824/466, loss: 0.013890708796679974 2023-01-22 13:47:43.304064: step: 826/466, loss: 0.31904810667037964 2023-01-22 13:47:43.910796: step: 828/466, loss: 0.006622544955462217 2023-01-22 13:47:44.430498: step: 830/466, loss: 0.008045758120715618 2023-01-22 13:47:45.039803: step: 832/466, loss: 0.055824968963861465 2023-01-22 13:47:45.578565: step: 834/466, loss: 0.0005027701845392585 2023-01-22 13:47:46.248947: step: 836/466, loss: 0.012430011294782162 2023-01-22 13:47:46.940086: step: 838/466, loss: 0.0092149768024683 2023-01-22 13:47:47.578213: step: 840/466, loss: 0.12487414479255676 2023-01-22 13:47:48.163386: step: 842/466, loss: 0.0024845825973898172 2023-01-22 13:47:48.802379: step: 844/466, loss: 0.004382827784866095 2023-01-22 13:47:49.410997: step: 846/466, loss: 0.009878009557723999 2023-01-22 13:47:50.040155: step: 848/466, loss: 0.5359202027320862 2023-01-22 13:47:50.626668: step: 850/466, loss: 0.00421258294954896 2023-01-22 13:47:51.204431: step: 852/466, loss: 0.0002831450547091663 2023-01-22 13:47:51.837069: step: 854/466, loss: 0.02538694068789482 2023-01-22 13:47:52.422249: step: 856/466, loss: 0.007417216431349516 2023-01-22 13:47:53.084507: step: 858/466, loss: 0.00018170750990975648 2023-01-22 13:47:53.702784: step: 860/466, loss: 0.005004175007343292 2023-01-22 13:47:54.300888: step: 862/466, loss: 0.00015706718841101974 2023-01-22 13:47:54.886303: step: 864/466, loss: 0.037154652178287506 2023-01-22 13:47:55.515569: step: 866/466, loss: 0.13900421559810638 2023-01-22 13:47:56.090795: step: 868/466, loss: 0.01564154215157032 2023-01-22 13:47:56.701108: step: 870/466, loss: 0.021798204630613327 2023-01-22 13:47:57.238641: step: 872/466, loss: 0.07405062019824982 2023-01-22 13:47:57.798916: step: 874/466, loss: 0.000820789544377476 2023-01-22 13:47:58.445259: step: 876/466, loss: 0.0032635561656206846 2023-01-22 13:47:59.107378: step: 878/466, loss: 0.009002278558909893 2023-01-22 13:47:59.666559: step: 880/466, loss: 0.007568451575934887 2023-01-22 13:48:00.354985: step: 882/466, loss: 0.005844644736498594 2023-01-22 13:48:00.956396: step: 884/466, loss: 0.03638865426182747 2023-01-22 13:48:01.635408: step: 886/466, loss: 0.026529058814048767 2023-01-22 13:48:02.283732: step: 888/466, loss: 0.013912210240960121 2023-01-22 13:48:02.864227: step: 890/466, loss: 0.025714127346873283 2023-01-22 13:48:03.509219: step: 892/466, loss: 0.012615867890417576 2023-01-22 13:48:04.176594: step: 894/466, loss: 0.001323892269283533 2023-01-22 13:48:04.788588: step: 896/466, loss: 0.0011425853008404374 2023-01-22 13:48:05.346039: step: 898/466, loss: 0.010848517529666424 2023-01-22 13:48:05.921154: step: 900/466, loss: 0.006292147561907768 2023-01-22 13:48:06.514531: step: 902/466, loss: 0.009328721091151237 2023-01-22 13:48:07.178580: step: 904/466, loss: 0.03581482172012329 2023-01-22 13:48:07.806460: step: 906/466, loss: 0.012340017594397068 2023-01-22 13:48:08.325192: step: 908/466, loss: 0.12274104356765747 2023-01-22 13:48:08.948802: step: 910/466, loss: 0.00909186527132988 2023-01-22 13:48:09.472159: step: 912/466, loss: 0.00010829462553374469 2023-01-22 13:48:10.115518: step: 914/466, loss: 0.11186635494232178 2023-01-22 13:48:10.752802: step: 916/466, loss: 0.08896740525960922 2023-01-22 13:48:11.340759: step: 918/466, loss: 0.017871228978037834 2023-01-22 13:48:11.961414: step: 920/466, loss: 0.0023724103812128305 2023-01-22 13:48:12.525852: step: 922/466, loss: 0.4413450360298157 2023-01-22 13:48:13.136779: step: 924/466, loss: 0.0022770024370402098 2023-01-22 13:48:13.756790: step: 926/466, loss: 0.0038027584087103605 2023-01-22 13:48:14.331430: step: 928/466, loss: 0.016884373500943184 2023-01-22 13:48:14.942525: step: 930/466, loss: 0.014772958122193813 2023-01-22 13:48:15.554714: step: 932/466, loss: 0.015412840060889721 ================================================== Loss: 0.057 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2983888846660586, 'r': 0.33858928468748206, 'f1': 0.31722053872053874}, 'combined': 0.23374144958355483, 'epoch': 34} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35837237870141786, 'r': 0.32480410732262066, 'f1': 0.34076354480708065}, 'combined': 0.22599862039018817, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2983455882352941, 'r': 0.2881747159090909, 'f1': 0.2931719653179191}, 'combined': 0.19544797687861273, 'epoch': 34} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3592525436017694, 'r': 0.29948088990026184, 'f1': 0.32665495930520055}, 'combined': 0.2131853418623414, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29078955292190584, 'r': 0.3470714018745328, 'f1': 0.31644745465030927}, 'combined': 0.23317180868970155, 'epoch': 34} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.33926273802111745, 'r': 0.3062788607135088, 'f1': 0.3219281455674837}, 'combined': 0.21350674939190628, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2829861111111111, 'r': 0.38809523809523805, 'f1': 0.3273092369477911}, 'combined': 0.21820615796519408, 'epoch': 34} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42391304347826086, 'r': 0.42391304347826086, 'f1': 0.4239130434782609}, 'combined': 0.28260869565217395, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3375, 'r': 0.23275862068965517, 'f1': 0.2755102040816327}, 'combined': 0.1836734693877551, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:50:43.083538: step: 2/466, loss: 0.002012265380471945 2023-01-22 13:50:43.667706: step: 4/466, loss: 0.012894488871097565 2023-01-22 13:50:44.254271: step: 6/466, loss: 0.00037483443156816065 2023-01-22 13:50:44.797590: step: 8/466, loss: 0.10634152591228485 2023-01-22 13:50:45.422878: step: 10/466, loss: 0.0006054774858057499 2023-01-22 13:50:46.091443: step: 12/466, loss: 0.09973970800638199 2023-01-22 13:50:46.705600: step: 14/466, loss: 0.006195437163114548 2023-01-22 13:50:47.291427: step: 16/466, loss: 0.012353341095149517 2023-01-22 13:50:47.901581: step: 18/466, loss: 0.007871192879974842 2023-01-22 13:50:48.498857: step: 20/466, loss: 0.03719558194279671 2023-01-22 13:50:49.107565: step: 22/466, loss: 0.8608661890029907 2023-01-22 13:50:49.673601: step: 24/466, loss: 0.009443704038858414 2023-01-22 13:50:50.266897: step: 26/466, loss: 0.024249430745840073 2023-01-22 13:50:50.901004: step: 28/466, loss: 0.0005519503611139953 2023-01-22 13:50:51.515273: step: 30/466, loss: 0.0016365292249247432 2023-01-22 13:50:52.076147: step: 32/466, loss: 0.08336590975522995 2023-01-22 13:50:52.665233: step: 34/466, loss: 0.013230552896857262 2023-01-22 13:50:53.301737: step: 36/466, loss: 0.05108135566115379 2023-01-22 13:50:53.860571: step: 38/466, loss: 0.002142277080565691 2023-01-22 13:50:54.469870: step: 40/466, loss: 0.013755310326814651 2023-01-22 13:50:55.172938: step: 42/466, loss: 0.02609098330140114 2023-01-22 13:50:55.815198: step: 44/466, loss: 0.005456157959997654 2023-01-22 13:50:56.424490: step: 46/466, loss: 0.0007077806512825191 2023-01-22 13:50:57.114875: step: 48/466, loss: 1.049682855606079 2023-01-22 13:50:57.731394: step: 50/466, loss: 0.006461585871875286 2023-01-22 13:50:58.335940: step: 52/466, loss: 0.004847324453294277 2023-01-22 13:50:59.008389: step: 54/466, loss: 0.003220370737835765 2023-01-22 13:50:59.588445: step: 56/466, loss: 0.004356561228632927 2023-01-22 13:51:00.158326: step: 58/466, loss: 0.01184358075261116 2023-01-22 13:51:00.684874: step: 60/466, loss: 0.0025993327144533396 2023-01-22 13:51:01.296000: step: 62/466, loss: 0.0036130023654550314 2023-01-22 13:51:01.844154: step: 64/466, loss: 0.0017474743071943521 2023-01-22 13:51:02.496024: step: 66/466, loss: 0.008417025208473206 2023-01-22 13:51:03.084866: step: 68/466, loss: 0.0027821469120681286 2023-01-22 13:51:03.621377: step: 70/466, loss: 0.0111384941264987 2023-01-22 13:51:04.272627: step: 72/466, loss: 0.021571749821305275 2023-01-22 13:51:04.850527: step: 74/466, loss: 0.0024087431374937296 2023-01-22 13:51:05.447807: step: 76/466, loss: 0.0031457082368433475 2023-01-22 13:51:06.007628: step: 78/466, loss: 0.0029580495320260525 2023-01-22 13:51:06.605312: step: 80/466, loss: 0.0061109671369194984 2023-01-22 13:51:07.227349: step: 82/466, loss: 0.011633175425231457 2023-01-22 13:51:07.829362: step: 84/466, loss: 0.000648948538582772 2023-01-22 13:51:08.416078: step: 86/466, loss: 0.022715579718351364 2023-01-22 13:51:09.014822: step: 88/466, loss: 0.04326004534959793 2023-01-22 13:51:09.604955: step: 90/466, loss: 0.0031368292402476072 2023-01-22 13:51:10.212322: step: 92/466, loss: 0.008613396435976028 2023-01-22 13:51:10.818604: step: 94/466, loss: 0.00023159265401773155 2023-01-22 13:51:11.455973: step: 96/466, loss: 0.009789356961846352 2023-01-22 13:51:12.070426: step: 98/466, loss: 0.011413405649363995 2023-01-22 13:51:12.647937: step: 100/466, loss: 0.002103513339534402 2023-01-22 13:51:13.264711: step: 102/466, loss: 0.07808516919612885 2023-01-22 13:51:13.862730: step: 104/466, loss: 0.007273864466696978 2023-01-22 13:51:14.667036: step: 106/466, loss: 0.0027130546513944864 2023-01-22 13:51:15.278569: step: 108/466, loss: 0.005673782899975777 2023-01-22 13:51:15.973340: step: 110/466, loss: 0.003928873222321272 2023-01-22 13:51:16.591016: step: 112/466, loss: 0.0015045427717268467 2023-01-22 13:51:17.143778: step: 114/466, loss: 0.36543506383895874 2023-01-22 13:51:17.709126: step: 116/466, loss: 0.009011466056108475 2023-01-22 13:51:18.303728: step: 118/466, loss: 0.0004968467983417213 2023-01-22 13:51:18.974904: step: 120/466, loss: 0.012009035795927048 2023-01-22 13:51:19.594800: step: 122/466, loss: 0.014121904037892818 2023-01-22 13:51:20.173172: step: 124/466, loss: 0.02587854117155075 2023-01-22 13:51:20.802484: step: 126/466, loss: 0.011958899907767773 2023-01-22 13:51:21.454103: step: 128/466, loss: 0.008790363557636738 2023-01-22 13:51:22.071770: step: 130/466, loss: 0.02592388354241848 2023-01-22 13:51:22.695236: step: 132/466, loss: 0.024880820885300636 2023-01-22 13:51:23.323373: step: 134/466, loss: 0.07368521392345428 2023-01-22 13:51:23.872166: step: 136/466, loss: 0.20551887154579163 2023-01-22 13:51:24.456151: step: 138/466, loss: 0.12554225325584412 2023-01-22 13:51:25.006002: step: 140/466, loss: 0.7651264667510986 2023-01-22 13:51:25.600775: step: 142/466, loss: 0.012582586146891117 2023-01-22 13:51:26.248162: step: 144/466, loss: 0.0031956112943589687 2023-01-22 13:51:26.858740: step: 146/466, loss: 0.03728998452425003 2023-01-22 13:51:27.513304: step: 148/466, loss: 0.010229956358671188 2023-01-22 13:51:28.137834: step: 150/466, loss: 0.004200585186481476 2023-01-22 13:51:28.706922: step: 152/466, loss: 0.02673015184700489 2023-01-22 13:51:29.312820: step: 154/466, loss: 0.012018535286188126 2023-01-22 13:51:29.988528: step: 156/466, loss: 0.006779039278626442 2023-01-22 13:51:30.602340: step: 158/466, loss: 0.01809326931834221 2023-01-22 13:51:31.227573: step: 160/466, loss: 0.011561712250113487 2023-01-22 13:51:31.847137: step: 162/466, loss: 0.019324587658047676 2023-01-22 13:51:32.450526: step: 164/466, loss: 0.008751314133405685 2023-01-22 13:51:33.052560: step: 166/466, loss: 0.009230945259332657 2023-01-22 13:51:33.664823: step: 168/466, loss: 0.00518200034275651 2023-01-22 13:51:34.361879: step: 170/466, loss: 0.004224380478262901 2023-01-22 13:51:34.941968: step: 172/466, loss: 0.009996664710342884 2023-01-22 13:51:35.463965: step: 174/466, loss: 0.0077126771211624146 2023-01-22 13:51:36.073945: step: 176/466, loss: 0.019181014969944954 2023-01-22 13:51:36.674856: step: 178/466, loss: 0.004100507125258446 2023-01-22 13:51:37.276308: step: 180/466, loss: 0.010374164208769798 2023-01-22 13:51:37.887926: step: 182/466, loss: 0.032318126410245895 2023-01-22 13:51:38.511327: step: 184/466, loss: 0.006364541593939066 2023-01-22 13:51:39.083710: step: 186/466, loss: 0.014697098173201084 2023-01-22 13:51:39.717620: step: 188/466, loss: 0.0025067031383514404 2023-01-22 13:51:40.345656: step: 190/466, loss: 0.01777309738099575 2023-01-22 13:51:41.031937: step: 192/466, loss: 0.008049978874623775 2023-01-22 13:51:41.635704: step: 194/466, loss: 0.006283496040850878 2023-01-22 13:51:42.290042: step: 196/466, loss: 0.0018434731755405664 2023-01-22 13:51:42.950807: step: 198/466, loss: 0.009735134430229664 2023-01-22 13:51:43.540780: step: 200/466, loss: 0.028072571381926537 2023-01-22 13:51:44.081609: step: 202/466, loss: 0.013600992038846016 2023-01-22 13:51:44.785497: step: 204/466, loss: 0.21987058222293854 2023-01-22 13:51:45.449564: step: 206/466, loss: 0.034088876098394394 2023-01-22 13:51:46.128911: step: 208/466, loss: 0.010856841690838337 2023-01-22 13:51:46.726622: step: 210/466, loss: 0.033877093344926834 2023-01-22 13:51:47.357862: step: 212/466, loss: 0.05989128351211548 2023-01-22 13:51:48.095002: step: 214/466, loss: 0.24762582778930664 2023-01-22 13:51:48.678275: step: 216/466, loss: 0.008403414860367775 2023-01-22 13:51:49.227463: step: 218/466, loss: 0.003733893157914281 2023-01-22 13:51:49.824659: step: 220/466, loss: 0.0018547578947618604 2023-01-22 13:51:50.464944: step: 222/466, loss: 0.0006512874970212579 2023-01-22 13:51:51.023749: step: 224/466, loss: 0.08464835584163666 2023-01-22 13:51:51.659265: step: 226/466, loss: 0.013823796063661575 2023-01-22 13:51:52.287781: step: 228/466, loss: 0.05559268966317177 2023-01-22 13:51:52.928895: step: 230/466, loss: 0.025688299909234047 2023-01-22 13:51:53.477397: step: 232/466, loss: 0.0025740419514477253 2023-01-22 13:51:53.994845: step: 234/466, loss: 0.0037856039125472307 2023-01-22 13:51:54.586817: step: 236/466, loss: 5.876278877258301 2023-01-22 13:51:55.192492: step: 238/466, loss: 0.004950941540300846 2023-01-22 13:51:55.847485: step: 240/466, loss: 0.030039146542549133 2023-01-22 13:51:56.437940: step: 242/466, loss: 0.0005290998960845172 2023-01-22 13:51:57.025936: step: 244/466, loss: 0.002077216748148203 2023-01-22 13:51:57.574297: step: 246/466, loss: 0.008934388868510723 2023-01-22 13:51:58.162239: step: 248/466, loss: 0.011586735025048256 2023-01-22 13:51:58.778521: step: 250/466, loss: 0.02876337245106697 2023-01-22 13:51:59.413062: step: 252/466, loss: 0.09464550018310547 2023-01-22 13:52:00.028428: step: 254/466, loss: 0.0040171486325562 2023-01-22 13:52:00.647829: step: 256/466, loss: 0.004150137770920992 2023-01-22 13:52:01.245034: step: 258/466, loss: 0.0008456672076135874 2023-01-22 13:52:01.830806: step: 260/466, loss: 0.0011844084365293384 2023-01-22 13:52:02.378681: step: 262/466, loss: 0.0007199643296189606 2023-01-22 13:52:02.975120: step: 264/466, loss: 0.0002797591732814908 2023-01-22 13:52:03.581263: step: 266/466, loss: 0.0007250283961184323 2023-01-22 13:52:04.251518: step: 268/466, loss: 0.017860015854239464 2023-01-22 13:52:04.903618: step: 270/466, loss: 0.025834210216999054 2023-01-22 13:52:05.521540: step: 272/466, loss: 0.05327354371547699 2023-01-22 13:52:06.108267: step: 274/466, loss: 0.023485787212848663 2023-01-22 13:52:06.694191: step: 276/466, loss: 0.00020847993437200785 2023-01-22 13:52:07.275232: step: 278/466, loss: 0.02168998494744301 2023-01-22 13:52:07.895342: step: 280/466, loss: 0.000491889426484704 2023-01-22 13:52:08.446882: step: 282/466, loss: 0.006376080680638552 2023-01-22 13:52:09.018459: step: 284/466, loss: 0.06472522020339966 2023-01-22 13:52:09.653867: step: 286/466, loss: 0.0011324502993375063 2023-01-22 13:52:10.255482: step: 288/466, loss: 0.00033992258249782026 2023-01-22 13:52:10.865632: step: 290/466, loss: 0.006721579935401678 2023-01-22 13:52:11.508512: step: 292/466, loss: 0.001619403250515461 2023-01-22 13:52:12.052855: step: 294/466, loss: 0.0036933289375156164 2023-01-22 13:52:12.627361: step: 296/466, loss: 0.00347943720407784 2023-01-22 13:52:13.274059: step: 298/466, loss: 0.04912320151925087 2023-01-22 13:52:13.914504: step: 300/466, loss: 0.0557754747569561 2023-01-22 13:52:14.543895: step: 302/466, loss: 0.0028693275526165962 2023-01-22 13:52:15.091651: step: 304/466, loss: 0.08447981625795364 2023-01-22 13:52:15.687105: step: 306/466, loss: 0.012897913344204426 2023-01-22 13:52:16.260461: step: 308/466, loss: 0.003185515059158206 2023-01-22 13:52:16.872887: step: 310/466, loss: 0.03757956251502037 2023-01-22 13:52:17.485718: step: 312/466, loss: 0.053706299513578415 2023-01-22 13:52:18.036127: step: 314/466, loss: 0.0005277339951135218 2023-01-22 13:52:18.686472: step: 316/466, loss: 0.006508746184408665 2023-01-22 13:52:19.309067: step: 318/466, loss: 0.04517892748117447 2023-01-22 13:52:19.907694: step: 320/466, loss: 0.01487236749380827 2023-01-22 13:52:20.591693: step: 322/466, loss: 0.013757728040218353 2023-01-22 13:52:21.188314: step: 324/466, loss: 0.0076672472059726715 2023-01-22 13:52:21.857743: step: 326/466, loss: 0.0007945263059809804 2023-01-22 13:52:22.473617: step: 328/466, loss: 0.006535803899168968 2023-01-22 13:52:23.082922: step: 330/466, loss: 0.0017893729964271188 2023-01-22 13:52:23.689688: step: 332/466, loss: 0.0004827079246751964 2023-01-22 13:52:24.310878: step: 334/466, loss: 0.00041103153489530087 2023-01-22 13:52:24.984051: step: 336/466, loss: 0.004424719139933586 2023-01-22 13:52:25.595462: step: 338/466, loss: 0.042862024158239365 2023-01-22 13:52:26.184225: step: 340/466, loss: 0.0029731725808233023 2023-01-22 13:52:26.815196: step: 342/466, loss: 0.0063887243159115314 2023-01-22 13:52:27.476504: step: 344/466, loss: 0.0518309660255909 2023-01-22 13:52:28.055137: step: 346/466, loss: 0.4405127763748169 2023-01-22 13:52:28.667928: step: 348/466, loss: 0.037538956850767136 2023-01-22 13:52:29.229249: step: 350/466, loss: 0.005816600285470486 2023-01-22 13:52:29.794640: step: 352/466, loss: 0.0004314797988627106 2023-01-22 13:52:30.400528: step: 354/466, loss: 0.013126222416758537 2023-01-22 13:52:31.009681: step: 356/466, loss: 0.016857674345374107 2023-01-22 13:52:31.537593: step: 358/466, loss: 0.012838209047913551 2023-01-22 13:52:32.121514: step: 360/466, loss: 0.0008656500140205026 2023-01-22 13:52:32.775791: step: 362/466, loss: 0.16545487940311432 2023-01-22 13:52:33.368294: step: 364/466, loss: 0.006057723890990019 2023-01-22 13:52:33.966708: step: 366/466, loss: 0.0021400887053459883 2023-01-22 13:52:34.628999: step: 368/466, loss: 0.24039912223815918 2023-01-22 13:52:35.353420: step: 370/466, loss: 0.004498835653066635 2023-01-22 13:52:36.001107: step: 372/466, loss: 0.0002478906826581806 2023-01-22 13:52:36.644266: step: 374/466, loss: 0.009848754853010178 2023-01-22 13:52:37.267361: step: 376/466, loss: 0.007356069982051849 2023-01-22 13:52:37.927475: step: 378/466, loss: 0.08883567154407501 2023-01-22 13:52:38.545567: step: 380/466, loss: 0.12306622415781021 2023-01-22 13:52:39.108795: step: 382/466, loss: 0.0016421453328803182 2023-01-22 13:52:39.619904: step: 384/466, loss: 0.021117009222507477 2023-01-22 13:52:40.174538: step: 386/466, loss: 0.005438767373561859 2023-01-22 13:52:40.801629: step: 388/466, loss: 0.0004887752002105117 2023-01-22 13:52:41.385522: step: 390/466, loss: 0.0034964196383953094 2023-01-22 13:52:41.966426: step: 392/466, loss: 0.003222419647499919 2023-01-22 13:52:42.618144: step: 394/466, loss: 0.0004024989320896566 2023-01-22 13:52:43.233833: step: 396/466, loss: 0.0022491118870675564 2023-01-22 13:52:43.876628: step: 398/466, loss: 0.08100035041570663 2023-01-22 13:52:44.418859: step: 400/466, loss: 0.004757395014166832 2023-01-22 13:52:45.011702: step: 402/466, loss: 0.01630372554063797 2023-01-22 13:52:45.608236: step: 404/466, loss: 0.0118543840944767 2023-01-22 13:52:46.174915: step: 406/466, loss: 0.0010891692945733666 2023-01-22 13:52:46.754853: step: 408/466, loss: 0.05898061767220497 2023-01-22 13:52:47.386146: step: 410/466, loss: 0.0320536345243454 2023-01-22 13:52:48.046885: step: 412/466, loss: 0.015358486212790012 2023-01-22 13:52:48.633126: step: 414/466, loss: 0.021513421088457108 2023-01-22 13:52:49.202372: step: 416/466, loss: 0.004470662213861942 2023-01-22 13:52:49.785292: step: 418/466, loss: 0.01665816269814968 2023-01-22 13:52:50.371491: step: 420/466, loss: 0.012034298852086067 2023-01-22 13:52:51.007973: step: 422/466, loss: 0.05559059977531433 2023-01-22 13:52:51.613544: step: 424/466, loss: 0.004251624457538128 2023-01-22 13:52:52.205467: step: 426/466, loss: 0.005079424940049648 2023-01-22 13:52:52.773059: step: 428/466, loss: 0.00046944443602114916 2023-01-22 13:52:53.397004: step: 430/466, loss: 0.0004140078090131283 2023-01-22 13:52:53.915340: step: 432/466, loss: 0.0032719718292355537 2023-01-22 13:52:54.539481: step: 434/466, loss: 0.0004585221759043634 2023-01-22 13:52:55.165316: step: 436/466, loss: 0.3552585542201996 2023-01-22 13:52:55.735576: step: 438/466, loss: 0.009487979114055634 2023-01-22 13:52:56.340814: step: 440/466, loss: 0.010056398808956146 2023-01-22 13:52:56.934612: step: 442/466, loss: 0.0001256998657481745 2023-01-22 13:52:57.605695: step: 444/466, loss: 0.00877401977777481 2023-01-22 13:52:58.263451: step: 446/466, loss: 0.046221181750297546 2023-01-22 13:52:58.898906: step: 448/466, loss: 0.0017273180419579148 2023-01-22 13:52:59.510532: step: 450/466, loss: 0.006932374089956284 2023-01-22 13:53:00.141290: step: 452/466, loss: 0.0431647002696991 2023-01-22 13:53:00.756244: step: 454/466, loss: 5.422514004749246e-05 2023-01-22 13:53:01.318413: step: 456/466, loss: 0.0026062550023198128 2023-01-22 13:53:01.910659: step: 458/466, loss: 0.09274192899465561 2023-01-22 13:53:02.508953: step: 460/466, loss: 0.0007122901733964682 2023-01-22 13:53:03.148105: step: 462/466, loss: 0.0017089269822463393 2023-01-22 13:53:03.749006: step: 464/466, loss: 0.010782505385577679 2023-01-22 13:53:04.306834: step: 466/466, loss: 0.0005593986716121435 2023-01-22 13:53:04.891442: step: 468/466, loss: 0.000961803481914103 2023-01-22 13:53:05.469158: step: 470/466, loss: 0.015021142549812794 2023-01-22 13:53:06.020790: step: 472/466, loss: 0.002034959616139531 2023-01-22 13:53:06.587950: step: 474/466, loss: 0.013840865343809128 2023-01-22 13:53:07.242575: step: 476/466, loss: 0.0044415052980184555 2023-01-22 13:53:07.808586: step: 478/466, loss: 0.0029340493492782116 2023-01-22 13:53:08.459700: step: 480/466, loss: 0.02091211825609207 2023-01-22 13:53:09.078883: step: 482/466, loss: 0.010680991224944592 2023-01-22 13:53:09.699513: step: 484/466, loss: 0.03337204456329346 2023-01-22 13:53:10.374062: step: 486/466, loss: 0.0021719844080507755 2023-01-22 13:53:10.996310: step: 488/466, loss: 0.0014358946355059743 2023-01-22 13:53:11.529293: step: 490/466, loss: 0.0053126271814107895 2023-01-22 13:53:12.245583: step: 492/466, loss: 0.005152451805770397 2023-01-22 13:53:12.842737: step: 494/466, loss: 0.016728512942790985 2023-01-22 13:53:13.499532: step: 496/466, loss: 0.0023070168681442738 2023-01-22 13:53:14.149833: step: 498/466, loss: 0.0012790559558197856 2023-01-22 13:53:14.804310: step: 500/466, loss: 0.0027140311431139708 2023-01-22 13:53:15.432220: step: 502/466, loss: 0.00013023210340179503 2023-01-22 13:53:16.113687: step: 504/466, loss: 0.0019716620445251465 2023-01-22 13:53:16.730803: step: 506/466, loss: 0.004332349635660648 2023-01-22 13:53:17.372422: step: 508/466, loss: 0.0004268595075700432 2023-01-22 13:53:18.057833: step: 510/466, loss: 0.018236014991998672 2023-01-22 13:53:18.743962: step: 512/466, loss: 0.0096100103110075 2023-01-22 13:53:19.359743: step: 514/466, loss: 0.26996174454689026 2023-01-22 13:53:20.035071: step: 516/466, loss: 0.07348065078258514 2023-01-22 13:53:20.701949: step: 518/466, loss: 0.011505013331770897 2023-01-22 13:53:21.302650: step: 520/466, loss: 0.02061111107468605 2023-01-22 13:53:21.932940: step: 522/466, loss: 0.04129303619265556 2023-01-22 13:53:22.551233: step: 524/466, loss: 0.0015660661738365889 2023-01-22 13:53:23.174003: step: 526/466, loss: 0.006148469168692827 2023-01-22 13:53:23.736786: step: 528/466, loss: 0.01998778060078621 2023-01-22 13:53:24.336158: step: 530/466, loss: 0.022510893642902374 2023-01-22 13:53:24.945161: step: 532/466, loss: 0.0020203834865242243 2023-01-22 13:53:25.583988: step: 534/466, loss: 0.016820572316646576 2023-01-22 13:53:26.268109: step: 536/466, loss: 0.07796097546815872 2023-01-22 13:53:26.824836: step: 538/466, loss: 0.008878006599843502 2023-01-22 13:53:27.373875: step: 540/466, loss: 0.002800745191052556 2023-01-22 13:53:27.937994: step: 542/466, loss: 0.00031313998624682426 2023-01-22 13:53:28.578223: step: 544/466, loss: 2.1622219719574787e-05 2023-01-22 13:53:29.212913: step: 546/466, loss: 0.0022388698998838663 2023-01-22 13:53:29.864523: step: 548/466, loss: 0.03359275311231613 2023-01-22 13:53:30.470436: step: 550/466, loss: 0.011839710175991058 2023-01-22 13:53:31.033719: step: 552/466, loss: 0.0029783437494188547 2023-01-22 13:53:31.632179: step: 554/466, loss: 0.0022885308135300875 2023-01-22 13:53:32.263874: step: 556/466, loss: 0.0033760373480618 2023-01-22 13:53:32.859732: step: 558/466, loss: 0.0036777276545763016 2023-01-22 13:53:33.478969: step: 560/466, loss: 0.01203171443194151 2023-01-22 13:53:34.076570: step: 562/466, loss: 0.055107247084379196 2023-01-22 13:53:34.663981: step: 564/466, loss: 0.05906687304377556 2023-01-22 13:53:35.282286: step: 566/466, loss: 0.13570892810821533 2023-01-22 13:53:35.934495: step: 568/466, loss: 0.00559958815574646 2023-01-22 13:53:36.549762: step: 570/466, loss: 0.002212547929957509 2023-01-22 13:53:37.198468: step: 572/466, loss: 0.013021819293498993 2023-01-22 13:53:37.892121: step: 574/466, loss: 0.005140095017850399 2023-01-22 13:53:38.507804: step: 576/466, loss: 0.00024036553804762661 2023-01-22 13:53:39.131524: step: 578/466, loss: 0.00848733726888895 2023-01-22 13:53:39.788098: step: 580/466, loss: 0.0004640131664928049 2023-01-22 13:53:40.354656: step: 582/466, loss: 0.0006688210996799171 2023-01-22 13:53:40.906927: step: 584/466, loss: 0.006631654687225819 2023-01-22 13:53:41.629111: step: 586/466, loss: 0.034514736384153366 2023-01-22 13:53:42.221105: step: 588/466, loss: 0.01949184387922287 2023-01-22 13:53:42.814006: step: 590/466, loss: 0.0045889331959187984 2023-01-22 13:53:43.380682: step: 592/466, loss: 0.02588224969804287 2023-01-22 13:53:43.950459: step: 594/466, loss: 2.4295799448736943e-05 2023-01-22 13:53:44.528717: step: 596/466, loss: 0.03241288661956787 2023-01-22 13:53:45.171497: step: 598/466, loss: 0.028325794264674187 2023-01-22 13:53:45.834546: step: 600/466, loss: 0.0477851964533329 2023-01-22 13:53:46.449072: step: 602/466, loss: 0.009745490737259388 2023-01-22 13:53:47.079348: step: 604/466, loss: 0.015827316790819168 2023-01-22 13:53:47.621361: step: 606/466, loss: 0.002607752103358507 2023-01-22 13:53:48.157259: step: 608/466, loss: 0.003142754314467311 2023-01-22 13:53:48.738063: step: 610/466, loss: 0.1278313845396042 2023-01-22 13:53:49.348185: step: 612/466, loss: 0.8673977851867676 2023-01-22 13:53:49.952817: step: 614/466, loss: 0.019910240545868874 2023-01-22 13:53:50.551469: step: 616/466, loss: 0.00808005966246128 2023-01-22 13:53:51.189793: step: 618/466, loss: 0.09069032222032547 2023-01-22 13:53:51.831349: step: 620/466, loss: 0.12162695825099945 2023-01-22 13:53:52.449967: step: 622/466, loss: 0.004187727812677622 2023-01-22 13:53:53.091419: step: 624/466, loss: 0.008895082399249077 2023-01-22 13:53:53.760322: step: 626/466, loss: 0.02198082022368908 2023-01-22 13:53:54.379085: step: 628/466, loss: 0.09377451986074448 2023-01-22 13:53:54.974989: step: 630/466, loss: 0.007859280332922935 2023-01-22 13:53:55.572791: step: 632/466, loss: 0.0025520678609609604 2023-01-22 13:53:56.177241: step: 634/466, loss: 0.043739113956689835 2023-01-22 13:53:56.827765: step: 636/466, loss: 0.0031407184433192015 2023-01-22 13:53:57.481380: step: 638/466, loss: 0.008512577973306179 2023-01-22 13:53:58.104335: step: 640/466, loss: 0.008761793375015259 2023-01-22 13:53:58.615874: step: 642/466, loss: 0.0004297647101338953 2023-01-22 13:53:59.239945: step: 644/466, loss: 0.030514473095536232 2023-01-22 13:53:59.847638: step: 646/466, loss: 0.016970517113804817 2023-01-22 13:54:00.483619: step: 648/466, loss: 0.0021906227339059114 2023-01-22 13:54:01.009806: step: 650/466, loss: 0.0007351999520324171 2023-01-22 13:54:01.622820: step: 652/466, loss: 0.01548150647431612 2023-01-22 13:54:02.319478: step: 654/466, loss: 0.017431585118174553 2023-01-22 13:54:02.932728: step: 656/466, loss: 0.05731036514043808 2023-01-22 13:54:03.521605: step: 658/466, loss: 0.00026396213797852397 2023-01-22 13:54:04.088392: step: 660/466, loss: 0.012690886855125427 2023-01-22 13:54:04.630842: step: 662/466, loss: 2.1528674551518634e-05 2023-01-22 13:54:05.246803: step: 664/466, loss: 0.0011564485030248761 2023-01-22 13:54:05.915047: step: 666/466, loss: 0.03397271782159805 2023-01-22 13:54:06.564804: step: 668/466, loss: 0.02553057111799717 2023-01-22 13:54:07.180064: step: 670/466, loss: 0.048447057604789734 2023-01-22 13:54:07.754951: step: 672/466, loss: 0.01090525183826685 2023-01-22 13:54:08.380268: step: 674/466, loss: 0.01414660457521677 2023-01-22 13:54:09.016149: step: 676/466, loss: 0.005885283462703228 2023-01-22 13:54:09.617783: step: 678/466, loss: 0.022681541740894318 2023-01-22 13:54:10.203445: step: 680/466, loss: 0.011428939178586006 2023-01-22 13:54:10.747545: step: 682/466, loss: 0.007325511425733566 2023-01-22 13:54:11.329064: step: 684/466, loss: 0.004513971973210573 2023-01-22 13:54:11.980736: step: 686/466, loss: 0.0050977542996406555 2023-01-22 13:54:12.522577: step: 688/466, loss: 0.002478186273947358 2023-01-22 13:54:13.109728: step: 690/466, loss: 0.0032952444162219763 2023-01-22 13:54:13.737220: step: 692/466, loss: 0.0006835025269538164 2023-01-22 13:54:14.376548: step: 694/466, loss: 0.039983466267585754 2023-01-22 13:54:14.956776: step: 696/466, loss: 0.05446304753422737 2023-01-22 13:54:15.600373: step: 698/466, loss: 0.0005271573318168521 2023-01-22 13:54:16.154450: step: 700/466, loss: 0.026197724044322968 2023-01-22 13:54:16.730792: step: 702/466, loss: 0.000332408380927518 2023-01-22 13:54:17.363317: step: 704/466, loss: 0.010294831357896328 2023-01-22 13:54:17.986724: step: 706/466, loss: 0.013582558371126652 2023-01-22 13:54:18.552917: step: 708/466, loss: 0.00780984153971076 2023-01-22 13:54:19.195235: step: 710/466, loss: 0.0002171879168599844 2023-01-22 13:54:19.755017: step: 712/466, loss: 0.004038907587528229 2023-01-22 13:54:20.448105: step: 714/466, loss: 0.0026337308809161186 2023-01-22 13:54:21.014060: step: 716/466, loss: 0.01237061619758606 2023-01-22 13:54:21.571569: step: 718/466, loss: 0.0006351915071718395 2023-01-22 13:54:22.255497: step: 720/466, loss: 0.002086139051243663 2023-01-22 13:54:22.868012: step: 722/466, loss: 0.0002931247581727803 2023-01-22 13:54:23.468247: step: 724/466, loss: 0.07869864255189896 2023-01-22 13:54:24.100428: step: 726/466, loss: 0.024655615910887718 2023-01-22 13:54:24.747154: step: 728/466, loss: 0.00587571831420064 2023-01-22 13:54:25.375889: step: 730/466, loss: 0.0014530383050441742 2023-01-22 13:54:26.046878: step: 732/466, loss: 3.143577487207949e-05 2023-01-22 13:54:26.654881: step: 734/466, loss: 0.0005164192989468575 2023-01-22 13:54:27.257782: step: 736/466, loss: 0.0863901674747467 2023-01-22 13:54:27.930821: step: 738/466, loss: 0.02110672928392887 2023-01-22 13:54:28.544635: step: 740/466, loss: 0.05317772179841995 2023-01-22 13:54:29.135990: step: 742/466, loss: 0.008193924091756344 2023-01-22 13:54:29.790872: step: 744/466, loss: 0.00725803105160594 2023-01-22 13:54:30.417983: step: 746/466, loss: 0.016299206763505936 2023-01-22 13:54:31.006956: step: 748/466, loss: 0.07704475522041321 2023-01-22 13:54:31.645066: step: 750/466, loss: 0.0030760911758989096 2023-01-22 13:54:32.293620: step: 752/466, loss: 0.00893436186015606 2023-01-22 13:54:32.873364: step: 754/466, loss: 0.0026519685052335262 2023-01-22 13:54:33.461213: step: 756/466, loss: 0.007419214583933353 2023-01-22 13:54:34.025956: step: 758/466, loss: 0.012317190878093243 2023-01-22 13:54:34.693994: step: 760/466, loss: 0.006765678990632296 2023-01-22 13:54:35.273780: step: 762/466, loss: 0.04686688259243965 2023-01-22 13:54:35.876099: step: 764/466, loss: 0.02657747082412243 2023-01-22 13:54:36.498187: step: 766/466, loss: 0.003171471878886223 2023-01-22 13:54:37.153301: step: 768/466, loss: 0.000835277431178838 2023-01-22 13:54:37.751193: step: 770/466, loss: 0.028463780879974365 2023-01-22 13:54:38.250632: step: 772/466, loss: 0.0001282665180042386 2023-01-22 13:54:38.855392: step: 774/466, loss: 0.018418481573462486 2023-01-22 13:54:39.498216: step: 776/466, loss: 0.0020590792410075665 2023-01-22 13:54:40.124669: step: 778/466, loss: 0.00039314222522079945 2023-01-22 13:54:40.737064: step: 780/466, loss: 0.00898836925625801 2023-01-22 13:54:41.372875: step: 782/466, loss: 0.009828277863562107 2023-01-22 13:54:42.049686: step: 784/466, loss: 0.009791073389351368 2023-01-22 13:54:42.647636: step: 786/466, loss: 0.0013660689583048224 2023-01-22 13:54:43.312427: step: 788/466, loss: 0.020413346588611603 2023-01-22 13:54:43.979712: step: 790/466, loss: 0.004203976131975651 2023-01-22 13:54:44.614770: step: 792/466, loss: 0.005849914625287056 2023-01-22 13:54:45.238421: step: 794/466, loss: 0.05871148779988289 2023-01-22 13:54:45.905241: step: 796/466, loss: 0.006010339595377445 2023-01-22 13:54:46.492645: step: 798/466, loss: 0.0334501676261425 2023-01-22 13:54:47.076221: step: 800/466, loss: 0.009299539029598236 2023-01-22 13:54:47.685688: step: 802/466, loss: 0.014091639779508114 2023-01-22 13:54:48.318722: step: 804/466, loss: 0.014303319156169891 2023-01-22 13:54:48.915579: step: 806/466, loss: 0.004221655894070864 2023-01-22 13:54:49.470350: step: 808/466, loss: 0.01323504839092493 2023-01-22 13:54:50.084640: step: 810/466, loss: 0.11757577955722809 2023-01-22 13:54:50.703866: step: 812/466, loss: 0.001458548940718174 2023-01-22 13:54:51.353840: step: 814/466, loss: 0.00623718835413456 2023-01-22 13:54:51.955845: step: 816/466, loss: 0.006825688295066357 2023-01-22 13:54:52.590398: step: 818/466, loss: 0.005815485492348671 2023-01-22 13:54:53.197932: step: 820/466, loss: 0.0036692700814455748 2023-01-22 13:54:53.840820: step: 822/466, loss: 0.008228559046983719 2023-01-22 13:54:54.451611: step: 824/466, loss: 0.009341354481875896 2023-01-22 13:54:55.040820: step: 826/466, loss: 0.021602025255560875 2023-01-22 13:54:55.637973: step: 828/466, loss: 0.0003038942231796682 2023-01-22 13:54:56.243273: step: 830/466, loss: 0.041580308228731155 2023-01-22 13:54:56.841871: step: 832/466, loss: 0.005860107019543648 2023-01-22 13:54:57.423303: step: 834/466, loss: 0.04242083430290222 2023-01-22 13:54:58.081563: step: 836/466, loss: 0.005504989065229893 2023-01-22 13:54:58.704420: step: 838/466, loss: 0.004706955049186945 2023-01-22 13:54:59.265914: step: 840/466, loss: 0.0018675555475056171 2023-01-22 13:54:59.970980: step: 842/466, loss: 0.0537487268447876 2023-01-22 13:55:00.546516: step: 844/466, loss: 0.009648822247982025 2023-01-22 13:55:01.169297: step: 846/466, loss: 0.0014984318986535072 2023-01-22 13:55:01.747700: step: 848/466, loss: 0.004129413515329361 2023-01-22 13:55:02.410933: step: 850/466, loss: 0.06596551835536957 2023-01-22 13:55:02.962492: step: 852/466, loss: 0.00615499634295702 2023-01-22 13:55:03.561372: step: 854/466, loss: 0.3469690978527069 2023-01-22 13:55:04.156250: step: 856/466, loss: 0.0012979827588424087 2023-01-22 13:55:04.782608: step: 858/466, loss: 0.004978050012141466 2023-01-22 13:55:05.427944: step: 860/466, loss: 0.16628430783748627 2023-01-22 13:55:06.047862: step: 862/466, loss: 0.016617318615317345 2023-01-22 13:55:06.666567: step: 864/466, loss: 0.022998757660388947 2023-01-22 13:55:07.238531: step: 866/466, loss: 0.0009642437798902392 2023-01-22 13:55:07.812851: step: 868/466, loss: 0.007418345659971237 2023-01-22 13:55:08.507604: step: 870/466, loss: 0.01664545387029648 2023-01-22 13:55:09.145995: step: 872/466, loss: 0.06470434367656708 2023-01-22 13:55:09.733756: step: 874/466, loss: 0.012197468429803848 2023-01-22 13:55:10.367382: step: 876/466, loss: 0.18374070525169373 2023-01-22 13:55:10.982661: step: 878/466, loss: 0.6884382963180542 2023-01-22 13:55:11.635201: step: 880/466, loss: 0.012180290184915066 2023-01-22 13:55:12.332433: step: 882/466, loss: 0.0011859252117574215 2023-01-22 13:55:12.936705: step: 884/466, loss: 0.002707699779421091 2023-01-22 13:55:13.638928: step: 886/466, loss: 0.010159934870898724 2023-01-22 13:55:14.367644: step: 888/466, loss: 0.0010092303855344653 2023-01-22 13:55:14.900102: step: 890/466, loss: 0.013814612291753292 2023-01-22 13:55:15.612453: step: 892/466, loss: 0.04083891957998276 2023-01-22 13:55:16.201592: step: 894/466, loss: 0.032238803803920746 2023-01-22 13:55:16.822128: step: 896/466, loss: 0.012169080786406994 2023-01-22 13:55:17.423146: step: 898/466, loss: 0.003366566263139248 2023-01-22 13:55:18.011330: step: 900/466, loss: 0.00712932413443923 2023-01-22 13:55:18.595112: step: 902/466, loss: 0.0002415215567452833 2023-01-22 13:55:19.178941: step: 904/466, loss: 6.97071009199135e-05 2023-01-22 13:55:19.724799: step: 906/466, loss: 0.0115275327116251 2023-01-22 13:55:20.252832: step: 908/466, loss: 0.0026450790464878082 2023-01-22 13:55:20.882443: step: 910/466, loss: 0.020093899220228195 2023-01-22 13:55:21.510943: step: 912/466, loss: 0.006807427387684584 2023-01-22 13:55:22.109285: step: 914/466, loss: 0.0032413809094578028 2023-01-22 13:55:22.701959: step: 916/466, loss: 0.0001539701479487121 2023-01-22 13:55:23.340912: step: 918/466, loss: 0.009963742457330227 2023-01-22 13:55:23.934512: step: 920/466, loss: 0.004906130023300648 2023-01-22 13:55:24.564938: step: 922/466, loss: 0.023356501013040543 2023-01-22 13:55:25.140910: step: 924/466, loss: 0.17208455502986908 2023-01-22 13:55:25.881970: step: 926/466, loss: 0.0598161555826664 2023-01-22 13:55:26.490712: step: 928/466, loss: 0.06309767067432404 2023-01-22 13:55:27.093137: step: 930/466, loss: 0.00023569687618874013 2023-01-22 13:55:27.717678: step: 932/466, loss: 0.0010740574216470122 ================================================== Loss: 0.045 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30704044578011525, 'r': 0.35248476223333913, 'f1': 0.32819694292750834}, 'combined': 0.2418293263676377, 'epoch': 35} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3481688346457925, 'r': 0.3218976389700042, 'f1': 0.33451823139469566}, 'combined': 0.221856650873166, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29973249027237353, 'r': 0.2917850378787879, 'f1': 0.2957053742802303}, 'combined': 0.1971369161868202, 'epoch': 35} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.35569618489382226, 'r': 0.2968244593178084, 'f1': 0.3236045593318383}, 'combined': 0.21119455451130498, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2884961162255466, 'r': 0.345976367086424, 'f1': 0.3146325201976626}, 'combined': 0.23183448856669872, 'epoch': 35} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34283331653035365, 'r': 0.30744982592574643, 'f1': 0.3241789202490952}, 'combined': 0.214999491149659, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2612179487179487, 'r': 0.38809523809523805, 'f1': 0.3122605363984674}, 'combined': 0.20817369093231158, 'epoch': 35} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.4782608695652174, 'f1': 0.4680851063829787}, 'combined': 0.31205673758865243, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3409090909090909, 'r': 0.25862068965517243, 'f1': 0.29411764705882354}, 'combined': 0.19607843137254902, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:57:54.193671: step: 2/466, loss: 0.004134782124310732 2023-01-22 13:57:54.790468: step: 4/466, loss: 0.014764559455215931 2023-01-22 13:57:55.437318: step: 6/466, loss: 0.00023114972282201052 2023-01-22 13:57:56.051464: step: 8/466, loss: 0.0036247021052986383 2023-01-22 13:57:56.631349: step: 10/466, loss: 0.035689063370227814 2023-01-22 13:57:57.285941: step: 12/466, loss: 0.13503894209861755 2023-01-22 13:57:57.925493: step: 14/466, loss: 0.0015168027020990849 2023-01-22 13:57:58.551526: step: 16/466, loss: 0.00198641000315547 2023-01-22 13:57:59.129572: step: 18/466, loss: 0.007483336143195629 2023-01-22 13:57:59.838391: step: 20/466, loss: 0.011556041426956654 2023-01-22 13:58:00.451755: step: 22/466, loss: 0.011115025728940964 2023-01-22 13:58:01.046030: step: 24/466, loss: 0.00882694125175476 2023-01-22 13:58:01.708562: step: 26/466, loss: 0.11645354330539703 2023-01-22 13:58:02.412184: step: 28/466, loss: 0.056584376841783524 2023-01-22 13:58:02.970833: step: 30/466, loss: 0.001533707370981574 2023-01-22 13:58:03.576760: step: 32/466, loss: 0.0009939366718754172 2023-01-22 13:58:04.215053: step: 34/466, loss: 0.02367999777197838 2023-01-22 13:58:04.830806: step: 36/466, loss: 4.9245820264331996e-05 2023-01-22 13:58:05.425079: step: 38/466, loss: 0.0013309363275766373 2023-01-22 13:58:06.048470: step: 40/466, loss: 0.00654733506962657 2023-01-22 13:58:06.670760: step: 42/466, loss: 0.012524322606623173 2023-01-22 13:58:07.298102: step: 44/466, loss: 9.207048424286768e-05 2023-01-22 13:58:07.877560: step: 46/466, loss: 0.0011543171713128686 2023-01-22 13:58:08.472603: step: 48/466, loss: 0.08923041075468063 2023-01-22 13:58:09.108112: step: 50/466, loss: 0.013645232655107975 2023-01-22 13:58:09.619679: step: 52/466, loss: 0.025942079722881317 2023-01-22 13:58:10.264625: step: 54/466, loss: 0.002431912114843726 2023-01-22 13:58:10.855921: step: 56/466, loss: 0.01853998750448227 2023-01-22 13:58:11.404113: step: 58/466, loss: 0.00020470521121751517 2023-01-22 13:58:12.128182: step: 60/466, loss: 0.0017083105631172657 2023-01-22 13:58:12.672493: step: 62/466, loss: 0.0002841560635715723 2023-01-22 13:58:13.285481: step: 64/466, loss: 0.007360382005572319 2023-01-22 13:58:13.916457: step: 66/466, loss: 0.018356889486312866 2023-01-22 13:58:14.549048: step: 68/466, loss: 0.012392015196383 2023-01-22 13:58:15.244393: step: 70/466, loss: 0.005072339903563261 2023-01-22 13:58:15.789306: step: 72/466, loss: 0.003504128661006689 2023-01-22 13:58:16.544118: step: 74/466, loss: 0.045964308083057404 2023-01-22 13:58:17.106375: step: 76/466, loss: 0.0016541439108550549 2023-01-22 13:58:17.712400: step: 78/466, loss: 0.007963433861732483 2023-01-22 13:58:18.394825: step: 80/466, loss: 0.0008533421787433326 2023-01-22 13:58:19.063626: step: 82/466, loss: 0.0015797861851751804 2023-01-22 13:58:19.664378: step: 84/466, loss: 0.004055194091051817 2023-01-22 13:58:20.267224: step: 86/466, loss: 0.04232597351074219 2023-01-22 13:58:20.899168: step: 88/466, loss: 0.005299170035868883 2023-01-22 13:58:21.463031: step: 90/466, loss: 0.0011265198700129986 2023-01-22 13:58:22.025972: step: 92/466, loss: 0.0067293415777385235 2023-01-22 13:58:22.630868: step: 94/466, loss: 0.011530286632478237 2023-01-22 13:58:23.202858: step: 96/466, loss: 0.0001433990546502173 2023-01-22 13:58:23.800681: step: 98/466, loss: 0.0009852410294115543 2023-01-22 13:58:24.424242: step: 100/466, loss: 0.3928929567337036 2023-01-22 13:58:24.986749: step: 102/466, loss: 0.01997952163219452 2023-01-22 13:58:25.578438: step: 104/466, loss: 0.012904261238873005 2023-01-22 13:58:26.070913: step: 106/466, loss: 0.0003965873329434544 2023-01-22 13:58:26.683342: step: 108/466, loss: 0.01776755601167679 2023-01-22 13:58:27.264995: step: 110/466, loss: 0.003667705925181508 2023-01-22 13:58:27.833929: step: 112/466, loss: 0.0005210679373703897 2023-01-22 13:58:28.409075: step: 114/466, loss: 0.00494978716596961 2023-01-22 13:58:29.049497: step: 116/466, loss: 0.18897537887096405 2023-01-22 13:58:29.718512: step: 118/466, loss: 0.014218885451555252 2023-01-22 13:58:30.290260: step: 120/466, loss: 0.0057451482862234116 2023-01-22 13:58:30.942595: step: 122/466, loss: 0.00048032597987912595 2023-01-22 13:58:31.593409: step: 124/466, loss: 0.007962165400385857 2023-01-22 13:58:32.190925: step: 126/466, loss: 0.006490239407867193 2023-01-22 13:58:32.783370: step: 128/466, loss: 0.06516426056623459 2023-01-22 13:58:33.392500: step: 130/466, loss: 0.0018735408084467053 2023-01-22 13:58:34.028918: step: 132/466, loss: 0.0036469902843236923 2023-01-22 13:58:34.715088: step: 134/466, loss: 0.0337684229016304 2023-01-22 13:58:35.307596: step: 136/466, loss: 0.008938982151448727 2023-01-22 13:58:35.872587: step: 138/466, loss: 0.004523784387856722 2023-01-22 13:58:36.582562: step: 140/466, loss: 0.0073253437876701355 2023-01-22 13:58:37.117382: step: 142/466, loss: 0.0024708369746804237 2023-01-22 13:58:37.707729: step: 144/466, loss: 0.05165370553731918 2023-01-22 13:58:38.286302: step: 146/466, loss: 0.0035205720923841 2023-01-22 13:58:38.890657: step: 148/466, loss: 0.0007877741591073573 2023-01-22 13:58:39.523976: step: 150/466, loss: 0.009827791713178158 2023-01-22 13:58:40.201511: step: 152/466, loss: 0.02217124029994011 2023-01-22 13:58:40.796992: step: 154/466, loss: 0.008764995262026787 2023-01-22 13:58:41.410297: step: 156/466, loss: 0.28140372037887573 2023-01-22 13:58:42.018736: step: 158/466, loss: 0.00439114635810256 2023-01-22 13:58:42.615164: step: 160/466, loss: 0.0021257810294628143 2023-01-22 13:58:43.209055: step: 162/466, loss: 0.005833633244037628 2023-01-22 13:58:43.851229: step: 164/466, loss: 0.0014589038910344243 2023-01-22 13:58:44.541495: step: 166/466, loss: 0.06629382073879242 2023-01-22 13:58:45.119498: step: 168/466, loss: 0.05375420302152634 2023-01-22 13:58:45.750840: step: 170/466, loss: 0.0029997029341757298 2023-01-22 13:58:46.314390: step: 172/466, loss: 0.01608169823884964 2023-01-22 13:58:46.950156: step: 174/466, loss: 0.0902029424905777 2023-01-22 13:58:47.518183: step: 176/466, loss: 0.011634060181677341 2023-01-22 13:58:48.068107: step: 178/466, loss: 0.02953268401324749 2023-01-22 13:58:48.628046: step: 180/466, loss: 0.003666577860713005 2023-01-22 13:58:49.220182: step: 182/466, loss: 0.028573906049132347 2023-01-22 13:58:49.779790: step: 184/466, loss: 0.11055367439985275 2023-01-22 13:58:50.362222: step: 186/466, loss: 4.221461296081543 2023-01-22 13:58:50.987533: step: 188/466, loss: 0.10896050184965134 2023-01-22 13:58:51.574199: step: 190/466, loss: 0.0401657298207283 2023-01-22 13:58:52.198313: step: 192/466, loss: 0.0023471752647310495 2023-01-22 13:58:52.837945: step: 194/466, loss: 0.015245432034134865 2023-01-22 13:58:53.504506: step: 196/466, loss: 0.01414613239467144 2023-01-22 13:58:54.116938: step: 198/466, loss: 0.15259598195552826 2023-01-22 13:58:54.717182: step: 200/466, loss: 0.00308390986174345 2023-01-22 13:58:55.364788: step: 202/466, loss: 0.03860871493816376 2023-01-22 13:58:55.960035: step: 204/466, loss: 0.009423403069376945 2023-01-22 13:58:56.556133: step: 206/466, loss: 0.00128264632076025 2023-01-22 13:58:57.135534: step: 208/466, loss: 0.007487480994313955 2023-01-22 13:58:57.730958: step: 210/466, loss: 5.24006986618042 2023-01-22 13:58:58.334836: step: 212/466, loss: 0.0017698605079203844 2023-01-22 13:58:58.981063: step: 214/466, loss: 0.00010806312639033422 2023-01-22 13:58:59.616962: step: 216/466, loss: 0.006685130763798952 2023-01-22 13:59:00.195016: step: 218/466, loss: 0.001411755452863872 2023-01-22 13:59:00.779500: step: 220/466, loss: 0.0016788601642474532 2023-01-22 13:59:01.464509: step: 222/466, loss: 0.001158503582701087 2023-01-22 13:59:02.071786: step: 224/466, loss: 0.002265445189550519 2023-01-22 13:59:02.663860: step: 226/466, loss: 0.00012758112279698253 2023-01-22 13:59:03.246792: step: 228/466, loss: 0.0001739346917020157 2023-01-22 13:59:03.823240: step: 230/466, loss: 0.029974643141031265 2023-01-22 13:59:04.422396: step: 232/466, loss: 0.002089595189318061 2023-01-22 13:59:05.027619: step: 234/466, loss: 0.015846455469727516 2023-01-22 13:59:05.592311: step: 236/466, loss: 0.00983512494713068 2023-01-22 13:59:06.189005: step: 238/466, loss: 0.0007549841539002955 2023-01-22 13:59:06.769113: step: 240/466, loss: 0.0016962449299171567 2023-01-22 13:59:07.414624: step: 242/466, loss: 0.00811100471764803 2023-01-22 13:59:08.073310: step: 244/466, loss: 0.016276143491268158 2023-01-22 13:59:08.643424: step: 246/466, loss: 0.00047804482164792717 2023-01-22 13:59:09.218181: step: 248/466, loss: 0.0029368638060986996 2023-01-22 13:59:09.784385: step: 250/466, loss: 0.005448102951049805 2023-01-22 13:59:10.390854: step: 252/466, loss: 0.008779305964708328 2023-01-22 13:59:10.986668: step: 254/466, loss: 0.005257518962025642 2023-01-22 13:59:11.626730: step: 256/466, loss: 0.0065764570608735085 2023-01-22 13:59:12.221692: step: 258/466, loss: 0.0008405945263803005 2023-01-22 13:59:12.794174: step: 260/466, loss: 0.00186285434756428 2023-01-22 13:59:13.378121: step: 262/466, loss: 0.0009987503290176392 2023-01-22 13:59:13.908612: step: 264/466, loss: 0.0072653088718652725 2023-01-22 13:59:14.729379: step: 266/466, loss: 0.004129834473133087 2023-01-22 13:59:15.343430: step: 268/466, loss: 0.0023377113975584507 2023-01-22 13:59:15.967312: step: 270/466, loss: 0.0005597401759587228 2023-01-22 13:59:16.552073: step: 272/466, loss: 0.00031847835634835064 2023-01-22 13:59:17.111943: step: 274/466, loss: 0.0003899749426636845 2023-01-22 13:59:17.687424: step: 276/466, loss: 0.01791643165051937 2023-01-22 13:59:18.322937: step: 278/466, loss: 0.011705256067216396 2023-01-22 13:59:18.952716: step: 280/466, loss: 0.015126674436032772 2023-01-22 13:59:19.523140: step: 282/466, loss: 0.002787497593089938 2023-01-22 13:59:20.103815: step: 284/466, loss: 0.004077774006873369 2023-01-22 13:59:20.731391: step: 286/466, loss: 0.007059828843921423 2023-01-22 13:59:21.334647: step: 288/466, loss: 0.01327449269592762 2023-01-22 13:59:21.918388: step: 290/466, loss: 0.00043224674300290644 2023-01-22 13:59:22.539925: step: 292/466, loss: 0.00018172396812587976 2023-01-22 13:59:23.107999: step: 294/466, loss: 0.009487542323768139 2023-01-22 13:59:23.725839: step: 296/466, loss: 1.0592328310012817 2023-01-22 13:59:24.317776: step: 298/466, loss: 0.001033150008879602 2023-01-22 13:59:25.032275: step: 300/466, loss: 0.00043564982479438186 2023-01-22 13:59:25.714098: step: 302/466, loss: 0.0006059581646695733 2023-01-22 13:59:26.285579: step: 304/466, loss: 0.03221989795565605 2023-01-22 13:59:26.859750: step: 306/466, loss: 0.004947112873196602 2023-01-22 13:59:27.502167: step: 308/466, loss: 0.005399622954428196 2023-01-22 13:59:28.115079: step: 310/466, loss: 0.026465794071555138 2023-01-22 13:59:28.685527: step: 312/466, loss: 0.0011521864216774702 2023-01-22 13:59:29.315991: step: 314/466, loss: 0.0005903297569602728 2023-01-22 13:59:29.897221: step: 316/466, loss: 0.022255612537264824 2023-01-22 13:59:30.633136: step: 318/466, loss: 0.032267697155475616 2023-01-22 13:59:31.220346: step: 320/466, loss: 0.022860009223222733 2023-01-22 13:59:31.847922: step: 322/466, loss: 0.00012504737242124975 2023-01-22 13:59:32.407659: step: 324/466, loss: 0.05044516921043396 2023-01-22 13:59:32.971354: step: 326/466, loss: 0.008676145225763321 2023-01-22 13:59:33.514376: step: 328/466, loss: 0.005747594870626926 2023-01-22 13:59:34.112209: step: 330/466, loss: 0.01149928942322731 2023-01-22 13:59:34.730118: step: 332/466, loss: 0.020429149270057678 2023-01-22 13:59:35.376786: step: 334/466, loss: 0.6362595558166504 2023-01-22 13:59:35.978149: step: 336/466, loss: 0.0003849474887829274 2023-01-22 13:59:36.565876: step: 338/466, loss: 0.000748310936614871 2023-01-22 13:59:37.259379: step: 340/466, loss: 0.22001215815544128 2023-01-22 13:59:37.864402: step: 342/466, loss: 0.0038748010993003845 2023-01-22 13:59:38.483141: step: 344/466, loss: 0.007948961108922958 2023-01-22 13:59:39.128923: step: 346/466, loss: 0.07563716918230057 2023-01-22 13:59:39.744382: step: 348/466, loss: 0.019224612042307854 2023-01-22 13:59:40.395233: step: 350/466, loss: 0.01067200768738985 2023-01-22 13:59:40.902859: step: 352/466, loss: 0.00024166921502910554 2023-01-22 13:59:41.517252: step: 354/466, loss: 0.012266403995454311 2023-01-22 13:59:42.095750: step: 356/466, loss: 0.011130928993225098 2023-01-22 13:59:42.667957: step: 358/466, loss: 0.012516900897026062 2023-01-22 13:59:43.222235: step: 360/466, loss: 0.03315199911594391 2023-01-22 13:59:43.838906: step: 362/466, loss: 0.0007820624159649014 2023-01-22 13:59:44.471072: step: 364/466, loss: 0.005528890527784824 2023-01-22 13:59:45.029064: step: 366/466, loss: 0.04025204852223396 2023-01-22 13:59:45.611946: step: 368/466, loss: 0.04432765766978264 2023-01-22 13:59:46.195679: step: 370/466, loss: 2.249654608021956e-05 2023-01-22 13:59:46.814566: step: 372/466, loss: 0.050257954746484756 2023-01-22 13:59:47.434976: step: 374/466, loss: 0.002168118953704834 2023-01-22 13:59:48.013467: step: 376/466, loss: 0.000975790957454592 2023-01-22 13:59:48.588799: step: 378/466, loss: 0.04330888390541077 2023-01-22 13:59:49.207678: step: 380/466, loss: 0.02073555439710617 2023-01-22 13:59:49.859497: step: 382/466, loss: 0.0008999567362479866 2023-01-22 13:59:50.461710: step: 384/466, loss: 0.006720571778714657 2023-01-22 13:59:51.037532: step: 386/466, loss: 0.015682553872466087 2023-01-22 13:59:51.669502: step: 388/466, loss: 0.014605056494474411 2023-01-22 13:59:52.228909: step: 390/466, loss: 0.004097847267985344 2023-01-22 13:59:52.853435: step: 392/466, loss: 0.0360754057765007 2023-01-22 13:59:53.451624: step: 394/466, loss: 0.008980442769825459 2023-01-22 13:59:54.028433: step: 396/466, loss: 0.0016102999215945601 2023-01-22 13:59:54.634426: step: 398/466, loss: 0.012340670451521873 2023-01-22 13:59:55.220388: step: 400/466, loss: 0.004707478452473879 2023-01-22 13:59:55.852734: step: 402/466, loss: 0.003513404866680503 2023-01-22 13:59:56.460972: step: 404/466, loss: 0.018040353432297707 2023-01-22 13:59:57.092199: step: 406/466, loss: 0.007725434377789497 2023-01-22 13:59:57.665710: step: 408/466, loss: 0.007257946766912937 2023-01-22 13:59:58.268706: step: 410/466, loss: 0.0008977011311799288 2023-01-22 13:59:58.874199: step: 412/466, loss: 0.0028205220587551594 2023-01-22 13:59:59.518657: step: 414/466, loss: 0.0014505174476653337 2023-01-22 14:00:00.082655: step: 416/466, loss: 0.007715018931776285 2023-01-22 14:00:00.674463: step: 418/466, loss: 0.0150665994733572 2023-01-22 14:00:01.343793: step: 420/466, loss: 0.000246486539253965 2023-01-22 14:00:02.025574: step: 422/466, loss: 0.04313650727272034 2023-01-22 14:00:02.602528: step: 424/466, loss: 0.02649090252816677 2023-01-22 14:00:03.187566: step: 426/466, loss: 0.004771926905959845 2023-01-22 14:00:03.886902: step: 428/466, loss: 0.0037256137002259493 2023-01-22 14:00:04.456438: step: 430/466, loss: 0.00038914073957130313 2023-01-22 14:00:04.999828: step: 432/466, loss: 0.0022230239119380713 2023-01-22 14:00:05.662574: step: 434/466, loss: 0.03072909638285637 2023-01-22 14:00:06.285698: step: 436/466, loss: 0.10898005962371826 2023-01-22 14:00:06.885582: step: 438/466, loss: 0.04404553398489952 2023-01-22 14:00:07.519320: step: 440/466, loss: 0.0014041807735338807 2023-01-22 14:00:08.171819: step: 442/466, loss: 0.0072995019145309925 2023-01-22 14:00:08.776792: step: 444/466, loss: 0.09468042850494385 2023-01-22 14:00:09.383883: step: 446/466, loss: 0.019442066550254822 2023-01-22 14:00:10.018534: step: 448/466, loss: 0.013830686919391155 2023-01-22 14:00:10.639681: step: 450/466, loss: 0.07783650606870651 2023-01-22 14:00:11.329657: step: 452/466, loss: 0.03487912937998772 2023-01-22 14:00:11.949780: step: 454/466, loss: 0.02686239592730999 2023-01-22 14:00:12.559661: step: 456/466, loss: 0.0005249048699624836 2023-01-22 14:00:13.149533: step: 458/466, loss: 0.00393849890679121 2023-01-22 14:00:13.731946: step: 460/466, loss: 0.27787500619888306 2023-01-22 14:00:14.351371: step: 462/466, loss: 0.00037776704994030297 2023-01-22 14:00:14.949068: step: 464/466, loss: 0.00618037860840559 2023-01-22 14:00:15.565864: step: 466/466, loss: 0.0005530905327759683 2023-01-22 14:00:16.229660: step: 468/466, loss: 0.03683909401297569 2023-01-22 14:00:16.805693: step: 470/466, loss: 0.0035309072118252516 2023-01-22 14:00:17.387942: step: 472/466, loss: 0.0008134430972859263 2023-01-22 14:00:17.967791: step: 474/466, loss: 0.009414694271981716 2023-01-22 14:00:18.613930: step: 476/466, loss: 0.0031402541790157557 2023-01-22 14:00:19.180719: step: 478/466, loss: 0.012147171422839165 2023-01-22 14:00:19.795287: step: 480/466, loss: 0.00014230998931452632 2023-01-22 14:00:20.361882: step: 482/466, loss: 0.0010596913052722812 2023-01-22 14:00:20.954053: step: 484/466, loss: 0.0105279004201293 2023-01-22 14:00:21.534124: step: 486/466, loss: 0.012947582639753819 2023-01-22 14:00:22.177947: step: 488/466, loss: 0.004752716515213251 2023-01-22 14:00:22.785390: step: 490/466, loss: 0.0015016966499388218 2023-01-22 14:00:23.376744: step: 492/466, loss: 0.0045246221125125885 2023-01-22 14:00:23.993842: step: 494/466, loss: 0.003018786199390888 2023-01-22 14:00:24.616237: step: 496/466, loss: 0.0028835893608629704 2023-01-22 14:00:25.206363: step: 498/466, loss: 0.004857239313423634 2023-01-22 14:00:25.803874: step: 500/466, loss: 0.0025022828485816717 2023-01-22 14:00:26.419531: step: 502/466, loss: 0.005179490428417921 2023-01-22 14:00:26.985910: step: 504/466, loss: 0.0005254794377833605 2023-01-22 14:00:27.563904: step: 506/466, loss: 0.46436429023742676 2023-01-22 14:00:28.160486: step: 508/466, loss: 0.008739815093576908 2023-01-22 14:00:28.749303: step: 510/466, loss: 0.003550920868292451 2023-01-22 14:00:29.339176: step: 512/466, loss: 0.003878939663991332 2023-01-22 14:00:29.975918: step: 514/466, loss: 0.00949602760374546 2023-01-22 14:00:30.539256: step: 516/466, loss: 0.0002720048651099205 2023-01-22 14:00:31.115946: step: 518/466, loss: 0.0005417861975729465 2023-01-22 14:00:31.711019: step: 520/466, loss: 0.018731823191046715 2023-01-22 14:00:32.358318: step: 522/466, loss: 0.0026590144261717796 2023-01-22 14:00:32.962208: step: 524/466, loss: 0.01841605268418789 2023-01-22 14:00:33.564058: step: 526/466, loss: 0.052160799503326416 2023-01-22 14:00:34.095233: step: 528/466, loss: 0.0019730718340724707 2023-01-22 14:00:34.666867: step: 530/466, loss: 7.600292155984789e-05 2023-01-22 14:00:35.254382: step: 532/466, loss: 0.04475203529000282 2023-01-22 14:00:35.827686: step: 534/466, loss: 0.0004294338868930936 2023-01-22 14:00:36.386055: step: 536/466, loss: 0.017182378098368645 2023-01-22 14:00:37.064796: step: 538/466, loss: 0.008043097332119942 2023-01-22 14:00:37.668612: step: 540/466, loss: 0.0004025986127089709 2023-01-22 14:00:38.295438: step: 542/466, loss: 0.008009443059563637 2023-01-22 14:00:38.939276: step: 544/466, loss: 0.0009922012686729431 2023-01-22 14:00:39.521906: step: 546/466, loss: 1.1559067388589028e-05 2023-01-22 14:00:40.165192: step: 548/466, loss: 0.0022911306004971266 2023-01-22 14:00:40.794933: step: 550/466, loss: 0.04490361735224724 2023-01-22 14:00:41.389880: step: 552/466, loss: 0.00046182976802811027 2023-01-22 14:00:41.974149: step: 554/466, loss: 0.012909126468002796 2023-01-22 14:00:42.583199: step: 556/466, loss: 0.0005594027461484075 2023-01-22 14:00:43.207602: step: 558/466, loss: 0.0003095760475844145 2023-01-22 14:00:43.837489: step: 560/466, loss: 0.002141920616850257 2023-01-22 14:00:44.497730: step: 562/466, loss: 0.0038322710897773504 2023-01-22 14:00:45.030590: step: 564/466, loss: 0.09330803900957108 2023-01-22 14:00:45.702805: step: 566/466, loss: 0.0011068833991885185 2023-01-22 14:00:46.346084: step: 568/466, loss: 0.0008409665897488594 2023-01-22 14:00:46.956755: step: 570/466, loss: 0.001502808416262269 2023-01-22 14:00:47.615491: step: 572/466, loss: 0.2815001606941223 2023-01-22 14:00:48.212242: step: 574/466, loss: 0.0001007162791211158 2023-01-22 14:00:48.844605: step: 576/466, loss: 0.03037295676767826 2023-01-22 14:00:49.570404: step: 578/466, loss: 0.17398248612880707 2023-01-22 14:00:50.222654: step: 580/466, loss: 0.04349957033991814 2023-01-22 14:00:50.869810: step: 582/466, loss: 0.1279125064611435 2023-01-22 14:00:51.447852: step: 584/466, loss: 0.0009704646654427052 2023-01-22 14:00:52.083797: step: 586/466, loss: 0.0023126662708818913 2023-01-22 14:00:52.727687: step: 588/466, loss: 0.008005252107977867 2023-01-22 14:00:53.290864: step: 590/466, loss: 0.0005765442620031536 2023-01-22 14:00:53.869767: step: 592/466, loss: 0.05980457738041878 2023-01-22 14:00:54.507204: step: 594/466, loss: 0.00028216736973263323 2023-01-22 14:00:55.078775: step: 596/466, loss: 0.000833461235743016 2023-01-22 14:00:55.714267: step: 598/466, loss: 0.0018226697575300932 2023-01-22 14:00:56.315079: step: 600/466, loss: 0.00600542314350605 2023-01-22 14:00:56.873726: step: 602/466, loss: 0.013094187714159489 2023-01-22 14:00:57.497607: step: 604/466, loss: 0.003840026678517461 2023-01-22 14:00:58.149996: step: 606/466, loss: 0.004819925874471664 2023-01-22 14:00:58.753448: step: 608/466, loss: 0.004980082157999277 2023-01-22 14:00:59.427491: step: 610/466, loss: 0.0036126424092799425 2023-01-22 14:01:00.013708: step: 612/466, loss: 0.0014587597688660026 2023-01-22 14:01:00.715552: step: 614/466, loss: 0.00035369230317883193 2023-01-22 14:01:01.365993: step: 616/466, loss: 0.014491337351500988 2023-01-22 14:01:02.014650: step: 618/466, loss: 0.12356384098529816 2023-01-22 14:01:02.641606: step: 620/466, loss: 0.00992063619196415 2023-01-22 14:01:03.269063: step: 622/466, loss: 0.00022361463925335556 2023-01-22 14:01:03.930645: step: 624/466, loss: 0.026897968724370003 2023-01-22 14:01:04.500912: step: 626/466, loss: 0.0015888429479673505 2023-01-22 14:01:05.097765: step: 628/466, loss: 0.022302014753222466 2023-01-22 14:01:05.746165: step: 630/466, loss: 0.0008641614695079625 2023-01-22 14:01:06.362140: step: 632/466, loss: 0.00048572392552159727 2023-01-22 14:01:06.957107: step: 634/466, loss: 0.04261472076177597 2023-01-22 14:01:07.628517: step: 636/466, loss: 0.027452753856778145 2023-01-22 14:01:08.274646: step: 638/466, loss: 0.0005783818196505308 2023-01-22 14:01:08.825849: step: 640/466, loss: 0.020351797342300415 2023-01-22 14:01:09.417636: step: 642/466, loss: 0.0030048314947634935 2023-01-22 14:01:10.043041: step: 644/466, loss: 0.00014717664453200996 2023-01-22 14:01:10.664342: step: 646/466, loss: 0.007317872252315283 2023-01-22 14:01:11.236756: step: 648/466, loss: 0.034398648887872696 2023-01-22 14:01:11.861009: step: 650/466, loss: 0.034762825816869736 2023-01-22 14:01:12.439832: step: 652/466, loss: 0.0005621562013402581 2023-01-22 14:01:13.037917: step: 654/466, loss: 0.002596135251224041 2023-01-22 14:01:13.643421: step: 656/466, loss: 0.03442927077412605 2023-01-22 14:01:14.271846: step: 658/466, loss: 0.0009023649035952985 2023-01-22 14:01:14.867232: step: 660/466, loss: 0.026509709656238556 2023-01-22 14:01:15.499516: step: 662/466, loss: 0.014872992411255836 2023-01-22 14:01:16.084761: step: 664/466, loss: 0.00032420759089291096 2023-01-22 14:01:16.769864: step: 666/466, loss: 0.040809109807014465 2023-01-22 14:01:17.428786: step: 668/466, loss: 0.0006128854001872241 2023-01-22 14:01:18.023654: step: 670/466, loss: 0.14126506447792053 2023-01-22 14:01:18.686896: step: 672/466, loss: 0.0026687774807214737 2023-01-22 14:01:19.341055: step: 674/466, loss: 0.017287014052271843 2023-01-22 14:01:19.990303: step: 676/466, loss: 0.0005365812685340643 2023-01-22 14:01:20.654864: step: 678/466, loss: 0.049192655831575394 2023-01-22 14:01:21.270671: step: 680/466, loss: 0.012509983032941818 2023-01-22 14:01:21.918567: step: 682/466, loss: 0.058808889240026474 2023-01-22 14:01:22.514780: step: 684/466, loss: 0.004044192377477884 2023-01-22 14:01:23.050577: step: 686/466, loss: 0.013986448757350445 2023-01-22 14:01:23.708703: step: 688/466, loss: 0.00043944790377281606 2023-01-22 14:01:24.355417: step: 690/466, loss: 0.00938647985458374 2023-01-22 14:01:24.964257: step: 692/466, loss: 0.0008670755196362734 2023-01-22 14:01:25.581715: step: 694/466, loss: 0.003636277047917247 2023-01-22 14:01:26.259281: step: 696/466, loss: 0.023744331672787666 2023-01-22 14:01:26.880205: step: 698/466, loss: 0.0008914788486436009 2023-01-22 14:01:27.426415: step: 700/466, loss: 0.007494314573705196 2023-01-22 14:01:28.101051: step: 702/466, loss: 0.007531964685767889 2023-01-22 14:01:28.737710: step: 704/466, loss: 0.0007346156635321677 2023-01-22 14:01:29.352304: step: 706/466, loss: 0.015438981354236603 2023-01-22 14:01:29.927290: step: 708/466, loss: 0.038965754210948944 2023-01-22 14:01:30.526488: step: 710/466, loss: 0.029853714630007744 2023-01-22 14:01:31.087463: step: 712/466, loss: 0.06593216955661774 2023-01-22 14:01:31.736771: step: 714/466, loss: 0.01110734324902296 2023-01-22 14:01:32.357144: step: 716/466, loss: 0.016697432845830917 2023-01-22 14:01:32.997654: step: 718/466, loss: 0.04144787788391113 2023-01-22 14:01:33.661379: step: 720/466, loss: 0.0013874376891180873 2023-01-22 14:01:34.252649: step: 722/466, loss: 0.009951998479664326 2023-01-22 14:01:34.906230: step: 724/466, loss: 0.022471958771348 2023-01-22 14:01:35.581695: step: 726/466, loss: 0.14198991656303406 2023-01-22 14:01:36.218042: step: 728/466, loss: 0.010681631043553352 2023-01-22 14:01:36.782950: step: 730/466, loss: 0.044323984533548355 2023-01-22 14:01:37.360839: step: 732/466, loss: 0.03531954064965248 2023-01-22 14:01:37.940964: step: 734/466, loss: 0.0006543992785736918 2023-01-22 14:01:38.539334: step: 736/466, loss: 0.001076264539733529 2023-01-22 14:01:39.142782: step: 738/466, loss: 0.011064611375331879 2023-01-22 14:01:39.726147: step: 740/466, loss: 0.004203278571367264 2023-01-22 14:01:40.312856: step: 742/466, loss: 0.0006326843285933137 2023-01-22 14:01:40.918404: step: 744/466, loss: 0.0007471975404769182 2023-01-22 14:01:41.500567: step: 746/466, loss: 0.007592168636620045 2023-01-22 14:01:42.116021: step: 748/466, loss: 0.006634784862399101 2023-01-22 14:01:42.736285: step: 750/466, loss: 0.03417897969484329 2023-01-22 14:01:43.357073: step: 752/466, loss: 0.001295391470193863 2023-01-22 14:01:43.945072: step: 754/466, loss: 0.0022968819830566645 2023-01-22 14:01:44.531828: step: 756/466, loss: 0.0014519159449264407 2023-01-22 14:01:45.102492: step: 758/466, loss: 0.1301531195640564 2023-01-22 14:01:45.693247: step: 760/466, loss: 0.043121397495269775 2023-01-22 14:01:46.378069: step: 762/466, loss: 0.003836657153442502 2023-01-22 14:01:47.041624: step: 764/466, loss: 0.007856789045035839 2023-01-22 14:01:47.619842: step: 766/466, loss: 0.05979446694254875 2023-01-22 14:01:48.205177: step: 768/466, loss: 0.025634147226810455 2023-01-22 14:01:48.806780: step: 770/466, loss: 0.0024480284191668034 2023-01-22 14:01:49.452083: step: 772/466, loss: 0.011907415464520454 2023-01-22 14:01:50.125583: step: 774/466, loss: 0.004685855470597744 2023-01-22 14:01:50.726738: step: 776/466, loss: 0.00906300637871027 2023-01-22 14:01:51.366186: step: 778/466, loss: 0.15089234709739685 2023-01-22 14:01:52.006409: step: 780/466, loss: 0.06680942326784134 2023-01-22 14:01:52.599592: step: 782/466, loss: 0.0020852303132414818 2023-01-22 14:01:53.191127: step: 784/466, loss: 0.012161944061517715 2023-01-22 14:01:53.749028: step: 786/466, loss: 0.03335161507129669 2023-01-22 14:01:54.396984: step: 788/466, loss: 0.0030902328435331583 2023-01-22 14:01:55.088789: step: 790/466, loss: 0.0237263310700655 2023-01-22 14:01:55.645191: step: 792/466, loss: 0.002544113900512457 2023-01-22 14:01:56.323316: step: 794/466, loss: 0.06023455411195755 2023-01-22 14:01:56.965216: step: 796/466, loss: 0.24715900421142578 2023-01-22 14:01:57.528385: step: 798/466, loss: 0.022844484075903893 2023-01-22 14:01:58.157022: step: 800/466, loss: 0.40122318267822266 2023-01-22 14:01:58.802701: step: 802/466, loss: 0.025761272758245468 2023-01-22 14:01:59.483849: step: 804/466, loss: 0.019390100613236427 2023-01-22 14:02:00.081840: step: 806/466, loss: 0.0019449134124442935 2023-01-22 14:02:00.695520: step: 808/466, loss: 0.0016414802521467209 2023-01-22 14:02:01.320595: step: 810/466, loss: 0.009476746432483196 2023-01-22 14:02:01.929861: step: 812/466, loss: 0.011853148229420185 2023-01-22 14:02:02.744896: step: 814/466, loss: 0.037017446011304855 2023-01-22 14:02:03.327655: step: 816/466, loss: 0.012307077646255493 2023-01-22 14:02:03.897713: step: 818/466, loss: 0.0005546507891267538 2023-01-22 14:02:04.558617: step: 820/466, loss: 0.017318541184067726 2023-01-22 14:02:05.164380: step: 822/466, loss: 0.002184445969760418 2023-01-22 14:02:05.805745: step: 824/466, loss: 0.0024101058952510357 2023-01-22 14:02:06.415183: step: 826/466, loss: 0.01470540463924408 2023-01-22 14:02:07.004950: step: 828/466, loss: 0.00307606253772974 2023-01-22 14:02:07.671197: step: 830/466, loss: 0.0006091810064390302 2023-01-22 14:02:08.302023: step: 832/466, loss: 0.006069089286029339 2023-01-22 14:02:08.968134: step: 834/466, loss: 0.09584932774305344 2023-01-22 14:02:09.588975: step: 836/466, loss: 0.006171443499624729 2023-01-22 14:02:10.201289: step: 838/466, loss: 0.006870781537145376 2023-01-22 14:02:10.806831: step: 840/466, loss: 0.010167393833398819 2023-01-22 14:02:11.432735: step: 842/466, loss: 0.0011944527504965663 2023-01-22 14:02:12.085626: step: 844/466, loss: 0.0006144341314211488 2023-01-22 14:02:12.743881: step: 846/466, loss: 0.2855139672756195 2023-01-22 14:02:13.357127: step: 848/466, loss: 1.5135047435760498 2023-01-22 14:02:14.012874: step: 850/466, loss: 0.006842055357992649 2023-01-22 14:02:14.615330: step: 852/466, loss: 0.01040316466242075 2023-01-22 14:02:15.239060: step: 854/466, loss: 0.003283366095274687 2023-01-22 14:02:15.894546: step: 856/466, loss: 0.00498466519638896 2023-01-22 14:02:16.531706: step: 858/466, loss: 0.004681904800236225 2023-01-22 14:02:17.056892: step: 860/466, loss: 0.0169754009693861 2023-01-22 14:02:17.664333: step: 862/466, loss: 0.0372665636241436 2023-01-22 14:02:18.310746: step: 864/466, loss: 0.02054089680314064 2023-01-22 14:02:18.894438: step: 866/466, loss: 0.03896929696202278 2023-01-22 14:02:19.533559: step: 868/466, loss: 0.007434012833982706 2023-01-22 14:02:20.167043: step: 870/466, loss: 0.0048303971998393536 2023-01-22 14:02:20.717409: step: 872/466, loss: 0.004994220100343227 2023-01-22 14:02:21.350768: step: 874/466, loss: 0.0015695391921326518 2023-01-22 14:02:21.959739: step: 876/466, loss: 0.002584850648418069 2023-01-22 14:02:22.601967: step: 878/466, loss: 0.006896187085658312 2023-01-22 14:02:23.185429: step: 880/466, loss: 0.003456108272075653 2023-01-22 14:02:23.835146: step: 882/466, loss: 0.028172794729471207 2023-01-22 14:02:24.429493: step: 884/466, loss: 0.00853010918945074 2023-01-22 14:02:25.021702: step: 886/466, loss: 0.007580365054309368 2023-01-22 14:02:25.686148: step: 888/466, loss: 0.006292473059147596 2023-01-22 14:02:26.266592: step: 890/466, loss: 0.0060486397705972195 2023-01-22 14:02:26.879646: step: 892/466, loss: 0.03900180757045746 2023-01-22 14:02:27.478864: step: 894/466, loss: 0.004179924260824919 2023-01-22 14:02:28.089584: step: 896/466, loss: 0.0010741227306425571 2023-01-22 14:02:28.697906: step: 898/466, loss: 0.0005468535237014294 2023-01-22 14:02:29.428881: step: 900/466, loss: 0.02371526136994362 2023-01-22 14:02:30.026332: step: 902/466, loss: 0.009221994318068027 2023-01-22 14:02:30.658263: step: 904/466, loss: 0.2059815227985382 2023-01-22 14:02:31.228242: step: 906/466, loss: 0.0070160552859306335 2023-01-22 14:02:31.791018: step: 908/466, loss: 0.032636530697345734 2023-01-22 14:02:32.368855: step: 910/466, loss: 0.01037522405385971 2023-01-22 14:02:33.045299: step: 912/466, loss: 0.008214067667722702 2023-01-22 14:02:33.699669: step: 914/466, loss: 0.0076082623563706875 2023-01-22 14:02:34.273756: step: 916/466, loss: 0.1538480818271637 2023-01-22 14:02:34.867880: step: 918/466, loss: 9.918749856296927e-05 2023-01-22 14:02:35.485309: step: 920/466, loss: 0.008902657777071 2023-01-22 14:02:36.042570: step: 922/466, loss: 0.0007451887940987945 2023-01-22 14:02:36.608001: step: 924/466, loss: 5.677406443282962e-05 2023-01-22 14:02:37.268246: step: 926/466, loss: 0.0005498333484865725 2023-01-22 14:02:38.013149: step: 928/466, loss: 0.10708189755678177 2023-01-22 14:02:38.560478: step: 930/466, loss: 0.0016411797842010856 2023-01-22 14:02:39.114360: step: 932/466, loss: 0.027343858033418655 ================================================== Loss: 0.050 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30210293726741094, 'r': 0.32675270254729455, 'f1': 0.31394471147205877}, 'combined': 0.23132768213730645, 'epoch': 36} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3615788998910369, 'r': 0.32708308116769425, 'f1': 0.3434670242134349}, 'combined': 0.22779160155087907, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2846124229979466, 'r': 0.2625118371212121, 'f1': 0.273115763546798}, 'combined': 0.18207717569786533, 'epoch': 36} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3593344436253723, 'r': 0.29965259831735197, 'f1': 0.3267909468442065}, 'combined': 0.21327409162464, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28958146895121684, 'r': 0.32694681978363194, 'f1': 0.30713186100886636}, 'combined': 0.22630768705916468, 'epoch': 36} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3503510116633564, 'r': 0.31020662491026346, 'f1': 0.3290589612307767}, 'combined': 0.21823599501315757, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.303030303030303, 'r': 0.38095238095238093, 'f1': 0.3375527426160337}, 'combined': 0.22503516174402244, 'epoch': 36} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47619047619047616, 'r': 0.43478260869565216, 'f1': 0.4545454545454545}, 'combined': 0.303030303030303, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.23275862068965517, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:05:07.153564: step: 2/466, loss: 0.0008805061224848032 2023-01-22 14:05:07.711658: step: 4/466, loss: 3.681400266941637e-05 2023-01-22 14:05:08.390859: step: 6/466, loss: 0.036304231733083725 2023-01-22 14:05:08.966383: step: 8/466, loss: 0.0014005315024405718 2023-01-22 14:05:09.592285: step: 10/466, loss: 0.2493869513273239 2023-01-22 14:05:10.227388: step: 12/466, loss: 0.005454708356410265 2023-01-22 14:05:10.872059: step: 14/466, loss: 0.0005787847330793738 2023-01-22 14:05:11.475914: step: 16/466, loss: 0.008444041945040226 2023-01-22 14:05:12.089650: step: 18/466, loss: 0.0016095231985673308 2023-01-22 14:05:12.711680: step: 20/466, loss: 0.00022498948965221643 2023-01-22 14:05:13.388718: step: 22/466, loss: 0.012059216387569904 2023-01-22 14:05:14.004382: step: 24/466, loss: 0.0009429472265765071 2023-01-22 14:05:14.663381: step: 26/466, loss: 0.004221235867589712 2023-01-22 14:05:15.308066: step: 28/466, loss: 0.07568201422691345 2023-01-22 14:05:15.952869: step: 30/466, loss: 0.0034840416628867388 2023-01-22 14:05:16.594459: step: 32/466, loss: 0.012441596947610378 2023-01-22 14:05:17.114676: step: 34/466, loss: 0.00023383184452541173 2023-01-22 14:05:17.828992: step: 36/466, loss: 0.00031923266942612827 2023-01-22 14:05:18.432654: step: 38/466, loss: 0.003666159464046359 2023-01-22 14:05:19.096712: step: 40/466, loss: 0.00039576643030159175 2023-01-22 14:05:19.728118: step: 42/466, loss: 0.0009798625251278281 2023-01-22 14:05:20.342547: step: 44/466, loss: 0.005588307045400143 2023-01-22 14:05:20.966367: step: 46/466, loss: 0.03727317228913307 2023-01-22 14:05:21.594086: step: 48/466, loss: 0.004420082084834576 2023-01-22 14:05:22.136410: step: 50/466, loss: 2.9930888558737934e-06 2023-01-22 14:05:22.772020: step: 52/466, loss: 0.01459225732833147 2023-01-22 14:05:23.336944: step: 54/466, loss: 0.00021791511971969157 2023-01-22 14:05:23.950969: step: 56/466, loss: 0.00026564046856947243 2023-01-22 14:05:24.621782: step: 58/466, loss: 0.015419005416333675 2023-01-22 14:05:25.226649: step: 60/466, loss: 0.04069914296269417 2023-01-22 14:05:25.821082: step: 62/466, loss: 0.057508617639541626 2023-01-22 14:05:26.384086: step: 64/466, loss: 0.0007234219228848815 2023-01-22 14:05:26.995270: step: 66/466, loss: 0.0033933455124497414 2023-01-22 14:05:27.584283: step: 68/466, loss: 0.011373174376785755 2023-01-22 14:05:28.256278: step: 70/466, loss: 0.007621078286319971 2023-01-22 14:05:28.811859: step: 72/466, loss: 0.0011365091195330024 2023-01-22 14:05:29.413386: step: 74/466, loss: 0.0007960588554851711 2023-01-22 14:05:30.031263: step: 76/466, loss: 0.020760798826813698 2023-01-22 14:05:30.616673: step: 78/466, loss: 0.013785758055746555 2023-01-22 14:05:31.161974: step: 80/466, loss: 0.003760263556614518 2023-01-22 14:05:31.727163: step: 82/466, loss: 0.010897494852542877 2023-01-22 14:05:32.373668: step: 84/466, loss: 0.000967467378359288 2023-01-22 14:05:32.991312: step: 86/466, loss: 0.005571451503783464 2023-01-22 14:05:33.702224: step: 88/466, loss: 0.04830334335565567 2023-01-22 14:05:34.316264: step: 90/466, loss: 0.026635482907295227 2023-01-22 14:05:34.900490: step: 92/466, loss: 0.00018935652042273432 2023-01-22 14:05:35.533802: step: 94/466, loss: 0.002730258274823427 2023-01-22 14:05:36.084759: step: 96/466, loss: 0.4884699285030365 2023-01-22 14:05:36.657010: step: 98/466, loss: 0.0043935151770710945 2023-01-22 14:05:37.344406: step: 100/466, loss: 0.005710270255804062 2023-01-22 14:05:37.979924: step: 102/466, loss: 0.03708332031965256 2023-01-22 14:05:38.547395: step: 104/466, loss: 0.0016062766080722213 2023-01-22 14:05:39.106785: step: 106/466, loss: 0.02114509418606758 2023-01-22 14:05:39.680454: step: 108/466, loss: 0.0015665657119825482 2023-01-22 14:05:40.209907: step: 110/466, loss: 0.0013903353828936815 2023-01-22 14:05:40.732911: step: 112/466, loss: 0.004117294680327177 2023-01-22 14:05:41.371046: step: 114/466, loss: 9.603073704056442e-05 2023-01-22 14:05:42.105163: step: 116/466, loss: 0.0019076470052823424 2023-01-22 14:05:42.693306: step: 118/466, loss: 0.03609192371368408 2023-01-22 14:05:43.306785: step: 120/466, loss: 0.0002453723573125899 2023-01-22 14:05:43.836034: step: 122/466, loss: 0.0007782530738040805 2023-01-22 14:05:44.460093: step: 124/466, loss: 0.0011430905433371663 2023-01-22 14:05:45.067114: step: 126/466, loss: 0.022032130509614944 2023-01-22 14:05:45.667034: step: 128/466, loss: 0.07144544273614883 2023-01-22 14:05:46.276361: step: 130/466, loss: 0.004590063355863094 2023-01-22 14:05:46.892293: step: 132/466, loss: 0.028195375576615334 2023-01-22 14:05:47.475456: step: 134/466, loss: 0.023656990379095078 2023-01-22 14:05:48.077849: step: 136/466, loss: 0.05975626781582832 2023-01-22 14:05:48.734941: step: 138/466, loss: 0.003439907915890217 2023-01-22 14:05:49.286603: step: 140/466, loss: 0.0012263577664270997 2023-01-22 14:05:49.927814: step: 142/466, loss: 0.01997874490916729 2023-01-22 14:05:50.453247: step: 144/466, loss: 0.0015897410921752453 2023-01-22 14:05:51.092663: step: 146/466, loss: 0.04166782647371292 2023-01-22 14:05:51.781380: step: 148/466, loss: 0.014002179726958275 2023-01-22 14:05:52.386046: step: 150/466, loss: 0.000593072734773159 2023-01-22 14:05:53.029858: step: 152/466, loss: 0.0024511227384209633 2023-01-22 14:05:53.650038: step: 154/466, loss: 0.003020963165909052 2023-01-22 14:05:54.302633: step: 156/466, loss: 0.008708332665264606 2023-01-22 14:05:54.969383: step: 158/466, loss: 0.018368329852819443 2023-01-22 14:05:55.558757: step: 160/466, loss: 0.04773017019033432 2023-01-22 14:05:56.108750: step: 162/466, loss: 0.0012530928943306208 2023-01-22 14:05:56.670248: step: 164/466, loss: 0.0264136865735054 2023-01-22 14:05:57.303805: step: 166/466, loss: 0.005277651362121105 2023-01-22 14:05:57.911926: step: 168/466, loss: 0.006790454499423504 2023-01-22 14:05:58.462706: step: 170/466, loss: 0.007004170678555965 2023-01-22 14:05:59.083542: step: 172/466, loss: 0.0011634072288870811 2023-01-22 14:05:59.734908: step: 174/466, loss: 0.039164841175079346 2023-01-22 14:06:00.269866: step: 176/466, loss: 0.00011331143468851224 2023-01-22 14:06:00.913276: step: 178/466, loss: 0.0017837842460721731 2023-01-22 14:06:01.521191: step: 180/466, loss: 0.007201091386377811 2023-01-22 14:06:02.130389: step: 182/466, loss: 2.483464231772814e-05 2023-01-22 14:06:02.784220: step: 184/466, loss: 0.00038544295239262283 2023-01-22 14:06:03.358492: step: 186/466, loss: 0.003666855860501528 2023-01-22 14:06:03.926258: step: 188/466, loss: 0.0021711955778300762 2023-01-22 14:06:04.588620: step: 190/466, loss: 0.009761896915733814 2023-01-22 14:06:05.208536: step: 192/466, loss: 0.006024029105901718 2023-01-22 14:06:05.761717: step: 194/466, loss: 0.001021972973830998 2023-01-22 14:06:06.412799: step: 196/466, loss: 0.07072720676660538 2023-01-22 14:06:06.987640: step: 198/466, loss: 0.21727712452411652 2023-01-22 14:06:07.501895: step: 200/466, loss: 0.0025600316002964973 2023-01-22 14:06:08.077293: step: 202/466, loss: 0.008431457914412022 2023-01-22 14:06:08.697072: step: 204/466, loss: 0.0926947146654129 2023-01-22 14:06:09.296334: step: 206/466, loss: 0.011736077256500721 2023-01-22 14:06:09.911986: step: 208/466, loss: 0.01215405948460102 2023-01-22 14:06:10.525913: step: 210/466, loss: 0.016161885112524033 2023-01-22 14:06:11.124144: step: 212/466, loss: 0.0162214208394289 2023-01-22 14:06:11.775692: step: 214/466, loss: 0.011673922650516033 2023-01-22 14:06:12.466889: step: 216/466, loss: 0.02247869037091732 2023-01-22 14:06:13.056441: step: 218/466, loss: 0.025550978258252144 2023-01-22 14:06:13.709621: step: 220/466, loss: 0.20616596937179565 2023-01-22 14:06:14.348394: step: 222/466, loss: 0.012072236277163029 2023-01-22 14:06:14.921033: step: 224/466, loss: 0.0014509232714772224 2023-01-22 14:06:15.690269: step: 226/466, loss: 0.28090688586235046 2023-01-22 14:06:16.282324: step: 228/466, loss: 0.0036569759249687195 2023-01-22 14:06:16.880507: step: 230/466, loss: 0.004075853154063225 2023-01-22 14:06:17.506639: step: 232/466, loss: 0.01301665510982275 2023-01-22 14:06:18.100182: step: 234/466, loss: 0.0003938743029721081 2023-01-22 14:06:18.702608: step: 236/466, loss: 0.31214165687561035 2023-01-22 14:06:19.340782: step: 238/466, loss: 0.01012321375310421 2023-01-22 14:06:19.896742: step: 240/466, loss: 0.00015666645776946098 2023-01-22 14:06:20.458284: step: 242/466, loss: 0.0032289193477481604 2023-01-22 14:06:21.064624: step: 244/466, loss: 2.2957598048378713e-05 2023-01-22 14:06:21.713170: step: 246/466, loss: 0.6001042723655701 2023-01-22 14:06:22.298746: step: 248/466, loss: 0.003817453980445862 2023-01-22 14:06:22.974187: step: 250/466, loss: 0.006529581733047962 2023-01-22 14:06:23.512587: step: 252/466, loss: 0.0010104465764015913 2023-01-22 14:06:24.130622: step: 254/466, loss: 0.02891940250992775 2023-01-22 14:06:24.723882: step: 256/466, loss: 0.01952749490737915 2023-01-22 14:06:25.285440: step: 258/466, loss: 0.0005210338858887553 2023-01-22 14:06:25.984356: step: 260/466, loss: 0.05206609517335892 2023-01-22 14:06:26.561774: step: 262/466, loss: 0.006175199057906866 2023-01-22 14:06:27.145780: step: 264/466, loss: 0.01327715627849102 2023-01-22 14:06:27.708809: step: 266/466, loss: 0.01733010821044445 2023-01-22 14:06:28.309760: step: 268/466, loss: 0.0005196294514462352 2023-01-22 14:06:28.898926: step: 270/466, loss: 0.0004086203407496214 2023-01-22 14:06:29.531437: step: 272/466, loss: 0.07791987806558609 2023-01-22 14:06:30.115230: step: 274/466, loss: 0.6732566356658936 2023-01-22 14:06:30.659886: step: 276/466, loss: 0.000941718346439302 2023-01-22 14:06:31.318446: step: 278/466, loss: 0.024449041113257408 2023-01-22 14:06:31.905274: step: 280/466, loss: 0.15648572146892548 2023-01-22 14:06:32.529115: step: 282/466, loss: 0.013371721841394901 2023-01-22 14:06:33.111560: step: 284/466, loss: 0.0007062857621349394 2023-01-22 14:06:33.676642: step: 286/466, loss: 0.010413425974547863 2023-01-22 14:06:34.249547: step: 288/466, loss: 0.0002909430186264217 2023-01-22 14:06:34.871751: step: 290/466, loss: 0.02714431658387184 2023-01-22 14:06:35.537000: step: 292/466, loss: 0.015656817704439163 2023-01-22 14:06:36.161844: step: 294/466, loss: 0.034121233969926834 2023-01-22 14:06:36.750543: step: 296/466, loss: 0.02271353267133236 2023-01-22 14:06:37.306703: step: 298/466, loss: 0.0004174639761913568 2023-01-22 14:06:37.944793: step: 300/466, loss: 0.0024831509217619896 2023-01-22 14:06:38.490767: step: 302/466, loss: 0.0013438124442473054 2023-01-22 14:06:39.096610: step: 304/466, loss: 0.005839589983224869 2023-01-22 14:06:39.719966: step: 306/466, loss: 0.020242193713784218 2023-01-22 14:06:40.411454: step: 308/466, loss: 0.00034786100150085986 2023-01-22 14:06:41.025952: step: 310/466, loss: 0.00068121642107144 2023-01-22 14:06:41.680310: step: 312/466, loss: 0.01574590802192688 2023-01-22 14:06:42.250395: step: 314/466, loss: 0.059288665652275085 2023-01-22 14:06:42.842702: step: 316/466, loss: 2.29655165639997e-06 2023-01-22 14:06:43.477848: step: 318/466, loss: 0.0023216346744447947 2023-01-22 14:06:44.111339: step: 320/466, loss: 0.01720270700752735 2023-01-22 14:06:44.678032: step: 322/466, loss: 0.010333416052162647 2023-01-22 14:06:45.302056: step: 324/466, loss: 0.019171064719557762 2023-01-22 14:06:45.848989: step: 326/466, loss: 0.00010471227869857103 2023-01-22 14:06:46.486944: step: 328/466, loss: 0.00028663675766438246 2023-01-22 14:06:47.134077: step: 330/466, loss: 0.044246140867471695 2023-01-22 14:06:47.746877: step: 332/466, loss: 0.0018243632512167096 2023-01-22 14:06:48.359753: step: 334/466, loss: 0.0005321518983691931 2023-01-22 14:06:48.927109: step: 336/466, loss: 0.0016758107813075185 2023-01-22 14:06:49.481168: step: 338/466, loss: 0.07432331889867783 2023-01-22 14:06:50.062152: step: 340/466, loss: 0.002926712855696678 2023-01-22 14:06:50.637586: step: 342/466, loss: 0.011686854995787144 2023-01-22 14:06:51.290959: step: 344/466, loss: 0.003963540308177471 2023-01-22 14:06:51.884302: step: 346/466, loss: 0.0013557865750044584 2023-01-22 14:06:52.467844: step: 348/466, loss: 0.006266581825911999 2023-01-22 14:06:53.067734: step: 350/466, loss: 0.005226859822869301 2023-01-22 14:06:53.679078: step: 352/466, loss: 1.3675599802809302e-05 2023-01-22 14:06:54.340706: step: 354/466, loss: 0.0035143913701176643 2023-01-22 14:06:55.000651: step: 356/466, loss: 0.0013365527847781777 2023-01-22 14:06:55.682449: step: 358/466, loss: 0.000295504491077736 2023-01-22 14:06:56.302191: step: 360/466, loss: 0.010652300901710987 2023-01-22 14:06:56.905862: step: 362/466, loss: 0.0011146770557388663 2023-01-22 14:06:57.523877: step: 364/466, loss: 0.16300763189792633 2023-01-22 14:06:58.125058: step: 366/466, loss: 0.00569057185202837 2023-01-22 14:06:58.765690: step: 368/466, loss: 0.04630326107144356 2023-01-22 14:06:59.387050: step: 370/466, loss: 0.00736673828214407 2023-01-22 14:07:00.012203: step: 372/466, loss: 0.24240000545978546 2023-01-22 14:07:00.577464: step: 374/466, loss: 0.00043700882815755904 2023-01-22 14:07:01.202034: step: 376/466, loss: 0.003471532603725791 2023-01-22 14:07:01.837707: step: 378/466, loss: 0.013358261436223984 2023-01-22 14:07:02.506314: step: 380/466, loss: 0.04184216260910034 2023-01-22 14:07:03.088918: step: 382/466, loss: 0.006497319787740707 2023-01-22 14:07:03.720100: step: 384/466, loss: 0.01057523861527443 2023-01-22 14:07:04.338983: step: 386/466, loss: 0.00025049291434697807 2023-01-22 14:07:04.942878: step: 388/466, loss: 0.09662456810474396 2023-01-22 14:07:05.642499: step: 390/466, loss: 0.0034395684488117695 2023-01-22 14:07:06.227940: step: 392/466, loss: 0.04491157457232475 2023-01-22 14:07:06.876817: step: 394/466, loss: 0.00925894919782877 2023-01-22 14:07:07.572701: step: 396/466, loss: 0.008593680337071419 2023-01-22 14:07:08.134010: step: 398/466, loss: 0.000266447284957394 2023-01-22 14:07:08.823452: step: 400/466, loss: 0.0008228529477491975 2023-01-22 14:07:09.328948: step: 402/466, loss: 0.0007457354222424328 2023-01-22 14:07:09.931892: step: 404/466, loss: 0.0006555092404596508 2023-01-22 14:07:10.533698: step: 406/466, loss: 0.013119361363351345 2023-01-22 14:07:11.167138: step: 408/466, loss: 0.004940561484545469 2023-01-22 14:07:11.804489: step: 410/466, loss: 0.0086433170363307 2023-01-22 14:07:12.394983: step: 412/466, loss: 0.00023552594939246774 2023-01-22 14:07:12.985633: step: 414/466, loss: 0.01769222877919674 2023-01-22 14:07:13.572749: step: 416/466, loss: 0.003074225503951311 2023-01-22 14:07:14.143128: step: 418/466, loss: 0.0005861958488821983 2023-01-22 14:07:14.806907: step: 420/466, loss: 0.012538060545921326 2023-01-22 14:07:15.365554: step: 422/466, loss: 0.06709732115268707 2023-01-22 14:07:15.949292: step: 424/466, loss: 0.021091319620609283 2023-01-22 14:07:16.587776: step: 426/466, loss: 0.006403537467122078 2023-01-22 14:07:17.246944: step: 428/466, loss: 0.03239838406443596 2023-01-22 14:07:17.826878: step: 430/466, loss: 0.0004718708514701575 2023-01-22 14:07:18.411891: step: 432/466, loss: 0.04298185929656029 2023-01-22 14:07:19.025006: step: 434/466, loss: 0.001556994509883225 2023-01-22 14:07:19.665921: step: 436/466, loss: 0.0026232635136693716 2023-01-22 14:07:20.279055: step: 438/466, loss: 0.0005475019570440054 2023-01-22 14:07:20.909364: step: 440/466, loss: 0.00032511187600903213 2023-01-22 14:07:21.535420: step: 442/466, loss: 0.030677784234285355 2023-01-22 14:07:22.135325: step: 444/466, loss: 0.028117796406149864 2023-01-22 14:07:22.755429: step: 446/466, loss: 0.008695948868989944 2023-01-22 14:07:23.361463: step: 448/466, loss: 0.009362148120999336 2023-01-22 14:07:23.914529: step: 450/466, loss: 0.0016575742047280073 2023-01-22 14:07:24.526846: step: 452/466, loss: 0.04321468248963356 2023-01-22 14:07:25.165533: step: 454/466, loss: 0.02245822176337242 2023-01-22 14:07:25.812836: step: 456/466, loss: 0.006367537658661604 2023-01-22 14:07:26.420321: step: 458/466, loss: 0.014981807209551334 2023-01-22 14:07:26.983244: step: 460/466, loss: 0.015233676880598068 2023-01-22 14:07:27.610104: step: 462/466, loss: 0.0007042231736704707 2023-01-22 14:07:28.307859: step: 464/466, loss: 0.011992243118584156 2023-01-22 14:07:28.873331: step: 466/466, loss: 0.07552166283130646 2023-01-22 14:07:29.433427: step: 468/466, loss: 0.0010221432894468307 2023-01-22 14:07:30.089630: step: 470/466, loss: 0.002009578747674823 2023-01-22 14:07:30.688621: step: 472/466, loss: 0.00045798145583830774 2023-01-22 14:07:31.326837: step: 474/466, loss: 0.0030494979582726955 2023-01-22 14:07:31.987757: step: 476/466, loss: 0.07997559756040573 2023-01-22 14:07:32.542145: step: 478/466, loss: 0.0004329228540882468 2023-01-22 14:07:33.107661: step: 480/466, loss: 0.0002875140926335007 2023-01-22 14:07:33.657631: step: 482/466, loss: 0.039382219314575195 2023-01-22 14:07:34.294982: step: 484/466, loss: 0.029435431584715843 2023-01-22 14:07:34.926344: step: 486/466, loss: 0.008995798416435719 2023-01-22 14:07:35.536441: step: 488/466, loss: 0.0009553866693750024 2023-01-22 14:07:36.093644: step: 490/466, loss: 0.0010284394957125187 2023-01-22 14:07:36.682578: step: 492/466, loss: 0.0042778304778039455 2023-01-22 14:07:37.271875: step: 494/466, loss: 5.903772034798749e-05 2023-01-22 14:07:37.867303: step: 496/466, loss: 0.0021234445739537477 2023-01-22 14:07:38.402362: step: 498/466, loss: 0.01050239522010088 2023-01-22 14:07:39.047626: step: 500/466, loss: 0.008695174008607864 2023-01-22 14:07:39.621791: step: 502/466, loss: 0.004072991665452719 2023-01-22 14:07:40.272700: step: 504/466, loss: 0.002067763125523925 2023-01-22 14:07:40.862348: step: 506/466, loss: 0.02591841109097004 2023-01-22 14:07:41.462371: step: 508/466, loss: 0.012583310715854168 2023-01-22 14:07:42.042963: step: 510/466, loss: 0.009463530965149403 2023-01-22 14:07:42.656783: step: 512/466, loss: 0.0017065029824152589 2023-01-22 14:07:43.286672: step: 514/466, loss: 0.28508394956588745 2023-01-22 14:07:43.906926: step: 516/466, loss: 0.002218435751274228 2023-01-22 14:07:44.535244: step: 518/466, loss: 0.031737249344587326 2023-01-22 14:07:45.104943: step: 520/466, loss: 0.004431023262441158 2023-01-22 14:07:45.767434: step: 522/466, loss: 0.002075351309031248 2023-01-22 14:07:46.320905: step: 524/466, loss: 0.027918415144085884 2023-01-22 14:07:46.893127: step: 526/466, loss: 0.0009299259399995208 2023-01-22 14:07:47.471246: step: 528/466, loss: 4.171831096755341e-05 2023-01-22 14:07:48.073095: step: 530/466, loss: 0.00861969031393528 2023-01-22 14:07:48.598811: step: 532/466, loss: 0.000704513571690768 2023-01-22 14:07:49.211331: step: 534/466, loss: 0.023732317611575127 2023-01-22 14:07:49.849149: step: 536/466, loss: 0.006008785683661699 2023-01-22 14:07:50.428250: step: 538/466, loss: 0.0002596065169200301 2023-01-22 14:07:51.105766: step: 540/466, loss: 0.013545497320592403 2023-01-22 14:07:51.694991: step: 542/466, loss: 0.00020320458861533552 2023-01-22 14:07:52.323521: step: 544/466, loss: 0.002717326395213604 2023-01-22 14:07:52.943768: step: 546/466, loss: 0.03454839065670967 2023-01-22 14:07:53.634324: step: 548/466, loss: 0.005441261455416679 2023-01-22 14:07:54.239533: step: 550/466, loss: 0.00033432990312576294 2023-01-22 14:07:54.880649: step: 552/466, loss: 0.0008576879044994712 2023-01-22 14:07:55.561530: step: 554/466, loss: 0.01396965142339468 2023-01-22 14:07:56.103872: step: 556/466, loss: 0.00013199001841712743 2023-01-22 14:07:56.684142: step: 558/466, loss: 0.0009953243425115943 2023-01-22 14:07:57.292045: step: 560/466, loss: 0.0013477897737175226 2023-01-22 14:07:57.938310: step: 562/466, loss: 0.006996339187026024 2023-01-22 14:07:58.475107: step: 564/466, loss: 0.004686971195042133 2023-01-22 14:07:59.132846: step: 566/466, loss: 0.0018786239670589566 2023-01-22 14:07:59.726092: step: 568/466, loss: 0.001708917785435915 2023-01-22 14:08:00.397914: step: 570/466, loss: 0.00029886234551668167 2023-01-22 14:08:00.962143: step: 572/466, loss: 0.0005396566120907664 2023-01-22 14:08:01.589146: step: 574/466, loss: 0.0022396999411284924 2023-01-22 14:08:02.189069: step: 576/466, loss: 0.039275556802749634 2023-01-22 14:08:02.802701: step: 578/466, loss: 0.00020643284369725734 2023-01-22 14:08:03.447409: step: 580/466, loss: 0.004480894189327955 2023-01-22 14:08:04.131305: step: 582/466, loss: 0.14224693179130554 2023-01-22 14:08:04.654319: step: 584/466, loss: 0.005192149896174669 2023-01-22 14:08:05.336324: step: 586/466, loss: 0.014921769499778748 2023-01-22 14:08:05.935070: step: 588/466, loss: 0.04195178672671318 2023-01-22 14:08:06.628796: step: 590/466, loss: 0.021067453548312187 2023-01-22 14:08:07.247684: step: 592/466, loss: 0.0013888038229197264 2023-01-22 14:08:07.823732: step: 594/466, loss: 9.107340883929282e-05 2023-01-22 14:08:08.385237: step: 596/466, loss: 0.039545539766550064 2023-01-22 14:08:09.000927: step: 598/466, loss: 0.0014226339990273118 2023-01-22 14:08:09.603473: step: 600/466, loss: 0.011670433916151524 2023-01-22 14:08:10.202605: step: 602/466, loss: 0.03440529480576515 2023-01-22 14:08:10.844533: step: 604/466, loss: 0.010719393379986286 2023-01-22 14:08:11.413042: step: 606/466, loss: 0.0003759894461836666 2023-01-22 14:08:11.998657: step: 608/466, loss: 0.028834575787186623 2023-01-22 14:08:12.586356: step: 610/466, loss: 0.014594810083508492 2023-01-22 14:08:13.211322: step: 612/466, loss: 0.001817152719013393 2023-01-22 14:08:13.730784: step: 614/466, loss: 0.027273844927549362 2023-01-22 14:08:14.376204: step: 616/466, loss: 0.016842560842633247 2023-01-22 14:08:15.020461: step: 618/466, loss: 0.005247347056865692 2023-01-22 14:08:15.683935: step: 620/466, loss: 0.05189330130815506 2023-01-22 14:08:16.294272: step: 622/466, loss: 0.002156839007511735 2023-01-22 14:08:16.952958: step: 624/466, loss: 0.037101082503795624 2023-01-22 14:08:17.540068: step: 626/466, loss: 0.021021481603384018 2023-01-22 14:08:18.152644: step: 628/466, loss: 0.011911360546946526 2023-01-22 14:08:18.778734: step: 630/466, loss: 0.3343954086303711 2023-01-22 14:08:19.415352: step: 632/466, loss: 0.0003303734411019832 2023-01-22 14:08:20.052231: step: 634/466, loss: 0.0020633211825042963 2023-01-22 14:08:20.643783: step: 636/466, loss: 0.02070808969438076 2023-01-22 14:08:21.254076: step: 638/466, loss: 0.6909323930740356 2023-01-22 14:08:21.827958: step: 640/466, loss: 0.030152659863233566 2023-01-22 14:08:22.421532: step: 642/466, loss: 0.009435637854039669 2023-01-22 14:08:23.056983: step: 644/466, loss: 0.022851625457406044 2023-01-22 14:08:23.645322: step: 646/466, loss: 0.004921406973153353 2023-01-22 14:08:24.360263: step: 648/466, loss: 0.007689262740314007 2023-01-22 14:08:24.900399: step: 650/466, loss: 1.8792456103255972e-05 2023-01-22 14:08:25.485455: step: 652/466, loss: 0.001837368356063962 2023-01-22 14:08:26.134483: step: 654/466, loss: 0.5769074559211731 2023-01-22 14:08:26.737654: step: 656/466, loss: 0.025163520127534866 2023-01-22 14:08:27.328940: step: 658/466, loss: 0.3724534213542938 2023-01-22 14:08:27.903332: step: 660/466, loss: 0.006495846901088953 2023-01-22 14:08:28.536839: step: 662/466, loss: 0.737528383731842 2023-01-22 14:08:29.114367: step: 664/466, loss: 0.030375540256500244 2023-01-22 14:08:29.674482: step: 666/466, loss: 6.750689499313012e-05 2023-01-22 14:08:30.188065: step: 668/466, loss: 0.0002596633567009121 2023-01-22 14:08:30.807348: step: 670/466, loss: 0.00030843622516840696 2023-01-22 14:08:31.372304: step: 672/466, loss: 0.003468897892162204 2023-01-22 14:08:32.002117: step: 674/466, loss: 1.4518646001815796 2023-01-22 14:08:32.628788: step: 676/466, loss: 0.003203595755621791 2023-01-22 14:08:33.396967: step: 678/466, loss: 0.0002415669005131349 2023-01-22 14:08:33.975508: step: 680/466, loss: 0.1821751594543457 2023-01-22 14:08:34.560807: step: 682/466, loss: 0.022470567375421524 2023-01-22 14:08:35.163113: step: 684/466, loss: 0.0009556738659739494 2023-01-22 14:08:35.742500: step: 686/466, loss: 0.008491684682667255 2023-01-22 14:08:36.439646: step: 688/466, loss: 0.10258602350950241 2023-01-22 14:08:36.994406: step: 690/466, loss: 0.0031227143481373787 2023-01-22 14:08:37.590409: step: 692/466, loss: 0.0009964940836653113 2023-01-22 14:08:38.144496: step: 694/466, loss: 0.001099542947486043 2023-01-22 14:08:38.799442: step: 696/466, loss: 0.011481222696602345 2023-01-22 14:08:39.451437: step: 698/466, loss: 3.475100517272949 2023-01-22 14:08:40.077042: step: 700/466, loss: 0.002428211271762848 2023-01-22 14:08:40.730923: step: 702/466, loss: 0.00325836637057364 2023-01-22 14:08:41.336819: step: 704/466, loss: 0.06448805332183838 2023-01-22 14:08:41.908819: step: 706/466, loss: 19.545780181884766 2023-01-22 14:08:42.508861: step: 708/466, loss: 0.00236521870829165 2023-01-22 14:08:43.150752: step: 710/466, loss: 0.007757192011922598 2023-01-22 14:08:43.775318: step: 712/466, loss: 0.47919291257858276 2023-01-22 14:08:44.389427: step: 714/466, loss: 0.06101994961500168 2023-01-22 14:08:45.032826: step: 716/466, loss: 0.002332099014893174 2023-01-22 14:08:45.658933: step: 718/466, loss: 2.3008902644505724e-05 2023-01-22 14:08:46.199978: step: 720/466, loss: 0.015303296968340874 2023-01-22 14:08:46.820416: step: 722/466, loss: 0.009421579539775848 2023-01-22 14:08:47.351658: step: 724/466, loss: 3.652509258245118e-05 2023-01-22 14:08:47.964100: step: 726/466, loss: 0.008834085427224636 2023-01-22 14:08:48.514975: step: 728/466, loss: 4.781947791343555e-05 2023-01-22 14:08:49.116302: step: 730/466, loss: 0.004869956523180008 2023-01-22 14:08:49.673188: step: 732/466, loss: 0.0074061122722923756 2023-01-22 14:08:50.302307: step: 734/466, loss: 0.001221657614223659 2023-01-22 14:08:50.912321: step: 736/466, loss: 0.026640279218554497 2023-01-22 14:08:51.494300: step: 738/466, loss: 0.0030338140204548836 2023-01-22 14:08:52.151259: step: 740/466, loss: 0.03298734873533249 2023-01-22 14:08:52.714592: step: 742/466, loss: 0.06399720907211304 2023-01-22 14:08:53.327016: step: 744/466, loss: 0.01769380457699299 2023-01-22 14:08:53.928007: step: 746/466, loss: 0.030083784833550453 2023-01-22 14:08:54.558631: step: 748/466, loss: 0.001962339971214533 2023-01-22 14:08:55.129693: step: 750/466, loss: 0.009762940928339958 2023-01-22 14:08:55.690598: step: 752/466, loss: 0.005155081860721111 2023-01-22 14:08:56.305254: step: 754/466, loss: 0.03969765082001686 2023-01-22 14:08:56.951187: step: 756/466, loss: 0.00046790673513896763 2023-01-22 14:08:57.519569: step: 758/466, loss: 9.174644947052002e-05 2023-01-22 14:08:58.105659: step: 760/466, loss: 0.001274529262445867 2023-01-22 14:08:58.751842: step: 762/466, loss: 0.009623071178793907 2023-01-22 14:08:59.391797: step: 764/466, loss: 0.0006088234367780387 2023-01-22 14:09:00.058595: step: 766/466, loss: 0.005249843932688236 2023-01-22 14:09:00.607468: step: 768/466, loss: 0.00038335684803314507 2023-01-22 14:09:01.228841: step: 770/466, loss: 0.015080186538398266 2023-01-22 14:09:01.831280: step: 772/466, loss: 0.00324824801646173 2023-01-22 14:09:02.430636: step: 774/466, loss: 0.0041448757983744144 2023-01-22 14:09:03.081208: step: 776/466, loss: 0.0011435933411121368 2023-01-22 14:09:03.708167: step: 778/466, loss: 0.0011415882036089897 2023-01-22 14:09:04.294014: step: 780/466, loss: 0.0017376645701006055 2023-01-22 14:09:05.026569: step: 782/466, loss: 0.06268323957920074 2023-01-22 14:09:05.620810: step: 784/466, loss: 0.04164966568350792 2023-01-22 14:09:06.299841: step: 786/466, loss: 0.05158247798681259 2023-01-22 14:09:06.867319: step: 788/466, loss: 0.28123775124549866 2023-01-22 14:09:07.431609: step: 790/466, loss: 0.002174936467781663 2023-01-22 14:09:08.098154: step: 792/466, loss: 0.009450436569750309 2023-01-22 14:09:08.746892: step: 794/466, loss: 0.00193881057202816 2023-01-22 14:09:09.379457: step: 796/466, loss: 0.009928989224135876 2023-01-22 14:09:09.941383: step: 798/466, loss: 2.243615199404303e-06 2023-01-22 14:09:10.667461: step: 800/466, loss: 0.001064679236151278 2023-01-22 14:09:11.278036: step: 802/466, loss: 0.0488688126206398 2023-01-22 14:09:11.843308: step: 804/466, loss: 0.0025524573866277933 2023-01-22 14:09:12.438754: step: 806/466, loss: 0.010675763711333275 2023-01-22 14:09:13.043239: step: 808/466, loss: 0.00010395424033049494 2023-01-22 14:09:13.633792: step: 810/466, loss: 0.0003952126717194915 2023-01-22 14:09:14.233021: step: 812/466, loss: 0.04357978701591492 2023-01-22 14:09:14.840536: step: 814/466, loss: 0.0015918684657663107 2023-01-22 14:09:15.434831: step: 816/466, loss: 0.004090688657015562 2023-01-22 14:09:16.042946: step: 818/466, loss: 2.7879799745278433e-05 2023-01-22 14:09:16.644429: step: 820/466, loss: 0.010990914888679981 2023-01-22 14:09:17.243920: step: 822/466, loss: 0.006704024970531464 2023-01-22 14:09:17.864171: step: 824/466, loss: 0.014601575210690498 2023-01-22 14:09:18.445844: step: 826/466, loss: 0.01189486589282751 2023-01-22 14:09:19.046236: step: 828/466, loss: 0.1457836776971817 2023-01-22 14:09:19.623618: step: 830/466, loss: 6.903712346684188e-06 2023-01-22 14:09:20.233921: step: 832/466, loss: 0.000658975972328335 2023-01-22 14:09:20.843142: step: 834/466, loss: 0.004803813993930817 2023-01-22 14:09:21.441796: step: 836/466, loss: 0.07124597579240799 2023-01-22 14:09:22.033124: step: 838/466, loss: 0.00013458853936754167 2023-01-22 14:09:22.632275: step: 840/466, loss: 0.016244329512119293 2023-01-22 14:09:23.338524: step: 842/466, loss: 0.027946149930357933 2023-01-22 14:09:23.945401: step: 844/466, loss: 0.002878320636227727 2023-01-22 14:09:24.535611: step: 846/466, loss: 0.0013908261898905039 2023-01-22 14:09:25.118339: step: 848/466, loss: 3.669026045827195e-05 2023-01-22 14:09:25.713677: step: 850/466, loss: 0.014631629921495914 2023-01-22 14:09:26.289128: step: 852/466, loss: 0.038550667464733124 2023-01-22 14:09:26.854126: step: 854/466, loss: 0.0003395678650122136 2023-01-22 14:09:27.483328: step: 856/466, loss: 0.015413991175591946 2023-01-22 14:09:28.076524: step: 858/466, loss: 0.5240403413772583 2023-01-22 14:09:28.799643: step: 860/466, loss: 0.04657680168747902 2023-01-22 14:09:29.433834: step: 862/466, loss: 0.000312518939608708 2023-01-22 14:09:30.094270: step: 864/466, loss: 0.011041994206607342 2023-01-22 14:09:30.726400: step: 866/466, loss: 0.005065944045782089 2023-01-22 14:09:31.339176: step: 868/466, loss: 0.11262018233537674 2023-01-22 14:09:31.905681: step: 870/466, loss: 0.00018041301518678665 2023-01-22 14:09:32.503235: step: 872/466, loss: 0.0028136800974607468 2023-01-22 14:09:33.088772: step: 874/466, loss: 0.0003731518518179655 2023-01-22 14:09:33.707920: step: 876/466, loss: 0.03876349702477455 2023-01-22 14:09:34.402250: step: 878/466, loss: 0.00015004277520347387 2023-01-22 14:09:35.074515: step: 880/466, loss: 0.08454664051532745 2023-01-22 14:09:35.635471: step: 882/466, loss: 0.0009456683765165508 2023-01-22 14:09:36.292454: step: 884/466, loss: 0.0033473100047558546 2023-01-22 14:09:36.940020: step: 886/466, loss: 0.005513956304639578 2023-01-22 14:09:37.562615: step: 888/466, loss: 0.018945837393403053 2023-01-22 14:09:38.140570: step: 890/466, loss: 0.000268537609372288 2023-01-22 14:09:38.798259: step: 892/466, loss: 0.0008538711117580533 2023-01-22 14:09:39.403325: step: 894/466, loss: 0.049071550369262695 2023-01-22 14:09:40.054701: step: 896/466, loss: 0.023004358634352684 2023-01-22 14:09:40.675004: step: 898/466, loss: 0.03628868982195854 2023-01-22 14:09:41.354888: step: 900/466, loss: 0.00012156509910710156 2023-01-22 14:09:41.978273: step: 902/466, loss: 0.0007333770045079291 2023-01-22 14:09:42.574528: step: 904/466, loss: 0.012230328284204006 2023-01-22 14:09:43.231983: step: 906/466, loss: 0.021114924922585487 2023-01-22 14:09:43.840997: step: 908/466, loss: 0.0005755637539550662 2023-01-22 14:09:44.448607: step: 910/466, loss: 0.001959437970072031 2023-01-22 14:09:45.079829: step: 912/466, loss: 0.003534089308232069 2023-01-22 14:09:45.833500: step: 914/466, loss: 0.025242313742637634 2023-01-22 14:09:46.470027: step: 916/466, loss: 0.0010080791544169188 2023-01-22 14:09:47.060454: step: 918/466, loss: 0.002278506988659501 2023-01-22 14:09:47.708447: step: 920/466, loss: 9.028924978338182e-05 2023-01-22 14:09:48.319816: step: 922/466, loss: 0.004241098649799824 2023-01-22 14:09:48.957817: step: 924/466, loss: 0.02510761469602585 2023-01-22 14:09:49.553895: step: 926/466, loss: 0.0008595963008701801 2023-01-22 14:09:50.212221: step: 928/466, loss: 0.004858372732996941 2023-01-22 14:09:50.797063: step: 930/466, loss: 0.0005197401624172926 2023-01-22 14:09:51.418324: step: 932/466, loss: 0.03285631164908409 ================================================== Loss: 0.083 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3153392847861116, 'r': 0.34884782358691285, 'f1': 0.33124829374829384}, 'combined': 0.24407769013032177, 'epoch': 37} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35159523079414584, 'r': 0.3384828327679982, 'f1': 0.34491445530844184}, 'combined': 0.22875155585223084, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3073424796747968, 'r': 0.2863873106060606, 'f1': 0.2964950980392157}, 'combined': 0.19766339869281047, 'epoch': 37} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3617907429020613, 'r': 0.31037507406675974, 'f1': 0.3341164510009708}, 'combined': 0.2180549469690546, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3011744296340672, 'r': 0.34689350813639236, 'f1': 0.3224213029768585}, 'combined': 0.23757359166715888, 'epoch': 37} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.35174068312933854, 'r': 0.3298139911940032, 'f1': 0.34042462898040543}, 'combined': 0.22577384719943985, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27721088435374147, 'r': 0.38809523809523805, 'f1': 0.3234126984126983}, 'combined': 0.21560846560846553, 'epoch': 37} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.43478260869565216, 'f1': 0.4444444444444445}, 'combined': 0.2962962962962963, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3194444444444444, 'r': 0.19827586206896552, 'f1': 0.24468085106382978}, 'combined': 0.1631205673758865, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:12:19.077864: step: 2/466, loss: 0.021889768540859222 2023-01-22 14:12:19.655914: step: 4/466, loss: 0.06989212334156036 2023-01-22 14:12:20.276447: step: 6/466, loss: 0.004475723020732403 2023-01-22 14:12:20.824633: step: 8/466, loss: 0.054539233446121216 2023-01-22 14:12:21.536647: step: 10/466, loss: 0.04412826523184776 2023-01-22 14:12:22.131166: step: 12/466, loss: 0.0008282770286314189 2023-01-22 14:12:22.697252: step: 14/466, loss: 0.00038537505315616727 2023-01-22 14:12:23.361204: step: 16/466, loss: 0.005497610196471214 2023-01-22 14:12:23.925635: step: 18/466, loss: 0.0009423164883628488 2023-01-22 14:12:24.537115: step: 20/466, loss: 0.02297457680106163 2023-01-22 14:12:25.141268: step: 22/466, loss: 0.041743602603673935 2023-01-22 14:12:25.714054: step: 24/466, loss: 0.0004014494188595563 2023-01-22 14:12:26.318069: step: 26/466, loss: 0.003360750386491418 2023-01-22 14:12:26.944112: step: 28/466, loss: 0.001229410874657333 2023-01-22 14:12:27.583682: step: 30/466, loss: 0.007953322492539883 2023-01-22 14:12:28.197031: step: 32/466, loss: 0.5298741459846497 2023-01-22 14:12:28.866161: step: 34/466, loss: 0.0012196226743981242 2023-01-22 14:12:29.496257: step: 36/466, loss: 0.23362146317958832 2023-01-22 14:12:30.079321: step: 38/466, loss: 0.002468062797561288 2023-01-22 14:12:30.743813: step: 40/466, loss: 0.06696293503046036 2023-01-22 14:12:31.372211: step: 42/466, loss: 0.013891519047319889 2023-01-22 14:12:32.069873: step: 44/466, loss: 6.297170330071822e-05 2023-01-22 14:12:32.668255: step: 46/466, loss: 0.06854761391878128 2023-01-22 14:12:33.236205: step: 48/466, loss: 0.11188627034425735 2023-01-22 14:12:33.831702: step: 50/466, loss: 0.002684258623048663 2023-01-22 14:12:34.470964: step: 52/466, loss: 2.0872628283541417e-06 2023-01-22 14:12:35.085159: step: 54/466, loss: 0.03521451726555824 2023-01-22 14:12:35.707464: step: 56/466, loss: 0.0017311619594693184 2023-01-22 14:12:36.278509: step: 58/466, loss: 0.006770215928554535 2023-01-22 14:12:36.913511: step: 60/466, loss: 0.0028816615231335163 2023-01-22 14:12:37.540665: step: 62/466, loss: 0.0008021043031476438 2023-01-22 14:12:38.177123: step: 64/466, loss: 0.0010710136266425252 2023-01-22 14:12:38.831470: step: 66/466, loss: 0.00023852888261899352 2023-01-22 14:12:39.451130: step: 68/466, loss: 0.00030558169237338006 2023-01-22 14:12:40.076067: step: 70/466, loss: 0.0077896034345030785 2023-01-22 14:12:40.619507: step: 72/466, loss: 0.0009368745377287269 2023-01-22 14:12:41.261659: step: 74/466, loss: 0.0009904556209221482 2023-01-22 14:12:41.802738: step: 76/466, loss: 0.00010989147267537192 2023-01-22 14:12:42.400319: step: 78/466, loss: 0.02084057591855526 2023-01-22 14:12:43.029098: step: 80/466, loss: 0.4380775988101959 2023-01-22 14:12:43.650339: step: 82/466, loss: 0.010385693050920963 2023-01-22 14:12:44.305621: step: 84/466, loss: 0.010272561572492123 2023-01-22 14:12:44.888751: step: 86/466, loss: 0.0013637507800012827 2023-01-22 14:12:45.420070: step: 88/466, loss: 0.000447495753178373 2023-01-22 14:12:46.063340: step: 90/466, loss: 0.000879900180734694 2023-01-22 14:12:46.647913: step: 92/466, loss: 0.03361436724662781 2023-01-22 14:12:47.289121: step: 94/466, loss: 0.0165041983127594 2023-01-22 14:12:47.906610: step: 96/466, loss: 0.022173665463924408 2023-01-22 14:12:48.478483: step: 98/466, loss: 0.005095276981592178 2023-01-22 14:12:49.029838: step: 100/466, loss: 0.00683171022683382 2023-01-22 14:12:49.643614: step: 102/466, loss: 0.01334309671074152 2023-01-22 14:12:50.253011: step: 104/466, loss: 5.372382656787522e-05 2023-01-22 14:12:50.904495: step: 106/466, loss: 0.0027603597845882177 2023-01-22 14:12:51.480316: step: 108/466, loss: 0.046707384288311005 2023-01-22 14:12:52.078372: step: 110/466, loss: 6.69806604491896e-06 2023-01-22 14:12:52.682106: step: 112/466, loss: 0.0007423114730045199 2023-01-22 14:12:53.309204: step: 114/466, loss: 0.002536195795983076 2023-01-22 14:12:53.922754: step: 116/466, loss: 0.0004711195360869169 2023-01-22 14:12:54.455052: step: 118/466, loss: 0.010566012002527714 2023-01-22 14:12:55.102969: step: 120/466, loss: 0.004253188613802195 2023-01-22 14:12:55.721189: step: 122/466, loss: 0.06324297934770584 2023-01-22 14:12:56.313905: step: 124/466, loss: 0.015077656134963036 2023-01-22 14:12:56.974637: step: 126/466, loss: 0.0012931502424180508 2023-01-22 14:12:57.602475: step: 128/466, loss: 0.010562596842646599 2023-01-22 14:12:58.226118: step: 130/466, loss: 0.13915599882602692 2023-01-22 14:12:58.844419: step: 132/466, loss: 0.016481924802064896 2023-01-22 14:12:59.436812: step: 134/466, loss: 0.019297005608677864 2023-01-22 14:13:00.089322: step: 136/466, loss: 0.06798158586025238 2023-01-22 14:13:00.727094: step: 138/466, loss: 0.02616349793970585 2023-01-22 14:13:01.349969: step: 140/466, loss: 0.06994859874248505 2023-01-22 14:13:01.977703: step: 142/466, loss: 0.01263127289712429 2023-01-22 14:13:02.615555: step: 144/466, loss: 0.028079643845558167 2023-01-22 14:13:03.181353: step: 146/466, loss: 0.00032038360950537026 2023-01-22 14:13:03.796050: step: 148/466, loss: 0.039556972682476044 2023-01-22 14:13:04.361839: step: 150/466, loss: 2.667648186616134e-05 2023-01-22 14:13:04.967585: step: 152/466, loss: 0.00024154878337867558 2023-01-22 14:13:05.697334: step: 154/466, loss: 0.010044950991868973 2023-01-22 14:13:06.250001: step: 156/466, loss: 0.00782605167478323 2023-01-22 14:13:06.854797: step: 158/466, loss: 0.00023160962155088782 2023-01-22 14:13:07.401145: step: 160/466, loss: 0.056918852031230927 2023-01-22 14:13:07.965105: step: 162/466, loss: 0.0318228043615818 2023-01-22 14:13:08.566046: step: 164/466, loss: 0.0011338344775140285 2023-01-22 14:13:09.158925: step: 166/466, loss: 0.028528816998004913 2023-01-22 14:13:09.747381: step: 168/466, loss: 0.0003143611247651279 2023-01-22 14:13:10.307939: step: 170/466, loss: 0.02056513912975788 2023-01-22 14:13:10.911443: step: 172/466, loss: 0.0010503178928047419 2023-01-22 14:13:11.521453: step: 174/466, loss: 0.025755485519766808 2023-01-22 14:13:12.165329: step: 176/466, loss: 3.9795515476725996e-05 2023-01-22 14:13:12.803713: step: 178/466, loss: 0.031630728393793106 2023-01-22 14:13:13.404120: step: 180/466, loss: 0.061293408274650574 2023-01-22 14:13:13.986302: step: 182/466, loss: 0.010040693916380405 2023-01-22 14:13:14.582459: step: 184/466, loss: 1.6368253231048584 2023-01-22 14:13:15.144778: step: 186/466, loss: 0.00037069921381771564 2023-01-22 14:13:15.741846: step: 188/466, loss: 0.0012840436538681388 2023-01-22 14:13:16.313246: step: 190/466, loss: 0.001338571310043335 2023-01-22 14:13:16.954454: step: 192/466, loss: 0.013955621048808098 2023-01-22 14:13:17.537702: step: 194/466, loss: 0.04023696482181549 2023-01-22 14:13:18.177228: step: 196/466, loss: 0.0016070693964138627 2023-01-22 14:13:18.706440: step: 198/466, loss: 0.0025192557368427515 2023-01-22 14:13:19.335431: step: 200/466, loss: 0.0037205498665571213 2023-01-22 14:13:19.973793: step: 202/466, loss: 0.008088977076113224 2023-01-22 14:13:20.574767: step: 204/466, loss: 0.006108959671109915 2023-01-22 14:13:21.148940: step: 206/466, loss: 0.0037351585924625397 2023-01-22 14:13:21.723296: step: 208/466, loss: 0.04453587904572487 2023-01-22 14:13:22.280651: step: 210/466, loss: 0.00010247220052406192 2023-01-22 14:13:22.930105: step: 212/466, loss: 0.07220045477151871 2023-01-22 14:13:23.529891: step: 214/466, loss: 0.03526448830962181 2023-01-22 14:13:24.115190: step: 216/466, loss: 0.0002246480726171285 2023-01-22 14:13:24.702047: step: 218/466, loss: 0.16548627614974976 2023-01-22 14:13:25.267889: step: 220/466, loss: 0.001846380764618516 2023-01-22 14:13:25.867789: step: 222/466, loss: 0.03966742008924484 2023-01-22 14:13:26.503112: step: 224/466, loss: 0.0020343000069260597 2023-01-22 14:13:27.077865: step: 226/466, loss: 0.005603497382253408 2023-01-22 14:13:27.624003: step: 228/466, loss: 0.010272718966007233 2023-01-22 14:13:28.238421: step: 230/466, loss: 0.035140909254550934 2023-01-22 14:13:28.829996: step: 232/466, loss: 0.008486488834023476 2023-01-22 14:13:29.353377: step: 234/466, loss: 0.004173364490270615 2023-01-22 14:13:29.939239: step: 236/466, loss: 0.004337500315159559 2023-01-22 14:13:30.547097: step: 238/466, loss: 0.006094334181398153 2023-01-22 14:13:31.258373: step: 240/466, loss: 0.007469480391591787 2023-01-22 14:13:31.830321: step: 242/466, loss: 0.0025166794657707214 2023-01-22 14:13:32.423382: step: 244/466, loss: 0.04684692248702049 2023-01-22 14:13:33.047277: step: 246/466, loss: 0.014013230800628662 2023-01-22 14:13:33.657957: step: 248/466, loss: 0.0654238760471344 2023-01-22 14:13:34.189255: step: 250/466, loss: 0.0053533343598246574 2023-01-22 14:13:34.810876: step: 252/466, loss: 0.00657287985086441 2023-01-22 14:13:35.348326: step: 254/466, loss: 0.0019773482345044613 2023-01-22 14:13:35.947568: step: 256/466, loss: 0.009218761697411537 2023-01-22 14:13:36.548781: step: 258/466, loss: 0.0005720287445001304 2023-01-22 14:13:37.143604: step: 260/466, loss: 0.0006210109568201005 2023-01-22 14:13:37.784750: step: 262/466, loss: 0.004660551901906729 2023-01-22 14:13:38.488362: step: 264/466, loss: 0.004938114434480667 2023-01-22 14:13:39.096877: step: 266/466, loss: 0.0023985709995031357 2023-01-22 14:13:39.684643: step: 268/466, loss: 0.0012294613989070058 2023-01-22 14:13:40.300003: step: 270/466, loss: 0.0014288395177572966 2023-01-22 14:13:40.982825: step: 272/466, loss: 0.0003633807064034045 2023-01-22 14:13:41.603402: step: 274/466, loss: 0.008363694883883 2023-01-22 14:13:42.241622: step: 276/466, loss: 0.009151811711490154 2023-01-22 14:13:42.850804: step: 278/466, loss: 0.005131376441568136 2023-01-22 14:13:43.452627: step: 280/466, loss: 0.0008800456416793168 2023-01-22 14:13:44.068863: step: 282/466, loss: 0.00010396930883871391 2023-01-22 14:13:44.657655: step: 284/466, loss: 0.002452635671943426 2023-01-22 14:13:45.214553: step: 286/466, loss: 0.010570264421403408 2023-01-22 14:13:45.809140: step: 288/466, loss: 0.05791933462023735 2023-01-22 14:13:46.363462: step: 290/466, loss: 0.0001513109600637108 2023-01-22 14:13:46.971527: step: 292/466, loss: 0.527622640132904 2023-01-22 14:13:47.533107: step: 294/466, loss: 0.03949645906686783 2023-01-22 14:13:48.126567: step: 296/466, loss: 6.994983673095703 2023-01-22 14:13:48.753565: step: 298/466, loss: 0.00429984787479043 2023-01-22 14:13:49.350631: step: 300/466, loss: 0.018944447860121727 2023-01-22 14:13:49.940307: step: 302/466, loss: 0.006439780816435814 2023-01-22 14:13:50.582602: step: 304/466, loss: 0.001012927619740367 2023-01-22 14:13:51.167029: step: 306/466, loss: 0.029987772926688194 2023-01-22 14:13:51.732095: step: 308/466, loss: 0.0018202938372269273 2023-01-22 14:13:52.395338: step: 310/466, loss: 0.014104348607361317 2023-01-22 14:13:53.043651: step: 312/466, loss: 0.009202539920806885 2023-01-22 14:13:53.743830: step: 314/466, loss: 0.006665595341473818 2023-01-22 14:13:54.421657: step: 316/466, loss: 0.022786777466535568 2023-01-22 14:13:55.030014: step: 318/466, loss: 0.1801539659500122 2023-01-22 14:13:55.583893: step: 320/466, loss: 0.0052084228955209255 2023-01-22 14:13:56.222506: step: 322/466, loss: 0.010929033160209656 2023-01-22 14:13:56.913465: step: 324/466, loss: 0.0002629157970659435 2023-01-22 14:13:57.573921: step: 326/466, loss: 0.003500701393932104 2023-01-22 14:13:58.257839: step: 328/466, loss: 0.0722888931632042 2023-01-22 14:13:58.824429: step: 330/466, loss: 0.0023090096656233072 2023-01-22 14:13:59.455008: step: 332/466, loss: 0.00232186121866107 2023-01-22 14:14:00.139191: step: 334/466, loss: 0.000827113923151046 2023-01-22 14:14:00.711895: step: 336/466, loss: 0.01287727802991867 2023-01-22 14:14:01.303205: step: 338/466, loss: 2.9299275411176495e-05 2023-01-22 14:14:01.939723: step: 340/466, loss: 0.00022971018915995955 2023-01-22 14:14:02.490420: step: 342/466, loss: 0.20835307240486145 2023-01-22 14:14:03.088132: step: 344/466, loss: 0.0007434620638377964 2023-01-22 14:14:03.751981: step: 346/466, loss: 0.0011136470129713416 2023-01-22 14:14:04.408803: step: 348/466, loss: 0.0001844180515035987 2023-01-22 14:14:05.054646: step: 350/466, loss: 7.553988689323887e-05 2023-01-22 14:14:05.585809: step: 352/466, loss: 0.005245328415185213 2023-01-22 14:14:06.135012: step: 354/466, loss: 9.995513391913846e-05 2023-01-22 14:14:06.718923: step: 356/466, loss: 0.0033678971230983734 2023-01-22 14:14:07.390366: step: 358/466, loss: 0.007808188907802105 2023-01-22 14:14:08.023072: step: 360/466, loss: 0.03540022298693657 2023-01-22 14:14:08.612424: step: 362/466, loss: 0.00361061654984951 2023-01-22 14:14:09.185624: step: 364/466, loss: 0.007014125119894743 2023-01-22 14:14:09.771702: step: 366/466, loss: 0.13524511456489563 2023-01-22 14:14:10.417028: step: 368/466, loss: 0.007791826035827398 2023-01-22 14:14:10.985043: step: 370/466, loss: 0.010278028436005116 2023-01-22 14:14:11.663221: step: 372/466, loss: 0.0022314770612865686 2023-01-22 14:14:12.206834: step: 374/466, loss: 0.0009229807765223086 2023-01-22 14:14:12.879830: step: 376/466, loss: 0.028267759829759598 2023-01-22 14:14:13.514134: step: 378/466, loss: 0.0022364002652466297 2023-01-22 14:14:14.113773: step: 380/466, loss: 0.0007268958725035191 2023-01-22 14:14:14.703994: step: 382/466, loss: 0.005729918368160725 2023-01-22 14:14:15.352602: step: 384/466, loss: 4.04190068366006e-05 2023-01-22 14:14:15.887843: step: 386/466, loss: 0.0007473346777260303 2023-01-22 14:14:16.493683: step: 388/466, loss: 0.012823650613427162 2023-01-22 14:14:17.096070: step: 390/466, loss: 0.13350141048431396 2023-01-22 14:14:17.753743: step: 392/466, loss: 0.04062308743596077 2023-01-22 14:14:18.393475: step: 394/466, loss: 0.023992329835891724 2023-01-22 14:14:18.939303: step: 396/466, loss: 0.0023777533788233995 2023-01-22 14:14:19.531741: step: 398/466, loss: 0.36690983176231384 2023-01-22 14:14:20.142604: step: 400/466, loss: 0.06401252001523972 2023-01-22 14:14:20.786245: step: 402/466, loss: 0.005016879644244909 2023-01-22 14:14:21.420631: step: 404/466, loss: 0.0005979741690680385 2023-01-22 14:14:22.050446: step: 406/466, loss: 0.08845721185207367 2023-01-22 14:14:22.620346: step: 408/466, loss: 0.04313786327838898 2023-01-22 14:14:23.216699: step: 410/466, loss: 0.0065497104078531265 2023-01-22 14:14:23.750576: step: 412/466, loss: 0.0006080567254684865 2023-01-22 14:14:24.375176: step: 414/466, loss: 0.008212543092668056 2023-01-22 14:14:24.986593: step: 416/466, loss: 2.887671689677518e-05 2023-01-22 14:14:25.579689: step: 418/466, loss: 0.016217680647969246 2023-01-22 14:14:26.215948: step: 420/466, loss: 0.5591292381286621 2023-01-22 14:14:26.862385: step: 422/466, loss: 0.002283054869621992 2023-01-22 14:14:27.423624: step: 424/466, loss: 0.001116280909627676 2023-01-22 14:14:28.057126: step: 426/466, loss: 0.0035510158631950617 2023-01-22 14:14:28.641649: step: 428/466, loss: 0.010575790889561176 2023-01-22 14:14:29.231675: step: 430/466, loss: 0.04808664321899414 2023-01-22 14:14:29.819773: step: 432/466, loss: 5.420338766271016e-06 2023-01-22 14:14:30.390812: step: 434/466, loss: 0.0012021951843053102 2023-01-22 14:14:31.072907: step: 436/466, loss: 3.396605825400911e-05 2023-01-22 14:14:31.692441: step: 438/466, loss: 0.02876610867679119 2023-01-22 14:14:32.332225: step: 440/466, loss: 0.0016284855082631111 2023-01-22 14:14:32.952667: step: 442/466, loss: 0.024594612419605255 2023-01-22 14:14:33.644780: step: 444/466, loss: 0.004494968801736832 2023-01-22 14:14:34.219874: step: 446/466, loss: 0.009465239942073822 2023-01-22 14:14:34.840568: step: 448/466, loss: 2.231669714092277e-05 2023-01-22 14:14:35.484021: step: 450/466, loss: 0.0015232398873195052 2023-01-22 14:14:36.076780: step: 452/466, loss: 0.0008695161668583751 2023-01-22 14:14:36.649613: step: 454/466, loss: 0.2243443727493286 2023-01-22 14:14:37.202010: step: 456/466, loss: 0.0012619862100109458 2023-01-22 14:14:37.826048: step: 458/466, loss: 0.00047538834041915834 2023-01-22 14:14:38.476754: step: 460/466, loss: 0.002151229651644826 2023-01-22 14:14:39.107832: step: 462/466, loss: 0.005750596057623625 2023-01-22 14:14:39.752864: step: 464/466, loss: 0.0219623614102602 2023-01-22 14:14:40.350201: step: 466/466, loss: 0.0009176198509521782 2023-01-22 14:14:40.972669: step: 468/466, loss: 0.0002723286161199212 2023-01-22 14:14:41.606328: step: 470/466, loss: 0.005967268254607916 2023-01-22 14:14:42.221909: step: 472/466, loss: 0.021503537893295288 2023-01-22 14:14:42.820345: step: 474/466, loss: 0.12058486044406891 2023-01-22 14:14:43.454074: step: 476/466, loss: 0.16243629157543182 2023-01-22 14:14:44.066925: step: 478/466, loss: 0.00038640364073216915 2023-01-22 14:14:44.682883: step: 480/466, loss: 0.008760428056120872 2023-01-22 14:14:45.311857: step: 482/466, loss: 0.0006672271993011236 2023-01-22 14:14:45.968443: step: 484/466, loss: 0.0004302192246541381 2023-01-22 14:14:46.587514: step: 486/466, loss: 2.0018022041767836e-05 2023-01-22 14:14:47.290442: step: 488/466, loss: 0.0021745488047599792 2023-01-22 14:14:47.885154: step: 490/466, loss: 0.005278285127133131 2023-01-22 14:14:48.505825: step: 492/466, loss: 0.02675752528011799 2023-01-22 14:14:49.079740: step: 494/466, loss: 0.0060821110382676125 2023-01-22 14:14:49.668516: step: 496/466, loss: 0.14712998270988464 2023-01-22 14:14:50.261113: step: 498/466, loss: 0.12543053925037384 2023-01-22 14:14:51.086456: step: 500/466, loss: 0.007587286178022623 2023-01-22 14:14:51.685399: step: 502/466, loss: 0.0019519716734066606 2023-01-22 14:14:52.265441: step: 504/466, loss: 0.0018390478799119592 2023-01-22 14:14:52.815532: step: 506/466, loss: 0.0011793702142313123 2023-01-22 14:14:53.454685: step: 508/466, loss: 0.0064719608053565025 2023-01-22 14:14:54.056756: step: 510/466, loss: 0.025509748607873917 2023-01-22 14:14:54.706553: step: 512/466, loss: 0.0004068021953571588 2023-01-22 14:14:55.248580: step: 514/466, loss: 0.0006000860594213009 2023-01-22 14:14:55.816640: step: 516/466, loss: 0.007948077283799648 2023-01-22 14:14:56.453623: step: 518/466, loss: 0.0008945147856138647 2023-01-22 14:14:57.086948: step: 520/466, loss: 0.0008437213837169111 2023-01-22 14:14:57.704263: step: 522/466, loss: 0.000982102588750422 2023-01-22 14:14:58.302935: step: 524/466, loss: 0.0002092236973112449 2023-01-22 14:14:58.927148: step: 526/466, loss: 0.018853899091482162 2023-01-22 14:14:59.539667: step: 528/466, loss: 0.0017451480962336063 2023-01-22 14:15:00.169829: step: 530/466, loss: 0.0051223840564489365 2023-01-22 14:15:00.780278: step: 532/466, loss: 0.008472130633890629 2023-01-22 14:15:01.427748: step: 534/466, loss: 0.0018088719807565212 2023-01-22 14:15:02.134025: step: 536/466, loss: 0.005146315321326256 2023-01-22 14:15:02.792376: step: 538/466, loss: 0.011584267020225525 2023-01-22 14:15:03.444664: step: 540/466, loss: 0.002393155125901103 2023-01-22 14:15:04.044891: step: 542/466, loss: 0.0217942725867033 2023-01-22 14:15:04.671980: step: 544/466, loss: 0.0023021483793854713 2023-01-22 14:15:05.296814: step: 546/466, loss: 1.6428306480520405e-05 2023-01-22 14:15:05.911264: step: 548/466, loss: 0.001946398988366127 2023-01-22 14:15:06.607657: step: 550/466, loss: 0.003615370951592922 2023-01-22 14:15:07.213668: step: 552/466, loss: 0.004972133319824934 2023-01-22 14:15:07.832179: step: 554/466, loss: 0.3673803508281708 2023-01-22 14:15:08.421531: step: 556/466, loss: 0.0005943301948718727 2023-01-22 14:15:09.029565: step: 558/466, loss: 0.002106464933604002 2023-01-22 14:15:09.696652: step: 560/466, loss: 0.027747251093387604 2023-01-22 14:15:10.278991: step: 562/466, loss: 0.005243944935500622 2023-01-22 14:15:10.901620: step: 564/466, loss: 0.03971351683139801 2023-01-22 14:15:11.505539: step: 566/466, loss: 0.0243286844342947 2023-01-22 14:15:12.079245: step: 568/466, loss: 0.0017740450566634536 2023-01-22 14:15:12.673784: step: 570/466, loss: 0.001602650503627956 2023-01-22 14:15:13.300630: step: 572/466, loss: 0.0005635821144096553 2023-01-22 14:15:14.030015: step: 574/466, loss: 0.0005379511276260018 2023-01-22 14:15:14.621080: step: 576/466, loss: 0.001342284376733005 2023-01-22 14:15:15.225000: step: 578/466, loss: 0.005829032510519028 2023-01-22 14:15:15.831034: step: 580/466, loss: 0.0016859722090885043 2023-01-22 14:15:16.418233: step: 582/466, loss: 0.02985936589539051 2023-01-22 14:15:16.994798: step: 584/466, loss: 0.02248987928032875 2023-01-22 14:15:17.573331: step: 586/466, loss: 0.003450884949415922 2023-01-22 14:15:18.181867: step: 588/466, loss: 0.003009258070960641 2023-01-22 14:15:18.799510: step: 590/466, loss: 0.003541896352544427 2023-01-22 14:15:19.428607: step: 592/466, loss: 0.005958197638392448 2023-01-22 14:15:20.009838: step: 594/466, loss: 0.8511886596679688 2023-01-22 14:15:20.635617: step: 596/466, loss: 0.001087585580535233 2023-01-22 14:15:21.265495: step: 598/466, loss: 0.05506772920489311 2023-01-22 14:15:21.908067: step: 600/466, loss: 0.017406271770596504 2023-01-22 14:15:22.558386: step: 602/466, loss: 0.016767336055636406 2023-01-22 14:15:23.332578: step: 604/466, loss: 0.014688264578580856 2023-01-22 14:15:23.926658: step: 606/466, loss: 0.0007069968269206583 2023-01-22 14:15:24.498617: step: 608/466, loss: 0.0014018111396580935 2023-01-22 14:15:25.073749: step: 610/466, loss: 0.016013823449611664 2023-01-22 14:15:25.675098: step: 612/466, loss: 0.02588215284049511 2023-01-22 14:15:26.349693: step: 614/466, loss: 0.0009318602387793362 2023-01-22 14:15:26.981424: step: 616/466, loss: 0.00710050854831934 2023-01-22 14:15:27.519519: step: 618/466, loss: 0.018266601487994194 2023-01-22 14:15:28.149134: step: 620/466, loss: 0.001025032834149897 2023-01-22 14:15:28.767366: step: 622/466, loss: 0.5549904704093933 2023-01-22 14:15:29.380987: step: 624/466, loss: 0.001709211734123528 2023-01-22 14:15:29.953044: step: 626/466, loss: 0.025714827701449394 2023-01-22 14:15:30.568927: step: 628/466, loss: 0.010942323133349419 2023-01-22 14:15:31.157690: step: 630/466, loss: 0.00045645053614862263 2023-01-22 14:15:31.819342: step: 632/466, loss: 0.047666460275650024 2023-01-22 14:15:32.398456: step: 634/466, loss: 0.011204993352293968 2023-01-22 14:15:32.980008: step: 636/466, loss: 0.0030038023833185434 2023-01-22 14:15:33.581883: step: 638/466, loss: 1.2626858949661255 2023-01-22 14:15:34.246888: step: 640/466, loss: 0.009039514698088169 2023-01-22 14:15:34.846294: step: 642/466, loss: 0.246150940656662 2023-01-22 14:15:35.435290: step: 644/466, loss: 0.000953633920289576 2023-01-22 14:15:35.980937: step: 646/466, loss: 0.0077830590307712555 2023-01-22 14:15:36.531246: step: 648/466, loss: 0.04225435107946396 2023-01-22 14:15:37.143413: step: 650/466, loss: 0.008064341731369495 2023-01-22 14:15:37.807485: step: 652/466, loss: 0.013870935887098312 2023-01-22 14:15:38.392048: step: 654/466, loss: 6.591837882297114e-05 2023-01-22 14:15:38.954299: step: 656/466, loss: 0.04355807229876518 2023-01-22 14:15:39.587149: step: 658/466, loss: 0.002071819268167019 2023-01-22 14:15:40.190150: step: 660/466, loss: 0.08513505756855011 2023-01-22 14:15:40.828734: step: 662/466, loss: 2.543550729751587 2023-01-22 14:15:41.488335: step: 664/466, loss: 0.0010810650419443846 2023-01-22 14:15:42.137712: step: 666/466, loss: 0.042096130549907684 2023-01-22 14:15:42.720812: step: 668/466, loss: 0.013255462050437927 2023-01-22 14:15:43.320693: step: 670/466, loss: 0.06090088561177254 2023-01-22 14:15:43.958313: step: 672/466, loss: 0.004362513776868582 2023-01-22 14:15:44.637146: step: 674/466, loss: 0.057001881301403046 2023-01-22 14:15:45.250469: step: 676/466, loss: 0.11104325205087662 2023-01-22 14:15:45.850057: step: 678/466, loss: 0.0040749735198915005 2023-01-22 14:15:46.441296: step: 680/466, loss: 0.0012445810716599226 2023-01-22 14:15:47.087074: step: 682/466, loss: 0.00045973557280376554 2023-01-22 14:15:47.655562: step: 684/466, loss: 0.014887809753417969 2023-01-22 14:15:48.235248: step: 686/466, loss: 0.004492264240980148 2023-01-22 14:15:48.889219: step: 688/466, loss: 0.03745220601558685 2023-01-22 14:15:49.554624: step: 690/466, loss: 0.006510350853204727 2023-01-22 14:15:50.153065: step: 692/466, loss: 0.008633963763713837 2023-01-22 14:15:50.777947: step: 694/466, loss: 0.0186244398355484 2023-01-22 14:15:51.384589: step: 696/466, loss: 0.0014183515449985862 2023-01-22 14:15:52.025107: step: 698/466, loss: 0.030302129685878754 2023-01-22 14:15:52.594027: step: 700/466, loss: 0.0003668579738587141 2023-01-22 14:15:53.182584: step: 702/466, loss: 0.013108673505485058 2023-01-22 14:15:53.837662: step: 704/466, loss: 0.05892335623502731 2023-01-22 14:15:54.492436: step: 706/466, loss: 0.005821194499731064 2023-01-22 14:15:55.212307: step: 708/466, loss: 0.024348394945263863 2023-01-22 14:15:55.821519: step: 710/466, loss: 0.0029098910745233297 2023-01-22 14:15:56.404515: step: 712/466, loss: 0.0014831717126071453 2023-01-22 14:15:57.007480: step: 714/466, loss: 0.0011402704985812306 2023-01-22 14:15:57.620618: step: 716/466, loss: 0.004425371065735817 2023-01-22 14:15:58.268501: step: 718/466, loss: 0.0004287810006644577 2023-01-22 14:15:58.860304: step: 720/466, loss: 0.06450676918029785 2023-01-22 14:15:59.536410: step: 722/466, loss: 0.006807366851717234 2023-01-22 14:16:00.126026: step: 724/466, loss: 0.00010520143405301496 2023-01-22 14:16:00.779000: step: 726/466, loss: 0.03818788751959801 2023-01-22 14:16:01.309495: step: 728/466, loss: 0.001018814742565155 2023-01-22 14:16:01.924773: step: 730/466, loss: 0.038347743451595306 2023-01-22 14:16:02.609306: step: 732/466, loss: 0.0014345066156238317 2023-01-22 14:16:03.179466: step: 734/466, loss: 0.04617827758193016 2023-01-22 14:16:03.757265: step: 736/466, loss: 0.004945599474012852 2023-01-22 14:16:04.365722: step: 738/466, loss: 0.008316083811223507 2023-01-22 14:16:04.986634: step: 740/466, loss: 0.014469870366156101 2023-01-22 14:16:05.601519: step: 742/466, loss: 0.023186495527625084 2023-01-22 14:16:06.285743: step: 744/466, loss: 0.018661288544535637 2023-01-22 14:16:06.897294: step: 746/466, loss: 0.011117344722151756 2023-01-22 14:16:07.433118: step: 748/466, loss: 0.0007372678956016898 2023-01-22 14:16:08.046001: step: 750/466, loss: 0.005543145816773176 2023-01-22 14:16:08.664365: step: 752/466, loss: 0.03786356747150421 2023-01-22 14:16:09.259827: step: 754/466, loss: 0.0009820330888032913 2023-01-22 14:16:09.885462: step: 756/466, loss: 0.004211984109133482 2023-01-22 14:16:10.520235: step: 758/466, loss: 0.008682195097208023 2023-01-22 14:16:11.057370: step: 760/466, loss: 0.006925350055098534 2023-01-22 14:16:11.624739: step: 762/466, loss: 0.003041060408577323 2023-01-22 14:16:12.154592: step: 764/466, loss: 0.013051096349954605 2023-01-22 14:16:12.757758: step: 766/466, loss: 0.0006539585301652551 2023-01-22 14:16:13.334461: step: 768/466, loss: 0.09705096483230591 2023-01-22 14:16:14.031537: step: 770/466, loss: 0.007492034696042538 2023-01-22 14:16:14.621239: step: 772/466, loss: 0.006348648574203253 2023-01-22 14:16:15.317083: step: 774/466, loss: 0.017989221960306168 2023-01-22 14:16:15.905282: step: 776/466, loss: 0.17219893634319305 2023-01-22 14:16:16.562982: step: 778/466, loss: 0.010285553522408009 2023-01-22 14:16:17.171848: step: 780/466, loss: 0.008526391349732876 2023-01-22 14:16:17.785644: step: 782/466, loss: 0.008109268732368946 2023-01-22 14:16:18.357941: step: 784/466, loss: 0.005808887537568808 2023-01-22 14:16:18.947790: step: 786/466, loss: 0.0009838847909122705 2023-01-22 14:16:19.505202: step: 788/466, loss: 0.1035638228058815 2023-01-22 14:16:20.121418: step: 790/466, loss: 0.0033334919717162848 2023-01-22 14:16:20.652461: step: 792/466, loss: 0.0018809232860803604 2023-01-22 14:16:21.232601: step: 794/466, loss: 0.0032806447707116604 2023-01-22 14:16:21.844359: step: 796/466, loss: 0.0874895304441452 2023-01-22 14:16:22.466313: step: 798/466, loss: 0.00040602550143375993 2023-01-22 14:16:23.048620: step: 800/466, loss: 0.03953491151332855 2023-01-22 14:16:23.586708: step: 802/466, loss: 0.00030488843913190067 2023-01-22 14:16:24.212070: step: 804/466, loss: 0.0029468308202922344 2023-01-22 14:16:24.782069: step: 806/466, loss: 0.02056989260017872 2023-01-22 14:16:25.442342: step: 808/466, loss: 0.0040731183253228664 2023-01-22 14:16:26.062716: step: 810/466, loss: 0.008805769495666027 2023-01-22 14:16:26.691634: step: 812/466, loss: 0.003765393979847431 2023-01-22 14:16:27.263613: step: 814/466, loss: 0.0022657178342342377 2023-01-22 14:16:27.864984: step: 816/466, loss: 0.0012500463053584099 2023-01-22 14:16:28.451998: step: 818/466, loss: 0.00040347143658436835 2023-01-22 14:16:29.075368: step: 820/466, loss: 0.0158883985131979 2023-01-22 14:16:29.736172: step: 822/466, loss: 0.007519667502492666 2023-01-22 14:16:30.353755: step: 824/466, loss: 0.009993246756494045 2023-01-22 14:16:30.957913: step: 826/466, loss: 0.015041830018162727 2023-01-22 14:16:31.556142: step: 828/466, loss: 0.00337592582218349 2023-01-22 14:16:32.153085: step: 830/466, loss: 7.99222761997953e-05 2023-01-22 14:16:32.864670: step: 832/466, loss: 0.5875672101974487 2023-01-22 14:16:33.492661: step: 834/466, loss: 0.057680025696754456 2023-01-22 14:16:34.124667: step: 836/466, loss: 0.019776932895183563 2023-01-22 14:16:34.827719: step: 838/466, loss: 0.014451738446950912 2023-01-22 14:16:35.411543: step: 840/466, loss: 0.004580538719892502 2023-01-22 14:16:36.036069: step: 842/466, loss: 0.0045847478322684765 2023-01-22 14:16:36.683993: step: 844/466, loss: 0.0069063575938344 2023-01-22 14:16:37.295279: step: 846/466, loss: 0.079214908182621 2023-01-22 14:16:37.917311: step: 848/466, loss: 0.0007532158633694053 2023-01-22 14:16:38.548200: step: 850/466, loss: 0.1626470386981964 2023-01-22 14:16:39.171911: step: 852/466, loss: 0.0035434728488326073 2023-01-22 14:16:39.780939: step: 854/466, loss: 0.0006868537748232484 2023-01-22 14:16:40.358621: step: 856/466, loss: 0.008462544530630112 2023-01-22 14:16:40.941500: step: 858/466, loss: 0.0036610299721360207 2023-01-22 14:16:41.496950: step: 860/466, loss: 0.0031512551940977573 2023-01-22 14:16:42.076707: step: 862/466, loss: 0.045224349945783615 2023-01-22 14:16:42.675948: step: 864/466, loss: 7.859440665924922e-05 2023-01-22 14:16:43.280530: step: 866/466, loss: 0.0007107240962795913 2023-01-22 14:16:43.835430: step: 868/466, loss: 0.0005173115059733391 2023-01-22 14:16:44.497053: step: 870/466, loss: 0.0029340023174881935 2023-01-22 14:16:45.037349: step: 872/466, loss: 0.000720936746802181 2023-01-22 14:16:45.667134: step: 874/466, loss: 0.18558841943740845 2023-01-22 14:16:46.279194: step: 876/466, loss: 0.0594550259411335 2023-01-22 14:16:46.915907: step: 878/466, loss: 0.0013480983907356858 2023-01-22 14:16:47.484447: step: 880/466, loss: 0.000333346746629104 2023-01-22 14:16:48.052811: step: 882/466, loss: 0.0003667432174552232 2023-01-22 14:16:48.629895: step: 884/466, loss: 0.014059399254620075 2023-01-22 14:16:49.225290: step: 886/466, loss: 0.0007658023969270289 2023-01-22 14:16:49.934936: step: 888/466, loss: 0.004983733873814344 2023-01-22 14:16:50.510434: step: 890/466, loss: 0.0006161926430650055 2023-01-22 14:16:51.159566: step: 892/466, loss: 0.0013628561282530427 2023-01-22 14:16:51.739172: step: 894/466, loss: 0.0006844276795163751 2023-01-22 14:16:52.393265: step: 896/466, loss: 0.001898131798952818 2023-01-22 14:16:53.034654: step: 898/466, loss: 0.007265943568199873 2023-01-22 14:16:53.658368: step: 900/466, loss: 0.019399205222725868 2023-01-22 14:16:54.243891: step: 902/466, loss: 0.05709183216094971 2023-01-22 14:16:54.864967: step: 904/466, loss: 0.0287553071975708 2023-01-22 14:16:55.495281: step: 906/466, loss: 0.0010598688386380672 2023-01-22 14:16:56.063754: step: 908/466, loss: 0.00038106116699054837 2023-01-22 14:16:56.655425: step: 910/466, loss: 0.0017812260193750262 2023-01-22 14:16:57.244355: step: 912/466, loss: 0.01407301239669323 2023-01-22 14:16:57.834958: step: 914/466, loss: 0.008931309916079044 2023-01-22 14:16:58.427994: step: 916/466, loss: 0.04127160459756851 2023-01-22 14:16:59.005683: step: 918/466, loss: 0.0005697328597307205 2023-01-22 14:16:59.649633: step: 920/466, loss: 0.010193925350904465 2023-01-22 14:17:00.291618: step: 922/466, loss: 0.0004008575342595577 2023-01-22 14:17:00.861990: step: 924/466, loss: 0.0037713053170591593 2023-01-22 14:17:01.491149: step: 926/466, loss: 0.009518631733953953 2023-01-22 14:17:02.076443: step: 928/466, loss: 0.005249551497399807 2023-01-22 14:17:02.775018: step: 930/466, loss: 0.002054632408544421 2023-01-22 14:17:03.433998: step: 932/466, loss: 0.04487023502588272 ================================================== Loss: 0.055 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101105532064835, 'r': 0.3542439336438388, 'f1': 0.33071134283490355}, 'combined': 0.24368204208887628, 'epoch': 38} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3585143251955013, 'r': 0.3361460137448288, 'f1': 0.3469700356543224}, 'combined': 0.23011484229923967, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29209700772200775, 'r': 0.2865648674242424, 'f1': 0.2893044933078394}, 'combined': 0.19286966220522628, 'epoch': 38} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.37430308782760013, 'r': 0.3127826841189115, 'f1': 0.34078867374313077}, 'combined': 0.222409450232359, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2906697469197469, 'r': 0.3513408515899028, 'f1': 0.31813853743621784}, 'combined': 0.2344178696898447, 'epoch': 38} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34962682341359763, 'r': 0.3187506883588903, 'f1': 0.333475584288513}, 'combined': 0.2211651543467858, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24696969696969695, 'r': 0.38809523809523805, 'f1': 0.3018518518518518}, 'combined': 0.20123456790123453, 'epoch': 38} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3055555555555556, 'r': 0.1896551724137931, 'f1': 0.23404255319148937}, 'combined': 0.15602836879432624, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:19:32.972654: step: 2/466, loss: 0.018085388466715813 2023-01-22 14:19:33.691144: step: 4/466, loss: 0.0055228341370821 2023-01-22 14:19:34.309298: step: 6/466, loss: 0.03207993879914284 2023-01-22 14:19:34.890681: step: 8/466, loss: 0.02125280536711216 2023-01-22 14:19:35.510508: step: 10/466, loss: 0.0024259898345917463 2023-01-22 14:19:36.123361: step: 12/466, loss: 3.278315853094682e-05 2023-01-22 14:19:36.704028: step: 14/466, loss: 0.00158892129547894 2023-01-22 14:19:37.262496: step: 16/466, loss: 0.001289099338464439 2023-01-22 14:19:37.832940: step: 18/466, loss: 0.5699836611747742 2023-01-22 14:19:38.351445: step: 20/466, loss: 4.0209528378909454e-05 2023-01-22 14:19:38.894374: step: 22/466, loss: 0.0016772763337939978 2023-01-22 14:19:39.521167: step: 24/466, loss: 0.05627201870083809 2023-01-22 14:19:40.152104: step: 26/466, loss: 0.0013877139426767826 2023-01-22 14:19:40.765819: step: 28/466, loss: 0.0003633845772128552 2023-01-22 14:19:41.492466: step: 30/466, loss: 0.009575406089425087 2023-01-22 14:19:42.064863: step: 32/466, loss: 1.4860745523037622e-06 2023-01-22 14:19:42.622144: step: 34/466, loss: 0.0001350823586108163 2023-01-22 14:19:43.259294: step: 36/466, loss: 0.008692679926753044 2023-01-22 14:19:43.852884: step: 38/466, loss: 0.03054310567677021 2023-01-22 14:19:44.493402: step: 40/466, loss: 0.005888329353183508 2023-01-22 14:19:45.061122: step: 42/466, loss: 0.00015866171452216804 2023-01-22 14:19:45.679573: step: 44/466, loss: 0.007867840118706226 2023-01-22 14:19:46.241633: step: 46/466, loss: 0.0014041324611753225 2023-01-22 14:19:46.862964: step: 48/466, loss: 0.0002700977784115821 2023-01-22 14:19:47.495680: step: 50/466, loss: 0.012439551763236523 2023-01-22 14:19:48.142268: step: 52/466, loss: 0.024994459003210068 2023-01-22 14:19:48.771460: step: 54/466, loss: 0.0005091347265988588 2023-01-22 14:19:49.419114: step: 56/466, loss: 0.012248466722667217 2023-01-22 14:19:50.004138: step: 58/466, loss: 0.00040014597470872104 2023-01-22 14:19:50.583409: step: 60/466, loss: 0.005358839873224497 2023-01-22 14:19:51.188266: step: 62/466, loss: 0.06228935718536377 2023-01-22 14:19:51.728515: step: 64/466, loss: 0.0008789148996584117 2023-01-22 14:19:52.333025: step: 66/466, loss: 0.02750891074538231 2023-01-22 14:19:52.872739: step: 68/466, loss: 0.019110241904854774 2023-01-22 14:19:53.513333: step: 70/466, loss: 0.0016093218000605702 2023-01-22 14:19:54.153725: step: 72/466, loss: 4.843094211537391e-05 2023-01-22 14:19:54.808083: step: 74/466, loss: 0.0009375785011798143 2023-01-22 14:19:55.428529: step: 76/466, loss: 0.0005191043601371348 2023-01-22 14:19:55.998258: step: 78/466, loss: 0.00801012758165598 2023-01-22 14:19:56.633248: step: 80/466, loss: 0.03936135768890381 2023-01-22 14:19:57.313172: step: 82/466, loss: 0.0033180981408804655 2023-01-22 14:19:57.902463: step: 84/466, loss: 0.047777898609638214 2023-01-22 14:19:58.530399: step: 86/466, loss: 0.023926258087158203 2023-01-22 14:19:59.144846: step: 88/466, loss: 0.008756158873438835 2023-01-22 14:19:59.817149: step: 90/466, loss: 0.002737005241215229 2023-01-22 14:20:00.442483: step: 92/466, loss: 0.027509748935699463 2023-01-22 14:20:01.061357: step: 94/466, loss: 0.000658797740470618 2023-01-22 14:20:01.778099: step: 96/466, loss: 0.04544578492641449 2023-01-22 14:20:02.388421: step: 98/466, loss: 2.4974578991532326e-05 2023-01-22 14:20:03.014413: step: 100/466, loss: 0.000990367727354169 2023-01-22 14:20:03.668521: step: 102/466, loss: 0.009184726513922215 2023-01-22 14:20:04.411892: step: 104/466, loss: 0.01867060735821724 2023-01-22 14:20:05.040285: step: 106/466, loss: 8.735142182558775e-05 2023-01-22 14:20:05.599117: step: 108/466, loss: 0.020845962688326836 2023-01-22 14:20:06.176888: step: 110/466, loss: 0.0040642134845256805 2023-01-22 14:20:06.787446: step: 112/466, loss: 0.016638586297631264 2023-01-22 14:20:07.343996: step: 114/466, loss: 0.0012167844688519835 2023-01-22 14:20:07.897229: step: 116/466, loss: 7.273747905855998e-05 2023-01-22 14:20:08.452970: step: 118/466, loss: 0.0003589347761590034 2023-01-22 14:20:09.034556: step: 120/466, loss: 0.000928523309994489 2023-01-22 14:20:09.692669: step: 122/466, loss: 0.005702007096260786 2023-01-22 14:20:10.332192: step: 124/466, loss: 0.007791510783135891 2023-01-22 14:20:10.880149: step: 126/466, loss: 0.008224002085626125 2023-01-22 14:20:11.461947: step: 128/466, loss: 0.0030422741547226906 2023-01-22 14:20:12.070678: step: 130/466, loss: 0.011904054321348667 2023-01-22 14:20:12.674795: step: 132/466, loss: 0.0015938390279188752 2023-01-22 14:20:13.356142: step: 134/466, loss: 0.01457253284752369 2023-01-22 14:20:13.959128: step: 136/466, loss: 0.08270794153213501 2023-01-22 14:20:14.565779: step: 138/466, loss: 0.009393492713570595 2023-01-22 14:20:15.171873: step: 140/466, loss: 0.0021446438040584326 2023-01-22 14:20:15.822469: step: 142/466, loss: 0.005898400209844112 2023-01-22 14:20:16.434774: step: 144/466, loss: 0.03170425072312355 2023-01-22 14:20:17.061768: step: 146/466, loss: 0.000670681125484407 2023-01-22 14:20:17.661268: step: 148/466, loss: 0.00017904472770169377 2023-01-22 14:20:18.207737: step: 150/466, loss: 0.002040478866547346 2023-01-22 14:20:18.749485: step: 152/466, loss: 0.0013364459155127406 2023-01-22 14:20:19.392864: step: 154/466, loss: 0.013797912746667862 2023-01-22 14:20:19.963588: step: 156/466, loss: 0.14163194596767426 2023-01-22 14:20:20.570267: step: 158/466, loss: 0.0034958203323185444 2023-01-22 14:20:21.242854: step: 160/466, loss: 0.0715586319565773 2023-01-22 14:20:21.882258: step: 162/466, loss: 5.988257908029482e-05 2023-01-22 14:20:22.475358: step: 164/466, loss: 0.0022908237297087908 2023-01-22 14:20:23.056502: step: 166/466, loss: 0.07565037161111832 2023-01-22 14:20:23.659619: step: 168/466, loss: 0.002730026375502348 2023-01-22 14:20:24.233243: step: 170/466, loss: 0.0032308921217918396 2023-01-22 14:20:24.874998: step: 172/466, loss: 0.0012114137643948197 2023-01-22 14:20:25.471135: step: 174/466, loss: 2.362889289855957 2023-01-22 14:20:26.050280: step: 176/466, loss: 0.0025763954035937786 2023-01-22 14:20:26.659741: step: 178/466, loss: 0.0015755045460537076 2023-01-22 14:20:27.242306: step: 180/466, loss: 0.0049422625452280045 2023-01-22 14:20:27.792979: step: 182/466, loss: 0.0011983857257291675 2023-01-22 14:20:28.385834: step: 184/466, loss: 0.0022618311922997236 2023-01-22 14:20:28.957627: step: 186/466, loss: 0.0013922039652243257 2023-01-22 14:20:29.503926: step: 188/466, loss: 0.002917697886005044 2023-01-22 14:20:30.114544: step: 190/466, loss: 0.004662810824811459 2023-01-22 14:20:30.742035: step: 192/466, loss: 0.016557466238737106 2023-01-22 14:20:31.349215: step: 194/466, loss: 0.0029304565396159887 2023-01-22 14:20:31.994057: step: 196/466, loss: 0.0011918117525056005 2023-01-22 14:20:32.595013: step: 198/466, loss: 0.00022702784917782992 2023-01-22 14:20:33.192934: step: 200/466, loss: 0.024145185947418213 2023-01-22 14:20:33.870456: step: 202/466, loss: 0.002579348860308528 2023-01-22 14:20:34.518345: step: 204/466, loss: 0.00012517660798039287 2023-01-22 14:20:35.106154: step: 206/466, loss: 0.004631017800420523 2023-01-22 14:20:35.780316: step: 208/466, loss: 0.03834258392453194 2023-01-22 14:20:36.391134: step: 210/466, loss: 0.00010660316911526024 2023-01-22 14:20:36.912703: step: 212/466, loss: 0.0031571954023092985 2023-01-22 14:20:37.500474: step: 214/466, loss: 0.00038896503974683583 2023-01-22 14:20:38.082500: step: 216/466, loss: 0.001999725354835391 2023-01-22 14:20:38.685731: step: 218/466, loss: 0.03977908939123154 2023-01-22 14:20:39.302248: step: 220/466, loss: 0.0003452486707828939 2023-01-22 14:20:39.944006: step: 222/466, loss: 0.00011602720041992143 2023-01-22 14:20:40.527204: step: 224/466, loss: 0.0012798572424799204 2023-01-22 14:20:41.201673: step: 226/466, loss: 0.0071009122766554356 2023-01-22 14:20:41.835086: step: 228/466, loss: 0.023304520174860954 2023-01-22 14:20:42.387170: step: 230/466, loss: 0.027974234893918037 2023-01-22 14:20:42.975938: step: 232/466, loss: 0.0005350593710318208 2023-01-22 14:20:43.549112: step: 234/466, loss: 0.026748616248369217 2023-01-22 14:20:44.164616: step: 236/466, loss: 0.003761754836887121 2023-01-22 14:20:44.774707: step: 238/466, loss: 0.020880121737718582 2023-01-22 14:20:45.366926: step: 240/466, loss: 0.002736175199970603 2023-01-22 14:20:45.969685: step: 242/466, loss: 0.013938499614596367 2023-01-22 14:20:46.616114: step: 244/466, loss: 0.0015270530711859465 2023-01-22 14:20:47.266827: step: 246/466, loss: 0.005346247460693121 2023-01-22 14:20:47.874612: step: 248/466, loss: 0.00041722002788446844 2023-01-22 14:20:48.542108: step: 250/466, loss: 0.0007358561852015555 2023-01-22 14:20:49.117264: step: 252/466, loss: 0.0042246123775839806 2023-01-22 14:20:49.757376: step: 254/466, loss: 0.0004641209670808166 2023-01-22 14:20:50.298189: step: 256/466, loss: 0.004896281752735376 2023-01-22 14:20:50.941590: step: 258/466, loss: 0.006107239983975887 2023-01-22 14:20:51.546169: step: 260/466, loss: 0.00105644844006747 2023-01-22 14:20:52.147738: step: 262/466, loss: 0.00015503763279411942 2023-01-22 14:20:52.743509: step: 264/466, loss: 0.019540099427103996 2023-01-22 14:20:53.315697: step: 266/466, loss: 0.0032516028732061386 2023-01-22 14:20:53.910061: step: 268/466, loss: 0.00861570704728365 2023-01-22 14:20:54.497633: step: 270/466, loss: 0.01724906452000141 2023-01-22 14:20:55.044264: step: 272/466, loss: 0.0008817288908176124 2023-01-22 14:20:55.666892: step: 274/466, loss: 0.0004552304744720459 2023-01-22 14:20:56.250022: step: 276/466, loss: 0.11388035118579865 2023-01-22 14:20:56.886890: step: 278/466, loss: 0.001476992736570537 2023-01-22 14:20:57.504620: step: 280/466, loss: 0.0038886708207428455 2023-01-22 14:20:58.099173: step: 282/466, loss: 1.6863867131178267e-05 2023-01-22 14:20:58.755003: step: 284/466, loss: 0.037166088819503784 2023-01-22 14:20:59.380599: step: 286/466, loss: 0.01547346543520689 2023-01-22 14:20:59.940467: step: 288/466, loss: 0.0002924564469140023 2023-01-22 14:21:00.480137: step: 290/466, loss: 0.01838175393640995 2023-01-22 14:21:01.095548: step: 292/466, loss: 0.0003352120111230761 2023-01-22 14:21:01.666998: step: 294/466, loss: 8.774209709372371e-05 2023-01-22 14:21:02.286067: step: 296/466, loss: 0.06365593522787094 2023-01-22 14:21:02.921565: step: 298/466, loss: 0.07829616218805313 2023-01-22 14:21:03.514199: step: 300/466, loss: 0.004876286722719669 2023-01-22 14:21:04.162189: step: 302/466, loss: 0.010864168405532837 2023-01-22 14:21:04.721216: step: 304/466, loss: 0.083220936357975 2023-01-22 14:21:05.344944: step: 306/466, loss: 0.019162513315677643 2023-01-22 14:21:06.042831: step: 308/466, loss: 0.09040261805057526 2023-01-22 14:21:06.608840: step: 310/466, loss: 0.038859039545059204 2023-01-22 14:21:07.244160: step: 312/466, loss: 0.04216773808002472 2023-01-22 14:21:07.928988: step: 314/466, loss: 0.12224699556827545 2023-01-22 14:21:08.496274: step: 316/466, loss: 0.00013712629151996225 2023-01-22 14:21:09.168066: step: 318/466, loss: 0.08204283565282822 2023-01-22 14:21:09.876600: step: 320/466, loss: 0.0006768596358597279 2023-01-22 14:21:10.509895: step: 322/466, loss: 0.07268203049898148 2023-01-22 14:21:11.075482: step: 324/466, loss: 0.011528857052326202 2023-01-22 14:21:11.665498: step: 326/466, loss: 0.009545979090034962 2023-01-22 14:21:12.252739: step: 328/466, loss: 0.0033273734152317047 2023-01-22 14:21:12.837194: step: 330/466, loss: 0.0013916256139054894 2023-01-22 14:21:13.426454: step: 332/466, loss: 0.15865670144557953 2023-01-22 14:21:14.042018: step: 334/466, loss: 0.011998772621154785 2023-01-22 14:21:14.586157: step: 336/466, loss: 3.965425639762543e-05 2023-01-22 14:21:15.180630: step: 338/466, loss: 0.009822729974985123 2023-01-22 14:21:15.792513: step: 340/466, loss: 0.00204171659424901 2023-01-22 14:21:16.319624: step: 342/466, loss: 0.04342419654130936 2023-01-22 14:21:16.901439: step: 344/466, loss: 0.027267329394817352 2023-01-22 14:21:17.496654: step: 346/466, loss: 0.008024612441658974 2023-01-22 14:21:18.113562: step: 348/466, loss: 0.0008232182590290904 2023-01-22 14:21:18.693705: step: 350/466, loss: 4.253632505424321e-05 2023-01-22 14:21:19.377780: step: 352/466, loss: 0.012067830190062523 2023-01-22 14:21:20.018045: step: 354/466, loss: 0.6041533350944519 2023-01-22 14:21:20.611187: step: 356/466, loss: 0.02979869954288006 2023-01-22 14:21:21.238312: step: 358/466, loss: 0.015235206112265587 2023-01-22 14:21:21.812283: step: 360/466, loss: 0.0009958329610526562 2023-01-22 14:21:22.400723: step: 362/466, loss: 0.0004881559289060533 2023-01-22 14:21:23.036143: step: 364/466, loss: 0.01994839869439602 2023-01-22 14:21:23.678655: step: 366/466, loss: 0.00024279524222947657 2023-01-22 14:21:24.281080: step: 368/466, loss: 0.0027458854019641876 2023-01-22 14:21:24.918532: step: 370/466, loss: 0.0752294585108757 2023-01-22 14:21:25.524075: step: 372/466, loss: 0.002107922686263919 2023-01-22 14:21:26.118762: step: 374/466, loss: 0.021487440913915634 2023-01-22 14:21:26.702547: step: 376/466, loss: 0.0018568960949778557 2023-01-22 14:21:27.278148: step: 378/466, loss: 0.011506550945341587 2023-01-22 14:21:27.916598: step: 380/466, loss: 0.002839966444298625 2023-01-22 14:21:28.481606: step: 382/466, loss: 0.001984285656362772 2023-01-22 14:21:29.148068: step: 384/466, loss: 0.015150221064686775 2023-01-22 14:21:29.856860: step: 386/466, loss: 0.0021268455311656 2023-01-22 14:21:30.487532: step: 388/466, loss: 0.006002889946103096 2023-01-22 14:21:31.081629: step: 390/466, loss: 0.0029884742107242346 2023-01-22 14:21:31.655542: step: 392/466, loss: 0.0004607290029525757 2023-01-22 14:21:32.355211: step: 394/466, loss: 0.06509587913751602 2023-01-22 14:21:32.987920: step: 396/466, loss: 0.004332786425948143 2023-01-22 14:21:33.560393: step: 398/466, loss: 0.00031428266083821654 2023-01-22 14:21:34.146616: step: 400/466, loss: 0.002126887906342745 2023-01-22 14:21:34.695889: step: 402/466, loss: 0.004601453896611929 2023-01-22 14:21:35.304071: step: 404/466, loss: 0.018566809594631195 2023-01-22 14:21:35.846823: step: 406/466, loss: 0.005234793294221163 2023-01-22 14:21:36.460614: step: 408/466, loss: 0.013058140873908997 2023-01-22 14:21:37.045718: step: 410/466, loss: 0.00037722065462730825 2023-01-22 14:21:37.745327: step: 412/466, loss: 0.0024766004644334316 2023-01-22 14:21:38.337106: step: 414/466, loss: 3.426059629418887e-05 2023-01-22 14:21:38.959533: step: 416/466, loss: 0.00046308644232340157 2023-01-22 14:21:39.594635: step: 418/466, loss: 0.006776104681193829 2023-01-22 14:21:40.244739: step: 420/466, loss: 0.0001457214675610885 2023-01-22 14:21:40.902966: step: 422/466, loss: 0.1614760011434555 2023-01-22 14:21:41.501200: step: 424/466, loss: 0.04326094686985016 2023-01-22 14:21:42.110134: step: 426/466, loss: 0.01517427433282137 2023-01-22 14:21:42.676050: step: 428/466, loss: 0.0005652170511893928 2023-01-22 14:21:43.266787: step: 430/466, loss: 0.005981959402561188 2023-01-22 14:21:43.868572: step: 432/466, loss: 0.013260902836918831 2023-01-22 14:21:44.464411: step: 434/466, loss: 0.019691504538059235 2023-01-22 14:21:45.090317: step: 436/466, loss: 0.0031405503395944834 2023-01-22 14:21:45.698704: step: 438/466, loss: 0.0012364968424662948 2023-01-22 14:21:46.278780: step: 440/466, loss: 0.001196483033709228 2023-01-22 14:21:46.903728: step: 442/466, loss: 0.0004985735868103802 2023-01-22 14:21:47.489244: step: 444/466, loss: 0.038667019456624985 2023-01-22 14:21:48.078095: step: 446/466, loss: 0.07287077605724335 2023-01-22 14:21:48.682232: step: 448/466, loss: 0.006370658054947853 2023-01-22 14:21:49.344918: step: 450/466, loss: 0.13874077796936035 2023-01-22 14:21:49.877909: step: 452/466, loss: 0.004097006283700466 2023-01-22 14:21:50.534934: step: 454/466, loss: 0.015667414292693138 2023-01-22 14:21:51.146344: step: 456/466, loss: 0.007470360491424799 2023-01-22 14:21:51.764071: step: 458/466, loss: 0.00036517850821837783 2023-01-22 14:21:52.392428: step: 460/466, loss: 0.00029116624500602484 2023-01-22 14:21:53.063323: step: 462/466, loss: 0.001083413721062243 2023-01-22 14:21:53.643437: step: 464/466, loss: 0.0014429357834160328 2023-01-22 14:21:54.332032: step: 466/466, loss: 0.028476649895310402 2023-01-22 14:21:54.950541: step: 468/466, loss: 0.0031984311062842607 2023-01-22 14:21:55.529061: step: 470/466, loss: 3.856273542623967e-05 2023-01-22 14:21:56.095481: step: 472/466, loss: 0.0018934006802737713 2023-01-22 14:21:56.697452: step: 474/466, loss: 6.685448170173913e-06 2023-01-22 14:21:57.332092: step: 476/466, loss: 0.007408824283629656 2023-01-22 14:21:57.922036: step: 478/466, loss: 0.002856920473277569 2023-01-22 14:21:58.529441: step: 480/466, loss: 0.0033477952238172293 2023-01-22 14:21:59.159777: step: 482/466, loss: 0.6972894668579102 2023-01-22 14:21:59.711115: step: 484/466, loss: 0.0016652895137667656 2023-01-22 14:22:00.321099: step: 486/466, loss: 0.0008347773109562695 2023-01-22 14:22:00.944648: step: 488/466, loss: 0.0007183398702181876 2023-01-22 14:22:01.608482: step: 490/466, loss: 0.01576983742415905 2023-01-22 14:22:02.239843: step: 492/466, loss: 0.0009741125977598131 2023-01-22 14:22:02.841406: step: 494/466, loss: 0.017025131732225418 2023-01-22 14:22:03.446810: step: 496/466, loss: 0.0027654848527163267 2023-01-22 14:22:04.003359: step: 498/466, loss: 0.0006783697754144669 2023-01-22 14:22:04.597653: step: 500/466, loss: 0.0022116999607533216 2023-01-22 14:22:05.268472: step: 502/466, loss: 0.17197492718696594 2023-01-22 14:22:05.898279: step: 504/466, loss: 0.019571226090192795 2023-01-22 14:22:06.491325: step: 506/466, loss: 0.029277196153998375 2023-01-22 14:22:07.080191: step: 508/466, loss: 0.0432414673268795 2023-01-22 14:22:07.657166: step: 510/466, loss: 1.600991890882142e-05 2023-01-22 14:22:08.230307: step: 512/466, loss: 0.000689912645611912 2023-01-22 14:22:08.861870: step: 514/466, loss: 0.0004268517659511417 2023-01-22 14:22:09.530145: step: 516/466, loss: 0.0882229208946228 2023-01-22 14:22:10.163409: step: 518/466, loss: 4.202971831546165e-05 2023-01-22 14:22:10.725516: step: 520/466, loss: 0.0021449143532663584 2023-01-22 14:22:11.325421: step: 522/466, loss: 4.7994446504162624e-05 2023-01-22 14:22:11.911769: step: 524/466, loss: 0.00016817261348478496 2023-01-22 14:22:12.443924: step: 526/466, loss: 0.003549777902662754 2023-01-22 14:22:13.089771: step: 528/466, loss: 0.03497760370373726 2023-01-22 14:22:13.702825: step: 530/466, loss: 0.0014893412590026855 2023-01-22 14:22:14.300871: step: 532/466, loss: 0.005852533038705587 2023-01-22 14:22:14.909386: step: 534/466, loss: 0.01630992442369461 2023-01-22 14:22:15.498400: step: 536/466, loss: 0.0014065640280023217 2023-01-22 14:22:16.136292: step: 538/466, loss: 0.04440496116876602 2023-01-22 14:22:16.719778: step: 540/466, loss: 0.009069890715181828 2023-01-22 14:22:17.304534: step: 542/466, loss: 0.2030581682920456 2023-01-22 14:22:17.995499: step: 544/466, loss: 0.037008028477430344 2023-01-22 14:22:18.614832: step: 546/466, loss: 0.10246668010950089 2023-01-22 14:22:19.198152: step: 548/466, loss: 0.00014032924082130194 2023-01-22 14:22:19.764281: step: 550/466, loss: 0.0036922169383615255 2023-01-22 14:22:20.396888: step: 552/466, loss: 0.0041724443435668945 2023-01-22 14:22:21.078333: step: 554/466, loss: 0.10033238679170609 2023-01-22 14:22:21.729979: step: 556/466, loss: 0.001182153937406838 2023-01-22 14:22:22.336782: step: 558/466, loss: 0.026487678289413452 2023-01-22 14:22:22.940469: step: 560/466, loss: 0.0033549184445291758 2023-01-22 14:22:23.488933: step: 562/466, loss: 0.00424228236079216 2023-01-22 14:22:24.016077: step: 564/466, loss: 0.0006431190413422883 2023-01-22 14:22:24.657544: step: 566/466, loss: 0.014425793662667274 2023-01-22 14:22:25.284053: step: 568/466, loss: 0.000643086910713464 2023-01-22 14:22:25.937066: step: 570/466, loss: 0.00036597021971829236 2023-01-22 14:22:26.616731: step: 572/466, loss: 0.0026783770881593227 2023-01-22 14:22:27.233530: step: 574/466, loss: 0.004231403581798077 2023-01-22 14:22:27.833290: step: 576/466, loss: 0.000492847990244627 2023-01-22 14:22:28.389383: step: 578/466, loss: 0.011168533936142921 2023-01-22 14:22:28.970688: step: 580/466, loss: 0.005795257166028023 2023-01-22 14:22:29.578151: step: 582/466, loss: 0.001321491552516818 2023-01-22 14:22:30.212959: step: 584/466, loss: 0.0002804806281346828 2023-01-22 14:22:30.772476: step: 586/466, loss: 0.00013717249385081232 2023-01-22 14:22:31.339285: step: 588/466, loss: 0.007665012031793594 2023-01-22 14:22:31.956633: step: 590/466, loss: 0.0003840482677333057 2023-01-22 14:22:32.606850: step: 592/466, loss: 0.0006111133843660355 2023-01-22 14:22:33.188004: step: 594/466, loss: 0.0006169495172798634 2023-01-22 14:22:33.927617: step: 596/466, loss: 0.00024222326464951038 2023-01-22 14:22:34.523260: step: 598/466, loss: 0.02187812514603138 2023-01-22 14:22:35.106161: step: 600/466, loss: 0.023061878979206085 2023-01-22 14:22:35.708984: step: 602/466, loss: 0.015324532985687256 2023-01-22 14:22:36.307345: step: 604/466, loss: 5.273514398140833e-05 2023-01-22 14:22:36.951849: step: 606/466, loss: 0.0560213066637516 2023-01-22 14:22:37.606196: step: 608/466, loss: 0.04418664053082466 2023-01-22 14:22:38.197739: step: 610/466, loss: 0.0077951340936124325 2023-01-22 14:22:38.815315: step: 612/466, loss: 0.006088362541049719 2023-01-22 14:22:39.416752: step: 614/466, loss: 0.036048773676157 2023-01-22 14:22:40.065883: step: 616/466, loss: 0.00034599084756337106 2023-01-22 14:22:40.686514: step: 618/466, loss: 0.010976719669997692 2023-01-22 14:22:41.286862: step: 620/466, loss: 0.016567006707191467 2023-01-22 14:22:41.903444: step: 622/466, loss: 2.328640221094247e-05 2023-01-22 14:22:42.554136: step: 624/466, loss: 0.0007390398532152176 2023-01-22 14:22:43.181983: step: 626/466, loss: 0.00033580331364646554 2023-01-22 14:22:43.772398: step: 628/466, loss: 0.00021214628941379488 2023-01-22 14:22:44.384566: step: 630/466, loss: 0.0009931318927556276 2023-01-22 14:22:44.971942: step: 632/466, loss: 0.0009459779830649495 2023-01-22 14:22:45.552583: step: 634/466, loss: 0.009291564114391804 2023-01-22 14:22:46.191435: step: 636/466, loss: 0.013678831979632378 2023-01-22 14:22:46.796931: step: 638/466, loss: 0.00016148884606081992 2023-01-22 14:22:47.356056: step: 640/466, loss: 0.0016017744783312082 2023-01-22 14:22:47.913864: step: 642/466, loss: 0.0026671478990465403 2023-01-22 14:22:48.514592: step: 644/466, loss: 0.14576734602451324 2023-01-22 14:22:49.139222: step: 646/466, loss: 0.025174817070364952 2023-01-22 14:22:49.721195: step: 648/466, loss: 0.00022869682288728654 2023-01-22 14:22:50.352979: step: 650/466, loss: 0.005399439483880997 2023-01-22 14:22:50.938087: step: 652/466, loss: 4.397485463414341e-05 2023-01-22 14:22:51.533924: step: 654/466, loss: 0.017691288143396378 2023-01-22 14:22:52.218696: step: 656/466, loss: 0.006667081732302904 2023-01-22 14:22:52.771366: step: 658/466, loss: 0.0010638827225193381 2023-01-22 14:22:53.393541: step: 660/466, loss: 0.0013960172655060887 2023-01-22 14:22:53.919178: step: 662/466, loss: 0.020952045917510986 2023-01-22 14:22:54.591476: step: 664/466, loss: 0.00041501622763462365 2023-01-22 14:22:55.178241: step: 666/466, loss: 9.108463564189151e-05 2023-01-22 14:22:55.767543: step: 668/466, loss: 9.921830496750772e-05 2023-01-22 14:22:56.326779: step: 670/466, loss: 0.00023376091849058867 2023-01-22 14:22:56.919703: step: 672/466, loss: 0.00015998842718545347 2023-01-22 14:22:57.509048: step: 674/466, loss: 8.416108175879344e-05 2023-01-22 14:22:58.086731: step: 676/466, loss: 0.022522931918501854 2023-01-22 14:22:58.773246: step: 678/466, loss: 0.0007208925671875477 2023-01-22 14:22:59.426764: step: 680/466, loss: 0.004861995577812195 2023-01-22 14:23:00.007444: step: 682/466, loss: 0.01086998637765646 2023-01-22 14:23:00.687223: step: 684/466, loss: 0.004682690836489201 2023-01-22 14:23:01.269308: step: 686/466, loss: 0.00023975843214429915 2023-01-22 14:23:01.950035: step: 688/466, loss: 0.00241883029229939 2023-01-22 14:23:02.616893: step: 690/466, loss: 1.4486318826675415 2023-01-22 14:23:03.277752: step: 692/466, loss: 0.010795808397233486 2023-01-22 14:23:03.881869: step: 694/466, loss: 0.0011310448171570897 2023-01-22 14:23:04.456129: step: 696/466, loss: 0.2947859466075897 2023-01-22 14:23:05.040968: step: 698/466, loss: 9.568103996571153e-05 2023-01-22 14:23:05.568816: step: 700/466, loss: 0.004735068883746862 2023-01-22 14:23:06.139761: step: 702/466, loss: 0.0025225926656275988 2023-01-22 14:23:06.872749: step: 704/466, loss: 1.4744700193405151 2023-01-22 14:23:07.502542: step: 706/466, loss: 0.013943861238658428 2023-01-22 14:23:08.128138: step: 708/466, loss: 0.011442775838077068 2023-01-22 14:23:08.731488: step: 710/466, loss: 0.7480612993240356 2023-01-22 14:23:09.279501: step: 712/466, loss: 0.008141737431287766 2023-01-22 14:23:09.926856: step: 714/466, loss: 0.009706948883831501 2023-01-22 14:23:10.545211: step: 716/466, loss: 0.003717868123203516 2023-01-22 14:23:11.179058: step: 718/466, loss: 0.01340749766677618 2023-01-22 14:23:11.837417: step: 720/466, loss: 0.001570258755236864 2023-01-22 14:23:12.366984: step: 722/466, loss: 0.013996385037899017 2023-01-22 14:23:12.991334: step: 724/466, loss: 0.008785456418991089 2023-01-22 14:23:13.622118: step: 726/466, loss: 0.04284227639436722 2023-01-22 14:23:14.235144: step: 728/466, loss: 0.007695217151194811 2023-01-22 14:23:14.933473: step: 730/466, loss: 0.00438790675252676 2023-01-22 14:23:15.480687: step: 732/466, loss: 0.0010599372908473015 2023-01-22 14:23:16.060391: step: 734/466, loss: 0.0034963488578796387 2023-01-22 14:23:16.688866: step: 736/466, loss: 0.0013097894843667746 2023-01-22 14:23:17.334456: step: 738/466, loss: 0.00042985836626030505 2023-01-22 14:23:17.916475: step: 740/466, loss: 0.0009328377200290561 2023-01-22 14:23:18.555131: step: 742/466, loss: 0.9467132687568665 2023-01-22 14:23:19.144140: step: 744/466, loss: 0.0005049612373113632 2023-01-22 14:23:19.845816: step: 746/466, loss: 0.6162884831428528 2023-01-22 14:23:20.406757: step: 748/466, loss: 0.0048098317347466946 2023-01-22 14:23:21.049826: step: 750/466, loss: 0.02664458565413952 2023-01-22 14:23:21.695139: step: 752/466, loss: 0.01973588392138481 2023-01-22 14:23:22.293153: step: 754/466, loss: 0.09814440459012985 2023-01-22 14:23:22.926300: step: 756/466, loss: 0.009177024476230145 2023-01-22 14:23:23.508067: step: 758/466, loss: 0.006227452773600817 2023-01-22 14:23:24.142939: step: 760/466, loss: 7.741794252069667e-05 2023-01-22 14:23:24.818365: step: 762/466, loss: 0.024184376001358032 2023-01-22 14:23:25.431841: step: 764/466, loss: 0.0005583571037277579 2023-01-22 14:23:26.011069: step: 766/466, loss: 0.16849538683891296 2023-01-22 14:23:26.604765: step: 768/466, loss: 0.0015892699593678117 2023-01-22 14:23:27.188900: step: 770/466, loss: 3.370045669726096e-05 2023-01-22 14:23:27.804080: step: 772/466, loss: 0.007952672429382801 2023-01-22 14:23:28.382304: step: 774/466, loss: 0.0034520758781582117 2023-01-22 14:23:28.995205: step: 776/466, loss: 0.14407941699028015 2023-01-22 14:23:29.632941: step: 778/466, loss: 0.005839360412210226 2023-01-22 14:23:30.278552: step: 780/466, loss: 0.003146085422486067 2023-01-22 14:23:30.865828: step: 782/466, loss: 0.000626790220849216 2023-01-22 14:23:31.488000: step: 784/466, loss: 0.012974369339644909 2023-01-22 14:23:32.146086: step: 786/466, loss: 6.935989404155407e-06 2023-01-22 14:23:32.796126: step: 788/466, loss: 5.563042213907465e-05 2023-01-22 14:23:33.474162: step: 790/466, loss: 0.0004825711075682193 2023-01-22 14:23:34.062743: step: 792/466, loss: 0.014958060346543789 2023-01-22 14:23:34.660573: step: 794/466, loss: 0.012933915480971336 2023-01-22 14:23:35.265570: step: 796/466, loss: 5.619807325274451e-06 2023-01-22 14:23:35.819280: step: 798/466, loss: 0.00030835132929496467 2023-01-22 14:23:36.414637: step: 800/466, loss: 0.008561772294342518 2023-01-22 14:23:37.227791: step: 802/466, loss: 0.034978095442056656 2023-01-22 14:23:37.836297: step: 804/466, loss: 0.001175934448838234 2023-01-22 14:23:38.449779: step: 806/466, loss: 0.056332968175411224 2023-01-22 14:23:39.047581: step: 808/466, loss: 0.009805538691580296 2023-01-22 14:23:39.644143: step: 810/466, loss: 0.00032590050250291824 2023-01-22 14:23:40.173005: step: 812/466, loss: 0.005281270947307348 2023-01-22 14:23:40.721538: step: 814/466, loss: 0.00117440742906183 2023-01-22 14:23:41.310070: step: 816/466, loss: 0.003840024583041668 2023-01-22 14:23:41.896449: step: 818/466, loss: 0.00021635252051055431 2023-01-22 14:23:42.563729: step: 820/466, loss: 0.01948506012558937 2023-01-22 14:23:43.208825: step: 822/466, loss: 0.0017720076721161604 2023-01-22 14:23:43.783206: step: 824/466, loss: 0.003839016892015934 2023-01-22 14:23:44.329418: step: 826/466, loss: 1.951284139067866e-05 2023-01-22 14:23:44.884695: step: 828/466, loss: 0.005112734157592058 2023-01-22 14:23:45.485424: step: 830/466, loss: 4.354869088274427e-05 2023-01-22 14:23:46.111066: step: 832/466, loss: 0.0506337434053421 2023-01-22 14:23:46.722405: step: 834/466, loss: 0.0003722644178196788 2023-01-22 14:23:47.357029: step: 836/466, loss: 4.691800131695345e-05 2023-01-22 14:23:47.955015: step: 838/466, loss: 0.0004766513593494892 2023-01-22 14:23:48.558010: step: 840/466, loss: 4.759379226015881e-05 2023-01-22 14:23:49.127151: step: 842/466, loss: 0.0035098448861390352 2023-01-22 14:23:49.687050: step: 844/466, loss: 0.013005075044929981 2023-01-22 14:23:50.269346: step: 846/466, loss: 0.01408541202545166 2023-01-22 14:23:50.873146: step: 848/466, loss: 0.00024867948377504945 2023-01-22 14:23:51.521030: step: 850/466, loss: 0.016286736354231834 2023-01-22 14:23:52.168232: step: 852/466, loss: 0.0002770618593785912 2023-01-22 14:23:52.827339: step: 854/466, loss: 0.006749913562089205 2023-01-22 14:23:53.481338: step: 856/466, loss: 0.1688026338815689 2023-01-22 14:23:54.121643: step: 858/466, loss: 0.05698254704475403 2023-01-22 14:23:54.751113: step: 860/466, loss: 0.0005167814088054001 2023-01-22 14:23:55.318097: step: 862/466, loss: 0.00023729843087494373 2023-01-22 14:23:55.974344: step: 864/466, loss: 0.011458028107881546 2023-01-22 14:23:56.572604: step: 866/466, loss: 0.004539316054433584 2023-01-22 14:23:57.170066: step: 868/466, loss: 0.29944872856140137 2023-01-22 14:23:57.695300: step: 870/466, loss: 2.1613164790323935e-05 2023-01-22 14:23:58.304251: step: 872/466, loss: 0.06793544441461563 2023-01-22 14:23:58.891606: step: 874/466, loss: 0.01879395917057991 2023-01-22 14:23:59.478544: step: 876/466, loss: 0.003989489749073982 2023-01-22 14:24:00.151435: step: 878/466, loss: 0.0003668160643428564 2023-01-22 14:24:00.762184: step: 880/466, loss: 0.0028202959802001715 2023-01-22 14:24:01.425470: step: 882/466, loss: 0.013466686941683292 2023-01-22 14:24:01.986997: step: 884/466, loss: 0.008356669917702675 2023-01-22 14:24:02.617456: step: 886/466, loss: 0.0007907028775662184 2023-01-22 14:24:03.236567: step: 888/466, loss: 2.8483047572080977e-05 2023-01-22 14:24:03.817315: step: 890/466, loss: 0.04728523641824722 2023-01-22 14:24:04.439758: step: 892/466, loss: 0.000980711542069912 2023-01-22 14:24:05.009633: step: 894/466, loss: 0.01104824896901846 2023-01-22 14:24:05.640390: step: 896/466, loss: 0.04990740492939949 2023-01-22 14:24:06.241065: step: 898/466, loss: 0.02798609994351864 2023-01-22 14:24:06.801276: step: 900/466, loss: 0.005783462896943092 2023-01-22 14:24:07.414212: step: 902/466, loss: 1.5856760001042858e-05 2023-01-22 14:24:07.991796: step: 904/466, loss: 0.00041372032137587667 2023-01-22 14:24:08.597252: step: 906/466, loss: 0.83101886510849 2023-01-22 14:24:09.191115: step: 908/466, loss: 1.1503672112667118e-06 2023-01-22 14:24:09.792281: step: 910/466, loss: 0.001675062463618815 2023-01-22 14:24:10.469780: step: 912/466, loss: 0.556420624256134 2023-01-22 14:24:11.054572: step: 914/466, loss: 0.0005540436832234263 2023-01-22 14:24:11.693741: step: 916/466, loss: 0.0001962615642696619 2023-01-22 14:24:12.311656: step: 918/466, loss: 1.648462176322937 2023-01-22 14:24:12.913168: step: 920/466, loss: 0.0024485636968165636 2023-01-22 14:24:13.518204: step: 922/466, loss: 2.499064248695504e-05 2023-01-22 14:24:14.160805: step: 924/466, loss: 0.0010540563380345702 2023-01-22 14:24:14.725307: step: 926/466, loss: 0.0013296870747581124 2023-01-22 14:24:15.329741: step: 928/466, loss: 0.006474127992987633 2023-01-22 14:24:15.969183: step: 930/466, loss: 0.049021899700164795 2023-01-22 14:24:16.526769: step: 932/466, loss: 0.007290661800652742 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30630933933490756, 'r': 0.35803900005750106, 'f1': 0.3301601977783081}, 'combined': 0.2432759352050691, 'epoch': 39} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.34291022516236774, 'r': 0.33132140472793764, 'f1': 0.33701621952934335}, 'combined': 0.22351334766712924, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2946987666034156, 'r': 0.294140625, 'f1': 0.29441943127962084}, 'combined': 0.19627962085308054, 'epoch': 39} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3517525533077942, 'r': 0.30846931017979873, 'f1': 0.3286921366089453}, 'combined': 0.21451486810268006, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28756862953013806, 'r': 0.35086646828819507, 'f1': 0.31607970732970736}, 'combined': 0.23290083697978436, 'epoch': 39} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.3344725355235639, 'r': 0.3150870165961601, 'f1': 0.32449050439658006}, 'combined': 0.21520613763089244, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2217514124293785, 'r': 0.37380952380952376, 'f1': 0.2783687943262411}, 'combined': 0.18557919621749408, 'epoch': 39} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.43478260869565216, 'f1': 0.4444444444444445}, 'combined': 0.2962962962962963, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34210526315789475, 'r': 0.22413793103448276, 'f1': 0.2708333333333333}, 'combined': 0.18055555555555552, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33802641840514724, 'r': 0.28607169375464075, 'f1': 0.30988650073729845}, 'combined': 0.22833742159590412, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32186738272647164, 'r': 0.23269200396242753, 'f1': 0.27010981364482795}, 'combined': 0.17914018728776152, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3075250420553602, 'r': 0.29296419726107226, 'f1': 0.30006808177273747}, 'combined': 0.20004538784849163, 'epoch': 15} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.3639498658901325, 'r': 0.32389645777224096, 'f1': 0.342757003456365}, 'combined': 0.22369404436099607, 'epoch': 15} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4880952380952381, 'r': 0.44565217391304346, 'f1': 0.4659090909090909}, 'combined': 0.31060606060606055, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29515095352204085, 'r': 0.3399556523489161, 'f1': 0.3159728902784459}, 'combined': 0.23282212967885485, 'epoch': 26} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.32557465157002546, 'r': 0.326702182830874, 'f1': 0.3261374426704058}, 'combined': 0.2162984075741551, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26}