Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:27:27.414870: step: 2/470, loss: 18.837427139282227 2023-01-24 01:27:28.174972: step: 4/470, loss: 6.08452033996582 2023-01-24 01:27:28.953490: step: 6/470, loss: 14.960529327392578 2023-01-24 01:27:29.652577: step: 8/470, loss: 9.693341255187988 2023-01-24 01:27:30.353380: step: 10/470, loss: 20.36423683166504 2023-01-24 01:27:31.180699: step: 12/470, loss: 12.639719009399414 2023-01-24 01:27:31.959300: step: 14/470, loss: 10.887170791625977 2023-01-24 01:27:32.820003: step: 16/470, loss: 30.509140014648438 2023-01-24 01:27:33.577075: step: 18/470, loss: 21.185964584350586 2023-01-24 01:27:34.282138: step: 20/470, loss: 22.507814407348633 2023-01-24 01:27:35.027885: step: 22/470, loss: 13.584888458251953 2023-01-24 01:27:35.723433: step: 24/470, loss: 10.651383399963379 2023-01-24 01:27:36.549751: step: 26/470, loss: 19.368906021118164 2023-01-24 01:27:37.299164: step: 28/470, loss: 14.308650970458984 2023-01-24 01:27:38.068846: step: 30/470, loss: 7.055563926696777 2023-01-24 01:27:38.884750: step: 32/470, loss: 14.257768630981445 2023-01-24 01:27:39.580914: step: 34/470, loss: 8.895671844482422 2023-01-24 01:27:40.262397: step: 36/470, loss: 26.354061126708984 2023-01-24 01:27:40.975394: step: 38/470, loss: 8.183581352233887 2023-01-24 01:27:41.684651: step: 40/470, loss: 23.308879852294922 2023-01-24 01:27:42.382339: step: 42/470, loss: 9.376198768615723 2023-01-24 01:27:43.142489: step: 44/470, loss: 40.81726837158203 2023-01-24 01:27:43.878668: step: 46/470, loss: 15.613691329956055 2023-01-24 01:27:44.550172: step: 48/470, loss: 13.526906967163086 2023-01-24 01:27:45.299721: step: 50/470, loss: 24.901996612548828 2023-01-24 01:27:46.089659: step: 52/470, loss: 12.065844535827637 2023-01-24 01:27:46.788215: step: 54/470, loss: 42.65087127685547 2023-01-24 01:27:47.650442: step: 56/470, loss: 27.784313201904297 2023-01-24 01:27:48.362576: step: 58/470, loss: 16.313859939575195 2023-01-24 01:27:49.106910: step: 60/470, loss: 23.540321350097656 2023-01-24 01:27:49.936051: step: 62/470, loss: 13.617783546447754 2023-01-24 01:27:50.661084: step: 64/470, loss: 22.79100799560547 2023-01-24 01:27:51.391833: step: 66/470, loss: 19.00048065185547 2023-01-24 01:27:52.211767: step: 68/470, loss: 19.12494468688965 2023-01-24 01:27:52.890322: step: 70/470, loss: 12.588727951049805 2023-01-24 01:27:53.612382: step: 72/470, loss: 16.181161880493164 2023-01-24 01:27:54.375690: step: 74/470, loss: 17.37213897705078 2023-01-24 01:27:55.204290: step: 76/470, loss: 13.227319717407227 2023-01-24 01:27:55.919068: step: 78/470, loss: 21.182538986206055 2023-01-24 01:27:56.700873: step: 80/470, loss: 16.062942504882812 2023-01-24 01:27:57.441172: step: 82/470, loss: 10.652351379394531 2023-01-24 01:27:58.185290: step: 84/470, loss: 5.419546604156494 2023-01-24 01:27:58.955872: step: 86/470, loss: 10.083927154541016 2023-01-24 01:27:59.721704: step: 88/470, loss: 6.871520042419434 2023-01-24 01:28:00.400001: step: 90/470, loss: 11.351171493530273 2023-01-24 01:28:01.199118: step: 92/470, loss: 20.639118194580078 2023-01-24 01:28:01.925976: step: 94/470, loss: 4.432590484619141 2023-01-24 01:28:02.681108: step: 96/470, loss: 18.61043357849121 2023-01-24 01:28:03.562290: step: 98/470, loss: 5.183197021484375 2023-01-24 01:28:04.245699: step: 100/470, loss: 22.156150817871094 2023-01-24 01:28:05.017110: step: 102/470, loss: 11.253484725952148 2023-01-24 01:28:05.714877: step: 104/470, loss: 9.191667556762695 2023-01-24 01:28:06.360417: step: 106/470, loss: 17.978792190551758 2023-01-24 01:28:07.030866: step: 108/470, loss: 26.32501983642578 2023-01-24 01:28:07.741279: step: 110/470, loss: 15.27033805847168 2023-01-24 01:28:08.651672: step: 112/470, loss: 18.11617660522461 2023-01-24 01:28:09.419768: step: 114/470, loss: 5.689704895019531 2023-01-24 01:28:10.167309: step: 116/470, loss: 17.882823944091797 2023-01-24 01:28:10.975025: step: 118/470, loss: 14.880837440490723 2023-01-24 01:28:11.703195: step: 120/470, loss: 3.9773387908935547 2023-01-24 01:28:12.681789: step: 122/470, loss: 17.60595703125 2023-01-24 01:28:13.419916: step: 124/470, loss: 13.977578163146973 2023-01-24 01:28:14.119377: step: 126/470, loss: 7.939039707183838 2023-01-24 01:28:14.914816: step: 128/470, loss: 5.822059631347656 2023-01-24 01:28:15.605085: step: 130/470, loss: 4.221154689788818 2023-01-24 01:28:16.271682: step: 132/470, loss: 11.863361358642578 2023-01-24 01:28:17.038001: step: 134/470, loss: 7.579117774963379 2023-01-24 01:28:17.712314: step: 136/470, loss: 16.241559982299805 2023-01-24 01:28:18.438374: step: 138/470, loss: 13.835257530212402 2023-01-24 01:28:19.192285: step: 140/470, loss: 11.795804977416992 2023-01-24 01:28:19.943859: step: 142/470, loss: 10.111894607543945 2023-01-24 01:28:20.786788: step: 144/470, loss: 3.8433728218078613 2023-01-24 01:28:21.582802: step: 146/470, loss: 8.96932315826416 2023-01-24 01:28:22.351041: step: 148/470, loss: 3.0903584957122803 2023-01-24 01:28:23.123583: step: 150/470, loss: 9.47050666809082 2023-01-24 01:28:23.791293: step: 152/470, loss: 12.27602767944336 2023-01-24 01:28:24.491336: step: 154/470, loss: 5.426934242248535 2023-01-24 01:28:25.206884: step: 156/470, loss: 12.116996765136719 2023-01-24 01:28:25.906647: step: 158/470, loss: 9.07990550994873 2023-01-24 01:28:26.633986: step: 160/470, loss: 4.529577732086182 2023-01-24 01:28:27.391015: step: 162/470, loss: 10.44383430480957 2023-01-24 01:28:28.096685: step: 164/470, loss: 6.473989963531494 2023-01-24 01:28:28.851712: step: 166/470, loss: 4.3379807472229 2023-01-24 01:28:29.648269: step: 168/470, loss: 9.986034393310547 2023-01-24 01:28:30.457092: step: 170/470, loss: 3.3363029956817627 2023-01-24 01:28:31.199911: step: 172/470, loss: 7.920467376708984 2023-01-24 01:28:32.025149: step: 174/470, loss: 8.45045280456543 2023-01-24 01:28:32.785912: step: 176/470, loss: 12.504642486572266 2023-01-24 01:28:33.474479: step: 178/470, loss: 5.040508270263672 2023-01-24 01:28:34.190372: step: 180/470, loss: 7.040112018585205 2023-01-24 01:28:34.975096: step: 182/470, loss: 10.85146713256836 2023-01-24 01:28:35.757984: step: 184/470, loss: 10.689022064208984 2023-01-24 01:28:36.503882: step: 186/470, loss: 3.291524648666382 2023-01-24 01:28:37.235978: step: 188/470, loss: 6.111932277679443 2023-01-24 01:28:37.906081: step: 190/470, loss: 2.9675354957580566 2023-01-24 01:28:38.751684: step: 192/470, loss: 4.350892066955566 2023-01-24 01:28:39.464491: step: 194/470, loss: 6.658123970031738 2023-01-24 01:28:40.206884: step: 196/470, loss: 7.661910057067871 2023-01-24 01:28:40.907626: step: 198/470, loss: 4.766957759857178 2023-01-24 01:28:41.695432: step: 200/470, loss: 4.938291549682617 2023-01-24 01:28:42.387335: step: 202/470, loss: 12.754507064819336 2023-01-24 01:28:43.224874: step: 204/470, loss: 8.91333293914795 2023-01-24 01:28:44.019433: step: 206/470, loss: 17.913904190063477 2023-01-24 01:28:44.728652: step: 208/470, loss: 5.756092071533203 2023-01-24 01:28:45.489564: step: 210/470, loss: 7.458624839782715 2023-01-24 01:28:46.255359: step: 212/470, loss: 15.039164543151855 2023-01-24 01:28:46.980909: step: 214/470, loss: 7.663404941558838 2023-01-24 01:28:47.664044: step: 216/470, loss: 2.9406890869140625 2023-01-24 01:28:48.480017: step: 218/470, loss: 7.196639060974121 2023-01-24 01:28:49.240449: step: 220/470, loss: 11.003211975097656 2023-01-24 01:28:49.926047: step: 222/470, loss: 2.7577972412109375 2023-01-24 01:28:50.684323: step: 224/470, loss: 13.001374244689941 2023-01-24 01:28:51.469517: step: 226/470, loss: 8.773453712463379 2023-01-24 01:28:52.131863: step: 228/470, loss: 8.044055938720703 2023-01-24 01:28:52.833394: step: 230/470, loss: 3.6388256549835205 2023-01-24 01:28:53.546157: step: 232/470, loss: 10.504514694213867 2023-01-24 01:28:54.331400: step: 234/470, loss: 8.363357543945312 2023-01-24 01:28:54.996286: step: 236/470, loss: 13.068225860595703 2023-01-24 01:28:55.714617: step: 238/470, loss: 9.169629096984863 2023-01-24 01:28:56.401165: step: 240/470, loss: 10.499161720275879 2023-01-24 01:28:57.196843: step: 242/470, loss: 2.7228548526763916 2023-01-24 01:28:57.933350: step: 244/470, loss: 2.8359575271606445 2023-01-24 01:28:58.594329: step: 246/470, loss: 11.094869613647461 2023-01-24 01:28:59.318060: step: 248/470, loss: 6.501433849334717 2023-01-24 01:29:00.028904: step: 250/470, loss: 5.333680629730225 2023-01-24 01:29:00.832227: step: 252/470, loss: 8.988299369812012 2023-01-24 01:29:01.596027: step: 254/470, loss: 3.2130463123321533 2023-01-24 01:29:02.330770: step: 256/470, loss: 6.5167083740234375 2023-01-24 01:29:03.067463: step: 258/470, loss: 5.343503952026367 2023-01-24 01:29:03.846427: step: 260/470, loss: 4.025961399078369 2023-01-24 01:29:04.553996: step: 262/470, loss: 5.961394786834717 2023-01-24 01:29:05.243878: step: 264/470, loss: 2.129112482070923 2023-01-24 01:29:05.907961: step: 266/470, loss: 3.658996820449829 2023-01-24 01:29:06.634413: step: 268/470, loss: 6.239549160003662 2023-01-24 01:29:07.355596: step: 270/470, loss: 10.180370330810547 2023-01-24 01:29:08.078122: step: 272/470, loss: 7.914863586425781 2023-01-24 01:29:08.903174: step: 274/470, loss: 6.526845932006836 2023-01-24 01:29:09.635373: step: 276/470, loss: 9.641494750976562 2023-01-24 01:29:10.324605: step: 278/470, loss: 8.146503448486328 2023-01-24 01:29:11.021721: step: 280/470, loss: 2.807060480117798 2023-01-24 01:29:11.653683: step: 282/470, loss: 7.910976409912109 2023-01-24 01:29:12.438812: step: 284/470, loss: 9.166391372680664 2023-01-24 01:29:13.304008: step: 286/470, loss: 12.144872665405273 2023-01-24 01:29:14.020278: step: 288/470, loss: 12.169351577758789 2023-01-24 01:29:14.734249: step: 290/470, loss: 5.240389823913574 2023-01-24 01:29:15.378152: step: 292/470, loss: 11.680598258972168 2023-01-24 01:29:16.160854: step: 294/470, loss: 5.746752738952637 2023-01-24 01:29:16.976758: step: 296/470, loss: 4.9092020988464355 2023-01-24 01:29:17.693972: step: 298/470, loss: 4.1507954597473145 2023-01-24 01:29:18.439731: step: 300/470, loss: 13.095625877380371 2023-01-24 01:29:19.215497: step: 302/470, loss: 7.79514217376709 2023-01-24 01:29:20.035384: step: 304/470, loss: 1.9198462963104248 2023-01-24 01:29:20.793834: step: 306/470, loss: 4.214913845062256 2023-01-24 01:29:21.530413: step: 308/470, loss: 5.49434757232666 2023-01-24 01:29:22.280972: step: 310/470, loss: 2.4591972827911377 2023-01-24 01:29:23.028657: step: 312/470, loss: 7.487868309020996 2023-01-24 01:29:23.883471: step: 314/470, loss: 7.941952228546143 2023-01-24 01:29:24.637039: step: 316/470, loss: 12.883258819580078 2023-01-24 01:29:25.418977: step: 318/470, loss: 12.775262832641602 2023-01-24 01:29:26.163440: step: 320/470, loss: 5.390825271606445 2023-01-24 01:29:26.913732: step: 322/470, loss: 7.754670143127441 2023-01-24 01:29:27.590173: step: 324/470, loss: 8.214757919311523 2023-01-24 01:29:28.260361: step: 326/470, loss: 8.570985794067383 2023-01-24 01:29:28.994384: step: 328/470, loss: 2.6725447177886963 2023-01-24 01:29:29.816474: step: 330/470, loss: 2.4087343215942383 2023-01-24 01:29:30.632136: step: 332/470, loss: 8.013860702514648 2023-01-24 01:29:31.368933: step: 334/470, loss: 7.368355751037598 2023-01-24 01:29:32.192344: step: 336/470, loss: 2.468770742416382 2023-01-24 01:29:32.919063: step: 338/470, loss: 11.629497528076172 2023-01-24 01:29:33.616000: step: 340/470, loss: 3.3276002407073975 2023-01-24 01:29:34.510101: step: 342/470, loss: 19.408966064453125 2023-01-24 01:29:35.208162: step: 344/470, loss: 2.7381513118743896 2023-01-24 01:29:36.040015: step: 346/470, loss: 12.861856460571289 2023-01-24 01:29:36.753841: step: 348/470, loss: 5.905899524688721 2023-01-24 01:29:37.521684: step: 350/470, loss: 8.441301345825195 2023-01-24 01:29:38.269263: step: 352/470, loss: 3.920839309692383 2023-01-24 01:29:39.028176: step: 354/470, loss: 9.670235633850098 2023-01-24 01:29:39.880888: step: 356/470, loss: 1.8189231157302856 2023-01-24 01:29:40.673993: step: 358/470, loss: 1.7387617826461792 2023-01-24 01:29:41.466212: step: 360/470, loss: 10.194907188415527 2023-01-24 01:29:42.121574: step: 362/470, loss: 2.97975492477417 2023-01-24 01:29:42.881674: step: 364/470, loss: 5.879580497741699 2023-01-24 01:29:43.665405: step: 366/470, loss: 6.046767711639404 2023-01-24 01:29:44.413765: step: 368/470, loss: 5.067502975463867 2023-01-24 01:29:45.180199: step: 370/470, loss: 9.547842025756836 2023-01-24 01:29:45.976848: step: 372/470, loss: 8.196964263916016 2023-01-24 01:29:46.653149: step: 374/470, loss: 5.261054992675781 2023-01-24 01:29:47.342443: step: 376/470, loss: 2.565000534057617 2023-01-24 01:29:48.033133: step: 378/470, loss: 2.8714776039123535 2023-01-24 01:29:48.713074: step: 380/470, loss: 3.206409454345703 2023-01-24 01:29:49.498939: step: 382/470, loss: 6.930261611938477 2023-01-24 01:29:50.312010: step: 384/470, loss: 2.868443012237549 2023-01-24 01:29:51.028569: step: 386/470, loss: 3.763054132461548 2023-01-24 01:29:51.729927: step: 388/470, loss: 5.591135025024414 2023-01-24 01:29:52.460569: step: 390/470, loss: 6.832804203033447 2023-01-24 01:29:53.170793: step: 392/470, loss: 2.2691705226898193 2023-01-24 01:29:53.929856: step: 394/470, loss: 2.894192695617676 2023-01-24 01:29:54.717244: step: 396/470, loss: 3.6917879581451416 2023-01-24 01:29:55.453577: step: 398/470, loss: 1.6198911666870117 2023-01-24 01:29:56.209801: step: 400/470, loss: 2.115755558013916 2023-01-24 01:29:56.930635: step: 402/470, loss: 3.108741283416748 2023-01-24 01:29:57.625641: step: 404/470, loss: 0.9105663895606995 2023-01-24 01:29:58.405211: step: 406/470, loss: 4.219695091247559 2023-01-24 01:29:59.129034: step: 408/470, loss: 2.287531852722168 2023-01-24 01:29:59.864204: step: 410/470, loss: 4.253011226654053 2023-01-24 01:30:00.607066: step: 412/470, loss: 4.190493106842041 2023-01-24 01:30:01.328505: step: 414/470, loss: 2.540191411972046 2023-01-24 01:30:02.064603: step: 416/470, loss: 1.3966948986053467 2023-01-24 01:30:02.830697: step: 418/470, loss: 2.3591842651367188 2023-01-24 01:30:03.587319: step: 420/470, loss: 2.5741522312164307 2023-01-24 01:30:04.427754: step: 422/470, loss: 0.9306092262268066 2023-01-24 01:30:05.155847: step: 424/470, loss: 1.9692203998565674 2023-01-24 01:30:05.917515: step: 426/470, loss: 6.810105323791504 2023-01-24 01:30:06.552000: step: 428/470, loss: 3.282905101776123 2023-01-24 01:30:07.294847: step: 430/470, loss: 1.8343316316604614 2023-01-24 01:30:07.996865: step: 432/470, loss: 1.6074039936065674 2023-01-24 01:30:08.704295: step: 434/470, loss: 0.9173816442489624 2023-01-24 01:30:09.400098: step: 436/470, loss: 2.9239602088928223 2023-01-24 01:30:10.119964: step: 438/470, loss: 2.229067802429199 2023-01-24 01:30:10.783270: step: 440/470, loss: 6.852571487426758 2023-01-24 01:30:11.515588: step: 442/470, loss: 3.8243494033813477 2023-01-24 01:30:12.228091: step: 444/470, loss: 5.331119537353516 2023-01-24 01:30:13.003536: step: 446/470, loss: 1.5648657083511353 2023-01-24 01:30:13.756068: step: 448/470, loss: 5.23274040222168 2023-01-24 01:30:14.556116: step: 450/470, loss: 2.8647255897521973 2023-01-24 01:30:15.283328: step: 452/470, loss: 4.566137790679932 2023-01-24 01:30:16.011905: step: 454/470, loss: 2.325453042984009 2023-01-24 01:30:16.682940: step: 456/470, loss: 1.3142560720443726 2023-01-24 01:30:17.438707: step: 458/470, loss: 1.2290973663330078 2023-01-24 01:30:18.140984: step: 460/470, loss: 0.9515959620475769 2023-01-24 01:30:18.836539: step: 462/470, loss: 0.4390300512313843 2023-01-24 01:30:19.781911: step: 464/470, loss: 5.895995140075684 2023-01-24 01:30:20.544333: step: 466/470, loss: 10.302230834960938 2023-01-24 01:30:21.363168: step: 468/470, loss: 0.4556967318058014 2023-01-24 01:30:22.195096: step: 470/470, loss: 4.270013332366943 2023-01-24 01:30:22.897903: step: 472/470, loss: 1.8166708946228027 2023-01-24 01:30:23.590353: step: 474/470, loss: 1.0261110067367554 2023-01-24 01:30:24.396199: step: 476/470, loss: 7.151349067687988 2023-01-24 01:30:25.208470: step: 478/470, loss: 1.8550636768341064 2023-01-24 01:30:25.971687: step: 480/470, loss: 4.943986415863037 2023-01-24 01:30:26.638497: step: 482/470, loss: 1.3664697408676147 2023-01-24 01:30:27.260233: step: 484/470, loss: 0.2491106390953064 2023-01-24 01:30:28.066695: step: 486/470, loss: 2.626844644546509 2023-01-24 01:30:28.761696: step: 488/470, loss: 2.570115804672241 2023-01-24 01:30:29.470232: step: 490/470, loss: 4.202301502227783 2023-01-24 01:30:30.233278: step: 492/470, loss: 0.742546021938324 2023-01-24 01:30:30.959557: step: 494/470, loss: 2.54813814163208 2023-01-24 01:30:31.703921: step: 496/470, loss: 2.840670585632324 2023-01-24 01:30:32.407401: step: 498/470, loss: 1.0759106874465942 2023-01-24 01:30:33.188684: step: 500/470, loss: 3.8876378536224365 2023-01-24 01:30:33.852214: step: 502/470, loss: 1.4235085248947144 2023-01-24 01:30:34.528540: step: 504/470, loss: 0.9331973195075989 2023-01-24 01:30:35.225167: step: 506/470, loss: 0.9420661926269531 2023-01-24 01:30:35.999931: step: 508/470, loss: 3.353362798690796 2023-01-24 01:30:36.784135: step: 510/470, loss: 7.415338516235352 2023-01-24 01:30:37.537262: step: 512/470, loss: 1.1503777503967285 2023-01-24 01:30:38.269143: step: 514/470, loss: 0.5799806118011475 2023-01-24 01:30:39.028608: step: 516/470, loss: 2.527512788772583 2023-01-24 01:30:39.834348: step: 518/470, loss: 3.385176181793213 2023-01-24 01:30:40.525821: step: 520/470, loss: 1.6425435543060303 2023-01-24 01:30:41.206463: step: 522/470, loss: 4.16226863861084 2023-01-24 01:30:42.077116: step: 524/470, loss: 6.3848772048950195 2023-01-24 01:30:42.812226: step: 526/470, loss: 4.581264019012451 2023-01-24 01:30:43.531098: step: 528/470, loss: 1.923591136932373 2023-01-24 01:30:44.316661: step: 530/470, loss: 4.6132893562316895 2023-01-24 01:30:45.094910: step: 532/470, loss: 2.3744194507598877 2023-01-24 01:30:45.809780: step: 534/470, loss: 1.7940552234649658 2023-01-24 01:30:46.564910: step: 536/470, loss: 1.6240293979644775 2023-01-24 01:30:47.242426: step: 538/470, loss: 5.111881256103516 2023-01-24 01:30:48.006909: step: 540/470, loss: 6.182251453399658 2023-01-24 01:30:48.755003: step: 542/470, loss: 5.470218181610107 2023-01-24 01:30:49.558235: step: 544/470, loss: 3.7869865894317627 2023-01-24 01:30:50.311562: step: 546/470, loss: 2.6353919506073 2023-01-24 01:30:51.015794: step: 548/470, loss: 3.2712767124176025 2023-01-24 01:30:51.849909: step: 550/470, loss: 6.047873497009277 2023-01-24 01:30:52.585659: step: 552/470, loss: 5.375014305114746 2023-01-24 01:30:53.247070: step: 554/470, loss: 4.62559175491333 2023-01-24 01:30:53.946505: step: 556/470, loss: 2.0044353008270264 2023-01-24 01:30:54.668830: step: 558/470, loss: 1.5262951850891113 2023-01-24 01:30:55.397162: step: 560/470, loss: 5.368670463562012 2023-01-24 01:30:56.231995: step: 562/470, loss: 4.242199897766113 2023-01-24 01:30:56.969662: step: 564/470, loss: 0.8480384349822998 2023-01-24 01:30:57.725161: step: 566/470, loss: 4.037248134613037 2023-01-24 01:30:58.412967: step: 568/470, loss: 3.518568515777588 2023-01-24 01:30:59.151423: step: 570/470, loss: 2.1327133178710938 2023-01-24 01:30:59.865636: step: 572/470, loss: 2.1160264015197754 2023-01-24 01:31:00.545231: step: 574/470, loss: 3.609269618988037 2023-01-24 01:31:01.248752: step: 576/470, loss: 1.870286226272583 2023-01-24 01:31:01.958814: step: 578/470, loss: 2.7097601890563965 2023-01-24 01:31:02.733951: step: 580/470, loss: 14.343367576599121 2023-01-24 01:31:03.520641: step: 582/470, loss: 2.831430196762085 2023-01-24 01:31:04.298998: step: 584/470, loss: 1.094900369644165 2023-01-24 01:31:05.040497: step: 586/470, loss: 1.2436193227767944 2023-01-24 01:31:05.790688: step: 588/470, loss: 2.6703548431396484 2023-01-24 01:31:06.610613: step: 590/470, loss: 5.652037143707275 2023-01-24 01:31:07.373931: step: 592/470, loss: 0.5362025499343872 2023-01-24 01:31:08.105905: step: 594/470, loss: 2.1328539848327637 2023-01-24 01:31:08.820777: step: 596/470, loss: 1.8717509508132935 2023-01-24 01:31:09.516726: step: 598/470, loss: 0.5863834619522095 2023-01-24 01:31:10.249822: step: 600/470, loss: 1.6512610912322998 2023-01-24 01:31:11.066359: step: 602/470, loss: 2.197679281234741 2023-01-24 01:31:11.784335: step: 604/470, loss: 1.5977303981781006 2023-01-24 01:31:12.495295: step: 606/470, loss: 0.8425391316413879 2023-01-24 01:31:13.246035: step: 608/470, loss: 2.4796876907348633 2023-01-24 01:31:14.006530: step: 610/470, loss: 0.7105655670166016 2023-01-24 01:31:14.752788: step: 612/470, loss: 4.145379066467285 2023-01-24 01:31:15.500914: step: 614/470, loss: 1.9634544849395752 2023-01-24 01:31:16.223488: step: 616/470, loss: 3.2371010780334473 2023-01-24 01:31:17.009477: step: 618/470, loss: 0.9649048447608948 2023-01-24 01:31:17.775732: step: 620/470, loss: 2.4109811782836914 2023-01-24 01:31:18.530333: step: 622/470, loss: 2.8423123359680176 2023-01-24 01:31:19.248013: step: 624/470, loss: 0.7191053628921509 2023-01-24 01:31:19.968237: step: 626/470, loss: 1.9427146911621094 2023-01-24 01:31:20.611139: step: 628/470, loss: 5.957315921783447 2023-01-24 01:31:21.334244: step: 630/470, loss: 1.467832088470459 2023-01-24 01:31:21.997843: step: 632/470, loss: 0.9858548641204834 2023-01-24 01:31:22.788873: step: 634/470, loss: 1.0836055278778076 2023-01-24 01:31:23.562187: step: 636/470, loss: 2.614570140838623 2023-01-24 01:31:24.258751: step: 638/470, loss: 0.810189425945282 2023-01-24 01:31:25.065076: step: 640/470, loss: 3.5685133934020996 2023-01-24 01:31:25.867238: step: 642/470, loss: 2.486414909362793 2023-01-24 01:31:26.537316: step: 644/470, loss: 1.3245995044708252 2023-01-24 01:31:27.295695: step: 646/470, loss: 1.389783263206482 2023-01-24 01:31:28.064122: step: 648/470, loss: 4.989058494567871 2023-01-24 01:31:28.749595: step: 650/470, loss: 0.5294803977012634 2023-01-24 01:31:29.409490: step: 652/470, loss: 2.751239776611328 2023-01-24 01:31:30.067648: step: 654/470, loss: 3.698434829711914 2023-01-24 01:31:30.837734: step: 656/470, loss: 1.105159878730774 2023-01-24 01:31:31.521985: step: 658/470, loss: 1.0109115839004517 2023-01-24 01:31:32.267668: step: 660/470, loss: 3.3653693199157715 2023-01-24 01:31:32.987371: step: 662/470, loss: 3.7181482315063477 2023-01-24 01:31:33.746751: step: 664/470, loss: 2.8250913619995117 2023-01-24 01:31:34.503651: step: 666/470, loss: 0.444732666015625 2023-01-24 01:31:35.199434: step: 668/470, loss: 1.967690348625183 2023-01-24 01:31:35.968223: step: 670/470, loss: 2.671586513519287 2023-01-24 01:31:36.685929: step: 672/470, loss: 0.917386531829834 2023-01-24 01:31:37.458019: step: 674/470, loss: 4.624493598937988 2023-01-24 01:31:38.129465: step: 676/470, loss: 1.2864645719528198 2023-01-24 01:31:38.880628: step: 678/470, loss: 2.9388926029205322 2023-01-24 01:31:39.639338: step: 680/470, loss: 2.476867437362671 2023-01-24 01:31:40.391427: step: 682/470, loss: 1.6455367803573608 2023-01-24 01:31:41.122329: step: 684/470, loss: 0.6645324230194092 2023-01-24 01:31:41.880039: step: 686/470, loss: 2.182851791381836 2023-01-24 01:31:42.581581: step: 688/470, loss: 3.1452040672302246 2023-01-24 01:31:43.319550: step: 690/470, loss: 0.7404079437255859 2023-01-24 01:31:44.094443: step: 692/470, loss: 0.627692461013794 2023-01-24 01:31:44.762277: step: 694/470, loss: 1.6905364990234375 2023-01-24 01:31:45.552036: step: 696/470, loss: 1.0452758073806763 2023-01-24 01:31:46.308207: step: 698/470, loss: 0.6959218382835388 2023-01-24 01:31:47.078967: step: 700/470, loss: 1.0421323776245117 2023-01-24 01:31:47.810242: step: 702/470, loss: 1.0812638998031616 2023-01-24 01:31:48.547857: step: 704/470, loss: 3.0148813724517822 2023-01-24 01:31:49.375630: step: 706/470, loss: 0.7072650790214539 2023-01-24 01:31:50.129958: step: 708/470, loss: 0.5093586444854736 2023-01-24 01:31:50.854714: step: 710/470, loss: 1.3678839206695557 2023-01-24 01:31:51.561230: step: 712/470, loss: 1.5004351139068604 2023-01-24 01:31:52.333524: step: 714/470, loss: 1.0333999395370483 2023-01-24 01:31:53.053563: step: 716/470, loss: 5.288928985595703 2023-01-24 01:31:53.832593: step: 718/470, loss: 4.261580467224121 2023-01-24 01:31:54.563332: step: 720/470, loss: 1.5531013011932373 2023-01-24 01:31:55.264344: step: 722/470, loss: 4.859399795532227 2023-01-24 01:31:55.993521: step: 724/470, loss: 7.980612754821777 2023-01-24 01:31:56.767694: step: 726/470, loss: 4.004562854766846 2023-01-24 01:31:57.477618: step: 728/470, loss: 2.1123268604278564 2023-01-24 01:31:58.273023: step: 730/470, loss: 0.8399812579154968 2023-01-24 01:31:59.088020: step: 732/470, loss: 0.6894631385803223 2023-01-24 01:31:59.833892: step: 734/470, loss: 6.760388374328613 2023-01-24 01:32:00.537759: step: 736/470, loss: 1.1757826805114746 2023-01-24 01:32:01.226817: step: 738/470, loss: 8.832976341247559 2023-01-24 01:32:01.984824: step: 740/470, loss: 2.382441997528076 2023-01-24 01:32:02.734963: step: 742/470, loss: 1.6580554246902466 2023-01-24 01:32:03.451662: step: 744/470, loss: 1.966712236404419 2023-01-24 01:32:04.086449: step: 746/470, loss: 0.506031334400177 2023-01-24 01:32:04.853792: step: 748/470, loss: 1.3848555088043213 2023-01-24 01:32:05.562059: step: 750/470, loss: 1.399755597114563 2023-01-24 01:32:06.278123: step: 752/470, loss: 2.190246343612671 2023-01-24 01:32:07.103214: step: 754/470, loss: 3.1123580932617188 2023-01-24 01:32:07.784750: step: 756/470, loss: 0.292216420173645 2023-01-24 01:32:08.513522: step: 758/470, loss: 5.910808086395264 2023-01-24 01:32:09.156679: step: 760/470, loss: 3.2733473777770996 2023-01-24 01:32:09.988368: step: 762/470, loss: 2.373502254486084 2023-01-24 01:32:10.707799: step: 764/470, loss: 1.994886875152588 2023-01-24 01:32:11.420958: step: 766/470, loss: 8.774368286132812 2023-01-24 01:32:12.086298: step: 768/470, loss: 1.556334376335144 2023-01-24 01:32:12.905666: step: 770/470, loss: 1.234019160270691 2023-01-24 01:32:13.784601: step: 772/470, loss: 2.650935649871826 2023-01-24 01:32:14.498895: step: 774/470, loss: 2.818275213241577 2023-01-24 01:32:15.200589: step: 776/470, loss: 1.808180570602417 2023-01-24 01:32:15.918621: step: 778/470, loss: 1.791804552078247 2023-01-24 01:32:16.640248: step: 780/470, loss: 6.736164569854736 2023-01-24 01:32:17.421973: step: 782/470, loss: 1.3359016180038452 2023-01-24 01:32:18.261272: step: 784/470, loss: 2.8063719272613525 2023-01-24 01:32:19.008165: step: 786/470, loss: 1.5591973066329956 2023-01-24 01:32:19.705733: step: 788/470, loss: 2.6075632572174072 2023-01-24 01:32:20.372960: step: 790/470, loss: 6.299186706542969 2023-01-24 01:32:21.145769: step: 792/470, loss: 2.2583096027374268 2023-01-24 01:32:21.943351: step: 794/470, loss: 0.5692673921585083 2023-01-24 01:32:22.700546: step: 796/470, loss: 1.2285269498825073 2023-01-24 01:32:23.503833: step: 798/470, loss: 3.3300468921661377 2023-01-24 01:32:24.219400: step: 800/470, loss: 0.6767564415931702 2023-01-24 01:32:24.955927: step: 802/470, loss: 1.033461570739746 2023-01-24 01:32:25.667506: step: 804/470, loss: 0.8145455121994019 2023-01-24 01:32:26.386114: step: 806/470, loss: 1.6333547830581665 2023-01-24 01:32:27.139578: step: 808/470, loss: 0.8114295601844788 2023-01-24 01:32:27.841674: step: 810/470, loss: 1.497851014137268 2023-01-24 01:32:28.514817: step: 812/470, loss: 6.200095176696777 2023-01-24 01:32:29.353235: step: 814/470, loss: 6.356876373291016 2023-01-24 01:32:30.036955: step: 816/470, loss: 1.4136056900024414 2023-01-24 01:32:30.761473: step: 818/470, loss: 0.7242187261581421 2023-01-24 01:32:31.509602: step: 820/470, loss: 4.6752166748046875 2023-01-24 01:32:32.229137: step: 822/470, loss: 1.517682433128357 2023-01-24 01:32:32.957605: step: 824/470, loss: 1.7950592041015625 2023-01-24 01:32:33.658338: step: 826/470, loss: 0.6983448266983032 2023-01-24 01:32:34.386672: step: 828/470, loss: 6.089503288269043 2023-01-24 01:32:35.189255: step: 830/470, loss: 0.9322970509529114 2023-01-24 01:32:35.866202: step: 832/470, loss: 1.1994924545288086 2023-01-24 01:32:36.578385: step: 834/470, loss: 7.563248157501221 2023-01-24 01:32:37.269945: step: 836/470, loss: 1.9371384382247925 2023-01-24 01:32:38.048170: step: 838/470, loss: 0.46454671025276184 2023-01-24 01:32:38.755914: step: 840/470, loss: 11.879871368408203 2023-01-24 01:32:39.457598: step: 842/470, loss: 1.3663816452026367 2023-01-24 01:32:40.186448: step: 844/470, loss: 1.1602144241333008 2023-01-24 01:32:40.882294: step: 846/470, loss: 1.8294084072113037 2023-01-24 01:32:41.713004: step: 848/470, loss: 1.413466453552246 2023-01-24 01:32:42.464166: step: 850/470, loss: 0.6919409036636353 2023-01-24 01:32:43.185902: step: 852/470, loss: 1.45651113986969 2023-01-24 01:32:43.882879: step: 854/470, loss: 1.881666898727417 2023-01-24 01:32:44.632097: step: 856/470, loss: 1.315659999847412 2023-01-24 01:32:45.319505: step: 858/470, loss: 1.2165385484695435 2023-01-24 01:32:46.076864: step: 860/470, loss: 0.6869033575057983 2023-01-24 01:32:46.822999: step: 862/470, loss: 4.98539924621582 2023-01-24 01:32:47.597995: step: 864/470, loss: 1.0015603303909302 2023-01-24 01:32:48.325385: step: 866/470, loss: 0.30697131156921387 2023-01-24 01:32:49.038801: step: 868/470, loss: 1.4575281143188477 2023-01-24 01:32:49.775981: step: 870/470, loss: 0.506115734577179 2023-01-24 01:32:50.508910: step: 872/470, loss: 8.884336471557617 2023-01-24 01:32:51.207016: step: 874/470, loss: 1.0489484071731567 2023-01-24 01:32:52.002156: step: 876/470, loss: 3.248522996902466 2023-01-24 01:32:52.789420: step: 878/470, loss: 0.8829824924468994 2023-01-24 01:32:53.594607: step: 880/470, loss: 2.079348564147949 2023-01-24 01:32:54.262092: step: 882/470, loss: 1.2602165937423706 2023-01-24 01:32:55.038566: step: 884/470, loss: 2.2233707904815674 2023-01-24 01:32:55.993085: step: 886/470, loss: 1.2698169946670532 2023-01-24 01:32:56.759756: step: 888/470, loss: 3.4488766193389893 2023-01-24 01:32:57.467011: step: 890/470, loss: 0.7761567831039429 2023-01-24 01:32:58.151885: step: 892/470, loss: 3.291733741760254 2023-01-24 01:32:58.847354: step: 894/470, loss: 1.81295907497406 2023-01-24 01:32:59.535434: step: 896/470, loss: 1.6111295223236084 2023-01-24 01:33:00.266941: step: 898/470, loss: 1.894487977027893 2023-01-24 01:33:01.017097: step: 900/470, loss: 0.7100313901901245 2023-01-24 01:33:01.729795: step: 902/470, loss: 5.061795234680176 2023-01-24 01:33:02.470604: step: 904/470, loss: 1.990469217300415 2023-01-24 01:33:03.244398: step: 906/470, loss: 0.8769509196281433 2023-01-24 01:33:04.040643: step: 908/470, loss: 1.2958335876464844 2023-01-24 01:33:04.732723: step: 910/470, loss: 1.5320318937301636 2023-01-24 01:33:05.486336: step: 912/470, loss: 8.486604690551758 2023-01-24 01:33:06.220253: step: 914/470, loss: 0.8106694221496582 2023-01-24 01:33:06.896980: step: 916/470, loss: 0.887906551361084 2023-01-24 01:33:07.536799: step: 918/470, loss: 2.665186882019043 2023-01-24 01:33:08.204666: step: 920/470, loss: 1.0016103982925415 2023-01-24 01:33:08.982535: step: 922/470, loss: 0.6781519651412964 2023-01-24 01:33:09.848567: step: 924/470, loss: 3.2051515579223633 2023-01-24 01:33:10.603676: step: 926/470, loss: 1.7840149402618408 2023-01-24 01:33:11.298632: step: 928/470, loss: 2.064391851425171 2023-01-24 01:33:12.091722: step: 930/470, loss: 2.9013218879699707 2023-01-24 01:33:12.828056: step: 932/470, loss: 0.4819917678833008 2023-01-24 01:33:13.585000: step: 934/470, loss: 4.124395847320557 2023-01-24 01:33:14.351847: step: 936/470, loss: 2.3460421562194824 2023-01-24 01:33:15.080475: step: 938/470, loss: 8.79629135131836 2023-01-24 01:33:15.811533: step: 940/470, loss: 1.3817631006240845 2023-01-24 01:33:16.526573: step: 942/470, loss: 1.4883410930633545 ================================================== Loss: 5.731 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34500999304831426, 'r': 0.08935041407867496, 'f1': 0.14194104819104822}, 'combined': 0.10458814077235132, 'epoch': 0} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3205732613614787, 'r': 0.092732878286558, 'f1': 0.14385308310723113}, 'combined': 0.09590205540482073, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3077922077922078, 'r': 0.08960302457466919, 'f1': 0.13879941434846266}, 'combined': 0.10227325267781458, 'epoch': 0} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3265080530505562, 'r': 0.09820261365696804, 'f1': 0.15099194205872554}, 'combined': 0.100661294705817, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32422385620915034, 'r': 0.09377362948960304, 'f1': 0.14547287390029326}, 'combined': 0.10719053866337397, 'epoch': 0} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3028371547402792, 'r': 0.09601446189562493, 'f1': 0.14580232468222898}, 'combined': 0.09720154978815262, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.7708333333333334, 'r': 0.13214285714285715, 'f1': 0.225609756097561}, 'combined': 0.15040650406504066, 'epoch': 0} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.06896551724137931, 'f1': 0.125}, 'combined': 0.08333333333333333, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34500999304831426, 'r': 0.08935041407867496, 'f1': 0.14194104819104822}, 'combined': 0.10458814077235132, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3205732613614787, 'r': 0.092732878286558, 'f1': 0.14385308310723113}, 'combined': 0.09590205540482073, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.7708333333333334, 'r': 0.13214285714285715, 'f1': 0.225609756097561}, 'combined': 0.15040650406504066, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3077922077922078, 'r': 0.08960302457466919, 'f1': 0.13879941434846266}, 'combined': 0.10227325267781458, 'epoch': 0} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3265080530505562, 'r': 0.09820261365696804, 'f1': 0.15099194205872554}, 'combined': 0.100661294705817, 'epoch': 0} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32422385620915034, 'r': 0.09377362948960304, 'f1': 0.14547287390029326}, 'combined': 0.10719053866337397, 'epoch': 0} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3028371547402792, 'r': 0.09601446189562493, 'f1': 0.14580232468222898}, 'combined': 0.09720154978815262, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.06896551724137931, 'f1': 0.125}, 'combined': 0.08333333333333333, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:36:13.838453: step: 2/470, loss: 1.862076997756958 2023-01-24 01:36:14.591911: step: 4/470, loss: 8.447455406188965 2023-01-24 01:36:15.288934: step: 6/470, loss: 2.0555386543273926 2023-01-24 01:36:15.978822: step: 8/470, loss: 1.5882792472839355 2023-01-24 01:36:16.758304: step: 10/470, loss: 3.0211076736450195 2023-01-24 01:36:17.540354: step: 12/470, loss: 6.071506500244141 2023-01-24 01:36:18.244502: step: 14/470, loss: 2.025646209716797 2023-01-24 01:36:19.030128: step: 16/470, loss: 5.890591621398926 2023-01-24 01:36:19.774403: step: 18/470, loss: 0.7686997056007385 2023-01-24 01:36:20.461450: step: 20/470, loss: 0.4861363172531128 2023-01-24 01:36:21.217645: step: 22/470, loss: 4.2360734939575195 2023-01-24 01:36:22.028576: step: 24/470, loss: 2.479112386703491 2023-01-24 01:36:22.805137: step: 26/470, loss: 1.051332712173462 2023-01-24 01:36:23.526893: step: 28/470, loss: 1.299823522567749 2023-01-24 01:36:24.247699: step: 30/470, loss: 1.6598849296569824 2023-01-24 01:36:24.931351: step: 32/470, loss: 1.582511067390442 2023-01-24 01:36:25.688591: step: 34/470, loss: 2.467061758041382 2023-01-24 01:36:26.408365: step: 36/470, loss: 1.9889912605285645 2023-01-24 01:36:27.140897: step: 38/470, loss: 2.0867104530334473 2023-01-24 01:36:27.891406: step: 40/470, loss: 1.2533296346664429 2023-01-24 01:36:28.654717: step: 42/470, loss: 1.3080697059631348 2023-01-24 01:36:29.428290: step: 44/470, loss: 1.3241928815841675 2023-01-24 01:36:30.136888: step: 46/470, loss: 5.139832019805908 2023-01-24 01:36:30.873078: step: 48/470, loss: 0.5771458148956299 2023-01-24 01:36:31.576001: step: 50/470, loss: 0.29020386934280396 2023-01-24 01:36:32.266693: step: 52/470, loss: 0.8561909198760986 2023-01-24 01:36:33.037893: step: 54/470, loss: 1.4810162782669067 2023-01-24 01:36:33.780879: step: 56/470, loss: 2.2414615154266357 2023-01-24 01:36:34.810455: step: 58/470, loss: 2.6271400451660156 2023-01-24 01:36:35.558644: step: 60/470, loss: 17.625469207763672 2023-01-24 01:36:36.261342: step: 62/470, loss: 2.8227107524871826 2023-01-24 01:36:37.076646: step: 64/470, loss: 0.8661047220230103 2023-01-24 01:36:37.789994: step: 66/470, loss: 1.7071223258972168 2023-01-24 01:36:38.503461: step: 68/470, loss: 2.8164873123168945 2023-01-24 01:36:39.212116: step: 70/470, loss: 0.5790960788726807 2023-01-24 01:36:39.919261: step: 72/470, loss: 2.1545395851135254 2023-01-24 01:36:40.603747: step: 74/470, loss: 1.5863559246063232 2023-01-24 01:36:41.380491: step: 76/470, loss: 2.1607186794281006 2023-01-24 01:36:42.272625: step: 78/470, loss: 9.826147079467773 2023-01-24 01:36:43.099391: step: 80/470, loss: 0.798759937286377 2023-01-24 01:36:43.893684: step: 82/470, loss: 0.9904079437255859 2023-01-24 01:36:44.700203: step: 84/470, loss: 1.9608020782470703 2023-01-24 01:36:45.485624: step: 86/470, loss: 1.5990160703659058 2023-01-24 01:36:46.225340: step: 88/470, loss: 0.8718377351760864 2023-01-24 01:36:47.142531: step: 90/470, loss: 2.45064640045166 2023-01-24 01:36:47.800937: step: 92/470, loss: 1.367138385772705 2023-01-24 01:36:48.528123: step: 94/470, loss: 0.5553440451622009 2023-01-24 01:36:49.266913: step: 96/470, loss: 2.1003284454345703 2023-01-24 01:36:49.958472: step: 98/470, loss: 0.6784709692001343 2023-01-24 01:36:50.687725: step: 100/470, loss: 4.6824727058410645 2023-01-24 01:36:51.431386: step: 102/470, loss: 3.344106674194336 2023-01-24 01:36:52.112421: step: 104/470, loss: 2.412670612335205 2023-01-24 01:36:52.869751: step: 106/470, loss: 2.828399419784546 2023-01-24 01:36:53.648175: step: 108/470, loss: 6.7077202796936035 2023-01-24 01:36:54.378823: step: 110/470, loss: 0.7992213368415833 2023-01-24 01:36:55.061569: step: 112/470, loss: 2.1331422328948975 2023-01-24 01:36:55.898440: step: 114/470, loss: 3.3567557334899902 2023-01-24 01:36:56.642932: step: 116/470, loss: 0.6175652742385864 2023-01-24 01:36:57.384437: step: 118/470, loss: 1.5179295539855957 2023-01-24 01:36:58.116162: step: 120/470, loss: 10.409992218017578 2023-01-24 01:36:58.891252: step: 122/470, loss: 2.6204042434692383 2023-01-24 01:36:59.638647: step: 124/470, loss: 2.7609641551971436 2023-01-24 01:37:00.475577: step: 126/470, loss: 2.3943400382995605 2023-01-24 01:37:01.160513: step: 128/470, loss: 2.3017160892486572 2023-01-24 01:37:01.903937: step: 130/470, loss: 1.209132194519043 2023-01-24 01:37:02.713598: step: 132/470, loss: 0.7397157549858093 2023-01-24 01:37:03.438048: step: 134/470, loss: 0.7620857954025269 2023-01-24 01:37:04.174212: step: 136/470, loss: 1.7000327110290527 2023-01-24 01:37:04.866131: step: 138/470, loss: 10.088142395019531 2023-01-24 01:37:05.606828: step: 140/470, loss: 1.530067801475525 2023-01-24 01:37:06.295787: step: 142/470, loss: 2.131448984146118 2023-01-24 01:37:06.990626: step: 144/470, loss: 5.388104438781738 2023-01-24 01:37:07.683223: step: 146/470, loss: 0.946931004524231 2023-01-24 01:37:08.412134: step: 148/470, loss: 0.22896254062652588 2023-01-24 01:37:09.131842: step: 150/470, loss: 1.9135855436325073 2023-01-24 01:37:09.863322: step: 152/470, loss: 6.019766807556152 2023-01-24 01:37:10.592613: step: 154/470, loss: 1.4003325700759888 2023-01-24 01:37:11.357959: step: 156/470, loss: 2.2941765785217285 2023-01-24 01:37:12.154560: step: 158/470, loss: 1.0491797924041748 2023-01-24 01:37:12.846746: step: 160/470, loss: 4.415127754211426 2023-01-24 01:37:13.532199: step: 162/470, loss: 1.0620689392089844 2023-01-24 01:37:14.343860: step: 164/470, loss: 1.4155352115631104 2023-01-24 01:37:15.070701: step: 166/470, loss: 1.2267463207244873 2023-01-24 01:37:15.811129: step: 168/470, loss: 1.4995251893997192 2023-01-24 01:37:16.533465: step: 170/470, loss: 0.8310667872428894 2023-01-24 01:37:17.217026: step: 172/470, loss: 1.5570124387741089 2023-01-24 01:37:17.992382: step: 174/470, loss: 0.6260913610458374 2023-01-24 01:37:18.701828: step: 176/470, loss: 1.448913335800171 2023-01-24 01:37:19.453224: step: 178/470, loss: 1.0051169395446777 2023-01-24 01:37:20.177608: step: 180/470, loss: 1.780196189880371 2023-01-24 01:37:20.937966: step: 182/470, loss: 6.025088310241699 2023-01-24 01:37:21.756486: step: 184/470, loss: 0.4942382574081421 2023-01-24 01:37:22.580104: step: 186/470, loss: 1.7729337215423584 2023-01-24 01:37:23.382368: step: 188/470, loss: 3.7398040294647217 2023-01-24 01:37:24.142278: step: 190/470, loss: 1.550480604171753 2023-01-24 01:37:24.907891: step: 192/470, loss: 4.191380023956299 2023-01-24 01:37:25.696824: step: 194/470, loss: 6.224770545959473 2023-01-24 01:37:26.510219: step: 196/470, loss: 4.869048595428467 2023-01-24 01:37:27.220468: step: 198/470, loss: 4.839520454406738 2023-01-24 01:37:28.107284: step: 200/470, loss: 0.34329086542129517 2023-01-24 01:37:28.839558: step: 202/470, loss: 3.1021952629089355 2023-01-24 01:37:29.656968: step: 204/470, loss: 1.2622747421264648 2023-01-24 01:37:30.470507: step: 206/470, loss: 1.1812101602554321 2023-01-24 01:37:31.195136: step: 208/470, loss: 2.3265373706817627 2023-01-24 01:37:31.967251: step: 210/470, loss: 1.3936223983764648 2023-01-24 01:37:32.754948: step: 212/470, loss: 0.3111218810081482 2023-01-24 01:37:33.497666: step: 214/470, loss: 0.7424604892730713 2023-01-24 01:37:34.221046: step: 216/470, loss: 3.502570152282715 2023-01-24 01:37:34.985739: step: 218/470, loss: 0.8425999283790588 2023-01-24 01:37:35.769096: step: 220/470, loss: 5.331447601318359 2023-01-24 01:37:36.467931: step: 222/470, loss: 1.8785395622253418 2023-01-24 01:37:37.314170: step: 224/470, loss: 3.651517868041992 2023-01-24 01:37:38.043777: step: 226/470, loss: 1.545884370803833 2023-01-24 01:37:38.901361: step: 228/470, loss: 0.853326141834259 2023-01-24 01:37:39.668767: step: 230/470, loss: 0.5514891147613525 2023-01-24 01:37:40.478238: step: 232/470, loss: 7.522525787353516 2023-01-24 01:37:41.184289: step: 234/470, loss: 1.4783458709716797 2023-01-24 01:37:41.905719: step: 236/470, loss: 1.1576764583587646 2023-01-24 01:37:42.555763: step: 238/470, loss: 1.4029746055603027 2023-01-24 01:37:43.268562: step: 240/470, loss: 0.9465068578720093 2023-01-24 01:37:43.947666: step: 242/470, loss: 1.4843324422836304 2023-01-24 01:37:44.667774: step: 244/470, loss: 4.074316501617432 2023-01-24 01:37:45.425364: step: 246/470, loss: 0.47795766592025757 2023-01-24 01:37:46.169760: step: 248/470, loss: 0.9381461143493652 2023-01-24 01:37:46.929952: step: 250/470, loss: 1.1530299186706543 2023-01-24 01:37:47.678215: step: 252/470, loss: 5.419069290161133 2023-01-24 01:37:48.491911: step: 254/470, loss: 2.582890272140503 2023-01-24 01:37:49.218836: step: 256/470, loss: 0.4698558449745178 2023-01-24 01:37:49.978155: step: 258/470, loss: 1.4230782985687256 2023-01-24 01:37:50.716419: step: 260/470, loss: 2.6051998138427734 2023-01-24 01:37:51.450342: step: 262/470, loss: 6.613999366760254 2023-01-24 01:37:52.231685: step: 264/470, loss: 0.8363248705863953 2023-01-24 01:37:52.997515: step: 266/470, loss: 2.7337307929992676 2023-01-24 01:37:53.716005: step: 268/470, loss: 0.22095702588558197 2023-01-24 01:37:54.461981: step: 270/470, loss: 3.376373291015625 2023-01-24 01:37:55.204863: step: 272/470, loss: 2.5851681232452393 2023-01-24 01:37:55.971971: step: 274/470, loss: 2.131282091140747 2023-01-24 01:37:56.766542: step: 276/470, loss: 1.776853322982788 2023-01-24 01:37:57.462578: step: 278/470, loss: 1.506990909576416 2023-01-24 01:37:58.241399: step: 280/470, loss: 6.9677934646606445 2023-01-24 01:37:58.993859: step: 282/470, loss: 1.1140801906585693 2023-01-24 01:37:59.704291: step: 284/470, loss: 2.3784890174865723 2023-01-24 01:38:00.413967: step: 286/470, loss: 0.3859087824821472 2023-01-24 01:38:01.251420: step: 288/470, loss: 4.417560577392578 2023-01-24 01:38:02.016317: step: 290/470, loss: 2.1486141681671143 2023-01-24 01:38:02.732144: step: 292/470, loss: 1.0960376262664795 2023-01-24 01:38:03.409227: step: 294/470, loss: 3.9408044815063477 2023-01-24 01:38:04.142516: step: 296/470, loss: 0.7118826508522034 2023-01-24 01:38:04.842301: step: 298/470, loss: 0.3212831914424896 2023-01-24 01:38:05.561316: step: 300/470, loss: 1.278481364250183 2023-01-24 01:38:06.262117: step: 302/470, loss: 5.880828380584717 2023-01-24 01:38:06.963421: step: 304/470, loss: 0.6283556222915649 2023-01-24 01:38:07.803774: step: 306/470, loss: 2.122545003890991 2023-01-24 01:38:08.543931: step: 308/470, loss: 0.7743858098983765 2023-01-24 01:38:09.377678: step: 310/470, loss: 4.460456848144531 2023-01-24 01:38:10.254413: step: 312/470, loss: 2.6492180824279785 2023-01-24 01:38:10.978762: step: 314/470, loss: 1.6523792743682861 2023-01-24 01:38:11.708689: step: 316/470, loss: 0.35584282875061035 2023-01-24 01:38:12.445608: step: 318/470, loss: 0.9775692820549011 2023-01-24 01:38:13.174164: step: 320/470, loss: 1.9322388172149658 2023-01-24 01:38:13.989885: step: 322/470, loss: 1.2334778308868408 2023-01-24 01:38:14.709396: step: 324/470, loss: 0.7135185599327087 2023-01-24 01:38:15.451837: step: 326/470, loss: 2.28690242767334 2023-01-24 01:38:16.173347: step: 328/470, loss: 1.8421454429626465 2023-01-24 01:38:16.894442: step: 330/470, loss: 0.3285214900970459 2023-01-24 01:38:17.557586: step: 332/470, loss: 1.2045437097549438 2023-01-24 01:38:18.233657: step: 334/470, loss: 0.5976580381393433 2023-01-24 01:38:18.997842: step: 336/470, loss: 0.5577397346496582 2023-01-24 01:38:19.721147: step: 338/470, loss: 1.911345362663269 2023-01-24 01:38:20.500663: step: 340/470, loss: 1.0533931255340576 2023-01-24 01:38:21.196581: step: 342/470, loss: 1.1119427680969238 2023-01-24 01:38:21.882473: step: 344/470, loss: 3.2432820796966553 2023-01-24 01:38:22.720518: step: 346/470, loss: 1.752776861190796 2023-01-24 01:38:23.434858: step: 348/470, loss: 0.7811027765274048 2023-01-24 01:38:24.174040: step: 350/470, loss: 9.563486099243164 2023-01-24 01:38:24.895408: step: 352/470, loss: 1.5302809476852417 2023-01-24 01:38:25.661349: step: 354/470, loss: 0.256759375333786 2023-01-24 01:38:26.379106: step: 356/470, loss: 0.8103635311126709 2023-01-24 01:38:27.066784: step: 358/470, loss: 1.6359080076217651 2023-01-24 01:38:27.753173: step: 360/470, loss: 1.4214690923690796 2023-01-24 01:38:28.471427: step: 362/470, loss: 1.7611870765686035 2023-01-24 01:38:29.114897: step: 364/470, loss: 1.1694140434265137 2023-01-24 01:38:29.789786: step: 366/470, loss: 0.360617458820343 2023-01-24 01:38:30.561212: step: 368/470, loss: 1.3414287567138672 2023-01-24 01:38:31.344896: step: 370/470, loss: 3.1062469482421875 2023-01-24 01:38:32.106587: step: 372/470, loss: 1.2116090059280396 2023-01-24 01:38:32.892792: step: 374/470, loss: 2.1322052478790283 2023-01-24 01:38:33.703412: step: 376/470, loss: 2.370061159133911 2023-01-24 01:38:34.439873: step: 378/470, loss: 0.9331423044204712 2023-01-24 01:38:35.238917: step: 380/470, loss: 2.5580132007598877 2023-01-24 01:38:35.990146: step: 382/470, loss: 0.35974249243736267 2023-01-24 01:38:36.839810: step: 384/470, loss: 3.127124786376953 2023-01-24 01:38:37.570893: step: 386/470, loss: 0.8177119493484497 2023-01-24 01:38:38.358182: step: 388/470, loss: 2.3154079914093018 2023-01-24 01:38:39.061163: step: 390/470, loss: 1.1731178760528564 2023-01-24 01:38:39.853135: step: 392/470, loss: 2.1064229011535645 2023-01-24 01:38:40.602147: step: 394/470, loss: 2.3349862098693848 2023-01-24 01:38:41.368557: step: 396/470, loss: 1.1178443431854248 2023-01-24 01:38:42.058721: step: 398/470, loss: 0.28772637248039246 2023-01-24 01:38:42.848613: step: 400/470, loss: 0.7768524289131165 2023-01-24 01:38:43.555858: step: 402/470, loss: 1.5863418579101562 2023-01-24 01:38:44.258928: step: 404/470, loss: 2.128065347671509 2023-01-24 01:38:45.007861: step: 406/470, loss: 0.3368366062641144 2023-01-24 01:38:45.757657: step: 408/470, loss: 1.258863091468811 2023-01-24 01:38:46.471556: step: 410/470, loss: 2.4128575325012207 2023-01-24 01:38:47.207441: step: 412/470, loss: 1.5281002521514893 2023-01-24 01:38:47.862326: step: 414/470, loss: 1.2488266229629517 2023-01-24 01:38:48.591982: step: 416/470, loss: 3.86458683013916 2023-01-24 01:38:49.335937: step: 418/470, loss: 1.467484474182129 2023-01-24 01:38:50.072795: step: 420/470, loss: 6.8370680809021 2023-01-24 01:38:50.847565: step: 422/470, loss: 1.9299228191375732 2023-01-24 01:38:51.536253: step: 424/470, loss: 3.437924861907959 2023-01-24 01:38:52.256572: step: 426/470, loss: 3.256579875946045 2023-01-24 01:38:53.028738: step: 428/470, loss: 7.5270185470581055 2023-01-24 01:38:53.810838: step: 430/470, loss: 2.0738301277160645 2023-01-24 01:38:54.539423: step: 432/470, loss: 1.6853046417236328 2023-01-24 01:38:55.277035: step: 434/470, loss: 1.0764847993850708 2023-01-24 01:38:56.092625: step: 436/470, loss: 2.1314823627471924 2023-01-24 01:38:56.871789: step: 438/470, loss: 0.8061450719833374 2023-01-24 01:38:57.590324: step: 440/470, loss: 0.616207480430603 2023-01-24 01:38:58.404985: step: 442/470, loss: 3.1026275157928467 2023-01-24 01:38:59.237604: step: 444/470, loss: 2.10392689704895 2023-01-24 01:38:59.959440: step: 446/470, loss: 0.5284572839736938 2023-01-24 01:39:00.696408: step: 448/470, loss: 0.8803726434707642 2023-01-24 01:39:01.434238: step: 450/470, loss: 4.2363104820251465 2023-01-24 01:39:02.248100: step: 452/470, loss: 3.7615818977355957 2023-01-24 01:39:02.999706: step: 454/470, loss: 1.4120464324951172 2023-01-24 01:39:03.721350: step: 456/470, loss: 2.512500524520874 2023-01-24 01:39:04.526783: step: 458/470, loss: 0.9251059889793396 2023-01-24 01:39:05.235798: step: 460/470, loss: 1.114088535308838 2023-01-24 01:39:05.924263: step: 462/470, loss: 0.8354222178459167 2023-01-24 01:39:06.606534: step: 464/470, loss: 4.140608310699463 2023-01-24 01:39:07.375124: step: 466/470, loss: 1.2718843221664429 2023-01-24 01:39:08.170392: step: 468/470, loss: 0.8437068462371826 2023-01-24 01:39:08.969421: step: 470/470, loss: 0.7166956663131714 2023-01-24 01:39:09.627428: step: 472/470, loss: 0.8796911835670471 2023-01-24 01:39:10.368640: step: 474/470, loss: 1.966931700706482 2023-01-24 01:39:11.170356: step: 476/470, loss: 1.254460334777832 2023-01-24 01:39:11.835020: step: 478/470, loss: 0.2941884994506836 2023-01-24 01:39:12.583797: step: 480/470, loss: 0.5314381122589111 2023-01-24 01:39:13.401792: step: 482/470, loss: 0.2590300142765045 2023-01-24 01:39:14.236369: step: 484/470, loss: 0.6093426942825317 2023-01-24 01:39:14.922732: step: 486/470, loss: 0.6896905303001404 2023-01-24 01:39:15.746162: step: 488/470, loss: 6.51729679107666 2023-01-24 01:39:16.578176: step: 490/470, loss: 1.2721233367919922 2023-01-24 01:39:17.303909: step: 492/470, loss: 0.5004115700721741 2023-01-24 01:39:18.019825: step: 494/470, loss: 1.5959545373916626 2023-01-24 01:39:18.769485: step: 496/470, loss: 2.6702709197998047 2023-01-24 01:39:19.513395: step: 498/470, loss: 0.9216657876968384 2023-01-24 01:39:20.231500: step: 500/470, loss: 1.1959245204925537 2023-01-24 01:39:21.004799: step: 502/470, loss: 3.9152817726135254 2023-01-24 01:39:21.760385: step: 504/470, loss: 1.635777473449707 2023-01-24 01:39:22.475912: step: 506/470, loss: 0.926995038986206 2023-01-24 01:39:23.233004: step: 508/470, loss: 0.5806446075439453 2023-01-24 01:39:23.944433: step: 510/470, loss: 1.9550533294677734 2023-01-24 01:39:24.748108: step: 512/470, loss: 0.8963161706924438 2023-01-24 01:39:25.550014: step: 514/470, loss: 0.9910989999771118 2023-01-24 01:39:26.192290: step: 516/470, loss: 1.1451404094696045 2023-01-24 01:39:26.916092: step: 518/470, loss: 0.4220796227455139 2023-01-24 01:39:27.729648: step: 520/470, loss: 0.37119582295417786 2023-01-24 01:39:28.598045: step: 522/470, loss: 0.8361790180206299 2023-01-24 01:39:29.345609: step: 524/470, loss: 0.5909321308135986 2023-01-24 01:39:30.124903: step: 526/470, loss: 1.3367969989776611 2023-01-24 01:39:30.824385: step: 528/470, loss: 0.8631378412246704 2023-01-24 01:39:31.552747: step: 530/470, loss: 0.7489875555038452 2023-01-24 01:39:32.292445: step: 532/470, loss: 0.8519724607467651 2023-01-24 01:39:32.963112: step: 534/470, loss: 1.9926557540893555 2023-01-24 01:39:33.699421: step: 536/470, loss: 9.287836074829102 2023-01-24 01:39:34.387019: step: 538/470, loss: 0.34389472007751465 2023-01-24 01:39:35.176815: step: 540/470, loss: 0.8343438506126404 2023-01-24 01:39:35.904043: step: 542/470, loss: 0.582844614982605 2023-01-24 01:39:36.653186: step: 544/470, loss: 8.287338256835938 2023-01-24 01:39:37.339272: step: 546/470, loss: 0.3143865764141083 2023-01-24 01:39:38.058347: step: 548/470, loss: 2.0470499992370605 2023-01-24 01:39:38.729027: step: 550/470, loss: 0.6878257989883423 2023-01-24 01:39:39.445062: step: 552/470, loss: 0.37249213457107544 2023-01-24 01:39:40.170441: step: 554/470, loss: 0.5701853036880493 2023-01-24 01:39:40.915352: step: 556/470, loss: 0.40355125069618225 2023-01-24 01:39:41.589633: step: 558/470, loss: 3.7175612449645996 2023-01-24 01:39:42.217201: step: 560/470, loss: 0.47289979457855225 2023-01-24 01:39:42.961136: step: 562/470, loss: 0.6108911633491516 2023-01-24 01:39:43.675981: step: 564/470, loss: 3.1750166416168213 2023-01-24 01:39:44.475438: step: 566/470, loss: 0.4857003092765808 2023-01-24 01:39:45.194705: step: 568/470, loss: 2.3505191802978516 2023-01-24 01:39:45.952297: step: 570/470, loss: 1.422705888748169 2023-01-24 01:39:46.666761: step: 572/470, loss: 5.138402938842773 2023-01-24 01:39:47.398664: step: 574/470, loss: 3.7409205436706543 2023-01-24 01:39:48.124024: step: 576/470, loss: 4.080702781677246 2023-01-24 01:39:48.823379: step: 578/470, loss: 1.1940529346466064 2023-01-24 01:39:49.614107: step: 580/470, loss: 1.1898057460784912 2023-01-24 01:39:50.445676: step: 582/470, loss: 4.188379764556885 2023-01-24 01:39:51.353345: step: 584/470, loss: 2.8826589584350586 2023-01-24 01:39:52.056175: step: 586/470, loss: 2.7253611087799072 2023-01-24 01:39:52.757922: step: 588/470, loss: 1.339220643043518 2023-01-24 01:39:53.452365: step: 590/470, loss: 0.9713016748428345 2023-01-24 01:39:54.209444: step: 592/470, loss: 0.6565048098564148 2023-01-24 01:39:54.942070: step: 594/470, loss: 0.6998676061630249 2023-01-24 01:39:55.688613: step: 596/470, loss: 5.920351028442383 2023-01-24 01:39:56.410557: step: 598/470, loss: 1.8115699291229248 2023-01-24 01:39:57.123397: step: 600/470, loss: 1.0007771253585815 2023-01-24 01:39:57.871188: step: 602/470, loss: 0.6656914949417114 2023-01-24 01:39:58.578127: step: 604/470, loss: 1.722856879234314 2023-01-24 01:39:59.390253: step: 606/470, loss: 1.1696670055389404 2023-01-24 01:40:00.116215: step: 608/470, loss: 1.6859041452407837 2023-01-24 01:40:00.896881: step: 610/470, loss: 1.470185399055481 2023-01-24 01:40:01.730888: step: 612/470, loss: 1.2091671228408813 2023-01-24 01:40:02.403933: step: 614/470, loss: 0.2507552206516266 2023-01-24 01:40:03.231972: step: 616/470, loss: 0.7892252802848816 2023-01-24 01:40:04.047727: step: 618/470, loss: 5.00029182434082 2023-01-24 01:40:04.713118: step: 620/470, loss: 0.6601501703262329 2023-01-24 01:40:05.456234: step: 622/470, loss: 0.43022409081459045 2023-01-24 01:40:06.251839: step: 624/470, loss: 0.3008459806442261 2023-01-24 01:40:07.024710: step: 626/470, loss: 3.7314624786376953 2023-01-24 01:40:07.740269: step: 628/470, loss: 1.765567421913147 2023-01-24 01:40:08.595623: step: 630/470, loss: 1.6932260990142822 2023-01-24 01:40:09.416941: step: 632/470, loss: 1.9966813325881958 2023-01-24 01:40:10.142189: step: 634/470, loss: 0.7442370653152466 2023-01-24 01:40:10.855468: step: 636/470, loss: 0.33508580923080444 2023-01-24 01:40:11.598400: step: 638/470, loss: 0.5471466779708862 2023-01-24 01:40:12.409506: step: 640/470, loss: 0.6237709522247314 2023-01-24 01:40:13.182701: step: 642/470, loss: 1.7720106840133667 2023-01-24 01:40:14.037512: step: 644/470, loss: 0.4505968987941742 2023-01-24 01:40:14.714994: step: 646/470, loss: 1.4006216526031494 2023-01-24 01:40:15.467652: step: 648/470, loss: 2.9941649436950684 2023-01-24 01:40:16.221829: step: 650/470, loss: 0.3443487286567688 2023-01-24 01:40:16.898821: step: 652/470, loss: 0.7548519968986511 2023-01-24 01:40:17.634405: step: 654/470, loss: 7.085939884185791 2023-01-24 01:40:18.380229: step: 656/470, loss: 1.421372652053833 2023-01-24 01:40:19.146290: step: 658/470, loss: 0.9636402130126953 2023-01-24 01:40:19.817844: step: 660/470, loss: 5.655693054199219 2023-01-24 01:40:20.563431: step: 662/470, loss: 1.7296228408813477 2023-01-24 01:40:21.211606: step: 664/470, loss: 1.15548837184906 2023-01-24 01:40:22.014560: step: 666/470, loss: 5.414955139160156 2023-01-24 01:40:22.765932: step: 668/470, loss: 1.2120192050933838 2023-01-24 01:40:23.569636: step: 670/470, loss: 0.7238351106643677 2023-01-24 01:40:24.263394: step: 672/470, loss: 0.3124326169490814 2023-01-24 01:40:25.033088: step: 674/470, loss: 0.9182902574539185 2023-01-24 01:40:25.783385: step: 676/470, loss: 1.8736960887908936 2023-01-24 01:40:26.505853: step: 678/470, loss: 2.540367841720581 2023-01-24 01:40:27.170548: step: 680/470, loss: 3.663296699523926 2023-01-24 01:40:27.961512: step: 682/470, loss: 0.9578647613525391 2023-01-24 01:40:28.670344: step: 684/470, loss: 1.4884737730026245 2023-01-24 01:40:29.448564: step: 686/470, loss: 0.906496524810791 2023-01-24 01:40:30.138963: step: 688/470, loss: 1.3751282691955566 2023-01-24 01:40:30.863150: step: 690/470, loss: 0.4115382134914398 2023-01-24 01:40:31.649105: step: 692/470, loss: 1.414757251739502 2023-01-24 01:40:32.359112: step: 694/470, loss: 0.8882244825363159 2023-01-24 01:40:33.157737: step: 696/470, loss: 0.3683246076107025 2023-01-24 01:40:33.868157: step: 698/470, loss: 1.179138422012329 2023-01-24 01:40:34.551920: step: 700/470, loss: 1.4897336959838867 2023-01-24 01:40:35.385304: step: 702/470, loss: 0.3474949300289154 2023-01-24 01:40:36.216114: step: 704/470, loss: 2.322219133377075 2023-01-24 01:40:36.954770: step: 706/470, loss: 2.4184319972991943 2023-01-24 01:40:37.638423: step: 708/470, loss: 0.44905775785446167 2023-01-24 01:40:38.400138: step: 710/470, loss: 1.451400637626648 2023-01-24 01:40:39.089246: step: 712/470, loss: 2.693408489227295 2023-01-24 01:40:39.819998: step: 714/470, loss: 0.6166913509368896 2023-01-24 01:40:40.506718: step: 716/470, loss: 1.744429349899292 2023-01-24 01:40:41.264889: step: 718/470, loss: 0.4033623933792114 2023-01-24 01:40:41.955991: step: 720/470, loss: 1.1890370845794678 2023-01-24 01:40:42.662322: step: 722/470, loss: 1.769646406173706 2023-01-24 01:40:43.477653: step: 724/470, loss: 7.846860885620117 2023-01-24 01:40:44.209653: step: 726/470, loss: 1.7710165977478027 2023-01-24 01:40:44.987382: step: 728/470, loss: 0.9685930013656616 2023-01-24 01:40:45.713794: step: 730/470, loss: 1.263967514038086 2023-01-24 01:40:46.576358: step: 732/470, loss: 0.39430445432662964 2023-01-24 01:40:47.345332: step: 734/470, loss: 5.113945960998535 2023-01-24 01:40:48.127563: step: 736/470, loss: 0.29610970616340637 2023-01-24 01:40:48.820199: step: 738/470, loss: 0.3253304660320282 2023-01-24 01:40:49.628598: step: 740/470, loss: 0.6527763605117798 2023-01-24 01:40:50.424903: step: 742/470, loss: 1.0859628915786743 2023-01-24 01:40:51.194071: step: 744/470, loss: 0.3612285256385803 2023-01-24 01:40:51.924245: step: 746/470, loss: 1.7214696407318115 2023-01-24 01:40:52.659798: step: 748/470, loss: 3.29121470451355 2023-01-24 01:40:53.481427: step: 750/470, loss: 2.983771324157715 2023-01-24 01:40:54.249132: step: 752/470, loss: 1.9223625659942627 2023-01-24 01:40:54.957868: step: 754/470, loss: 1.0342754125595093 2023-01-24 01:40:55.707089: step: 756/470, loss: 0.21807289123535156 2023-01-24 01:40:56.439851: step: 758/470, loss: 0.3551430106163025 2023-01-24 01:40:57.168559: step: 760/470, loss: 6.2613372802734375 2023-01-24 01:40:57.882278: step: 762/470, loss: 2.6288414001464844 2023-01-24 01:40:58.539775: step: 764/470, loss: 0.3011544942855835 2023-01-24 01:40:59.263355: step: 766/470, loss: 1.0295944213867188 2023-01-24 01:41:00.066778: step: 768/470, loss: 1.5300159454345703 2023-01-24 01:41:00.837342: step: 770/470, loss: 1.7910230159759521 2023-01-24 01:41:01.631678: step: 772/470, loss: 3.126624584197998 2023-01-24 01:41:02.387826: step: 774/470, loss: 1.3450465202331543 2023-01-24 01:41:03.168753: step: 776/470, loss: 2.3319666385650635 2023-01-24 01:41:03.938165: step: 778/470, loss: 0.3326357901096344 2023-01-24 01:41:04.732348: step: 780/470, loss: 3.0053279399871826 2023-01-24 01:41:05.542337: step: 782/470, loss: 1.9255592823028564 2023-01-24 01:41:06.251134: step: 784/470, loss: 3.3029167652130127 2023-01-24 01:41:07.000523: step: 786/470, loss: 1.8911656141281128 2023-01-24 01:41:07.794186: step: 788/470, loss: 1.2011070251464844 2023-01-24 01:41:08.504845: step: 790/470, loss: 1.5344570875167847 2023-01-24 01:41:09.194113: step: 792/470, loss: 0.8595614433288574 2023-01-24 01:41:09.965258: step: 794/470, loss: 0.2718544006347656 2023-01-24 01:41:10.714239: step: 796/470, loss: 0.30841192603111267 2023-01-24 01:41:11.489064: step: 798/470, loss: 2.8022773265838623 2023-01-24 01:41:12.271057: step: 800/470, loss: 1.4509482383728027 2023-01-24 01:41:13.082069: step: 802/470, loss: 0.4220521152019501 2023-01-24 01:41:13.824570: step: 804/470, loss: 3.620166301727295 2023-01-24 01:41:14.574155: step: 806/470, loss: 0.8263979554176331 2023-01-24 01:41:15.372947: step: 808/470, loss: 3.336337089538574 2023-01-24 01:41:16.177844: step: 810/470, loss: 0.8141534924507141 2023-01-24 01:41:16.934827: step: 812/470, loss: 1.7943094968795776 2023-01-24 01:41:17.759892: step: 814/470, loss: 3.9635958671569824 2023-01-24 01:41:18.512436: step: 816/470, loss: 1.4038106203079224 2023-01-24 01:41:19.210196: step: 818/470, loss: 0.2446841448545456 2023-01-24 01:41:19.940045: step: 820/470, loss: 0.7909644246101379 2023-01-24 01:41:20.619871: step: 822/470, loss: 1.7075693607330322 2023-01-24 01:41:21.315310: step: 824/470, loss: 1.881824254989624 2023-01-24 01:41:22.084581: step: 826/470, loss: 0.8060756325721741 2023-01-24 01:41:22.860348: step: 828/470, loss: 1.0279686450958252 2023-01-24 01:41:23.644584: step: 830/470, loss: 0.47068899869918823 2023-01-24 01:41:24.389127: step: 832/470, loss: 0.6771692633628845 2023-01-24 01:41:25.174577: step: 834/470, loss: 1.3380112648010254 2023-01-24 01:41:25.891181: step: 836/470, loss: 1.1310175657272339 2023-01-24 01:41:26.612636: step: 838/470, loss: 0.6593484878540039 2023-01-24 01:41:27.339501: step: 840/470, loss: 2.9913806915283203 2023-01-24 01:41:28.108399: step: 842/470, loss: 2.463867664337158 2023-01-24 01:41:28.836318: step: 844/470, loss: 1.755760669708252 2023-01-24 01:41:29.577035: step: 846/470, loss: 0.5958117246627808 2023-01-24 01:41:30.323932: step: 848/470, loss: 1.926990032196045 2023-01-24 01:41:31.063425: step: 850/470, loss: 1.5289037227630615 2023-01-24 01:41:31.806178: step: 852/470, loss: 4.594353199005127 2023-01-24 01:41:32.609930: step: 854/470, loss: 1.013411283493042 2023-01-24 01:41:33.431063: step: 856/470, loss: 14.002585411071777 2023-01-24 01:41:34.149916: step: 858/470, loss: 0.3551334738731384 2023-01-24 01:41:35.005008: step: 860/470, loss: 3.323481559753418 2023-01-24 01:41:35.759829: step: 862/470, loss: 0.4271811842918396 2023-01-24 01:41:36.478365: step: 864/470, loss: 2.759626865386963 2023-01-24 01:41:37.123841: step: 866/470, loss: 2.3287715911865234 2023-01-24 01:41:37.847260: step: 868/470, loss: 1.9157121181488037 2023-01-24 01:41:38.656328: step: 870/470, loss: 0.848456859588623 2023-01-24 01:41:39.364912: step: 872/470, loss: 9.034523963928223 2023-01-24 01:41:40.143657: step: 874/470, loss: 3.3581533432006836 2023-01-24 01:41:40.885600: step: 876/470, loss: 0.41928038001060486 2023-01-24 01:41:41.549294: step: 878/470, loss: 4.671534538269043 2023-01-24 01:41:42.378525: step: 880/470, loss: 1.3843767642974854 2023-01-24 01:41:43.182288: step: 882/470, loss: 0.8925577998161316 2023-01-24 01:41:43.918871: step: 884/470, loss: 0.2983015179634094 2023-01-24 01:41:44.610189: step: 886/470, loss: 1.8470473289489746 2023-01-24 01:41:45.350137: step: 888/470, loss: 2.3493099212646484 2023-01-24 01:41:46.104911: step: 890/470, loss: 1.196245551109314 2023-01-24 01:41:46.828286: step: 892/470, loss: 1.8996955156326294 2023-01-24 01:41:47.602234: step: 894/470, loss: 4.634321212768555 2023-01-24 01:41:48.396894: step: 896/470, loss: 2.013730764389038 2023-01-24 01:41:49.259416: step: 898/470, loss: 0.5299916863441467 2023-01-24 01:41:49.995938: step: 900/470, loss: 1.3865522146224976 2023-01-24 01:41:50.736473: step: 902/470, loss: 0.5649352073669434 2023-01-24 01:41:51.484173: step: 904/470, loss: 0.6543347835540771 2023-01-24 01:41:52.201142: step: 906/470, loss: 1.2292380332946777 2023-01-24 01:41:52.973710: step: 908/470, loss: 0.953291654586792 2023-01-24 01:41:53.692995: step: 910/470, loss: 5.457536220550537 2023-01-24 01:41:54.464835: step: 912/470, loss: 3.373645782470703 2023-01-24 01:41:55.144768: step: 914/470, loss: 3.1682276725769043 2023-01-24 01:41:55.869163: step: 916/470, loss: 1.7389862537384033 2023-01-24 01:41:56.599873: step: 918/470, loss: 0.5257987976074219 2023-01-24 01:41:57.378011: step: 920/470, loss: 0.9832305908203125 2023-01-24 01:41:58.134095: step: 922/470, loss: 1.7744770050048828 2023-01-24 01:41:58.905255: step: 924/470, loss: 0.5747044086456299 2023-01-24 01:41:59.711120: step: 926/470, loss: 0.39155885577201843 2023-01-24 01:42:00.519136: step: 928/470, loss: 0.668175220489502 2023-01-24 01:42:01.221013: step: 930/470, loss: 0.4773980975151062 2023-01-24 01:42:02.057884: step: 932/470, loss: 0.8903824090957642 2023-01-24 01:42:02.806587: step: 934/470, loss: 1.2178103923797607 2023-01-24 01:42:03.562649: step: 936/470, loss: 2.6373496055603027 2023-01-24 01:42:04.357148: step: 938/470, loss: 1.7452850341796875 2023-01-24 01:42:05.026361: step: 940/470, loss: 0.6965129375457764 2023-01-24 01:42:05.652290: step: 942/470, loss: 0.40335753560066223 ================================================== Loss: 2.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2707386507094885, 'r': 0.23081877404532952, 'f1': 0.2491900642244476}, 'combined': 0.18361373153380348, 'epoch': 1} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3279803550577765, 'r': 0.2078571569445736, 'f1': 0.2544542425976412}, 'combined': 0.16963616173176077, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2745133526383527, 'r': 0.23040440183634892, 'f1': 0.250532227279401}, 'combined': 0.18460269378482178, 'epoch': 1} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3246109494838766, 'r': 0.20945653787406415, 'f1': 0.2546190780916654}, 'combined': 0.16974605206111024, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25838488445869234, 'r': 0.22468250822494984, 'f1': 0.2403580320545975}, 'combined': 0.1771059183560192, 'epoch': 1} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.31532402962177447, 'r': 0.21374313417314916, 'f1': 0.2547818159343938}, 'combined': 0.1698545439562625, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19387755102040816, 'r': 0.2714285714285714, 'f1': 0.2261904761904762}, 'combined': 0.15079365079365079, 'epoch': 1} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.2391304347826087, 'f1': 0.3055555555555556}, 'combined': 0.20370370370370372, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.22413793103448276, 'f1': 0.3170731707317073}, 'combined': 0.2113821138211382, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2707386507094885, 'r': 0.23081877404532952, 'f1': 0.2491900642244476}, 'combined': 0.18361373153380348, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3279803550577765, 'r': 0.2078571569445736, 'f1': 0.2544542425976412}, 'combined': 0.16963616173176077, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19387755102040816, 'r': 0.2714285714285714, 'f1': 0.2261904761904762}, 'combined': 0.15079365079365079, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2745133526383527, 'r': 0.23040440183634892, 'f1': 0.250532227279401}, 'combined': 0.18460269378482178, 'epoch': 1} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3246109494838766, 'r': 0.20945653787406415, 'f1': 0.2546190780916654}, 'combined': 0.16974605206111024, 'epoch': 1} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.2391304347826087, 'f1': 0.3055555555555556}, 'combined': 0.20370370370370372, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25838488445869234, 'r': 0.22468250822494984, 'f1': 0.2403580320545975}, 'combined': 0.1771059183560192, 'epoch': 1} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.31532402962177447, 'r': 0.21374313417314916, 'f1': 0.2547818159343938}, 'combined': 0.1698545439562625, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.22413793103448276, 'f1': 0.3170731707317073}, 'combined': 0.2113821138211382, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:45:07.233251: step: 2/470, loss: 2.46811842918396 2023-01-24 01:45:08.060567: step: 4/470, loss: 1.7362735271453857 2023-01-24 01:45:08.797527: step: 6/470, loss: 0.5531305074691772 2023-01-24 01:45:09.560383: step: 8/470, loss: 1.096218228340149 2023-01-24 01:45:10.314741: step: 10/470, loss: 0.9702210426330566 2023-01-24 01:45:11.034159: step: 12/470, loss: 1.731630802154541 2023-01-24 01:45:11.758563: step: 14/470, loss: 2.6520066261291504 2023-01-24 01:45:12.577762: step: 16/470, loss: 0.960263729095459 2023-01-24 01:45:13.209271: step: 18/470, loss: 2.0434067249298096 2023-01-24 01:45:13.928489: step: 20/470, loss: 1.127629280090332 2023-01-24 01:45:14.661018: step: 22/470, loss: 0.27968132495880127 2023-01-24 01:45:15.363883: step: 24/470, loss: 0.7146601676940918 2023-01-24 01:45:16.127518: step: 26/470, loss: 0.5010353922843933 2023-01-24 01:45:16.913063: step: 28/470, loss: 0.7857263088226318 2023-01-24 01:45:17.632715: step: 30/470, loss: 6.040136337280273 2023-01-24 01:45:18.504038: step: 32/470, loss: 0.8596094846725464 2023-01-24 01:45:19.302477: step: 34/470, loss: 0.8374332189559937 2023-01-24 01:45:20.052946: step: 36/470, loss: 0.49315130710601807 2023-01-24 01:45:20.795785: step: 38/470, loss: 0.49098867177963257 2023-01-24 01:45:21.523099: step: 40/470, loss: 0.5622952580451965 2023-01-24 01:45:22.255216: step: 42/470, loss: 0.7310096621513367 2023-01-24 01:45:23.064049: step: 44/470, loss: 0.5263378024101257 2023-01-24 01:45:23.814135: step: 46/470, loss: 1.4505245685577393 2023-01-24 01:45:24.586328: step: 48/470, loss: 0.34201517701148987 2023-01-24 01:45:25.351679: step: 50/470, loss: 8.089656829833984 2023-01-24 01:45:26.098762: step: 52/470, loss: 0.4995441138744354 2023-01-24 01:45:27.017604: step: 54/470, loss: 2.23051118850708 2023-01-24 01:45:27.802595: step: 56/470, loss: 0.6585686206817627 2023-01-24 01:45:28.485777: step: 58/470, loss: 1.034457802772522 2023-01-24 01:45:29.260240: step: 60/470, loss: 1.8960305452346802 2023-01-24 01:45:30.024829: step: 62/470, loss: 1.0263447761535645 2023-01-24 01:45:30.786571: step: 64/470, loss: 1.5714144706726074 2023-01-24 01:45:31.509129: step: 66/470, loss: 2.10054612159729 2023-01-24 01:45:32.258440: step: 68/470, loss: 0.5159252882003784 2023-01-24 01:45:32.992715: step: 70/470, loss: 0.5332794189453125 2023-01-24 01:45:33.818803: step: 72/470, loss: 1.1697885990142822 2023-01-24 01:45:34.521726: step: 74/470, loss: 0.3312620222568512 2023-01-24 01:45:35.196399: step: 76/470, loss: 1.2939296960830688 2023-01-24 01:45:35.929864: step: 78/470, loss: 1.7509288787841797 2023-01-24 01:45:36.587866: step: 80/470, loss: 0.5027635097503662 2023-01-24 01:45:37.367138: step: 82/470, loss: 1.1433383226394653 2023-01-24 01:45:38.108198: step: 84/470, loss: 0.35809245705604553 2023-01-24 01:45:38.840978: step: 86/470, loss: 0.18918129801750183 2023-01-24 01:45:39.637740: step: 88/470, loss: 0.9970061779022217 2023-01-24 01:45:40.359537: step: 90/470, loss: 3.602595090866089 2023-01-24 01:45:41.061623: step: 92/470, loss: 0.8049639463424683 2023-01-24 01:45:41.799298: step: 94/470, loss: 0.3445468544960022 2023-01-24 01:45:42.596065: step: 96/470, loss: 4.520965576171875 2023-01-24 01:45:43.397706: step: 98/470, loss: 1.303832769393921 2023-01-24 01:45:44.120044: step: 100/470, loss: 1.4521981477737427 2023-01-24 01:45:44.784569: step: 102/470, loss: 0.2904055118560791 2023-01-24 01:45:45.517600: step: 104/470, loss: 1.4000365734100342 2023-01-24 01:45:46.275149: step: 106/470, loss: 0.7857203483581543 2023-01-24 01:45:47.081060: step: 108/470, loss: 0.29761266708374023 2023-01-24 01:45:47.815905: step: 110/470, loss: 1.4033076763153076 2023-01-24 01:45:48.519554: step: 112/470, loss: 1.4501266479492188 2023-01-24 01:45:49.284925: step: 114/470, loss: 1.0425773859024048 2023-01-24 01:45:49.982959: step: 116/470, loss: 0.5747327208518982 2023-01-24 01:45:50.686104: step: 118/470, loss: 1.055211067199707 2023-01-24 01:45:51.437735: step: 120/470, loss: 2.8011343479156494 2023-01-24 01:45:52.204324: step: 122/470, loss: 1.3757091760635376 2023-01-24 01:45:53.069541: step: 124/470, loss: 0.9192894101142883 2023-01-24 01:45:53.789540: step: 126/470, loss: 1.9871468544006348 2023-01-24 01:45:54.610006: step: 128/470, loss: 0.4054987132549286 2023-01-24 01:45:55.318656: step: 130/470, loss: 0.4849776327610016 2023-01-24 01:45:56.065069: step: 132/470, loss: 1.0256551504135132 2023-01-24 01:45:56.687626: step: 134/470, loss: 0.4269829988479614 2023-01-24 01:45:57.465839: step: 136/470, loss: 0.9165063500404358 2023-01-24 01:45:58.211507: step: 138/470, loss: 1.5846391916275024 2023-01-24 01:45:59.003168: step: 140/470, loss: 0.4875439703464508 2023-01-24 01:45:59.678075: step: 142/470, loss: 4.090658187866211 2023-01-24 01:46:00.386219: step: 144/470, loss: 2.0160574913024902 2023-01-24 01:46:01.084448: step: 146/470, loss: 0.6893378496170044 2023-01-24 01:46:01.921201: step: 148/470, loss: 9.704105377197266 2023-01-24 01:46:02.668773: step: 150/470, loss: 2.638101100921631 2023-01-24 01:46:03.374803: step: 152/470, loss: 1.744828462600708 2023-01-24 01:46:04.117899: step: 154/470, loss: 0.3475514054298401 2023-01-24 01:46:04.816750: step: 156/470, loss: 0.37182146310806274 2023-01-24 01:46:05.601537: step: 158/470, loss: 2.7568507194519043 2023-01-24 01:46:06.258624: step: 160/470, loss: 4.175437927246094 2023-01-24 01:46:07.067474: step: 162/470, loss: 0.8821776509284973 2023-01-24 01:46:07.821015: step: 164/470, loss: 1.03183913230896 2023-01-24 01:46:08.556007: step: 166/470, loss: 1.8309249877929688 2023-01-24 01:46:09.241889: step: 168/470, loss: 0.4933227300643921 2023-01-24 01:46:09.997978: step: 170/470, loss: 1.0841269493103027 2023-01-24 01:46:10.713756: step: 172/470, loss: 5.927791595458984 2023-01-24 01:46:11.566917: step: 174/470, loss: 1.4856324195861816 2023-01-24 01:46:12.305755: step: 176/470, loss: 0.46990150213241577 2023-01-24 01:46:13.035549: step: 178/470, loss: 1.1630284786224365 2023-01-24 01:46:13.796772: step: 180/470, loss: 0.9261336922645569 2023-01-24 01:46:14.507604: step: 182/470, loss: 0.8008025884628296 2023-01-24 01:46:15.301778: step: 184/470, loss: 6.333198547363281 2023-01-24 01:46:16.081844: step: 186/470, loss: 0.42174702882766724 2023-01-24 01:46:16.812353: step: 188/470, loss: 0.4755881726741791 2023-01-24 01:46:17.568258: step: 190/470, loss: 1.147223949432373 2023-01-24 01:46:18.347349: step: 192/470, loss: 0.19824132323265076 2023-01-24 01:46:19.081741: step: 194/470, loss: 4.473010540008545 2023-01-24 01:46:19.751775: step: 196/470, loss: 0.8954963684082031 2023-01-24 01:46:20.437201: step: 198/470, loss: 0.2654518783092499 2023-01-24 01:46:21.171043: step: 200/470, loss: 2.4173974990844727 2023-01-24 01:46:21.942334: step: 202/470, loss: 0.2253369241952896 2023-01-24 01:46:22.784221: step: 204/470, loss: 0.5632342100143433 2023-01-24 01:46:23.569566: step: 206/470, loss: 1.0098700523376465 2023-01-24 01:46:24.245935: step: 208/470, loss: 0.7151339054107666 2023-01-24 01:46:24.960281: step: 210/470, loss: 1.4566631317138672 2023-01-24 01:46:25.642775: step: 212/470, loss: 1.1144832372665405 2023-01-24 01:46:26.378788: step: 214/470, loss: 0.6245443224906921 2023-01-24 01:46:27.074909: step: 216/470, loss: 3.356032371520996 2023-01-24 01:46:27.796445: step: 218/470, loss: 0.36491915583610535 2023-01-24 01:46:28.524211: step: 220/470, loss: 0.3796614408493042 2023-01-24 01:46:29.284477: step: 222/470, loss: 9.564358711242676 2023-01-24 01:46:30.044839: step: 224/470, loss: 1.751694679260254 2023-01-24 01:46:30.733713: step: 226/470, loss: 0.19614216685295105 2023-01-24 01:46:31.538255: step: 228/470, loss: 1.9366538524627686 2023-01-24 01:46:32.288243: step: 230/470, loss: 2.795724391937256 2023-01-24 01:46:32.963310: step: 232/470, loss: 1.4865249395370483 2023-01-24 01:46:33.679705: step: 234/470, loss: 0.4379088282585144 2023-01-24 01:46:34.411139: step: 236/470, loss: 0.5391400456428528 2023-01-24 01:46:35.217221: step: 238/470, loss: 1.4847371578216553 2023-01-24 01:46:36.050380: step: 240/470, loss: 3.0173943042755127 2023-01-24 01:46:36.831614: step: 242/470, loss: 2.1348071098327637 2023-01-24 01:46:37.497894: step: 244/470, loss: 0.5090938806533813 2023-01-24 01:46:38.275722: step: 246/470, loss: 2.2077693939208984 2023-01-24 01:46:39.033506: step: 248/470, loss: 1.9846214056015015 2023-01-24 01:46:39.804449: step: 250/470, loss: 0.22293855249881744 2023-01-24 01:46:40.480746: step: 252/470, loss: 7.160884857177734 2023-01-24 01:46:41.227162: step: 254/470, loss: 1.7741347551345825 2023-01-24 01:46:42.006416: step: 256/470, loss: 1.3045562505722046 2023-01-24 01:46:42.727562: step: 258/470, loss: 1.059791922569275 2023-01-24 01:46:43.488252: step: 260/470, loss: 1.5100021362304688 2023-01-24 01:46:44.272775: step: 262/470, loss: 0.9539613723754883 2023-01-24 01:46:45.024822: step: 264/470, loss: 0.6949822902679443 2023-01-24 01:46:45.750044: step: 266/470, loss: 0.7878126502037048 2023-01-24 01:46:46.503996: step: 268/470, loss: 0.5556519031524658 2023-01-24 01:46:47.252474: step: 270/470, loss: 0.49442341923713684 2023-01-24 01:46:48.093034: step: 272/470, loss: 1.6977099180221558 2023-01-24 01:46:48.810665: step: 274/470, loss: 0.9501678347587585 2023-01-24 01:46:49.607788: step: 276/470, loss: 0.25947466492652893 2023-01-24 01:46:50.419852: step: 278/470, loss: 8.976275444030762 2023-01-24 01:46:51.171920: step: 280/470, loss: 0.5047112703323364 2023-01-24 01:46:51.920367: step: 282/470, loss: 1.2007783651351929 2023-01-24 01:46:52.714943: step: 284/470, loss: 3.0851497650146484 2023-01-24 01:46:53.550471: step: 286/470, loss: 1.024082064628601 2023-01-24 01:46:54.311958: step: 288/470, loss: 0.8622781038284302 2023-01-24 01:46:55.019370: step: 290/470, loss: 0.44212666153907776 2023-01-24 01:46:55.747667: step: 292/470, loss: 1.0162142515182495 2023-01-24 01:46:56.486428: step: 294/470, loss: 1.515852689743042 2023-01-24 01:46:57.227234: step: 296/470, loss: 0.5383193492889404 2023-01-24 01:46:57.947709: step: 298/470, loss: 1.2464834451675415 2023-01-24 01:46:58.695563: step: 300/470, loss: 4.228458881378174 2023-01-24 01:46:59.547037: step: 302/470, loss: 2.1866626739501953 2023-01-24 01:47:00.227192: step: 304/470, loss: 2.0377941131591797 2023-01-24 01:47:00.944410: step: 306/470, loss: 1.7903894186019897 2023-01-24 01:47:01.688133: step: 308/470, loss: 2.7119550704956055 2023-01-24 01:47:02.533829: step: 310/470, loss: 4.348893642425537 2023-01-24 01:47:03.260186: step: 312/470, loss: 0.4842892289161682 2023-01-24 01:47:04.027829: step: 314/470, loss: 2.1156063079833984 2023-01-24 01:47:04.710548: step: 316/470, loss: 0.7636069059371948 2023-01-24 01:47:05.401424: step: 318/470, loss: 1.8225529193878174 2023-01-24 01:47:06.124224: step: 320/470, loss: 1.8462271690368652 2023-01-24 01:47:06.757854: step: 322/470, loss: 1.2222322225570679 2023-01-24 01:47:07.503706: step: 324/470, loss: 1.3601785898208618 2023-01-24 01:47:08.272185: step: 326/470, loss: 1.628795862197876 2023-01-24 01:47:08.955862: step: 328/470, loss: 0.41737183928489685 2023-01-24 01:47:09.744539: step: 330/470, loss: 0.7214322686195374 2023-01-24 01:47:10.534148: step: 332/470, loss: 0.3991130590438843 2023-01-24 01:47:11.162838: step: 334/470, loss: 1.107088565826416 2023-01-24 01:47:11.887223: step: 336/470, loss: 0.8664273023605347 2023-01-24 01:47:12.565142: step: 338/470, loss: 1.5518001317977905 2023-01-24 01:47:13.288873: step: 340/470, loss: 0.44015198945999146 2023-01-24 01:47:14.030941: step: 342/470, loss: 0.8017760515213013 2023-01-24 01:47:14.692430: step: 344/470, loss: 2.1953208446502686 2023-01-24 01:47:15.412667: step: 346/470, loss: 0.18744251132011414 2023-01-24 01:47:16.110056: step: 348/470, loss: 0.693583071231842 2023-01-24 01:47:16.855397: step: 350/470, loss: 3.926697015762329 2023-01-24 01:47:17.574415: step: 352/470, loss: 2.6467742919921875 2023-01-24 01:47:18.347902: step: 354/470, loss: 0.4489498734474182 2023-01-24 01:47:19.117663: step: 356/470, loss: 0.6220064163208008 2023-01-24 01:47:19.822313: step: 358/470, loss: 0.6211023330688477 2023-01-24 01:47:20.672908: step: 360/470, loss: 1.920097827911377 2023-01-24 01:47:21.396292: step: 362/470, loss: 0.7365934252738953 2023-01-24 01:47:22.120097: step: 364/470, loss: 0.5808885097503662 2023-01-24 01:47:22.895243: step: 366/470, loss: 0.21604259312152863 2023-01-24 01:47:23.656453: step: 368/470, loss: 0.4554995000362396 2023-01-24 01:47:24.364808: step: 370/470, loss: 0.5395200848579407 2023-01-24 01:47:25.146710: step: 372/470, loss: 3.7871570587158203 2023-01-24 01:47:25.866913: step: 374/470, loss: 1.1446315050125122 2023-01-24 01:47:26.581434: step: 376/470, loss: 3.1051321029663086 2023-01-24 01:47:27.300989: step: 378/470, loss: 0.9708766341209412 2023-01-24 01:47:28.039342: step: 380/470, loss: 5.53814697265625 2023-01-24 01:47:28.800269: step: 382/470, loss: 1.891498327255249 2023-01-24 01:47:29.512625: step: 384/470, loss: 0.2550835609436035 2023-01-24 01:47:30.293445: step: 386/470, loss: 0.6128339171409607 2023-01-24 01:47:31.010176: step: 388/470, loss: 0.6527276635169983 2023-01-24 01:47:31.709882: step: 390/470, loss: 1.466606855392456 2023-01-24 01:47:32.454956: step: 392/470, loss: 0.995391845703125 2023-01-24 01:47:33.184828: step: 394/470, loss: 1.0292932987213135 2023-01-24 01:47:33.886129: step: 396/470, loss: 0.5598565340042114 2023-01-24 01:47:34.601662: step: 398/470, loss: 5.429760932922363 2023-01-24 01:47:35.373355: step: 400/470, loss: 0.38686808943748474 2023-01-24 01:47:36.069475: step: 402/470, loss: 1.3042653799057007 2023-01-24 01:47:36.735797: step: 404/470, loss: 0.5991760492324829 2023-01-24 01:47:37.478635: step: 406/470, loss: 0.37910622358322144 2023-01-24 01:47:38.171735: step: 408/470, loss: 1.0450007915496826 2023-01-24 01:47:38.912463: step: 410/470, loss: 0.5937882661819458 2023-01-24 01:47:39.661055: step: 412/470, loss: 9.856136322021484 2023-01-24 01:47:40.355511: step: 414/470, loss: 1.9305442571640015 2023-01-24 01:47:41.083724: step: 416/470, loss: 0.7384703159332275 2023-01-24 01:47:41.883982: step: 418/470, loss: 0.9498000144958496 2023-01-24 01:47:42.669010: step: 420/470, loss: 0.6850473880767822 2023-01-24 01:47:43.392638: step: 422/470, loss: 6.5229716300964355 2023-01-24 01:47:44.107445: step: 424/470, loss: 1.0934593677520752 2023-01-24 01:47:44.857897: step: 426/470, loss: 1.6166236400604248 2023-01-24 01:47:45.550564: step: 428/470, loss: 2.7700743675231934 2023-01-24 01:47:46.300864: step: 430/470, loss: 1.4552366733551025 2023-01-24 01:47:46.981137: step: 432/470, loss: 1.5347661972045898 2023-01-24 01:47:47.710942: step: 434/470, loss: 0.6893869042396545 2023-01-24 01:47:48.527533: step: 436/470, loss: 0.4312663972377777 2023-01-24 01:47:49.314521: step: 438/470, loss: 0.7760770320892334 2023-01-24 01:47:50.002339: step: 440/470, loss: 0.6980346441268921 2023-01-24 01:47:50.789179: step: 442/470, loss: 4.75489616394043 2023-01-24 01:47:51.585353: step: 444/470, loss: 0.6150807738304138 2023-01-24 01:47:52.520217: step: 446/470, loss: 0.39284422993659973 2023-01-24 01:47:53.318492: step: 448/470, loss: 0.1718873381614685 2023-01-24 01:47:54.096952: step: 450/470, loss: 0.7158945798873901 2023-01-24 01:47:54.818334: step: 452/470, loss: 1.1925913095474243 2023-01-24 01:47:55.557537: step: 454/470, loss: 0.5722603797912598 2023-01-24 01:47:56.272681: step: 456/470, loss: 0.9510567784309387 2023-01-24 01:47:57.061347: step: 458/470, loss: 1.4540069103240967 2023-01-24 01:47:57.757665: step: 460/470, loss: 0.47908085584640503 2023-01-24 01:47:58.470791: step: 462/470, loss: 0.5568799376487732 2023-01-24 01:47:59.151495: step: 464/470, loss: 0.4244616627693176 2023-01-24 01:47:59.865944: step: 466/470, loss: 4.02734899520874 2023-01-24 01:48:00.679712: step: 468/470, loss: 0.6745584011077881 2023-01-24 01:48:01.463601: step: 470/470, loss: 1.6514837741851807 2023-01-24 01:48:02.189579: step: 472/470, loss: 1.545167326927185 2023-01-24 01:48:02.956889: step: 474/470, loss: 0.4058355391025543 2023-01-24 01:48:03.700019: step: 476/470, loss: 1.8532984256744385 2023-01-24 01:48:04.420905: step: 478/470, loss: 1.1376888751983643 2023-01-24 01:48:05.178794: step: 480/470, loss: 0.7555640339851379 2023-01-24 01:48:05.950197: step: 482/470, loss: 0.19936880469322205 2023-01-24 01:48:06.709723: step: 484/470, loss: 0.494382381439209 2023-01-24 01:48:07.440865: step: 486/470, loss: 1.0066436529159546 2023-01-24 01:48:08.326528: step: 488/470, loss: 0.6097040772438049 2023-01-24 01:48:09.174541: step: 490/470, loss: 0.35165679454803467 2023-01-24 01:48:09.887797: step: 492/470, loss: 0.39196884632110596 2023-01-24 01:48:10.602794: step: 494/470, loss: 2.287322998046875 2023-01-24 01:48:11.364039: step: 496/470, loss: 3.783491611480713 2023-01-24 01:48:12.078726: step: 498/470, loss: 2.2058379650115967 2023-01-24 01:48:12.886855: step: 500/470, loss: 1.644723653793335 2023-01-24 01:48:13.595597: step: 502/470, loss: 1.0776886940002441 2023-01-24 01:48:14.363461: step: 504/470, loss: 0.33019930124282837 2023-01-24 01:48:15.104871: step: 506/470, loss: 1.2233861684799194 2023-01-24 01:48:15.780143: step: 508/470, loss: 1.0420408248901367 2023-01-24 01:48:16.491640: step: 510/470, loss: 1.1336497068405151 2023-01-24 01:48:17.189104: step: 512/470, loss: 0.4762951731681824 2023-01-24 01:48:18.020301: step: 514/470, loss: 1.8861290216445923 2023-01-24 01:48:18.712928: step: 516/470, loss: 0.4970181882381439 2023-01-24 01:48:19.407601: step: 518/470, loss: 1.4443624019622803 2023-01-24 01:48:20.159824: step: 520/470, loss: 0.6937436461448669 2023-01-24 01:48:20.940187: step: 522/470, loss: 0.9865797758102417 2023-01-24 01:48:21.692266: step: 524/470, loss: 1.8651926517486572 2023-01-24 01:48:22.430632: step: 526/470, loss: 1.1333483457565308 2023-01-24 01:48:23.144848: step: 528/470, loss: 0.9567941427230835 2023-01-24 01:48:23.890822: step: 530/470, loss: 5.231558322906494 2023-01-24 01:48:24.699259: step: 532/470, loss: 2.1264209747314453 2023-01-24 01:48:25.442268: step: 534/470, loss: 1.152343511581421 2023-01-24 01:48:26.234067: step: 536/470, loss: 1.2699055671691895 2023-01-24 01:48:27.022657: step: 538/470, loss: 2.440457820892334 2023-01-24 01:48:27.779177: step: 540/470, loss: 12.110101699829102 2023-01-24 01:48:28.501315: step: 542/470, loss: 0.8827940225601196 2023-01-24 01:48:29.248084: step: 544/470, loss: 4.373961925506592 2023-01-24 01:48:29.996019: step: 546/470, loss: 0.31021592020988464 2023-01-24 01:48:30.674925: step: 548/470, loss: 4.370842456817627 2023-01-24 01:48:31.415485: step: 550/470, loss: 1.5527100563049316 2023-01-24 01:48:32.190816: step: 552/470, loss: 0.6078340411186218 2023-01-24 01:48:33.018245: step: 554/470, loss: 0.5594943761825562 2023-01-24 01:48:33.703422: step: 556/470, loss: 0.26643770933151245 2023-01-24 01:48:34.484264: step: 558/470, loss: 0.5052505731582642 2023-01-24 01:48:35.445545: step: 560/470, loss: 2.0178771018981934 2023-01-24 01:48:36.224972: step: 562/470, loss: 5.040163993835449 2023-01-24 01:48:36.963672: step: 564/470, loss: 1.0909911394119263 2023-01-24 01:48:37.706198: step: 566/470, loss: 0.4415287375450134 2023-01-24 01:48:38.523698: step: 568/470, loss: 2.973421096801758 2023-01-24 01:48:39.149809: step: 570/470, loss: 0.12367182970046997 2023-01-24 01:48:39.944364: step: 572/470, loss: 1.0943719148635864 2023-01-24 01:48:40.702667: step: 574/470, loss: 0.7173348069190979 2023-01-24 01:48:41.412411: step: 576/470, loss: 1.5143998861312866 2023-01-24 01:48:42.130517: step: 578/470, loss: 1.2005535364151 2023-01-24 01:48:42.885143: step: 580/470, loss: 1.2034974098205566 2023-01-24 01:48:43.580816: step: 582/470, loss: 1.0075057744979858 2023-01-24 01:48:44.309013: step: 584/470, loss: 1.3117833137512207 2023-01-24 01:48:45.057196: step: 586/470, loss: 0.3217265009880066 2023-01-24 01:48:45.793266: step: 588/470, loss: 1.2585625648498535 2023-01-24 01:48:46.589121: step: 590/470, loss: 1.6573166847229004 2023-01-24 01:48:47.317201: step: 592/470, loss: 0.7473699450492859 2023-01-24 01:48:48.142565: step: 594/470, loss: 1.4826210737228394 2023-01-24 01:48:48.913097: step: 596/470, loss: 3.1206932067871094 2023-01-24 01:48:49.749118: step: 598/470, loss: 1.286102056503296 2023-01-24 01:48:50.472703: step: 600/470, loss: 0.4325481653213501 2023-01-24 01:48:51.211251: step: 602/470, loss: 0.6148621439933777 2023-01-24 01:48:52.036433: step: 604/470, loss: 0.8197581768035889 2023-01-24 01:48:52.726941: step: 606/470, loss: 1.3260382413864136 2023-01-24 01:48:53.536562: step: 608/470, loss: 0.716848611831665 2023-01-24 01:48:54.220805: step: 610/470, loss: 1.1792848110198975 2023-01-24 01:48:55.033047: step: 612/470, loss: 1.3996143341064453 2023-01-24 01:48:55.742652: step: 614/470, loss: 1.7410094738006592 2023-01-24 01:48:56.561491: step: 616/470, loss: 3.7007956504821777 2023-01-24 01:48:57.269677: step: 618/470, loss: 0.2247745543718338 2023-01-24 01:48:58.043136: step: 620/470, loss: 2.267205238342285 2023-01-24 01:48:58.737726: step: 622/470, loss: 0.4186290204524994 2023-01-24 01:48:59.444436: step: 624/470, loss: 0.8166482448577881 2023-01-24 01:49:00.117375: step: 626/470, loss: 0.4580449163913727 2023-01-24 01:49:00.935226: step: 628/470, loss: 2.071941375732422 2023-01-24 01:49:01.762131: step: 630/470, loss: 1.0789272785186768 2023-01-24 01:49:02.545894: step: 632/470, loss: 1.5344969034194946 2023-01-24 01:49:03.373020: step: 634/470, loss: 1.948580026626587 2023-01-24 01:49:04.124572: step: 636/470, loss: 3.033902168273926 2023-01-24 01:49:04.897442: step: 638/470, loss: 0.23131220042705536 2023-01-24 01:49:05.557649: step: 640/470, loss: 5.123125076293945 2023-01-24 01:49:06.321170: step: 642/470, loss: 0.3747403919696808 2023-01-24 01:49:07.018119: step: 644/470, loss: 0.9803649187088013 2023-01-24 01:49:07.726562: step: 646/470, loss: 1.3845829963684082 2023-01-24 01:49:08.469902: step: 648/470, loss: 0.9319378137588501 2023-01-24 01:49:09.219001: step: 650/470, loss: 1.185803771018982 2023-01-24 01:49:09.963231: step: 652/470, loss: 1.5447531938552856 2023-01-24 01:49:10.695067: step: 654/470, loss: 0.3655240833759308 2023-01-24 01:49:11.350125: step: 656/470, loss: 1.5592364072799683 2023-01-24 01:49:12.110310: step: 658/470, loss: 0.8640881776809692 2023-01-24 01:49:12.878225: step: 660/470, loss: 2.0770931243896484 2023-01-24 01:49:13.645796: step: 662/470, loss: 0.6485579013824463 2023-01-24 01:49:14.409978: step: 664/470, loss: 0.644680380821228 2023-01-24 01:49:15.067133: step: 666/470, loss: 1.5386685132980347 2023-01-24 01:49:15.836246: step: 668/470, loss: 1.689470648765564 2023-01-24 01:49:16.535805: step: 670/470, loss: 0.8974085450172424 2023-01-24 01:49:17.265821: step: 672/470, loss: 2.1679720878601074 2023-01-24 01:49:17.969033: step: 674/470, loss: 0.3004220426082611 2023-01-24 01:49:18.787401: step: 676/470, loss: 1.7437026500701904 2023-01-24 01:49:19.538016: step: 678/470, loss: 1.1159954071044922 2023-01-24 01:49:20.253652: step: 680/470, loss: 0.47660908102989197 2023-01-24 01:49:21.000986: step: 682/470, loss: 7.534053325653076 2023-01-24 01:49:21.780900: step: 684/470, loss: 0.7392002940177917 2023-01-24 01:49:22.538840: step: 686/470, loss: 4.810300350189209 2023-01-24 01:49:23.327342: step: 688/470, loss: 0.2002626359462738 2023-01-24 01:49:24.132153: step: 690/470, loss: 0.3717239499092102 2023-01-24 01:49:24.803041: step: 692/470, loss: 0.9321846961975098 2023-01-24 01:49:25.523602: step: 694/470, loss: 0.8831678032875061 2023-01-24 01:49:26.214915: step: 696/470, loss: 1.0070817470550537 2023-01-24 01:49:26.948212: step: 698/470, loss: 1.1700540781021118 2023-01-24 01:49:27.707864: step: 700/470, loss: 1.3304771184921265 2023-01-24 01:49:28.441987: step: 702/470, loss: 1.0372437238693237 2023-01-24 01:49:29.186708: step: 704/470, loss: 0.636731743812561 2023-01-24 01:49:29.955339: step: 706/470, loss: 0.39378777146339417 2023-01-24 01:49:30.731669: step: 708/470, loss: 2.0515360832214355 2023-01-24 01:49:31.511863: step: 710/470, loss: 0.6573086977005005 2023-01-24 01:49:32.287478: step: 712/470, loss: 2.0945732593536377 2023-01-24 01:49:32.991773: step: 714/470, loss: 0.41838696599006653 2023-01-24 01:49:33.710959: step: 716/470, loss: 0.5202152729034424 2023-01-24 01:49:34.494083: step: 718/470, loss: 1.1282581090927124 2023-01-24 01:49:35.207544: step: 720/470, loss: 0.8712818622589111 2023-01-24 01:49:35.925713: step: 722/470, loss: 0.28079456090927124 2023-01-24 01:49:36.729584: step: 724/470, loss: 1.146299123764038 2023-01-24 01:49:37.435422: step: 726/470, loss: 2.3962433338165283 2023-01-24 01:49:38.142300: step: 728/470, loss: 1.109757423400879 2023-01-24 01:49:38.856465: step: 730/470, loss: 0.8224062323570251 2023-01-24 01:49:39.544562: step: 732/470, loss: 0.716911792755127 2023-01-24 01:49:40.266292: step: 734/470, loss: 0.7856378555297852 2023-01-24 01:49:40.961338: step: 736/470, loss: 0.9323542714118958 2023-01-24 01:49:41.690306: step: 738/470, loss: 1.7010040283203125 2023-01-24 01:49:42.565789: step: 740/470, loss: 0.9458099603652954 2023-01-24 01:49:43.310677: step: 742/470, loss: 0.4535662531852722 2023-01-24 01:49:44.107021: step: 744/470, loss: 1.6949098110198975 2023-01-24 01:49:45.009289: step: 746/470, loss: 0.36973050236701965 2023-01-24 01:49:45.875085: step: 748/470, loss: 1.6900949478149414 2023-01-24 01:49:46.626580: step: 750/470, loss: 0.700052797794342 2023-01-24 01:49:47.516590: step: 752/470, loss: 2.680393934249878 2023-01-24 01:49:48.350143: step: 754/470, loss: 2.4601190090179443 2023-01-24 01:49:49.119769: step: 756/470, loss: 1.6700736284255981 2023-01-24 01:49:49.908784: step: 758/470, loss: 1.34128737449646 2023-01-24 01:49:50.702542: step: 760/470, loss: 1.392914056777954 2023-01-24 01:49:51.511632: step: 762/470, loss: 0.7549951076507568 2023-01-24 01:49:52.228029: step: 764/470, loss: 3.097191333770752 2023-01-24 01:49:52.891898: step: 766/470, loss: 1.1415493488311768 2023-01-24 01:49:53.636571: step: 768/470, loss: 1.2913892269134521 2023-01-24 01:49:54.341102: step: 770/470, loss: 1.9585628509521484 2023-01-24 01:49:55.120665: step: 772/470, loss: 1.1048572063446045 2023-01-24 01:49:55.844674: step: 774/470, loss: 1.6778151988983154 2023-01-24 01:49:56.617021: step: 776/470, loss: 0.4695655107498169 2023-01-24 01:49:57.296487: step: 778/470, loss: 1.7176390886306763 2023-01-24 01:49:58.147038: step: 780/470, loss: 0.1662791669368744 2023-01-24 01:49:58.920834: step: 782/470, loss: 3.580739736557007 2023-01-24 01:49:59.659451: step: 784/470, loss: 0.5405029058456421 2023-01-24 01:50:00.425976: step: 786/470, loss: 0.7567795515060425 2023-01-24 01:50:01.221420: step: 788/470, loss: 1.8015867471694946 2023-01-24 01:50:01.946039: step: 790/470, loss: 3.115386724472046 2023-01-24 01:50:02.731375: step: 792/470, loss: 1.5635459423065186 2023-01-24 01:50:03.494087: step: 794/470, loss: 0.3208344578742981 2023-01-24 01:50:04.281121: step: 796/470, loss: 3.5830130577087402 2023-01-24 01:50:05.017367: step: 798/470, loss: 1.7164645195007324 2023-01-24 01:50:05.757317: step: 800/470, loss: 0.2683294117450714 2023-01-24 01:50:06.426532: step: 802/470, loss: 9.538615226745605 2023-01-24 01:50:07.238843: step: 804/470, loss: 4.4917073249816895 2023-01-24 01:50:07.999964: step: 806/470, loss: 2.0354855060577393 2023-01-24 01:50:08.719100: step: 808/470, loss: 1.513418197631836 2023-01-24 01:50:09.443432: step: 810/470, loss: 1.2347862720489502 2023-01-24 01:50:10.225627: step: 812/470, loss: 2.0256099700927734 2023-01-24 01:50:10.950947: step: 814/470, loss: 2.208359718322754 2023-01-24 01:50:11.731557: step: 816/470, loss: 0.9855014681816101 2023-01-24 01:50:12.490991: step: 818/470, loss: 0.9821873903274536 2023-01-24 01:50:13.270785: step: 820/470, loss: 17.047348022460938 2023-01-24 01:50:14.022314: step: 822/470, loss: 1.7741284370422363 2023-01-24 01:50:14.748086: step: 824/470, loss: 0.6360207200050354 2023-01-24 01:50:15.461986: step: 826/470, loss: 1.1706583499908447 2023-01-24 01:50:16.240844: step: 828/470, loss: 1.1831610202789307 2023-01-24 01:50:16.959427: step: 830/470, loss: 0.9164629578590393 2023-01-24 01:50:17.740736: step: 832/470, loss: 0.7324165105819702 2023-01-24 01:50:18.445224: step: 834/470, loss: 2.8356175422668457 2023-01-24 01:50:19.158006: step: 836/470, loss: 1.6835747957229614 2023-01-24 01:50:19.849363: step: 838/470, loss: 1.1602541208267212 2023-01-24 01:50:20.566159: step: 840/470, loss: 0.8675392866134644 2023-01-24 01:50:21.302936: step: 842/470, loss: 0.9252186417579651 2023-01-24 01:50:22.065546: step: 844/470, loss: 2.3158793449401855 2023-01-24 01:50:22.821593: step: 846/470, loss: 6.0092453956604 2023-01-24 01:50:23.476558: step: 848/470, loss: 1.1773701906204224 2023-01-24 01:50:24.204224: step: 850/470, loss: 2.684903144836426 2023-01-24 01:50:24.973680: step: 852/470, loss: 1.871997356414795 2023-01-24 01:50:25.785132: step: 854/470, loss: 3.4746434688568115 2023-01-24 01:50:26.532042: step: 856/470, loss: 3.444838523864746 2023-01-24 01:50:27.269274: step: 858/470, loss: 2.6179463863372803 2023-01-24 01:50:28.077301: step: 860/470, loss: 0.5549417734146118 2023-01-24 01:50:28.855634: step: 862/470, loss: 1.6258676052093506 2023-01-24 01:50:29.521560: step: 864/470, loss: 0.6284773349761963 2023-01-24 01:50:30.238609: step: 866/470, loss: 2.572608232498169 2023-01-24 01:50:30.900932: step: 868/470, loss: 1.005090594291687 2023-01-24 01:50:31.608479: step: 870/470, loss: 0.31928566098213196 2023-01-24 01:50:32.348819: step: 872/470, loss: 0.3751024603843689 2023-01-24 01:50:33.108772: step: 874/470, loss: 2.8960084915161133 2023-01-24 01:50:33.867269: step: 876/470, loss: 0.4217081069946289 2023-01-24 01:50:34.611055: step: 878/470, loss: 2.551321029663086 2023-01-24 01:50:35.316638: step: 880/470, loss: 5.909186363220215 2023-01-24 01:50:36.073000: step: 882/470, loss: 2.9134228229522705 2023-01-24 01:50:36.882348: step: 884/470, loss: 2.1783947944641113 2023-01-24 01:50:37.599969: step: 886/470, loss: 0.5688473582267761 2023-01-24 01:50:38.336875: step: 888/470, loss: 3.83414888381958 2023-01-24 01:50:39.073261: step: 890/470, loss: 1.6998573541641235 2023-01-24 01:50:39.976286: step: 892/470, loss: 1.6830124855041504 2023-01-24 01:50:40.775219: step: 894/470, loss: 1.0185949802398682 2023-01-24 01:50:41.446203: step: 896/470, loss: 1.8073221445083618 2023-01-24 01:50:42.193062: step: 898/470, loss: 0.8843210339546204 2023-01-24 01:50:42.987260: step: 900/470, loss: 4.365537166595459 2023-01-24 01:50:43.751821: step: 902/470, loss: 0.808355987071991 2023-01-24 01:50:44.648139: step: 904/470, loss: 1.7078111171722412 2023-01-24 01:50:45.401308: step: 906/470, loss: 0.526005208492279 2023-01-24 01:50:46.161867: step: 908/470, loss: 0.30686283111572266 2023-01-24 01:50:46.901039: step: 910/470, loss: 0.9184078574180603 2023-01-24 01:50:47.661996: step: 912/470, loss: 0.605542778968811 2023-01-24 01:50:48.405360: step: 914/470, loss: 0.6337424516677856 2023-01-24 01:50:49.138888: step: 916/470, loss: 1.6130006313323975 2023-01-24 01:50:49.906621: step: 918/470, loss: 4.33975887298584 2023-01-24 01:50:50.730389: step: 920/470, loss: 3.3465535640716553 2023-01-24 01:50:51.525372: step: 922/470, loss: 0.68430495262146 2023-01-24 01:50:52.264308: step: 924/470, loss: 1.328958511352539 2023-01-24 01:50:53.071243: step: 926/470, loss: 2.2398757934570312 2023-01-24 01:50:53.831811: step: 928/470, loss: 1.0955443382263184 2023-01-24 01:50:54.431163: step: 930/470, loss: 1.5726451873779297 2023-01-24 01:50:55.115568: step: 932/470, loss: 1.2137433290481567 2023-01-24 01:50:55.852834: step: 934/470, loss: 1.729867935180664 2023-01-24 01:50:56.747428: step: 936/470, loss: 0.5839881300926208 2023-01-24 01:50:57.443029: step: 938/470, loss: 0.5170664191246033 2023-01-24 01:50:58.184005: step: 940/470, loss: 0.8649019002914429 2023-01-24 01:50:58.960331: step: 942/470, loss: 2.5765528678894043 ================================================== Loss: 1.597 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213942479544022, 'r': 0.26284804718851484, 'f1': 0.2891877262387209}, 'combined': 0.21308569301800487, 'epoch': 2} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35411825897044286, 'r': 0.2415215791248938, 'f1': 0.28717757152371726}, 'combined': 0.1914517143491448, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3144859954269342, 'r': 0.2583917192027751, 'f1': 0.2836925750413802}, 'combined': 0.20903663424101696, 'epoch': 2} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.358303072115371, 'r': 0.24265481829138957, 'f1': 0.28935127816877043}, 'combined': 0.19290085211251357, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2953586162642576, 'r': 0.2561269215042993, 'f1': 0.27434733258692223}, 'combined': 0.20215066611667953, 'epoch': 2} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35446642248577526, 'r': 0.2482286474468109, 'f1': 0.291984205640825}, 'combined': 0.1946561370938833, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2785714285714286, 'f1': 0.319672131147541}, 'combined': 0.21311475409836067, 'epoch': 2} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6785714285714286, 'r': 0.20652173913043478, 'f1': 0.31666666666666665}, 'combined': 0.21111111111111108, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.1724137931034483, 'f1': 0.25000000000000006}, 'combined': 0.16666666666666669, 'epoch': 2} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213942479544022, 'r': 0.26284804718851484, 'f1': 0.2891877262387209}, 'combined': 0.21308569301800487, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35411825897044286, 'r': 0.2415215791248938, 'f1': 0.28717757152371726}, 'combined': 0.1914517143491448, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2785714285714286, 'f1': 0.319672131147541}, 'combined': 0.21311475409836067, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3144859954269342, 'r': 0.2583917192027751, 'f1': 0.2836925750413802}, 'combined': 0.20903663424101696, 'epoch': 2} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.358303072115371, 'r': 0.24265481829138957, 'f1': 0.28935127816877043}, 'combined': 0.19290085211251357, 'epoch': 2} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6785714285714286, 'r': 0.20652173913043478, 'f1': 0.31666666666666665}, 'combined': 0.21111111111111108, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25838488445869234, 'r': 0.22468250822494984, 'f1': 0.2403580320545975}, 'combined': 0.1771059183560192, 'epoch': 1} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.31532402962177447, 'r': 0.21374313417314916, 'f1': 0.2547818159343938}, 'combined': 0.1698545439562625, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.22413793103448276, 'f1': 0.3170731707317073}, 'combined': 0.2113821138211382, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:53:48.638167: step: 2/470, loss: 0.36363697052001953 2023-01-24 01:53:49.390785: step: 4/470, loss: 1.200259804725647 2023-01-24 01:53:50.087156: step: 6/470, loss: 0.9495527744293213 2023-01-24 01:53:50.844435: step: 8/470, loss: 6.474288463592529 2023-01-24 01:53:51.605184: step: 10/470, loss: 0.4756370186805725 2023-01-24 01:53:52.377934: step: 12/470, loss: 0.5976176261901855 2023-01-24 01:53:53.125631: step: 14/470, loss: 1.184709906578064 2023-01-24 01:53:53.859646: step: 16/470, loss: 0.3212374746799469 2023-01-24 01:53:54.583096: step: 18/470, loss: 0.17789015173912048 2023-01-24 01:53:55.274921: step: 20/470, loss: 1.2034302949905396 2023-01-24 01:53:56.067185: step: 22/470, loss: 0.7665480375289917 2023-01-24 01:53:56.756072: step: 24/470, loss: 0.40206414461135864 2023-01-24 01:53:57.495067: step: 26/470, loss: 1.6805636882781982 2023-01-24 01:53:58.288914: step: 28/470, loss: 1.1735949516296387 2023-01-24 01:53:59.037783: step: 30/470, loss: 1.2147589921951294 2023-01-24 01:53:59.707623: step: 32/470, loss: 1.4510881900787354 2023-01-24 01:54:00.393587: step: 34/470, loss: 0.7579923272132874 2023-01-24 01:54:01.088306: step: 36/470, loss: 0.22519704699516296 2023-01-24 01:54:01.940649: step: 38/470, loss: 0.5341430306434631 2023-01-24 01:54:02.677364: step: 40/470, loss: 0.6423546075820923 2023-01-24 01:54:03.346734: step: 42/470, loss: 0.5371485948562622 2023-01-24 01:54:04.069866: step: 44/470, loss: 1.21221923828125 2023-01-24 01:54:04.829372: step: 46/470, loss: 2.2409815788269043 2023-01-24 01:54:05.581614: step: 48/470, loss: 2.1587605476379395 2023-01-24 01:54:06.253206: step: 50/470, loss: 0.41026097536087036 2023-01-24 01:54:06.986902: step: 52/470, loss: 1.3984534740447998 2023-01-24 01:54:07.784486: step: 54/470, loss: 0.902085542678833 2023-01-24 01:54:08.533015: step: 56/470, loss: 0.4346156120300293 2023-01-24 01:54:09.290516: step: 58/470, loss: 1.1000165939331055 2023-01-24 01:54:10.064205: step: 60/470, loss: 0.9447662830352783 2023-01-24 01:54:10.720079: step: 62/470, loss: 4.6900763511657715 2023-01-24 01:54:11.433190: step: 64/470, loss: 1.7766426801681519 2023-01-24 01:54:12.166777: step: 66/470, loss: 0.977912187576294 2023-01-24 01:54:12.910918: step: 68/470, loss: 0.8247696161270142 2023-01-24 01:54:13.618887: step: 70/470, loss: 2.7817070484161377 2023-01-24 01:54:14.333807: step: 72/470, loss: 3.9839916229248047 2023-01-24 01:54:15.180224: step: 74/470, loss: 1.374387264251709 2023-01-24 01:54:15.900826: step: 76/470, loss: 0.7330376505851746 2023-01-24 01:54:16.728894: step: 78/470, loss: 2.2703309059143066 2023-01-24 01:54:17.452828: step: 80/470, loss: 0.8914951682090759 2023-01-24 01:54:18.169200: step: 82/470, loss: 1.1255903244018555 2023-01-24 01:54:19.007299: step: 84/470, loss: 2.9205687046051025 2023-01-24 01:54:19.788899: step: 86/470, loss: 0.7798607349395752 2023-01-24 01:54:20.477025: step: 88/470, loss: 1.1443251371383667 2023-01-24 01:54:21.209347: step: 90/470, loss: 1.046618938446045 2023-01-24 01:54:21.959044: step: 92/470, loss: 4.348773956298828 2023-01-24 01:54:22.660254: step: 94/470, loss: 3.3680717945098877 2023-01-24 01:54:23.439779: step: 96/470, loss: 1.9639177322387695 2023-01-24 01:54:24.177788: step: 98/470, loss: 0.4387917220592499 2023-01-24 01:54:24.898316: step: 100/470, loss: 0.3733503222465515 2023-01-24 01:54:25.645178: step: 102/470, loss: 0.8334224224090576 2023-01-24 01:54:26.382637: step: 104/470, loss: 0.9581044912338257 2023-01-24 01:54:27.114922: step: 106/470, loss: 1.0296529531478882 2023-01-24 01:54:27.902718: step: 108/470, loss: 5.17448091506958 2023-01-24 01:54:28.616316: step: 110/470, loss: 0.4054220914840698 2023-01-24 01:54:29.366784: step: 112/470, loss: 0.47016581892967224 2023-01-24 01:54:30.100129: step: 114/470, loss: 0.7138015031814575 2023-01-24 01:54:30.800667: step: 116/470, loss: 0.9061262607574463 2023-01-24 01:54:31.473540: step: 118/470, loss: 0.30310964584350586 2023-01-24 01:54:32.220809: step: 120/470, loss: 0.6186302304267883 2023-01-24 01:54:32.978337: step: 122/470, loss: 1.7593357563018799 2023-01-24 01:54:33.645779: step: 124/470, loss: 0.7808341979980469 2023-01-24 01:54:34.360294: step: 126/470, loss: 1.6488226652145386 2023-01-24 01:54:35.221598: step: 128/470, loss: 0.45947107672691345 2023-01-24 01:54:35.981821: step: 130/470, loss: 1.057709813117981 2023-01-24 01:54:36.630467: step: 132/470, loss: 0.8940787315368652 2023-01-24 01:54:37.364004: step: 134/470, loss: 0.5826739072799683 2023-01-24 01:54:38.106506: step: 136/470, loss: 1.4600317478179932 2023-01-24 01:54:38.888632: step: 138/470, loss: 0.3595348298549652 2023-01-24 01:54:39.691090: step: 140/470, loss: 1.4183976650238037 2023-01-24 01:54:40.385016: step: 142/470, loss: 0.9755016565322876 2023-01-24 01:54:41.145979: step: 144/470, loss: 0.27329689264297485 2023-01-24 01:54:41.871509: step: 146/470, loss: 1.2449324131011963 2023-01-24 01:54:42.605857: step: 148/470, loss: 0.7816654443740845 2023-01-24 01:54:43.432816: step: 150/470, loss: 0.28573817014694214 2023-01-24 01:54:44.214687: step: 152/470, loss: 0.9601019620895386 2023-01-24 01:54:44.999436: step: 154/470, loss: 0.4332647919654846 2023-01-24 01:54:45.745272: step: 156/470, loss: 0.21661068499088287 2023-01-24 01:54:46.386621: step: 158/470, loss: 1.6688331365585327 2023-01-24 01:54:47.123192: step: 160/470, loss: 0.8690783977508545 2023-01-24 01:54:47.797203: step: 162/470, loss: 0.490153968334198 2023-01-24 01:54:48.584768: step: 164/470, loss: 1.6668365001678467 2023-01-24 01:54:49.357176: step: 166/470, loss: 0.7929226160049438 2023-01-24 01:54:50.063056: step: 168/470, loss: 1.0394725799560547 2023-01-24 01:54:50.840039: step: 170/470, loss: 0.30205702781677246 2023-01-24 01:54:51.570177: step: 172/470, loss: 0.9545096158981323 2023-01-24 01:54:52.276923: step: 174/470, loss: 0.2452886998653412 2023-01-24 01:54:53.031817: step: 176/470, loss: 0.3460700213909149 2023-01-24 01:54:53.841557: step: 178/470, loss: 1.1051923036575317 2023-01-24 01:54:54.548509: step: 180/470, loss: 1.239685297012329 2023-01-24 01:54:55.366842: step: 182/470, loss: 0.9798608422279358 2023-01-24 01:54:56.057308: step: 184/470, loss: 2.91426420211792 2023-01-24 01:54:56.768756: step: 186/470, loss: 1.605899453163147 2023-01-24 01:54:57.582812: step: 188/470, loss: 0.4200703501701355 2023-01-24 01:54:58.294916: step: 190/470, loss: 2.6780664920806885 2023-01-24 01:54:59.090194: step: 192/470, loss: 0.3517438471317291 2023-01-24 01:54:59.823285: step: 194/470, loss: 0.46974340081214905 2023-01-24 01:55:00.629188: step: 196/470, loss: 0.29148203134536743 2023-01-24 01:55:01.346610: step: 198/470, loss: 1.264331579208374 2023-01-24 01:55:02.039292: step: 200/470, loss: 0.47580063343048096 2023-01-24 01:55:02.767958: step: 202/470, loss: 1.759827971458435 2023-01-24 01:55:03.479900: step: 204/470, loss: 1.8886005878448486 2023-01-24 01:55:04.201586: step: 206/470, loss: 1.2642725706100464 2023-01-24 01:55:04.965215: step: 208/470, loss: 1.3302885293960571 2023-01-24 01:55:05.742232: step: 210/470, loss: 0.262773334980011 2023-01-24 01:55:06.444429: step: 212/470, loss: 0.39958715438842773 2023-01-24 01:55:07.153605: step: 214/470, loss: 1.108762264251709 2023-01-24 01:55:07.980565: step: 216/470, loss: 0.5211945176124573 2023-01-24 01:55:08.735322: step: 218/470, loss: 0.5695528984069824 2023-01-24 01:55:09.518222: step: 220/470, loss: 0.9404783248901367 2023-01-24 01:55:10.230336: step: 222/470, loss: 5.394157409667969 2023-01-24 01:55:11.016025: step: 224/470, loss: 0.3269270658493042 2023-01-24 01:55:11.720804: step: 226/470, loss: 0.12120553106069565 2023-01-24 01:55:12.392520: step: 228/470, loss: 2.013479232788086 2023-01-24 01:55:13.142871: step: 230/470, loss: 0.7474368810653687 2023-01-24 01:55:13.861718: step: 232/470, loss: 0.8971037864685059 2023-01-24 01:55:14.574376: step: 234/470, loss: 0.8100451827049255 2023-01-24 01:55:15.349410: step: 236/470, loss: 0.3806197941303253 2023-01-24 01:55:16.085267: step: 238/470, loss: 0.3603185713291168 2023-01-24 01:55:16.883597: step: 240/470, loss: 8.618203163146973 2023-01-24 01:55:17.641778: step: 242/470, loss: 0.440914511680603 2023-01-24 01:55:18.484834: step: 244/470, loss: 0.538329005241394 2023-01-24 01:55:19.231152: step: 246/470, loss: 0.6860210299491882 2023-01-24 01:55:19.947104: step: 248/470, loss: 0.5581848621368408 2023-01-24 01:55:20.725176: step: 250/470, loss: 1.4989908933639526 2023-01-24 01:55:21.461274: step: 252/470, loss: 0.3475220799446106 2023-01-24 01:55:22.200383: step: 254/470, loss: 0.6883515119552612 2023-01-24 01:55:22.929128: step: 256/470, loss: 0.3571469783782959 2023-01-24 01:55:23.642438: step: 258/470, loss: 1.115850567817688 2023-01-24 01:55:24.428891: step: 260/470, loss: 0.4317207932472229 2023-01-24 01:55:25.180251: step: 262/470, loss: 0.49563005566596985 2023-01-24 01:55:25.891461: step: 264/470, loss: 0.5665435791015625 2023-01-24 01:55:26.596997: step: 266/470, loss: 0.4394031763076782 2023-01-24 01:55:27.326576: step: 268/470, loss: 1.2470353841781616 2023-01-24 01:55:28.063354: step: 270/470, loss: 0.5768090486526489 2023-01-24 01:55:28.766425: step: 272/470, loss: 1.6476483345031738 2023-01-24 01:55:29.506710: step: 274/470, loss: 1.6945419311523438 2023-01-24 01:55:30.221130: step: 276/470, loss: 0.6581287980079651 2023-01-24 01:55:30.992185: step: 278/470, loss: 0.7141035199165344 2023-01-24 01:55:31.743623: step: 280/470, loss: 1.2979902029037476 2023-01-24 01:55:32.487043: step: 282/470, loss: 2.7254929542541504 2023-01-24 01:55:33.309229: step: 284/470, loss: 3.011003017425537 2023-01-24 01:55:34.053942: step: 286/470, loss: 0.907600998878479 2023-01-24 01:55:34.947366: step: 288/470, loss: 1.4326318502426147 2023-01-24 01:55:35.809760: step: 290/470, loss: 1.6391098499298096 2023-01-24 01:55:36.500667: step: 292/470, loss: 0.27279168367385864 2023-01-24 01:55:37.187952: step: 294/470, loss: 1.117002248764038 2023-01-24 01:55:37.961240: step: 296/470, loss: 0.19795672595500946 2023-01-24 01:55:38.633388: step: 298/470, loss: 0.3689045011997223 2023-01-24 01:55:39.338772: step: 300/470, loss: 2.757559061050415 2023-01-24 01:55:40.087352: step: 302/470, loss: 0.2532775402069092 2023-01-24 01:55:40.862535: step: 304/470, loss: 0.5779852271080017 2023-01-24 01:55:41.531702: step: 306/470, loss: 0.2892742156982422 2023-01-24 01:55:42.282077: step: 308/470, loss: 6.725302696228027 2023-01-24 01:55:42.932744: step: 310/470, loss: 0.16188186407089233 2023-01-24 01:55:43.777697: step: 312/470, loss: 0.713376522064209 2023-01-24 01:55:44.562570: step: 314/470, loss: 0.9798615574836731 2023-01-24 01:55:45.242814: step: 316/470, loss: 0.2618459463119507 2023-01-24 01:55:46.082678: step: 318/470, loss: 0.3729875385761261 2023-01-24 01:55:46.771449: step: 320/470, loss: 1.94132399559021 2023-01-24 01:55:47.552529: step: 322/470, loss: 0.37356746196746826 2023-01-24 01:55:48.249958: step: 324/470, loss: 5.201150417327881 2023-01-24 01:55:49.014684: step: 326/470, loss: 0.7417845129966736 2023-01-24 01:55:49.739041: step: 328/470, loss: 1.832114815711975 2023-01-24 01:55:50.443653: step: 330/470, loss: 1.1253231763839722 2023-01-24 01:55:51.203684: step: 332/470, loss: 0.22344562411308289 2023-01-24 01:55:51.986481: step: 334/470, loss: 0.8852127194404602 2023-01-24 01:55:52.756071: step: 336/470, loss: 0.9841639399528503 2023-01-24 01:55:53.508817: step: 338/470, loss: 1.8902114629745483 2023-01-24 01:55:54.248632: step: 340/470, loss: 0.27074041962623596 2023-01-24 01:55:54.950887: step: 342/470, loss: 0.16591957211494446 2023-01-24 01:55:55.711930: step: 344/470, loss: 0.5446414947509766 2023-01-24 01:55:56.472059: step: 346/470, loss: 1.8010767698287964 2023-01-24 01:55:57.194708: step: 348/470, loss: 0.44357505440711975 2023-01-24 01:55:57.884819: step: 350/470, loss: 4.071453094482422 2023-01-24 01:55:58.829060: step: 352/470, loss: 1.5298190116882324 2023-01-24 01:55:59.655458: step: 354/470, loss: 0.6060335636138916 2023-01-24 01:56:00.329073: step: 356/470, loss: 0.4127192497253418 2023-01-24 01:56:01.053922: step: 358/470, loss: 1.532690167427063 2023-01-24 01:56:01.829206: step: 360/470, loss: 0.8012219667434692 2023-01-24 01:56:02.579432: step: 362/470, loss: 0.5713666677474976 2023-01-24 01:56:03.378653: step: 364/470, loss: 2.333641529083252 2023-01-24 01:56:04.137878: step: 366/470, loss: 1.080212116241455 2023-01-24 01:56:04.821947: step: 368/470, loss: 1.0552024841308594 2023-01-24 01:56:05.502644: step: 370/470, loss: 2.2088849544525146 2023-01-24 01:56:06.243487: step: 372/470, loss: 0.6469262838363647 2023-01-24 01:56:06.956737: step: 374/470, loss: 0.5959089994430542 2023-01-24 01:56:07.740554: step: 376/470, loss: 0.6726745963096619 2023-01-24 01:56:08.416511: step: 378/470, loss: 0.41327551007270813 2023-01-24 01:56:09.260195: step: 380/470, loss: 1.2673792839050293 2023-01-24 01:56:10.001503: step: 382/470, loss: 1.0747371912002563 2023-01-24 01:56:10.783587: step: 384/470, loss: 0.8380264043807983 2023-01-24 01:56:11.495815: step: 386/470, loss: 0.47480469942092896 2023-01-24 01:56:12.248346: step: 388/470, loss: 0.09107540547847748 2023-01-24 01:56:12.971315: step: 390/470, loss: 1.519758939743042 2023-01-24 01:56:13.633762: step: 392/470, loss: 0.8037975430488586 2023-01-24 01:56:14.322758: step: 394/470, loss: 0.9735026359558105 2023-01-24 01:56:15.309024: step: 396/470, loss: 0.7729388475418091 2023-01-24 01:56:16.029929: step: 398/470, loss: 0.43985089659690857 2023-01-24 01:56:16.786124: step: 400/470, loss: 1.1569021940231323 2023-01-24 01:56:17.570848: step: 402/470, loss: 0.90910404920578 2023-01-24 01:56:18.314201: step: 404/470, loss: 1.4025219678878784 2023-01-24 01:56:19.018845: step: 406/470, loss: 0.174116849899292 2023-01-24 01:56:19.782906: step: 408/470, loss: 0.9569568634033203 2023-01-24 01:56:20.567087: step: 410/470, loss: 0.889129638671875 2023-01-24 01:56:21.266376: step: 412/470, loss: 0.5545390844345093 2023-01-24 01:56:22.054265: step: 414/470, loss: 2.280895709991455 2023-01-24 01:56:22.761033: step: 416/470, loss: 0.6171635389328003 2023-01-24 01:56:23.598669: step: 418/470, loss: 2.749620199203491 2023-01-24 01:56:24.306232: step: 420/470, loss: 0.6688118577003479 2023-01-24 01:56:25.023983: step: 422/470, loss: 0.48191848397254944 2023-01-24 01:56:25.733618: step: 424/470, loss: 0.9727869629859924 2023-01-24 01:56:26.449750: step: 426/470, loss: 1.0109034776687622 2023-01-24 01:56:27.230439: step: 428/470, loss: 1.8949697017669678 2023-01-24 01:56:28.036538: step: 430/470, loss: 0.29387497901916504 2023-01-24 01:56:28.784781: step: 432/470, loss: 1.4768874645233154 2023-01-24 01:56:29.576335: step: 434/470, loss: 0.6072927117347717 2023-01-24 01:56:30.332352: step: 436/470, loss: 2.1941230297088623 2023-01-24 01:56:30.977144: step: 438/470, loss: 0.42255786061286926 2023-01-24 01:56:31.677031: step: 440/470, loss: 0.3252505660057068 2023-01-24 01:56:32.461522: step: 442/470, loss: 0.7017578482627869 2023-01-24 01:56:33.202913: step: 444/470, loss: 0.40087300539016724 2023-01-24 01:56:34.006566: step: 446/470, loss: 1.6672818660736084 2023-01-24 01:56:34.749932: step: 448/470, loss: 1.3076132535934448 2023-01-24 01:56:35.511259: step: 450/470, loss: 1.0871223211288452 2023-01-24 01:56:36.308225: step: 452/470, loss: 4.525178909301758 2023-01-24 01:56:37.121395: step: 454/470, loss: 0.15165036916732788 2023-01-24 01:56:37.931283: step: 456/470, loss: 0.9667521715164185 2023-01-24 01:56:38.810276: step: 458/470, loss: 0.9113951325416565 2023-01-24 01:56:39.583538: step: 460/470, loss: 0.8675466775894165 2023-01-24 01:56:40.349127: step: 462/470, loss: 1.114274263381958 2023-01-24 01:56:41.096236: step: 464/470, loss: 0.7635269165039062 2023-01-24 01:56:41.899894: step: 466/470, loss: 0.9710996747016907 2023-01-24 01:56:42.554013: step: 468/470, loss: 0.2852608561515808 2023-01-24 01:56:43.338851: step: 470/470, loss: 0.7204350233078003 2023-01-24 01:56:44.244728: step: 472/470, loss: 9.467118263244629 2023-01-24 01:56:44.986421: step: 474/470, loss: 0.6275573372840881 2023-01-24 01:56:45.722612: step: 476/470, loss: 1.6595731973648071 2023-01-24 01:56:46.564274: step: 478/470, loss: 1.0893810987472534 2023-01-24 01:56:47.345155: step: 480/470, loss: 0.4000731408596039 2023-01-24 01:56:48.078741: step: 482/470, loss: 11.773370742797852 2023-01-24 01:56:48.805451: step: 484/470, loss: 3.65254545211792 2023-01-24 01:56:49.573437: step: 486/470, loss: 1.3855931758880615 2023-01-24 01:56:50.399215: step: 488/470, loss: 0.3679276406764984 2023-01-24 01:56:51.066133: step: 490/470, loss: 0.6360776424407959 2023-01-24 01:56:51.809018: step: 492/470, loss: 3.5798442363739014 2023-01-24 01:56:52.531291: step: 494/470, loss: 0.6142995357513428 2023-01-24 01:56:53.311186: step: 496/470, loss: 2.273555278778076 2023-01-24 01:56:54.086889: step: 498/470, loss: 0.5966622233390808 2023-01-24 01:56:54.857527: step: 500/470, loss: 0.5236387848854065 2023-01-24 01:56:55.622765: step: 502/470, loss: 0.5399066209793091 2023-01-24 01:56:56.305685: step: 504/470, loss: 1.1629985570907593 2023-01-24 01:56:57.163720: step: 506/470, loss: 0.5488350987434387 2023-01-24 01:56:57.851976: step: 508/470, loss: 0.6530537009239197 2023-01-24 01:56:58.637573: step: 510/470, loss: 0.6670851707458496 2023-01-24 01:56:59.394626: step: 512/470, loss: 1.0367801189422607 2023-01-24 01:57:00.127324: step: 514/470, loss: 1.2847917079925537 2023-01-24 01:57:00.970723: step: 516/470, loss: 0.6694021821022034 2023-01-24 01:57:01.671074: step: 518/470, loss: 2.218747138977051 2023-01-24 01:57:02.442537: step: 520/470, loss: 0.42207273840904236 2023-01-24 01:57:03.250014: step: 522/470, loss: 1.1089502573013306 2023-01-24 01:57:03.868602: step: 524/470, loss: 1.3500819206237793 2023-01-24 01:57:04.646016: step: 526/470, loss: 0.6098719835281372 2023-01-24 01:57:05.419754: step: 528/470, loss: 1.7950224876403809 2023-01-24 01:57:06.204607: step: 530/470, loss: 0.34178969264030457 2023-01-24 01:57:07.009981: step: 532/470, loss: 4.002415657043457 2023-01-24 01:57:07.719874: step: 534/470, loss: 0.6252081394195557 2023-01-24 01:57:08.446330: step: 536/470, loss: 1.957798719406128 2023-01-24 01:57:09.227840: step: 538/470, loss: 0.6906619071960449 2023-01-24 01:57:10.026489: step: 540/470, loss: 0.5147668123245239 2023-01-24 01:57:10.765740: step: 542/470, loss: 1.0314600467681885 2023-01-24 01:57:11.509218: step: 544/470, loss: 0.37217873334884644 2023-01-24 01:57:12.269942: step: 546/470, loss: 1.1730372905731201 2023-01-24 01:57:12.979071: step: 548/470, loss: 0.7325170636177063 2023-01-24 01:57:13.735756: step: 550/470, loss: 0.49213165044784546 2023-01-24 01:57:14.519226: step: 552/470, loss: 0.47592616081237793 2023-01-24 01:57:15.382871: step: 554/470, loss: 0.8138816952705383 2023-01-24 01:57:16.153328: step: 556/470, loss: 0.5806734561920166 2023-01-24 01:57:16.930613: step: 558/470, loss: 1.703600525856018 2023-01-24 01:57:17.612698: step: 560/470, loss: 2.787195920944214 2023-01-24 01:57:18.335004: step: 562/470, loss: 0.3978123664855957 2023-01-24 01:57:19.089269: step: 564/470, loss: 1.3924106359481812 2023-01-24 01:57:19.879295: step: 566/470, loss: 0.9471191167831421 2023-01-24 01:57:20.603756: step: 568/470, loss: 0.41248780488967896 2023-01-24 01:57:21.436475: step: 570/470, loss: 1.1843392848968506 2023-01-24 01:57:22.133648: step: 572/470, loss: 1.386232852935791 2023-01-24 01:57:22.860802: step: 574/470, loss: 0.30617228150367737 2023-01-24 01:57:23.690240: step: 576/470, loss: 0.6840832233428955 2023-01-24 01:57:24.471530: step: 578/470, loss: 0.8363122940063477 2023-01-24 01:57:25.191804: step: 580/470, loss: 0.6774252653121948 2023-01-24 01:57:25.993243: step: 582/470, loss: 0.34732234477996826 2023-01-24 01:57:26.682336: step: 584/470, loss: 3.143740653991699 2023-01-24 01:57:27.393527: step: 586/470, loss: 1.0732841491699219 2023-01-24 01:57:28.120874: step: 588/470, loss: 0.6088477373123169 2023-01-24 01:57:28.881984: step: 590/470, loss: 1.6710162162780762 2023-01-24 01:57:29.524907: step: 592/470, loss: 0.8715441226959229 2023-01-24 01:57:30.240070: step: 594/470, loss: 1.7189347743988037 2023-01-24 01:57:30.956997: step: 596/470, loss: 7.218348979949951 2023-01-24 01:57:31.617466: step: 598/470, loss: 2.807617664337158 2023-01-24 01:57:32.362855: step: 600/470, loss: 1.4759058952331543 2023-01-24 01:57:33.166644: step: 602/470, loss: 7.785268306732178 2023-01-24 01:57:33.897887: step: 604/470, loss: 1.1287786960601807 2023-01-24 01:57:34.634579: step: 606/470, loss: 0.3461344242095947 2023-01-24 01:57:35.336199: step: 608/470, loss: 0.34514838457107544 2023-01-24 01:57:36.045396: step: 610/470, loss: 0.30250078439712524 2023-01-24 01:57:36.806008: step: 612/470, loss: 0.9699506163597107 2023-01-24 01:57:37.573261: step: 614/470, loss: 0.9597812294960022 2023-01-24 01:57:38.305501: step: 616/470, loss: 0.7912279963493347 2023-01-24 01:57:39.010878: step: 618/470, loss: 1.2856942415237427 2023-01-24 01:57:39.751427: step: 620/470, loss: 1.5808870792388916 2023-01-24 01:57:40.578360: step: 622/470, loss: 2.507807731628418 2023-01-24 01:57:41.368310: step: 624/470, loss: 0.46560168266296387 2023-01-24 01:57:42.144883: step: 626/470, loss: 0.5538115501403809 2023-01-24 01:57:42.981413: step: 628/470, loss: 0.9292363524436951 2023-01-24 01:57:43.829399: step: 630/470, loss: 0.7725551724433899 2023-01-24 01:57:44.617848: step: 632/470, loss: 1.3639918565750122 2023-01-24 01:57:45.339171: step: 634/470, loss: 1.2045042514801025 2023-01-24 01:57:46.185823: step: 636/470, loss: 1.0402116775512695 2023-01-24 01:57:46.890911: step: 638/470, loss: 1.0203620195388794 2023-01-24 01:57:47.669740: step: 640/470, loss: 1.0372285842895508 2023-01-24 01:57:48.373308: step: 642/470, loss: 4.547273635864258 2023-01-24 01:57:49.055524: step: 644/470, loss: 0.877484917640686 2023-01-24 01:57:49.775067: step: 646/470, loss: 1.7687321901321411 2023-01-24 01:57:50.533378: step: 648/470, loss: 1.9696149826049805 2023-01-24 01:57:51.200391: step: 650/470, loss: 1.9689509868621826 2023-01-24 01:57:51.931232: step: 652/470, loss: 0.36682435870170593 2023-01-24 01:57:52.759798: step: 654/470, loss: 1.2010406255722046 2023-01-24 01:57:53.505932: step: 656/470, loss: 0.7754824757575989 2023-01-24 01:57:54.292275: step: 658/470, loss: 0.16382497549057007 2023-01-24 01:57:55.019588: step: 660/470, loss: 0.6597549915313721 2023-01-24 01:57:55.792875: step: 662/470, loss: 1.6874454021453857 2023-01-24 01:57:56.603211: step: 664/470, loss: 1.7826333045959473 2023-01-24 01:57:57.301763: step: 666/470, loss: 1.3918657302856445 2023-01-24 01:57:58.074185: step: 668/470, loss: 0.9533292055130005 2023-01-24 01:57:58.747678: step: 670/470, loss: 0.17870013415813446 2023-01-24 01:57:59.487807: step: 672/470, loss: 0.4774816632270813 2023-01-24 01:58:00.223971: step: 674/470, loss: 1.4324686527252197 2023-01-24 01:58:00.966968: step: 676/470, loss: 1.8398833274841309 2023-01-24 01:58:01.719038: step: 678/470, loss: 0.42002177238464355 2023-01-24 01:58:02.440126: step: 680/470, loss: 0.953679621219635 2023-01-24 01:58:03.124998: step: 682/470, loss: 0.5611454248428345 2023-01-24 01:58:03.890567: step: 684/470, loss: 0.5755783319473267 2023-01-24 01:58:04.673224: step: 686/470, loss: 0.5575564503669739 2023-01-24 01:58:05.364136: step: 688/470, loss: 0.7334228157997131 2023-01-24 01:58:06.083429: step: 690/470, loss: 4.685910224914551 2023-01-24 01:58:06.809099: step: 692/470, loss: 0.306856632232666 2023-01-24 01:58:07.502440: step: 694/470, loss: 0.8744876980781555 2023-01-24 01:58:08.195174: step: 696/470, loss: 1.340862512588501 2023-01-24 01:58:08.969069: step: 698/470, loss: 2.6037561893463135 2023-01-24 01:58:09.697461: step: 700/470, loss: 0.3846321403980255 2023-01-24 01:58:10.544241: step: 702/470, loss: 1.2338802814483643 2023-01-24 01:58:11.302726: step: 704/470, loss: 0.6107156872749329 2023-01-24 01:58:12.034526: step: 706/470, loss: 1.1513054370880127 2023-01-24 01:58:12.788662: step: 708/470, loss: 1.4285832643508911 2023-01-24 01:58:13.563337: step: 710/470, loss: 1.7846858501434326 2023-01-24 01:58:14.252046: step: 712/470, loss: 0.536953330039978 2023-01-24 01:58:15.011394: step: 714/470, loss: 0.8060521483421326 2023-01-24 01:58:15.723936: step: 716/470, loss: 1.0133894681930542 2023-01-24 01:58:16.461884: step: 718/470, loss: 0.5618113875389099 2023-01-24 01:58:17.195068: step: 720/470, loss: 3.355659246444702 2023-01-24 01:58:17.965435: step: 722/470, loss: 0.5400373935699463 2023-01-24 01:58:18.662596: step: 724/470, loss: 9.46622085571289 2023-01-24 01:58:19.409432: step: 726/470, loss: 1.4518628120422363 2023-01-24 01:58:20.221801: step: 728/470, loss: 0.7577975988388062 2023-01-24 01:58:20.958215: step: 730/470, loss: 3.8897013664245605 2023-01-24 01:58:21.717759: step: 732/470, loss: 0.8022290468215942 2023-01-24 01:58:22.460733: step: 734/470, loss: 1.1684952974319458 2023-01-24 01:58:23.203687: step: 736/470, loss: 1.0844392776489258 2023-01-24 01:58:23.950973: step: 738/470, loss: 1.2642875909805298 2023-01-24 01:58:24.694748: step: 740/470, loss: 0.4668574333190918 2023-01-24 01:58:25.386755: step: 742/470, loss: 0.4412175118923187 2023-01-24 01:58:26.088150: step: 744/470, loss: 0.6135561466217041 2023-01-24 01:58:26.796106: step: 746/470, loss: 2.345003604888916 2023-01-24 01:58:27.536561: step: 748/470, loss: 0.6416370868682861 2023-01-24 01:58:28.313802: step: 750/470, loss: 0.5936519503593445 2023-01-24 01:58:29.042115: step: 752/470, loss: 1.1450408697128296 2023-01-24 01:58:29.717204: step: 754/470, loss: 1.5515800714492798 2023-01-24 01:58:30.472626: step: 756/470, loss: 3.4162683486938477 2023-01-24 01:58:31.149694: step: 758/470, loss: 0.24610131978988647 2023-01-24 01:58:31.861068: step: 760/470, loss: 1.518908977508545 2023-01-24 01:58:32.592995: step: 762/470, loss: 2.093191146850586 2023-01-24 01:58:33.302621: step: 764/470, loss: 0.7881523370742798 2023-01-24 01:58:34.016329: step: 766/470, loss: 1.1381288766860962 2023-01-24 01:58:34.740899: step: 768/470, loss: 0.9814718961715698 2023-01-24 01:58:35.451900: step: 770/470, loss: 1.3323256969451904 2023-01-24 01:58:36.197391: step: 772/470, loss: 2.680574655532837 2023-01-24 01:58:36.962742: step: 774/470, loss: 2.739765167236328 2023-01-24 01:58:37.685963: step: 776/470, loss: 1.2006040811538696 2023-01-24 01:58:38.419269: step: 778/470, loss: 0.9175070524215698 2023-01-24 01:58:39.138500: step: 780/470, loss: 1.995614767074585 2023-01-24 01:58:39.874614: step: 782/470, loss: 0.7214681506156921 2023-01-24 01:58:40.681389: step: 784/470, loss: 1.2614259719848633 2023-01-24 01:58:41.446305: step: 786/470, loss: 1.1801100969314575 2023-01-24 01:58:42.135995: step: 788/470, loss: 1.3768471479415894 2023-01-24 01:58:42.831971: step: 790/470, loss: 0.4173336625099182 2023-01-24 01:58:43.601634: step: 792/470, loss: 0.45784080028533936 2023-01-24 01:58:44.352805: step: 794/470, loss: 1.0468000173568726 2023-01-24 01:58:45.117861: step: 796/470, loss: 2.4224095344543457 2023-01-24 01:58:45.779396: step: 798/470, loss: 2.1978325843811035 2023-01-24 01:58:46.560244: step: 800/470, loss: 0.6861466765403748 2023-01-24 01:58:47.247761: step: 802/470, loss: 2.6235058307647705 2023-01-24 01:58:48.005850: step: 804/470, loss: 1.106905221939087 2023-01-24 01:58:48.711886: step: 806/470, loss: 3.2012083530426025 2023-01-24 01:58:49.464462: step: 808/470, loss: 1.7539875507354736 2023-01-24 01:58:50.170263: step: 810/470, loss: 0.43845510482788086 2023-01-24 01:58:50.885105: step: 812/470, loss: 1.7235209941864014 2023-01-24 01:58:51.697908: step: 814/470, loss: 1.9387578964233398 2023-01-24 01:58:52.438596: step: 816/470, loss: 0.919151782989502 2023-01-24 01:58:53.183041: step: 818/470, loss: 2.7217369079589844 2023-01-24 01:58:53.888800: step: 820/470, loss: 0.5926926732063293 2023-01-24 01:58:54.634743: step: 822/470, loss: 2.346853494644165 2023-01-24 01:58:55.393976: step: 824/470, loss: 0.5080801844596863 2023-01-24 01:58:56.076439: step: 826/470, loss: 1.3107140064239502 2023-01-24 01:58:56.863607: step: 828/470, loss: 0.6457303166389465 2023-01-24 01:58:57.639731: step: 830/470, loss: 0.620394229888916 2023-01-24 01:58:58.391892: step: 832/470, loss: 1.6627405881881714 2023-01-24 01:58:59.102918: step: 834/470, loss: 0.5543290376663208 2023-01-24 01:58:59.858692: step: 836/470, loss: 0.7170817852020264 2023-01-24 01:59:00.632269: step: 838/470, loss: 0.6219049096107483 2023-01-24 01:59:01.398849: step: 840/470, loss: 0.889132022857666 2023-01-24 01:59:02.198779: step: 842/470, loss: 0.6863826513290405 2023-01-24 01:59:02.906748: step: 844/470, loss: 3.6622161865234375 2023-01-24 01:59:03.670978: step: 846/470, loss: 0.48640185594558716 2023-01-24 01:59:04.389060: step: 848/470, loss: 0.3788396418094635 2023-01-24 01:59:05.099525: step: 850/470, loss: 0.2160799503326416 2023-01-24 01:59:05.772608: step: 852/470, loss: 0.4344361126422882 2023-01-24 01:59:06.589696: step: 854/470, loss: 2.5788564682006836 2023-01-24 01:59:07.364701: step: 856/470, loss: 0.9327910542488098 2023-01-24 01:59:08.105594: step: 858/470, loss: 0.6671287417411804 2023-01-24 01:59:08.732472: step: 860/470, loss: 0.8870024085044861 2023-01-24 01:59:09.379674: step: 862/470, loss: 1.443222999572754 2023-01-24 01:59:10.152967: step: 864/470, loss: 0.40049707889556885 2023-01-24 01:59:10.850756: step: 866/470, loss: 0.6292204260826111 2023-01-24 01:59:11.584977: step: 868/470, loss: 0.7831001877784729 2023-01-24 01:59:12.283697: step: 870/470, loss: 1.5764271020889282 2023-01-24 01:59:12.979434: step: 872/470, loss: 0.44661274552345276 2023-01-24 01:59:13.658350: step: 874/470, loss: 1.3428727388381958 2023-01-24 01:59:14.425028: step: 876/470, loss: 0.4933682084083557 2023-01-24 01:59:15.121153: step: 878/470, loss: 3.2525839805603027 2023-01-24 01:59:15.933075: step: 880/470, loss: 2.814858913421631 2023-01-24 01:59:16.812059: step: 882/470, loss: 0.4872850775718689 2023-01-24 01:59:17.625049: step: 884/470, loss: 0.6432516574859619 2023-01-24 01:59:18.387980: step: 886/470, loss: 3.085758924484253 2023-01-24 01:59:19.217400: step: 888/470, loss: 0.3077142834663391 2023-01-24 01:59:19.972316: step: 890/470, loss: 0.7868139743804932 2023-01-24 01:59:20.693148: step: 892/470, loss: 0.7199787497520447 2023-01-24 01:59:21.473508: step: 894/470, loss: 0.8233124017715454 2023-01-24 01:59:22.212788: step: 896/470, loss: 1.426706075668335 2023-01-24 01:59:22.924997: step: 898/470, loss: 3.785362720489502 2023-01-24 01:59:23.729966: step: 900/470, loss: 0.2815132439136505 2023-01-24 01:59:24.469815: step: 902/470, loss: 0.5339265465736389 2023-01-24 01:59:25.161044: step: 904/470, loss: 2.60254168510437 2023-01-24 01:59:25.904317: step: 906/470, loss: 1.4491829872131348 2023-01-24 01:59:26.749813: step: 908/470, loss: 2.853203058242798 2023-01-24 01:59:27.514565: step: 910/470, loss: 0.6764827966690063 2023-01-24 01:59:28.330773: step: 912/470, loss: 1.3102974891662598 2023-01-24 01:59:29.048592: step: 914/470, loss: 1.3560751676559448 2023-01-24 01:59:29.762359: step: 916/470, loss: 1.378153681755066 2023-01-24 01:59:30.458821: step: 918/470, loss: 1.9102532863616943 2023-01-24 01:59:31.131908: step: 920/470, loss: 0.6327913999557495 2023-01-24 01:59:31.834286: step: 922/470, loss: 1.337106466293335 2023-01-24 01:59:32.581771: step: 924/470, loss: 0.5834131836891174 2023-01-24 01:59:33.292916: step: 926/470, loss: 1.6707696914672852 2023-01-24 01:59:34.003732: step: 928/470, loss: 1.1640057563781738 2023-01-24 01:59:34.826341: step: 930/470, loss: 0.6929572224617004 2023-01-24 01:59:35.555684: step: 932/470, loss: 0.8510525226593018 2023-01-24 01:59:36.299273: step: 934/470, loss: 1.042264461517334 2023-01-24 01:59:37.095381: step: 936/470, loss: 2.107961654663086 2023-01-24 01:59:37.943630: step: 938/470, loss: 0.5670161247253418 2023-01-24 01:59:38.692681: step: 940/470, loss: 1.541463017463684 2023-01-24 01:59:39.330842: step: 942/470, loss: 2.885584592819214 ================================================== Loss: 1.284 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3449041193181818, 'r': 0.2295014177693762, 'f1': 0.27561010215664017}, 'combined': 0.20308112790489274, 'epoch': 3} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.40620059967183403, 'r': 0.2228055162465103, 'f1': 0.28776742234816033}, 'combined': 0.19184494823210685, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34558098591549297, 'r': 0.2319116257088847, 'f1': 0.2775593891402715}, 'combined': 0.20451744462967372, 'epoch': 3} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.4272517696102831, 'r': 0.23558358862276899, 'f1': 0.3037059018653901}, 'combined': 0.20247060124359334, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32384002770083103, 'r': 0.22099480151228734, 'f1': 0.26271067415730337}, 'combined': 0.1935762862211709, 'epoch': 3} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.40791775483075643, 'r': 0.2294048026207517, 'f1': 0.29366069328110966}, 'combined': 0.19577379552073973, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25961538461538464, 'r': 0.2892857142857143, 'f1': 0.2736486486486487}, 'combined': 0.18243243243243246, 'epoch': 3} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65, 'r': 0.2826086956521739, 'f1': 0.3939393939393939}, 'combined': 0.2626262626262626, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 3} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213942479544022, 'r': 0.26284804718851484, 'f1': 0.2891877262387209}, 'combined': 0.21308569301800487, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35411825897044286, 'r': 0.2415215791248938, 'f1': 0.28717757152371726}, 'combined': 0.1914517143491448, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2785714285714286, 'f1': 0.319672131147541}, 'combined': 0.21311475409836067, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34558098591549297, 'r': 0.2319116257088847, 'f1': 0.2775593891402715}, 'combined': 0.20451744462967372, 'epoch': 3} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.4272517696102831, 'r': 0.23558358862276899, 'f1': 0.3037059018653901}, 'combined': 0.20247060124359334, 'epoch': 3} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65, 'r': 0.2826086956521739, 'f1': 0.3939393939393939}, 'combined': 0.2626262626262626, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32384002770083103, 'r': 0.22099480151228734, 'f1': 0.26271067415730337}, 'combined': 0.1935762862211709, 'epoch': 3} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.40791775483075643, 'r': 0.2294048026207517, 'f1': 0.29366069328110966}, 'combined': 0.19577379552073973, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:02:29.706150: step: 2/470, loss: 1.2434356212615967 2023-01-24 02:02:30.378014: step: 4/470, loss: 1.2220540046691895 2023-01-24 02:02:31.056121: step: 6/470, loss: 1.7187833786010742 2023-01-24 02:02:31.915004: step: 8/470, loss: 1.2707059383392334 2023-01-24 02:02:32.583564: step: 10/470, loss: 1.190471887588501 2023-01-24 02:02:33.331444: step: 12/470, loss: 0.3669613301753998 2023-01-24 02:02:34.055729: step: 14/470, loss: 2.916283130645752 2023-01-24 02:02:34.873874: step: 16/470, loss: 1.854135513305664 2023-01-24 02:02:35.643562: step: 18/470, loss: 0.7268097400665283 2023-01-24 02:02:36.377689: step: 20/470, loss: 0.864444375038147 2023-01-24 02:02:37.127582: step: 22/470, loss: 0.24146677553653717 2023-01-24 02:02:37.943308: step: 24/470, loss: 1.435685634613037 2023-01-24 02:02:38.693398: step: 26/470, loss: 0.8564450144767761 2023-01-24 02:02:39.393239: step: 28/470, loss: 1.2398236989974976 2023-01-24 02:02:40.162973: step: 30/470, loss: 0.5465511679649353 2023-01-24 02:02:40.836235: step: 32/470, loss: 1.0255184173583984 2023-01-24 02:02:41.569883: step: 34/470, loss: 1.3359484672546387 2023-01-24 02:02:42.229308: step: 36/470, loss: 0.2009323537349701 2023-01-24 02:02:43.070751: step: 38/470, loss: 0.4609377980232239 2023-01-24 02:02:43.810710: step: 40/470, loss: 0.290250301361084 2023-01-24 02:02:44.579402: step: 42/470, loss: 0.2391287386417389 2023-01-24 02:02:45.310245: step: 44/470, loss: 1.3748838901519775 2023-01-24 02:02:46.064012: step: 46/470, loss: 0.31518200039863586 2023-01-24 02:02:46.818415: step: 48/470, loss: 1.317157506942749 2023-01-24 02:02:47.563078: step: 50/470, loss: 0.3318483829498291 2023-01-24 02:02:48.299773: step: 52/470, loss: 2.242248296737671 2023-01-24 02:02:49.035767: step: 54/470, loss: 0.5057197213172913 2023-01-24 02:02:49.739319: step: 56/470, loss: 1.300000786781311 2023-01-24 02:02:50.440150: step: 58/470, loss: 0.9471038579940796 2023-01-24 02:02:51.203998: step: 60/470, loss: 0.8389618992805481 2023-01-24 02:02:51.891990: step: 62/470, loss: 0.5545322895050049 2023-01-24 02:02:52.612290: step: 64/470, loss: 0.9042381048202515 2023-01-24 02:02:53.316490: step: 66/470, loss: 1.5064542293548584 2023-01-24 02:02:54.018702: step: 68/470, loss: 0.651218056678772 2023-01-24 02:02:54.752653: step: 70/470, loss: 4.524369239807129 2023-01-24 02:02:55.480279: step: 72/470, loss: 0.3224298655986786 2023-01-24 02:02:56.127120: step: 74/470, loss: 0.5799638628959656 2023-01-24 02:02:56.874927: step: 76/470, loss: 1.0132986307144165 2023-01-24 02:02:57.624109: step: 78/470, loss: 0.2071341574192047 2023-01-24 02:02:58.412040: step: 80/470, loss: 0.13997377455234528 2023-01-24 02:02:59.158668: step: 82/470, loss: 0.8105186223983765 2023-01-24 02:02:59.768385: step: 84/470, loss: 0.37218907475471497 2023-01-24 02:03:00.451730: step: 86/470, loss: 0.5148276090621948 2023-01-24 02:03:01.192310: step: 88/470, loss: 0.8321148753166199 2023-01-24 02:03:01.912802: step: 90/470, loss: 0.889289379119873 2023-01-24 02:03:02.615103: step: 92/470, loss: 0.5049362182617188 2023-01-24 02:03:03.326475: step: 94/470, loss: 0.9081429243087769 2023-01-24 02:03:04.069643: step: 96/470, loss: 0.8203686475753784 2023-01-24 02:03:04.878120: step: 98/470, loss: 1.3147096633911133 2023-01-24 02:03:05.593722: step: 100/470, loss: 0.49650490283966064 2023-01-24 02:03:06.265566: step: 102/470, loss: 0.6795364022254944 2023-01-24 02:03:07.124291: step: 104/470, loss: 5.672989845275879 2023-01-24 02:03:07.902771: step: 106/470, loss: 0.5761081576347351 2023-01-24 02:03:08.583886: step: 108/470, loss: 0.8563786149024963 2023-01-24 02:03:09.281510: step: 110/470, loss: 1.6315613985061646 2023-01-24 02:03:10.066846: step: 112/470, loss: 0.5608785152435303 2023-01-24 02:03:10.812197: step: 114/470, loss: 0.5131690502166748 2023-01-24 02:03:11.515453: step: 116/470, loss: 0.20318368077278137 2023-01-24 02:03:12.190366: step: 118/470, loss: 0.45412278175354004 2023-01-24 02:03:12.949362: step: 120/470, loss: 0.656235933303833 2023-01-24 02:03:13.661058: step: 122/470, loss: 0.1925651729106903 2023-01-24 02:03:14.317094: step: 124/470, loss: 0.43493232131004333 2023-01-24 02:03:15.093737: step: 126/470, loss: 0.27077344059944153 2023-01-24 02:03:15.830584: step: 128/470, loss: 0.3672603666782379 2023-01-24 02:03:16.611799: step: 130/470, loss: 0.9226909875869751 2023-01-24 02:03:17.316786: step: 132/470, loss: 0.32075291872024536 2023-01-24 02:03:18.142056: step: 134/470, loss: 0.6723787784576416 2023-01-24 02:03:18.844340: step: 136/470, loss: 1.2128407955169678 2023-01-24 02:03:19.605316: step: 138/470, loss: 0.8287149667739868 2023-01-24 02:03:20.333544: step: 140/470, loss: 0.4041156768798828 2023-01-24 02:03:21.073718: step: 142/470, loss: 0.16680775582790375 2023-01-24 02:03:21.847508: step: 144/470, loss: 2.376657485961914 2023-01-24 02:03:22.569781: step: 146/470, loss: 0.47927552461624146 2023-01-24 02:03:23.285807: step: 148/470, loss: 1.4806530475616455 2023-01-24 02:03:24.062399: step: 150/470, loss: 0.7680869698524475 2023-01-24 02:03:24.741600: step: 152/470, loss: 0.3706393539905548 2023-01-24 02:03:25.432883: step: 154/470, loss: 2.1673104763031006 2023-01-24 02:03:26.182230: step: 156/470, loss: 0.905188798904419 2023-01-24 02:03:26.949515: step: 158/470, loss: 0.19686326384544373 2023-01-24 02:03:27.702601: step: 160/470, loss: 1.0826082229614258 2023-01-24 02:03:28.469086: step: 162/470, loss: 1.3279461860656738 2023-01-24 02:03:29.327148: step: 164/470, loss: 0.796668291091919 2023-01-24 02:03:30.229422: step: 166/470, loss: 0.6903431415557861 2023-01-24 02:03:30.964832: step: 168/470, loss: 0.4284389317035675 2023-01-24 02:03:31.680284: step: 170/470, loss: 1.0535764694213867 2023-01-24 02:03:32.427654: step: 172/470, loss: 0.5381697416305542 2023-01-24 02:03:33.115951: step: 174/470, loss: 0.5848957896232605 2023-01-24 02:03:33.766205: step: 176/470, loss: 1.5677191019058228 2023-01-24 02:03:34.501816: step: 178/470, loss: 1.2079250812530518 2023-01-24 02:03:35.315708: step: 180/470, loss: 0.6007267236709595 2023-01-24 02:03:36.088346: step: 182/470, loss: 1.3235710859298706 2023-01-24 02:03:36.975830: step: 184/470, loss: 1.0727261304855347 2023-01-24 02:03:37.702416: step: 186/470, loss: 3.2140517234802246 2023-01-24 02:03:38.398177: step: 188/470, loss: 2.628072500228882 2023-01-24 02:03:39.093922: step: 190/470, loss: 0.410220205783844 2023-01-24 02:03:39.834940: step: 192/470, loss: 1.1373114585876465 2023-01-24 02:03:40.521593: step: 194/470, loss: 1.4659881591796875 2023-01-24 02:03:41.201876: step: 196/470, loss: 0.6437451839447021 2023-01-24 02:03:41.886614: step: 198/470, loss: 0.30417031049728394 2023-01-24 02:03:42.582013: step: 200/470, loss: 1.0152573585510254 2023-01-24 02:03:43.275433: step: 202/470, loss: 0.33993643522262573 2023-01-24 02:03:44.119379: step: 204/470, loss: 1.0089237689971924 2023-01-24 02:03:44.876293: step: 206/470, loss: 0.37781351804733276 2023-01-24 02:03:45.573264: step: 208/470, loss: 0.689214825630188 2023-01-24 02:03:46.302187: step: 210/470, loss: 0.4352153539657593 2023-01-24 02:03:47.020486: step: 212/470, loss: 1.1648905277252197 2023-01-24 02:03:47.835585: step: 214/470, loss: 0.3460136651992798 2023-01-24 02:03:48.640688: step: 216/470, loss: 0.7107413411140442 2023-01-24 02:03:49.364979: step: 218/470, loss: 0.08092077076435089 2023-01-24 02:03:50.094714: step: 220/470, loss: 1.2060343027114868 2023-01-24 02:03:50.735783: step: 222/470, loss: 1.078926920890808 2023-01-24 02:03:51.419556: step: 224/470, loss: 0.8819116950035095 2023-01-24 02:03:52.196659: step: 226/470, loss: 0.7317195534706116 2023-01-24 02:03:52.907558: step: 228/470, loss: 0.30972084403038025 2023-01-24 02:03:53.746310: step: 230/470, loss: 1.6904860734939575 2023-01-24 02:03:54.484061: step: 232/470, loss: 1.5937964916229248 2023-01-24 02:03:55.193548: step: 234/470, loss: 0.4739353656768799 2023-01-24 02:03:56.034234: step: 236/470, loss: 0.939883828163147 2023-01-24 02:03:56.802686: step: 238/470, loss: 0.4551941752433777 2023-01-24 02:03:57.510787: step: 240/470, loss: 0.5418661832809448 2023-01-24 02:03:58.275941: step: 242/470, loss: 1.2252233028411865 2023-01-24 02:03:59.126777: step: 244/470, loss: 1.1485824584960938 2023-01-24 02:03:59.868126: step: 246/470, loss: 0.8404242992401123 2023-01-24 02:04:00.621654: step: 248/470, loss: 0.6991187334060669 2023-01-24 02:04:01.411576: step: 250/470, loss: 1.8893976211547852 2023-01-24 02:04:02.116993: step: 252/470, loss: 0.6644213795661926 2023-01-24 02:04:02.923173: step: 254/470, loss: 0.8386790752410889 2023-01-24 02:04:03.647576: step: 256/470, loss: 1.4650182723999023 2023-01-24 02:04:04.316680: step: 258/470, loss: 0.5462832450866699 2023-01-24 02:04:05.125928: step: 260/470, loss: 5.154643535614014 2023-01-24 02:04:05.858216: step: 262/470, loss: 0.7503474354743958 2023-01-24 02:04:06.533637: step: 264/470, loss: 0.34197211265563965 2023-01-24 02:04:07.248835: step: 266/470, loss: 0.46603813767433167 2023-01-24 02:04:08.020356: step: 268/470, loss: 0.20978543162345886 2023-01-24 02:04:08.731200: step: 270/470, loss: 0.8326939344406128 2023-01-24 02:04:09.459148: step: 272/470, loss: 1.0488128662109375 2023-01-24 02:04:10.202156: step: 274/470, loss: 1.5846171379089355 2023-01-24 02:04:10.949509: step: 276/470, loss: 0.6706929802894592 2023-01-24 02:04:11.630789: step: 278/470, loss: 2.744781732559204 2023-01-24 02:04:12.425384: step: 280/470, loss: 0.7086952328681946 2023-01-24 02:04:13.297356: step: 282/470, loss: 1.4308860301971436 2023-01-24 02:04:14.122382: step: 284/470, loss: 0.30105042457580566 2023-01-24 02:04:14.929487: step: 286/470, loss: 0.7560703754425049 2023-01-24 02:04:15.657738: step: 288/470, loss: 1.4897514581680298 2023-01-24 02:04:16.446582: step: 290/470, loss: 0.583710253238678 2023-01-24 02:04:17.080891: step: 292/470, loss: 0.9798109531402588 2023-01-24 02:04:17.848566: step: 294/470, loss: 1.5323822498321533 2023-01-24 02:04:18.615439: step: 296/470, loss: 2.428147792816162 2023-01-24 02:04:19.350624: step: 298/470, loss: 0.5572175979614258 2023-01-24 02:04:20.081369: step: 300/470, loss: 0.8898938894271851 2023-01-24 02:04:20.805525: step: 302/470, loss: 1.1512093544006348 2023-01-24 02:04:21.627518: step: 304/470, loss: 2.286986827850342 2023-01-24 02:04:22.403515: step: 306/470, loss: 1.5244067907333374 2023-01-24 02:04:23.231630: step: 308/470, loss: 0.6587584018707275 2023-01-24 02:04:24.001560: step: 310/470, loss: 0.5230644345283508 2023-01-24 02:04:24.702825: step: 312/470, loss: 0.8668720126152039 2023-01-24 02:04:25.443747: step: 314/470, loss: 0.6899608373641968 2023-01-24 02:04:26.186024: step: 316/470, loss: 0.9593360424041748 2023-01-24 02:04:26.857537: step: 318/470, loss: 0.6568881869316101 2023-01-24 02:04:27.629021: step: 320/470, loss: 0.8749952912330627 2023-01-24 02:04:28.375402: step: 322/470, loss: 0.46149203181266785 2023-01-24 02:04:29.074710: step: 324/470, loss: 1.287691354751587 2023-01-24 02:04:29.857740: step: 326/470, loss: 0.5143733620643616 2023-01-24 02:04:30.566847: step: 328/470, loss: 0.423993319272995 2023-01-24 02:04:31.274740: step: 330/470, loss: 1.9649615287780762 2023-01-24 02:04:32.007299: step: 332/470, loss: 1.341388463973999 2023-01-24 02:04:32.799092: step: 334/470, loss: 0.20511598885059357 2023-01-24 02:04:33.653723: step: 336/470, loss: 0.7076253890991211 2023-01-24 02:04:34.412017: step: 338/470, loss: 0.2398476004600525 2023-01-24 02:04:35.192747: step: 340/470, loss: 0.43527939915657043 2023-01-24 02:04:35.838390: step: 342/470, loss: 0.27098989486694336 2023-01-24 02:04:36.532410: step: 344/470, loss: 0.42461445927619934 2023-01-24 02:04:37.296372: step: 346/470, loss: 0.6119195222854614 2023-01-24 02:04:38.105433: step: 348/470, loss: 0.5161367058753967 2023-01-24 02:04:38.888026: step: 350/470, loss: 0.35241755843162537 2023-01-24 02:04:39.644174: step: 352/470, loss: 0.33312055468559265 2023-01-24 02:04:40.482496: step: 354/470, loss: 2.3488333225250244 2023-01-24 02:04:41.306388: step: 356/470, loss: 2.8952274322509766 2023-01-24 02:04:42.060821: step: 358/470, loss: 1.2893620729446411 2023-01-24 02:04:42.793654: step: 360/470, loss: 0.3711218535900116 2023-01-24 02:04:43.538056: step: 362/470, loss: 0.2599567174911499 2023-01-24 02:04:44.298311: step: 364/470, loss: 5.034658908843994 2023-01-24 02:04:45.131623: step: 366/470, loss: 0.35741162300109863 2023-01-24 02:04:45.886204: step: 368/470, loss: 0.5050439238548279 2023-01-24 02:04:46.777999: step: 370/470, loss: 0.46297574043273926 2023-01-24 02:04:47.566982: step: 372/470, loss: 1.4221333265304565 2023-01-24 02:04:48.308306: step: 374/470, loss: 0.3796398341655731 2023-01-24 02:04:49.044574: step: 376/470, loss: 0.4562537372112274 2023-01-24 02:04:49.849055: step: 378/470, loss: 0.6173561811447144 2023-01-24 02:04:50.649155: step: 380/470, loss: 1.5535647869110107 2023-01-24 02:04:51.438953: step: 382/470, loss: 0.6625960469245911 2023-01-24 02:04:52.166223: step: 384/470, loss: 2.1780447959899902 2023-01-24 02:04:52.850870: step: 386/470, loss: 1.2066243886947632 2023-01-24 02:04:53.705006: step: 388/470, loss: 0.383740097284317 2023-01-24 02:04:54.497625: step: 390/470, loss: 0.36354339122772217 2023-01-24 02:04:55.300139: step: 392/470, loss: 0.3574196994304657 2023-01-24 02:04:56.098563: step: 394/470, loss: 1.139248251914978 2023-01-24 02:04:56.807083: step: 396/470, loss: 0.5587860345840454 2023-01-24 02:04:57.593886: step: 398/470, loss: 1.277011513710022 2023-01-24 02:04:58.371473: step: 400/470, loss: 0.6317623257637024 2023-01-24 02:04:59.219619: step: 402/470, loss: 1.273547649383545 2023-01-24 02:04:59.889048: step: 404/470, loss: 5.302393913269043 2023-01-24 02:05:00.560039: step: 406/470, loss: 0.3220556676387787 2023-01-24 02:05:01.285420: step: 408/470, loss: 1.9869134426116943 2023-01-24 02:05:02.038353: step: 410/470, loss: 0.33363187313079834 2023-01-24 02:05:02.780351: step: 412/470, loss: 0.9794175624847412 2023-01-24 02:05:03.581320: step: 414/470, loss: 1.7475107908248901 2023-01-24 02:05:04.281752: step: 416/470, loss: 1.0983277559280396 2023-01-24 02:05:05.029071: step: 418/470, loss: 2.780597686767578 2023-01-24 02:05:05.826678: step: 420/470, loss: 0.28116825222969055 2023-01-24 02:05:06.554717: step: 422/470, loss: 0.4581286311149597 2023-01-24 02:05:07.283874: step: 424/470, loss: 0.6359933018684387 2023-01-24 02:05:07.949698: step: 426/470, loss: 0.4756195843219757 2023-01-24 02:05:08.666820: step: 428/470, loss: 0.43690595030784607 2023-01-24 02:05:09.379672: step: 430/470, loss: 0.886446475982666 2023-01-24 02:05:10.146230: step: 432/470, loss: 1.1900947093963623 2023-01-24 02:05:10.889369: step: 434/470, loss: 0.9924726486206055 2023-01-24 02:05:11.600846: step: 436/470, loss: 1.7097697257995605 2023-01-24 02:05:12.297229: step: 438/470, loss: 0.9539413452148438 2023-01-24 02:05:13.041202: step: 440/470, loss: 1.100608229637146 2023-01-24 02:05:13.790816: step: 442/470, loss: 1.6105233430862427 2023-01-24 02:05:14.500779: step: 444/470, loss: 0.42767927050590515 2023-01-24 02:05:15.241585: step: 446/470, loss: 0.4778148829936981 2023-01-24 02:05:16.050086: step: 448/470, loss: 1.9240278005599976 2023-01-24 02:05:16.801289: step: 450/470, loss: 0.6379396319389343 2023-01-24 02:05:17.616252: step: 452/470, loss: 0.8942621350288391 2023-01-24 02:05:18.345885: step: 454/470, loss: 2.3800547122955322 2023-01-24 02:05:19.186820: step: 456/470, loss: 1.0691368579864502 2023-01-24 02:05:19.937471: step: 458/470, loss: 0.7086570858955383 2023-01-24 02:05:20.754725: step: 460/470, loss: 0.39457079768180847 2023-01-24 02:05:21.479309: step: 462/470, loss: 0.27901706099510193 2023-01-24 02:05:22.167335: step: 464/470, loss: 0.8260778784751892 2023-01-24 02:05:22.896414: step: 466/470, loss: 1.605619192123413 2023-01-24 02:05:23.689447: step: 468/470, loss: 0.8436158299446106 2023-01-24 02:05:24.415490: step: 470/470, loss: 1.076684832572937 2023-01-24 02:05:25.104484: step: 472/470, loss: 1.1909767389297485 2023-01-24 02:05:25.885094: step: 474/470, loss: 0.6028439998626709 2023-01-24 02:05:26.559138: step: 476/470, loss: 2.0072274208068848 2023-01-24 02:05:27.238203: step: 478/470, loss: 2.010789155960083 2023-01-24 02:05:27.985967: step: 480/470, loss: 0.4975433945655823 2023-01-24 02:05:28.773058: step: 482/470, loss: 0.9296523928642273 2023-01-24 02:05:29.507572: step: 484/470, loss: 0.5860471725463867 2023-01-24 02:05:30.273503: step: 486/470, loss: 0.750137209892273 2023-01-24 02:05:31.011581: step: 488/470, loss: 1.2189258337020874 2023-01-24 02:05:31.805399: step: 490/470, loss: 3.423570156097412 2023-01-24 02:05:32.603129: step: 492/470, loss: 1.0367627143859863 2023-01-24 02:05:33.361140: step: 494/470, loss: 1.586470365524292 2023-01-24 02:05:34.083726: step: 496/470, loss: 0.3984842300415039 2023-01-24 02:05:34.844098: step: 498/470, loss: 1.551780104637146 2023-01-24 02:05:35.572038: step: 500/470, loss: 0.6801697015762329 2023-01-24 02:05:36.290229: step: 502/470, loss: 0.6109309792518616 2023-01-24 02:05:37.060118: step: 504/470, loss: 2.4782888889312744 2023-01-24 02:05:37.890401: step: 506/470, loss: 1.3563328981399536 2023-01-24 02:05:38.578807: step: 508/470, loss: 1.7630181312561035 2023-01-24 02:05:39.310431: step: 510/470, loss: 0.9129251837730408 2023-01-24 02:05:40.012218: step: 512/470, loss: 1.837816834449768 2023-01-24 02:05:40.910202: step: 514/470, loss: 1.0328463315963745 2023-01-24 02:05:41.687296: step: 516/470, loss: 0.43190184235572815 2023-01-24 02:05:42.425869: step: 518/470, loss: 0.677377462387085 2023-01-24 02:05:43.250261: step: 520/470, loss: 1.7360193729400635 2023-01-24 02:05:43.994203: step: 522/470, loss: 1.5820202827453613 2023-01-24 02:05:44.752830: step: 524/470, loss: 3.5445146560668945 2023-01-24 02:05:45.595549: step: 526/470, loss: 9.609672546386719 2023-01-24 02:05:46.437229: step: 528/470, loss: 2.3321354389190674 2023-01-24 02:05:47.239955: step: 530/470, loss: 0.533820629119873 2023-01-24 02:05:48.039004: step: 532/470, loss: 0.19819556176662445 2023-01-24 02:05:48.759286: step: 534/470, loss: 1.0940450429916382 2023-01-24 02:05:49.505785: step: 536/470, loss: 0.9480574727058411 2023-01-24 02:05:50.211814: step: 538/470, loss: 0.29331231117248535 2023-01-24 02:05:50.895562: step: 540/470, loss: 0.4723627269268036 2023-01-24 02:05:51.578940: step: 542/470, loss: 0.6444365978240967 2023-01-24 02:05:52.362173: step: 544/470, loss: 0.3107144236564636 2023-01-24 02:05:53.146139: step: 546/470, loss: 0.4428755044937134 2023-01-24 02:05:53.909014: step: 548/470, loss: 0.4551246762275696 2023-01-24 02:05:54.617452: step: 550/470, loss: 0.9456477165222168 2023-01-24 02:05:55.505113: step: 552/470, loss: 0.4225022792816162 2023-01-24 02:05:56.250383: step: 554/470, loss: 0.21875688433647156 2023-01-24 02:05:56.961118: step: 556/470, loss: 0.5006171464920044 2023-01-24 02:05:57.735324: step: 558/470, loss: 0.7604213953018188 2023-01-24 02:05:58.698482: step: 560/470, loss: 1.2393240928649902 2023-01-24 02:05:59.448199: step: 562/470, loss: 1.712594985961914 2023-01-24 02:06:00.242240: step: 564/470, loss: 0.17193198204040527 2023-01-24 02:06:01.066328: step: 566/470, loss: 0.7961640357971191 2023-01-24 02:06:01.857172: step: 568/470, loss: 0.5501372218132019 2023-01-24 02:06:02.600268: step: 570/470, loss: 0.4883258044719696 2023-01-24 02:06:03.321491: step: 572/470, loss: 0.6990291476249695 2023-01-24 02:06:04.082763: step: 574/470, loss: 1.1884771585464478 2023-01-24 02:06:04.841060: step: 576/470, loss: 1.0261270999908447 2023-01-24 02:06:05.524867: step: 578/470, loss: 0.6111153960227966 2023-01-24 02:06:06.256034: step: 580/470, loss: 0.7478018403053284 2023-01-24 02:06:06.949796: step: 582/470, loss: 1.9652400016784668 2023-01-24 02:06:07.721325: step: 584/470, loss: 0.6027696132659912 2023-01-24 02:06:08.456844: step: 586/470, loss: 0.38277050852775574 2023-01-24 02:06:09.273733: step: 588/470, loss: 0.5123499035835266 2023-01-24 02:06:09.993992: step: 590/470, loss: 0.3519931137561798 2023-01-24 02:06:10.787393: step: 592/470, loss: 0.7184064984321594 2023-01-24 02:06:11.476958: step: 594/470, loss: 1.8940832614898682 2023-01-24 02:06:12.188975: step: 596/470, loss: 0.23110346496105194 2023-01-24 02:06:12.930527: step: 598/470, loss: 0.5986997485160828 2023-01-24 02:06:13.698415: step: 600/470, loss: 0.29345154762268066 2023-01-24 02:06:14.440565: step: 602/470, loss: 0.6144067645072937 2023-01-24 02:06:15.188783: step: 604/470, loss: 0.31155356764793396 2023-01-24 02:06:15.900719: step: 606/470, loss: 0.4316118359565735 2023-01-24 02:06:16.677345: step: 608/470, loss: 1.4895285367965698 2023-01-24 02:06:17.404617: step: 610/470, loss: 0.3460380434989929 2023-01-24 02:06:18.103325: step: 612/470, loss: 4.381056308746338 2023-01-24 02:06:18.821419: step: 614/470, loss: 0.26738184690475464 2023-01-24 02:06:19.579635: step: 616/470, loss: 1.009708285331726 2023-01-24 02:06:20.320268: step: 618/470, loss: 1.4514302015304565 2023-01-24 02:06:21.157653: step: 620/470, loss: 0.7399471998214722 2023-01-24 02:06:21.892470: step: 622/470, loss: 2.295945644378662 2023-01-24 02:06:22.658964: step: 624/470, loss: 0.5105316638946533 2023-01-24 02:06:23.350900: step: 626/470, loss: 0.2828077971935272 2023-01-24 02:06:24.104226: step: 628/470, loss: 1.003699541091919 2023-01-24 02:06:24.822706: step: 630/470, loss: 0.38585180044174194 2023-01-24 02:06:25.543775: step: 632/470, loss: 1.3301163911819458 2023-01-24 02:06:26.317249: step: 634/470, loss: 1.180068016052246 2023-01-24 02:06:27.051378: step: 636/470, loss: 0.9667542576789856 2023-01-24 02:06:27.769764: step: 638/470, loss: 0.6113733649253845 2023-01-24 02:06:28.602176: step: 640/470, loss: 1.1039268970489502 2023-01-24 02:06:29.376409: step: 642/470, loss: 0.7926710844039917 2023-01-24 02:06:30.105825: step: 644/470, loss: 1.8398045301437378 2023-01-24 02:06:30.901173: step: 646/470, loss: 0.5182595252990723 2023-01-24 02:06:31.651675: step: 648/470, loss: 1.8043544292449951 2023-01-24 02:06:32.367485: step: 650/470, loss: 1.3427050113677979 2023-01-24 02:06:33.149439: step: 652/470, loss: 0.2207910120487213 2023-01-24 02:06:33.903721: step: 654/470, loss: 0.7075690031051636 2023-01-24 02:06:34.556875: step: 656/470, loss: 1.3858473300933838 2023-01-24 02:06:35.272840: step: 658/470, loss: 0.40335145592689514 2023-01-24 02:06:35.999483: step: 660/470, loss: 0.8443214893341064 2023-01-24 02:06:36.778777: step: 662/470, loss: 1.4021592140197754 2023-01-24 02:06:37.476280: step: 664/470, loss: 0.8110883235931396 2023-01-24 02:06:38.268371: step: 666/470, loss: 0.2092544585466385 2023-01-24 02:06:38.980587: step: 668/470, loss: 4.8981614112854 2023-01-24 02:06:39.706325: step: 670/470, loss: 0.4453684389591217 2023-01-24 02:06:40.483147: step: 672/470, loss: 0.47258445620536804 2023-01-24 02:06:41.181379: step: 674/470, loss: 0.7400813102722168 2023-01-24 02:06:41.905986: step: 676/470, loss: 0.281443327665329 2023-01-24 02:06:42.604905: step: 678/470, loss: 0.5487380027770996 2023-01-24 02:06:43.333736: step: 680/470, loss: 0.4541553854942322 2023-01-24 02:06:44.077042: step: 682/470, loss: 0.6198228597640991 2023-01-24 02:06:44.829272: step: 684/470, loss: 0.6936663389205933 2023-01-24 02:06:45.570656: step: 686/470, loss: 1.3587620258331299 2023-01-24 02:06:46.371940: step: 688/470, loss: 1.5153071880340576 2023-01-24 02:06:47.037106: step: 690/470, loss: 1.0358877182006836 2023-01-24 02:06:47.741515: step: 692/470, loss: 1.0552879571914673 2023-01-24 02:06:48.453852: step: 694/470, loss: 1.0266014337539673 2023-01-24 02:06:49.192388: step: 696/470, loss: 1.048282504081726 2023-01-24 02:06:49.960034: step: 698/470, loss: 1.082366943359375 2023-01-24 02:06:50.700485: step: 700/470, loss: 3.19528865814209 2023-01-24 02:06:51.431038: step: 702/470, loss: 0.5113957524299622 2023-01-24 02:06:52.119741: step: 704/470, loss: 0.6064223051071167 2023-01-24 02:06:52.859556: step: 706/470, loss: 0.29762303829193115 2023-01-24 02:06:53.586052: step: 708/470, loss: 1.149381399154663 2023-01-24 02:06:54.398888: step: 710/470, loss: 0.45705515146255493 2023-01-24 02:06:55.122529: step: 712/470, loss: 0.4075102210044861 2023-01-24 02:06:55.874372: step: 714/470, loss: 0.7546180486679077 2023-01-24 02:06:56.582281: step: 716/470, loss: 0.7100285291671753 2023-01-24 02:06:57.283238: step: 718/470, loss: 2.0336670875549316 2023-01-24 02:06:58.066178: step: 720/470, loss: 0.505089521408081 2023-01-24 02:06:58.833087: step: 722/470, loss: 0.5286163091659546 2023-01-24 02:06:59.549602: step: 724/470, loss: 3.978087902069092 2023-01-24 02:07:00.253965: step: 726/470, loss: 1.456428050994873 2023-01-24 02:07:01.057672: step: 728/470, loss: 6.835483551025391 2023-01-24 02:07:01.803280: step: 730/470, loss: 1.0272594690322876 2023-01-24 02:07:02.447970: step: 732/470, loss: 1.0365326404571533 2023-01-24 02:07:03.174926: step: 734/470, loss: 0.3310011327266693 2023-01-24 02:07:03.972142: step: 736/470, loss: 0.6663587689399719 2023-01-24 02:07:04.741777: step: 738/470, loss: 0.6486376523971558 2023-01-24 02:07:05.487429: step: 740/470, loss: 0.29191356897354126 2023-01-24 02:07:06.261101: step: 742/470, loss: 0.32745280861854553 2023-01-24 02:07:07.034670: step: 744/470, loss: 2.1090571880340576 2023-01-24 02:07:07.805502: step: 746/470, loss: 1.205168604850769 2023-01-24 02:07:08.520189: step: 748/470, loss: 0.9146398901939392 2023-01-24 02:07:09.253848: step: 750/470, loss: 8.422287940979004 2023-01-24 02:07:10.058336: step: 752/470, loss: 1.7295546531677246 2023-01-24 02:07:10.828888: step: 754/470, loss: 1.0920233726501465 2023-01-24 02:07:11.529685: step: 756/470, loss: 2.5528173446655273 2023-01-24 02:07:12.343585: step: 758/470, loss: 0.6946008801460266 2023-01-24 02:07:13.115399: step: 760/470, loss: 0.9669357538223267 2023-01-24 02:07:13.818130: step: 762/470, loss: 3.9733476638793945 2023-01-24 02:07:14.508351: step: 764/470, loss: 0.6126687526702881 2023-01-24 02:07:15.290309: step: 766/470, loss: 1.2648811340332031 2023-01-24 02:07:16.046000: step: 768/470, loss: 1.0761961936950684 2023-01-24 02:07:16.747505: step: 770/470, loss: 0.36128804087638855 2023-01-24 02:07:17.444628: step: 772/470, loss: 2.7535998821258545 2023-01-24 02:07:18.138898: step: 774/470, loss: 1.588283658027649 2023-01-24 02:07:18.982094: step: 776/470, loss: 1.3686546087265015 2023-01-24 02:07:19.807078: step: 778/470, loss: 0.6748623847961426 2023-01-24 02:07:20.558484: step: 780/470, loss: 0.4505867660045624 2023-01-24 02:07:21.286047: step: 782/470, loss: 0.2927594780921936 2023-01-24 02:07:21.967517: step: 784/470, loss: 0.8455650806427002 2023-01-24 02:07:22.658443: step: 786/470, loss: 0.3428768515586853 2023-01-24 02:07:23.412082: step: 788/470, loss: 1.1048076152801514 2023-01-24 02:07:24.169255: step: 790/470, loss: 3.1269044876098633 2023-01-24 02:07:24.931535: step: 792/470, loss: 1.0847244262695312 2023-01-24 02:07:25.834275: step: 794/470, loss: 5.910243511199951 2023-01-24 02:07:26.577430: step: 796/470, loss: 0.3136829137802124 2023-01-24 02:07:27.410240: step: 798/470, loss: 0.6312191486358643 2023-01-24 02:07:28.079633: step: 800/470, loss: 1.0586059093475342 2023-01-24 02:07:28.794711: step: 802/470, loss: 0.7780605554580688 2023-01-24 02:07:29.501504: step: 804/470, loss: 0.42219239473342896 2023-01-24 02:07:30.214241: step: 806/470, loss: 0.661794900894165 2023-01-24 02:07:30.911316: step: 808/470, loss: 0.5359216332435608 2023-01-24 02:07:31.628077: step: 810/470, loss: 0.20118995010852814 2023-01-24 02:07:32.381964: step: 812/470, loss: 2.887706995010376 2023-01-24 02:07:33.146372: step: 814/470, loss: 0.8478198051452637 2023-01-24 02:07:33.861821: step: 816/470, loss: 0.7324924468994141 2023-01-24 02:07:34.596661: step: 818/470, loss: 1.24992835521698 2023-01-24 02:07:35.351236: step: 820/470, loss: 0.47944867610931396 2023-01-24 02:07:36.099998: step: 822/470, loss: 3.812171459197998 2023-01-24 02:07:36.881261: step: 824/470, loss: 1.0839214324951172 2023-01-24 02:07:37.604192: step: 826/470, loss: 0.307800829410553 2023-01-24 02:07:38.348726: step: 828/470, loss: 0.6476705074310303 2023-01-24 02:07:39.208133: step: 830/470, loss: 0.3994646668434143 2023-01-24 02:07:39.982835: step: 832/470, loss: 1.0658934116363525 2023-01-24 02:07:40.726254: step: 834/470, loss: 0.22010746598243713 2023-01-24 02:07:41.472831: step: 836/470, loss: 0.7103077173233032 2023-01-24 02:07:42.355592: step: 838/470, loss: 0.9640560150146484 2023-01-24 02:07:43.068653: step: 840/470, loss: 1.359990119934082 2023-01-24 02:07:43.927417: step: 842/470, loss: 1.2544822692871094 2023-01-24 02:07:44.772404: step: 844/470, loss: 1.0046195983886719 2023-01-24 02:07:45.448839: step: 846/470, loss: 1.0893107652664185 2023-01-24 02:07:46.174153: step: 848/470, loss: 0.3106290102005005 2023-01-24 02:07:46.953051: step: 850/470, loss: 3.447446346282959 2023-01-24 02:07:47.649154: step: 852/470, loss: 1.2170885801315308 2023-01-24 02:07:48.342978: step: 854/470, loss: 0.2806965112686157 2023-01-24 02:07:49.174642: step: 856/470, loss: 4.124128341674805 2023-01-24 02:07:49.941288: step: 858/470, loss: 1.0237302780151367 2023-01-24 02:07:50.677186: step: 860/470, loss: 1.629802942276001 2023-01-24 02:07:51.424949: step: 862/470, loss: 6.174650192260742 2023-01-24 02:07:52.200257: step: 864/470, loss: 0.9307006001472473 2023-01-24 02:07:52.974274: step: 866/470, loss: 0.4283657371997833 2023-01-24 02:07:53.779507: step: 868/470, loss: 2.484179973602295 2023-01-24 02:07:54.454071: step: 870/470, loss: 0.7242858409881592 2023-01-24 02:07:55.163767: step: 872/470, loss: 0.5142600536346436 2023-01-24 02:07:55.894696: step: 874/470, loss: 0.43242359161376953 2023-01-24 02:07:56.581276: step: 876/470, loss: 1.1232759952545166 2023-01-24 02:07:57.280302: step: 878/470, loss: 0.9467688798904419 2023-01-24 02:07:57.998618: step: 880/470, loss: 0.1361820548772812 2023-01-24 02:07:58.811335: step: 882/470, loss: 2.624424934387207 2023-01-24 02:07:59.473644: step: 884/470, loss: 1.0939950942993164 2023-01-24 02:08:00.195602: step: 886/470, loss: 0.4590575098991394 2023-01-24 02:08:00.912415: step: 888/470, loss: 0.7806578874588013 2023-01-24 02:08:01.725246: step: 890/470, loss: 0.7640312910079956 2023-01-24 02:08:02.473558: step: 892/470, loss: 7.197998046875 2023-01-24 02:08:03.203110: step: 894/470, loss: 0.49443161487579346 2023-01-24 02:08:03.932323: step: 896/470, loss: 0.8511555790901184 2023-01-24 02:08:04.584430: step: 898/470, loss: 2.684095621109009 2023-01-24 02:08:05.319532: step: 900/470, loss: 0.8094006776809692 2023-01-24 02:08:05.978993: step: 902/470, loss: 0.8172462582588196 2023-01-24 02:08:06.772235: step: 904/470, loss: 3.9463624954223633 2023-01-24 02:08:07.464688: step: 906/470, loss: 2.681535243988037 2023-01-24 02:08:08.194920: step: 908/470, loss: 1.012972116470337 2023-01-24 02:08:08.948470: step: 910/470, loss: 2.3120675086975098 2023-01-24 02:08:09.716026: step: 912/470, loss: 1.2887799739837646 2023-01-24 02:08:10.506138: step: 914/470, loss: 0.8099074959754944 2023-01-24 02:08:11.283868: step: 916/470, loss: 0.5998104810714722 2023-01-24 02:08:11.959284: step: 918/470, loss: 1.7960407733917236 2023-01-24 02:08:12.676598: step: 920/470, loss: 0.6787109971046448 2023-01-24 02:08:13.404010: step: 922/470, loss: 1.4123334884643555 2023-01-24 02:08:14.068421: step: 924/470, loss: 0.24107448756694794 2023-01-24 02:08:14.772474: step: 926/470, loss: 0.34054338932037354 2023-01-24 02:08:15.489100: step: 928/470, loss: 0.6413167119026184 2023-01-24 02:08:16.253180: step: 930/470, loss: 0.6883243322372437 2023-01-24 02:08:16.943210: step: 932/470, loss: 0.3803039491176605 2023-01-24 02:08:17.630577: step: 934/470, loss: 0.22277651727199554 2023-01-24 02:08:18.376801: step: 936/470, loss: 1.7990632057189941 2023-01-24 02:08:19.280655: step: 938/470, loss: 0.1533496081829071 2023-01-24 02:08:20.002816: step: 940/470, loss: 0.6035867929458618 2023-01-24 02:08:20.724206: step: 942/470, loss: 0.27842485904693604 ================================================== Loss: 1.112 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34952429203074004, 'r': 0.23213188275286342, 'f1': 0.2789817610279567}, 'combined': 0.2055655081258628, 'epoch': 4} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36691349274432955, 'r': 0.2527156333311088, 'f1': 0.29929121080510157}, 'combined': 0.19952747387006767, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340823419483803, 'r': 0.2117334007794289, 'f1': 0.2591951270865483}, 'combined': 0.190985883116404, 'epoch': 4} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.37829910920569393, 'r': 0.25401640474042275, 'f1': 0.3039437670514713}, 'combined': 0.20262917803431416, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3117917847025496, 'r': 0.2088472485768501, 'f1': 0.25014204545454544}, 'combined': 0.18431519138755978, 'epoch': 4} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36508771056516787, 'r': 0.257069444615051, 'f1': 0.301701569159265}, 'combined': 0.20113437943950996, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2962962962962963, 'r': 0.22857142857142856, 'f1': 0.25806451612903225}, 'combined': 0.17204301075268816, 'epoch': 4} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5961538461538461, 'r': 0.33695652173913043, 'f1': 0.4305555555555555}, 'combined': 0.287037037037037, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.06896551724137931, 'f1': 0.1111111111111111}, 'combined': 0.07407407407407407, 'epoch': 4} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213942479544022, 'r': 0.26284804718851484, 'f1': 0.2891877262387209}, 'combined': 0.21308569301800487, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35411825897044286, 'r': 0.2415215791248938, 'f1': 0.28717757152371726}, 'combined': 0.1914517143491448, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2785714285714286, 'f1': 0.319672131147541}, 'combined': 0.21311475409836067, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340823419483803, 'r': 0.2117334007794289, 'f1': 0.2591951270865483}, 'combined': 0.190985883116404, 'epoch': 4} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.37829910920569393, 'r': 0.25401640474042275, 'f1': 0.3039437670514713}, 'combined': 0.20262917803431416, 'epoch': 4} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5961538461538461, 'r': 0.33695652173913043, 'f1': 0.4305555555555555}, 'combined': 0.287037037037037, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32384002770083103, 'r': 0.22099480151228734, 'f1': 0.26271067415730337}, 'combined': 0.1935762862211709, 'epoch': 3} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.40791775483075643, 'r': 0.2294048026207517, 'f1': 0.29366069328110966}, 'combined': 0.19577379552073973, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 3} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:10:57.555412: step: 2/470, loss: 0.22235730290412903 2023-01-24 02:10:58.278015: step: 4/470, loss: 0.21198546886444092 2023-01-24 02:10:59.068505: step: 6/470, loss: 0.49766987562179565 2023-01-24 02:10:59.799000: step: 8/470, loss: 0.3908613622188568 2023-01-24 02:11:00.585629: step: 10/470, loss: 0.16270841658115387 2023-01-24 02:11:01.412445: step: 12/470, loss: 0.20050539076328278 2023-01-24 02:11:02.096142: step: 14/470, loss: 0.7683029174804688 2023-01-24 02:11:02.866412: step: 16/470, loss: 0.3124810755252838 2023-01-24 02:11:03.645814: step: 18/470, loss: 1.0326603651046753 2023-01-24 02:11:04.325669: step: 20/470, loss: 0.4175031781196594 2023-01-24 02:11:05.065662: step: 22/470, loss: 0.8004642724990845 2023-01-24 02:11:05.868311: step: 24/470, loss: 0.31021180748939514 2023-01-24 02:11:06.575847: step: 26/470, loss: 0.43700122833251953 2023-01-24 02:11:07.390837: step: 28/470, loss: 0.9346105456352234 2023-01-24 02:11:08.140922: step: 30/470, loss: 0.3246211111545563 2023-01-24 02:11:08.844124: step: 32/470, loss: 0.7316885590553284 2023-01-24 02:11:09.618781: step: 34/470, loss: 0.9466471076011658 2023-01-24 02:11:10.358778: step: 36/470, loss: 0.294159471988678 2023-01-24 02:11:11.134207: step: 38/470, loss: 2.3637757301330566 2023-01-24 02:11:11.879749: step: 40/470, loss: 0.3901894688606262 2023-01-24 02:11:12.678032: step: 42/470, loss: 0.89076167345047 2023-01-24 02:11:13.403111: step: 44/470, loss: 1.0623388290405273 2023-01-24 02:11:14.189939: step: 46/470, loss: 0.1770210862159729 2023-01-24 02:11:14.959645: step: 48/470, loss: 0.3942860960960388 2023-01-24 02:11:15.654635: step: 50/470, loss: 0.8177869915962219 2023-01-24 02:11:16.351413: step: 52/470, loss: 1.5779149532318115 2023-01-24 02:11:17.145770: step: 54/470, loss: 1.26964271068573 2023-01-24 02:11:18.001944: step: 56/470, loss: 1.0765001773834229 2023-01-24 02:11:18.746536: step: 58/470, loss: 0.8852806091308594 2023-01-24 02:11:19.459001: step: 60/470, loss: 0.265352338552475 2023-01-24 02:11:20.132944: step: 62/470, loss: 0.4972139298915863 2023-01-24 02:11:20.832793: step: 64/470, loss: 0.9439274668693542 2023-01-24 02:11:21.566418: step: 66/470, loss: 1.225905179977417 2023-01-24 02:11:22.326299: step: 68/470, loss: 0.6210903525352478 2023-01-24 02:11:23.091453: step: 70/470, loss: 0.33965161442756653 2023-01-24 02:11:23.833183: step: 72/470, loss: 0.17134492099285126 2023-01-24 02:11:24.521748: step: 74/470, loss: 0.3916568458080292 2023-01-24 02:11:25.251510: step: 76/470, loss: 0.5033536553382874 2023-01-24 02:11:26.042529: step: 78/470, loss: 0.39052829146385193 2023-01-24 02:11:26.832499: step: 80/470, loss: 0.5496810078620911 2023-01-24 02:11:27.686029: step: 82/470, loss: 0.7038556337356567 2023-01-24 02:11:28.411093: step: 84/470, loss: 2.2712252140045166 2023-01-24 02:11:29.111071: step: 86/470, loss: 0.3298700749874115 2023-01-24 02:11:29.908556: step: 88/470, loss: 0.6470965147018433 2023-01-24 02:11:30.701839: step: 90/470, loss: 0.3787349760532379 2023-01-24 02:11:31.466935: step: 92/470, loss: 0.6228494644165039 2023-01-24 02:11:32.218939: step: 94/470, loss: 0.611712634563446 2023-01-24 02:11:32.909050: step: 96/470, loss: 0.8371176719665527 2023-01-24 02:11:33.612746: step: 98/470, loss: 0.6518052220344543 2023-01-24 02:11:34.322529: step: 100/470, loss: 0.47128623723983765 2023-01-24 02:11:35.020008: step: 102/470, loss: 0.8628720045089722 2023-01-24 02:11:35.794268: step: 104/470, loss: 0.4808705747127533 2023-01-24 02:11:36.558543: step: 106/470, loss: 0.4174078702926636 2023-01-24 02:11:37.388648: step: 108/470, loss: 0.3857043981552124 2023-01-24 02:11:38.100048: step: 110/470, loss: 0.21693837642669678 2023-01-24 02:11:38.892464: step: 112/470, loss: 0.6489458084106445 2023-01-24 02:11:39.589005: step: 114/470, loss: 0.5416321158409119 2023-01-24 02:11:40.417633: step: 116/470, loss: 0.19514138996601105 2023-01-24 02:11:41.115907: step: 118/470, loss: 0.3536219596862793 2023-01-24 02:11:41.948292: step: 120/470, loss: 1.3784006834030151 2023-01-24 02:11:42.693197: step: 122/470, loss: 2.3029212951660156 2023-01-24 02:11:43.454826: step: 124/470, loss: 1.3331398963928223 2023-01-24 02:11:44.215690: step: 126/470, loss: 0.3695890009403229 2023-01-24 02:11:44.903347: step: 128/470, loss: 1.1727420091629028 2023-01-24 02:11:45.646046: step: 130/470, loss: 1.360120415687561 2023-01-24 02:11:46.378087: step: 132/470, loss: 0.36498603224754333 2023-01-24 02:11:47.101870: step: 134/470, loss: 0.5842001438140869 2023-01-24 02:11:47.903511: step: 136/470, loss: 0.5986462235450745 2023-01-24 02:11:48.748111: step: 138/470, loss: 0.48415353894233704 2023-01-24 02:11:49.506184: step: 140/470, loss: 0.2749024033546448 2023-01-24 02:11:50.298485: step: 142/470, loss: 1.0702927112579346 2023-01-24 02:11:51.120281: step: 144/470, loss: 0.2774654030799866 2023-01-24 02:11:51.897883: step: 146/470, loss: 0.38094377517700195 2023-01-24 02:11:52.673638: step: 148/470, loss: 0.20257362723350525 2023-01-24 02:11:53.436147: step: 150/470, loss: 1.267014503479004 2023-01-24 02:11:54.170273: step: 152/470, loss: 1.0668094158172607 2023-01-24 02:11:54.950466: step: 154/470, loss: 1.20293128490448 2023-01-24 02:11:55.690169: step: 156/470, loss: 0.724846363067627 2023-01-24 02:11:56.377796: step: 158/470, loss: 0.979896068572998 2023-01-24 02:11:57.092231: step: 160/470, loss: 0.9010365605354309 2023-01-24 02:11:57.777861: step: 162/470, loss: 0.7949374914169312 2023-01-24 02:11:58.515753: step: 164/470, loss: 0.45920002460479736 2023-01-24 02:11:59.222029: step: 166/470, loss: 2.250549077987671 2023-01-24 02:11:59.983706: step: 168/470, loss: 0.19924406707286835 2023-01-24 02:12:00.789914: step: 170/470, loss: 0.7295480966567993 2023-01-24 02:12:01.553425: step: 172/470, loss: 1.2707566022872925 2023-01-24 02:12:02.215626: step: 174/470, loss: 0.5840062499046326 2023-01-24 02:12:02.997322: step: 176/470, loss: 0.5518929362297058 2023-01-24 02:12:03.859940: step: 178/470, loss: 1.6353731155395508 2023-01-24 02:12:04.682494: step: 180/470, loss: 0.9040023684501648 2023-01-24 02:12:05.432834: step: 182/470, loss: 1.9166462421417236 2023-01-24 02:12:06.154047: step: 184/470, loss: 0.33155763149261475 2023-01-24 02:12:06.880746: step: 186/470, loss: 0.1614404320716858 2023-01-24 02:12:07.606339: step: 188/470, loss: 0.4751662313938141 2023-01-24 02:12:08.300434: step: 190/470, loss: 0.3759790360927582 2023-01-24 02:12:09.037529: step: 192/470, loss: 0.6819899678230286 2023-01-24 02:12:09.819585: step: 194/470, loss: 2.1987533569335938 2023-01-24 02:12:10.579801: step: 196/470, loss: 0.6992949843406677 2023-01-24 02:12:11.369885: step: 198/470, loss: 1.0487316846847534 2023-01-24 02:12:12.101394: step: 200/470, loss: 0.5224685072898865 2023-01-24 02:12:12.829985: step: 202/470, loss: 0.9172898530960083 2023-01-24 02:12:13.605165: step: 204/470, loss: 0.6186913847923279 2023-01-24 02:12:14.366908: step: 206/470, loss: 3.2034780979156494 2023-01-24 02:12:15.145751: step: 208/470, loss: 0.720175564289093 2023-01-24 02:12:15.930248: step: 210/470, loss: 2.231804847717285 2023-01-24 02:12:16.649679: step: 212/470, loss: 0.12492244690656662 2023-01-24 02:12:17.496716: step: 214/470, loss: 0.5483446717262268 2023-01-24 02:12:18.275046: step: 216/470, loss: 1.2820593118667603 2023-01-24 02:12:19.025283: step: 218/470, loss: 0.47009074687957764 2023-01-24 02:12:19.761745: step: 220/470, loss: 0.3459213972091675 2023-01-24 02:12:20.454564: step: 222/470, loss: 0.20750129222869873 2023-01-24 02:12:21.221844: step: 224/470, loss: 0.8360021114349365 2023-01-24 02:12:22.025084: step: 226/470, loss: 0.3708805739879608 2023-01-24 02:12:22.821688: step: 228/470, loss: 0.7422099709510803 2023-01-24 02:12:23.563241: step: 230/470, loss: 0.4704272449016571 2023-01-24 02:12:24.263309: step: 232/470, loss: 0.7870251536369324 2023-01-24 02:12:25.091053: step: 234/470, loss: 0.8178002238273621 2023-01-24 02:12:25.730956: step: 236/470, loss: 0.5741672515869141 2023-01-24 02:12:26.455335: step: 238/470, loss: 0.3935246467590332 2023-01-24 02:12:27.122328: step: 240/470, loss: 0.7990236878395081 2023-01-24 02:12:27.897025: step: 242/470, loss: 1.1086221933364868 2023-01-24 02:12:28.563701: step: 244/470, loss: 1.578269124031067 2023-01-24 02:12:29.310981: step: 246/470, loss: 1.1762317419052124 2023-01-24 02:12:29.986002: step: 248/470, loss: 0.1974961757659912 2023-01-24 02:12:30.771656: step: 250/470, loss: 1.7104171514511108 2023-01-24 02:12:31.546456: step: 252/470, loss: 0.31335341930389404 2023-01-24 02:12:32.317698: step: 254/470, loss: 0.48065751791000366 2023-01-24 02:12:33.007282: step: 256/470, loss: 0.8966016173362732 2023-01-24 02:12:33.756943: step: 258/470, loss: 0.5578399896621704 2023-01-24 02:12:34.480325: step: 260/470, loss: 2.122208833694458 2023-01-24 02:12:35.209377: step: 262/470, loss: 1.1785131692886353 2023-01-24 02:12:35.973192: step: 264/470, loss: 0.5294274091720581 2023-01-24 02:12:36.743898: step: 266/470, loss: 0.9701927900314331 2023-01-24 02:12:37.477047: step: 268/470, loss: 0.23452767729759216 2023-01-24 02:12:38.189540: step: 270/470, loss: 0.6140098571777344 2023-01-24 02:12:38.918837: step: 272/470, loss: 0.4548776149749756 2023-01-24 02:12:39.699152: step: 274/470, loss: 0.9099631309509277 2023-01-24 02:12:40.494293: step: 276/470, loss: 0.5374931693077087 2023-01-24 02:12:41.260006: step: 278/470, loss: 1.1470648050308228 2023-01-24 02:12:42.001313: step: 280/470, loss: 0.719149649143219 2023-01-24 02:12:42.756532: step: 282/470, loss: 1.6352453231811523 2023-01-24 02:12:43.571226: step: 284/470, loss: 0.3871826231479645 2023-01-24 02:12:44.335938: step: 286/470, loss: 0.7986884117126465 2023-01-24 02:12:45.032872: step: 288/470, loss: 1.6879855394363403 2023-01-24 02:12:45.829022: step: 290/470, loss: 0.24217256903648376 2023-01-24 02:12:46.557080: step: 292/470, loss: 0.7460265755653381 2023-01-24 02:12:47.348385: step: 294/470, loss: 2.9903945922851562 2023-01-24 02:12:48.098746: step: 296/470, loss: 0.8292760848999023 2023-01-24 02:12:48.808578: step: 298/470, loss: 1.7127366065979004 2023-01-24 02:12:49.536088: step: 300/470, loss: 1.0309264659881592 2023-01-24 02:12:50.272970: step: 302/470, loss: 0.19080205261707306 2023-01-24 02:12:51.043133: step: 304/470, loss: 0.7009590864181519 2023-01-24 02:12:51.871569: step: 306/470, loss: 0.6426486968994141 2023-01-24 02:12:52.609083: step: 308/470, loss: 0.7753332853317261 2023-01-24 02:12:53.293659: step: 310/470, loss: 0.6057764887809753 2023-01-24 02:12:54.021510: step: 312/470, loss: 0.5348176956176758 2023-01-24 02:12:54.741013: step: 314/470, loss: 0.1700417399406433 2023-01-24 02:12:55.487851: step: 316/470, loss: 1.0512360334396362 2023-01-24 02:12:56.261573: step: 318/470, loss: 2.0664336681365967 2023-01-24 02:12:56.979476: step: 320/470, loss: 1.663642168045044 2023-01-24 02:12:57.746057: step: 322/470, loss: 0.18958774209022522 2023-01-24 02:12:58.523439: step: 324/470, loss: 0.4568879008293152 2023-01-24 02:12:59.140734: step: 326/470, loss: 1.1089987754821777 2023-01-24 02:12:59.934409: step: 328/470, loss: 0.6072306632995605 2023-01-24 02:13:00.682700: step: 330/470, loss: 0.9218146800994873 2023-01-24 02:13:01.461007: step: 332/470, loss: 0.6980925798416138 2023-01-24 02:13:02.320146: step: 334/470, loss: 0.33780360221862793 2023-01-24 02:13:03.062418: step: 336/470, loss: 0.2144942283630371 2023-01-24 02:13:03.709241: step: 338/470, loss: 0.5171912908554077 2023-01-24 02:13:04.461629: step: 340/470, loss: 0.2723813056945801 2023-01-24 02:13:05.120476: step: 342/470, loss: 1.158375859260559 2023-01-24 02:13:05.840002: step: 344/470, loss: 1.1009371280670166 2023-01-24 02:13:06.606792: step: 346/470, loss: 0.8941398859024048 2023-01-24 02:13:07.448105: step: 348/470, loss: 0.8668206334114075 2023-01-24 02:13:08.305710: step: 350/470, loss: 0.43403181433677673 2023-01-24 02:13:09.039213: step: 352/470, loss: 0.3821115791797638 2023-01-24 02:13:09.793831: step: 354/470, loss: 0.5914931893348694 2023-01-24 02:13:10.539922: step: 356/470, loss: 0.8853386640548706 2023-01-24 02:13:11.302967: step: 358/470, loss: 1.1184029579162598 2023-01-24 02:13:12.081427: step: 360/470, loss: 0.33806416392326355 2023-01-24 02:13:12.839479: step: 362/470, loss: 0.4669269323348999 2023-01-24 02:13:13.587977: step: 364/470, loss: 1.3949357271194458 2023-01-24 02:13:14.253983: step: 366/470, loss: 0.25456702709198 2023-01-24 02:13:15.012783: step: 368/470, loss: 0.23126432299613953 2023-01-24 02:13:15.672086: step: 370/470, loss: 0.48816195130348206 2023-01-24 02:13:16.476203: step: 372/470, loss: 0.9824779629707336 2023-01-24 02:13:17.377033: step: 374/470, loss: 0.9794360399246216 2023-01-24 02:13:18.130202: step: 376/470, loss: 0.8073421120643616 2023-01-24 02:13:18.801703: step: 378/470, loss: 1.1331653594970703 2023-01-24 02:13:19.500647: step: 380/470, loss: 0.23539990186691284 2023-01-24 02:13:20.218617: step: 382/470, loss: 1.2172232866287231 2023-01-24 02:13:20.892311: step: 384/470, loss: 0.4674474000930786 2023-01-24 02:13:21.641009: step: 386/470, loss: 1.1901929378509521 2023-01-24 02:13:22.385670: step: 388/470, loss: 0.27445924282073975 2023-01-24 02:13:23.156555: step: 390/470, loss: 0.4656749665737152 2023-01-24 02:13:23.946981: step: 392/470, loss: 0.29722970724105835 2023-01-24 02:13:24.672982: step: 394/470, loss: 0.16318340599536896 2023-01-24 02:13:25.330995: step: 396/470, loss: 0.7538102865219116 2023-01-24 02:13:26.051896: step: 398/470, loss: 0.34675395488739014 2023-01-24 02:13:26.815188: step: 400/470, loss: 1.1929813623428345 2023-01-24 02:13:27.520190: step: 402/470, loss: 0.44265463948249817 2023-01-24 02:13:28.287489: step: 404/470, loss: 8.768211364746094 2023-01-24 02:13:29.102143: step: 406/470, loss: 0.8950780034065247 2023-01-24 02:13:29.893399: step: 408/470, loss: 0.7870341539382935 2023-01-24 02:13:30.578868: step: 410/470, loss: 2.6422438621520996 2023-01-24 02:13:31.296003: step: 412/470, loss: 1.0197150707244873 2023-01-24 02:13:32.168548: step: 414/470, loss: 1.3697253465652466 2023-01-24 02:13:32.939357: step: 416/470, loss: 1.2400399446487427 2023-01-24 02:13:33.735082: step: 418/470, loss: 0.4381885826587677 2023-01-24 02:13:34.490857: step: 420/470, loss: 0.4691491425037384 2023-01-24 02:13:35.221479: step: 422/470, loss: 0.3527713418006897 2023-01-24 02:13:35.925633: step: 424/470, loss: 0.6151089072227478 2023-01-24 02:13:36.728734: step: 426/470, loss: 0.35179460048675537 2023-01-24 02:13:37.396015: step: 428/470, loss: 0.31551289558410645 2023-01-24 02:13:38.093538: step: 430/470, loss: 0.3387365937232971 2023-01-24 02:13:38.773789: step: 432/470, loss: 0.9518783092498779 2023-01-24 02:13:39.437248: step: 434/470, loss: 0.33223646879196167 2023-01-24 02:13:40.218826: step: 436/470, loss: 0.948758065700531 2023-01-24 02:13:41.077626: step: 438/470, loss: 0.7131083607673645 2023-01-24 02:13:41.775796: step: 440/470, loss: 0.22820569574832916 2023-01-24 02:13:42.548952: step: 442/470, loss: 1.1058170795440674 2023-01-24 02:13:43.277014: step: 444/470, loss: 0.5581318140029907 2023-01-24 02:13:44.041738: step: 446/470, loss: 0.977337658405304 2023-01-24 02:13:44.838888: step: 448/470, loss: 0.9387962222099304 2023-01-24 02:13:45.571353: step: 450/470, loss: 0.45336490869522095 2023-01-24 02:13:46.257683: step: 452/470, loss: 0.7090914249420166 2023-01-24 02:13:46.987575: step: 454/470, loss: 1.7675325870513916 2023-01-24 02:13:47.810584: step: 456/470, loss: 0.7944380044937134 2023-01-24 02:13:48.520292: step: 458/470, loss: 0.595599353313446 2023-01-24 02:13:49.293477: step: 460/470, loss: 0.7079035043716431 2023-01-24 02:13:50.016276: step: 462/470, loss: 1.0472602844238281 2023-01-24 02:13:50.768072: step: 464/470, loss: 0.6768998503684998 2023-01-24 02:13:51.548857: step: 466/470, loss: 0.6595719456672668 2023-01-24 02:13:52.329574: step: 468/470, loss: 1.1421606540679932 2023-01-24 02:13:53.125637: step: 470/470, loss: 0.4721830487251282 2023-01-24 02:13:53.836221: step: 472/470, loss: 0.27509674429893494 2023-01-24 02:13:54.589665: step: 474/470, loss: 2.248384714126587 2023-01-24 02:13:55.307016: step: 476/470, loss: 0.5151785016059875 2023-01-24 02:13:56.095937: step: 478/470, loss: 1.4433629512786865 2023-01-24 02:13:56.799156: step: 480/470, loss: 0.573886513710022 2023-01-24 02:13:57.505670: step: 482/470, loss: 0.4885188937187195 2023-01-24 02:13:58.210464: step: 484/470, loss: 0.9017962217330933 2023-01-24 02:13:58.980310: step: 486/470, loss: 0.47882330417633057 2023-01-24 02:13:59.731439: step: 488/470, loss: 0.43378081917762756 2023-01-24 02:14:00.598065: step: 490/470, loss: 1.083345890045166 2023-01-24 02:14:01.317561: step: 492/470, loss: 0.5740756392478943 2023-01-24 02:14:02.066384: step: 494/470, loss: 0.7037774324417114 2023-01-24 02:14:02.809487: step: 496/470, loss: 1.5782699584960938 2023-01-24 02:14:03.533373: step: 498/470, loss: 1.1951762437820435 2023-01-24 02:14:04.205918: step: 500/470, loss: 3.3578271865844727 2023-01-24 02:14:04.913758: step: 502/470, loss: 0.5678948760032654 2023-01-24 02:14:05.562590: step: 504/470, loss: 0.7719627022743225 2023-01-24 02:14:06.271337: step: 506/470, loss: 0.2801421284675598 2023-01-24 02:14:06.970602: step: 508/470, loss: 0.18048332631587982 2023-01-24 02:14:07.649170: step: 510/470, loss: 1.8545786142349243 2023-01-24 02:14:08.407673: step: 512/470, loss: 1.0798882246017456 2023-01-24 02:14:09.174623: step: 514/470, loss: 1.414536952972412 2023-01-24 02:14:09.930007: step: 516/470, loss: 0.5196714997291565 2023-01-24 02:14:10.688742: step: 518/470, loss: 0.41817137598991394 2023-01-24 02:14:11.442303: step: 520/470, loss: 0.5657111406326294 2023-01-24 02:14:12.196164: step: 522/470, loss: 0.34673720598220825 2023-01-24 02:14:12.914732: step: 524/470, loss: 0.7128427028656006 2023-01-24 02:14:13.637103: step: 526/470, loss: 0.45498931407928467 2023-01-24 02:14:14.367661: step: 528/470, loss: 0.45157119631767273 2023-01-24 02:14:15.055617: step: 530/470, loss: 0.4576375186443329 2023-01-24 02:14:15.844453: step: 532/470, loss: 0.33187219500541687 2023-01-24 02:14:16.632340: step: 534/470, loss: 0.35542771220207214 2023-01-24 02:14:17.365672: step: 536/470, loss: 0.6708710193634033 2023-01-24 02:14:18.145228: step: 538/470, loss: 0.8843319416046143 2023-01-24 02:14:18.795843: step: 540/470, loss: 0.8301752805709839 2023-01-24 02:14:19.582929: step: 542/470, loss: 0.2672118842601776 2023-01-24 02:14:20.405019: step: 544/470, loss: 0.3918260335922241 2023-01-24 02:14:21.240178: step: 546/470, loss: 0.4030534029006958 2023-01-24 02:14:22.078526: step: 548/470, loss: 3.20204496383667 2023-01-24 02:14:22.810342: step: 550/470, loss: 2.8569650650024414 2023-01-24 02:14:23.473184: step: 552/470, loss: 0.45698219537734985 2023-01-24 02:14:24.186304: step: 554/470, loss: 0.2088402956724167 2023-01-24 02:14:25.061042: step: 556/470, loss: 0.5278826951980591 2023-01-24 02:14:25.727365: step: 558/470, loss: 0.3262355327606201 2023-01-24 02:14:26.456974: step: 560/470, loss: 0.48174381256103516 2023-01-24 02:14:27.273672: step: 562/470, loss: 0.8578274250030518 2023-01-24 02:14:28.000509: step: 564/470, loss: 2.5235321521759033 2023-01-24 02:14:28.770380: step: 566/470, loss: 0.13515417277812958 2023-01-24 02:14:29.569343: step: 568/470, loss: 0.5105624794960022 2023-01-24 02:14:30.332792: step: 570/470, loss: 1.3545113801956177 2023-01-24 02:14:31.036056: step: 572/470, loss: 0.43925705552101135 2023-01-24 02:14:31.752316: step: 574/470, loss: 0.8375481367111206 2023-01-24 02:14:32.472841: step: 576/470, loss: 1.22329580783844 2023-01-24 02:14:33.202495: step: 578/470, loss: 1.2642005681991577 2023-01-24 02:14:33.953392: step: 580/470, loss: 0.7364957332611084 2023-01-24 02:14:34.733765: step: 582/470, loss: 12.388585090637207 2023-01-24 02:14:35.451846: step: 584/470, loss: 0.9263208508491516 2023-01-24 02:14:36.183762: step: 586/470, loss: 0.48008114099502563 2023-01-24 02:14:36.886320: step: 588/470, loss: 5.323960781097412 2023-01-24 02:14:37.568932: step: 590/470, loss: 0.3100872337818146 2023-01-24 02:14:38.395375: step: 592/470, loss: 0.39643803238868713 2023-01-24 02:14:39.099311: step: 594/470, loss: 0.5192726254463196 2023-01-24 02:14:39.834706: step: 596/470, loss: 0.08385767787694931 2023-01-24 02:14:40.627977: step: 598/470, loss: 0.23493848741054535 2023-01-24 02:14:41.385699: step: 600/470, loss: 0.8058767914772034 2023-01-24 02:14:42.124221: step: 602/470, loss: 0.7003133296966553 2023-01-24 02:14:42.862395: step: 604/470, loss: 0.3727822005748749 2023-01-24 02:14:43.620353: step: 606/470, loss: 1.2153240442276 2023-01-24 02:14:44.362801: step: 608/470, loss: 1.3041852712631226 2023-01-24 02:14:45.098682: step: 610/470, loss: 1.750708818435669 2023-01-24 02:14:45.836788: step: 612/470, loss: 2.3848016262054443 2023-01-24 02:14:46.685675: step: 614/470, loss: 0.21485082805156708 2023-01-24 02:14:47.430646: step: 616/470, loss: 1.353153109550476 2023-01-24 02:14:48.180948: step: 618/470, loss: 0.9749501347541809 2023-01-24 02:14:49.039728: step: 620/470, loss: 1.2679924964904785 2023-01-24 02:14:49.764496: step: 622/470, loss: 0.6863827705383301 2023-01-24 02:14:50.529243: step: 624/470, loss: 0.29089784622192383 2023-01-24 02:14:51.263448: step: 626/470, loss: 0.26793599128723145 2023-01-24 02:14:52.166196: step: 628/470, loss: 0.4520954489707947 2023-01-24 02:14:52.862744: step: 630/470, loss: 0.6169873476028442 2023-01-24 02:14:53.708804: step: 632/470, loss: 0.570949375629425 2023-01-24 02:14:54.423997: step: 634/470, loss: 0.278425008058548 2023-01-24 02:14:55.218500: step: 636/470, loss: 1.7957154512405396 2023-01-24 02:14:55.983453: step: 638/470, loss: 0.29361969232559204 2023-01-24 02:14:56.847943: step: 640/470, loss: 0.5023148059844971 2023-01-24 02:14:57.542273: step: 642/470, loss: 0.48273128271102905 2023-01-24 02:14:58.284692: step: 644/470, loss: 1.2374017238616943 2023-01-24 02:14:59.060485: step: 646/470, loss: 0.22361309826374054 2023-01-24 02:14:59.804196: step: 648/470, loss: 0.7312672734260559 2023-01-24 02:15:00.574799: step: 650/470, loss: 0.360087126493454 2023-01-24 02:15:01.258780: step: 652/470, loss: 1.0528044700622559 2023-01-24 02:15:01.990172: step: 654/470, loss: 1.5701942443847656 2023-01-24 02:15:02.691991: step: 656/470, loss: 0.09973641484975815 2023-01-24 02:15:03.393308: step: 658/470, loss: 0.39809560775756836 2023-01-24 02:15:04.127096: step: 660/470, loss: 0.2803775668144226 2023-01-24 02:15:05.014813: step: 662/470, loss: 0.5510927438735962 2023-01-24 02:15:05.695478: step: 664/470, loss: 0.28761452436447144 2023-01-24 02:15:06.365392: step: 666/470, loss: 1.0456538200378418 2023-01-24 02:15:07.116895: step: 668/470, loss: 1.3108751773834229 2023-01-24 02:15:07.839555: step: 670/470, loss: 0.3738193213939667 2023-01-24 02:15:08.633172: step: 672/470, loss: 0.9928417205810547 2023-01-24 02:15:09.351874: step: 674/470, loss: 1.9106264114379883 2023-01-24 02:15:10.074251: step: 676/470, loss: 1.6555376052856445 2023-01-24 02:15:10.727767: step: 678/470, loss: 0.9879150390625 2023-01-24 02:15:11.381106: step: 680/470, loss: 1.1278126239776611 2023-01-24 02:15:12.147722: step: 682/470, loss: 0.7462809085845947 2023-01-24 02:15:12.815337: step: 684/470, loss: 0.2949332594871521 2023-01-24 02:15:13.507934: step: 686/470, loss: 0.5867691040039062 2023-01-24 02:15:14.271626: step: 688/470, loss: 0.783571720123291 2023-01-24 02:15:14.982279: step: 690/470, loss: 0.28259211778640747 2023-01-24 02:15:15.823479: step: 692/470, loss: 2.536674976348877 2023-01-24 02:15:16.572340: step: 694/470, loss: 0.3623759150505066 2023-01-24 02:15:17.251482: step: 696/470, loss: 0.904730498790741 2023-01-24 02:15:17.934440: step: 698/470, loss: 0.15288175642490387 2023-01-24 02:15:18.684787: step: 700/470, loss: 0.5689459443092346 2023-01-24 02:15:19.412300: step: 702/470, loss: 0.19390466809272766 2023-01-24 02:15:20.160575: step: 704/470, loss: 5.31660270690918 2023-01-24 02:15:20.898833: step: 706/470, loss: 3.0160305500030518 2023-01-24 02:15:21.608261: step: 708/470, loss: 0.5570680499076843 2023-01-24 02:15:22.327442: step: 710/470, loss: 0.6179437637329102 2023-01-24 02:15:23.072356: step: 712/470, loss: 0.9332218170166016 2023-01-24 02:15:23.774945: step: 714/470, loss: 0.5039190649986267 2023-01-24 02:15:24.501532: step: 716/470, loss: 2.8622703552246094 2023-01-24 02:15:25.363918: step: 718/470, loss: 1.5712451934814453 2023-01-24 02:15:26.094867: step: 720/470, loss: 0.47472241520881653 2023-01-24 02:15:26.850389: step: 722/470, loss: 0.4161088764667511 2023-01-24 02:15:27.618977: step: 724/470, loss: 0.408611923456192 2023-01-24 02:15:28.456641: step: 726/470, loss: 0.4773179292678833 2023-01-24 02:15:29.283466: step: 728/470, loss: 0.8895754814147949 2023-01-24 02:15:30.067857: step: 730/470, loss: 0.5631923675537109 2023-01-24 02:15:30.820792: step: 732/470, loss: 2.1085853576660156 2023-01-24 02:15:31.614187: step: 734/470, loss: 0.3119111657142639 2023-01-24 02:15:32.404606: step: 736/470, loss: 1.9866414070129395 2023-01-24 02:15:33.174266: step: 738/470, loss: 4.040884494781494 2023-01-24 02:15:33.878438: step: 740/470, loss: 1.7604376077651978 2023-01-24 02:15:34.728781: step: 742/470, loss: 1.824992060661316 2023-01-24 02:15:35.487979: step: 744/470, loss: 1.3587254285812378 2023-01-24 02:15:36.226466: step: 746/470, loss: 0.6129835844039917 2023-01-24 02:15:36.956451: step: 748/470, loss: 0.756197988986969 2023-01-24 02:15:37.743711: step: 750/470, loss: 1.2754197120666504 2023-01-24 02:15:38.624724: step: 752/470, loss: 2.0039122104644775 2023-01-24 02:15:39.372926: step: 754/470, loss: 0.694054365158081 2023-01-24 02:15:40.192163: step: 756/470, loss: 1.3741402626037598 2023-01-24 02:15:40.910094: step: 758/470, loss: 1.2446845769882202 2023-01-24 02:15:41.615990: step: 760/470, loss: 1.1512802839279175 2023-01-24 02:15:42.400261: step: 762/470, loss: 0.31948322057724 2023-01-24 02:15:43.112531: step: 764/470, loss: 0.9004114866256714 2023-01-24 02:15:43.931953: step: 766/470, loss: 0.7966269254684448 2023-01-24 02:15:44.713910: step: 768/470, loss: 0.5935583114624023 2023-01-24 02:15:45.473483: step: 770/470, loss: 1.2309718132019043 2023-01-24 02:15:46.190392: step: 772/470, loss: 0.7194991707801819 2023-01-24 02:15:46.905823: step: 774/470, loss: 0.9128872156143188 2023-01-24 02:15:47.619782: step: 776/470, loss: 0.262151837348938 2023-01-24 02:15:48.344150: step: 778/470, loss: 0.8558886051177979 2023-01-24 02:15:49.167980: step: 780/470, loss: 0.5099863409996033 2023-01-24 02:15:50.040998: step: 782/470, loss: 0.5506338477134705 2023-01-24 02:15:50.754872: step: 784/470, loss: 1.1489386558532715 2023-01-24 02:15:51.448371: step: 786/470, loss: 0.35874584317207336 2023-01-24 02:15:52.401298: step: 788/470, loss: 0.16319824755191803 2023-01-24 02:15:53.043438: step: 790/470, loss: 2.4045443534851074 2023-01-24 02:15:53.760642: step: 792/470, loss: 0.33701398968696594 2023-01-24 02:15:54.478118: step: 794/470, loss: 0.24340465664863586 2023-01-24 02:15:55.209258: step: 796/470, loss: 0.34215545654296875 2023-01-24 02:15:55.980535: step: 798/470, loss: 0.5952900648117065 2023-01-24 02:15:56.745046: step: 800/470, loss: 0.30067580938339233 2023-01-24 02:15:57.492421: step: 802/470, loss: 0.27011197805404663 2023-01-24 02:15:58.192755: step: 804/470, loss: 0.5309256315231323 2023-01-24 02:15:58.916006: step: 806/470, loss: 0.8290297985076904 2023-01-24 02:15:59.649607: step: 808/470, loss: 0.8590260148048401 2023-01-24 02:16:00.616203: step: 810/470, loss: 0.4753422737121582 2023-01-24 02:16:01.381138: step: 812/470, loss: 1.0361976623535156 2023-01-24 02:16:02.062797: step: 814/470, loss: 0.4226698577404022 2023-01-24 02:16:02.849309: step: 816/470, loss: 0.7795215249061584 2023-01-24 02:16:03.643610: step: 818/470, loss: 0.30003947019577026 2023-01-24 02:16:04.367692: step: 820/470, loss: 0.40043067932128906 2023-01-24 02:16:05.200980: step: 822/470, loss: 1.1841524839401245 2023-01-24 02:16:05.917416: step: 824/470, loss: 0.3702228367328644 2023-01-24 02:16:06.636581: step: 826/470, loss: 0.14006973803043365 2023-01-24 02:16:07.324684: step: 828/470, loss: 2.2968199253082275 2023-01-24 02:16:08.037998: step: 830/470, loss: 0.25178012251853943 2023-01-24 02:16:08.762222: step: 832/470, loss: 3.981785774230957 2023-01-24 02:16:09.455798: step: 834/470, loss: 0.2811685800552368 2023-01-24 02:16:10.198326: step: 836/470, loss: 1.4167596101760864 2023-01-24 02:16:11.017430: step: 838/470, loss: 1.3315188884735107 2023-01-24 02:16:11.715391: step: 840/470, loss: 0.20409701764583588 2023-01-24 02:16:12.424571: step: 842/470, loss: 0.794757604598999 2023-01-24 02:16:13.223148: step: 844/470, loss: 0.4948848485946655 2023-01-24 02:16:13.933453: step: 846/470, loss: 2.682526111602783 2023-01-24 02:16:14.714530: step: 848/470, loss: 0.9555333852767944 2023-01-24 02:16:15.474126: step: 850/470, loss: 1.1651482582092285 2023-01-24 02:16:16.199503: step: 852/470, loss: 0.5566208362579346 2023-01-24 02:16:16.928543: step: 854/470, loss: 0.6673471927642822 2023-01-24 02:16:17.650183: step: 856/470, loss: 0.4028908610343933 2023-01-24 02:16:18.397121: step: 858/470, loss: 1.8048272132873535 2023-01-24 02:16:19.182572: step: 860/470, loss: 2.100435972213745 2023-01-24 02:16:19.880711: step: 862/470, loss: 0.656417965888977 2023-01-24 02:16:20.655259: step: 864/470, loss: 0.5497065186500549 2023-01-24 02:16:21.406754: step: 866/470, loss: 0.4357500970363617 2023-01-24 02:16:22.069114: step: 868/470, loss: 0.40799766778945923 2023-01-24 02:16:22.776150: step: 870/470, loss: 1.0405466556549072 2023-01-24 02:16:23.483296: step: 872/470, loss: 1.2654999494552612 2023-01-24 02:16:24.231888: step: 874/470, loss: 0.4379280209541321 2023-01-24 02:16:25.026472: step: 876/470, loss: 1.0722965002059937 2023-01-24 02:16:25.717820: step: 878/470, loss: 0.22580832242965698 2023-01-24 02:16:26.406178: step: 880/470, loss: 0.5495216846466064 2023-01-24 02:16:27.153662: step: 882/470, loss: 0.23686954379081726 2023-01-24 02:16:27.836306: step: 884/470, loss: 0.6185952425003052 2023-01-24 02:16:28.608225: step: 886/470, loss: 1.4941705465316772 2023-01-24 02:16:29.316287: step: 888/470, loss: 6.853786945343018 2023-01-24 02:16:30.048986: step: 890/470, loss: 0.35840287804603577 2023-01-24 02:16:30.840975: step: 892/470, loss: 0.45041191577911377 2023-01-24 02:16:31.564572: step: 894/470, loss: 0.6162185668945312 2023-01-24 02:16:32.312438: step: 896/470, loss: 0.2759486436843872 2023-01-24 02:16:33.046231: step: 898/470, loss: 0.5360721349716187 2023-01-24 02:16:33.846699: step: 900/470, loss: 0.7364007234573364 2023-01-24 02:16:34.573236: step: 902/470, loss: 1.1799520254135132 2023-01-24 02:16:35.362338: step: 904/470, loss: 0.29465073347091675 2023-01-24 02:16:36.109185: step: 906/470, loss: 0.6081302165985107 2023-01-24 02:16:36.907819: step: 908/470, loss: 1.4914206266403198 2023-01-24 02:16:37.570783: step: 910/470, loss: 0.7673165202140808 2023-01-24 02:16:38.496182: step: 912/470, loss: 0.6707650423049927 2023-01-24 02:16:39.306373: step: 914/470, loss: 0.4942966103553772 2023-01-24 02:16:40.025717: step: 916/470, loss: 1.598168969154358 2023-01-24 02:16:40.822540: step: 918/470, loss: 1.6423366069793701 2023-01-24 02:16:41.687338: step: 920/470, loss: 1.2543729543685913 2023-01-24 02:16:42.409210: step: 922/470, loss: 0.7959638237953186 2023-01-24 02:16:43.148515: step: 924/470, loss: 1.5553340911865234 2023-01-24 02:16:43.955908: step: 926/470, loss: 1.098331093788147 2023-01-24 02:16:44.662858: step: 928/470, loss: 0.5700544118881226 2023-01-24 02:16:45.455506: step: 930/470, loss: 0.5074666738510132 2023-01-24 02:16:46.142788: step: 932/470, loss: 0.383233904838562 2023-01-24 02:16:46.837284: step: 934/470, loss: 1.1787450313568115 2023-01-24 02:16:47.534080: step: 936/470, loss: 5.3494744300842285 2023-01-24 02:16:48.267956: step: 938/470, loss: 1.421501636505127 2023-01-24 02:16:48.999476: step: 940/470, loss: 1.8558794260025024 2023-01-24 02:16:49.706615: step: 942/470, loss: 0.21680720150470734 ================================================== Loss: 0.910 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.302766948414596, 'r': 0.29989439672185786, 'f1': 0.3013238266395026}, 'combined': 0.2220280827870019, 'epoch': 5} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3457029344307223, 'r': 0.31013542098448454, 'f1': 0.326954726633415}, 'combined': 0.21796981775560997, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29238880893387975, 'r': 0.2901695390368484, 'f1': 0.2912749468046078}, 'combined': 0.21462364501392153, 'epoch': 5} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.356603558767513, 'r': 0.31442833018250904, 'f1': 0.3341905604392534}, 'combined': 0.2227937069595022, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2795942277881988, 'r': 0.29285771107985914, 'f1': 0.28607231462295785}, 'combined': 0.21079012656428472, 'epoch': 5} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34104195191703285, 'r': 0.3167755053383209, 'f1': 0.3284611421254773}, 'combined': 0.21897409475031815, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.35714285714285715, 'f1': 0.2941176470588235}, 'combined': 0.196078431372549, 'epoch': 5} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.2391304347826087, 'f1': 0.3055555555555556}, 'combined': 0.20370370370370372, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213942479544022, 'r': 0.26284804718851484, 'f1': 0.2891877262387209}, 'combined': 0.21308569301800487, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35411825897044286, 'r': 0.2415215791248938, 'f1': 0.28717757152371726}, 'combined': 0.1914517143491448, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2785714285714286, 'f1': 0.319672131147541}, 'combined': 0.21311475409836067, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340823419483803, 'r': 0.2117334007794289, 'f1': 0.2591951270865483}, 'combined': 0.190985883116404, 'epoch': 4} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.37829910920569393, 'r': 0.25401640474042275, 'f1': 0.3039437670514713}, 'combined': 0.20262917803431416, 'epoch': 4} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5961538461538461, 'r': 0.33695652173913043, 'f1': 0.4305555555555555}, 'combined': 0.287037037037037, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32384002770083103, 'r': 0.22099480151228734, 'f1': 0.26271067415730337}, 'combined': 0.1935762862211709, 'epoch': 3} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.40791775483075643, 'r': 0.2294048026207517, 'f1': 0.29366069328110966}, 'combined': 0.19577379552073973, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 3} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:19:24.588858: step: 2/470, loss: 0.46913114190101624 2023-01-24 02:19:25.361866: step: 4/470, loss: 0.3887912929058075 2023-01-24 02:19:26.073402: step: 6/470, loss: 0.4061891436576843 2023-01-24 02:19:26.750961: step: 8/470, loss: 0.3760465979576111 2023-01-24 02:19:27.453789: step: 10/470, loss: 0.7916153073310852 2023-01-24 02:19:28.241330: step: 12/470, loss: 0.27006056904792786 2023-01-24 02:19:29.047351: step: 14/470, loss: 0.5665428042411804 2023-01-24 02:19:29.768205: step: 16/470, loss: 1.7454071044921875 2023-01-24 02:19:30.546117: step: 18/470, loss: 0.7772339582443237 2023-01-24 02:19:31.437194: step: 20/470, loss: 0.10461916774511337 2023-01-24 02:19:32.245618: step: 22/470, loss: 0.2878537178039551 2023-01-24 02:19:33.002339: step: 24/470, loss: 0.647771418094635 2023-01-24 02:19:33.751563: step: 26/470, loss: 0.3383640646934509 2023-01-24 02:19:34.486463: step: 28/470, loss: 0.6665865778923035 2023-01-24 02:19:35.256920: step: 30/470, loss: 0.4555095434188843 2023-01-24 02:19:36.021703: step: 32/470, loss: 0.3503432273864746 2023-01-24 02:19:36.732627: step: 34/470, loss: 0.2352353185415268 2023-01-24 02:19:37.410693: step: 36/470, loss: 1.3757781982421875 2023-01-24 02:19:38.097135: step: 38/470, loss: 0.4522511959075928 2023-01-24 02:19:38.913013: step: 40/470, loss: 0.2714459002017975 2023-01-24 02:19:39.645483: step: 42/470, loss: 0.26817435026168823 2023-01-24 02:19:40.388342: step: 44/470, loss: 0.6385064125061035 2023-01-24 02:19:41.178040: step: 46/470, loss: 0.2124578356742859 2023-01-24 02:19:41.957334: step: 48/470, loss: 0.12701259553432465 2023-01-24 02:19:42.703983: step: 50/470, loss: 0.8603786826133728 2023-01-24 02:19:43.529573: step: 52/470, loss: 0.4419752061367035 2023-01-24 02:19:44.263780: step: 54/470, loss: 3.501265287399292 2023-01-24 02:19:44.943349: step: 56/470, loss: 1.0722558498382568 2023-01-24 02:19:45.805165: step: 58/470, loss: 0.18907511234283447 2023-01-24 02:19:46.522168: step: 60/470, loss: 3.298689842224121 2023-01-24 02:19:47.261785: step: 62/470, loss: 0.30768173933029175 2023-01-24 02:19:47.988602: step: 64/470, loss: 0.3563343286514282 2023-01-24 02:19:48.763793: step: 66/470, loss: 0.727003276348114 2023-01-24 02:19:49.509074: step: 68/470, loss: 0.4526892602443695 2023-01-24 02:19:50.282282: step: 70/470, loss: 0.3136780560016632 2023-01-24 02:19:51.040390: step: 72/470, loss: 0.8561743497848511 2023-01-24 02:19:51.831402: step: 74/470, loss: 0.3809729814529419 2023-01-24 02:19:52.620031: step: 76/470, loss: 0.3150908648967743 2023-01-24 02:19:53.298461: step: 78/470, loss: 0.3166169822216034 2023-01-24 02:19:54.064504: step: 80/470, loss: 1.1344679594039917 2023-01-24 02:19:54.819064: step: 82/470, loss: 0.2568296790122986 2023-01-24 02:19:55.563977: step: 84/470, loss: 0.19643135368824005 2023-01-24 02:19:56.202173: step: 86/470, loss: 0.7549906373023987 2023-01-24 02:19:56.984089: step: 88/470, loss: 1.3208808898925781 2023-01-24 02:19:57.730736: step: 90/470, loss: 0.7214917540550232 2023-01-24 02:19:58.488995: step: 92/470, loss: 0.5928862690925598 2023-01-24 02:19:59.246520: step: 94/470, loss: 1.5356335639953613 2023-01-24 02:19:59.938368: step: 96/470, loss: 0.336683452129364 2023-01-24 02:20:00.716671: step: 98/470, loss: 5.519505500793457 2023-01-24 02:20:01.465477: step: 100/470, loss: 0.6986427307128906 2023-01-24 02:20:02.201607: step: 102/470, loss: 1.0318654775619507 2023-01-24 02:20:03.000966: step: 104/470, loss: 0.5973689556121826 2023-01-24 02:20:03.765655: step: 106/470, loss: 0.46960803866386414 2023-01-24 02:20:04.491129: step: 108/470, loss: 0.45779475569725037 2023-01-24 02:20:05.260713: step: 110/470, loss: 1.0601186752319336 2023-01-24 02:20:05.979188: step: 112/470, loss: 0.5745185613632202 2023-01-24 02:20:06.720897: step: 114/470, loss: 0.34605327248573303 2023-01-24 02:20:07.421860: step: 116/470, loss: 0.9498859643936157 2023-01-24 02:20:08.149125: step: 118/470, loss: 0.25999417901039124 2023-01-24 02:20:08.943791: step: 120/470, loss: 0.32462361454963684 2023-01-24 02:20:09.668424: step: 122/470, loss: 0.1615123152732849 2023-01-24 02:20:10.337829: step: 124/470, loss: 0.34102755784988403 2023-01-24 02:20:11.115087: step: 126/470, loss: 0.18280981481075287 2023-01-24 02:20:11.903159: step: 128/470, loss: 0.6337746977806091 2023-01-24 02:20:12.664729: step: 130/470, loss: 0.35255953669548035 2023-01-24 02:20:13.411403: step: 132/470, loss: 0.36493754386901855 2023-01-24 02:20:14.146027: step: 134/470, loss: 0.4938736855983734 2023-01-24 02:20:14.924730: step: 136/470, loss: 0.8310595154762268 2023-01-24 02:20:15.764592: step: 138/470, loss: 0.36032968759536743 2023-01-24 02:20:16.529475: step: 140/470, loss: 0.6380537748336792 2023-01-24 02:20:17.217009: step: 142/470, loss: 0.634308397769928 2023-01-24 02:20:17.934818: step: 144/470, loss: 0.3668132722377777 2023-01-24 02:20:18.692860: step: 146/470, loss: 0.263011634349823 2023-01-24 02:20:19.502482: step: 148/470, loss: 0.5096966624259949 2023-01-24 02:20:20.283626: step: 150/470, loss: 0.811173141002655 2023-01-24 02:20:20.996788: step: 152/470, loss: 0.5178455114364624 2023-01-24 02:20:21.722623: step: 154/470, loss: 0.7749607563018799 2023-01-24 02:20:22.495169: step: 156/470, loss: 0.5281555652618408 2023-01-24 02:20:23.327819: step: 158/470, loss: 0.7538830637931824 2023-01-24 02:20:24.098779: step: 160/470, loss: 0.13230903446674347 2023-01-24 02:20:24.923324: step: 162/470, loss: 1.0788747072219849 2023-01-24 02:20:25.690265: step: 164/470, loss: 0.6544629335403442 2023-01-24 02:20:26.386002: step: 166/470, loss: 0.6774575114250183 2023-01-24 02:20:27.028142: step: 168/470, loss: 0.5532941818237305 2023-01-24 02:20:27.776347: step: 170/470, loss: 0.3752504289150238 2023-01-24 02:20:28.553143: step: 172/470, loss: 1.2700722217559814 2023-01-24 02:20:29.247776: step: 174/470, loss: 0.4598991572856903 2023-01-24 02:20:30.067301: step: 176/470, loss: 2.486417293548584 2023-01-24 02:20:30.753329: step: 178/470, loss: 1.1655266284942627 2023-01-24 02:20:31.460469: step: 180/470, loss: 0.5170928239822388 2023-01-24 02:20:32.228621: step: 182/470, loss: 0.3279862403869629 2023-01-24 02:20:32.990301: step: 184/470, loss: 0.6931787729263306 2023-01-24 02:20:33.661366: step: 186/470, loss: 3.434424877166748 2023-01-24 02:20:34.366529: step: 188/470, loss: 0.5698443055152893 2023-01-24 02:20:34.991437: step: 190/470, loss: 0.19923467934131622 2023-01-24 02:20:35.646467: step: 192/470, loss: 0.250698983669281 2023-01-24 02:20:36.315824: step: 194/470, loss: 0.8043193817138672 2023-01-24 02:20:36.992358: step: 196/470, loss: 0.6029735803604126 2023-01-24 02:20:37.776810: step: 198/470, loss: 0.30597418546676636 2023-01-24 02:20:38.431614: step: 200/470, loss: 0.4779759645462036 2023-01-24 02:20:39.185889: step: 202/470, loss: 0.23415221273899078 2023-01-24 02:20:39.895727: step: 204/470, loss: 0.5951058268547058 2023-01-24 02:20:40.627590: step: 206/470, loss: 1.3589835166931152 2023-01-24 02:20:41.348926: step: 208/470, loss: 0.26382386684417725 2023-01-24 02:20:42.127469: step: 210/470, loss: 0.6494569182395935 2023-01-24 02:20:42.828587: step: 212/470, loss: 0.566684365272522 2023-01-24 02:20:43.590298: step: 214/470, loss: 0.5304621458053589 2023-01-24 02:20:44.370761: step: 216/470, loss: 0.36520957946777344 2023-01-24 02:20:45.105642: step: 218/470, loss: 1.9913074970245361 2023-01-24 02:20:45.769738: step: 220/470, loss: 0.5152336955070496 2023-01-24 02:20:46.504045: step: 222/470, loss: 0.7162929773330688 2023-01-24 02:20:47.268869: step: 224/470, loss: 0.4547756612300873 2023-01-24 02:20:48.018200: step: 226/470, loss: 1.3032582998275757 2023-01-24 02:20:48.714407: step: 228/470, loss: 0.29936328530311584 2023-01-24 02:20:49.554811: step: 230/470, loss: 0.4857710599899292 2023-01-24 02:20:50.328007: step: 232/470, loss: 0.701897382736206 2023-01-24 02:20:51.073193: step: 234/470, loss: 1.8443883657455444 2023-01-24 02:20:51.830230: step: 236/470, loss: 0.3022279441356659 2023-01-24 02:20:52.515223: step: 238/470, loss: 0.4773560166358948 2023-01-24 02:20:53.202596: step: 240/470, loss: 0.23034065961837769 2023-01-24 02:20:53.924388: step: 242/470, loss: 0.5290822386741638 2023-01-24 02:20:54.783243: step: 244/470, loss: 0.9485624432563782 2023-01-24 02:20:55.519900: step: 246/470, loss: 0.16987621784210205 2023-01-24 02:20:56.326300: step: 248/470, loss: 0.24645563960075378 2023-01-24 02:20:57.090303: step: 250/470, loss: 0.35188791155815125 2023-01-24 02:20:57.939738: step: 252/470, loss: 0.45709526538848877 2023-01-24 02:20:58.627043: step: 254/470, loss: 0.9818328022956848 2023-01-24 02:20:59.393687: step: 256/470, loss: 0.13087573647499084 2023-01-24 02:21:00.158071: step: 258/470, loss: 0.2843538224697113 2023-01-24 02:21:00.823885: step: 260/470, loss: 0.8852912783622742 2023-01-24 02:21:01.527468: step: 262/470, loss: 0.7009601593017578 2023-01-24 02:21:02.234228: step: 264/470, loss: 0.623857855796814 2023-01-24 02:21:02.945821: step: 266/470, loss: 0.18668121099472046 2023-01-24 02:21:03.697368: step: 268/470, loss: 0.9859969615936279 2023-01-24 02:21:04.403823: step: 270/470, loss: 0.6130749583244324 2023-01-24 02:21:05.075760: step: 272/470, loss: 0.7168523669242859 2023-01-24 02:21:05.846057: step: 274/470, loss: 0.4143531620502472 2023-01-24 02:21:06.599785: step: 276/470, loss: 0.7964304089546204 2023-01-24 02:21:07.313455: step: 278/470, loss: 0.6704311370849609 2023-01-24 02:21:08.043793: step: 280/470, loss: 1.371323823928833 2023-01-24 02:21:08.874339: step: 282/470, loss: 0.8651912212371826 2023-01-24 02:21:09.629782: step: 284/470, loss: 0.3530345559120178 2023-01-24 02:21:10.346344: step: 286/470, loss: 0.3043641448020935 2023-01-24 02:21:11.032283: step: 288/470, loss: 0.1724073439836502 2023-01-24 02:21:11.808009: step: 290/470, loss: 0.2577505111694336 2023-01-24 02:21:12.587127: step: 292/470, loss: 0.3419148623943329 2023-01-24 02:21:13.364738: step: 294/470, loss: 0.2580012083053589 2023-01-24 02:21:14.131130: step: 296/470, loss: 0.7738077044487 2023-01-24 02:21:14.923450: step: 298/470, loss: 1.5503582954406738 2023-01-24 02:21:15.740562: step: 300/470, loss: 0.36178097128868103 2023-01-24 02:21:16.497957: step: 302/470, loss: 3.4303958415985107 2023-01-24 02:21:17.263040: step: 304/470, loss: 1.861863374710083 2023-01-24 02:21:18.054970: step: 306/470, loss: 0.3691105246543884 2023-01-24 02:21:18.901888: step: 308/470, loss: 0.2734450399875641 2023-01-24 02:21:19.748197: step: 310/470, loss: 1.402500033378601 2023-01-24 02:21:20.632039: step: 312/470, loss: 0.5831880569458008 2023-01-24 02:21:21.383942: step: 314/470, loss: 0.6897637844085693 2023-01-24 02:21:22.080319: step: 316/470, loss: 0.45082640647888184 2023-01-24 02:21:22.869003: step: 318/470, loss: 0.24482515454292297 2023-01-24 02:21:23.598111: step: 320/470, loss: 0.731791615486145 2023-01-24 02:21:24.378133: step: 322/470, loss: 0.4253387749195099 2023-01-24 02:21:25.093396: step: 324/470, loss: 0.7619374394416809 2023-01-24 02:21:25.796153: step: 326/470, loss: 0.3442683815956116 2023-01-24 02:21:26.670251: step: 328/470, loss: 0.7861752510070801 2023-01-24 02:21:27.421614: step: 330/470, loss: 0.6463299989700317 2023-01-24 02:21:28.147614: step: 332/470, loss: 0.7153461575508118 2023-01-24 02:21:28.910801: step: 334/470, loss: 0.2146746665239334 2023-01-24 02:21:29.655912: step: 336/470, loss: 0.26649871468544006 2023-01-24 02:21:30.373260: step: 338/470, loss: 0.24172751605510712 2023-01-24 02:21:31.111037: step: 340/470, loss: 0.26765137910842896 2023-01-24 02:21:31.856657: step: 342/470, loss: 0.253934770822525 2023-01-24 02:21:32.688920: step: 344/470, loss: 0.4995259940624237 2023-01-24 02:21:33.516021: step: 346/470, loss: 0.9960381388664246 2023-01-24 02:21:34.363488: step: 348/470, loss: 0.3678852617740631 2023-01-24 02:21:35.154407: step: 350/470, loss: 1.4511865377426147 2023-01-24 02:21:35.901167: step: 352/470, loss: 0.4098103940486908 2023-01-24 02:21:36.629512: step: 354/470, loss: 0.8686992526054382 2023-01-24 02:21:37.504803: step: 356/470, loss: 0.3895498514175415 2023-01-24 02:21:38.306509: step: 358/470, loss: 0.4124021828174591 2023-01-24 02:21:38.995938: step: 360/470, loss: 0.39607658982276917 2023-01-24 02:21:39.733495: step: 362/470, loss: 0.5164311528205872 2023-01-24 02:21:40.510834: step: 364/470, loss: 0.38875195384025574 2023-01-24 02:21:41.275834: step: 366/470, loss: 1.0487358570098877 2023-01-24 02:21:42.004219: step: 368/470, loss: 0.2708939015865326 2023-01-24 02:21:42.874804: step: 370/470, loss: 0.3877793848514557 2023-01-24 02:21:43.725807: step: 372/470, loss: 1.2002607583999634 2023-01-24 02:21:44.478244: step: 374/470, loss: 0.542573869228363 2023-01-24 02:21:45.196126: step: 376/470, loss: 0.11055511236190796 2023-01-24 02:21:46.014921: step: 378/470, loss: 0.9897867441177368 2023-01-24 02:21:46.734364: step: 380/470, loss: 1.5830128192901611 2023-01-24 02:21:47.485613: step: 382/470, loss: 0.3970726430416107 2023-01-24 02:21:48.269219: step: 384/470, loss: 0.5450624227523804 2023-01-24 02:21:49.001306: step: 386/470, loss: 0.47805434465408325 2023-01-24 02:21:49.780030: step: 388/470, loss: 0.5078467130661011 2023-01-24 02:21:50.533762: step: 390/470, loss: 0.8410879969596863 2023-01-24 02:21:51.218482: step: 392/470, loss: 0.564120352268219 2023-01-24 02:21:51.980342: step: 394/470, loss: 0.4679042398929596 2023-01-24 02:21:52.708086: step: 396/470, loss: 0.3612174689769745 2023-01-24 02:21:53.490386: step: 398/470, loss: 0.36533409357070923 2023-01-24 02:21:54.211883: step: 400/470, loss: 1.2119630575180054 2023-01-24 02:21:54.977955: step: 402/470, loss: 0.5056576132774353 2023-01-24 02:21:55.703375: step: 404/470, loss: 0.634218156337738 2023-01-24 02:21:56.345247: step: 406/470, loss: 3.289701223373413 2023-01-24 02:21:57.072294: step: 408/470, loss: 0.0705670490860939 2023-01-24 02:21:57.823573: step: 410/470, loss: 0.584409773349762 2023-01-24 02:21:58.583865: step: 412/470, loss: 0.5824598073959351 2023-01-24 02:21:59.313993: step: 414/470, loss: 1.616589069366455 2023-01-24 02:22:00.131680: step: 416/470, loss: 0.17866788804531097 2023-01-24 02:22:00.867338: step: 418/470, loss: 0.4966486692428589 2023-01-24 02:22:01.629720: step: 420/470, loss: 0.14873385429382324 2023-01-24 02:22:02.432209: step: 422/470, loss: 0.6694574952125549 2023-01-24 02:22:03.097115: step: 424/470, loss: 0.6328750848770142 2023-01-24 02:22:03.833417: step: 426/470, loss: 0.42485523223876953 2023-01-24 02:22:04.571370: step: 428/470, loss: 0.36120638251304626 2023-01-24 02:22:05.339324: step: 430/470, loss: 0.38846999406814575 2023-01-24 02:22:06.050407: step: 432/470, loss: 1.087398886680603 2023-01-24 02:22:06.807875: step: 434/470, loss: 0.24200265109539032 2023-01-24 02:22:07.515330: step: 436/470, loss: 0.44511035084724426 2023-01-24 02:22:08.293719: step: 438/470, loss: 0.578136146068573 2023-01-24 02:22:09.025628: step: 440/470, loss: 0.04400842636823654 2023-01-24 02:22:09.782710: step: 442/470, loss: 0.42095282673835754 2023-01-24 02:22:10.537065: step: 444/470, loss: 0.2756405174732208 2023-01-24 02:22:11.309005: step: 446/470, loss: 0.6643534302711487 2023-01-24 02:22:12.056697: step: 448/470, loss: 0.5027168989181519 2023-01-24 02:22:12.730602: step: 450/470, loss: 0.5610426664352417 2023-01-24 02:22:13.475005: step: 452/470, loss: 0.7037179470062256 2023-01-24 02:22:14.207040: step: 454/470, loss: 1.122239112854004 2023-01-24 02:22:15.102870: step: 456/470, loss: 0.4908750653266907 2023-01-24 02:22:15.860842: step: 458/470, loss: 0.4792795181274414 2023-01-24 02:22:16.618564: step: 460/470, loss: 0.3053261637687683 2023-01-24 02:22:17.348497: step: 462/470, loss: 0.5884460806846619 2023-01-24 02:22:18.069799: step: 464/470, loss: 2.739895820617676 2023-01-24 02:22:18.867791: step: 466/470, loss: 0.8006578683853149 2023-01-24 02:22:19.591872: step: 468/470, loss: 1.319366455078125 2023-01-24 02:22:20.380343: step: 470/470, loss: 2.922891139984131 2023-01-24 02:22:21.102855: step: 472/470, loss: 0.2505470812320709 2023-01-24 02:22:21.902753: step: 474/470, loss: 0.4484995901584625 2023-01-24 02:22:22.632121: step: 476/470, loss: 0.5939066410064697 2023-01-24 02:22:23.354469: step: 478/470, loss: 0.27172547578811646 2023-01-24 02:22:24.279034: step: 480/470, loss: 0.3544227182865143 2023-01-24 02:22:24.881322: step: 482/470, loss: 0.19915227591991425 2023-01-24 02:22:25.593397: step: 484/470, loss: 0.5538665056228638 2023-01-24 02:22:26.468855: step: 486/470, loss: 1.7567211389541626 2023-01-24 02:22:27.266748: step: 488/470, loss: 0.23877158761024475 2023-01-24 02:22:28.020129: step: 490/470, loss: 0.37857306003570557 2023-01-24 02:22:28.730858: step: 492/470, loss: 0.2932409346103668 2023-01-24 02:22:29.603176: step: 494/470, loss: 0.3827472925186157 2023-01-24 02:22:30.303001: step: 496/470, loss: 1.6333781480789185 2023-01-24 02:22:31.114706: step: 498/470, loss: 0.33791467547416687 2023-01-24 02:22:31.835830: step: 500/470, loss: 0.32591983675956726 2023-01-24 02:22:32.583666: step: 502/470, loss: 0.4133923649787903 2023-01-24 02:22:33.362157: step: 504/470, loss: 1.181382417678833 2023-01-24 02:22:34.162358: step: 506/470, loss: 5.112921237945557 2023-01-24 02:22:34.952427: step: 508/470, loss: 0.24143043160438538 2023-01-24 02:22:35.678986: step: 510/470, loss: 0.14529116451740265 2023-01-24 02:22:36.429555: step: 512/470, loss: 0.4227316975593567 2023-01-24 02:22:37.204530: step: 514/470, loss: 1.3871934413909912 2023-01-24 02:22:37.969087: step: 516/470, loss: 1.0044560432434082 2023-01-24 02:22:38.729921: step: 518/470, loss: 0.29355016350746155 2023-01-24 02:22:39.473417: step: 520/470, loss: 0.29415249824523926 2023-01-24 02:22:40.193556: step: 522/470, loss: 1.2313815355300903 2023-01-24 02:22:40.898781: step: 524/470, loss: 0.27359142899513245 2023-01-24 02:22:41.620593: step: 526/470, loss: 0.9681866765022278 2023-01-24 02:22:42.342876: step: 528/470, loss: 0.4213339388370514 2023-01-24 02:22:43.104380: step: 530/470, loss: 0.6697089672088623 2023-01-24 02:22:43.845669: step: 532/470, loss: 1.0336536169052124 2023-01-24 02:22:44.602214: step: 534/470, loss: 0.18524248898029327 2023-01-24 02:22:45.378163: step: 536/470, loss: 0.5117719173431396 2023-01-24 02:22:46.143290: step: 538/470, loss: 0.563843846321106 2023-01-24 02:22:46.935435: step: 540/470, loss: 0.42840808629989624 2023-01-24 02:22:47.729113: step: 542/470, loss: 1.106241226196289 2023-01-24 02:22:48.435273: step: 544/470, loss: 0.27418795228004456 2023-01-24 02:22:49.129827: step: 546/470, loss: 0.1472853273153305 2023-01-24 02:22:49.889799: step: 548/470, loss: 7.418033599853516 2023-01-24 02:22:50.592634: step: 550/470, loss: 0.27359166741371155 2023-01-24 02:22:51.440878: step: 552/470, loss: 0.23063817620277405 2023-01-24 02:22:52.208716: step: 554/470, loss: 1.2608001232147217 2023-01-24 02:22:52.929151: step: 556/470, loss: 0.40279632806777954 2023-01-24 02:22:53.701762: step: 558/470, loss: 1.5408741235733032 2023-01-24 02:22:54.459406: step: 560/470, loss: 0.8065625429153442 2023-01-24 02:22:55.214163: step: 562/470, loss: 0.20651859045028687 2023-01-24 02:22:55.964849: step: 564/470, loss: 1.4523169994354248 2023-01-24 02:22:56.668718: step: 566/470, loss: 0.589959442615509 2023-01-24 02:22:57.363410: step: 568/470, loss: 1.7879338264465332 2023-01-24 02:22:58.033866: step: 570/470, loss: 1.0214474201202393 2023-01-24 02:22:58.805900: step: 572/470, loss: 0.5045061707496643 2023-01-24 02:22:59.527945: step: 574/470, loss: 1.857149600982666 2023-01-24 02:23:00.282400: step: 576/470, loss: 0.49319571256637573 2023-01-24 02:23:00.982971: step: 578/470, loss: 1.0521998405456543 2023-01-24 02:23:01.692866: step: 580/470, loss: 0.8370808362960815 2023-01-24 02:23:02.553916: step: 582/470, loss: 0.752477765083313 2023-01-24 02:23:03.271696: step: 584/470, loss: 0.2725905478000641 2023-01-24 02:23:04.042985: step: 586/470, loss: 1.0730834007263184 2023-01-24 02:23:04.735623: step: 588/470, loss: 0.5087326765060425 2023-01-24 02:23:05.457616: step: 590/470, loss: 0.23478223383426666 2023-01-24 02:23:06.327572: step: 592/470, loss: 0.5487679243087769 2023-01-24 02:23:07.098793: step: 594/470, loss: 0.7396395206451416 2023-01-24 02:23:07.894301: step: 596/470, loss: 1.1245198249816895 2023-01-24 02:23:08.634451: step: 598/470, loss: 0.6135616302490234 2023-01-24 02:23:09.457842: step: 600/470, loss: 1.019770622253418 2023-01-24 02:23:10.217336: step: 602/470, loss: 0.26452648639678955 2023-01-24 02:23:10.944454: step: 604/470, loss: 1.3409970998764038 2023-01-24 02:23:11.662213: step: 606/470, loss: 0.41237398982048035 2023-01-24 02:23:12.426649: step: 608/470, loss: 0.1840699464082718 2023-01-24 02:23:13.266526: step: 610/470, loss: 1.9304275512695312 2023-01-24 02:23:14.059517: step: 612/470, loss: 0.3491050899028778 2023-01-24 02:23:14.851935: step: 614/470, loss: 0.7268772721290588 2023-01-24 02:23:15.864911: step: 616/470, loss: 0.4124700129032135 2023-01-24 02:23:16.611838: step: 618/470, loss: 0.2458016574382782 2023-01-24 02:23:17.349527: step: 620/470, loss: 0.25328996777534485 2023-01-24 02:23:18.077127: step: 622/470, loss: 0.3315480351448059 2023-01-24 02:23:18.760659: step: 624/470, loss: 0.4825563430786133 2023-01-24 02:23:19.457598: step: 626/470, loss: 0.6606676578521729 2023-01-24 02:23:20.268193: step: 628/470, loss: 0.5025643110275269 2023-01-24 02:23:21.036652: step: 630/470, loss: 0.3678370714187622 2023-01-24 02:23:21.878368: step: 632/470, loss: 0.4976312816143036 2023-01-24 02:23:22.591869: step: 634/470, loss: 0.22208793461322784 2023-01-24 02:23:23.328582: step: 636/470, loss: 1.0294278860092163 2023-01-24 02:23:24.066944: step: 638/470, loss: 1.9563071727752686 2023-01-24 02:23:24.865990: step: 640/470, loss: 2.2986676692962646 2023-01-24 02:23:25.585073: step: 642/470, loss: 1.1405041217803955 2023-01-24 02:23:26.362605: step: 644/470, loss: 0.3489605784416199 2023-01-24 02:23:27.139414: step: 646/470, loss: 0.9580307602882385 2023-01-24 02:23:27.854067: step: 648/470, loss: 0.27914896607398987 2023-01-24 02:23:28.539954: step: 650/470, loss: 0.983113169670105 2023-01-24 02:23:29.150143: step: 652/470, loss: 0.4380299746990204 2023-01-24 02:23:29.892893: step: 654/470, loss: 2.07478666305542 2023-01-24 02:23:30.564032: step: 656/470, loss: 0.8824518918991089 2023-01-24 02:23:31.246613: step: 658/470, loss: 0.3353792130947113 2023-01-24 02:23:32.014127: step: 660/470, loss: 0.4645380675792694 2023-01-24 02:23:32.798973: step: 662/470, loss: 0.8738151788711548 2023-01-24 02:23:33.505050: step: 664/470, loss: 0.2929144501686096 2023-01-24 02:23:34.231096: step: 666/470, loss: 0.2078137993812561 2023-01-24 02:23:35.045975: step: 668/470, loss: 0.3449633717536926 2023-01-24 02:23:35.857615: step: 670/470, loss: 4.962142467498779 2023-01-24 02:23:36.606021: step: 672/470, loss: 0.266704261302948 2023-01-24 02:23:37.363997: step: 674/470, loss: 0.7107980847358704 2023-01-24 02:23:38.152965: step: 676/470, loss: 0.7811194658279419 2023-01-24 02:23:38.962155: step: 678/470, loss: 0.6608456969261169 2023-01-24 02:23:39.670855: step: 680/470, loss: 0.36729899048805237 2023-01-24 02:23:40.522991: step: 682/470, loss: 1.2359304428100586 2023-01-24 02:23:41.272245: step: 684/470, loss: 0.6000874638557434 2023-01-24 02:23:41.963159: step: 686/470, loss: 0.30050334334373474 2023-01-24 02:23:42.631189: step: 688/470, loss: 1.0338342189788818 2023-01-24 02:23:43.337058: step: 690/470, loss: 0.6685682535171509 2023-01-24 02:23:44.122381: step: 692/470, loss: 2.2404136657714844 2023-01-24 02:23:44.907793: step: 694/470, loss: 0.9425269365310669 2023-01-24 02:23:45.665260: step: 696/470, loss: 0.9702392220497131 2023-01-24 02:23:46.401699: step: 698/470, loss: 0.9032459259033203 2023-01-24 02:23:47.113913: step: 700/470, loss: 0.46420717239379883 2023-01-24 02:23:47.855139: step: 702/470, loss: 0.16542571783065796 2023-01-24 02:23:48.549787: step: 704/470, loss: 0.4053605794906616 2023-01-24 02:23:49.330098: step: 706/470, loss: 0.5979849100112915 2023-01-24 02:23:50.018476: step: 708/470, loss: 0.4328975975513458 2023-01-24 02:23:50.783914: step: 710/470, loss: 0.603645920753479 2023-01-24 02:23:51.504387: step: 712/470, loss: 0.6759111881256104 2023-01-24 02:23:52.318593: step: 714/470, loss: 0.17363420128822327 2023-01-24 02:23:53.140721: step: 716/470, loss: 0.45107150077819824 2023-01-24 02:23:53.830539: step: 718/470, loss: 0.30570104718208313 2023-01-24 02:23:54.570089: step: 720/470, loss: 0.8583315014839172 2023-01-24 02:23:55.374522: step: 722/470, loss: 1.9609408378601074 2023-01-24 02:23:56.059843: step: 724/470, loss: 0.3080098032951355 2023-01-24 02:23:56.832908: step: 726/470, loss: 0.6534498333930969 2023-01-24 02:23:57.470008: step: 728/470, loss: 0.9306233525276184 2023-01-24 02:23:58.246735: step: 730/470, loss: 0.4405674934387207 2023-01-24 02:23:59.027166: step: 732/470, loss: 1.8319543600082397 2023-01-24 02:23:59.777781: step: 734/470, loss: 0.3661953806877136 2023-01-24 02:24:00.494899: step: 736/470, loss: 1.5648008584976196 2023-01-24 02:24:01.187463: step: 738/470, loss: 1.1446764469146729 2023-01-24 02:24:01.880107: step: 740/470, loss: 0.5488044619560242 2023-01-24 02:24:02.604077: step: 742/470, loss: 0.7896547913551331 2023-01-24 02:24:03.293243: step: 744/470, loss: 0.2640857994556427 2023-01-24 02:24:04.042588: step: 746/470, loss: 0.12843212485313416 2023-01-24 02:24:04.765842: step: 748/470, loss: 0.7997981905937195 2023-01-24 02:24:05.558980: step: 750/470, loss: 0.9078993797302246 2023-01-24 02:24:06.268320: step: 752/470, loss: 0.5914628505706787 2023-01-24 02:24:06.962706: step: 754/470, loss: 0.6052213311195374 2023-01-24 02:24:07.691673: step: 756/470, loss: 0.16759061813354492 2023-01-24 02:24:08.414491: step: 758/470, loss: 0.423061728477478 2023-01-24 02:24:09.224326: step: 760/470, loss: 0.9458290934562683 2023-01-24 02:24:10.014395: step: 762/470, loss: 0.44426000118255615 2023-01-24 02:24:10.755583: step: 764/470, loss: 0.48349910974502563 2023-01-24 02:24:11.534644: step: 766/470, loss: 1.0650396347045898 2023-01-24 02:24:12.366673: step: 768/470, loss: 0.9286936521530151 2023-01-24 02:24:13.153451: step: 770/470, loss: 0.5421870946884155 2023-01-24 02:24:13.846428: step: 772/470, loss: 0.6645587682723999 2023-01-24 02:24:14.575672: step: 774/470, loss: 0.3658778965473175 2023-01-24 02:24:15.280972: step: 776/470, loss: 0.4753032624721527 2023-01-24 02:24:15.967079: step: 778/470, loss: 3.5970163345336914 2023-01-24 02:24:16.864119: step: 780/470, loss: 0.8341478109359741 2023-01-24 02:24:17.746625: step: 782/470, loss: 0.875688910484314 2023-01-24 02:24:18.514444: step: 784/470, loss: 0.44907113909721375 2023-01-24 02:24:19.296832: step: 786/470, loss: 0.9514230489730835 2023-01-24 02:24:20.095565: step: 788/470, loss: 0.46913057565689087 2023-01-24 02:24:20.968035: step: 790/470, loss: 1.4260585308074951 2023-01-24 02:24:21.840945: step: 792/470, loss: 0.3309740424156189 2023-01-24 02:24:22.570619: step: 794/470, loss: 0.2095767855644226 2023-01-24 02:24:23.253468: step: 796/470, loss: 1.0972883701324463 2023-01-24 02:24:23.977942: step: 798/470, loss: 0.6162765026092529 2023-01-24 02:24:24.681265: step: 800/470, loss: 2.2116286754608154 2023-01-24 02:24:25.453802: step: 802/470, loss: 0.3036021888256073 2023-01-24 02:24:26.225590: step: 804/470, loss: 0.762950599193573 2023-01-24 02:24:26.959644: step: 806/470, loss: 0.6544211506843567 2023-01-24 02:24:27.633346: step: 808/470, loss: 0.37966063618659973 2023-01-24 02:24:28.396654: step: 810/470, loss: 1.9399421215057373 2023-01-24 02:24:29.160093: step: 812/470, loss: 1.2812671661376953 2023-01-24 02:24:29.952631: step: 814/470, loss: 0.187590554356575 2023-01-24 02:24:30.664909: step: 816/470, loss: 0.9980336427688599 2023-01-24 02:24:31.354244: step: 818/470, loss: 0.3564274311065674 2023-01-24 02:24:31.980783: step: 820/470, loss: 0.34761491417884827 2023-01-24 02:24:32.722834: step: 822/470, loss: 0.26210886240005493 2023-01-24 02:24:33.415053: step: 824/470, loss: 0.43427425622940063 2023-01-24 02:24:34.089102: step: 826/470, loss: 0.3225962817668915 2023-01-24 02:24:34.842465: step: 828/470, loss: 1.4615715742111206 2023-01-24 02:24:35.577971: step: 830/470, loss: 0.1873364895582199 2023-01-24 02:24:36.389077: step: 832/470, loss: 0.9489684104919434 2023-01-24 02:24:37.054534: step: 834/470, loss: 0.24692928791046143 2023-01-24 02:24:37.830737: step: 836/470, loss: 0.5066057443618774 2023-01-24 02:24:38.608449: step: 838/470, loss: 0.8843993544578552 2023-01-24 02:24:39.341889: step: 840/470, loss: 1.108005166053772 2023-01-24 02:24:40.074603: step: 842/470, loss: 0.8157863616943359 2023-01-24 02:24:40.812518: step: 844/470, loss: 0.5103515386581421 2023-01-24 02:24:41.572767: step: 846/470, loss: 0.9719839096069336 2023-01-24 02:24:42.232166: step: 848/470, loss: 0.6897994875907898 2023-01-24 02:24:42.937023: step: 850/470, loss: 0.6400911211967468 2023-01-24 02:24:43.690579: step: 852/470, loss: 0.7998232841491699 2023-01-24 02:24:44.520496: step: 854/470, loss: 1.2332285642623901 2023-01-24 02:24:45.293835: step: 856/470, loss: 0.7018003463745117 2023-01-24 02:24:46.031337: step: 858/470, loss: 0.46146655082702637 2023-01-24 02:24:46.847040: step: 860/470, loss: 0.7082826495170593 2023-01-24 02:24:47.553603: step: 862/470, loss: 1.376923680305481 2023-01-24 02:24:48.199123: step: 864/470, loss: 0.12437704205513 2023-01-24 02:24:48.858313: step: 866/470, loss: 1.9006187915802002 2023-01-24 02:24:49.629121: step: 868/470, loss: 0.4418231248855591 2023-01-24 02:24:50.391339: step: 870/470, loss: 0.7229202389717102 2023-01-24 02:24:51.189940: step: 872/470, loss: 0.44860169291496277 2023-01-24 02:24:51.947740: step: 874/470, loss: 0.46400347352027893 2023-01-24 02:24:52.664562: step: 876/470, loss: 0.865450918674469 2023-01-24 02:24:53.360782: step: 878/470, loss: 0.2543260157108307 2023-01-24 02:24:54.217819: step: 880/470, loss: 0.8604297041893005 2023-01-24 02:24:55.042693: step: 882/470, loss: 0.4274219274520874 2023-01-24 02:24:55.834193: step: 884/470, loss: 0.669735848903656 2023-01-24 02:24:56.525373: step: 886/470, loss: 0.3069272041320801 2023-01-24 02:24:57.310548: step: 888/470, loss: 1.567513346672058 2023-01-24 02:24:58.020345: step: 890/470, loss: 0.3045634925365448 2023-01-24 02:24:58.831805: step: 892/470, loss: 0.5335885286331177 2023-01-24 02:24:59.548911: step: 894/470, loss: 1.0912373065948486 2023-01-24 02:25:00.316561: step: 896/470, loss: 0.39587855339050293 2023-01-24 02:25:01.093487: step: 898/470, loss: 0.22928422689437866 2023-01-24 02:25:01.836327: step: 900/470, loss: 0.5208638906478882 2023-01-24 02:25:02.618716: step: 902/470, loss: 1.4234001636505127 2023-01-24 02:25:03.449058: step: 904/470, loss: 0.8621724247932434 2023-01-24 02:25:04.195874: step: 906/470, loss: 0.5629292726516724 2023-01-24 02:25:04.985170: step: 908/470, loss: 0.2364557683467865 2023-01-24 02:25:05.734614: step: 910/470, loss: 0.18407753109931946 2023-01-24 02:25:06.413183: step: 912/470, loss: 1.1533396244049072 2023-01-24 02:25:07.116709: step: 914/470, loss: 0.2399020940065384 2023-01-24 02:25:07.841536: step: 916/470, loss: 0.565222978591919 2023-01-24 02:25:08.619833: step: 918/470, loss: 0.31371474266052246 2023-01-24 02:25:09.279533: step: 920/470, loss: 0.29471805691719055 2023-01-24 02:25:09.998150: step: 922/470, loss: 0.6195104718208313 2023-01-24 02:25:10.800986: step: 924/470, loss: 0.24831870198249817 2023-01-24 02:25:11.551763: step: 926/470, loss: 0.25780385732650757 2023-01-24 02:25:12.423309: step: 928/470, loss: 0.13444189727306366 2023-01-24 02:25:13.099844: step: 930/470, loss: 0.08237230777740479 2023-01-24 02:25:13.825961: step: 932/470, loss: 1.3069645166397095 2023-01-24 02:25:14.598808: step: 934/470, loss: 0.43915003538131714 2023-01-24 02:25:15.342985: step: 936/470, loss: 0.34764325618743896 2023-01-24 02:25:16.147472: step: 938/470, loss: 0.43518635630607605 2023-01-24 02:25:16.890890: step: 940/470, loss: 0.7495778799057007 2023-01-24 02:25:17.501955: step: 942/470, loss: 2.3461673259735107 ================================================== Loss: 0.732 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33634076736755314, 'r': 0.26805146545421693, 'f1': 0.2983381674643555}, 'combined': 0.21982812339478824, 'epoch': 6} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34650525317606024, 'r': 0.3042322780047253, 'f1': 0.32399570476001954}, 'combined': 0.21599713650667965, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34438401012862735, 'r': 0.27315467976046726, 'f1': 0.30466141001855285}, 'combined': 0.22448735475051262, 'epoch': 6} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3564830513109351, 'r': 0.3027195171554915, 'f1': 0.32740884920401725}, 'combined': 0.21827256613601145, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32656041388518026, 'r': 0.2652141501761995, 'f1': 0.29270755422587885}, 'combined': 0.2156792504822265, 'epoch': 6} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34543563262253457, 'r': 0.3039568102615194, 'f1': 0.3233715273196134}, 'combined': 0.21558101821307554, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22977941176470587, 'r': 0.22321428571428573, 'f1': 0.22644927536231882}, 'combined': 0.1509661835748792, 'epoch': 6} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.43333333333333335, 'r': 0.2826086956521739, 'f1': 0.34210526315789475}, 'combined': 0.22807017543859648, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 6} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213942479544022, 'r': 0.26284804718851484, 'f1': 0.2891877262387209}, 'combined': 0.21308569301800487, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35411825897044286, 'r': 0.2415215791248938, 'f1': 0.28717757152371726}, 'combined': 0.1914517143491448, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2785714285714286, 'f1': 0.319672131147541}, 'combined': 0.21311475409836067, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340823419483803, 'r': 0.2117334007794289, 'f1': 0.2591951270865483}, 'combined': 0.190985883116404, 'epoch': 4} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.37829910920569393, 'r': 0.25401640474042275, 'f1': 0.3039437670514713}, 'combined': 0.20262917803431416, 'epoch': 4} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5961538461538461, 'r': 0.33695652173913043, 'f1': 0.4305555555555555}, 'combined': 0.287037037037037, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32656041388518026, 'r': 0.2652141501761995, 'f1': 0.29270755422587885}, 'combined': 0.2156792504822265, 'epoch': 6} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34543563262253457, 'r': 0.3039568102615194, 'f1': 0.3233715273196134}, 'combined': 0.21558101821307554, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 6} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:27:59.183995: step: 2/470, loss: 0.3670995235443115 2023-01-24 02:27:59.962916: step: 4/470, loss: 0.4737393856048584 2023-01-24 02:28:00.710666: step: 6/470, loss: 0.24018116295337677 2023-01-24 02:28:01.379777: step: 8/470, loss: 2.259197473526001 2023-01-24 02:28:02.131821: step: 10/470, loss: 0.07207592576742172 2023-01-24 02:28:02.841468: step: 12/470, loss: 0.2164432555437088 2023-01-24 02:28:03.592683: step: 14/470, loss: 0.33616185188293457 2023-01-24 02:28:04.300478: step: 16/470, loss: 0.23780158162117004 2023-01-24 02:28:05.101675: step: 18/470, loss: 0.8281857371330261 2023-01-24 02:28:05.903733: step: 20/470, loss: 0.6098978519439697 2023-01-24 02:28:06.661319: step: 22/470, loss: 0.8348177671432495 2023-01-24 02:28:07.523373: step: 24/470, loss: 0.13545599579811096 2023-01-24 02:28:08.261942: step: 26/470, loss: 1.4059125185012817 2023-01-24 02:28:09.096681: step: 28/470, loss: 0.25741472840309143 2023-01-24 02:28:09.806585: step: 30/470, loss: 0.3914801776409149 2023-01-24 02:28:10.586288: step: 32/470, loss: 0.2636181116104126 2023-01-24 02:28:11.373935: step: 34/470, loss: 0.3287275433540344 2023-01-24 02:28:12.177835: step: 36/470, loss: 1.114621877670288 2023-01-24 02:28:12.901508: step: 38/470, loss: 0.4752127230167389 2023-01-24 02:28:13.596415: step: 40/470, loss: 0.4046996235847473 2023-01-24 02:28:14.281711: step: 42/470, loss: 1.4045734405517578 2023-01-24 02:28:15.083890: step: 44/470, loss: 0.22467926144599915 2023-01-24 02:28:15.919390: step: 46/470, loss: 0.46707257628440857 2023-01-24 02:28:16.646006: step: 48/470, loss: 0.8245423436164856 2023-01-24 02:28:17.398724: step: 50/470, loss: 0.4884158968925476 2023-01-24 02:28:18.152953: step: 52/470, loss: 0.38864484429359436 2023-01-24 02:28:19.013679: step: 54/470, loss: 0.3698347210884094 2023-01-24 02:28:19.744192: step: 56/470, loss: 0.5932155847549438 2023-01-24 02:28:20.486757: step: 58/470, loss: 0.5183910131454468 2023-01-24 02:28:21.233145: step: 60/470, loss: 0.20385155081748962 2023-01-24 02:28:21.945317: step: 62/470, loss: 0.35150226950645447 2023-01-24 02:28:22.727492: step: 64/470, loss: 2.04789400100708 2023-01-24 02:28:23.446825: step: 66/470, loss: 0.38406845927238464 2023-01-24 02:28:24.147202: step: 68/470, loss: 0.18193836510181427 2023-01-24 02:28:24.956652: step: 70/470, loss: 0.5214804410934448 2023-01-24 02:28:25.671951: step: 72/470, loss: 0.5263029336929321 2023-01-24 02:28:26.441645: step: 74/470, loss: 0.3935713469982147 2023-01-24 02:28:27.224884: step: 76/470, loss: 0.43086206912994385 2023-01-24 02:28:28.026164: step: 78/470, loss: 0.2726958990097046 2023-01-24 02:28:28.703466: step: 80/470, loss: 0.23460204899311066 2023-01-24 02:28:29.461417: step: 82/470, loss: 0.3521403968334198 2023-01-24 02:28:30.162771: step: 84/470, loss: 0.3516944646835327 2023-01-24 02:28:30.832872: step: 86/470, loss: 1.4504263401031494 2023-01-24 02:28:31.586832: step: 88/470, loss: 0.3307155668735504 2023-01-24 02:28:32.350650: step: 90/470, loss: 0.9473370313644409 2023-01-24 02:28:33.123108: step: 92/470, loss: 0.19111379981040955 2023-01-24 02:28:33.894596: step: 94/470, loss: 0.723115861415863 2023-01-24 02:28:34.702923: step: 96/470, loss: 0.24940860271453857 2023-01-24 02:28:35.486733: step: 98/470, loss: 0.276034414768219 2023-01-24 02:28:36.377538: step: 100/470, loss: 0.7983378171920776 2023-01-24 02:28:37.143515: step: 102/470, loss: 0.6610528826713562 2023-01-24 02:28:37.904219: step: 104/470, loss: 0.3130510449409485 2023-01-24 02:28:38.686315: step: 106/470, loss: 1.4086874723434448 2023-01-24 02:28:39.350300: step: 108/470, loss: 0.41898658871650696 2023-01-24 02:28:40.098966: step: 110/470, loss: 0.28429749608039856 2023-01-24 02:28:40.855804: step: 112/470, loss: 0.3643468916416168 2023-01-24 02:28:41.578883: step: 114/470, loss: 0.07166903465986252 2023-01-24 02:28:42.378341: step: 116/470, loss: 0.2408754527568817 2023-01-24 02:28:43.131869: step: 118/470, loss: 0.5108166933059692 2023-01-24 02:28:43.893444: step: 120/470, loss: 1.3073979616165161 2023-01-24 02:28:44.678702: step: 122/470, loss: 0.13918791711330414 2023-01-24 02:28:45.423375: step: 124/470, loss: 0.22650255262851715 2023-01-24 02:28:46.184092: step: 126/470, loss: 0.3875696361064911 2023-01-24 02:28:46.932593: step: 128/470, loss: 0.9161421060562134 2023-01-24 02:28:47.677584: step: 130/470, loss: 0.273093581199646 2023-01-24 02:28:48.524095: step: 132/470, loss: 0.5050501227378845 2023-01-24 02:28:49.280470: step: 134/470, loss: 0.2323817014694214 2023-01-24 02:28:49.958566: step: 136/470, loss: 0.4136822521686554 2023-01-24 02:28:50.694041: step: 138/470, loss: 0.6526037454605103 2023-01-24 02:28:51.427720: step: 140/470, loss: 0.17813269793987274 2023-01-24 02:28:52.184205: step: 142/470, loss: 0.4769699275493622 2023-01-24 02:28:52.933610: step: 144/470, loss: 0.40157580375671387 2023-01-24 02:28:53.729563: step: 146/470, loss: 0.8085362911224365 2023-01-24 02:28:54.485370: step: 148/470, loss: 0.33716344833374023 2023-01-24 02:28:55.276247: step: 150/470, loss: 1.0862269401550293 2023-01-24 02:28:56.083942: step: 152/470, loss: 0.640563428401947 2023-01-24 02:28:56.863967: step: 154/470, loss: 0.4485277235507965 2023-01-24 02:28:57.701601: step: 156/470, loss: 0.5932475328445435 2023-01-24 02:28:58.459914: step: 158/470, loss: 0.26284921169281006 2023-01-24 02:28:59.244221: step: 160/470, loss: 0.22903652489185333 2023-01-24 02:29:00.009097: step: 162/470, loss: 1.9001516103744507 2023-01-24 02:29:00.730500: step: 164/470, loss: 1.4668172597885132 2023-01-24 02:29:01.515881: step: 166/470, loss: 0.4874824285507202 2023-01-24 02:29:02.215785: step: 168/470, loss: 0.8743273019790649 2023-01-24 02:29:02.945368: step: 170/470, loss: 0.4101831614971161 2023-01-24 02:29:03.581903: step: 172/470, loss: 0.362973153591156 2023-01-24 02:29:04.319238: step: 174/470, loss: 0.43648961186408997 2023-01-24 02:29:05.162526: step: 176/470, loss: 0.13790124654769897 2023-01-24 02:29:05.924596: step: 178/470, loss: 0.08411405980587006 2023-01-24 02:29:06.670349: step: 180/470, loss: 0.24036382138729095 2023-01-24 02:29:07.501448: step: 182/470, loss: 0.6687015891075134 2023-01-24 02:29:08.288878: step: 184/470, loss: 0.35721492767333984 2023-01-24 02:29:09.120662: step: 186/470, loss: 0.48564645648002625 2023-01-24 02:29:09.855244: step: 188/470, loss: 1.280880331993103 2023-01-24 02:29:10.580736: step: 190/470, loss: 0.5749147534370422 2023-01-24 02:29:11.407455: step: 192/470, loss: 3.5238230228424072 2023-01-24 02:29:12.261257: step: 194/470, loss: 0.6828189492225647 2023-01-24 02:29:13.097847: step: 196/470, loss: 1.2112079858779907 2023-01-24 02:29:13.836243: step: 198/470, loss: 0.612357497215271 2023-01-24 02:29:14.597716: step: 200/470, loss: 0.1848372370004654 2023-01-24 02:29:15.358442: step: 202/470, loss: 0.24752122163772583 2023-01-24 02:29:16.143924: step: 204/470, loss: 0.7347160577774048 2023-01-24 02:29:16.888284: step: 206/470, loss: 0.28709715604782104 2023-01-24 02:29:17.622875: step: 208/470, loss: 1.068163514137268 2023-01-24 02:29:18.388697: step: 210/470, loss: 0.9789504408836365 2023-01-24 02:29:19.209405: step: 212/470, loss: 0.8234558701515198 2023-01-24 02:29:19.993543: step: 214/470, loss: 1.2307389974594116 2023-01-24 02:29:20.739025: step: 216/470, loss: 0.13372446596622467 2023-01-24 02:29:21.407526: step: 218/470, loss: 0.11340329051017761 2023-01-24 02:29:22.143455: step: 220/470, loss: 0.5686858892440796 2023-01-24 02:29:22.842656: step: 222/470, loss: 0.3490334451198578 2023-01-24 02:29:23.721475: step: 224/470, loss: 0.43614357709884644 2023-01-24 02:29:24.448144: step: 226/470, loss: 0.44121402502059937 2023-01-24 02:29:25.272241: step: 228/470, loss: 0.9783852100372314 2023-01-24 02:29:26.071123: step: 230/470, loss: 0.8979207873344421 2023-01-24 02:29:26.776228: step: 232/470, loss: 0.3936818242073059 2023-01-24 02:29:27.429564: step: 234/470, loss: 0.7134736776351929 2023-01-24 02:29:28.152524: step: 236/470, loss: 0.35246771574020386 2023-01-24 02:29:28.909522: step: 238/470, loss: 1.3471710681915283 2023-01-24 02:29:29.670397: step: 240/470, loss: 0.5043021440505981 2023-01-24 02:29:30.365145: step: 242/470, loss: 1.0671354532241821 2023-01-24 02:29:31.083754: step: 244/470, loss: 0.628767728805542 2023-01-24 02:29:31.864163: step: 246/470, loss: 0.5917458534240723 2023-01-24 02:29:32.556866: step: 248/470, loss: 0.9961802363395691 2023-01-24 02:29:33.316242: step: 250/470, loss: 0.8598777651786804 2023-01-24 02:29:34.113999: step: 252/470, loss: 0.2611555755138397 2023-01-24 02:29:34.858627: step: 254/470, loss: 0.2617139518260956 2023-01-24 02:29:35.636328: step: 256/470, loss: 1.007016897201538 2023-01-24 02:29:36.378131: step: 258/470, loss: 0.6046254634857178 2023-01-24 02:29:37.166854: step: 260/470, loss: 0.4044310450553894 2023-01-24 02:29:37.848441: step: 262/470, loss: 1.1649810075759888 2023-01-24 02:29:38.582446: step: 264/470, loss: 0.6738048195838928 2023-01-24 02:29:39.278496: step: 266/470, loss: 1.02718186378479 2023-01-24 02:29:40.020328: step: 268/470, loss: 1.9333301782608032 2023-01-24 02:29:41.005823: step: 270/470, loss: 0.3444717824459076 2023-01-24 02:29:41.769839: step: 272/470, loss: 1.909464955329895 2023-01-24 02:29:42.552351: step: 274/470, loss: 0.5475513935089111 2023-01-24 02:29:43.301179: step: 276/470, loss: 0.24307137727737427 2023-01-24 02:29:44.068254: step: 278/470, loss: 0.22565793991088867 2023-01-24 02:29:44.755030: step: 280/470, loss: 0.11121651530265808 2023-01-24 02:29:45.610975: step: 282/470, loss: 0.6998484134674072 2023-01-24 02:29:46.326711: step: 284/470, loss: 0.8299077749252319 2023-01-24 02:29:46.996966: step: 286/470, loss: 0.19652137160301208 2023-01-24 02:29:47.762330: step: 288/470, loss: 0.29388874769210815 2023-01-24 02:29:48.497627: step: 290/470, loss: 0.4312860369682312 2023-01-24 02:29:49.237900: step: 292/470, loss: 0.6240105032920837 2023-01-24 02:29:49.964759: step: 294/470, loss: 0.16214123368263245 2023-01-24 02:29:50.704997: step: 296/470, loss: 0.5396556854248047 2023-01-24 02:29:51.408587: step: 298/470, loss: 0.5401109457015991 2023-01-24 02:29:52.125455: step: 300/470, loss: 0.9766262769699097 2023-01-24 02:29:52.828992: step: 302/470, loss: 0.8032498359680176 2023-01-24 02:29:53.663709: step: 304/470, loss: 0.5323628783226013 2023-01-24 02:29:54.429567: step: 306/470, loss: 0.35838931798934937 2023-01-24 02:29:55.290537: step: 308/470, loss: 0.15917706489562988 2023-01-24 02:29:56.130631: step: 310/470, loss: 0.825878381729126 2023-01-24 02:29:56.911144: step: 312/470, loss: 0.5406302213668823 2023-01-24 02:29:57.664005: step: 314/470, loss: 0.8202871680259705 2023-01-24 02:29:58.343869: step: 316/470, loss: 0.14145931601524353 2023-01-24 02:29:59.061699: step: 318/470, loss: 1.2496742010116577 2023-01-24 02:29:59.786041: step: 320/470, loss: 0.45300769805908203 2023-01-24 02:30:00.481649: step: 322/470, loss: 0.2567143440246582 2023-01-24 02:30:01.190375: step: 324/470, loss: 0.3849056363105774 2023-01-24 02:30:02.017782: step: 326/470, loss: 0.17785045504570007 2023-01-24 02:30:02.741161: step: 328/470, loss: 0.5878977179527283 2023-01-24 02:30:03.435592: step: 330/470, loss: 0.3370095491409302 2023-01-24 02:30:04.121655: step: 332/470, loss: 0.2186794877052307 2023-01-24 02:30:04.894762: step: 334/470, loss: 0.30272671580314636 2023-01-24 02:30:05.610665: step: 336/470, loss: 0.4330812394618988 2023-01-24 02:30:06.336332: step: 338/470, loss: 0.1457645744085312 2023-01-24 02:30:07.017822: step: 340/470, loss: 0.5812200307846069 2023-01-24 02:30:07.669101: step: 342/470, loss: 0.4354366958141327 2023-01-24 02:30:08.438290: step: 344/470, loss: 1.0845625400543213 2023-01-24 02:30:09.182286: step: 346/470, loss: 0.5721340775489807 2023-01-24 02:30:09.919331: step: 348/470, loss: 0.5678675174713135 2023-01-24 02:30:10.711628: step: 350/470, loss: 0.46091604232788086 2023-01-24 02:30:11.500489: step: 352/470, loss: 1.013948678970337 2023-01-24 02:30:12.256410: step: 354/470, loss: 0.4761132597923279 2023-01-24 02:30:12.987059: step: 356/470, loss: 0.34836894273757935 2023-01-24 02:30:13.694677: step: 358/470, loss: 0.24478018283843994 2023-01-24 02:30:14.425361: step: 360/470, loss: 2.375002384185791 2023-01-24 02:30:15.117098: step: 362/470, loss: 0.3373410105705261 2023-01-24 02:30:15.852766: step: 364/470, loss: 0.435524046421051 2023-01-24 02:30:16.644525: step: 366/470, loss: 0.9048151969909668 2023-01-24 02:30:17.396028: step: 368/470, loss: 0.6413469314575195 2023-01-24 02:30:18.167206: step: 370/470, loss: 0.27448025345802307 2023-01-24 02:30:19.016094: step: 372/470, loss: 1.3750803470611572 2023-01-24 02:30:19.829541: step: 374/470, loss: 0.48742660880088806 2023-01-24 02:30:20.610114: step: 376/470, loss: 0.8035762310028076 2023-01-24 02:30:21.367696: step: 378/470, loss: 0.5545987486839294 2023-01-24 02:30:22.092692: step: 380/470, loss: 0.28187888860702515 2023-01-24 02:30:22.779194: step: 382/470, loss: 0.30163049697875977 2023-01-24 02:30:23.534026: step: 384/470, loss: 0.9044409990310669 2023-01-24 02:30:24.259865: step: 386/470, loss: 1.142310619354248 2023-01-24 02:30:25.008688: step: 388/470, loss: 1.2953393459320068 2023-01-24 02:30:25.726593: step: 390/470, loss: 0.15360940992832184 2023-01-24 02:30:26.468264: step: 392/470, loss: 0.46768492460250854 2023-01-24 02:30:27.199210: step: 394/470, loss: 0.44785362482070923 2023-01-24 02:30:27.925870: step: 396/470, loss: 0.46685466170310974 2023-01-24 02:30:28.694293: step: 398/470, loss: 0.939845085144043 2023-01-24 02:30:29.383404: step: 400/470, loss: 0.7884602546691895 2023-01-24 02:30:30.137214: step: 402/470, loss: 1.0477254390716553 2023-01-24 02:30:30.907061: step: 404/470, loss: 0.7993929982185364 2023-01-24 02:30:31.686678: step: 406/470, loss: 0.8812851905822754 2023-01-24 02:30:32.411648: step: 408/470, loss: 0.2269768863916397 2023-01-24 02:30:33.097679: step: 410/470, loss: 1.8129310607910156 2023-01-24 02:30:33.840139: step: 412/470, loss: 0.3134598731994629 2023-01-24 02:30:34.557648: step: 414/470, loss: 0.1766653060913086 2023-01-24 02:30:35.310282: step: 416/470, loss: 0.9808623194694519 2023-01-24 02:30:36.133177: step: 418/470, loss: 0.9330800175666809 2023-01-24 02:30:36.861886: step: 420/470, loss: 0.4644604027271271 2023-01-24 02:30:37.635131: step: 422/470, loss: 0.5063079595565796 2023-01-24 02:30:38.374578: step: 424/470, loss: 0.5117875933647156 2023-01-24 02:30:39.067454: step: 426/470, loss: 0.3270796537399292 2023-01-24 02:30:39.777140: step: 428/470, loss: 0.7015947103500366 2023-01-24 02:30:40.547023: step: 430/470, loss: 0.3183510899543762 2023-01-24 02:30:41.264000: step: 432/470, loss: 0.8972214460372925 2023-01-24 02:30:42.032938: step: 434/470, loss: 0.9476366639137268 2023-01-24 02:30:42.720663: step: 436/470, loss: 0.8914029002189636 2023-01-24 02:30:43.435972: step: 438/470, loss: 0.702813446521759 2023-01-24 02:30:44.164073: step: 440/470, loss: 1.0427266359329224 2023-01-24 02:30:45.010007: step: 442/470, loss: 0.3731694221496582 2023-01-24 02:30:45.707042: step: 444/470, loss: 0.3855333924293518 2023-01-24 02:30:46.566269: step: 446/470, loss: 0.382285475730896 2023-01-24 02:30:47.245290: step: 448/470, loss: 0.3315264880657196 2023-01-24 02:30:48.093001: step: 450/470, loss: 0.6684935092926025 2023-01-24 02:30:48.989069: step: 452/470, loss: 0.30676472187042236 2023-01-24 02:30:49.776061: step: 454/470, loss: 0.5190876126289368 2023-01-24 02:30:50.571879: step: 456/470, loss: 0.26106128096580505 2023-01-24 02:30:51.319279: step: 458/470, loss: 1.2300512790679932 2023-01-24 02:30:52.171713: step: 460/470, loss: 0.28473010659217834 2023-01-24 02:30:52.899626: step: 462/470, loss: 0.5453563332557678 2023-01-24 02:30:53.646587: step: 464/470, loss: 0.6540701985359192 2023-01-24 02:30:54.321586: step: 466/470, loss: 0.8440691232681274 2023-01-24 02:30:55.172017: step: 468/470, loss: 0.5105758309364319 2023-01-24 02:30:55.973927: step: 470/470, loss: 0.20678004622459412 2023-01-24 02:30:56.719909: step: 472/470, loss: 0.45606935024261475 2023-01-24 02:30:57.425726: step: 474/470, loss: 1.512797236442566 2023-01-24 02:30:58.269920: step: 476/470, loss: 0.2980559766292572 2023-01-24 02:30:59.014212: step: 478/470, loss: 0.6002671718597412 2023-01-24 02:30:59.722908: step: 480/470, loss: 0.27965351939201355 2023-01-24 02:31:00.475418: step: 482/470, loss: 0.5044351816177368 2023-01-24 02:31:01.210809: step: 484/470, loss: 0.1443350464105606 2023-01-24 02:31:01.886986: step: 486/470, loss: 2.0403623580932617 2023-01-24 02:31:02.623170: step: 488/470, loss: 0.4997430443763733 2023-01-24 02:31:03.360348: step: 490/470, loss: 0.14253254234790802 2023-01-24 02:31:04.093545: step: 492/470, loss: 1.0414563417434692 2023-01-24 02:31:04.832836: step: 494/470, loss: 2.314917802810669 2023-01-24 02:31:05.612464: step: 496/470, loss: 0.8212421536445618 2023-01-24 02:31:06.326939: step: 498/470, loss: 0.17680610716342926 2023-01-24 02:31:07.038831: step: 500/470, loss: 0.41409242153167725 2023-01-24 02:31:07.799520: step: 502/470, loss: 0.43370938301086426 2023-01-24 02:31:08.565579: step: 504/470, loss: 0.8938826322555542 2023-01-24 02:31:09.309127: step: 506/470, loss: 0.12901009619235992 2023-01-24 02:31:10.079959: step: 508/470, loss: 0.2584211528301239 2023-01-24 02:31:10.873263: step: 510/470, loss: 0.46783918142318726 2023-01-24 02:31:11.654915: step: 512/470, loss: 0.5496209263801575 2023-01-24 02:31:12.420602: step: 514/470, loss: 0.3696674108505249 2023-01-24 02:31:13.204443: step: 516/470, loss: 0.31152698397636414 2023-01-24 02:31:13.957995: step: 518/470, loss: 0.3273497223854065 2023-01-24 02:31:14.690205: step: 520/470, loss: 1.6968348026275635 2023-01-24 02:31:15.396851: step: 522/470, loss: 0.9195625185966492 2023-01-24 02:31:16.094639: step: 524/470, loss: 0.1977846920490265 2023-01-24 02:31:16.898044: step: 526/470, loss: 0.2801767587661743 2023-01-24 02:31:17.590375: step: 528/470, loss: 0.9938501715660095 2023-01-24 02:31:18.306256: step: 530/470, loss: 0.26219820976257324 2023-01-24 02:31:19.130972: step: 532/470, loss: 0.6336964964866638 2023-01-24 02:31:19.882788: step: 534/470, loss: 2.7305359840393066 2023-01-24 02:31:20.680280: step: 536/470, loss: 0.3516845703125 2023-01-24 02:31:21.368065: step: 538/470, loss: 0.5453897714614868 2023-01-24 02:31:22.130228: step: 540/470, loss: 0.6060112118721008 2023-01-24 02:31:22.853633: step: 542/470, loss: 0.27010199427604675 2023-01-24 02:31:23.538448: step: 544/470, loss: 2.1664516925811768 2023-01-24 02:31:24.301711: step: 546/470, loss: 0.24300040304660797 2023-01-24 02:31:25.027648: step: 548/470, loss: 0.11754722148180008 2023-01-24 02:31:25.831224: step: 550/470, loss: 0.48181718587875366 2023-01-24 02:31:26.566612: step: 552/470, loss: 0.7739644050598145 2023-01-24 02:31:27.318604: step: 554/470, loss: 0.6491574048995972 2023-01-24 02:31:28.059724: step: 556/470, loss: 0.49489882588386536 2023-01-24 02:31:28.787132: step: 558/470, loss: 1.208168625831604 2023-01-24 02:31:29.523483: step: 560/470, loss: 0.798096239566803 2023-01-24 02:31:30.207369: step: 562/470, loss: 0.31834012269973755 2023-01-24 02:31:30.985134: step: 564/470, loss: 2.36625337600708 2023-01-24 02:31:31.749712: step: 566/470, loss: 0.34011802077293396 2023-01-24 02:31:32.486841: step: 568/470, loss: 0.841728687286377 2023-01-24 02:31:33.253631: step: 570/470, loss: 0.7265416383743286 2023-01-24 02:31:33.979661: step: 572/470, loss: 0.9253761768341064 2023-01-24 02:31:34.851493: step: 574/470, loss: 0.23297470808029175 2023-01-24 02:31:35.561282: step: 576/470, loss: 1.5338727235794067 2023-01-24 02:31:36.229742: step: 578/470, loss: 1.0285563468933105 2023-01-24 02:31:36.946504: step: 580/470, loss: 0.3040406107902527 2023-01-24 02:31:37.615146: step: 582/470, loss: 1.5267548561096191 2023-01-24 02:31:38.332530: step: 584/470, loss: 0.6100663542747498 2023-01-24 02:31:39.045194: step: 586/470, loss: 1.353019118309021 2023-01-24 02:31:39.813769: step: 588/470, loss: 0.6592656970024109 2023-01-24 02:31:40.532376: step: 590/470, loss: 0.1781897246837616 2023-01-24 02:31:41.337283: step: 592/470, loss: 0.27507659792900085 2023-01-24 02:31:42.038197: step: 594/470, loss: 0.3861004710197449 2023-01-24 02:31:42.863482: step: 596/470, loss: 0.4841284453868866 2023-01-24 02:31:43.604942: step: 598/470, loss: 3.2577338218688965 2023-01-24 02:31:44.368522: step: 600/470, loss: 0.2953993082046509 2023-01-24 02:31:45.165000: step: 602/470, loss: 0.5135447382926941 2023-01-24 02:31:45.897603: step: 604/470, loss: 0.3044143617153168 2023-01-24 02:31:46.724126: step: 606/470, loss: 0.5463592410087585 2023-01-24 02:31:47.464777: step: 608/470, loss: 0.32110247015953064 2023-01-24 02:31:48.201641: step: 610/470, loss: 0.3176352381706238 2023-01-24 02:31:48.986256: step: 612/470, loss: 0.2889813184738159 2023-01-24 02:31:49.771866: step: 614/470, loss: 0.11909591406583786 2023-01-24 02:31:50.544641: step: 616/470, loss: 0.6773265600204468 2023-01-24 02:31:51.458979: step: 618/470, loss: 0.5883302092552185 2023-01-24 02:31:52.195184: step: 620/470, loss: 0.2453279346227646 2023-01-24 02:31:52.900416: step: 622/470, loss: 0.7039670944213867 2023-01-24 02:31:53.671178: step: 624/470, loss: 0.38650938868522644 2023-01-24 02:31:54.415392: step: 626/470, loss: 0.34423843026161194 2023-01-24 02:31:55.147899: step: 628/470, loss: 0.939646303653717 2023-01-24 02:31:55.949603: step: 630/470, loss: 0.3436138927936554 2023-01-24 02:31:56.707997: step: 632/470, loss: 1.440124750137329 2023-01-24 02:31:57.496849: step: 634/470, loss: 0.5489599704742432 2023-01-24 02:31:58.162425: step: 636/470, loss: 1.5086581707000732 2023-01-24 02:31:58.915598: step: 638/470, loss: 0.39192065596580505 2023-01-24 02:31:59.650987: step: 640/470, loss: 1.1574674844741821 2023-01-24 02:32:00.388476: step: 642/470, loss: 1.1345126628875732 2023-01-24 02:32:01.261995: step: 644/470, loss: 0.14073410630226135 2023-01-24 02:32:02.045374: step: 646/470, loss: 0.8759070038795471 2023-01-24 02:32:02.778760: step: 648/470, loss: 0.3100970983505249 2023-01-24 02:32:03.443035: step: 650/470, loss: 0.4851211607456207 2023-01-24 02:32:04.176206: step: 652/470, loss: 0.8310330510139465 2023-01-24 02:32:04.867753: step: 654/470, loss: 0.20696614682674408 2023-01-24 02:32:05.734243: step: 656/470, loss: 0.4490260183811188 2023-01-24 02:32:06.429530: step: 658/470, loss: 0.7504600286483765 2023-01-24 02:32:07.212365: step: 660/470, loss: 9.698339462280273 2023-01-24 02:32:07.978491: step: 662/470, loss: 0.3065306544303894 2023-01-24 02:32:08.765306: step: 664/470, loss: 0.46533751487731934 2023-01-24 02:32:09.557774: step: 666/470, loss: 0.24716587364673615 2023-01-24 02:32:10.335934: step: 668/470, loss: 0.4091520309448242 2023-01-24 02:32:11.060539: step: 670/470, loss: 0.44841229915618896 2023-01-24 02:32:11.809041: step: 672/470, loss: 0.17277362942695618 2023-01-24 02:32:12.556619: step: 674/470, loss: 0.3857150375843048 2023-01-24 02:32:13.387147: step: 676/470, loss: 0.36999809741973877 2023-01-24 02:32:14.133022: step: 678/470, loss: 0.21060891449451447 2023-01-24 02:32:14.853671: step: 680/470, loss: 0.23017674684524536 2023-01-24 02:32:15.556294: step: 682/470, loss: 0.40644142031669617 2023-01-24 02:32:16.316903: step: 684/470, loss: 0.6531462073326111 2023-01-24 02:32:17.065422: step: 686/470, loss: 0.062062717974185944 2023-01-24 02:32:17.870591: step: 688/470, loss: 0.45378008484840393 2023-01-24 02:32:18.625651: step: 690/470, loss: 0.6536173224449158 2023-01-24 02:32:19.383371: step: 692/470, loss: 0.8225758075714111 2023-01-24 02:32:20.159335: step: 694/470, loss: 0.27153313159942627 2023-01-24 02:32:20.891608: step: 696/470, loss: 0.19550104439258575 2023-01-24 02:32:21.596308: step: 698/470, loss: 0.28255999088287354 2023-01-24 02:32:22.383185: step: 700/470, loss: 0.9339784979820251 2023-01-24 02:32:23.125045: step: 702/470, loss: 0.9983887672424316 2023-01-24 02:32:23.906326: step: 704/470, loss: 0.7024455070495605 2023-01-24 02:32:24.672687: step: 706/470, loss: 0.24352699518203735 2023-01-24 02:32:25.454028: step: 708/470, loss: 0.8246220350265503 2023-01-24 02:32:26.242537: step: 710/470, loss: 0.5286746621131897 2023-01-24 02:32:26.916641: step: 712/470, loss: 0.4412599802017212 2023-01-24 02:32:27.686144: step: 714/470, loss: 0.7559401988983154 2023-01-24 02:32:28.388107: step: 716/470, loss: 0.6777728796005249 2023-01-24 02:32:29.137238: step: 718/470, loss: 0.5251533389091492 2023-01-24 02:32:29.991721: step: 720/470, loss: 0.5137686729431152 2023-01-24 02:32:30.715824: step: 722/470, loss: 0.2733078598976135 2023-01-24 02:32:31.488994: step: 724/470, loss: 0.5248976349830627 2023-01-24 02:32:32.187433: step: 726/470, loss: 0.9532487988471985 2023-01-24 02:32:32.912396: step: 728/470, loss: 0.1258946657180786 2023-01-24 02:32:33.785936: step: 730/470, loss: 0.9458543658256531 2023-01-24 02:32:34.566361: step: 732/470, loss: 1.3671302795410156 2023-01-24 02:32:35.305694: step: 734/470, loss: 0.15608762204647064 2023-01-24 02:32:36.085885: step: 736/470, loss: 0.7429428100585938 2023-01-24 02:32:36.804338: step: 738/470, loss: 0.2898152768611908 2023-01-24 02:32:37.554195: step: 740/470, loss: 1.8488043546676636 2023-01-24 02:32:38.282309: step: 742/470, loss: 0.16220681369304657 2023-01-24 02:32:39.009366: step: 744/470, loss: 0.5282084345817566 2023-01-24 02:32:39.818842: step: 746/470, loss: 0.7009122371673584 2023-01-24 02:32:40.547040: step: 748/470, loss: 0.4774022698402405 2023-01-24 02:32:41.257063: step: 750/470, loss: 0.8804485201835632 2023-01-24 02:32:42.069431: step: 752/470, loss: 0.2241060435771942 2023-01-24 02:32:42.801231: step: 754/470, loss: 0.3125775158405304 2023-01-24 02:32:43.497229: step: 756/470, loss: 3.320235013961792 2023-01-24 02:32:44.258352: step: 758/470, loss: 0.3262557089328766 2023-01-24 02:32:44.995896: step: 760/470, loss: 0.7048736214637756 2023-01-24 02:32:45.710383: step: 762/470, loss: 0.39851272106170654 2023-01-24 02:32:46.496492: step: 764/470, loss: 0.2439843863248825 2023-01-24 02:32:47.236524: step: 766/470, loss: 0.309922993183136 2023-01-24 02:32:47.909356: step: 768/470, loss: 0.19634529948234558 2023-01-24 02:32:48.653240: step: 770/470, loss: 0.46125590801239014 2023-01-24 02:32:49.452057: step: 772/470, loss: 0.9932135343551636 2023-01-24 02:32:50.205105: step: 774/470, loss: 0.3272228240966797 2023-01-24 02:32:51.010470: step: 776/470, loss: 0.6332528591156006 2023-01-24 02:32:51.773446: step: 778/470, loss: 0.6290199160575867 2023-01-24 02:32:52.529657: step: 780/470, loss: 0.17212772369384766 2023-01-24 02:32:53.334493: step: 782/470, loss: 0.4054405689239502 2023-01-24 02:32:54.033321: step: 784/470, loss: 0.21502184867858887 2023-01-24 02:32:54.827695: step: 786/470, loss: 0.2917923927307129 2023-01-24 02:32:55.612541: step: 788/470, loss: 0.39834240078926086 2023-01-24 02:32:56.352568: step: 790/470, loss: 0.7714516520500183 2023-01-24 02:32:57.054384: step: 792/470, loss: 1.9584661722183228 2023-01-24 02:32:57.809401: step: 794/470, loss: 1.415565013885498 2023-01-24 02:32:58.533669: step: 796/470, loss: 0.8934961557388306 2023-01-24 02:32:59.485964: step: 798/470, loss: 0.19620083272457123 2023-01-24 02:33:00.191148: step: 800/470, loss: 1.1191173791885376 2023-01-24 02:33:00.951734: step: 802/470, loss: 0.15313296020030975 2023-01-24 02:33:01.695237: step: 804/470, loss: 0.5882033109664917 2023-01-24 02:33:02.411703: step: 806/470, loss: 0.44391781091690063 2023-01-24 02:33:03.176284: step: 808/470, loss: 0.5112501382827759 2023-01-24 02:33:03.916864: step: 810/470, loss: 0.6775764226913452 2023-01-24 02:33:04.652418: step: 812/470, loss: 0.6724539399147034 2023-01-24 02:33:05.434480: step: 814/470, loss: 0.7998963594436646 2023-01-24 02:33:06.105109: step: 816/470, loss: 0.40762490034103394 2023-01-24 02:33:06.842490: step: 818/470, loss: 0.32030996680259705 2023-01-24 02:33:07.555792: step: 820/470, loss: 0.15251174569129944 2023-01-24 02:33:08.268208: step: 822/470, loss: 0.2601527273654938 2023-01-24 02:33:09.037998: step: 824/470, loss: 0.21793058514595032 2023-01-24 02:33:09.849033: step: 826/470, loss: 0.7894784808158875 2023-01-24 02:33:10.593233: step: 828/470, loss: 0.30823183059692383 2023-01-24 02:33:11.333479: step: 830/470, loss: 0.25014978647232056 2023-01-24 02:33:12.122248: step: 832/470, loss: 0.3014823794364929 2023-01-24 02:33:12.890203: step: 834/470, loss: 0.16695991158485413 2023-01-24 02:33:13.612845: step: 836/470, loss: 0.4740227162837982 2023-01-24 02:33:14.380837: step: 838/470, loss: 0.6113675236701965 2023-01-24 02:33:15.079591: step: 840/470, loss: 0.5166838765144348 2023-01-24 02:33:15.856083: step: 842/470, loss: 0.8357441425323486 2023-01-24 02:33:16.603704: step: 844/470, loss: 0.07860533893108368 2023-01-24 02:33:17.372642: step: 846/470, loss: 0.90674889087677 2023-01-24 02:33:18.156991: step: 848/470, loss: 0.928524374961853 2023-01-24 02:33:18.962558: step: 850/470, loss: 0.3567863702774048 2023-01-24 02:33:19.717271: step: 852/470, loss: 0.1565251648426056 2023-01-24 02:33:20.452310: step: 854/470, loss: 0.9612409472465515 2023-01-24 02:33:21.207651: step: 856/470, loss: 0.7247118353843689 2023-01-24 02:33:21.936020: step: 858/470, loss: 0.47365325689315796 2023-01-24 02:33:22.646774: step: 860/470, loss: 0.716667652130127 2023-01-24 02:33:23.378427: step: 862/470, loss: 0.808570384979248 2023-01-24 02:33:24.149724: step: 864/470, loss: 0.3732510507106781 2023-01-24 02:33:24.979746: step: 866/470, loss: 0.23475761711597443 2023-01-24 02:33:25.692199: step: 868/470, loss: 0.642833948135376 2023-01-24 02:33:26.391199: step: 870/470, loss: 0.1018008217215538 2023-01-24 02:33:27.120363: step: 872/470, loss: 0.676296591758728 2023-01-24 02:33:27.861440: step: 874/470, loss: 1.0268975496292114 2023-01-24 02:33:28.617077: step: 876/470, loss: 0.5832228064537048 2023-01-24 02:33:29.282926: step: 878/470, loss: 0.17225563526153564 2023-01-24 02:33:30.027310: step: 880/470, loss: 0.3411220908164978 2023-01-24 02:33:30.830371: step: 882/470, loss: 0.9757616519927979 2023-01-24 02:33:31.507457: step: 884/470, loss: 0.26120713353157043 2023-01-24 02:33:32.227136: step: 886/470, loss: 1.29201340675354 2023-01-24 02:33:32.902327: step: 888/470, loss: 0.48865917325019836 2023-01-24 02:33:33.692087: step: 890/470, loss: 0.9293355345726013 2023-01-24 02:33:34.417613: step: 892/470, loss: 0.2673357129096985 2023-01-24 02:33:35.118658: step: 894/470, loss: 0.4621124565601349 2023-01-24 02:33:35.931638: step: 896/470, loss: 0.4767850935459137 2023-01-24 02:33:36.663191: step: 898/470, loss: 0.9976144433021545 2023-01-24 02:33:37.385783: step: 900/470, loss: 0.17807906866073608 2023-01-24 02:33:38.246265: step: 902/470, loss: 0.3325049877166748 2023-01-24 02:33:39.034596: step: 904/470, loss: 0.32338327169418335 2023-01-24 02:33:39.738338: step: 906/470, loss: 0.3096674680709839 2023-01-24 02:33:40.546611: step: 908/470, loss: 1.2738778591156006 2023-01-24 02:33:41.280415: step: 910/470, loss: 0.6537337899208069 2023-01-24 02:33:42.011714: step: 912/470, loss: 0.7893470525741577 2023-01-24 02:33:42.800641: step: 914/470, loss: 0.5990859270095825 2023-01-24 02:33:43.466387: step: 916/470, loss: 0.527148425579071 2023-01-24 02:33:44.245027: step: 918/470, loss: 0.2752528786659241 2023-01-24 02:33:44.912355: step: 920/470, loss: 0.40670332312583923 2023-01-24 02:33:45.664325: step: 922/470, loss: 0.9234625101089478 2023-01-24 02:33:46.446531: step: 924/470, loss: 0.8040624856948853 2023-01-24 02:33:47.188435: step: 926/470, loss: 0.4370003640651703 2023-01-24 02:33:47.902511: step: 928/470, loss: 0.2960697412490845 2023-01-24 02:33:48.639956: step: 930/470, loss: 1.2495319843292236 2023-01-24 02:33:49.416237: step: 932/470, loss: 0.8773002624511719 2023-01-24 02:33:50.222823: step: 934/470, loss: 0.43924665451049805 2023-01-24 02:33:50.989347: step: 936/470, loss: 0.423836350440979 2023-01-24 02:33:51.772967: step: 938/470, loss: 0.36395108699798584 2023-01-24 02:33:52.582197: step: 940/470, loss: 0.4329916536808014 2023-01-24 02:33:53.225608: step: 942/470, loss: 0.15708792209625244 ================================================== Loss: 0.632 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30556972567402146, 'r': 0.3113680126887088, 'f1': 0.3084416215920104}, 'combined': 0.22727277380463923, 'epoch': 7} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3373148248574617, 'r': 0.3191517189035984, 'f1': 0.3279820036163461}, 'combined': 0.21865466907756403, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2894798876436626, 'r': 0.3152968795208014, 'f1': 0.3018373397047454}, 'combined': 0.22240646083507556, 'epoch': 7} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3358336786103416, 'r': 0.33066700663172094, 'f1': 0.333230316760649}, 'combined': 0.2221535445070993, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2717391304347826, 'r': 0.35714285714285715, 'f1': 0.308641975308642}, 'combined': 0.205761316872428, 'epoch': 7} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3486842105263158, 'r': 0.12023593466424683, 'f1': 0.17881241565452094}, 'combined': 0.11920827710301396, 'epoch': 7} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30556972567402146, 'r': 0.3113680126887088, 'f1': 0.3084416215920104}, 'combined': 0.22727277380463923, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3373148248574617, 'r': 0.3191517189035984, 'f1': 0.3279820036163461}, 'combined': 0.21865466907756403, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2717391304347826, 'r': 0.35714285714285715, 'f1': 0.308641975308642}, 'combined': 0.205761316872428, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32656041388518026, 'r': 0.2652141501761995, 'f1': 0.29270755422587885}, 'combined': 0.2156792504822265, 'epoch': 6} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34543563262253457, 'r': 0.3039568102615194, 'f1': 0.3233715273196134}, 'combined': 0.21558101821307554, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 6} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:36:40.578930: step: 2/470, loss: 0.439403235912323 2023-01-24 02:36:41.363369: step: 4/470, loss: 0.4706476330757141 2023-01-24 02:36:42.118968: step: 6/470, loss: 0.16600431501865387 2023-01-24 02:36:42.841555: step: 8/470, loss: 0.3319374620914459 2023-01-24 02:36:43.514239: step: 10/470, loss: 0.3293440639972687 2023-01-24 02:36:44.260131: step: 12/470, loss: 0.5663127899169922 2023-01-24 02:36:45.007122: step: 14/470, loss: 0.45440736413002014 2023-01-24 02:36:45.795862: step: 16/470, loss: 0.5999109745025635 2023-01-24 02:36:46.528398: step: 18/470, loss: 0.3009253442287445 2023-01-24 02:36:47.184671: step: 20/470, loss: 0.1967972069978714 2023-01-24 02:36:47.901623: step: 22/470, loss: 0.26259857416152954 2023-01-24 02:36:48.613011: step: 24/470, loss: 0.492567241191864 2023-01-24 02:36:49.340600: step: 26/470, loss: 0.24835701286792755 2023-01-24 02:36:50.066408: step: 28/470, loss: 0.11328727006912231 2023-01-24 02:36:50.803024: step: 30/470, loss: 0.38536766171455383 2023-01-24 02:36:51.643835: step: 32/470, loss: 0.14614824950695038 2023-01-24 02:36:52.442145: step: 34/470, loss: 0.8179748058319092 2023-01-24 02:36:53.190320: step: 36/470, loss: 0.6151262521743774 2023-01-24 02:36:53.913666: step: 38/470, loss: 0.11764610558748245 2023-01-24 02:36:54.659164: step: 40/470, loss: 0.6587837338447571 2023-01-24 02:36:55.387404: step: 42/470, loss: 0.168352410197258 2023-01-24 02:36:56.090294: step: 44/470, loss: 0.2563874423503876 2023-01-24 02:36:56.823559: step: 46/470, loss: 0.6100096702575684 2023-01-24 02:36:57.584289: step: 48/470, loss: 1.589992880821228 2023-01-24 02:36:58.396966: step: 50/470, loss: 0.5913103222846985 2023-01-24 02:36:59.119261: step: 52/470, loss: 0.4870699942111969 2023-01-24 02:36:59.908471: step: 54/470, loss: 0.1689032018184662 2023-01-24 02:37:00.662062: step: 56/470, loss: 0.1719590425491333 2023-01-24 02:37:01.447080: step: 58/470, loss: 0.3236994743347168 2023-01-24 02:37:02.230716: step: 60/470, loss: 0.4069594740867615 2023-01-24 02:37:02.905327: step: 62/470, loss: 0.25249919295310974 2023-01-24 02:37:03.680009: step: 64/470, loss: 0.6021767854690552 2023-01-24 02:37:04.375921: step: 66/470, loss: 1.9965308904647827 2023-01-24 02:37:05.096977: step: 68/470, loss: 0.6415644884109497 2023-01-24 02:37:05.809762: step: 70/470, loss: 0.22782424092292786 2023-01-24 02:37:06.568282: step: 72/470, loss: 0.39690855145454407 2023-01-24 02:37:07.331724: step: 74/470, loss: 0.31533282995224 2023-01-24 02:37:08.117140: step: 76/470, loss: 0.1685294210910797 2023-01-24 02:37:08.957398: step: 78/470, loss: 0.2691434323787689 2023-01-24 02:37:09.769188: step: 80/470, loss: 0.16956226527690887 2023-01-24 02:37:10.561037: step: 82/470, loss: 1.0252076387405396 2023-01-24 02:37:11.309599: step: 84/470, loss: 0.39817947149276733 2023-01-24 02:37:12.148882: step: 86/470, loss: 0.23661364614963531 2023-01-24 02:37:12.845104: step: 88/470, loss: 0.4424164593219757 2023-01-24 02:37:13.622078: step: 90/470, loss: 0.2330237776041031 2023-01-24 02:37:14.498327: step: 92/470, loss: 0.5269856452941895 2023-01-24 02:37:15.290255: step: 94/470, loss: 0.7007234692573547 2023-01-24 02:37:15.990936: step: 96/470, loss: 0.29682648181915283 2023-01-24 02:37:16.713665: step: 98/470, loss: 0.23675212264060974 2023-01-24 02:37:17.382829: step: 100/470, loss: 0.3501730263233185 2023-01-24 02:37:18.067038: step: 102/470, loss: 0.1919550895690918 2023-01-24 02:37:18.839774: step: 104/470, loss: 1.0851826667785645 2023-01-24 02:37:19.664149: step: 106/470, loss: 0.4781433343887329 2023-01-24 02:37:20.387466: step: 108/470, loss: 0.1578303575515747 2023-01-24 02:37:21.169786: step: 110/470, loss: 0.9350889325141907 2023-01-24 02:37:21.894143: step: 112/470, loss: 0.3748646378517151 2023-01-24 02:37:22.568277: step: 114/470, loss: 0.16980983316898346 2023-01-24 02:37:23.367459: step: 116/470, loss: 0.30513840913772583 2023-01-24 02:37:24.127735: step: 118/470, loss: 0.15757113695144653 2023-01-24 02:37:24.830812: step: 120/470, loss: 0.23493137955665588 2023-01-24 02:37:25.566183: step: 122/470, loss: 0.06003459915518761 2023-01-24 02:37:26.373995: step: 124/470, loss: 0.7464461326599121 2023-01-24 02:37:27.094618: step: 126/470, loss: 0.4456807076931 2023-01-24 02:37:27.887276: step: 128/470, loss: 0.14154116809368134 2023-01-24 02:37:28.646977: step: 130/470, loss: 0.3404453992843628 2023-01-24 02:37:29.367889: step: 132/470, loss: 0.27857649326324463 2023-01-24 02:37:30.172144: step: 134/470, loss: 0.029560066759586334 2023-01-24 02:37:30.913609: step: 136/470, loss: 0.18978171050548553 2023-01-24 02:37:31.691083: step: 138/470, loss: 0.5333614349365234 2023-01-24 02:37:32.420411: step: 140/470, loss: 0.13952665030956268 2023-01-24 02:37:33.106112: step: 142/470, loss: 1.607400894165039 2023-01-24 02:37:33.848527: step: 144/470, loss: 0.4059104919433594 2023-01-24 02:37:34.557739: step: 146/470, loss: 0.7796083688735962 2023-01-24 02:37:35.333979: step: 148/470, loss: 0.24871180951595306 2023-01-24 02:37:36.248495: step: 150/470, loss: 0.38717377185821533 2023-01-24 02:37:36.977902: step: 152/470, loss: 0.8463844656944275 2023-01-24 02:37:37.806675: step: 154/470, loss: 0.4317239820957184 2023-01-24 02:37:38.520689: step: 156/470, loss: 0.2562013864517212 2023-01-24 02:37:39.259645: step: 158/470, loss: 0.2918889820575714 2023-01-24 02:37:40.029532: step: 160/470, loss: 0.16746020317077637 2023-01-24 02:37:40.775108: step: 162/470, loss: 0.8833320140838623 2023-01-24 02:37:41.513278: step: 164/470, loss: 0.19680127501487732 2023-01-24 02:37:42.206646: step: 166/470, loss: 0.21358007192611694 2023-01-24 02:37:42.995616: step: 168/470, loss: 1.5736429691314697 2023-01-24 02:37:43.806113: step: 170/470, loss: 0.2858242094516754 2023-01-24 02:37:44.475424: step: 172/470, loss: 0.436594158411026 2023-01-24 02:37:45.267443: step: 174/470, loss: 0.25975707173347473 2023-01-24 02:37:46.020493: step: 176/470, loss: 0.5039308071136475 2023-01-24 02:37:46.775109: step: 178/470, loss: 0.5124750137329102 2023-01-24 02:37:47.576645: step: 180/470, loss: 0.3290499746799469 2023-01-24 02:37:48.315335: step: 182/470, loss: 0.3273886740207672 2023-01-24 02:37:49.110541: step: 184/470, loss: 0.18152934312820435 2023-01-24 02:37:49.774938: step: 186/470, loss: 0.14718110859394073 2023-01-24 02:37:50.490602: step: 188/470, loss: 0.18126271665096283 2023-01-24 02:37:51.305257: step: 190/470, loss: 0.34319642186164856 2023-01-24 02:37:51.980664: step: 192/470, loss: 0.179406076669693 2023-01-24 02:37:52.691580: step: 194/470, loss: 0.38715943694114685 2023-01-24 02:37:53.433368: step: 196/470, loss: 0.42315950989723206 2023-01-24 02:37:54.168478: step: 198/470, loss: 0.4406783878803253 2023-01-24 02:37:54.939440: step: 200/470, loss: 1.8089121580123901 2023-01-24 02:37:55.715274: step: 202/470, loss: 0.11779771745204926 2023-01-24 02:37:56.421778: step: 204/470, loss: 0.19911624491214752 2023-01-24 02:37:57.167012: step: 206/470, loss: 0.8379590511322021 2023-01-24 02:37:57.957465: step: 208/470, loss: 0.1292008012533188 2023-01-24 02:37:58.680590: step: 210/470, loss: 1.0208288431167603 2023-01-24 02:37:59.521807: step: 212/470, loss: 0.29888206720352173 2023-01-24 02:38:00.238057: step: 214/470, loss: 0.5137273073196411 2023-01-24 02:38:01.054316: step: 216/470, loss: 0.16628697514533997 2023-01-24 02:38:01.773646: step: 218/470, loss: 1.1978864669799805 2023-01-24 02:38:02.456731: step: 220/470, loss: 0.34227266907691956 2023-01-24 02:38:03.169817: step: 222/470, loss: 0.4677780866622925 2023-01-24 02:38:03.916408: step: 224/470, loss: 0.7787224054336548 2023-01-24 02:38:04.706740: step: 226/470, loss: 0.5353028178215027 2023-01-24 02:38:05.482122: step: 228/470, loss: 0.8215941786766052 2023-01-24 02:38:06.153785: step: 230/470, loss: 0.46585798263549805 2023-01-24 02:38:06.939600: step: 232/470, loss: 0.2040032297372818 2023-01-24 02:38:07.668709: step: 234/470, loss: 0.4652753174304962 2023-01-24 02:38:08.357457: step: 236/470, loss: 0.4290299713611603 2023-01-24 02:38:09.088621: step: 238/470, loss: 0.7905390858650208 2023-01-24 02:38:09.840783: step: 240/470, loss: 0.8647803664207458 2023-01-24 02:38:10.662689: step: 242/470, loss: 0.4174932837486267 2023-01-24 02:38:11.387727: step: 244/470, loss: 0.27988308668136597 2023-01-24 02:38:12.146963: step: 246/470, loss: 0.5162237882614136 2023-01-24 02:38:12.906914: step: 248/470, loss: 0.5198226571083069 2023-01-24 02:38:13.655342: step: 250/470, loss: 0.9243934154510498 2023-01-24 02:38:14.410658: step: 252/470, loss: 0.9011183977127075 2023-01-24 02:38:15.166484: step: 254/470, loss: 0.1754419207572937 2023-01-24 02:38:15.937351: step: 256/470, loss: 0.5413885712623596 2023-01-24 02:38:16.654168: step: 258/470, loss: 0.28291255235671997 2023-01-24 02:38:17.408630: step: 260/470, loss: 0.3509974181652069 2023-01-24 02:38:18.271854: step: 262/470, loss: 0.22708922624588013 2023-01-24 02:38:19.011363: step: 264/470, loss: 0.22111210227012634 2023-01-24 02:38:19.791266: step: 266/470, loss: 0.19584240019321442 2023-01-24 02:38:20.583980: step: 268/470, loss: 0.3218596279621124 2023-01-24 02:38:21.273527: step: 270/470, loss: 0.5958482623100281 2023-01-24 02:38:21.995732: step: 272/470, loss: 0.6783241629600525 2023-01-24 02:38:22.651324: step: 274/470, loss: 0.40668368339538574 2023-01-24 02:38:23.451787: step: 276/470, loss: 0.3130112290382385 2023-01-24 02:38:24.198454: step: 278/470, loss: 0.26188164949417114 2023-01-24 02:38:25.082329: step: 280/470, loss: 0.21814662218093872 2023-01-24 02:38:25.910416: step: 282/470, loss: 0.5404313206672668 2023-01-24 02:38:26.634531: step: 284/470, loss: 0.9255490303039551 2023-01-24 02:38:27.358700: step: 286/470, loss: 1.064405918121338 2023-01-24 02:38:28.196468: step: 288/470, loss: 0.6848692893981934 2023-01-24 02:38:28.905280: step: 290/470, loss: 0.1509696990251541 2023-01-24 02:38:29.655294: step: 292/470, loss: 0.4635472297668457 2023-01-24 02:38:30.447840: step: 294/470, loss: 0.6151084899902344 2023-01-24 02:38:31.189079: step: 296/470, loss: 0.11453315615653992 2023-01-24 02:38:31.968156: step: 298/470, loss: 0.1999867707490921 2023-01-24 02:38:32.744061: step: 300/470, loss: 0.21419981122016907 2023-01-24 02:38:33.567671: step: 302/470, loss: 0.46333491802215576 2023-01-24 02:38:34.406409: step: 304/470, loss: 0.8135701417922974 2023-01-24 02:38:35.138453: step: 306/470, loss: 0.2585707902908325 2023-01-24 02:38:35.993101: step: 308/470, loss: 0.46079540252685547 2023-01-24 02:38:36.769297: step: 310/470, loss: 0.27376070618629456 2023-01-24 02:38:37.527329: step: 312/470, loss: 0.4137362837791443 2023-01-24 02:38:38.225430: step: 314/470, loss: 0.24535976350307465 2023-01-24 02:38:38.989652: step: 316/470, loss: 0.349854052066803 2023-01-24 02:38:39.780499: step: 318/470, loss: 0.4655457139015198 2023-01-24 02:38:40.516789: step: 320/470, loss: 0.2523977756500244 2023-01-24 02:38:41.348636: step: 322/470, loss: 0.4521579146385193 2023-01-24 02:38:42.077771: step: 324/470, loss: 0.4393160045146942 2023-01-24 02:38:42.842198: step: 326/470, loss: 0.8228650689125061 2023-01-24 02:38:43.608338: step: 328/470, loss: 0.4634270966053009 2023-01-24 02:38:44.372321: step: 330/470, loss: 0.6964781284332275 2023-01-24 02:38:45.081392: step: 332/470, loss: 0.45030978322029114 2023-01-24 02:38:45.869151: step: 334/470, loss: 0.3737616539001465 2023-01-24 02:38:46.710246: step: 336/470, loss: 0.8672752976417542 2023-01-24 02:38:47.474642: step: 338/470, loss: 0.14447632431983948 2023-01-24 02:38:48.211517: step: 340/470, loss: 0.5296969413757324 2023-01-24 02:38:48.964460: step: 342/470, loss: 0.7490493655204773 2023-01-24 02:38:49.792086: step: 344/470, loss: 0.28861531615257263 2023-01-24 02:38:50.607719: step: 346/470, loss: 0.5648653507232666 2023-01-24 02:38:51.415599: step: 348/470, loss: 0.964550256729126 2023-01-24 02:38:52.110587: step: 350/470, loss: 0.8278896808624268 2023-01-24 02:38:52.792721: step: 352/470, loss: 0.17448605597019196 2023-01-24 02:38:53.542672: step: 354/470, loss: 0.6799153089523315 2023-01-24 02:38:54.221954: step: 356/470, loss: 0.4022669792175293 2023-01-24 02:38:54.977912: step: 358/470, loss: 0.2065814882516861 2023-01-24 02:38:55.802927: step: 360/470, loss: 0.3100614845752716 2023-01-24 02:38:56.594718: step: 362/470, loss: 0.3368839919567108 2023-01-24 02:38:57.257269: step: 364/470, loss: 1.1598883867263794 2023-01-24 02:38:58.026116: step: 366/470, loss: 0.31141793727874756 2023-01-24 02:38:58.781330: step: 368/470, loss: 0.23780982196331024 2023-01-24 02:38:59.547004: step: 370/470, loss: 0.22582471370697021 2023-01-24 02:39:00.373760: step: 372/470, loss: 0.803047239780426 2023-01-24 02:39:01.179090: step: 374/470, loss: 0.3405497670173645 2023-01-24 02:39:01.949103: step: 376/470, loss: 0.7714990973472595 2023-01-24 02:39:02.711394: step: 378/470, loss: 0.1867769956588745 2023-01-24 02:39:03.476321: step: 380/470, loss: 0.3200242519378662 2023-01-24 02:39:04.222306: step: 382/470, loss: 0.16745364665985107 2023-01-24 02:39:04.888757: step: 384/470, loss: 0.09259352087974548 2023-01-24 02:39:05.619472: step: 386/470, loss: 0.4721934497356415 2023-01-24 02:39:06.325671: step: 388/470, loss: 0.26353833079338074 2023-01-24 02:39:07.150031: step: 390/470, loss: 0.2055242657661438 2023-01-24 02:39:07.993095: step: 392/470, loss: 0.9652345180511475 2023-01-24 02:39:08.764298: step: 394/470, loss: 0.3389044404029846 2023-01-24 02:39:09.428106: step: 396/470, loss: 0.24200768768787384 2023-01-24 02:39:10.161156: step: 398/470, loss: 0.17797183990478516 2023-01-24 02:39:10.913907: step: 400/470, loss: 0.15373848378658295 2023-01-24 02:39:11.651537: step: 402/470, loss: 0.7525474429130554 2023-01-24 02:39:12.392047: step: 404/470, loss: 0.8085270524024963 2023-01-24 02:39:13.102574: step: 406/470, loss: 0.13830354809761047 2023-01-24 02:39:13.916133: step: 408/470, loss: 0.14868831634521484 2023-01-24 02:39:14.683489: step: 410/470, loss: 0.14518840610980988 2023-01-24 02:39:15.630206: step: 412/470, loss: 0.4489766061306 2023-01-24 02:39:16.378578: step: 414/470, loss: 0.24416552484035492 2023-01-24 02:39:17.096905: step: 416/470, loss: 0.2164216786623001 2023-01-24 02:39:17.812596: step: 418/470, loss: 0.0824371725320816 2023-01-24 02:39:18.651412: step: 420/470, loss: 0.09185123443603516 2023-01-24 02:39:19.466753: step: 422/470, loss: 0.3424897789955139 2023-01-24 02:39:20.187821: step: 424/470, loss: 0.3741994798183441 2023-01-24 02:39:20.922678: step: 426/470, loss: 0.14311550557613373 2023-01-24 02:39:21.625234: step: 428/470, loss: 0.22634892165660858 2023-01-24 02:39:22.384914: step: 430/470, loss: 0.2907640337944031 2023-01-24 02:39:23.141421: step: 432/470, loss: 0.1375235915184021 2023-01-24 02:39:23.830272: step: 434/470, loss: 0.27444496750831604 2023-01-24 02:39:24.514575: step: 436/470, loss: 0.18194928765296936 2023-01-24 02:39:25.245602: step: 438/470, loss: 0.3027937412261963 2023-01-24 02:39:25.972763: step: 440/470, loss: 0.22203978896141052 2023-01-24 02:39:26.743103: step: 442/470, loss: 0.40211665630340576 2023-01-24 02:39:27.498392: step: 444/470, loss: 0.9534308910369873 2023-01-24 02:39:28.349131: step: 446/470, loss: 0.3148941993713379 2023-01-24 02:39:29.092544: step: 448/470, loss: 0.25301098823547363 2023-01-24 02:39:29.807106: step: 450/470, loss: 0.283833384513855 2023-01-24 02:39:30.622434: step: 452/470, loss: 0.33523184061050415 2023-01-24 02:39:31.428958: step: 454/470, loss: 0.15356293320655823 2023-01-24 02:39:32.095472: step: 456/470, loss: 0.5051017999649048 2023-01-24 02:39:32.883388: step: 458/470, loss: 0.4202573895454407 2023-01-24 02:39:33.606227: step: 460/470, loss: 0.12335009127855301 2023-01-24 02:39:34.341938: step: 462/470, loss: 0.4301653504371643 2023-01-24 02:39:35.026008: step: 464/470, loss: 1.2519079446792603 2023-01-24 02:39:35.774747: step: 466/470, loss: 0.2109188437461853 2023-01-24 02:39:36.549432: step: 468/470, loss: 0.3727503716945648 2023-01-24 02:39:37.321473: step: 470/470, loss: 0.46644580364227295 2023-01-24 02:39:38.172135: step: 472/470, loss: 0.8295896649360657 2023-01-24 02:39:38.866770: step: 474/470, loss: 0.5774760246276855 2023-01-24 02:39:39.598499: step: 476/470, loss: 0.9126483201980591 2023-01-24 02:39:40.326862: step: 478/470, loss: 0.31328561902046204 2023-01-24 02:39:41.055074: step: 480/470, loss: 0.22493775188922882 2023-01-24 02:39:41.817548: step: 482/470, loss: 0.36869755387306213 2023-01-24 02:39:42.624248: step: 484/470, loss: 0.2161940336227417 2023-01-24 02:39:43.318744: step: 486/470, loss: 0.6317044496536255 2023-01-24 02:39:44.063188: step: 488/470, loss: 0.5619537234306335 2023-01-24 02:39:44.833240: step: 490/470, loss: 1.1349985599517822 2023-01-24 02:39:45.648696: step: 492/470, loss: 0.09379585087299347 2023-01-24 02:39:46.477215: step: 494/470, loss: 0.9672868847846985 2023-01-24 02:39:47.228815: step: 496/470, loss: 1.1263362169265747 2023-01-24 02:39:48.000511: step: 498/470, loss: 0.2689302861690521 2023-01-24 02:39:48.799026: step: 500/470, loss: 0.3239392042160034 2023-01-24 02:39:49.591416: step: 502/470, loss: 0.2518550157546997 2023-01-24 02:39:50.311088: step: 504/470, loss: 0.6391338109970093 2023-01-24 02:39:51.069355: step: 506/470, loss: 0.3393190801143646 2023-01-24 02:39:51.788502: step: 508/470, loss: 0.6354091763496399 2023-01-24 02:39:52.539394: step: 510/470, loss: 0.26952916383743286 2023-01-24 02:39:53.285889: step: 512/470, loss: 0.839598536491394 2023-01-24 02:39:54.070869: step: 514/470, loss: 0.41435518860816956 2023-01-24 02:39:54.854906: step: 516/470, loss: 0.5042211413383484 2023-01-24 02:39:55.638508: step: 518/470, loss: 1.5641471147537231 2023-01-24 02:39:56.383429: step: 520/470, loss: 0.34855765104293823 2023-01-24 02:39:57.135302: step: 522/470, loss: 0.521365761756897 2023-01-24 02:39:57.961879: step: 524/470, loss: 0.33301544189453125 2023-01-24 02:39:58.619773: step: 526/470, loss: 0.5081035494804382 2023-01-24 02:39:59.346724: step: 528/470, loss: 0.20576249063014984 2023-01-24 02:40:00.042600: step: 530/470, loss: 0.10231142491102219 2023-01-24 02:40:00.801053: step: 532/470, loss: 0.46384578943252563 2023-01-24 02:40:01.551838: step: 534/470, loss: 4.53569221496582 2023-01-24 02:40:02.323433: step: 536/470, loss: 0.2767690420150757 2023-01-24 02:40:03.116743: step: 538/470, loss: 0.24536767601966858 2023-01-24 02:40:03.846921: step: 540/470, loss: 0.5284467935562134 2023-01-24 02:40:04.697622: step: 542/470, loss: 1.4372402429580688 2023-01-24 02:40:05.470856: step: 544/470, loss: 0.2401868849992752 2023-01-24 02:40:06.267753: step: 546/470, loss: 0.6595420837402344 2023-01-24 02:40:07.032799: step: 548/470, loss: 0.8081770539283752 2023-01-24 02:40:07.749614: step: 550/470, loss: 0.17885182797908783 2023-01-24 02:40:08.560395: step: 552/470, loss: 0.21516308188438416 2023-01-24 02:40:09.280206: step: 554/470, loss: 0.5545898675918579 2023-01-24 02:40:09.945487: step: 556/470, loss: 0.20706087350845337 2023-01-24 02:40:10.724860: step: 558/470, loss: 0.13563042879104614 2023-01-24 02:40:11.430653: step: 560/470, loss: 0.2453116625547409 2023-01-24 02:40:12.169818: step: 562/470, loss: 0.7842438817024231 2023-01-24 02:40:13.029198: step: 564/470, loss: 0.2593076229095459 2023-01-24 02:40:13.785463: step: 566/470, loss: 0.12692120671272278 2023-01-24 02:40:14.461818: step: 568/470, loss: 0.4532640874385834 2023-01-24 02:40:15.193607: step: 570/470, loss: 0.3847683072090149 2023-01-24 02:40:16.023682: step: 572/470, loss: 0.19840320944786072 2023-01-24 02:40:16.753848: step: 574/470, loss: 0.15931807458400726 2023-01-24 02:40:17.520308: step: 576/470, loss: 0.2908623516559601 2023-01-24 02:40:18.233482: step: 578/470, loss: 0.6550799608230591 2023-01-24 02:40:18.956454: step: 580/470, loss: 0.6560707688331604 2023-01-24 02:40:19.716145: step: 582/470, loss: 0.41067826747894287 2023-01-24 02:40:20.479081: step: 584/470, loss: 0.11992362141609192 2023-01-24 02:40:21.178932: step: 586/470, loss: 0.460469126701355 2023-01-24 02:40:21.994979: step: 588/470, loss: 0.8359988331794739 2023-01-24 02:40:22.736727: step: 590/470, loss: 0.36444351077079773 2023-01-24 02:40:23.521525: step: 592/470, loss: 0.2154173105955124 2023-01-24 02:40:24.267749: step: 594/470, loss: 0.5687950849533081 2023-01-24 02:40:24.991701: step: 596/470, loss: 0.9395169615745544 2023-01-24 02:40:25.718209: step: 598/470, loss: 0.49935394525527954 2023-01-24 02:40:26.467020: step: 600/470, loss: 0.49853333830833435 2023-01-24 02:40:27.171745: step: 602/470, loss: 1.7896212339401245 2023-01-24 02:40:27.879790: step: 604/470, loss: 0.1358480155467987 2023-01-24 02:40:28.616814: step: 606/470, loss: 0.7760083079338074 2023-01-24 02:40:29.311992: step: 608/470, loss: 0.3000049293041229 2023-01-24 02:40:30.038187: step: 610/470, loss: 0.4000217616558075 2023-01-24 02:40:30.768412: step: 612/470, loss: 0.3303145170211792 2023-01-24 02:40:31.540814: step: 614/470, loss: 0.517999529838562 2023-01-24 02:40:32.287914: step: 616/470, loss: 0.37297362089157104 2023-01-24 02:40:33.052746: step: 618/470, loss: 0.2246372401714325 2023-01-24 02:40:33.799408: step: 620/470, loss: 0.9546456336975098 2023-01-24 02:40:34.543942: step: 622/470, loss: 0.9173968434333801 2023-01-24 02:40:35.309191: step: 624/470, loss: 0.35250478982925415 2023-01-24 02:40:36.161869: step: 626/470, loss: 0.8878437280654907 2023-01-24 02:40:36.828945: step: 628/470, loss: 0.49247634410858154 2023-01-24 02:40:37.537187: step: 630/470, loss: 0.3329099416732788 2023-01-24 02:40:38.309295: step: 632/470, loss: 1.0935202836990356 2023-01-24 02:40:39.043989: step: 634/470, loss: 0.2755989730358124 2023-01-24 02:40:39.729319: step: 636/470, loss: 0.6668111085891724 2023-01-24 02:40:40.472631: step: 638/470, loss: 0.24469828605651855 2023-01-24 02:40:41.244235: step: 640/470, loss: 0.22104227542877197 2023-01-24 02:40:41.982455: step: 642/470, loss: 0.16912305355072021 2023-01-24 02:40:42.683138: step: 644/470, loss: 0.10374326258897781 2023-01-24 02:40:43.406370: step: 646/470, loss: 0.20086267590522766 2023-01-24 02:40:44.202709: step: 648/470, loss: 0.1727813482284546 2023-01-24 02:40:44.984203: step: 650/470, loss: 0.13764482736587524 2023-01-24 02:40:45.742363: step: 652/470, loss: 0.3168490529060364 2023-01-24 02:40:46.568243: step: 654/470, loss: 0.5362805128097534 2023-01-24 02:40:47.269830: step: 656/470, loss: 0.40005090832710266 2023-01-24 02:40:47.994917: step: 658/470, loss: 2.0221118927001953 2023-01-24 02:40:48.741291: step: 660/470, loss: 0.46189936995506287 2023-01-24 02:40:49.456616: step: 662/470, loss: 0.6272675395011902 2023-01-24 02:40:50.223789: step: 664/470, loss: 0.44351091980934143 2023-01-24 02:40:50.990745: step: 666/470, loss: 0.31063738465309143 2023-01-24 02:40:51.770186: step: 668/470, loss: 0.41334736347198486 2023-01-24 02:40:52.501593: step: 670/470, loss: 0.10286879539489746 2023-01-24 02:40:53.256343: step: 672/470, loss: 0.5979005098342896 2023-01-24 02:40:53.976482: step: 674/470, loss: 4.1260666847229 2023-01-24 02:40:54.766917: step: 676/470, loss: 0.8004294633865356 2023-01-24 02:40:55.485628: step: 678/470, loss: 0.24940814077854156 2023-01-24 02:40:56.214012: step: 680/470, loss: 0.16425098478794098 2023-01-24 02:40:56.927617: step: 682/470, loss: 0.671233594417572 2023-01-24 02:40:57.687129: step: 684/470, loss: 0.15050573647022247 2023-01-24 02:40:58.502083: step: 686/470, loss: 0.36023542284965515 2023-01-24 02:40:59.254264: step: 688/470, loss: 1.1040581464767456 2023-01-24 02:41:00.019545: step: 690/470, loss: 0.3097226321697235 2023-01-24 02:41:00.728480: step: 692/470, loss: 0.27629736065864563 2023-01-24 02:41:01.424749: step: 694/470, loss: 2.556936264038086 2023-01-24 02:41:02.198175: step: 696/470, loss: 0.2204158753156662 2023-01-24 02:41:02.946938: step: 698/470, loss: 0.7285110950469971 2023-01-24 02:41:03.763668: step: 700/470, loss: 4.453820705413818 2023-01-24 02:41:04.529387: step: 702/470, loss: 0.6035624742507935 2023-01-24 02:41:05.287430: step: 704/470, loss: 0.21447139978408813 2023-01-24 02:41:05.995304: step: 706/470, loss: 0.23186089098453522 2023-01-24 02:41:06.838193: step: 708/470, loss: 0.40758103132247925 2023-01-24 02:41:07.613654: step: 710/470, loss: 0.13845710456371307 2023-01-24 02:41:08.441749: step: 712/470, loss: 3.9527015686035156 2023-01-24 02:41:09.195209: step: 714/470, loss: 0.26543423533439636 2023-01-24 02:41:09.927284: step: 716/470, loss: 0.45242205262184143 2023-01-24 02:41:10.646096: step: 718/470, loss: 2.2168221473693848 2023-01-24 02:41:11.438402: step: 720/470, loss: 0.22453352808952332 2023-01-24 02:41:12.165980: step: 722/470, loss: 0.5967671871185303 2023-01-24 02:41:12.944197: step: 724/470, loss: 0.45324817299842834 2023-01-24 02:41:13.723726: step: 726/470, loss: 1.0313066244125366 2023-01-24 02:41:14.461926: step: 728/470, loss: 0.11269115656614304 2023-01-24 02:41:15.307888: step: 730/470, loss: 0.49560102820396423 2023-01-24 02:41:16.101640: step: 732/470, loss: 0.2794017791748047 2023-01-24 02:41:16.794067: step: 734/470, loss: 0.6522477865219116 2023-01-24 02:41:17.519656: step: 736/470, loss: 0.28647884726524353 2023-01-24 02:41:18.234483: step: 738/470, loss: 0.20137378573417664 2023-01-24 02:41:18.999853: step: 740/470, loss: 0.6079099178314209 2023-01-24 02:41:19.768761: step: 742/470, loss: 0.7093207240104675 2023-01-24 02:41:20.621466: step: 744/470, loss: 0.21907228231430054 2023-01-24 02:41:21.388208: step: 746/470, loss: 0.131643146276474 2023-01-24 02:41:22.205873: step: 748/470, loss: 0.4653341472148895 2023-01-24 02:41:22.934802: step: 750/470, loss: 5.116756916046143 2023-01-24 02:41:23.690305: step: 752/470, loss: 0.3567095100879669 2023-01-24 02:41:24.462095: step: 754/470, loss: 0.18065395951271057 2023-01-24 02:41:25.166049: step: 756/470, loss: 0.2523786723613739 2023-01-24 02:41:25.930574: step: 758/470, loss: 0.9525728225708008 2023-01-24 02:41:26.659747: step: 760/470, loss: 0.30923041701316833 2023-01-24 02:41:27.485191: step: 762/470, loss: 0.32964733242988586 2023-01-24 02:41:28.293747: step: 764/470, loss: 0.2381993532180786 2023-01-24 02:41:29.031559: step: 766/470, loss: 0.18978539109230042 2023-01-24 02:41:29.819676: step: 768/470, loss: 0.7485260963439941 2023-01-24 02:41:30.557265: step: 770/470, loss: 0.25674277544021606 2023-01-24 02:41:31.346778: step: 772/470, loss: 0.22901327908039093 2023-01-24 02:41:32.064398: step: 774/470, loss: 0.2624027729034424 2023-01-24 02:41:32.849520: step: 776/470, loss: 0.14591439068317413 2023-01-24 02:41:33.683630: step: 778/470, loss: 1.4718310832977295 2023-01-24 02:41:34.451467: step: 780/470, loss: 0.26930221915245056 2023-01-24 02:41:35.211502: step: 782/470, loss: 0.7424861192703247 2023-01-24 02:41:35.934741: step: 784/470, loss: 0.29697778820991516 2023-01-24 02:41:36.656133: step: 786/470, loss: 1.1480909585952759 2023-01-24 02:41:37.358099: step: 788/470, loss: 0.8097434639930725 2023-01-24 02:41:38.105121: step: 790/470, loss: 0.5420349836349487 2023-01-24 02:41:38.810910: step: 792/470, loss: 0.21315941214561462 2023-01-24 02:41:39.536312: step: 794/470, loss: 0.2593609094619751 2023-01-24 02:41:40.244392: step: 796/470, loss: 0.21246762573719025 2023-01-24 02:41:41.007381: step: 798/470, loss: 0.45392927527427673 2023-01-24 02:41:41.771073: step: 800/470, loss: 0.4097192883491516 2023-01-24 02:41:42.585705: step: 802/470, loss: 0.9928512573242188 2023-01-24 02:41:43.316973: step: 804/470, loss: 0.5397576689720154 2023-01-24 02:41:44.038147: step: 806/470, loss: 0.0932132676243782 2023-01-24 02:41:44.796495: step: 808/470, loss: 0.43611350655555725 2023-01-24 02:41:45.563858: step: 810/470, loss: 0.5918594598770142 2023-01-24 02:41:46.338242: step: 812/470, loss: 0.23620811104774475 2023-01-24 02:41:47.174160: step: 814/470, loss: 0.5560216307640076 2023-01-24 02:41:47.957813: step: 816/470, loss: 0.598693311214447 2023-01-24 02:41:48.746169: step: 818/470, loss: 0.40362799167633057 2023-01-24 02:41:49.573379: step: 820/470, loss: 0.23154190182685852 2023-01-24 02:41:50.292031: step: 822/470, loss: 0.5826197266578674 2023-01-24 02:41:50.990031: step: 824/470, loss: 0.2394208461046219 2023-01-24 02:41:51.676208: step: 826/470, loss: 0.17906901240348816 2023-01-24 02:41:52.503965: step: 828/470, loss: 0.372158408164978 2023-01-24 02:41:53.227320: step: 830/470, loss: 1.067903757095337 2023-01-24 02:41:53.992282: step: 832/470, loss: 0.1843709796667099 2023-01-24 02:41:54.778719: step: 834/470, loss: 0.2733551263809204 2023-01-24 02:41:55.679782: step: 836/470, loss: 1.0208349227905273 2023-01-24 02:41:56.403240: step: 838/470, loss: 0.6295246481895447 2023-01-24 02:41:57.116428: step: 840/470, loss: 0.357212096452713 2023-01-24 02:41:57.870093: step: 842/470, loss: 0.20953968167304993 2023-01-24 02:41:58.653388: step: 844/470, loss: 0.630314290523529 2023-01-24 02:41:59.504030: step: 846/470, loss: 1.2299977540969849 2023-01-24 02:42:00.267357: step: 848/470, loss: 0.599105179309845 2023-01-24 02:42:01.130904: step: 850/470, loss: 0.8288000226020813 2023-01-24 02:42:01.908855: step: 852/470, loss: 0.1812320500612259 2023-01-24 02:42:02.686197: step: 854/470, loss: 0.23691441118717194 2023-01-24 02:42:03.371965: step: 856/470, loss: 0.3210389316082001 2023-01-24 02:42:04.050850: step: 858/470, loss: 0.5429426431655884 2023-01-24 02:42:04.809584: step: 860/470, loss: 0.13180933892726898 2023-01-24 02:42:05.520805: step: 862/470, loss: 1.8928117752075195 2023-01-24 02:42:06.248925: step: 864/470, loss: 0.7599416971206665 2023-01-24 02:42:06.994076: step: 866/470, loss: 0.5055848360061646 2023-01-24 02:42:07.727613: step: 868/470, loss: 0.1821976751089096 2023-01-24 02:42:08.402145: step: 870/470, loss: 1.22907555103302 2023-01-24 02:42:09.196797: step: 872/470, loss: 0.3916356861591339 2023-01-24 02:42:09.869027: step: 874/470, loss: 0.26299530267715454 2023-01-24 02:42:10.596751: step: 876/470, loss: 0.25578999519348145 2023-01-24 02:42:11.371049: step: 878/470, loss: 0.37457263469696045 2023-01-24 02:42:12.058469: step: 880/470, loss: 0.3378101885318756 2023-01-24 02:42:12.716824: step: 882/470, loss: 1.6557979583740234 2023-01-24 02:42:13.495437: step: 884/470, loss: 0.39262452721595764 2023-01-24 02:42:14.264813: step: 886/470, loss: 0.4881884753704071 2023-01-24 02:42:14.965686: step: 888/470, loss: 0.19406718015670776 2023-01-24 02:42:15.809801: step: 890/470, loss: 0.2705124318599701 2023-01-24 02:42:16.534704: step: 892/470, loss: 0.34521085023880005 2023-01-24 02:42:17.301478: step: 894/470, loss: 0.45954829454421997 2023-01-24 02:42:18.060086: step: 896/470, loss: 0.14961890876293182 2023-01-24 02:42:18.817986: step: 898/470, loss: 1.0215060710906982 2023-01-24 02:42:19.613612: step: 900/470, loss: 1.122787594795227 2023-01-24 02:42:20.376180: step: 902/470, loss: 0.2572832703590393 2023-01-24 02:42:21.241580: step: 904/470, loss: 0.19345666468143463 2023-01-24 02:42:22.016724: step: 906/470, loss: 0.4549930989742279 2023-01-24 02:42:22.751402: step: 908/470, loss: 0.32068750262260437 2023-01-24 02:42:23.495052: step: 910/470, loss: 0.5561640858650208 2023-01-24 02:42:24.493959: step: 912/470, loss: 0.2232218235731125 2023-01-24 02:42:25.263059: step: 914/470, loss: 0.2716374099254608 2023-01-24 02:42:25.969420: step: 916/470, loss: 0.1163991242647171 2023-01-24 02:42:26.809569: step: 918/470, loss: 0.3325546085834503 2023-01-24 02:42:27.487296: step: 920/470, loss: 0.5324735641479492 2023-01-24 02:42:28.205708: step: 922/470, loss: 0.1445450484752655 2023-01-24 02:42:29.012423: step: 924/470, loss: 0.61869215965271 2023-01-24 02:42:29.745655: step: 926/470, loss: 0.2812950611114502 2023-01-24 02:42:30.567097: step: 928/470, loss: 0.8153495192527771 2023-01-24 02:42:31.300587: step: 930/470, loss: 0.4129032492637634 2023-01-24 02:42:32.084006: step: 932/470, loss: 0.3423391282558441 2023-01-24 02:42:32.925014: step: 934/470, loss: 1.508226752281189 2023-01-24 02:42:33.723439: step: 936/470, loss: 0.32150372862815857 2023-01-24 02:42:34.468591: step: 938/470, loss: 0.3547199070453644 2023-01-24 02:42:35.224209: step: 940/470, loss: 0.3670746386051178 2023-01-24 02:42:35.916560: step: 942/470, loss: 0.5506512522697449 ================================================== Loss: 0.508 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34562181790955376, 'r': 0.31283037408511793, 'f1': 0.3284095759817871}, 'combined': 0.241986003355001, 'epoch': 8} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34933396859747257, 'r': 0.33220316821432727, 'f1': 0.3405532724917697}, 'combined': 0.22703551499451308, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32730593587400786, 'r': 0.30432620223579476, 'f1': 0.31539805030140383}, 'combined': 0.23239856337998177, 'epoch': 8} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3589521176709698, 'r': 0.33515990237865767, 'f1': 0.3466482447850796}, 'combined': 0.23109882985671967, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3186660603452772, 'r': 0.30113035683481604, 'f1': 0.3096501425403864}, 'combined': 0.22816326292449524, 'epoch': 8} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34992977964036415, 'r': 0.3438791206264097, 'f1': 0.3468780664458261}, 'combined': 0.23125204429721732, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2916666666666667, 'r': 0.25, 'f1': 0.2692307692307692}, 'combined': 0.17948717948717946, 'epoch': 8} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5666666666666667, 'r': 0.3695652173913043, 'f1': 0.4473684210526315}, 'combined': 0.29824561403508765, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.1724137931034483, 'f1': 0.25000000000000006}, 'combined': 0.16666666666666669, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30556972567402146, 'r': 0.3113680126887088, 'f1': 0.3084416215920104}, 'combined': 0.22727277380463923, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3373148248574617, 'r': 0.3191517189035984, 'f1': 0.3279820036163461}, 'combined': 0.21865466907756403, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2717391304347826, 'r': 0.35714285714285715, 'f1': 0.308641975308642}, 'combined': 0.205761316872428, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32656041388518026, 'r': 0.2652141501761995, 'f1': 0.29270755422587885}, 'combined': 0.2156792504822265, 'epoch': 6} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34543563262253457, 'r': 0.3039568102615194, 'f1': 0.3233715273196134}, 'combined': 0.21558101821307554, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 6} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:45:12.835546: step: 2/470, loss: 0.5447205901145935 2023-01-24 02:45:13.582560: step: 4/470, loss: 0.22318868339061737 2023-01-24 02:45:14.299614: step: 6/470, loss: 0.3136242926120758 2023-01-24 02:45:15.032127: step: 8/470, loss: 0.28907865285873413 2023-01-24 02:45:15.810985: step: 10/470, loss: 0.2637608051300049 2023-01-24 02:45:16.600419: step: 12/470, loss: 0.5738874077796936 2023-01-24 02:45:17.339865: step: 14/470, loss: 1.0422797203063965 2023-01-24 02:45:18.137251: step: 16/470, loss: 0.602383553981781 2023-01-24 02:45:18.908168: step: 18/470, loss: 0.08700596541166306 2023-01-24 02:45:19.740590: step: 20/470, loss: 0.6579717993736267 2023-01-24 02:45:20.581505: step: 22/470, loss: 0.17143812775611877 2023-01-24 02:45:21.367471: step: 24/470, loss: 0.21379876136779785 2023-01-24 02:45:22.196410: step: 26/470, loss: 0.6119391322135925 2023-01-24 02:45:22.971775: step: 28/470, loss: 0.23522713780403137 2023-01-24 02:45:23.707572: step: 30/470, loss: 0.5445173978805542 2023-01-24 02:45:24.463116: step: 32/470, loss: 3.121372938156128 2023-01-24 02:45:25.190378: step: 34/470, loss: 0.2587936520576477 2023-01-24 02:45:25.961597: step: 36/470, loss: 0.5228575468063354 2023-01-24 02:45:26.817047: step: 38/470, loss: 0.3780156075954437 2023-01-24 02:45:27.522985: step: 40/470, loss: 0.4384208023548126 2023-01-24 02:45:28.335696: step: 42/470, loss: 0.2887096107006073 2023-01-24 02:45:29.082703: step: 44/470, loss: 0.2585183382034302 2023-01-24 02:45:29.835728: step: 46/470, loss: 0.2520710825920105 2023-01-24 02:45:30.524377: step: 48/470, loss: 0.7488427758216858 2023-01-24 02:45:31.258630: step: 50/470, loss: 0.10620920360088348 2023-01-24 02:45:32.088875: step: 52/470, loss: 0.14906106889247894 2023-01-24 02:45:32.823070: step: 54/470, loss: 0.6837480068206787 2023-01-24 02:45:33.645100: step: 56/470, loss: 0.15488582849502563 2023-01-24 02:45:34.362635: step: 58/470, loss: 0.9800782203674316 2023-01-24 02:45:35.122023: step: 60/470, loss: 0.2453881949186325 2023-01-24 02:45:35.839805: step: 62/470, loss: 0.25628599524497986 2023-01-24 02:45:36.599139: step: 64/470, loss: 0.5019969940185547 2023-01-24 02:45:37.280664: step: 66/470, loss: 0.15023790299892426 2023-01-24 02:45:38.098977: step: 68/470, loss: 0.39702898263931274 2023-01-24 02:45:38.834874: step: 70/470, loss: 0.5472665429115295 2023-01-24 02:45:39.531705: step: 72/470, loss: 0.5863292217254639 2023-01-24 02:45:40.264543: step: 74/470, loss: 0.13345842063426971 2023-01-24 02:45:41.142116: step: 76/470, loss: 0.1856902539730072 2023-01-24 02:45:41.885395: step: 78/470, loss: 0.19066360592842102 2023-01-24 02:45:42.626969: step: 80/470, loss: 0.2122010886669159 2023-01-24 02:45:43.390677: step: 82/470, loss: 0.49828097224235535 2023-01-24 02:45:44.072072: step: 84/470, loss: 0.15491345524787903 2023-01-24 02:45:44.844394: step: 86/470, loss: 0.1964101791381836 2023-01-24 02:45:45.568984: step: 88/470, loss: 0.1925598680973053 2023-01-24 02:45:46.310060: step: 90/470, loss: 0.5155028700828552 2023-01-24 02:45:46.982409: step: 92/470, loss: 0.21076497435569763 2023-01-24 02:45:47.700922: step: 94/470, loss: 0.18973298370838165 2023-01-24 02:45:48.491426: step: 96/470, loss: 0.5644153356552124 2023-01-24 02:45:49.285630: step: 98/470, loss: 0.10288572311401367 2023-01-24 02:45:49.993407: step: 100/470, loss: 0.07585206627845764 2023-01-24 02:45:50.736696: step: 102/470, loss: 0.3527693748474121 2023-01-24 02:45:51.484163: step: 104/470, loss: 0.14347948133945465 2023-01-24 02:45:52.307099: step: 106/470, loss: 0.36973798274993896 2023-01-24 02:45:53.009537: step: 108/470, loss: 0.38219624757766724 2023-01-24 02:45:53.710280: step: 110/470, loss: 0.744696319103241 2023-01-24 02:45:54.470029: step: 112/470, loss: 0.36855944991111755 2023-01-24 02:45:55.301985: step: 114/470, loss: 0.1453740894794464 2023-01-24 02:45:56.067046: step: 116/470, loss: 1.321637511253357 2023-01-24 02:45:56.814841: step: 118/470, loss: 0.9926891922950745 2023-01-24 02:45:57.506995: step: 120/470, loss: 0.15449316799640656 2023-01-24 02:45:58.292144: step: 122/470, loss: 0.14840567111968994 2023-01-24 02:45:59.028920: step: 124/470, loss: 0.18444156646728516 2023-01-24 02:45:59.747323: step: 126/470, loss: 0.3966729938983917 2023-01-24 02:46:00.457720: step: 128/470, loss: 1.7696688175201416 2023-01-24 02:46:01.156275: step: 130/470, loss: 0.30441614985466003 2023-01-24 02:46:01.898218: step: 132/470, loss: 0.17320138216018677 2023-01-24 02:46:02.606843: step: 134/470, loss: 0.2448633313179016 2023-01-24 02:46:03.406034: step: 136/470, loss: 0.2567095458507538 2023-01-24 02:46:04.219363: step: 138/470, loss: 0.04755272716283798 2023-01-24 02:46:04.895781: step: 140/470, loss: 0.15079620480537415 2023-01-24 02:46:05.669335: step: 142/470, loss: 0.5248032212257385 2023-01-24 02:46:06.388895: step: 144/470, loss: 0.3448765277862549 2023-01-24 02:46:07.149719: step: 146/470, loss: 0.45058566331863403 2023-01-24 02:46:07.876017: step: 148/470, loss: 0.1982106864452362 2023-01-24 02:46:08.634390: step: 150/470, loss: 0.17295490205287933 2023-01-24 02:46:09.399018: step: 152/470, loss: 1.0078809261322021 2023-01-24 02:46:10.172144: step: 154/470, loss: 0.33238446712493896 2023-01-24 02:46:10.917977: step: 156/470, loss: 0.15200795233249664 2023-01-24 02:46:11.690825: step: 158/470, loss: 0.07408700883388519 2023-01-24 02:46:12.488381: step: 160/470, loss: 0.26883095502853394 2023-01-24 02:46:13.222203: step: 162/470, loss: 0.9753660559654236 2023-01-24 02:46:14.029901: step: 164/470, loss: 0.3113194704055786 2023-01-24 02:46:14.740654: step: 166/470, loss: 0.25497275590896606 2023-01-24 02:46:15.567927: step: 168/470, loss: 0.4309341311454773 2023-01-24 02:46:16.361230: step: 170/470, loss: 0.07410217821598053 2023-01-24 02:46:17.070471: step: 172/470, loss: 0.8484674096107483 2023-01-24 02:46:17.826387: step: 174/470, loss: 0.28762853145599365 2023-01-24 02:46:18.618713: step: 176/470, loss: 0.14900214970111847 2023-01-24 02:46:19.419571: step: 178/470, loss: 0.14563746750354767 2023-01-24 02:46:20.209972: step: 180/470, loss: 0.2309417426586151 2023-01-24 02:46:20.989470: step: 182/470, loss: 0.7100458145141602 2023-01-24 02:46:21.778160: step: 184/470, loss: 0.2716662287712097 2023-01-24 02:46:22.544985: step: 186/470, loss: 0.28548765182495117 2023-01-24 02:46:23.324286: step: 188/470, loss: 0.8044639229774475 2023-01-24 02:46:24.273250: step: 190/470, loss: 0.19010743498802185 2023-01-24 02:46:24.957445: step: 192/470, loss: 0.2690931558609009 2023-01-24 02:46:25.685035: step: 194/470, loss: 0.1267169564962387 2023-01-24 02:46:26.468868: step: 196/470, loss: 0.9669914245605469 2023-01-24 02:46:27.295843: step: 198/470, loss: 0.5740648508071899 2023-01-24 02:46:28.063562: step: 200/470, loss: 0.16114988923072815 2023-01-24 02:46:28.808964: step: 202/470, loss: 0.14519870281219482 2023-01-24 02:46:29.541621: step: 204/470, loss: 0.18535593152046204 2023-01-24 02:46:30.275288: step: 206/470, loss: 0.34165194630622864 2023-01-24 02:46:31.038581: step: 208/470, loss: 0.1012420654296875 2023-01-24 02:46:31.856139: step: 210/470, loss: 0.35992568731307983 2023-01-24 02:46:32.679888: step: 212/470, loss: 1.1616597175598145 2023-01-24 02:46:33.493685: step: 214/470, loss: 0.3503967523574829 2023-01-24 02:46:34.280950: step: 216/470, loss: 0.5987624526023865 2023-01-24 02:46:35.091736: step: 218/470, loss: 0.17567022144794464 2023-01-24 02:46:35.839245: step: 220/470, loss: 0.16215509176254272 2023-01-24 02:46:36.651007: step: 222/470, loss: 0.3935254216194153 2023-01-24 02:46:37.371538: step: 224/470, loss: 0.45323100686073303 2023-01-24 02:46:38.181360: step: 226/470, loss: 1.0208436250686646 2023-01-24 02:46:38.943390: step: 228/470, loss: 0.39432036876678467 2023-01-24 02:46:39.746768: step: 230/470, loss: 0.2786700129508972 2023-01-24 02:46:40.451223: step: 232/470, loss: 0.19462734460830688 2023-01-24 02:46:41.315804: step: 234/470, loss: 0.12101204693317413 2023-01-24 02:46:42.070121: step: 236/470, loss: 0.17866027355194092 2023-01-24 02:46:42.806733: step: 238/470, loss: 0.2924273610115051 2023-01-24 02:46:43.590884: step: 240/470, loss: 0.5252290368080139 2023-01-24 02:46:44.292119: step: 242/470, loss: 0.13447031378746033 2023-01-24 02:46:45.063322: step: 244/470, loss: 0.08729273080825806 2023-01-24 02:46:45.893672: step: 246/470, loss: 0.13195963203907013 2023-01-24 02:46:46.577444: step: 248/470, loss: 0.40723153948783875 2023-01-24 02:46:47.357304: step: 250/470, loss: 0.8148595094680786 2023-01-24 02:46:48.103479: step: 252/470, loss: 0.10905633121728897 2023-01-24 02:46:48.953480: step: 254/470, loss: 0.3877982497215271 2023-01-24 02:46:49.690681: step: 256/470, loss: 0.6170598864555359 2023-01-24 02:46:50.362750: step: 258/470, loss: 0.3111323416233063 2023-01-24 02:46:51.068143: step: 260/470, loss: 0.24980317056179047 2023-01-24 02:46:51.762818: step: 262/470, loss: 0.26733797788619995 2023-01-24 02:46:52.585452: step: 264/470, loss: 0.04581334814429283 2023-01-24 02:46:53.370905: step: 266/470, loss: 0.3556976616382599 2023-01-24 02:46:54.110470: step: 268/470, loss: 0.20936265587806702 2023-01-24 02:46:54.789318: step: 270/470, loss: 0.07905016839504242 2023-01-24 02:46:55.560052: step: 272/470, loss: 0.5653750896453857 2023-01-24 02:46:56.226556: step: 274/470, loss: 0.15013067424297333 2023-01-24 02:46:56.927971: step: 276/470, loss: 0.25626760721206665 2023-01-24 02:46:57.679682: step: 278/470, loss: 0.19960841536521912 2023-01-24 02:46:58.461153: step: 280/470, loss: 0.302801251411438 2023-01-24 02:46:59.168892: step: 282/470, loss: 0.2810971140861511 2023-01-24 02:46:59.904731: step: 284/470, loss: 0.22355952858924866 2023-01-24 02:47:00.650545: step: 286/470, loss: 0.7538304924964905 2023-01-24 02:47:01.374457: step: 288/470, loss: 0.9847416281700134 2023-01-24 02:47:02.177628: step: 290/470, loss: 0.12261106073856354 2023-01-24 02:47:02.894096: step: 292/470, loss: 0.3926984667778015 2023-01-24 02:47:03.637847: step: 294/470, loss: 0.6225259900093079 2023-01-24 02:47:04.420244: step: 296/470, loss: 0.5642341375350952 2023-01-24 02:47:05.191510: step: 298/470, loss: 0.3781619668006897 2023-01-24 02:47:05.917606: step: 300/470, loss: 0.6121640205383301 2023-01-24 02:47:06.680025: step: 302/470, loss: 0.4178549647331238 2023-01-24 02:47:07.333885: step: 304/470, loss: 0.3481173515319824 2023-01-24 02:47:08.105645: step: 306/470, loss: 0.48398879170417786 2023-01-24 02:47:08.850281: step: 308/470, loss: 1.0851445198059082 2023-01-24 02:47:09.586746: step: 310/470, loss: 0.42979589104652405 2023-01-24 02:47:10.295885: step: 312/470, loss: 0.3409073054790497 2023-01-24 02:47:11.177136: step: 314/470, loss: 0.1891537755727768 2023-01-24 02:47:11.916164: step: 316/470, loss: 0.08484365046024323 2023-01-24 02:47:12.665817: step: 318/470, loss: 0.08334700763225555 2023-01-24 02:47:13.370512: step: 320/470, loss: 0.13253387808799744 2023-01-24 02:47:14.102325: step: 322/470, loss: 0.18955287337303162 2023-01-24 02:47:14.832976: step: 324/470, loss: 0.4056585729122162 2023-01-24 02:47:15.629423: step: 326/470, loss: 0.398506224155426 2023-01-24 02:47:16.412358: step: 328/470, loss: 0.4494258165359497 2023-01-24 02:47:17.160271: step: 330/470, loss: 0.5383342504501343 2023-01-24 02:47:17.920648: step: 332/470, loss: 0.3049156665802002 2023-01-24 02:47:18.626034: step: 334/470, loss: 0.3931887745857239 2023-01-24 02:47:19.340900: step: 336/470, loss: 0.3626604676246643 2023-01-24 02:47:20.118029: step: 338/470, loss: 0.19436430931091309 2023-01-24 02:47:20.830523: step: 340/470, loss: 0.12340643256902695 2023-01-24 02:47:21.596060: step: 342/470, loss: 0.10149870067834854 2023-01-24 02:47:22.387348: step: 344/470, loss: 0.26956290006637573 2023-01-24 02:47:23.184527: step: 346/470, loss: 0.35809779167175293 2023-01-24 02:47:23.991649: step: 348/470, loss: 0.2530977129936218 2023-01-24 02:47:24.629216: step: 350/470, loss: 0.20895856618881226 2023-01-24 02:47:25.450146: step: 352/470, loss: 0.1304413229227066 2023-01-24 02:47:26.191725: step: 354/470, loss: 0.31440871953964233 2023-01-24 02:47:26.934366: step: 356/470, loss: 0.09652471542358398 2023-01-24 02:47:27.751923: step: 358/470, loss: 0.8130881786346436 2023-01-24 02:47:28.488252: step: 360/470, loss: 0.2621191740036011 2023-01-24 02:47:29.183703: step: 362/470, loss: 0.2604580819606781 2023-01-24 02:47:29.884492: step: 364/470, loss: 0.27661120891571045 2023-01-24 02:47:30.613093: step: 366/470, loss: 0.23585830628871918 2023-01-24 02:47:31.347674: step: 368/470, loss: 0.2118925154209137 2023-01-24 02:47:32.039718: step: 370/470, loss: 0.9197829365730286 2023-01-24 02:47:32.823663: step: 372/470, loss: 0.3997812867164612 2023-01-24 02:47:33.602151: step: 374/470, loss: 0.20830069482326508 2023-01-24 02:47:34.311768: step: 376/470, loss: 0.3739283084869385 2023-01-24 02:47:35.087360: step: 378/470, loss: 0.3728155195713043 2023-01-24 02:47:35.900445: step: 380/470, loss: 0.1496754288673401 2023-01-24 02:47:36.608497: step: 382/470, loss: 0.08371566236019135 2023-01-24 02:47:37.404742: step: 384/470, loss: 0.33781763911247253 2023-01-24 02:47:38.242475: step: 386/470, loss: 0.1618962287902832 2023-01-24 02:47:38.936912: step: 388/470, loss: 0.42494645714759827 2023-01-24 02:47:39.663493: step: 390/470, loss: 0.19731223583221436 2023-01-24 02:47:40.387288: step: 392/470, loss: 0.21920977532863617 2023-01-24 02:47:41.222797: step: 394/470, loss: 0.767740786075592 2023-01-24 02:47:42.018244: step: 396/470, loss: 0.29436159133911133 2023-01-24 02:47:42.793449: step: 398/470, loss: 0.1768275797367096 2023-01-24 02:47:43.478581: step: 400/470, loss: 0.27189314365386963 2023-01-24 02:47:44.226994: step: 402/470, loss: 0.6092924475669861 2023-01-24 02:47:44.965572: step: 404/470, loss: 0.31457844376564026 2023-01-24 02:47:45.811851: step: 406/470, loss: 0.18229728937149048 2023-01-24 02:47:46.553777: step: 408/470, loss: 0.24030300974845886 2023-01-24 02:47:47.336783: step: 410/470, loss: 0.2986292243003845 2023-01-24 02:47:48.139803: step: 412/470, loss: 0.3573097586631775 2023-01-24 02:47:48.848645: step: 414/470, loss: 0.25912851095199585 2023-01-24 02:47:49.632827: step: 416/470, loss: 0.42473936080932617 2023-01-24 02:47:50.393231: step: 418/470, loss: 0.1592417061328888 2023-01-24 02:47:51.138978: step: 420/470, loss: 0.18681542575359344 2023-01-24 02:47:51.832843: step: 422/470, loss: 0.7109906077384949 2023-01-24 02:47:52.586192: step: 424/470, loss: 0.18564318120479584 2023-01-24 02:47:53.382952: step: 426/470, loss: 0.2508256137371063 2023-01-24 02:47:54.137633: step: 428/470, loss: 0.8340566158294678 2023-01-24 02:47:54.907781: step: 430/470, loss: 0.2437361627817154 2023-01-24 02:47:55.639754: step: 432/470, loss: 0.5458521246910095 2023-01-24 02:47:56.504740: step: 434/470, loss: 0.4009840786457062 2023-01-24 02:47:57.245977: step: 436/470, loss: 0.2011309266090393 2023-01-24 02:47:57.989702: step: 438/470, loss: 0.20616890490055084 2023-01-24 02:47:58.706174: step: 440/470, loss: 0.3620125651359558 2023-01-24 02:47:59.489808: step: 442/470, loss: 0.2353145331144333 2023-01-24 02:48:00.240771: step: 444/470, loss: 0.6447851657867432 2023-01-24 02:48:00.963099: step: 446/470, loss: 1.2982354164123535 2023-01-24 02:48:01.763151: step: 448/470, loss: 0.16182303428649902 2023-01-24 02:48:02.524707: step: 450/470, loss: 0.9789178371429443 2023-01-24 02:48:03.274602: step: 452/470, loss: 0.23408180475234985 2023-01-24 02:48:03.995981: step: 454/470, loss: 0.44805386662483215 2023-01-24 02:48:04.707416: step: 456/470, loss: 0.24204997718334198 2023-01-24 02:48:05.443895: step: 458/470, loss: 2.4867639541625977 2023-01-24 02:48:06.225819: step: 460/470, loss: 0.21604380011558533 2023-01-24 02:48:07.027801: step: 462/470, loss: 0.11348915100097656 2023-01-24 02:48:07.691699: step: 464/470, loss: 0.6297299265861511 2023-01-24 02:48:08.480786: step: 466/470, loss: 1.2539268732070923 2023-01-24 02:48:09.219442: step: 468/470, loss: 0.3964780867099762 2023-01-24 02:48:10.016723: step: 470/470, loss: 0.3635796904563904 2023-01-24 02:48:10.869013: step: 472/470, loss: 0.15717843174934387 2023-01-24 02:48:11.617849: step: 474/470, loss: 0.17565882205963135 2023-01-24 02:48:12.460680: step: 476/470, loss: 0.3112245202064514 2023-01-24 02:48:13.225791: step: 478/470, loss: 0.18767109513282776 2023-01-24 02:48:13.940986: step: 480/470, loss: 0.5524680018424988 2023-01-24 02:48:14.664219: step: 482/470, loss: 0.2581130266189575 2023-01-24 02:48:15.354209: step: 484/470, loss: 0.27895861864089966 2023-01-24 02:48:16.110654: step: 486/470, loss: 0.42541971802711487 2023-01-24 02:48:16.773555: step: 488/470, loss: 0.23396803438663483 2023-01-24 02:48:17.471727: step: 490/470, loss: 0.8111799359321594 2023-01-24 02:48:18.166009: step: 492/470, loss: 0.5374805331230164 2023-01-24 02:48:18.957627: step: 494/470, loss: 0.1266305297613144 2023-01-24 02:48:19.682925: step: 496/470, loss: 0.5135464668273926 2023-01-24 02:48:20.463279: step: 498/470, loss: 0.19670523703098297 2023-01-24 02:48:21.106239: step: 500/470, loss: 0.3329498767852783 2023-01-24 02:48:21.868788: step: 502/470, loss: 0.09014225006103516 2023-01-24 02:48:22.577425: step: 504/470, loss: 0.15468470752239227 2023-01-24 02:48:23.285104: step: 506/470, loss: 0.9147515296936035 2023-01-24 02:48:24.013233: step: 508/470, loss: 0.2802393138408661 2023-01-24 02:48:24.875779: step: 510/470, loss: 0.30511119961738586 2023-01-24 02:48:25.563984: step: 512/470, loss: 0.20601776242256165 2023-01-24 02:48:26.288680: step: 514/470, loss: 0.6622411012649536 2023-01-24 02:48:27.094120: step: 516/470, loss: 0.30970463156700134 2023-01-24 02:48:27.772783: step: 518/470, loss: 0.18782645463943481 2023-01-24 02:48:28.471484: step: 520/470, loss: 0.3975909948348999 2023-01-24 02:48:29.239301: step: 522/470, loss: 0.2327815592288971 2023-01-24 02:48:29.993167: step: 524/470, loss: 0.09739214926958084 2023-01-24 02:48:30.744231: step: 526/470, loss: 0.8930928111076355 2023-01-24 02:48:31.553823: step: 528/470, loss: 0.365343302488327 2023-01-24 02:48:32.309650: step: 530/470, loss: 0.203633651137352 2023-01-24 02:48:33.103274: step: 532/470, loss: 1.6148757934570312 2023-01-24 02:48:33.846355: step: 534/470, loss: 0.30228936672210693 2023-01-24 02:48:34.772513: step: 536/470, loss: 0.23546025156974792 2023-01-24 02:48:35.459717: step: 538/470, loss: 0.1620427817106247 2023-01-24 02:48:36.118633: step: 540/470, loss: 0.16190826892852783 2023-01-24 02:48:36.860071: step: 542/470, loss: 0.0964878499507904 2023-01-24 02:48:37.669943: step: 544/470, loss: 0.12071750313043594 2023-01-24 02:48:38.481050: step: 546/470, loss: 0.43174058198928833 2023-01-24 02:48:39.284882: step: 548/470, loss: 1.1310662031173706 2023-01-24 02:48:40.044258: step: 550/470, loss: 0.5546995401382446 2023-01-24 02:48:40.934679: step: 552/470, loss: 0.7945947051048279 2023-01-24 02:48:41.684384: step: 554/470, loss: 0.5049540996551514 2023-01-24 02:48:42.437563: step: 556/470, loss: 0.09438646584749222 2023-01-24 02:48:43.162802: step: 558/470, loss: 0.9007434844970703 2023-01-24 02:48:43.845776: step: 560/470, loss: 0.10236416757106781 2023-01-24 02:48:44.571505: step: 562/470, loss: 0.957366406917572 2023-01-24 02:48:45.364464: step: 564/470, loss: 0.2416938692331314 2023-01-24 02:48:46.122465: step: 566/470, loss: 0.7880321145057678 2023-01-24 02:48:46.874861: step: 568/470, loss: 0.5304555296897888 2023-01-24 02:48:47.642267: step: 570/470, loss: 0.3842546045780182 2023-01-24 02:48:48.509013: step: 572/470, loss: 0.22081181406974792 2023-01-24 02:48:49.350807: step: 574/470, loss: 0.10166381299495697 2023-01-24 02:48:50.139817: step: 576/470, loss: 0.19878722727298737 2023-01-24 02:48:50.916125: step: 578/470, loss: 0.3669015169143677 2023-01-24 02:48:51.749859: step: 580/470, loss: 0.27071359753608704 2023-01-24 02:48:52.534250: step: 582/470, loss: 0.1869358867406845 2023-01-24 02:48:53.295193: step: 584/470, loss: 0.2697611451148987 2023-01-24 02:48:53.965030: step: 586/470, loss: 0.27031758427619934 2023-01-24 02:48:54.724070: step: 588/470, loss: 0.9854125380516052 2023-01-24 02:48:55.460647: step: 590/470, loss: 0.14197781682014465 2023-01-24 02:48:56.233339: step: 592/470, loss: 0.3427768051624298 2023-01-24 02:48:57.024645: step: 594/470, loss: 0.5351526141166687 2023-01-24 02:48:57.752246: step: 596/470, loss: 0.14945143461227417 2023-01-24 02:48:58.447766: step: 598/470, loss: 0.47398558259010315 2023-01-24 02:48:59.201408: step: 600/470, loss: 0.4863567650318146 2023-01-24 02:48:59.928836: step: 602/470, loss: 0.48081153631210327 2023-01-24 02:49:00.676487: step: 604/470, loss: 0.13413645327091217 2023-01-24 02:49:01.496361: step: 606/470, loss: 0.5402474403381348 2023-01-24 02:49:02.280944: step: 608/470, loss: 0.5544330477714539 2023-01-24 02:49:03.006049: step: 610/470, loss: 0.388392835855484 2023-01-24 02:49:03.737390: step: 612/470, loss: 1.0991339683532715 2023-01-24 02:49:04.533850: step: 614/470, loss: 0.35932278633117676 2023-01-24 02:49:05.303372: step: 616/470, loss: 0.26169684529304504 2023-01-24 02:49:06.080386: step: 618/470, loss: 0.15292510390281677 2023-01-24 02:49:06.957457: step: 620/470, loss: 0.503343939781189 2023-01-24 02:49:07.634798: step: 622/470, loss: 0.2210429310798645 2023-01-24 02:49:08.466745: step: 624/470, loss: 0.11323003470897675 2023-01-24 02:49:09.230513: step: 626/470, loss: 0.49351710081100464 2023-01-24 02:49:09.941835: step: 628/470, loss: 0.19296793639659882 2023-01-24 02:49:10.682216: step: 630/470, loss: 0.17127367854118347 2023-01-24 02:49:11.369300: step: 632/470, loss: 0.3556111454963684 2023-01-24 02:49:12.145429: step: 634/470, loss: 0.31057944893836975 2023-01-24 02:49:12.795271: step: 636/470, loss: 0.42370232939720154 2023-01-24 02:49:13.604277: step: 638/470, loss: 0.8841353058815002 2023-01-24 02:49:14.375178: step: 640/470, loss: 0.24589870870113373 2023-01-24 02:49:15.123725: step: 642/470, loss: 0.1649733930826187 2023-01-24 02:49:15.831775: step: 644/470, loss: 0.6155900955200195 2023-01-24 02:49:16.608121: step: 646/470, loss: 0.36966732144355774 2023-01-24 02:49:17.353838: step: 648/470, loss: 0.2079157680273056 2023-01-24 02:49:18.264805: step: 650/470, loss: 0.8073568344116211 2023-01-24 02:49:18.983469: step: 652/470, loss: 0.19634874165058136 2023-01-24 02:49:19.774056: step: 654/470, loss: 0.3249640166759491 2023-01-24 02:49:20.568582: step: 656/470, loss: 1.5049084424972534 2023-01-24 02:49:21.372594: step: 658/470, loss: 0.42330726981163025 2023-01-24 02:49:22.136028: step: 660/470, loss: 0.20834825932979584 2023-01-24 02:49:22.926015: step: 662/470, loss: 0.4067968428134918 2023-01-24 02:49:23.623027: step: 664/470, loss: 0.3112955093383789 2023-01-24 02:49:24.365107: step: 666/470, loss: 1.358984351158142 2023-01-24 02:49:25.113939: step: 668/470, loss: 0.25829464197158813 2023-01-24 02:49:25.877958: step: 670/470, loss: 0.4780522286891937 2023-01-24 02:49:26.609069: step: 672/470, loss: 0.18391302227973938 2023-01-24 02:49:27.429859: step: 674/470, loss: 0.32501521706581116 2023-01-24 02:49:28.339998: step: 676/470, loss: 0.11137870699167252 2023-01-24 02:49:29.066259: step: 678/470, loss: 1.4713650941848755 2023-01-24 02:49:29.930650: step: 680/470, loss: 1.0248942375183105 2023-01-24 02:49:30.716570: step: 682/470, loss: 0.17500053346157074 2023-01-24 02:49:31.481282: step: 684/470, loss: 0.5887848734855652 2023-01-24 02:49:32.227251: step: 686/470, loss: 0.3135591149330139 2023-01-24 02:49:32.993726: step: 688/470, loss: 0.47807666659355164 2023-01-24 02:49:33.773963: step: 690/470, loss: 0.15201377868652344 2023-01-24 02:49:34.605349: step: 692/470, loss: 0.1818476915359497 2023-01-24 02:49:35.328195: step: 694/470, loss: 0.15883833169937134 2023-01-24 02:49:36.132155: step: 696/470, loss: 0.2723800539970398 2023-01-24 02:49:37.024970: step: 698/470, loss: 0.9609336853027344 2023-01-24 02:49:37.718159: step: 700/470, loss: 0.14401142299175262 2023-01-24 02:49:38.438034: step: 702/470, loss: 0.3710044026374817 2023-01-24 02:49:39.223144: step: 704/470, loss: 0.49077916145324707 2023-01-24 02:49:39.973053: step: 706/470, loss: 0.19224616885185242 2023-01-24 02:49:40.650904: step: 708/470, loss: 0.09910887479782104 2023-01-24 02:49:41.540131: step: 710/470, loss: 2.2329654693603516 2023-01-24 02:49:42.301831: step: 712/470, loss: 0.3099704384803772 2023-01-24 02:49:42.975636: step: 714/470, loss: 0.07138052582740784 2023-01-24 02:49:43.780808: step: 716/470, loss: 0.21385283768177032 2023-01-24 02:49:44.481923: step: 718/470, loss: 0.6778049468994141 2023-01-24 02:49:45.240811: step: 720/470, loss: 0.43242332339286804 2023-01-24 02:49:46.028482: step: 722/470, loss: 0.4446261525154114 2023-01-24 02:49:46.789328: step: 724/470, loss: 0.2377616912126541 2023-01-24 02:49:47.545822: step: 726/470, loss: 0.1549036204814911 2023-01-24 02:49:48.301722: step: 728/470, loss: 0.25162452459335327 2023-01-24 02:49:49.020782: step: 730/470, loss: 0.6771296262741089 2023-01-24 02:49:49.796103: step: 732/470, loss: 0.7284178137779236 2023-01-24 02:49:50.531775: step: 734/470, loss: 0.8691809177398682 2023-01-24 02:49:51.311217: step: 736/470, loss: 0.19968397915363312 2023-01-24 02:49:52.060252: step: 738/470, loss: 0.1259831339120865 2023-01-24 02:49:52.863116: step: 740/470, loss: 0.10791204124689102 2023-01-24 02:49:53.617484: step: 742/470, loss: 0.31472277641296387 2023-01-24 02:49:54.369936: step: 744/470, loss: 0.19977986812591553 2023-01-24 02:49:55.078730: step: 746/470, loss: 0.15267623960971832 2023-01-24 02:49:55.832847: step: 748/470, loss: 2.1145780086517334 2023-01-24 02:49:56.552865: step: 750/470, loss: 0.2913758456707001 2023-01-24 02:49:57.339011: step: 752/470, loss: 0.16115057468414307 2023-01-24 02:49:58.072989: step: 754/470, loss: 0.1917055994272232 2023-01-24 02:49:58.832596: step: 756/470, loss: 0.13388597965240479 2023-01-24 02:49:59.553091: step: 758/470, loss: 0.8417869210243225 2023-01-24 02:50:00.361571: step: 760/470, loss: 0.259040892124176 2023-01-24 02:50:01.182152: step: 762/470, loss: 1.860278606414795 2023-01-24 02:50:01.995401: step: 764/470, loss: 0.15627700090408325 2023-01-24 02:50:02.694554: step: 766/470, loss: 0.2632465958595276 2023-01-24 02:50:03.389174: step: 768/470, loss: 0.5329182147979736 2023-01-24 02:50:04.111694: step: 770/470, loss: 0.17463798820972443 2023-01-24 02:50:04.828382: step: 772/470, loss: 0.3025292158126831 2023-01-24 02:50:05.600263: step: 774/470, loss: 0.0922599583864212 2023-01-24 02:50:06.420028: step: 776/470, loss: 0.17804045975208282 2023-01-24 02:50:07.110500: step: 778/470, loss: 0.5846086740493774 2023-01-24 02:50:07.887341: step: 780/470, loss: 1.0991511344909668 2023-01-24 02:50:08.586777: step: 782/470, loss: 0.20432426035404205 2023-01-24 02:50:09.218455: step: 784/470, loss: 0.1008855476975441 2023-01-24 02:50:09.962274: step: 786/470, loss: 1.1069185733795166 2023-01-24 02:50:10.708782: step: 788/470, loss: 0.32851848006248474 2023-01-24 02:50:11.473073: step: 790/470, loss: 0.3457392454147339 2023-01-24 02:50:12.218110: step: 792/470, loss: 0.14442162215709686 2023-01-24 02:50:12.968953: step: 794/470, loss: 0.12679028511047363 2023-01-24 02:50:13.696951: step: 796/470, loss: 0.12389878928661346 2023-01-24 02:50:14.384978: step: 798/470, loss: 0.4223572909832001 2023-01-24 02:50:15.144159: step: 800/470, loss: 0.9779374599456787 2023-01-24 02:50:15.863868: step: 802/470, loss: 0.849580705165863 2023-01-24 02:50:16.567998: step: 804/470, loss: 0.32323935627937317 2023-01-24 02:50:17.329587: step: 806/470, loss: 0.3388105034828186 2023-01-24 02:50:18.097247: step: 808/470, loss: 0.7310128211975098 2023-01-24 02:50:18.912637: step: 810/470, loss: 0.24826985597610474 2023-01-24 02:50:19.710264: step: 812/470, loss: 0.2552504539489746 2023-01-24 02:50:20.471425: step: 814/470, loss: 0.2834188938140869 2023-01-24 02:50:21.196356: step: 816/470, loss: 0.6483306884765625 2023-01-24 02:50:21.933728: step: 818/470, loss: 0.4713938236236572 2023-01-24 02:50:22.740468: step: 820/470, loss: 0.26083284616470337 2023-01-24 02:50:23.502076: step: 822/470, loss: 0.20542579889297485 2023-01-24 02:50:24.210924: step: 824/470, loss: 0.2176228165626526 2023-01-24 02:50:24.914389: step: 826/470, loss: 0.2208702117204666 2023-01-24 02:50:25.762394: step: 828/470, loss: 0.27593034505844116 2023-01-24 02:50:26.498186: step: 830/470, loss: 0.4739680588245392 2023-01-24 02:50:27.253119: step: 832/470, loss: 0.2514054477214813 2023-01-24 02:50:28.051769: step: 834/470, loss: 0.250079482793808 2023-01-24 02:50:28.798991: step: 836/470, loss: 0.17073297500610352 2023-01-24 02:50:29.589823: step: 838/470, loss: 1.1584669351577759 2023-01-24 02:50:30.316278: step: 840/470, loss: 0.38292911648750305 2023-01-24 02:50:31.041106: step: 842/470, loss: 0.2206546664237976 2023-01-24 02:50:31.831951: step: 844/470, loss: 0.49227169156074524 2023-01-24 02:50:32.650624: step: 846/470, loss: 0.2030370980501175 2023-01-24 02:50:33.403067: step: 848/470, loss: 0.5499109625816345 2023-01-24 02:50:34.227543: step: 850/470, loss: 0.3381698727607727 2023-01-24 02:50:34.900370: step: 852/470, loss: 0.6527577638626099 2023-01-24 02:50:35.689281: step: 854/470, loss: 0.22261139750480652 2023-01-24 02:50:36.571331: step: 856/470, loss: 0.6786369681358337 2023-01-24 02:50:37.324781: step: 858/470, loss: 0.3246194124221802 2023-01-24 02:50:38.184830: step: 860/470, loss: 0.17136085033416748 2023-01-24 02:50:38.943074: step: 862/470, loss: 0.7740350365638733 2023-01-24 02:50:39.834356: step: 864/470, loss: 6.283660888671875 2023-01-24 02:50:40.578940: step: 866/470, loss: 0.3030953109264374 2023-01-24 02:50:41.457834: step: 868/470, loss: 0.5647780299186707 2023-01-24 02:50:42.182789: step: 870/470, loss: 0.3866567313671112 2023-01-24 02:50:42.939064: step: 872/470, loss: 1.285661220550537 2023-01-24 02:50:43.668072: step: 874/470, loss: 0.18321466445922852 2023-01-24 02:50:44.411932: step: 876/470, loss: 0.2706562876701355 2023-01-24 02:50:45.203703: step: 878/470, loss: 0.30584192276000977 2023-01-24 02:50:45.974318: step: 880/470, loss: 0.143959641456604 2023-01-24 02:50:46.794515: step: 882/470, loss: 0.4257970452308655 2023-01-24 02:50:47.573895: step: 884/470, loss: 0.32551464438438416 2023-01-24 02:50:48.353853: step: 886/470, loss: 0.21082256734371185 2023-01-24 02:50:49.128242: step: 888/470, loss: 0.2381884753704071 2023-01-24 02:50:49.894735: step: 890/470, loss: 0.30021804571151733 2023-01-24 02:50:50.705900: step: 892/470, loss: 0.33029264211654663 2023-01-24 02:50:51.499420: step: 894/470, loss: 0.11910691857337952 2023-01-24 02:50:52.219703: step: 896/470, loss: 0.3467719554901123 2023-01-24 02:50:52.957179: step: 898/470, loss: 0.7879258394241333 2023-01-24 02:50:53.689371: step: 900/470, loss: 0.2893523871898651 2023-01-24 02:50:54.439007: step: 902/470, loss: 0.25279688835144043 2023-01-24 02:50:55.119439: step: 904/470, loss: 0.15351781249046326 2023-01-24 02:50:55.812260: step: 906/470, loss: 0.41283392906188965 2023-01-24 02:50:56.558340: step: 908/470, loss: 0.14755499362945557 2023-01-24 02:50:57.305852: step: 910/470, loss: 0.07284556329250336 2023-01-24 02:50:58.033666: step: 912/470, loss: 0.21138407289981842 2023-01-24 02:50:58.920569: step: 914/470, loss: 0.503731906414032 2023-01-24 02:50:59.694723: step: 916/470, loss: 0.367249995470047 2023-01-24 02:51:00.391721: step: 918/470, loss: 0.2869451344013214 2023-01-24 02:51:01.164711: step: 920/470, loss: 0.5139609575271606 2023-01-24 02:51:01.913627: step: 922/470, loss: 0.4172641932964325 2023-01-24 02:51:02.614708: step: 924/470, loss: 0.1905202567577362 2023-01-24 02:51:03.386056: step: 926/470, loss: 0.16458773612976074 2023-01-24 02:51:04.195051: step: 928/470, loss: 0.20133328437805176 2023-01-24 02:51:04.898613: step: 930/470, loss: 0.2865738272666931 2023-01-24 02:51:05.597546: step: 932/470, loss: 0.10317887365818024 2023-01-24 02:51:06.252780: step: 934/470, loss: 0.2089749574661255 2023-01-24 02:51:06.999984: step: 936/470, loss: 0.7139807939529419 2023-01-24 02:51:07.658358: step: 938/470, loss: 5.944061279296875 2023-01-24 02:51:08.424395: step: 940/470, loss: 0.49773871898651123 2023-01-24 02:51:09.064253: step: 942/470, loss: 0.29733988642692566 ================================================== Loss: 0.424 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3103002789195115, 'r': 0.32855323650301216, 'f1': 0.3191660011743547}, 'combined': 0.235174948233735, 'epoch': 9} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33817477473265645, 'r': 0.3300455734169676, 'f1': 0.3340607263782446}, 'combined': 0.22270715091882967, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2958260170951498, 'r': 0.3300677382389906, 'f1': 0.31201022072098306}, 'combined': 0.22990226789967172, 'epoch': 9} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.328371145320051, 'r': 0.33437023355185963, 'f1': 0.3313435377741153}, 'combined': 0.22089569184941016, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4722222222222222, 'r': 0.3695652173913043, 'f1': 0.41463414634146345}, 'combined': 0.2764227642276423, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.19827586206896552, 'f1': 0.2555555555555556}, 'combined': 0.1703703703703704, 'epoch': 9} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2958260170951498, 'r': 0.3300677382389906, 'f1': 0.31201022072098306}, 'combined': 0.22990226789967172, 'epoch': 9} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.328371145320051, 'r': 0.33437023355185963, 'f1': 0.3313435377741153}, 'combined': 0.22089569184941016, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.19827586206896552, 'f1': 0.2555555555555556}, 'combined': 0.1703703703703704, 'epoch': 9} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:54:00.502407: step: 2/470, loss: 0.056591589003801346 2023-01-24 02:54:01.246528: step: 4/470, loss: 0.13280236721038818 2023-01-24 02:54:01.975179: step: 6/470, loss: 0.4893929958343506 2023-01-24 02:54:02.660436: step: 8/470, loss: 0.2868712842464447 2023-01-24 02:54:03.386911: step: 10/470, loss: 0.157928004860878 2023-01-24 02:54:04.092880: step: 12/470, loss: 0.7920506000518799 2023-01-24 02:54:04.864008: step: 14/470, loss: 0.37678074836730957 2023-01-24 02:54:05.630395: step: 16/470, loss: 0.29826444387435913 2023-01-24 02:54:06.336171: step: 18/470, loss: 0.0674176812171936 2023-01-24 02:54:07.048664: step: 20/470, loss: 0.08113688230514526 2023-01-24 02:54:07.786290: step: 22/470, loss: 0.13426075875759125 2023-01-24 02:54:08.510255: step: 24/470, loss: 0.1328933835029602 2023-01-24 02:54:09.220544: step: 26/470, loss: 0.06381936371326447 2023-01-24 02:54:10.090335: step: 28/470, loss: 0.07034029066562653 2023-01-24 02:54:10.790961: step: 30/470, loss: 0.06103328615427017 2023-01-24 02:54:11.597837: step: 32/470, loss: 0.045456431806087494 2023-01-24 02:54:12.342689: step: 34/470, loss: 0.373674601316452 2023-01-24 02:54:13.120399: step: 36/470, loss: 0.04646005108952522 2023-01-24 02:54:13.914303: step: 38/470, loss: 0.5150260329246521 2023-01-24 02:54:14.677504: step: 40/470, loss: 0.3609604835510254 2023-01-24 02:54:15.401956: step: 42/470, loss: 0.34102901816368103 2023-01-24 02:54:16.199707: step: 44/470, loss: 0.293104887008667 2023-01-24 02:54:16.951626: step: 46/470, loss: 0.2512171268463135 2023-01-24 02:54:17.727506: step: 48/470, loss: 0.2921146750450134 2023-01-24 02:54:18.582052: step: 50/470, loss: 0.14088846743106842 2023-01-24 02:54:19.249771: step: 52/470, loss: 0.3724920451641083 2023-01-24 02:54:19.990676: step: 54/470, loss: 0.2027990221977234 2023-01-24 02:54:20.699818: step: 56/470, loss: 0.19799435138702393 2023-01-24 02:54:21.487396: step: 58/470, loss: 0.3948134779930115 2023-01-24 02:54:22.341941: step: 60/470, loss: 0.08492279052734375 2023-01-24 02:54:23.042956: step: 62/470, loss: 0.12444708496332169 2023-01-24 02:54:23.777482: step: 64/470, loss: 0.08406122028827667 2023-01-24 02:54:24.521728: step: 66/470, loss: 5.688138008117676 2023-01-24 02:54:25.232657: step: 68/470, loss: 0.10036779195070267 2023-01-24 02:54:25.935128: step: 70/470, loss: 0.14819800853729248 2023-01-24 02:54:26.718620: step: 72/470, loss: 0.4329559803009033 2023-01-24 02:54:27.457870: step: 74/470, loss: 0.3488907814025879 2023-01-24 02:54:28.164030: step: 76/470, loss: 0.3759381175041199 2023-01-24 02:54:28.929797: step: 78/470, loss: 0.49098092317581177 2023-01-24 02:54:29.638872: step: 80/470, loss: 0.5063804984092712 2023-01-24 02:54:30.426010: step: 82/470, loss: 0.14004996418952942 2023-01-24 02:54:31.184348: step: 84/470, loss: 0.09356193989515305 2023-01-24 02:54:31.922000: step: 86/470, loss: 0.17746534943580627 2023-01-24 02:54:32.699598: step: 88/470, loss: 0.09610351175069809 2023-01-24 02:54:33.445377: step: 90/470, loss: 0.14981889724731445 2023-01-24 02:54:34.141943: step: 92/470, loss: 1.6448625326156616 2023-01-24 02:54:34.886984: step: 94/470, loss: 0.2239532321691513 2023-01-24 02:54:35.560673: step: 96/470, loss: 0.1338728666305542 2023-01-24 02:54:36.336927: step: 98/470, loss: 0.2755134701728821 2023-01-24 02:54:37.092185: step: 100/470, loss: 0.18865349888801575 2023-01-24 02:54:37.834719: step: 102/470, loss: 0.06774507462978363 2023-01-24 02:54:38.602000: step: 104/470, loss: 0.8064208030700684 2023-01-24 02:54:39.348693: step: 106/470, loss: 0.08489122241735458 2023-01-24 02:54:40.076840: step: 108/470, loss: 0.08381538838148117 2023-01-24 02:54:40.793263: step: 110/470, loss: 0.3395574390888214 2023-01-24 02:54:41.651268: step: 112/470, loss: 0.15705344080924988 2023-01-24 02:54:42.358900: step: 114/470, loss: 0.4517096281051636 2023-01-24 02:54:43.066865: step: 116/470, loss: 0.1848120093345642 2023-01-24 02:54:43.772811: step: 118/470, loss: 0.2923411726951599 2023-01-24 02:54:44.470475: step: 120/470, loss: 0.24967914819717407 2023-01-24 02:54:45.175283: step: 122/470, loss: 0.8194922804832458 2023-01-24 02:54:45.956677: step: 124/470, loss: 0.44352638721466064 2023-01-24 02:54:46.729267: step: 126/470, loss: 0.319720059633255 2023-01-24 02:54:47.449588: step: 128/470, loss: 0.652948796749115 2023-01-24 02:54:48.213157: step: 130/470, loss: 0.6255174279212952 2023-01-24 02:54:48.978283: step: 132/470, loss: 0.5446348190307617 2023-01-24 02:54:49.674553: step: 134/470, loss: 0.3582460284233093 2023-01-24 02:54:50.473126: step: 136/470, loss: 0.655910074710846 2023-01-24 02:54:51.242345: step: 138/470, loss: 0.08800873160362244 2023-01-24 02:54:51.929807: step: 140/470, loss: 0.09775952994823456 2023-01-24 02:54:52.661435: step: 142/470, loss: 0.6266007423400879 2023-01-24 02:54:53.417005: step: 144/470, loss: 0.3636000454425812 2023-01-24 02:54:54.136969: step: 146/470, loss: 0.1658439189195633 2023-01-24 02:54:54.845021: step: 148/470, loss: 0.059976786375045776 2023-01-24 02:54:55.524838: step: 150/470, loss: 0.11647554486989975 2023-01-24 02:54:56.255012: step: 152/470, loss: 0.14295542240142822 2023-01-24 02:54:57.058271: step: 154/470, loss: 0.7250072956085205 2023-01-24 02:54:57.748243: step: 156/470, loss: 0.12420438975095749 2023-01-24 02:54:58.490535: step: 158/470, loss: 0.22176730632781982 2023-01-24 02:54:59.263093: step: 160/470, loss: 0.13311302661895752 2023-01-24 02:54:59.970235: step: 162/470, loss: 0.1882036328315735 2023-01-24 02:55:00.678208: step: 164/470, loss: 0.3161405920982361 2023-01-24 02:55:01.426743: step: 166/470, loss: 0.38165828585624695 2023-01-24 02:55:02.312161: step: 168/470, loss: 0.5611687302589417 2023-01-24 02:55:03.056495: step: 170/470, loss: 0.7891055941581726 2023-01-24 02:55:03.786300: step: 172/470, loss: 0.2864110469818115 2023-01-24 02:55:04.517691: step: 174/470, loss: 1.51108717918396 2023-01-24 02:55:05.225983: step: 176/470, loss: 0.3315257430076599 2023-01-24 02:55:05.977680: step: 178/470, loss: 0.8074526190757751 2023-01-24 02:55:06.675293: step: 180/470, loss: 0.13533882796764374 2023-01-24 02:55:07.338253: step: 182/470, loss: 0.6222794055938721 2023-01-24 02:55:08.056856: step: 184/470, loss: 1.3589999675750732 2023-01-24 02:55:08.836882: step: 186/470, loss: 0.250261515378952 2023-01-24 02:55:09.592929: step: 188/470, loss: 0.1996300369501114 2023-01-24 02:55:10.346504: step: 190/470, loss: 0.8840703368186951 2023-01-24 02:55:11.040566: step: 192/470, loss: 0.5001525282859802 2023-01-24 02:55:11.854589: step: 194/470, loss: 0.20161223411560059 2023-01-24 02:55:12.596706: step: 196/470, loss: 0.5042540431022644 2023-01-24 02:55:13.303225: step: 198/470, loss: 0.28795769810676575 2023-01-24 02:55:14.139949: step: 200/470, loss: 0.6410402655601501 2023-01-24 02:55:14.857021: step: 202/470, loss: 0.2731340527534485 2023-01-24 02:55:15.597256: step: 204/470, loss: 0.12334098666906357 2023-01-24 02:55:16.279186: step: 206/470, loss: 0.22625799477100372 2023-01-24 02:55:16.977345: step: 208/470, loss: 0.5543537735939026 2023-01-24 02:55:17.707210: step: 210/470, loss: 0.2280716747045517 2023-01-24 02:55:18.450054: step: 212/470, loss: 0.08557216078042984 2023-01-24 02:55:19.199199: step: 214/470, loss: 0.18337619304656982 2023-01-24 02:55:19.943890: step: 216/470, loss: 1.1990138292312622 2023-01-24 02:55:20.656064: step: 218/470, loss: 0.13386143743991852 2023-01-24 02:55:21.388180: step: 220/470, loss: 0.5193535685539246 2023-01-24 02:55:22.213951: step: 222/470, loss: 0.17291080951690674 2023-01-24 02:55:23.004999: step: 224/470, loss: 0.2032584249973297 2023-01-24 02:55:23.801992: step: 226/470, loss: 0.6475588083267212 2023-01-24 02:55:24.523570: step: 228/470, loss: 0.26753678917884827 2023-01-24 02:55:25.330839: step: 230/470, loss: 0.46687716245651245 2023-01-24 02:55:26.117390: step: 232/470, loss: 0.2332172989845276 2023-01-24 02:55:26.950918: step: 234/470, loss: 0.3625449240207672 2023-01-24 02:55:27.717532: step: 236/470, loss: 13.1072359085083 2023-01-24 02:55:28.425881: step: 238/470, loss: 0.16786223649978638 2023-01-24 02:55:29.151677: step: 240/470, loss: 0.1498054563999176 2023-01-24 02:55:29.876610: step: 242/470, loss: 0.7497914433479309 2023-01-24 02:55:30.517962: step: 244/470, loss: 0.7256143093109131 2023-01-24 02:55:31.299748: step: 246/470, loss: 0.9502539038658142 2023-01-24 02:55:32.071569: step: 248/470, loss: 0.6548596620559692 2023-01-24 02:55:32.840364: step: 250/470, loss: 0.21729148924350739 2023-01-24 02:55:33.532321: step: 252/470, loss: 0.17379331588745117 2023-01-24 02:55:34.306589: step: 254/470, loss: 0.10878202319145203 2023-01-24 02:55:35.029355: step: 256/470, loss: 0.342523455619812 2023-01-24 02:55:35.905276: step: 258/470, loss: 1.1544556617736816 2023-01-24 02:55:36.605494: step: 260/470, loss: 0.23245486617088318 2023-01-24 02:55:37.368307: step: 262/470, loss: 0.543807864189148 2023-01-24 02:55:38.125106: step: 264/470, loss: 0.154671311378479 2023-01-24 02:55:38.872858: step: 266/470, loss: 0.2436520904302597 2023-01-24 02:55:39.575874: step: 268/470, loss: 0.11845968663692474 2023-01-24 02:55:40.334484: step: 270/470, loss: 0.5449745655059814 2023-01-24 02:55:41.006029: step: 272/470, loss: 0.3452926278114319 2023-01-24 02:55:41.722682: step: 274/470, loss: 0.2843412756919861 2023-01-24 02:55:42.464730: step: 276/470, loss: 0.2615320682525635 2023-01-24 02:55:43.149961: step: 278/470, loss: 0.12062683701515198 2023-01-24 02:55:43.955919: step: 280/470, loss: 0.08838797360658646 2023-01-24 02:55:44.713704: step: 282/470, loss: 0.5510808229446411 2023-01-24 02:55:45.529266: step: 284/470, loss: 0.18408583104610443 2023-01-24 02:55:46.315220: step: 286/470, loss: 0.3631865084171295 2023-01-24 02:55:47.113236: step: 288/470, loss: 0.1815035343170166 2023-01-24 02:55:47.888464: step: 290/470, loss: 0.13083362579345703 2023-01-24 02:55:48.621878: step: 292/470, loss: 0.18467651307582855 2023-01-24 02:55:49.351820: step: 294/470, loss: 0.28844454884529114 2023-01-24 02:55:50.057378: step: 296/470, loss: 0.11280845105648041 2023-01-24 02:55:50.863843: step: 298/470, loss: 0.1065002903342247 2023-01-24 02:55:51.657057: step: 300/470, loss: 0.13871590793132782 2023-01-24 02:55:52.384166: step: 302/470, loss: 0.7692720890045166 2023-01-24 02:55:53.153546: step: 304/470, loss: 0.42822524905204773 2023-01-24 02:55:53.893805: step: 306/470, loss: 0.17703254520893097 2023-01-24 02:55:54.717442: step: 308/470, loss: 0.6865633130073547 2023-01-24 02:55:55.488934: step: 310/470, loss: 0.17033447325229645 2023-01-24 02:55:56.240598: step: 312/470, loss: 0.1849544495344162 2023-01-24 02:55:57.086624: step: 314/470, loss: 0.5009557604789734 2023-01-24 02:55:57.893856: step: 316/470, loss: 0.1985601931810379 2023-01-24 02:55:58.617412: step: 318/470, loss: 0.7720727920532227 2023-01-24 02:55:59.325637: step: 320/470, loss: 0.5260408520698547 2023-01-24 02:56:00.108306: step: 322/470, loss: 0.5189958810806274 2023-01-24 02:56:00.995786: step: 324/470, loss: 0.34944188594818115 2023-01-24 02:56:01.706992: step: 326/470, loss: 0.309198260307312 2023-01-24 02:56:02.446727: step: 328/470, loss: 1.9898673295974731 2023-01-24 02:56:03.144139: step: 330/470, loss: 0.07750724256038666 2023-01-24 02:56:03.873277: step: 332/470, loss: 0.26930928230285645 2023-01-24 02:56:04.604066: step: 334/470, loss: 0.3827114403247833 2023-01-24 02:56:05.288639: step: 336/470, loss: 0.18784800171852112 2023-01-24 02:56:06.067525: step: 338/470, loss: 0.5030959844589233 2023-01-24 02:56:06.735457: step: 340/470, loss: 0.17444992065429688 2023-01-24 02:56:07.649223: step: 342/470, loss: 0.16230079531669617 2023-01-24 02:56:08.413215: step: 344/470, loss: 0.7461163997650146 2023-01-24 02:56:09.136163: step: 346/470, loss: 0.3461681008338928 2023-01-24 02:56:09.933264: step: 348/470, loss: 0.9509586691856384 2023-01-24 02:56:10.663028: step: 350/470, loss: 0.23319120705127716 2023-01-24 02:56:11.406192: step: 352/470, loss: 0.2624581456184387 2023-01-24 02:56:12.181083: step: 354/470, loss: 0.09867286682128906 2023-01-24 02:56:12.898026: step: 356/470, loss: 0.3626404404640198 2023-01-24 02:56:13.707700: step: 358/470, loss: 0.1559712439775467 2023-01-24 02:56:14.376506: step: 360/470, loss: 0.2118682861328125 2023-01-24 02:56:15.123084: step: 362/470, loss: 0.1414262056350708 2023-01-24 02:56:15.831847: step: 364/470, loss: 0.21667654812335968 2023-01-24 02:56:16.603515: step: 366/470, loss: 0.36082273721694946 2023-01-24 02:56:17.336471: step: 368/470, loss: 0.12949353456497192 2023-01-24 02:56:18.078424: step: 370/470, loss: 1.4123237133026123 2023-01-24 02:56:18.735392: step: 372/470, loss: 0.3567815124988556 2023-01-24 02:56:19.540344: step: 374/470, loss: 0.958263635635376 2023-01-24 02:56:20.235686: step: 376/470, loss: 0.40245184302330017 2023-01-24 02:56:20.946402: step: 378/470, loss: 0.412080854177475 2023-01-24 02:56:21.702852: step: 380/470, loss: 0.02421180158853531 2023-01-24 02:56:22.501581: step: 382/470, loss: 0.1654946208000183 2023-01-24 02:56:23.375737: step: 384/470, loss: 0.12343335151672363 2023-01-24 02:56:24.103049: step: 386/470, loss: 0.1281142681837082 2023-01-24 02:56:24.811169: step: 388/470, loss: 0.36397942900657654 2023-01-24 02:56:25.704556: step: 390/470, loss: 0.5486847162246704 2023-01-24 02:56:26.412622: step: 392/470, loss: 0.3194306790828705 2023-01-24 02:56:27.151950: step: 394/470, loss: 0.061125390231609344 2023-01-24 02:56:27.891529: step: 396/470, loss: 0.21372345089912415 2023-01-24 02:56:28.602016: step: 398/470, loss: 0.38437792658805847 2023-01-24 02:56:29.359075: step: 400/470, loss: 0.1892595738172531 2023-01-24 02:56:30.045266: step: 402/470, loss: 0.3131447732448578 2023-01-24 02:56:30.921519: step: 404/470, loss: 0.12467707693576813 2023-01-24 02:56:31.686212: step: 406/470, loss: 0.18171878159046173 2023-01-24 02:56:32.431662: step: 408/470, loss: 0.41719669103622437 2023-01-24 02:56:33.173505: step: 410/470, loss: 1.214868426322937 2023-01-24 02:56:33.896091: step: 412/470, loss: 0.30766603350639343 2023-01-24 02:56:34.530474: step: 414/470, loss: 0.28935831785202026 2023-01-24 02:56:35.245238: step: 416/470, loss: 0.27082717418670654 2023-01-24 02:56:35.949371: step: 418/470, loss: 0.17194396257400513 2023-01-24 02:56:36.749625: step: 420/470, loss: 0.36183246970176697 2023-01-24 02:56:37.543779: step: 422/470, loss: 0.4918966591358185 2023-01-24 02:56:38.323058: step: 424/470, loss: 0.5111696720123291 2023-01-24 02:56:39.048593: step: 426/470, loss: 2.1021931171417236 2023-01-24 02:56:39.757550: step: 428/470, loss: 0.24027954041957855 2023-01-24 02:56:40.469955: step: 430/470, loss: 0.47709715366363525 2023-01-24 02:56:41.300319: step: 432/470, loss: 0.3914461135864258 2023-01-24 02:56:41.924545: step: 434/470, loss: 0.26286399364471436 2023-01-24 02:56:42.673965: step: 436/470, loss: 0.21852940320968628 2023-01-24 02:56:43.410341: step: 438/470, loss: 0.1935357004404068 2023-01-24 02:56:44.248375: step: 440/470, loss: 0.21522024273872375 2023-01-24 02:56:45.005869: step: 442/470, loss: 6.902156829833984 2023-01-24 02:56:45.851355: step: 444/470, loss: 0.3409741222858429 2023-01-24 02:56:46.580928: step: 446/470, loss: 0.24355652928352356 2023-01-24 02:56:47.347561: step: 448/470, loss: 0.6423745155334473 2023-01-24 02:56:48.155378: step: 450/470, loss: 0.24495574831962585 2023-01-24 02:56:48.984833: step: 452/470, loss: 0.22389310598373413 2023-01-24 02:56:49.748599: step: 454/470, loss: 0.3068379759788513 2023-01-24 02:56:50.436348: step: 456/470, loss: 0.11338848620653152 2023-01-24 02:56:51.149065: step: 458/470, loss: 0.29847270250320435 2023-01-24 02:56:51.888649: step: 460/470, loss: 0.42329713702201843 2023-01-24 02:56:52.573403: step: 462/470, loss: 0.046367011964321136 2023-01-24 02:56:53.278326: step: 464/470, loss: 0.3115167021751404 2023-01-24 02:56:54.057763: step: 466/470, loss: 0.22372061014175415 2023-01-24 02:56:54.709476: step: 468/470, loss: 0.15685810148715973 2023-01-24 02:56:55.511713: step: 470/470, loss: 0.340934157371521 2023-01-24 02:56:56.314321: step: 472/470, loss: 0.3756090998649597 2023-01-24 02:56:57.111977: step: 474/470, loss: 0.4003685712814331 2023-01-24 02:56:57.790197: step: 476/470, loss: 0.565597414970398 2023-01-24 02:56:58.501630: step: 478/470, loss: 0.35876619815826416 2023-01-24 02:56:59.248726: step: 480/470, loss: 0.10507578402757645 2023-01-24 02:56:59.927051: step: 482/470, loss: 0.12312311679124832 2023-01-24 02:57:00.677975: step: 484/470, loss: 0.07300717383623123 2023-01-24 02:57:01.415343: step: 486/470, loss: 0.5644948482513428 2023-01-24 02:57:02.146562: step: 488/470, loss: 0.20122499763965607 2023-01-24 02:57:02.938858: step: 490/470, loss: 0.25764524936676025 2023-01-24 02:57:03.688321: step: 492/470, loss: 0.843420684337616 2023-01-24 02:57:04.416295: step: 494/470, loss: 0.22662875056266785 2023-01-24 02:57:05.166030: step: 496/470, loss: 0.37807315587997437 2023-01-24 02:57:05.913944: step: 498/470, loss: 0.15613622963428497 2023-01-24 02:57:06.649833: step: 500/470, loss: 0.20546023547649384 2023-01-24 02:57:07.479049: step: 502/470, loss: 0.26250770688056946 2023-01-24 02:57:08.267610: step: 504/470, loss: 0.5560357570648193 2023-01-24 02:57:09.043415: step: 506/470, loss: 0.2005891352891922 2023-01-24 02:57:09.782240: step: 508/470, loss: 0.08451083302497864 2023-01-24 02:57:10.465641: step: 510/470, loss: 0.09619811177253723 2023-01-24 02:57:11.159688: step: 512/470, loss: 1.73138427734375 2023-01-24 02:57:11.873750: step: 514/470, loss: 0.9232888221740723 2023-01-24 02:57:12.624048: step: 516/470, loss: 0.4726581573486328 2023-01-24 02:57:13.316683: step: 518/470, loss: 0.1541184037923813 2023-01-24 02:57:14.063767: step: 520/470, loss: 0.1253805309534073 2023-01-24 02:57:14.888987: step: 522/470, loss: 0.6628063321113586 2023-01-24 02:57:15.622767: step: 524/470, loss: 1.1210182905197144 2023-01-24 02:57:16.385519: step: 526/470, loss: 0.4437292218208313 2023-01-24 02:57:17.137607: step: 528/470, loss: 1.0097692012786865 2023-01-24 02:57:17.826566: step: 530/470, loss: 0.398298054933548 2023-01-24 02:57:18.519427: step: 532/470, loss: 0.1273142546415329 2023-01-24 02:57:19.268128: step: 534/470, loss: 0.3876824378967285 2023-01-24 02:57:20.110692: step: 536/470, loss: 0.31943345069885254 2023-01-24 02:57:20.885091: step: 538/470, loss: 0.5478048920631409 2023-01-24 02:57:21.568192: step: 540/470, loss: 0.06618456542491913 2023-01-24 02:57:22.333715: step: 542/470, loss: 0.14831160008907318 2023-01-24 02:57:23.099793: step: 544/470, loss: 0.11674753576517105 2023-01-24 02:57:23.873368: step: 546/470, loss: 0.6683562994003296 2023-01-24 02:57:24.642046: step: 548/470, loss: 0.2430974245071411 2023-01-24 02:57:25.343218: step: 550/470, loss: 0.2425515353679657 2023-01-24 02:57:26.074825: step: 552/470, loss: 0.5050057768821716 2023-01-24 02:57:26.823707: step: 554/470, loss: 0.11187369376420975 2023-01-24 02:57:27.498788: step: 556/470, loss: 0.5013543963432312 2023-01-24 02:57:28.245340: step: 558/470, loss: 0.36751458048820496 2023-01-24 02:57:29.052114: step: 560/470, loss: 0.28177356719970703 2023-01-24 02:57:29.809711: step: 562/470, loss: 0.576210618019104 2023-01-24 02:57:30.583136: step: 564/470, loss: 0.3474891781806946 2023-01-24 02:57:31.350355: step: 566/470, loss: 0.30152004957199097 2023-01-24 02:57:32.032116: step: 568/470, loss: 0.3646096885204315 2023-01-24 02:57:32.806614: step: 570/470, loss: 0.35699066519737244 2023-01-24 02:57:33.536853: step: 572/470, loss: 0.13822859525680542 2023-01-24 02:57:34.271129: step: 574/470, loss: 0.3227802515029907 2023-01-24 02:57:34.975245: step: 576/470, loss: 0.10874330252408981 2023-01-24 02:57:35.719978: step: 578/470, loss: 0.08362071961164474 2023-01-24 02:57:36.446614: step: 580/470, loss: 0.2959776222705841 2023-01-24 02:57:37.224893: step: 582/470, loss: 0.09202943742275238 2023-01-24 02:57:38.044432: step: 584/470, loss: 0.12419924139976501 2023-01-24 02:57:38.705952: step: 586/470, loss: 0.4467095732688904 2023-01-24 02:57:39.400064: step: 588/470, loss: 0.17356723546981812 2023-01-24 02:57:40.352506: step: 590/470, loss: 0.18288274109363556 2023-01-24 02:57:41.129947: step: 592/470, loss: 0.2276880443096161 2023-01-24 02:57:41.942460: step: 594/470, loss: 0.10493405163288116 2023-01-24 02:57:42.671868: step: 596/470, loss: 0.13986705243587494 2023-01-24 02:57:43.404804: step: 598/470, loss: 0.18077510595321655 2023-01-24 02:57:44.103536: step: 600/470, loss: 0.12601624429225922 2023-01-24 02:57:44.772701: step: 602/470, loss: 0.3344309628009796 2023-01-24 02:57:45.594728: step: 604/470, loss: 0.3879287838935852 2023-01-24 02:57:46.441994: step: 606/470, loss: 0.16008992493152618 2023-01-24 02:57:47.393627: step: 608/470, loss: 0.19730323553085327 2023-01-24 02:57:48.048533: step: 610/470, loss: 1.7209839820861816 2023-01-24 02:57:48.714230: step: 612/470, loss: 0.17242218554019928 2023-01-24 02:57:49.495183: step: 614/470, loss: 0.42183101177215576 2023-01-24 02:57:50.219967: step: 616/470, loss: 0.1833951324224472 2023-01-24 02:57:50.949059: step: 618/470, loss: 0.1907496601343155 2023-01-24 02:57:51.699081: step: 620/470, loss: 0.21647794544696808 2023-01-24 02:57:52.520303: step: 622/470, loss: 0.39642030000686646 2023-01-24 02:57:53.285871: step: 624/470, loss: 0.20872657001018524 2023-01-24 02:57:53.981460: step: 626/470, loss: 0.16940362751483917 2023-01-24 02:57:54.684018: step: 628/470, loss: 0.29818278551101685 2023-01-24 02:57:55.339578: step: 630/470, loss: 0.12899573147296906 2023-01-24 02:57:56.027372: step: 632/470, loss: 0.08327148109674454 2023-01-24 02:57:56.748451: step: 634/470, loss: 0.2249470353126526 2023-01-24 02:57:57.656132: step: 636/470, loss: 0.1515919417142868 2023-01-24 02:57:58.449337: step: 638/470, loss: 0.8622671365737915 2023-01-24 02:57:59.248663: step: 640/470, loss: 0.37675124406814575 2023-01-24 02:57:59.934947: step: 642/470, loss: 0.2437460869550705 2023-01-24 02:58:00.642592: step: 644/470, loss: 0.4033161997795105 2023-01-24 02:58:01.415654: step: 646/470, loss: 0.24557210505008698 2023-01-24 02:58:02.222586: step: 648/470, loss: 0.25930410623550415 2023-01-24 02:58:02.880138: step: 650/470, loss: 0.49194440245628357 2023-01-24 02:58:03.685658: step: 652/470, loss: 0.4548197090625763 2023-01-24 02:58:04.458182: step: 654/470, loss: 0.2559857666492462 2023-01-24 02:58:05.136259: step: 656/470, loss: 0.3703431189060211 2023-01-24 02:58:05.894544: step: 658/470, loss: 0.0801117792725563 2023-01-24 02:58:06.651384: step: 660/470, loss: 0.23692208528518677 2023-01-24 02:58:07.416526: step: 662/470, loss: 0.8581908941268921 2023-01-24 02:58:08.172758: step: 664/470, loss: 0.09321422874927521 2023-01-24 02:58:08.850893: step: 666/470, loss: 0.5431147217750549 2023-01-24 02:58:09.579158: step: 668/470, loss: 0.17868438363075256 2023-01-24 02:58:10.245075: step: 670/470, loss: 0.2592407763004303 2023-01-24 02:58:10.989770: step: 672/470, loss: 0.22203195095062256 2023-01-24 02:58:11.766864: step: 674/470, loss: 0.2031756341457367 2023-01-24 02:58:12.536734: step: 676/470, loss: 1.166580319404602 2023-01-24 02:58:13.278914: step: 678/470, loss: 0.20502953231334686 2023-01-24 02:58:13.985995: step: 680/470, loss: 0.14193998277187347 2023-01-24 02:58:14.699804: step: 682/470, loss: 0.16833271086215973 2023-01-24 02:58:15.451419: step: 684/470, loss: 0.20455804467201233 2023-01-24 02:58:16.106193: step: 686/470, loss: 0.7733557224273682 2023-01-24 02:58:16.888482: step: 688/470, loss: 0.20254459977149963 2023-01-24 02:58:17.681758: step: 690/470, loss: 0.8873804211616516 2023-01-24 02:58:18.415380: step: 692/470, loss: 0.4637981057167053 2023-01-24 02:58:19.170360: step: 694/470, loss: 0.4916077256202698 2023-01-24 02:58:19.920832: step: 696/470, loss: 0.826230525970459 2023-01-24 02:58:20.700745: step: 698/470, loss: 0.3543017506599426 2023-01-24 02:58:21.415468: step: 700/470, loss: 0.10216841101646423 2023-01-24 02:58:22.153206: step: 702/470, loss: 0.21498167514801025 2023-01-24 02:58:22.901118: step: 704/470, loss: 0.10220889002084732 2023-01-24 02:58:23.693294: step: 706/470, loss: 0.08111105859279633 2023-01-24 02:58:24.405364: step: 708/470, loss: 0.24107980728149414 2023-01-24 02:58:25.176831: step: 710/470, loss: 0.21305520832538605 2023-01-24 02:58:25.927022: step: 712/470, loss: 0.14277955889701843 2023-01-24 02:58:26.675612: step: 714/470, loss: 0.9436085224151611 2023-01-24 02:58:27.435358: step: 716/470, loss: 0.6749270558357239 2023-01-24 02:58:28.148602: step: 718/470, loss: 0.14690832793712616 2023-01-24 02:58:28.925292: step: 720/470, loss: 0.1711893379688263 2023-01-24 02:58:29.662651: step: 722/470, loss: 0.22402094304561615 2023-01-24 02:58:30.423806: step: 724/470, loss: 0.3463267683982849 2023-01-24 02:58:31.129819: step: 726/470, loss: 0.34018969535827637 2023-01-24 02:58:31.898532: step: 728/470, loss: 0.5446484088897705 2023-01-24 02:58:32.620840: step: 730/470, loss: 0.11589177697896957 2023-01-24 02:58:33.353912: step: 732/470, loss: 1.1572738885879517 2023-01-24 02:58:34.168445: step: 734/470, loss: 0.20397549867630005 2023-01-24 02:58:35.004766: step: 736/470, loss: 0.6422154307365417 2023-01-24 02:58:35.692351: step: 738/470, loss: 0.19147703051567078 2023-01-24 02:58:36.328967: step: 740/470, loss: 0.520534336566925 2023-01-24 02:58:37.184681: step: 742/470, loss: 0.14674602448940277 2023-01-24 02:58:37.997000: step: 744/470, loss: 0.8777087926864624 2023-01-24 02:58:38.710315: step: 746/470, loss: 1.0014443397521973 2023-01-24 02:58:39.355117: step: 748/470, loss: 0.3216632306575775 2023-01-24 02:58:40.146400: step: 750/470, loss: 0.20832213759422302 2023-01-24 02:58:40.887434: step: 752/470, loss: 0.5051476359367371 2023-01-24 02:58:41.671383: step: 754/470, loss: 0.2241048663854599 2023-01-24 02:58:42.444789: step: 756/470, loss: 0.8101214170455933 2023-01-24 02:58:43.227830: step: 758/470, loss: 0.12697064876556396 2023-01-24 02:58:43.948970: step: 760/470, loss: 1.2407550811767578 2023-01-24 02:58:44.733487: step: 762/470, loss: 0.39389562606811523 2023-01-24 02:58:45.498689: step: 764/470, loss: 0.4802873730659485 2023-01-24 02:58:46.263055: step: 766/470, loss: 0.7842137813568115 2023-01-24 02:58:47.034804: step: 768/470, loss: 0.204922616481781 2023-01-24 02:58:47.831843: step: 770/470, loss: 0.25310850143432617 2023-01-24 02:58:48.547835: step: 772/470, loss: 0.07442860305309296 2023-01-24 02:58:49.325375: step: 774/470, loss: 0.16630667448043823 2023-01-24 02:58:50.163302: step: 776/470, loss: 0.4407159388065338 2023-01-24 02:58:50.977520: step: 778/470, loss: 0.2151498794555664 2023-01-24 02:58:51.729631: step: 780/470, loss: 0.4645977020263672 2023-01-24 02:58:52.518930: step: 782/470, loss: 0.4509057104587555 2023-01-24 02:58:53.216218: step: 784/470, loss: 0.4268851578235626 2023-01-24 02:58:53.933962: step: 786/470, loss: 0.2786135971546173 2023-01-24 02:58:54.804268: step: 788/470, loss: 0.42344367504119873 2023-01-24 02:58:55.531668: step: 790/470, loss: 0.22360637784004211 2023-01-24 02:58:56.268579: step: 792/470, loss: 0.2640346884727478 2023-01-24 02:58:57.043125: step: 794/470, loss: 1.436629056930542 2023-01-24 02:58:57.850336: step: 796/470, loss: 0.168908953666687 2023-01-24 02:58:58.520589: step: 798/470, loss: 0.2268424928188324 2023-01-24 02:58:59.259751: step: 800/470, loss: 0.16930358111858368 2023-01-24 02:59:00.010082: step: 802/470, loss: 0.19378112256526947 2023-01-24 02:59:00.865576: step: 804/470, loss: 0.4183207154273987 2023-01-24 02:59:01.601687: step: 806/470, loss: 0.39443352818489075 2023-01-24 02:59:02.357468: step: 808/470, loss: 0.22412896156311035 2023-01-24 02:59:03.105947: step: 810/470, loss: 0.23135864734649658 2023-01-24 02:59:03.884927: step: 812/470, loss: 0.3557632863521576 2023-01-24 02:59:04.693719: step: 814/470, loss: 0.23979082703590393 2023-01-24 02:59:05.425454: step: 816/470, loss: 0.4324354827404022 2023-01-24 02:59:06.116202: step: 818/470, loss: 0.20390821993350983 2023-01-24 02:59:06.765208: step: 820/470, loss: 0.11008242517709732 2023-01-24 02:59:07.520308: step: 822/470, loss: 0.7956212759017944 2023-01-24 02:59:08.313623: step: 824/470, loss: 0.09564602375030518 2023-01-24 02:59:09.103054: step: 826/470, loss: 0.10002394765615463 2023-01-24 02:59:09.913785: step: 828/470, loss: 0.12996633350849152 2023-01-24 02:59:10.732938: step: 830/470, loss: 0.16086608171463013 2023-01-24 02:59:11.447601: step: 832/470, loss: 0.32509753108024597 2023-01-24 02:59:12.221873: step: 834/470, loss: 0.6973584890365601 2023-01-24 02:59:12.956628: step: 836/470, loss: 0.5293657779693604 2023-01-24 02:59:13.728024: step: 838/470, loss: 0.4230278730392456 2023-01-24 02:59:14.529843: step: 840/470, loss: 0.26785650849342346 2023-01-24 02:59:15.267195: step: 842/470, loss: 0.10598357021808624 2023-01-24 02:59:16.078703: step: 844/470, loss: 0.08085938543081284 2023-01-24 02:59:16.800985: step: 846/470, loss: 0.0774892121553421 2023-01-24 02:59:17.523895: step: 848/470, loss: 0.3891444206237793 2023-01-24 02:59:18.288723: step: 850/470, loss: 0.20412513613700867 2023-01-24 02:59:19.036760: step: 852/470, loss: 0.7257307171821594 2023-01-24 02:59:19.789244: step: 854/470, loss: 0.19312706589698792 2023-01-24 02:59:20.548649: step: 856/470, loss: 0.2457079440355301 2023-01-24 02:59:21.285149: step: 858/470, loss: 0.22375904023647308 2023-01-24 02:59:21.986729: step: 860/470, loss: 0.07559472322463989 2023-01-24 02:59:22.820045: step: 862/470, loss: 5.461023330688477 2023-01-24 02:59:23.598103: step: 864/470, loss: 0.28360649943351746 2023-01-24 02:59:24.369207: step: 866/470, loss: 0.1746789664030075 2023-01-24 02:59:25.193377: step: 868/470, loss: 0.2900182604789734 2023-01-24 02:59:25.905191: step: 870/470, loss: 0.5256376266479492 2023-01-24 02:59:26.668223: step: 872/470, loss: 0.5666700601577759 2023-01-24 02:59:27.475640: step: 874/470, loss: 0.3807283341884613 2023-01-24 02:59:28.269341: step: 876/470, loss: 0.39703118801116943 2023-01-24 02:59:29.123612: step: 878/470, loss: 0.3907458186149597 2023-01-24 02:59:29.892979: step: 880/470, loss: 0.2166074514389038 2023-01-24 02:59:30.632038: step: 882/470, loss: 0.6513550281524658 2023-01-24 02:59:31.386513: step: 884/470, loss: 0.17348910868167877 2023-01-24 02:59:32.136123: step: 886/470, loss: 0.5899196863174438 2023-01-24 02:59:32.797584: step: 888/470, loss: 0.33359190821647644 2023-01-24 02:59:33.606930: step: 890/470, loss: 0.2644504904747009 2023-01-24 02:59:34.344404: step: 892/470, loss: 0.0992155447602272 2023-01-24 02:59:35.096339: step: 894/470, loss: 0.07050655782222748 2023-01-24 02:59:36.019375: step: 896/470, loss: 0.15530425310134888 2023-01-24 02:59:36.744978: step: 898/470, loss: 1.7606289386749268 2023-01-24 02:59:37.508293: step: 900/470, loss: 0.39144524931907654 2023-01-24 02:59:38.341502: step: 902/470, loss: 0.10359073430299759 2023-01-24 02:59:39.074069: step: 904/470, loss: 0.5953198075294495 2023-01-24 02:59:39.789460: step: 906/470, loss: 0.07822130620479584 2023-01-24 02:59:40.438872: step: 908/470, loss: 0.16700084507465363 2023-01-24 02:59:41.298046: step: 910/470, loss: 0.48216283321380615 2023-01-24 02:59:42.107027: step: 912/470, loss: 0.14438587427139282 2023-01-24 02:59:42.924151: step: 914/470, loss: 0.28868240118026733 2023-01-24 02:59:43.703969: step: 916/470, loss: 0.15756703913211823 2023-01-24 02:59:44.414247: step: 918/470, loss: 0.25201234221458435 2023-01-24 02:59:45.217110: step: 920/470, loss: 0.13608524203300476 2023-01-24 02:59:45.985031: step: 922/470, loss: 0.11261554062366486 2023-01-24 02:59:46.676342: step: 924/470, loss: 0.12380633503198624 2023-01-24 02:59:47.473751: step: 926/470, loss: 0.3758620023727417 2023-01-24 02:59:48.203412: step: 928/470, loss: 0.11808370053768158 2023-01-24 02:59:48.952966: step: 930/470, loss: 0.5944559574127197 2023-01-24 02:59:49.678748: step: 932/470, loss: 0.38902971148490906 2023-01-24 02:59:50.447497: step: 934/470, loss: 0.08703064173460007 2023-01-24 02:59:51.250707: step: 936/470, loss: 0.1778847724199295 2023-01-24 02:59:52.026279: step: 938/470, loss: 0.3809297978878021 2023-01-24 02:59:52.724081: step: 940/470, loss: 0.24091286957263947 2023-01-24 02:59:53.406138: step: 942/470, loss: 0.10962583124637604 ================================================== Loss: 0.416 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33121436342954136, 'r': 0.3067032435552489, 'f1': 0.31848790020416984}, 'combined': 0.23467529488728303, 'epoch': 10} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3698629019746699, 'r': 0.3115943948432137, 'f1': 0.3382375026400266}, 'combined': 0.22549166842668436, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3331003656930242, 'r': 0.3084496744937302, 'f1': 0.32030143538560746}, 'combined': 0.23601158396834232, 'epoch': 10} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3805783367493191, 'r': 0.304901376415881, 'f1': 0.33856248837219427}, 'combined': 0.2257083255814628, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3176152436672239, 'r': 0.30435616328642895, 'f1': 0.3108443760696668}, 'combined': 0.22904322447238606, 'epoch': 10} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3714783299504381, 'r': 0.319685678178502, 'f1': 0.3436414525122916}, 'combined': 0.22909430167486103, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.2571428571428571, 'f1': 0.2535211267605634}, 'combined': 0.16901408450704225, 'epoch': 10} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5227272727272727, 'r': 0.25, 'f1': 0.3382352941176471}, 'combined': 0.22549019607843138, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 10} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3176152436672239, 'r': 0.30435616328642895, 'f1': 0.3108443760696668}, 'combined': 0.22904322447238606, 'epoch': 10} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3714783299504381, 'r': 0.319685678178502, 'f1': 0.3436414525122916}, 'combined': 0.22909430167486103, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 10} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:02:35.724823: step: 2/470, loss: 0.3994400203227997 2023-01-24 03:02:36.472649: step: 4/470, loss: 0.10796771198511124 2023-01-24 03:02:37.196311: step: 6/470, loss: 0.06982637196779251 2023-01-24 03:02:37.974359: step: 8/470, loss: 0.6267420649528503 2023-01-24 03:02:38.695457: step: 10/470, loss: 0.12536020576953888 2023-01-24 03:02:39.401881: step: 12/470, loss: 0.2619161605834961 2023-01-24 03:02:40.209596: step: 14/470, loss: 0.1946829855442047 2023-01-24 03:02:40.894327: step: 16/470, loss: 0.20657046139240265 2023-01-24 03:02:41.607107: step: 18/470, loss: 0.369625061750412 2023-01-24 03:02:42.358813: step: 20/470, loss: 0.24612738192081451 2023-01-24 03:02:43.146393: step: 22/470, loss: 0.17834819853305817 2023-01-24 03:02:43.926512: step: 24/470, loss: 0.18645793199539185 2023-01-24 03:02:44.717202: step: 26/470, loss: 0.22116416692733765 2023-01-24 03:02:45.516331: step: 28/470, loss: 0.12848031520843506 2023-01-24 03:02:46.236136: step: 30/470, loss: 0.2887888252735138 2023-01-24 03:02:46.984813: step: 32/470, loss: 0.09650327265262604 2023-01-24 03:02:47.823905: step: 34/470, loss: 0.1269495189189911 2023-01-24 03:02:48.587579: step: 36/470, loss: 0.05965163931250572 2023-01-24 03:02:49.295800: step: 38/470, loss: 0.09995022416114807 2023-01-24 03:02:50.074835: step: 40/470, loss: 0.1404799371957779 2023-01-24 03:02:50.822918: step: 42/470, loss: 0.07613828033208847 2023-01-24 03:02:51.594949: step: 44/470, loss: 0.21182122826576233 2023-01-24 03:02:52.423846: step: 46/470, loss: 0.5042648911476135 2023-01-24 03:02:53.177236: step: 48/470, loss: 0.5012888312339783 2023-01-24 03:02:53.853746: step: 50/470, loss: 0.4014410972595215 2023-01-24 03:02:54.578009: step: 52/470, loss: 1.0104151964187622 2023-01-24 03:02:55.404799: step: 54/470, loss: 0.5547516942024231 2023-01-24 03:02:56.252793: step: 56/470, loss: 1.227064847946167 2023-01-24 03:02:57.044262: step: 58/470, loss: 0.4300103783607483 2023-01-24 03:02:57.699692: step: 60/470, loss: 0.20219558477401733 2023-01-24 03:02:58.417839: step: 62/470, loss: 0.13512657582759857 2023-01-24 03:02:59.214631: step: 64/470, loss: 0.1292756050825119 2023-01-24 03:02:59.948603: step: 66/470, loss: 0.3424178957939148 2023-01-24 03:03:00.770829: step: 68/470, loss: 0.8694711327552795 2023-01-24 03:03:01.587915: step: 70/470, loss: 0.10702022910118103 2023-01-24 03:03:02.313538: step: 72/470, loss: 0.07732173800468445 2023-01-24 03:03:03.044678: step: 74/470, loss: 0.21523575484752655 2023-01-24 03:03:03.814489: step: 76/470, loss: 0.9190123081207275 2023-01-24 03:03:04.502077: step: 78/470, loss: 0.09142867475748062 2023-01-24 03:03:05.277817: step: 80/470, loss: 0.2203410565853119 2023-01-24 03:03:06.020638: step: 82/470, loss: 0.21649318933486938 2023-01-24 03:03:06.756069: step: 84/470, loss: 0.13063064217567444 2023-01-24 03:03:07.609875: step: 86/470, loss: 0.3328152596950531 2023-01-24 03:03:08.485955: step: 88/470, loss: 0.2513705790042877 2023-01-24 03:03:09.313378: step: 90/470, loss: 0.20509395003318787 2023-01-24 03:03:10.153399: step: 92/470, loss: 0.07670695334672928 2023-01-24 03:03:10.871004: step: 94/470, loss: 0.10654658079147339 2023-01-24 03:03:11.569196: step: 96/470, loss: 0.44697830080986023 2023-01-24 03:03:12.342934: step: 98/470, loss: 0.13903780281543732 2023-01-24 03:03:13.131439: step: 100/470, loss: 0.028267454355955124 2023-01-24 03:03:13.961255: step: 102/470, loss: 0.15167954564094543 2023-01-24 03:03:14.818166: step: 104/470, loss: 0.284242182970047 2023-01-24 03:03:15.515618: step: 106/470, loss: 0.3138585090637207 2023-01-24 03:03:16.434973: step: 108/470, loss: 0.07407403737306595 2023-01-24 03:03:17.272226: step: 110/470, loss: 0.6403460502624512 2023-01-24 03:03:18.001129: step: 112/470, loss: 0.11428175866603851 2023-01-24 03:03:18.783772: step: 114/470, loss: 0.26986491680145264 2023-01-24 03:03:19.651319: step: 116/470, loss: 0.1651705801486969 2023-01-24 03:03:20.429601: step: 118/470, loss: 0.07059428840875626 2023-01-24 03:03:21.174857: step: 120/470, loss: 0.0850813016295433 2023-01-24 03:03:21.916850: step: 122/470, loss: 0.23236671090126038 2023-01-24 03:03:22.694078: step: 124/470, loss: 1.0825673341751099 2023-01-24 03:03:23.411063: step: 126/470, loss: 0.18390889465808868 2023-01-24 03:03:24.162250: step: 128/470, loss: 0.12133748084306717 2023-01-24 03:03:24.870947: step: 130/470, loss: 0.26943790912628174 2023-01-24 03:03:25.594095: step: 132/470, loss: 0.059222038835287094 2023-01-24 03:03:26.313149: step: 134/470, loss: 0.4377175569534302 2023-01-24 03:03:27.022677: step: 136/470, loss: 0.1320364624261856 2023-01-24 03:03:27.801260: step: 138/470, loss: 0.2063646763563156 2023-01-24 03:03:28.508071: step: 140/470, loss: 0.17360788583755493 2023-01-24 03:03:29.222834: step: 142/470, loss: 0.17847619950771332 2023-01-24 03:03:29.991890: step: 144/470, loss: 0.4014095664024353 2023-01-24 03:03:30.733141: step: 146/470, loss: 0.26048025488853455 2023-01-24 03:03:31.523509: step: 148/470, loss: 0.4044667184352875 2023-01-24 03:03:32.302353: step: 150/470, loss: 0.6233318448066711 2023-01-24 03:03:32.981698: step: 152/470, loss: 0.10592886805534363 2023-01-24 03:03:33.796720: step: 154/470, loss: 0.6332147121429443 2023-01-24 03:03:34.633175: step: 156/470, loss: 0.4474610686302185 2023-01-24 03:03:35.292957: step: 158/470, loss: 0.22585195302963257 2023-01-24 03:03:36.015479: step: 160/470, loss: 0.10840116441249847 2023-01-24 03:03:36.782154: step: 162/470, loss: 0.17162242531776428 2023-01-24 03:03:37.565112: step: 164/470, loss: 0.1772492676973343 2023-01-24 03:03:38.243060: step: 166/470, loss: 0.3632153868675232 2023-01-24 03:03:39.000363: step: 168/470, loss: 0.14816507697105408 2023-01-24 03:03:39.711559: step: 170/470, loss: 0.9186021089553833 2023-01-24 03:03:40.458377: step: 172/470, loss: 0.22490334510803223 2023-01-24 03:03:41.136419: step: 174/470, loss: 0.20037584006786346 2023-01-24 03:03:41.810094: step: 176/470, loss: 0.13259491324424744 2023-01-24 03:03:42.513037: step: 178/470, loss: 0.42409461736679077 2023-01-24 03:03:43.279407: step: 180/470, loss: 0.12134432792663574 2023-01-24 03:03:44.010584: step: 182/470, loss: 0.10569081455469131 2023-01-24 03:03:44.709598: step: 184/470, loss: 0.07015454769134521 2023-01-24 03:03:45.453010: step: 186/470, loss: 0.18518798053264618 2023-01-24 03:03:46.202602: step: 188/470, loss: 0.1714620441198349 2023-01-24 03:03:46.863280: step: 190/470, loss: 0.5320131182670593 2023-01-24 03:03:47.545680: step: 192/470, loss: 0.09196509420871735 2023-01-24 03:03:48.222108: step: 194/470, loss: 0.24720802903175354 2023-01-24 03:03:49.019070: step: 196/470, loss: 0.381684273481369 2023-01-24 03:03:49.764905: step: 198/470, loss: 0.4392554759979248 2023-01-24 03:03:50.545092: step: 200/470, loss: 0.1657257229089737 2023-01-24 03:03:51.291994: step: 202/470, loss: 0.9423061013221741 2023-01-24 03:03:51.962283: step: 204/470, loss: 0.22993268072605133 2023-01-24 03:03:52.653958: step: 206/470, loss: 0.08702653646469116 2023-01-24 03:03:53.297440: step: 208/470, loss: 0.4365525543689728 2023-01-24 03:03:54.086051: step: 210/470, loss: 0.21688061952590942 2023-01-24 03:03:54.784975: step: 212/470, loss: 0.37000590562820435 2023-01-24 03:03:55.568446: step: 214/470, loss: 0.1840032935142517 2023-01-24 03:03:56.319541: step: 216/470, loss: 0.21828442811965942 2023-01-24 03:03:57.069092: step: 218/470, loss: 0.6175702214241028 2023-01-24 03:03:57.860247: step: 220/470, loss: 0.3277837038040161 2023-01-24 03:03:58.674323: step: 222/470, loss: 0.20514245331287384 2023-01-24 03:03:59.376032: step: 224/470, loss: 0.27700185775756836 2023-01-24 03:04:00.036004: step: 226/470, loss: 0.8384249210357666 2023-01-24 03:04:00.813456: step: 228/470, loss: 0.3489769697189331 2023-01-24 03:04:01.593684: step: 230/470, loss: 0.167975515127182 2023-01-24 03:04:02.377503: step: 232/470, loss: 0.12696748971939087 2023-01-24 03:04:03.154213: step: 234/470, loss: 0.36338022351264954 2023-01-24 03:04:03.986796: step: 236/470, loss: 0.17215953767299652 2023-01-24 03:04:04.730962: step: 238/470, loss: 0.14851003885269165 2023-01-24 03:04:05.434045: step: 240/470, loss: 0.5321226119995117 2023-01-24 03:04:06.189634: step: 242/470, loss: 0.37152212858200073 2023-01-24 03:04:07.011101: step: 244/470, loss: 0.11287014931440353 2023-01-24 03:04:07.707506: step: 246/470, loss: 0.1164015606045723 2023-01-24 03:04:08.540980: step: 248/470, loss: 0.3851315379142761 2023-01-24 03:04:09.362959: step: 250/470, loss: 0.5415903925895691 2023-01-24 03:04:10.079129: step: 252/470, loss: 0.07699915021657944 2023-01-24 03:04:10.828792: step: 254/470, loss: 0.10870891064405441 2023-01-24 03:04:11.577718: step: 256/470, loss: 0.18335126340389252 2023-01-24 03:04:12.338432: step: 258/470, loss: 0.1485547423362732 2023-01-24 03:04:13.065590: step: 260/470, loss: 0.440429151058197 2023-01-24 03:04:13.691251: step: 262/470, loss: 0.16356483101844788 2023-01-24 03:04:14.395467: step: 264/470, loss: 0.5098501443862915 2023-01-24 03:04:15.145397: step: 266/470, loss: 0.49232688546180725 2023-01-24 03:04:15.936195: step: 268/470, loss: 0.22366316616535187 2023-01-24 03:04:16.689538: step: 270/470, loss: 2.619814395904541 2023-01-24 03:04:17.376202: step: 272/470, loss: 0.2295650839805603 2023-01-24 03:04:18.105925: step: 274/470, loss: 0.11074075847864151 2023-01-24 03:04:18.830445: step: 276/470, loss: 0.3683517873287201 2023-01-24 03:04:19.618273: step: 278/470, loss: 0.13178575038909912 2023-01-24 03:04:20.296353: step: 280/470, loss: 0.16775688529014587 2023-01-24 03:04:20.983579: step: 282/470, loss: 0.0917547270655632 2023-01-24 03:04:21.722874: step: 284/470, loss: 0.24330279231071472 2023-01-24 03:04:22.454495: step: 286/470, loss: 0.21394579112529755 2023-01-24 03:04:23.184935: step: 288/470, loss: 0.16779950261116028 2023-01-24 03:04:23.942394: step: 290/470, loss: 0.21933045983314514 2023-01-24 03:04:24.715280: step: 292/470, loss: 0.16219143569469452 2023-01-24 03:04:25.469924: step: 294/470, loss: 0.11340250819921494 2023-01-24 03:04:26.202298: step: 296/470, loss: 0.26899465918540955 2023-01-24 03:04:26.948533: step: 298/470, loss: 0.15036383271217346 2023-01-24 03:04:27.712089: step: 300/470, loss: 0.20359119772911072 2023-01-24 03:04:28.473396: step: 302/470, loss: 0.08829786628484726 2023-01-24 03:04:29.256306: step: 304/470, loss: 0.32991862297058105 2023-01-24 03:04:29.995190: step: 306/470, loss: 0.09574191272258759 2023-01-24 03:04:30.660485: step: 308/470, loss: 0.1676369309425354 2023-01-24 03:04:31.371018: step: 310/470, loss: 0.07528150081634521 2023-01-24 03:04:32.084732: step: 312/470, loss: 0.2053246796131134 2023-01-24 03:04:32.831692: step: 314/470, loss: 0.5107622146606445 2023-01-24 03:04:33.611073: step: 316/470, loss: 0.2811824381351471 2023-01-24 03:04:34.469124: step: 318/470, loss: 1.0416152477264404 2023-01-24 03:04:35.253832: step: 320/470, loss: 0.24820706248283386 2023-01-24 03:04:35.967514: step: 322/470, loss: 0.1375453770160675 2023-01-24 03:04:36.639463: step: 324/470, loss: 0.2664700448513031 2023-01-24 03:04:37.447684: step: 326/470, loss: 0.9418017864227295 2023-01-24 03:04:38.164105: step: 328/470, loss: 1.100771427154541 2023-01-24 03:04:38.983634: step: 330/470, loss: 0.1819295734167099 2023-01-24 03:04:39.775395: step: 332/470, loss: 0.17129111289978027 2023-01-24 03:04:40.572860: step: 334/470, loss: 0.38183021545410156 2023-01-24 03:04:41.285413: step: 336/470, loss: 0.1744094341993332 2023-01-24 03:04:42.032701: step: 338/470, loss: 0.21806088089942932 2023-01-24 03:04:42.710795: step: 340/470, loss: 0.1362285017967224 2023-01-24 03:04:43.484188: step: 342/470, loss: 0.08650539070367813 2023-01-24 03:04:44.298876: step: 344/470, loss: 0.14947454631328583 2023-01-24 03:04:45.047435: step: 346/470, loss: 0.04758727550506592 2023-01-24 03:04:45.795608: step: 348/470, loss: 0.09763923287391663 2023-01-24 03:04:46.524607: step: 350/470, loss: 0.12080357223749161 2023-01-24 03:04:47.270230: step: 352/470, loss: 0.6871947050094604 2023-01-24 03:04:48.041380: step: 354/470, loss: 0.24329255521297455 2023-01-24 03:04:48.803993: step: 356/470, loss: 0.4151495099067688 2023-01-24 03:04:49.501241: step: 358/470, loss: 0.1159839779138565 2023-01-24 03:04:50.236826: step: 360/470, loss: 0.42312848567962646 2023-01-24 03:04:50.953707: step: 362/470, loss: 0.19960635900497437 2023-01-24 03:04:51.699489: step: 364/470, loss: 0.5251233577728271 2023-01-24 03:04:52.351531: step: 366/470, loss: 0.4900449514389038 2023-01-24 03:04:53.039889: step: 368/470, loss: 0.45938706398010254 2023-01-24 03:04:53.798720: step: 370/470, loss: 0.12873217463493347 2023-01-24 03:04:54.494170: step: 372/470, loss: 0.20823711156845093 2023-01-24 03:04:55.262345: step: 374/470, loss: 0.30151286721229553 2023-01-24 03:04:56.002638: step: 376/470, loss: 0.4461778700351715 2023-01-24 03:04:56.814650: step: 378/470, loss: 1.0548112392425537 2023-01-24 03:04:57.534453: step: 380/470, loss: 0.4324851632118225 2023-01-24 03:04:58.244436: step: 382/470, loss: 0.9602062702178955 2023-01-24 03:04:58.961909: step: 384/470, loss: 0.6398365497589111 2023-01-24 03:04:59.725499: step: 386/470, loss: 0.22366558015346527 2023-01-24 03:05:00.541256: step: 388/470, loss: 0.21046940982341766 2023-01-24 03:05:01.294944: step: 390/470, loss: 0.22315052151679993 2023-01-24 03:05:01.996096: step: 392/470, loss: 0.22419774532318115 2023-01-24 03:05:02.776440: step: 394/470, loss: 0.71689373254776 2023-01-24 03:05:03.548277: step: 396/470, loss: 0.16539393365383148 2023-01-24 03:05:04.330783: step: 398/470, loss: 0.37051236629486084 2023-01-24 03:05:05.108976: step: 400/470, loss: 0.17021459341049194 2023-01-24 03:05:05.798037: step: 402/470, loss: 0.23352442681789398 2023-01-24 03:05:06.524922: step: 404/470, loss: 0.13187989592552185 2023-01-24 03:05:07.290860: step: 406/470, loss: 0.6370197534561157 2023-01-24 03:05:08.095987: step: 408/470, loss: 0.1171882152557373 2023-01-24 03:05:08.844894: step: 410/470, loss: 0.2786184549331665 2023-01-24 03:05:09.595627: step: 412/470, loss: 0.6290634870529175 2023-01-24 03:05:10.286287: step: 414/470, loss: 0.1813952922821045 2023-01-24 03:05:11.011339: step: 416/470, loss: 0.3490357995033264 2023-01-24 03:05:11.750722: step: 418/470, loss: 0.23016490042209625 2023-01-24 03:05:12.464610: step: 420/470, loss: 1.1225723028182983 2023-01-24 03:05:13.229426: step: 422/470, loss: 0.7987148761749268 2023-01-24 03:05:14.073016: step: 424/470, loss: 0.1776483952999115 2023-01-24 03:05:14.769186: step: 426/470, loss: 0.2515849769115448 2023-01-24 03:05:15.517037: step: 428/470, loss: 0.22037197649478912 2023-01-24 03:05:16.286172: step: 430/470, loss: 0.1036430150270462 2023-01-24 03:05:17.042697: step: 432/470, loss: 0.2177654653787613 2023-01-24 03:05:17.806877: step: 434/470, loss: 0.1274358332157135 2023-01-24 03:05:18.548473: step: 436/470, loss: 0.33280283212661743 2023-01-24 03:05:19.350585: step: 438/470, loss: 0.26393696665763855 2023-01-24 03:05:20.063772: step: 440/470, loss: 0.13263751566410065 2023-01-24 03:05:20.777009: step: 442/470, loss: 0.13120940327644348 2023-01-24 03:05:21.460160: step: 444/470, loss: 0.4707326292991638 2023-01-24 03:05:22.291482: step: 446/470, loss: 0.46750926971435547 2023-01-24 03:05:22.942805: step: 448/470, loss: 0.06135157495737076 2023-01-24 03:05:23.622597: step: 450/470, loss: 0.09700169414281845 2023-01-24 03:05:24.393330: step: 452/470, loss: 0.23718973994255066 2023-01-24 03:05:25.074323: step: 454/470, loss: 0.3361484706401825 2023-01-24 03:05:25.851047: step: 456/470, loss: 0.13764187693595886 2023-01-24 03:05:26.603546: step: 458/470, loss: 0.14335286617279053 2023-01-24 03:05:27.394998: step: 460/470, loss: 0.12495262920856476 2023-01-24 03:05:28.160907: step: 462/470, loss: 0.881492018699646 2023-01-24 03:05:28.895689: step: 464/470, loss: 0.4301450848579407 2023-01-24 03:05:29.588179: step: 466/470, loss: 1.3912532329559326 2023-01-24 03:05:30.358682: step: 468/470, loss: 0.11058748513460159 2023-01-24 03:05:31.070470: step: 470/470, loss: 0.4207277297973633 2023-01-24 03:05:31.827726: step: 472/470, loss: 0.15675045549869537 2023-01-24 03:05:32.607380: step: 474/470, loss: 0.32435324788093567 2023-01-24 03:05:33.304760: step: 476/470, loss: 0.21787133812904358 2023-01-24 03:05:34.039845: step: 478/470, loss: 0.4728389382362366 2023-01-24 03:05:34.766171: step: 480/470, loss: 0.29139944911003113 2023-01-24 03:05:35.551288: step: 482/470, loss: 0.19343923032283783 2023-01-24 03:05:36.282385: step: 484/470, loss: 0.15642260015010834 2023-01-24 03:05:37.074666: step: 486/470, loss: 0.2686142027378082 2023-01-24 03:05:37.816246: step: 488/470, loss: 0.03354679420590401 2023-01-24 03:05:38.487593: step: 490/470, loss: 0.14455752074718475 2023-01-24 03:05:39.194504: step: 492/470, loss: 0.29530078172683716 2023-01-24 03:05:39.943746: step: 494/470, loss: 0.21259760856628418 2023-01-24 03:05:40.619131: step: 496/470, loss: 0.12661580741405487 2023-01-24 03:05:41.326035: step: 498/470, loss: 0.24475648999214172 2023-01-24 03:05:42.106531: step: 500/470, loss: 0.5884524583816528 2023-01-24 03:05:42.848096: step: 502/470, loss: 0.21985645592212677 2023-01-24 03:05:43.572363: step: 504/470, loss: 0.12764428555965424 2023-01-24 03:05:44.272160: step: 506/470, loss: 0.2050672471523285 2023-01-24 03:05:44.939327: step: 508/470, loss: 0.22950823605060577 2023-01-24 03:05:45.644267: step: 510/470, loss: 0.10886823385953903 2023-01-24 03:05:46.414184: step: 512/470, loss: 0.3298364281654358 2023-01-24 03:05:47.153865: step: 514/470, loss: 0.45132243633270264 2023-01-24 03:05:47.864803: step: 516/470, loss: 0.03829793632030487 2023-01-24 03:05:48.622625: step: 518/470, loss: 0.2860611081123352 2023-01-24 03:05:49.382755: step: 520/470, loss: 0.18205812573432922 2023-01-24 03:05:50.185563: step: 522/470, loss: 0.2330647110939026 2023-01-24 03:05:50.957206: step: 524/470, loss: 0.16605840623378754 2023-01-24 03:05:51.644205: step: 526/470, loss: 0.1584710031747818 2023-01-24 03:05:52.340710: step: 528/470, loss: 0.28219524025917053 2023-01-24 03:05:53.086004: step: 530/470, loss: 0.1415386199951172 2023-01-24 03:05:53.855501: step: 532/470, loss: 0.187623992562294 2023-01-24 03:05:54.581782: step: 534/470, loss: 0.20938031375408173 2023-01-24 03:05:55.317582: step: 536/470, loss: 0.3544868528842926 2023-01-24 03:05:56.167494: step: 538/470, loss: 0.11445646733045578 2023-01-24 03:05:56.972634: step: 540/470, loss: 0.4840231239795685 2023-01-24 03:05:57.673860: step: 542/470, loss: 0.32870981097221375 2023-01-24 03:05:58.508701: step: 544/470, loss: 0.2359415590763092 2023-01-24 03:05:59.244610: step: 546/470, loss: 0.14783021807670593 2023-01-24 03:05:59.957371: step: 548/470, loss: 0.16923844814300537 2023-01-24 03:06:00.656558: step: 550/470, loss: 0.14774803817272186 2023-01-24 03:06:01.382301: step: 552/470, loss: 0.20552322268486023 2023-01-24 03:06:02.083918: step: 554/470, loss: 0.12092557549476624 2023-01-24 03:06:02.815801: step: 556/470, loss: 0.5868374705314636 2023-01-24 03:06:03.516089: step: 558/470, loss: 0.2626172602176666 2023-01-24 03:06:04.300661: step: 560/470, loss: 0.27606257796287537 2023-01-24 03:06:04.985061: step: 562/470, loss: 0.08012336492538452 2023-01-24 03:06:05.782314: step: 564/470, loss: 1.1808509826660156 2023-01-24 03:06:06.546874: step: 566/470, loss: 1.3284801244735718 2023-01-24 03:06:07.313479: step: 568/470, loss: 2.19006609916687 2023-01-24 03:06:08.030017: step: 570/470, loss: 0.13095425069332123 2023-01-24 03:06:08.774855: step: 572/470, loss: 0.461041122674942 2023-01-24 03:06:09.484968: step: 574/470, loss: 0.2042723149061203 2023-01-24 03:06:10.246397: step: 576/470, loss: 0.32484281063079834 2023-01-24 03:06:11.181138: step: 578/470, loss: 0.2067028135061264 2023-01-24 03:06:11.830501: step: 580/470, loss: 0.32187509536743164 2023-01-24 03:06:12.578772: step: 582/470, loss: 0.21868886053562164 2023-01-24 03:06:13.400295: step: 584/470, loss: 0.2576506733894348 2023-01-24 03:06:14.182904: step: 586/470, loss: 0.7114197611808777 2023-01-24 03:06:14.901242: step: 588/470, loss: 0.1497022956609726 2023-01-24 03:06:15.663067: step: 590/470, loss: 0.10337451845407486 2023-01-24 03:06:16.402673: step: 592/470, loss: 0.30149146914482117 2023-01-24 03:06:17.165302: step: 594/470, loss: 0.11470700055360794 2023-01-24 03:06:17.877398: step: 596/470, loss: 0.1277574747800827 2023-01-24 03:06:18.616500: step: 598/470, loss: 0.09452803432941437 2023-01-24 03:06:19.465298: step: 600/470, loss: 0.25843483209609985 2023-01-24 03:06:20.124367: step: 602/470, loss: 0.8243882060050964 2023-01-24 03:06:20.962448: step: 604/470, loss: 0.27867597341537476 2023-01-24 03:06:21.658386: step: 606/470, loss: 0.2572006583213806 2023-01-24 03:06:22.410007: step: 608/470, loss: 0.08993202447891235 2023-01-24 03:06:23.250464: step: 610/470, loss: 0.23165597021579742 2023-01-24 03:06:24.011564: step: 612/470, loss: 0.4315301179885864 2023-01-24 03:06:24.849311: step: 614/470, loss: 0.17089776694774628 2023-01-24 03:06:25.595978: step: 616/470, loss: 0.1850140392780304 2023-01-24 03:06:26.310544: step: 618/470, loss: 0.37521472573280334 2023-01-24 03:06:27.033536: step: 620/470, loss: 0.1793273687362671 2023-01-24 03:06:27.798428: step: 622/470, loss: 0.2895492911338806 2023-01-24 03:06:28.510383: step: 624/470, loss: 0.1320488601922989 2023-01-24 03:06:29.283009: step: 626/470, loss: 0.15132221579551697 2023-01-24 03:06:30.080851: step: 628/470, loss: 0.15033775568008423 2023-01-24 03:06:30.799705: step: 630/470, loss: 0.0726805329322815 2023-01-24 03:06:31.540071: step: 632/470, loss: 0.225330650806427 2023-01-24 03:06:32.361740: step: 634/470, loss: 0.19526013731956482 2023-01-24 03:06:33.060302: step: 636/470, loss: 0.18398520350456238 2023-01-24 03:06:33.787232: step: 638/470, loss: 0.6068989634513855 2023-01-24 03:06:34.476858: step: 640/470, loss: 0.1774880290031433 2023-01-24 03:06:35.170506: step: 642/470, loss: 0.04690373316407204 2023-01-24 03:06:35.828324: step: 644/470, loss: 0.08224115520715714 2023-01-24 03:06:36.656299: step: 646/470, loss: 0.20977337658405304 2023-01-24 03:06:37.425232: step: 648/470, loss: 0.6167982220649719 2023-01-24 03:06:38.133364: step: 650/470, loss: 2.5424869060516357 2023-01-24 03:06:38.914210: step: 652/470, loss: 0.26920145750045776 2023-01-24 03:06:39.652061: step: 654/470, loss: 0.09911765903234482 2023-01-24 03:06:40.421580: step: 656/470, loss: 1.6140161752700806 2023-01-24 03:06:41.138184: step: 658/470, loss: 0.07306616753339767 2023-01-24 03:06:41.982293: step: 660/470, loss: 0.13788312673568726 2023-01-24 03:06:42.684206: step: 662/470, loss: 0.2710290551185608 2023-01-24 03:06:43.500039: step: 664/470, loss: 0.6698711514472961 2023-01-24 03:06:44.218968: step: 666/470, loss: 4.250461578369141 2023-01-24 03:06:44.903919: step: 668/470, loss: 0.44460374116897583 2023-01-24 03:06:45.584962: step: 670/470, loss: 0.23895730078220367 2023-01-24 03:06:46.311789: step: 672/470, loss: 0.13557671010494232 2023-01-24 03:06:47.028966: step: 674/470, loss: 0.0696479007601738 2023-01-24 03:06:47.709981: step: 676/470, loss: 0.061501167714595795 2023-01-24 03:06:48.478080: step: 678/470, loss: 0.18382152915000916 2023-01-24 03:06:49.248667: step: 680/470, loss: 0.1674911230802536 2023-01-24 03:06:49.977903: step: 682/470, loss: 0.5173250436782837 2023-01-24 03:06:50.672229: step: 684/470, loss: 0.1093844622373581 2023-01-24 03:06:51.541245: step: 686/470, loss: 0.5219208598136902 2023-01-24 03:06:52.330438: step: 688/470, loss: 0.16668257117271423 2023-01-24 03:06:53.059588: step: 690/470, loss: 0.5589529871940613 2023-01-24 03:06:53.811012: step: 692/470, loss: 0.16195274889469147 2023-01-24 03:06:54.537829: step: 694/470, loss: 0.17775781452655792 2023-01-24 03:06:55.282254: step: 696/470, loss: 14.064286231994629 2023-01-24 03:06:56.046041: step: 698/470, loss: 1.6122430562973022 2023-01-24 03:06:56.759081: step: 700/470, loss: 0.1364855021238327 2023-01-24 03:06:57.474669: step: 702/470, loss: 0.2651909291744232 2023-01-24 03:06:58.122361: step: 704/470, loss: 0.10258844494819641 2023-01-24 03:06:58.872247: step: 706/470, loss: 0.4878748059272766 2023-01-24 03:06:59.581071: step: 708/470, loss: 0.09945238381624222 2023-01-24 03:07:00.448611: step: 710/470, loss: 0.25955334305763245 2023-01-24 03:07:01.155611: step: 712/470, loss: 0.3970111906528473 2023-01-24 03:07:01.901267: step: 714/470, loss: 0.5209802985191345 2023-01-24 03:07:02.620946: step: 716/470, loss: 0.11759445071220398 2023-01-24 03:07:03.362130: step: 718/470, loss: 0.15108118951320648 2023-01-24 03:07:04.057501: step: 720/470, loss: 0.36267387866973877 2023-01-24 03:07:04.874208: step: 722/470, loss: 0.07263346761465073 2023-01-24 03:07:05.651712: step: 724/470, loss: 0.10593510419130325 2023-01-24 03:07:06.330490: step: 726/470, loss: 0.26219284534454346 2023-01-24 03:07:07.147641: step: 728/470, loss: 0.17348651587963104 2023-01-24 03:07:07.851977: step: 730/470, loss: 0.1823439598083496 2023-01-24 03:07:08.601035: step: 732/470, loss: 0.20073945820331573 2023-01-24 03:07:09.371230: step: 734/470, loss: 0.27448293566703796 2023-01-24 03:07:10.161701: step: 736/470, loss: 0.33971887826919556 2023-01-24 03:07:10.942517: step: 738/470, loss: 0.1402978003025055 2023-01-24 03:07:11.715986: step: 740/470, loss: 0.5981901288032532 2023-01-24 03:07:12.489972: step: 742/470, loss: 0.04211556911468506 2023-01-24 03:07:13.285258: step: 744/470, loss: 0.11189659684896469 2023-01-24 03:07:13.891503: step: 746/470, loss: 0.12144724279642105 2023-01-24 03:07:14.544095: step: 748/470, loss: 0.3307271897792816 2023-01-24 03:07:15.259678: step: 750/470, loss: 0.2954995632171631 2023-01-24 03:07:15.921439: step: 752/470, loss: 0.4646444022655487 2023-01-24 03:07:16.582153: step: 754/470, loss: 0.06638015806674957 2023-01-24 03:07:17.411007: step: 756/470, loss: 0.07224085181951523 2023-01-24 03:07:18.083930: step: 758/470, loss: 0.2975500822067261 2023-01-24 03:07:18.821913: step: 760/470, loss: 0.8255310654640198 2023-01-24 03:07:19.583772: step: 762/470, loss: 0.12929823994636536 2023-01-24 03:07:20.355759: step: 764/470, loss: 0.1628466248512268 2023-01-24 03:07:21.128657: step: 766/470, loss: 0.14177992939949036 2023-01-24 03:07:21.898810: step: 768/470, loss: 0.393669992685318 2023-01-24 03:07:22.666896: step: 770/470, loss: 0.04610329121351242 2023-01-24 03:07:23.473694: step: 772/470, loss: 0.45871657133102417 2023-01-24 03:07:24.220182: step: 774/470, loss: 0.3327120244503021 2023-01-24 03:07:24.948274: step: 776/470, loss: 0.43813204765319824 2023-01-24 03:07:25.851970: step: 778/470, loss: 1.1366257667541504 2023-01-24 03:07:26.580813: step: 780/470, loss: 0.23157668113708496 2023-01-24 03:07:27.359599: step: 782/470, loss: 0.3343833088874817 2023-01-24 03:07:28.124907: step: 784/470, loss: 0.06032833456993103 2023-01-24 03:07:29.033861: step: 786/470, loss: 0.18906527757644653 2023-01-24 03:07:29.727393: step: 788/470, loss: 0.2094613015651703 2023-01-24 03:07:30.490555: step: 790/470, loss: 0.26915594935417175 2023-01-24 03:07:31.292542: step: 792/470, loss: 0.21122537553310394 2023-01-24 03:07:32.060073: step: 794/470, loss: 0.07323165982961655 2023-01-24 03:07:32.827610: step: 796/470, loss: 0.21279887855052948 2023-01-24 03:07:33.600613: step: 798/470, loss: 0.561419665813446 2023-01-24 03:07:34.298224: step: 800/470, loss: 0.16097694635391235 2023-01-24 03:07:35.123311: step: 802/470, loss: 0.2642227113246918 2023-01-24 03:07:35.883734: step: 804/470, loss: 0.2735598385334015 2023-01-24 03:07:36.579446: step: 806/470, loss: 0.1263493448495865 2023-01-24 03:07:37.359694: step: 808/470, loss: 0.33118510246276855 2023-01-24 03:07:38.021049: step: 810/470, loss: 0.3240700960159302 2023-01-24 03:07:38.832789: step: 812/470, loss: 0.09242391586303711 2023-01-24 03:07:39.590162: step: 814/470, loss: 0.15188434720039368 2023-01-24 03:07:40.400853: step: 816/470, loss: 0.5113020539283752 2023-01-24 03:07:41.094608: step: 818/470, loss: 0.1256381869316101 2023-01-24 03:07:41.841108: step: 820/470, loss: 0.08072460442781448 2023-01-24 03:07:42.520508: step: 822/470, loss: 0.4400816559791565 2023-01-24 03:07:43.272399: step: 824/470, loss: 0.22420644760131836 2023-01-24 03:07:44.049496: step: 826/470, loss: 0.13756045699119568 2023-01-24 03:07:44.835610: step: 828/470, loss: 0.05152636021375656 2023-01-24 03:07:45.860832: step: 830/470, loss: 0.1861773133277893 2023-01-24 03:07:46.523004: step: 832/470, loss: 0.08842433243989944 2023-01-24 03:07:47.236659: step: 834/470, loss: 0.1939559429883957 2023-01-24 03:07:47.994057: step: 836/470, loss: 0.06441611051559448 2023-01-24 03:07:48.912216: step: 838/470, loss: 0.18977122008800507 2023-01-24 03:07:49.614811: step: 840/470, loss: 0.02858108840882778 2023-01-24 03:07:50.440073: step: 842/470, loss: 0.2794545888900757 2023-01-24 03:07:51.182325: step: 844/470, loss: 0.666828453540802 2023-01-24 03:07:51.946337: step: 846/470, loss: 0.5264043807983398 2023-01-24 03:07:52.681850: step: 848/470, loss: 0.2906803488731384 2023-01-24 03:07:53.442071: step: 850/470, loss: 0.4317677617073059 2023-01-24 03:07:54.208813: step: 852/470, loss: 0.05206689238548279 2023-01-24 03:07:54.911789: step: 854/470, loss: 0.3372666537761688 2023-01-24 03:07:55.714805: step: 856/470, loss: 0.22663447260856628 2023-01-24 03:07:56.447901: step: 858/470, loss: 0.35830050706863403 2023-01-24 03:07:57.216615: step: 860/470, loss: 0.28675583004951477 2023-01-24 03:07:57.952036: step: 862/470, loss: 0.1459394097328186 2023-01-24 03:07:58.671731: step: 864/470, loss: 0.2950734496116638 2023-01-24 03:07:59.405907: step: 866/470, loss: 0.18734970688819885 2023-01-24 03:08:00.086752: step: 868/470, loss: 0.08804669976234436 2023-01-24 03:08:00.808267: step: 870/470, loss: 0.18895027041435242 2023-01-24 03:08:01.607498: step: 872/470, loss: 0.2600648105144501 2023-01-24 03:08:02.397978: step: 874/470, loss: 0.44221991300582886 2023-01-24 03:08:03.139750: step: 876/470, loss: 0.17826533317565918 2023-01-24 03:08:03.881122: step: 878/470, loss: 0.17969872057437897 2023-01-24 03:08:04.628068: step: 880/470, loss: 0.2378416508436203 2023-01-24 03:08:05.487824: step: 882/470, loss: 0.12380614876747131 2023-01-24 03:08:06.269854: step: 884/470, loss: 0.35380885004997253 2023-01-24 03:08:07.118658: step: 886/470, loss: 0.18434365093708038 2023-01-24 03:08:07.940013: step: 888/470, loss: 0.09525241702795029 2023-01-24 03:08:08.610895: step: 890/470, loss: 0.05519452691078186 2023-01-24 03:08:09.334343: step: 892/470, loss: 0.12015210837125778 2023-01-24 03:08:10.028318: step: 894/470, loss: 0.15597781538963318 2023-01-24 03:08:10.753025: step: 896/470, loss: 0.052873898297548294 2023-01-24 03:08:11.506440: step: 898/470, loss: 0.587254524230957 2023-01-24 03:08:12.279201: step: 900/470, loss: 0.21591414511203766 2023-01-24 03:08:12.980877: step: 902/470, loss: 0.1420390009880066 2023-01-24 03:08:13.829026: step: 904/470, loss: 0.5451937317848206 2023-01-24 03:08:14.722848: step: 906/470, loss: 0.4762882590293884 2023-01-24 03:08:15.419339: step: 908/470, loss: 0.08040604740381241 2023-01-24 03:08:16.179595: step: 910/470, loss: 0.1304711103439331 2023-01-24 03:08:16.923383: step: 912/470, loss: 0.43863868713378906 2023-01-24 03:08:17.692521: step: 914/470, loss: 0.07513883709907532 2023-01-24 03:08:18.441518: step: 916/470, loss: 1.0359426736831665 2023-01-24 03:08:19.245760: step: 918/470, loss: 0.2730335593223572 2023-01-24 03:08:20.060492: step: 920/470, loss: 0.16464608907699585 2023-01-24 03:08:20.815093: step: 922/470, loss: 0.5824774503707886 2023-01-24 03:08:21.637435: step: 924/470, loss: 0.6275187134742737 2023-01-24 03:08:22.412016: step: 926/470, loss: 0.1604541540145874 2023-01-24 03:08:23.194714: step: 928/470, loss: 0.27200543880462646 2023-01-24 03:08:23.891961: step: 930/470, loss: 0.886191189289093 2023-01-24 03:08:24.668459: step: 932/470, loss: 0.22329403460025787 2023-01-24 03:08:25.412760: step: 934/470, loss: 0.12522681057453156 2023-01-24 03:08:26.097402: step: 936/470, loss: 0.1403811126947403 2023-01-24 03:08:26.841537: step: 938/470, loss: 0.22944138944149017 2023-01-24 03:08:27.583307: step: 940/470, loss: 0.12413433194160461 2023-01-24 03:08:28.286452: step: 942/470, loss: 0.30450427532196045 ================================================== Loss: 0.340 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3357460400478183, 'r': 0.3045286663052318, 'f1': 0.3193763326226013}, 'combined': 0.2353299293008641, 'epoch': 11} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36502366061223734, 'r': 0.32746834168386296, 'f1': 0.3452276486074176}, 'combined': 0.23015176573827836, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.332823273172558, 'r': 0.3044038285942561, 'f1': 0.3179798169854766}, 'combined': 0.23430091777877224, 'epoch': 11} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3720234634091542, 'r': 0.32194338179638343, 'f1': 0.34517640934869975}, 'combined': 0.23011760623246644, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.322361932938856, 'r': 0.3101280834914611, 'f1': 0.3161266924564797}, 'combined': 0.23293545759951131, 'epoch': 11} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3599233728797589, 'r': 0.33465952074492966, 'f1': 0.3468319896110881}, 'combined': 0.23122132640739199, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2648809523809524, 'r': 0.31785714285714284, 'f1': 0.28896103896103903}, 'combined': 0.19264069264069267, 'epoch': 11} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.21739130434782608, 'f1': 0.29411764705882354}, 'combined': 0.19607843137254902, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3176152436672239, 'r': 0.30435616328642895, 'f1': 0.3108443760696668}, 'combined': 0.22904322447238606, 'epoch': 10} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3714783299504381, 'r': 0.319685678178502, 'f1': 0.3436414525122916}, 'combined': 0.22909430167486103, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 10} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:11:02.160819: step: 2/470, loss: 1.6027824878692627 2023-01-24 03:11:02.898561: step: 4/470, loss: 0.513495922088623 2023-01-24 03:11:03.600598: step: 6/470, loss: 0.4840008020401001 2023-01-24 03:11:04.351011: step: 8/470, loss: 0.2687922716140747 2023-01-24 03:11:05.108916: step: 10/470, loss: 0.8560855388641357 2023-01-24 03:11:05.993819: step: 12/470, loss: 0.17021265625953674 2023-01-24 03:11:06.757683: step: 14/470, loss: 0.18634700775146484 2023-01-24 03:11:07.446591: step: 16/470, loss: 0.05369818955659866 2023-01-24 03:11:08.210007: step: 18/470, loss: 0.19231680035591125 2023-01-24 03:11:08.965990: step: 20/470, loss: 0.19430579245090485 2023-01-24 03:11:09.704359: step: 22/470, loss: 0.7491946220397949 2023-01-24 03:11:10.515774: step: 24/470, loss: 0.19225333631038666 2023-01-24 03:11:11.248971: step: 26/470, loss: 0.32709619402885437 2023-01-24 03:11:11.949580: step: 28/470, loss: 0.1636715531349182 2023-01-24 03:11:12.773228: step: 30/470, loss: 0.08695600926876068 2023-01-24 03:11:13.488649: step: 32/470, loss: 0.33848845958709717 2023-01-24 03:11:14.242882: step: 34/470, loss: 0.039047807455062866 2023-01-24 03:11:15.064152: step: 36/470, loss: 0.09349209070205688 2023-01-24 03:11:15.810858: step: 38/470, loss: 0.12132520228624344 2023-01-24 03:11:16.614414: step: 40/470, loss: 0.24875620007514954 2023-01-24 03:11:17.480721: step: 42/470, loss: 0.37038731575012207 2023-01-24 03:11:18.226818: step: 44/470, loss: 0.14126403629779816 2023-01-24 03:11:18.988576: step: 46/470, loss: 0.41059553623199463 2023-01-24 03:11:19.697340: step: 48/470, loss: 0.09567522257566452 2023-01-24 03:11:20.510429: step: 50/470, loss: 0.20272527635097504 2023-01-24 03:11:21.287791: step: 52/470, loss: 0.13555192947387695 2023-01-24 03:11:22.057047: step: 54/470, loss: 0.08126991242170334 2023-01-24 03:11:22.827518: step: 56/470, loss: 0.13052316009998322 2023-01-24 03:11:23.521974: step: 58/470, loss: 0.08066800236701965 2023-01-24 03:11:24.293955: step: 60/470, loss: 0.17245280742645264 2023-01-24 03:11:25.047799: step: 62/470, loss: 0.3052728772163391 2023-01-24 03:11:25.873954: step: 64/470, loss: 0.3907634913921356 2023-01-24 03:11:26.707811: step: 66/470, loss: 0.2599896192550659 2023-01-24 03:11:27.443713: step: 68/470, loss: 0.141568124294281 2023-01-24 03:11:28.199080: step: 70/470, loss: 0.10497156530618668 2023-01-24 03:11:28.996044: step: 72/470, loss: 0.2574126124382019 2023-01-24 03:11:29.719627: step: 74/470, loss: 0.18920814990997314 2023-01-24 03:11:30.458217: step: 76/470, loss: 0.20635807514190674 2023-01-24 03:11:31.125061: step: 78/470, loss: 0.11566061526536942 2023-01-24 03:11:31.938816: step: 80/470, loss: 0.11462550610303879 2023-01-24 03:11:32.704384: step: 82/470, loss: 0.3002080023288727 2023-01-24 03:11:33.411160: step: 84/470, loss: 0.13464710116386414 2023-01-24 03:11:34.157461: step: 86/470, loss: 0.044168468564748764 2023-01-24 03:11:34.991441: step: 88/470, loss: 0.29472196102142334 2023-01-24 03:11:35.775100: step: 90/470, loss: 0.20468667149543762 2023-01-24 03:11:36.549710: step: 92/470, loss: 0.2128215730190277 2023-01-24 03:11:37.294258: step: 94/470, loss: 0.03551870957016945 2023-01-24 03:11:37.961930: step: 96/470, loss: 0.050920166075229645 2023-01-24 03:11:38.734372: step: 98/470, loss: 0.13329026103019714 2023-01-24 03:11:39.471057: step: 100/470, loss: 0.04325414076447487 2023-01-24 03:11:40.191310: step: 102/470, loss: 0.5133774876594543 2023-01-24 03:11:40.909053: step: 104/470, loss: 0.19939365983009338 2023-01-24 03:11:41.616244: step: 106/470, loss: 0.14586398005485535 2023-01-24 03:11:42.253630: step: 108/470, loss: 0.49300411343574524 2023-01-24 03:11:43.002850: step: 110/470, loss: 0.22268635034561157 2023-01-24 03:11:43.788549: step: 112/470, loss: 0.07112699747085571 2023-01-24 03:11:44.629042: step: 114/470, loss: 0.8724595308303833 2023-01-24 03:11:45.413664: step: 116/470, loss: 0.040853098034858704 2023-01-24 03:11:46.199332: step: 118/470, loss: 0.11627252399921417 2023-01-24 03:11:46.899881: step: 120/470, loss: 0.44871455430984497 2023-01-24 03:11:47.654557: step: 122/470, loss: 0.10901898890733719 2023-01-24 03:11:48.390341: step: 124/470, loss: 0.1313031166791916 2023-01-24 03:11:49.161712: step: 126/470, loss: 0.35010117292404175 2023-01-24 03:11:49.932556: step: 128/470, loss: 0.20002852380275726 2023-01-24 03:11:50.646978: step: 130/470, loss: 0.21068109571933746 2023-01-24 03:11:51.378732: step: 132/470, loss: 0.07485038787126541 2023-01-24 03:11:52.137320: step: 134/470, loss: 0.20554983615875244 2023-01-24 03:11:52.933171: step: 136/470, loss: 0.09597218781709671 2023-01-24 03:11:53.731833: step: 138/470, loss: 0.1307460069656372 2023-01-24 03:11:54.505422: step: 140/470, loss: 0.0812094509601593 2023-01-24 03:11:55.223671: step: 142/470, loss: 0.2051864117383957 2023-01-24 03:11:55.948416: step: 144/470, loss: 0.22790312767028809 2023-01-24 03:11:56.610463: step: 146/470, loss: 0.07648234069347382 2023-01-24 03:11:57.357604: step: 148/470, loss: 0.45176663994789124 2023-01-24 03:11:58.154711: step: 150/470, loss: 0.17060501873493195 2023-01-24 03:11:58.859684: step: 152/470, loss: 0.1733158975839615 2023-01-24 03:11:59.703302: step: 154/470, loss: 0.10912138223648071 2023-01-24 03:12:00.454531: step: 156/470, loss: 0.15763217210769653 2023-01-24 03:12:01.178261: step: 158/470, loss: 0.40257176756858826 2023-01-24 03:12:01.911469: step: 160/470, loss: 0.30875569581985474 2023-01-24 03:12:02.656916: step: 162/470, loss: 0.8680994510650635 2023-01-24 03:12:03.433325: step: 164/470, loss: 0.2595478892326355 2023-01-24 03:12:04.182077: step: 166/470, loss: 0.06596395373344421 2023-01-24 03:12:04.900349: step: 168/470, loss: 0.12518943846225739 2023-01-24 03:12:05.622698: step: 170/470, loss: 0.24903565645217896 2023-01-24 03:12:06.411500: step: 172/470, loss: 0.1930832415819168 2023-01-24 03:12:07.154251: step: 174/470, loss: 0.021100223064422607 2023-01-24 03:12:07.937437: step: 176/470, loss: 0.23497158288955688 2023-01-24 03:12:08.811146: step: 178/470, loss: 0.1109008640050888 2023-01-24 03:12:09.562332: step: 180/470, loss: 0.35954418778419495 2023-01-24 03:12:10.296307: step: 182/470, loss: 0.11698159575462341 2023-01-24 03:12:10.978147: step: 184/470, loss: 0.10983223468065262 2023-01-24 03:12:11.728992: step: 186/470, loss: 0.12484107166528702 2023-01-24 03:12:12.494462: step: 188/470, loss: 0.19515368342399597 2023-01-24 03:12:13.251805: step: 190/470, loss: 0.26054152846336365 2023-01-24 03:12:13.947696: step: 192/470, loss: 0.14039883017539978 2023-01-24 03:12:14.607036: step: 194/470, loss: 0.16187353432178497 2023-01-24 03:12:15.280561: step: 196/470, loss: 0.14299781620502472 2023-01-24 03:12:15.999549: step: 198/470, loss: 0.21850766241550446 2023-01-24 03:12:16.772388: step: 200/470, loss: 0.18616288900375366 2023-01-24 03:12:17.488145: step: 202/470, loss: 0.08063409477472305 2023-01-24 03:12:18.188078: step: 204/470, loss: 0.061108168214559555 2023-01-24 03:12:18.911636: step: 206/470, loss: 0.10571593791246414 2023-01-24 03:12:19.720873: step: 208/470, loss: 0.5858561992645264 2023-01-24 03:12:20.421544: step: 210/470, loss: 0.0865621417760849 2023-01-24 03:12:21.125391: step: 212/470, loss: 0.3058476150035858 2023-01-24 03:12:21.793125: step: 214/470, loss: 0.30787721276283264 2023-01-24 03:12:22.536028: step: 216/470, loss: 0.05481060966849327 2023-01-24 03:12:23.237074: step: 218/470, loss: 0.1102779284119606 2023-01-24 03:12:23.997921: step: 220/470, loss: 0.21178603172302246 2023-01-24 03:12:24.792937: step: 222/470, loss: 0.10403713583946228 2023-01-24 03:12:25.661766: step: 224/470, loss: 0.6571042537689209 2023-01-24 03:12:26.395219: step: 226/470, loss: 0.10259111225605011 2023-01-24 03:12:27.094588: step: 228/470, loss: 0.1595088392496109 2023-01-24 03:12:27.886821: step: 230/470, loss: 0.17050521075725555 2023-01-24 03:12:28.665044: step: 232/470, loss: 0.5048624873161316 2023-01-24 03:12:29.477066: step: 234/470, loss: 0.20384322106838226 2023-01-24 03:12:30.223070: step: 236/470, loss: 0.11227284371852875 2023-01-24 03:12:30.989611: step: 238/470, loss: 0.07610318064689636 2023-01-24 03:12:31.713987: step: 240/470, loss: 0.4045836627483368 2023-01-24 03:12:32.458498: step: 242/470, loss: 0.068994902074337 2023-01-24 03:12:33.168098: step: 244/470, loss: 0.14514769613742828 2023-01-24 03:12:33.940535: step: 246/470, loss: 0.06832067668437958 2023-01-24 03:12:34.685603: step: 248/470, loss: 0.415750116109848 2023-01-24 03:12:35.424587: step: 250/470, loss: 0.2067742943763733 2023-01-24 03:12:36.239385: step: 252/470, loss: 0.3949142098426819 2023-01-24 03:12:36.982047: step: 254/470, loss: 0.09282752871513367 2023-01-24 03:12:37.761740: step: 256/470, loss: 0.2223929464817047 2023-01-24 03:12:38.531300: step: 258/470, loss: 0.252516508102417 2023-01-24 03:12:39.320304: step: 260/470, loss: 2.0077507495880127 2023-01-24 03:12:40.125778: step: 262/470, loss: 0.5033937096595764 2023-01-24 03:12:40.829947: step: 264/470, loss: 0.14328442513942719 2023-01-24 03:12:41.533909: step: 266/470, loss: 0.30592918395996094 2023-01-24 03:12:42.260785: step: 268/470, loss: 0.15917566418647766 2023-01-24 03:12:42.947980: step: 270/470, loss: 0.14353780448436737 2023-01-24 03:12:43.698247: step: 272/470, loss: 0.064031682908535 2023-01-24 03:12:44.595476: step: 274/470, loss: 0.19164782762527466 2023-01-24 03:12:45.286941: step: 276/470, loss: 0.1831219345331192 2023-01-24 03:12:46.119294: step: 278/470, loss: 0.10314060747623444 2023-01-24 03:12:46.871194: step: 280/470, loss: 0.11051566898822784 2023-01-24 03:12:47.661545: step: 282/470, loss: 0.12450094521045685 2023-01-24 03:12:48.584364: step: 284/470, loss: 0.19194933772087097 2023-01-24 03:12:49.344327: step: 286/470, loss: 0.3615543842315674 2023-01-24 03:12:50.034575: step: 288/470, loss: 0.07781562954187393 2023-01-24 03:12:50.781824: step: 290/470, loss: 0.12711076438426971 2023-01-24 03:12:51.508718: step: 292/470, loss: 0.5902228951454163 2023-01-24 03:12:52.216527: step: 294/470, loss: 0.4633359909057617 2023-01-24 03:12:53.064154: step: 296/470, loss: 0.14595328271389008 2023-01-24 03:12:53.813810: step: 298/470, loss: 0.18961495161056519 2023-01-24 03:12:54.526640: step: 300/470, loss: 1.7901432514190674 2023-01-24 03:12:55.324078: step: 302/470, loss: 0.1573238968849182 2023-01-24 03:12:56.121183: step: 304/470, loss: 2.244335651397705 2023-01-24 03:12:56.782907: step: 306/470, loss: 0.12172958999872208 2023-01-24 03:12:57.563968: step: 308/470, loss: 0.07545558363199234 2023-01-24 03:12:58.320861: step: 310/470, loss: 0.0413089245557785 2023-01-24 03:12:59.049604: step: 312/470, loss: 0.3559807538986206 2023-01-24 03:12:59.788374: step: 314/470, loss: 0.08101952075958252 2023-01-24 03:13:00.515683: step: 316/470, loss: 0.1672833114862442 2023-01-24 03:13:01.203870: step: 318/470, loss: 0.16862982511520386 2023-01-24 03:13:01.966179: step: 320/470, loss: 0.06150231882929802 2023-01-24 03:13:02.792698: step: 322/470, loss: 0.10682566463947296 2023-01-24 03:13:03.545856: step: 324/470, loss: 0.233676478266716 2023-01-24 03:13:04.437497: step: 326/470, loss: 0.17197014391422272 2023-01-24 03:13:05.190854: step: 328/470, loss: 0.3912794589996338 2023-01-24 03:13:05.960031: step: 330/470, loss: 0.4253474771976471 2023-01-24 03:13:06.664226: step: 332/470, loss: 0.15210644900798798 2023-01-24 03:13:07.469874: step: 334/470, loss: 0.2834761440753937 2023-01-24 03:13:08.245024: step: 336/470, loss: 0.09623689949512482 2023-01-24 03:13:09.082395: step: 338/470, loss: 0.32247433066368103 2023-01-24 03:13:09.767491: step: 340/470, loss: 0.059781987220048904 2023-01-24 03:13:10.482132: step: 342/470, loss: 0.2658904492855072 2023-01-24 03:13:11.266455: step: 344/470, loss: 0.3153829872608185 2023-01-24 03:13:11.984460: step: 346/470, loss: 0.08230673521757126 2023-01-24 03:13:12.774645: step: 348/470, loss: 0.17258965969085693 2023-01-24 03:13:13.523534: step: 350/470, loss: 0.28981152176856995 2023-01-24 03:13:14.155789: step: 352/470, loss: 0.1635764241218567 2023-01-24 03:13:14.931388: step: 354/470, loss: 0.21193452179431915 2023-01-24 03:13:15.637284: step: 356/470, loss: 0.07874863594770432 2023-01-24 03:13:16.398973: step: 358/470, loss: 0.09164706617593765 2023-01-24 03:13:17.178074: step: 360/470, loss: 5.99439811706543 2023-01-24 03:13:17.926116: step: 362/470, loss: 0.16293644905090332 2023-01-24 03:13:18.658776: step: 364/470, loss: 0.09890060126781464 2023-01-24 03:13:19.378584: step: 366/470, loss: 0.21786588430404663 2023-01-24 03:13:20.119210: step: 368/470, loss: 0.33316826820373535 2023-01-24 03:13:20.881426: step: 370/470, loss: 0.055760059505701065 2023-01-24 03:13:21.563413: step: 372/470, loss: 0.3406805098056793 2023-01-24 03:13:22.347317: step: 374/470, loss: 0.6691519021987915 2023-01-24 03:13:23.040562: step: 376/470, loss: 0.0685260146856308 2023-01-24 03:13:23.822897: step: 378/470, loss: 0.12233281135559082 2023-01-24 03:13:24.483629: step: 380/470, loss: 0.14288434386253357 2023-01-24 03:13:25.230637: step: 382/470, loss: 0.19023549556732178 2023-01-24 03:13:25.958183: step: 384/470, loss: 0.08775024116039276 2023-01-24 03:13:26.659693: step: 386/470, loss: 0.27361616492271423 2023-01-24 03:13:27.465879: step: 388/470, loss: 0.13204854726791382 2023-01-24 03:13:28.224692: step: 390/470, loss: 0.18165439367294312 2023-01-24 03:13:29.111831: step: 392/470, loss: 0.12773683667182922 2023-01-24 03:13:29.883223: step: 394/470, loss: 0.0818340927362442 2023-01-24 03:13:30.633956: step: 396/470, loss: 0.1057894304394722 2023-01-24 03:13:31.358393: step: 398/470, loss: 0.5085698366165161 2023-01-24 03:13:32.051190: step: 400/470, loss: 0.05038611590862274 2023-01-24 03:13:32.751613: step: 402/470, loss: 0.168458491563797 2023-01-24 03:13:33.546362: step: 404/470, loss: 0.18103928864002228 2023-01-24 03:13:34.242954: step: 406/470, loss: 0.18114416301250458 2023-01-24 03:13:34.992969: step: 408/470, loss: 0.13039417564868927 2023-01-24 03:13:35.731335: step: 410/470, loss: 0.12932468950748444 2023-01-24 03:13:36.464982: step: 412/470, loss: 0.30325838923454285 2023-01-24 03:13:37.240232: step: 414/470, loss: 0.3485577702522278 2023-01-24 03:13:37.949926: step: 416/470, loss: 0.2910950183868408 2023-01-24 03:13:38.717945: step: 418/470, loss: 0.12932966649532318 2023-01-24 03:13:39.448587: step: 420/470, loss: 0.08383800089359283 2023-01-24 03:13:40.158984: step: 422/470, loss: 0.17576591670513153 2023-01-24 03:13:40.852346: step: 424/470, loss: 0.1013016402721405 2023-01-24 03:13:41.523295: step: 426/470, loss: 0.2601459324359894 2023-01-24 03:13:42.430578: step: 428/470, loss: 0.09860192984342575 2023-01-24 03:13:43.129925: step: 430/470, loss: 0.12635904550552368 2023-01-24 03:13:43.897420: step: 432/470, loss: 0.08993472903966904 2023-01-24 03:13:44.564049: step: 434/470, loss: 0.2409133017063141 2023-01-24 03:13:45.369610: step: 436/470, loss: 0.7562994956970215 2023-01-24 03:13:46.097809: step: 438/470, loss: 0.14493654668331146 2023-01-24 03:13:46.838818: step: 440/470, loss: 0.2697089910507202 2023-01-24 03:13:47.546906: step: 442/470, loss: 0.17629806697368622 2023-01-24 03:13:48.320460: step: 444/470, loss: 0.3165607452392578 2023-01-24 03:13:49.109707: step: 446/470, loss: 0.24010397493839264 2023-01-24 03:13:49.860197: step: 448/470, loss: 0.08545230329036713 2023-01-24 03:13:50.610150: step: 450/470, loss: 0.24912571907043457 2023-01-24 03:13:51.460840: step: 452/470, loss: 0.42028677463531494 2023-01-24 03:13:52.191430: step: 454/470, loss: 0.12412894517183304 2023-01-24 03:13:52.947831: step: 456/470, loss: 0.07028322666883469 2023-01-24 03:13:53.631858: step: 458/470, loss: 0.045641541481018066 2023-01-24 03:13:54.366371: step: 460/470, loss: 0.2509128451347351 2023-01-24 03:13:55.113262: step: 462/470, loss: 0.7367977499961853 2023-01-24 03:13:55.817708: step: 464/470, loss: 0.09236481040716171 2023-01-24 03:13:56.633039: step: 466/470, loss: 0.1762775182723999 2023-01-24 03:13:57.338646: step: 468/470, loss: 0.12322688847780228 2023-01-24 03:13:58.100882: step: 470/470, loss: 0.1871875375509262 2023-01-24 03:13:58.900653: step: 472/470, loss: 0.17395652830600739 2023-01-24 03:13:59.663419: step: 474/470, loss: 0.5580199956893921 2023-01-24 03:14:00.431129: step: 476/470, loss: 0.11032622307538986 2023-01-24 03:14:01.158656: step: 478/470, loss: 0.0684078112244606 2023-01-24 03:14:01.905054: step: 480/470, loss: 0.12853725254535675 2023-01-24 03:14:02.653125: step: 482/470, loss: 0.11245977878570557 2023-01-24 03:14:03.428645: step: 484/470, loss: 0.44150134921073914 2023-01-24 03:14:04.151197: step: 486/470, loss: 0.10275861620903015 2023-01-24 03:14:04.860244: step: 488/470, loss: 1.2313703298568726 2023-01-24 03:14:05.639615: step: 490/470, loss: 0.16322949528694153 2023-01-24 03:14:06.322031: step: 492/470, loss: 0.23350676894187927 2023-01-24 03:14:07.030912: step: 494/470, loss: 0.16384433209896088 2023-01-24 03:14:07.787987: step: 496/470, loss: 0.22567422688007355 2023-01-24 03:14:08.562234: step: 498/470, loss: 0.568155825138092 2023-01-24 03:14:09.403318: step: 500/470, loss: 0.2018314152956009 2023-01-24 03:14:10.131115: step: 502/470, loss: 0.30602437257766724 2023-01-24 03:14:10.842413: step: 504/470, loss: 0.5151650905609131 2023-01-24 03:14:11.561302: step: 506/470, loss: 0.1632133275270462 2023-01-24 03:14:12.259119: step: 508/470, loss: 0.39310041069984436 2023-01-24 03:14:13.109857: step: 510/470, loss: 0.32889223098754883 2023-01-24 03:14:13.888598: step: 512/470, loss: 0.16092555224895477 2023-01-24 03:14:14.619612: step: 514/470, loss: 0.0802043080329895 2023-01-24 03:14:15.283946: step: 516/470, loss: 0.4701124131679535 2023-01-24 03:14:16.025417: step: 518/470, loss: 0.33139216899871826 2023-01-24 03:14:16.762200: step: 520/470, loss: 0.19706635177135468 2023-01-24 03:14:17.538908: step: 522/470, loss: 1.2813704013824463 2023-01-24 03:14:18.241449: step: 524/470, loss: 0.16043278574943542 2023-01-24 03:14:18.994086: step: 526/470, loss: 0.2318851202726364 2023-01-24 03:14:19.790373: step: 528/470, loss: 0.5282825827598572 2023-01-24 03:14:20.590440: step: 530/470, loss: 0.19871442019939423 2023-01-24 03:14:21.287145: step: 532/470, loss: 0.08099329471588135 2023-01-24 03:14:21.994211: step: 534/470, loss: 0.03250506892800331 2023-01-24 03:14:22.678253: step: 536/470, loss: 0.25197839736938477 2023-01-24 03:14:23.472016: step: 538/470, loss: 0.11561629921197891 2023-01-24 03:14:24.241724: step: 540/470, loss: 0.24496881663799286 2023-01-24 03:14:24.990971: step: 542/470, loss: 0.13660961389541626 2023-01-24 03:14:25.670763: step: 544/470, loss: 0.15981179475784302 2023-01-24 03:14:26.357490: step: 546/470, loss: 0.5604614019393921 2023-01-24 03:14:27.118182: step: 548/470, loss: 0.1567903310060501 2023-01-24 03:14:27.876362: step: 550/470, loss: 0.3064483404159546 2023-01-24 03:14:28.626163: step: 552/470, loss: 0.2242286205291748 2023-01-24 03:14:29.358560: step: 554/470, loss: 0.06868718564510345 2023-01-24 03:14:30.130377: step: 556/470, loss: 0.40813806653022766 2023-01-24 03:14:30.863061: step: 558/470, loss: 0.36897414922714233 2023-01-24 03:14:31.585239: step: 560/470, loss: 0.1006973534822464 2023-01-24 03:14:32.362158: step: 562/470, loss: 0.09448845684528351 2023-01-24 03:14:33.161846: step: 564/470, loss: 0.7840608954429626 2023-01-24 03:14:33.941098: step: 566/470, loss: 0.22917340695858002 2023-01-24 03:14:34.659959: step: 568/470, loss: 0.07258880138397217 2023-01-24 03:14:35.386837: step: 570/470, loss: 0.17850033938884735 2023-01-24 03:14:36.190179: step: 572/470, loss: 0.19725555181503296 2023-01-24 03:14:36.953782: step: 574/470, loss: 0.08748982846736908 2023-01-24 03:14:37.876661: step: 576/470, loss: 0.1885845810174942 2023-01-24 03:14:38.593438: step: 578/470, loss: 0.21629391610622406 2023-01-24 03:14:39.314705: step: 580/470, loss: 0.1099558100104332 2023-01-24 03:14:40.031681: step: 582/470, loss: 0.07752203196287155 2023-01-24 03:14:40.853266: step: 584/470, loss: 0.21718913316726685 2023-01-24 03:14:41.634130: step: 586/470, loss: 0.11684189736843109 2023-01-24 03:14:42.327403: step: 588/470, loss: 0.14402146637439728 2023-01-24 03:14:43.187066: step: 590/470, loss: 0.30400651693344116 2023-01-24 03:14:43.884712: step: 592/470, loss: 0.14186808466911316 2023-01-24 03:14:44.587969: step: 594/470, loss: 0.15555933117866516 2023-01-24 03:14:45.326222: step: 596/470, loss: 0.1906273365020752 2023-01-24 03:14:46.172081: step: 598/470, loss: 1.0738638639450073 2023-01-24 03:14:46.941809: step: 600/470, loss: 0.3519884943962097 2023-01-24 03:14:47.709357: step: 602/470, loss: 0.1826198250055313 2023-01-24 03:14:48.431643: step: 604/470, loss: 0.07761722803115845 2023-01-24 03:14:49.506445: step: 606/470, loss: 0.15749958157539368 2023-01-24 03:14:50.302701: step: 608/470, loss: 0.3068345785140991 2023-01-24 03:14:50.999088: step: 610/470, loss: 0.12547728419303894 2023-01-24 03:14:51.707391: step: 612/470, loss: 0.2842352092266083 2023-01-24 03:14:52.408036: step: 614/470, loss: 0.6424673795700073 2023-01-24 03:14:53.169487: step: 616/470, loss: 0.13101936876773834 2023-01-24 03:14:53.987458: step: 618/470, loss: 0.12014391273260117 2023-01-24 03:14:54.690009: step: 620/470, loss: 0.33476048707962036 2023-01-24 03:14:55.415578: step: 622/470, loss: 0.1459568589925766 2023-01-24 03:14:56.357566: step: 624/470, loss: 0.1385248303413391 2023-01-24 03:14:57.095523: step: 626/470, loss: 0.21259213984012604 2023-01-24 03:14:57.795773: step: 628/470, loss: 0.4322361946105957 2023-01-24 03:14:58.499599: step: 630/470, loss: 0.03826691210269928 2023-01-24 03:14:59.212962: step: 632/470, loss: 0.23468813300132751 2023-01-24 03:14:59.915430: step: 634/470, loss: 0.7665181756019592 2023-01-24 03:15:00.663191: step: 636/470, loss: 0.4738726317882538 2023-01-24 03:15:01.325549: step: 638/470, loss: 0.12215352058410645 2023-01-24 03:15:02.028943: step: 640/470, loss: 0.0886230543255806 2023-01-24 03:15:02.761339: step: 642/470, loss: 0.25113239884376526 2023-01-24 03:15:03.560650: step: 644/470, loss: 1.358510136604309 2023-01-24 03:15:04.317675: step: 646/470, loss: 0.02677135542035103 2023-01-24 03:15:05.021807: step: 648/470, loss: 0.0627710297703743 2023-01-24 03:15:05.770246: step: 650/470, loss: 0.17032356560230255 2023-01-24 03:15:06.476108: step: 652/470, loss: 0.14521194994449615 2023-01-24 03:15:07.205062: step: 654/470, loss: 0.10609611868858337 2023-01-24 03:15:07.998350: step: 656/470, loss: 0.18908576667308807 2023-01-24 03:15:08.790055: step: 658/470, loss: 0.2079189121723175 2023-01-24 03:15:09.548605: step: 660/470, loss: 1.0469714403152466 2023-01-24 03:15:10.305593: step: 662/470, loss: 0.32972848415374756 2023-01-24 03:15:11.096227: step: 664/470, loss: 0.28195086121559143 2023-01-24 03:15:11.868033: step: 666/470, loss: 0.16927146911621094 2023-01-24 03:15:12.666916: step: 668/470, loss: 0.0880625993013382 2023-01-24 03:15:13.492760: step: 670/470, loss: 0.3091566264629364 2023-01-24 03:15:14.251322: step: 672/470, loss: 0.13625238835811615 2023-01-24 03:15:15.034940: step: 674/470, loss: 0.16259387135505676 2023-01-24 03:15:15.835565: step: 676/470, loss: 0.10333267599344254 2023-01-24 03:15:16.631446: step: 678/470, loss: 0.08288844674825668 2023-01-24 03:15:17.286697: step: 680/470, loss: 0.1420072615146637 2023-01-24 03:15:18.020389: step: 682/470, loss: 0.23318932950496674 2023-01-24 03:15:18.864176: step: 684/470, loss: 0.16551189124584198 2023-01-24 03:15:19.748792: step: 686/470, loss: 0.1825304627418518 2023-01-24 03:15:20.457284: step: 688/470, loss: 0.3301374614238739 2023-01-24 03:15:21.218802: step: 690/470, loss: 0.24686174094676971 2023-01-24 03:15:21.952984: step: 692/470, loss: 0.1321008801460266 2023-01-24 03:15:22.608603: step: 694/470, loss: 0.163587749004364 2023-01-24 03:15:23.281622: step: 696/470, loss: 0.24472731351852417 2023-01-24 03:15:24.002325: step: 698/470, loss: 0.05031857639551163 2023-01-24 03:15:24.736151: step: 700/470, loss: 0.5361093282699585 2023-01-24 03:15:25.438444: step: 702/470, loss: 0.12264763563871384 2023-01-24 03:15:26.133328: step: 704/470, loss: 0.6803569197654724 2023-01-24 03:15:26.900561: step: 706/470, loss: 0.14722055196762085 2023-01-24 03:15:27.627179: step: 708/470, loss: 0.33901581168174744 2023-01-24 03:15:28.390203: step: 710/470, loss: 0.10217005759477615 2023-01-24 03:15:29.093249: step: 712/470, loss: 0.11541683226823807 2023-01-24 03:15:29.833489: step: 714/470, loss: 0.19055843353271484 2023-01-24 03:15:30.571082: step: 716/470, loss: 0.14943887293338776 2023-01-24 03:15:31.318842: step: 718/470, loss: 0.5411022901535034 2023-01-24 03:15:31.961621: step: 720/470, loss: 0.10385146737098694 2023-01-24 03:15:32.660553: step: 722/470, loss: 0.1555739790201187 2023-01-24 03:15:33.320103: step: 724/470, loss: 0.27904900908470154 2023-01-24 03:15:34.097502: step: 726/470, loss: 0.13543671369552612 2023-01-24 03:15:34.831329: step: 728/470, loss: 0.0534023754298687 2023-01-24 03:15:35.526261: step: 730/470, loss: 0.23771588504314423 2023-01-24 03:15:36.319034: step: 732/470, loss: 0.12300637364387512 2023-01-24 03:15:37.021054: step: 734/470, loss: 0.08528205752372742 2023-01-24 03:15:37.778537: step: 736/470, loss: 0.41384363174438477 2023-01-24 03:15:38.588344: step: 738/470, loss: 0.11085784435272217 2023-01-24 03:15:39.384081: step: 740/470, loss: 0.4100949168205261 2023-01-24 03:15:40.166848: step: 742/470, loss: 0.5684028267860413 2023-01-24 03:15:40.986764: step: 744/470, loss: 0.21597033739089966 2023-01-24 03:15:41.715369: step: 746/470, loss: 0.1797659546136856 2023-01-24 03:15:42.466149: step: 748/470, loss: 0.12278896570205688 2023-01-24 03:15:43.206594: step: 750/470, loss: 0.06112644821405411 2023-01-24 03:15:43.890381: step: 752/470, loss: 0.13294389843940735 2023-01-24 03:15:44.559715: step: 754/470, loss: 0.4018506109714508 2023-01-24 03:15:45.321315: step: 756/470, loss: 0.312796026468277 2023-01-24 03:15:46.064715: step: 758/470, loss: 0.17056278884410858 2023-01-24 03:15:46.783011: step: 760/470, loss: 0.12827859818935394 2023-01-24 03:15:47.607146: step: 762/470, loss: 0.3029170036315918 2023-01-24 03:15:48.264701: step: 764/470, loss: 0.08240678906440735 2023-01-24 03:15:48.967196: step: 766/470, loss: 0.18388421833515167 2023-01-24 03:15:49.725596: step: 768/470, loss: 0.05635695904493332 2023-01-24 03:15:50.441292: step: 770/470, loss: 0.3035624027252197 2023-01-24 03:15:51.250260: step: 772/470, loss: 0.0933908224105835 2023-01-24 03:15:51.993864: step: 774/470, loss: 0.15930312871932983 2023-01-24 03:15:52.728394: step: 776/470, loss: 0.5322938561439514 2023-01-24 03:15:53.552452: step: 778/470, loss: 0.34571999311447144 2023-01-24 03:15:54.345045: step: 780/470, loss: 0.14167526364326477 2023-01-24 03:15:55.102698: step: 782/470, loss: 0.08634297549724579 2023-01-24 03:15:55.822940: step: 784/470, loss: 3.5037174224853516 2023-01-24 03:15:56.654868: step: 786/470, loss: 0.16057580709457397 2023-01-24 03:15:57.369186: step: 788/470, loss: 0.10480234026908875 2023-01-24 03:15:58.079780: step: 790/470, loss: 0.40698543190956116 2023-01-24 03:15:58.840370: step: 792/470, loss: 0.2794446647167206 2023-01-24 03:15:59.564918: step: 794/470, loss: 0.14553791284561157 2023-01-24 03:16:00.260228: step: 796/470, loss: 0.05922282487154007 2023-01-24 03:16:00.994451: step: 798/470, loss: 0.25358933210372925 2023-01-24 03:16:01.721494: step: 800/470, loss: 0.07599953562021255 2023-01-24 03:16:02.394466: step: 802/470, loss: 0.11059369891881943 2023-01-24 03:16:03.068017: step: 804/470, loss: 0.22309422492980957 2023-01-24 03:16:03.801132: step: 806/470, loss: 0.07817824929952621 2023-01-24 03:16:04.543488: step: 808/470, loss: 0.24265766143798828 2023-01-24 03:16:05.315571: step: 810/470, loss: 0.02267645113170147 2023-01-24 03:16:06.107728: step: 812/470, loss: 0.16064167022705078 2023-01-24 03:16:06.778435: step: 814/470, loss: 0.18023733794689178 2023-01-24 03:16:07.547758: step: 816/470, loss: 0.13179604709148407 2023-01-24 03:16:08.328147: step: 818/470, loss: 0.28788211941719055 2023-01-24 03:16:09.075340: step: 820/470, loss: 0.11537092179059982 2023-01-24 03:16:09.849532: step: 822/470, loss: 0.12304264307022095 2023-01-24 03:16:10.611484: step: 824/470, loss: 0.20748859643936157 2023-01-24 03:16:11.396790: step: 826/470, loss: 0.19135302305221558 2023-01-24 03:16:12.124806: step: 828/470, loss: 0.1294027864933014 2023-01-24 03:16:12.807131: step: 830/470, loss: 0.15381257236003876 2023-01-24 03:16:13.567221: step: 832/470, loss: 0.09458699077367783 2023-01-24 03:16:14.304547: step: 834/470, loss: 0.10095732659101486 2023-01-24 03:16:15.054305: step: 836/470, loss: 0.12210088223218918 2023-01-24 03:16:15.765623: step: 838/470, loss: 0.12754324078559875 2023-01-24 03:16:16.532971: step: 840/470, loss: 0.12278389185667038 2023-01-24 03:16:17.333293: step: 842/470, loss: 0.10551811009645462 2023-01-24 03:16:18.100061: step: 844/470, loss: 0.13036848604679108 2023-01-24 03:16:18.827690: step: 846/470, loss: 0.598879873752594 2023-01-24 03:16:19.623626: step: 848/470, loss: 0.10476802289485931 2023-01-24 03:16:20.320928: step: 850/470, loss: 0.16438759863376617 2023-01-24 03:16:20.965659: step: 852/470, loss: 0.06739547103643417 2023-01-24 03:16:21.806463: step: 854/470, loss: 0.11674723029136658 2023-01-24 03:16:22.601426: step: 856/470, loss: 0.17071661353111267 2023-01-24 03:16:23.352999: step: 858/470, loss: 0.16365188360214233 2023-01-24 03:16:24.036790: step: 860/470, loss: 0.15071061253547668 2023-01-24 03:16:24.781454: step: 862/470, loss: 0.46078917384147644 2023-01-24 03:16:25.449684: step: 864/470, loss: 0.08874162286520004 2023-01-24 03:16:26.128901: step: 866/470, loss: 0.08609306067228317 2023-01-24 03:16:26.936826: step: 868/470, loss: 0.13420622050762177 2023-01-24 03:16:27.759896: step: 870/470, loss: 0.5047072172164917 2023-01-24 03:16:28.525989: step: 872/470, loss: 0.4354456961154938 2023-01-24 03:16:29.223666: step: 874/470, loss: 0.5482062101364136 2023-01-24 03:16:30.086638: step: 876/470, loss: 0.22582288086414337 2023-01-24 03:16:30.900734: step: 878/470, loss: 0.1643458604812622 2023-01-24 03:16:31.679249: step: 880/470, loss: 0.0947733074426651 2023-01-24 03:16:32.516758: step: 882/470, loss: 0.059879835695028305 2023-01-24 03:16:33.285528: step: 884/470, loss: 0.10321329534053802 2023-01-24 03:16:34.058561: step: 886/470, loss: 0.11775056272745132 2023-01-24 03:16:34.790611: step: 888/470, loss: 0.12950459122657776 2023-01-24 03:16:35.560416: step: 890/470, loss: 0.3330346941947937 2023-01-24 03:16:36.312957: step: 892/470, loss: 0.14579877257347107 2023-01-24 03:16:37.052743: step: 894/470, loss: 0.12271419167518616 2023-01-24 03:16:37.834945: step: 896/470, loss: 0.23777388036251068 2023-01-24 03:16:38.644123: step: 898/470, loss: 0.20127157866954803 2023-01-24 03:16:39.265441: step: 900/470, loss: 0.033625878393650055 2023-01-24 03:16:40.012486: step: 902/470, loss: 0.21832531690597534 2023-01-24 03:16:40.775985: step: 904/470, loss: 0.19224612414836884 2023-01-24 03:16:41.506481: step: 906/470, loss: 0.11556744575500488 2023-01-24 03:16:42.251445: step: 908/470, loss: 0.11261487007141113 2023-01-24 03:16:43.021680: step: 910/470, loss: 0.1271752119064331 2023-01-24 03:16:43.787224: step: 912/470, loss: 0.1272096335887909 2023-01-24 03:16:44.619331: step: 914/470, loss: 0.19481460750102997 2023-01-24 03:16:45.338914: step: 916/470, loss: 0.445446252822876 2023-01-24 03:16:46.145014: step: 918/470, loss: 0.1915096789598465 2023-01-24 03:16:46.826699: step: 920/470, loss: 0.11832750588655472 2023-01-24 03:16:47.553312: step: 922/470, loss: 0.43603986501693726 2023-01-24 03:16:48.394189: step: 924/470, loss: 0.20713625848293304 2023-01-24 03:16:49.216443: step: 926/470, loss: 0.3290116488933563 2023-01-24 03:16:50.031477: step: 928/470, loss: 1.042456865310669 2023-01-24 03:16:50.770625: step: 930/470, loss: 0.13364510238170624 2023-01-24 03:16:51.514547: step: 932/470, loss: 0.5061575770378113 2023-01-24 03:16:52.137115: step: 934/470, loss: 1.4723851680755615 2023-01-24 03:16:52.893219: step: 936/470, loss: 0.20052802562713623 2023-01-24 03:16:53.587643: step: 938/470, loss: 0.7012951970100403 2023-01-24 03:16:54.387537: step: 940/470, loss: 0.3045251667499542 2023-01-24 03:16:55.051159: step: 942/470, loss: 0.33923205733299255 ================================================== Loss: 0.263 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33187203994221154, 'r': 0.3186475374018198, 'f1': 0.3251253673005983}, 'combined': 0.23956606011623033, 'epoch': 12} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35670723203612636, 'r': 0.3265243124023003, 'f1': 0.34094908122328543}, 'combined': 0.22729938748219025, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32139239414112, 'r': 0.30858548659469964, 'f1': 0.3148587636697129}, 'combined': 0.23200119428294633, 'epoch': 12} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3582156680154217, 'r': 0.31791640536368676, 'f1': 0.33686506528602567}, 'combined': 0.22457671019068373, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30718539637932163, 'r': 0.31184855230158837, 'f1': 0.3094994106646649}, 'combined': 0.22805219733185833, 'epoch': 12} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35588885174248575, 'r': 0.33775220833637826, 'f1': 0.3465834204931755}, 'combined': 0.23105561366211694, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2554347826086957, 'r': 0.3357142857142857, 'f1': 0.2901234567901234}, 'combined': 0.19341563786008226, 'epoch': 12} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.2391304347826087, 'f1': 0.3142857142857143}, 'combined': 0.2095238095238095, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 12} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30718539637932163, 'r': 0.31184855230158837, 'f1': 0.3094994106646649}, 'combined': 0.22805219733185833, 'epoch': 12} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35588885174248575, 'r': 0.33775220833637826, 'f1': 0.3465834204931755}, 'combined': 0.23105561366211694, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 12} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:19:35.085807: step: 2/470, loss: 0.11869052797555923 2023-01-24 03:19:35.772715: step: 4/470, loss: 0.087203748524189 2023-01-24 03:19:36.470166: step: 6/470, loss: 0.1547858864068985 2023-01-24 03:19:37.194291: step: 8/470, loss: 0.24847088754177094 2023-01-24 03:19:37.866151: step: 10/470, loss: 0.4256363809108734 2023-01-24 03:19:38.638724: step: 12/470, loss: 0.09074677526950836 2023-01-24 03:19:39.372725: step: 14/470, loss: 0.19883684813976288 2023-01-24 03:19:40.063206: step: 16/470, loss: 0.2916736602783203 2023-01-24 03:19:40.852115: step: 18/470, loss: 0.08487699925899506 2023-01-24 03:19:41.749715: step: 20/470, loss: 0.37382224202156067 2023-01-24 03:19:42.477713: step: 22/470, loss: 0.05458125099539757 2023-01-24 03:19:43.263907: step: 24/470, loss: 0.07370239496231079 2023-01-24 03:19:44.030235: step: 26/470, loss: 0.19649724662303925 2023-01-24 03:19:44.779350: step: 28/470, loss: 0.14036303758621216 2023-01-24 03:19:45.494582: step: 30/470, loss: 0.12299536913633347 2023-01-24 03:19:46.243537: step: 32/470, loss: 0.13496029376983643 2023-01-24 03:19:47.000536: step: 34/470, loss: 0.07654550671577454 2023-01-24 03:19:47.730499: step: 36/470, loss: 0.12526889145374298 2023-01-24 03:19:48.506967: step: 38/470, loss: 0.08467797935009003 2023-01-24 03:19:49.279468: step: 40/470, loss: 0.10701703280210495 2023-01-24 03:19:49.948978: step: 42/470, loss: 0.30219635367393494 2023-01-24 03:19:50.682385: step: 44/470, loss: 0.09338068217039108 2023-01-24 03:19:51.376108: step: 46/470, loss: 0.5995178818702698 2023-01-24 03:19:52.170190: step: 48/470, loss: 0.10941080749034882 2023-01-24 03:19:52.937449: step: 50/470, loss: 0.20678047835826874 2023-01-24 03:19:53.662823: step: 52/470, loss: 0.05293460562825203 2023-01-24 03:19:54.339898: step: 54/470, loss: 0.7169974446296692 2023-01-24 03:19:55.051711: step: 56/470, loss: 0.12911155819892883 2023-01-24 03:19:55.800879: step: 58/470, loss: 0.10857953131198883 2023-01-24 03:19:56.563547: step: 60/470, loss: 0.11933538317680359 2023-01-24 03:19:57.304092: step: 62/470, loss: 0.09998290985822678 2023-01-24 03:19:58.059989: step: 64/470, loss: 0.14585649967193604 2023-01-24 03:19:58.844916: step: 66/470, loss: 0.1499679684638977 2023-01-24 03:19:59.703917: step: 68/470, loss: 0.05815058946609497 2023-01-24 03:20:00.448921: step: 70/470, loss: 0.060805968940258026 2023-01-24 03:20:01.215849: step: 72/470, loss: 0.6332342624664307 2023-01-24 03:20:01.896316: step: 74/470, loss: 0.03274387866258621 2023-01-24 03:20:02.666326: step: 76/470, loss: 0.15731528401374817 2023-01-24 03:20:03.345592: step: 78/470, loss: 0.12228990346193314 2023-01-24 03:20:04.143258: step: 80/470, loss: 0.2630844712257385 2023-01-24 03:20:04.852149: step: 82/470, loss: 0.285457044839859 2023-01-24 03:20:05.586206: step: 84/470, loss: 0.06887723505496979 2023-01-24 03:20:06.338372: step: 86/470, loss: 0.17145711183547974 2023-01-24 03:20:07.060990: step: 88/470, loss: 0.16493162512779236 2023-01-24 03:20:07.755092: step: 90/470, loss: 0.9345531463623047 2023-01-24 03:20:08.510656: step: 92/470, loss: 0.08099890500307083 2023-01-24 03:20:09.231943: step: 94/470, loss: 0.13539457321166992 2023-01-24 03:20:09.960977: step: 96/470, loss: 0.2658799886703491 2023-01-24 03:20:10.645410: step: 98/470, loss: 0.4507644772529602 2023-01-24 03:20:11.374144: step: 100/470, loss: 0.06587552279233932 2023-01-24 03:20:12.114353: step: 102/470, loss: 0.9483935832977295 2023-01-24 03:20:12.782732: step: 104/470, loss: 0.15638364851474762 2023-01-24 03:20:13.615793: step: 106/470, loss: 0.23561960458755493 2023-01-24 03:20:14.356765: step: 108/470, loss: 0.08107713609933853 2023-01-24 03:20:15.129584: step: 110/470, loss: 0.4484912157058716 2023-01-24 03:20:15.844016: step: 112/470, loss: 0.17864054441452026 2023-01-24 03:20:16.620406: step: 114/470, loss: 0.11197991669178009 2023-01-24 03:20:17.309771: step: 116/470, loss: 1.0874292850494385 2023-01-24 03:20:18.047704: step: 118/470, loss: 0.19661211967468262 2023-01-24 03:20:18.836727: step: 120/470, loss: 0.09106841683387756 2023-01-24 03:20:19.623130: step: 122/470, loss: 0.6079081296920776 2023-01-24 03:20:20.357187: step: 124/470, loss: 0.14605315029621124 2023-01-24 03:20:21.129601: step: 126/470, loss: 0.9814774990081787 2023-01-24 03:20:21.999121: step: 128/470, loss: 0.16904780268669128 2023-01-24 03:20:22.800273: step: 130/470, loss: 0.1599675416946411 2023-01-24 03:20:23.536162: step: 132/470, loss: 0.2830541431903839 2023-01-24 03:20:24.289130: step: 134/470, loss: 0.031339630484580994 2023-01-24 03:20:24.980858: step: 136/470, loss: 0.15232618153095245 2023-01-24 03:20:25.717812: step: 138/470, loss: 0.17885567247867584 2023-01-24 03:20:26.456656: step: 140/470, loss: 0.13249598443508148 2023-01-24 03:20:27.177049: step: 142/470, loss: 0.07661613076925278 2023-01-24 03:20:27.949526: step: 144/470, loss: 0.13011841475963593 2023-01-24 03:20:28.651666: step: 146/470, loss: 0.10363959521055222 2023-01-24 03:20:29.504074: step: 148/470, loss: 0.04361295700073242 2023-01-24 03:20:30.241773: step: 150/470, loss: 0.12805719673633575 2023-01-24 03:20:31.003693: step: 152/470, loss: 0.09809063374996185 2023-01-24 03:20:31.712257: step: 154/470, loss: 0.09070705622434616 2023-01-24 03:20:32.496721: step: 156/470, loss: 0.44800418615341187 2023-01-24 03:20:33.279765: step: 158/470, loss: 0.15312308073043823 2023-01-24 03:20:34.036944: step: 160/470, loss: 0.06737309694290161 2023-01-24 03:20:34.786034: step: 162/470, loss: 0.3796755075454712 2023-01-24 03:20:35.553631: step: 164/470, loss: 0.18876059353351593 2023-01-24 03:20:36.299776: step: 166/470, loss: 0.25417694449424744 2023-01-24 03:20:37.068108: step: 168/470, loss: 0.17342378199100494 2023-01-24 03:20:37.825604: step: 170/470, loss: 0.10127972066402435 2023-01-24 03:20:38.551002: step: 172/470, loss: 0.16987819969654083 2023-01-24 03:20:39.228123: step: 174/470, loss: 0.3513706624507904 2023-01-24 03:20:40.110760: step: 176/470, loss: 0.10086065530776978 2023-01-24 03:20:40.786633: step: 178/470, loss: 0.09717612713575363 2023-01-24 03:20:41.528895: step: 180/470, loss: 0.02228609286248684 2023-01-24 03:20:42.281979: step: 182/470, loss: 0.08578619360923767 2023-01-24 03:20:43.022983: step: 184/470, loss: 0.6068621277809143 2023-01-24 03:20:43.763308: step: 186/470, loss: 0.026224324479699135 2023-01-24 03:20:44.518009: step: 188/470, loss: 0.34837964177131653 2023-01-24 03:20:45.331680: step: 190/470, loss: 0.28307831287384033 2023-01-24 03:20:46.056119: step: 192/470, loss: 0.21804864704608917 2023-01-24 03:20:46.934786: step: 194/470, loss: 0.10385201871395111 2023-01-24 03:20:47.647736: step: 196/470, loss: 0.06404811888933182 2023-01-24 03:20:48.358984: step: 198/470, loss: 0.13678865134716034 2023-01-24 03:20:49.078969: step: 200/470, loss: 0.2867162823677063 2023-01-24 03:20:49.825930: step: 202/470, loss: 0.12509754300117493 2023-01-24 03:20:50.567750: step: 204/470, loss: 0.21526752412319183 2023-01-24 03:20:51.309227: step: 206/470, loss: 0.410299688577652 2023-01-24 03:20:52.000492: step: 208/470, loss: 0.1994316130876541 2023-01-24 03:20:52.802206: step: 210/470, loss: 0.21224021911621094 2023-01-24 03:20:53.635572: step: 212/470, loss: 1.471451759338379 2023-01-24 03:20:54.329882: step: 214/470, loss: 0.040065914392471313 2023-01-24 03:20:55.092509: step: 216/470, loss: 0.1329803615808487 2023-01-24 03:20:55.915330: step: 218/470, loss: 0.05075236037373543 2023-01-24 03:20:56.742970: step: 220/470, loss: 0.25361740589141846 2023-01-24 03:20:57.521903: step: 222/470, loss: 1.064577341079712 2023-01-24 03:20:58.293750: step: 224/470, loss: 0.13827519118785858 2023-01-24 03:20:59.050950: step: 226/470, loss: 0.13590987026691437 2023-01-24 03:20:59.760642: step: 228/470, loss: 0.20364883542060852 2023-01-24 03:21:00.471731: step: 230/470, loss: 0.2405257523059845 2023-01-24 03:21:01.241207: step: 232/470, loss: 0.11050628870725632 2023-01-24 03:21:02.049047: step: 234/470, loss: 0.18924807012081146 2023-01-24 03:21:02.793472: step: 236/470, loss: 0.18803924322128296 2023-01-24 03:21:03.584027: step: 238/470, loss: 0.1766262650489807 2023-01-24 03:21:04.552757: step: 240/470, loss: 0.24651962518692017 2023-01-24 03:21:05.237784: step: 242/470, loss: 0.5090709924697876 2023-01-24 03:21:06.064502: step: 244/470, loss: 0.07496100664138794 2023-01-24 03:21:06.777245: step: 246/470, loss: 0.39603158831596375 2023-01-24 03:21:07.453982: step: 248/470, loss: 0.09065035730600357 2023-01-24 03:21:08.235934: step: 250/470, loss: 0.662716269493103 2023-01-24 03:21:08.986424: step: 252/470, loss: 0.11193042248487473 2023-01-24 03:21:09.751960: step: 254/470, loss: 0.24775205552577972 2023-01-24 03:21:10.623507: step: 256/470, loss: 20.37836456298828 2023-01-24 03:21:11.334676: step: 258/470, loss: 0.34449484944343567 2023-01-24 03:21:12.093011: step: 260/470, loss: 0.07279616594314575 2023-01-24 03:21:12.902225: step: 262/470, loss: 0.18098340928554535 2023-01-24 03:21:13.652771: step: 264/470, loss: 0.16679392755031586 2023-01-24 03:21:14.401194: step: 266/470, loss: 0.4016364812850952 2023-01-24 03:21:15.153917: step: 268/470, loss: 0.22494986653327942 2023-01-24 03:21:15.830659: step: 270/470, loss: 0.05715346336364746 2023-01-24 03:21:16.583408: step: 272/470, loss: 0.2364903837442398 2023-01-24 03:21:17.298894: step: 274/470, loss: 0.09979073703289032 2023-01-24 03:21:18.013844: step: 276/470, loss: 0.08214342594146729 2023-01-24 03:21:18.855389: step: 278/470, loss: 0.1216912716627121 2023-01-24 03:21:19.543252: step: 280/470, loss: 0.07261183857917786 2023-01-24 03:21:20.329332: step: 282/470, loss: 0.1756569743156433 2023-01-24 03:21:21.101395: step: 284/470, loss: 0.6266811490058899 2023-01-24 03:21:21.787354: step: 286/470, loss: 0.1553003340959549 2023-01-24 03:21:22.495006: step: 288/470, loss: 0.33585941791534424 2023-01-24 03:21:23.172229: step: 290/470, loss: 0.05042559280991554 2023-01-24 03:21:23.893786: step: 292/470, loss: 0.2024879902601242 2023-01-24 03:21:24.554769: step: 294/470, loss: 0.026582282036542892 2023-01-24 03:21:25.259404: step: 296/470, loss: 0.11875536292791367 2023-01-24 03:21:26.086429: step: 298/470, loss: 0.16971439123153687 2023-01-24 03:21:26.850034: step: 300/470, loss: 0.34994009137153625 2023-01-24 03:21:27.613012: step: 302/470, loss: 0.1329270601272583 2023-01-24 03:21:28.455460: step: 304/470, loss: 0.30022162199020386 2023-01-24 03:21:29.254607: step: 306/470, loss: 0.16325759887695312 2023-01-24 03:21:30.043061: step: 308/470, loss: 0.15821513533592224 2023-01-24 03:21:30.786025: step: 310/470, loss: 0.11898909509181976 2023-01-24 03:21:31.554762: step: 312/470, loss: 0.11442035436630249 2023-01-24 03:21:32.256310: step: 314/470, loss: 0.2317693531513214 2023-01-24 03:21:32.985118: step: 316/470, loss: 0.210124209523201 2023-01-24 03:21:33.722915: step: 318/470, loss: 0.22964359819889069 2023-01-24 03:21:34.497419: step: 320/470, loss: 0.06219105422496796 2023-01-24 03:21:35.234905: step: 322/470, loss: 0.12114651501178741 2023-01-24 03:21:36.044848: step: 324/470, loss: 0.36422327160835266 2023-01-24 03:21:36.800406: step: 326/470, loss: 0.10115069150924683 2023-01-24 03:21:37.515363: step: 328/470, loss: 0.9817755818367004 2023-01-24 03:21:38.216082: step: 330/470, loss: 0.02880793623626232 2023-01-24 03:21:39.012390: step: 332/470, loss: 0.1212698295712471 2023-01-24 03:21:39.825805: step: 334/470, loss: 0.10181504487991333 2023-01-24 03:21:40.594062: step: 336/470, loss: 0.049668557941913605 2023-01-24 03:21:41.345271: step: 338/470, loss: 8.260157585144043 2023-01-24 03:21:42.107598: step: 340/470, loss: 0.1510852873325348 2023-01-24 03:21:42.874108: step: 342/470, loss: 0.2651934027671814 2023-01-24 03:21:43.595474: step: 344/470, loss: 0.2363731414079666 2023-01-24 03:21:44.326435: step: 346/470, loss: 0.28845059871673584 2023-01-24 03:21:45.163384: step: 348/470, loss: 0.17618459463119507 2023-01-24 03:21:45.878688: step: 350/470, loss: 0.3283766210079193 2023-01-24 03:21:46.565487: step: 352/470, loss: 0.3739703595638275 2023-01-24 03:21:47.369574: step: 354/470, loss: 0.06648049503564835 2023-01-24 03:21:48.104229: step: 356/470, loss: 0.1445521116256714 2023-01-24 03:21:48.817615: step: 358/470, loss: 0.06190051883459091 2023-01-24 03:21:49.592351: step: 360/470, loss: 0.12461217492818832 2023-01-24 03:21:50.278266: step: 362/470, loss: 0.033573780208826065 2023-01-24 03:21:51.009409: step: 364/470, loss: 0.13312368094921112 2023-01-24 03:21:51.766690: step: 366/470, loss: 0.1520005315542221 2023-01-24 03:21:52.562654: step: 368/470, loss: 0.9422814249992371 2023-01-24 03:21:53.356078: step: 370/470, loss: 0.1411898136138916 2023-01-24 03:21:54.092373: step: 372/470, loss: 0.051140353083610535 2023-01-24 03:21:54.818596: step: 374/470, loss: 0.3242073357105255 2023-01-24 03:21:55.525828: step: 376/470, loss: 0.16489548981189728 2023-01-24 03:21:56.312892: step: 378/470, loss: 1.8228318691253662 2023-01-24 03:21:57.165888: step: 380/470, loss: 1.1325253248214722 2023-01-24 03:21:57.933758: step: 382/470, loss: 0.36451154947280884 2023-01-24 03:21:58.637326: step: 384/470, loss: 0.0645284503698349 2023-01-24 03:21:59.393840: step: 386/470, loss: 0.10189077258110046 2023-01-24 03:22:00.078407: step: 388/470, loss: 0.18132157623767853 2023-01-24 03:22:00.851929: step: 390/470, loss: 0.24271626770496368 2023-01-24 03:22:01.549933: step: 392/470, loss: 0.11971033364534378 2023-01-24 03:22:02.271424: step: 394/470, loss: 0.1440243124961853 2023-01-24 03:22:03.076556: step: 396/470, loss: 0.12291662395000458 2023-01-24 03:22:03.805162: step: 398/470, loss: 0.07444258779287338 2023-01-24 03:22:04.586151: step: 400/470, loss: 0.7031933069229126 2023-01-24 03:22:05.371166: step: 402/470, loss: 0.16021253168582916 2023-01-24 03:22:06.044827: step: 404/470, loss: 0.2858032286167145 2023-01-24 03:22:06.807214: step: 406/470, loss: 0.15188010036945343 2023-01-24 03:22:07.571216: step: 408/470, loss: 0.5649017691612244 2023-01-24 03:22:08.294727: step: 410/470, loss: 0.06190142035484314 2023-01-24 03:22:09.038039: step: 412/470, loss: 0.18122579157352448 2023-01-24 03:22:09.733564: step: 414/470, loss: 0.11591540277004242 2023-01-24 03:22:10.383453: step: 416/470, loss: 0.07641121000051498 2023-01-24 03:22:11.100728: step: 418/470, loss: 0.07922433316707611 2023-01-24 03:22:11.796983: step: 420/470, loss: 0.04138394817709923 2023-01-24 03:22:12.590726: step: 422/470, loss: 0.30484187602996826 2023-01-24 03:22:13.358162: step: 424/470, loss: 0.11592315137386322 2023-01-24 03:22:14.236646: step: 426/470, loss: 0.47379979491233826 2023-01-24 03:22:14.997413: step: 428/470, loss: 0.05659574270248413 2023-01-24 03:22:15.718511: step: 430/470, loss: 0.10657653957605362 2023-01-24 03:22:16.422788: step: 432/470, loss: 0.14987456798553467 2023-01-24 03:22:17.128897: step: 434/470, loss: 0.526384174823761 2023-01-24 03:22:17.924686: step: 436/470, loss: 0.1093791201710701 2023-01-24 03:22:18.620956: step: 438/470, loss: 0.16932667791843414 2023-01-24 03:22:19.427679: step: 440/470, loss: 0.2574578523635864 2023-01-24 03:22:20.179847: step: 442/470, loss: 0.06636017560958862 2023-01-24 03:22:20.924404: step: 444/470, loss: 0.7201718091964722 2023-01-24 03:22:21.678952: step: 446/470, loss: 0.11673801392316818 2023-01-24 03:22:22.383884: step: 448/470, loss: 0.1540815830230713 2023-01-24 03:22:23.096346: step: 450/470, loss: 0.2399187535047531 2023-01-24 03:22:23.827344: step: 452/470, loss: 0.1115301102399826 2023-01-24 03:22:24.676454: step: 454/470, loss: 0.3593323230743408 2023-01-24 03:22:25.453257: step: 456/470, loss: 1.4198927879333496 2023-01-24 03:22:26.189709: step: 458/470, loss: 0.6755783557891846 2023-01-24 03:22:26.862440: step: 460/470, loss: 0.050497036427259445 2023-01-24 03:22:27.569009: step: 462/470, loss: 0.10053464025259018 2023-01-24 03:22:28.327610: step: 464/470, loss: 0.03807546943426132 2023-01-24 03:22:29.048696: step: 466/470, loss: 0.3850559592247009 2023-01-24 03:22:29.800985: step: 468/470, loss: 0.06416615843772888 2023-01-24 03:22:30.595507: step: 470/470, loss: 0.06735183298587799 2023-01-24 03:22:31.277182: step: 472/470, loss: 0.0959152951836586 2023-01-24 03:22:31.959769: step: 474/470, loss: 0.49368205666542053 2023-01-24 03:22:32.731056: step: 476/470, loss: 0.13260005414485931 2023-01-24 03:22:33.460507: step: 478/470, loss: 0.17261973023414612 2023-01-24 03:22:34.185959: step: 480/470, loss: 0.08892907947301865 2023-01-24 03:22:34.986898: step: 482/470, loss: 0.15367530286312103 2023-01-24 03:22:35.759734: step: 484/470, loss: 0.11208932101726532 2023-01-24 03:22:36.428179: step: 486/470, loss: 0.15923534333705902 2023-01-24 03:22:37.180366: step: 488/470, loss: 0.09813371300697327 2023-01-24 03:22:37.837424: step: 490/470, loss: 0.46980464458465576 2023-01-24 03:22:38.585055: step: 492/470, loss: 0.14123369753360748 2023-01-24 03:22:39.296338: step: 494/470, loss: 0.105765201151371 2023-01-24 03:22:39.997096: step: 496/470, loss: 0.21727946400642395 2023-01-24 03:22:40.729614: step: 498/470, loss: 0.13507087528705597 2023-01-24 03:22:41.513694: step: 500/470, loss: 0.12536272406578064 2023-01-24 03:22:42.294850: step: 502/470, loss: 0.06248565390706062 2023-01-24 03:22:43.092026: step: 504/470, loss: 0.29726463556289673 2023-01-24 03:22:43.787672: step: 506/470, loss: 0.10283642262220383 2023-01-24 03:22:44.542472: step: 508/470, loss: 0.88539719581604 2023-01-24 03:22:45.281470: step: 510/470, loss: 0.14810267090797424 2023-01-24 03:22:46.036337: step: 512/470, loss: 0.18709732592105865 2023-01-24 03:22:46.758798: step: 514/470, loss: 0.043946363031864166 2023-01-24 03:22:47.467143: step: 516/470, loss: 0.20995289087295532 2023-01-24 03:22:48.231700: step: 518/470, loss: 0.19275762140750885 2023-01-24 03:22:49.004918: step: 520/470, loss: 0.3477177619934082 2023-01-24 03:22:49.872607: step: 522/470, loss: 0.3986015319824219 2023-01-24 03:22:50.598006: step: 524/470, loss: 0.1369524747133255 2023-01-24 03:22:51.361428: step: 526/470, loss: 0.07624433934688568 2023-01-24 03:22:52.094269: step: 528/470, loss: 0.29792025685310364 2023-01-24 03:22:52.819913: step: 530/470, loss: 0.17045418918132782 2023-01-24 03:22:53.577029: step: 532/470, loss: 0.3308311998844147 2023-01-24 03:22:54.280936: step: 534/470, loss: 0.11802157014608383 2023-01-24 03:22:55.007722: step: 536/470, loss: 0.510388195514679 2023-01-24 03:22:55.659753: step: 538/470, loss: 0.22368597984313965 2023-01-24 03:22:56.359029: step: 540/470, loss: 0.13283610343933105 2023-01-24 03:22:57.185533: step: 542/470, loss: 0.6673287153244019 2023-01-24 03:22:57.882017: step: 544/470, loss: 0.25879979133605957 2023-01-24 03:22:58.594096: step: 546/470, loss: 0.13402698934078217 2023-01-24 03:22:59.342118: step: 548/470, loss: 0.28232210874557495 2023-01-24 03:23:00.121952: step: 550/470, loss: 0.1593974232673645 2023-01-24 03:23:00.867470: step: 552/470, loss: 0.15679427981376648 2023-01-24 03:23:01.539919: step: 554/470, loss: 0.5649197101593018 2023-01-24 03:23:02.206372: step: 556/470, loss: 0.1094643771648407 2023-01-24 03:23:02.958085: step: 558/470, loss: 0.21649640798568726 2023-01-24 03:23:03.714006: step: 560/470, loss: 0.4756641089916229 2023-01-24 03:23:04.433767: step: 562/470, loss: 0.17450223863124847 2023-01-24 03:23:05.166168: step: 564/470, loss: 0.15516166388988495 2023-01-24 03:23:05.909419: step: 566/470, loss: 0.15304596722126007 2023-01-24 03:23:06.689912: step: 568/470, loss: 0.15756924450397491 2023-01-24 03:23:07.502612: step: 570/470, loss: 0.17908306419849396 2023-01-24 03:23:08.198968: step: 572/470, loss: 0.1460331678390503 2023-01-24 03:23:08.966795: step: 574/470, loss: 0.5658903121948242 2023-01-24 03:23:09.720088: step: 576/470, loss: 0.12265262007713318 2023-01-24 03:23:10.472584: step: 578/470, loss: 0.22960016131401062 2023-01-24 03:23:11.225621: step: 580/470, loss: 0.10772555321455002 2023-01-24 03:23:11.931752: step: 582/470, loss: 0.36096444725990295 2023-01-24 03:23:12.662749: step: 584/470, loss: 0.19996917247772217 2023-01-24 03:23:13.469163: step: 586/470, loss: 0.09180688112974167 2023-01-24 03:23:14.134519: step: 588/470, loss: 0.18549667298793793 2023-01-24 03:23:14.957900: step: 590/470, loss: 0.09706774353981018 2023-01-24 03:23:15.747456: step: 592/470, loss: 0.128683939576149 2023-01-24 03:23:16.579124: step: 594/470, loss: 0.11436925083398819 2023-01-24 03:23:17.394027: step: 596/470, loss: 0.1896413415670395 2023-01-24 03:23:18.116006: step: 598/470, loss: 0.06010688096284866 2023-01-24 03:23:18.886361: step: 600/470, loss: 0.2670471966266632 2023-01-24 03:23:19.653780: step: 602/470, loss: 0.11307162046432495 2023-01-24 03:23:20.473089: step: 604/470, loss: 0.11850042641162872 2023-01-24 03:23:21.191657: step: 606/470, loss: 0.444345623254776 2023-01-24 03:23:21.929308: step: 608/470, loss: 0.15307031571865082 2023-01-24 03:23:22.722083: step: 610/470, loss: 0.20294825732707977 2023-01-24 03:23:23.463292: step: 612/470, loss: 0.1727193295955658 2023-01-24 03:23:24.310202: step: 614/470, loss: 0.1558392196893692 2023-01-24 03:23:25.031121: step: 616/470, loss: 0.11232329159975052 2023-01-24 03:23:25.804545: step: 618/470, loss: 0.4051525294780731 2023-01-24 03:23:26.493393: step: 620/470, loss: 0.18653224408626556 2023-01-24 03:23:27.224259: step: 622/470, loss: 0.07225558906793594 2023-01-24 03:23:28.023161: step: 624/470, loss: 0.21247851848602295 2023-01-24 03:23:28.724521: step: 626/470, loss: 0.1073872521519661 2023-01-24 03:23:29.498435: step: 628/470, loss: 0.1231706514954567 2023-01-24 03:23:30.272451: step: 630/470, loss: 0.03070944733917713 2023-01-24 03:23:30.973379: step: 632/470, loss: 0.3459760546684265 2023-01-24 03:23:31.752020: step: 634/470, loss: 0.14295950531959534 2023-01-24 03:23:32.491960: step: 636/470, loss: 0.17095544934272766 2023-01-24 03:23:33.173870: step: 638/470, loss: 0.08782177418470383 2023-01-24 03:23:33.890935: step: 640/470, loss: 2.7348151206970215 2023-01-24 03:23:34.687614: step: 642/470, loss: 0.18300358951091766 2023-01-24 03:23:35.377934: step: 644/470, loss: 0.610358715057373 2023-01-24 03:23:36.234006: step: 646/470, loss: 0.07318799197673798 2023-01-24 03:23:36.988811: step: 648/470, loss: 0.2117634117603302 2023-01-24 03:23:37.793910: step: 650/470, loss: 1.6853128671646118 2023-01-24 03:23:38.581826: step: 652/470, loss: 0.3203961253166199 2023-01-24 03:23:39.341211: step: 654/470, loss: 0.10503191500902176 2023-01-24 03:23:40.081038: step: 656/470, loss: 0.12326573580503464 2023-01-24 03:23:40.785774: step: 658/470, loss: 0.0717514380812645 2023-01-24 03:23:41.512537: step: 660/470, loss: 0.04833994433283806 2023-01-24 03:23:42.258335: step: 662/470, loss: 0.025357216596603394 2023-01-24 03:23:43.010927: step: 664/470, loss: 0.0652332752943039 2023-01-24 03:23:43.797688: step: 666/470, loss: 0.1199808269739151 2023-01-24 03:23:44.504470: step: 668/470, loss: 0.09009598940610886 2023-01-24 03:23:45.250448: step: 670/470, loss: 0.5139247179031372 2023-01-24 03:23:45.939182: step: 672/470, loss: 0.05846283957362175 2023-01-24 03:23:46.673986: step: 674/470, loss: 0.1617492139339447 2023-01-24 03:23:47.474513: step: 676/470, loss: 0.06745065003633499 2023-01-24 03:23:48.284547: step: 678/470, loss: 0.14190346002578735 2023-01-24 03:23:49.075218: step: 680/470, loss: 0.04410451278090477 2023-01-24 03:23:49.850708: step: 682/470, loss: 0.35122886300086975 2023-01-24 03:23:50.609285: step: 684/470, loss: 0.1424378901720047 2023-01-24 03:23:51.338332: step: 686/470, loss: 0.13077247142791748 2023-01-24 03:23:52.103574: step: 688/470, loss: 0.067043736577034 2023-01-24 03:23:52.837755: step: 690/470, loss: 0.22250400483608246 2023-01-24 03:23:53.588305: step: 692/470, loss: 0.07035396248102188 2023-01-24 03:23:54.293723: step: 694/470, loss: 0.18889664113521576 2023-01-24 03:23:54.980776: step: 696/470, loss: 0.15015734732151031 2023-01-24 03:23:55.812747: step: 698/470, loss: 0.16164840757846832 2023-01-24 03:23:56.609783: step: 700/470, loss: 0.0613911971449852 2023-01-24 03:23:57.383309: step: 702/470, loss: 0.2514897584915161 2023-01-24 03:23:58.276579: step: 704/470, loss: 0.09639281034469604 2023-01-24 03:23:58.990541: step: 706/470, loss: 0.4936193525791168 2023-01-24 03:23:59.744611: step: 708/470, loss: 0.23112571239471436 2023-01-24 03:24:00.460587: step: 710/470, loss: 0.21785999834537506 2023-01-24 03:24:01.161265: step: 712/470, loss: 0.26574963331222534 2023-01-24 03:24:01.920374: step: 714/470, loss: 0.1496441513299942 2023-01-24 03:24:02.716945: step: 716/470, loss: 0.13528425991535187 2023-01-24 03:24:03.591640: step: 718/470, loss: 0.4600887894630432 2023-01-24 03:24:04.327716: step: 720/470, loss: 0.3019505739212036 2023-01-24 03:24:05.011959: step: 722/470, loss: 0.2099757045507431 2023-01-24 03:24:05.768727: step: 724/470, loss: 0.22414270043373108 2023-01-24 03:24:06.587482: step: 726/470, loss: 0.10880865156650543 2023-01-24 03:24:07.365926: step: 728/470, loss: 0.2567862272262573 2023-01-24 03:24:08.077978: step: 730/470, loss: 0.12142646312713623 2023-01-24 03:24:08.866723: step: 732/470, loss: 0.1998579055070877 2023-01-24 03:24:09.612317: step: 734/470, loss: 0.13268613815307617 2023-01-24 03:24:10.347654: step: 736/470, loss: 0.14437507092952728 2023-01-24 03:24:11.181077: step: 738/470, loss: 0.16617853939533234 2023-01-24 03:24:11.982780: step: 740/470, loss: 0.10675939172506332 2023-01-24 03:24:12.839211: step: 742/470, loss: 0.20530328154563904 2023-01-24 03:24:13.657596: step: 744/470, loss: 0.23302339017391205 2023-01-24 03:24:14.431025: step: 746/470, loss: 0.13051211833953857 2023-01-24 03:24:15.139091: step: 748/470, loss: 0.04546245560050011 2023-01-24 03:24:15.864868: step: 750/470, loss: 0.16170257329940796 2023-01-24 03:24:16.598029: step: 752/470, loss: 0.3879983723163605 2023-01-24 03:24:17.444468: step: 754/470, loss: 0.17441397905349731 2023-01-24 03:24:18.146492: step: 756/470, loss: 0.12725909054279327 2023-01-24 03:24:18.907420: step: 758/470, loss: 0.11163493990898132 2023-01-24 03:24:19.671508: step: 760/470, loss: 0.2151859700679779 2023-01-24 03:24:20.427849: step: 762/470, loss: 0.3291402757167816 2023-01-24 03:24:21.163421: step: 764/470, loss: 0.10096342861652374 2023-01-24 03:24:21.991454: step: 766/470, loss: 0.10235702246427536 2023-01-24 03:24:22.670031: step: 768/470, loss: 0.03586709871888161 2023-01-24 03:24:23.473762: step: 770/470, loss: 0.5148991346359253 2023-01-24 03:24:24.172603: step: 772/470, loss: 0.2516341507434845 2023-01-24 03:24:24.963896: step: 774/470, loss: 0.33739376068115234 2023-01-24 03:24:25.630087: step: 776/470, loss: 0.09920867532491684 2023-01-24 03:24:26.382101: step: 778/470, loss: 0.15994364023208618 2023-01-24 03:24:27.175682: step: 780/470, loss: 0.08426479995250702 2023-01-24 03:24:28.053185: step: 782/470, loss: 0.18891899287700653 2023-01-24 03:24:28.792481: step: 784/470, loss: 0.07184939086437225 2023-01-24 03:24:29.623534: step: 786/470, loss: 0.16742153465747833 2023-01-24 03:24:30.376119: step: 788/470, loss: 0.42684534192085266 2023-01-24 03:24:31.153376: step: 790/470, loss: 0.12125355750322342 2023-01-24 03:24:31.885069: step: 792/470, loss: 0.08575651794672012 2023-01-24 03:24:32.637774: step: 794/470, loss: 0.11674223095178604 2023-01-24 03:24:33.322584: step: 796/470, loss: 0.10641640424728394 2023-01-24 03:24:34.010294: step: 798/470, loss: 0.4802986681461334 2023-01-24 03:24:34.759529: step: 800/470, loss: 0.08506816625595093 2023-01-24 03:24:35.508791: step: 802/470, loss: 0.057946957647800446 2023-01-24 03:24:36.237232: step: 804/470, loss: 0.16969476640224457 2023-01-24 03:24:36.919726: step: 806/470, loss: 0.1266757696866989 2023-01-24 03:24:37.652970: step: 808/470, loss: 0.22330895066261292 2023-01-24 03:24:38.437124: step: 810/470, loss: 0.14940103888511658 2023-01-24 03:24:39.204685: step: 812/470, loss: 0.10136136412620544 2023-01-24 03:24:39.953016: step: 814/470, loss: 0.3054467439651489 2023-01-24 03:24:40.734996: step: 816/470, loss: 0.32518401741981506 2023-01-24 03:24:41.450094: step: 818/470, loss: 0.16174927353858948 2023-01-24 03:24:42.231775: step: 820/470, loss: 0.0795656368136406 2023-01-24 03:24:42.972687: step: 822/470, loss: 0.45578598976135254 2023-01-24 03:24:43.691499: step: 824/470, loss: 0.13583436608314514 2023-01-24 03:24:44.387397: step: 826/470, loss: 0.22682815790176392 2023-01-24 03:24:45.103242: step: 828/470, loss: 0.06597944349050522 2023-01-24 03:24:45.829730: step: 830/470, loss: 0.06516607105731964 2023-01-24 03:24:46.530167: step: 832/470, loss: 0.18027722835540771 2023-01-24 03:24:47.247047: step: 834/470, loss: 0.11800383776426315 2023-01-24 03:24:47.958079: step: 836/470, loss: 0.12889358401298523 2023-01-24 03:24:48.739965: step: 838/470, loss: 0.04844401776790619 2023-01-24 03:24:49.433199: step: 840/470, loss: 0.12319555133581161 2023-01-24 03:24:50.170927: step: 842/470, loss: 0.06649138778448105 2023-01-24 03:24:50.932459: step: 844/470, loss: 0.11697138100862503 2023-01-24 03:24:51.677911: step: 846/470, loss: 0.4858449399471283 2023-01-24 03:24:52.434617: step: 848/470, loss: 0.18316267430782318 2023-01-24 03:24:53.232189: step: 850/470, loss: 0.2892132103443146 2023-01-24 03:24:53.998225: step: 852/470, loss: 0.1974104344844818 2023-01-24 03:24:54.709565: step: 854/470, loss: 0.1520778387784958 2023-01-24 03:24:55.437277: step: 856/470, loss: 0.2704646587371826 2023-01-24 03:24:56.240674: step: 858/470, loss: 0.2902047634124756 2023-01-24 03:24:56.875246: step: 860/470, loss: 0.08880189806222916 2023-01-24 03:24:57.723040: step: 862/470, loss: 0.11974621564149857 2023-01-24 03:24:58.488422: step: 864/470, loss: 0.09045007079839706 2023-01-24 03:24:59.222795: step: 866/470, loss: 0.22416849434375763 2023-01-24 03:24:59.995612: step: 868/470, loss: 0.19987420737743378 2023-01-24 03:25:00.779773: step: 870/470, loss: 0.17100857198238373 2023-01-24 03:25:01.591355: step: 872/470, loss: 0.3392637073993683 2023-01-24 03:25:02.354019: step: 874/470, loss: 0.1819869726896286 2023-01-24 03:25:03.042455: step: 876/470, loss: 0.16775751113891602 2023-01-24 03:25:03.829299: step: 878/470, loss: 0.5377464890480042 2023-01-24 03:25:04.562188: step: 880/470, loss: 1.2753757238388062 2023-01-24 03:25:05.357297: step: 882/470, loss: 0.5215312838554382 2023-01-24 03:25:06.085011: step: 884/470, loss: 0.18393318355083466 2023-01-24 03:25:06.802575: step: 886/470, loss: 0.10737390071153641 2023-01-24 03:25:07.523675: step: 888/470, loss: 0.15539099276065826 2023-01-24 03:25:08.360281: step: 890/470, loss: 0.9249545335769653 2023-01-24 03:25:09.066737: step: 892/470, loss: 0.29865944385528564 2023-01-24 03:25:09.845211: step: 894/470, loss: 0.31313595175743103 2023-01-24 03:25:10.594634: step: 896/470, loss: 0.09606197476387024 2023-01-24 03:25:11.377600: step: 898/470, loss: 0.308594286441803 2023-01-24 03:25:12.227648: step: 900/470, loss: 0.11458901315927505 2023-01-24 03:25:13.068269: step: 902/470, loss: 0.07904741168022156 2023-01-24 03:25:13.870988: step: 904/470, loss: 0.1473604291677475 2023-01-24 03:25:14.618631: step: 906/470, loss: 0.22174973785877228 2023-01-24 03:25:15.385902: step: 908/470, loss: 1.0358433723449707 2023-01-24 03:25:16.141125: step: 910/470, loss: 0.49611371755599976 2023-01-24 03:25:16.882487: step: 912/470, loss: 0.15608665347099304 2023-01-24 03:25:17.617943: step: 914/470, loss: 0.15466263890266418 2023-01-24 03:25:18.390471: step: 916/470, loss: 0.20168907940387726 2023-01-24 03:25:19.153723: step: 918/470, loss: 0.4854433536529541 2023-01-24 03:25:19.885720: step: 920/470, loss: 0.2656620144844055 2023-01-24 03:25:20.620858: step: 922/470, loss: 0.17299237847328186 2023-01-24 03:25:21.364224: step: 924/470, loss: 0.41194406151771545 2023-01-24 03:25:22.062283: step: 926/470, loss: 0.11019831150770187 2023-01-24 03:25:22.693311: step: 928/470, loss: 0.17341381311416626 2023-01-24 03:25:23.524706: step: 930/470, loss: 0.11898980289697647 2023-01-24 03:25:24.315571: step: 932/470, loss: 0.0407705120742321 2023-01-24 03:25:25.114854: step: 934/470, loss: 0.16358143091201782 2023-01-24 03:25:25.830529: step: 936/470, loss: 0.148806631565094 2023-01-24 03:25:26.552152: step: 938/470, loss: 0.08406595140695572 2023-01-24 03:25:27.273857: step: 940/470, loss: 0.17969819903373718 2023-01-24 03:25:28.004104: step: 942/470, loss: 0.07237522304058075 ================================================== Loss: 0.291 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3304758430609598, 'r': 0.32232368753953194, 'f1': 0.3263488632724944}, 'combined': 0.24046758346394323, 'epoch': 13} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3570720242403975, 'r': 0.3409506168059127, 'f1': 0.3488251519360739}, 'combined': 0.23255010129071588, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3306016676866585, 'r': 0.3237010636172975, 'f1': 0.3271149770399152}, 'combined': 0.24103208834520068, 'epoch': 13} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36562718021154794, 'r': 0.3441817398337552, 'f1': 0.35458049472719705}, 'combined': 0.23638699648479797, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3182185767097967, 'r': 0.3266722011385199, 'f1': 0.32238998127340823}, 'combined': 0.23755051251724815, 'epoch': 13} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3560998414936084, 'r': 0.35199099716868215, 'f1': 0.3540334981193708}, 'combined': 0.23602233207958048, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3046875, 'r': 0.2785714285714286, 'f1': 0.291044776119403}, 'combined': 0.19402985074626866, 'epoch': 13} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6458333333333334, 'r': 0.33695652173913043, 'f1': 0.44285714285714284}, 'combined': 0.2952380952380952, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30718539637932163, 'r': 0.31184855230158837, 'f1': 0.3094994106646649}, 'combined': 0.22805219733185833, 'epoch': 12} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35588885174248575, 'r': 0.33775220833637826, 'f1': 0.3465834204931755}, 'combined': 0.23105561366211694, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 12} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:28:03.436675: step: 2/470, loss: 0.11083290725946426 2023-01-24 03:28:04.165703: step: 4/470, loss: 0.11919131875038147 2023-01-24 03:28:04.854147: step: 6/470, loss: 0.22162587940692902 2023-01-24 03:28:05.613295: step: 8/470, loss: 0.1282801777124405 2023-01-24 03:28:06.391962: step: 10/470, loss: 0.05915132537484169 2023-01-24 03:28:07.105632: step: 12/470, loss: 0.03638844937086105 2023-01-24 03:28:07.983138: step: 14/470, loss: 0.12631765007972717 2023-01-24 03:28:08.788395: step: 16/470, loss: 0.30836811661720276 2023-01-24 03:28:09.479662: step: 18/470, loss: 0.08455128967761993 2023-01-24 03:28:10.197033: step: 20/470, loss: 0.4171124994754791 2023-01-24 03:28:10.898809: step: 22/470, loss: 0.12701795995235443 2023-01-24 03:28:11.705073: step: 24/470, loss: 0.06594716012477875 2023-01-24 03:28:12.377840: step: 26/470, loss: 0.15928548574447632 2023-01-24 03:28:13.153645: step: 28/470, loss: 0.0775287002325058 2023-01-24 03:28:13.926197: step: 30/470, loss: 0.11700832098722458 2023-01-24 03:28:14.655439: step: 32/470, loss: 0.11270631104707718 2023-01-24 03:28:15.488592: step: 34/470, loss: 0.14612014591693878 2023-01-24 03:28:16.184261: step: 36/470, loss: 0.12699899077415466 2023-01-24 03:28:16.944809: step: 38/470, loss: 0.20811907947063446 2023-01-24 03:28:17.688667: step: 40/470, loss: 0.12297671288251877 2023-01-24 03:28:18.640438: step: 42/470, loss: 0.11870261281728745 2023-01-24 03:28:19.353902: step: 44/470, loss: 0.20325523614883423 2023-01-24 03:28:20.068216: step: 46/470, loss: 0.07447163760662079 2023-01-24 03:28:20.807489: step: 48/470, loss: 0.10491825640201569 2023-01-24 03:28:21.621851: step: 50/470, loss: 0.09760572761297226 2023-01-24 03:28:22.379008: step: 52/470, loss: 0.3343038856983185 2023-01-24 03:28:23.129835: step: 54/470, loss: 0.19771350920200348 2023-01-24 03:28:23.882769: step: 56/470, loss: 0.09543336182832718 2023-01-24 03:28:24.584837: step: 58/470, loss: 0.4951615333557129 2023-01-24 03:28:25.307652: step: 60/470, loss: 0.08876185119152069 2023-01-24 03:28:26.043409: step: 62/470, loss: 0.11803992092609406 2023-01-24 03:28:26.828131: step: 64/470, loss: 0.14623303711414337 2023-01-24 03:28:27.561889: step: 66/470, loss: 0.24651095271110535 2023-01-24 03:28:28.348205: step: 68/470, loss: 0.119205541908741 2023-01-24 03:28:29.097353: step: 70/470, loss: 0.10121873021125793 2023-01-24 03:28:29.823938: step: 72/470, loss: 0.2187107503414154 2023-01-24 03:28:30.555073: step: 74/470, loss: 0.07482891529798508 2023-01-24 03:28:31.296686: step: 76/470, loss: 0.8483836650848389 2023-01-24 03:28:32.119363: step: 78/470, loss: 0.04838128015398979 2023-01-24 03:28:32.887911: step: 80/470, loss: 0.1129889115691185 2023-01-24 03:28:33.631364: step: 82/470, loss: 0.16097694635391235 2023-01-24 03:28:34.402146: step: 84/470, loss: 0.08617453277111053 2023-01-24 03:28:35.148966: step: 86/470, loss: 0.12346760928630829 2023-01-24 03:28:35.957578: step: 88/470, loss: 0.763508677482605 2023-01-24 03:28:36.684937: step: 90/470, loss: 0.09003492444753647 2023-01-24 03:28:37.388164: step: 92/470, loss: 0.11231306940317154 2023-01-24 03:28:38.090621: step: 94/470, loss: 0.3491142988204956 2023-01-24 03:28:38.812037: step: 96/470, loss: 0.4276227653026581 2023-01-24 03:28:39.672918: step: 98/470, loss: 1.9698024988174438 2023-01-24 03:28:40.444321: step: 100/470, loss: 0.16232357919216156 2023-01-24 03:28:41.202258: step: 102/470, loss: 0.07967112213373184 2023-01-24 03:28:41.906391: step: 104/470, loss: 0.04950232803821564 2023-01-24 03:28:42.661072: step: 106/470, loss: 0.10787200182676315 2023-01-24 03:28:43.385603: step: 108/470, loss: 0.10786844789981842 2023-01-24 03:28:44.117083: step: 110/470, loss: 0.10731559991836548 2023-01-24 03:28:44.943863: step: 112/470, loss: 0.07661902159452438 2023-01-24 03:28:45.711955: step: 114/470, loss: 0.030627790838479996 2023-01-24 03:28:46.431768: step: 116/470, loss: 0.13857971131801605 2023-01-24 03:28:47.260360: step: 118/470, loss: 0.10013086348772049 2023-01-24 03:28:48.014941: step: 120/470, loss: 0.15490609407424927 2023-01-24 03:28:48.789884: step: 122/470, loss: 1.2364153861999512 2023-01-24 03:28:49.531280: step: 124/470, loss: 0.12906284630298615 2023-01-24 03:28:50.298732: step: 126/470, loss: 0.1098984032869339 2023-01-24 03:28:51.027024: step: 128/470, loss: 0.11757684499025345 2023-01-24 03:28:51.753572: step: 130/470, loss: 0.07572782039642334 2023-01-24 03:28:52.455282: step: 132/470, loss: 0.1737520694732666 2023-01-24 03:28:53.181321: step: 134/470, loss: 0.13601481914520264 2023-01-24 03:28:53.944546: step: 136/470, loss: 0.09237069636583328 2023-01-24 03:28:54.636863: step: 138/470, loss: 0.3090798258781433 2023-01-24 03:28:55.366067: step: 140/470, loss: 0.14041438698768616 2023-01-24 03:28:56.123602: step: 142/470, loss: 0.17475241422653198 2023-01-24 03:28:56.860520: step: 144/470, loss: 0.11330439150333405 2023-01-24 03:28:57.622968: step: 146/470, loss: 0.17179451882839203 2023-01-24 03:28:58.402089: step: 148/470, loss: 0.1465025544166565 2023-01-24 03:28:59.112075: step: 150/470, loss: 0.013929449953138828 2023-01-24 03:28:59.879139: step: 152/470, loss: 0.08549786359071732 2023-01-24 03:29:00.647634: step: 154/470, loss: 0.03197602927684784 2023-01-24 03:29:01.365861: step: 156/470, loss: 0.5165415406227112 2023-01-24 03:29:02.130809: step: 158/470, loss: 0.05465986579656601 2023-01-24 03:29:02.964157: step: 160/470, loss: 0.08515684306621552 2023-01-24 03:29:03.744956: step: 162/470, loss: 0.12424996495246887 2023-01-24 03:29:04.436859: step: 164/470, loss: 0.12669268250465393 2023-01-24 03:29:05.118077: step: 166/470, loss: 0.03578896448016167 2023-01-24 03:29:05.830315: step: 168/470, loss: 0.07446733117103577 2023-01-24 03:29:06.626409: step: 170/470, loss: 0.14528201520442963 2023-01-24 03:29:07.276658: step: 172/470, loss: 0.06307399272918701 2023-01-24 03:29:08.085225: step: 174/470, loss: 0.08100112527608871 2023-01-24 03:29:08.773034: step: 176/470, loss: 0.06932196021080017 2023-01-24 03:29:09.496676: step: 178/470, loss: 0.10252606868743896 2023-01-24 03:29:10.188396: step: 180/470, loss: 0.05974116176366806 2023-01-24 03:29:10.884926: step: 182/470, loss: 0.0428239107131958 2023-01-24 03:29:11.685894: step: 184/470, loss: 0.1692391037940979 2023-01-24 03:29:12.395296: step: 186/470, loss: 0.06524728983640671 2023-01-24 03:29:13.145957: step: 188/470, loss: 0.1310448795557022 2023-01-24 03:29:13.890161: step: 190/470, loss: 0.07662883400917053 2023-01-24 03:29:14.594481: step: 192/470, loss: 0.08161720633506775 2023-01-24 03:29:15.346892: step: 194/470, loss: 0.20154725015163422 2023-01-24 03:29:16.041692: step: 196/470, loss: 0.17144368588924408 2023-01-24 03:29:16.730074: step: 198/470, loss: 0.05600402504205704 2023-01-24 03:29:17.423095: step: 200/470, loss: 0.1903025358915329 2023-01-24 03:29:18.238261: step: 202/470, loss: 0.10436660796403885 2023-01-24 03:29:19.030602: step: 204/470, loss: 0.23059800267219543 2023-01-24 03:29:19.805163: step: 206/470, loss: 0.17381146550178528 2023-01-24 03:29:20.516811: step: 208/470, loss: 0.3720693588256836 2023-01-24 03:29:21.217582: step: 210/470, loss: 0.0502849817276001 2023-01-24 03:29:21.940136: step: 212/470, loss: 0.13139395415782928 2023-01-24 03:29:22.674229: step: 214/470, loss: 0.038059886544942856 2023-01-24 03:29:23.420179: step: 216/470, loss: 0.09123457223176956 2023-01-24 03:29:24.177862: step: 218/470, loss: 0.2840889096260071 2023-01-24 03:29:24.875116: step: 220/470, loss: 0.022772392258048058 2023-01-24 03:29:25.550814: step: 222/470, loss: 0.10895684361457825 2023-01-24 03:29:26.261036: step: 224/470, loss: 0.1379581242799759 2023-01-24 03:29:26.968215: step: 226/470, loss: 0.08646092563867569 2023-01-24 03:29:27.711470: step: 228/470, loss: 0.06485362350940704 2023-01-24 03:29:28.421482: step: 230/470, loss: 0.10785852372646332 2023-01-24 03:29:29.136210: step: 232/470, loss: 0.149741530418396 2023-01-24 03:29:29.851969: step: 234/470, loss: 0.2812889516353607 2023-01-24 03:29:30.610366: step: 236/470, loss: 0.11035189032554626 2023-01-24 03:29:31.373190: step: 238/470, loss: 0.42968517541885376 2023-01-24 03:29:32.092298: step: 240/470, loss: 0.014646702446043491 2023-01-24 03:29:32.798014: step: 242/470, loss: 0.09640643745660782 2023-01-24 03:29:33.562667: step: 244/470, loss: 0.02477741241455078 2023-01-24 03:29:34.358612: step: 246/470, loss: 0.17379818856716156 2023-01-24 03:29:35.099099: step: 248/470, loss: 0.27178406715393066 2023-01-24 03:29:35.792978: step: 250/470, loss: 0.12189687788486481 2023-01-24 03:29:36.537512: step: 252/470, loss: 0.020987318828701973 2023-01-24 03:29:37.255297: step: 254/470, loss: 0.09624991565942764 2023-01-24 03:29:37.949548: step: 256/470, loss: 0.15449093282222748 2023-01-24 03:29:38.756042: step: 258/470, loss: 0.14626288414001465 2023-01-24 03:29:39.487563: step: 260/470, loss: 0.28058895468711853 2023-01-24 03:29:40.243593: step: 262/470, loss: 0.06803245097398758 2023-01-24 03:29:40.949479: step: 264/470, loss: 0.19811493158340454 2023-01-24 03:29:41.683927: step: 266/470, loss: 0.04378509148955345 2023-01-24 03:29:42.425803: step: 268/470, loss: 0.43500596284866333 2023-01-24 03:29:43.189194: step: 270/470, loss: 0.5609925985336304 2023-01-24 03:29:43.933223: step: 272/470, loss: 0.4385612905025482 2023-01-24 03:29:44.651095: step: 274/470, loss: 0.14124025404453278 2023-01-24 03:29:45.367867: step: 276/470, loss: 0.08243583142757416 2023-01-24 03:29:46.086033: step: 278/470, loss: 0.2784011960029602 2023-01-24 03:29:46.827297: step: 280/470, loss: 0.3031136393547058 2023-01-24 03:29:47.530205: step: 282/470, loss: 0.09605658054351807 2023-01-24 03:29:48.261166: step: 284/470, loss: 0.07707042992115021 2023-01-24 03:29:49.029245: step: 286/470, loss: 0.03834238275885582 2023-01-24 03:29:49.815761: step: 288/470, loss: 0.13241738080978394 2023-01-24 03:29:50.495338: step: 290/470, loss: 0.06490999460220337 2023-01-24 03:29:51.180254: step: 292/470, loss: 0.07722263038158417 2023-01-24 03:29:51.913670: step: 294/470, loss: 0.1114049032330513 2023-01-24 03:29:52.643825: step: 296/470, loss: 0.25798553228378296 2023-01-24 03:29:53.337269: step: 298/470, loss: 0.09492284804582596 2023-01-24 03:29:54.031142: step: 300/470, loss: 0.06170547008514404 2023-01-24 03:29:54.809081: step: 302/470, loss: 0.0761180892586708 2023-01-24 03:29:55.574570: step: 304/470, loss: 0.17359501123428345 2023-01-24 03:29:56.306140: step: 306/470, loss: 0.08720303326845169 2023-01-24 03:29:57.075623: step: 308/470, loss: 0.037650175392627716 2023-01-24 03:29:57.787412: step: 310/470, loss: 0.042883120477199554 2023-01-24 03:29:58.518443: step: 312/470, loss: 0.14848671853542328 2023-01-24 03:29:59.337340: step: 314/470, loss: 0.5215410590171814 2023-01-24 03:30:00.065021: step: 316/470, loss: 0.016007540747523308 2023-01-24 03:30:00.779672: step: 318/470, loss: 0.11587570607662201 2023-01-24 03:30:01.514987: step: 320/470, loss: 0.32297030091285706 2023-01-24 03:30:02.235666: step: 322/470, loss: 0.1690657138824463 2023-01-24 03:30:03.054455: step: 324/470, loss: 0.030179066583514214 2023-01-24 03:30:03.779912: step: 326/470, loss: 0.0764576867222786 2023-01-24 03:30:04.518971: step: 328/470, loss: 0.11027715355157852 2023-01-24 03:30:05.264820: step: 330/470, loss: 0.09635484218597412 2023-01-24 03:30:05.909125: step: 332/470, loss: 0.26524147391319275 2023-01-24 03:30:06.610622: step: 334/470, loss: 0.48851656913757324 2023-01-24 03:30:07.315304: step: 336/470, loss: 0.12017401307821274 2023-01-24 03:30:08.131932: step: 338/470, loss: 0.3046230971813202 2023-01-24 03:30:09.007992: step: 340/470, loss: 0.0651002898812294 2023-01-24 03:30:09.760723: step: 342/470, loss: 0.5883811712265015 2023-01-24 03:30:10.450204: step: 344/470, loss: 0.20933616161346436 2023-01-24 03:30:11.241712: step: 346/470, loss: 0.08957860618829727 2023-01-24 03:30:11.985091: step: 348/470, loss: 0.13973873853683472 2023-01-24 03:30:12.669900: step: 350/470, loss: 0.4042279124259949 2023-01-24 03:30:13.359762: step: 352/470, loss: 0.09614955633878708 2023-01-24 03:30:14.025163: step: 354/470, loss: 0.023717915639281273 2023-01-24 03:30:14.764995: step: 356/470, loss: 0.2741442024707794 2023-01-24 03:30:15.521743: step: 358/470, loss: 0.13002316653728485 2023-01-24 03:30:16.178157: step: 360/470, loss: 0.30730727314949036 2023-01-24 03:30:16.899227: step: 362/470, loss: 0.23137405514717102 2023-01-24 03:30:17.563852: step: 364/470, loss: 0.283146470785141 2023-01-24 03:30:18.314959: step: 366/470, loss: 0.34386488795280457 2023-01-24 03:30:19.116232: step: 368/470, loss: 0.09826954454183578 2023-01-24 03:30:19.831683: step: 370/470, loss: 0.1936502754688263 2023-01-24 03:30:20.585996: step: 372/470, loss: 0.08318967372179031 2023-01-24 03:30:21.320145: step: 374/470, loss: 0.16155041754245758 2023-01-24 03:30:21.988121: step: 376/470, loss: 0.3982257843017578 2023-01-24 03:30:22.722547: step: 378/470, loss: 0.0868930071592331 2023-01-24 03:30:23.483223: step: 380/470, loss: 0.5517705082893372 2023-01-24 03:30:24.201178: step: 382/470, loss: 0.12113271653652191 2023-01-24 03:30:24.903689: step: 384/470, loss: 0.15076488256454468 2023-01-24 03:30:25.591464: step: 386/470, loss: 0.17827637493610382 2023-01-24 03:30:26.351743: step: 388/470, loss: 0.056175053119659424 2023-01-24 03:30:27.087387: step: 390/470, loss: 0.28909632563591003 2023-01-24 03:30:27.870353: step: 392/470, loss: 0.16936790943145752 2023-01-24 03:30:28.558769: step: 394/470, loss: 0.10315996408462524 2023-01-24 03:30:29.302590: step: 396/470, loss: 0.08916748315095901 2023-01-24 03:30:30.007829: step: 398/470, loss: 0.08588486909866333 2023-01-24 03:30:30.704937: step: 400/470, loss: 0.7330968976020813 2023-01-24 03:30:31.450058: step: 402/470, loss: 0.8898875117301941 2023-01-24 03:30:32.221512: step: 404/470, loss: 0.22537262737751007 2023-01-24 03:30:32.892530: step: 406/470, loss: 0.09088273346424103 2023-01-24 03:30:33.626930: step: 408/470, loss: 0.2716708183288574 2023-01-24 03:30:34.356636: step: 410/470, loss: 0.10589412599802017 2023-01-24 03:30:35.060034: step: 412/470, loss: 0.12420433014631271 2023-01-24 03:30:35.790103: step: 414/470, loss: 0.25589585304260254 2023-01-24 03:30:36.452680: step: 416/470, loss: 0.15810632705688477 2023-01-24 03:30:37.205222: step: 418/470, loss: 0.11597256362438202 2023-01-24 03:30:37.906163: step: 420/470, loss: 0.28290224075317383 2023-01-24 03:30:38.717259: step: 422/470, loss: 0.07624372839927673 2023-01-24 03:30:39.476625: step: 424/470, loss: 6.502451419830322 2023-01-24 03:30:40.191960: step: 426/470, loss: 0.054799798876047134 2023-01-24 03:30:40.947681: step: 428/470, loss: 0.29792070388793945 2023-01-24 03:30:41.773787: step: 430/470, loss: 0.20813298225402832 2023-01-24 03:30:42.547340: step: 432/470, loss: 0.04179874062538147 2023-01-24 03:30:43.285078: step: 434/470, loss: 0.0941062718629837 2023-01-24 03:30:44.078404: step: 436/470, loss: 0.21649214625358582 2023-01-24 03:30:44.800125: step: 438/470, loss: 0.12335845082998276 2023-01-24 03:30:45.529213: step: 440/470, loss: 0.0966825857758522 2023-01-24 03:30:46.347704: step: 442/470, loss: 0.07399679720401764 2023-01-24 03:30:47.176750: step: 444/470, loss: 0.12531259655952454 2023-01-24 03:30:47.919701: step: 446/470, loss: 0.08484052866697311 2023-01-24 03:30:48.676109: step: 448/470, loss: 0.13579043745994568 2023-01-24 03:30:49.468577: step: 450/470, loss: 0.1644921749830246 2023-01-24 03:30:50.157041: step: 452/470, loss: 0.11840783804655075 2023-01-24 03:30:50.835812: step: 454/470, loss: 0.05891914665699005 2023-01-24 03:30:51.557149: step: 456/470, loss: 0.23505060374736786 2023-01-24 03:30:52.259981: step: 458/470, loss: 0.15571844577789307 2023-01-24 03:30:52.967805: step: 460/470, loss: 0.19243013858795166 2023-01-24 03:30:53.846464: step: 462/470, loss: 0.14108626544475555 2023-01-24 03:30:54.606100: step: 464/470, loss: 0.15079638361930847 2023-01-24 03:30:55.386785: step: 466/470, loss: 0.25679388642311096 2023-01-24 03:30:56.207813: step: 468/470, loss: 0.110379159450531 2023-01-24 03:30:56.904378: step: 470/470, loss: 0.233668714761734 2023-01-24 03:30:57.656205: step: 472/470, loss: 0.1186995655298233 2023-01-24 03:30:58.348150: step: 474/470, loss: 0.4312833249568939 2023-01-24 03:30:59.098033: step: 476/470, loss: 0.1033293753862381 2023-01-24 03:30:59.855863: step: 478/470, loss: 0.32570046186447144 2023-01-24 03:31:00.668699: step: 480/470, loss: 0.09971433132886887 2023-01-24 03:31:01.447771: step: 482/470, loss: 1.5249170064926147 2023-01-24 03:31:02.182915: step: 484/470, loss: 0.4827004671096802 2023-01-24 03:31:02.857915: step: 486/470, loss: 0.06607785820960999 2023-01-24 03:31:03.640136: step: 488/470, loss: 0.10160824656486511 2023-01-24 03:31:04.339138: step: 490/470, loss: 0.04615769162774086 2023-01-24 03:31:05.067840: step: 492/470, loss: 0.1232377365231514 2023-01-24 03:31:05.840612: step: 494/470, loss: 0.15672165155410767 2023-01-24 03:31:06.554984: step: 496/470, loss: 0.1081511378288269 2023-01-24 03:31:07.364716: step: 498/470, loss: 0.2734154164791107 2023-01-24 03:31:08.178284: step: 500/470, loss: 0.2586720883846283 2023-01-24 03:31:08.877647: step: 502/470, loss: 0.48053133487701416 2023-01-24 03:31:09.641619: step: 504/470, loss: 0.2579320967197418 2023-01-24 03:31:10.391820: step: 506/470, loss: 0.3125365376472473 2023-01-24 03:31:11.105659: step: 508/470, loss: 0.04690413922071457 2023-01-24 03:31:11.865188: step: 510/470, loss: 0.184226855635643 2023-01-24 03:31:12.583740: step: 512/470, loss: 0.046648863703012466 2023-01-24 03:31:13.316932: step: 514/470, loss: 0.7336933016777039 2023-01-24 03:31:14.131489: step: 516/470, loss: 0.287691205739975 2023-01-24 03:31:14.882765: step: 518/470, loss: 0.2230347841978073 2023-01-24 03:31:15.606672: step: 520/470, loss: 0.15412744879722595 2023-01-24 03:31:16.311639: step: 522/470, loss: 0.2500941753387451 2023-01-24 03:31:16.942361: step: 524/470, loss: 0.19288751482963562 2023-01-24 03:31:17.852834: step: 526/470, loss: 0.216958686709404 2023-01-24 03:31:18.660396: step: 528/470, loss: 0.298077791929245 2023-01-24 03:31:19.473263: step: 530/470, loss: 0.25814521312713623 2023-01-24 03:31:20.171938: step: 532/470, loss: 0.07196559756994247 2023-01-24 03:31:20.886681: step: 534/470, loss: 0.05458388477563858 2023-01-24 03:31:21.604639: step: 536/470, loss: 0.355390727519989 2023-01-24 03:31:22.311804: step: 538/470, loss: 0.14116257429122925 2023-01-24 03:31:23.102604: step: 540/470, loss: 0.027166053652763367 2023-01-24 03:31:23.780451: step: 542/470, loss: 0.04634636268019676 2023-01-24 03:31:24.592576: step: 544/470, loss: 0.14489805698394775 2023-01-24 03:31:25.324562: step: 546/470, loss: 0.03581508621573448 2023-01-24 03:31:26.056268: step: 548/470, loss: 0.14004434645175934 2023-01-24 03:31:26.777336: step: 550/470, loss: 0.22724568843841553 2023-01-24 03:31:27.461722: step: 552/470, loss: 0.0776071846485138 2023-01-24 03:31:28.259602: step: 554/470, loss: 0.09695343673229218 2023-01-24 03:31:29.062301: step: 556/470, loss: 0.7682523131370544 2023-01-24 03:31:29.962753: step: 558/470, loss: 0.20350007712841034 2023-01-24 03:31:30.691144: step: 560/470, loss: 0.6482030749320984 2023-01-24 03:31:31.466019: step: 562/470, loss: 0.07920963317155838 2023-01-24 03:31:32.259067: step: 564/470, loss: 0.06108212471008301 2023-01-24 03:31:32.958976: step: 566/470, loss: 0.3057776391506195 2023-01-24 03:31:33.673208: step: 568/470, loss: 0.10524436086416245 2023-01-24 03:31:34.385748: step: 570/470, loss: 0.0752614364027977 2023-01-24 03:31:35.197067: step: 572/470, loss: 0.10594554245471954 2023-01-24 03:31:35.944173: step: 574/470, loss: 0.11854086071252823 2023-01-24 03:31:36.792309: step: 576/470, loss: 0.10533951222896576 2023-01-24 03:31:37.599477: step: 578/470, loss: 0.1088142916560173 2023-01-24 03:31:38.408317: step: 580/470, loss: 0.17378634214401245 2023-01-24 03:31:39.355212: step: 582/470, loss: 0.19175052642822266 2023-01-24 03:31:40.158774: step: 584/470, loss: 0.11666764318943024 2023-01-24 03:31:40.866773: step: 586/470, loss: 0.06512768566608429 2023-01-24 03:31:41.687515: step: 588/470, loss: 0.0760718509554863 2023-01-24 03:31:42.451195: step: 590/470, loss: 0.028491565957665443 2023-01-24 03:31:43.211649: step: 592/470, loss: 0.07426527142524719 2023-01-24 03:31:43.955389: step: 594/470, loss: 0.056701093912124634 2023-01-24 03:31:44.737711: step: 596/470, loss: 0.1391458362340927 2023-01-24 03:31:45.477873: step: 598/470, loss: 0.11566898971796036 2023-01-24 03:31:46.198915: step: 600/470, loss: 0.14809054136276245 2023-01-24 03:31:47.020078: step: 602/470, loss: 0.13627216219902039 2023-01-24 03:31:47.774885: step: 604/470, loss: 0.11067622900009155 2023-01-24 03:31:48.504590: step: 606/470, loss: 0.3479231595993042 2023-01-24 03:31:49.193657: step: 608/470, loss: 0.6823694109916687 2023-01-24 03:31:49.890079: step: 610/470, loss: 0.10552807152271271 2023-01-24 03:31:50.620189: step: 612/470, loss: 0.5463363528251648 2023-01-24 03:31:51.357774: step: 614/470, loss: 0.10402689129114151 2023-01-24 03:31:52.025790: step: 616/470, loss: 0.34647348523139954 2023-01-24 03:31:52.747500: step: 618/470, loss: 0.07819667458534241 2023-01-24 03:31:53.433640: step: 620/470, loss: 0.03418436273932457 2023-01-24 03:31:54.088803: step: 622/470, loss: 0.0040921662002801895 2023-01-24 03:31:54.843735: step: 624/470, loss: 0.06056486442685127 2023-01-24 03:31:55.599395: step: 626/470, loss: 0.142343208193779 2023-01-24 03:31:56.301819: step: 628/470, loss: 0.3498281240463257 2023-01-24 03:31:57.093666: step: 630/470, loss: 0.1718958616256714 2023-01-24 03:31:57.779503: step: 632/470, loss: 0.14138327538967133 2023-01-24 03:31:58.527293: step: 634/470, loss: 0.1276867389678955 2023-01-24 03:31:59.258907: step: 636/470, loss: 0.0756273865699768 2023-01-24 03:32:00.000399: step: 638/470, loss: 0.03293231502175331 2023-01-24 03:32:00.750424: step: 640/470, loss: 0.16658903658390045 2023-01-24 03:32:01.611781: step: 642/470, loss: 1.328713297843933 2023-01-24 03:32:02.346200: step: 644/470, loss: 0.14836126565933228 2023-01-24 03:32:03.024818: step: 646/470, loss: 0.12975986301898956 2023-01-24 03:32:03.857406: step: 648/470, loss: 0.2378319948911667 2023-01-24 03:32:04.603853: step: 650/470, loss: 0.1449371576309204 2023-01-24 03:32:05.318512: step: 652/470, loss: 0.0579066202044487 2023-01-24 03:32:06.034069: step: 654/470, loss: 0.17443352937698364 2023-01-24 03:32:06.703725: step: 656/470, loss: 0.08816853910684586 2023-01-24 03:32:07.445154: step: 658/470, loss: 0.19708070158958435 2023-01-24 03:32:08.208816: step: 660/470, loss: 0.3211943507194519 2023-01-24 03:32:08.909387: step: 662/470, loss: 0.08856945484876633 2023-01-24 03:32:09.703240: step: 664/470, loss: 0.14771507680416107 2023-01-24 03:32:10.421457: step: 666/470, loss: 0.6534758806228638 2023-01-24 03:32:11.193984: step: 668/470, loss: 0.5298561453819275 2023-01-24 03:32:11.907576: step: 670/470, loss: 0.3639879822731018 2023-01-24 03:32:12.637042: step: 672/470, loss: 0.06922122836112976 2023-01-24 03:32:13.388995: step: 674/470, loss: 0.44452109932899475 2023-01-24 03:32:14.097281: step: 676/470, loss: 0.10079913586378098 2023-01-24 03:32:14.844312: step: 678/470, loss: 0.12544503808021545 2023-01-24 03:32:15.589133: step: 680/470, loss: 0.14979170262813568 2023-01-24 03:32:16.358500: step: 682/470, loss: 0.10007375478744507 2023-01-24 03:32:17.153525: step: 684/470, loss: 0.015384846366941929 2023-01-24 03:32:17.905426: step: 686/470, loss: 0.04832969605922699 2023-01-24 03:32:18.549468: step: 688/470, loss: 0.1544710397720337 2023-01-24 03:32:19.386844: step: 690/470, loss: 0.3322787284851074 2023-01-24 03:32:20.143103: step: 692/470, loss: 0.3732565939426422 2023-01-24 03:32:20.958040: step: 694/470, loss: 0.13223111629486084 2023-01-24 03:32:21.722241: step: 696/470, loss: 0.301310271024704 2023-01-24 03:32:22.477303: step: 698/470, loss: 0.09329129755496979 2023-01-24 03:32:23.191735: step: 700/470, loss: 0.07543506473302841 2023-01-24 03:32:23.981968: step: 702/470, loss: 3.3870034217834473 2023-01-24 03:32:24.694717: step: 704/470, loss: 0.4438471496105194 2023-01-24 03:32:25.485588: step: 706/470, loss: 0.10665325820446014 2023-01-24 03:32:26.154553: step: 708/470, loss: 0.1168651431798935 2023-01-24 03:32:26.888398: step: 710/470, loss: 0.1690247803926468 2023-01-24 03:32:27.680433: step: 712/470, loss: 0.09398052096366882 2023-01-24 03:32:28.429074: step: 714/470, loss: 0.10398267954587936 2023-01-24 03:32:29.216416: step: 716/470, loss: 0.08926042914390564 2023-01-24 03:32:29.921877: step: 718/470, loss: 0.2242121696472168 2023-01-24 03:32:30.680951: step: 720/470, loss: 0.09120961278676987 2023-01-24 03:32:31.421087: step: 722/470, loss: 0.13982771337032318 2023-01-24 03:32:32.170157: step: 724/470, loss: 0.07675092667341232 2023-01-24 03:32:32.934587: step: 726/470, loss: 0.1070447787642479 2023-01-24 03:32:33.666994: step: 728/470, loss: 0.10396528244018555 2023-01-24 03:32:34.432483: step: 730/470, loss: 0.13090196251869202 2023-01-24 03:32:35.144236: step: 732/470, loss: 0.10720758885145187 2023-01-24 03:32:35.856621: step: 734/470, loss: 0.31446948647499084 2023-01-24 03:32:36.590509: step: 736/470, loss: 0.2971380054950714 2023-01-24 03:32:37.295309: step: 738/470, loss: 0.11717523634433746 2023-01-24 03:32:38.108338: step: 740/470, loss: 0.3187268376350403 2023-01-24 03:32:38.888572: step: 742/470, loss: 0.1835777312517166 2023-01-24 03:32:39.738476: step: 744/470, loss: 0.10451960563659668 2023-01-24 03:32:40.454472: step: 746/470, loss: 0.381958931684494 2023-01-24 03:32:41.226094: step: 748/470, loss: 0.14256399869918823 2023-01-24 03:32:41.983681: step: 750/470, loss: 0.1951764076948166 2023-01-24 03:32:42.685937: step: 752/470, loss: 0.08754151314496994 2023-01-24 03:32:43.454697: step: 754/470, loss: 0.11652766168117523 2023-01-24 03:32:44.173729: step: 756/470, loss: 0.029814664274454117 2023-01-24 03:32:44.959164: step: 758/470, loss: 0.14490105211734772 2023-01-24 03:32:45.566625: step: 760/470, loss: 0.08405127376317978 2023-01-24 03:32:46.366554: step: 762/470, loss: 0.10964828729629517 2023-01-24 03:32:47.098200: step: 764/470, loss: 0.08635842055082321 2023-01-24 03:32:47.882748: step: 766/470, loss: 0.18066643178462982 2023-01-24 03:32:48.669049: step: 768/470, loss: 0.3340843915939331 2023-01-24 03:32:49.416665: step: 770/470, loss: 0.20563696324825287 2023-01-24 03:32:50.181593: step: 772/470, loss: 0.4167340397834778 2023-01-24 03:32:50.957187: step: 774/470, loss: 0.07869897782802582 2023-01-24 03:32:51.713213: step: 776/470, loss: 0.12975147366523743 2023-01-24 03:32:52.506574: step: 778/470, loss: 0.06549026817083359 2023-01-24 03:32:53.242374: step: 780/470, loss: 0.6230465769767761 2023-01-24 03:32:54.000596: step: 782/470, loss: 0.09621866047382355 2023-01-24 03:32:54.783268: step: 784/470, loss: 0.38288676738739014 2023-01-24 03:32:55.572262: step: 786/470, loss: 0.13935059309005737 2023-01-24 03:32:56.291237: step: 788/470, loss: 0.13502168655395508 2023-01-24 03:32:57.060553: step: 790/470, loss: 0.10943473875522614 2023-01-24 03:32:57.794615: step: 792/470, loss: 0.11306705325841904 2023-01-24 03:32:58.550242: step: 794/470, loss: 0.15334700047969818 2023-01-24 03:32:59.371249: step: 796/470, loss: 0.19283847510814667 2023-01-24 03:33:00.101926: step: 798/470, loss: 0.2030116617679596 2023-01-24 03:33:00.858043: step: 800/470, loss: 0.20117712020874023 2023-01-24 03:33:01.603972: step: 802/470, loss: 0.021305864676833153 2023-01-24 03:33:02.349627: step: 804/470, loss: 0.5952919721603394 2023-01-24 03:33:03.085693: step: 806/470, loss: 0.11923728138208389 2023-01-24 03:33:03.752630: step: 808/470, loss: 0.12033215165138245 2023-01-24 03:33:04.504444: step: 810/470, loss: 0.1041664257645607 2023-01-24 03:33:05.323511: step: 812/470, loss: 0.07111985236406326 2023-01-24 03:33:06.157567: step: 814/470, loss: 0.25388404726982117 2023-01-24 03:33:06.994968: step: 816/470, loss: 0.3828502297401428 2023-01-24 03:33:07.736140: step: 818/470, loss: 0.4347081780433655 2023-01-24 03:33:08.530360: step: 820/470, loss: 0.041309136897325516 2023-01-24 03:33:09.260034: step: 822/470, loss: 0.08783011138439178 2023-01-24 03:33:10.024015: step: 824/470, loss: 0.49217742681503296 2023-01-24 03:33:10.754215: step: 826/470, loss: 0.24008692800998688 2023-01-24 03:33:11.539981: step: 828/470, loss: 0.11003921926021576 2023-01-24 03:33:12.252736: step: 830/470, loss: 0.15111075341701508 2023-01-24 03:33:12.973360: step: 832/470, loss: 0.14422738552093506 2023-01-24 03:33:13.660665: step: 834/470, loss: 0.31494441628456116 2023-01-24 03:33:14.334988: step: 836/470, loss: 0.09576459974050522 2023-01-24 03:33:15.024696: step: 838/470, loss: 0.07254704087972641 2023-01-24 03:33:15.772144: step: 840/470, loss: 0.1577906757593155 2023-01-24 03:33:16.514424: step: 842/470, loss: 1.3641383647918701 2023-01-24 03:33:17.286111: step: 844/470, loss: 0.16647478938102722 2023-01-24 03:33:18.063353: step: 846/470, loss: 0.06729204952716827 2023-01-24 03:33:18.880988: step: 848/470, loss: 0.6149294972419739 2023-01-24 03:33:19.606487: step: 850/470, loss: 0.058741990476846695 2023-01-24 03:33:20.358681: step: 852/470, loss: 0.13534869253635406 2023-01-24 03:33:21.150689: step: 854/470, loss: 0.11562196910381317 2023-01-24 03:33:21.868924: step: 856/470, loss: 0.14991667866706848 2023-01-24 03:33:22.589580: step: 858/470, loss: 0.12905238568782806 2023-01-24 03:33:23.368881: step: 860/470, loss: 0.0312718003988266 2023-01-24 03:33:24.162467: step: 862/470, loss: 0.1413513869047165 2023-01-24 03:33:24.873474: step: 864/470, loss: 0.2965477705001831 2023-01-24 03:33:25.626701: step: 866/470, loss: 0.1007094457745552 2023-01-24 03:33:26.440648: step: 868/470, loss: 0.1321275383234024 2023-01-24 03:33:27.325227: step: 870/470, loss: 0.1397213190793991 2023-01-24 03:33:28.108973: step: 872/470, loss: 0.5523156523704529 2023-01-24 03:33:28.866547: step: 874/470, loss: 0.03732849657535553 2023-01-24 03:33:29.726703: step: 876/470, loss: 0.17241162061691284 2023-01-24 03:33:30.499471: step: 878/470, loss: 0.5599587559700012 2023-01-24 03:33:31.233555: step: 880/470, loss: 0.1906861662864685 2023-01-24 03:33:32.004369: step: 882/470, loss: 0.07153639942407608 2023-01-24 03:33:32.887828: step: 884/470, loss: 0.17692595720291138 2023-01-24 03:33:33.593505: step: 886/470, loss: 0.08000165224075317 2023-01-24 03:33:34.405142: step: 888/470, loss: 7.325255870819092 2023-01-24 03:33:35.175375: step: 890/470, loss: 0.07783879339694977 2023-01-24 03:33:35.910274: step: 892/470, loss: 0.06207854673266411 2023-01-24 03:33:36.574038: step: 894/470, loss: 0.1321595311164856 2023-01-24 03:33:37.384203: step: 896/470, loss: 0.19343137741088867 2023-01-24 03:33:38.156816: step: 898/470, loss: 0.10653921216726303 2023-01-24 03:33:38.868761: step: 900/470, loss: 0.12337514758110046 2023-01-24 03:33:39.581787: step: 902/470, loss: 0.13530763983726501 2023-01-24 03:33:40.388516: step: 904/470, loss: 0.05055097118020058 2023-01-24 03:33:41.060382: step: 906/470, loss: 0.05119376629590988 2023-01-24 03:33:41.764018: step: 908/470, loss: 0.05396522581577301 2023-01-24 03:33:42.490863: step: 910/470, loss: 0.16279923915863037 2023-01-24 03:33:43.254456: step: 912/470, loss: 0.10443281382322311 2023-01-24 03:33:44.071527: step: 914/470, loss: 0.08299147337675095 2023-01-24 03:33:44.804882: step: 916/470, loss: 0.11369689553976059 2023-01-24 03:33:45.540960: step: 918/470, loss: 0.9923412799835205 2023-01-24 03:33:46.346266: step: 920/470, loss: 0.12374808639287949 2023-01-24 03:33:47.186668: step: 922/470, loss: 0.29581671953201294 2023-01-24 03:33:47.960539: step: 924/470, loss: 0.20488214492797852 2023-01-24 03:33:48.732430: step: 926/470, loss: 0.2639607787132263 2023-01-24 03:33:49.439490: step: 928/470, loss: 0.11370810866355896 2023-01-24 03:33:50.182144: step: 930/470, loss: 0.3266356289386749 2023-01-24 03:33:50.877719: step: 932/470, loss: 0.0699765533208847 2023-01-24 03:33:51.620119: step: 934/470, loss: 0.05949941277503967 2023-01-24 03:33:52.311428: step: 936/470, loss: 0.09237470477819443 2023-01-24 03:33:53.057228: step: 938/470, loss: 0.20467214286327362 2023-01-24 03:33:53.742437: step: 940/470, loss: 0.23523172736167908 2023-01-24 03:33:54.485482: step: 942/470, loss: 0.17894554138183594 ================================================== Loss: 0.225 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3319842533616419, 'r': 0.29670698924731187, 'f1': 0.31335587842351376}, 'combined': 0.23089380515416802, 'epoch': 14} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36783198065078127, 'r': 0.33776878992451553, 'f1': 0.35215994137493345}, 'combined': 0.23477329424995558, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3330867375027527, 'r': 0.3021166233895935, 'f1': 0.3168466876145588}, 'combined': 0.23346598034756963, 'epoch': 14} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3765380428522701, 'r': 0.34250479667139183, 'f1': 0.35871600054204184}, 'combined': 0.23914400036136116, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32009730538922154, 'r': 0.3043050284629981, 'f1': 0.3120014591439689}, 'combined': 0.22989581200081916, 'epoch': 14} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3650647247553185, 'r': 0.3499852020228702, 'f1': 0.3573659591032936}, 'combined': 0.23824397273552902, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 14} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5357142857142857, 'r': 0.32608695652173914, 'f1': 0.40540540540540543}, 'combined': 0.2702702702702703, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 14} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32009730538922154, 'r': 0.3043050284629981, 'f1': 0.3120014591439689}, 'combined': 0.22989581200081916, 'epoch': 14} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3650647247553185, 'r': 0.3499852020228702, 'f1': 0.3573659591032936}, 'combined': 0.23824397273552902, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 14} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:36:33.246003: step: 2/470, loss: 0.11473778635263443 2023-01-24 03:36:33.967144: step: 4/470, loss: 0.14706557989120483 2023-01-24 03:36:34.728347: step: 6/470, loss: 0.1552312821149826 2023-01-24 03:36:35.487005: step: 8/470, loss: 0.23627066612243652 2023-01-24 03:36:36.344395: step: 10/470, loss: 0.0939834862947464 2023-01-24 03:36:37.022475: step: 12/470, loss: 0.12837252020835876 2023-01-24 03:36:37.782593: step: 14/470, loss: 0.15545101463794708 2023-01-24 03:36:38.566322: step: 16/470, loss: 0.23269550502300262 2023-01-24 03:36:39.376521: step: 18/470, loss: 0.2941681742668152 2023-01-24 03:36:40.079597: step: 20/470, loss: 0.05719856545329094 2023-01-24 03:36:40.824148: step: 22/470, loss: 0.1427365392446518 2023-01-24 03:36:41.609344: step: 24/470, loss: 0.12609969079494476 2023-01-24 03:36:42.301833: step: 26/470, loss: 0.03879668563604355 2023-01-24 03:36:43.137271: step: 28/470, loss: 0.09652974456548691 2023-01-24 03:36:43.938625: step: 30/470, loss: 0.08992653340101242 2023-01-24 03:36:44.654746: step: 32/470, loss: 0.18987002968788147 2023-01-24 03:36:45.309976: step: 34/470, loss: 0.021707141771912575 2023-01-24 03:36:46.101833: step: 36/470, loss: 0.1298495978116989 2023-01-24 03:36:46.838013: step: 38/470, loss: 0.19483181834220886 2023-01-24 03:36:47.571624: step: 40/470, loss: 0.0935438945889473 2023-01-24 03:36:48.350070: step: 42/470, loss: 0.038643594831228256 2023-01-24 03:36:49.098505: step: 44/470, loss: 0.08340389281511307 2023-01-24 03:36:49.791543: step: 46/470, loss: 0.02248873934149742 2023-01-24 03:36:50.558001: step: 48/470, loss: 0.08780361711978912 2023-01-24 03:36:51.259997: step: 50/470, loss: 0.9724255204200745 2023-01-24 03:36:52.000091: step: 52/470, loss: 0.19970646500587463 2023-01-24 03:36:52.671124: step: 54/470, loss: 0.1309264898300171 2023-01-24 03:36:53.392709: step: 56/470, loss: 0.050499871373176575 2023-01-24 03:36:54.122403: step: 58/470, loss: 0.07953242212533951 2023-01-24 03:36:54.860372: step: 60/470, loss: 0.04383343458175659 2023-01-24 03:36:55.599665: step: 62/470, loss: 0.08836504071950912 2023-01-24 03:36:56.379889: step: 64/470, loss: 2.471203565597534 2023-01-24 03:36:57.100693: step: 66/470, loss: 0.03604745864868164 2023-01-24 03:36:57.843314: step: 68/470, loss: 0.0975373387336731 2023-01-24 03:36:58.561776: step: 70/470, loss: 0.06281895935535431 2023-01-24 03:36:59.424992: step: 72/470, loss: 0.1086447536945343 2023-01-24 03:37:00.214451: step: 74/470, loss: 0.058805786073207855 2023-01-24 03:37:00.984072: step: 76/470, loss: 0.14832088351249695 2023-01-24 03:37:01.813652: step: 78/470, loss: 0.30360713601112366 2023-01-24 03:37:02.651737: step: 80/470, loss: 2.0807321071624756 2023-01-24 03:37:03.371094: step: 82/470, loss: 0.15625305473804474 2023-01-24 03:37:04.135711: step: 84/470, loss: 0.3062744140625 2023-01-24 03:37:04.997032: step: 86/470, loss: 0.05567606911063194 2023-01-24 03:37:05.709744: step: 88/470, loss: 0.09600371867418289 2023-01-24 03:37:06.411360: step: 90/470, loss: 0.024420877918601036 2023-01-24 03:37:07.232398: step: 92/470, loss: 0.2148466855287552 2023-01-24 03:37:07.953959: step: 94/470, loss: 0.10316638648509979 2023-01-24 03:37:08.649697: step: 96/470, loss: 0.06793716549873352 2023-01-24 03:37:09.377143: step: 98/470, loss: 0.11314757913351059 2023-01-24 03:37:10.178599: step: 100/470, loss: 0.10866573452949524 2023-01-24 03:37:10.938832: step: 102/470, loss: 0.06647631525993347 2023-01-24 03:37:11.663167: step: 104/470, loss: 0.053145021200180054 2023-01-24 03:37:12.497169: step: 106/470, loss: 0.0989309549331665 2023-01-24 03:37:13.223230: step: 108/470, loss: 0.07991360872983932 2023-01-24 03:37:13.990850: step: 110/470, loss: 0.28862452507019043 2023-01-24 03:37:14.791064: step: 112/470, loss: 0.05426064506173134 2023-01-24 03:37:15.523030: step: 114/470, loss: 0.090945765376091 2023-01-24 03:37:16.275502: step: 116/470, loss: 0.05981391668319702 2023-01-24 03:37:16.954058: step: 118/470, loss: 0.04086308926343918 2023-01-24 03:37:17.687068: step: 120/470, loss: 0.279904305934906 2023-01-24 03:37:18.376975: step: 122/470, loss: 0.08591174334287643 2023-01-24 03:37:19.093412: step: 124/470, loss: 0.025589006021618843 2023-01-24 03:37:19.811705: step: 126/470, loss: 0.2623073160648346 2023-01-24 03:37:20.564529: step: 128/470, loss: 0.03692317008972168 2023-01-24 03:37:21.285410: step: 130/470, loss: 0.16277796030044556 2023-01-24 03:37:22.050321: step: 132/470, loss: 0.11714790016412735 2023-01-24 03:37:22.712420: step: 134/470, loss: 0.16155806183815002 2023-01-24 03:37:23.413692: step: 136/470, loss: 0.034791506826877594 2023-01-24 03:37:24.128541: step: 138/470, loss: 0.15954738855361938 2023-01-24 03:37:25.009283: step: 140/470, loss: 0.100949726998806 2023-01-24 03:37:25.890336: step: 142/470, loss: 0.08963809162378311 2023-01-24 03:37:26.687240: step: 144/470, loss: 0.597632884979248 2023-01-24 03:37:27.410005: step: 146/470, loss: 0.07044071704149246 2023-01-24 03:37:28.209832: step: 148/470, loss: 0.09197443723678589 2023-01-24 03:37:29.015742: step: 150/470, loss: 0.09861261397600174 2023-01-24 03:37:29.768938: step: 152/470, loss: 0.054308950901031494 2023-01-24 03:37:30.483572: step: 154/470, loss: 0.224558487534523 2023-01-24 03:37:31.206694: step: 156/470, loss: 1.2589021921157837 2023-01-24 03:37:31.982375: step: 158/470, loss: 0.6944888234138489 2023-01-24 03:37:32.683837: step: 160/470, loss: 0.2508222460746765 2023-01-24 03:37:33.479191: step: 162/470, loss: 0.16872183978557587 2023-01-24 03:37:34.188337: step: 164/470, loss: 0.13078132271766663 2023-01-24 03:37:34.820272: step: 166/470, loss: 0.09100139886140823 2023-01-24 03:37:35.557066: step: 168/470, loss: 0.05044722184538841 2023-01-24 03:37:36.273953: step: 170/470, loss: 0.13399793207645416 2023-01-24 03:37:36.965276: step: 172/470, loss: 0.6419817209243774 2023-01-24 03:37:37.741336: step: 174/470, loss: 0.01634124107658863 2023-01-24 03:37:38.417665: step: 176/470, loss: 0.025893285870552063 2023-01-24 03:37:39.129909: step: 178/470, loss: 0.11607389897108078 2023-01-24 03:37:39.885711: step: 180/470, loss: 0.07055290043354034 2023-01-24 03:37:40.642975: step: 182/470, loss: 0.11892832070589066 2023-01-24 03:37:41.341362: step: 184/470, loss: 0.04048566147685051 2023-01-24 03:37:42.071440: step: 186/470, loss: 0.06207974627614021 2023-01-24 03:37:42.844861: step: 188/470, loss: 0.026901859790086746 2023-01-24 03:37:43.617781: step: 190/470, loss: 0.15540508925914764 2023-01-24 03:37:44.459778: step: 192/470, loss: 0.05698360875248909 2023-01-24 03:37:45.199749: step: 194/470, loss: 0.9431207180023193 2023-01-24 03:37:45.965168: step: 196/470, loss: 0.5588711500167847 2023-01-24 03:37:46.704510: step: 198/470, loss: 0.6502451300621033 2023-01-24 03:37:47.545243: step: 200/470, loss: 0.08674420416355133 2023-01-24 03:37:48.290737: step: 202/470, loss: 0.04215487837791443 2023-01-24 03:37:49.087015: step: 204/470, loss: 0.11216975003480911 2023-01-24 03:37:49.773462: step: 206/470, loss: 0.016669681295752525 2023-01-24 03:37:50.662701: step: 208/470, loss: 0.11073028296232224 2023-01-24 03:37:51.452835: step: 210/470, loss: 0.26849380135536194 2023-01-24 03:37:52.260078: step: 212/470, loss: 0.13260619342327118 2023-01-24 03:37:53.050299: step: 214/470, loss: 0.0956636592745781 2023-01-24 03:37:53.761228: step: 216/470, loss: 0.04913400486111641 2023-01-24 03:37:54.588005: step: 218/470, loss: 0.11870007961988449 2023-01-24 03:37:55.343192: step: 220/470, loss: 0.051276322454214096 2023-01-24 03:37:56.099039: step: 222/470, loss: 0.15064826607704163 2023-01-24 03:37:56.822232: step: 224/470, loss: 0.0929044634103775 2023-01-24 03:37:57.683573: step: 226/470, loss: 0.09530377388000488 2023-01-24 03:37:58.549230: step: 228/470, loss: 0.5640994906425476 2023-01-24 03:37:59.243892: step: 230/470, loss: 0.0663192942738533 2023-01-24 03:38:00.025600: step: 232/470, loss: 0.10693991929292679 2023-01-24 03:38:00.756929: step: 234/470, loss: 0.05415727570652962 2023-01-24 03:38:01.642280: step: 236/470, loss: 0.048255015164613724 2023-01-24 03:38:02.415447: step: 238/470, loss: 0.12781493365764618 2023-01-24 03:38:03.138285: step: 240/470, loss: 0.06164423003792763 2023-01-24 03:38:03.873969: step: 242/470, loss: 1.3429557085037231 2023-01-24 03:38:04.608995: step: 244/470, loss: 0.14472408592700958 2023-01-24 03:38:05.570629: step: 246/470, loss: 0.2433803528547287 2023-01-24 03:38:06.275097: step: 248/470, loss: 0.17759829759597778 2023-01-24 03:38:07.024575: step: 250/470, loss: 0.15550030767917633 2023-01-24 03:38:07.740807: step: 252/470, loss: 0.1389748603105545 2023-01-24 03:38:08.463710: step: 254/470, loss: 0.11234139651060104 2023-01-24 03:38:09.169100: step: 256/470, loss: 0.0669410452246666 2023-01-24 03:38:10.017534: step: 258/470, loss: 0.1081426739692688 2023-01-24 03:38:10.776680: step: 260/470, loss: 0.048209790140390396 2023-01-24 03:38:11.504227: step: 262/470, loss: 0.09756072610616684 2023-01-24 03:38:12.289759: step: 264/470, loss: 0.07871279865503311 2023-01-24 03:38:13.029709: step: 266/470, loss: 0.04785219579935074 2023-01-24 03:38:13.788386: step: 268/470, loss: 0.1019335687160492 2023-01-24 03:38:14.509907: step: 270/470, loss: 0.16639164090156555 2023-01-24 03:38:15.328796: step: 272/470, loss: 0.18241894245147705 2023-01-24 03:38:16.113124: step: 274/470, loss: 0.024366773664951324 2023-01-24 03:38:16.874177: step: 276/470, loss: 0.5418151021003723 2023-01-24 03:38:17.561637: step: 278/470, loss: 0.031031997874379158 2023-01-24 03:38:18.255887: step: 280/470, loss: 0.06488221883773804 2023-01-24 03:38:19.021776: step: 282/470, loss: 0.07829637080430984 2023-01-24 03:38:19.801109: step: 284/470, loss: 0.11502385139465332 2023-01-24 03:38:20.562912: step: 286/470, loss: 0.036991775035858154 2023-01-24 03:38:21.366050: step: 288/470, loss: 0.04660653695464134 2023-01-24 03:38:22.044997: step: 290/470, loss: 0.06741064786911011 2023-01-24 03:38:22.692091: step: 292/470, loss: 0.054885994642972946 2023-01-24 03:38:23.448605: step: 294/470, loss: 0.03092462196946144 2023-01-24 03:38:24.156338: step: 296/470, loss: 0.022188784554600716 2023-01-24 03:38:24.974276: step: 298/470, loss: 0.30903947353363037 2023-01-24 03:38:25.758947: step: 300/470, loss: 0.07914907485246658 2023-01-24 03:38:26.490580: step: 302/470, loss: 0.19007016718387604 2023-01-24 03:38:27.243852: step: 304/470, loss: 0.12614548206329346 2023-01-24 03:38:28.038912: step: 306/470, loss: 0.05936804041266441 2023-01-24 03:38:28.719183: step: 308/470, loss: 0.08794538676738739 2023-01-24 03:38:29.587781: step: 310/470, loss: 0.014910301193594933 2023-01-24 03:38:30.349479: step: 312/470, loss: 0.44476574659347534 2023-01-24 03:38:31.221558: step: 314/470, loss: 0.2262786328792572 2023-01-24 03:38:31.960449: step: 316/470, loss: 0.04540727660059929 2023-01-24 03:38:32.727006: step: 318/470, loss: 0.09350919723510742 2023-01-24 03:38:33.482784: step: 320/470, loss: 0.3775199353694916 2023-01-24 03:38:34.241651: step: 322/470, loss: 0.09253061562776566 2023-01-24 03:38:34.949186: step: 324/470, loss: 0.040534548461437225 2023-01-24 03:38:35.663913: step: 326/470, loss: 0.5241174697875977 2023-01-24 03:38:36.440369: step: 328/470, loss: 0.24729689955711365 2023-01-24 03:38:37.184939: step: 330/470, loss: 0.09145756810903549 2023-01-24 03:38:37.952220: step: 332/470, loss: 0.08529587835073471 2023-01-24 03:38:38.676776: step: 334/470, loss: 0.07263697683811188 2023-01-24 03:38:39.346134: step: 336/470, loss: 0.04024118930101395 2023-01-24 03:38:40.110332: step: 338/470, loss: 0.06803514063358307 2023-01-24 03:38:40.901902: step: 340/470, loss: 0.4728766679763794 2023-01-24 03:38:41.617417: step: 342/470, loss: 0.10786987096071243 2023-01-24 03:38:42.404591: step: 344/470, loss: 0.05304402485489845 2023-01-24 03:38:43.085006: step: 346/470, loss: 0.13090533018112183 2023-01-24 03:38:43.750655: step: 348/470, loss: 0.02692641317844391 2023-01-24 03:38:44.476262: step: 350/470, loss: 0.292754203081131 2023-01-24 03:38:45.212092: step: 352/470, loss: 0.10036718100309372 2023-01-24 03:38:45.873387: step: 354/470, loss: 0.08418302237987518 2023-01-24 03:38:46.674704: step: 356/470, loss: 0.8037522435188293 2023-01-24 03:38:47.496583: step: 358/470, loss: 0.10909231752157211 2023-01-24 03:38:48.230460: step: 360/470, loss: 0.15878289937973022 2023-01-24 03:38:48.961536: step: 362/470, loss: 0.41693437099456787 2023-01-24 03:38:49.642817: step: 364/470, loss: 0.526682436466217 2023-01-24 03:38:50.446757: step: 366/470, loss: 1.07395601272583 2023-01-24 03:38:51.144158: step: 368/470, loss: 0.10555024445056915 2023-01-24 03:38:51.946270: step: 370/470, loss: 0.06377702951431274 2023-01-24 03:38:52.769880: step: 372/470, loss: 0.06496112048625946 2023-01-24 03:38:53.482356: step: 374/470, loss: 0.28721484541893005 2023-01-24 03:38:54.215778: step: 376/470, loss: 0.06607744842767715 2023-01-24 03:38:54.945936: step: 378/470, loss: 0.10380702465772629 2023-01-24 03:38:55.699298: step: 380/470, loss: 0.545688807964325 2023-01-24 03:38:56.541228: step: 382/470, loss: 0.08570117503404617 2023-01-24 03:38:57.352019: step: 384/470, loss: 0.09269419312477112 2023-01-24 03:38:58.014367: step: 386/470, loss: 0.13103975355625153 2023-01-24 03:38:58.756118: step: 388/470, loss: 0.16990436613559723 2023-01-24 03:38:59.490725: step: 390/470, loss: 0.07417949289083481 2023-01-24 03:39:00.338098: step: 392/470, loss: 0.30628538131713867 2023-01-24 03:39:01.087645: step: 394/470, loss: 0.037749405950307846 2023-01-24 03:39:01.906575: step: 396/470, loss: 0.17938406765460968 2023-01-24 03:39:02.574402: step: 398/470, loss: 0.1570398360490799 2023-01-24 03:39:03.275786: step: 400/470, loss: 0.07525985687971115 2023-01-24 03:39:03.933294: step: 402/470, loss: 0.3523021340370178 2023-01-24 03:39:04.621454: step: 404/470, loss: 0.7962073087692261 2023-01-24 03:39:05.283094: step: 406/470, loss: 0.620932936668396 2023-01-24 03:39:06.000579: step: 408/470, loss: 0.1963292360305786 2023-01-24 03:39:06.673246: step: 410/470, loss: 0.29872724413871765 2023-01-24 03:39:07.378962: step: 412/470, loss: 0.20817507803440094 2023-01-24 03:39:08.143424: step: 414/470, loss: 0.12282727658748627 2023-01-24 03:39:08.753163: step: 416/470, loss: 0.10243619978427887 2023-01-24 03:39:09.528491: step: 418/470, loss: 0.17507648468017578 2023-01-24 03:39:10.322078: step: 420/470, loss: 0.17442834377288818 2023-01-24 03:39:11.133759: step: 422/470, loss: 0.25709930062294006 2023-01-24 03:39:11.968345: step: 424/470, loss: 0.08250849694013596 2023-01-24 03:39:12.696285: step: 426/470, loss: 0.09402203559875488 2023-01-24 03:39:13.412206: step: 428/470, loss: 0.11240358650684357 2023-01-24 03:39:14.259054: step: 430/470, loss: 0.1616356074810028 2023-01-24 03:39:15.011466: step: 432/470, loss: 0.048348795622587204 2023-01-24 03:39:15.735137: step: 434/470, loss: 0.06427058577537537 2023-01-24 03:39:16.469961: step: 436/470, loss: 0.10640700906515121 2023-01-24 03:39:17.184847: step: 438/470, loss: 0.23929402232170105 2023-01-24 03:39:18.009509: step: 440/470, loss: 0.12179520726203918 2023-01-24 03:39:18.785325: step: 442/470, loss: 0.10428435355424881 2023-01-24 03:39:19.591190: step: 444/470, loss: 0.06364937126636505 2023-01-24 03:39:20.263804: step: 446/470, loss: 0.18099617958068848 2023-01-24 03:39:20.956083: step: 448/470, loss: 0.19195039570331573 2023-01-24 03:39:21.684887: step: 450/470, loss: 0.11918045580387115 2023-01-24 03:39:22.465740: step: 452/470, loss: 0.24603021144866943 2023-01-24 03:39:23.182487: step: 454/470, loss: 0.09445057809352875 2023-01-24 03:39:23.967250: step: 456/470, loss: 0.0714806392788887 2023-01-24 03:39:24.732099: step: 458/470, loss: 0.06616587191820145 2023-01-24 03:39:25.477275: step: 460/470, loss: 0.23478007316589355 2023-01-24 03:39:26.299332: step: 462/470, loss: 0.10817045718431473 2023-01-24 03:39:26.933012: step: 464/470, loss: 0.12772588431835175 2023-01-24 03:39:27.616947: step: 466/470, loss: 0.11464595049619675 2023-01-24 03:39:28.369553: step: 468/470, loss: 0.4346560835838318 2023-01-24 03:39:29.096673: step: 470/470, loss: 0.055237311869859695 2023-01-24 03:39:29.815569: step: 472/470, loss: 0.13824693858623505 2023-01-24 03:39:30.543807: step: 474/470, loss: 0.1086035966873169 2023-01-24 03:39:31.263510: step: 476/470, loss: 0.26932814717292786 2023-01-24 03:39:32.015995: step: 478/470, loss: 3.1218864917755127 2023-01-24 03:39:32.788129: step: 480/470, loss: 0.06301096081733704 2023-01-24 03:39:33.486296: step: 482/470, loss: 0.21947330236434937 2023-01-24 03:39:34.236451: step: 484/470, loss: 0.0712946206331253 2023-01-24 03:39:35.064560: step: 486/470, loss: 0.35837650299072266 2023-01-24 03:39:35.813683: step: 488/470, loss: 0.05311376228928566 2023-01-24 03:39:36.606914: step: 490/470, loss: 0.07864044606685638 2023-01-24 03:39:37.455861: step: 492/470, loss: 0.15913601219654083 2023-01-24 03:39:38.146788: step: 494/470, loss: 0.49420005083084106 2023-01-24 03:39:38.833355: step: 496/470, loss: 0.07326009124517441 2023-01-24 03:39:39.532069: step: 498/470, loss: 0.09878735989332199 2023-01-24 03:39:40.324474: step: 500/470, loss: 0.18507295846939087 2023-01-24 03:39:41.190338: step: 502/470, loss: 0.23065824806690216 2023-01-24 03:39:41.954383: step: 504/470, loss: 0.056721873581409454 2023-01-24 03:39:42.656052: step: 506/470, loss: 0.04180415719747543 2023-01-24 03:39:43.389691: step: 508/470, loss: 0.4849397838115692 2023-01-24 03:39:44.142953: step: 510/470, loss: 0.12329412996768951 2023-01-24 03:39:44.859035: step: 512/470, loss: 0.4908522963523865 2023-01-24 03:39:45.686205: step: 514/470, loss: 0.42387697100639343 2023-01-24 03:39:46.422948: step: 516/470, loss: 0.07153800129890442 2023-01-24 03:39:47.174931: step: 518/470, loss: 0.0721924901008606 2023-01-24 03:39:47.924716: step: 520/470, loss: 0.6146575212478638 2023-01-24 03:39:48.681428: step: 522/470, loss: 0.10892462730407715 2023-01-24 03:39:49.453622: step: 524/470, loss: 0.06572824716567993 2023-01-24 03:39:50.238483: step: 526/470, loss: 0.15284620225429535 2023-01-24 03:39:51.023006: step: 528/470, loss: 0.18958349525928497 2023-01-24 03:39:51.738478: step: 530/470, loss: 0.03336469456553459 2023-01-24 03:39:52.497892: step: 532/470, loss: 0.1496352255344391 2023-01-24 03:39:53.227817: step: 534/470, loss: 0.13351905345916748 2023-01-24 03:39:53.917715: step: 536/470, loss: 0.08257067203521729 2023-01-24 03:39:54.673489: step: 538/470, loss: 0.7537316679954529 2023-01-24 03:39:55.415762: step: 540/470, loss: 0.3301614224910736 2023-01-24 03:39:56.115128: step: 542/470, loss: 0.08029845356941223 2023-01-24 03:39:56.866339: step: 544/470, loss: 0.08368469774723053 2023-01-24 03:39:57.634692: step: 546/470, loss: 0.054839249700307846 2023-01-24 03:39:58.357611: step: 548/470, loss: 0.0818573608994484 2023-01-24 03:39:59.110208: step: 550/470, loss: 0.09326575696468353 2023-01-24 03:39:59.902502: step: 552/470, loss: 0.08490006625652313 2023-01-24 03:40:00.579166: step: 554/470, loss: 0.2637978792190552 2023-01-24 03:40:01.318907: step: 556/470, loss: 0.02072441391646862 2023-01-24 03:40:02.052424: step: 558/470, loss: 0.07177267968654633 2023-01-24 03:40:02.882995: step: 560/470, loss: 0.17377981543540955 2023-01-24 03:40:03.596608: step: 562/470, loss: 0.10091854631900787 2023-01-24 03:40:04.305816: step: 564/470, loss: 0.07952025532722473 2023-01-24 03:40:04.999354: step: 566/470, loss: 0.021815728396177292 2023-01-24 03:40:05.776347: step: 568/470, loss: 0.1319160908460617 2023-01-24 03:40:06.398226: step: 570/470, loss: 0.03820590302348137 2023-01-24 03:40:07.127535: step: 572/470, loss: 0.05425729230046272 2023-01-24 03:40:07.845521: step: 574/470, loss: 0.07900302857160568 2023-01-24 03:40:08.625567: step: 576/470, loss: 0.10675648599863052 2023-01-24 03:40:09.359334: step: 578/470, loss: 0.13442978262901306 2023-01-24 03:40:10.114146: step: 580/470, loss: 0.16306370496749878 2023-01-24 03:40:10.847058: step: 582/470, loss: 0.22952473163604736 2023-01-24 03:40:11.645299: step: 584/470, loss: 0.2049272507429123 2023-01-24 03:40:12.420668: step: 586/470, loss: 0.01995740458369255 2023-01-24 03:40:13.172333: step: 588/470, loss: 0.10787932574748993 2023-01-24 03:40:13.929229: step: 590/470, loss: 0.06048334389925003 2023-01-24 03:40:14.743822: step: 592/470, loss: 0.23972097039222717 2023-01-24 03:40:15.437560: step: 594/470, loss: 0.3398410975933075 2023-01-24 03:40:16.134840: step: 596/470, loss: 0.23750761151313782 2023-01-24 03:40:16.879599: step: 598/470, loss: 0.07478002458810806 2023-01-24 03:40:17.693159: step: 600/470, loss: 0.046232108026742935 2023-01-24 03:40:18.510653: step: 602/470, loss: 0.18411901593208313 2023-01-24 03:40:19.383272: step: 604/470, loss: 0.249111145734787 2023-01-24 03:40:20.199029: step: 606/470, loss: 0.10080849379301071 2023-01-24 03:40:20.988411: step: 608/470, loss: 0.34081441164016724 2023-01-24 03:40:21.711442: step: 610/470, loss: 0.10482829064130783 2023-01-24 03:40:22.555249: step: 612/470, loss: 0.1037624180316925 2023-01-24 03:40:23.341524: step: 614/470, loss: 0.14347702264785767 2023-01-24 03:40:24.172118: step: 616/470, loss: 0.6947146654129028 2023-01-24 03:40:24.892682: step: 618/470, loss: 0.10346982628107071 2023-01-24 03:40:25.587713: step: 620/470, loss: 0.0636051818728447 2023-01-24 03:40:26.410436: step: 622/470, loss: 0.5542206168174744 2023-01-24 03:40:27.223256: step: 624/470, loss: 0.1027393713593483 2023-01-24 03:40:27.960479: step: 626/470, loss: 0.08677075058221817 2023-01-24 03:40:28.688074: step: 628/470, loss: 0.09158849716186523 2023-01-24 03:40:29.384960: step: 630/470, loss: 0.05454597622156143 2023-01-24 03:40:30.079941: step: 632/470, loss: 0.2582951486110687 2023-01-24 03:40:30.907053: step: 634/470, loss: 0.06388426572084427 2023-01-24 03:40:31.666663: step: 636/470, loss: 0.08858684450387955 2023-01-24 03:40:32.426252: step: 638/470, loss: 0.08609853684902191 2023-01-24 03:40:33.184017: step: 640/470, loss: 0.06940389424562454 2023-01-24 03:40:33.926780: step: 642/470, loss: 0.42918092012405396 2023-01-24 03:40:34.651815: step: 644/470, loss: 0.10172919929027557 2023-01-24 03:40:35.317809: step: 646/470, loss: 0.09180402010679245 2023-01-24 03:40:36.004349: step: 648/470, loss: 0.08429131656885147 2023-01-24 03:40:36.707477: step: 650/470, loss: 0.1133129671216011 2023-01-24 03:40:37.401246: step: 652/470, loss: 0.3817265033721924 2023-01-24 03:40:38.139392: step: 654/470, loss: 0.17819011211395264 2023-01-24 03:40:38.843625: step: 656/470, loss: 0.060797855257987976 2023-01-24 03:40:39.625985: step: 658/470, loss: 0.24461068212985992 2023-01-24 03:40:40.447640: step: 660/470, loss: 5.418313980102539 2023-01-24 03:40:41.159618: step: 662/470, loss: 0.05842447280883789 2023-01-24 03:40:41.843124: step: 664/470, loss: 1.0201159715652466 2023-01-24 03:40:42.646539: step: 666/470, loss: 0.44793573021888733 2023-01-24 03:40:43.360406: step: 668/470, loss: 0.16222833096981049 2023-01-24 03:40:44.058862: step: 670/470, loss: 0.13106600940227509 2023-01-24 03:40:44.812448: step: 672/470, loss: 0.07552053779363632 2023-01-24 03:40:45.524048: step: 674/470, loss: 0.043479401618242264 2023-01-24 03:40:46.315103: step: 676/470, loss: 0.16281522810459137 2023-01-24 03:40:47.129648: step: 678/470, loss: 0.07090484350919724 2023-01-24 03:40:47.881737: step: 680/470, loss: 0.8017717003822327 2023-01-24 03:40:48.646318: step: 682/470, loss: 0.2855534553527832 2023-01-24 03:40:49.409384: step: 684/470, loss: 0.12028669565916061 2023-01-24 03:40:50.172277: step: 686/470, loss: 0.20114941895008087 2023-01-24 03:40:50.927677: step: 688/470, loss: 0.08315178006887436 2023-01-24 03:40:51.633934: step: 690/470, loss: 0.06788298487663269 2023-01-24 03:40:52.435029: step: 692/470, loss: 0.2848212420940399 2023-01-24 03:40:53.190545: step: 694/470, loss: 0.21830137073993683 2023-01-24 03:40:53.906990: step: 696/470, loss: 0.04804154112935066 2023-01-24 03:40:54.578788: step: 698/470, loss: 0.1450975388288498 2023-01-24 03:40:55.235074: step: 700/470, loss: 0.12967953085899353 2023-01-24 03:40:55.996699: step: 702/470, loss: 0.09858199208974838 2023-01-24 03:40:56.726704: step: 704/470, loss: 0.06423236429691315 2023-01-24 03:40:57.465610: step: 706/470, loss: 0.08795824646949768 2023-01-24 03:40:58.196395: step: 708/470, loss: 0.08408377319574356 2023-01-24 03:40:58.932635: step: 710/470, loss: 0.6152773499488831 2023-01-24 03:40:59.626690: step: 712/470, loss: 0.08272521942853928 2023-01-24 03:41:00.444050: step: 714/470, loss: 0.07895615696907043 2023-01-24 03:41:01.219223: step: 716/470, loss: 0.06702841818332672 2023-01-24 03:41:02.003480: step: 718/470, loss: 0.1409570723772049 2023-01-24 03:41:02.658445: step: 720/470, loss: 0.7910665273666382 2023-01-24 03:41:03.369210: step: 722/470, loss: 0.0968073159456253 2023-01-24 03:41:04.218001: step: 724/470, loss: 0.12350492179393768 2023-01-24 03:41:04.921250: step: 726/470, loss: 0.18203213810920715 2023-01-24 03:41:05.661325: step: 728/470, loss: 0.12851355969905853 2023-01-24 03:41:06.443695: step: 730/470, loss: 0.22939978539943695 2023-01-24 03:41:07.165963: step: 732/470, loss: 0.05603186413645744 2023-01-24 03:41:07.929680: step: 734/470, loss: 0.16891199350357056 2023-01-24 03:41:08.652256: step: 736/470, loss: 0.14518122375011444 2023-01-24 03:41:09.413519: step: 738/470, loss: 0.12637437880039215 2023-01-24 03:41:10.153719: step: 740/470, loss: 0.9105393290519714 2023-01-24 03:41:10.953251: step: 742/470, loss: 0.7299985289573669 2023-01-24 03:41:11.729272: step: 744/470, loss: 0.11732316762208939 2023-01-24 03:41:12.474186: step: 746/470, loss: 0.1654384881258011 2023-01-24 03:41:13.257651: step: 748/470, loss: 0.16303178668022156 2023-01-24 03:41:13.976063: step: 750/470, loss: 0.03869093954563141 2023-01-24 03:41:14.763391: step: 752/470, loss: 0.33318397402763367 2023-01-24 03:41:15.495790: step: 754/470, loss: 0.2498822659254074 2023-01-24 03:41:16.234725: step: 756/470, loss: 0.19036591053009033 2023-01-24 03:41:16.962369: step: 758/470, loss: 0.24502049386501312 2023-01-24 03:41:17.674745: step: 760/470, loss: 0.05735207721590996 2023-01-24 03:41:18.357308: step: 762/470, loss: 0.1237659901380539 2023-01-24 03:41:19.077023: step: 764/470, loss: 0.05063727870583534 2023-01-24 03:41:19.869987: step: 766/470, loss: 0.09125498682260513 2023-01-24 03:41:20.552076: step: 768/470, loss: 0.09228543937206268 2023-01-24 03:41:21.373676: step: 770/470, loss: 0.09300341457128525 2023-01-24 03:41:22.114619: step: 772/470, loss: 0.12723006308078766 2023-01-24 03:41:22.831223: step: 774/470, loss: 0.19253137707710266 2023-01-24 03:41:23.575716: step: 776/470, loss: 0.16553926467895508 2023-01-24 03:41:24.355703: step: 778/470, loss: 0.08979497104883194 2023-01-24 03:41:25.104809: step: 780/470, loss: 0.29222023487091064 2023-01-24 03:41:25.837423: step: 782/470, loss: 0.1268426924943924 2023-01-24 03:41:26.585967: step: 784/470, loss: 0.10541335493326187 2023-01-24 03:41:27.363849: step: 786/470, loss: 0.14692994952201843 2023-01-24 03:41:28.077609: step: 788/470, loss: 0.06541899591684341 2023-01-24 03:41:28.811390: step: 790/470, loss: 0.07346995174884796 2023-01-24 03:41:29.588168: step: 792/470, loss: 0.13709881901741028 2023-01-24 03:41:30.289076: step: 794/470, loss: 0.09347955882549286 2023-01-24 03:41:31.055242: step: 796/470, loss: 0.08111049234867096 2023-01-24 03:41:31.879990: step: 798/470, loss: 0.07415740191936493 2023-01-24 03:41:32.618034: step: 800/470, loss: 0.05813341215252876 2023-01-24 03:41:33.361676: step: 802/470, loss: 0.09454671293497086 2023-01-24 03:41:34.013890: step: 804/470, loss: 0.037501685321331024 2023-01-24 03:41:34.782006: step: 806/470, loss: 0.1539684534072876 2023-01-24 03:41:35.624228: step: 808/470, loss: 0.11793018132448196 2023-01-24 03:41:36.365035: step: 810/470, loss: 0.1428481787443161 2023-01-24 03:41:37.099721: step: 812/470, loss: 0.1079527959227562 2023-01-24 03:41:37.819202: step: 814/470, loss: 0.1281379610300064 2023-01-24 03:41:38.598114: step: 816/470, loss: 0.1891109198331833 2023-01-24 03:41:39.397496: step: 818/470, loss: 0.34778857231140137 2023-01-24 03:41:40.155624: step: 820/470, loss: 0.07001124322414398 2023-01-24 03:41:40.894327: step: 822/470, loss: 0.04569976404309273 2023-01-24 03:41:41.650159: step: 824/470, loss: 0.04330173507332802 2023-01-24 03:41:42.368651: step: 826/470, loss: 0.09614764899015427 2023-01-24 03:41:43.160528: step: 828/470, loss: 0.35434603691101074 2023-01-24 03:41:43.893264: step: 830/470, loss: 0.35880136489868164 2023-01-24 03:41:44.778033: step: 832/470, loss: 0.15067602694034576 2023-01-24 03:41:45.518058: step: 834/470, loss: 0.0773734450340271 2023-01-24 03:41:46.277285: step: 836/470, loss: 0.14914895594120026 2023-01-24 03:41:47.053800: step: 838/470, loss: 0.21950659155845642 2023-01-24 03:41:47.823354: step: 840/470, loss: 0.03406314551830292 2023-01-24 03:41:48.571888: step: 842/470, loss: 0.07147298008203506 2023-01-24 03:41:49.316205: step: 844/470, loss: 0.07960271090269089 2023-01-24 03:41:49.996896: step: 846/470, loss: 0.04376105219125748 2023-01-24 03:41:50.743931: step: 848/470, loss: 0.13630259037017822 2023-01-24 03:41:51.455746: step: 850/470, loss: 0.09853252023458481 2023-01-24 03:41:52.193884: step: 852/470, loss: 0.05566703900694847 2023-01-24 03:41:52.978796: step: 854/470, loss: 0.09490513801574707 2023-01-24 03:41:53.718197: step: 856/470, loss: 0.11718861013650894 2023-01-24 03:41:54.567378: step: 858/470, loss: 0.05593058466911316 2023-01-24 03:41:55.326312: step: 860/470, loss: 0.5031771063804626 2023-01-24 03:41:56.078326: step: 862/470, loss: 0.1421380341053009 2023-01-24 03:41:56.920907: step: 864/470, loss: 0.20217017829418182 2023-01-24 03:41:57.595966: step: 866/470, loss: 0.08146615326404572 2023-01-24 03:41:58.343910: step: 868/470, loss: 0.07637915015220642 2023-01-24 03:41:59.108817: step: 870/470, loss: 0.6401387453079224 2023-01-24 03:41:59.910563: step: 872/470, loss: 0.1042008101940155 2023-01-24 03:42:00.641508: step: 874/470, loss: 0.18115811049938202 2023-01-24 03:42:01.316762: step: 876/470, loss: 0.1381768435239792 2023-01-24 03:42:02.009740: step: 878/470, loss: 0.1072993129491806 2023-01-24 03:42:02.744049: step: 880/470, loss: 0.055918753147125244 2023-01-24 03:42:03.513280: step: 882/470, loss: 0.12597472965717316 2023-01-24 03:42:04.217904: step: 884/470, loss: 2.0582730770111084 2023-01-24 03:42:04.995664: step: 886/470, loss: 0.03130277991294861 2023-01-24 03:42:05.773639: step: 888/470, loss: 0.11425133794546127 2023-01-24 03:42:06.546383: step: 890/470, loss: 0.1338731199502945 2023-01-24 03:42:07.315071: step: 892/470, loss: 0.29117199778556824 2023-01-24 03:42:08.049698: step: 894/470, loss: 0.40173524618148804 2023-01-24 03:42:08.823355: step: 896/470, loss: 0.24469105899333954 2023-01-24 03:42:09.497035: step: 898/470, loss: 0.07817135006189346 2023-01-24 03:42:10.240846: step: 900/470, loss: 0.1031189039349556 2023-01-24 03:42:11.058077: step: 902/470, loss: 0.5347837805747986 2023-01-24 03:42:11.805775: step: 904/470, loss: 0.223730206489563 2023-01-24 03:42:12.598058: step: 906/470, loss: 0.16207782924175262 2023-01-24 03:42:13.303085: step: 908/470, loss: 0.062290169298648834 2023-01-24 03:42:14.047932: step: 910/470, loss: 0.1052016094326973 2023-01-24 03:42:14.787496: step: 912/470, loss: 0.16469532251358032 2023-01-24 03:42:15.553043: step: 914/470, loss: 0.08949004858732224 2023-01-24 03:42:16.332022: step: 916/470, loss: 0.23239319026470184 2023-01-24 03:42:17.139450: step: 918/470, loss: 0.29859524965286255 2023-01-24 03:42:17.914509: step: 920/470, loss: 0.1092582494020462 2023-01-24 03:42:18.728224: step: 922/470, loss: 0.4721441864967346 2023-01-24 03:42:19.436893: step: 924/470, loss: 0.02895715832710266 2023-01-24 03:42:20.200687: step: 926/470, loss: 0.6657435894012451 2023-01-24 03:42:20.859390: step: 928/470, loss: 0.07168980687856674 2023-01-24 03:42:21.503850: step: 930/470, loss: 0.07691118121147156 2023-01-24 03:42:22.304961: step: 932/470, loss: 0.5273089408874512 2023-01-24 03:42:22.984143: step: 934/470, loss: 0.1843612641096115 2023-01-24 03:42:23.718000: step: 936/470, loss: 0.0656123235821724 2023-01-24 03:42:24.586793: step: 938/470, loss: 0.1719179004430771 2023-01-24 03:42:25.371423: step: 940/470, loss: 0.14247168600559235 2023-01-24 03:42:26.057630: step: 942/470, loss: 0.2547386884689331 ================================================== Loss: 0.204 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31639389626818476, 'r': 0.31639389626818476, 'f1': 0.31639389626818476}, 'combined': 0.23313234461866245, 'epoch': 15} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3559700411783955, 'r': 0.3391983757767211, 'f1': 0.3473818914896996}, 'combined': 0.23158792765979969, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32475799663299665, 'r': 0.32167680121902137, 'f1': 0.3232100557529538}, 'combined': 0.23815477792322912, 'epoch': 15} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3566004828494833, 'r': 0.3315698720340869, 'f1': 0.34362996204828133}, 'combined': 0.22908664136552084, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.318911995177818, 'r': 0.33464579380139153, 'f1': 0.32658950617283955}, 'combined': 0.24064489928525018, 'epoch': 15} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35258744062038705, 'r': 0.3471630184569965, 'f1': 0.3498542046465856}, 'combined': 0.23323613643105703, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2755681818181818, 'r': 0.3464285714285714, 'f1': 0.3069620253164557}, 'combined': 0.20464135021097046, 'epoch': 15} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5166666666666667, 'r': 0.33695652173913043, 'f1': 0.40789473684210525}, 'combined': 0.27192982456140347, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32009730538922154, 'r': 0.3043050284629981, 'f1': 0.3120014591439689}, 'combined': 0.22989581200081916, 'epoch': 14} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3650647247553185, 'r': 0.3499852020228702, 'f1': 0.3573659591032936}, 'combined': 0.23824397273552902, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 14} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:45:00.932907: step: 2/470, loss: 0.08884326368570328 2023-01-24 03:45:01.727705: step: 4/470, loss: 0.061807192862033844 2023-01-24 03:45:02.468131: step: 6/470, loss: 0.08429668098688126 2023-01-24 03:45:03.218704: step: 8/470, loss: 1.1761099100112915 2023-01-24 03:45:03.923530: step: 10/470, loss: 0.28416168689727783 2023-01-24 03:45:04.643222: step: 12/470, loss: 0.08847323060035706 2023-01-24 03:45:05.377381: step: 14/470, loss: 0.06219949573278427 2023-01-24 03:45:06.167005: step: 16/470, loss: 0.04178072139620781 2023-01-24 03:45:06.869597: step: 18/470, loss: 0.40100765228271484 2023-01-24 03:45:07.580618: step: 20/470, loss: 0.1000329926609993 2023-01-24 03:45:08.327738: step: 22/470, loss: 0.5311740040779114 2023-01-24 03:45:09.013562: step: 24/470, loss: 0.13259932398796082 2023-01-24 03:45:09.783079: step: 26/470, loss: 0.09883047640323639 2023-01-24 03:45:10.479543: step: 28/470, loss: 0.034732915461063385 2023-01-24 03:45:11.258857: step: 30/470, loss: 0.07482585310935974 2023-01-24 03:45:11.977882: step: 32/470, loss: 0.22222690284252167 2023-01-24 03:45:12.682648: step: 34/470, loss: 0.08215279877185822 2023-01-24 03:45:13.412987: step: 36/470, loss: 0.4559054970741272 2023-01-24 03:45:14.124939: step: 38/470, loss: 0.10464425384998322 2023-01-24 03:45:14.873841: step: 40/470, loss: 0.15875568985939026 2023-01-24 03:45:15.548704: step: 42/470, loss: 0.048225291073322296 2023-01-24 03:45:16.345472: step: 44/470, loss: 0.020997516810894012 2023-01-24 03:45:17.113907: step: 46/470, loss: 0.10919099301099777 2023-01-24 03:45:17.829117: step: 48/470, loss: 0.04074873775243759 2023-01-24 03:45:18.633545: step: 50/470, loss: 0.03056240826845169 2023-01-24 03:45:19.388010: step: 52/470, loss: 0.12954410910606384 2023-01-24 03:45:20.083131: step: 54/470, loss: 0.22133603692054749 2023-01-24 03:45:20.850747: step: 56/470, loss: 0.20818258821964264 2023-01-24 03:45:21.569056: step: 58/470, loss: 0.029186418280005455 2023-01-24 03:45:22.308889: step: 60/470, loss: 0.07272211462259293 2023-01-24 03:45:23.112898: step: 62/470, loss: 0.09152626991271973 2023-01-24 03:45:23.924450: step: 64/470, loss: 0.015041274018585682 2023-01-24 03:45:24.705202: step: 66/470, loss: 1.1043593883514404 2023-01-24 03:45:25.433416: step: 68/470, loss: 0.34245747327804565 2023-01-24 03:45:26.227665: step: 70/470, loss: 0.15388578176498413 2023-01-24 03:45:26.989387: step: 72/470, loss: 0.11390519142150879 2023-01-24 03:45:27.755031: step: 74/470, loss: 0.05043567717075348 2023-01-24 03:45:28.536032: step: 76/470, loss: 0.11875592917203903 2023-01-24 03:45:29.364267: step: 78/470, loss: 0.0501096136868 2023-01-24 03:45:30.103823: step: 80/470, loss: 0.3267119526863098 2023-01-24 03:45:30.942012: step: 82/470, loss: 0.04021922126412392 2023-01-24 03:45:31.693538: step: 84/470, loss: 0.14459112286567688 2023-01-24 03:45:32.439606: step: 86/470, loss: 0.09926001727581024 2023-01-24 03:45:33.211676: step: 88/470, loss: 0.8933921456336975 2023-01-24 03:45:33.984322: step: 90/470, loss: 0.06707798689603806 2023-01-24 03:45:34.736231: step: 92/470, loss: 0.41027313470840454 2023-01-24 03:45:35.490735: step: 94/470, loss: 0.0322352796792984 2023-01-24 03:45:36.199482: step: 96/470, loss: 0.029441189020872116 2023-01-24 03:45:36.909000: step: 98/470, loss: 0.21683290600776672 2023-01-24 03:45:37.550661: step: 100/470, loss: 0.06455568969249725 2023-01-24 03:45:38.372315: step: 102/470, loss: 0.050209976732730865 2023-01-24 03:45:39.098480: step: 104/470, loss: 0.036682043224573135 2023-01-24 03:45:39.810674: step: 106/470, loss: 0.14355768263339996 2023-01-24 03:45:40.507453: step: 108/470, loss: 0.023732803761959076 2023-01-24 03:45:41.229844: step: 110/470, loss: 0.02847733162343502 2023-01-24 03:45:41.952322: step: 112/470, loss: 0.1768191009759903 2023-01-24 03:45:42.661977: step: 114/470, loss: 0.06269264221191406 2023-01-24 03:45:43.411099: step: 116/470, loss: 0.008748441934585571 2023-01-24 03:45:44.140797: step: 118/470, loss: 0.10878868401050568 2023-01-24 03:45:44.911986: step: 120/470, loss: 0.09285591542720795 2023-01-24 03:45:45.690632: step: 122/470, loss: 0.6085570454597473 2023-01-24 03:45:46.428000: step: 124/470, loss: 0.06355506926774979 2023-01-24 03:45:47.159608: step: 126/470, loss: 0.11871222406625748 2023-01-24 03:45:47.938283: step: 128/470, loss: 0.08303084224462509 2023-01-24 03:45:48.746714: step: 130/470, loss: 0.11246610432863235 2023-01-24 03:45:49.556549: step: 132/470, loss: 0.25137367844581604 2023-01-24 03:45:50.355575: step: 134/470, loss: 0.24548359215259552 2023-01-24 03:45:51.062838: step: 136/470, loss: 0.12862925231456757 2023-01-24 03:45:51.703579: step: 138/470, loss: 0.059389956295490265 2023-01-24 03:45:52.387937: step: 140/470, loss: 0.05305398255586624 2023-01-24 03:45:53.126080: step: 142/470, loss: 0.12227759510278702 2023-01-24 03:45:53.916810: step: 144/470, loss: 0.12327904254198074 2023-01-24 03:45:54.624041: step: 146/470, loss: 0.05995366722345352 2023-01-24 03:45:55.345386: step: 148/470, loss: 0.13276717066764832 2023-01-24 03:45:56.057176: step: 150/470, loss: 0.050435952842235565 2023-01-24 03:45:56.754232: step: 152/470, loss: 0.32247716188430786 2023-01-24 03:45:57.477887: step: 154/470, loss: 0.06091221049427986 2023-01-24 03:45:58.176556: step: 156/470, loss: 0.2275611311197281 2023-01-24 03:45:58.847410: step: 158/470, loss: 0.28586113452911377 2023-01-24 03:45:59.514447: step: 160/470, loss: 0.0427815206348896 2023-01-24 03:46:00.318268: step: 162/470, loss: 0.4265275001525879 2023-01-24 03:46:00.984020: step: 164/470, loss: 0.06603558361530304 2023-01-24 03:46:01.800823: step: 166/470, loss: 0.08618386089801788 2023-01-24 03:46:02.509490: step: 168/470, loss: 0.13042916357517242 2023-01-24 03:46:03.239878: step: 170/470, loss: 0.10537727922201157 2023-01-24 03:46:03.916604: step: 172/470, loss: 0.025995755568146706 2023-01-24 03:46:04.747480: step: 174/470, loss: 0.07933039963245392 2023-01-24 03:46:05.394061: step: 176/470, loss: 0.026341214776039124 2023-01-24 03:46:06.100235: step: 178/470, loss: 0.07335160672664642 2023-01-24 03:46:06.798880: step: 180/470, loss: 0.012121165171265602 2023-01-24 03:46:07.479672: step: 182/470, loss: 0.11823894083499908 2023-01-24 03:46:08.188183: step: 184/470, loss: 0.1767769157886505 2023-01-24 03:46:08.920404: step: 186/470, loss: 2.089386224746704 2023-01-24 03:46:09.711046: step: 188/470, loss: 0.08466898649930954 2023-01-24 03:46:10.528088: step: 190/470, loss: 0.48831504583358765 2023-01-24 03:46:11.281991: step: 192/470, loss: 0.328980952501297 2023-01-24 03:46:11.953790: step: 194/470, loss: 0.18098825216293335 2023-01-24 03:46:12.671434: step: 196/470, loss: 0.10612848401069641 2023-01-24 03:46:13.383302: step: 198/470, loss: 0.09899935126304626 2023-01-24 03:46:14.073204: step: 200/470, loss: 0.06739851832389832 2023-01-24 03:46:15.108289: step: 202/470, loss: 0.6201255321502686 2023-01-24 03:46:15.815881: step: 204/470, loss: 0.031738992780447006 2023-01-24 03:46:16.571108: step: 206/470, loss: 0.03578285127878189 2023-01-24 03:46:17.227122: step: 208/470, loss: 0.024340057745575905 2023-01-24 03:46:17.985173: step: 210/470, loss: 0.7221086025238037 2023-01-24 03:46:18.759964: step: 212/470, loss: 0.06913825124502182 2023-01-24 03:46:19.642049: step: 214/470, loss: 0.11934089660644531 2023-01-24 03:46:20.395564: step: 216/470, loss: 0.31613805890083313 2023-01-24 03:46:21.069947: step: 218/470, loss: 0.08025716245174408 2023-01-24 03:46:21.830659: step: 220/470, loss: 0.07177558541297913 2023-01-24 03:46:22.554077: step: 222/470, loss: 0.38496649265289307 2023-01-24 03:46:23.266499: step: 224/470, loss: 0.20351849496364594 2023-01-24 03:46:23.959166: step: 226/470, loss: 0.1382501870393753 2023-01-24 03:46:24.736892: step: 228/470, loss: 0.08902169018983841 2023-01-24 03:46:25.578304: step: 230/470, loss: 0.1085016131401062 2023-01-24 03:46:26.272268: step: 232/470, loss: 0.084829181432724 2023-01-24 03:46:26.992709: step: 234/470, loss: 0.2627657353878021 2023-01-24 03:46:27.732193: step: 236/470, loss: 0.08875922858715057 2023-01-24 03:46:28.449542: step: 238/470, loss: 0.11912406980991364 2023-01-24 03:46:29.150544: step: 240/470, loss: 0.061584122478961945 2023-01-24 03:46:29.819704: step: 242/470, loss: 0.026997093111276627 2023-01-24 03:46:30.549189: step: 244/470, loss: 0.09223021566867828 2023-01-24 03:46:31.320148: step: 246/470, loss: 0.4238168001174927 2023-01-24 03:46:32.048699: step: 248/470, loss: 0.034468550235033035 2023-01-24 03:46:32.745217: step: 250/470, loss: 0.06773322820663452 2023-01-24 03:46:33.548076: step: 252/470, loss: 0.3193717300891876 2023-01-24 03:46:34.357970: step: 254/470, loss: 0.2410053014755249 2023-01-24 03:46:35.228450: step: 256/470, loss: 0.07392468303442001 2023-01-24 03:46:35.998998: step: 258/470, loss: 0.05369978025555611 2023-01-24 03:46:36.769987: step: 260/470, loss: 0.11789437383413315 2023-01-24 03:46:37.479206: step: 262/470, loss: 0.06956873089075089 2023-01-24 03:46:38.199440: step: 264/470, loss: 0.0506395660340786 2023-01-24 03:46:38.917409: step: 266/470, loss: 0.05074666440486908 2023-01-24 03:46:39.614978: step: 268/470, loss: 0.37575647234916687 2023-01-24 03:46:40.324112: step: 270/470, loss: 0.05577372387051582 2023-01-24 03:46:41.068473: step: 272/470, loss: 0.3809775114059448 2023-01-24 03:46:41.741573: step: 274/470, loss: 0.1822841912508011 2023-01-24 03:46:42.492212: step: 276/470, loss: 0.12010300159454346 2023-01-24 03:46:43.228026: step: 278/470, loss: 0.2520848214626312 2023-01-24 03:46:44.018896: step: 280/470, loss: 0.057388413697481155 2023-01-24 03:46:44.824106: step: 282/470, loss: 0.0458437018096447 2023-01-24 03:46:45.578728: step: 284/470, loss: 0.025038981810212135 2023-01-24 03:46:46.292560: step: 286/470, loss: 0.025928793475031853 2023-01-24 03:46:47.082943: step: 288/470, loss: 0.12468525022268295 2023-01-24 03:46:47.845309: step: 290/470, loss: 0.1737351417541504 2023-01-24 03:46:48.566653: step: 292/470, loss: 0.07722808420658112 2023-01-24 03:46:49.331483: step: 294/470, loss: 0.12904956936836243 2023-01-24 03:46:50.018148: step: 296/470, loss: 0.011522680521011353 2023-01-24 03:46:50.723426: step: 298/470, loss: 0.06235891208052635 2023-01-24 03:46:51.436709: step: 300/470, loss: 0.09630980342626572 2023-01-24 03:46:52.188543: step: 302/470, loss: 0.07960200309753418 2023-01-24 03:46:52.941736: step: 304/470, loss: 0.11289746314287186 2023-01-24 03:46:53.727547: step: 306/470, loss: 0.03673465922474861 2023-01-24 03:46:54.415815: step: 308/470, loss: 0.047929637134075165 2023-01-24 03:46:55.103128: step: 310/470, loss: 0.18434062600135803 2023-01-24 03:46:55.912206: step: 312/470, loss: 0.10890356451272964 2023-01-24 03:46:56.590243: step: 314/470, loss: 0.09170568734407425 2023-01-24 03:46:57.319834: step: 316/470, loss: 0.07127133011817932 2023-01-24 03:46:58.042205: step: 318/470, loss: 0.09422150999307632 2023-01-24 03:46:58.729335: step: 320/470, loss: 0.0380314365029335 2023-01-24 03:46:59.463830: step: 322/470, loss: 0.047910191118717194 2023-01-24 03:47:00.278278: step: 324/470, loss: 0.18401391804218292 2023-01-24 03:47:01.054289: step: 326/470, loss: 0.12445008754730225 2023-01-24 03:47:01.795783: step: 328/470, loss: 0.21150392293930054 2023-01-24 03:47:02.579757: step: 330/470, loss: 0.053453728556632996 2023-01-24 03:47:03.288602: step: 332/470, loss: 0.13986550271511078 2023-01-24 03:47:03.973085: step: 334/470, loss: 0.09363771975040436 2023-01-24 03:47:04.720105: step: 336/470, loss: 0.0591520257294178 2023-01-24 03:47:05.475281: step: 338/470, loss: 0.05675804242491722 2023-01-24 03:47:06.225445: step: 340/470, loss: 0.5691471695899963 2023-01-24 03:47:06.971868: step: 342/470, loss: 0.19100329279899597 2023-01-24 03:47:07.814790: step: 344/470, loss: 0.34803155064582825 2023-01-24 03:47:08.495579: step: 346/470, loss: 0.404619425535202 2023-01-24 03:47:09.261043: step: 348/470, loss: 0.04862171784043312 2023-01-24 03:47:10.034748: step: 350/470, loss: 0.15870419144630432 2023-01-24 03:47:10.764842: step: 352/470, loss: 0.0585881844162941 2023-01-24 03:47:11.530970: step: 354/470, loss: 0.09602773189544678 2023-01-24 03:47:12.301557: step: 356/470, loss: 0.0472840778529644 2023-01-24 03:47:13.083133: step: 358/470, loss: 0.05619442090392113 2023-01-24 03:47:13.962105: step: 360/470, loss: 0.11173807084560394 2023-01-24 03:47:14.672976: step: 362/470, loss: 0.26064473390579224 2023-01-24 03:47:15.394995: step: 364/470, loss: 0.17030306160449982 2023-01-24 03:47:16.269886: step: 366/470, loss: 0.06355582922697067 2023-01-24 03:47:16.992776: step: 368/470, loss: 0.6005545258522034 2023-01-24 03:47:17.762969: step: 370/470, loss: 0.24275268614292145 2023-01-24 03:47:18.580608: step: 372/470, loss: 0.09774607419967651 2023-01-24 03:47:19.333800: step: 374/470, loss: 0.2669150233268738 2023-01-24 03:47:20.075452: step: 376/470, loss: 0.10955331474542618 2023-01-24 03:47:20.785143: step: 378/470, loss: 0.28141841292381287 2023-01-24 03:47:21.571952: step: 380/470, loss: 0.05212775245308876 2023-01-24 03:47:22.271808: step: 382/470, loss: 0.1570778340101242 2023-01-24 03:47:22.997484: step: 384/470, loss: 0.09866324067115784 2023-01-24 03:47:23.688487: step: 386/470, loss: 0.17579655349254608 2023-01-24 03:47:24.403034: step: 388/470, loss: 0.17191143333911896 2023-01-24 03:47:25.094547: step: 390/470, loss: 0.10095331817865372 2023-01-24 03:47:25.809984: step: 392/470, loss: 0.07796035706996918 2023-01-24 03:47:26.615362: step: 394/470, loss: 0.10861441493034363 2023-01-24 03:47:27.347024: step: 396/470, loss: 0.15292930603027344 2023-01-24 03:47:28.071330: step: 398/470, loss: 0.058260828256607056 2023-01-24 03:47:28.869238: step: 400/470, loss: 0.058411069214344025 2023-01-24 03:47:29.630266: step: 402/470, loss: 0.0554041787981987 2023-01-24 03:47:30.327649: step: 404/470, loss: 1.0163735151290894 2023-01-24 03:47:31.143925: step: 406/470, loss: 0.3198876678943634 2023-01-24 03:47:31.847132: step: 408/470, loss: 0.0814136192202568 2023-01-24 03:47:32.588636: step: 410/470, loss: 0.7785913944244385 2023-01-24 03:47:33.354250: step: 412/470, loss: 0.06416447460651398 2023-01-24 03:47:34.193073: step: 414/470, loss: 0.26347091794013977 2023-01-24 03:47:34.960283: step: 416/470, loss: 0.08544527739286423 2023-01-24 03:47:35.756363: step: 418/470, loss: 0.45779088139533997 2023-01-24 03:47:36.525451: step: 420/470, loss: 0.9352731108665466 2023-01-24 03:47:37.366258: step: 422/470, loss: 0.1798955500125885 2023-01-24 03:47:38.168103: step: 424/470, loss: 0.04317692294716835 2023-01-24 03:47:38.978032: step: 426/470, loss: 0.152594193816185 2023-01-24 03:47:39.767110: step: 428/470, loss: 0.45149144530296326 2023-01-24 03:47:40.536119: step: 430/470, loss: 0.06854032725095749 2023-01-24 03:47:41.246801: step: 432/470, loss: 0.15352515876293182 2023-01-24 03:47:41.941079: step: 434/470, loss: 0.24940556287765503 2023-01-24 03:47:42.663303: step: 436/470, loss: 0.04720636084675789 2023-01-24 03:47:43.334880: step: 438/470, loss: 0.09284143149852753 2023-01-24 03:47:44.107358: step: 440/470, loss: 0.05546830594539642 2023-01-24 03:47:44.806762: step: 442/470, loss: 0.04827970266342163 2023-01-24 03:47:45.569709: step: 444/470, loss: 0.0933823436498642 2023-01-24 03:47:46.240967: step: 446/470, loss: 0.2462373822927475 2023-01-24 03:47:46.952310: step: 448/470, loss: 0.14782100915908813 2023-01-24 03:47:47.687777: step: 450/470, loss: 0.04853854700922966 2023-01-24 03:47:48.415775: step: 452/470, loss: 0.4875885844230652 2023-01-24 03:47:49.243952: step: 454/470, loss: 0.29292890429496765 2023-01-24 03:47:49.999841: step: 456/470, loss: 0.07768307626247406 2023-01-24 03:47:50.748669: step: 458/470, loss: 0.29037296772003174 2023-01-24 03:47:51.486110: step: 460/470, loss: 0.05874329432845116 2023-01-24 03:47:52.200225: step: 462/470, loss: 0.1875893473625183 2023-01-24 03:47:52.993187: step: 464/470, loss: 0.04672089219093323 2023-01-24 03:47:53.728244: step: 466/470, loss: 0.3265398144721985 2023-01-24 03:47:54.422335: step: 468/470, loss: 0.11972032487392426 2023-01-24 03:47:55.072081: step: 470/470, loss: 0.11014973372220993 2023-01-24 03:47:55.862509: step: 472/470, loss: 0.06904499232769012 2023-01-24 03:47:56.674880: step: 474/470, loss: 0.17454054951667786 2023-01-24 03:47:57.452655: step: 476/470, loss: 0.07249860465526581 2023-01-24 03:47:58.161846: step: 478/470, loss: 0.08408724516630173 2023-01-24 03:47:58.944761: step: 480/470, loss: 0.15881037712097168 2023-01-24 03:47:59.701785: step: 482/470, loss: 0.08231707662343979 2023-01-24 03:48:00.440184: step: 484/470, loss: 0.09459856152534485 2023-01-24 03:48:01.180758: step: 486/470, loss: 0.8342221975326538 2023-01-24 03:48:01.919114: step: 488/470, loss: 0.08584868907928467 2023-01-24 03:48:02.578474: step: 490/470, loss: 0.131908118724823 2023-01-24 03:48:03.301110: step: 492/470, loss: 0.06489690393209457 2023-01-24 03:48:04.089058: step: 494/470, loss: 0.1734696328639984 2023-01-24 03:48:04.819072: step: 496/470, loss: 0.17412090301513672 2023-01-24 03:48:05.637704: step: 498/470, loss: 0.01628013141453266 2023-01-24 03:48:06.317460: step: 500/470, loss: 0.02044498547911644 2023-01-24 03:48:06.998702: step: 502/470, loss: 0.1580786108970642 2023-01-24 03:48:07.688980: step: 504/470, loss: 0.0427110381424427 2023-01-24 03:48:08.438833: step: 506/470, loss: 0.06857065856456757 2023-01-24 03:48:09.117461: step: 508/470, loss: 0.9077308177947998 2023-01-24 03:48:09.777474: step: 510/470, loss: 0.12271424382925034 2023-01-24 03:48:10.507086: step: 512/470, loss: 0.08618942648172379 2023-01-24 03:48:11.232498: step: 514/470, loss: 0.09517448395490646 2023-01-24 03:48:11.954664: step: 516/470, loss: 0.0746607780456543 2023-01-24 03:48:12.630397: step: 518/470, loss: 0.06032832711935043 2023-01-24 03:48:13.407403: step: 520/470, loss: 0.07779128104448318 2023-01-24 03:48:14.235859: step: 522/470, loss: 0.2717607021331787 2023-01-24 03:48:14.916029: step: 524/470, loss: 0.1430988907814026 2023-01-24 03:48:15.649572: step: 526/470, loss: 0.0658825933933258 2023-01-24 03:48:16.406813: step: 528/470, loss: 0.1088942140340805 2023-01-24 03:48:17.189005: step: 530/470, loss: 0.12835966050624847 2023-01-24 03:48:17.934521: step: 532/470, loss: 0.055738162249326706 2023-01-24 03:48:18.735388: step: 534/470, loss: 0.0650675892829895 2023-01-24 03:48:19.606147: step: 536/470, loss: 0.03095190040767193 2023-01-24 03:48:20.442433: step: 538/470, loss: 0.21852679550647736 2023-01-24 03:48:21.224453: step: 540/470, loss: 0.050854191184043884 2023-01-24 03:48:21.961402: step: 542/470, loss: 0.07948761433362961 2023-01-24 03:48:22.692049: step: 544/470, loss: 0.7797904014587402 2023-01-24 03:48:23.391432: step: 546/470, loss: 0.07549011707305908 2023-01-24 03:48:24.156172: step: 548/470, loss: 0.07842864841222763 2023-01-24 03:48:24.909867: step: 550/470, loss: 0.0789763554930687 2023-01-24 03:48:25.576868: step: 552/470, loss: 0.07106094062328339 2023-01-24 03:48:26.274251: step: 554/470, loss: 0.397691935300827 2023-01-24 03:48:27.127393: step: 556/470, loss: 0.378380686044693 2023-01-24 03:48:27.795808: step: 558/470, loss: 0.07716850936412811 2023-01-24 03:48:28.465206: step: 560/470, loss: 0.08261480927467346 2023-01-24 03:48:29.216815: step: 562/470, loss: 0.1947333961725235 2023-01-24 03:48:30.016502: step: 564/470, loss: 0.07653369754552841 2023-01-24 03:48:30.769625: step: 566/470, loss: 0.13547946512699127 2023-01-24 03:48:31.513955: step: 568/470, loss: 0.03947483003139496 2023-01-24 03:48:32.333999: step: 570/470, loss: 0.543188750743866 2023-01-24 03:48:33.076541: step: 572/470, loss: 0.1665872037410736 2023-01-24 03:48:33.782177: step: 574/470, loss: 5.638265609741211 2023-01-24 03:48:34.476046: step: 576/470, loss: 0.11097951233386993 2023-01-24 03:48:35.213698: step: 578/470, loss: 0.2536165714263916 2023-01-24 03:48:35.932942: step: 580/470, loss: 0.029628895223140717 2023-01-24 03:48:36.654490: step: 582/470, loss: 0.19913624227046967 2023-01-24 03:48:37.436650: step: 584/470, loss: 0.2235163003206253 2023-01-24 03:48:38.239335: step: 586/470, loss: 0.10956007242202759 2023-01-24 03:48:38.977114: step: 588/470, loss: 0.039578016847372055 2023-01-24 03:48:39.705174: step: 590/470, loss: 0.12435401976108551 2023-01-24 03:48:40.442239: step: 592/470, loss: 0.21014420688152313 2023-01-24 03:48:41.087625: step: 594/470, loss: 0.012091286480426788 2023-01-24 03:48:41.881773: step: 596/470, loss: 0.08850495517253876 2023-01-24 03:48:42.657725: step: 598/470, loss: 0.05613786727190018 2023-01-24 03:48:43.440732: step: 600/470, loss: 0.3040921986103058 2023-01-24 03:48:44.181141: step: 602/470, loss: 0.24259580671787262 2023-01-24 03:48:45.016982: step: 604/470, loss: 0.006539918482303619 2023-01-24 03:48:45.730780: step: 606/470, loss: 0.029432980343699455 2023-01-24 03:48:46.495125: step: 608/470, loss: 0.3112735450267792 2023-01-24 03:48:47.282852: step: 610/470, loss: 0.18472446501255035 2023-01-24 03:48:48.076876: step: 612/470, loss: 0.03998330980539322 2023-01-24 03:48:48.800019: step: 614/470, loss: 0.028782224282622337 2023-01-24 03:48:49.544416: step: 616/470, loss: 0.10626031458377838 2023-01-24 03:48:50.349835: step: 618/470, loss: 0.03874929994344711 2023-01-24 03:48:51.155388: step: 620/470, loss: 0.05607333034276962 2023-01-24 03:48:51.864375: step: 622/470, loss: 0.03576376661658287 2023-01-24 03:48:52.610944: step: 624/470, loss: 0.10499916970729828 2023-01-24 03:48:53.378350: step: 626/470, loss: 0.09289852529764175 2023-01-24 03:48:54.118258: step: 628/470, loss: 0.1215863972902298 2023-01-24 03:48:54.840244: step: 630/470, loss: 0.02862360328435898 2023-01-24 03:48:55.641918: step: 632/470, loss: 0.05546386539936066 2023-01-24 03:48:56.356248: step: 634/470, loss: 0.0644495040178299 2023-01-24 03:48:57.151194: step: 636/470, loss: 0.3706085681915283 2023-01-24 03:48:57.888830: step: 638/470, loss: 0.057276401668787 2023-01-24 03:48:58.679688: step: 640/470, loss: 0.04506406560540199 2023-01-24 03:48:59.390223: step: 642/470, loss: 0.15678298473358154 2023-01-24 03:49:00.166254: step: 644/470, loss: 0.11337969452142715 2023-01-24 03:49:00.972752: step: 646/470, loss: 0.5968871116638184 2023-01-24 03:49:01.832404: step: 648/470, loss: 0.04819050803780556 2023-01-24 03:49:02.553111: step: 650/470, loss: 0.09025636315345764 2023-01-24 03:49:03.258407: step: 652/470, loss: 1.1916296482086182 2023-01-24 03:49:04.048252: step: 654/470, loss: 0.03702091798186302 2023-01-24 03:49:04.846385: step: 656/470, loss: 0.12011308968067169 2023-01-24 03:49:05.576725: step: 658/470, loss: 0.4539858102798462 2023-01-24 03:49:06.396886: step: 660/470, loss: 0.09682758152484894 2023-01-24 03:49:07.127044: step: 662/470, loss: 0.05873757600784302 2023-01-24 03:49:07.877576: step: 664/470, loss: 0.10081055015325546 2023-01-24 03:49:08.649956: step: 666/470, loss: 0.06736086308956146 2023-01-24 03:49:09.412709: step: 668/470, loss: 0.22437730431556702 2023-01-24 03:49:10.232327: step: 670/470, loss: 0.11580760031938553 2023-01-24 03:49:11.028632: step: 672/470, loss: 0.02462395839393139 2023-01-24 03:49:11.739475: step: 674/470, loss: 0.1718244105577469 2023-01-24 03:49:12.525836: step: 676/470, loss: 0.02999183163046837 2023-01-24 03:49:13.310119: step: 678/470, loss: 0.06906621158123016 2023-01-24 03:49:14.091385: step: 680/470, loss: 0.4289069175720215 2023-01-24 03:49:14.883478: step: 682/470, loss: 0.26691025495529175 2023-01-24 03:49:15.697001: step: 684/470, loss: 0.11220283061265945 2023-01-24 03:49:16.471870: step: 686/470, loss: 0.0480489581823349 2023-01-24 03:49:17.313305: step: 688/470, loss: 0.7784223556518555 2023-01-24 03:49:18.088457: step: 690/470, loss: 0.012960987165570259 2023-01-24 03:49:19.012026: step: 692/470, loss: 0.5317075252532959 2023-01-24 03:49:19.750848: step: 694/470, loss: 0.17809821665287018 2023-01-24 03:49:20.572401: step: 696/470, loss: 0.07087529450654984 2023-01-24 03:49:21.335262: step: 698/470, loss: 0.19641615450382233 2023-01-24 03:49:22.042685: step: 700/470, loss: 0.08749356865882874 2023-01-24 03:49:22.863475: step: 702/470, loss: 0.09865260124206543 2023-01-24 03:49:23.657213: step: 704/470, loss: 0.11657115072011948 2023-01-24 03:49:24.413244: step: 706/470, loss: 0.15794336795806885 2023-01-24 03:49:25.118967: step: 708/470, loss: 0.10114340484142303 2023-01-24 03:49:25.851266: step: 710/470, loss: 0.214201882481575 2023-01-24 03:49:26.679257: step: 712/470, loss: 0.04397993162274361 2023-01-24 03:49:27.383900: step: 714/470, loss: 0.1239086389541626 2023-01-24 03:49:28.107938: step: 716/470, loss: 0.11514618992805481 2023-01-24 03:49:28.858045: step: 718/470, loss: 0.09257897734642029 2023-01-24 03:49:29.588607: step: 720/470, loss: 0.25863686203956604 2023-01-24 03:49:30.368992: step: 722/470, loss: 0.03553589805960655 2023-01-24 03:49:31.183500: step: 724/470, loss: 0.16370436549186707 2023-01-24 03:49:31.936992: step: 726/470, loss: 0.06427986919879913 2023-01-24 03:49:32.743146: step: 728/470, loss: 0.08685880899429321 2023-01-24 03:49:33.554756: step: 730/470, loss: 0.08242062479257584 2023-01-24 03:49:34.413317: step: 732/470, loss: 0.03324902057647705 2023-01-24 03:49:35.192724: step: 734/470, loss: 0.044015150517225266 2023-01-24 03:49:35.974233: step: 736/470, loss: 0.06782012432813644 2023-01-24 03:49:36.680040: step: 738/470, loss: 0.06624935567378998 2023-01-24 03:49:37.416353: step: 740/470, loss: 0.09224491566419601 2023-01-24 03:49:38.185694: step: 742/470, loss: 0.10784140229225159 2023-01-24 03:49:38.881825: step: 744/470, loss: 0.0007243788568302989 2023-01-24 03:49:39.587730: step: 746/470, loss: 0.0974595695734024 2023-01-24 03:49:40.269336: step: 748/470, loss: 0.2744196653366089 2023-01-24 03:49:40.982549: step: 750/470, loss: 0.013780027627944946 2023-01-24 03:49:41.673156: step: 752/470, loss: 0.11913798004388809 2023-01-24 03:49:42.491145: step: 754/470, loss: 0.21007420122623444 2023-01-24 03:49:43.279408: step: 756/470, loss: 0.15533891320228577 2023-01-24 03:49:44.080436: step: 758/470, loss: 3.9849534034729004 2023-01-24 03:49:44.737602: step: 760/470, loss: 3.1590588092803955 2023-01-24 03:49:45.505451: step: 762/470, loss: 0.1593295782804489 2023-01-24 03:49:46.173705: step: 764/470, loss: 0.06664041429758072 2023-01-24 03:49:46.944486: step: 766/470, loss: 0.26761144399642944 2023-01-24 03:49:47.739488: step: 768/470, loss: 0.09391117095947266 2023-01-24 03:49:48.492586: step: 770/470, loss: 0.043674781918525696 2023-01-24 03:49:49.380217: step: 772/470, loss: 0.14461608231067657 2023-01-24 03:49:50.101151: step: 774/470, loss: 0.23033620417118073 2023-01-24 03:49:50.874317: step: 776/470, loss: 0.05211387947201729 2023-01-24 03:49:51.613721: step: 778/470, loss: 0.09210966527462006 2023-01-24 03:49:52.360546: step: 780/470, loss: 0.0924573689699173 2023-01-24 03:49:53.126878: step: 782/470, loss: 0.10759110748767853 2023-01-24 03:49:53.854952: step: 784/470, loss: 0.1636630892753601 2023-01-24 03:49:54.575547: step: 786/470, loss: 0.08606382459402084 2023-01-24 03:49:55.351905: step: 788/470, loss: 0.10852282494306564 2023-01-24 03:49:56.094045: step: 790/470, loss: 0.7059446573257446 2023-01-24 03:49:56.825746: step: 792/470, loss: 0.03020882047712803 2023-01-24 03:49:57.545088: step: 794/470, loss: 0.05972389131784439 2023-01-24 03:49:58.285710: step: 796/470, loss: 0.0445023775100708 2023-01-24 03:49:59.033143: step: 798/470, loss: 0.07505831122398376 2023-01-24 03:49:59.840536: step: 800/470, loss: 0.07195542752742767 2023-01-24 03:50:00.497440: step: 802/470, loss: 0.23525984585285187 2023-01-24 03:50:01.306884: step: 804/470, loss: 0.0642390102148056 2023-01-24 03:50:02.055481: step: 806/470, loss: 0.048134271055459976 2023-01-24 03:50:02.765664: step: 808/470, loss: 0.02630416862666607 2023-01-24 03:50:03.491683: step: 810/470, loss: 0.03595459833741188 2023-01-24 03:50:04.226019: step: 812/470, loss: 0.5879958868026733 2023-01-24 03:50:04.959781: step: 814/470, loss: 0.025413349270820618 2023-01-24 03:50:05.660538: step: 816/470, loss: 0.1703702062368393 2023-01-24 03:50:06.399431: step: 818/470, loss: 0.14653141796588898 2023-01-24 03:50:07.079242: step: 820/470, loss: 0.026079224422574043 2023-01-24 03:50:07.874467: step: 822/470, loss: 0.06979137659072876 2023-01-24 03:50:08.774313: step: 824/470, loss: 0.15548716485500336 2023-01-24 03:50:09.575107: step: 826/470, loss: 0.10300223529338837 2023-01-24 03:50:10.270018: step: 828/470, loss: 0.10399255156517029 2023-01-24 03:50:11.067727: step: 830/470, loss: 0.06566653400659561 2023-01-24 03:50:11.791491: step: 832/470, loss: 0.15286491811275482 2023-01-24 03:50:12.558650: step: 834/470, loss: 0.0035868631675839424 2023-01-24 03:50:13.391392: step: 836/470, loss: 0.10651691257953644 2023-01-24 03:50:14.113982: step: 838/470, loss: 0.08586788922548294 2023-01-24 03:50:14.870138: step: 840/470, loss: 0.09690944850444794 2023-01-24 03:50:15.660474: step: 842/470, loss: 0.08737190812826157 2023-01-24 03:50:16.454344: step: 844/470, loss: 0.5851885676383972 2023-01-24 03:50:17.113655: step: 846/470, loss: 0.05255848914384842 2023-01-24 03:50:17.897763: step: 848/470, loss: 0.055961720645427704 2023-01-24 03:50:18.643003: step: 850/470, loss: 0.1446704864501953 2023-01-24 03:50:19.397737: step: 852/470, loss: 0.19530963897705078 2023-01-24 03:50:20.094063: step: 854/470, loss: 0.10102560371160507 2023-01-24 03:50:20.809416: step: 856/470, loss: 0.047898419201374054 2023-01-24 03:50:21.495133: step: 858/470, loss: 0.2797347903251648 2023-01-24 03:50:22.220064: step: 860/470, loss: 0.021785497665405273 2023-01-24 03:50:22.910858: step: 862/470, loss: 0.08698924630880356 2023-01-24 03:50:23.627682: step: 864/470, loss: 0.06238389387726784 2023-01-24 03:50:24.359151: step: 866/470, loss: 0.03106139786541462 2023-01-24 03:50:25.196408: step: 868/470, loss: 0.40585944056510925 2023-01-24 03:50:25.998715: step: 870/470, loss: 0.40325307846069336 2023-01-24 03:50:26.727684: step: 872/470, loss: 0.04273195564746857 2023-01-24 03:50:27.495044: step: 874/470, loss: 0.04423359036445618 2023-01-24 03:50:28.278095: step: 876/470, loss: 0.052610598504543304 2023-01-24 03:50:29.057782: step: 878/470, loss: 0.06774067133665085 2023-01-24 03:50:29.861700: step: 880/470, loss: 0.14847953617572784 2023-01-24 03:50:30.552310: step: 882/470, loss: 0.24171356856822968 2023-01-24 03:50:31.263549: step: 884/470, loss: 0.03664424270391464 2023-01-24 03:50:32.034998: step: 886/470, loss: 0.32642123103141785 2023-01-24 03:50:32.783270: step: 888/470, loss: 0.09348136186599731 2023-01-24 03:50:33.521047: step: 890/470, loss: 0.07769238203763962 2023-01-24 03:50:34.278536: step: 892/470, loss: 0.02832121029496193 2023-01-24 03:50:35.095953: step: 894/470, loss: 0.07402808964252472 2023-01-24 03:50:35.777352: step: 896/470, loss: 0.04894760623574257 2023-01-24 03:50:36.505705: step: 898/470, loss: 0.04703076183795929 2023-01-24 03:50:37.229844: step: 900/470, loss: 0.037153225392103195 2023-01-24 03:50:37.961596: step: 902/470, loss: 0.21902887523174286 2023-01-24 03:50:38.669873: step: 904/470, loss: 0.0130607383325696 2023-01-24 03:50:39.377022: step: 906/470, loss: 0.08418157696723938 2023-01-24 03:50:40.210839: step: 908/470, loss: 0.036279212683439255 2023-01-24 03:50:40.968354: step: 910/470, loss: 0.05508041754364967 2023-01-24 03:50:41.712116: step: 912/470, loss: 0.07078361511230469 2023-01-24 03:50:42.380841: step: 914/470, loss: 0.050121258944272995 2023-01-24 03:50:43.083548: step: 916/470, loss: 0.05768989026546478 2023-01-24 03:50:43.870285: step: 918/470, loss: 0.2717893123626709 2023-01-24 03:50:44.578026: step: 920/470, loss: 0.1305137723684311 2023-01-24 03:50:45.401421: step: 922/470, loss: 0.10596335679292679 2023-01-24 03:50:46.193537: step: 924/470, loss: 0.16074761748313904 2023-01-24 03:50:46.895944: step: 926/470, loss: 0.34018227458000183 2023-01-24 03:50:47.659527: step: 928/470, loss: 0.05126188322901726 2023-01-24 03:50:48.357715: step: 930/470, loss: 0.15903814136981964 2023-01-24 03:50:49.179799: step: 932/470, loss: 0.0827934592962265 2023-01-24 03:50:49.953548: step: 934/470, loss: 0.19107471406459808 2023-01-24 03:50:50.702513: step: 936/470, loss: 0.11879337579011917 2023-01-24 03:50:51.445622: step: 938/470, loss: 0.07795053720474243 2023-01-24 03:50:52.200769: step: 940/470, loss: 0.026459183543920517 2023-01-24 03:50:53.019914: step: 942/470, loss: 0.029973674565553665 ================================================== Loss: 0.184 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3285139647844566, 'r': 0.34222802024035426, 'f1': 0.33523079306071873}, 'combined': 0.247012163307898, 'epoch': 16} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35646940080436673, 'r': 0.33934791181280255, 'f1': 0.34769800806803886}, 'combined': 0.23179867204535917, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32350511210840643, 'r': 0.33639620765731826, 'f1': 0.32982474685657065}, 'combined': 0.24302876084168362, 'epoch': 16} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3659007330893624, 'r': 0.34059347777482435, 'f1': 0.3527938411578031}, 'combined': 0.235195894105202, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23958333333333334, 'r': 0.32857142857142857, 'f1': 0.27710843373493976}, 'combined': 0.18473895582329317, 'epoch': 16} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5588235294117647, 'r': 0.41304347826086957, 'f1': 0.475}, 'combined': 0.31666666666666665, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3130350753897265, 'r': 0.332042897804283, 'f1': 0.3222589450144699}, 'combined': 0.23745395948434622, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33810196782859, 'r': 0.3293327525246784, 'f1': 0.33365975219288585}, 'combined': 0.2224398347952572, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26851851851851855, 'r': 0.4142857142857143, 'f1': 0.3258426966292135}, 'combined': 0.21722846441947566, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30950132625512705, 'r': 0.3100886152992544, 'f1': 0.3097946924411508}, 'combined': 0.22826977337769006, 'epoch': 7} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3463740826755645, 'r': 0.325411962398369, 'f1': 0.33556597608390504}, 'combined': 0.22371065072260332, 'epoch': 7} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.4891304347826087, 'f1': 0.5232558139534884}, 'combined': 0.3488372093023256, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:53:32.812515: step: 2/470, loss: 0.4924387037754059 2023-01-24 03:53:33.508665: step: 4/470, loss: 0.0939476415514946 2023-01-24 03:53:34.269321: step: 6/470, loss: 0.11822464317083359 2023-01-24 03:53:34.913793: step: 8/470, loss: 3.426934242248535 2023-01-24 03:53:35.629016: step: 10/470, loss: 0.07467096298933029 2023-01-24 03:53:36.336879: step: 12/470, loss: 0.107358917593956 2023-01-24 03:53:37.069698: step: 14/470, loss: 0.07628848403692245 2023-01-24 03:53:37.816625: step: 16/470, loss: 0.06268688291311264 2023-01-24 03:53:38.573134: step: 18/470, loss: 0.10565035790205002 2023-01-24 03:53:39.365293: step: 20/470, loss: 0.4389980733394623 2023-01-24 03:53:40.065536: step: 22/470, loss: 0.050272297114133835 2023-01-24 03:53:40.786897: step: 24/470, loss: 0.02019389532506466 2023-01-24 03:53:41.488215: step: 26/470, loss: 0.1535024642944336 2023-01-24 03:53:42.132091: step: 28/470, loss: 0.09952471405267715 2023-01-24 03:53:42.875395: step: 30/470, loss: 0.0704805925488472 2023-01-24 03:53:43.542689: step: 32/470, loss: 0.28921636939048767 2023-01-24 03:53:44.306020: step: 34/470, loss: 0.090544693171978 2023-01-24 03:53:45.093784: step: 36/470, loss: 0.16174203157424927 2023-01-24 03:53:45.809735: step: 38/470, loss: 0.01790589466691017 2023-01-24 03:53:46.692363: step: 40/470, loss: 0.06238782778382301 2023-01-24 03:53:47.487737: step: 42/470, loss: 0.017772074788808823 2023-01-24 03:53:48.229867: step: 44/470, loss: 0.17414626479148865 2023-01-24 03:53:48.996445: step: 46/470, loss: 0.08931662142276764 2023-01-24 03:53:49.762507: step: 48/470, loss: 0.1393406093120575 2023-01-24 03:53:50.595761: step: 50/470, loss: 0.0651363730430603 2023-01-24 03:53:51.326033: step: 52/470, loss: 0.0816269963979721 2023-01-24 03:53:52.027998: step: 54/470, loss: 0.057508260011672974 2023-01-24 03:53:52.776588: step: 56/470, loss: 0.04575724899768829 2023-01-24 03:53:53.441487: step: 58/470, loss: 0.059317465871572495 2023-01-24 03:53:54.175437: step: 60/470, loss: 0.016667962074279785 2023-01-24 03:53:54.884860: step: 62/470, loss: 0.04501299932599068 2023-01-24 03:53:55.829058: step: 64/470, loss: 0.04957332834601402 2023-01-24 03:53:56.531351: step: 66/470, loss: 0.06242368370294571 2023-01-24 03:53:57.207864: step: 68/470, loss: 0.3788084387779236 2023-01-24 03:53:57.900283: step: 70/470, loss: 0.01375067513436079 2023-01-24 03:53:58.644302: step: 72/470, loss: 0.010087433271110058 2023-01-24 03:53:59.415255: step: 74/470, loss: 0.04814882203936577 2023-01-24 03:54:00.066183: step: 76/470, loss: 0.025498254224658012 2023-01-24 03:54:00.821193: step: 78/470, loss: 0.03883810341358185 2023-01-24 03:54:01.501424: step: 80/470, loss: 0.020485306158661842 2023-01-24 03:54:02.213914: step: 82/470, loss: 0.09962893277406693 2023-01-24 03:54:02.988657: step: 84/470, loss: 0.08178018778562546 2023-01-24 03:54:03.667661: step: 86/470, loss: 0.058422110974788666 2023-01-24 03:54:04.392062: step: 88/470, loss: 0.073599673807621 2023-01-24 03:54:05.171213: step: 90/470, loss: 0.06374432146549225 2023-01-24 03:54:05.891384: step: 92/470, loss: 0.16310732066631317 2023-01-24 03:54:06.644988: step: 94/470, loss: 0.6309743523597717 2023-01-24 03:54:07.533867: step: 96/470, loss: 0.043083060532808304 2023-01-24 03:54:08.267509: step: 98/470, loss: 0.07802923768758774 2023-01-24 03:54:08.984304: step: 100/470, loss: 0.06232817471027374 2023-01-24 03:54:09.688817: step: 102/470, loss: 0.152653768658638 2023-01-24 03:54:10.433170: step: 104/470, loss: 0.054097384214401245 2023-01-24 03:54:11.227104: step: 106/470, loss: 0.04027635604143143 2023-01-24 03:54:11.947183: step: 108/470, loss: 0.03251076117157936 2023-01-24 03:54:12.647953: step: 110/470, loss: 0.0775684118270874 2023-01-24 03:54:13.452199: step: 112/470, loss: 0.04833029955625534 2023-01-24 03:54:14.154207: step: 114/470, loss: 0.07459627836942673 2023-01-24 03:54:14.869446: step: 116/470, loss: 0.01013647299259901 2023-01-24 03:54:15.603231: step: 118/470, loss: 0.07257718592882156 2023-01-24 03:54:16.365308: step: 120/470, loss: 0.22532156109809875 2023-01-24 03:54:17.054686: step: 122/470, loss: 0.5136581659317017 2023-01-24 03:54:17.713584: step: 124/470, loss: 0.02729104831814766 2023-01-24 03:54:18.529704: step: 126/470, loss: 0.07071585953235626 2023-01-24 03:54:19.359725: step: 128/470, loss: 0.06299882382154465 2023-01-24 03:54:20.052186: step: 130/470, loss: 0.07367923855781555 2023-01-24 03:54:20.769116: step: 132/470, loss: 0.10907070338726044 2023-01-24 03:54:21.541326: step: 134/470, loss: 0.10193345695734024 2023-01-24 03:54:22.237444: step: 136/470, loss: 0.4187318980693817 2023-01-24 03:54:22.942103: step: 138/470, loss: 0.07536610960960388 2023-01-24 03:54:23.708844: step: 140/470, loss: 0.14740565419197083 2023-01-24 03:54:24.421563: step: 142/470, loss: 0.13611826300621033 2023-01-24 03:54:25.200249: step: 144/470, loss: 0.5455102324485779 2023-01-24 03:54:25.917715: step: 146/470, loss: 0.11672738194465637 2023-01-24 03:54:26.590755: step: 148/470, loss: 0.07301642745733261 2023-01-24 03:54:27.393836: step: 150/470, loss: 0.07370822876691818 2023-01-24 03:54:28.109941: step: 152/470, loss: 0.24967250227928162 2023-01-24 03:54:28.950621: step: 154/470, loss: 0.012790908105671406 2023-01-24 03:54:29.668970: step: 156/470, loss: 0.1556771844625473 2023-01-24 03:54:30.393718: step: 158/470, loss: 0.06868446618318558 2023-01-24 03:54:31.156943: step: 160/470, loss: 0.06958885490894318 2023-01-24 03:54:31.946589: step: 162/470, loss: 0.03733561187982559 2023-01-24 03:54:32.638634: step: 164/470, loss: 0.1148039773106575 2023-01-24 03:54:33.308401: step: 166/470, loss: 0.5623202323913574 2023-01-24 03:54:34.066031: step: 168/470, loss: 0.6287685632705688 2023-01-24 03:54:34.798412: step: 170/470, loss: 0.11442598700523376 2023-01-24 03:54:35.552664: step: 172/470, loss: 0.06008310243487358 2023-01-24 03:54:36.335480: step: 174/470, loss: 0.06911762058734894 2023-01-24 03:54:37.070004: step: 176/470, loss: 0.005540335550904274 2023-01-24 03:54:37.781348: step: 178/470, loss: 0.07414489984512329 2023-01-24 03:54:38.537376: step: 180/470, loss: 0.07211507856845856 2023-01-24 03:54:39.435032: step: 182/470, loss: 0.050867147743701935 2023-01-24 03:54:40.116805: step: 184/470, loss: 0.013534078374505043 2023-01-24 03:54:40.910216: step: 186/470, loss: 0.05755781754851341 2023-01-24 03:54:41.681212: step: 188/470, loss: 0.7376323938369751 2023-01-24 03:54:42.370565: step: 190/470, loss: 0.023348214104771614 2023-01-24 03:54:43.157204: step: 192/470, loss: 0.1323942095041275 2023-01-24 03:54:43.878732: step: 194/470, loss: 0.5270007848739624 2023-01-24 03:54:44.606009: step: 196/470, loss: 0.01694687083363533 2023-01-24 03:54:45.411013: step: 198/470, loss: 0.04331256076693535 2023-01-24 03:54:46.254144: step: 200/470, loss: 0.1521039754152298 2023-01-24 03:54:46.934154: step: 202/470, loss: 0.05080660060048103 2023-01-24 03:54:47.586716: step: 204/470, loss: 0.23060721158981323 2023-01-24 03:54:48.235794: step: 206/470, loss: 0.08327927440404892 2023-01-24 03:54:48.958410: step: 208/470, loss: 0.1002572625875473 2023-01-24 03:54:49.656074: step: 210/470, loss: 0.051106516271829605 2023-01-24 03:54:50.435705: step: 212/470, loss: 0.05127272009849548 2023-01-24 03:54:51.187874: step: 214/470, loss: 0.0413568951189518 2023-01-24 03:54:52.001632: step: 216/470, loss: 0.04580258950591087 2023-01-24 03:54:52.773210: step: 218/470, loss: 0.10093220323324203 2023-01-24 03:54:53.551298: step: 220/470, loss: 0.12144112586975098 2023-01-24 03:54:54.310838: step: 222/470, loss: 0.06039506569504738 2023-01-24 03:54:55.017465: step: 224/470, loss: 0.028329022228717804 2023-01-24 03:54:55.693354: step: 226/470, loss: 0.16305898129940033 2023-01-24 03:54:56.439494: step: 228/470, loss: 0.10858103632926941 2023-01-24 03:54:57.150840: step: 230/470, loss: 0.05416190251708031 2023-01-24 03:54:57.959173: step: 232/470, loss: 0.03306606039404869 2023-01-24 03:54:58.756981: step: 234/470, loss: 0.08176732063293457 2023-01-24 03:54:59.487357: step: 236/470, loss: 0.4104897975921631 2023-01-24 03:55:00.252241: step: 238/470, loss: 0.257541686296463 2023-01-24 03:55:00.993581: step: 240/470, loss: 0.045913856476545334 2023-01-24 03:55:01.672334: step: 242/470, loss: 0.030991556122899055 2023-01-24 03:55:02.398626: step: 244/470, loss: 0.056961365044116974 2023-01-24 03:55:03.271753: step: 246/470, loss: 0.1344350129365921 2023-01-24 03:55:04.039600: step: 248/470, loss: 0.10876922309398651 2023-01-24 03:55:04.801801: step: 250/470, loss: 0.05064590647816658 2023-01-24 03:55:05.638032: step: 252/470, loss: 0.05507608503103256 2023-01-24 03:55:06.368435: step: 254/470, loss: 0.1291094422340393 2023-01-24 03:55:07.101814: step: 256/470, loss: 0.1127728670835495 2023-01-24 03:55:07.813170: step: 258/470, loss: 0.2618001699447632 2023-01-24 03:55:08.575108: step: 260/470, loss: 0.30301615595817566 2023-01-24 03:55:09.376028: step: 262/470, loss: 0.08459173142910004 2023-01-24 03:55:10.155093: step: 264/470, loss: 0.036896541714668274 2023-01-24 03:55:10.872668: step: 266/470, loss: 0.03432435914874077 2023-01-24 03:55:11.658593: step: 268/470, loss: 0.04738718271255493 2023-01-24 03:55:12.407525: step: 270/470, loss: 0.2793467342853546 2023-01-24 03:55:13.113811: step: 272/470, loss: 0.07192108035087585 2023-01-24 03:55:13.834810: step: 274/470, loss: 0.12388882040977478 2023-01-24 03:55:14.505233: step: 276/470, loss: 0.05751514807343483 2023-01-24 03:55:15.319287: step: 278/470, loss: 0.12290464341640472 2023-01-24 03:55:16.081296: step: 280/470, loss: 0.0717790424823761 2023-01-24 03:55:16.862139: step: 282/470, loss: 0.07550595700740814 2023-01-24 03:55:17.624659: step: 284/470, loss: 0.133597269654274 2023-01-24 03:55:18.361013: step: 286/470, loss: 0.14142487943172455 2023-01-24 03:55:19.028949: step: 288/470, loss: 0.17519471049308777 2023-01-24 03:55:19.833767: step: 290/470, loss: 0.04353666305541992 2023-01-24 03:55:20.648171: step: 292/470, loss: 1.747153639793396 2023-01-24 03:55:21.442977: step: 294/470, loss: 0.499929815530777 2023-01-24 03:55:22.287309: step: 296/470, loss: 0.07138823717832565 2023-01-24 03:55:23.107798: step: 298/470, loss: 0.03818880766630173 2023-01-24 03:55:23.818719: step: 300/470, loss: 0.29137733578681946 2023-01-24 03:55:24.553393: step: 302/470, loss: 0.43531379103660583 2023-01-24 03:55:25.401234: step: 304/470, loss: 0.06073610112071037 2023-01-24 03:55:26.153156: step: 306/470, loss: 0.12262983620166779 2023-01-24 03:55:26.908547: step: 308/470, loss: 0.2984396815299988 2023-01-24 03:55:27.672482: step: 310/470, loss: 0.5396106243133545 2023-01-24 03:55:28.515978: step: 312/470, loss: 0.14061273634433746 2023-01-24 03:55:29.284674: step: 314/470, loss: 0.28197863698005676 2023-01-24 03:55:29.992085: step: 316/470, loss: 0.04360943287611008 2023-01-24 03:55:30.665745: step: 318/470, loss: 0.0115542346611619 2023-01-24 03:55:31.375067: step: 320/470, loss: 0.1039966344833374 2023-01-24 03:55:32.124195: step: 322/470, loss: 0.08922134339809418 2023-01-24 03:55:32.815904: step: 324/470, loss: 0.08642390370368958 2023-01-24 03:55:33.590466: step: 326/470, loss: 0.05966321378946304 2023-01-24 03:55:34.457612: step: 328/470, loss: 0.0547308623790741 2023-01-24 03:55:35.208678: step: 330/470, loss: 0.08806410431861877 2023-01-24 03:55:36.000723: step: 332/470, loss: 0.07780762016773224 2023-01-24 03:55:36.709143: step: 334/470, loss: 0.08751654624938965 2023-01-24 03:55:37.468632: step: 336/470, loss: 0.07845862209796906 2023-01-24 03:55:38.234189: step: 338/470, loss: 0.034335438162088394 2023-01-24 03:55:39.063884: step: 340/470, loss: 0.028561709448695183 2023-01-24 03:55:39.819090: step: 342/470, loss: 0.10376818478107452 2023-01-24 03:55:40.578736: step: 344/470, loss: 0.05915750935673714 2023-01-24 03:55:41.371399: step: 346/470, loss: 0.25353285670280457 2023-01-24 03:55:42.147863: step: 348/470, loss: 0.13213659822940826 2023-01-24 03:55:42.887084: step: 350/470, loss: 0.31304770708084106 2023-01-24 03:55:43.618783: step: 352/470, loss: 0.1196952536702156 2023-01-24 03:55:44.485576: step: 354/470, loss: 0.11991266906261444 2023-01-24 03:55:45.222984: step: 356/470, loss: 0.03160081058740616 2023-01-24 03:55:45.953200: step: 358/470, loss: 0.11861895769834518 2023-01-24 03:55:46.753809: step: 360/470, loss: 0.06153372302651405 2023-01-24 03:55:47.514899: step: 362/470, loss: 0.08058642596006393 2023-01-24 03:55:48.293283: step: 364/470, loss: 0.13972775638103485 2023-01-24 03:55:49.086923: step: 366/470, loss: 0.12595215439796448 2023-01-24 03:55:49.851142: step: 368/470, loss: 0.05694999545812607 2023-01-24 03:55:50.613149: step: 370/470, loss: 0.08451858162879944 2023-01-24 03:55:51.327803: step: 372/470, loss: 0.09174233675003052 2023-01-24 03:55:52.038052: step: 374/470, loss: 0.00898781232535839 2023-01-24 03:55:52.781126: step: 376/470, loss: 0.025392355397343636 2023-01-24 03:55:53.522349: step: 378/470, loss: 0.04026934504508972 2023-01-24 03:55:54.264962: step: 380/470, loss: 0.01192416436970234 2023-01-24 03:55:55.056405: step: 382/470, loss: 0.0662471279501915 2023-01-24 03:55:55.841854: step: 384/470, loss: 0.08708631247282028 2023-01-24 03:55:56.587975: step: 386/470, loss: 0.01594570279121399 2023-01-24 03:55:57.342090: step: 388/470, loss: 0.15144769847393036 2023-01-24 03:55:58.128306: step: 390/470, loss: 0.03741728886961937 2023-01-24 03:55:58.850195: step: 392/470, loss: 1.6541204452514648 2023-01-24 03:55:59.521280: step: 394/470, loss: 0.07524050027132034 2023-01-24 03:56:00.298511: step: 396/470, loss: 0.004373232834041119 2023-01-24 03:56:01.044868: step: 398/470, loss: 0.12484033405780792 2023-01-24 03:56:01.851177: step: 400/470, loss: 0.1493765264749527 2023-01-24 03:56:02.498575: step: 402/470, loss: 0.029209822416305542 2023-01-24 03:56:03.179101: step: 404/470, loss: 0.07852840423583984 2023-01-24 03:56:03.887790: step: 406/470, loss: 0.03971928730607033 2023-01-24 03:56:04.518691: step: 408/470, loss: 0.027434857562184334 2023-01-24 03:56:05.265560: step: 410/470, loss: 0.149210125207901 2023-01-24 03:56:05.985828: step: 412/470, loss: 0.06711407005786896 2023-01-24 03:56:06.762411: step: 414/470, loss: 0.041621264070272446 2023-01-24 03:56:07.428373: step: 416/470, loss: 0.06607449799776077 2023-01-24 03:56:08.233872: step: 418/470, loss: 0.03028223291039467 2023-01-24 03:56:08.950250: step: 420/470, loss: 0.08194995671510696 2023-01-24 03:56:09.616721: step: 422/470, loss: 1.202500820159912 2023-01-24 03:56:10.248931: step: 424/470, loss: 0.03522910550236702 2023-01-24 03:56:10.928688: step: 426/470, loss: 0.11419682204723358 2023-01-24 03:56:11.689845: step: 428/470, loss: 0.027502695098519325 2023-01-24 03:56:12.360005: step: 430/470, loss: 0.1228659600019455 2023-01-24 03:56:13.061404: step: 432/470, loss: 0.013959257863461971 2023-01-24 03:56:13.859411: step: 434/470, loss: 0.12251380831003189 2023-01-24 03:56:14.658531: step: 436/470, loss: 0.03891543298959732 2023-01-24 03:56:15.465289: step: 438/470, loss: 0.058498311787843704 2023-01-24 03:56:16.223385: step: 440/470, loss: 0.02747928909957409 2023-01-24 03:56:16.966266: step: 442/470, loss: 0.10275592654943466 2023-01-24 03:56:17.710538: step: 444/470, loss: 0.5213198065757751 2023-01-24 03:56:18.396983: step: 446/470, loss: 0.19728517532348633 2023-01-24 03:56:19.167606: step: 448/470, loss: 0.4907250702381134 2023-01-24 03:56:19.919976: step: 450/470, loss: 0.10592763125896454 2023-01-24 03:56:20.654136: step: 452/470, loss: 0.0349123552441597 2023-01-24 03:56:21.453566: step: 454/470, loss: 0.05272674188017845 2023-01-24 03:56:22.175868: step: 456/470, loss: 0.0323164276778698 2023-01-24 03:56:22.918409: step: 458/470, loss: 0.08098090440034866 2023-01-24 03:56:23.654390: step: 460/470, loss: 0.07947229593992233 2023-01-24 03:56:24.431573: step: 462/470, loss: 0.194414883852005 2023-01-24 03:56:25.183642: step: 464/470, loss: 0.12346761673688889 2023-01-24 03:56:25.992225: step: 466/470, loss: 0.05517350882291794 2023-01-24 03:56:26.879665: step: 468/470, loss: 0.4561017155647278 2023-01-24 03:56:27.525783: step: 470/470, loss: 0.08983838558197021 2023-01-24 03:56:28.250888: step: 472/470, loss: 0.4218137264251709 2023-01-24 03:56:29.056999: step: 474/470, loss: 0.05566902086138725 2023-01-24 03:56:29.806657: step: 476/470, loss: 0.05252564325928688 2023-01-24 03:56:30.511426: step: 478/470, loss: 0.031113555654883385 2023-01-24 03:56:31.237896: step: 480/470, loss: 0.6434183716773987 2023-01-24 03:56:31.988020: step: 482/470, loss: 1.0721107721328735 2023-01-24 03:56:32.715034: step: 484/470, loss: 0.01686706766486168 2023-01-24 03:56:33.473811: step: 486/470, loss: 0.016208596527576447 2023-01-24 03:56:34.172457: step: 488/470, loss: 0.07484219968318939 2023-01-24 03:56:34.884003: step: 490/470, loss: 0.03733556717634201 2023-01-24 03:56:35.586562: step: 492/470, loss: 0.107573002576828 2023-01-24 03:56:36.245139: step: 494/470, loss: 0.06075252965092659 2023-01-24 03:56:37.044201: step: 496/470, loss: 0.058279551565647125 2023-01-24 03:56:37.849420: step: 498/470, loss: 0.05781916528940201 2023-01-24 03:56:38.648173: step: 500/470, loss: 0.13210691511631012 2023-01-24 03:56:39.387341: step: 502/470, loss: 1.0277457237243652 2023-01-24 03:56:40.110732: step: 504/470, loss: 0.04871448129415512 2023-01-24 03:56:40.845168: step: 506/470, loss: 0.5733938813209534 2023-01-24 03:56:41.591269: step: 508/470, loss: 0.016267672181129456 2023-01-24 03:56:42.326524: step: 510/470, loss: 0.05442097783088684 2023-01-24 03:56:43.092544: step: 512/470, loss: 1.0532729625701904 2023-01-24 03:56:43.881405: step: 514/470, loss: 0.0817883089184761 2023-01-24 03:56:44.692523: step: 516/470, loss: 0.08425731956958771 2023-01-24 03:56:45.457326: step: 518/470, loss: 0.1442107856273651 2023-01-24 03:56:46.305222: step: 520/470, loss: 0.5000361204147339 2023-01-24 03:56:47.072218: step: 522/470, loss: 0.05786876752972603 2023-01-24 03:56:47.738870: step: 524/470, loss: 0.1195589229464531 2023-01-24 03:56:48.503847: step: 526/470, loss: 0.06997283548116684 2023-01-24 03:56:49.267218: step: 528/470, loss: 0.020335132256150246 2023-01-24 03:56:49.998873: step: 530/470, loss: 0.08447545021772385 2023-01-24 03:56:50.766225: step: 532/470, loss: 0.04109674319624901 2023-01-24 03:56:51.515689: step: 534/470, loss: 0.11775404959917068 2023-01-24 03:56:52.243955: step: 536/470, loss: 0.1142866313457489 2023-01-24 03:56:53.003903: step: 538/470, loss: 0.3399909436702728 2023-01-24 03:56:53.693053: step: 540/470, loss: 0.04983226954936981 2023-01-24 03:56:54.414329: step: 542/470, loss: 0.038683708757162094 2023-01-24 03:56:55.125220: step: 544/470, loss: 0.00999883096665144 2023-01-24 03:56:55.854174: step: 546/470, loss: 0.03342743217945099 2023-01-24 03:56:56.637285: step: 548/470, loss: 0.11033465713262558 2023-01-24 03:56:57.449006: step: 550/470, loss: 0.04878013953566551 2023-01-24 03:56:58.201979: step: 552/470, loss: 0.09103644639253616 2023-01-24 03:56:58.874000: step: 554/470, loss: 0.015820780768990517 2023-01-24 03:56:59.647999: step: 556/470, loss: 0.07098133116960526 2023-01-24 03:57:00.403191: step: 558/470, loss: 0.5348267555236816 2023-01-24 03:57:01.169149: step: 560/470, loss: 0.06724416464567184 2023-01-24 03:57:01.967735: step: 562/470, loss: 0.05711890384554863 2023-01-24 03:57:02.723071: step: 564/470, loss: 0.025366008281707764 2023-01-24 03:57:03.426694: step: 566/470, loss: 0.030365778133273125 2023-01-24 03:57:04.156253: step: 568/470, loss: 0.08409339189529419 2023-01-24 03:57:04.875015: step: 570/470, loss: 1.9741016626358032 2023-01-24 03:57:05.523387: step: 572/470, loss: 0.0606938898563385 2023-01-24 03:57:06.334725: step: 574/470, loss: 0.08465118706226349 2023-01-24 03:57:07.051490: step: 576/470, loss: 0.03369426727294922 2023-01-24 03:57:07.849807: step: 578/470, loss: 0.042784154415130615 2023-01-24 03:57:08.499090: step: 580/470, loss: 0.11120610684156418 2023-01-24 03:57:09.212118: step: 582/470, loss: 0.11828941851854324 2023-01-24 03:57:09.883828: step: 584/470, loss: 0.08425047993659973 2023-01-24 03:57:10.645593: step: 586/470, loss: 0.03659561276435852 2023-01-24 03:57:11.377761: step: 588/470, loss: 0.050091128796339035 2023-01-24 03:57:12.059252: step: 590/470, loss: 0.022585172206163406 2023-01-24 03:57:12.878751: step: 592/470, loss: 0.1083977222442627 2023-01-24 03:57:13.582525: step: 594/470, loss: 0.008220894262194633 2023-01-24 03:57:14.257033: step: 596/470, loss: 0.12439019232988358 2023-01-24 03:57:15.077095: step: 598/470, loss: 0.027765508741140366 2023-01-24 03:57:15.795699: step: 600/470, loss: 0.036968786269426346 2023-01-24 03:57:16.630487: step: 602/470, loss: 0.07465256750583649 2023-01-24 03:57:17.298901: step: 604/470, loss: 0.01624050922691822 2023-01-24 03:57:18.016150: step: 606/470, loss: 0.012987270019948483 2023-01-24 03:57:18.869650: step: 608/470, loss: 0.05419148504734039 2023-01-24 03:57:19.634424: step: 610/470, loss: 0.1707671582698822 2023-01-24 03:57:20.350453: step: 612/470, loss: 0.08779844641685486 2023-01-24 03:57:21.174216: step: 614/470, loss: 0.12106174230575562 2023-01-24 03:57:21.881073: step: 616/470, loss: 0.5510255694389343 2023-01-24 03:57:22.679804: step: 618/470, loss: 0.07842077314853668 2023-01-24 03:57:23.337971: step: 620/470, loss: 0.08350328356027603 2023-01-24 03:57:24.004528: step: 622/470, loss: 0.1282799243927002 2023-01-24 03:57:24.721236: step: 624/470, loss: 0.08514758944511414 2023-01-24 03:57:25.454898: step: 626/470, loss: 0.08214154094457626 2023-01-24 03:57:26.133669: step: 628/470, loss: 0.02037135139107704 2023-01-24 03:57:26.865845: step: 630/470, loss: 0.0328313373029232 2023-01-24 03:57:27.612849: step: 632/470, loss: 0.12586846947669983 2023-01-24 03:57:28.409117: step: 634/470, loss: 1.1721528768539429 2023-01-24 03:57:29.225540: step: 636/470, loss: 0.06453651934862137 2023-01-24 03:57:30.014495: step: 638/470, loss: 0.12365590780973434 2023-01-24 03:57:30.759331: step: 640/470, loss: 0.23456744849681854 2023-01-24 03:57:31.501990: step: 642/470, loss: 0.06679598242044449 2023-01-24 03:57:32.409570: step: 644/470, loss: 0.30910104513168335 2023-01-24 03:57:33.132277: step: 646/470, loss: 0.06718228757381439 2023-01-24 03:57:33.908524: step: 648/470, loss: 0.07001607865095139 2023-01-24 03:57:34.557774: step: 650/470, loss: 0.05453366041183472 2023-01-24 03:57:35.346295: step: 652/470, loss: 0.08327239006757736 2023-01-24 03:57:36.056411: step: 654/470, loss: 0.4447418451309204 2023-01-24 03:57:36.786609: step: 656/470, loss: 0.08664055168628693 2023-01-24 03:57:37.548653: step: 658/470, loss: 0.038746707141399384 2023-01-24 03:57:38.428826: step: 660/470, loss: 0.8028762340545654 2023-01-24 03:57:39.131676: step: 662/470, loss: 0.03993918374180794 2023-01-24 03:57:39.862673: step: 664/470, loss: 0.029848841950297356 2023-01-24 03:57:40.540251: step: 666/470, loss: 0.09658187627792358 2023-01-24 03:57:41.281274: step: 668/470, loss: 0.06456496566534042 2023-01-24 03:57:42.068450: step: 670/470, loss: 0.04641230031847954 2023-01-24 03:57:42.828712: step: 672/470, loss: 0.09238805621862411 2023-01-24 03:57:43.550132: step: 674/470, loss: 0.056428153067827225 2023-01-24 03:57:44.268462: step: 676/470, loss: 0.16270509362220764 2023-01-24 03:57:44.994075: step: 678/470, loss: 0.07294663041830063 2023-01-24 03:57:45.798830: step: 680/470, loss: 0.1018824577331543 2023-01-24 03:57:46.574445: step: 682/470, loss: 0.3484339118003845 2023-01-24 03:57:47.375367: step: 684/470, loss: 0.10737051069736481 2023-01-24 03:57:48.170814: step: 686/470, loss: 0.05558640882372856 2023-01-24 03:57:48.875419: step: 688/470, loss: 0.13277633488178253 2023-01-24 03:57:49.652139: step: 690/470, loss: 0.08180245012044907 2023-01-24 03:57:50.398629: step: 692/470, loss: 0.06329744309186935 2023-01-24 03:57:51.150579: step: 694/470, loss: 0.14542846381664276 2023-01-24 03:57:51.880828: step: 696/470, loss: 0.0733979269862175 2023-01-24 03:57:52.642061: step: 698/470, loss: 0.08746114373207092 2023-01-24 03:57:53.446670: step: 700/470, loss: 0.0837959498167038 2023-01-24 03:57:54.153156: step: 702/470, loss: 0.20224621891975403 2023-01-24 03:57:54.847101: step: 704/470, loss: 0.35301804542541504 2023-01-24 03:57:55.647466: step: 706/470, loss: 0.06424695998430252 2023-01-24 03:57:56.432984: step: 708/470, loss: 0.08036937564611435 2023-01-24 03:57:57.138327: step: 710/470, loss: 0.022141428664326668 2023-01-24 03:57:57.875113: step: 712/470, loss: 0.16465261578559875 2023-01-24 03:57:58.609460: step: 714/470, loss: 0.03278358653187752 2023-01-24 03:57:59.328973: step: 716/470, loss: 0.1571766436100006 2023-01-24 03:58:00.042660: step: 718/470, loss: 0.24385596811771393 2023-01-24 03:58:00.760227: step: 720/470, loss: 0.11035379767417908 2023-01-24 03:58:01.476963: step: 722/470, loss: 0.1532362550497055 2023-01-24 03:58:02.288729: step: 724/470, loss: 0.11130030453205109 2023-01-24 03:58:03.051270: step: 726/470, loss: 0.3597228527069092 2023-01-24 03:58:03.905700: step: 728/470, loss: 0.02049565315246582 2023-01-24 03:58:04.745721: step: 730/470, loss: 0.11515626311302185 2023-01-24 03:58:05.518075: step: 732/470, loss: 0.020818457007408142 2023-01-24 03:58:06.282013: step: 734/470, loss: 0.06096053496003151 2023-01-24 03:58:07.015090: step: 736/470, loss: 0.061641495674848557 2023-01-24 03:58:07.727111: step: 738/470, loss: 0.09029534459114075 2023-01-24 03:58:08.402621: step: 740/470, loss: 0.07005026191473007 2023-01-24 03:58:09.152921: step: 742/470, loss: 0.07654117792844772 2023-01-24 03:58:09.895167: step: 744/470, loss: 0.06442206352949142 2023-01-24 03:58:10.630958: step: 746/470, loss: 0.6992735862731934 2023-01-24 03:58:11.378356: step: 748/470, loss: 0.060101427137851715 2023-01-24 03:58:12.096935: step: 750/470, loss: 0.25749671459198 2023-01-24 03:58:12.798940: step: 752/470, loss: 0.1867312639951706 2023-01-24 03:58:13.529184: step: 754/470, loss: 0.045231420546770096 2023-01-24 03:58:14.246468: step: 756/470, loss: 0.04085368663072586 2023-01-24 03:58:15.009062: step: 758/470, loss: 0.6277973055839539 2023-01-24 03:58:15.739878: step: 760/470, loss: 0.1876017451286316 2023-01-24 03:58:16.560578: step: 762/470, loss: 0.23550686240196228 2023-01-24 03:58:17.277629: step: 764/470, loss: 0.06056235358119011 2023-01-24 03:58:18.024592: step: 766/470, loss: 0.11358806490898132 2023-01-24 03:58:18.780606: step: 768/470, loss: 0.16502447426319122 2023-01-24 03:58:19.473375: step: 770/470, loss: 0.11180621385574341 2023-01-24 03:58:20.236090: step: 772/470, loss: 0.05992017686367035 2023-01-24 03:58:21.066354: step: 774/470, loss: 0.09332225471735 2023-01-24 03:58:21.778238: step: 776/470, loss: 0.1730080544948578 2023-01-24 03:58:22.522866: step: 778/470, loss: 0.9607817530632019 2023-01-24 03:58:23.287082: step: 780/470, loss: 0.11541511863470078 2023-01-24 03:58:24.082150: step: 782/470, loss: 0.028602128848433495 2023-01-24 03:58:24.817365: step: 784/470, loss: 0.060707904398441315 2023-01-24 03:58:25.524973: step: 786/470, loss: 0.01682162657380104 2023-01-24 03:58:26.279408: step: 788/470, loss: 0.05692780017852783 2023-01-24 03:58:26.978144: step: 790/470, loss: 0.23850642144680023 2023-01-24 03:58:27.643615: step: 792/470, loss: 0.0492679700255394 2023-01-24 03:58:28.341902: step: 794/470, loss: 0.058511458337306976 2023-01-24 03:58:29.084506: step: 796/470, loss: 0.06941919773817062 2023-01-24 03:58:29.827571: step: 798/470, loss: 0.07419852167367935 2023-01-24 03:58:30.520819: step: 800/470, loss: 0.0205177403986454 2023-01-24 03:58:31.265079: step: 802/470, loss: 0.08371607959270477 2023-01-24 03:58:32.012564: step: 804/470, loss: 0.03052728809416294 2023-01-24 03:58:32.724933: step: 806/470, loss: 0.040770161896944046 2023-01-24 03:58:33.508922: step: 808/470, loss: 0.09312587976455688 2023-01-24 03:58:34.271259: step: 810/470, loss: 0.33695071935653687 2023-01-24 03:58:35.050520: step: 812/470, loss: 0.1501253843307495 2023-01-24 03:58:35.808588: step: 814/470, loss: 0.05207722634077072 2023-01-24 03:58:36.505736: step: 816/470, loss: 0.026118360459804535 2023-01-24 03:58:37.294327: step: 818/470, loss: 0.029398392885923386 2023-01-24 03:58:38.077157: step: 820/470, loss: 0.10104161500930786 2023-01-24 03:58:38.901835: step: 822/470, loss: 0.05879708379507065 2023-01-24 03:58:39.567535: step: 824/470, loss: 0.07474382221698761 2023-01-24 03:58:40.269057: step: 826/470, loss: 0.038804251700639725 2023-01-24 03:58:41.004860: step: 828/470, loss: 0.09933701902627945 2023-01-24 03:58:41.742806: step: 830/470, loss: 0.03747892752289772 2023-01-24 03:58:42.617724: step: 832/470, loss: 0.21376895904541016 2023-01-24 03:58:43.297164: step: 834/470, loss: 0.11364637315273285 2023-01-24 03:58:44.010562: step: 836/470, loss: 0.22915977239608765 2023-01-24 03:58:44.756783: step: 838/470, loss: 0.10806091874837875 2023-01-24 03:58:45.562028: step: 840/470, loss: 0.15018446743488312 2023-01-24 03:58:46.448481: step: 842/470, loss: 0.11992557346820831 2023-01-24 03:58:47.188840: step: 844/470, loss: 0.08834873884916306 2023-01-24 03:58:47.953143: step: 846/470, loss: 0.08360497653484344 2023-01-24 03:58:48.689874: step: 848/470, loss: 0.04521460458636284 2023-01-24 03:58:49.449249: step: 850/470, loss: 0.07406667619943619 2023-01-24 03:58:50.154343: step: 852/470, loss: 0.1984773576259613 2023-01-24 03:58:50.926028: step: 854/470, loss: 0.07086524367332458 2023-01-24 03:58:51.631099: step: 856/470, loss: 2.956345319747925 2023-01-24 03:58:52.374106: step: 858/470, loss: 0.28626927733421326 2023-01-24 03:58:53.067887: step: 860/470, loss: 0.02902461588382721 2023-01-24 03:58:53.844968: step: 862/470, loss: 0.2611776292324066 2023-01-24 03:58:54.598950: step: 864/470, loss: 0.09869827330112457 2023-01-24 03:58:55.362143: step: 866/470, loss: 0.08181178569793701 2023-01-24 03:58:56.113489: step: 868/470, loss: 0.16328763961791992 2023-01-24 03:58:56.867569: step: 870/470, loss: 0.01159185916185379 2023-01-24 03:58:57.716828: step: 872/470, loss: 0.29373496770858765 2023-01-24 03:58:58.454675: step: 874/470, loss: 0.027659697458148003 2023-01-24 03:58:59.220107: step: 876/470, loss: 0.05898299440741539 2023-01-24 03:58:59.887771: step: 878/470, loss: 0.11096679419279099 2023-01-24 03:59:00.597532: step: 880/470, loss: 0.3364698886871338 2023-01-24 03:59:01.380792: step: 882/470, loss: 0.04068123549222946 2023-01-24 03:59:02.229972: step: 884/470, loss: 0.08637043088674545 2023-01-24 03:59:03.011104: step: 886/470, loss: 0.10517950356006622 2023-01-24 03:59:03.725174: step: 888/470, loss: 0.1388533115386963 2023-01-24 03:59:04.559951: step: 890/470, loss: 0.10692066699266434 2023-01-24 03:59:05.254278: step: 892/470, loss: 0.4482387900352478 2023-01-24 03:59:06.029840: step: 894/470, loss: 0.04289408028125763 2023-01-24 03:59:06.713621: step: 896/470, loss: 0.1445654332637787 2023-01-24 03:59:07.455175: step: 898/470, loss: 0.3577795922756195 2023-01-24 03:59:08.177572: step: 900/470, loss: 0.03457728773355484 2023-01-24 03:59:08.970931: step: 902/470, loss: 0.07262220978736877 2023-01-24 03:59:09.692198: step: 904/470, loss: 0.0956735610961914 2023-01-24 03:59:10.431469: step: 906/470, loss: 0.2117413431406021 2023-01-24 03:59:11.295058: step: 908/470, loss: 0.08425169438123703 2023-01-24 03:59:12.005340: step: 910/470, loss: 0.23610138893127441 2023-01-24 03:59:12.793413: step: 912/470, loss: 0.6577050685882568 2023-01-24 03:59:13.567557: step: 914/470, loss: 0.06360519677400589 2023-01-24 03:59:14.215823: step: 916/470, loss: 0.06351134926080704 2023-01-24 03:59:14.980074: step: 918/470, loss: 0.042916182428598404 2023-01-24 03:59:15.839089: step: 920/470, loss: 0.07389490306377411 2023-01-24 03:59:16.614218: step: 922/470, loss: 0.1284465193748474 2023-01-24 03:59:17.380175: step: 924/470, loss: 0.07059228420257568 2023-01-24 03:59:18.127988: step: 926/470, loss: 0.1461246758699417 2023-01-24 03:59:18.855754: step: 928/470, loss: 0.24688246846199036 2023-01-24 03:59:19.583280: step: 930/470, loss: 0.016211118549108505 2023-01-24 03:59:20.245823: step: 932/470, loss: 0.05001834034919739 2023-01-24 03:59:21.027653: step: 934/470, loss: 0.043632302433252335 2023-01-24 03:59:21.776757: step: 936/470, loss: 0.10320833325386047 2023-01-24 03:59:22.540203: step: 938/470, loss: 0.03066897578537464 2023-01-24 03:59:23.193853: step: 940/470, loss: 0.06406714022159576 2023-01-24 03:59:23.859433: step: 942/470, loss: 0.11159685254096985 ================================================== Loss: 0.157 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3448376840696118, 'r': 0.32586179633143586, 'f1': 0.33508130081300813}, 'combined': 0.2469020111253744, 'epoch': 17} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3617054222274556, 'r': 0.31579665709858623, 'f1': 0.33719560922230973}, 'combined': 0.2247970728148731, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3227639296187683, 'r': 0.3227639296187683, 'f1': 0.3227639296187683}, 'combined': 0.23782605340330296, 'epoch': 17} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3594935609776605, 'r': 0.3238898717654499, 'f1': 0.34076425557518253}, 'combined': 0.22717617038345497, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29069767441860467, 'r': 0.35714285714285715, 'f1': 0.32051282051282054}, 'combined': 0.2136752136752137, 'epoch': 17} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.20689655172413793, 'f1': 0.2790697674418604}, 'combined': 0.18604651162790692, 'epoch': 17} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3448376840696118, 'r': 0.32586179633143586, 'f1': 0.33508130081300813}, 'combined': 0.2469020111253744, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3617054222274556, 'r': 0.31579665709858623, 'f1': 0.33719560922230973}, 'combined': 0.2247970728148731, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29069767441860467, 'r': 0.35714285714285715, 'f1': 0.32051282051282054}, 'combined': 0.2136752136752137, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:02:15.055463: step: 2/470, loss: 0.06617684662342072 2023-01-24 04:02:15.731133: step: 4/470, loss: 0.017266876995563507 2023-01-24 04:02:16.460471: step: 6/470, loss: 0.04035760834813118 2023-01-24 04:02:17.202652: step: 8/470, loss: 0.07709954679012299 2023-01-24 04:02:17.985726: step: 10/470, loss: 0.160027876496315 2023-01-24 04:02:18.740018: step: 12/470, loss: 0.06833804398775101 2023-01-24 04:02:19.507427: step: 14/470, loss: 0.4198529124259949 2023-01-24 04:02:20.371271: step: 16/470, loss: 0.1530308574438095 2023-01-24 04:02:21.151988: step: 18/470, loss: 0.3755422532558441 2023-01-24 04:02:21.951609: step: 20/470, loss: 0.14509013295173645 2023-01-24 04:02:22.714951: step: 22/470, loss: 0.10982964187860489 2023-01-24 04:02:23.407804: step: 24/470, loss: 0.038370661437511444 2023-01-24 04:02:24.107402: step: 26/470, loss: 0.0846431627869606 2023-01-24 04:02:24.877820: step: 28/470, loss: 0.11229250580072403 2023-01-24 04:02:25.677687: step: 30/470, loss: 0.7171079516410828 2023-01-24 04:02:26.410299: step: 32/470, loss: 0.08063667267560959 2023-01-24 04:02:27.117879: step: 34/470, loss: 0.026313627138733864 2023-01-24 04:02:27.890351: step: 36/470, loss: 0.07363957166671753 2023-01-24 04:02:28.650845: step: 38/470, loss: 1.1096469163894653 2023-01-24 04:02:29.405379: step: 40/470, loss: 0.07688609510660172 2023-01-24 04:02:30.150428: step: 42/470, loss: 0.18271422386169434 2023-01-24 04:02:30.920886: step: 44/470, loss: 0.07778210192918777 2023-01-24 04:02:31.677774: step: 46/470, loss: 0.04570697247982025 2023-01-24 04:02:32.563818: step: 48/470, loss: 0.19306838512420654 2023-01-24 04:02:33.314179: step: 50/470, loss: 0.01399903278797865 2023-01-24 04:02:34.007839: step: 52/470, loss: 0.12448009103536606 2023-01-24 04:02:34.865121: step: 54/470, loss: 0.08125516772270203 2023-01-24 04:02:35.681329: step: 56/470, loss: 0.0601322315633297 2023-01-24 04:02:36.500664: step: 58/470, loss: 0.5188689231872559 2023-01-24 04:02:37.194500: step: 60/470, loss: 0.03932400047779083 2023-01-24 04:02:37.895473: step: 62/470, loss: 0.027753813192248344 2023-01-24 04:02:38.639841: step: 64/470, loss: 0.019212109968066216 2023-01-24 04:02:39.287840: step: 66/470, loss: 0.003584251506254077 2023-01-24 04:02:40.122483: step: 68/470, loss: 0.09412702918052673 2023-01-24 04:02:40.797306: step: 70/470, loss: 0.038881633430719376 2023-01-24 04:02:41.528967: step: 72/470, loss: 0.0578327551484108 2023-01-24 04:02:42.233987: step: 74/470, loss: 0.23435544967651367 2023-01-24 04:02:43.023916: step: 76/470, loss: 0.034016791731119156 2023-01-24 04:02:43.658822: step: 78/470, loss: 0.011392496526241302 2023-01-24 04:02:44.501962: step: 80/470, loss: 0.07721582800149918 2023-01-24 04:02:45.194902: step: 82/470, loss: 0.025755221024155617 2023-01-24 04:02:45.890168: step: 84/470, loss: 0.03272271901369095 2023-01-24 04:02:46.502928: step: 86/470, loss: 0.013092340901494026 2023-01-24 04:02:47.349637: step: 88/470, loss: 0.057706430554389954 2023-01-24 04:02:48.104716: step: 90/470, loss: 0.05745483562350273 2023-01-24 04:02:48.890981: step: 92/470, loss: 0.04043126851320267 2023-01-24 04:02:49.648286: step: 94/470, loss: 0.09307193011045456 2023-01-24 04:02:50.379141: step: 96/470, loss: 0.10384157299995422 2023-01-24 04:02:51.169584: step: 98/470, loss: 0.058362387120723724 2023-01-24 04:02:51.893092: step: 100/470, loss: 0.08502303808927536 2023-01-24 04:02:52.726453: step: 102/470, loss: 0.1189383789896965 2023-01-24 04:02:53.395542: step: 104/470, loss: 0.005357891321182251 2023-01-24 04:02:54.167549: step: 106/470, loss: 0.07590699940919876 2023-01-24 04:02:54.893076: step: 108/470, loss: 0.010280979797244072 2023-01-24 04:02:55.649032: step: 110/470, loss: 0.033743537962436676 2023-01-24 04:02:56.381545: step: 112/470, loss: 0.053621165454387665 2023-01-24 04:02:57.097512: step: 114/470, loss: 0.2918620705604553 2023-01-24 04:02:57.882740: step: 116/470, loss: 0.08658783882856369 2023-01-24 04:02:58.598287: step: 118/470, loss: 0.0276590958237648 2023-01-24 04:02:59.299848: step: 120/470, loss: 0.08560670167207718 2023-01-24 04:03:00.033100: step: 122/470, loss: 0.3002013862133026 2023-01-24 04:03:00.757366: step: 124/470, loss: 0.2824411690235138 2023-01-24 04:03:01.498721: step: 126/470, loss: 0.03612075001001358 2023-01-24 04:03:02.170859: step: 128/470, loss: 0.06621766090393066 2023-01-24 04:03:02.862284: step: 130/470, loss: 0.039325080811977386 2023-01-24 04:03:03.563865: step: 132/470, loss: 0.042571116238832474 2023-01-24 04:03:04.340965: step: 134/470, loss: 0.09064992517232895 2023-01-24 04:03:05.045487: step: 136/470, loss: 0.08911306411027908 2023-01-24 04:03:05.862771: step: 138/470, loss: 0.241469144821167 2023-01-24 04:03:06.543971: step: 140/470, loss: 0.028448637574911118 2023-01-24 04:03:07.259281: step: 142/470, loss: 0.0016861435724422336 2023-01-24 04:03:08.041616: step: 144/470, loss: 0.04221523180603981 2023-01-24 04:03:08.674278: step: 146/470, loss: 0.18134230375289917 2023-01-24 04:03:09.424953: step: 148/470, loss: 0.030191145837306976 2023-01-24 04:03:10.226683: step: 150/470, loss: 0.16584885120391846 2023-01-24 04:03:11.055569: step: 152/470, loss: 0.08478162437677383 2023-01-24 04:03:11.842245: step: 154/470, loss: 0.08762053400278091 2023-01-24 04:03:12.578341: step: 156/470, loss: 0.11175291985273361 2023-01-24 04:03:13.274039: step: 158/470, loss: 0.3621843159198761 2023-01-24 04:03:13.996158: step: 160/470, loss: 0.1218324527144432 2023-01-24 04:03:14.699944: step: 162/470, loss: 0.043930042535066605 2023-01-24 04:03:15.440242: step: 164/470, loss: 0.07743663340806961 2023-01-24 04:03:16.136411: step: 166/470, loss: 0.015336059965193272 2023-01-24 04:03:16.933817: step: 168/470, loss: 0.04017908126115799 2023-01-24 04:03:17.698500: step: 170/470, loss: 0.21953128278255463 2023-01-24 04:03:18.334955: step: 172/470, loss: 0.04235304147005081 2023-01-24 04:03:19.188512: step: 174/470, loss: 0.12579070031642914 2023-01-24 04:03:20.000654: step: 176/470, loss: 0.07374074310064316 2023-01-24 04:03:20.745017: step: 178/470, loss: 0.08439971506595612 2023-01-24 04:03:22.063746: step: 180/470, loss: 0.15861716866493225 2023-01-24 04:03:22.780506: step: 182/470, loss: 0.01040814071893692 2023-01-24 04:03:23.469207: step: 184/470, loss: 0.027954377233982086 2023-01-24 04:03:24.213882: step: 186/470, loss: 0.04491977393627167 2023-01-24 04:03:24.923361: step: 188/470, loss: 0.05019805580377579 2023-01-24 04:03:25.609854: step: 190/470, loss: 0.0625576600432396 2023-01-24 04:03:26.311921: step: 192/470, loss: 0.06504880636930466 2023-01-24 04:03:27.138286: step: 194/470, loss: 0.1290799230337143 2023-01-24 04:03:28.095567: step: 196/470, loss: 0.08973461389541626 2023-01-24 04:03:28.818607: step: 198/470, loss: 0.09824441373348236 2023-01-24 04:03:29.647747: step: 200/470, loss: 0.08071193844079971 2023-01-24 04:03:30.408538: step: 202/470, loss: 0.1525980830192566 2023-01-24 04:03:31.200155: step: 204/470, loss: 0.0688774362206459 2023-01-24 04:03:31.887068: step: 206/470, loss: 0.034748729318380356 2023-01-24 04:03:32.542498: step: 208/470, loss: 0.05070429667830467 2023-01-24 04:03:33.264470: step: 210/470, loss: 0.07284100353717804 2023-01-24 04:03:34.100353: step: 212/470, loss: 0.18640294671058655 2023-01-24 04:03:34.847908: step: 214/470, loss: 0.04599619284272194 2023-01-24 04:03:35.550565: step: 216/470, loss: 0.045409638434648514 2023-01-24 04:03:36.328438: step: 218/470, loss: 0.1961684674024582 2023-01-24 04:03:36.999920: step: 220/470, loss: 0.040671225637197495 2023-01-24 04:03:37.939796: step: 222/470, loss: 0.1723230630159378 2023-01-24 04:03:38.637152: step: 224/470, loss: 0.011676449328660965 2023-01-24 04:03:39.397427: step: 226/470, loss: 0.04717397689819336 2023-01-24 04:03:40.192923: step: 228/470, loss: 0.08629091829061508 2023-01-24 04:03:40.974455: step: 230/470, loss: 0.06894738972187042 2023-01-24 04:03:41.621429: step: 232/470, loss: 0.747312605381012 2023-01-24 04:03:42.355909: step: 234/470, loss: 0.09399952739477158 2023-01-24 04:03:43.046767: step: 236/470, loss: 0.055615007877349854 2023-01-24 04:03:43.778350: step: 238/470, loss: 0.04675089567899704 2023-01-24 04:03:44.592523: step: 240/470, loss: 0.04450520500540733 2023-01-24 04:03:45.252593: step: 242/470, loss: 0.12249691784381866 2023-01-24 04:03:45.997409: step: 244/470, loss: 0.020723912864923477 2023-01-24 04:03:46.764716: step: 246/470, loss: 1.1003464460372925 2023-01-24 04:03:47.516082: step: 248/470, loss: 0.18243949115276337 2023-01-24 04:03:48.212653: step: 250/470, loss: 0.06836390495300293 2023-01-24 04:03:48.955903: step: 252/470, loss: 0.10224248468875885 2023-01-24 04:03:49.656203: step: 254/470, loss: 0.03016090951859951 2023-01-24 04:03:50.431632: step: 256/470, loss: 0.7845423221588135 2023-01-24 04:03:51.162659: step: 258/470, loss: 0.12435699254274368 2023-01-24 04:03:51.922868: step: 260/470, loss: 0.02161235734820366 2023-01-24 04:03:52.656133: step: 262/470, loss: 0.06350536644458771 2023-01-24 04:03:53.487491: step: 264/470, loss: 0.16371527314186096 2023-01-24 04:03:54.203497: step: 266/470, loss: 0.03186386078596115 2023-01-24 04:03:54.976312: step: 268/470, loss: 0.0583181194961071 2023-01-24 04:03:55.653647: step: 270/470, loss: 0.03716019168496132 2023-01-24 04:03:56.477062: step: 272/470, loss: 0.054880399256944656 2023-01-24 04:03:57.267304: step: 274/470, loss: 0.08905167132616043 2023-01-24 04:03:58.025044: step: 276/470, loss: 0.20153184235095978 2023-01-24 04:03:58.860757: step: 278/470, loss: 0.10790564119815826 2023-01-24 04:03:59.679911: step: 280/470, loss: 0.07056804746389389 2023-01-24 04:04:00.379770: step: 282/470, loss: 0.05816777050495148 2023-01-24 04:04:01.222842: step: 284/470, loss: 0.4308560788631439 2023-01-24 04:04:01.965380: step: 286/470, loss: 0.07634017616510391 2023-01-24 04:04:02.710510: step: 288/470, loss: 0.03175393491983414 2023-01-24 04:04:03.381650: step: 290/470, loss: 0.13919062912464142 2023-01-24 04:04:04.096355: step: 292/470, loss: 0.032145872712135315 2023-01-24 04:04:04.822943: step: 294/470, loss: 0.1720714420080185 2023-01-24 04:04:05.608079: step: 296/470, loss: 1.0381907224655151 2023-01-24 04:04:06.367442: step: 298/470, loss: 0.08752277493476868 2023-01-24 04:04:07.076204: step: 300/470, loss: 5.016946792602539 2023-01-24 04:04:07.843495: step: 302/470, loss: 0.048376813530921936 2023-01-24 04:04:08.639714: step: 304/470, loss: 0.020166104659438133 2023-01-24 04:04:09.361748: step: 306/470, loss: 0.3207864463329315 2023-01-24 04:04:10.100383: step: 308/470, loss: 0.42874202132225037 2023-01-24 04:04:10.832049: step: 310/470, loss: 0.04274129867553711 2023-01-24 04:04:11.524136: step: 312/470, loss: 0.012306938879191875 2023-01-24 04:04:12.270761: step: 314/470, loss: 0.03849383816123009 2023-01-24 04:04:13.033416: step: 316/470, loss: 0.6824967265129089 2023-01-24 04:04:13.719702: step: 318/470, loss: 0.055570270866155624 2023-01-24 04:04:14.426269: step: 320/470, loss: 0.06937985122203827 2023-01-24 04:04:15.190277: step: 322/470, loss: 0.044349320232868195 2023-01-24 04:04:15.964308: step: 324/470, loss: 0.09120985120534897 2023-01-24 04:04:16.663936: step: 326/470, loss: 0.025129251182079315 2023-01-24 04:04:17.371435: step: 328/470, loss: 0.06615206599235535 2023-01-24 04:04:18.083979: step: 330/470, loss: 0.13011448085308075 2023-01-24 04:04:18.842941: step: 332/470, loss: 1.3872214555740356 2023-01-24 04:04:19.592672: step: 334/470, loss: 0.03803999722003937 2023-01-24 04:04:20.359208: step: 336/470, loss: 0.0787554532289505 2023-01-24 04:04:21.085625: step: 338/470, loss: 0.05206383764743805 2023-01-24 04:04:21.804031: step: 340/470, loss: 0.08769537508487701 2023-01-24 04:04:22.609243: step: 342/470, loss: 0.28150492906570435 2023-01-24 04:04:23.376331: step: 344/470, loss: 0.3160443902015686 2023-01-24 04:04:24.183861: step: 346/470, loss: 0.01590467244386673 2023-01-24 04:04:24.949268: step: 348/470, loss: 0.08347859978675842 2023-01-24 04:04:25.617084: step: 350/470, loss: 0.18649937212467194 2023-01-24 04:04:26.310436: step: 352/470, loss: 0.06714587658643723 2023-01-24 04:04:27.057997: step: 354/470, loss: 0.2771860361099243 2023-01-24 04:04:27.799237: step: 356/470, loss: 0.10782112181186676 2023-01-24 04:04:28.530355: step: 358/470, loss: 0.04154413565993309 2023-01-24 04:04:29.333366: step: 360/470, loss: 0.16717395186424255 2023-01-24 04:04:30.012661: step: 362/470, loss: 0.06858609616756439 2023-01-24 04:04:30.782945: step: 364/470, loss: 0.09976839274168015 2023-01-24 04:04:31.460012: step: 366/470, loss: 0.04712813347578049 2023-01-24 04:04:32.170460: step: 368/470, loss: 0.004059855360537767 2023-01-24 04:04:32.904751: step: 370/470, loss: 0.04966858774423599 2023-01-24 04:04:33.657773: step: 372/470, loss: 0.05597155913710594 2023-01-24 04:04:34.402149: step: 374/470, loss: 0.03586982935667038 2023-01-24 04:04:35.096535: step: 376/470, loss: 0.07770383358001709 2023-01-24 04:04:35.889801: step: 378/470, loss: 2.023169994354248 2023-01-24 04:04:36.699785: step: 380/470, loss: 0.025740159675478935 2023-01-24 04:04:37.431232: step: 382/470, loss: 0.09446519613265991 2023-01-24 04:04:38.221262: step: 384/470, loss: 0.12673798203468323 2023-01-24 04:04:39.015612: step: 386/470, loss: 0.1375817209482193 2023-01-24 04:04:39.692889: step: 388/470, loss: 0.16045433282852173 2023-01-24 04:04:40.433037: step: 390/470, loss: 0.06736897677183151 2023-01-24 04:04:41.132173: step: 392/470, loss: 0.02480211667716503 2023-01-24 04:04:41.848476: step: 394/470, loss: 0.030097251757979393 2023-01-24 04:04:42.642850: step: 396/470, loss: 0.13856565952301025 2023-01-24 04:04:43.379600: step: 398/470, loss: 0.10427145659923553 2023-01-24 04:04:44.143330: step: 400/470, loss: 0.10263396799564362 2023-01-24 04:04:44.896698: step: 402/470, loss: 0.06050116941332817 2023-01-24 04:04:45.663265: step: 404/470, loss: 0.13885028660297394 2023-01-24 04:04:46.460656: step: 406/470, loss: 0.07277980446815491 2023-01-24 04:04:47.202234: step: 408/470, loss: 3.110565662384033 2023-01-24 04:04:47.915601: step: 410/470, loss: 0.10733214020729065 2023-01-24 04:04:48.591522: step: 412/470, loss: 0.12563331425189972 2023-01-24 04:04:49.333317: step: 414/470, loss: 0.04333885759115219 2023-01-24 04:04:50.200368: step: 416/470, loss: 0.0657099261879921 2023-01-24 04:04:51.045501: step: 418/470, loss: 0.07604394853115082 2023-01-24 04:04:51.794598: step: 420/470, loss: 0.06309117376804352 2023-01-24 04:04:52.562326: step: 422/470, loss: 0.07451029866933823 2023-01-24 04:04:53.366845: step: 424/470, loss: 0.2557286024093628 2023-01-24 04:04:54.108207: step: 426/470, loss: 0.021574387326836586 2023-01-24 04:04:54.852239: step: 428/470, loss: 0.16285766661167145 2023-01-24 04:04:55.646958: step: 430/470, loss: 0.13354262709617615 2023-01-24 04:04:56.314773: step: 432/470, loss: 0.01653607189655304 2023-01-24 04:04:57.018820: step: 434/470, loss: 0.052297040820121765 2023-01-24 04:04:57.784482: step: 436/470, loss: 0.0829712525010109 2023-01-24 04:04:58.633624: step: 438/470, loss: 0.06701302528381348 2023-01-24 04:04:59.323563: step: 440/470, loss: 0.08787357807159424 2023-01-24 04:04:59.980975: step: 442/470, loss: 0.013717160560190678 2023-01-24 04:05:00.713341: step: 444/470, loss: 0.028695937246084213 2023-01-24 04:05:01.466954: step: 446/470, loss: 0.16002628207206726 2023-01-24 04:05:02.189502: step: 448/470, loss: 0.08444713056087494 2023-01-24 04:05:02.879462: step: 450/470, loss: 0.02779926173388958 2023-01-24 04:05:03.563978: step: 452/470, loss: 0.07570282369852066 2023-01-24 04:05:04.297052: step: 454/470, loss: 0.053751636296510696 2023-01-24 04:05:05.023254: step: 456/470, loss: 0.20872299373149872 2023-01-24 04:05:05.772800: step: 458/470, loss: 0.02030782587826252 2023-01-24 04:05:06.381048: step: 460/470, loss: 0.10540497303009033 2023-01-24 04:05:07.102730: step: 462/470, loss: 0.07553648203611374 2023-01-24 04:05:07.889950: step: 464/470, loss: 0.03500121459364891 2023-01-24 04:05:08.654879: step: 466/470, loss: 1.096193790435791 2023-01-24 04:05:09.359409: step: 468/470, loss: 0.020026426762342453 2023-01-24 04:05:10.149595: step: 470/470, loss: 0.012369709089398384 2023-01-24 04:05:10.846561: step: 472/470, loss: 0.17361843585968018 2023-01-24 04:05:11.636716: step: 474/470, loss: 0.07756824046373367 2023-01-24 04:05:12.418957: step: 476/470, loss: 0.5463083386421204 2023-01-24 04:05:13.157374: step: 478/470, loss: 0.46463775634765625 2023-01-24 04:05:13.865711: step: 480/470, loss: 0.05993659421801567 2023-01-24 04:05:14.520653: step: 482/470, loss: 0.469533234834671 2023-01-24 04:05:15.239136: step: 484/470, loss: 0.0658133402466774 2023-01-24 04:05:15.989032: step: 486/470, loss: 0.03351491689682007 2023-01-24 04:05:16.809132: step: 488/470, loss: 1.313239336013794 2023-01-24 04:05:17.480546: step: 490/470, loss: 0.03160230442881584 2023-01-24 04:05:18.211718: step: 492/470, loss: 0.008728746324777603 2023-01-24 04:05:18.954363: step: 494/470, loss: 0.05184914916753769 2023-01-24 04:05:19.704337: step: 496/470, loss: 0.12218936532735825 2023-01-24 04:05:20.412768: step: 498/470, loss: 0.014416754245758057 2023-01-24 04:05:21.119986: step: 500/470, loss: 0.0723162293434143 2023-01-24 04:05:21.758735: step: 502/470, loss: 0.09827473759651184 2023-01-24 04:05:22.514269: step: 504/470, loss: 0.018960921093821526 2023-01-24 04:05:23.299796: step: 506/470, loss: 0.11824572086334229 2023-01-24 04:05:24.028478: step: 508/470, loss: 0.22170805931091309 2023-01-24 04:05:24.815903: step: 510/470, loss: 0.02900109253823757 2023-01-24 04:05:25.616144: step: 512/470, loss: 0.044659458100795746 2023-01-24 04:05:26.408223: step: 514/470, loss: 0.0988159105181694 2023-01-24 04:05:27.113846: step: 516/470, loss: 0.08702954649925232 2023-01-24 04:05:27.840991: step: 518/470, loss: 0.07674989104270935 2023-01-24 04:05:28.577250: step: 520/470, loss: 0.9716522693634033 2023-01-24 04:05:29.281100: step: 522/470, loss: 0.036884721368551254 2023-01-24 04:05:30.033645: step: 524/470, loss: 0.03706406056880951 2023-01-24 04:05:30.805538: step: 526/470, loss: 0.10544019937515259 2023-01-24 04:05:31.605745: step: 528/470, loss: 0.32073140144348145 2023-01-24 04:05:32.365791: step: 530/470, loss: 0.06953152269124985 2023-01-24 04:05:33.148960: step: 532/470, loss: 0.03154062107205391 2023-01-24 04:05:33.911968: step: 534/470, loss: 0.05240052565932274 2023-01-24 04:05:34.708030: step: 536/470, loss: 0.07844150811433792 2023-01-24 04:05:35.532241: step: 538/470, loss: 0.040399134159088135 2023-01-24 04:05:36.246377: step: 540/470, loss: 0.0194169282913208 2023-01-24 04:05:36.998316: step: 542/470, loss: 0.05020745098590851 2023-01-24 04:05:37.742725: step: 544/470, loss: 0.07957773655653 2023-01-24 04:05:38.498795: step: 546/470, loss: 0.08744139224290848 2023-01-24 04:05:39.254381: step: 548/470, loss: 0.026095112785696983 2023-01-24 04:05:39.989559: step: 550/470, loss: 0.2702293395996094 2023-01-24 04:05:40.711838: step: 552/470, loss: 0.10846442729234695 2023-01-24 04:05:41.500585: step: 554/470, loss: 0.3181234300136566 2023-01-24 04:05:42.256984: step: 556/470, loss: 0.0786203145980835 2023-01-24 04:05:42.984937: step: 558/470, loss: 0.0033358277287334204 2023-01-24 04:05:43.720173: step: 560/470, loss: 0.07501276582479477 2023-01-24 04:05:44.528213: step: 562/470, loss: 0.05452758073806763 2023-01-24 04:05:45.235696: step: 564/470, loss: 0.09366501122713089 2023-01-24 04:05:45.928755: step: 566/470, loss: 0.014939019456505775 2023-01-24 04:05:46.575794: step: 568/470, loss: 0.06117936596274376 2023-01-24 04:05:47.347945: step: 570/470, loss: 0.21456541121006012 2023-01-24 04:05:48.109071: step: 572/470, loss: 0.06580285727977753 2023-01-24 04:05:48.877938: step: 574/470, loss: 0.16548193991184235 2023-01-24 04:05:49.632153: step: 576/470, loss: 0.06566378474235535 2023-01-24 04:05:50.328631: step: 578/470, loss: 0.0672176256775856 2023-01-24 04:05:51.140191: step: 580/470, loss: 0.1079394519329071 2023-01-24 04:05:51.923937: step: 582/470, loss: 0.08780059963464737 2023-01-24 04:05:52.725006: step: 584/470, loss: 0.10914134234189987 2023-01-24 04:05:53.361486: step: 586/470, loss: 0.02274099551141262 2023-01-24 04:05:54.024115: step: 588/470, loss: 0.0968480110168457 2023-01-24 04:05:54.723801: step: 590/470, loss: 0.10516564548015594 2023-01-24 04:05:55.511323: step: 592/470, loss: 0.03458961844444275 2023-01-24 04:05:56.313705: step: 594/470, loss: 0.0399974063038826 2023-01-24 04:05:57.015984: step: 596/470, loss: 0.10185714811086655 2023-01-24 04:05:57.802169: step: 598/470, loss: 0.0844685435295105 2023-01-24 04:05:58.574723: step: 600/470, loss: 0.24795423448085785 2023-01-24 04:05:59.378067: step: 602/470, loss: 0.0773269459605217 2023-01-24 04:06:00.196575: step: 604/470, loss: 0.534523606300354 2023-01-24 04:06:01.035483: step: 606/470, loss: 0.09742405265569687 2023-01-24 04:06:01.815738: step: 608/470, loss: 0.06567207723855972 2023-01-24 04:06:02.612336: step: 610/470, loss: 0.14598585665225983 2023-01-24 04:06:03.389108: step: 612/470, loss: 0.1488225907087326 2023-01-24 04:06:04.132621: step: 614/470, loss: 0.038161151111125946 2023-01-24 04:06:04.845814: step: 616/470, loss: 0.05915544927120209 2023-01-24 04:06:05.608590: step: 618/470, loss: 0.1564996987581253 2023-01-24 04:06:06.386319: step: 620/470, loss: 0.05826148763298988 2023-01-24 04:06:07.166237: step: 622/470, loss: 0.011290734633803368 2023-01-24 04:06:07.946453: step: 624/470, loss: 0.09952137619256973 2023-01-24 04:06:08.629429: step: 626/470, loss: 0.030536210164427757 2023-01-24 04:06:09.364196: step: 628/470, loss: 0.10485729575157166 2023-01-24 04:06:10.183598: step: 630/470, loss: 0.05025089532136917 2023-01-24 04:06:10.985244: step: 632/470, loss: 0.022319285199046135 2023-01-24 04:06:11.801800: step: 634/470, loss: 0.24370166659355164 2023-01-24 04:06:12.565946: step: 636/470, loss: 0.060697052627801895 2023-01-24 04:06:13.299665: step: 638/470, loss: 0.11710238456726074 2023-01-24 04:06:14.022570: step: 640/470, loss: 0.028134386986494064 2023-01-24 04:06:14.780930: step: 642/470, loss: 0.1157449260354042 2023-01-24 04:06:15.548195: step: 644/470, loss: 0.5257167816162109 2023-01-24 04:06:16.259507: step: 646/470, loss: 0.09944950044155121 2023-01-24 04:06:17.047581: step: 648/470, loss: 0.019325170665979385 2023-01-24 04:06:17.730949: step: 650/470, loss: 0.2056947946548462 2023-01-24 04:06:18.502315: step: 652/470, loss: 0.09543462097644806 2023-01-24 04:06:19.344451: step: 654/470, loss: 0.06548590958118439 2023-01-24 04:06:20.045941: step: 656/470, loss: 0.06484925746917725 2023-01-24 04:06:20.775809: step: 658/470, loss: 0.3009560704231262 2023-01-24 04:06:21.552438: step: 660/470, loss: 0.08352717012166977 2023-01-24 04:06:22.305513: step: 662/470, loss: 0.0790558010339737 2023-01-24 04:06:23.099844: step: 664/470, loss: 0.18998511135578156 2023-01-24 04:06:23.990394: step: 666/470, loss: 0.15023095905780792 2023-01-24 04:06:24.697002: step: 668/470, loss: 0.14798638224601746 2023-01-24 04:06:25.565674: step: 670/470, loss: 0.03329675644636154 2023-01-24 04:06:26.304358: step: 672/470, loss: 0.0386032834649086 2023-01-24 04:06:27.007336: step: 674/470, loss: 0.373938649892807 2023-01-24 04:06:27.726929: step: 676/470, loss: 0.03455100581049919 2023-01-24 04:06:28.486184: step: 678/470, loss: 0.05225971341133118 2023-01-24 04:06:29.226382: step: 680/470, loss: 0.9394486546516418 2023-01-24 04:06:29.942857: step: 682/470, loss: 0.07498309016227722 2023-01-24 04:06:30.657108: step: 684/470, loss: 0.04044778645038605 2023-01-24 04:06:31.580381: step: 686/470, loss: 0.04468849301338196 2023-01-24 04:06:32.266198: step: 688/470, loss: 0.038324546068906784 2023-01-24 04:06:32.939788: step: 690/470, loss: 0.05637683719396591 2023-01-24 04:06:33.614574: step: 692/470, loss: 0.02472817339003086 2023-01-24 04:06:34.282591: step: 694/470, loss: 0.096453458070755 2023-01-24 04:06:35.034824: step: 696/470, loss: 0.03920508921146393 2023-01-24 04:06:35.680982: step: 698/470, loss: 0.007813246920704842 2023-01-24 04:06:36.404863: step: 700/470, loss: 0.04346537962555885 2023-01-24 04:06:37.174773: step: 702/470, loss: 0.038816504180431366 2023-01-24 04:06:37.841287: step: 704/470, loss: 0.030292358249425888 2023-01-24 04:06:38.552519: step: 706/470, loss: 0.016416076570749283 2023-01-24 04:06:39.262790: step: 708/470, loss: 0.2907312214374542 2023-01-24 04:06:39.969497: step: 710/470, loss: 0.06063879653811455 2023-01-24 04:06:40.779853: step: 712/470, loss: 0.041221536695957184 2023-01-24 04:06:41.519339: step: 714/470, loss: 0.07422397285699844 2023-01-24 04:06:42.220016: step: 716/470, loss: 0.040925491601228714 2023-01-24 04:06:42.900298: step: 718/470, loss: 0.806887149810791 2023-01-24 04:06:43.672320: step: 720/470, loss: 0.036033619195222855 2023-01-24 04:06:44.352543: step: 722/470, loss: 0.024567672982811928 2023-01-24 04:06:45.170589: step: 724/470, loss: 0.12131427973508835 2023-01-24 04:06:45.861878: step: 726/470, loss: 0.10178028047084808 2023-01-24 04:06:46.583741: step: 728/470, loss: 0.055488470941782 2023-01-24 04:06:47.252335: step: 730/470, loss: 0.0426790677011013 2023-01-24 04:06:48.033611: step: 732/470, loss: 0.13151328265666962 2023-01-24 04:06:48.726089: step: 734/470, loss: 0.1700894981622696 2023-01-24 04:06:49.501442: step: 736/470, loss: 0.0167455542832613 2023-01-24 04:06:50.249844: step: 738/470, loss: 0.041223183274269104 2023-01-24 04:06:50.863487: step: 740/470, loss: 1.210181713104248 2023-01-24 04:06:51.545953: step: 742/470, loss: 0.019830353558063507 2023-01-24 04:06:52.257515: step: 744/470, loss: 0.0577569454908371 2023-01-24 04:06:53.131852: step: 746/470, loss: 0.15902186930179596 2023-01-24 04:06:53.905713: step: 748/470, loss: 0.06879295408725739 2023-01-24 04:06:54.620068: step: 750/470, loss: 0.2863670587539673 2023-01-24 04:06:55.404132: step: 752/470, loss: 0.0369873046875 2023-01-24 04:06:56.191650: step: 754/470, loss: 0.06788759678602219 2023-01-24 04:06:56.982127: step: 756/470, loss: 0.06014617532491684 2023-01-24 04:06:57.761669: step: 758/470, loss: 0.12267798185348511 2023-01-24 04:06:58.546861: step: 760/470, loss: 2.288891553878784 2023-01-24 04:06:59.316869: step: 762/470, loss: 0.09062454104423523 2023-01-24 04:07:00.027365: step: 764/470, loss: 0.09816782921552658 2023-01-24 04:07:00.799536: step: 766/470, loss: 0.028900889679789543 2023-01-24 04:07:01.489173: step: 768/470, loss: 0.1546131819486618 2023-01-24 04:07:02.347592: step: 770/470, loss: 0.06838797777891159 2023-01-24 04:07:03.153999: step: 772/470, loss: 0.1268923431634903 2023-01-24 04:07:03.957103: step: 774/470, loss: 0.038736216723918915 2023-01-24 04:07:04.698993: step: 776/470, loss: 0.09154653549194336 2023-01-24 04:07:05.471910: step: 778/470, loss: 0.17325304448604584 2023-01-24 04:07:06.168713: step: 780/470, loss: 0.04542065039277077 2023-01-24 04:07:06.835373: step: 782/470, loss: 0.045644138008356094 2023-01-24 04:07:07.545676: step: 784/470, loss: 0.19901789724826813 2023-01-24 04:07:08.365182: step: 786/470, loss: 0.07332056015729904 2023-01-24 04:07:09.006869: step: 788/470, loss: 0.017355095595121384 2023-01-24 04:07:09.701513: step: 790/470, loss: 0.047633036971092224 2023-01-24 04:07:10.401921: step: 792/470, loss: 0.09777167439460754 2023-01-24 04:07:11.214357: step: 794/470, loss: 0.14233334362506866 2023-01-24 04:07:11.969715: step: 796/470, loss: 0.094623863697052 2023-01-24 04:07:12.729771: step: 798/470, loss: 0.04363381117582321 2023-01-24 04:07:13.392880: step: 800/470, loss: 0.03856759890913963 2023-01-24 04:07:14.181293: step: 802/470, loss: 0.047487128525972366 2023-01-24 04:07:14.964814: step: 804/470, loss: 0.08930141478776932 2023-01-24 04:07:15.859120: step: 806/470, loss: 0.05340230092406273 2023-01-24 04:07:16.575068: step: 808/470, loss: 0.02979603409767151 2023-01-24 04:07:17.306131: step: 810/470, loss: 0.07141705602407455 2023-01-24 04:07:18.007992: step: 812/470, loss: 0.05251197889447212 2023-01-24 04:07:18.708534: step: 814/470, loss: 0.02951197512447834 2023-01-24 04:07:19.471333: step: 816/470, loss: 0.016900768503546715 2023-01-24 04:07:20.199540: step: 818/470, loss: 0.028845131397247314 2023-01-24 04:07:21.017560: step: 820/470, loss: 0.10073421150445938 2023-01-24 04:07:21.753558: step: 822/470, loss: 0.2575160264968872 2023-01-24 04:07:22.517359: step: 824/470, loss: 2.035039186477661 2023-01-24 04:07:23.290569: step: 826/470, loss: 0.03579817712306976 2023-01-24 04:07:24.035726: step: 828/470, loss: 0.04633788764476776 2023-01-24 04:07:24.800704: step: 830/470, loss: 0.4512938857078552 2023-01-24 04:07:25.565872: step: 832/470, loss: 0.06470350176095963 2023-01-24 04:07:26.282622: step: 834/470, loss: 0.07506409287452698 2023-01-24 04:07:26.979420: step: 836/470, loss: 0.026313841342926025 2023-01-24 04:07:27.741456: step: 838/470, loss: 0.020502228289842606 2023-01-24 04:07:28.485005: step: 840/470, loss: 0.7991170883178711 2023-01-24 04:07:29.208495: step: 842/470, loss: 0.1815629005432129 2023-01-24 04:07:29.972139: step: 844/470, loss: 0.08853790909051895 2023-01-24 04:07:30.744147: step: 846/470, loss: 0.05072927847504616 2023-01-24 04:07:31.486256: step: 848/470, loss: 0.20891301333904266 2023-01-24 04:07:32.218621: step: 850/470, loss: 0.09082869440317154 2023-01-24 04:07:32.998325: step: 852/470, loss: 0.032474786043167114 2023-01-24 04:07:33.780196: step: 854/470, loss: 0.18315306305885315 2023-01-24 04:07:34.545595: step: 856/470, loss: 0.16794231534004211 2023-01-24 04:07:35.315889: step: 858/470, loss: 0.03778474032878876 2023-01-24 04:07:36.035216: step: 860/470, loss: 0.07641467452049255 2023-01-24 04:07:36.849528: step: 862/470, loss: 0.11089635640382767 2023-01-24 04:07:37.576538: step: 864/470, loss: 0.2194468230009079 2023-01-24 04:07:38.367513: step: 866/470, loss: 0.028484074398875237 2023-01-24 04:07:39.160440: step: 868/470, loss: 0.19141916930675507 2023-01-24 04:07:39.888625: step: 870/470, loss: 0.23752793669700623 2023-01-24 04:07:40.641424: step: 872/470, loss: 0.08412695676088333 2023-01-24 04:07:41.400777: step: 874/470, loss: 0.055731259286403656 2023-01-24 04:07:42.164181: step: 876/470, loss: 0.05968417599797249 2023-01-24 04:07:42.894253: step: 878/470, loss: 0.14127624034881592 2023-01-24 04:07:43.686186: step: 880/470, loss: 0.0697111114859581 2023-01-24 04:07:44.437678: step: 882/470, loss: 0.21765391528606415 2023-01-24 04:07:45.152517: step: 884/470, loss: 0.02833917923271656 2023-01-24 04:07:45.883593: step: 886/470, loss: 0.27807044982910156 2023-01-24 04:07:46.608078: step: 888/470, loss: 0.25375717878341675 2023-01-24 04:07:47.424368: step: 890/470, loss: 0.08540809154510498 2023-01-24 04:07:48.195238: step: 892/470, loss: 0.04102737829089165 2023-01-24 04:07:48.872219: step: 894/470, loss: 0.2897949516773224 2023-01-24 04:07:49.608615: step: 896/470, loss: 0.04384997487068176 2023-01-24 04:07:50.362778: step: 898/470, loss: 0.06272339075803757 2023-01-24 04:07:51.151983: step: 900/470, loss: 0.13991448283195496 2023-01-24 04:07:51.915207: step: 902/470, loss: 0.03492581099271774 2023-01-24 04:07:52.602424: step: 904/470, loss: 4.457045078277588 2023-01-24 04:07:53.424144: step: 906/470, loss: 0.05300872027873993 2023-01-24 04:07:54.293557: step: 908/470, loss: 0.17280398309230804 2023-01-24 04:07:55.019445: step: 910/470, loss: 0.12466652691364288 2023-01-24 04:07:55.716951: step: 912/470, loss: 0.05572166293859482 2023-01-24 04:07:56.486938: step: 914/470, loss: 0.11022337526082993 2023-01-24 04:07:57.264908: step: 916/470, loss: 0.15707486867904663 2023-01-24 04:07:57.982144: step: 918/470, loss: 0.03694531321525574 2023-01-24 04:07:58.700269: step: 920/470, loss: 0.05348490923643112 2023-01-24 04:07:59.475530: step: 922/470, loss: 0.04882257059216499 2023-01-24 04:08:00.188688: step: 924/470, loss: 0.08925341814756393 2023-01-24 04:08:00.901367: step: 926/470, loss: 0.06599713861942291 2023-01-24 04:08:01.607287: step: 928/470, loss: 0.024995747953653336 2023-01-24 04:08:02.336178: step: 930/470, loss: 0.1284753382205963 2023-01-24 04:08:03.096991: step: 932/470, loss: 0.5296043157577515 2023-01-24 04:08:03.890312: step: 934/470, loss: 0.07279351353645325 2023-01-24 04:08:04.588451: step: 936/470, loss: 0.23871983587741852 2023-01-24 04:08:05.321962: step: 938/470, loss: 0.14109857380390167 2023-01-24 04:08:06.063461: step: 940/470, loss: 0.015602878294885159 2023-01-24 04:08:06.895035: step: 942/470, loss: 0.18439671397209167 ================================================== Loss: 0.165 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34891618043791955, 'r': 0.3197846587315278, 'f1': 0.3337158715871587}, 'combined': 0.24589590538001166, 'epoch': 18} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3630754646871506, 'r': 0.3470317842110613, 'f1': 0.3548723844437277}, 'combined': 0.23658158962915174, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34240737647740777, 'r': 0.3164182017922155, 'f1': 0.3289001821390485}, 'combined': 0.24234750262877255, 'epoch': 18} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36684256153757644, 'r': 0.34393692609094584, 'f1': 0.3550206644131627}, 'combined': 0.2366804429421084, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3338376367222521, 'r': 0.32116827669484216, 'f1': 0.32738042904870757}, 'combined': 0.24122768456220556, 'epoch': 18} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35185000712313164, 'r': 0.34745610693811657, 'f1': 0.34963925309094185}, 'combined': 0.23309283539396117, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25675675675675674, 'r': 0.2714285714285714, 'f1': 0.2638888888888889}, 'combined': 0.17592592592592593, 'epoch': 18} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40625, 'r': 0.2826086956521739, 'f1': 0.33333333333333337}, 'combined': 0.22222222222222224, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3448376840696118, 'r': 0.32586179633143586, 'f1': 0.33508130081300813}, 'combined': 0.2469020111253744, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3617054222274556, 'r': 0.31579665709858623, 'f1': 0.33719560922230973}, 'combined': 0.2247970728148731, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29069767441860467, 'r': 0.35714285714285715, 'f1': 0.32051282051282054}, 'combined': 0.2136752136752137, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:10:40.774749: step: 2/470, loss: 0.0112921092659235 2023-01-24 04:10:41.574851: step: 4/470, loss: 0.07280120998620987 2023-01-24 04:10:42.398425: step: 6/470, loss: 0.003984284121543169 2023-01-24 04:10:43.294398: step: 8/470, loss: 0.07252970337867737 2023-01-24 04:10:44.105956: step: 10/470, loss: 0.3424142301082611 2023-01-24 04:10:44.902892: step: 12/470, loss: 0.0439504012465477 2023-01-24 04:10:45.634710: step: 14/470, loss: 0.053510237485170364 2023-01-24 04:10:46.370478: step: 16/470, loss: 0.11842609941959381 2023-01-24 04:10:47.095063: step: 18/470, loss: 0.04970640316605568 2023-01-24 04:10:47.858757: step: 20/470, loss: 0.026285069063305855 2023-01-24 04:10:48.582357: step: 22/470, loss: 0.7065211534500122 2023-01-24 04:10:49.358007: step: 24/470, loss: 0.010196846909821033 2023-01-24 04:10:50.106931: step: 26/470, loss: 0.260796457529068 2023-01-24 04:10:50.822260: step: 28/470, loss: 0.067171610891819 2023-01-24 04:10:51.616176: step: 30/470, loss: 0.030061058700084686 2023-01-24 04:10:52.317892: step: 32/470, loss: 0.06347762048244476 2023-01-24 04:10:53.075644: step: 34/470, loss: 0.029726915061473846 2023-01-24 04:10:53.811198: step: 36/470, loss: 0.03759774938225746 2023-01-24 04:10:54.598813: step: 38/470, loss: 0.029163209721446037 2023-01-24 04:10:55.291900: step: 40/470, loss: 0.3536049723625183 2023-01-24 04:10:56.047975: step: 42/470, loss: 0.01684482768177986 2023-01-24 04:10:56.766594: step: 44/470, loss: 0.0455920547246933 2023-01-24 04:10:57.437168: step: 46/470, loss: 0.1695699840784073 2023-01-24 04:10:58.174974: step: 48/470, loss: 0.03408074751496315 2023-01-24 04:10:58.884299: step: 50/470, loss: 0.07696592807769775 2023-01-24 04:10:59.626343: step: 52/470, loss: 0.02807328663766384 2023-01-24 04:11:00.302134: step: 54/470, loss: 0.017425425350666046 2023-01-24 04:11:01.090119: step: 56/470, loss: 0.09263166040182114 2023-01-24 04:11:01.880868: step: 58/470, loss: 0.0654522031545639 2023-01-24 04:11:02.567156: step: 60/470, loss: 0.04144338145852089 2023-01-24 04:11:03.444008: step: 62/470, loss: 0.06384022533893585 2023-01-24 04:11:04.252806: step: 64/470, loss: 0.034149911254644394 2023-01-24 04:11:04.975269: step: 66/470, loss: 0.05268741771578789 2023-01-24 04:11:05.750509: step: 68/470, loss: 0.07051324099302292 2023-01-24 04:11:06.518077: step: 70/470, loss: 0.046795789152383804 2023-01-24 04:11:07.306306: step: 72/470, loss: 0.0369303859770298 2023-01-24 04:11:08.002523: step: 74/470, loss: 0.1018744483590126 2023-01-24 04:11:08.690174: step: 76/470, loss: 0.012132197618484497 2023-01-24 04:11:09.400750: step: 78/470, loss: 0.023422876372933388 2023-01-24 04:11:10.167812: step: 80/470, loss: 0.04017108678817749 2023-01-24 04:11:10.988662: step: 82/470, loss: 0.1267279088497162 2023-01-24 04:11:11.726877: step: 84/470, loss: 0.09292332828044891 2023-01-24 04:11:12.452660: step: 86/470, loss: 0.013976332731544971 2023-01-24 04:11:13.188599: step: 88/470, loss: 0.060683172196149826 2023-01-24 04:11:14.032110: step: 90/470, loss: 0.025250034406781197 2023-01-24 04:11:14.771691: step: 92/470, loss: 0.7996619343757629 2023-01-24 04:11:15.497773: step: 94/470, loss: 0.47412997484207153 2023-01-24 04:11:16.171370: step: 96/470, loss: 0.048665400594472885 2023-01-24 04:11:16.985718: step: 98/470, loss: 0.1110273227095604 2023-01-24 04:11:17.684358: step: 100/470, loss: 0.00958691630512476 2023-01-24 04:11:18.518316: step: 102/470, loss: 0.08475369215011597 2023-01-24 04:11:19.322088: step: 104/470, loss: 0.1207355335354805 2023-01-24 04:11:20.142318: step: 106/470, loss: 0.15649910271167755 2023-01-24 04:11:20.912184: step: 108/470, loss: 0.05611814185976982 2023-01-24 04:11:21.669961: step: 110/470, loss: 0.05005284771323204 2023-01-24 04:11:22.420493: step: 112/470, loss: 0.060722410678863525 2023-01-24 04:11:23.163456: step: 114/470, loss: 0.3491228520870209 2023-01-24 04:11:23.916833: step: 116/470, loss: 0.03365219384431839 2023-01-24 04:11:24.698429: step: 118/470, loss: 0.18201521039009094 2023-01-24 04:11:25.467177: step: 120/470, loss: 0.09609755873680115 2023-01-24 04:11:26.242028: step: 122/470, loss: 0.0444912314414978 2023-01-24 04:11:27.064962: step: 124/470, loss: 0.02517714537680149 2023-01-24 04:11:27.802614: step: 126/470, loss: 0.07190493494272232 2023-01-24 04:11:28.517106: step: 128/470, loss: 0.06636962294578552 2023-01-24 04:11:29.256664: step: 130/470, loss: 0.06659968942403793 2023-01-24 04:11:29.986179: step: 132/470, loss: 0.36475393176078796 2023-01-24 04:11:30.666772: step: 134/470, loss: 0.15585525333881378 2023-01-24 04:11:31.372371: step: 136/470, loss: 0.02720065414905548 2023-01-24 04:11:32.078135: step: 138/470, loss: 0.0021766903810203075 2023-01-24 04:11:32.970242: step: 140/470, loss: 0.023098669946193695 2023-01-24 04:11:33.710012: step: 142/470, loss: 0.09725446254014969 2023-01-24 04:11:34.395207: step: 144/470, loss: 0.0761142149567604 2023-01-24 04:11:35.143375: step: 146/470, loss: 0.03910772502422333 2023-01-24 04:11:35.837161: step: 148/470, loss: 0.045949943363666534 2023-01-24 04:11:36.531069: step: 150/470, loss: 0.18881461024284363 2023-01-24 04:11:37.284768: step: 152/470, loss: 0.08599704504013062 2023-01-24 04:11:37.995059: step: 154/470, loss: 0.09139792621135712 2023-01-24 04:11:38.716884: step: 156/470, loss: 0.023786649107933044 2023-01-24 04:11:39.447981: step: 158/470, loss: 0.0356992706656456 2023-01-24 04:11:40.200598: step: 160/470, loss: 0.20965898036956787 2023-01-24 04:11:40.913542: step: 162/470, loss: 0.05315234512090683 2023-01-24 04:11:41.732701: step: 164/470, loss: 0.09431155771017075 2023-01-24 04:11:42.438109: step: 166/470, loss: 0.08751388639211655 2023-01-24 04:11:43.220598: step: 168/470, loss: 0.035697486251592636 2023-01-24 04:11:44.035840: step: 170/470, loss: 0.08968447893857956 2023-01-24 04:11:44.852963: step: 172/470, loss: 0.04234302416443825 2023-01-24 04:11:45.637338: step: 174/470, loss: 0.03975873067975044 2023-01-24 04:11:46.551064: step: 176/470, loss: 0.03170318529009819 2023-01-24 04:11:47.271000: step: 178/470, loss: 0.032391466200351715 2023-01-24 04:11:48.041084: step: 180/470, loss: 0.0803714469075203 2023-01-24 04:11:48.761599: step: 182/470, loss: 0.06298526376485825 2023-01-24 04:11:49.519954: step: 184/470, loss: 0.10829924792051315 2023-01-24 04:11:50.382027: step: 186/470, loss: 0.046199098229408264 2023-01-24 04:11:51.236969: step: 188/470, loss: 8.178955078125 2023-01-24 04:11:51.967705: step: 190/470, loss: 0.04769464582204819 2023-01-24 04:11:52.712512: step: 192/470, loss: 0.0582704059779644 2023-01-24 04:11:53.499715: step: 194/470, loss: 0.08901997655630112 2023-01-24 04:11:54.194015: step: 196/470, loss: 0.0373103991150856 2023-01-24 04:11:54.885104: step: 198/470, loss: 0.028127502650022507 2023-01-24 04:11:55.659729: step: 200/470, loss: 0.07751333713531494 2023-01-24 04:11:56.430788: step: 202/470, loss: 0.04906482994556427 2023-01-24 04:11:57.102839: step: 204/470, loss: 0.04419788345694542 2023-01-24 04:11:57.878536: step: 206/470, loss: 0.044183794409036636 2023-01-24 04:11:58.637399: step: 208/470, loss: 0.026250679045915604 2023-01-24 04:11:59.392895: step: 210/470, loss: 0.03679383546113968 2023-01-24 04:12:00.138502: step: 212/470, loss: 0.04583572596311569 2023-01-24 04:12:01.019676: step: 214/470, loss: 0.07375669479370117 2023-01-24 04:12:01.796799: step: 216/470, loss: 0.2075904905796051 2023-01-24 04:12:02.569236: step: 218/470, loss: 0.050344791263341904 2023-01-24 04:12:03.255422: step: 220/470, loss: 0.7687535881996155 2023-01-24 04:12:04.000373: step: 222/470, loss: 0.01730995625257492 2023-01-24 04:12:04.793173: step: 224/470, loss: 0.0984438806772232 2023-01-24 04:12:05.524344: step: 226/470, loss: 0.014674522913992405 2023-01-24 04:12:06.250204: step: 228/470, loss: 0.07806924730539322 2023-01-24 04:12:07.059681: step: 230/470, loss: 0.10137531161308289 2023-01-24 04:12:07.817538: step: 232/470, loss: 0.021633736789226532 2023-01-24 04:12:08.573300: step: 234/470, loss: 0.02728520892560482 2023-01-24 04:12:09.230165: step: 236/470, loss: 0.023789340630173683 2023-01-24 04:12:09.919928: step: 238/470, loss: 0.08473862707614899 2023-01-24 04:12:10.631761: step: 240/470, loss: 0.5015853047370911 2023-01-24 04:12:11.357687: step: 242/470, loss: 0.3454964756965637 2023-01-24 04:12:12.181751: step: 244/470, loss: 0.104658342897892 2023-01-24 04:12:12.912177: step: 246/470, loss: 0.0792723074555397 2023-01-24 04:12:13.754492: step: 248/470, loss: 0.22262783348560333 2023-01-24 04:12:14.550481: step: 250/470, loss: 0.04709459841251373 2023-01-24 04:12:15.326596: step: 252/470, loss: 0.11567848920822144 2023-01-24 04:12:16.162723: step: 254/470, loss: 0.022635197266936302 2023-01-24 04:12:16.916731: step: 256/470, loss: 0.032210566103458405 2023-01-24 04:12:17.631308: step: 258/470, loss: 0.09060268104076385 2023-01-24 04:12:18.371229: step: 260/470, loss: 0.06221858039498329 2023-01-24 04:12:19.176927: step: 262/470, loss: 0.04487275332212448 2023-01-24 04:12:19.920719: step: 264/470, loss: 0.145310178399086 2023-01-24 04:12:20.640259: step: 266/470, loss: 0.025644149631261826 2023-01-24 04:12:21.530963: step: 268/470, loss: 0.03474476560950279 2023-01-24 04:12:22.240358: step: 270/470, loss: 0.06833195686340332 2023-01-24 04:12:22.946049: step: 272/470, loss: 0.1358962208032608 2023-01-24 04:12:23.702994: step: 274/470, loss: 0.08055564761161804 2023-01-24 04:12:24.426213: step: 276/470, loss: 0.060504548251628876 2023-01-24 04:12:25.206722: step: 278/470, loss: 0.1443237066268921 2023-01-24 04:12:25.938392: step: 280/470, loss: 0.0553126223385334 2023-01-24 04:12:26.686254: step: 282/470, loss: 0.047990117222070694 2023-01-24 04:12:27.398640: step: 284/470, loss: 0.053962647914886475 2023-01-24 04:12:28.115517: step: 286/470, loss: 0.06690418720245361 2023-01-24 04:12:28.829247: step: 288/470, loss: 0.02675960212945938 2023-01-24 04:12:29.530798: step: 290/470, loss: 0.10729622840881348 2023-01-24 04:12:30.244856: step: 292/470, loss: 0.009620921686291695 2023-01-24 04:12:30.901446: step: 294/470, loss: 0.017848452553153038 2023-01-24 04:12:31.702053: step: 296/470, loss: 0.0679577887058258 2023-01-24 04:12:32.463951: step: 298/470, loss: 0.009503382258117199 2023-01-24 04:12:33.210601: step: 300/470, loss: 0.008858383633196354 2023-01-24 04:12:33.916031: step: 302/470, loss: 6.043306827545166 2023-01-24 04:12:34.662173: step: 304/470, loss: 0.07553528249263763 2023-01-24 04:12:35.345176: step: 306/470, loss: 0.016572916880249977 2023-01-24 04:12:36.076388: step: 308/470, loss: 0.10772523283958435 2023-01-24 04:12:36.824958: step: 310/470, loss: 0.08887186646461487 2023-01-24 04:12:37.527079: step: 312/470, loss: 0.06254518777132034 2023-01-24 04:12:38.260572: step: 314/470, loss: 0.07799230515956879 2023-01-24 04:12:38.968461: step: 316/470, loss: 0.1513335406780243 2023-01-24 04:12:39.706180: step: 318/470, loss: 0.03515966981649399 2023-01-24 04:12:40.494965: step: 320/470, loss: 0.028799178078770638 2023-01-24 04:12:41.225782: step: 322/470, loss: 0.02126074954867363 2023-01-24 04:12:42.016403: step: 324/470, loss: 0.06063699349761009 2023-01-24 04:12:42.779340: step: 326/470, loss: 0.05479899421334267 2023-01-24 04:12:43.797930: step: 328/470, loss: 0.042343273758888245 2023-01-24 04:12:44.550948: step: 330/470, loss: 0.04537075757980347 2023-01-24 04:12:45.257666: step: 332/470, loss: 0.02867543324828148 2023-01-24 04:12:45.982232: step: 334/470, loss: 0.03638402745127678 2023-01-24 04:12:46.802938: step: 336/470, loss: 0.09336365759372711 2023-01-24 04:12:47.553364: step: 338/470, loss: 0.07571189105510712 2023-01-24 04:12:48.264927: step: 340/470, loss: 0.07789936661720276 2023-01-24 04:12:49.058859: step: 342/470, loss: 0.22282201051712036 2023-01-24 04:12:49.791412: step: 344/470, loss: 0.06214971840381622 2023-01-24 04:12:50.548476: step: 346/470, loss: 0.1050262525677681 2023-01-24 04:12:51.286236: step: 348/470, loss: 0.12013573199510574 2023-01-24 04:12:51.987513: step: 350/470, loss: 0.021225430071353912 2023-01-24 04:12:52.700244: step: 352/470, loss: 0.06574123352766037 2023-01-24 04:12:53.502385: step: 354/470, loss: 0.01238292921334505 2023-01-24 04:12:54.280714: step: 356/470, loss: 0.05769104138016701 2023-01-24 04:12:54.983129: step: 358/470, loss: 0.036605287343263626 2023-01-24 04:12:55.833808: step: 360/470, loss: 0.04926653951406479 2023-01-24 04:12:56.544409: step: 362/470, loss: 0.05047943815588951 2023-01-24 04:12:57.350095: step: 364/470, loss: 0.2702573835849762 2023-01-24 04:12:58.022066: step: 366/470, loss: 0.09223837405443192 2023-01-24 04:12:58.763246: step: 368/470, loss: 0.10138967633247375 2023-01-24 04:12:59.478075: step: 370/470, loss: 0.04373805224895477 2023-01-24 04:13:00.184097: step: 372/470, loss: 0.03066779114305973 2023-01-24 04:13:00.925891: step: 374/470, loss: 0.077591672539711 2023-01-24 04:13:01.608000: step: 376/470, loss: 0.196303129196167 2023-01-24 04:13:02.357350: step: 378/470, loss: 0.06719297915697098 2023-01-24 04:13:03.142598: step: 380/470, loss: 0.44437164068222046 2023-01-24 04:13:03.932220: step: 382/470, loss: 0.042737286537885666 2023-01-24 04:13:04.654271: step: 384/470, loss: 0.0069000255316495895 2023-01-24 04:13:05.416727: step: 386/470, loss: 0.05957155302166939 2023-01-24 04:13:06.105670: step: 388/470, loss: 0.08688811212778091 2023-01-24 04:13:06.878282: step: 390/470, loss: 0.05815081670880318 2023-01-24 04:13:07.575227: step: 392/470, loss: 0.016378343105316162 2023-01-24 04:13:08.290998: step: 394/470, loss: 0.035406243056058884 2023-01-24 04:13:09.027291: step: 396/470, loss: 0.008739456534385681 2023-01-24 04:13:09.755248: step: 398/470, loss: 0.18853512406349182 2023-01-24 04:13:10.476174: step: 400/470, loss: 0.03277993202209473 2023-01-24 04:13:11.238168: step: 402/470, loss: 0.13732224702835083 2023-01-24 04:13:11.895527: step: 404/470, loss: 0.008723941631615162 2023-01-24 04:13:12.575373: step: 406/470, loss: 0.06874186545610428 2023-01-24 04:13:13.324057: step: 408/470, loss: 0.7236286401748657 2023-01-24 04:13:14.024600: step: 410/470, loss: 0.28471216559410095 2023-01-24 04:13:14.784055: step: 412/470, loss: 0.08407147228717804 2023-01-24 04:13:15.529902: step: 414/470, loss: 0.038917530328035355 2023-01-24 04:13:16.231433: step: 416/470, loss: 0.08968863636255264 2023-01-24 04:13:16.950294: step: 418/470, loss: 0.1135501116514206 2023-01-24 04:13:17.675459: step: 420/470, loss: 0.051190122961997986 2023-01-24 04:13:18.370606: step: 422/470, loss: 0.06608197093009949 2023-01-24 04:13:19.085610: step: 424/470, loss: 0.005093181971460581 2023-01-24 04:13:19.818904: step: 426/470, loss: 0.04653076454997063 2023-01-24 04:13:20.605228: step: 428/470, loss: 0.01797359250485897 2023-01-24 04:13:21.309944: step: 430/470, loss: 0.12045972049236298 2023-01-24 04:13:22.030699: step: 432/470, loss: 0.04225793853402138 2023-01-24 04:13:22.755724: step: 434/470, loss: 0.3560810685157776 2023-01-24 04:13:23.420571: step: 436/470, loss: 0.03933987766504288 2023-01-24 04:13:24.131961: step: 438/470, loss: 0.10735952109098434 2023-01-24 04:13:24.815401: step: 440/470, loss: 0.04517611861228943 2023-01-24 04:13:25.586816: step: 442/470, loss: 0.04805009067058563 2023-01-24 04:13:26.303200: step: 444/470, loss: 0.034436922520399094 2023-01-24 04:13:27.096554: step: 446/470, loss: 0.0980946347117424 2023-01-24 04:13:27.817058: step: 448/470, loss: 0.02357635833323002 2023-01-24 04:13:28.565808: step: 450/470, loss: 0.40950459241867065 2023-01-24 04:13:29.268718: step: 452/470, loss: 0.09088637679815292 2023-01-24 04:13:30.005520: step: 454/470, loss: 0.06179659068584442 2023-01-24 04:13:30.670968: step: 456/470, loss: 0.024704836308956146 2023-01-24 04:13:31.426891: step: 458/470, loss: 0.013954793103039265 2023-01-24 04:13:32.154298: step: 460/470, loss: 0.012341751717031002 2023-01-24 04:13:32.825685: step: 462/470, loss: 0.4283565282821655 2023-01-24 04:13:33.543943: step: 464/470, loss: 0.12819719314575195 2023-01-24 04:13:34.348501: step: 466/470, loss: 0.10749958455562592 2023-01-24 04:13:35.057777: step: 468/470, loss: 0.0625910684466362 2023-01-24 04:13:35.820437: step: 470/470, loss: 0.0827048048377037 2023-01-24 04:13:36.480772: step: 472/470, loss: 0.05317022651433945 2023-01-24 04:13:37.199955: step: 474/470, loss: 0.020091721788048744 2023-01-24 04:13:37.901210: step: 476/470, loss: 0.06902855634689331 2023-01-24 04:13:38.603821: step: 478/470, loss: 0.044647008180618286 2023-01-24 04:13:39.326956: step: 480/470, loss: 0.061124030500650406 2023-01-24 04:13:40.077200: step: 482/470, loss: 0.09614740312099457 2023-01-24 04:13:40.818080: step: 484/470, loss: 0.09537113457918167 2023-01-24 04:13:41.679616: step: 486/470, loss: 0.1970382034778595 2023-01-24 04:13:42.431289: step: 488/470, loss: 0.08817904442548752 2023-01-24 04:13:43.212170: step: 490/470, loss: 0.19899365305900574 2023-01-24 04:13:43.943517: step: 492/470, loss: 0.010458926670253277 2023-01-24 04:13:44.664397: step: 494/470, loss: 0.09982287138700485 2023-01-24 04:13:45.366933: step: 496/470, loss: 0.02451414242386818 2023-01-24 04:13:46.030509: step: 498/470, loss: 0.05906842648983002 2023-01-24 04:13:46.829490: step: 500/470, loss: 0.02308048866689205 2023-01-24 04:13:47.680118: step: 502/470, loss: 0.4586753845214844 2023-01-24 04:13:48.466832: step: 504/470, loss: 0.001469065435230732 2023-01-24 04:13:49.181384: step: 506/470, loss: 0.39444416761398315 2023-01-24 04:13:49.880724: step: 508/470, loss: 0.07880131155252457 2023-01-24 04:13:50.634090: step: 510/470, loss: 0.34225597977638245 2023-01-24 04:13:51.375772: step: 512/470, loss: 0.07212910801172256 2023-01-24 04:13:52.208289: step: 514/470, loss: 0.0419406034052372 2023-01-24 04:13:52.893924: step: 516/470, loss: 0.24012605845928192 2023-01-24 04:13:53.618303: step: 518/470, loss: 0.02023383416235447 2023-01-24 04:13:54.332303: step: 520/470, loss: 0.2502630650997162 2023-01-24 04:13:55.067938: step: 522/470, loss: 0.2373182326555252 2023-01-24 04:13:55.789671: step: 524/470, loss: 0.07686462253332138 2023-01-24 04:13:56.548416: step: 526/470, loss: 0.11860328167676926 2023-01-24 04:13:57.296663: step: 528/470, loss: 0.07658058404922485 2023-01-24 04:13:58.079411: step: 530/470, loss: 0.033899374306201935 2023-01-24 04:13:58.772898: step: 532/470, loss: 0.010917163453996181 2023-01-24 04:13:59.489147: step: 534/470, loss: 0.017348704859614372 2023-01-24 04:14:00.234459: step: 536/470, loss: 0.32592514157295227 2023-01-24 04:14:00.960921: step: 538/470, loss: 0.12130807340145111 2023-01-24 04:14:01.631871: step: 540/470, loss: 0.06882882863283157 2023-01-24 04:14:02.507597: step: 542/470, loss: 0.009247006848454475 2023-01-24 04:14:03.318320: step: 544/470, loss: 0.05684985592961311 2023-01-24 04:14:04.153839: step: 546/470, loss: 0.08473062515258789 2023-01-24 04:14:04.909056: step: 548/470, loss: 0.0109866289421916 2023-01-24 04:14:05.604434: step: 550/470, loss: 0.04165481775999069 2023-01-24 04:14:06.375258: step: 552/470, loss: 0.01383355911821127 2023-01-24 04:14:07.061459: step: 554/470, loss: 0.08635249733924866 2023-01-24 04:14:07.807839: step: 556/470, loss: 0.05461049824953079 2023-01-24 04:14:08.467162: step: 558/470, loss: 0.015371817164123058 2023-01-24 04:14:09.124822: step: 560/470, loss: 0.6815204620361328 2023-01-24 04:14:09.860970: step: 562/470, loss: 0.09630201011896133 2023-01-24 04:14:10.519886: step: 564/470, loss: 0.05369073897600174 2023-01-24 04:14:11.206695: step: 566/470, loss: 0.058216556906700134 2023-01-24 04:14:11.965326: step: 568/470, loss: 0.02524617128074169 2023-01-24 04:14:12.649114: step: 570/470, loss: 0.04199972376227379 2023-01-24 04:14:13.295319: step: 572/470, loss: 0.015193572267889977 2023-01-24 04:14:14.036265: step: 574/470, loss: 0.039925094693899155 2023-01-24 04:14:14.829644: step: 576/470, loss: 0.13338324427604675 2023-01-24 04:14:15.538462: step: 578/470, loss: 0.062471386045217514 2023-01-24 04:14:16.291854: step: 580/470, loss: 0.09208384156227112 2023-01-24 04:14:17.027623: step: 582/470, loss: 0.030092593282461166 2023-01-24 04:14:17.739644: step: 584/470, loss: 0.06586410105228424 2023-01-24 04:14:18.470696: step: 586/470, loss: 0.02899825945496559 2023-01-24 04:14:19.217904: step: 588/470, loss: 0.10743267834186554 2023-01-24 04:14:19.919067: step: 590/470, loss: 0.10501951724290848 2023-01-24 04:14:20.644186: step: 592/470, loss: 1.813976526260376 2023-01-24 04:14:21.392119: step: 594/470, loss: 0.06850782036781311 2023-01-24 04:14:22.154817: step: 596/470, loss: 0.08768956363201141 2023-01-24 04:14:22.924149: step: 598/470, loss: 0.0878264382481575 2023-01-24 04:14:23.656330: step: 600/470, loss: 0.022833187133073807 2023-01-24 04:14:24.383179: step: 602/470, loss: 0.07362283021211624 2023-01-24 04:14:25.174250: step: 604/470, loss: 0.02391095831990242 2023-01-24 04:14:25.993947: step: 606/470, loss: 0.0804729014635086 2023-01-24 04:14:26.683088: step: 608/470, loss: 0.012506152503192425 2023-01-24 04:14:27.409664: step: 610/470, loss: 0.134328231215477 2023-01-24 04:14:28.126370: step: 612/470, loss: 0.10547507554292679 2023-01-24 04:14:28.898359: step: 614/470, loss: 0.12143544852733612 2023-01-24 04:14:29.684514: step: 616/470, loss: 0.08066742867231369 2023-01-24 04:14:30.441240: step: 618/470, loss: 0.03654317930340767 2023-01-24 04:14:31.240645: step: 620/470, loss: 0.03914722800254822 2023-01-24 04:14:32.001512: step: 622/470, loss: 0.05489708110690117 2023-01-24 04:14:32.828002: step: 624/470, loss: 0.06426960974931717 2023-01-24 04:14:33.556125: step: 626/470, loss: 0.06736937165260315 2023-01-24 04:14:34.284105: step: 628/470, loss: 0.11102497577667236 2023-01-24 04:14:35.035431: step: 630/470, loss: 0.012651097029447556 2023-01-24 04:14:35.760323: step: 632/470, loss: 2.867856502532959 2023-01-24 04:14:36.443778: step: 634/470, loss: 0.15367330610752106 2023-01-24 04:14:37.199489: step: 636/470, loss: 0.0965505912899971 2023-01-24 04:14:37.968729: step: 638/470, loss: 0.015126102603971958 2023-01-24 04:14:38.685479: step: 640/470, loss: 0.11571274697780609 2023-01-24 04:14:39.374266: step: 642/470, loss: 0.057073745876550674 2023-01-24 04:14:40.174572: step: 644/470, loss: 0.13571985065937042 2023-01-24 04:14:41.000540: step: 646/470, loss: 0.14553725719451904 2023-01-24 04:14:41.777924: step: 648/470, loss: 0.17457713186740875 2023-01-24 04:14:42.505914: step: 650/470, loss: 0.045833222568035126 2023-01-24 04:14:43.233692: step: 652/470, loss: 0.08109059184789658 2023-01-24 04:14:43.938833: step: 654/470, loss: 0.06932274252176285 2023-01-24 04:14:44.704823: step: 656/470, loss: 0.020560914650559425 2023-01-24 04:14:45.429757: step: 658/470, loss: 0.08866900950670242 2023-01-24 04:14:46.204477: step: 660/470, loss: 0.14196772873401642 2023-01-24 04:14:46.935380: step: 662/470, loss: 0.025075623765587807 2023-01-24 04:14:47.638992: step: 664/470, loss: 0.05653095617890358 2023-01-24 04:14:48.446625: step: 666/470, loss: 0.0553109236061573 2023-01-24 04:14:49.242580: step: 668/470, loss: 0.014820709824562073 2023-01-24 04:14:49.995661: step: 670/470, loss: 0.1057378500699997 2023-01-24 04:14:50.774756: step: 672/470, loss: 0.04209692403674126 2023-01-24 04:14:51.524609: step: 674/470, loss: 0.009847085922956467 2023-01-24 04:14:52.309359: step: 676/470, loss: 0.0047174664214253426 2023-01-24 04:14:52.983912: step: 678/470, loss: 0.04511541128158569 2023-01-24 04:14:53.771600: step: 680/470, loss: 0.04158242791891098 2023-01-24 04:14:54.483027: step: 682/470, loss: 0.02903454191982746 2023-01-24 04:14:55.240301: step: 684/470, loss: 0.015415815636515617 2023-01-24 04:14:55.902549: step: 686/470, loss: 0.050201885402202606 2023-01-24 04:14:56.802941: step: 688/470, loss: 0.05017707124352455 2023-01-24 04:14:57.486881: step: 690/470, loss: 0.09871582686901093 2023-01-24 04:14:58.232734: step: 692/470, loss: 0.12438881397247314 2023-01-24 04:14:58.962128: step: 694/470, loss: 1.1262582540512085 2023-01-24 04:14:59.707341: step: 696/470, loss: 0.15150487422943115 2023-01-24 04:15:00.437301: step: 698/470, loss: 0.8048862814903259 2023-01-24 04:15:01.152848: step: 700/470, loss: 0.04326051101088524 2023-01-24 04:15:01.922852: step: 702/470, loss: 0.05587514489889145 2023-01-24 04:15:02.698977: step: 704/470, loss: 0.07721059769392014 2023-01-24 04:15:03.475319: step: 706/470, loss: 0.06396961212158203 2023-01-24 04:15:04.257279: step: 708/470, loss: 0.057133980095386505 2023-01-24 04:15:05.153106: step: 710/470, loss: 0.14436408877372742 2023-01-24 04:15:05.921766: step: 712/470, loss: 0.058494627475738525 2023-01-24 04:15:06.699853: step: 714/470, loss: 0.6884530782699585 2023-01-24 04:15:07.474964: step: 716/470, loss: 0.07548058032989502 2023-01-24 04:15:08.196147: step: 718/470, loss: 0.020441729575395584 2023-01-24 04:15:08.915241: step: 720/470, loss: 0.23590579628944397 2023-01-24 04:15:09.578685: step: 722/470, loss: 0.05578876659274101 2023-01-24 04:15:10.305461: step: 724/470, loss: 0.12205921858549118 2023-01-24 04:15:10.993503: step: 726/470, loss: 0.08680590242147446 2023-01-24 04:15:11.741150: step: 728/470, loss: 0.07038237154483795 2023-01-24 04:15:12.494977: step: 730/470, loss: 0.0950479656457901 2023-01-24 04:15:13.278716: step: 732/470, loss: 0.019861508160829544 2023-01-24 04:15:13.999140: step: 734/470, loss: 0.014996448531746864 2023-01-24 04:15:14.713051: step: 736/470, loss: 0.10738176852464676 2023-01-24 04:15:15.476778: step: 738/470, loss: 0.07451841235160828 2023-01-24 04:15:16.265827: step: 740/470, loss: 0.1438053697347641 2023-01-24 04:15:16.976014: step: 742/470, loss: 0.11218611896038055 2023-01-24 04:15:17.697655: step: 744/470, loss: 0.06557218730449677 2023-01-24 04:15:18.401360: step: 746/470, loss: 0.04441092908382416 2023-01-24 04:15:19.202866: step: 748/470, loss: 0.09895999729633331 2023-01-24 04:15:19.983732: step: 750/470, loss: 0.22363828122615814 2023-01-24 04:15:20.793846: step: 752/470, loss: 0.029841436073184013 2023-01-24 04:15:21.499878: step: 754/470, loss: 0.14877954125404358 2023-01-24 04:15:22.235831: step: 756/470, loss: 0.025677978992462158 2023-01-24 04:15:22.858578: step: 758/470, loss: 0.05344592407345772 2023-01-24 04:15:23.557022: step: 760/470, loss: 0.029401803389191628 2023-01-24 04:15:24.236604: step: 762/470, loss: 0.03349433094263077 2023-01-24 04:15:24.964852: step: 764/470, loss: 0.08564404398202896 2023-01-24 04:15:25.687058: step: 766/470, loss: 0.04291162267327309 2023-01-24 04:15:26.373329: step: 768/470, loss: 0.02198793552815914 2023-01-24 04:15:27.106715: step: 770/470, loss: 0.04124632105231285 2023-01-24 04:15:27.869216: step: 772/470, loss: 0.07800045609474182 2023-01-24 04:15:28.663992: step: 774/470, loss: 0.008934520184993744 2023-01-24 04:15:29.351721: step: 776/470, loss: 0.006552144419401884 2023-01-24 04:15:30.228076: step: 778/470, loss: 0.008206459693610668 2023-01-24 04:15:30.895146: step: 780/470, loss: 0.07419215887784958 2023-01-24 04:15:31.649662: step: 782/470, loss: 0.07244870811700821 2023-01-24 04:15:32.450741: step: 784/470, loss: 0.010627111420035362 2023-01-24 04:15:33.264977: step: 786/470, loss: 0.02431710809469223 2023-01-24 04:15:34.014510: step: 788/470, loss: 0.053509872406721115 2023-01-24 04:15:34.770768: step: 790/470, loss: 0.030945895239710808 2023-01-24 04:15:35.519105: step: 792/470, loss: 0.03115873783826828 2023-01-24 04:15:36.275804: step: 794/470, loss: 0.07354782521724701 2023-01-24 04:15:36.997742: step: 796/470, loss: 0.04989173263311386 2023-01-24 04:15:37.746421: step: 798/470, loss: 0.047723811119794846 2023-01-24 04:15:38.517886: step: 800/470, loss: 0.15342743694782257 2023-01-24 04:15:39.275548: step: 802/470, loss: 0.018950236961245537 2023-01-24 04:15:39.969630: step: 804/470, loss: 0.12344154715538025 2023-01-24 04:15:40.722778: step: 806/470, loss: 0.04489344358444214 2023-01-24 04:15:41.431605: step: 808/470, loss: 0.013071177527308464 2023-01-24 04:15:42.174101: step: 810/470, loss: 0.1920340359210968 2023-01-24 04:15:42.877009: step: 812/470, loss: 0.041845425963401794 2023-01-24 04:15:43.644449: step: 814/470, loss: 0.031873006373643875 2023-01-24 04:15:44.339724: step: 816/470, loss: 0.008344985544681549 2023-01-24 04:15:45.121936: step: 818/470, loss: 0.4394215941429138 2023-01-24 04:15:45.877440: step: 820/470, loss: 0.046475548297166824 2023-01-24 04:15:46.769115: step: 822/470, loss: 0.023420801386237144 2023-01-24 04:15:47.441865: step: 824/470, loss: 0.046169888228178024 2023-01-24 04:15:48.182852: step: 826/470, loss: 0.0012928505893796682 2023-01-24 04:15:48.970601: step: 828/470, loss: 0.06589809060096741 2023-01-24 04:15:49.672762: step: 830/470, loss: 0.08819597214460373 2023-01-24 04:15:50.456854: step: 832/470, loss: 0.04559420421719551 2023-01-24 04:15:51.164506: step: 834/470, loss: 0.01821593940258026 2023-01-24 04:15:51.872978: step: 836/470, loss: 0.1270139217376709 2023-01-24 04:15:52.660481: step: 838/470, loss: 0.0652085393667221 2023-01-24 04:15:53.367505: step: 840/470, loss: 0.190468892455101 2023-01-24 04:15:54.150646: step: 842/470, loss: 0.4353700280189514 2023-01-24 04:15:54.938826: step: 844/470, loss: 0.08427219092845917 2023-01-24 04:15:55.705346: step: 846/470, loss: 0.044107187539339066 2023-01-24 04:15:56.404939: step: 848/470, loss: 0.025654705241322517 2023-01-24 04:15:57.193672: step: 850/470, loss: 0.06636069715023041 2023-01-24 04:15:57.959107: step: 852/470, loss: 0.10997164994478226 2023-01-24 04:15:58.665223: step: 854/470, loss: 0.006697851698845625 2023-01-24 04:15:59.457236: step: 856/470, loss: 0.07333116978406906 2023-01-24 04:16:00.124346: step: 858/470, loss: 0.05577976629137993 2023-01-24 04:16:00.816774: step: 860/470, loss: 0.5426846742630005 2023-01-24 04:16:01.644495: step: 862/470, loss: 0.11723057180643082 2023-01-24 04:16:02.445393: step: 864/470, loss: 0.2411772608757019 2023-01-24 04:16:03.128399: step: 866/470, loss: 0.05249849334359169 2023-01-24 04:16:03.815423: step: 868/470, loss: 0.03625642880797386 2023-01-24 04:16:04.597875: step: 870/470, loss: 0.04509961977601051 2023-01-24 04:16:05.358639: step: 872/470, loss: 0.013741428032517433 2023-01-24 04:16:06.080886: step: 874/470, loss: 0.04218994453549385 2023-01-24 04:16:06.864684: step: 876/470, loss: 0.055986031889915466 2023-01-24 04:16:07.682820: step: 878/470, loss: 0.048953864723443985 2023-01-24 04:16:08.370161: step: 880/470, loss: 0.09331956505775452 2023-01-24 04:16:09.145178: step: 882/470, loss: 0.6611769199371338 2023-01-24 04:16:09.841841: step: 884/470, loss: 0.07350049912929535 2023-01-24 04:16:10.646108: step: 886/470, loss: 0.18695667386054993 2023-01-24 04:16:11.466001: step: 888/470, loss: 0.07724187523126602 2023-01-24 04:16:12.222537: step: 890/470, loss: 0.06299719214439392 2023-01-24 04:16:13.033536: step: 892/470, loss: 0.10604791343212128 2023-01-24 04:16:13.769562: step: 894/470, loss: 0.04927706718444824 2023-01-24 04:16:14.504203: step: 896/470, loss: 0.1778610497713089 2023-01-24 04:16:15.208367: step: 898/470, loss: 0.10980663448572159 2023-01-24 04:16:15.973549: step: 900/470, loss: 0.05071067810058594 2023-01-24 04:16:16.728713: step: 902/470, loss: 0.14896638691425323 2023-01-24 04:16:17.483221: step: 904/470, loss: 0.09147992730140686 2023-01-24 04:16:18.231734: step: 906/470, loss: 0.054453279823064804 2023-01-24 04:16:18.995566: step: 908/470, loss: 0.11247234046459198 2023-01-24 04:16:19.808343: step: 910/470, loss: 0.05727868527173996 2023-01-24 04:16:20.679700: step: 912/470, loss: 0.050508055835962296 2023-01-24 04:16:21.375971: step: 914/470, loss: 0.03650575131177902 2023-01-24 04:16:22.219652: step: 916/470, loss: 0.08102092146873474 2023-01-24 04:16:22.973830: step: 918/470, loss: 0.06759768724441528 2023-01-24 04:16:23.735878: step: 920/470, loss: 0.1721310019493103 2023-01-24 04:16:24.552661: step: 922/470, loss: 0.03154829144477844 2023-01-24 04:16:25.363920: step: 924/470, loss: 0.21797621250152588 2023-01-24 04:16:26.012464: step: 926/470, loss: 0.06564341485500336 2023-01-24 04:16:26.929127: step: 928/470, loss: 0.09147854149341583 2023-01-24 04:16:27.714713: step: 930/470, loss: 0.014387188479304314 2023-01-24 04:16:28.418489: step: 932/470, loss: 0.08677081763744354 2023-01-24 04:16:29.144860: step: 934/470, loss: 0.003573360852897167 2023-01-24 04:16:29.904536: step: 936/470, loss: 0.19656676054000854 2023-01-24 04:16:30.672095: step: 938/470, loss: 0.19232219457626343 2023-01-24 04:16:31.439706: step: 940/470, loss: 0.13135120272636414 2023-01-24 04:16:32.042061: step: 942/470, loss: 0.04300255328416824 ================================================== Loss: 0.135 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32308049602488764, 'r': 0.32246744005520095, 'f1': 0.32277367694034353}, 'combined': 0.2378332356402531, 'epoch': 19} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36072944901631093, 'r': 0.346161528959883, 'f1': 0.35329537793746646}, 'combined': 0.23553025195831093, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3104104907773386, 'r': 0.3251358461273072, 'f1': 0.3176025781447468}, 'combined': 0.23402295231718184, 'epoch': 19} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35474149093497537, 'r': 0.35576478369728776, 'f1': 0.3552524004274405}, 'combined': 0.2368349336182936, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46875, 'r': 0.32608695652173914, 'f1': 0.38461538461538464}, 'combined': 0.2564102564102564, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 19} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:19:15.526168: step: 2/470, loss: 0.0692053884267807 2023-01-24 04:19:16.217663: step: 4/470, loss: 0.011433595791459084 2023-01-24 04:19:16.909671: step: 6/470, loss: 0.01590590551495552 2023-01-24 04:19:17.678777: step: 8/470, loss: 0.33525508642196655 2023-01-24 04:19:18.383793: step: 10/470, loss: 0.005369840655475855 2023-01-24 04:19:19.112140: step: 12/470, loss: 0.0471285842359066 2023-01-24 04:19:19.842457: step: 14/470, loss: 0.06779136508703232 2023-01-24 04:19:20.648597: step: 16/470, loss: 0.08163978904485703 2023-01-24 04:19:21.396044: step: 18/470, loss: 0.03811454027891159 2023-01-24 04:19:22.136241: step: 20/470, loss: 0.0674220472574234 2023-01-24 04:19:22.864672: step: 22/470, loss: 0.08177307993173599 2023-01-24 04:19:23.512818: step: 24/470, loss: 0.18022318184375763 2023-01-24 04:19:24.233395: step: 26/470, loss: 0.00783989205956459 2023-01-24 04:19:24.955998: step: 28/470, loss: 0.10600445419549942 2023-01-24 04:19:25.623596: step: 30/470, loss: 0.026321159675717354 2023-01-24 04:19:26.375739: step: 32/470, loss: 0.022986600175499916 2023-01-24 04:19:27.156004: step: 34/470, loss: 0.11820484697818756 2023-01-24 04:19:27.935915: step: 36/470, loss: 0.011436332017183304 2023-01-24 04:19:28.626569: step: 38/470, loss: 0.020493147894740105 2023-01-24 04:19:29.394088: step: 40/470, loss: 0.05821879953145981 2023-01-24 04:19:30.148415: step: 42/470, loss: 0.0022761637810617685 2023-01-24 04:19:30.809685: step: 44/470, loss: 0.036995094269514084 2023-01-24 04:19:31.599943: step: 46/470, loss: 0.03528263792395592 2023-01-24 04:19:32.376400: step: 48/470, loss: 0.09343452751636505 2023-01-24 04:19:33.091831: step: 50/470, loss: 0.06097423657774925 2023-01-24 04:19:33.850467: step: 52/470, loss: 0.10285371541976929 2023-01-24 04:19:34.453241: step: 54/470, loss: 0.00979323498904705 2023-01-24 04:19:35.181037: step: 56/470, loss: 0.062471531331539154 2023-01-24 04:19:35.915513: step: 58/470, loss: 0.02455725520849228 2023-01-24 04:19:36.627332: step: 60/470, loss: 0.0015147414524108171 2023-01-24 04:19:37.357494: step: 62/470, loss: 0.01592283509671688 2023-01-24 04:19:38.078107: step: 64/470, loss: 0.09599973261356354 2023-01-24 04:19:38.806592: step: 66/470, loss: 0.033546753227710724 2023-01-24 04:19:39.582651: step: 68/470, loss: 0.04527419060468674 2023-01-24 04:19:40.248530: step: 70/470, loss: 0.01420632191002369 2023-01-24 04:19:41.041920: step: 72/470, loss: 0.03015095181763172 2023-01-24 04:19:41.859337: step: 74/470, loss: 0.021656258031725883 2023-01-24 04:19:42.697806: step: 76/470, loss: 0.05661938339471817 2023-01-24 04:19:43.490974: step: 78/470, loss: 0.08553628623485565 2023-01-24 04:19:44.232123: step: 80/470, loss: 0.017559433355927467 2023-01-24 04:19:44.955201: step: 82/470, loss: 0.17091146111488342 2023-01-24 04:19:45.666107: step: 84/470, loss: 0.022843779996037483 2023-01-24 04:19:46.362028: step: 86/470, loss: 0.04321271553635597 2023-01-24 04:19:47.053776: step: 88/470, loss: 0.01985297165811062 2023-01-24 04:19:47.769110: step: 90/470, loss: 0.05657010152935982 2023-01-24 04:19:48.518388: step: 92/470, loss: 0.039768945425748825 2023-01-24 04:19:49.332287: step: 94/470, loss: 0.03442995622754097 2023-01-24 04:19:50.086700: step: 96/470, loss: 0.06425513327121735 2023-01-24 04:19:50.782351: step: 98/470, loss: 0.061561282724142075 2023-01-24 04:19:51.461632: step: 100/470, loss: 0.04602031782269478 2023-01-24 04:19:52.126827: step: 102/470, loss: 0.09504874795675278 2023-01-24 04:19:52.818294: step: 104/470, loss: 0.11075009405612946 2023-01-24 04:19:53.537137: step: 106/470, loss: 0.3181428015232086 2023-01-24 04:19:54.249251: step: 108/470, loss: 0.006885759066790342 2023-01-24 04:19:54.991335: step: 110/470, loss: 0.012126323767006397 2023-01-24 04:19:55.701940: step: 112/470, loss: 0.04041305184364319 2023-01-24 04:19:56.412721: step: 114/470, loss: 0.053244318813085556 2023-01-24 04:19:57.150099: step: 116/470, loss: 0.06406726688146591 2023-01-24 04:19:57.916424: step: 118/470, loss: 0.276273250579834 2023-01-24 04:19:58.684684: step: 120/470, loss: 0.017418205738067627 2023-01-24 04:19:59.441801: step: 122/470, loss: 0.0642717108130455 2023-01-24 04:20:00.233598: step: 124/470, loss: 0.06656879186630249 2023-01-24 04:20:01.050732: step: 126/470, loss: 0.038813501596450806 2023-01-24 04:20:01.762402: step: 128/470, loss: 0.037797726690769196 2023-01-24 04:20:02.594833: step: 130/470, loss: 0.012955233454704285 2023-01-24 04:20:03.318484: step: 132/470, loss: 0.14164546132087708 2023-01-24 04:20:04.095026: step: 134/470, loss: 0.026768483221530914 2023-01-24 04:20:04.837592: step: 136/470, loss: 0.518687903881073 2023-01-24 04:20:05.601644: step: 138/470, loss: 0.08869636803865433 2023-01-24 04:20:06.319002: step: 140/470, loss: 0.4429130554199219 2023-01-24 04:20:07.072545: step: 142/470, loss: 0.03325126692652702 2023-01-24 04:20:07.815209: step: 144/470, loss: 0.046417348086833954 2023-01-24 04:20:08.567490: step: 146/470, loss: 0.022128436714410782 2023-01-24 04:20:09.237625: step: 148/470, loss: 0.08988092094659805 2023-01-24 04:20:09.985422: step: 150/470, loss: 0.05364342778921127 2023-01-24 04:20:10.703415: step: 152/470, loss: 0.017953716218471527 2023-01-24 04:20:11.492919: step: 154/470, loss: 0.09780565649271011 2023-01-24 04:20:12.201073: step: 156/470, loss: 0.009147114120423794 2023-01-24 04:20:12.880239: step: 158/470, loss: 0.09362566471099854 2023-01-24 04:20:13.568121: step: 160/470, loss: 0.013519582338631153 2023-01-24 04:20:14.238825: step: 162/470, loss: 0.45257019996643066 2023-01-24 04:20:14.898664: step: 164/470, loss: 0.08413586020469666 2023-01-24 04:20:15.681981: step: 166/470, loss: 0.04121706634759903 2023-01-24 04:20:16.435469: step: 168/470, loss: 0.026533417403697968 2023-01-24 04:20:17.223353: step: 170/470, loss: 0.21483860909938812 2023-01-24 04:20:18.035994: step: 172/470, loss: 0.024038920179009438 2023-01-24 04:20:18.755914: step: 174/470, loss: 0.2483181357383728 2023-01-24 04:20:19.488855: step: 176/470, loss: 0.030309997498989105 2023-01-24 04:20:20.282979: step: 178/470, loss: 0.02682347223162651 2023-01-24 04:20:21.076964: step: 180/470, loss: 0.17442865669727325 2023-01-24 04:20:21.793531: step: 182/470, loss: 0.27949750423431396 2023-01-24 04:20:22.589379: step: 184/470, loss: 0.035176832228899 2023-01-24 04:20:23.363666: step: 186/470, loss: 0.06619660556316376 2023-01-24 04:20:24.100536: step: 188/470, loss: 1.9417052268981934 2023-01-24 04:20:24.833808: step: 190/470, loss: 0.1208546981215477 2023-01-24 04:20:25.564628: step: 192/470, loss: 0.12052920460700989 2023-01-24 04:20:26.258776: step: 194/470, loss: 0.06708040833473206 2023-01-24 04:20:26.892172: step: 196/470, loss: 0.0005991773214191198 2023-01-24 04:20:27.642466: step: 198/470, loss: 0.18721455335617065 2023-01-24 04:20:28.360795: step: 200/470, loss: 0.031240878626704216 2023-01-24 04:20:29.112296: step: 202/470, loss: 0.2110924869775772 2023-01-24 04:20:29.865662: step: 204/470, loss: 0.11531258374452591 2023-01-24 04:20:30.656119: step: 206/470, loss: 0.04128406569361687 2023-01-24 04:20:31.443632: step: 208/470, loss: 0.07297002524137497 2023-01-24 04:20:32.162369: step: 210/470, loss: 0.04553265497088432 2023-01-24 04:20:32.965451: step: 212/470, loss: 0.02252204902470112 2023-01-24 04:20:33.834409: step: 214/470, loss: 0.25439098477363586 2023-01-24 04:20:34.624924: step: 216/470, loss: 0.7509600520133972 2023-01-24 04:20:35.309401: step: 218/470, loss: 0.034447189420461655 2023-01-24 04:20:36.016345: step: 220/470, loss: 0.01638934761285782 2023-01-24 04:20:36.752136: step: 222/470, loss: 0.04577914625406265 2023-01-24 04:20:37.575742: step: 224/470, loss: 0.04711466655135155 2023-01-24 04:20:38.319026: step: 226/470, loss: 0.30996254086494446 2023-01-24 04:20:39.084556: step: 228/470, loss: 0.00331405783072114 2023-01-24 04:20:39.810251: step: 230/470, loss: 0.07725219428539276 2023-01-24 04:20:40.455260: step: 232/470, loss: 0.5706613659858704 2023-01-24 04:20:41.220235: step: 234/470, loss: 0.03361904248595238 2023-01-24 04:20:41.994783: step: 236/470, loss: 0.04464048147201538 2023-01-24 04:20:42.732750: step: 238/470, loss: 0.024454206228256226 2023-01-24 04:20:43.437679: step: 240/470, loss: 0.012058422900736332 2023-01-24 04:20:44.208478: step: 242/470, loss: 0.07473550736904144 2023-01-24 04:20:44.942983: step: 244/470, loss: 0.06981978565454483 2023-01-24 04:20:45.664482: step: 246/470, loss: 0.06838370859622955 2023-01-24 04:20:46.372663: step: 248/470, loss: 1.1867821216583252 2023-01-24 04:20:47.103723: step: 250/470, loss: 0.1255021095275879 2023-01-24 04:20:47.876067: step: 252/470, loss: 0.009189283475279808 2023-01-24 04:20:48.670370: step: 254/470, loss: 0.060588739812374115 2023-01-24 04:20:49.454126: step: 256/470, loss: 0.08720239251852036 2023-01-24 04:20:50.237061: step: 258/470, loss: 0.04909271374344826 2023-01-24 04:20:51.072751: step: 260/470, loss: 0.018197335302829742 2023-01-24 04:20:51.768613: step: 262/470, loss: 0.034091971814632416 2023-01-24 04:20:52.495041: step: 264/470, loss: 0.036441948264837265 2023-01-24 04:20:53.208595: step: 266/470, loss: 0.00993148423731327 2023-01-24 04:20:53.976409: step: 268/470, loss: 0.053492508828639984 2023-01-24 04:20:54.806754: step: 270/470, loss: 0.04624278470873833 2023-01-24 04:20:55.495022: step: 272/470, loss: 0.17606455087661743 2023-01-24 04:20:56.209002: step: 274/470, loss: 0.018186697736382484 2023-01-24 04:20:56.949729: step: 276/470, loss: 0.059042301028966904 2023-01-24 04:20:57.739863: step: 278/470, loss: 0.05925830453634262 2023-01-24 04:20:58.465381: step: 280/470, loss: 0.017211155965924263 2023-01-24 04:20:59.261310: step: 282/470, loss: 1.1372332572937012 2023-01-24 04:20:59.999679: step: 284/470, loss: 0.011981363408267498 2023-01-24 04:21:00.769888: step: 286/470, loss: 0.0610564649105072 2023-01-24 04:21:01.590289: step: 288/470, loss: 0.016656432300806046 2023-01-24 04:21:02.426248: step: 290/470, loss: 0.04388560354709625 2023-01-24 04:21:03.113311: step: 292/470, loss: 0.03743298724293709 2023-01-24 04:21:03.844381: step: 294/470, loss: 0.021319955587387085 2023-01-24 04:21:04.626372: step: 296/470, loss: 0.08074367791414261 2023-01-24 04:21:05.313381: step: 298/470, loss: 0.14202333986759186 2023-01-24 04:21:06.061182: step: 300/470, loss: 0.09190955758094788 2023-01-24 04:21:06.823517: step: 302/470, loss: 0.053686317056417465 2023-01-24 04:21:07.522233: step: 304/470, loss: 0.05814488232135773 2023-01-24 04:21:08.254556: step: 306/470, loss: 0.028976615518331528 2023-01-24 04:21:08.999652: step: 308/470, loss: 0.03165995329618454 2023-01-24 04:21:09.768648: step: 310/470, loss: 0.18481889367103577 2023-01-24 04:21:10.499560: step: 312/470, loss: 0.06986112892627716 2023-01-24 04:21:11.209615: step: 314/470, loss: 0.08050384372472763 2023-01-24 04:21:11.908138: step: 316/470, loss: 0.03150481358170509 2023-01-24 04:21:12.632233: step: 318/470, loss: 0.027586523443460464 2023-01-24 04:21:13.391095: step: 320/470, loss: 0.00899940449744463 2023-01-24 04:21:14.129384: step: 322/470, loss: 0.02793573960661888 2023-01-24 04:21:14.933944: step: 324/470, loss: 0.04435381293296814 2023-01-24 04:21:15.608319: step: 326/470, loss: 0.037694673985242844 2023-01-24 04:21:16.377753: step: 328/470, loss: 0.8200516700744629 2023-01-24 04:21:17.138674: step: 330/470, loss: 0.02281474508345127 2023-01-24 04:21:17.927920: step: 332/470, loss: 0.09635286033153534 2023-01-24 04:21:18.712204: step: 334/470, loss: 0.15880021452903748 2023-01-24 04:21:19.453088: step: 336/470, loss: 0.00870030838996172 2023-01-24 04:21:20.197952: step: 338/470, loss: 0.0723002627491951 2023-01-24 04:21:21.037019: step: 340/470, loss: 0.07791668176651001 2023-01-24 04:21:21.790773: step: 342/470, loss: 0.041542887687683105 2023-01-24 04:21:22.529748: step: 344/470, loss: 0.07663816213607788 2023-01-24 04:21:23.235164: step: 346/470, loss: 0.02207820490002632 2023-01-24 04:21:23.967452: step: 348/470, loss: 0.11398249864578247 2023-01-24 04:21:24.751504: step: 350/470, loss: 0.06222032010555267 2023-01-24 04:21:25.443345: step: 352/470, loss: 0.7580427527427673 2023-01-24 04:21:26.117097: step: 354/470, loss: 0.31359562277793884 2023-01-24 04:21:26.839505: step: 356/470, loss: 0.09936029464006424 2023-01-24 04:21:27.523776: step: 358/470, loss: 0.10575620830059052 2023-01-24 04:21:28.199405: step: 360/470, loss: 0.08771451562643051 2023-01-24 04:21:28.980333: step: 362/470, loss: 0.045165304094552994 2023-01-24 04:21:29.721101: step: 364/470, loss: 0.08250702917575836 2023-01-24 04:21:30.469438: step: 366/470, loss: 0.036362942308187485 2023-01-24 04:21:31.247438: step: 368/470, loss: 0.035159844905138016 2023-01-24 04:21:31.937887: step: 370/470, loss: 0.06006016209721565 2023-01-24 04:21:32.710978: step: 372/470, loss: 0.11637677252292633 2023-01-24 04:21:33.480286: step: 374/470, loss: 0.012416702695190907 2023-01-24 04:21:34.239422: step: 376/470, loss: 0.1937997043132782 2023-01-24 04:21:34.941641: step: 378/470, loss: 0.0882231742143631 2023-01-24 04:21:35.659445: step: 380/470, loss: 0.015106822364032269 2023-01-24 04:21:36.358989: step: 382/470, loss: 0.1471942663192749 2023-01-24 04:21:37.093965: step: 384/470, loss: 0.024438517168164253 2023-01-24 04:21:37.854607: step: 386/470, loss: 0.015194579027593136 2023-01-24 04:21:38.586875: step: 388/470, loss: 0.18839724361896515 2023-01-24 04:21:39.456592: step: 390/470, loss: 0.05323400720953941 2023-01-24 04:21:40.168086: step: 392/470, loss: 0.06262640655040741 2023-01-24 04:21:40.895861: step: 394/470, loss: 0.052381135523319244 2023-01-24 04:21:41.615481: step: 396/470, loss: 0.1799657940864563 2023-01-24 04:21:42.317747: step: 398/470, loss: 0.004220017232000828 2023-01-24 04:21:43.070625: step: 400/470, loss: 0.4785889685153961 2023-01-24 04:21:43.758432: step: 402/470, loss: 0.03493707254528999 2023-01-24 04:21:44.466175: step: 404/470, loss: 0.07729945331811905 2023-01-24 04:21:45.283219: step: 406/470, loss: 0.1558285653591156 2023-01-24 04:21:46.203137: step: 408/470, loss: 0.14127235114574432 2023-01-24 04:21:46.984564: step: 410/470, loss: 0.24950149655342102 2023-01-24 04:21:47.771255: step: 412/470, loss: 0.041239332407712936 2023-01-24 04:21:48.476681: step: 414/470, loss: 0.2635876536369324 2023-01-24 04:21:49.265726: step: 416/470, loss: 0.10579922795295715 2023-01-24 04:21:49.996326: step: 418/470, loss: 0.03441992029547691 2023-01-24 04:21:50.853436: step: 420/470, loss: 0.032621387392282486 2023-01-24 04:21:51.680424: step: 422/470, loss: 0.05008916184306145 2023-01-24 04:21:52.438613: step: 424/470, loss: 0.13900715112686157 2023-01-24 04:21:53.163884: step: 426/470, loss: 0.023830300197005272 2023-01-24 04:21:53.933784: step: 428/470, loss: 0.03306613117456436 2023-01-24 04:21:54.627118: step: 430/470, loss: 0.014829058200120926 2023-01-24 04:21:55.422540: step: 432/470, loss: 0.042442865669727325 2023-01-24 04:21:56.186316: step: 434/470, loss: 0.32640278339385986 2023-01-24 04:21:56.936198: step: 436/470, loss: 0.010043848305940628 2023-01-24 04:21:57.683949: step: 438/470, loss: 0.017869826406240463 2023-01-24 04:21:58.396835: step: 440/470, loss: 0.1027931347489357 2023-01-24 04:21:59.114889: step: 442/470, loss: 0.0788130909204483 2023-01-24 04:21:59.812762: step: 444/470, loss: 0.12138024717569351 2023-01-24 04:22:00.555864: step: 446/470, loss: 0.01406558882445097 2023-01-24 04:22:01.304741: step: 448/470, loss: 0.004238845780491829 2023-01-24 04:22:02.170495: step: 450/470, loss: 0.0067049539647996426 2023-01-24 04:22:02.918678: step: 452/470, loss: 0.04448673129081726 2023-01-24 04:22:03.728661: step: 454/470, loss: 0.3222072422504425 2023-01-24 04:22:04.422033: step: 456/470, loss: 0.4006147086620331 2023-01-24 04:22:05.180679: step: 458/470, loss: 0.1256273090839386 2023-01-24 04:22:05.858839: step: 460/470, loss: 0.014067552983760834 2023-01-24 04:22:06.608769: step: 462/470, loss: 0.030554937198758125 2023-01-24 04:22:07.305940: step: 464/470, loss: 0.05165807902812958 2023-01-24 04:22:08.009734: step: 466/470, loss: 0.0559585839509964 2023-01-24 04:22:08.769582: step: 468/470, loss: 0.751693844795227 2023-01-24 04:22:09.502446: step: 470/470, loss: 0.042564861476421356 2023-01-24 04:22:10.205371: step: 472/470, loss: 0.04895629733800888 2023-01-24 04:22:11.021931: step: 474/470, loss: 0.037162844091653824 2023-01-24 04:22:11.799461: step: 476/470, loss: 0.025382233783602715 2023-01-24 04:22:12.548897: step: 478/470, loss: 0.06292865425348282 2023-01-24 04:22:13.330021: step: 480/470, loss: 0.014180587604641914 2023-01-24 04:22:14.188089: step: 482/470, loss: 0.027322018519043922 2023-01-24 04:22:14.958744: step: 484/470, loss: 0.1358848214149475 2023-01-24 04:22:15.642481: step: 486/470, loss: 0.050086334347724915 2023-01-24 04:22:16.368660: step: 488/470, loss: 0.01673760637640953 2023-01-24 04:22:17.083826: step: 490/470, loss: 0.021020902320742607 2023-01-24 04:22:17.803880: step: 492/470, loss: 0.015545294620096684 2023-01-24 04:22:18.555469: step: 494/470, loss: 0.3143145740032196 2023-01-24 04:22:19.348362: step: 496/470, loss: 0.0633402094244957 2023-01-24 04:22:20.109643: step: 498/470, loss: 0.018096817657351494 2023-01-24 04:22:20.865059: step: 500/470, loss: 0.0012216436443850398 2023-01-24 04:22:21.586421: step: 502/470, loss: 0.1902422159910202 2023-01-24 04:22:22.358306: step: 504/470, loss: 0.07036527991294861 2023-01-24 04:22:23.162943: step: 506/470, loss: 0.06424633413553238 2023-01-24 04:22:23.900338: step: 508/470, loss: 0.0811762586236 2023-01-24 04:22:24.631543: step: 510/470, loss: 0.06267711520195007 2023-01-24 04:22:25.498530: step: 512/470, loss: 0.023827198892831802 2023-01-24 04:22:26.234978: step: 514/470, loss: 0.028254536911845207 2023-01-24 04:22:26.936433: step: 516/470, loss: 0.012615739367902279 2023-01-24 04:22:27.592225: step: 518/470, loss: 0.12324491143226624 2023-01-24 04:22:28.302839: step: 520/470, loss: 0.08821484446525574 2023-01-24 04:22:29.026946: step: 522/470, loss: 0.01370615977793932 2023-01-24 04:22:29.769560: step: 524/470, loss: 0.02321157045662403 2023-01-24 04:22:30.440104: step: 526/470, loss: 0.02410917542874813 2023-01-24 04:22:31.195137: step: 528/470, loss: 0.052108101546764374 2023-01-24 04:22:31.931424: step: 530/470, loss: 0.0737091451883316 2023-01-24 04:22:32.662990: step: 532/470, loss: 0.05240051448345184 2023-01-24 04:22:33.342507: step: 534/470, loss: 0.14461572468280792 2023-01-24 04:22:34.030121: step: 536/470, loss: 0.06819018721580505 2023-01-24 04:22:34.787853: step: 538/470, loss: 0.08415396511554718 2023-01-24 04:22:35.461605: step: 540/470, loss: 0.039721451699733734 2023-01-24 04:22:36.214973: step: 542/470, loss: 0.24157670140266418 2023-01-24 04:22:36.916736: step: 544/470, loss: 0.05411506071686745 2023-01-24 04:22:37.809307: step: 546/470, loss: 0.07562316954135895 2023-01-24 04:22:38.542985: step: 548/470, loss: 0.1613171249628067 2023-01-24 04:22:39.216534: step: 550/470, loss: 0.04178428277373314 2023-01-24 04:22:39.943580: step: 552/470, loss: 0.014160760678350925 2023-01-24 04:22:40.693472: step: 554/470, loss: 0.04917175695300102 2023-01-24 04:22:41.400234: step: 556/470, loss: 0.01794840767979622 2023-01-24 04:22:42.186488: step: 558/470, loss: 0.07323423773050308 2023-01-24 04:22:42.956346: step: 560/470, loss: 0.014605486765503883 2023-01-24 04:22:43.642063: step: 562/470, loss: 0.07594714313745499 2023-01-24 04:22:44.359454: step: 564/470, loss: 0.029397977516055107 2023-01-24 04:22:45.114248: step: 566/470, loss: 0.01664639636874199 2023-01-24 04:22:45.968912: step: 568/470, loss: 0.02191932313144207 2023-01-24 04:22:46.624195: step: 570/470, loss: 0.06313026696443558 2023-01-24 04:22:47.397215: step: 572/470, loss: 0.022390831261873245 2023-01-24 04:22:48.149489: step: 574/470, loss: 0.16852179169654846 2023-01-24 04:22:48.872436: step: 576/470, loss: 0.041340142488479614 2023-01-24 04:22:49.670294: step: 578/470, loss: 0.0978948175907135 2023-01-24 04:22:50.557813: step: 580/470, loss: 0.10298915952444077 2023-01-24 04:22:51.324188: step: 582/470, loss: 0.051485490053892136 2023-01-24 04:22:52.198213: step: 584/470, loss: 0.03344123065471649 2023-01-24 04:22:52.996274: step: 586/470, loss: 0.03898704797029495 2023-01-24 04:22:53.703380: step: 588/470, loss: 0.15321972966194153 2023-01-24 04:22:54.395342: step: 590/470, loss: 0.3100159466266632 2023-01-24 04:22:55.170220: step: 592/470, loss: 0.03277548775076866 2023-01-24 04:22:55.852290: step: 594/470, loss: 0.06358427554368973 2023-01-24 04:22:56.739545: step: 596/470, loss: 0.21886910498142242 2023-01-24 04:22:57.474533: step: 598/470, loss: 0.06400435417890549 2023-01-24 04:22:58.311524: step: 600/470, loss: 0.021453171968460083 2023-01-24 04:22:59.038063: step: 602/470, loss: 0.029457837343215942 2023-01-24 04:22:59.851564: step: 604/470, loss: 0.06845896691083908 2023-01-24 04:23:00.619578: step: 606/470, loss: 0.33153408765792847 2023-01-24 04:23:01.387375: step: 608/470, loss: 0.40867775678634644 2023-01-24 04:23:02.163237: step: 610/470, loss: 0.8713477849960327 2023-01-24 04:23:02.867086: step: 612/470, loss: 0.05822436138987541 2023-01-24 04:23:03.595183: step: 614/470, loss: 0.01446563471108675 2023-01-24 04:23:04.339539: step: 616/470, loss: 0.05371719226241112 2023-01-24 04:23:05.146149: step: 618/470, loss: 0.22751787304878235 2023-01-24 04:23:05.877025: step: 620/470, loss: 0.20397146046161652 2023-01-24 04:23:06.610477: step: 622/470, loss: 0.041325464844703674 2023-01-24 04:23:07.349449: step: 624/470, loss: 0.0995861068367958 2023-01-24 04:23:08.043819: step: 626/470, loss: 0.4585106074810028 2023-01-24 04:23:08.777252: step: 628/470, loss: 0.11708439886569977 2023-01-24 04:23:09.524747: step: 630/470, loss: 0.07522755861282349 2023-01-24 04:23:10.271483: step: 632/470, loss: 0.9740254878997803 2023-01-24 04:23:10.973594: step: 634/470, loss: 0.32297220826148987 2023-01-24 04:23:11.824096: step: 636/470, loss: 0.03178303316235542 2023-01-24 04:23:12.608247: step: 638/470, loss: 0.07319549471139908 2023-01-24 04:23:13.334149: step: 640/470, loss: 0.02889355830848217 2023-01-24 04:23:14.084306: step: 642/470, loss: 0.10229209810495377 2023-01-24 04:23:14.801087: step: 644/470, loss: 0.039184339344501495 2023-01-24 04:23:15.558583: step: 646/470, loss: 0.07694227993488312 2023-01-24 04:23:16.288394: step: 648/470, loss: 0.10057219117879868 2023-01-24 04:23:17.000677: step: 650/470, loss: 0.05900505557656288 2023-01-24 04:23:17.754318: step: 652/470, loss: 0.03375502675771713 2023-01-24 04:23:18.454333: step: 654/470, loss: 0.011107545346021652 2023-01-24 04:23:19.255673: step: 656/470, loss: 0.04302780702710152 2023-01-24 04:23:19.961185: step: 658/470, loss: 0.05545317381620407 2023-01-24 04:23:20.669488: step: 660/470, loss: 0.1119319424033165 2023-01-24 04:23:21.447845: step: 662/470, loss: 0.043362777680158615 2023-01-24 04:23:22.189303: step: 664/470, loss: 0.1941765397787094 2023-01-24 04:23:22.908078: step: 666/470, loss: 0.06740397959947586 2023-01-24 04:23:23.577111: step: 668/470, loss: 0.04577159881591797 2023-01-24 04:23:24.306537: step: 670/470, loss: 0.11848827451467514 2023-01-24 04:23:25.072739: step: 672/470, loss: 0.03170297294855118 2023-01-24 04:23:25.778862: step: 674/470, loss: 0.14291562139987946 2023-01-24 04:23:26.452692: step: 676/470, loss: 0.07052916288375854 2023-01-24 04:23:27.184662: step: 678/470, loss: 0.08889731764793396 2023-01-24 04:23:27.914478: step: 680/470, loss: 0.18591055274009705 2023-01-24 04:23:28.671844: step: 682/470, loss: 0.058656156063079834 2023-01-24 04:23:29.424386: step: 684/470, loss: 0.06513772159814835 2023-01-24 04:23:30.152805: step: 686/470, loss: 0.10984183102846146 2023-01-24 04:23:30.874277: step: 688/470, loss: 0.07132207602262497 2023-01-24 04:23:31.612126: step: 690/470, loss: 0.020248308777809143 2023-01-24 04:23:32.269404: step: 692/470, loss: 0.07261063158512115 2023-01-24 04:23:33.091681: step: 694/470, loss: 0.03563803806900978 2023-01-24 04:23:33.905273: step: 696/470, loss: 0.0738886147737503 2023-01-24 04:23:34.706865: step: 698/470, loss: 0.06751029938459396 2023-01-24 04:23:35.474088: step: 700/470, loss: 0.23834992945194244 2023-01-24 04:23:36.154077: step: 702/470, loss: 0.013831811025738716 2023-01-24 04:23:36.895489: step: 704/470, loss: 0.3347160816192627 2023-01-24 04:23:37.667579: step: 706/470, loss: 0.05514927953481674 2023-01-24 04:23:38.426339: step: 708/470, loss: 0.16086360812187195 2023-01-24 04:23:39.169182: step: 710/470, loss: 0.011491565965116024 2023-01-24 04:23:40.018832: step: 712/470, loss: 0.09873014688491821 2023-01-24 04:23:40.772662: step: 714/470, loss: 0.10902773588895798 2023-01-24 04:23:41.461538: step: 716/470, loss: 0.018024688586592674 2023-01-24 04:23:42.174314: step: 718/470, loss: 0.046969544142484665 2023-01-24 04:23:42.843895: step: 720/470, loss: 0.4311079680919647 2023-01-24 04:23:43.565081: step: 722/470, loss: 0.0782650038599968 2023-01-24 04:23:44.196083: step: 724/470, loss: 0.35217511653900146 2023-01-24 04:23:44.945913: step: 726/470, loss: 0.07346347719430923 2023-01-24 04:23:45.731628: step: 728/470, loss: 0.09162180125713348 2023-01-24 04:23:46.586946: step: 730/470, loss: 0.10204845666885376 2023-01-24 04:23:47.346731: step: 732/470, loss: 0.009644899517297745 2023-01-24 04:23:48.178630: step: 734/470, loss: 0.0478099063038826 2023-01-24 04:23:48.877041: step: 736/470, loss: 0.07511983066797256 2023-01-24 04:23:49.544674: step: 738/470, loss: 0.06089290976524353 2023-01-24 04:23:50.267358: step: 740/470, loss: 0.05333712324500084 2023-01-24 04:23:51.029305: step: 742/470, loss: 0.04015492647886276 2023-01-24 04:23:51.746795: step: 744/470, loss: 0.04333452507853508 2023-01-24 04:23:52.539670: step: 746/470, loss: 0.11922835558652878 2023-01-24 04:23:53.289499: step: 748/470, loss: 0.03594861179590225 2023-01-24 04:23:53.975566: step: 750/470, loss: 0.06654554605484009 2023-01-24 04:23:54.814951: step: 752/470, loss: 0.07238250225782394 2023-01-24 04:23:55.670287: step: 754/470, loss: 0.08213133364915848 2023-01-24 04:23:56.462296: step: 756/470, loss: 0.08642800152301788 2023-01-24 04:23:57.201358: step: 758/470, loss: 0.06127552315592766 2023-01-24 04:23:58.010041: step: 760/470, loss: 0.05956351384520531 2023-01-24 04:23:58.719374: step: 762/470, loss: 0.03790678456425667 2023-01-24 04:23:59.412383: step: 764/470, loss: 0.00936797820031643 2023-01-24 04:24:00.282643: step: 766/470, loss: 0.07397977262735367 2023-01-24 04:24:01.040879: step: 768/470, loss: 0.049599699676036835 2023-01-24 04:24:01.806914: step: 770/470, loss: 0.04407091066241264 2023-01-24 04:24:02.674091: step: 772/470, loss: 0.02440464124083519 2023-01-24 04:24:03.452406: step: 774/470, loss: 0.015560795553028584 2023-01-24 04:24:04.186480: step: 776/470, loss: 0.03959346562623978 2023-01-24 04:24:04.951121: step: 778/470, loss: 0.014441375620663166 2023-01-24 04:24:05.682098: step: 780/470, loss: 0.02576235495507717 2023-01-24 04:24:06.313666: step: 782/470, loss: 0.06063159555196762 2023-01-24 04:24:07.001179: step: 784/470, loss: 0.005881750024855137 2023-01-24 04:24:07.826597: step: 786/470, loss: 0.019624939188361168 2023-01-24 04:24:08.622593: step: 788/470, loss: 0.016726138070225716 2023-01-24 04:24:09.430575: step: 790/470, loss: 0.044762153178453445 2023-01-24 04:24:10.184732: step: 792/470, loss: 0.017699047923088074 2023-01-24 04:24:10.900035: step: 794/470, loss: 0.03728308528661728 2023-01-24 04:24:11.588013: step: 796/470, loss: 0.01777748577296734 2023-01-24 04:24:12.330419: step: 798/470, loss: 0.20916269719600677 2023-01-24 04:24:13.121289: step: 800/470, loss: 0.13329799473285675 2023-01-24 04:24:13.895326: step: 802/470, loss: 0.01543135941028595 2023-01-24 04:24:14.627686: step: 804/470, loss: 0.0656021237373352 2023-01-24 04:24:15.396837: step: 806/470, loss: 0.027505187317728996 2023-01-24 04:24:16.173529: step: 808/470, loss: 0.10071372240781784 2023-01-24 04:24:16.926543: step: 810/470, loss: 0.10161048173904419 2023-01-24 04:24:17.687973: step: 812/470, loss: 0.006511330138891935 2023-01-24 04:24:18.543735: step: 814/470, loss: 0.05376344919204712 2023-01-24 04:24:19.304996: step: 816/470, loss: 0.07200208306312561 2023-01-24 04:24:19.998745: step: 818/470, loss: 0.05889580398797989 2023-01-24 04:24:20.718484: step: 820/470, loss: 0.009265481494367123 2023-01-24 04:24:21.484016: step: 822/470, loss: 0.12977077066898346 2023-01-24 04:24:22.199703: step: 824/470, loss: 0.041659578680992126 2023-01-24 04:24:23.119749: step: 826/470, loss: 0.29780128598213196 2023-01-24 04:24:23.891701: step: 828/470, loss: 0.027976591140031815 2023-01-24 04:24:24.698453: step: 830/470, loss: 0.1818331480026245 2023-01-24 04:24:25.537067: step: 832/470, loss: 0.04622675105929375 2023-01-24 04:24:26.245943: step: 834/470, loss: 0.061723433434963226 2023-01-24 04:24:26.973786: step: 836/470, loss: 0.1804792582988739 2023-01-24 04:24:27.810363: step: 838/470, loss: 0.08750531077384949 2023-01-24 04:24:28.633818: step: 840/470, loss: 0.12658429145812988 2023-01-24 04:24:29.456855: step: 842/470, loss: 0.029569925740361214 2023-01-24 04:24:30.201956: step: 844/470, loss: 0.01851717382669449 2023-01-24 04:24:31.040047: step: 846/470, loss: 0.0617799237370491 2023-01-24 04:24:31.818862: step: 848/470, loss: 0.016349801793694496 2023-01-24 04:24:32.525103: step: 850/470, loss: 0.11626914143562317 2023-01-24 04:24:33.192637: step: 852/470, loss: 0.24209186434745789 2023-01-24 04:24:33.900274: step: 854/470, loss: 0.056616343557834625 2023-01-24 04:24:34.679729: step: 856/470, loss: 0.18732884526252747 2023-01-24 04:24:35.406315: step: 858/470, loss: 0.05298414081335068 2023-01-24 04:24:36.155339: step: 860/470, loss: 0.42847129702568054 2023-01-24 04:24:36.947778: step: 862/470, loss: 0.033945754170417786 2023-01-24 04:24:37.727371: step: 864/470, loss: 0.026322832331061363 2023-01-24 04:24:38.447037: step: 866/470, loss: 0.02989846095442772 2023-01-24 04:24:39.148547: step: 868/470, loss: 0.3587307929992676 2023-01-24 04:24:39.924122: step: 870/470, loss: 0.05438103526830673 2023-01-24 04:24:40.647996: step: 872/470, loss: 0.04368476942181587 2023-01-24 04:24:41.421460: step: 874/470, loss: 0.03072342276573181 2023-01-24 04:24:42.203023: step: 876/470, loss: 0.21938635408878326 2023-01-24 04:24:42.896023: step: 878/470, loss: 0.08039675652980804 2023-01-24 04:24:43.594786: step: 880/470, loss: 0.010841970331966877 2023-01-24 04:24:44.330247: step: 882/470, loss: 0.043295301496982574 2023-01-24 04:24:45.055348: step: 884/470, loss: 0.013651460409164429 2023-01-24 04:24:45.767155: step: 886/470, loss: 0.2560485005378723 2023-01-24 04:24:46.453980: step: 888/470, loss: 0.021218927577137947 2023-01-24 04:24:47.143412: step: 890/470, loss: 0.054894767701625824 2023-01-24 04:24:47.861738: step: 892/470, loss: 0.0300539992749691 2023-01-24 04:24:48.583877: step: 894/470, loss: 0.0708957314491272 2023-01-24 04:24:49.306348: step: 896/470, loss: 0.09575964510440826 2023-01-24 04:24:50.009012: step: 898/470, loss: 0.031115295365452766 2023-01-24 04:24:50.739234: step: 900/470, loss: 0.06746406108140945 2023-01-24 04:24:51.509450: step: 902/470, loss: 0.06835021823644638 2023-01-24 04:24:52.256052: step: 904/470, loss: 0.021396541967988014 2023-01-24 04:24:53.028608: step: 906/470, loss: 0.02145826816558838 2023-01-24 04:24:53.759389: step: 908/470, loss: 0.1311969757080078 2023-01-24 04:24:54.584620: step: 910/470, loss: 0.13202637434005737 2023-01-24 04:24:55.283837: step: 912/470, loss: 0.04499669000506401 2023-01-24 04:24:56.087542: step: 914/470, loss: 0.11342813074588776 2023-01-24 04:24:56.766113: step: 916/470, loss: 0.898378849029541 2023-01-24 04:24:57.610667: step: 918/470, loss: 0.10744353383779526 2023-01-24 04:24:58.420081: step: 920/470, loss: 0.026462199166417122 2023-01-24 04:24:59.155705: step: 922/470, loss: 0.5927501916885376 2023-01-24 04:24:59.843246: step: 924/470, loss: 0.03971520811319351 2023-01-24 04:25:00.565848: step: 926/470, loss: 0.052058689296245575 2023-01-24 04:25:01.356219: step: 928/470, loss: 0.06348146498203278 2023-01-24 04:25:02.035247: step: 930/470, loss: 0.05130045861005783 2023-01-24 04:25:02.680680: step: 932/470, loss: 0.01657716929912567 2023-01-24 04:25:03.348921: step: 934/470, loss: 3.5439648628234863 2023-01-24 04:25:04.117336: step: 936/470, loss: 0.11058296263217926 2023-01-24 04:25:04.886627: step: 938/470, loss: 0.07302563637495041 2023-01-24 04:25:05.570927: step: 940/470, loss: 0.0987381637096405 2023-01-24 04:25:06.226197: step: 942/470, loss: 0.04440511763095856 ================================================== Loss: 0.110 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33224704930515897, 'r': 0.32468165159802065, 'f1': 0.3284207877008769}, 'combined': 0.24199426462169876, 'epoch': 20} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3493711445537277, 'r': 0.33190258732604133, 'f1': 0.34041291007799107}, 'combined': 0.226941940051994, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345735054076954, 'r': 0.3250505403581405, 'f1': 0.3297432815567662}, 'combined': 0.24296873377866982, 'epoch': 20} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.354832387716314, 'r': 0.33572602837774324, 'f1': 0.34501489082297726}, 'combined': 0.23000992721531813, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31046092047930285, 'r': 0.31811934925772967, 'f1': 0.31424348089751364}, 'combined': 0.23154782802974688, 'epoch': 20} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3468852509391032, 'r': 0.3452175333865114, 'f1': 0.34604938286455117}, 'combined': 0.23069958857636738, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23958333333333334, 'r': 0.32857142857142857, 'f1': 0.27710843373493976}, 'combined': 0.18473895582329317, 'epoch': 20} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.32608695652173914, 'f1': 0.39473684210526316}, 'combined': 0.2631578947368421, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:27:40.378354: step: 2/470, loss: 0.030697286128997803 2023-01-24 04:27:41.215926: step: 4/470, loss: 0.8990464210510254 2023-01-24 04:27:41.918885: step: 6/470, loss: 0.012093383818864822 2023-01-24 04:27:42.612176: step: 8/470, loss: 0.04054703935980797 2023-01-24 04:27:43.344084: step: 10/470, loss: 0.0492924265563488 2023-01-24 04:27:44.084839: step: 12/470, loss: 0.043698910623788834 2023-01-24 04:27:44.885951: step: 14/470, loss: 0.052403904497623444 2023-01-24 04:27:45.560811: step: 16/470, loss: 0.04030047729611397 2023-01-24 04:27:46.407689: step: 18/470, loss: 0.04345276579260826 2023-01-24 04:27:47.155405: step: 20/470, loss: 0.07873324304819107 2023-01-24 04:27:47.915249: step: 22/470, loss: 0.020181206986308098 2023-01-24 04:27:48.655603: step: 24/470, loss: 0.013497297652065754 2023-01-24 04:27:49.452475: step: 26/470, loss: 0.017897669225931168 2023-01-24 04:27:50.376210: step: 28/470, loss: 0.0618402361869812 2023-01-24 04:27:51.098954: step: 30/470, loss: 0.07274410128593445 2023-01-24 04:27:51.790893: step: 32/470, loss: 0.020234020426869392 2023-01-24 04:27:52.636296: step: 34/470, loss: 0.06494118273258209 2023-01-24 04:27:53.366209: step: 36/470, loss: 0.021747954189777374 2023-01-24 04:27:54.137602: step: 38/470, loss: 0.05529887601733208 2023-01-24 04:27:54.901426: step: 40/470, loss: 1.0049673318862915 2023-01-24 04:27:55.686982: step: 42/470, loss: 0.11773818731307983 2023-01-24 04:27:56.406068: step: 44/470, loss: 0.04687252268195152 2023-01-24 04:27:57.096111: step: 46/470, loss: 0.09744272381067276 2023-01-24 04:27:57.834451: step: 48/470, loss: 0.039838120341300964 2023-01-24 04:27:58.592050: step: 50/470, loss: 0.06667645275592804 2023-01-24 04:27:59.433248: step: 52/470, loss: 0.023879552260041237 2023-01-24 04:28:00.160102: step: 54/470, loss: 0.008113712072372437 2023-01-24 04:28:00.872316: step: 56/470, loss: 0.03958437219262123 2023-01-24 04:28:01.645043: step: 58/470, loss: 0.1176476776599884 2023-01-24 04:28:02.462657: step: 60/470, loss: 0.07839195430278778 2023-01-24 04:28:03.157930: step: 62/470, loss: 0.02542118728160858 2023-01-24 04:28:03.908147: step: 64/470, loss: 0.680618166923523 2023-01-24 04:28:04.624607: step: 66/470, loss: 0.017299430444836617 2023-01-24 04:28:05.315336: step: 68/470, loss: 0.02113557793200016 2023-01-24 04:28:06.052796: step: 70/470, loss: 0.16099169850349426 2023-01-24 04:28:06.801375: step: 72/470, loss: 0.04751966521143913 2023-01-24 04:28:07.562124: step: 74/470, loss: 0.04941952973604202 2023-01-24 04:28:08.306673: step: 76/470, loss: 1.7027865648269653 2023-01-24 04:28:09.023272: step: 78/470, loss: 0.025158904492855072 2023-01-24 04:28:09.804417: step: 80/470, loss: 0.05669613182544708 2023-01-24 04:28:10.564384: step: 82/470, loss: 0.11963464319705963 2023-01-24 04:28:11.330463: step: 84/470, loss: 0.047465141862630844 2023-01-24 04:28:12.099607: step: 86/470, loss: 0.0787685364484787 2023-01-24 04:28:12.932941: step: 88/470, loss: 0.044626928865909576 2023-01-24 04:28:13.634695: step: 90/470, loss: 0.0552486777305603 2023-01-24 04:28:14.326619: step: 92/470, loss: 0.006156580988317728 2023-01-24 04:28:14.995292: step: 94/470, loss: 0.11764095723628998 2023-01-24 04:28:15.737148: step: 96/470, loss: 0.03031170181930065 2023-01-24 04:28:16.472283: step: 98/470, loss: 0.029158277437090874 2023-01-24 04:28:17.126378: step: 100/470, loss: 0.03246721625328064 2023-01-24 04:28:17.958128: step: 102/470, loss: 0.016130754724144936 2023-01-24 04:28:18.799133: step: 104/470, loss: 0.05944613739848137 2023-01-24 04:28:19.633724: step: 106/470, loss: 0.034837506711483 2023-01-24 04:28:20.374405: step: 108/470, loss: 0.19040045142173767 2023-01-24 04:28:21.077374: step: 110/470, loss: 0.04526451230049133 2023-01-24 04:28:21.795732: step: 112/470, loss: 0.02529776096343994 2023-01-24 04:28:22.428621: step: 114/470, loss: 0.030075442045927048 2023-01-24 04:28:23.163384: step: 116/470, loss: 0.2894465923309326 2023-01-24 04:28:23.879195: step: 118/470, loss: 0.02525232918560505 2023-01-24 04:28:24.689361: step: 120/470, loss: 1.0722471475601196 2023-01-24 04:28:25.389379: step: 122/470, loss: 0.020707713440060616 2023-01-24 04:28:26.075007: step: 124/470, loss: 0.015486765652894974 2023-01-24 04:28:26.749902: step: 126/470, loss: 0.031173061579465866 2023-01-24 04:28:27.541951: step: 128/470, loss: 0.02271159552037716 2023-01-24 04:28:28.281017: step: 130/470, loss: 0.035077158361673355 2023-01-24 04:28:29.055780: step: 132/470, loss: 0.024227894842624664 2023-01-24 04:28:29.926847: step: 134/470, loss: 0.03320540860295296 2023-01-24 04:28:30.679443: step: 136/470, loss: 0.03366602584719658 2023-01-24 04:28:31.336399: step: 138/470, loss: 0.28107237815856934 2023-01-24 04:28:32.051733: step: 140/470, loss: 0.06899859011173248 2023-01-24 04:28:32.712780: step: 142/470, loss: 0.008150143548846245 2023-01-24 04:28:33.454404: step: 144/470, loss: 0.03204032778739929 2023-01-24 04:28:34.067322: step: 146/470, loss: 0.0016799191944301128 2023-01-24 04:28:34.876780: step: 148/470, loss: 0.021955549716949463 2023-01-24 04:28:35.588706: step: 150/470, loss: 0.03780049830675125 2023-01-24 04:28:36.292501: step: 152/470, loss: 0.007639870513230562 2023-01-24 04:28:37.080516: step: 154/470, loss: 0.06269722431898117 2023-01-24 04:28:37.839666: step: 156/470, loss: 0.025132397189736366 2023-01-24 04:28:38.600198: step: 158/470, loss: 0.07948961853981018 2023-01-24 04:28:39.269498: step: 160/470, loss: 0.005530932452529669 2023-01-24 04:28:40.063611: step: 162/470, loss: 0.10777927935123444 2023-01-24 04:28:40.762800: step: 164/470, loss: 0.07513689249753952 2023-01-24 04:28:41.738821: step: 166/470, loss: 0.013257588259875774 2023-01-24 04:28:42.495164: step: 168/470, loss: 0.1652069240808487 2023-01-24 04:28:43.286996: step: 170/470, loss: 0.007435483392328024 2023-01-24 04:28:44.078919: step: 172/470, loss: 0.07258084416389465 2023-01-24 04:28:44.991332: step: 174/470, loss: 0.043368544429540634 2023-01-24 04:28:45.848084: step: 176/470, loss: 0.0565069355070591 2023-01-24 04:28:46.540013: step: 178/470, loss: 0.04345664754509926 2023-01-24 04:28:47.195114: step: 180/470, loss: 0.008021237328648567 2023-01-24 04:28:48.024318: step: 182/470, loss: 0.0683949664235115 2023-01-24 04:28:48.831437: step: 184/470, loss: 0.04733900725841522 2023-01-24 04:28:49.532288: step: 186/470, loss: 0.14194054901599884 2023-01-24 04:28:50.279327: step: 188/470, loss: 0.06841456145048141 2023-01-24 04:28:51.081825: step: 190/470, loss: 0.01492251455783844 2023-01-24 04:28:51.744376: step: 192/470, loss: 0.07086261361837387 2023-01-24 04:28:52.468075: step: 194/470, loss: 0.16320353746414185 2023-01-24 04:28:53.247081: step: 196/470, loss: 0.08280780166387558 2023-01-24 04:28:53.960589: step: 198/470, loss: 0.017911670729517937 2023-01-24 04:28:54.733648: step: 200/470, loss: 0.08260060846805573 2023-01-24 04:28:55.488665: step: 202/470, loss: 0.044123224914073944 2023-01-24 04:28:56.174879: step: 204/470, loss: 0.010030948556959629 2023-01-24 04:28:56.861917: step: 206/470, loss: 0.0387822724878788 2023-01-24 04:28:57.601605: step: 208/470, loss: 0.034351300448179245 2023-01-24 04:28:58.462024: step: 210/470, loss: 0.010738598182797432 2023-01-24 04:28:59.219928: step: 212/470, loss: 0.06208763271570206 2023-01-24 04:29:00.044624: step: 214/470, loss: 0.08142954856157303 2023-01-24 04:29:00.783766: step: 216/470, loss: 0.0489891953766346 2023-01-24 04:29:01.555080: step: 218/470, loss: 0.05691111460328102 2023-01-24 04:29:02.287594: step: 220/470, loss: 0.10647915303707123 2023-01-24 04:29:03.017402: step: 222/470, loss: 0.029838304966688156 2023-01-24 04:29:03.905178: step: 224/470, loss: 0.043678972870111465 2023-01-24 04:29:04.655590: step: 226/470, loss: 0.039795830845832825 2023-01-24 04:29:05.351692: step: 228/470, loss: 0.0034092552959918976 2023-01-24 04:29:06.107656: step: 230/470, loss: 0.05187808722257614 2023-01-24 04:29:06.932841: step: 232/470, loss: 0.022801529616117477 2023-01-24 04:29:07.688871: step: 234/470, loss: 0.0022835489362478256 2023-01-24 04:29:08.410506: step: 236/470, loss: 0.060452841222286224 2023-01-24 04:29:09.130207: step: 238/470, loss: 0.17915183305740356 2023-01-24 04:29:09.946255: step: 240/470, loss: 0.04132472351193428 2023-01-24 04:29:10.626231: step: 242/470, loss: 0.027403315529227257 2023-01-24 04:29:11.419332: step: 244/470, loss: 0.034430038183927536 2023-01-24 04:29:12.219228: step: 246/470, loss: 0.31158602237701416 2023-01-24 04:29:13.017407: step: 248/470, loss: 0.007013510912656784 2023-01-24 04:29:13.726682: step: 250/470, loss: 0.0664106085896492 2023-01-24 04:29:14.537814: step: 252/470, loss: 0.026947399601340294 2023-01-24 04:29:15.346550: step: 254/470, loss: 0.08892068266868591 2023-01-24 04:29:16.100704: step: 256/470, loss: 0.07869922369718552 2023-01-24 04:29:16.922365: step: 258/470, loss: 0.050374485552310944 2023-01-24 04:29:17.646888: step: 260/470, loss: 0.10828451067209244 2023-01-24 04:29:18.405985: step: 262/470, loss: 0.032886065542697906 2023-01-24 04:29:19.081001: step: 264/470, loss: 0.008889845572412014 2023-01-24 04:29:19.792522: step: 266/470, loss: 0.02972853183746338 2023-01-24 04:29:20.573898: step: 268/470, loss: 0.00936366431415081 2023-01-24 04:29:21.300126: step: 270/470, loss: 0.018172763288021088 2023-01-24 04:29:22.198406: step: 272/470, loss: 0.03780972212553024 2023-01-24 04:29:22.911474: step: 274/470, loss: 0.03203846141695976 2023-01-24 04:29:23.707195: step: 276/470, loss: 0.059636037796735764 2023-01-24 04:29:24.421898: step: 278/470, loss: 0.2443484216928482 2023-01-24 04:29:25.211777: step: 280/470, loss: 0.08666800707578659 2023-01-24 04:29:25.932410: step: 282/470, loss: 0.061435677111148834 2023-01-24 04:29:26.702376: step: 284/470, loss: 0.15919671952724457 2023-01-24 04:29:27.427456: step: 286/470, loss: 0.028493307530879974 2023-01-24 04:29:28.135030: step: 288/470, loss: 0.0469590425491333 2023-01-24 04:29:28.850696: step: 290/470, loss: 0.057702258229255676 2023-01-24 04:29:29.565706: step: 292/470, loss: 0.02364461123943329 2023-01-24 04:29:30.343858: step: 294/470, loss: 0.09106270968914032 2023-01-24 04:29:31.041534: step: 296/470, loss: 0.009492744691669941 2023-01-24 04:29:31.773875: step: 298/470, loss: 0.08927375078201294 2023-01-24 04:29:32.523827: step: 300/470, loss: 0.2396526336669922 2023-01-24 04:29:33.226185: step: 302/470, loss: 0.021701229736208916 2023-01-24 04:29:33.958615: step: 304/470, loss: 0.04955710098147392 2023-01-24 04:29:34.664053: step: 306/470, loss: 0.013163585215806961 2023-01-24 04:29:35.419191: step: 308/470, loss: 2.5453379154205322 2023-01-24 04:29:36.129087: step: 310/470, loss: 0.2919505834579468 2023-01-24 04:29:36.783299: step: 312/470, loss: 0.004827272146940231 2023-01-24 04:29:37.535598: step: 314/470, loss: 0.23596075177192688 2023-01-24 04:29:38.190848: step: 316/470, loss: 0.06390093266963959 2023-01-24 04:29:38.968872: step: 318/470, loss: 0.08488807827234268 2023-01-24 04:29:39.644386: step: 320/470, loss: 0.381671279668808 2023-01-24 04:29:40.322683: step: 322/470, loss: 1.292970061302185 2023-01-24 04:29:41.107459: step: 324/470, loss: 0.03118908405303955 2023-01-24 04:29:41.920663: step: 326/470, loss: 0.4090331196784973 2023-01-24 04:29:42.673785: step: 328/470, loss: 0.01948448270559311 2023-01-24 04:29:43.447789: step: 330/470, loss: 0.03427987918257713 2023-01-24 04:29:44.194538: step: 332/470, loss: 0.11818772554397583 2023-01-24 04:29:44.920219: step: 334/470, loss: 0.019033944234251976 2023-01-24 04:29:45.714965: step: 336/470, loss: 0.05994606763124466 2023-01-24 04:29:46.488735: step: 338/470, loss: 0.11957002431154251 2023-01-24 04:29:47.264885: step: 340/470, loss: 0.013482224196195602 2023-01-24 04:29:48.063266: step: 342/470, loss: 1.4503638744354248 2023-01-24 04:29:48.737943: step: 344/470, loss: 0.02695303224027157 2023-01-24 04:29:49.487473: step: 346/470, loss: 0.050801824778318405 2023-01-24 04:29:50.226627: step: 348/470, loss: 0.07081786543130875 2023-01-24 04:29:50.987170: step: 350/470, loss: 0.15673120319843292 2023-01-24 04:29:51.688088: step: 352/470, loss: 0.033411867916584015 2023-01-24 04:29:52.515782: step: 354/470, loss: 0.012247402220964432 2023-01-24 04:29:53.266602: step: 356/470, loss: 0.04690024256706238 2023-01-24 04:29:54.077470: step: 358/470, loss: 0.012932350859045982 2023-01-24 04:29:54.794874: step: 360/470, loss: 0.03173913061618805 2023-01-24 04:29:55.571025: step: 362/470, loss: 0.07606765627861023 2023-01-24 04:29:56.335215: step: 364/470, loss: 0.04719306901097298 2023-01-24 04:29:57.072381: step: 366/470, loss: 0.04025932401418686 2023-01-24 04:29:57.798526: step: 368/470, loss: 0.040751710534095764 2023-01-24 04:29:58.646274: step: 370/470, loss: 0.10012707859277725 2023-01-24 04:29:59.397007: step: 372/470, loss: 0.1372625231742859 2023-01-24 04:30:00.137290: step: 374/470, loss: 0.29762670397758484 2023-01-24 04:30:00.942429: step: 376/470, loss: 0.01679973676800728 2023-01-24 04:30:01.757461: step: 378/470, loss: 0.1596241146326065 2023-01-24 04:30:02.497291: step: 380/470, loss: 0.01258562970906496 2023-01-24 04:30:03.241367: step: 382/470, loss: 0.012428507208824158 2023-01-24 04:30:03.981781: step: 384/470, loss: 0.06731784343719482 2023-01-24 04:30:04.721722: step: 386/470, loss: 0.039965856820344925 2023-01-24 04:30:05.491421: step: 388/470, loss: 0.022317346185445786 2023-01-24 04:30:06.236102: step: 390/470, loss: 0.07760674506425858 2023-01-24 04:30:06.908262: step: 392/470, loss: 0.04869608208537102 2023-01-24 04:30:07.684663: step: 394/470, loss: 0.12491077929735184 2023-01-24 04:30:08.453737: step: 396/470, loss: 0.017211737111210823 2023-01-24 04:30:09.180749: step: 398/470, loss: 0.03493155539035797 2023-01-24 04:30:09.880740: step: 400/470, loss: 0.008576873689889908 2023-01-24 04:30:10.539716: step: 402/470, loss: 0.004653942305594683 2023-01-24 04:30:11.281635: step: 404/470, loss: 0.03935122489929199 2023-01-24 04:30:11.988619: step: 406/470, loss: 0.019616080448031425 2023-01-24 04:30:12.786754: step: 408/470, loss: 0.016680454835295677 2023-01-24 04:30:13.545866: step: 410/470, loss: 1.8669378757476807 2023-01-24 04:30:14.281381: step: 412/470, loss: 0.050840165466070175 2023-01-24 04:30:14.975188: step: 414/470, loss: 0.017026670277118683 2023-01-24 04:30:15.719341: step: 416/470, loss: 0.02777211181819439 2023-01-24 04:30:16.481695: step: 418/470, loss: 0.047070860862731934 2023-01-24 04:30:17.254173: step: 420/470, loss: 0.018415415659546852 2023-01-24 04:30:17.937517: step: 422/470, loss: 0.11197975277900696 2023-01-24 04:30:18.653768: step: 424/470, loss: 0.022071341052651405 2023-01-24 04:30:19.388972: step: 426/470, loss: 0.016471032053232193 2023-01-24 04:30:20.211930: step: 428/470, loss: 0.0054807537235319614 2023-01-24 04:30:20.962680: step: 430/470, loss: 0.025621192529797554 2023-01-24 04:30:21.712914: step: 432/470, loss: 0.029046742245554924 2023-01-24 04:30:22.423398: step: 434/470, loss: 0.014325280673801899 2023-01-24 04:30:23.135093: step: 436/470, loss: 0.0053360736928880215 2023-01-24 04:30:23.784864: step: 438/470, loss: 0.049321822822093964 2023-01-24 04:30:24.516035: step: 440/470, loss: 0.012010014615952969 2023-01-24 04:30:25.273497: step: 442/470, loss: 0.022885087877511978 2023-01-24 04:30:25.991959: step: 444/470, loss: 0.030774349346756935 2023-01-24 04:30:26.631650: step: 446/470, loss: 0.07810332626104355 2023-01-24 04:30:27.280561: step: 448/470, loss: 0.018929030746221542 2023-01-24 04:30:28.007338: step: 450/470, loss: 0.008509436622262001 2023-01-24 04:30:28.651234: step: 452/470, loss: 0.020978335291147232 2023-01-24 04:30:29.354795: step: 454/470, loss: 0.013563503511250019 2023-01-24 04:30:30.060750: step: 456/470, loss: 0.10911507159471512 2023-01-24 04:30:30.817393: step: 458/470, loss: 0.03588716313242912 2023-01-24 04:30:31.511028: step: 460/470, loss: 0.035442594438791275 2023-01-24 04:30:32.247652: step: 462/470, loss: 0.01282803900539875 2023-01-24 04:30:33.039522: step: 464/470, loss: 0.04582873731851578 2023-01-24 04:30:33.832598: step: 466/470, loss: 0.0383700355887413 2023-01-24 04:30:34.592606: step: 468/470, loss: 0.025660475715994835 2023-01-24 04:30:35.288622: step: 470/470, loss: 0.14962579309940338 2023-01-24 04:30:35.985401: step: 472/470, loss: 0.1222333088517189 2023-01-24 04:30:36.679904: step: 474/470, loss: 0.004696396645158529 2023-01-24 04:30:37.387225: step: 476/470, loss: 0.10672306269407272 2023-01-24 04:30:38.152468: step: 478/470, loss: 0.03075164370238781 2023-01-24 04:30:38.891298: step: 480/470, loss: 0.011191125959157944 2023-01-24 04:30:39.627178: step: 482/470, loss: 0.020779795944690704 2023-01-24 04:30:40.325386: step: 484/470, loss: 0.023522723466157913 2023-01-24 04:30:41.096584: step: 486/470, loss: 0.11427683383226395 2023-01-24 04:30:41.914487: step: 488/470, loss: 0.04714156314730644 2023-01-24 04:30:42.694829: step: 490/470, loss: 0.21134763956069946 2023-01-24 04:30:43.452940: step: 492/470, loss: 0.38170838356018066 2023-01-24 04:30:44.308571: step: 494/470, loss: 0.041793301701545715 2023-01-24 04:30:45.004817: step: 496/470, loss: 0.03228547051548958 2023-01-24 04:30:45.797986: step: 498/470, loss: 0.05064351484179497 2023-01-24 04:30:46.543769: step: 500/470, loss: 0.10985661298036575 2023-01-24 04:30:47.202725: step: 502/470, loss: 0.4626348614692688 2023-01-24 04:30:47.966105: step: 504/470, loss: 0.06298696249723434 2023-01-24 04:30:48.721112: step: 506/470, loss: 0.08736024796962738 2023-01-24 04:30:49.440992: step: 508/470, loss: 0.05532427132129669 2023-01-24 04:30:50.160710: step: 510/470, loss: 0.0031663402915000916 2023-01-24 04:30:50.832204: step: 512/470, loss: 0.1809593141078949 2023-01-24 04:30:51.628853: step: 514/470, loss: 0.031655311584472656 2023-01-24 04:30:52.433739: step: 516/470, loss: 0.0275458712130785 2023-01-24 04:30:53.202431: step: 518/470, loss: 0.046752817928791046 2023-01-24 04:30:53.951222: step: 520/470, loss: 0.024380570277571678 2023-01-24 04:30:54.691769: step: 522/470, loss: 0.03911636769771576 2023-01-24 04:30:55.407028: step: 524/470, loss: 0.011473782360553741 2023-01-24 04:30:56.179966: step: 526/470, loss: 0.03254931420087814 2023-01-24 04:30:56.902356: step: 528/470, loss: 0.04068319499492645 2023-01-24 04:30:57.651949: step: 530/470, loss: 0.07182002812623978 2023-01-24 04:30:58.391270: step: 532/470, loss: 0.18645454943180084 2023-01-24 04:30:59.060807: step: 534/470, loss: 0.005089792422950268 2023-01-24 04:30:59.764423: step: 536/470, loss: 0.02244439162313938 2023-01-24 04:31:00.486690: step: 538/470, loss: 0.02713472954928875 2023-01-24 04:31:01.243414: step: 540/470, loss: 0.09527122229337692 2023-01-24 04:31:02.046734: step: 542/470, loss: 0.04841366410255432 2023-01-24 04:31:02.771205: step: 544/470, loss: 0.03840105980634689 2023-01-24 04:31:03.458976: step: 546/470, loss: 0.003627850441262126 2023-01-24 04:31:04.258457: step: 548/470, loss: 0.06749998778104782 2023-01-24 04:31:04.985608: step: 550/470, loss: 0.058877523988485336 2023-01-24 04:31:05.687746: step: 552/470, loss: 0.018091080710291862 2023-01-24 04:31:06.456997: step: 554/470, loss: 0.12940962612628937 2023-01-24 04:31:07.244774: step: 556/470, loss: 0.1229933425784111 2023-01-24 04:31:07.951176: step: 558/470, loss: 0.024270979687571526 2023-01-24 04:31:08.652484: step: 560/470, loss: 0.10469833761453629 2023-01-24 04:31:09.443430: step: 562/470, loss: 0.05231137201189995 2023-01-24 04:31:10.122142: step: 564/470, loss: 0.04341939464211464 2023-01-24 04:31:10.854133: step: 566/470, loss: 0.06202490255236626 2023-01-24 04:31:11.536805: step: 568/470, loss: 0.2968774437904358 2023-01-24 04:31:12.292057: step: 570/470, loss: 0.22153881192207336 2023-01-24 04:31:13.091068: step: 572/470, loss: 0.07482737302780151 2023-01-24 04:31:13.869681: step: 574/470, loss: 0.049079857766628265 2023-01-24 04:31:14.633986: step: 576/470, loss: 0.015658581629395485 2023-01-24 04:31:15.360827: step: 578/470, loss: 0.016094373539090157 2023-01-24 04:31:16.098086: step: 580/470, loss: 0.05446375906467438 2023-01-24 04:31:16.836938: step: 582/470, loss: 0.08582345396280289 2023-01-24 04:31:17.501577: step: 584/470, loss: 0.055172644555568695 2023-01-24 04:31:18.208075: step: 586/470, loss: 0.03194602578878403 2023-01-24 04:31:19.077972: step: 588/470, loss: 0.06124107167124748 2023-01-24 04:31:19.842546: step: 590/470, loss: 0.06561607867479324 2023-01-24 04:31:20.580299: step: 592/470, loss: 0.387003630399704 2023-01-24 04:31:21.276815: step: 594/470, loss: 0.11961881816387177 2023-01-24 04:31:22.006164: step: 596/470, loss: 0.03727561980485916 2023-01-24 04:31:22.729279: step: 598/470, loss: 0.02133580483496189 2023-01-24 04:31:23.475331: step: 600/470, loss: 0.020184461027383804 2023-01-24 04:31:24.259372: step: 602/470, loss: 0.06725658476352692 2023-01-24 04:31:24.997191: step: 604/470, loss: 0.017879968509078026 2023-01-24 04:31:25.755982: step: 606/470, loss: 0.029784638434648514 2023-01-24 04:31:26.606184: step: 608/470, loss: 0.021767864003777504 2023-01-24 04:31:27.399880: step: 610/470, loss: 0.057398948818445206 2023-01-24 04:31:28.128808: step: 612/470, loss: 0.6706560254096985 2023-01-24 04:31:28.916979: step: 614/470, loss: 0.03931858763098717 2023-01-24 04:31:29.660475: step: 616/470, loss: 0.5221364498138428 2023-01-24 04:31:30.432116: step: 618/470, loss: 0.01334367971867323 2023-01-24 04:31:31.184486: step: 620/470, loss: 0.06150234863162041 2023-01-24 04:31:31.954010: step: 622/470, loss: 0.029126210138201714 2023-01-24 04:31:32.744037: step: 624/470, loss: 0.13291211426258087 2023-01-24 04:31:33.540308: step: 626/470, loss: 0.5435463786125183 2023-01-24 04:31:34.283158: step: 628/470, loss: 0.016903935000300407 2023-01-24 04:31:34.949061: step: 630/470, loss: 0.012967291288077831 2023-01-24 04:31:35.663035: step: 632/470, loss: 0.04698651283979416 2023-01-24 04:31:36.416526: step: 634/470, loss: 0.03846525028347969 2023-01-24 04:31:37.189141: step: 636/470, loss: 0.024630436673760414 2023-01-24 04:31:37.858273: step: 638/470, loss: 0.062395889312028885 2023-01-24 04:31:38.565055: step: 640/470, loss: 0.06373529881238937 2023-01-24 04:31:39.373590: step: 642/470, loss: 0.08350540697574615 2023-01-24 04:31:40.068876: step: 644/470, loss: 0.0395352728664875 2023-01-24 04:31:40.750481: step: 646/470, loss: 0.006685740314424038 2023-01-24 04:31:41.504365: step: 648/470, loss: 0.06376517564058304 2023-01-24 04:31:42.244974: step: 650/470, loss: 0.06809066981077194 2023-01-24 04:31:42.972660: step: 652/470, loss: 0.03920336440205574 2023-01-24 04:31:43.778772: step: 654/470, loss: 0.18704262375831604 2023-01-24 04:31:44.477252: step: 656/470, loss: 0.0032950390595942736 2023-01-24 04:31:45.304699: step: 658/470, loss: 0.06400889903306961 2023-01-24 04:31:46.099766: step: 660/470, loss: 0.026591304689645767 2023-01-24 04:31:46.841842: step: 662/470, loss: 0.006560401059687138 2023-01-24 04:31:47.589909: step: 664/470, loss: 0.08201814442873001 2023-01-24 04:31:48.306509: step: 666/470, loss: 0.059747278690338135 2023-01-24 04:31:49.086072: step: 668/470, loss: 0.11375654488801956 2023-01-24 04:31:49.815258: step: 670/470, loss: 0.028703976422548294 2023-01-24 04:31:50.551818: step: 672/470, loss: 0.19319429993629456 2023-01-24 04:31:51.314315: step: 674/470, loss: 0.07025258243083954 2023-01-24 04:31:52.078934: step: 676/470, loss: 0.032008834183216095 2023-01-24 04:31:52.780231: step: 678/470, loss: 0.13485921919345856 2023-01-24 04:31:53.466014: step: 680/470, loss: 0.050178226083517075 2023-01-24 04:31:54.195218: step: 682/470, loss: 0.04921692609786987 2023-01-24 04:31:54.976505: step: 684/470, loss: 0.022719813510775566 2023-01-24 04:31:55.636701: step: 686/470, loss: 0.04771514609456062 2023-01-24 04:31:56.430149: step: 688/470, loss: 0.033646490424871445 2023-01-24 04:31:57.186686: step: 690/470, loss: 0.07247103005647659 2023-01-24 04:31:58.008180: step: 692/470, loss: 0.05885102227330208 2023-01-24 04:31:58.730610: step: 694/470, loss: 0.005150055047124624 2023-01-24 04:31:59.598513: step: 696/470, loss: 0.0049107871018350124 2023-01-24 04:32:00.291795: step: 698/470, loss: 0.011220758780837059 2023-01-24 04:32:01.028749: step: 700/470, loss: 0.06259648501873016 2023-01-24 04:32:01.764940: step: 702/470, loss: 0.023064645007252693 2023-01-24 04:32:02.541931: step: 704/470, loss: 0.026079723611474037 2023-01-24 04:32:03.293831: step: 706/470, loss: 0.011188274249434471 2023-01-24 04:32:04.019227: step: 708/470, loss: 0.015496410429477692 2023-01-24 04:32:04.823937: step: 710/470, loss: 0.7063004374504089 2023-01-24 04:32:05.614701: step: 712/470, loss: 0.0391618087887764 2023-01-24 04:32:06.363416: step: 714/470, loss: 0.07354892045259476 2023-01-24 04:32:07.195109: step: 716/470, loss: 0.02870473451912403 2023-01-24 04:32:07.919868: step: 718/470, loss: 0.06522411108016968 2023-01-24 04:32:08.793392: step: 720/470, loss: 0.23585094511508942 2023-01-24 04:32:09.527104: step: 722/470, loss: 0.11705224961042404 2023-01-24 04:32:10.239334: step: 724/470, loss: 0.5201276540756226 2023-01-24 04:32:10.927225: step: 726/470, loss: 0.033718567341566086 2023-01-24 04:32:11.655647: step: 728/470, loss: 0.11925947666168213 2023-01-24 04:32:12.425429: step: 730/470, loss: 0.01283105555921793 2023-01-24 04:32:13.163352: step: 732/470, loss: 0.03227861598134041 2023-01-24 04:32:13.966840: step: 734/470, loss: 0.20628242194652557 2023-01-24 04:32:14.677475: step: 736/470, loss: 0.0496375747025013 2023-01-24 04:32:15.538257: step: 738/470, loss: 0.12754222750663757 2023-01-24 04:32:16.293040: step: 740/470, loss: 0.27654582262039185 2023-01-24 04:32:17.013325: step: 742/470, loss: 0.007181598339229822 2023-01-24 04:32:17.802227: step: 744/470, loss: 0.03585813194513321 2023-01-24 04:32:18.523739: step: 746/470, loss: 0.5074892044067383 2023-01-24 04:32:19.290663: step: 748/470, loss: 0.08907847851514816 2023-01-24 04:32:19.973749: step: 750/470, loss: 0.024945350363850594 2023-01-24 04:32:20.766330: step: 752/470, loss: 0.029513496905565262 2023-01-24 04:32:21.549824: step: 754/470, loss: 0.0634850412607193 2023-01-24 04:32:22.298354: step: 756/470, loss: 0.27008649706840515 2023-01-24 04:32:23.124585: step: 758/470, loss: 0.07133635878562927 2023-01-24 04:32:23.863536: step: 760/470, loss: 0.04460354521870613 2023-01-24 04:32:24.684355: step: 762/470, loss: 0.156412735581398 2023-01-24 04:32:25.414681: step: 764/470, loss: 0.020505452528595924 2023-01-24 04:32:26.137106: step: 766/470, loss: 0.020364558324217796 2023-01-24 04:32:26.823793: step: 768/470, loss: 0.12095355987548828 2023-01-24 04:32:27.597050: step: 770/470, loss: 0.05226239189505577 2023-01-24 04:32:28.382095: step: 772/470, loss: 0.04754621163010597 2023-01-24 04:32:29.090058: step: 774/470, loss: 0.056481197476387024 2023-01-24 04:32:29.858878: step: 776/470, loss: 0.02067723497748375 2023-01-24 04:32:30.671381: step: 778/470, loss: 0.02264983393251896 2023-01-24 04:32:31.475899: step: 780/470, loss: 0.01082298532128334 2023-01-24 04:32:32.233876: step: 782/470, loss: 0.01869276538491249 2023-01-24 04:32:32.948475: step: 784/470, loss: 0.030841980129480362 2023-01-24 04:32:33.658296: step: 786/470, loss: 0.01216953992843628 2023-01-24 04:32:34.398851: step: 788/470, loss: 0.32434332370758057 2023-01-24 04:32:35.159079: step: 790/470, loss: 0.07466316968202591 2023-01-24 04:32:35.891962: step: 792/470, loss: 0.020550915971398354 2023-01-24 04:32:36.621683: step: 794/470, loss: 0.012714998796582222 2023-01-24 04:32:37.394001: step: 796/470, loss: 0.07571426033973694 2023-01-24 04:32:38.202222: step: 798/470, loss: 0.05830766260623932 2023-01-24 04:32:38.963088: step: 800/470, loss: 0.06974931806325912 2023-01-24 04:32:39.717506: step: 802/470, loss: 0.042594559490680695 2023-01-24 04:32:40.472136: step: 804/470, loss: 0.0458582304418087 2023-01-24 04:32:41.169421: step: 806/470, loss: 0.024310944601893425 2023-01-24 04:32:41.904784: step: 808/470, loss: 0.06325914710760117 2023-01-24 04:32:42.629995: step: 810/470, loss: 0.2123449146747589 2023-01-24 04:32:43.445208: step: 812/470, loss: 0.08683294802904129 2023-01-24 04:32:44.214842: step: 814/470, loss: 0.0271016675978899 2023-01-24 04:32:44.980589: step: 816/470, loss: 0.06353487074375153 2023-01-24 04:32:45.784064: step: 818/470, loss: 0.028592655435204506 2023-01-24 04:32:46.473254: step: 820/470, loss: 0.013006173074245453 2023-01-24 04:32:47.218839: step: 822/470, loss: 0.030378835275769234 2023-01-24 04:32:47.962459: step: 824/470, loss: 0.001919006579555571 2023-01-24 04:32:48.717204: step: 826/470, loss: 0.03218744695186615 2023-01-24 04:32:49.445304: step: 828/470, loss: 0.06677714735269547 2023-01-24 04:32:50.135727: step: 830/470, loss: 0.016359543427824974 2023-01-24 04:32:50.877375: step: 832/470, loss: 0.013201995752751827 2023-01-24 04:32:51.571788: step: 834/470, loss: 0.023714501410722733 2023-01-24 04:32:52.270821: step: 836/470, loss: 0.06850353628396988 2023-01-24 04:32:53.066469: step: 838/470, loss: 0.05281051993370056 2023-01-24 04:32:53.813395: step: 840/470, loss: 0.08620928972959518 2023-01-24 04:32:54.528476: step: 842/470, loss: 0.04048202931880951 2023-01-24 04:32:55.388073: step: 844/470, loss: 0.11093959212303162 2023-01-24 04:32:56.102953: step: 846/470, loss: 0.11896642297506332 2023-01-24 04:32:56.807770: step: 848/470, loss: 0.8897960186004639 2023-01-24 04:32:57.461105: step: 850/470, loss: 0.0081399567425251 2023-01-24 04:32:58.171788: step: 852/470, loss: 0.013954234309494495 2023-01-24 04:32:58.862978: step: 854/470, loss: 0.05100409686565399 2023-01-24 04:32:59.617254: step: 856/470, loss: 0.2577970325946808 2023-01-24 04:33:00.306292: step: 858/470, loss: 0.07873797416687012 2023-01-24 04:33:01.010475: step: 860/470, loss: 0.06876885890960693 2023-01-24 04:33:01.819691: step: 862/470, loss: 0.03414501994848251 2023-01-24 04:33:02.694149: step: 864/470, loss: 0.05697346851229668 2023-01-24 04:33:03.378914: step: 866/470, loss: 0.07026387751102448 2023-01-24 04:33:04.130699: step: 868/470, loss: 0.06572747230529785 2023-01-24 04:33:04.872166: step: 870/470, loss: 0.4360022246837616 2023-01-24 04:33:05.624436: step: 872/470, loss: 0.03277615085244179 2023-01-24 04:33:06.321292: step: 874/470, loss: 0.04744017496705055 2023-01-24 04:33:07.063973: step: 876/470, loss: 0.03529006615281105 2023-01-24 04:33:07.769516: step: 878/470, loss: 0.04175432026386261 2023-01-24 04:33:08.537384: step: 880/470, loss: 0.035176780074834824 2023-01-24 04:33:09.283292: step: 882/470, loss: 0.037990227341651917 2023-01-24 04:33:10.039373: step: 884/470, loss: 0.0412430465221405 2023-01-24 04:33:10.852108: step: 886/470, loss: 0.059082094579935074 2023-01-24 04:33:11.667777: step: 888/470, loss: 0.0283492561429739 2023-01-24 04:33:12.455019: step: 890/470, loss: 0.08661621809005737 2023-01-24 04:33:13.123210: step: 892/470, loss: 0.04014264792203903 2023-01-24 04:33:13.914413: step: 894/470, loss: 0.016945162788033485 2023-01-24 04:33:14.625942: step: 896/470, loss: 0.020986400544643402 2023-01-24 04:33:15.294301: step: 898/470, loss: 0.06758707016706467 2023-01-24 04:33:16.005503: step: 900/470, loss: 0.10314557701349258 2023-01-24 04:33:16.716848: step: 902/470, loss: 0.12326005101203918 2023-01-24 04:33:17.522284: step: 904/470, loss: 0.047155629843473434 2023-01-24 04:33:18.322103: step: 906/470, loss: 0.025163166224956512 2023-01-24 04:33:19.065437: step: 908/470, loss: 0.032101456075906754 2023-01-24 04:33:19.794297: step: 910/470, loss: 0.0255853533744812 2023-01-24 04:33:20.617502: step: 912/470, loss: 0.031632889062166214 2023-01-24 04:33:21.385169: step: 914/470, loss: 0.07384685426950455 2023-01-24 04:33:22.225442: step: 916/470, loss: 0.09033031016588211 2023-01-24 04:33:22.961967: step: 918/470, loss: 0.061557587236166 2023-01-24 04:33:23.712882: step: 920/470, loss: 0.1455271691083908 2023-01-24 04:33:24.483171: step: 922/470, loss: 0.017164742574095726 2023-01-24 04:33:25.207029: step: 924/470, loss: 0.03933778777718544 2023-01-24 04:33:25.973512: step: 926/470, loss: 0.040244653820991516 2023-01-24 04:33:26.745856: step: 928/470, loss: 0.051455993205308914 2023-01-24 04:33:27.478372: step: 930/470, loss: 0.07561231404542923 2023-01-24 04:33:28.148915: step: 932/470, loss: 0.02452336624264717 2023-01-24 04:33:28.931456: step: 934/470, loss: 0.03766307979822159 2023-01-24 04:33:29.661427: step: 936/470, loss: 0.041747868061065674 2023-01-24 04:33:30.446800: step: 938/470, loss: 0.02425321564078331 2023-01-24 04:33:31.245850: step: 940/470, loss: 0.27364808320999146 2023-01-24 04:33:31.907545: step: 942/470, loss: 0.016857439652085304 ================================================== Loss: 0.097 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3254668863779034, 'r': 0.3279372232764074, 'f1': 0.32669738500315065}, 'combined': 0.24072438894968995, 'epoch': 21} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3634110060582147, 'r': 0.35153026162938844, 'f1': 0.35737191798100093}, 'combined': 0.23824794532066723, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32631822117083803, 'r': 0.33251021777749534, 'f1': 0.3293851217457519}, 'combined': 0.2427048265495014, 'epoch': 21} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36422666106346513, 'r': 0.3463655459536222, 'f1': 0.3550716291688191}, 'combined': 0.23671441944587937, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30707145019867443, 'r': 0.3274651897754365, 'f1': 0.3169405968992746}, 'combined': 0.23353517666262338, 'epoch': 21} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3538407772196438, 'r': 0.3552017032858732, 'f1': 0.35451993418167765}, 'combined': 0.23634662278778504, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3055555555555556, 'r': 0.3142857142857143, 'f1': 0.3098591549295775}, 'combined': 0.20657276995305165, 'epoch': 21} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.48214285714285715, 'r': 0.29347826086956524, 'f1': 0.36486486486486486}, 'combined': 0.24324324324324323, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3076923076923077, 'r': 0.13793103448275862, 'f1': 0.1904761904761905}, 'combined': 0.12698412698412698, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:36:04.936084: step: 2/470, loss: 0.07132654637098312 2023-01-24 04:36:05.657023: step: 4/470, loss: 0.04968281090259552 2023-01-24 04:36:06.401320: step: 6/470, loss: 0.025600193068385124 2023-01-24 04:36:07.174321: step: 8/470, loss: 0.11954417079687119 2023-01-24 04:36:07.901586: step: 10/470, loss: 0.004893624223768711 2023-01-24 04:36:08.661770: step: 12/470, loss: 0.032928865402936935 2023-01-24 04:36:09.424376: step: 14/470, loss: 0.01784546673297882 2023-01-24 04:36:10.178709: step: 16/470, loss: 0.02579541504383087 2023-01-24 04:36:10.895799: step: 18/470, loss: 0.0351317934691906 2023-01-24 04:36:11.572081: step: 20/470, loss: 0.009095302782952785 2023-01-24 04:36:12.253415: step: 22/470, loss: 0.004136822652071714 2023-01-24 04:36:12.892425: step: 24/470, loss: 0.014530248008668423 2023-01-24 04:36:13.565724: step: 26/470, loss: 0.03549989312887192 2023-01-24 04:36:14.384206: step: 28/470, loss: 0.04445575922727585 2023-01-24 04:36:15.102110: step: 30/470, loss: 0.01351911574602127 2023-01-24 04:36:15.957304: step: 32/470, loss: 0.12070487439632416 2023-01-24 04:36:16.736128: step: 34/470, loss: 0.007829630747437477 2023-01-24 04:36:17.408847: step: 36/470, loss: 0.06734476238489151 2023-01-24 04:36:18.131213: step: 38/470, loss: 0.016050485894083977 2023-01-24 04:36:18.858167: step: 40/470, loss: 0.025376321747899055 2023-01-24 04:36:19.597841: step: 42/470, loss: 0.005891892127692699 2023-01-24 04:36:20.246780: step: 44/470, loss: 0.1300346851348877 2023-01-24 04:36:20.966570: step: 46/470, loss: 0.03699648007750511 2023-01-24 04:36:21.673110: step: 48/470, loss: 0.014293630607426167 2023-01-24 04:36:22.423591: step: 50/470, loss: 0.02472846582531929 2023-01-24 04:36:23.163689: step: 52/470, loss: 0.020292188972234726 2023-01-24 04:36:23.958267: step: 54/470, loss: 0.0226434413343668 2023-01-24 04:36:24.667760: step: 56/470, loss: 0.09266740083694458 2023-01-24 04:36:25.421160: step: 58/470, loss: 0.01962917111814022 2023-01-24 04:36:26.229598: step: 60/470, loss: 0.03125033900141716 2023-01-24 04:36:27.013689: step: 62/470, loss: 0.02047717012465 2023-01-24 04:36:27.781424: step: 64/470, loss: 0.018749892711639404 2023-01-24 04:36:28.531917: step: 66/470, loss: 0.033666905015707016 2023-01-24 04:36:29.345320: step: 68/470, loss: 0.03922034800052643 2023-01-24 04:36:30.097748: step: 70/470, loss: 0.019506221637129784 2023-01-24 04:36:30.780925: step: 72/470, loss: 0.0295857060700655 2023-01-24 04:36:31.509889: step: 74/470, loss: 0.05708824843168259 2023-01-24 04:36:32.243365: step: 76/470, loss: 0.035686276853084564 2023-01-24 04:36:32.954882: step: 78/470, loss: 0.0031681691762059927 2023-01-24 04:36:33.682188: step: 80/470, loss: 0.06565077602863312 2023-01-24 04:36:34.433890: step: 82/470, loss: 0.013326709158718586 2023-01-24 04:36:35.114935: step: 84/470, loss: 0.059001680463552475 2023-01-24 04:36:35.829130: step: 86/470, loss: 0.04502973333001137 2023-01-24 04:36:36.534294: step: 88/470, loss: 0.05877036228775978 2023-01-24 04:36:37.377675: step: 90/470, loss: 0.03713955730199814 2023-01-24 04:36:38.113322: step: 92/470, loss: 0.4271875321865082 2023-01-24 04:36:38.877710: step: 94/470, loss: 0.0117900799959898 2023-01-24 04:36:39.724086: step: 96/470, loss: 0.023388944566249847 2023-01-24 04:36:40.556871: step: 98/470, loss: 0.042075708508491516 2023-01-24 04:36:41.230067: step: 100/470, loss: 0.023363204672932625 2023-01-24 04:36:41.969049: step: 102/470, loss: 0.04883814603090286 2023-01-24 04:36:42.645730: step: 104/470, loss: 0.02259974554181099 2023-01-24 04:36:43.345014: step: 106/470, loss: 0.06939293444156647 2023-01-24 04:36:44.075831: step: 108/470, loss: 0.07869086414575577 2023-01-24 04:36:44.834319: step: 110/470, loss: 0.01648254692554474 2023-01-24 04:36:45.516122: step: 112/470, loss: 2.4709521312615834e-05 2023-01-24 04:36:46.293702: step: 114/470, loss: 0.027162201702594757 2023-01-24 04:36:47.033661: step: 116/470, loss: 0.042458001524209976 2023-01-24 04:36:47.810343: step: 118/470, loss: 0.008122868835926056 2023-01-24 04:36:48.714813: step: 120/470, loss: 0.08594401180744171 2023-01-24 04:36:49.502733: step: 122/470, loss: 0.06479133665561676 2023-01-24 04:36:50.238644: step: 124/470, loss: 0.05414796248078346 2023-01-24 04:36:50.937179: step: 126/470, loss: 0.013639035634696484 2023-01-24 04:36:51.618314: step: 128/470, loss: 0.014860237948596478 2023-01-24 04:36:52.310713: step: 130/470, loss: 0.009585319086909294 2023-01-24 04:36:53.050105: step: 132/470, loss: 0.0008866112912073731 2023-01-24 04:36:53.781280: step: 134/470, loss: 0.07709459215402603 2023-01-24 04:36:54.648151: step: 136/470, loss: 0.08505282551050186 2023-01-24 04:36:55.340501: step: 138/470, loss: 0.03271591290831566 2023-01-24 04:36:56.100642: step: 140/470, loss: 0.36453869938850403 2023-01-24 04:36:56.834396: step: 142/470, loss: 0.06906229257583618 2023-01-24 04:36:57.617324: step: 144/470, loss: 0.09906693547964096 2023-01-24 04:36:58.415268: step: 146/470, loss: 0.15732789039611816 2023-01-24 04:36:59.148299: step: 148/470, loss: 0.0644831508398056 2023-01-24 04:36:59.876139: step: 150/470, loss: 0.0066782585345208645 2023-01-24 04:37:00.705309: step: 152/470, loss: 0.034233175218105316 2023-01-24 04:37:01.461803: step: 154/470, loss: 0.061225440353155136 2023-01-24 04:37:02.180007: step: 156/470, loss: 0.05808292329311371 2023-01-24 04:37:02.906015: step: 158/470, loss: 0.039374712854623795 2023-01-24 04:37:03.645426: step: 160/470, loss: 0.029254967346787453 2023-01-24 04:37:04.449851: step: 162/470, loss: 0.17803142964839935 2023-01-24 04:37:05.156834: step: 164/470, loss: 0.023453183472156525 2023-01-24 04:37:05.906047: step: 166/470, loss: 0.11502149701118469 2023-01-24 04:37:06.677252: step: 168/470, loss: 0.18725618720054626 2023-01-24 04:37:07.484316: step: 170/470, loss: 0.07862892746925354 2023-01-24 04:37:08.203049: step: 172/470, loss: 0.02760915271937847 2023-01-24 04:37:08.913061: step: 174/470, loss: 0.009849579073488712 2023-01-24 04:37:09.704695: step: 176/470, loss: 0.03337179869413376 2023-01-24 04:37:10.408248: step: 178/470, loss: 0.02945728227496147 2023-01-24 04:37:11.128479: step: 180/470, loss: 1.613263726234436 2023-01-24 04:37:11.816950: step: 182/470, loss: 0.03204035386443138 2023-01-24 04:37:12.494785: step: 184/470, loss: 0.04056650400161743 2023-01-24 04:37:13.229537: step: 186/470, loss: 0.06198835000395775 2023-01-24 04:37:13.924898: step: 188/470, loss: 0.013698305003345013 2023-01-24 04:37:14.628110: step: 190/470, loss: 0.26306137442588806 2023-01-24 04:37:15.475271: step: 192/470, loss: 0.007418145891278982 2023-01-24 04:37:16.229749: step: 194/470, loss: 0.45867234468460083 2023-01-24 04:37:16.999370: step: 196/470, loss: 0.014525515027344227 2023-01-24 04:37:17.757404: step: 198/470, loss: 0.06874248385429382 2023-01-24 04:37:18.514625: step: 200/470, loss: 1.0923312902450562 2023-01-24 04:37:19.244006: step: 202/470, loss: 0.012392209842801094 2023-01-24 04:37:19.996291: step: 204/470, loss: 0.10479408502578735 2023-01-24 04:37:20.764993: step: 206/470, loss: 0.013904137536883354 2023-01-24 04:37:21.476480: step: 208/470, loss: 0.03619790077209473 2023-01-24 04:37:22.230775: step: 210/470, loss: 0.36961981654167175 2023-01-24 04:37:22.936584: step: 212/470, loss: 0.08961759507656097 2023-01-24 04:37:23.736935: step: 214/470, loss: 0.14479438960552216 2023-01-24 04:37:24.395313: step: 216/470, loss: 0.027769066393375397 2023-01-24 04:37:25.085980: step: 218/470, loss: 0.11556971818208694 2023-01-24 04:37:25.806930: step: 220/470, loss: 0.03934004157781601 2023-01-24 04:37:26.583572: step: 222/470, loss: 0.08625157177448273 2023-01-24 04:37:27.332137: step: 224/470, loss: 2.8860392570495605 2023-01-24 04:37:28.119503: step: 226/470, loss: 0.03975597769021988 2023-01-24 04:37:28.906948: step: 228/470, loss: 0.013374090194702148 2023-01-24 04:37:29.686826: step: 230/470, loss: 0.035754621028900146 2023-01-24 04:37:30.451911: step: 232/470, loss: 0.19659562408924103 2023-01-24 04:37:31.171717: step: 234/470, loss: 0.23096928000450134 2023-01-24 04:37:31.847185: step: 236/470, loss: 0.04870816692709923 2023-01-24 04:37:32.664783: step: 238/470, loss: 0.05176544934511185 2023-01-24 04:37:33.409079: step: 240/470, loss: 0.034658364951610565 2023-01-24 04:37:34.161563: step: 242/470, loss: 0.06927529722452164 2023-01-24 04:37:34.842070: step: 244/470, loss: 0.010075340047478676 2023-01-24 04:37:35.651816: step: 246/470, loss: 0.03195039927959442 2023-01-24 04:37:36.451166: step: 248/470, loss: 0.02179635688662529 2023-01-24 04:37:37.224875: step: 250/470, loss: 0.13300231099128723 2023-01-24 04:37:37.969474: step: 252/470, loss: 0.04315396025776863 2023-01-24 04:37:38.752650: step: 254/470, loss: 0.02336571365594864 2023-01-24 04:37:39.506883: step: 256/470, loss: 0.04286831617355347 2023-01-24 04:37:40.105148: step: 258/470, loss: 0.17317862808704376 2023-01-24 04:37:40.875371: step: 260/470, loss: 0.15021666884422302 2023-01-24 04:37:41.508077: step: 262/470, loss: 0.039372485131025314 2023-01-24 04:37:42.349944: step: 264/470, loss: 0.014304211363196373 2023-01-24 04:37:43.107119: step: 266/470, loss: 0.04224678874015808 2023-01-24 04:37:43.838058: step: 268/470, loss: 0.0519757904112339 2023-01-24 04:37:44.572333: step: 270/470, loss: 0.018106307834386826 2023-01-24 04:37:45.277670: step: 272/470, loss: 0.22459372878074646 2023-01-24 04:37:45.943456: step: 274/470, loss: 0.00824404414743185 2023-01-24 04:37:46.702748: step: 276/470, loss: 0.020545797422528267 2023-01-24 04:37:47.381689: step: 278/470, loss: 0.013951159082353115 2023-01-24 04:37:48.220064: step: 280/470, loss: 0.0626802146434784 2023-01-24 04:37:49.024515: step: 282/470, loss: 0.0456269308924675 2023-01-24 04:37:49.755611: step: 284/470, loss: 1.7039402723312378 2023-01-24 04:37:50.492912: step: 286/470, loss: 0.024264952167868614 2023-01-24 04:37:51.253176: step: 288/470, loss: 0.060633957386016846 2023-01-24 04:37:52.020316: step: 290/470, loss: 0.01541169360280037 2023-01-24 04:37:52.767431: step: 292/470, loss: 0.02074408531188965 2023-01-24 04:37:53.483981: step: 294/470, loss: 0.008654659613966942 2023-01-24 04:37:54.204088: step: 296/470, loss: 0.03434770554304123 2023-01-24 04:37:54.924248: step: 298/470, loss: 0.1805637627840042 2023-01-24 04:37:55.675531: step: 300/470, loss: 0.1149728000164032 2023-01-24 04:37:56.403520: step: 302/470, loss: 0.015428435057401657 2023-01-24 04:37:57.121853: step: 304/470, loss: 0.018192537128925323 2023-01-24 04:37:57.893300: step: 306/470, loss: 0.0014132431242614985 2023-01-24 04:37:58.583773: step: 308/470, loss: 1.010900616645813 2023-01-24 04:37:59.282576: step: 310/470, loss: 0.025461159646511078 2023-01-24 04:38:00.055066: step: 312/470, loss: 0.031049776822328568 2023-01-24 04:38:00.861919: step: 314/470, loss: 0.04371613636612892 2023-01-24 04:38:01.572911: step: 316/470, loss: 0.11215726286172867 2023-01-24 04:38:02.351964: step: 318/470, loss: 0.002360484329983592 2023-01-24 04:38:03.095676: step: 320/470, loss: 0.04522380232810974 2023-01-24 04:38:03.760503: step: 322/470, loss: 0.048406392335891724 2023-01-24 04:38:04.434757: step: 324/470, loss: 0.002590734278783202 2023-01-24 04:38:05.155457: step: 326/470, loss: 0.17612895369529724 2023-01-24 04:38:05.886745: step: 328/470, loss: 0.04783718287944794 2023-01-24 04:38:06.663195: step: 330/470, loss: 0.08684522658586502 2023-01-24 04:38:07.423650: step: 332/470, loss: 0.0222398079931736 2023-01-24 04:38:08.134107: step: 334/470, loss: 0.09281022101640701 2023-01-24 04:38:08.893686: step: 336/470, loss: 0.018266357481479645 2023-01-24 04:38:09.682520: step: 338/470, loss: 0.05038531869649887 2023-01-24 04:38:10.363864: step: 340/470, loss: 0.12421422451734543 2023-01-24 04:38:11.037910: step: 342/470, loss: 0.030940696597099304 2023-01-24 04:38:11.828727: step: 344/470, loss: 0.0634685754776001 2023-01-24 04:38:12.577248: step: 346/470, loss: 0.06550871580839157 2023-01-24 04:38:13.410684: step: 348/470, loss: 0.02845531329512596 2023-01-24 04:38:14.239173: step: 350/470, loss: 0.03609459847211838 2023-01-24 04:38:15.046182: step: 352/470, loss: 0.0488816574215889 2023-01-24 04:38:15.826892: step: 354/470, loss: 0.013090868480503559 2023-01-24 04:38:16.464420: step: 356/470, loss: 0.009363566525280476 2023-01-24 04:38:17.157886: step: 358/470, loss: 0.06547439098358154 2023-01-24 04:38:17.902848: step: 360/470, loss: 0.11378369480371475 2023-01-24 04:38:18.674780: step: 362/470, loss: 0.10389399528503418 2023-01-24 04:38:19.400401: step: 364/470, loss: 0.1084824800491333 2023-01-24 04:38:20.186862: step: 366/470, loss: 0.03515470772981644 2023-01-24 04:38:20.895837: step: 368/470, loss: 0.03233461454510689 2023-01-24 04:38:21.590421: step: 370/470, loss: 0.010006394237279892 2023-01-24 04:38:22.312727: step: 372/470, loss: 0.0295244287699461 2023-01-24 04:38:23.068228: step: 374/470, loss: 0.006042733788490295 2023-01-24 04:38:23.815163: step: 376/470, loss: 0.06146341562271118 2023-01-24 04:38:24.765323: step: 378/470, loss: 0.03354865685105324 2023-01-24 04:38:25.543972: step: 380/470, loss: 0.024119818583130836 2023-01-24 04:38:26.292809: step: 382/470, loss: 0.12077596783638 2023-01-24 04:38:26.975503: step: 384/470, loss: 0.1539594531059265 2023-01-24 04:38:27.729143: step: 386/470, loss: 0.03286004438996315 2023-01-24 04:38:28.474697: step: 388/470, loss: 0.12388432770967484 2023-01-24 04:38:29.209558: step: 390/470, loss: 0.02824668027460575 2023-01-24 04:38:29.994993: step: 392/470, loss: 0.054188963025808334 2023-01-24 04:38:30.796946: step: 394/470, loss: 0.025531675666570663 2023-01-24 04:38:31.525187: step: 396/470, loss: 0.0763605609536171 2023-01-24 04:38:32.235745: step: 398/470, loss: 0.010550078935921192 2023-01-24 04:38:32.930139: step: 400/470, loss: 0.01099458895623684 2023-01-24 04:38:33.788799: step: 402/470, loss: 0.04127969220280647 2023-01-24 04:38:34.574558: step: 404/470, loss: 0.018147412687540054 2023-01-24 04:38:35.287955: step: 406/470, loss: 0.007162865251302719 2023-01-24 04:38:35.997417: step: 408/470, loss: 0.04229717701673508 2023-01-24 04:38:36.679184: step: 410/470, loss: 0.03728770837187767 2023-01-24 04:38:37.394124: step: 412/470, loss: 0.014461766928434372 2023-01-24 04:38:38.204812: step: 414/470, loss: 0.09930559992790222 2023-01-24 04:38:38.999483: step: 416/470, loss: 0.08320431411266327 2023-01-24 04:38:39.736659: step: 418/470, loss: 0.08579551428556442 2023-01-24 04:38:40.539356: step: 420/470, loss: 0.05773615464568138 2023-01-24 04:38:41.290897: step: 422/470, loss: 0.007528889924287796 2023-01-24 04:38:42.013508: step: 424/470, loss: 0.04420556128025055 2023-01-24 04:38:42.839955: step: 426/470, loss: 0.015755586326122284 2023-01-24 04:38:43.545845: step: 428/470, loss: 0.015585158951580524 2023-01-24 04:38:44.294363: step: 430/470, loss: 0.01235332153737545 2023-01-24 04:38:45.037867: step: 432/470, loss: 0.09456421434879303 2023-01-24 04:38:45.778853: step: 434/470, loss: 0.22690565884113312 2023-01-24 04:38:46.580981: step: 436/470, loss: 0.4971714913845062 2023-01-24 04:38:47.330162: step: 438/470, loss: 0.02801249548792839 2023-01-24 04:38:48.064316: step: 440/470, loss: 0.007650639396160841 2023-01-24 04:38:48.773833: step: 442/470, loss: 0.054860420525074005 2023-01-24 04:38:49.532557: step: 444/470, loss: 0.029611477628350258 2023-01-24 04:38:50.261803: step: 446/470, loss: 0.07205002754926682 2023-01-24 04:38:51.047420: step: 448/470, loss: 0.02133980020880699 2023-01-24 04:38:51.849536: step: 450/470, loss: 0.4700184762477875 2023-01-24 04:38:52.553497: step: 452/470, loss: 0.20984295010566711 2023-01-24 04:38:53.254772: step: 454/470, loss: 0.038992129266262054 2023-01-24 04:38:54.023252: step: 456/470, loss: 0.04633091017603874 2023-01-24 04:38:54.733033: step: 458/470, loss: 0.05490873381495476 2023-01-24 04:38:55.425563: step: 460/470, loss: 0.0526411309838295 2023-01-24 04:38:56.181950: step: 462/470, loss: 0.091351717710495 2023-01-24 04:38:56.898433: step: 464/470, loss: 0.008627299219369888 2023-01-24 04:38:57.572831: step: 466/470, loss: 0.00773763470351696 2023-01-24 04:38:58.310500: step: 468/470, loss: 0.004108825232833624 2023-01-24 04:38:59.058181: step: 470/470, loss: 0.06798605620861053 2023-01-24 04:38:59.797119: step: 472/470, loss: 0.04032795503735542 2023-01-24 04:39:00.556784: step: 474/470, loss: 0.03978598490357399 2023-01-24 04:39:01.335545: step: 476/470, loss: 0.022391952574253082 2023-01-24 04:39:02.128326: step: 478/470, loss: 0.08442055433988571 2023-01-24 04:39:03.003704: step: 480/470, loss: 0.041006091982126236 2023-01-24 04:39:03.754803: step: 482/470, loss: 0.054082486778497696 2023-01-24 04:39:04.570304: step: 484/470, loss: 0.056996893137693405 2023-01-24 04:39:05.337298: step: 486/470, loss: 0.21792449057102203 2023-01-24 04:39:06.124995: step: 488/470, loss: 0.09255671501159668 2023-01-24 04:39:06.846761: step: 490/470, loss: 0.0614955797791481 2023-01-24 04:39:07.642856: step: 492/470, loss: 0.034090857952833176 2023-01-24 04:39:08.319483: step: 494/470, loss: 0.27086207270622253 2023-01-24 04:39:09.134017: step: 496/470, loss: 0.3944252133369446 2023-01-24 04:39:09.805110: step: 498/470, loss: 0.06420669704675674 2023-01-24 04:39:10.577612: step: 500/470, loss: 2.5734214782714844 2023-01-24 04:39:11.270957: step: 502/470, loss: 0.16732986271381378 2023-01-24 04:39:12.008134: step: 504/470, loss: 0.0660572499036789 2023-01-24 04:39:12.771791: step: 506/470, loss: 0.03720968961715698 2023-01-24 04:39:13.543632: step: 508/470, loss: 0.10983237624168396 2023-01-24 04:39:14.269428: step: 510/470, loss: 0.09013104438781738 2023-01-24 04:39:14.968699: step: 512/470, loss: 0.020754126831889153 2023-01-24 04:39:15.652169: step: 514/470, loss: 0.017007894814014435 2023-01-24 04:39:16.349342: step: 516/470, loss: 0.035360775887966156 2023-01-24 04:39:17.112789: step: 518/470, loss: 0.04036063700914383 2023-01-24 04:39:17.839443: step: 520/470, loss: 0.08036433905363083 2023-01-24 04:39:18.598158: step: 522/470, loss: 0.06309843808412552 2023-01-24 04:39:19.429150: step: 524/470, loss: 0.03624216467142105 2023-01-24 04:39:20.136934: step: 526/470, loss: 0.016651665791869164 2023-01-24 04:39:20.964721: step: 528/470, loss: 0.03483118116855621 2023-01-24 04:39:21.706718: step: 530/470, loss: 0.0016926492098718882 2023-01-24 04:39:22.435671: step: 532/470, loss: 0.04679853096604347 2023-01-24 04:39:23.171458: step: 534/470, loss: 0.08570143580436707 2023-01-24 04:39:23.926573: step: 536/470, loss: 0.01729772239923477 2023-01-24 04:39:24.665511: step: 538/470, loss: 0.026970678940415382 2023-01-24 04:39:25.426270: step: 540/470, loss: 0.13230566680431366 2023-01-24 04:39:26.190341: step: 542/470, loss: 0.06809458136558533 2023-01-24 04:39:26.921175: step: 544/470, loss: 0.0313749760389328 2023-01-24 04:39:27.617199: step: 546/470, loss: 0.0077603161334991455 2023-01-24 04:39:28.419422: step: 548/470, loss: 0.031418073922395706 2023-01-24 04:39:29.239639: step: 550/470, loss: 0.0281534343957901 2023-01-24 04:39:30.036364: step: 552/470, loss: 1.547700047492981 2023-01-24 04:39:30.750128: step: 554/470, loss: 0.041558846831321716 2023-01-24 04:39:31.515790: step: 556/470, loss: 0.05451665446162224 2023-01-24 04:39:32.253021: step: 558/470, loss: 0.03179001063108444 2023-01-24 04:39:32.951986: step: 560/470, loss: 0.04917387664318085 2023-01-24 04:39:33.605595: step: 562/470, loss: 0.019586345180869102 2023-01-24 04:39:34.383319: step: 564/470, loss: 0.03305808827280998 2023-01-24 04:39:35.120434: step: 566/470, loss: 0.05808739736676216 2023-01-24 04:39:35.920553: step: 568/470, loss: 0.08200092613697052 2023-01-24 04:39:36.641580: step: 570/470, loss: 0.12195821106433868 2023-01-24 04:39:37.393885: step: 572/470, loss: 0.0011931839399039745 2023-01-24 04:39:38.105795: step: 574/470, loss: 0.06672245264053345 2023-01-24 04:39:38.866726: step: 576/470, loss: 0.10479816049337387 2023-01-24 04:39:39.535992: step: 578/470, loss: 0.09342262893915176 2023-01-24 04:39:40.325241: step: 580/470, loss: 0.04020530730485916 2023-01-24 04:39:41.052831: step: 582/470, loss: 0.058462418615818024 2023-01-24 04:39:41.817836: step: 584/470, loss: 0.03125924617052078 2023-01-24 04:39:42.560406: step: 586/470, loss: 0.12108433991670609 2023-01-24 04:39:43.282425: step: 588/470, loss: 0.008420255035161972 2023-01-24 04:39:44.090485: step: 590/470, loss: 0.03752874210476875 2023-01-24 04:39:44.944688: step: 592/470, loss: 0.30069804191589355 2023-01-24 04:39:45.693080: step: 594/470, loss: 0.050650861114263535 2023-01-24 04:39:46.404599: step: 596/470, loss: 0.06926306337118149 2023-01-24 04:39:47.110183: step: 598/470, loss: 0.052621450275182724 2023-01-24 04:39:47.926692: step: 600/470, loss: 0.05443857982754707 2023-01-24 04:39:48.631187: step: 602/470, loss: 0.0711992159485817 2023-01-24 04:39:49.387854: step: 604/470, loss: 0.005964161362498999 2023-01-24 04:39:50.265675: step: 606/470, loss: 0.18475210666656494 2023-01-24 04:39:50.988634: step: 608/470, loss: 0.09569357335567474 2023-01-24 04:39:51.786511: step: 610/470, loss: 0.07499527186155319 2023-01-24 04:39:52.503490: step: 612/470, loss: 0.13202105462551117 2023-01-24 04:39:53.242985: step: 614/470, loss: 0.04779544472694397 2023-01-24 04:39:54.254470: step: 616/470, loss: 0.04207386076450348 2023-01-24 04:39:55.015255: step: 618/470, loss: 0.08963283151388168 2023-01-24 04:39:55.783368: step: 620/470, loss: 0.02025291509926319 2023-01-24 04:39:56.517929: step: 622/470, loss: 0.22763288021087646 2023-01-24 04:39:57.313086: step: 624/470, loss: 0.0565686859190464 2023-01-24 04:39:57.935940: step: 626/470, loss: 0.07253819704055786 2023-01-24 04:39:58.598248: step: 628/470, loss: 0.1353847086429596 2023-01-24 04:39:59.293459: step: 630/470, loss: 0.1034807413816452 2023-01-24 04:40:00.075365: step: 632/470, loss: 0.04602696746587753 2023-01-24 04:40:00.810641: step: 634/470, loss: 0.0005324460798874497 2023-01-24 04:40:01.547305: step: 636/470, loss: 0.04955475404858589 2023-01-24 04:40:02.300658: step: 638/470, loss: 0.04272598400712013 2023-01-24 04:40:03.000503: step: 640/470, loss: 0.08605597168207169 2023-01-24 04:40:03.725433: step: 642/470, loss: 0.013445237651467323 2023-01-24 04:40:04.471727: step: 644/470, loss: 0.018981290981173515 2023-01-24 04:40:05.194240: step: 646/470, loss: 0.039564017206430435 2023-01-24 04:40:05.923065: step: 648/470, loss: 0.03769481182098389 2023-01-24 04:40:06.691607: step: 650/470, loss: 0.02641088329255581 2023-01-24 04:40:07.445749: step: 652/470, loss: 0.00315751601010561 2023-01-24 04:40:08.206595: step: 654/470, loss: 0.030088361352682114 2023-01-24 04:40:08.948622: step: 656/470, loss: 0.0600021593272686 2023-01-24 04:40:09.698464: step: 658/470, loss: 0.05731518939137459 2023-01-24 04:40:10.482635: step: 660/470, loss: 0.02242978662252426 2023-01-24 04:40:11.232960: step: 662/470, loss: 0.11539184302091599 2023-01-24 04:40:12.021335: step: 664/470, loss: 0.08133013546466827 2023-01-24 04:40:12.801473: step: 666/470, loss: 0.0326472632586956 2023-01-24 04:40:13.569519: step: 668/470, loss: 0.04640135541558266 2023-01-24 04:40:14.237908: step: 670/470, loss: 0.05912771821022034 2023-01-24 04:40:15.005026: step: 672/470, loss: 0.08168984949588776 2023-01-24 04:40:15.700951: step: 674/470, loss: 0.021860754117369652 2023-01-24 04:40:16.398507: step: 676/470, loss: 0.00997862871736288 2023-01-24 04:40:17.113184: step: 678/470, loss: 0.015150370076298714 2023-01-24 04:40:17.986659: step: 680/470, loss: 0.23191246390342712 2023-01-24 04:40:18.694439: step: 682/470, loss: 0.005268333945423365 2023-01-24 04:40:19.463627: step: 684/470, loss: 0.050369635224342346 2023-01-24 04:40:20.148893: step: 686/470, loss: 0.19161257147789001 2023-01-24 04:40:20.932906: step: 688/470, loss: 0.01200926210731268 2023-01-24 04:40:21.647065: step: 690/470, loss: 0.10803577303886414 2023-01-24 04:40:22.367673: step: 692/470, loss: 0.13654294610023499 2023-01-24 04:40:23.111809: step: 694/470, loss: 0.06747213006019592 2023-01-24 04:40:23.873539: step: 696/470, loss: 0.05152638256549835 2023-01-24 04:40:24.633086: step: 698/470, loss: 0.06810185313224792 2023-01-24 04:40:25.361663: step: 700/470, loss: 0.004062777850776911 2023-01-24 04:40:26.019098: step: 702/470, loss: 0.021000513806939125 2023-01-24 04:40:26.780078: step: 704/470, loss: 0.0744672566652298 2023-01-24 04:40:27.611131: step: 706/470, loss: 0.06481971591711044 2023-01-24 04:40:28.409327: step: 708/470, loss: 0.040443241596221924 2023-01-24 04:40:29.110738: step: 710/470, loss: 0.03952976316213608 2023-01-24 04:40:29.829867: step: 712/470, loss: 0.05129685252904892 2023-01-24 04:40:30.625333: step: 714/470, loss: 0.08018472790718079 2023-01-24 04:40:31.317232: step: 716/470, loss: 0.0011982826981693506 2023-01-24 04:40:31.984308: step: 718/470, loss: 0.01237798947840929 2023-01-24 04:40:32.647622: step: 720/470, loss: 0.030002785846590996 2023-01-24 04:40:33.301849: step: 722/470, loss: 0.029811648651957512 2023-01-24 04:40:34.038745: step: 724/470, loss: 0.028446856886148453 2023-01-24 04:40:34.798760: step: 726/470, loss: 0.03428805246949196 2023-01-24 04:40:35.492974: step: 728/470, loss: 0.04685278609395027 2023-01-24 04:40:36.286202: step: 730/470, loss: 0.011925329454243183 2023-01-24 04:40:37.106560: step: 732/470, loss: 0.10092224925756454 2023-01-24 04:40:37.779934: step: 734/470, loss: 0.014230447821319103 2023-01-24 04:40:38.526147: step: 736/470, loss: 0.06267601996660233 2023-01-24 04:40:39.259713: step: 738/470, loss: 0.0850699171423912 2023-01-24 04:40:40.062850: step: 740/470, loss: 0.014023186638951302 2023-01-24 04:40:40.763953: step: 742/470, loss: 0.11571689695119858 2023-01-24 04:40:41.437917: step: 744/470, loss: 0.05851830914616585 2023-01-24 04:40:42.131196: step: 746/470, loss: 0.7251389622688293 2023-01-24 04:40:42.915604: step: 748/470, loss: 0.21461817622184753 2023-01-24 04:40:43.640790: step: 750/470, loss: 0.056537434458732605 2023-01-24 04:40:44.336592: step: 752/470, loss: 0.06353659927845001 2023-01-24 04:40:45.167949: step: 754/470, loss: 0.14348191022872925 2023-01-24 04:40:45.854522: step: 756/470, loss: 0.021871326491236687 2023-01-24 04:40:46.635876: step: 758/470, loss: 0.059082724153995514 2023-01-24 04:40:47.384239: step: 760/470, loss: 0.0077812704257667065 2023-01-24 04:40:48.174166: step: 762/470, loss: 0.1153213232755661 2023-01-24 04:40:48.823867: step: 764/470, loss: 0.16919955611228943 2023-01-24 04:40:49.505801: step: 766/470, loss: 0.02236533723771572 2023-01-24 04:40:50.249887: step: 768/470, loss: 0.010637041181325912 2023-01-24 04:40:50.963245: step: 770/470, loss: 0.09718048572540283 2023-01-24 04:40:51.604607: step: 772/470, loss: 0.02509610913693905 2023-01-24 04:40:52.294256: step: 774/470, loss: 0.41436123847961426 2023-01-24 04:40:53.150190: step: 776/470, loss: 0.046184662729501724 2023-01-24 04:40:53.886945: step: 778/470, loss: 0.043335553258657455 2023-01-24 04:40:54.625164: step: 780/470, loss: 0.011558422818779945 2023-01-24 04:40:55.455794: step: 782/470, loss: 0.008647819980978966 2023-01-24 04:40:56.179363: step: 784/470, loss: 0.010468276217579842 2023-01-24 04:40:56.922611: step: 786/470, loss: 0.012684951536357403 2023-01-24 04:40:57.743569: step: 788/470, loss: 0.060763075947761536 2023-01-24 04:40:58.476668: step: 790/470, loss: 0.019749129191040993 2023-01-24 04:40:59.177688: step: 792/470, loss: 0.01861565373837948 2023-01-24 04:40:59.950774: step: 794/470, loss: 0.023979458957910538 2023-01-24 04:41:00.742485: step: 796/470, loss: 0.07430318742990494 2023-01-24 04:41:01.542963: step: 798/470, loss: 0.00071112992009148 2023-01-24 04:41:02.327504: step: 800/470, loss: 0.16077010333538055 2023-01-24 04:41:03.107209: step: 802/470, loss: 0.04622608423233032 2023-01-24 04:41:03.882569: step: 804/470, loss: 0.03751537576317787 2023-01-24 04:41:04.653754: step: 806/470, loss: 0.02206636592745781 2023-01-24 04:41:05.443587: step: 808/470, loss: 0.037656575441360474 2023-01-24 04:41:06.180077: step: 810/470, loss: 0.0038295581471174955 2023-01-24 04:41:06.888188: step: 812/470, loss: 0.007838837802410126 2023-01-24 04:41:07.543237: step: 814/470, loss: 0.0843760147690773 2023-01-24 04:41:08.324228: step: 816/470, loss: 0.12722979485988617 2023-01-24 04:41:09.031740: step: 818/470, loss: 0.004428504034876823 2023-01-24 04:41:09.784630: step: 820/470, loss: 0.017400939017534256 2023-01-24 04:41:10.516377: step: 822/470, loss: 0.11112985014915466 2023-01-24 04:41:11.324836: step: 824/470, loss: 0.06626398861408234 2023-01-24 04:41:12.076266: step: 826/470, loss: 0.006494198925793171 2023-01-24 04:41:12.781030: step: 828/470, loss: 0.003452116623520851 2023-01-24 04:41:13.530372: step: 830/470, loss: 0.04936700686812401 2023-01-24 04:41:14.277628: step: 832/470, loss: 0.06773626059293747 2023-01-24 04:41:15.033298: step: 834/470, loss: 0.25022315979003906 2023-01-24 04:41:15.749898: step: 836/470, loss: 0.09486892819404602 2023-01-24 04:41:16.518374: step: 838/470, loss: 0.06972448527812958 2023-01-24 04:41:17.366356: step: 840/470, loss: 0.05532016232609749 2023-01-24 04:41:18.054633: step: 842/470, loss: 0.06207747757434845 2023-01-24 04:41:18.734112: step: 844/470, loss: 0.07339915633201599 2023-01-24 04:41:19.541683: step: 846/470, loss: 0.04446965828537941 2023-01-24 04:41:20.295348: step: 848/470, loss: 0.1462089866399765 2023-01-24 04:41:21.011761: step: 850/470, loss: 0.06136621907353401 2023-01-24 04:41:21.745948: step: 852/470, loss: 0.08713188767433167 2023-01-24 04:41:22.469745: step: 854/470, loss: 0.06073979288339615 2023-01-24 04:41:23.099625: step: 856/470, loss: 0.024500612169504166 2023-01-24 04:41:23.812933: step: 858/470, loss: 0.0761837363243103 2023-01-24 04:41:24.574676: step: 860/470, loss: 0.037365589290857315 2023-01-24 04:41:25.344835: step: 862/470, loss: 0.018834102898836136 2023-01-24 04:41:26.007458: step: 864/470, loss: 0.009648923762142658 2023-01-24 04:41:26.762299: step: 866/470, loss: 0.006375704426318407 2023-01-24 04:41:27.556008: step: 868/470, loss: 0.047611527144908905 2023-01-24 04:41:28.304920: step: 870/470, loss: 0.009436516091227531 2023-01-24 04:41:29.095775: step: 872/470, loss: 0.1485144942998886 2023-01-24 04:41:29.819203: step: 874/470, loss: 0.030754825100302696 2023-01-24 04:41:30.535349: step: 876/470, loss: 0.03466818481683731 2023-01-24 04:41:31.313404: step: 878/470, loss: 0.05955606326460838 2023-01-24 04:41:32.055289: step: 880/470, loss: 0.036698900163173676 2023-01-24 04:41:32.812470: step: 882/470, loss: 0.03446055203676224 2023-01-24 04:41:33.635397: step: 884/470, loss: 0.04526906460523605 2023-01-24 04:41:34.308342: step: 886/470, loss: 0.015627926215529442 2023-01-24 04:41:35.094662: step: 888/470, loss: 0.0814151018857956 2023-01-24 04:41:35.819639: step: 890/470, loss: 0.25850528478622437 2023-01-24 04:41:36.642821: step: 892/470, loss: 0.18925324082374573 2023-01-24 04:41:37.373067: step: 894/470, loss: 0.04655158147215843 2023-01-24 04:41:38.108473: step: 896/470, loss: 0.08437757939100266 2023-01-24 04:41:38.844632: step: 898/470, loss: 0.04652692750096321 2023-01-24 04:41:39.639518: step: 900/470, loss: 3.10775089263916 2023-01-24 04:41:40.421613: step: 902/470, loss: 0.059189215302467346 2023-01-24 04:41:41.125345: step: 904/470, loss: 0.00953720137476921 2023-01-24 04:41:41.831707: step: 906/470, loss: 0.06070271134376526 2023-01-24 04:41:42.633610: step: 908/470, loss: 0.008491786196827888 2023-01-24 04:41:43.339802: step: 910/470, loss: 0.11616123467683792 2023-01-24 04:41:44.023934: step: 912/470, loss: 0.004306926857680082 2023-01-24 04:41:44.775190: step: 914/470, loss: 0.03736087679862976 2023-01-24 04:41:45.544467: step: 916/470, loss: 0.06064944341778755 2023-01-24 04:41:46.310558: step: 918/470, loss: 0.02283492125570774 2023-01-24 04:41:47.013414: step: 920/470, loss: 0.010427909903228283 2023-01-24 04:41:47.856963: step: 922/470, loss: 0.034641899168491364 2023-01-24 04:41:48.591860: step: 924/470, loss: 0.03633886203169823 2023-01-24 04:41:49.328309: step: 926/470, loss: 0.31999385356903076 2023-01-24 04:41:50.034965: step: 928/470, loss: 0.06611615419387817 2023-01-24 04:41:50.682230: step: 930/470, loss: 0.058460332453250885 2023-01-24 04:41:51.355102: step: 932/470, loss: 0.08226493000984192 2023-01-24 04:41:52.103640: step: 934/470, loss: 0.05760214105248451 2023-01-24 04:41:52.836817: step: 936/470, loss: 0.018661608919501305 2023-01-24 04:41:53.507260: step: 938/470, loss: 0.03469004109501839 2023-01-24 04:41:54.281778: step: 940/470, loss: 0.05704415589570999 2023-01-24 04:41:54.963425: step: 942/470, loss: 0.05449334532022476 ================================================== Loss: 0.096 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34430731752160326, 'r': 0.33842730640643354, 'f1': 0.3413419913419914}, 'combined': 0.2515151515151515, 'epoch': 22} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34277232457694784, 'r': 0.34079479193515777, 'f1': 0.34178069779417947}, 'combined': 0.22785379852945292, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33780271064851425, 'r': 0.33523874320526176, 'f1': 0.33651584317937705}, 'combined': 0.24795904234269886, 'epoch': 22} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3483830617631938, 'r': 0.3339925990774903, 'f1': 0.34103609184861927}, 'combined': 0.22735739456574613, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32891998312473636, 'r': 0.3414027149321267, 'f1': 0.3350451224752901}, 'combined': 0.24687535340284533, 'epoch': 22} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3365370415997711, 'r': 0.34786280742284037, 'f1': 0.34210621250094936}, 'combined': 0.2280708083339662, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27564102564102566, 'r': 0.30714285714285716, 'f1': 0.2905405405405405}, 'combined': 0.19369369369369366, 'epoch': 22} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.2826086956521739, 'f1': 0.3714285714285714}, 'combined': 0.24761904761904757, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29912280701754385, 'r': 0.15471869328493645, 'f1': 0.20394736842105263}, 'combined': 0.13596491228070173, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30801502714153317, 'r': 0.33957633922055175, 'f1': 0.3230265898361566}, 'combined': 0.23801959251085222, 'epoch': 16} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3547111579078519, 'r': 0.34789634219396426, 'f1': 0.3512707005081637}, 'combined': 0.23418046700544243, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 16} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:44:29.731449: step: 2/470, loss: 0.020145274698734283 2023-01-24 04:44:30.488633: step: 4/470, loss: 0.012752152979373932 2023-01-24 04:44:31.180796: step: 6/470, loss: 0.018607724457979202 2023-01-24 04:44:31.925483: step: 8/470, loss: 0.04674272611737251 2023-01-24 04:44:32.641230: step: 10/470, loss: 0.08120296895503998 2023-01-24 04:44:33.404499: step: 12/470, loss: 0.04648889973759651 2023-01-24 04:44:34.210950: step: 14/470, loss: 0.041562411934137344 2023-01-24 04:44:34.967596: step: 16/470, loss: 0.24655379354953766 2023-01-24 04:44:35.731397: step: 18/470, loss: 0.02498440630733967 2023-01-24 04:44:36.406716: step: 20/470, loss: 0.05179232358932495 2023-01-24 04:44:37.125539: step: 22/470, loss: 0.03058280237019062 2023-01-24 04:44:37.932602: step: 24/470, loss: 0.11315041780471802 2023-01-24 04:44:38.632118: step: 26/470, loss: 0.018697861582040787 2023-01-24 04:44:39.316221: step: 28/470, loss: 0.006492226850241423 2023-01-24 04:44:40.055564: step: 30/470, loss: 0.09545272588729858 2023-01-24 04:44:40.785203: step: 32/470, loss: 0.024350939318537712 2023-01-24 04:44:41.500682: step: 34/470, loss: 0.5242292881011963 2023-01-24 04:44:42.195939: step: 36/470, loss: 0.017941568046808243 2023-01-24 04:44:42.930813: step: 38/470, loss: 0.08385952562093735 2023-01-24 04:44:43.636578: step: 40/470, loss: 0.03444410860538483 2023-01-24 04:44:44.474253: step: 42/470, loss: 0.007343780715018511 2023-01-24 04:44:45.183681: step: 44/470, loss: 0.08139993250370026 2023-01-24 04:44:46.060093: step: 46/470, loss: 0.029229672625660896 2023-01-24 04:44:46.805714: step: 48/470, loss: 0.031573113054037094 2023-01-24 04:44:47.691498: step: 50/470, loss: 0.01538156345486641 2023-01-24 04:44:48.435038: step: 52/470, loss: 0.01159243006259203 2023-01-24 04:44:49.198558: step: 54/470, loss: 0.08330456912517548 2023-01-24 04:44:49.932363: step: 56/470, loss: 0.004855206701904535 2023-01-24 04:44:50.649184: step: 58/470, loss: 0.12275192141532898 2023-01-24 04:44:51.477754: step: 60/470, loss: 0.07747679203748703 2023-01-24 04:44:52.187650: step: 62/470, loss: 0.009875631891191006 2023-01-24 04:44:52.969045: step: 64/470, loss: 0.016967376694083214 2023-01-24 04:44:53.661657: step: 66/470, loss: 0.003874419489875436 2023-01-24 04:44:54.442450: step: 68/470, loss: 0.018619472160935402 2023-01-24 04:44:55.206050: step: 70/470, loss: 0.08524934202432632 2023-01-24 04:44:55.917104: step: 72/470, loss: 0.027295870706439018 2023-01-24 04:44:56.664276: step: 74/470, loss: 0.0906219631433487 2023-01-24 04:44:57.440390: step: 76/470, loss: 0.04819449409842491 2023-01-24 04:44:58.192829: step: 78/470, loss: 0.0014960899716243148 2023-01-24 04:44:58.923143: step: 80/470, loss: 0.05570714548230171 2023-01-24 04:44:59.690017: step: 82/470, loss: 0.0980103462934494 2023-01-24 04:45:00.475404: step: 84/470, loss: 0.014589948579668999 2023-01-24 04:45:01.283009: step: 86/470, loss: 0.05184318125247955 2023-01-24 04:45:02.169438: step: 88/470, loss: 0.11208540946245193 2023-01-24 04:45:03.020098: step: 90/470, loss: 0.5855203866958618 2023-01-24 04:45:03.733667: step: 92/470, loss: 0.028581831604242325 2023-01-24 04:45:04.423859: step: 94/470, loss: 0.11515840142965317 2023-01-24 04:45:05.094613: step: 96/470, loss: 0.013613392598927021 2023-01-24 04:45:05.882142: step: 98/470, loss: 0.030206475406885147 2023-01-24 04:45:06.605816: step: 100/470, loss: 0.028599973767995834 2023-01-24 04:45:07.406448: step: 102/470, loss: 0.03127244487404823 2023-01-24 04:45:08.112756: step: 104/470, loss: 0.06208731234073639 2023-01-24 04:45:08.832719: step: 106/470, loss: 0.03612957522273064 2023-01-24 04:45:09.577848: step: 108/470, loss: 0.010129529982805252 2023-01-24 04:45:10.308448: step: 110/470, loss: 0.007978275418281555 2023-01-24 04:45:11.077913: step: 112/470, loss: 0.153214231133461 2023-01-24 04:45:11.857094: step: 114/470, loss: 0.022448312491178513 2023-01-24 04:45:12.631591: step: 116/470, loss: 0.035694267600774765 2023-01-24 04:45:13.423877: step: 118/470, loss: 0.10163560509681702 2023-01-24 04:45:14.217845: step: 120/470, loss: 0.015968233346939087 2023-01-24 04:45:14.952947: step: 122/470, loss: 0.025551877915859222 2023-01-24 04:45:15.627526: step: 124/470, loss: 0.019368741661310196 2023-01-24 04:45:16.401069: step: 126/470, loss: 0.027455579489469528 2023-01-24 04:45:17.106767: step: 128/470, loss: 0.052771057933568954 2023-01-24 04:45:18.027782: step: 130/470, loss: 0.007331357337534428 2023-01-24 04:45:18.836451: step: 132/470, loss: 0.05879795923829079 2023-01-24 04:45:19.702792: step: 134/470, loss: 0.013023512437939644 2023-01-24 04:45:20.429319: step: 136/470, loss: 0.009905382990837097 2023-01-24 04:45:21.134302: step: 138/470, loss: 0.0017207978526130319 2023-01-24 04:45:21.881341: step: 140/470, loss: 0.04617958515882492 2023-01-24 04:45:22.563625: step: 142/470, loss: 0.009676694869995117 2023-01-24 04:45:23.368813: step: 144/470, loss: 0.10442635416984558 2023-01-24 04:45:24.082165: step: 146/470, loss: 0.03223101794719696 2023-01-24 04:45:24.772717: step: 148/470, loss: 0.17558103799819946 2023-01-24 04:45:25.438089: step: 150/470, loss: 0.007184363901615143 2023-01-24 04:45:26.122798: step: 152/470, loss: 0.016059819608926773 2023-01-24 04:45:26.901097: step: 154/470, loss: 0.017226694151759148 2023-01-24 04:45:27.758180: step: 156/470, loss: 0.054372578859329224 2023-01-24 04:45:28.516817: step: 158/470, loss: 0.09041217714548111 2023-01-24 04:45:29.192605: step: 160/470, loss: 0.010833369567990303 2023-01-24 04:45:29.899887: step: 162/470, loss: 0.007517059799283743 2023-01-24 04:45:30.593213: step: 164/470, loss: 0.029931971803307533 2023-01-24 04:45:31.339671: step: 166/470, loss: 0.029189355671405792 2023-01-24 04:45:32.097985: step: 168/470, loss: 0.03147708997130394 2023-01-24 04:45:32.857675: step: 170/470, loss: 0.023568544536828995 2023-01-24 04:45:33.678502: step: 172/470, loss: 0.011535699479281902 2023-01-24 04:45:34.447420: step: 174/470, loss: 0.05071718618273735 2023-01-24 04:45:35.224773: step: 176/470, loss: 0.043926727026700974 2023-01-24 04:45:36.003129: step: 178/470, loss: 0.019493265077471733 2023-01-24 04:45:36.663589: step: 180/470, loss: 0.09586971253156662 2023-01-24 04:45:37.333387: step: 182/470, loss: 0.0498097725212574 2023-01-24 04:45:38.166151: step: 184/470, loss: 0.07411568611860275 2023-01-24 04:45:38.937845: step: 186/470, loss: 0.014985108748078346 2023-01-24 04:45:39.674741: step: 188/470, loss: 0.02051725424826145 2023-01-24 04:45:40.401527: step: 190/470, loss: 0.022670986130833626 2023-01-24 04:45:41.143924: step: 192/470, loss: 0.033521879464387894 2023-01-24 04:45:41.877367: step: 194/470, loss: 0.000296061800327152 2023-01-24 04:45:42.754799: step: 196/470, loss: 0.025557244196534157 2023-01-24 04:45:43.509187: step: 198/470, loss: 0.00772892776876688 2023-01-24 04:45:44.377377: step: 200/470, loss: 0.035560574382543564 2023-01-24 04:45:45.102846: step: 202/470, loss: 0.007423603907227516 2023-01-24 04:45:45.939631: step: 204/470, loss: 0.017200924456119537 2023-01-24 04:45:46.696413: step: 206/470, loss: 0.44262224435806274 2023-01-24 04:45:47.509597: step: 208/470, loss: 0.03299145773053169 2023-01-24 04:45:48.215794: step: 210/470, loss: 0.0001655942905927077 2023-01-24 04:45:49.005404: step: 212/470, loss: 0.01341554056853056 2023-01-24 04:45:49.749329: step: 214/470, loss: 0.3039006292819977 2023-01-24 04:45:50.531430: step: 216/470, loss: 0.0034072119742631912 2023-01-24 04:45:51.291255: step: 218/470, loss: 0.25233790278434753 2023-01-24 04:45:52.110264: step: 220/470, loss: 0.0021255554165691137 2023-01-24 04:45:52.832406: step: 222/470, loss: 0.7128084301948547 2023-01-24 04:45:53.627160: step: 224/470, loss: 0.0008448681910522282 2023-01-24 04:45:54.391154: step: 226/470, loss: 0.01992366835474968 2023-01-24 04:45:55.145008: step: 228/470, loss: 0.04137638583779335 2023-01-24 04:45:55.965461: step: 230/470, loss: 0.024264074862003326 2023-01-24 04:45:56.635690: step: 232/470, loss: 0.010322164744138718 2023-01-24 04:45:57.346584: step: 234/470, loss: 0.00993278156965971 2023-01-24 04:45:58.024408: step: 236/470, loss: 0.0286688432097435 2023-01-24 04:45:58.934291: step: 238/470, loss: 0.039216842502355576 2023-01-24 04:45:59.668587: step: 240/470, loss: 0.12787392735481262 2023-01-24 04:46:00.392058: step: 242/470, loss: 0.1834522932767868 2023-01-24 04:46:01.115495: step: 244/470, loss: 0.12740002572536469 2023-01-24 04:46:01.960602: step: 246/470, loss: 0.03960498049855232 2023-01-24 04:46:02.686358: step: 248/470, loss: 0.03761105611920357 2023-01-24 04:46:03.459891: step: 250/470, loss: 0.004436446353793144 2023-01-24 04:46:04.179991: step: 252/470, loss: 0.009433421306312084 2023-01-24 04:46:04.903304: step: 254/470, loss: 0.04656631872057915 2023-01-24 04:46:05.694290: step: 256/470, loss: 0.0027728108689188957 2023-01-24 04:46:06.489198: step: 258/470, loss: 0.9103434085845947 2023-01-24 04:46:07.145089: step: 260/470, loss: 0.04946238175034523 2023-01-24 04:46:07.856144: step: 262/470, loss: 0.0284775011241436 2023-01-24 04:46:08.607777: step: 264/470, loss: 0.12191655486822128 2023-01-24 04:46:09.376621: step: 266/470, loss: 0.07705827057361603 2023-01-24 04:46:10.115219: step: 268/470, loss: 0.05367019772529602 2023-01-24 04:46:10.884453: step: 270/470, loss: 0.03151824325323105 2023-01-24 04:46:11.650276: step: 272/470, loss: 0.0288032628595829 2023-01-24 04:46:12.402122: step: 274/470, loss: 0.02198009565472603 2023-01-24 04:46:13.088015: step: 276/470, loss: 0.3013882637023926 2023-01-24 04:46:13.955981: step: 278/470, loss: 0.00810268521308899 2023-01-24 04:46:14.681323: step: 280/470, loss: 0.3207792043685913 2023-01-24 04:46:15.450787: step: 282/470, loss: 0.03247154504060745 2023-01-24 04:46:16.228799: step: 284/470, loss: 0.11384246498346329 2023-01-24 04:46:16.996775: step: 286/470, loss: 0.06521860510110855 2023-01-24 04:46:17.704320: step: 288/470, loss: 0.08328651636838913 2023-01-24 04:46:18.331965: step: 290/470, loss: 0.004215493332594633 2023-01-24 04:46:19.104521: step: 292/470, loss: 0.08901931345462799 2023-01-24 04:46:19.911989: step: 294/470, loss: 0.00959330890327692 2023-01-24 04:46:20.659257: step: 296/470, loss: 0.6423669457435608 2023-01-24 04:46:21.419297: step: 298/470, loss: 0.032605089247226715 2023-01-24 04:46:22.219446: step: 300/470, loss: 0.02665814571082592 2023-01-24 04:46:22.890264: step: 302/470, loss: 0.04625895991921425 2023-01-24 04:46:23.600967: step: 304/470, loss: 0.19270682334899902 2023-01-24 04:46:24.317185: step: 306/470, loss: 0.005112417042255402 2023-01-24 04:46:25.018905: step: 308/470, loss: 0.01898195967078209 2023-01-24 04:46:25.794436: step: 310/470, loss: 0.007997587323188782 2023-01-24 04:46:26.581345: step: 312/470, loss: 0.03887288644909859 2023-01-24 04:46:27.291084: step: 314/470, loss: 0.03566152974963188 2023-01-24 04:46:28.029168: step: 316/470, loss: 0.017126863822340965 2023-01-24 04:46:28.860110: step: 318/470, loss: 0.16232366859912872 2023-01-24 04:46:29.653869: step: 320/470, loss: 0.005544004961848259 2023-01-24 04:46:30.383827: step: 322/470, loss: 0.02310621552169323 2023-01-24 04:46:31.134616: step: 324/470, loss: 0.0022491663694381714 2023-01-24 04:46:31.813594: step: 326/470, loss: 0.02474634349346161 2023-01-24 04:46:32.578526: step: 328/470, loss: 0.04117470234632492 2023-01-24 04:46:33.263717: step: 330/470, loss: 0.41619208455085754 2023-01-24 04:46:33.984825: step: 332/470, loss: 0.006768751889467239 2023-01-24 04:46:34.657211: step: 334/470, loss: 0.5537669062614441 2023-01-24 04:46:35.383620: step: 336/470, loss: 0.023046918213367462 2023-01-24 04:46:36.137476: step: 338/470, loss: 0.03325394541025162 2023-01-24 04:46:36.901560: step: 340/470, loss: 0.04114500433206558 2023-01-24 04:46:37.699327: step: 342/470, loss: 0.028026726096868515 2023-01-24 04:46:38.401734: step: 344/470, loss: 0.09893575310707092 2023-01-24 04:46:39.180456: step: 346/470, loss: 0.018749456852674484 2023-01-24 04:46:39.930960: step: 348/470, loss: 0.05088183283805847 2023-01-24 04:46:40.671637: step: 350/470, loss: 0.0032982854172587395 2023-01-24 04:46:41.389861: step: 352/470, loss: 0.054463837295770645 2023-01-24 04:46:42.144589: step: 354/470, loss: 0.11088722199201584 2023-01-24 04:46:42.899949: step: 356/470, loss: 0.13968579471111298 2023-01-24 04:46:43.608652: step: 358/470, loss: 0.030351005494594574 2023-01-24 04:46:44.293441: step: 360/470, loss: 0.03071235679090023 2023-01-24 04:46:44.990181: step: 362/470, loss: 0.0850130170583725 2023-01-24 04:46:45.650588: step: 364/470, loss: 0.759006679058075 2023-01-24 04:46:46.311135: step: 366/470, loss: 0.06871728599071503 2023-01-24 04:46:47.049630: step: 368/470, loss: 0.077363520860672 2023-01-24 04:46:47.714116: step: 370/470, loss: 0.01663314364850521 2023-01-24 04:46:48.446010: step: 372/470, loss: 0.03362368047237396 2023-01-24 04:46:49.259109: step: 374/470, loss: 0.01191516499966383 2023-01-24 04:46:49.962311: step: 376/470, loss: 0.029850145801901817 2023-01-24 04:46:50.641503: step: 378/470, loss: 0.007099964190274477 2023-01-24 04:46:51.351093: step: 380/470, loss: 0.9981684684753418 2023-01-24 04:46:52.212487: step: 382/470, loss: 0.018105922266840935 2023-01-24 04:46:52.869137: step: 384/470, loss: 0.07014259696006775 2023-01-24 04:46:53.647602: step: 386/470, loss: 0.04365800693631172 2023-01-24 04:46:54.434362: step: 388/470, loss: 0.10529167205095291 2023-01-24 04:46:55.120785: step: 390/470, loss: 0.07534227520227432 2023-01-24 04:46:55.849187: step: 392/470, loss: 0.07272927463054657 2023-01-24 04:46:56.553922: step: 394/470, loss: 0.06634674966335297 2023-01-24 04:46:57.308397: step: 396/470, loss: 0.04313298314809799 2023-01-24 04:46:58.017880: step: 398/470, loss: 0.0057480488903820515 2023-01-24 04:46:58.747714: step: 400/470, loss: 0.04004475101828575 2023-01-24 04:46:59.488637: step: 402/470, loss: 0.11582747101783752 2023-01-24 04:47:00.209607: step: 404/470, loss: 0.028773313388228416 2023-01-24 04:47:00.969588: step: 406/470, loss: 0.0024830615147948265 2023-01-24 04:47:01.721759: step: 408/470, loss: 0.07367391884326935 2023-01-24 04:47:02.516951: step: 410/470, loss: 0.08376302570104599 2023-01-24 04:47:03.244854: step: 412/470, loss: 0.021925557404756546 2023-01-24 04:47:03.944888: step: 414/470, loss: 0.0081571564078331 2023-01-24 04:47:04.658024: step: 416/470, loss: 0.01600341498851776 2023-01-24 04:47:05.398982: step: 418/470, loss: 0.07978179305791855 2023-01-24 04:47:06.141710: step: 420/470, loss: 0.03541692718863487 2023-01-24 04:47:06.893920: step: 422/470, loss: 0.0004405932268127799 2023-01-24 04:47:07.671375: step: 424/470, loss: 0.04251012206077576 2023-01-24 04:47:08.540253: step: 426/470, loss: 0.026230430230498314 2023-01-24 04:47:09.268474: step: 428/470, loss: 0.034719884395599365 2023-01-24 04:47:09.994150: step: 430/470, loss: 0.7030810117721558 2023-01-24 04:47:10.712138: step: 432/470, loss: 0.03227429836988449 2023-01-24 04:47:11.478492: step: 434/470, loss: 0.013126318342983723 2023-01-24 04:47:12.210581: step: 436/470, loss: 0.024919021874666214 2023-01-24 04:47:13.044224: step: 438/470, loss: 0.26918357610702515 2023-01-24 04:47:13.788046: step: 440/470, loss: 0.215628519654274 2023-01-24 04:47:14.491999: step: 442/470, loss: 0.03114582598209381 2023-01-24 04:47:15.196770: step: 444/470, loss: 0.0005924082943238318 2023-01-24 04:47:15.967587: step: 446/470, loss: 0.30109506845474243 2023-01-24 04:47:16.703924: step: 448/470, loss: 0.34661105275154114 2023-01-24 04:47:17.386699: step: 450/470, loss: 0.016731295734643936 2023-01-24 04:47:18.095070: step: 452/470, loss: 0.048470642417669296 2023-01-24 04:47:18.859197: step: 454/470, loss: 0.05453205108642578 2023-01-24 04:47:19.640003: step: 456/470, loss: 0.05783369764685631 2023-01-24 04:47:20.330002: step: 458/470, loss: 0.03494717925786972 2023-01-24 04:47:21.020371: step: 460/470, loss: 0.032794974744319916 2023-01-24 04:47:21.713600: step: 462/470, loss: 0.0035079510416835546 2023-01-24 04:47:22.407917: step: 464/470, loss: 0.06600449979305267 2023-01-24 04:47:23.140189: step: 466/470, loss: 0.0054876902140676975 2023-01-24 04:47:23.916009: step: 468/470, loss: 0.14131823182106018 2023-01-24 04:47:24.741045: step: 470/470, loss: 0.027415527030825615 2023-01-24 04:47:25.522957: step: 472/470, loss: 0.014964505098760128 2023-01-24 04:47:26.258974: step: 474/470, loss: 0.06728798151016235 2023-01-24 04:47:26.999925: step: 476/470, loss: 0.007798313163220882 2023-01-24 04:47:27.851477: step: 478/470, loss: 0.0030685949604958296 2023-01-24 04:47:28.593927: step: 480/470, loss: 0.07129280269145966 2023-01-24 04:47:29.319106: step: 482/470, loss: 0.03215346857905388 2023-01-24 04:47:30.053095: step: 484/470, loss: 0.0043903798796236515 2023-01-24 04:47:30.772147: step: 486/470, loss: 0.10907711833715439 2023-01-24 04:47:31.491845: step: 488/470, loss: 0.030243542045354843 2023-01-24 04:47:32.207467: step: 490/470, loss: 0.721255362033844 2023-01-24 04:47:33.035483: step: 492/470, loss: 0.030622025951743126 2023-01-24 04:47:33.744704: step: 494/470, loss: 0.026004338636994362 2023-01-24 04:47:34.525991: step: 496/470, loss: 0.03940839692950249 2023-01-24 04:47:35.245602: step: 498/470, loss: 0.08969125896692276 2023-01-24 04:47:35.968568: step: 500/470, loss: 0.016034258529543877 2023-01-24 04:47:36.765858: step: 502/470, loss: 0.054923348128795624 2023-01-24 04:47:37.526852: step: 504/470, loss: 0.03527640178799629 2023-01-24 04:47:38.301598: step: 506/470, loss: 0.05074672773480415 2023-01-24 04:47:39.026502: step: 508/470, loss: 0.019439974799752235 2023-01-24 04:47:39.902293: step: 510/470, loss: 0.029996780678629875 2023-01-24 04:47:40.625174: step: 512/470, loss: 0.0256869625300169 2023-01-24 04:47:41.441708: step: 514/470, loss: 0.03297635540366173 2023-01-24 04:47:42.207040: step: 516/470, loss: 0.01902947574853897 2023-01-24 04:47:42.887540: step: 518/470, loss: 0.1115870252251625 2023-01-24 04:47:43.565039: step: 520/470, loss: 0.013269971124827862 2023-01-24 04:47:44.352844: step: 522/470, loss: 0.030053507536649704 2023-01-24 04:47:45.056462: step: 524/470, loss: 0.4772378206253052 2023-01-24 04:47:45.860031: step: 526/470, loss: 0.12607523798942566 2023-01-24 04:47:46.598341: step: 528/470, loss: 0.007882537320256233 2023-01-24 04:47:47.389052: step: 530/470, loss: 0.041834231466054916 2023-01-24 04:47:48.185470: step: 532/470, loss: 0.13057224452495575 2023-01-24 04:47:49.019049: step: 534/470, loss: 0.14410966634750366 2023-01-24 04:47:49.763252: step: 536/470, loss: 0.03726746886968613 2023-01-24 04:47:50.511052: step: 538/470, loss: 0.023791544139385223 2023-01-24 04:47:51.289501: step: 540/470, loss: 0.01803162507712841 2023-01-24 04:47:52.097078: step: 542/470, loss: 0.1559169441461563 2023-01-24 04:47:52.904270: step: 544/470, loss: 0.024006202816963196 2023-01-24 04:47:53.656734: step: 546/470, loss: 0.018065424636006355 2023-01-24 04:47:54.319219: step: 548/470, loss: 0.01143345795571804 2023-01-24 04:47:55.015203: step: 550/470, loss: 0.13216164708137512 2023-01-24 04:47:55.741697: step: 552/470, loss: 0.09048359096050262 2023-01-24 04:47:56.516594: step: 554/470, loss: 0.016017138957977295 2023-01-24 04:47:57.244166: step: 556/470, loss: 0.028317036107182503 2023-01-24 04:47:57.977867: step: 558/470, loss: 0.01729518733918667 2023-01-24 04:47:58.658048: step: 560/470, loss: 0.01721752993762493 2023-01-24 04:47:59.403953: step: 562/470, loss: 0.0444251224398613 2023-01-24 04:48:00.213363: step: 564/470, loss: 0.005927725229412317 2023-01-24 04:48:00.986647: step: 566/470, loss: 0.027946837246418 2023-01-24 04:48:01.720241: step: 568/470, loss: 0.020798763260245323 2023-01-24 04:48:02.468126: step: 570/470, loss: 0.020342588424682617 2023-01-24 04:48:03.236647: step: 572/470, loss: 0.015030240640044212 2023-01-24 04:48:03.959601: step: 574/470, loss: 0.03206343948841095 2023-01-24 04:48:04.735657: step: 576/470, loss: 0.022603966295719147 2023-01-24 04:48:05.435615: step: 578/470, loss: 0.015919381752610207 2023-01-24 04:48:06.280442: step: 580/470, loss: 0.04253147542476654 2023-01-24 04:48:06.996582: step: 582/470, loss: 0.009290401823818684 2023-01-24 04:48:07.849701: step: 584/470, loss: 0.12215080112218857 2023-01-24 04:48:08.522515: step: 586/470, loss: 0.07029620558023453 2023-01-24 04:48:09.249502: step: 588/470, loss: 0.09041906893253326 2023-01-24 04:48:10.022465: step: 590/470, loss: 0.029489658772945404 2023-01-24 04:48:10.737134: step: 592/470, loss: 0.06253940612077713 2023-01-24 04:48:11.496169: step: 594/470, loss: 0.009752373211085796 2023-01-24 04:48:12.184111: step: 596/470, loss: 0.04571554809808731 2023-01-24 04:48:13.001480: step: 598/470, loss: 0.038326047360897064 2023-01-24 04:48:13.690397: step: 600/470, loss: 0.014072294346988201 2023-01-24 04:48:14.508636: step: 602/470, loss: 0.12891465425491333 2023-01-24 04:48:15.210255: step: 604/470, loss: 0.08130002021789551 2023-01-24 04:48:15.955004: step: 606/470, loss: 0.015089713968336582 2023-01-24 04:48:16.728594: step: 608/470, loss: 0.04352075606584549 2023-01-24 04:48:17.478915: step: 610/470, loss: 0.13570909202098846 2023-01-24 04:48:18.197580: step: 612/470, loss: 0.04086529463529587 2023-01-24 04:48:19.029791: step: 614/470, loss: 1.2370469570159912 2023-01-24 04:48:19.764321: step: 616/470, loss: 0.02251041680574417 2023-01-24 04:48:20.552482: step: 618/470, loss: 0.04271961376070976 2023-01-24 04:48:21.315077: step: 620/470, loss: 0.03870357945561409 2023-01-24 04:48:22.040940: step: 622/470, loss: 0.0048131453804671764 2023-01-24 04:48:22.753897: step: 624/470, loss: 0.05446334183216095 2023-01-24 04:48:23.692130: step: 626/470, loss: 0.025662001222372055 2023-01-24 04:48:24.405300: step: 628/470, loss: 0.036178234964609146 2023-01-24 04:48:25.135068: step: 630/470, loss: 0.05717697739601135 2023-01-24 04:48:25.947026: step: 632/470, loss: 0.030181117355823517 2023-01-24 04:48:26.707660: step: 634/470, loss: 0.013264654204249382 2023-01-24 04:48:27.436845: step: 636/470, loss: 0.26037362217903137 2023-01-24 04:48:28.221877: step: 638/470, loss: 0.10497309267520905 2023-01-24 04:48:28.907770: step: 640/470, loss: 0.0047654202207922935 2023-01-24 04:48:29.614100: step: 642/470, loss: 0.04302401840686798 2023-01-24 04:48:30.306704: step: 644/470, loss: 0.06489899009466171 2023-01-24 04:48:31.087555: step: 646/470, loss: 0.18413802981376648 2023-01-24 04:48:31.818545: step: 648/470, loss: 0.8595938682556152 2023-01-24 04:48:32.570122: step: 650/470, loss: 0.005972123239189386 2023-01-24 04:48:33.239244: step: 652/470, loss: 0.0459543913602829 2023-01-24 04:48:34.016920: step: 654/470, loss: 0.0675494596362114 2023-01-24 04:48:34.834946: step: 656/470, loss: 0.053448911756277084 2023-01-24 04:48:35.547386: step: 658/470, loss: 0.009192129597067833 2023-01-24 04:48:36.287981: step: 660/470, loss: 0.02981734089553356 2023-01-24 04:48:37.021448: step: 662/470, loss: 0.1048010066151619 2023-01-24 04:48:37.767819: step: 664/470, loss: 0.09085634350776672 2023-01-24 04:48:38.558579: step: 666/470, loss: 0.026635007932782173 2023-01-24 04:48:39.255231: step: 668/470, loss: 0.0010363566689193249 2023-01-24 04:48:39.892345: step: 670/470, loss: 0.009339814074337482 2023-01-24 04:48:40.582121: step: 672/470, loss: 0.0018867823528125882 2023-01-24 04:48:41.295194: step: 674/470, loss: 0.2046578824520111 2023-01-24 04:48:42.061206: step: 676/470, loss: 0.06945720314979553 2023-01-24 04:48:42.881700: step: 678/470, loss: 0.003970560152083635 2023-01-24 04:48:43.592366: step: 680/470, loss: 0.01167396642267704 2023-01-24 04:48:44.276772: step: 682/470, loss: 0.035459429025650024 2023-01-24 04:48:44.982066: step: 684/470, loss: 0.024924641475081444 2023-01-24 04:48:45.625410: step: 686/470, loss: 0.013809597119688988 2023-01-24 04:48:46.431106: step: 688/470, loss: 0.1835506558418274 2023-01-24 04:48:47.186983: step: 690/470, loss: 0.022687483578920364 2023-01-24 04:48:47.939837: step: 692/470, loss: 0.008208868093788624 2023-01-24 04:48:48.621477: step: 694/470, loss: 0.010190270841121674 2023-01-24 04:48:49.418395: step: 696/470, loss: 0.035994742065668106 2023-01-24 04:48:50.194619: step: 698/470, loss: 0.037245918065309525 2023-01-24 04:48:50.968022: step: 700/470, loss: 0.10593031346797943 2023-01-24 04:48:51.695962: step: 702/470, loss: 0.0002846399147529155 2023-01-24 04:48:52.391957: step: 704/470, loss: 0.01668049953877926 2023-01-24 04:48:53.132450: step: 706/470, loss: 0.15217755734920502 2023-01-24 04:48:53.821106: step: 708/470, loss: 0.02269909158349037 2023-01-24 04:48:54.646468: step: 710/470, loss: 0.02121802605688572 2023-01-24 04:48:55.357137: step: 712/470, loss: 0.06413817405700684 2023-01-24 04:48:56.027343: step: 714/470, loss: 0.026240617036819458 2023-01-24 04:48:56.725609: step: 716/470, loss: 0.04450815171003342 2023-01-24 04:48:57.471369: step: 718/470, loss: 0.012417121790349483 2023-01-24 04:48:58.224922: step: 720/470, loss: 0.053188763558864594 2023-01-24 04:48:58.966221: step: 722/470, loss: 0.006177571602165699 2023-01-24 04:48:59.663685: step: 724/470, loss: 0.007825598120689392 2023-01-24 04:49:00.431031: step: 726/470, loss: 0.808793306350708 2023-01-24 04:49:01.117299: step: 728/470, loss: 0.021689899265766144 2023-01-24 04:49:01.882195: step: 730/470, loss: 0.015059007331728935 2023-01-24 04:49:02.675069: step: 732/470, loss: 0.14715488255023956 2023-01-24 04:49:03.380557: step: 734/470, loss: 0.008766661398112774 2023-01-24 04:49:04.153407: step: 736/470, loss: 0.07060243934392929 2023-01-24 04:49:04.963074: step: 738/470, loss: 0.05194990336894989 2023-01-24 04:49:05.687380: step: 740/470, loss: 0.1445123255252838 2023-01-24 04:49:06.418862: step: 742/470, loss: 0.020801223814487457 2023-01-24 04:49:07.124638: step: 744/470, loss: 0.010486302897334099 2023-01-24 04:49:07.804945: step: 746/470, loss: 0.037710774689912796 2023-01-24 04:49:08.585874: step: 748/470, loss: 0.20508471131324768 2023-01-24 04:49:09.298159: step: 750/470, loss: 0.047555048018693924 2023-01-24 04:49:10.056132: step: 752/470, loss: 0.020251981914043427 2023-01-24 04:49:10.848588: step: 754/470, loss: 0.09589096158742905 2023-01-24 04:49:11.641928: step: 756/470, loss: 0.030110789462924004 2023-01-24 04:49:12.299633: step: 758/470, loss: 0.012403149157762527 2023-01-24 04:49:13.086398: step: 760/470, loss: 0.017901351675391197 2023-01-24 04:49:13.844338: step: 762/470, loss: 0.049641214311122894 2023-01-24 04:49:14.619995: step: 764/470, loss: 0.09870140254497528 2023-01-24 04:49:15.364574: step: 766/470, loss: 0.03105722926557064 2023-01-24 04:49:16.132282: step: 768/470, loss: 0.026463506743311882 2023-01-24 04:49:16.820709: step: 770/470, loss: 0.010589334182441235 2023-01-24 04:49:17.624176: step: 772/470, loss: 0.033002715557813644 2023-01-24 04:49:18.400293: step: 774/470, loss: 0.25736182928085327 2023-01-24 04:49:19.205850: step: 776/470, loss: 0.025068465620279312 2023-01-24 04:49:19.903561: step: 778/470, loss: 0.334775447845459 2023-01-24 04:49:20.589350: step: 780/470, loss: 0.042881835252046585 2023-01-24 04:49:21.297879: step: 782/470, loss: 0.04640914872288704 2023-01-24 04:49:21.978627: step: 784/470, loss: 0.009364593774080276 2023-01-24 04:49:22.625738: step: 786/470, loss: 0.04730172082781792 2023-01-24 04:49:23.357859: step: 788/470, loss: 0.1467842310667038 2023-01-24 04:49:24.163455: step: 790/470, loss: 0.09448494762182236 2023-01-24 04:49:24.937289: step: 792/470, loss: 0.03505510836839676 2023-01-24 04:49:25.717855: step: 794/470, loss: 0.1019258052110672 2023-01-24 04:49:26.515702: step: 796/470, loss: 0.2154480367898941 2023-01-24 04:49:27.156010: step: 798/470, loss: 0.04859409108757973 2023-01-24 04:49:27.886201: step: 800/470, loss: 0.005880818236619234 2023-01-24 04:49:28.571212: step: 802/470, loss: 0.0315140001475811 2023-01-24 04:49:29.291637: step: 804/470, loss: 0.023350920528173447 2023-01-24 04:49:30.009131: step: 806/470, loss: 0.0514444075524807 2023-01-24 04:49:30.749373: step: 808/470, loss: 0.004267614334821701 2023-01-24 04:49:31.509248: step: 810/470, loss: 0.12531037628650665 2023-01-24 04:49:32.263760: step: 812/470, loss: 0.15931852161884308 2023-01-24 04:49:32.996317: step: 814/470, loss: 0.028155192732810974 2023-01-24 04:49:33.708712: step: 816/470, loss: 0.013806957751512527 2023-01-24 04:49:34.401393: step: 818/470, loss: 0.014839094132184982 2023-01-24 04:49:35.086009: step: 820/470, loss: 0.01098904013633728 2023-01-24 04:49:35.813085: step: 822/470, loss: 0.06523977220058441 2023-01-24 04:49:36.509377: step: 824/470, loss: 0.022953085601329803 2023-01-24 04:49:37.263868: step: 826/470, loss: 0.019507238641381264 2023-01-24 04:49:38.089838: step: 828/470, loss: 0.036449965089559555 2023-01-24 04:49:38.803526: step: 830/470, loss: 0.368724524974823 2023-01-24 04:49:39.618438: step: 832/470, loss: 0.007018078118562698 2023-01-24 04:49:40.345856: step: 834/470, loss: 0.023230871185660362 2023-01-24 04:49:41.018844: step: 836/470, loss: 0.0515056848526001 2023-01-24 04:49:41.775393: step: 838/470, loss: 0.12788495421409607 2023-01-24 04:49:42.561875: step: 840/470, loss: 0.0066448175348341465 2023-01-24 04:49:43.251998: step: 842/470, loss: 0.01157106552273035 2023-01-24 04:49:43.994919: step: 844/470, loss: 0.002934497781097889 2023-01-24 04:49:44.717304: step: 846/470, loss: 0.023657750338315964 2023-01-24 04:49:45.420403: step: 848/470, loss: 0.050684839487075806 2023-01-24 04:49:46.212769: step: 850/470, loss: 0.016548197716474533 2023-01-24 04:49:46.993174: step: 852/470, loss: 0.042522866278886795 2023-01-24 04:49:47.685850: step: 854/470, loss: 0.41254308819770813 2023-01-24 04:49:48.398729: step: 856/470, loss: 0.0507955327630043 2023-01-24 04:49:49.206291: step: 858/470, loss: 0.023885458707809448 2023-01-24 04:49:49.957869: step: 860/470, loss: 0.02228032425045967 2023-01-24 04:49:50.689969: step: 862/470, loss: 0.034615080803632736 2023-01-24 04:49:51.404118: step: 864/470, loss: 0.09413474798202515 2023-01-24 04:49:52.241828: step: 866/470, loss: 0.03352166339755058 2023-01-24 04:49:52.967355: step: 868/470, loss: 0.05351152643561363 2023-01-24 04:49:53.683650: step: 870/470, loss: 0.024592425674200058 2023-01-24 04:49:54.353889: step: 872/470, loss: 0.06424624472856522 2023-01-24 04:49:55.120698: step: 874/470, loss: 0.0035424302332103252 2023-01-24 04:49:55.831742: step: 876/470, loss: 0.003755184356123209 2023-01-24 04:49:56.574846: step: 878/470, loss: 0.08013807982206345 2023-01-24 04:49:57.342747: step: 880/470, loss: 0.03286373242735863 2023-01-24 04:49:58.069989: step: 882/470, loss: 0.04773545265197754 2023-01-24 04:49:58.792379: step: 884/470, loss: 0.053945161402225494 2023-01-24 04:49:59.504125: step: 886/470, loss: 0.1546299159526825 2023-01-24 04:50:00.257799: step: 888/470, loss: 0.01916525699198246 2023-01-24 04:50:00.963903: step: 890/470, loss: 0.06273111701011658 2023-01-24 04:50:01.662515: step: 892/470, loss: 0.08657549321651459 2023-01-24 04:50:02.413467: step: 894/470, loss: 0.10691189765930176 2023-01-24 04:50:03.138525: step: 896/470, loss: 0.0036090798676013947 2023-01-24 04:50:03.887082: step: 898/470, loss: 0.05727309733629227 2023-01-24 04:50:04.740403: step: 900/470, loss: 0.04444019868969917 2023-01-24 04:50:05.462438: step: 902/470, loss: 0.06358946114778519 2023-01-24 04:50:06.195164: step: 904/470, loss: 0.0498536042869091 2023-01-24 04:50:06.945311: step: 906/470, loss: 0.03799587860703468 2023-01-24 04:50:07.698554: step: 908/470, loss: 0.0830104649066925 2023-01-24 04:50:08.549836: step: 910/470, loss: 0.0941431000828743 2023-01-24 04:50:09.244794: step: 912/470, loss: 0.01036920491605997 2023-01-24 04:50:09.972646: step: 914/470, loss: 0.05386245623230934 2023-01-24 04:50:10.740344: step: 916/470, loss: 0.02375110797584057 2023-01-24 04:50:11.460508: step: 918/470, loss: 0.033866845071315765 2023-01-24 04:50:12.147230: step: 920/470, loss: 0.02378959022462368 2023-01-24 04:50:12.859476: step: 922/470, loss: 0.07435478270053864 2023-01-24 04:50:13.576275: step: 924/470, loss: 0.004783568903803825 2023-01-24 04:50:14.301764: step: 926/470, loss: 0.015685396268963814 2023-01-24 04:50:15.004269: step: 928/470, loss: 0.056146442890167236 2023-01-24 04:50:15.730434: step: 930/470, loss: 0.05725179240107536 2023-01-24 04:50:16.622742: step: 932/470, loss: 0.05904132127761841 2023-01-24 04:50:17.359774: step: 934/470, loss: 0.015635449439287186 2023-01-24 04:50:18.169054: step: 936/470, loss: 0.04005289822816849 2023-01-24 04:50:18.894823: step: 938/470, loss: 0.018923653289675713 2023-01-24 04:50:19.557830: step: 940/470, loss: 0.011358820833265781 2023-01-24 04:50:20.301741: step: 942/470, loss: 0.037756726145744324 ================================================== Loss: 0.075 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3511194674965422, 'r': 0.3211377292852625, 'f1': 0.33546002642880746}, 'combined': 0.24718107210543705, 'epoch': 23} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3596607912606083, 'r': 0.35239840989861526, 'f1': 0.35599256560909165}, 'combined': 0.23732837707272772, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3377878289473684, 'r': 0.31279024767801855, 'f1': 0.3248087892144153}, 'combined': 0.23933279205272706, 'epoch': 23} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36129983705090635, 'r': 0.34636147840360926, 'f1': 0.35367298727516305}, 'combined': 0.2357819915167753, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32794813406437917, 'r': 0.31985833189580815, 'f1': 0.32385272028643786}, 'combined': 0.23862832021105945, 'epoch': 23} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3589724520009027, 'r': 0.3638047734701456, 'f1': 0.36137245884331565}, 'combined': 0.24091497256221037, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29545454545454547, 'r': 0.2785714285714286, 'f1': 0.2867647058823529}, 'combined': 0.19117647058823528, 'epoch': 23} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6176470588235294, 'r': 0.45652173913043476, 'f1': 0.5249999999999999}, 'combined': 0.3499999999999999, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 23} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32794813406437917, 'r': 0.31985833189580815, 'f1': 0.32385272028643786}, 'combined': 0.23862832021105945, 'epoch': 23} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3589724520009027, 'r': 0.3638047734701456, 'f1': 0.36137245884331565}, 'combined': 0.24091497256221037, 'epoch': 23} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 23} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:53:00.785831: step: 2/470, loss: 0.09071078896522522 2023-01-24 04:53:01.558111: step: 4/470, loss: 0.025325173512101173 2023-01-24 04:53:02.359811: step: 6/470, loss: 0.08036006987094879 2023-01-24 04:53:03.096353: step: 8/470, loss: 0.021646693348884583 2023-01-24 04:53:03.879755: step: 10/470, loss: 0.049812790006399155 2023-01-24 04:53:04.657692: step: 12/470, loss: 0.01448730006814003 2023-01-24 04:53:05.355130: step: 14/470, loss: 0.030710160732269287 2023-01-24 04:53:06.139185: step: 16/470, loss: 0.019698522984981537 2023-01-24 04:53:06.888984: step: 18/470, loss: 0.007313847541809082 2023-01-24 04:53:07.744959: step: 20/470, loss: 0.058628715574741364 2023-01-24 04:53:08.428618: step: 22/470, loss: 0.008414885960519314 2023-01-24 04:53:09.194935: step: 24/470, loss: 0.04240531101822853 2023-01-24 04:53:09.926135: step: 26/470, loss: 0.011772439815104008 2023-01-24 04:53:10.633438: step: 28/470, loss: 0.018150269985198975 2023-01-24 04:53:11.266570: step: 30/470, loss: 0.1334639936685562 2023-01-24 04:53:12.027311: step: 32/470, loss: 0.006515135522931814 2023-01-24 04:53:12.716285: step: 34/470, loss: 0.0021217716857790947 2023-01-24 04:53:13.460290: step: 36/470, loss: 0.10562937706708908 2023-01-24 04:53:14.321676: step: 38/470, loss: 0.012553170323371887 2023-01-24 04:53:15.040174: step: 40/470, loss: 0.020525362342596054 2023-01-24 04:53:15.933693: step: 42/470, loss: 0.10595296323299408 2023-01-24 04:53:16.640840: step: 44/470, loss: 0.08366550505161285 2023-01-24 04:53:17.335960: step: 46/470, loss: 0.020429229363799095 2023-01-24 04:53:18.063327: step: 48/470, loss: 0.024408716708421707 2023-01-24 04:53:18.746483: step: 50/470, loss: 0.03457806631922722 2023-01-24 04:53:19.519758: step: 52/470, loss: 0.06466998159885406 2023-01-24 04:53:20.281781: step: 54/470, loss: 0.009562093764543533 2023-01-24 04:53:21.030103: step: 56/470, loss: 1.4924793243408203 2023-01-24 04:53:21.759930: step: 58/470, loss: 0.04375853016972542 2023-01-24 04:53:22.555148: step: 60/470, loss: 0.0036672528367489576 2023-01-24 04:53:23.249397: step: 62/470, loss: 0.0689840093255043 2023-01-24 04:53:24.067106: step: 64/470, loss: 0.342257022857666 2023-01-24 04:53:24.785924: step: 66/470, loss: 0.10635102540254593 2023-01-24 04:53:25.427013: step: 68/470, loss: 0.026408610865473747 2023-01-24 04:53:26.130016: step: 70/470, loss: 0.051238052546978 2023-01-24 04:53:26.924409: step: 72/470, loss: 0.036443691700696945 2023-01-24 04:53:27.662618: step: 74/470, loss: 0.06277167797088623 2023-01-24 04:53:28.400400: step: 76/470, loss: 0.01571185700595379 2023-01-24 04:53:29.163727: step: 78/470, loss: 0.0491529181599617 2023-01-24 04:53:29.955727: step: 80/470, loss: 0.04565410315990448 2023-01-24 04:53:30.651275: step: 82/470, loss: 0.00350984581746161 2023-01-24 04:53:31.343109: step: 84/470, loss: 0.011095493100583553 2023-01-24 04:53:32.030209: step: 86/470, loss: 0.09822019934654236 2023-01-24 04:53:32.851698: step: 88/470, loss: 0.007752100471407175 2023-01-24 04:53:33.594130: step: 90/470, loss: 0.031544268131256104 2023-01-24 04:53:34.294350: step: 92/470, loss: 0.017505712807178497 2023-01-24 04:53:35.040671: step: 94/470, loss: 0.008974706754088402 2023-01-24 04:53:35.747538: step: 96/470, loss: 0.004836901556700468 2023-01-24 04:53:36.433735: step: 98/470, loss: 0.026484636589884758 2023-01-24 04:53:37.207295: step: 100/470, loss: 0.3591688275337219 2023-01-24 04:53:37.876995: step: 102/470, loss: 0.030483385547995567 2023-01-24 04:53:38.614790: step: 104/470, loss: 0.0007766528287902474 2023-01-24 04:53:39.326219: step: 106/470, loss: 0.015138084068894386 2023-01-24 04:53:40.138470: step: 108/470, loss: 0.02235487848520279 2023-01-24 04:53:40.962904: step: 110/470, loss: 0.20309732854366302 2023-01-24 04:53:41.672282: step: 112/470, loss: 0.17128710448741913 2023-01-24 04:53:42.472205: step: 114/470, loss: 0.032276567071676254 2023-01-24 04:53:43.194655: step: 116/470, loss: 0.02128906175494194 2023-01-24 04:53:43.932660: step: 118/470, loss: 0.002846565330401063 2023-01-24 04:53:44.714674: step: 120/470, loss: 0.5752863883972168 2023-01-24 04:53:45.421787: step: 122/470, loss: 0.0021211388520896435 2023-01-24 04:53:46.153884: step: 124/470, loss: 0.01313631609082222 2023-01-24 04:53:46.854809: step: 126/470, loss: 0.009652921929955482 2023-01-24 04:53:47.628703: step: 128/470, loss: 0.07296048104763031 2023-01-24 04:53:48.355945: step: 130/470, loss: 0.01581062190234661 2023-01-24 04:53:49.292661: step: 132/470, loss: 0.032663170248270035 2023-01-24 04:53:50.043531: step: 134/470, loss: 0.19434767961502075 2023-01-24 04:53:50.810326: step: 136/470, loss: 0.03558581322431564 2023-01-24 04:53:51.645088: step: 138/470, loss: 0.02339303307235241 2023-01-24 04:53:52.389444: step: 140/470, loss: 0.015170947648584843 2023-01-24 04:53:53.061892: step: 142/470, loss: 0.1154085025191307 2023-01-24 04:53:53.856592: step: 144/470, loss: 0.007798160891979933 2023-01-24 04:53:54.638537: step: 146/470, loss: 0.01072743721306324 2023-01-24 04:53:55.380918: step: 148/470, loss: 0.018375081941485405 2023-01-24 04:53:56.144769: step: 150/470, loss: 0.06826124340295792 2023-01-24 04:53:56.815983: step: 152/470, loss: 0.020997071638703346 2023-01-24 04:53:57.491633: step: 154/470, loss: 0.20793934166431427 2023-01-24 04:53:58.275371: step: 156/470, loss: 0.015171626582741737 2023-01-24 04:53:59.103243: step: 158/470, loss: 0.026798272505402565 2023-01-24 04:53:59.837788: step: 160/470, loss: 0.027822552248835564 2023-01-24 04:54:00.575711: step: 162/470, loss: 0.0003175846068188548 2023-01-24 04:54:01.357099: step: 164/470, loss: 0.007902979850769043 2023-01-24 04:54:02.181899: step: 166/470, loss: 0.034732285887002945 2023-01-24 04:54:02.925398: step: 168/470, loss: 0.18924480676651 2023-01-24 04:54:03.725208: step: 170/470, loss: 0.08738310635089874 2023-01-24 04:54:04.552992: step: 172/470, loss: 0.018750211223959923 2023-01-24 04:54:05.285502: step: 174/470, loss: 0.05455208197236061 2023-01-24 04:54:05.954156: step: 176/470, loss: 0.014304988086223602 2023-01-24 04:54:06.752807: step: 178/470, loss: 0.015335150994360447 2023-01-24 04:54:07.452697: step: 180/470, loss: 0.0569082647562027 2023-01-24 04:54:08.283466: step: 182/470, loss: 0.07909173518419266 2023-01-24 04:54:08.958469: step: 184/470, loss: 0.014027910307049751 2023-01-24 04:54:09.691230: step: 186/470, loss: 0.018430842086672783 2023-01-24 04:54:10.443598: step: 188/470, loss: 0.004810623824596405 2023-01-24 04:54:11.178154: step: 190/470, loss: 0.03506871312856674 2023-01-24 04:54:11.945334: step: 192/470, loss: 0.05087616667151451 2023-01-24 04:54:12.633907: step: 194/470, loss: 0.025519607588648796 2023-01-24 04:54:13.500968: step: 196/470, loss: 0.010457420721650124 2023-01-24 04:54:14.180800: step: 198/470, loss: 0.010845007374882698 2023-01-24 04:54:14.852826: step: 200/470, loss: 0.015168682672083378 2023-01-24 04:54:15.642895: step: 202/470, loss: 0.026464076712727547 2023-01-24 04:54:16.422710: step: 204/470, loss: 0.015966270118951797 2023-01-24 04:54:17.223383: step: 206/470, loss: 0.011198129504919052 2023-01-24 04:54:17.940695: step: 208/470, loss: 0.0012512399116531014 2023-01-24 04:54:18.667036: step: 210/470, loss: 0.024326816201210022 2023-01-24 04:54:19.406723: step: 212/470, loss: 0.03241521865129471 2023-01-24 04:54:20.169618: step: 214/470, loss: 0.08937463164329529 2023-01-24 04:54:20.960755: step: 216/470, loss: 7.647789607290179e-05 2023-01-24 04:54:21.735888: step: 218/470, loss: 0.09640821069478989 2023-01-24 04:54:22.463148: step: 220/470, loss: 0.051727913320064545 2023-01-24 04:54:23.181696: step: 222/470, loss: 0.25078266859054565 2023-01-24 04:54:23.952019: step: 224/470, loss: 0.001110507408156991 2023-01-24 04:54:24.791066: step: 226/470, loss: 0.023081980645656586 2023-01-24 04:54:25.490288: step: 228/470, loss: 0.2709239721298218 2023-01-24 04:54:26.154063: step: 230/470, loss: 0.012674129568040371 2023-01-24 04:54:26.985238: step: 232/470, loss: 0.010699287056922913 2023-01-24 04:54:27.759399: step: 234/470, loss: 0.007047413848340511 2023-01-24 04:54:28.501324: step: 236/470, loss: 0.011672616936266422 2023-01-24 04:54:29.275569: step: 238/470, loss: 0.013916079886257648 2023-01-24 04:54:29.990259: step: 240/470, loss: 0.18569129705429077 2023-01-24 04:54:30.695158: step: 242/470, loss: 0.011190307326614857 2023-01-24 04:54:31.490969: step: 244/470, loss: 0.09461580961942673 2023-01-24 04:54:32.274209: step: 246/470, loss: 0.03384866937994957 2023-01-24 04:54:32.964524: step: 248/470, loss: 0.004926465917378664 2023-01-24 04:54:33.749511: step: 250/470, loss: 0.017905734479427338 2023-01-24 04:54:34.550881: step: 252/470, loss: 2.1755669116973877 2023-01-24 04:54:35.306239: step: 254/470, loss: 0.05439945310354233 2023-01-24 04:54:36.069105: step: 256/470, loss: 0.12149098515510559 2023-01-24 04:54:36.842501: step: 258/470, loss: 0.012873425148427486 2023-01-24 04:54:37.709765: step: 260/470, loss: 0.08463416993618011 2023-01-24 04:54:38.484264: step: 262/470, loss: 0.06308569759130478 2023-01-24 04:54:39.293138: step: 264/470, loss: 0.0345967635512352 2023-01-24 04:54:40.008241: step: 266/470, loss: 0.03556538745760918 2023-01-24 04:54:40.822110: step: 268/470, loss: 0.036040298640728 2023-01-24 04:54:41.561363: step: 270/470, loss: 0.02199576050043106 2023-01-24 04:54:42.266416: step: 272/470, loss: 0.04857220500707626 2023-01-24 04:54:43.018876: step: 274/470, loss: 0.009337391704320908 2023-01-24 04:54:43.867314: step: 276/470, loss: 0.017865784466266632 2023-01-24 04:54:44.604323: step: 278/470, loss: 0.05819854885339737 2023-01-24 04:54:45.436658: step: 280/470, loss: 0.03172709047794342 2023-01-24 04:54:46.240580: step: 282/470, loss: 0.017821263521909714 2023-01-24 04:54:47.122575: step: 284/470, loss: 0.12068352103233337 2023-01-24 04:54:47.880883: step: 286/470, loss: 0.010906575247645378 2023-01-24 04:54:48.652496: step: 288/470, loss: 0.017595164477825165 2023-01-24 04:54:49.370578: step: 290/470, loss: 0.01801401562988758 2023-01-24 04:54:50.200581: step: 292/470, loss: 0.012894317507743835 2023-01-24 04:54:51.053967: step: 294/470, loss: 2.5075581073760986 2023-01-24 04:54:51.846712: step: 296/470, loss: 0.053497977554798126 2023-01-24 04:54:52.613277: step: 298/470, loss: 0.007422546856105328 2023-01-24 04:54:53.366469: step: 300/470, loss: 0.30072087049484253 2023-01-24 04:54:54.125739: step: 302/470, loss: 0.08959358930587769 2023-01-24 04:54:54.926757: step: 304/470, loss: 0.009266148321330547 2023-01-24 04:54:55.674506: step: 306/470, loss: 0.014265595935285091 2023-01-24 04:54:56.403935: step: 308/470, loss: 0.010140521451830864 2023-01-24 04:54:57.083389: step: 310/470, loss: 0.07897411286830902 2023-01-24 04:54:57.902127: step: 312/470, loss: 0.025504056364297867 2023-01-24 04:54:58.709566: step: 314/470, loss: 0.02470008283853531 2023-01-24 04:54:59.680983: step: 316/470, loss: 0.07380572706460953 2023-01-24 04:55:00.407133: step: 318/470, loss: 0.013543383218348026 2023-01-24 04:55:01.205986: step: 320/470, loss: 0.05348219349980354 2023-01-24 04:55:01.921060: step: 322/470, loss: 0.03541599214076996 2023-01-24 04:55:02.635504: step: 324/470, loss: 0.01773747242987156 2023-01-24 04:55:03.391426: step: 326/470, loss: 0.7491196990013123 2023-01-24 04:55:04.099694: step: 328/470, loss: 0.03136305883526802 2023-01-24 04:55:04.853344: step: 330/470, loss: 0.042486634105443954 2023-01-24 04:55:05.723299: step: 332/470, loss: 0.025926154106855392 2023-01-24 04:55:06.430367: step: 334/470, loss: 0.02339969575405121 2023-01-24 04:55:07.186162: step: 336/470, loss: 0.04502348601818085 2023-01-24 04:55:07.949493: step: 338/470, loss: 0.014835294336080551 2023-01-24 04:55:08.665485: step: 340/470, loss: 0.04183325543999672 2023-01-24 04:55:09.431818: step: 342/470, loss: 0.0881599634885788 2023-01-24 04:55:10.165485: step: 344/470, loss: 0.0676642656326294 2023-01-24 04:55:10.946986: step: 346/470, loss: 0.05381879583001137 2023-01-24 04:55:11.650225: step: 348/470, loss: 0.056068144738674164 2023-01-24 04:55:12.416863: step: 350/470, loss: 0.03975946456193924 2023-01-24 04:55:13.177639: step: 352/470, loss: 0.004915451630949974 2023-01-24 04:55:13.900459: step: 354/470, loss: 0.037539318203926086 2023-01-24 04:55:14.639759: step: 356/470, loss: 0.017015933990478516 2023-01-24 04:55:15.457030: step: 358/470, loss: 0.01815337873995304 2023-01-24 04:55:16.134142: step: 360/470, loss: 0.011578904464840889 2023-01-24 04:55:16.859272: step: 362/470, loss: 0.0952455922961235 2023-01-24 04:55:17.614993: step: 364/470, loss: 0.029356520622968674 2023-01-24 04:55:18.296513: step: 366/470, loss: 0.03291197121143341 2023-01-24 04:55:19.032532: step: 368/470, loss: 0.044890958815813065 2023-01-24 04:55:19.852356: step: 370/470, loss: 0.014463989064097404 2023-01-24 04:55:20.572717: step: 372/470, loss: 0.029671261087059975 2023-01-24 04:55:21.396846: step: 374/470, loss: 0.1202852725982666 2023-01-24 04:55:22.145491: step: 376/470, loss: 0.02309921756386757 2023-01-24 04:55:22.862994: step: 378/470, loss: 0.0008545005111955106 2023-01-24 04:55:23.647094: step: 380/470, loss: 0.02537860907614231 2023-01-24 04:55:24.401228: step: 382/470, loss: 0.01265999861061573 2023-01-24 04:55:25.176581: step: 384/470, loss: 0.06934566795825958 2023-01-24 04:55:25.845592: step: 386/470, loss: 0.04263785108923912 2023-01-24 04:55:26.524458: step: 388/470, loss: 0.055505167692899704 2023-01-24 04:55:27.289243: step: 390/470, loss: 0.05536120757460594 2023-01-24 04:55:28.116055: step: 392/470, loss: 0.01172667182981968 2023-01-24 04:55:28.894313: step: 394/470, loss: 0.13547594845294952 2023-01-24 04:55:29.661687: step: 396/470, loss: 0.0048871287144720554 2023-01-24 04:55:30.434697: step: 398/470, loss: 0.22317151725292206 2023-01-24 04:55:31.176537: step: 400/470, loss: 0.02091968059539795 2023-01-24 04:55:31.933950: step: 402/470, loss: 0.0756245031952858 2023-01-24 04:55:32.626073: step: 404/470, loss: 0.08365000039339066 2023-01-24 04:55:33.381910: step: 406/470, loss: 0.021144846454262733 2023-01-24 04:55:34.097700: step: 408/470, loss: 0.02719028852880001 2023-01-24 04:55:34.784675: step: 410/470, loss: 0.002817704575136304 2023-01-24 04:55:35.524325: step: 412/470, loss: 0.020920658484101295 2023-01-24 04:55:36.377815: step: 414/470, loss: 0.037627965211868286 2023-01-24 04:55:37.063568: step: 416/470, loss: 0.0015627248212695122 2023-01-24 04:55:37.786659: step: 418/470, loss: 0.027174735441803932 2023-01-24 04:55:38.518166: step: 420/470, loss: 0.016970563679933548 2023-01-24 04:55:39.293669: step: 422/470, loss: 0.006052455864846706 2023-01-24 04:55:39.993703: step: 424/470, loss: 0.14455130696296692 2023-01-24 04:55:40.723814: step: 426/470, loss: 0.05896090343594551 2023-01-24 04:55:41.468667: step: 428/470, loss: 0.016825811937451363 2023-01-24 04:55:42.278409: step: 430/470, loss: 0.009072363376617432 2023-01-24 04:55:43.094484: step: 432/470, loss: 0.004168605897575617 2023-01-24 04:55:43.883579: step: 434/470, loss: 0.05164012685418129 2023-01-24 04:55:44.664442: step: 436/470, loss: 0.1026514321565628 2023-01-24 04:55:45.362184: step: 438/470, loss: 0.0008245318895205855 2023-01-24 04:55:46.078884: step: 440/470, loss: 0.001791208516806364 2023-01-24 04:55:46.850413: step: 442/470, loss: 0.042904458940029144 2023-01-24 04:55:47.650502: step: 444/470, loss: 0.007724056486040354 2023-01-24 04:55:48.418681: step: 446/470, loss: 0.043044883757829666 2023-01-24 04:55:49.190341: step: 448/470, loss: 0.011818325147032738 2023-01-24 04:55:49.958790: step: 450/470, loss: 0.061016157269477844 2023-01-24 04:55:50.776079: step: 452/470, loss: 0.06787383556365967 2023-01-24 04:55:51.566323: step: 454/470, loss: 0.03366108238697052 2023-01-24 04:55:52.436707: step: 456/470, loss: 0.042890019714832306 2023-01-24 04:55:53.223516: step: 458/470, loss: 0.0524488128721714 2023-01-24 04:55:54.036091: step: 460/470, loss: 0.013294907286763191 2023-01-24 04:55:54.775700: step: 462/470, loss: 0.05481591448187828 2023-01-24 04:55:55.480940: step: 464/470, loss: 0.01968459226191044 2023-01-24 04:55:56.278143: step: 466/470, loss: 0.011313038878142834 2023-01-24 04:55:57.175884: step: 468/470, loss: 0.18432138860225677 2023-01-24 04:55:57.908693: step: 470/470, loss: 0.04200728237628937 2023-01-24 04:55:58.762975: step: 472/470, loss: 0.03416682779788971 2023-01-24 04:55:59.533317: step: 474/470, loss: 0.013105719350278378 2023-01-24 04:56:00.314798: step: 476/470, loss: 0.008894217200577259 2023-01-24 04:56:01.068383: step: 478/470, loss: 0.03804539516568184 2023-01-24 04:56:01.733141: step: 480/470, loss: 0.0294265728443861 2023-01-24 04:56:02.455271: step: 482/470, loss: 0.021114999428391457 2023-01-24 04:56:03.176884: step: 484/470, loss: 0.049864597618579865 2023-01-24 04:56:03.960547: step: 486/470, loss: 0.021185453981161118 2023-01-24 04:56:04.709867: step: 488/470, loss: 0.0015564959030598402 2023-01-24 04:56:05.340156: step: 490/470, loss: 0.02223014645278454 2023-01-24 04:56:06.096295: step: 492/470, loss: 0.02108120173215866 2023-01-24 04:56:06.808716: step: 494/470, loss: 0.10416891425848007 2023-01-24 04:56:07.516592: step: 496/470, loss: 0.04583205655217171 2023-01-24 04:56:08.165284: step: 498/470, loss: 0.02937087044119835 2023-01-24 04:56:08.956998: step: 500/470, loss: 0.00716931140050292 2023-01-24 04:56:09.777340: step: 502/470, loss: 0.01689540222287178 2023-01-24 04:56:10.485958: step: 504/470, loss: 0.029801655560731888 2023-01-24 04:56:11.176057: step: 506/470, loss: 0.05905457213521004 2023-01-24 04:56:11.889362: step: 508/470, loss: 0.11677312850952148 2023-01-24 04:56:12.726453: step: 510/470, loss: 0.018942374736070633 2023-01-24 04:56:13.437415: step: 512/470, loss: 0.00495618861168623 2023-01-24 04:56:14.235775: step: 514/470, loss: 0.0023889688309282064 2023-01-24 04:56:14.972344: step: 516/470, loss: 0.07100418955087662 2023-01-24 04:56:15.722303: step: 518/470, loss: 0.021969538182020187 2023-01-24 04:56:16.506243: step: 520/470, loss: 0.05502269044518471 2023-01-24 04:56:17.214078: step: 522/470, loss: 0.15007732808589935 2023-01-24 04:56:17.938599: step: 524/470, loss: 0.11543755233287811 2023-01-24 04:56:18.692980: step: 526/470, loss: 0.051974616944789886 2023-01-24 04:56:19.449581: step: 528/470, loss: 0.04661441594362259 2023-01-24 04:56:20.200955: step: 530/470, loss: 0.010496355593204498 2023-01-24 04:56:20.973816: step: 532/470, loss: 0.022741319611668587 2023-01-24 04:56:21.730799: step: 534/470, loss: 0.0360884964466095 2023-01-24 04:56:22.375449: step: 536/470, loss: 0.28176605701446533 2023-01-24 04:56:23.134208: step: 538/470, loss: 0.000371862348401919 2023-01-24 04:56:23.911898: step: 540/470, loss: 0.016311364248394966 2023-01-24 04:56:24.609138: step: 542/470, loss: 0.6123561859130859 2023-01-24 04:56:25.339069: step: 544/470, loss: 0.06656290590763092 2023-01-24 04:56:26.157948: step: 546/470, loss: 0.00262643164023757 2023-01-24 04:56:27.024964: step: 548/470, loss: 0.07897511124610901 2023-01-24 04:56:27.833923: step: 550/470, loss: 0.06647037714719772 2023-01-24 04:56:28.556015: step: 552/470, loss: 0.06914768368005753 2023-01-24 04:56:29.305599: step: 554/470, loss: 0.023323729634284973 2023-01-24 04:56:30.056910: step: 556/470, loss: 0.05517176166176796 2023-01-24 04:56:30.856919: step: 558/470, loss: 0.01975119113922119 2023-01-24 04:56:31.590040: step: 560/470, loss: 0.010775907896459103 2023-01-24 04:56:32.342782: step: 562/470, loss: 0.06182064861059189 2023-01-24 04:56:32.984729: step: 564/470, loss: 0.0013573778560385108 2023-01-24 04:56:33.703957: step: 566/470, loss: 0.026441359892487526 2023-01-24 04:56:34.420918: step: 568/470, loss: 0.03428468853235245 2023-01-24 04:56:35.221601: step: 570/470, loss: 0.015539498068392277 2023-01-24 04:56:36.034817: step: 572/470, loss: 0.038939252495765686 2023-01-24 04:56:36.722837: step: 574/470, loss: 0.02000570483505726 2023-01-24 04:56:37.398113: step: 576/470, loss: 0.01621418446302414 2023-01-24 04:56:38.148052: step: 578/470, loss: 0.08785783499479294 2023-01-24 04:56:38.925401: step: 580/470, loss: 0.10585880279541016 2023-01-24 04:56:39.690826: step: 582/470, loss: 0.037938281893730164 2023-01-24 04:56:40.420796: step: 584/470, loss: 0.0022830679081380367 2023-01-24 04:56:41.168601: step: 586/470, loss: 0.007178565952926874 2023-01-24 04:56:41.973543: step: 588/470, loss: 0.009578043594956398 2023-01-24 04:56:42.746168: step: 590/470, loss: 0.06791942566633224 2023-01-24 04:56:43.577689: step: 592/470, loss: 0.3170134127140045 2023-01-24 04:56:44.198203: step: 594/470, loss: 0.00753396563231945 2023-01-24 04:56:44.933920: step: 596/470, loss: 0.04484763741493225 2023-01-24 04:56:45.648746: step: 598/470, loss: 0.013004643842577934 2023-01-24 04:56:46.365718: step: 600/470, loss: 0.02657913789153099 2023-01-24 04:56:47.151246: step: 602/470, loss: 0.0408782996237278 2023-01-24 04:56:47.910629: step: 604/470, loss: 0.012685599736869335 2023-01-24 04:56:48.654976: step: 606/470, loss: 0.003009774489328265 2023-01-24 04:56:49.375653: step: 608/470, loss: 0.2504468560218811 2023-01-24 04:56:50.029439: step: 610/470, loss: 0.0925365537405014 2023-01-24 04:56:50.759492: step: 612/470, loss: 0.018450234085321426 2023-01-24 04:56:51.507718: step: 614/470, loss: 0.04527146741747856 2023-01-24 04:56:52.212972: step: 616/470, loss: 0.03232667222619057 2023-01-24 04:56:52.900814: step: 618/470, loss: 0.037647586315870285 2023-01-24 04:56:53.673250: step: 620/470, loss: 0.02571326307952404 2023-01-24 04:56:54.459240: step: 622/470, loss: 0.054269880056381226 2023-01-24 04:56:55.200047: step: 624/470, loss: 0.028405921533703804 2023-01-24 04:56:55.902151: step: 626/470, loss: 0.020112626254558563 2023-01-24 04:56:56.672735: step: 628/470, loss: 0.005061836447566748 2023-01-24 04:56:57.389379: step: 630/470, loss: 0.14112667739391327 2023-01-24 04:56:58.110917: step: 632/470, loss: 0.006791027262806892 2023-01-24 04:56:58.815821: step: 634/470, loss: 0.009881870821118355 2023-01-24 04:56:59.551993: step: 636/470, loss: 0.06305323541164398 2023-01-24 04:57:00.264252: step: 638/470, loss: 0.05328913778066635 2023-01-24 04:57:00.944364: step: 640/470, loss: 0.002296677092090249 2023-01-24 04:57:01.692557: step: 642/470, loss: 0.007883894257247448 2023-01-24 04:57:02.530229: step: 644/470, loss: 0.05610523000359535 2023-01-24 04:57:03.264594: step: 646/470, loss: 0.022641412913799286 2023-01-24 04:57:03.940869: step: 648/470, loss: 0.025629589334130287 2023-01-24 04:57:04.696523: step: 650/470, loss: 0.047313909977674484 2023-01-24 04:57:05.360670: step: 652/470, loss: 0.02791016362607479 2023-01-24 04:57:06.051068: step: 654/470, loss: 0.01351831667125225 2023-01-24 04:57:06.882180: step: 656/470, loss: 0.01588682271540165 2023-01-24 04:57:07.569971: step: 658/470, loss: 0.04910393804311752 2023-01-24 04:57:08.393361: step: 660/470, loss: 0.11366312205791473 2023-01-24 04:57:09.127813: step: 662/470, loss: 0.12863023579120636 2023-01-24 04:57:09.775745: step: 664/470, loss: 0.09127775579690933 2023-01-24 04:57:10.543845: step: 666/470, loss: 0.21341535449028015 2023-01-24 04:57:11.275568: step: 668/470, loss: 0.015620485879480839 2023-01-24 04:57:11.972084: step: 670/470, loss: 0.03588823601603508 2023-01-24 04:57:12.687633: step: 672/470, loss: 0.025998149067163467 2023-01-24 04:57:13.483532: step: 674/470, loss: 0.06447438150644302 2023-01-24 04:57:14.135604: step: 676/470, loss: 0.06617400795221329 2023-01-24 04:57:14.847030: step: 678/470, loss: 0.2020503729581833 2023-01-24 04:57:15.638896: step: 680/470, loss: 0.03483056649565697 2023-01-24 04:57:16.367390: step: 682/470, loss: 0.017954112961888313 2023-01-24 04:57:17.095442: step: 684/470, loss: 0.01776493713259697 2023-01-24 04:57:17.799214: step: 686/470, loss: 0.018190190196037292 2023-01-24 04:57:18.563684: step: 688/470, loss: 0.0707760602235794 2023-01-24 04:57:19.359234: step: 690/470, loss: 0.06905476748943329 2023-01-24 04:57:20.222784: step: 692/470, loss: 0.08601033687591553 2023-01-24 04:57:20.991506: step: 694/470, loss: 0.007111764047294855 2023-01-24 04:57:21.720258: step: 696/470, loss: 0.026374198496341705 2023-01-24 04:57:22.421487: step: 698/470, loss: 0.03967350348830223 2023-01-24 04:57:23.180288: step: 700/470, loss: 0.03607969358563423 2023-01-24 04:57:23.894768: step: 702/470, loss: 0.017128845676779747 2023-01-24 04:57:24.636134: step: 704/470, loss: 0.04204748570919037 2023-01-24 04:57:25.464782: step: 706/470, loss: 0.17484718561172485 2023-01-24 04:57:26.191163: step: 708/470, loss: 0.0665166974067688 2023-01-24 04:57:26.825793: step: 710/470, loss: 0.03982820361852646 2023-01-24 04:57:27.664589: step: 712/470, loss: 0.015020914375782013 2023-01-24 04:57:28.359287: step: 714/470, loss: 0.014696701429784298 2023-01-24 04:57:29.126711: step: 716/470, loss: 0.2812211513519287 2023-01-24 04:57:29.860843: step: 718/470, loss: 2.2691924571990967 2023-01-24 04:57:30.653929: step: 720/470, loss: 0.06255054473876953 2023-01-24 04:57:31.438821: step: 722/470, loss: 0.025868145748972893 2023-01-24 04:57:32.203154: step: 724/470, loss: 0.0933215320110321 2023-01-24 04:57:32.930185: step: 726/470, loss: 0.156637504696846 2023-01-24 04:57:33.639195: step: 728/470, loss: 0.11742421239614487 2023-01-24 04:57:34.369297: step: 730/470, loss: 0.09174972772598267 2023-01-24 04:57:35.155728: step: 732/470, loss: 0.04268868640065193 2023-01-24 04:57:35.975226: step: 734/470, loss: 0.07636651396751404 2023-01-24 04:57:36.704923: step: 736/470, loss: 0.013568645343184471 2023-01-24 04:57:37.437595: step: 738/470, loss: 0.03813782334327698 2023-01-24 04:57:38.148826: step: 740/470, loss: 0.019819870591163635 2023-01-24 04:57:38.947542: step: 742/470, loss: 0.04321083426475525 2023-01-24 04:57:39.676255: step: 744/470, loss: 0.006098398473113775 2023-01-24 04:57:40.382041: step: 746/470, loss: 0.022449664771556854 2023-01-24 04:57:41.145504: step: 748/470, loss: 0.05610019341111183 2023-01-24 04:57:41.819434: step: 750/470, loss: 0.05538138747215271 2023-01-24 04:57:42.545744: step: 752/470, loss: 0.03515056148171425 2023-01-24 04:57:43.252979: step: 754/470, loss: 0.06996199488639832 2023-01-24 04:57:43.991836: step: 756/470, loss: 0.010419724509119987 2023-01-24 04:57:44.767893: step: 758/470, loss: 0.02060377597808838 2023-01-24 04:57:45.532386: step: 760/470, loss: 0.021778682246804237 2023-01-24 04:57:46.265837: step: 762/470, loss: 0.14664672315120697 2023-01-24 04:57:46.967570: step: 764/470, loss: 0.04197612777352333 2023-01-24 04:57:47.598347: step: 766/470, loss: 0.00862900447100401 2023-01-24 04:57:48.372024: step: 768/470, loss: 0.01935235597193241 2023-01-24 04:57:49.079802: step: 770/470, loss: 0.0040095048025250435 2023-01-24 04:57:49.782202: step: 772/470, loss: 0.03624457120895386 2023-01-24 04:57:50.514889: step: 774/470, loss: 0.10813373327255249 2023-01-24 04:57:51.245533: step: 776/470, loss: 0.019472643733024597 2023-01-24 04:57:52.025782: step: 778/470, loss: 0.03459063544869423 2023-01-24 04:57:52.775154: step: 780/470, loss: 0.055417634546756744 2023-01-24 04:57:53.539447: step: 782/470, loss: 0.0014409045688807964 2023-01-24 04:57:54.232278: step: 784/470, loss: 0.029017573222517967 2023-01-24 04:57:55.001358: step: 786/470, loss: 0.04178899899125099 2023-01-24 04:57:55.736620: step: 788/470, loss: 0.18817931413650513 2023-01-24 04:57:56.442015: step: 790/470, loss: 1.237821102142334 2023-01-24 04:57:57.286371: step: 792/470, loss: 0.05888526514172554 2023-01-24 04:57:57.993115: step: 794/470, loss: 0.04464132338762283 2023-01-24 04:57:58.728880: step: 796/470, loss: 0.03926714509725571 2023-01-24 04:57:59.445294: step: 798/470, loss: 0.003827402601018548 2023-01-24 04:58:00.130738: step: 800/470, loss: 0.02599688246846199 2023-01-24 04:58:00.869984: step: 802/470, loss: 0.03956460580229759 2023-01-24 04:58:01.603056: step: 804/470, loss: 0.025580542162060738 2023-01-24 04:58:02.417267: step: 806/470, loss: 0.07505716383457184 2023-01-24 04:58:03.126159: step: 808/470, loss: 0.019929545000195503 2023-01-24 04:58:03.834860: step: 810/470, loss: 0.04150541499257088 2023-01-24 04:58:04.702080: step: 812/470, loss: 0.2298429161310196 2023-01-24 04:58:05.463947: step: 814/470, loss: 0.14862318336963654 2023-01-24 04:58:06.194054: step: 816/470, loss: 0.4220031499862671 2023-01-24 04:58:06.905223: step: 818/470, loss: 0.018468687310814857 2023-01-24 04:58:07.556653: step: 820/470, loss: 0.08981668949127197 2023-01-24 04:58:08.373396: step: 822/470, loss: 0.08471503853797913 2023-01-24 04:58:09.120143: step: 824/470, loss: 0.021649127826094627 2023-01-24 04:58:09.946101: step: 826/470, loss: 0.2502903342247009 2023-01-24 04:58:10.746964: step: 828/470, loss: 0.01829441450536251 2023-01-24 04:58:11.407597: step: 830/470, loss: 0.011802778579294682 2023-01-24 04:58:12.129435: step: 832/470, loss: 0.14194156229496002 2023-01-24 04:58:12.836416: step: 834/470, loss: 0.007301392499357462 2023-01-24 04:58:13.547770: step: 836/470, loss: 0.012469622306525707 2023-01-24 04:58:14.219054: step: 838/470, loss: 0.07566139101982117 2023-01-24 04:58:14.920083: step: 840/470, loss: 0.033544205129146576 2023-01-24 04:58:15.684808: step: 842/470, loss: 0.6390538215637207 2023-01-24 04:58:16.499828: step: 844/470, loss: 0.0382160022854805 2023-01-24 04:58:17.160499: step: 846/470, loss: 0.1456291675567627 2023-01-24 04:58:17.918394: step: 848/470, loss: 0.019455960020422935 2023-01-24 04:58:18.672244: step: 850/470, loss: 0.05021538957953453 2023-01-24 04:58:19.467756: step: 852/470, loss: 0.03165812790393829 2023-01-24 04:58:20.156582: step: 854/470, loss: 0.03875069320201874 2023-01-24 04:58:20.843253: step: 856/470, loss: 0.07675700634717941 2023-01-24 04:58:21.636795: step: 858/470, loss: 0.0549500472843647 2023-01-24 04:58:22.353267: step: 860/470, loss: 0.07575095444917679 2023-01-24 04:58:23.060345: step: 862/470, loss: 0.01323747355490923 2023-01-24 04:58:23.860382: step: 864/470, loss: 0.19846421480178833 2023-01-24 04:58:24.650779: step: 866/470, loss: 0.0069488100707530975 2023-01-24 04:58:25.341246: step: 868/470, loss: 0.09136679023504257 2023-01-24 04:58:26.091778: step: 870/470, loss: 0.027809590101242065 2023-01-24 04:58:26.815852: step: 872/470, loss: 0.003995862323790789 2023-01-24 04:58:27.482240: step: 874/470, loss: 0.010341154411435127 2023-01-24 04:58:28.262414: step: 876/470, loss: 0.014673410914838314 2023-01-24 04:58:28.999834: step: 878/470, loss: 0.035946592688560486 2023-01-24 04:58:29.751954: step: 880/470, loss: 0.034462690353393555 2023-01-24 04:58:30.453997: step: 882/470, loss: 0.0298713780939579 2023-01-24 04:58:31.209490: step: 884/470, loss: 0.01902618259191513 2023-01-24 04:58:31.944951: step: 886/470, loss: 0.007162892259657383 2023-01-24 04:58:32.698838: step: 888/470, loss: 0.024051036685705185 2023-01-24 04:58:33.465944: step: 890/470, loss: 0.014205212704837322 2023-01-24 04:58:34.290556: step: 892/470, loss: 0.0005725105293095112 2023-01-24 04:58:35.079873: step: 894/470, loss: 0.04063863679766655 2023-01-24 04:58:35.838598: step: 896/470, loss: 0.11876030266284943 2023-01-24 04:58:36.641290: step: 898/470, loss: 0.012916138395667076 2023-01-24 04:58:37.439561: step: 900/470, loss: 0.013642225414514542 2023-01-24 04:58:38.243970: step: 902/470, loss: 0.022717280313372612 2023-01-24 04:58:38.979764: step: 904/470, loss: 0.4222767651081085 2023-01-24 04:58:39.676264: step: 906/470, loss: 0.013618819415569305 2023-01-24 04:58:40.429109: step: 908/470, loss: 0.061890263110399246 2023-01-24 04:58:41.187806: step: 910/470, loss: 0.04235636442899704 2023-01-24 04:58:41.946555: step: 912/470, loss: 0.0523671992123127 2023-01-24 04:58:42.781336: step: 914/470, loss: 0.0566101111471653 2023-01-24 04:58:43.489789: step: 916/470, loss: 0.07533188164234161 2023-01-24 04:58:44.245161: step: 918/470, loss: 0.019266333431005478 2023-01-24 04:58:44.984060: step: 920/470, loss: 0.008472178131341934 2023-01-24 04:58:45.666538: step: 922/470, loss: 0.03950265422463417 2023-01-24 04:58:46.352573: step: 924/470, loss: 0.02345268428325653 2023-01-24 04:58:47.149896: step: 926/470, loss: 0.015790196135640144 2023-01-24 04:58:47.845259: step: 928/470, loss: 0.027424369007349014 2023-01-24 04:58:48.661540: step: 930/470, loss: 0.1694997251033783 2023-01-24 04:58:49.392738: step: 932/470, loss: 0.04345071688294411 2023-01-24 04:58:50.162090: step: 934/470, loss: 0.029627729207277298 2023-01-24 04:58:50.867257: step: 936/470, loss: 1.4033803939819336 2023-01-24 04:58:51.582141: step: 938/470, loss: 0.07272469252347946 2023-01-24 04:58:52.321440: step: 940/470, loss: 0.07386843860149384 2023-01-24 04:58:52.970224: step: 942/470, loss: 0.00953019130975008 ================================================== Loss: 0.078 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3369976943346509, 'r': 0.3235689437065149, 'f1': 0.3301468215553405}, 'combined': 0.24326607904077718, 'epoch': 24} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34876134084266536, 'r': 0.34943203572890125, 'f1': 0.3490963661460685}, 'combined': 0.2327309107640456, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3369874296986486, 'r': 0.32803520196471864, 'f1': 0.3324510604527053}, 'combined': 0.24496393928094073, 'epoch': 24} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34759314297630933, 'r': 0.3445851253928605, 'f1': 0.3460825981734186}, 'combined': 0.23072173211561234, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3247302029731275, 'r': 0.326578762003335, 'f1': 0.3256518591783492}, 'combined': 0.23995400149983626, 'epoch': 24} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3462616463218727, 'r': 0.359579401949637, 'f1': 0.35279488493171934}, 'combined': 0.2351965899544795, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2635135135135135, 'r': 0.2785714285714286, 'f1': 0.2708333333333333}, 'combined': 0.18055555555555552, 'epoch': 24} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.41304347826086957, 'f1': 0.4871794871794871}, 'combined': 0.32478632478632474, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 24} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3247302029731275, 'r': 0.326578762003335, 'f1': 0.3256518591783492}, 'combined': 0.23995400149983626, 'epoch': 24} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3462616463218727, 'r': 0.359579401949637, 'f1': 0.35279488493171934}, 'combined': 0.2351965899544795, 'epoch': 24} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 24} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:01:38.186832: step: 2/470, loss: 0.02162099815905094 2023-01-24 05:01:38.967218: step: 4/470, loss: 0.012446287088096142 2023-01-24 05:01:39.720052: step: 6/470, loss: 0.034468457102775574 2023-01-24 05:01:40.508719: step: 8/470, loss: 0.016727212816476822 2023-01-24 05:01:41.299262: step: 10/470, loss: 0.03275219723582268 2023-01-24 05:01:42.014717: step: 12/470, loss: 0.012032398022711277 2023-01-24 05:01:42.857877: step: 14/470, loss: 0.04827987775206566 2023-01-24 05:01:43.615663: step: 16/470, loss: 0.039694756269454956 2023-01-24 05:01:44.383016: step: 18/470, loss: 0.018378468230366707 2023-01-24 05:01:44.998659: step: 20/470, loss: 0.010981571860611439 2023-01-24 05:01:45.873793: step: 22/470, loss: 1.3256505727767944 2023-01-24 05:01:46.613853: step: 24/470, loss: 0.007463351357728243 2023-01-24 05:01:47.408554: step: 26/470, loss: 0.008972594514489174 2023-01-24 05:01:48.120784: step: 28/470, loss: 0.005426797084510326 2023-01-24 05:01:48.906030: step: 30/470, loss: 0.005324822850525379 2023-01-24 05:01:49.643376: step: 32/470, loss: 0.07613864541053772 2023-01-24 05:01:50.373822: step: 34/470, loss: 0.03191568702459335 2023-01-24 05:01:51.117028: step: 36/470, loss: 0.05562712624669075 2023-01-24 05:01:51.882417: step: 38/470, loss: 0.01600096933543682 2023-01-24 05:01:52.639043: step: 40/470, loss: 0.044862739741802216 2023-01-24 05:01:53.418022: step: 42/470, loss: 0.3098965287208557 2023-01-24 05:01:54.171723: step: 44/470, loss: 0.027755551040172577 2023-01-24 05:01:54.920212: step: 46/470, loss: 0.09233322739601135 2023-01-24 05:01:55.635504: step: 48/470, loss: 0.39350318908691406 2023-01-24 05:01:56.349063: step: 50/470, loss: 0.04757951945066452 2023-01-24 05:01:57.109633: step: 52/470, loss: 0.004094702657312155 2023-01-24 05:01:57.878023: step: 54/470, loss: 0.037220463156700134 2023-01-24 05:01:58.613297: step: 56/470, loss: 0.0336788184940815 2023-01-24 05:01:59.448673: step: 58/470, loss: 0.010243861936032772 2023-01-24 05:02:00.175781: step: 60/470, loss: 0.08532825112342834 2023-01-24 05:02:00.893767: step: 62/470, loss: 0.031235769391059875 2023-01-24 05:02:01.525953: step: 64/470, loss: 0.012863215059041977 2023-01-24 05:02:02.352160: step: 66/470, loss: 1.0012062788009644 2023-01-24 05:02:03.106784: step: 68/470, loss: 0.02800358459353447 2023-01-24 05:02:03.900836: step: 70/470, loss: 0.021341489627957344 2023-01-24 05:02:04.639679: step: 72/470, loss: 0.013578714802861214 2023-01-24 05:02:05.452787: step: 74/470, loss: 0.01855326071381569 2023-01-24 05:02:06.225919: step: 76/470, loss: 0.04243265837430954 2023-01-24 05:02:06.932208: step: 78/470, loss: 0.03518841415643692 2023-01-24 05:02:07.634991: step: 80/470, loss: 0.08064284920692444 2023-01-24 05:02:08.367484: step: 82/470, loss: 0.017204048112034798 2023-01-24 05:02:09.158069: step: 84/470, loss: 0.014039833098649979 2023-01-24 05:02:09.886990: step: 86/470, loss: 0.023023655638098717 2023-01-24 05:02:10.607013: step: 88/470, loss: 0.021660305559635162 2023-01-24 05:02:11.358843: step: 90/470, loss: 0.10536940395832062 2023-01-24 05:02:12.111053: step: 92/470, loss: 0.024169238284230232 2023-01-24 05:02:12.789173: step: 94/470, loss: 0.029674746096134186 2023-01-24 05:02:13.585054: step: 96/470, loss: 0.028852693736553192 2023-01-24 05:02:14.311791: step: 98/470, loss: 0.00507341930642724 2023-01-24 05:02:15.093421: step: 100/470, loss: 0.017849357798695564 2023-01-24 05:02:15.895309: step: 102/470, loss: 0.0505632720887661 2023-01-24 05:02:16.740475: step: 104/470, loss: 0.009880991652607918 2023-01-24 05:02:17.544785: step: 106/470, loss: 0.02503761276602745 2023-01-24 05:02:18.245485: step: 108/470, loss: 0.0026527876034379005 2023-01-24 05:02:19.027384: step: 110/470, loss: 0.017480017617344856 2023-01-24 05:02:19.713203: step: 112/470, loss: 0.011955924332141876 2023-01-24 05:02:20.400138: step: 114/470, loss: 0.035263460129499435 2023-01-24 05:02:21.209005: step: 116/470, loss: 0.0948866754770279 2023-01-24 05:02:21.988510: step: 118/470, loss: 0.01817036233842373 2023-01-24 05:02:22.658731: step: 120/470, loss: 0.005630629137158394 2023-01-24 05:02:23.306820: step: 122/470, loss: 0.05774114280939102 2023-01-24 05:02:24.052956: step: 124/470, loss: 0.0010737971169874072 2023-01-24 05:02:24.980882: step: 126/470, loss: 0.01047587115317583 2023-01-24 05:02:25.687830: step: 128/470, loss: 0.0022798320278525352 2023-01-24 05:02:26.411198: step: 130/470, loss: 0.03199382126331329 2023-01-24 05:02:27.148050: step: 132/470, loss: 0.10577887296676636 2023-01-24 05:02:27.860937: step: 134/470, loss: 0.020609887316823006 2023-01-24 05:02:28.592858: step: 136/470, loss: 0.0020259118173271418 2023-01-24 05:02:29.392772: step: 138/470, loss: 0.020044716075062752 2023-01-24 05:02:30.135769: step: 140/470, loss: 0.004093657713383436 2023-01-24 05:02:30.899708: step: 142/470, loss: 0.062444720417261124 2023-01-24 05:02:31.613221: step: 144/470, loss: 0.03266696259379387 2023-01-24 05:02:32.431189: step: 146/470, loss: 0.05523476377129555 2023-01-24 05:02:33.202498: step: 148/470, loss: 0.0045193941332399845 2023-01-24 05:02:33.917832: step: 150/470, loss: 0.009763001464307308 2023-01-24 05:02:34.640326: step: 152/470, loss: 0.042540084570646286 2023-01-24 05:02:35.431148: step: 154/470, loss: 0.016827277839183807 2023-01-24 05:02:36.224620: step: 156/470, loss: 0.17072248458862305 2023-01-24 05:02:37.022917: step: 158/470, loss: 0.005717588122934103 2023-01-24 05:02:37.782899: step: 160/470, loss: 0.009820440784096718 2023-01-24 05:02:38.544630: step: 162/470, loss: 0.010699333623051643 2023-01-24 05:02:39.354845: step: 164/470, loss: 0.08145033568143845 2023-01-24 05:02:40.106390: step: 166/470, loss: 0.0005983722512610257 2023-01-24 05:02:40.937005: step: 168/470, loss: 0.028514329344034195 2023-01-24 05:02:41.686617: step: 170/470, loss: 0.006405293010175228 2023-01-24 05:02:42.414005: step: 172/470, loss: 0.00240780645981431 2023-01-24 05:02:43.164694: step: 174/470, loss: 0.026528997346758842 2023-01-24 05:02:44.004065: step: 176/470, loss: 0.02798590436577797 2023-01-24 05:02:44.760285: step: 178/470, loss: 0.014988838694989681 2023-01-24 05:02:45.506880: step: 180/470, loss: 0.13888667523860931 2023-01-24 05:02:46.245491: step: 182/470, loss: 0.020751958712935448 2023-01-24 05:02:46.903752: step: 184/470, loss: 0.0074022323824465275 2023-01-24 05:02:47.627096: step: 186/470, loss: 0.16846947371959686 2023-01-24 05:02:48.367784: step: 188/470, loss: 0.02288518100976944 2023-01-24 05:02:49.183327: step: 190/470, loss: 0.025580741465091705 2023-01-24 05:02:49.966352: step: 192/470, loss: 0.01590382121503353 2023-01-24 05:02:50.778139: step: 194/470, loss: 0.018982110545039177 2023-01-24 05:02:51.635176: step: 196/470, loss: 0.028010079637169838 2023-01-24 05:02:52.333527: step: 198/470, loss: 0.009743987582623959 2023-01-24 05:02:53.083269: step: 200/470, loss: 0.0009152348502539098 2023-01-24 05:02:53.945600: step: 202/470, loss: 0.0641738623380661 2023-01-24 05:02:54.779830: step: 204/470, loss: 0.006123876199126244 2023-01-24 05:02:55.537358: step: 206/470, loss: 0.03843360021710396 2023-01-24 05:02:56.353611: step: 208/470, loss: 0.0037462252657860518 2023-01-24 05:02:57.099060: step: 210/470, loss: 0.002261190675199032 2023-01-24 05:02:57.945266: step: 212/470, loss: 0.16585534811019897 2023-01-24 05:02:58.688627: step: 214/470, loss: 0.2700032889842987 2023-01-24 05:02:59.373049: step: 216/470, loss: 0.024171005934476852 2023-01-24 05:03:00.108131: step: 218/470, loss: 0.004075937904417515 2023-01-24 05:03:00.841934: step: 220/470, loss: 0.004899140447378159 2023-01-24 05:03:01.555378: step: 222/470, loss: 0.04851776361465454 2023-01-24 05:03:02.352753: step: 224/470, loss: 0.01797030307352543 2023-01-24 05:03:03.123053: step: 226/470, loss: 0.0035274296533316374 2023-01-24 05:03:03.907394: step: 228/470, loss: 0.024435440078377724 2023-01-24 05:03:04.713870: step: 230/470, loss: 0.14094187319278717 2023-01-24 05:03:05.564961: step: 232/470, loss: 0.009624892845749855 2023-01-24 05:03:06.349966: step: 234/470, loss: 0.03491733595728874 2023-01-24 05:03:07.126349: step: 236/470, loss: 0.028152016922831535 2023-01-24 05:03:07.884457: step: 238/470, loss: 0.02921573631465435 2023-01-24 05:03:08.723470: step: 240/470, loss: 0.018016021698713303 2023-01-24 05:03:09.503407: step: 242/470, loss: 0.21584904193878174 2023-01-24 05:03:10.189460: step: 244/470, loss: 0.0007679513073526323 2023-01-24 05:03:10.893748: step: 246/470, loss: 0.007389437407255173 2023-01-24 05:03:11.587597: step: 248/470, loss: 0.019924050197005272 2023-01-24 05:03:12.390472: step: 250/470, loss: 0.0013338790740817785 2023-01-24 05:03:13.146159: step: 252/470, loss: 0.09131523221731186 2023-01-24 05:03:13.892943: step: 254/470, loss: 0.04288085922598839 2023-01-24 05:03:14.699405: step: 256/470, loss: 0.008014186285436153 2023-01-24 05:03:15.438889: step: 258/470, loss: 0.002048267750069499 2023-01-24 05:03:16.213088: step: 260/470, loss: 0.007588651031255722 2023-01-24 05:03:16.935321: step: 262/470, loss: 0.010957635007798672 2023-01-24 05:03:17.705486: step: 264/470, loss: 0.011484313756227493 2023-01-24 05:03:18.447465: step: 266/470, loss: 0.04040658846497536 2023-01-24 05:03:19.226548: step: 268/470, loss: 0.015184608288109303 2023-01-24 05:03:19.979342: step: 270/470, loss: 0.010376902297139168 2023-01-24 05:03:20.646699: step: 272/470, loss: 0.04080960154533386 2023-01-24 05:03:21.362828: step: 274/470, loss: 1.0133119821548462 2023-01-24 05:03:22.121870: step: 276/470, loss: 0.005866447929292917 2023-01-24 05:03:22.898338: step: 278/470, loss: 0.11790863424539566 2023-01-24 05:03:23.633537: step: 280/470, loss: 0.00046028837095946074 2023-01-24 05:03:24.384164: step: 282/470, loss: 0.03480033576488495 2023-01-24 05:03:25.137102: step: 284/470, loss: 0.012186521664261818 2023-01-24 05:03:25.897608: step: 286/470, loss: 0.03706849738955498 2023-01-24 05:03:26.670142: step: 288/470, loss: 0.02106441929936409 2023-01-24 05:03:27.449312: step: 290/470, loss: 0.004235308617353439 2023-01-24 05:03:28.218231: step: 292/470, loss: 0.0014547484461218119 2023-01-24 05:03:29.006899: step: 294/470, loss: 0.029318470507860184 2023-01-24 05:03:29.735855: step: 296/470, loss: 0.05262761935591698 2023-01-24 05:03:30.464960: step: 298/470, loss: 0.004162387456744909 2023-01-24 05:03:31.221231: step: 300/470, loss: 0.08064429461956024 2023-01-24 05:03:31.884171: step: 302/470, loss: 0.011829620227217674 2023-01-24 05:03:32.669298: step: 304/470, loss: 0.020030930638313293 2023-01-24 05:03:33.363933: step: 306/470, loss: 0.006781530566513538 2023-01-24 05:03:34.069230: step: 308/470, loss: 0.10114534944295883 2023-01-24 05:03:34.806029: step: 310/470, loss: 0.03585520759224892 2023-01-24 05:03:35.594003: step: 312/470, loss: 0.02825266122817993 2023-01-24 05:03:36.306101: step: 314/470, loss: 0.043976690620183945 2023-01-24 05:03:37.110026: step: 316/470, loss: 0.045031096786260605 2023-01-24 05:03:37.895100: step: 318/470, loss: 0.04603327438235283 2023-01-24 05:03:38.698330: step: 320/470, loss: 0.04247571900486946 2023-01-24 05:03:39.378315: step: 322/470, loss: 0.0015748925507068634 2023-01-24 05:03:40.193226: step: 324/470, loss: 0.06393693387508392 2023-01-24 05:03:40.954093: step: 326/470, loss: 0.035000111907720566 2023-01-24 05:03:41.699730: step: 328/470, loss: 0.014710234478116035 2023-01-24 05:03:42.384734: step: 330/470, loss: 0.028020773082971573 2023-01-24 05:03:43.167506: step: 332/470, loss: 0.06262163072824478 2023-01-24 05:03:43.834663: step: 334/470, loss: 0.0007441341294907033 2023-01-24 05:03:44.521285: step: 336/470, loss: 0.08218454569578171 2023-01-24 05:03:45.409390: step: 338/470, loss: 0.4393693506717682 2023-01-24 05:03:46.182223: step: 340/470, loss: 0.014411939308047295 2023-01-24 05:03:46.914185: step: 342/470, loss: 0.060469575226306915 2023-01-24 05:03:47.689904: step: 344/470, loss: 0.036549992859363556 2023-01-24 05:03:48.514065: step: 346/470, loss: 0.031213141977787018 2023-01-24 05:03:49.292424: step: 348/470, loss: 0.0008316990570165217 2023-01-24 05:03:50.078592: step: 350/470, loss: 1.0978364944458008 2023-01-24 05:03:50.777468: step: 352/470, loss: 0.014219812117516994 2023-01-24 05:03:51.516229: step: 354/470, loss: 0.06110917776823044 2023-01-24 05:03:52.444608: step: 356/470, loss: 0.06231218948960304 2023-01-24 05:03:53.133079: step: 358/470, loss: 0.024384677410125732 2023-01-24 05:03:54.009004: step: 360/470, loss: 0.06801487505435944 2023-01-24 05:03:54.747371: step: 362/470, loss: 0.043319717049598694 2023-01-24 05:03:55.504966: step: 364/470, loss: 0.006488930433988571 2023-01-24 05:03:56.216664: step: 366/470, loss: 1.2596720457077026 2023-01-24 05:03:56.916679: step: 368/470, loss: 0.009231701493263245 2023-01-24 05:03:57.652697: step: 370/470, loss: 0.07412904500961304 2023-01-24 05:03:58.483445: step: 372/470, loss: 0.054003313183784485 2023-01-24 05:03:59.285363: step: 374/470, loss: 0.04891440272331238 2023-01-24 05:04:00.118563: step: 376/470, loss: 0.03218501806259155 2023-01-24 05:04:00.830821: step: 378/470, loss: 0.010670957155525684 2023-01-24 05:04:01.621547: step: 380/470, loss: 0.3814714848995209 2023-01-24 05:04:02.405644: step: 382/470, loss: 0.029765913262963295 2023-01-24 05:04:03.162850: step: 384/470, loss: 0.03948010876774788 2023-01-24 05:04:03.849194: step: 386/470, loss: 0.008174107410013676 2023-01-24 05:04:04.623370: step: 388/470, loss: 0.0791185200214386 2023-01-24 05:04:05.376681: step: 390/470, loss: 0.0023983244318515062 2023-01-24 05:04:06.148424: step: 392/470, loss: 0.0599571131169796 2023-01-24 05:04:06.840193: step: 394/470, loss: 0.05517046898603439 2023-01-24 05:04:07.609480: step: 396/470, loss: 0.044240474700927734 2023-01-24 05:04:08.353508: step: 398/470, loss: 0.017728475853800774 2023-01-24 05:04:09.136688: step: 400/470, loss: 0.06311778724193573 2023-01-24 05:04:09.890485: step: 402/470, loss: 0.00163270381744951 2023-01-24 05:04:10.715375: step: 404/470, loss: 0.014112650416791439 2023-01-24 05:04:11.429107: step: 406/470, loss: 0.0612499974668026 2023-01-24 05:04:12.233676: step: 408/470, loss: 0.01821378618478775 2023-01-24 05:04:12.958350: step: 410/470, loss: 0.20836558938026428 2023-01-24 05:04:13.776453: step: 412/470, loss: 0.03567926958203316 2023-01-24 05:04:14.524716: step: 414/470, loss: 0.07208713889122009 2023-01-24 05:04:15.301147: step: 416/470, loss: 0.13654865324497223 2023-01-24 05:04:16.042308: step: 418/470, loss: 0.03147239238023758 2023-01-24 05:04:16.765652: step: 420/470, loss: 0.017493341118097305 2023-01-24 05:04:17.537256: step: 422/470, loss: 0.09981317818164825 2023-01-24 05:04:18.270838: step: 424/470, loss: 0.032189272344112396 2023-01-24 05:04:19.058639: step: 426/470, loss: 0.014166963286697865 2023-01-24 05:04:19.974889: step: 428/470, loss: 0.09840293973684311 2023-01-24 05:04:20.765275: step: 430/470, loss: 0.08024189621210098 2023-01-24 05:04:21.524194: step: 432/470, loss: 0.04482298344373703 2023-01-24 05:04:22.307344: step: 434/470, loss: 0.038904860615730286 2023-01-24 05:04:23.120892: step: 436/470, loss: 0.2726028561592102 2023-01-24 05:04:23.849239: step: 438/470, loss: 0.02278323471546173 2023-01-24 05:04:24.633941: step: 440/470, loss: 0.01581525057554245 2023-01-24 05:04:25.380284: step: 442/470, loss: 0.04916553571820259 2023-01-24 05:04:26.199331: step: 444/470, loss: 0.018240492790937424 2023-01-24 05:04:26.924578: step: 446/470, loss: 0.05501729995012283 2023-01-24 05:04:27.732102: step: 448/470, loss: 0.014724891632795334 2023-01-24 05:04:28.584127: step: 450/470, loss: 0.09602244943380356 2023-01-24 05:04:29.271291: step: 452/470, loss: 0.004648915026336908 2023-01-24 05:04:30.041599: step: 454/470, loss: 0.01640310324728489 2023-01-24 05:04:30.805974: step: 456/470, loss: 0.03248788043856621 2023-01-24 05:04:31.553684: step: 458/470, loss: 1.1334494352340698 2023-01-24 05:04:32.341062: step: 460/470, loss: 0.14382703602313995 2023-01-24 05:04:33.105623: step: 462/470, loss: 0.06437462568283081 2023-01-24 05:04:33.974482: step: 464/470, loss: 0.020893843844532967 2023-01-24 05:04:34.647109: step: 466/470, loss: 0.020247068256139755 2023-01-24 05:04:35.433073: step: 468/470, loss: 0.06369510293006897 2023-01-24 05:04:36.204453: step: 470/470, loss: 0.002879982814192772 2023-01-24 05:04:36.917700: step: 472/470, loss: 0.01400853507220745 2023-01-24 05:04:37.684140: step: 474/470, loss: 0.03950931504368782 2023-01-24 05:04:38.425127: step: 476/470, loss: 0.02229795604944229 2023-01-24 05:04:39.272870: step: 478/470, loss: 0.017860140651464462 2023-01-24 05:04:40.051598: step: 480/470, loss: 0.00995658803731203 2023-01-24 05:04:40.796765: step: 482/470, loss: 0.0026193808298557997 2023-01-24 05:04:41.560886: step: 484/470, loss: 0.022055814042687416 2023-01-24 05:04:42.308691: step: 486/470, loss: 0.2256946861743927 2023-01-24 05:04:43.117380: step: 488/470, loss: 0.01226732973009348 2023-01-24 05:04:43.848427: step: 490/470, loss: 0.017396673560142517 2023-01-24 05:04:44.637530: step: 492/470, loss: 0.0489540733397007 2023-01-24 05:04:45.413723: step: 494/470, loss: 0.03391120210289955 2023-01-24 05:04:46.138668: step: 496/470, loss: 0.01827331632375717 2023-01-24 05:04:46.996291: step: 498/470, loss: 0.012197432108223438 2023-01-24 05:04:47.757036: step: 500/470, loss: 0.02815680392086506 2023-01-24 05:04:48.496893: step: 502/470, loss: 0.07767844945192337 2023-01-24 05:04:49.366711: step: 504/470, loss: 0.01819152943789959 2023-01-24 05:04:50.177510: step: 506/470, loss: 0.008796506561338902 2023-01-24 05:04:50.909050: step: 508/470, loss: 0.26540952920913696 2023-01-24 05:04:51.691411: step: 510/470, loss: 0.03128129988908768 2023-01-24 05:04:52.437549: step: 512/470, loss: 0.021161213517189026 2023-01-24 05:04:53.241570: step: 514/470, loss: 0.031146377325057983 2023-01-24 05:04:53.974715: step: 516/470, loss: 0.0022348014172166586 2023-01-24 05:04:54.827249: step: 518/470, loss: 0.0887453556060791 2023-01-24 05:04:55.516050: step: 520/470, loss: 0.023102333769202232 2023-01-24 05:04:56.370798: step: 522/470, loss: 0.035375066101551056 2023-01-24 05:04:57.103241: step: 524/470, loss: 0.006511027924716473 2023-01-24 05:04:57.864744: step: 526/470, loss: 0.0845172256231308 2023-01-24 05:04:58.613098: step: 528/470, loss: 0.03893345594406128 2023-01-24 05:04:59.388460: step: 530/470, loss: 0.22155825793743134 2023-01-24 05:05:00.138780: step: 532/470, loss: 0.06408680230379105 2023-01-24 05:05:00.910972: step: 534/470, loss: 0.08290005475282669 2023-01-24 05:05:01.644120: step: 536/470, loss: 0.018902825191617012 2023-01-24 05:05:02.389549: step: 538/470, loss: 0.05063078925013542 2023-01-24 05:05:03.130978: step: 540/470, loss: 0.06731826812028885 2023-01-24 05:05:03.828487: step: 542/470, loss: 0.04933793842792511 2023-01-24 05:05:04.530325: step: 544/470, loss: 0.12002909183502197 2023-01-24 05:05:05.235899: step: 546/470, loss: 0.042481400072574615 2023-01-24 05:05:05.915997: step: 548/470, loss: 0.11045798659324646 2023-01-24 05:05:06.582556: step: 550/470, loss: 0.003042022930458188 2023-01-24 05:05:07.366167: step: 552/470, loss: 0.03922036290168762 2023-01-24 05:05:08.088811: step: 554/470, loss: 0.03836175426840782 2023-01-24 05:05:08.851792: step: 556/470, loss: 0.03422430902719498 2023-01-24 05:05:09.613334: step: 558/470, loss: 0.01736604794859886 2023-01-24 05:05:10.378069: step: 560/470, loss: 0.029811743646860123 2023-01-24 05:05:11.108837: step: 562/470, loss: 0.010475804097950459 2023-01-24 05:05:11.830043: step: 564/470, loss: 0.0307230856269598 2023-01-24 05:05:12.562230: step: 566/470, loss: 0.020388364791870117 2023-01-24 05:05:13.336335: step: 568/470, loss: 0.18014267086982727 2023-01-24 05:05:13.994286: step: 570/470, loss: 0.03197851777076721 2023-01-24 05:05:14.759647: step: 572/470, loss: 0.6306474804878235 2023-01-24 05:05:15.405500: step: 574/470, loss: 0.21226122975349426 2023-01-24 05:05:16.111091: step: 576/470, loss: 0.012852661311626434 2023-01-24 05:05:16.850000: step: 578/470, loss: 0.07441854476928711 2023-01-24 05:05:17.524857: step: 580/470, loss: 0.016917813569307327 2023-01-24 05:05:18.230969: step: 582/470, loss: 0.08907926082611084 2023-01-24 05:05:18.946214: step: 584/470, loss: 0.003389795310795307 2023-01-24 05:05:19.715854: step: 586/470, loss: 0.040450599044561386 2023-01-24 05:05:20.516307: step: 588/470, loss: 0.054036617279052734 2023-01-24 05:05:21.229561: step: 590/470, loss: 0.05018281936645508 2023-01-24 05:05:21.939993: step: 592/470, loss: 0.01974686235189438 2023-01-24 05:05:22.637676: step: 594/470, loss: 0.10064013302326202 2023-01-24 05:05:23.370043: step: 596/470, loss: 0.008306887932121754 2023-01-24 05:05:24.190117: step: 598/470, loss: 0.04838766157627106 2023-01-24 05:05:24.993532: step: 600/470, loss: 0.001629787846468389 2023-01-24 05:05:25.775089: step: 602/470, loss: 0.028213849291205406 2023-01-24 05:05:26.478684: step: 604/470, loss: 0.024601630866527557 2023-01-24 05:05:27.201887: step: 606/470, loss: 0.028212811797857285 2023-01-24 05:05:27.975286: step: 608/470, loss: 0.030642293393611908 2023-01-24 05:05:28.725684: step: 610/470, loss: 0.011128339916467667 2023-01-24 05:05:29.406533: step: 612/470, loss: 0.000944534142035991 2023-01-24 05:05:30.111940: step: 614/470, loss: 0.029674546793103218 2023-01-24 05:05:30.790961: step: 616/470, loss: 0.007636074908077717 2023-01-24 05:05:31.590685: step: 618/470, loss: 0.014691839925944805 2023-01-24 05:05:32.472318: step: 620/470, loss: 0.05527832731604576 2023-01-24 05:05:33.219700: step: 622/470, loss: 0.015919934958219528 2023-01-24 05:05:33.935139: step: 624/470, loss: 0.02508891187608242 2023-01-24 05:05:34.748524: step: 626/470, loss: 0.01099168136715889 2023-01-24 05:05:35.437352: step: 628/470, loss: 0.035270821303129196 2023-01-24 05:05:36.066758: step: 630/470, loss: 0.03347640857100487 2023-01-24 05:05:36.718724: step: 632/470, loss: 0.05645357072353363 2023-01-24 05:05:37.402208: step: 634/470, loss: 0.011641307733952999 2023-01-24 05:05:38.223044: step: 636/470, loss: 0.06422200053930283 2023-01-24 05:05:38.924281: step: 638/470, loss: 0.01144405733793974 2023-01-24 05:05:39.708380: step: 640/470, loss: 0.029585037380456924 2023-01-24 05:05:40.405229: step: 642/470, loss: 0.04478341341018677 2023-01-24 05:05:41.138711: step: 644/470, loss: 0.14443320035934448 2023-01-24 05:05:41.843703: step: 646/470, loss: 0.000602882297243923 2023-01-24 05:05:42.572109: step: 648/470, loss: 0.01970474235713482 2023-01-24 05:05:43.302394: step: 650/470, loss: 0.05201108008623123 2023-01-24 05:05:44.024001: step: 652/470, loss: 0.022257506847381592 2023-01-24 05:05:44.709904: step: 654/470, loss: 0.08728256821632385 2023-01-24 05:05:45.423586: step: 656/470, loss: 0.015211334452033043 2023-01-24 05:05:46.163924: step: 658/470, loss: 0.2973870038986206 2023-01-24 05:05:46.892717: step: 660/470, loss: 0.035559751093387604 2023-01-24 05:05:47.568400: step: 662/470, loss: 0.026579704135656357 2023-01-24 05:05:48.320737: step: 664/470, loss: 0.02378200925886631 2023-01-24 05:05:49.092402: step: 666/470, loss: 0.08443901687860489 2023-01-24 05:05:49.804611: step: 668/470, loss: 0.006817949004471302 2023-01-24 05:05:50.529158: step: 670/470, loss: 0.08191236853599548 2023-01-24 05:05:51.332622: step: 672/470, loss: 0.00972555298358202 2023-01-24 05:05:52.021112: step: 674/470, loss: 0.002309043426066637 2023-01-24 05:05:52.733197: step: 676/470, loss: 0.03206325322389603 2023-01-24 05:05:53.466150: step: 678/470, loss: 0.023099826648831367 2023-01-24 05:05:54.157657: step: 680/470, loss: 0.03047085553407669 2023-01-24 05:05:54.914739: step: 682/470, loss: 0.26479578018188477 2023-01-24 05:05:55.642338: step: 684/470, loss: 0.04502062872052193 2023-01-24 05:05:56.380748: step: 686/470, loss: 0.017808128148317337 2023-01-24 05:05:57.059906: step: 688/470, loss: 0.14120163023471832 2023-01-24 05:05:57.866501: step: 690/470, loss: 0.006991311442106962 2023-01-24 05:05:58.579485: step: 692/470, loss: 0.04135780781507492 2023-01-24 05:05:59.279256: step: 694/470, loss: 0.024161092936992645 2023-01-24 05:05:59.989167: step: 696/470, loss: 0.02334967628121376 2023-01-24 05:06:00.714004: step: 698/470, loss: 0.04725329205393791 2023-01-24 05:06:01.463374: step: 700/470, loss: 0.13663579523563385 2023-01-24 05:06:02.160499: step: 702/470, loss: 0.029831871390342712 2023-01-24 05:06:02.893902: step: 704/470, loss: 0.06156457960605621 2023-01-24 05:06:03.643682: step: 706/470, loss: 0.08219487965106964 2023-01-24 05:06:04.453252: step: 708/470, loss: 0.23499932885169983 2023-01-24 05:06:05.325856: step: 710/470, loss: 0.05763453617691994 2023-01-24 05:06:06.091514: step: 712/470, loss: 0.6921198964118958 2023-01-24 05:06:06.891919: step: 714/470, loss: 0.025135423988103867 2023-01-24 05:06:07.680313: step: 716/470, loss: 0.06057261303067207 2023-01-24 05:06:08.388072: step: 718/470, loss: 0.0005019385716877878 2023-01-24 05:06:09.158250: step: 720/470, loss: 0.03366491198539734 2023-01-24 05:06:09.927082: step: 722/470, loss: 0.0844867005944252 2023-01-24 05:06:10.650587: step: 724/470, loss: 0.001989529235288501 2023-01-24 05:06:11.650855: step: 726/470, loss: 0.05526869744062424 2023-01-24 05:06:12.376334: step: 728/470, loss: 0.003986681811511517 2023-01-24 05:06:13.076978: step: 730/470, loss: 0.06542991101741791 2023-01-24 05:06:13.759496: step: 732/470, loss: 0.024848083034157753 2023-01-24 05:06:14.513005: step: 734/470, loss: 0.017168505117297173 2023-01-24 05:06:15.206416: step: 736/470, loss: 0.00605942215770483 2023-01-24 05:06:15.851690: step: 738/470, loss: 0.013854175806045532 2023-01-24 05:06:16.610357: step: 740/470, loss: 0.02051333710551262 2023-01-24 05:06:17.385860: step: 742/470, loss: 0.01970795914530754 2023-01-24 05:06:18.160557: step: 744/470, loss: 0.052142977714538574 2023-01-24 05:06:18.977891: step: 746/470, loss: 0.03665049001574516 2023-01-24 05:06:19.644667: step: 748/470, loss: 0.014172352850437164 2023-01-24 05:06:20.358847: step: 750/470, loss: 0.0662955716252327 2023-01-24 05:06:21.069307: step: 752/470, loss: 0.04770228639245033 2023-01-24 05:06:21.846558: step: 754/470, loss: 0.024025220423936844 2023-01-24 05:06:22.535716: step: 756/470, loss: 0.02347307652235031 2023-01-24 05:06:23.403815: step: 758/470, loss: 0.03415545076131821 2023-01-24 05:06:24.225664: step: 760/470, loss: 0.08358060568571091 2023-01-24 05:06:24.913191: step: 762/470, loss: 0.07076893001794815 2023-01-24 05:06:25.696395: step: 764/470, loss: 0.9924178719520569 2023-01-24 05:06:26.482127: step: 766/470, loss: 0.02128605917096138 2023-01-24 05:06:27.168877: step: 768/470, loss: 0.060910508036613464 2023-01-24 05:06:27.854066: step: 770/470, loss: 0.010893095284700394 2023-01-24 05:06:28.572659: step: 772/470, loss: 0.004468762315809727 2023-01-24 05:06:29.259958: step: 774/470, loss: 0.0033063499722629786 2023-01-24 05:06:30.019319: step: 776/470, loss: 0.16144682466983795 2023-01-24 05:06:30.739760: step: 778/470, loss: 0.01916014961898327 2023-01-24 05:06:31.416582: step: 780/470, loss: 0.007597747258841991 2023-01-24 05:06:32.095062: step: 782/470, loss: 0.013567719608545303 2023-01-24 05:06:32.781586: step: 784/470, loss: 0.0008880794048309326 2023-01-24 05:06:33.515228: step: 786/470, loss: 0.015842584893107414 2023-01-24 05:06:34.210858: step: 788/470, loss: 0.0290867630392313 2023-01-24 05:06:34.944226: step: 790/470, loss: 0.5169112682342529 2023-01-24 05:06:35.698332: step: 792/470, loss: 0.012393763288855553 2023-01-24 05:06:36.368208: step: 794/470, loss: 0.010337642394006252 2023-01-24 05:06:37.087842: step: 796/470, loss: 0.20648834109306335 2023-01-24 05:06:37.832971: step: 798/470, loss: 0.07127867639064789 2023-01-24 05:06:38.529702: step: 800/470, loss: 0.022180991247296333 2023-01-24 05:06:39.317933: step: 802/470, loss: 0.17413491010665894 2023-01-24 05:06:40.052992: step: 804/470, loss: 0.056421320885419846 2023-01-24 05:06:40.730071: step: 806/470, loss: 0.01221885159611702 2023-01-24 05:06:41.406068: step: 808/470, loss: 0.03740542382001877 2023-01-24 05:06:42.153927: step: 810/470, loss: 0.10675573348999023 2023-01-24 05:06:42.887125: step: 812/470, loss: 0.0472906269133091 2023-01-24 05:06:43.620410: step: 814/470, loss: 0.005474620033055544 2023-01-24 05:06:44.369062: step: 816/470, loss: 0.0021934225223958492 2023-01-24 05:06:45.123032: step: 818/470, loss: 0.028478167951107025 2023-01-24 05:06:45.862962: step: 820/470, loss: 0.07020837813615799 2023-01-24 05:06:46.538791: step: 822/470, loss: 0.004713045433163643 2023-01-24 05:06:47.316273: step: 824/470, loss: 0.034453753381967545 2023-01-24 05:06:48.116751: step: 826/470, loss: 0.08387379348278046 2023-01-24 05:06:48.779954: step: 828/470, loss: 0.008352800272405148 2023-01-24 05:06:49.569805: step: 830/470, loss: 0.06293504685163498 2023-01-24 05:06:50.239736: step: 832/470, loss: 0.05639214813709259 2023-01-24 05:06:50.980289: step: 834/470, loss: 0.016982192173600197 2023-01-24 05:06:51.710455: step: 836/470, loss: 0.08415093272924423 2023-01-24 05:06:52.394130: step: 838/470, loss: 0.0067184302024543285 2023-01-24 05:06:53.158692: step: 840/470, loss: 0.01616278663277626 2023-01-24 05:06:53.906210: step: 842/470, loss: 0.035632479935884476 2023-01-24 05:06:55.352938: step: 844/470, loss: 0.10122037678956985 2023-01-24 05:06:56.174921: step: 846/470, loss: 0.06999140232801437 2023-01-24 05:06:56.903884: step: 848/470, loss: 0.0007854777504689991 2023-01-24 05:06:57.653933: step: 850/470, loss: 0.0418260395526886 2023-01-24 05:06:58.423380: step: 852/470, loss: 0.15163059532642365 2023-01-24 05:06:59.254502: step: 854/470, loss: 0.05955205485224724 2023-01-24 05:07:00.114692: step: 856/470, loss: 0.005851359572261572 2023-01-24 05:07:00.889785: step: 858/470, loss: 0.025483133271336555 2023-01-24 05:07:01.620133: step: 860/470, loss: 0.03567005693912506 2023-01-24 05:07:02.332709: step: 862/470, loss: 0.007423198316246271 2023-01-24 05:07:03.067097: step: 864/470, loss: 0.00906333327293396 2023-01-24 05:07:03.903345: step: 866/470, loss: 0.02991068735718727 2023-01-24 05:07:04.644219: step: 868/470, loss: 0.03796318545937538 2023-01-24 05:07:05.270876: step: 870/470, loss: 0.03030499443411827 2023-01-24 05:07:06.065688: step: 872/470, loss: 0.003593269968405366 2023-01-24 05:07:06.753097: step: 874/470, loss: 0.0007432375568896532 2023-01-24 05:07:07.426377: step: 876/470, loss: 0.06043066084384918 2023-01-24 05:07:08.122168: step: 878/470, loss: 0.04188834875822067 2023-01-24 05:07:08.813143: step: 880/470, loss: 0.004259438719600439 2023-01-24 05:07:09.590387: step: 882/470, loss: 0.2212732881307602 2023-01-24 05:07:10.353942: step: 884/470, loss: 0.01849282905459404 2023-01-24 05:07:11.158424: step: 886/470, loss: 0.087897889316082 2023-01-24 05:07:11.996363: step: 888/470, loss: 0.028527600690722466 2023-01-24 05:07:12.771248: step: 890/470, loss: 0.009783981367945671 2023-01-24 05:07:13.418423: step: 892/470, loss: 0.010732408612966537 2023-01-24 05:07:14.095072: step: 894/470, loss: 0.005476772785186768 2023-01-24 05:07:14.809557: step: 896/470, loss: 0.11816710978746414 2023-01-24 05:07:15.528822: step: 898/470, loss: 0.03139311075210571 2023-01-24 05:07:16.206809: step: 900/470, loss: 0.05651181936264038 2023-01-24 05:07:16.847812: step: 902/470, loss: 0.011831426993012428 2023-01-24 05:07:17.547162: step: 904/470, loss: 0.05420336499810219 2023-01-24 05:07:18.250661: step: 906/470, loss: 0.06597546488046646 2023-01-24 05:07:19.042692: step: 908/470, loss: 0.6028205156326294 2023-01-24 05:07:19.811006: step: 910/470, loss: 0.036814119666814804 2023-01-24 05:07:20.774315: step: 912/470, loss: 0.022296173498034477 2023-01-24 05:07:21.555791: step: 914/470, loss: 0.02219575271010399 2023-01-24 05:07:22.342722: step: 916/470, loss: 0.041941218078136444 2023-01-24 05:07:23.113835: step: 918/470, loss: 0.010028064250946045 2023-01-24 05:07:23.781175: step: 920/470, loss: 0.04588298127055168 2023-01-24 05:07:24.509621: step: 922/470, loss: 0.054243478924036026 2023-01-24 05:07:25.362050: step: 924/470, loss: 0.1619579941034317 2023-01-24 05:07:26.169831: step: 926/470, loss: 0.04308824613690376 2023-01-24 05:07:26.885891: step: 928/470, loss: 0.0242212675511837 2023-01-24 05:07:27.598443: step: 930/470, loss: 0.0006321282708086073 2023-01-24 05:07:28.293842: step: 932/470, loss: 0.01612095721065998 2023-01-24 05:07:29.141609: step: 934/470, loss: 0.0768963098526001 2023-01-24 05:07:29.855929: step: 936/470, loss: 0.009668453596532345 2023-01-24 05:07:30.601984: step: 938/470, loss: 0.06522442400455475 2023-01-24 05:07:31.356808: step: 940/470, loss: 0.04865960404276848 2023-01-24 05:07:32.065848: step: 942/470, loss: 0.011983740143477917 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33732090809296694, 'r': 0.3315602094727835, 'f1': 0.33441575194671175}, 'combined': 0.24641160669757706, 'epoch': 25} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3501057576502552, 'r': 0.33327375007091603, 'f1': 0.3414824631268499}, 'combined': 0.22765497541789986, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3326379215998408, 'r': 0.33390030460401476, 'f1': 0.33326791766347685}, 'combined': 0.24556583406782503, 'epoch': 25} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3501837579007456, 'r': 0.32560355181732786, 'f1': 0.33744663068263175}, 'combined': 0.2249644204550878, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3227413306549009, 'r': 0.3441757643796097, 'f1': 0.33311410069431463}, 'combined': 0.24545249524844234, 'epoch': 25} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34602943300066596, 'r': 0.3417040650881576, 'f1': 0.3438531472585234}, 'combined': 0.2292354315056822, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3107142857142857, 'r': 0.3107142857142857, 'f1': 0.3107142857142857}, 'combined': 0.20714285714285713, 'epoch': 25} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.7083333333333334, 'r': 0.3695652173913043, 'f1': 0.4857142857142857}, 'combined': 0.32380952380952377, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3247302029731275, 'r': 0.326578762003335, 'f1': 0.3256518591783492}, 'combined': 0.23995400149983626, 'epoch': 24} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3462616463218727, 'r': 0.359579401949637, 'f1': 0.35279488493171934}, 'combined': 0.2351965899544795, 'epoch': 24} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 24} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:10:07.204708: step: 2/470, loss: 0.019485026597976685 2023-01-24 05:10:07.958012: step: 4/470, loss: 0.04196685925126076 2023-01-24 05:10:08.676055: step: 6/470, loss: 0.048435427248477936 2023-01-24 05:10:09.404737: step: 8/470, loss: 0.08164027333259583 2023-01-24 05:10:10.174345: step: 10/470, loss: 0.031989555805921555 2023-01-24 05:10:10.955859: step: 12/470, loss: 0.03375351428985596 2023-01-24 05:10:11.692438: step: 14/470, loss: 0.04762651026248932 2023-01-24 05:10:12.469965: step: 16/470, loss: 0.030239004641771317 2023-01-24 05:10:13.139500: step: 18/470, loss: 0.00040057190926745534 2023-01-24 05:10:13.873237: step: 20/470, loss: 0.010928238742053509 2023-01-24 05:10:14.628052: step: 22/470, loss: 0.013470311649143696 2023-01-24 05:10:15.396034: step: 24/470, loss: 0.04967685043811798 2023-01-24 05:10:16.096728: step: 26/470, loss: 0.023781154304742813 2023-01-24 05:10:16.828813: step: 28/470, loss: 0.015070714987814426 2023-01-24 05:10:17.556620: step: 30/470, loss: 0.00012959256127942353 2023-01-24 05:10:18.297734: step: 32/470, loss: 0.020162206143140793 2023-01-24 05:10:19.065332: step: 34/470, loss: 0.05546073988080025 2023-01-24 05:10:19.719599: step: 36/470, loss: 0.06939268857240677 2023-01-24 05:10:20.517762: step: 38/470, loss: 0.0007489689160138369 2023-01-24 05:10:21.278437: step: 40/470, loss: 0.014805969782173634 2023-01-24 05:10:21.942614: step: 42/470, loss: 0.02614147588610649 2023-01-24 05:10:22.676795: step: 44/470, loss: 0.11476456373929977 2023-01-24 05:10:23.358272: step: 46/470, loss: 0.000534221762791276 2023-01-24 05:10:24.116701: step: 48/470, loss: 0.005874789785593748 2023-01-24 05:10:24.871834: step: 50/470, loss: 0.0771356076002121 2023-01-24 05:10:25.573547: step: 52/470, loss: 0.016250325366854668 2023-01-24 05:10:26.366169: step: 54/470, loss: 0.06606414169073105 2023-01-24 05:10:27.051255: step: 56/470, loss: 0.16989466547966003 2023-01-24 05:10:27.733213: step: 58/470, loss: 0.025766367092728615 2023-01-24 05:10:28.419991: step: 60/470, loss: 0.004818424582481384 2023-01-24 05:10:29.202517: step: 62/470, loss: 0.20682062208652496 2023-01-24 05:10:29.904268: step: 64/470, loss: 0.04396335780620575 2023-01-24 05:10:30.624363: step: 66/470, loss: 0.12180660665035248 2023-01-24 05:10:31.356102: step: 68/470, loss: 0.03703419491648674 2023-01-24 05:10:32.160871: step: 70/470, loss: 0.0014291710685938597 2023-01-24 05:10:32.897313: step: 72/470, loss: 0.046893779188394547 2023-01-24 05:10:33.575617: step: 74/470, loss: 0.012694248929619789 2023-01-24 05:10:34.357717: step: 76/470, loss: 0.019860833883285522 2023-01-24 05:10:35.082300: step: 78/470, loss: 1.4195009469985962 2023-01-24 05:10:35.806409: step: 80/470, loss: 0.23771710693836212 2023-01-24 05:10:36.653725: step: 82/470, loss: 0.24715445935726166 2023-01-24 05:10:37.409716: step: 84/470, loss: 0.01750795915722847 2023-01-24 05:10:38.149433: step: 86/470, loss: 0.20475627481937408 2023-01-24 05:10:38.949324: step: 88/470, loss: 0.0445147342979908 2023-01-24 05:10:39.641324: step: 90/470, loss: 0.0016261684941127896 2023-01-24 05:10:40.472442: step: 92/470, loss: 0.011423871852457523 2023-01-24 05:10:41.271088: step: 94/470, loss: 0.21087023615837097 2023-01-24 05:10:42.002402: step: 96/470, loss: 0.006003097631037235 2023-01-24 05:10:42.677684: step: 98/470, loss: 0.04363858327269554 2023-01-24 05:10:43.290611: step: 100/470, loss: 0.04111883044242859 2023-01-24 05:10:44.022633: step: 102/470, loss: 0.00875331461429596 2023-01-24 05:10:44.765015: step: 104/470, loss: 0.01105851773172617 2023-01-24 05:10:45.516532: step: 106/470, loss: 0.05842787027359009 2023-01-24 05:10:46.307426: step: 108/470, loss: 0.02871653437614441 2023-01-24 05:10:47.019278: step: 110/470, loss: 0.6604738235473633 2023-01-24 05:10:47.725698: step: 112/470, loss: 0.02996966242790222 2023-01-24 05:10:48.466286: step: 114/470, loss: 0.040596798062324524 2023-01-24 05:10:49.232959: step: 116/470, loss: 2.317312717437744 2023-01-24 05:10:49.991028: step: 118/470, loss: 0.00040206126868724823 2023-01-24 05:10:50.713491: step: 120/470, loss: 0.0035905339755117893 2023-01-24 05:10:51.473311: step: 122/470, loss: 0.006906208582222462 2023-01-24 05:10:52.209462: step: 124/470, loss: 0.08038660883903503 2023-01-24 05:10:52.917676: step: 126/470, loss: 0.003461079904809594 2023-01-24 05:10:53.695806: step: 128/470, loss: 0.012931152246892452 2023-01-24 05:10:54.424527: step: 130/470, loss: 0.04952579736709595 2023-01-24 05:10:55.177398: step: 132/470, loss: 0.5545482635498047 2023-01-24 05:10:55.902718: step: 134/470, loss: 0.009975926019251347 2023-01-24 05:10:56.652230: step: 136/470, loss: 0.0052613853476941586 2023-01-24 05:10:57.535411: step: 138/470, loss: 0.005113921128213406 2023-01-24 05:10:58.236995: step: 140/470, loss: 0.062149327248334885 2023-01-24 05:10:59.017573: step: 142/470, loss: 0.18093203008174896 2023-01-24 05:10:59.784140: step: 144/470, loss: 0.021489135921001434 2023-01-24 05:11:00.487339: step: 146/470, loss: 0.07839653640985489 2023-01-24 05:11:01.315240: step: 148/470, loss: 0.04690365120768547 2023-01-24 05:11:02.127342: step: 150/470, loss: 0.03063216619193554 2023-01-24 05:11:02.829539: step: 152/470, loss: 0.03267190232872963 2023-01-24 05:11:03.535849: step: 154/470, loss: 0.035660747438669205 2023-01-24 05:11:04.250144: step: 156/470, loss: 0.09285139292478561 2023-01-24 05:11:04.959259: step: 158/470, loss: 0.0034859278239309788 2023-01-24 05:11:05.662421: step: 160/470, loss: 0.018955281004309654 2023-01-24 05:11:06.525860: step: 162/470, loss: 0.0037439637817442417 2023-01-24 05:11:07.235060: step: 164/470, loss: 0.0017843381501734257 2023-01-24 05:11:08.028164: step: 166/470, loss: 0.04432709515094757 2023-01-24 05:11:08.836453: step: 168/470, loss: 0.04893715679645538 2023-01-24 05:11:09.518484: step: 170/470, loss: 0.0067788949236273766 2023-01-24 05:11:10.249115: step: 172/470, loss: 0.03473373129963875 2023-01-24 05:11:10.967403: step: 174/470, loss: 0.018431130796670914 2023-01-24 05:11:11.655424: step: 176/470, loss: 0.36892861127853394 2023-01-24 05:11:12.370585: step: 178/470, loss: 0.0027816283982247114 2023-01-24 05:11:13.171117: step: 180/470, loss: 0.15821048617362976 2023-01-24 05:11:13.929188: step: 182/470, loss: 0.6537167429924011 2023-01-24 05:11:14.629195: step: 184/470, loss: 0.0012202489888295531 2023-01-24 05:11:15.338502: step: 186/470, loss: 0.011547365225851536 2023-01-24 05:11:16.063355: step: 188/470, loss: 0.03449565917253494 2023-01-24 05:11:16.741267: step: 190/470, loss: 0.007175063248723745 2023-01-24 05:11:17.477495: step: 192/470, loss: 0.05448294058442116 2023-01-24 05:11:18.218093: step: 194/470, loss: 0.013299376703798771 2023-01-24 05:11:19.039129: step: 196/470, loss: 0.02153635025024414 2023-01-24 05:11:19.892579: step: 198/470, loss: 0.06576363742351532 2023-01-24 05:11:20.678883: step: 200/470, loss: 0.028407655656337738 2023-01-24 05:11:21.422991: step: 202/470, loss: 0.021715879440307617 2023-01-24 05:11:22.205076: step: 204/470, loss: 0.009564381092786789 2023-01-24 05:11:23.064073: step: 206/470, loss: 0.010834124870598316 2023-01-24 05:11:23.759143: step: 208/470, loss: 0.002131436485797167 2023-01-24 05:11:24.522430: step: 210/470, loss: 0.04402882605791092 2023-01-24 05:11:25.291480: step: 212/470, loss: 0.045613422989845276 2023-01-24 05:11:25.991971: step: 214/470, loss: 0.12864279747009277 2023-01-24 05:11:26.689542: step: 216/470, loss: 0.056581586599349976 2023-01-24 05:11:27.431405: step: 218/470, loss: 0.022101709619164467 2023-01-24 05:11:28.216575: step: 220/470, loss: 0.009549708105623722 2023-01-24 05:11:28.932452: step: 222/470, loss: 0.01309411134570837 2023-01-24 05:11:29.652482: step: 224/470, loss: 0.0033089620992541313 2023-01-24 05:11:30.344214: step: 226/470, loss: 0.023617535829544067 2023-01-24 05:11:31.046277: step: 228/470, loss: 0.00651435274630785 2023-01-24 05:11:31.786336: step: 230/470, loss: 0.004386777523905039 2023-01-24 05:11:32.453939: step: 232/470, loss: 0.7032762169837952 2023-01-24 05:11:33.321408: step: 234/470, loss: 0.04321502149105072 2023-01-24 05:11:34.138571: step: 236/470, loss: 0.008020198903977871 2023-01-24 05:11:34.824400: step: 238/470, loss: 0.016117895022034645 2023-01-24 05:11:35.501598: step: 240/470, loss: 0.0072062062099576 2023-01-24 05:11:36.256417: step: 242/470, loss: 0.040230728685855865 2023-01-24 05:11:36.990977: step: 244/470, loss: 0.021644841879606247 2023-01-24 05:11:37.747522: step: 246/470, loss: 0.013207136653363705 2023-01-24 05:11:38.513031: step: 248/470, loss: 0.0457160547375679 2023-01-24 05:11:39.218722: step: 250/470, loss: 0.03336277976632118 2023-01-24 05:11:39.919129: step: 252/470, loss: 0.0007846613880246878 2023-01-24 05:11:40.616461: step: 254/470, loss: 0.016548141837120056 2023-01-24 05:11:41.384131: step: 256/470, loss: 0.029072560369968414 2023-01-24 05:11:42.096336: step: 258/470, loss: 0.023857641965150833 2023-01-24 05:11:42.782576: step: 260/470, loss: 0.016008907929062843 2023-01-24 05:11:43.495660: step: 262/470, loss: 0.005333620123565197 2023-01-24 05:11:44.299237: step: 264/470, loss: 0.025424938648939133 2023-01-24 05:11:44.992784: step: 266/470, loss: 0.008555452339351177 2023-01-24 05:11:45.687106: step: 268/470, loss: 0.13639460504055023 2023-01-24 05:11:46.473729: step: 270/470, loss: 0.06864644587039948 2023-01-24 05:11:47.157601: step: 272/470, loss: 0.00018786423606798053 2023-01-24 05:11:47.922369: step: 274/470, loss: 0.13025711476802826 2023-01-24 05:11:48.626276: step: 276/470, loss: 0.1863914430141449 2023-01-24 05:11:49.348253: step: 278/470, loss: 0.15763676166534424 2023-01-24 05:11:50.052785: step: 280/470, loss: 0.017980456352233887 2023-01-24 05:11:50.805892: step: 282/470, loss: 0.04068325459957123 2023-01-24 05:11:51.557967: step: 284/470, loss: 0.008834258653223515 2023-01-24 05:11:52.299951: step: 286/470, loss: 0.09366513043642044 2023-01-24 05:11:52.977060: step: 288/470, loss: 0.0012397804530337453 2023-01-24 05:11:53.796062: step: 290/470, loss: 0.09330645203590393 2023-01-24 05:11:54.488812: step: 292/470, loss: 0.0060478076338768005 2023-01-24 05:11:55.241719: step: 294/470, loss: 0.01826561614871025 2023-01-24 05:11:56.082568: step: 296/470, loss: 0.016506120562553406 2023-01-24 05:11:56.748107: step: 298/470, loss: 0.018143299967050552 2023-01-24 05:11:57.469978: step: 300/470, loss: 0.002731665037572384 2023-01-24 05:11:58.221018: step: 302/470, loss: 0.042497992515563965 2023-01-24 05:11:58.912916: step: 304/470, loss: 0.0025194089394062757 2023-01-24 05:11:59.624730: step: 306/470, loss: 0.035188719630241394 2023-01-24 05:12:00.343425: step: 308/470, loss: 0.004296632017940283 2023-01-24 05:12:01.067136: step: 310/470, loss: 0.001335528795607388 2023-01-24 05:12:01.834725: step: 312/470, loss: 0.039289023727178574 2023-01-24 05:12:02.595034: step: 314/470, loss: 0.01676071807742119 2023-01-24 05:12:03.391884: step: 316/470, loss: 0.005507394205778837 2023-01-24 05:12:04.058724: step: 318/470, loss: 0.0006347045418806374 2023-01-24 05:12:04.693285: step: 320/470, loss: 0.0023768669925630093 2023-01-24 05:12:05.379915: step: 322/470, loss: 0.02550424449145794 2023-01-24 05:12:05.998339: step: 324/470, loss: 0.013288677670061588 2023-01-24 05:12:06.661330: step: 326/470, loss: 0.00595318665727973 2023-01-24 05:12:07.462170: step: 328/470, loss: 0.09272654354572296 2023-01-24 05:12:08.216780: step: 330/470, loss: 0.007605270016938448 2023-01-24 05:12:08.928143: step: 332/470, loss: 0.010236957110464573 2023-01-24 05:12:09.618411: step: 334/470, loss: 0.007872911170125008 2023-01-24 05:12:10.328815: step: 336/470, loss: 0.0018507775384932756 2023-01-24 05:12:11.087145: step: 338/470, loss: 0.028767062351107597 2023-01-24 05:12:11.813481: step: 340/470, loss: 0.01310647651553154 2023-01-24 05:12:12.587279: step: 342/470, loss: 0.03576286509633064 2023-01-24 05:12:13.318980: step: 344/470, loss: 0.01042783074080944 2023-01-24 05:12:14.029167: step: 346/470, loss: 0.4090971052646637 2023-01-24 05:12:14.978180: step: 348/470, loss: 0.006796136498451233 2023-01-24 05:12:15.714160: step: 350/470, loss: 0.039724674075841904 2023-01-24 05:12:16.476791: step: 352/470, loss: 0.010841785930097103 2023-01-24 05:12:17.249355: step: 354/470, loss: 0.03900299593806267 2023-01-24 05:12:18.066448: step: 356/470, loss: 0.048096682876348495 2023-01-24 05:12:18.776810: step: 358/470, loss: 0.0033374207559973 2023-01-24 05:12:19.523714: step: 360/470, loss: 0.08358647674322128 2023-01-24 05:12:20.286971: step: 362/470, loss: 0.2687551975250244 2023-01-24 05:12:21.018185: step: 364/470, loss: 0.01693040318787098 2023-01-24 05:12:21.724518: step: 366/470, loss: 0.005727311596274376 2023-01-24 05:12:22.422747: step: 368/470, loss: 0.0020413741003721952 2023-01-24 05:12:23.187258: step: 370/470, loss: 0.021475672721862793 2023-01-24 05:12:23.960902: step: 372/470, loss: 0.19835321605205536 2023-01-24 05:12:24.652870: step: 374/470, loss: 0.01151078287512064 2023-01-24 05:12:25.529165: step: 376/470, loss: 0.015609940513968468 2023-01-24 05:12:26.255950: step: 378/470, loss: 0.04101203382015228 2023-01-24 05:12:26.980775: step: 380/470, loss: 0.006876502186059952 2023-01-24 05:12:27.812482: step: 382/470, loss: 1.5188071727752686 2023-01-24 05:12:28.540217: step: 384/470, loss: 0.03681845963001251 2023-01-24 05:12:29.242126: step: 386/470, loss: 0.05127471312880516 2023-01-24 05:12:29.962136: step: 388/470, loss: 0.018120506778359413 2023-01-24 05:12:30.684545: step: 390/470, loss: 0.01568225957453251 2023-01-24 05:12:31.337128: step: 392/470, loss: 0.058441437780857086 2023-01-24 05:12:32.164445: step: 394/470, loss: 0.013389245606958866 2023-01-24 05:12:32.931571: step: 396/470, loss: 0.0006397226825356483 2023-01-24 05:12:33.696881: step: 398/470, loss: 0.026342155411839485 2023-01-24 05:12:34.509788: step: 400/470, loss: 0.044728025794029236 2023-01-24 05:12:35.215628: step: 402/470, loss: 0.008601291105151176 2023-01-24 05:12:35.948196: step: 404/470, loss: 0.04410114884376526 2023-01-24 05:12:36.751549: step: 406/470, loss: 0.006642171647399664 2023-01-24 05:12:37.562630: step: 408/470, loss: 0.0413130447268486 2023-01-24 05:12:38.326886: step: 410/470, loss: 0.04338083043694496 2023-01-24 05:12:38.993229: step: 412/470, loss: 0.006818010471761227 2023-01-24 05:12:39.839598: step: 414/470, loss: 0.03733073174953461 2023-01-24 05:12:40.611122: step: 416/470, loss: 0.1958683431148529 2023-01-24 05:12:41.395446: step: 418/470, loss: 0.0009550434770062566 2023-01-24 05:12:42.151517: step: 420/470, loss: 0.020624913275241852 2023-01-24 05:12:42.790989: step: 422/470, loss: 0.004369684495031834 2023-01-24 05:12:43.497230: step: 424/470, loss: 0.029023706912994385 2023-01-24 05:12:44.271711: step: 426/470, loss: 0.03372396156191826 2023-01-24 05:12:45.011376: step: 428/470, loss: 0.1394890546798706 2023-01-24 05:12:45.707624: step: 430/470, loss: 0.03233359009027481 2023-01-24 05:12:46.580173: step: 432/470, loss: 0.019605809822678566 2023-01-24 05:12:47.386791: step: 434/470, loss: 0.02610873058438301 2023-01-24 05:12:48.175703: step: 436/470, loss: 0.04683273285627365 2023-01-24 05:12:49.009171: step: 438/470, loss: 0.049369730055332184 2023-01-24 05:12:49.704680: step: 440/470, loss: 0.0025972574949264526 2023-01-24 05:12:50.433739: step: 442/470, loss: 0.011458461172878742 2023-01-24 05:12:51.200789: step: 444/470, loss: 0.008207599632441998 2023-01-24 05:12:52.036065: step: 446/470, loss: 0.0014893775805830956 2023-01-24 05:12:52.733965: step: 448/470, loss: 0.0214060191065073 2023-01-24 05:12:53.429813: step: 450/470, loss: 0.03423811122775078 2023-01-24 05:12:54.150860: step: 452/470, loss: 0.023825276643037796 2023-01-24 05:12:54.802484: step: 454/470, loss: 0.00232740119099617 2023-01-24 05:12:55.557827: step: 456/470, loss: 0.06115736439824104 2023-01-24 05:12:56.398925: step: 458/470, loss: 0.3774428963661194 2023-01-24 05:12:57.131964: step: 460/470, loss: 0.02544984221458435 2023-01-24 05:12:57.844229: step: 462/470, loss: 0.0458686426281929 2023-01-24 05:12:58.556729: step: 464/470, loss: 0.0019502779468894005 2023-01-24 05:12:59.152493: step: 466/470, loss: 0.0001484197418903932 2023-01-24 05:12:59.833869: step: 468/470, loss: 0.019259247928857803 2023-01-24 05:13:00.633396: step: 470/470, loss: 0.04199659079313278 2023-01-24 05:13:01.396199: step: 472/470, loss: 0.0764729380607605 2023-01-24 05:13:02.113159: step: 474/470, loss: 0.012650109827518463 2023-01-24 05:13:02.856215: step: 476/470, loss: 0.19659210741519928 2023-01-24 05:13:03.546023: step: 478/470, loss: 0.016243983060121536 2023-01-24 05:13:04.302488: step: 480/470, loss: 0.7099974751472473 2023-01-24 05:13:05.033581: step: 482/470, loss: 0.008293403312563896 2023-01-24 05:13:05.761379: step: 484/470, loss: 0.024716457352042198 2023-01-24 05:13:06.517576: step: 486/470, loss: 0.005962827242910862 2023-01-24 05:13:07.222330: step: 488/470, loss: 0.0011397538473829627 2023-01-24 05:13:08.028607: step: 490/470, loss: 0.007134813815355301 2023-01-24 05:13:08.792905: step: 492/470, loss: 0.1319524198770523 2023-01-24 05:13:09.508700: step: 494/470, loss: 0.5560120344161987 2023-01-24 05:13:10.181899: step: 496/470, loss: 0.058050792664289474 2023-01-24 05:13:10.880196: step: 498/470, loss: 0.025943944230675697 2023-01-24 05:13:11.617132: step: 500/470, loss: 0.031523432582616806 2023-01-24 05:13:12.352675: step: 502/470, loss: 0.008412609808146954 2023-01-24 05:13:13.043109: step: 504/470, loss: 0.009613175876438618 2023-01-24 05:13:13.800924: step: 506/470, loss: 0.01078837364912033 2023-01-24 05:13:14.573675: step: 508/470, loss: 0.0254935584962368 2023-01-24 05:13:15.249329: step: 510/470, loss: 0.009062767960131168 2023-01-24 05:13:15.945963: step: 512/470, loss: 0.048958227038383484 2023-01-24 05:13:16.664522: step: 514/470, loss: 0.015435706824064255 2023-01-24 05:13:17.416735: step: 516/470, loss: 0.017597481608390808 2023-01-24 05:13:18.104438: step: 518/470, loss: 0.01942635513842106 2023-01-24 05:13:18.787580: step: 520/470, loss: 0.007770019117742777 2023-01-24 05:13:19.514522: step: 522/470, loss: 0.0021495139226317406 2023-01-24 05:13:20.309940: step: 524/470, loss: 0.034324824810028076 2023-01-24 05:13:21.091392: step: 526/470, loss: 0.4315849840641022 2023-01-24 05:13:21.808046: step: 528/470, loss: 0.007081069517880678 2023-01-24 05:13:22.470066: step: 530/470, loss: 0.02178391069173813 2023-01-24 05:13:23.265696: step: 532/470, loss: 0.01920427940785885 2023-01-24 05:13:24.034122: step: 534/470, loss: 0.005591890309005976 2023-01-24 05:13:24.764241: step: 536/470, loss: 0.011183743365108967 2023-01-24 05:13:25.459585: step: 538/470, loss: 0.016883065924048424 2023-01-24 05:13:26.153671: step: 540/470, loss: 0.002876394661143422 2023-01-24 05:13:27.010730: step: 542/470, loss: 0.007916656322777271 2023-01-24 05:13:27.776924: step: 544/470, loss: 0.023198112845420837 2023-01-24 05:13:28.527402: step: 546/470, loss: 0.004471739754080772 2023-01-24 05:13:29.214272: step: 548/470, loss: 0.14783816039562225 2023-01-24 05:13:29.874995: step: 550/470, loss: 0.004815774969756603 2023-01-24 05:13:30.570605: step: 552/470, loss: 0.01722968928515911 2023-01-24 05:13:31.277334: step: 554/470, loss: 0.30158334970474243 2023-01-24 05:13:32.005375: step: 556/470, loss: 0.055766280740499496 2023-01-24 05:13:32.684715: step: 558/470, loss: 0.033450160175561905 2023-01-24 05:13:33.396057: step: 560/470, loss: 0.03725216165184975 2023-01-24 05:13:34.231093: step: 562/470, loss: 0.0021749266888946295 2023-01-24 05:13:35.022804: step: 564/470, loss: 0.006228404585272074 2023-01-24 05:13:35.810815: step: 566/470, loss: 0.003034188412129879 2023-01-24 05:13:36.530888: step: 568/470, loss: 0.0010111165465787053 2023-01-24 05:13:37.296201: step: 570/470, loss: 0.7480594515800476 2023-01-24 05:13:38.224250: step: 572/470, loss: 0.15064027905464172 2023-01-24 05:13:38.916003: step: 574/470, loss: 0.0038994362112134695 2023-01-24 05:13:39.594935: step: 576/470, loss: 0.00042104304884560406 2023-01-24 05:13:40.322480: step: 578/470, loss: 0.1475483477115631 2023-01-24 05:13:41.037824: step: 580/470, loss: 0.21218250691890717 2023-01-24 05:13:41.719632: step: 582/470, loss: 0.009204575791954994 2023-01-24 05:13:42.493876: step: 584/470, loss: 0.22064003348350525 2023-01-24 05:13:43.206014: step: 586/470, loss: 0.017769113183021545 2023-01-24 05:13:43.937039: step: 588/470, loss: 0.006627894006669521 2023-01-24 05:13:44.610253: step: 590/470, loss: 0.13599713146686554 2023-01-24 05:13:45.351020: step: 592/470, loss: 0.042819686233997345 2023-01-24 05:13:46.075993: step: 594/470, loss: 0.011788510717451572 2023-01-24 05:13:46.862587: step: 596/470, loss: 0.01742238737642765 2023-01-24 05:13:47.662446: step: 598/470, loss: 0.6136803030967712 2023-01-24 05:13:48.317148: step: 600/470, loss: 0.01876203715801239 2023-01-24 05:13:49.069200: step: 602/470, loss: 0.03775479272007942 2023-01-24 05:13:49.756373: step: 604/470, loss: 0.02878272905945778 2023-01-24 05:13:50.523566: step: 606/470, loss: 0.0009686170378699899 2023-01-24 05:13:51.327964: step: 608/470, loss: 0.065780408680439 2023-01-24 05:13:52.132487: step: 610/470, loss: 0.3254455327987671 2023-01-24 05:13:52.892243: step: 612/470, loss: 0.011034596711397171 2023-01-24 05:13:53.656325: step: 614/470, loss: 0.032889679074287415 2023-01-24 05:13:54.472319: step: 616/470, loss: 0.025389349088072777 2023-01-24 05:13:55.175676: step: 618/470, loss: 0.06543834507465363 2023-01-24 05:13:55.969943: step: 620/470, loss: 0.0328560434281826 2023-01-24 05:13:56.692315: step: 622/470, loss: 0.030454453080892563 2023-01-24 05:13:57.395520: step: 624/470, loss: 0.0002651447430253029 2023-01-24 05:13:58.174181: step: 626/470, loss: 0.008108319714665413 2023-01-24 05:13:58.868089: step: 628/470, loss: 0.028416186571121216 2023-01-24 05:13:59.581021: step: 630/470, loss: 0.014787995256483555 2023-01-24 05:14:00.338812: step: 632/470, loss: 0.4447817802429199 2023-01-24 05:14:01.074282: step: 634/470, loss: 0.00016679373220540583 2023-01-24 05:14:01.823470: step: 636/470, loss: 0.008493703790009022 2023-01-24 05:14:02.553355: step: 638/470, loss: 0.051174718886613846 2023-01-24 05:14:03.368753: step: 640/470, loss: 0.02636360004544258 2023-01-24 05:14:04.077095: step: 642/470, loss: 0.012426851317286491 2023-01-24 05:14:04.868589: step: 644/470, loss: 0.004039624240249395 2023-01-24 05:14:05.569075: step: 646/470, loss: 0.09385068714618683 2023-01-24 05:14:06.348560: step: 648/470, loss: 0.02949446067214012 2023-01-24 05:14:07.125347: step: 650/470, loss: 0.04192233458161354 2023-01-24 05:14:07.830858: step: 652/470, loss: 0.020343631505966187 2023-01-24 05:14:08.613217: step: 654/470, loss: 0.05575917288661003 2023-01-24 05:14:09.430710: step: 656/470, loss: 0.01872909814119339 2023-01-24 05:14:10.176793: step: 658/470, loss: 0.00723220594227314 2023-01-24 05:14:10.959255: step: 660/470, loss: 0.014894779771566391 2023-01-24 05:14:11.770188: step: 662/470, loss: 0.00329477246850729 2023-01-24 05:14:12.456805: step: 664/470, loss: 0.030976993963122368 2023-01-24 05:14:13.174124: step: 666/470, loss: 0.016939232125878334 2023-01-24 05:14:13.872823: step: 668/470, loss: 0.005721811670809984 2023-01-24 05:14:14.640997: step: 670/470, loss: 0.06608849763870239 2023-01-24 05:14:15.397923: step: 672/470, loss: 0.05654697120189667 2023-01-24 05:14:16.131274: step: 674/470, loss: 0.028054917231202126 2023-01-24 05:14:16.845136: step: 676/470, loss: 0.007750812452286482 2023-01-24 05:14:17.551422: step: 678/470, loss: 0.02219163253903389 2023-01-24 05:14:18.317815: step: 680/470, loss: 0.037425290793180466 2023-01-24 05:14:18.999228: step: 682/470, loss: 0.03459089994430542 2023-01-24 05:14:19.766228: step: 684/470, loss: 0.009903617203235626 2023-01-24 05:14:20.474245: step: 686/470, loss: 0.008926295675337315 2023-01-24 05:14:21.141220: step: 688/470, loss: 0.014270035549998283 2023-01-24 05:14:21.868639: step: 690/470, loss: 0.08540667593479156 2023-01-24 05:14:22.580289: step: 692/470, loss: 0.04163281247019768 2023-01-24 05:14:23.292362: step: 694/470, loss: 0.01574213244020939 2023-01-24 05:14:24.165960: step: 696/470, loss: 0.0054738158360123634 2023-01-24 05:14:24.921149: step: 698/470, loss: 0.20161749422550201 2023-01-24 05:14:25.634536: step: 700/470, loss: 0.4112975597381592 2023-01-24 05:14:26.336591: step: 702/470, loss: 0.00508745014667511 2023-01-24 05:14:27.001081: step: 704/470, loss: 0.00045277041499502957 2023-01-24 05:14:27.743546: step: 706/470, loss: 0.14759576320648193 2023-01-24 05:14:28.538374: step: 708/470, loss: 0.041470929980278015 2023-01-24 05:14:29.334110: step: 710/470, loss: 0.0423739068210125 2023-01-24 05:14:30.058351: step: 712/470, loss: 0.6495215892791748 2023-01-24 05:14:30.863352: step: 714/470, loss: 0.03245260939002037 2023-01-24 05:14:31.473635: step: 716/470, loss: 0.027776306495070457 2023-01-24 05:14:32.234022: step: 718/470, loss: 0.01630890928208828 2023-01-24 05:14:32.984305: step: 720/470, loss: 0.04993465170264244 2023-01-24 05:14:33.776132: step: 722/470, loss: 0.030327895656228065 2023-01-24 05:14:34.445335: step: 724/470, loss: 0.028972510248422623 2023-01-24 05:14:35.180633: step: 726/470, loss: 0.05604696646332741 2023-01-24 05:14:35.949375: step: 728/470, loss: 0.010281789116561413 2023-01-24 05:14:36.689441: step: 730/470, loss: 0.03586762771010399 2023-01-24 05:14:37.346842: step: 732/470, loss: 0.0005163901951164007 2023-01-24 05:14:38.053197: step: 734/470, loss: 0.01637759618461132 2023-01-24 05:14:38.698663: step: 736/470, loss: 0.022752603515982628 2023-01-24 05:14:39.476243: step: 738/470, loss: 0.015957066789269447 2023-01-24 05:14:40.141384: step: 740/470, loss: 0.013974886387586594 2023-01-24 05:14:40.826858: step: 742/470, loss: 0.007855596952140331 2023-01-24 05:14:41.556892: step: 744/470, loss: 0.01974407769739628 2023-01-24 05:14:42.305730: step: 746/470, loss: 0.019117096439003944 2023-01-24 05:14:43.044245: step: 748/470, loss: 0.03735620900988579 2023-01-24 05:14:43.754829: step: 750/470, loss: 0.08970566838979721 2023-01-24 05:14:44.442485: step: 752/470, loss: 0.018881360068917274 2023-01-24 05:14:45.154629: step: 754/470, loss: 0.001953059108927846 2023-01-24 05:14:45.964677: step: 756/470, loss: 0.691685140132904 2023-01-24 05:14:46.676017: step: 758/470, loss: 0.06533520668745041 2023-01-24 05:14:47.400840: step: 760/470, loss: 0.0035757829900830984 2023-01-24 05:14:48.109323: step: 762/470, loss: 0.00113035854883492 2023-01-24 05:14:48.835192: step: 764/470, loss: 0.02485186792910099 2023-01-24 05:14:49.572440: step: 766/470, loss: 0.004313065204769373 2023-01-24 05:14:50.262359: step: 768/470, loss: 0.016619572415947914 2023-01-24 05:14:51.071834: step: 770/470, loss: 0.004702294245362282 2023-01-24 05:14:51.864266: step: 772/470, loss: 0.054384443908929825 2023-01-24 05:14:52.596859: step: 774/470, loss: 0.00823363196104765 2023-01-24 05:14:53.375012: step: 776/470, loss: 0.0492500476539135 2023-01-24 05:14:54.105822: step: 778/470, loss: 0.0389709398150444 2023-01-24 05:14:54.851833: step: 780/470, loss: 0.03600706160068512 2023-01-24 05:14:55.510548: step: 782/470, loss: 0.007133893668651581 2023-01-24 05:14:56.213117: step: 784/470, loss: 0.045114368200302124 2023-01-24 05:14:57.054466: step: 786/470, loss: 0.016657711938023567 2023-01-24 05:14:57.834273: step: 788/470, loss: 0.025096865370869637 2023-01-24 05:14:58.572173: step: 790/470, loss: 1.0770021677017212 2023-01-24 05:14:59.346648: step: 792/470, loss: 0.9711056351661682 2023-01-24 05:15:00.037885: step: 794/470, loss: 0.014067924581468105 2023-01-24 05:15:00.764544: step: 796/470, loss: 0.04596575349569321 2023-01-24 05:15:01.623584: step: 798/470, loss: 0.0024457420222461224 2023-01-24 05:15:02.473246: step: 800/470, loss: 0.05553967505693436 2023-01-24 05:15:03.227264: step: 802/470, loss: 0.02744687721133232 2023-01-24 05:15:03.906259: step: 804/470, loss: 0.006461080629378557 2023-01-24 05:15:04.818350: step: 806/470, loss: 0.016188278794288635 2023-01-24 05:15:05.596417: step: 808/470, loss: 0.044517967849969864 2023-01-24 05:15:06.362984: step: 810/470, loss: 0.00267368508502841 2023-01-24 05:15:07.097703: step: 812/470, loss: 0.031701844185590744 2023-01-24 05:15:07.853984: step: 814/470, loss: 0.0011221464956179261 2023-01-24 05:15:08.615696: step: 816/470, loss: 0.013465669006109238 2023-01-24 05:15:09.367667: step: 818/470, loss: 0.2107423096895218 2023-01-24 05:15:10.149240: step: 820/470, loss: 0.028793897479772568 2023-01-24 05:15:10.991638: step: 822/470, loss: 0.01618114300072193 2023-01-24 05:15:11.735299: step: 824/470, loss: 0.09623509645462036 2023-01-24 05:15:12.489323: step: 826/470, loss: 0.041487544775009155 2023-01-24 05:15:13.217332: step: 828/470, loss: 0.011326837353408337 2023-01-24 05:15:13.923400: step: 830/470, loss: 0.045046452432870865 2023-01-24 05:15:14.753365: step: 832/470, loss: 0.10204900801181793 2023-01-24 05:15:15.601727: step: 834/470, loss: 0.07618521898984909 2023-01-24 05:15:16.424293: step: 836/470, loss: 0.08233476430177689 2023-01-24 05:15:17.168376: step: 838/470, loss: 0.06587542593479156 2023-01-24 05:15:17.864923: step: 840/470, loss: 0.01595766469836235 2023-01-24 05:15:18.583178: step: 842/470, loss: 0.0016488569090142846 2023-01-24 05:15:19.333054: step: 844/470, loss: 0.006250299047678709 2023-01-24 05:15:20.144899: step: 846/470, loss: 0.10261400043964386 2023-01-24 05:15:20.849012: step: 848/470, loss: 0.06171036139130592 2023-01-24 05:15:21.557808: step: 850/470, loss: 0.029668550938367844 2023-01-24 05:15:22.404017: step: 852/470, loss: 0.003090712008997798 2023-01-24 05:15:23.155427: step: 854/470, loss: 0.00039391950122080743 2023-01-24 05:15:23.879869: step: 856/470, loss: 0.0029829915147274733 2023-01-24 05:15:24.568138: step: 858/470, loss: 0.06782427430152893 2023-01-24 05:15:25.235271: step: 860/470, loss: 0.037175048142671585 2023-01-24 05:15:25.948770: step: 862/470, loss: 0.012290716171264648 2023-01-24 05:15:26.648666: step: 864/470, loss: 0.004582709167152643 2023-01-24 05:15:27.408040: step: 866/470, loss: 0.06253193318843842 2023-01-24 05:15:28.167208: step: 868/470, loss: 0.26506075263023376 2023-01-24 05:15:28.863656: step: 870/470, loss: 0.036856453865766525 2023-01-24 05:15:29.543360: step: 872/470, loss: 0.06115124374628067 2023-01-24 05:15:30.277401: step: 874/470, loss: 0.0035287141799926758 2023-01-24 05:15:30.951719: step: 876/470, loss: 0.010850594379007816 2023-01-24 05:15:31.731023: step: 878/470, loss: 0.022742489352822304 2023-01-24 05:15:32.489776: step: 880/470, loss: 0.1650201976299286 2023-01-24 05:15:33.245535: step: 882/470, loss: 0.021147647872567177 2023-01-24 05:15:33.978297: step: 884/470, loss: 0.02876165509223938 2023-01-24 05:15:34.696587: step: 886/470, loss: 0.031704407185316086 2023-01-24 05:15:35.386713: step: 888/470, loss: 0.020638834685087204 2023-01-24 05:15:36.083975: step: 890/470, loss: 0.020739523693919182 2023-01-24 05:15:36.776626: step: 892/470, loss: 0.02357344888150692 2023-01-24 05:15:37.496585: step: 894/470, loss: 0.009717467240989208 2023-01-24 05:15:38.182503: step: 896/470, loss: 0.033283960074186325 2023-01-24 05:15:38.995375: step: 898/470, loss: 0.04801145941019058 2023-01-24 05:15:39.758419: step: 900/470, loss: 0.0027546961791813374 2023-01-24 05:15:40.526458: step: 902/470, loss: 0.0054783690720796585 2023-01-24 05:15:41.348369: step: 904/470, loss: 0.14740252494812012 2023-01-24 05:15:42.084333: step: 906/470, loss: 0.047735873609781265 2023-01-24 05:15:42.819840: step: 908/470, loss: 0.01004134863615036 2023-01-24 05:15:43.550953: step: 910/470, loss: 0.048433274030685425 2023-01-24 05:15:44.341441: step: 912/470, loss: 0.018374241888523102 2023-01-24 05:15:45.088531: step: 914/470, loss: 0.015799539163708687 2023-01-24 05:15:45.814303: step: 916/470, loss: 0.01117359846830368 2023-01-24 05:15:46.593284: step: 918/470, loss: 0.012997347861528397 2023-01-24 05:15:47.304122: step: 920/470, loss: 0.02212097868323326 2023-01-24 05:15:47.994725: step: 922/470, loss: 0.08863328397274017 2023-01-24 05:15:48.738701: step: 924/470, loss: 0.008121066726744175 2023-01-24 05:15:49.497639: step: 926/470, loss: 0.2342129349708557 2023-01-24 05:15:50.252091: step: 928/470, loss: 0.011628974229097366 2023-01-24 05:15:50.983415: step: 930/470, loss: 0.019393447786569595 2023-01-24 05:15:51.788806: step: 932/470, loss: 0.013421921990811825 2023-01-24 05:15:52.539586: step: 934/470, loss: 0.3163841962814331 2023-01-24 05:15:53.251929: step: 936/470, loss: 0.003866309067234397 2023-01-24 05:15:53.985702: step: 938/470, loss: 0.04852140694856644 2023-01-24 05:15:54.699496: step: 940/470, loss: 0.008729308843612671 2023-01-24 05:15:55.351808: step: 942/470, loss: 0.04352358356118202 ================================================== Loss: 0.072 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33308853938597777, 'r': 0.33308853938597777, 'f1': 0.33308853938597777}, 'combined': 0.24543366060019414, 'epoch': 26} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35360932136690426, 'r': 0.3430690435184677, 'f1': 0.34825944876447673}, 'combined': 0.23217296584298444, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3302922190344282, 'r': 0.33342592130230697, 'f1': 0.33185167238208835}, 'combined': 0.24452228491311773, 'epoch': 26} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36202192906909336, 'r': 0.3411360485458764, 'f1': 0.3512688024630807}, 'combined': 0.23417920164205372, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32338929219600726, 'r': 0.33811669829222013, 'f1': 0.3305890538033395}, 'combined': 0.24359193438140805, 'epoch': 26} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.348896802572895, 'r': 0.3519161018259297, 'f1': 0.35039994820389364}, 'combined': 0.23359996546926237, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2881944444444444, 'r': 0.29642857142857143, 'f1': 0.2922535211267605}, 'combined': 0.19483568075117366, 'epoch': 26} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.48214285714285715, 'r': 0.29347826086956524, 'f1': 0.36486486486486486}, 'combined': 0.24324324324324323, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5535714285714286, 'r': 0.2672413793103448, 'f1': 0.36046511627906974}, 'combined': 0.24031007751937983, 'epoch': 26} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32338929219600726, 'r': 0.33811669829222013, 'f1': 0.3305890538033395}, 'combined': 0.24359193438140805, 'epoch': 26} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.348896802572895, 'r': 0.3519161018259297, 'f1': 0.35039994820389364}, 'combined': 0.23359996546926237, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5535714285714286, 'r': 0.2672413793103448, 'f1': 0.36046511627906974}, 'combined': 0.24031007751937983, 'epoch': 26} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:18:41.420609: step: 2/470, loss: 0.01683763414621353 2023-01-24 05:18:42.083303: step: 4/470, loss: 0.00944485329091549 2023-01-24 05:18:42.718268: step: 6/470, loss: 0.06548219174146652 2023-01-24 05:18:43.508846: step: 8/470, loss: 0.07034937292337418 2023-01-24 05:18:44.247957: step: 10/470, loss: 0.00018511965754441917 2023-01-24 05:18:44.968187: step: 12/470, loss: 0.007175577338784933 2023-01-24 05:18:45.621435: step: 14/470, loss: 0.0017143557779490948 2023-01-24 05:18:46.360217: step: 16/470, loss: 0.038774389773607254 2023-01-24 05:18:47.132876: step: 18/470, loss: 0.067210853099823 2023-01-24 05:18:47.952636: step: 20/470, loss: 0.01777871884405613 2023-01-24 05:18:48.649445: step: 22/470, loss: 0.00815779808908701 2023-01-24 05:18:49.301150: step: 24/470, loss: 0.00567712401971221 2023-01-24 05:18:50.043873: step: 26/470, loss: 0.022142581641674042 2023-01-24 05:18:50.825935: step: 28/470, loss: 0.009569667279720306 2023-01-24 05:18:51.526954: step: 30/470, loss: 0.0004666670865844935 2023-01-24 05:18:52.296720: step: 32/470, loss: 0.011350602842867374 2023-01-24 05:18:52.983050: step: 34/470, loss: 0.029375847429037094 2023-01-24 05:18:53.677221: step: 36/470, loss: 0.006464695557951927 2023-01-24 05:18:54.400783: step: 38/470, loss: 0.0230893325060606 2023-01-24 05:18:55.108820: step: 40/470, loss: 0.015446262434124947 2023-01-24 05:18:55.823814: step: 42/470, loss: 0.026244178414344788 2023-01-24 05:18:56.504211: step: 44/470, loss: 0.019167501479387283 2023-01-24 05:18:57.327917: step: 46/470, loss: 0.025599118322134018 2023-01-24 05:18:58.128230: step: 48/470, loss: 0.06443053483963013 2023-01-24 05:18:58.879248: step: 50/470, loss: 0.016018331050872803 2023-01-24 05:18:59.723460: step: 52/470, loss: 0.003692210651934147 2023-01-24 05:19:00.452295: step: 54/470, loss: 0.02155778743326664 2023-01-24 05:19:01.183096: step: 56/470, loss: 0.1240386813879013 2023-01-24 05:19:01.861935: step: 58/470, loss: 0.013327186927199364 2023-01-24 05:19:02.586471: step: 60/470, loss: 0.009797775186598301 2023-01-24 05:19:03.292849: step: 62/470, loss: 0.02393781952559948 2023-01-24 05:19:04.052910: step: 64/470, loss: 0.00024999401648528874 2023-01-24 05:19:04.774922: step: 66/470, loss: 0.014125204645097256 2023-01-24 05:19:05.586289: step: 68/470, loss: 0.012959994375705719 2023-01-24 05:19:06.317910: step: 70/470, loss: 0.012555788271129131 2023-01-24 05:19:07.047317: step: 72/470, loss: 0.010418666526675224 2023-01-24 05:19:07.727303: step: 74/470, loss: 0.025468451902270317 2023-01-24 05:19:08.442820: step: 76/470, loss: 0.005443628877401352 2023-01-24 05:19:09.168483: step: 78/470, loss: 0.017685700207948685 2023-01-24 05:19:09.922616: step: 80/470, loss: 0.04424380883574486 2023-01-24 05:19:10.674601: step: 82/470, loss: 0.037951018661260605 2023-01-24 05:19:11.426009: step: 84/470, loss: 0.009052631445229053 2023-01-24 05:19:12.175827: step: 86/470, loss: 0.0344005785882473 2023-01-24 05:19:12.879663: step: 88/470, loss: 0.007054035551846027 2023-01-24 05:19:13.578821: step: 90/470, loss: 0.019252652302384377 2023-01-24 05:19:14.450854: step: 92/470, loss: 0.008761339820921421 2023-01-24 05:19:15.340280: step: 94/470, loss: 0.02975519932806492 2023-01-24 05:19:16.144393: step: 96/470, loss: 0.006972640287131071 2023-01-24 05:19:16.909172: step: 98/470, loss: 0.03771801292896271 2023-01-24 05:19:17.595972: step: 100/470, loss: 0.017774201929569244 2023-01-24 05:19:18.340712: step: 102/470, loss: 0.03576158732175827 2023-01-24 05:19:19.176451: step: 104/470, loss: 0.056759800761938095 2023-01-24 05:19:19.957629: step: 106/470, loss: 0.061209507286548615 2023-01-24 05:19:20.658508: step: 108/470, loss: 0.0349019393324852 2023-01-24 05:19:21.407594: step: 110/470, loss: 0.014727793633937836 2023-01-24 05:19:22.188426: step: 112/470, loss: 0.003246006788685918 2023-01-24 05:19:23.038890: step: 114/470, loss: 0.08207906782627106 2023-01-24 05:19:23.846531: step: 116/470, loss: 0.016705762594938278 2023-01-24 05:19:24.656376: step: 118/470, loss: 0.012947305105626583 2023-01-24 05:19:25.401671: step: 120/470, loss: 0.034973785281181335 2023-01-24 05:19:26.146446: step: 122/470, loss: 0.007712268736213446 2023-01-24 05:19:26.890274: step: 124/470, loss: 0.020008675754070282 2023-01-24 05:19:27.721808: step: 126/470, loss: 0.023369409143924713 2023-01-24 05:19:28.474615: step: 128/470, loss: 0.014888121746480465 2023-01-24 05:19:29.194204: step: 130/470, loss: 0.05595388263463974 2023-01-24 05:19:29.968820: step: 132/470, loss: 0.01942775398492813 2023-01-24 05:19:30.723905: step: 134/470, loss: 0.02803257293999195 2023-01-24 05:19:31.415673: step: 136/470, loss: 0.023012829944491386 2023-01-24 05:19:32.093270: step: 138/470, loss: 0.0042619104497134686 2023-01-24 05:19:32.982242: step: 140/470, loss: 0.007180250249803066 2023-01-24 05:19:33.689418: step: 142/470, loss: 0.019708437845110893 2023-01-24 05:19:34.424213: step: 144/470, loss: 0.0061777918599545956 2023-01-24 05:19:35.211223: step: 146/470, loss: 0.0012747780419886112 2023-01-24 05:19:36.000796: step: 148/470, loss: 0.022044524550437927 2023-01-24 05:19:36.699246: step: 150/470, loss: 0.020499933511018753 2023-01-24 05:19:37.445900: step: 152/470, loss: 0.0026768154930323362 2023-01-24 05:19:38.384670: step: 154/470, loss: 0.010907319374382496 2023-01-24 05:19:39.113871: step: 156/470, loss: 0.005641602911055088 2023-01-24 05:19:39.865294: step: 158/470, loss: 0.013856947422027588 2023-01-24 05:19:40.639643: step: 160/470, loss: 0.010804500430822372 2023-01-24 05:19:41.337711: step: 162/470, loss: 0.015558190643787384 2023-01-24 05:19:42.089651: step: 164/470, loss: 0.3554113507270813 2023-01-24 05:19:42.791359: step: 166/470, loss: 0.12705135345458984 2023-01-24 05:19:43.494907: step: 168/470, loss: 0.02279657870531082 2023-01-24 05:19:44.204556: step: 170/470, loss: 0.07756506651639938 2023-01-24 05:19:44.878753: step: 172/470, loss: 0.012663285247981548 2023-01-24 05:19:45.582087: step: 174/470, loss: 8.404543768847361e-05 2023-01-24 05:19:46.357031: step: 176/470, loss: 0.02814382128417492 2023-01-24 05:19:47.090517: step: 178/470, loss: 0.02191387489438057 2023-01-24 05:19:47.831342: step: 180/470, loss: 0.011857813224196434 2023-01-24 05:19:48.563069: step: 182/470, loss: 0.12916842103004456 2023-01-24 05:19:49.349924: step: 184/470, loss: 0.07631305605173111 2023-01-24 05:19:50.081521: step: 186/470, loss: 0.028871390968561172 2023-01-24 05:19:50.810798: step: 188/470, loss: 0.033751778304576874 2023-01-24 05:19:51.655665: step: 190/470, loss: 0.13906638324260712 2023-01-24 05:19:52.418177: step: 192/470, loss: 0.03259306401014328 2023-01-24 05:19:53.173868: step: 194/470, loss: 0.0015212270664051175 2023-01-24 05:19:53.925079: step: 196/470, loss: 0.02239634282886982 2023-01-24 05:19:54.707299: step: 198/470, loss: 0.06503739953041077 2023-01-24 05:19:55.435270: step: 200/470, loss: 0.4716545343399048 2023-01-24 05:19:56.178891: step: 202/470, loss: 0.02160857617855072 2023-01-24 05:19:56.898997: step: 204/470, loss: 0.004003697074949741 2023-01-24 05:19:57.645264: step: 206/470, loss: 0.007205252069979906 2023-01-24 05:19:58.335279: step: 208/470, loss: 0.01853962056338787 2023-01-24 05:19:59.017307: step: 210/470, loss: 0.004126167390495539 2023-01-24 05:19:59.756079: step: 212/470, loss: 0.026516355574131012 2023-01-24 05:20:00.516186: step: 214/470, loss: 0.013195443898439407 2023-01-24 05:20:01.188725: step: 216/470, loss: 0.02729533053934574 2023-01-24 05:20:01.905198: step: 218/470, loss: 0.016501324251294136 2023-01-24 05:20:02.684303: step: 220/470, loss: 0.011663485318422318 2023-01-24 05:20:03.476031: step: 222/470, loss: 0.16120895743370056 2023-01-24 05:20:04.217236: step: 224/470, loss: 0.00041892650187946856 2023-01-24 05:20:04.993031: step: 226/470, loss: 0.032136738300323486 2023-01-24 05:20:05.780980: step: 228/470, loss: 0.023074127733707428 2023-01-24 05:20:06.519698: step: 230/470, loss: 0.02281912975013256 2023-01-24 05:20:07.318126: step: 232/470, loss: 0.012881905771791935 2023-01-24 05:20:08.090857: step: 234/470, loss: 0.01737522892653942 2023-01-24 05:20:08.894488: step: 236/470, loss: 0.32629257440567017 2023-01-24 05:20:09.658944: step: 238/470, loss: 0.015904588624835014 2023-01-24 05:20:10.485976: step: 240/470, loss: 0.007637233939021826 2023-01-24 05:20:11.162714: step: 242/470, loss: 0.030431710183620453 2023-01-24 05:20:11.898741: step: 244/470, loss: 0.028963802382349968 2023-01-24 05:20:12.677172: step: 246/470, loss: 0.0031187073327600956 2023-01-24 05:20:13.471539: step: 248/470, loss: 0.15134938061237335 2023-01-24 05:20:14.246391: step: 250/470, loss: 0.026609888300299644 2023-01-24 05:20:15.112732: step: 252/470, loss: 0.018683232367038727 2023-01-24 05:20:15.860716: step: 254/470, loss: 0.01902608573436737 2023-01-24 05:20:16.631885: step: 256/470, loss: 0.23490701615810394 2023-01-24 05:20:17.264823: step: 258/470, loss: 0.005870967172086239 2023-01-24 05:20:17.962583: step: 260/470, loss: 0.037582509219646454 2023-01-24 05:20:18.706236: step: 262/470, loss: 0.056500356644392014 2023-01-24 05:20:19.488311: step: 264/470, loss: 0.023657605051994324 2023-01-24 05:20:20.197477: step: 266/470, loss: 0.0007725696195848286 2023-01-24 05:20:20.887949: step: 268/470, loss: 0.08393879979848862 2023-01-24 05:20:21.600020: step: 270/470, loss: 0.02035318873822689 2023-01-24 05:20:22.246266: step: 272/470, loss: 0.003998577129095793 2023-01-24 05:20:22.987914: step: 274/470, loss: 0.008322400040924549 2023-01-24 05:20:23.670217: step: 276/470, loss: 0.0008807040867395699 2023-01-24 05:20:24.434731: step: 278/470, loss: 0.0010697413235902786 2023-01-24 05:20:25.141234: step: 280/470, loss: 0.00225759856402874 2023-01-24 05:20:25.909307: step: 282/470, loss: 0.014795668423175812 2023-01-24 05:20:26.722290: step: 284/470, loss: 0.029663510620594025 2023-01-24 05:20:27.453852: step: 286/470, loss: 0.05489639937877655 2023-01-24 05:20:28.240160: step: 288/470, loss: 0.025471646338701248 2023-01-24 05:20:29.033390: step: 290/470, loss: 0.015530110336840153 2023-01-24 05:20:29.944228: step: 292/470, loss: 0.011148087680339813 2023-01-24 05:20:30.738297: step: 294/470, loss: 0.023910433053970337 2023-01-24 05:20:31.492262: step: 296/470, loss: 0.013895057141780853 2023-01-24 05:20:32.123657: step: 298/470, loss: 0.0005625097546726465 2023-01-24 05:20:32.836050: step: 300/470, loss: 0.05426546931266785 2023-01-24 05:20:33.503024: step: 302/470, loss: 0.00862344354391098 2023-01-24 05:20:34.252062: step: 304/470, loss: 0.060839202255010605 2023-01-24 05:20:34.976095: step: 306/470, loss: 0.003655111650004983 2023-01-24 05:20:35.719752: step: 308/470, loss: 0.039200231432914734 2023-01-24 05:20:36.573642: step: 310/470, loss: 0.04813004285097122 2023-01-24 05:20:37.210755: step: 312/470, loss: 0.0009427457116544247 2023-01-24 05:20:37.954502: step: 314/470, loss: 0.004311168100684881 2023-01-24 05:20:38.721066: step: 316/470, loss: 0.06394602358341217 2023-01-24 05:20:39.397763: step: 318/470, loss: 0.003433618927374482 2023-01-24 05:20:40.139521: step: 320/470, loss: 0.021177219226956367 2023-01-24 05:20:40.861807: step: 322/470, loss: 0.001753106014803052 2023-01-24 05:20:41.596166: step: 324/470, loss: 0.08285431563854218 2023-01-24 05:20:42.390540: step: 326/470, loss: 0.09834257513284683 2023-01-24 05:20:43.155134: step: 328/470, loss: 0.07702438533306122 2023-01-24 05:20:43.863217: step: 330/470, loss: 0.01453570369631052 2023-01-24 05:20:44.600079: step: 332/470, loss: 0.03407316654920578 2023-01-24 05:20:45.310420: step: 334/470, loss: 0.0027093144599348307 2023-01-24 05:20:45.980086: step: 336/470, loss: 0.017348872497677803 2023-01-24 05:20:46.802326: step: 338/470, loss: 0.10025624930858612 2023-01-24 05:20:47.545602: step: 340/470, loss: 0.023405015468597412 2023-01-24 05:20:48.308146: step: 342/470, loss: 0.1289200335741043 2023-01-24 05:20:49.083067: step: 344/470, loss: 0.001318649505265057 2023-01-24 05:20:49.787867: step: 346/470, loss: 0.3252336084842682 2023-01-24 05:20:50.500892: step: 348/470, loss: 0.00015436287503689528 2023-01-24 05:20:51.246644: step: 350/470, loss: 0.12891341745853424 2023-01-24 05:20:52.037984: step: 352/470, loss: 0.012002014555037022 2023-01-24 05:20:52.728902: step: 354/470, loss: 0.008551257662475109 2023-01-24 05:20:53.450963: step: 356/470, loss: 0.007327394559979439 2023-01-24 05:20:54.210871: step: 358/470, loss: 0.007789163384586573 2023-01-24 05:20:54.986401: step: 360/470, loss: 0.4248470962047577 2023-01-24 05:20:55.873639: step: 362/470, loss: 0.011140398681163788 2023-01-24 05:20:56.609776: step: 364/470, loss: 0.0026315178256481886 2023-01-24 05:20:57.398989: step: 366/470, loss: 0.016022127121686935 2023-01-24 05:20:58.141194: step: 368/470, loss: 0.05837252363562584 2023-01-24 05:20:58.832920: step: 370/470, loss: 0.0032597638200968504 2023-01-24 05:20:59.602227: step: 372/470, loss: 0.056031063199043274 2023-01-24 05:21:00.365926: step: 374/470, loss: 0.006611979100853205 2023-01-24 05:21:01.144789: step: 376/470, loss: 0.05569504201412201 2023-01-24 05:21:01.843598: step: 378/470, loss: 0.08907879143953323 2023-01-24 05:21:02.664105: step: 380/470, loss: 0.014300604350864887 2023-01-24 05:21:03.302931: step: 382/470, loss: 0.006313091143965721 2023-01-24 05:21:04.188677: step: 384/470, loss: 0.027734091505408287 2023-01-24 05:21:05.023543: step: 386/470, loss: 0.041512493044137955 2023-01-24 05:21:05.719815: step: 388/470, loss: 0.02768852189183235 2023-01-24 05:21:06.511268: step: 390/470, loss: 0.035742416977882385 2023-01-24 05:21:07.296609: step: 392/470, loss: 0.08417049050331116 2023-01-24 05:21:08.062255: step: 394/470, loss: 0.056899912655353546 2023-01-24 05:21:08.818336: step: 396/470, loss: 0.01473038736730814 2023-01-24 05:21:09.649099: step: 398/470, loss: 0.04913134500384331 2023-01-24 05:21:10.344348: step: 400/470, loss: 0.026948675513267517 2023-01-24 05:21:11.160598: step: 402/470, loss: 0.06986348330974579 2023-01-24 05:21:11.945579: step: 404/470, loss: 0.004675567615777254 2023-01-24 05:21:12.600437: step: 406/470, loss: 0.0008290672558359802 2023-01-24 05:21:13.326103: step: 408/470, loss: 0.008351105265319347 2023-01-24 05:21:14.121456: step: 410/470, loss: 0.03763250261545181 2023-01-24 05:21:14.853950: step: 412/470, loss: 0.020851025357842445 2023-01-24 05:21:15.617966: step: 414/470, loss: 0.06433157622814178 2023-01-24 05:21:16.341372: step: 416/470, loss: 0.005936459172517061 2023-01-24 05:21:17.088545: step: 418/470, loss: 0.046613890677690506 2023-01-24 05:21:17.762576: step: 420/470, loss: 0.019648538902401924 2023-01-24 05:21:18.490497: step: 422/470, loss: 0.010377058759331703 2023-01-24 05:21:19.206175: step: 424/470, loss: 0.03797370567917824 2023-01-24 05:21:20.040694: step: 426/470, loss: 0.10179644823074341 2023-01-24 05:21:20.776085: step: 428/470, loss: 0.042833250015974045 2023-01-24 05:21:21.566846: step: 430/470, loss: 0.13208261132240295 2023-01-24 05:21:22.308900: step: 432/470, loss: 0.09129922091960907 2023-01-24 05:21:23.011398: step: 434/470, loss: 0.0105623509734869 2023-01-24 05:21:23.718604: step: 436/470, loss: 0.006014485843479633 2023-01-24 05:21:24.453501: step: 438/470, loss: 0.014641005545854568 2023-01-24 05:21:25.169094: step: 440/470, loss: 0.012232620269060135 2023-01-24 05:21:25.848426: step: 442/470, loss: 2.0588850020430982e-05 2023-01-24 05:21:26.538250: step: 444/470, loss: 0.01798243820667267 2023-01-24 05:21:27.303897: step: 446/470, loss: 0.010261360555887222 2023-01-24 05:21:27.991744: step: 448/470, loss: 0.032064393162727356 2023-01-24 05:21:28.674234: step: 450/470, loss: 0.002512154169380665 2023-01-24 05:21:29.365324: step: 452/470, loss: 0.016261639073491096 2023-01-24 05:21:30.131666: step: 454/470, loss: 0.05349350348114967 2023-01-24 05:21:30.820238: step: 456/470, loss: 0.07273565232753754 2023-01-24 05:21:31.512861: step: 458/470, loss: 0.014662419445812702 2023-01-24 05:21:32.242996: step: 460/470, loss: 0.04515858367085457 2023-01-24 05:21:32.955872: step: 462/470, loss: 0.0028082530479878187 2023-01-24 05:21:33.656138: step: 464/470, loss: 0.06509249657392502 2023-01-24 05:21:34.403629: step: 466/470, loss: 0.015094724483788013 2023-01-24 05:21:35.122648: step: 468/470, loss: 0.0066069429740309715 2023-01-24 05:21:35.887515: step: 470/470, loss: 0.09069467335939407 2023-01-24 05:21:36.569082: step: 472/470, loss: 0.001958508975803852 2023-01-24 05:21:37.309801: step: 474/470, loss: 0.004850469995290041 2023-01-24 05:21:38.036598: step: 476/470, loss: 0.0013880267506465316 2023-01-24 05:21:38.775670: step: 478/470, loss: 0.07323987036943436 2023-01-24 05:21:39.424876: step: 480/470, loss: 0.18070386350154877 2023-01-24 05:21:40.223867: step: 482/470, loss: 0.0033805707935243845 2023-01-24 05:21:40.934848: step: 484/470, loss: 0.024858374148607254 2023-01-24 05:21:41.563923: step: 486/470, loss: 0.0003526340296957642 2023-01-24 05:21:42.293451: step: 488/470, loss: 0.007729541510343552 2023-01-24 05:21:43.050763: step: 490/470, loss: 0.039696142077445984 2023-01-24 05:21:43.757500: step: 492/470, loss: 0.011434253305196762 2023-01-24 05:21:44.498550: step: 494/470, loss: 0.0433819405734539 2023-01-24 05:21:45.228175: step: 496/470, loss: 0.030899059027433395 2023-01-24 05:21:45.937326: step: 498/470, loss: 0.04821249470114708 2023-01-24 05:21:46.672555: step: 500/470, loss: 0.02131025679409504 2023-01-24 05:21:47.527798: step: 502/470, loss: 0.05077216029167175 2023-01-24 05:21:48.263094: step: 504/470, loss: 0.0022677837405353785 2023-01-24 05:21:49.143962: step: 506/470, loss: 0.026514790952205658 2023-01-24 05:21:50.041851: step: 508/470, loss: 0.043541330844163895 2023-01-24 05:21:50.770108: step: 510/470, loss: 0.010781260207295418 2023-01-24 05:21:51.560258: step: 512/470, loss: 0.02016189508140087 2023-01-24 05:21:52.291469: step: 514/470, loss: 0.043015431612730026 2023-01-24 05:21:53.064886: step: 516/470, loss: 0.018223173916339874 2023-01-24 05:21:53.821581: step: 518/470, loss: 0.01400044560432434 2023-01-24 05:21:54.556887: step: 520/470, loss: 0.11256732046604156 2023-01-24 05:21:55.346988: step: 522/470, loss: 0.06279245764017105 2023-01-24 05:21:56.103032: step: 524/470, loss: 0.03156261146068573 2023-01-24 05:21:56.838646: step: 526/470, loss: 0.47835269570350647 2023-01-24 05:21:57.703828: step: 528/470, loss: 0.00015236996114253998 2023-01-24 05:21:58.462301: step: 530/470, loss: 0.12638600170612335 2023-01-24 05:21:59.245456: step: 532/470, loss: 0.012023607268929482 2023-01-24 05:21:59.984515: step: 534/470, loss: 0.015657739713788033 2023-01-24 05:22:00.651129: step: 536/470, loss: 0.007505686488002539 2023-01-24 05:22:01.365062: step: 538/470, loss: 0.0051700943149626255 2023-01-24 05:22:02.164821: step: 540/470, loss: 0.009726532734930515 2023-01-24 05:22:02.959913: step: 542/470, loss: 0.04598125070333481 2023-01-24 05:22:03.706817: step: 544/470, loss: 0.006266491021960974 2023-01-24 05:22:04.390596: step: 546/470, loss: 0.008492393419146538 2023-01-24 05:22:05.037855: step: 548/470, loss: 0.001625984674319625 2023-01-24 05:22:05.812025: step: 550/470, loss: 0.044714074581861496 2023-01-24 05:22:06.505442: step: 552/470, loss: 0.13521620631217957 2023-01-24 05:22:07.162012: step: 554/470, loss: 0.0015927805798128247 2023-01-24 05:22:07.986175: step: 556/470, loss: 0.04356268420815468 2023-01-24 05:22:08.793814: step: 558/470, loss: 0.051665596663951874 2023-01-24 05:22:09.533588: step: 560/470, loss: 0.028444265946745872 2023-01-24 05:22:10.272232: step: 562/470, loss: 0.041954174637794495 2023-01-24 05:22:11.005085: step: 564/470, loss: 0.022699544206261635 2023-01-24 05:22:11.681163: step: 566/470, loss: 0.02368500828742981 2023-01-24 05:22:12.468557: step: 568/470, loss: 0.04992617666721344 2023-01-24 05:22:13.224097: step: 570/470, loss: 0.0033017806708812714 2023-01-24 05:22:13.904464: step: 572/470, loss: 0.005991379264742136 2023-01-24 05:22:14.642478: step: 574/470, loss: 0.3665614724159241 2023-01-24 05:22:15.395446: step: 576/470, loss: 0.01564880833029747 2023-01-24 05:22:16.196278: step: 578/470, loss: 0.04914989322423935 2023-01-24 05:22:16.917262: step: 580/470, loss: 0.007808396127074957 2023-01-24 05:22:17.655734: step: 582/470, loss: 0.13653407990932465 2023-01-24 05:22:18.359495: step: 584/470, loss: 0.0009268509456887841 2023-01-24 05:22:19.120013: step: 586/470, loss: 0.033205803483724594 2023-01-24 05:22:19.814767: step: 588/470, loss: 0.05496671423316002 2023-01-24 05:22:20.551758: step: 590/470, loss: 0.04230504482984543 2023-01-24 05:22:21.308361: step: 592/470, loss: 0.03090505301952362 2023-01-24 05:22:21.977847: step: 594/470, loss: 0.007094330154359341 2023-01-24 05:22:22.711430: step: 596/470, loss: 0.013848799280822277 2023-01-24 05:22:23.464757: step: 598/470, loss: 0.003194592660292983 2023-01-24 05:22:24.258869: step: 600/470, loss: 0.01055984664708376 2023-01-24 05:22:25.089422: step: 602/470, loss: 0.014651630073785782 2023-01-24 05:22:25.858648: step: 604/470, loss: 0.0857321172952652 2023-01-24 05:22:26.591957: step: 606/470, loss: 0.001349782571196556 2023-01-24 05:22:27.306956: step: 608/470, loss: 0.14607299864292145 2023-01-24 05:22:28.068690: step: 610/470, loss: 0.007140479516237974 2023-01-24 05:22:28.795884: step: 612/470, loss: 0.0029003897216171026 2023-01-24 05:22:29.466952: step: 614/470, loss: 0.008922108449041843 2023-01-24 05:22:30.230664: step: 616/470, loss: 0.009059912525117397 2023-01-24 05:22:30.956201: step: 618/470, loss: 0.03438716009259224 2023-01-24 05:22:31.674833: step: 620/470, loss: 0.039528436958789825 2023-01-24 05:22:32.338768: step: 622/470, loss: 0.1352343112230301 2023-01-24 05:22:33.099807: step: 624/470, loss: 0.06945229321718216 2023-01-24 05:22:33.879736: step: 626/470, loss: 0.001418177504092455 2023-01-24 05:22:34.563855: step: 628/470, loss: 0.030531620606780052 2023-01-24 05:22:35.305912: step: 630/470, loss: 0.00512252002954483 2023-01-24 05:22:36.053562: step: 632/470, loss: 0.07616572082042694 2023-01-24 05:22:36.745070: step: 634/470, loss: 0.004690864589065313 2023-01-24 05:22:37.398343: step: 636/470, loss: 0.6737900376319885 2023-01-24 05:22:38.175128: step: 638/470, loss: 0.02247127704322338 2023-01-24 05:22:38.974513: step: 640/470, loss: 0.04330654442310333 2023-01-24 05:22:39.713403: step: 642/470, loss: 0.02554977498948574 2023-01-24 05:22:40.451068: step: 644/470, loss: 0.05359509587287903 2023-01-24 05:22:41.170008: step: 646/470, loss: 0.04412570595741272 2023-01-24 05:22:41.916875: step: 648/470, loss: 0.03207087889313698 2023-01-24 05:22:42.661702: step: 650/470, loss: 0.03689567372202873 2023-01-24 05:22:43.322248: step: 652/470, loss: 0.01859690062701702 2023-01-24 05:22:44.054680: step: 654/470, loss: 0.026259824633598328 2023-01-24 05:22:44.765810: step: 656/470, loss: 0.0039988174103200436 2023-01-24 05:22:45.557258: step: 658/470, loss: 0.02299419976770878 2023-01-24 05:22:46.264345: step: 660/470, loss: 0.020622316747903824 2023-01-24 05:22:46.971443: step: 662/470, loss: 0.029058068990707397 2023-01-24 05:22:47.762116: step: 664/470, loss: 0.002780361333861947 2023-01-24 05:22:48.517877: step: 666/470, loss: 0.0022796066477894783 2023-01-24 05:22:49.253917: step: 668/470, loss: 0.03231862932443619 2023-01-24 05:22:50.034272: step: 670/470, loss: 0.05333375930786133 2023-01-24 05:22:50.856700: step: 672/470, loss: 0.006049145944416523 2023-01-24 05:22:51.590387: step: 674/470, loss: 0.011256158351898193 2023-01-24 05:22:52.282949: step: 676/470, loss: 0.008002914488315582 2023-01-24 05:22:53.048128: step: 678/470, loss: 0.022282328456640244 2023-01-24 05:22:53.694649: step: 680/470, loss: 0.001529976725578308 2023-01-24 05:22:54.413082: step: 682/470, loss: 0.4798831641674042 2023-01-24 05:22:55.204875: step: 684/470, loss: 0.012694069184362888 2023-01-24 05:22:56.036332: step: 686/470, loss: 0.2012556791305542 2023-01-24 05:22:56.739998: step: 688/470, loss: 0.3873967230319977 2023-01-24 05:22:57.563337: step: 690/470, loss: 0.0851685032248497 2023-01-24 05:22:58.333004: step: 692/470, loss: 0.11975271999835968 2023-01-24 05:22:59.053963: step: 694/470, loss: 0.009530964307487011 2023-01-24 05:22:59.733787: step: 696/470, loss: 0.046175580471754074 2023-01-24 05:23:00.468449: step: 698/470, loss: 1.4199903011322021 2023-01-24 05:23:01.217005: step: 700/470, loss: 0.004222056828439236 2023-01-24 05:23:01.910024: step: 702/470, loss: 0.004082622472196817 2023-01-24 05:23:02.705251: step: 704/470, loss: 0.04654672369360924 2023-01-24 05:23:03.464369: step: 706/470, loss: 0.11172907054424286 2023-01-24 05:23:04.248132: step: 708/470, loss: 0.0367024689912796 2023-01-24 05:23:04.976864: step: 710/470, loss: 8.773482841206715e-05 2023-01-24 05:23:05.753507: step: 712/470, loss: 0.01656370982527733 2023-01-24 05:23:06.471512: step: 714/470, loss: 0.012438789010047913 2023-01-24 05:23:07.200449: step: 716/470, loss: 0.009644479490816593 2023-01-24 05:23:07.904203: step: 718/470, loss: 0.013749388046562672 2023-01-24 05:23:08.642852: step: 720/470, loss: 0.023235702887177467 2023-01-24 05:23:09.422786: step: 722/470, loss: 0.07161347568035126 2023-01-24 05:23:10.259485: step: 724/470, loss: 0.043580781668424606 2023-01-24 05:23:11.034088: step: 726/470, loss: 0.05737774074077606 2023-01-24 05:23:11.776524: step: 728/470, loss: 0.0016026493394747376 2023-01-24 05:23:12.562718: step: 730/470, loss: 0.057738371193408966 2023-01-24 05:23:13.332865: step: 732/470, loss: 0.00018781011749524623 2023-01-24 05:23:14.210494: step: 734/470, loss: 0.03806695714592934 2023-01-24 05:23:14.935150: step: 736/470, loss: 0.06477882713079453 2023-01-24 05:23:15.680135: step: 738/470, loss: 0.0006179798510856926 2023-01-24 05:23:16.470539: step: 740/470, loss: 0.047551143914461136 2023-01-24 05:23:17.225203: step: 742/470, loss: 0.47022610902786255 2023-01-24 05:23:18.008265: step: 744/470, loss: 0.02439289726316929 2023-01-24 05:23:18.736152: step: 746/470, loss: 0.023850999772548676 2023-01-24 05:23:19.547112: step: 748/470, loss: 0.13051940500736237 2023-01-24 05:23:20.236485: step: 750/470, loss: 0.011319885030388832 2023-01-24 05:23:21.014469: step: 752/470, loss: 0.05051903426647186 2023-01-24 05:23:21.684086: step: 754/470, loss: 0.05477369949221611 2023-01-24 05:23:22.324448: step: 756/470, loss: 0.02628343366086483 2023-01-24 05:23:23.080044: step: 758/470, loss: 0.006637097802013159 2023-01-24 05:23:23.827096: step: 760/470, loss: 0.07080532610416412 2023-01-24 05:23:24.519249: step: 762/470, loss: 0.03669466823339462 2023-01-24 05:23:25.195287: step: 764/470, loss: 0.0033256318420171738 2023-01-24 05:23:25.881011: step: 766/470, loss: 0.11121101677417755 2023-01-24 05:23:26.603468: step: 768/470, loss: 0.006534420885145664 2023-01-24 05:23:27.357704: step: 770/470, loss: 0.03840123862028122 2023-01-24 05:23:28.112595: step: 772/470, loss: 0.015428757295012474 2023-01-24 05:23:28.854784: step: 774/470, loss: 0.30759334564208984 2023-01-24 05:23:29.690484: step: 776/470, loss: 0.013859348371624947 2023-01-24 05:23:30.353516: step: 778/470, loss: 0.02122689038515091 2023-01-24 05:23:31.149485: step: 780/470, loss: 0.03834621235728264 2023-01-24 05:23:31.869025: step: 782/470, loss: 0.5577226281166077 2023-01-24 05:23:32.579724: step: 784/470, loss: 0.01965031400322914 2023-01-24 05:23:33.377637: step: 786/470, loss: 0.016025938093662262 2023-01-24 05:23:34.140066: step: 788/470, loss: 0.017744455486536026 2023-01-24 05:23:34.872095: step: 790/470, loss: 0.006757447961717844 2023-01-24 05:23:35.641000: step: 792/470, loss: 0.07866999506950378 2023-01-24 05:23:36.532258: step: 794/470, loss: 0.006022907793521881 2023-01-24 05:23:37.299546: step: 796/470, loss: 0.0842958465218544 2023-01-24 05:23:38.011689: step: 798/470, loss: 0.00013147601566743106 2023-01-24 05:23:38.735243: step: 800/470, loss: 0.002207849407568574 2023-01-24 05:23:39.541420: step: 802/470, loss: 0.011025402694940567 2023-01-24 05:23:40.282325: step: 804/470, loss: 0.07100294530391693 2023-01-24 05:23:41.017592: step: 806/470, loss: 0.12755174934864044 2023-01-24 05:23:41.752444: step: 808/470, loss: 0.0014298518653959036 2023-01-24 05:23:42.504729: step: 810/470, loss: 0.07408328354358673 2023-01-24 05:23:43.180100: step: 812/470, loss: 0.0061090909875929356 2023-01-24 05:23:43.959748: step: 814/470, loss: 0.018763698637485504 2023-01-24 05:23:44.654254: step: 816/470, loss: 0.004722012206912041 2023-01-24 05:23:45.479907: step: 818/470, loss: 0.10819140076637268 2023-01-24 05:23:46.246704: step: 820/470, loss: 0.013299317099153996 2023-01-24 05:23:46.996861: step: 822/470, loss: 0.23392438888549805 2023-01-24 05:23:47.672882: step: 824/470, loss: 0.018688471987843513 2023-01-24 05:23:48.408935: step: 826/470, loss: 0.05231665074825287 2023-01-24 05:23:49.203969: step: 828/470, loss: 0.05416341871023178 2023-01-24 05:23:49.981864: step: 830/470, loss: 0.0048498413525521755 2023-01-24 05:23:50.691097: step: 832/470, loss: 0.056482456624507904 2023-01-24 05:23:51.481288: step: 834/470, loss: 0.06690236181020737 2023-01-24 05:23:52.160590: step: 836/470, loss: 0.04062666743993759 2023-01-24 05:23:52.855417: step: 838/470, loss: 0.04290313646197319 2023-01-24 05:23:53.597949: step: 840/470, loss: 0.06305599957704544 2023-01-24 05:23:54.362195: step: 842/470, loss: 9.941688537597656 2023-01-24 05:23:55.100766: step: 844/470, loss: 0.060064323246479034 2023-01-24 05:23:55.897210: step: 846/470, loss: 0.054962724447250366 2023-01-24 05:23:56.664912: step: 848/470, loss: 0.0005881677498109639 2023-01-24 05:23:57.472208: step: 850/470, loss: 0.016850154846906662 2023-01-24 05:23:58.206790: step: 852/470, loss: 0.04970991238951683 2023-01-24 05:23:59.076170: step: 854/470, loss: 0.22735270857810974 2023-01-24 05:23:59.847863: step: 856/470, loss: 0.021856531500816345 2023-01-24 05:24:00.513582: step: 858/470, loss: 0.06958413124084473 2023-01-24 05:24:01.214787: step: 860/470, loss: 0.010652045719325542 2023-01-24 05:24:01.992522: step: 862/470, loss: 0.0281230416148901 2023-01-24 05:24:02.718317: step: 864/470, loss: 0.0192283783107996 2023-01-24 05:24:03.389897: step: 866/470, loss: 0.014173178002238274 2023-01-24 05:24:04.126165: step: 868/470, loss: 0.02912173420190811 2023-01-24 05:24:04.845794: step: 870/470, loss: 0.011722569353878498 2023-01-24 05:24:05.630992: step: 872/470, loss: 0.04161560535430908 2023-01-24 05:24:06.389374: step: 874/470, loss: 0.19505290687084198 2023-01-24 05:24:07.076881: step: 876/470, loss: 0.03534523397684097 2023-01-24 05:24:07.779251: step: 878/470, loss: 0.05336989462375641 2023-01-24 05:24:08.451398: step: 880/470, loss: 0.0025530781131237745 2023-01-24 05:24:09.201372: step: 882/470, loss: 0.016439758241176605 2023-01-24 05:24:10.003530: step: 884/470, loss: 0.006466133054345846 2023-01-24 05:24:10.777912: step: 886/470, loss: 0.01774168759584427 2023-01-24 05:24:11.472983: step: 888/470, loss: 0.04091699793934822 2023-01-24 05:24:12.185918: step: 890/470, loss: 0.1776171773672104 2023-01-24 05:24:13.029483: step: 892/470, loss: 0.11639781296253204 2023-01-24 05:24:13.718496: step: 894/470, loss: 0.01566343568265438 2023-01-24 05:24:14.487969: step: 896/470, loss: 0.024013573303818703 2023-01-24 05:24:15.275739: step: 898/470, loss: 0.004490656778216362 2023-01-24 05:24:16.148947: step: 900/470, loss: 0.0408441387116909 2023-01-24 05:24:16.824226: step: 902/470, loss: 0.08740266412496567 2023-01-24 05:24:17.555459: step: 904/470, loss: 0.026669248938560486 2023-01-24 05:24:18.418919: step: 906/470, loss: 0.037839457392692566 2023-01-24 05:24:19.177541: step: 908/470, loss: 0.19800342619419098 2023-01-24 05:24:19.862642: step: 910/470, loss: 0.007794266100972891 2023-01-24 05:24:20.569719: step: 912/470, loss: 0.36751270294189453 2023-01-24 05:24:21.295619: step: 914/470, loss: 0.5664834976196289 2023-01-24 05:24:22.052241: step: 916/470, loss: 0.021370526403188705 2023-01-24 05:24:22.820431: step: 918/470, loss: 0.010546115227043629 2023-01-24 05:24:23.614281: step: 920/470, loss: 0.019952479749917984 2023-01-24 05:24:24.386699: step: 922/470, loss: 0.03363412991166115 2023-01-24 05:24:25.181797: step: 924/470, loss: 0.018996473401784897 2023-01-24 05:24:26.038137: step: 926/470, loss: 0.052708521485328674 2023-01-24 05:24:26.685107: step: 928/470, loss: 0.3829970955848694 2023-01-24 05:24:27.460892: step: 930/470, loss: 0.05672343447804451 2023-01-24 05:24:28.260008: step: 932/470, loss: 0.19621804356575012 2023-01-24 05:24:29.014041: step: 934/470, loss: 0.040584880858659744 2023-01-24 05:24:29.721427: step: 936/470, loss: 0.04768889769911766 2023-01-24 05:24:30.456517: step: 938/470, loss: 0.30375567078590393 2023-01-24 05:24:31.248161: step: 940/470, loss: 0.2606930136680603 2023-01-24 05:24:31.907444: step: 942/470, loss: 0.0006679189973510802 ================================================== Loss: 0.073 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34753635103830566, 'r': 0.31258487740447227, 'f1': 0.32913532545885493}, 'combined': 0.2425207661275773, 'epoch': 27} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35249201204432823, 'r': 0.32808871890279784, 'f1': 0.3398528562339738}, 'combined': 0.22656857082264917, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3464249633371354, 'r': 0.3122426140135471, 'f1': 0.32844682152722415}, 'combined': 0.24201344744111253, 'epoch': 27} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3670685840224523, 'r': 0.3413031930285686, 'f1': 0.3537173101641369}, 'combined': 0.23581154010942454, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3343202589489718, 'r': 0.32036381550138665, 'f1': 0.3271932766845557}, 'combined': 0.24108978282019894, 'epoch': 27} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3562257224703397, 'r': 0.33944201054625633, 'f1': 0.34763140420296074}, 'combined': 0.23175426946864044, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3046875, 'r': 0.2785714285714286, 'f1': 0.291044776119403}, 'combined': 0.19402985074626866, 'epoch': 27} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6785714285714286, 'r': 0.41304347826086957, 'f1': 0.5135135135135135}, 'combined': 0.3423423423423423, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32338929219600726, 'r': 0.33811669829222013, 'f1': 0.3305890538033395}, 'combined': 0.24359193438140805, 'epoch': 26} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.348896802572895, 'r': 0.3519161018259297, 'f1': 0.35039994820389364}, 'combined': 0.23359996546926237, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5535714285714286, 'r': 0.2672413793103448, 'f1': 0.36046511627906974}, 'combined': 0.24031007751937983, 'epoch': 26} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:27:08.855743: step: 2/470, loss: 0.0020869718864560127 2023-01-24 05:27:09.708219: step: 4/470, loss: 0.0639810636639595 2023-01-24 05:27:10.515961: step: 6/470, loss: 0.009995676577091217 2023-01-24 05:27:11.218191: step: 8/470, loss: 0.009512209333479404 2023-01-24 05:27:12.025349: step: 10/470, loss: 0.006158136297017336 2023-01-24 05:27:12.731579: step: 12/470, loss: 0.007754956372082233 2023-01-24 05:27:13.401718: step: 14/470, loss: 0.0035998544190078974 2023-01-24 05:27:14.131124: step: 16/470, loss: 0.015439898706972599 2023-01-24 05:27:14.885635: step: 18/470, loss: 0.014050621539354324 2023-01-24 05:27:15.596334: step: 20/470, loss: 0.04366869106888771 2023-01-24 05:27:16.439303: step: 22/470, loss: 0.014802333898842335 2023-01-24 05:27:17.200540: step: 24/470, loss: 0.03355008736252785 2023-01-24 05:27:17.913387: step: 26/470, loss: 0.0021390863694250584 2023-01-24 05:27:18.576073: step: 28/470, loss: 0.017872659489512444 2023-01-24 05:27:19.358156: step: 30/470, loss: 0.0009868554770946503 2023-01-24 05:27:20.116418: step: 32/470, loss: 0.007105813827365637 2023-01-24 05:27:20.849031: step: 34/470, loss: 0.020172271877527237 2023-01-24 05:27:21.595245: step: 36/470, loss: 0.058939579874277115 2023-01-24 05:27:22.327791: step: 38/470, loss: 0.08055589348077774 2023-01-24 05:27:23.051672: step: 40/470, loss: 0.053452666848897934 2023-01-24 05:27:23.781184: step: 42/470, loss: 0.018411824479699135 2023-01-24 05:27:24.551918: step: 44/470, loss: 0.018836546689271927 2023-01-24 05:27:25.248493: step: 46/470, loss: 0.001567376428283751 2023-01-24 05:27:25.951416: step: 48/470, loss: 0.002337083453312516 2023-01-24 05:27:26.667704: step: 50/470, loss: 0.2951555848121643 2023-01-24 05:27:27.319049: step: 52/470, loss: 0.01612759754061699 2023-01-24 05:27:28.169048: step: 54/470, loss: 0.011461992748081684 2023-01-24 05:27:28.920981: step: 56/470, loss: 0.0009162522037513554 2023-01-24 05:27:29.619221: step: 58/470, loss: 0.017956262454390526 2023-01-24 05:27:30.382897: step: 60/470, loss: 0.167605459690094 2023-01-24 05:27:31.117417: step: 62/470, loss: 0.0007975992048159242 2023-01-24 05:27:31.834265: step: 64/470, loss: 0.00465493556112051 2023-01-24 05:27:32.513620: step: 66/470, loss: 0.0031678981613367796 2023-01-24 05:27:33.211333: step: 68/470, loss: 0.011101577430963516 2023-01-24 05:27:34.005715: step: 70/470, loss: 0.04563984274864197 2023-01-24 05:27:34.740703: step: 72/470, loss: 0.014584038406610489 2023-01-24 05:27:35.484521: step: 74/470, loss: 0.000692845496814698 2023-01-24 05:27:36.311557: step: 76/470, loss: 0.003819751553237438 2023-01-24 05:27:37.007807: step: 78/470, loss: 0.007177893538028002 2023-01-24 05:27:37.801229: step: 80/470, loss: 0.013194219209253788 2023-01-24 05:27:38.534148: step: 82/470, loss: 0.005768152419477701 2023-01-24 05:27:39.276866: step: 84/470, loss: 0.008911560289561749 2023-01-24 05:27:40.080922: step: 86/470, loss: 0.029109152033925056 2023-01-24 05:27:40.793378: step: 88/470, loss: 0.0036423057317733765 2023-01-24 05:27:41.512210: step: 90/470, loss: 0.01244268286973238 2023-01-24 05:27:42.199591: step: 92/470, loss: 0.03755531460046768 2023-01-24 05:27:42.913659: step: 94/470, loss: 0.007035805378109217 2023-01-24 05:27:43.655325: step: 96/470, loss: 0.0064859106205403805 2023-01-24 05:27:44.335727: step: 98/470, loss: 0.000647165987174958 2023-01-24 05:27:45.018360: step: 100/470, loss: 0.014123012311756611 2023-01-24 05:27:45.747575: step: 102/470, loss: 0.0009315513307228684 2023-01-24 05:27:46.409719: step: 104/470, loss: 0.03605140745639801 2023-01-24 05:27:47.150097: step: 106/470, loss: 0.08731332421302795 2023-01-24 05:27:47.878925: step: 108/470, loss: 0.008800004608929157 2023-01-24 05:27:48.589881: step: 110/470, loss: 0.029222659766674042 2023-01-24 05:27:49.374666: step: 112/470, loss: 0.03977571055293083 2023-01-24 05:27:50.119120: step: 114/470, loss: 0.00808743480592966 2023-01-24 05:27:50.882733: step: 116/470, loss: 0.030444975942373276 2023-01-24 05:27:51.617374: step: 118/470, loss: 0.0036042286083102226 2023-01-24 05:27:52.272078: step: 120/470, loss: 0.001594691420905292 2023-01-24 05:27:53.002074: step: 122/470, loss: 0.034792497754096985 2023-01-24 05:27:53.747504: step: 124/470, loss: 0.003342484124004841 2023-01-24 05:27:54.434969: step: 126/470, loss: 0.003620708826929331 2023-01-24 05:27:55.139323: step: 128/470, loss: 0.058778248727321625 2023-01-24 05:27:55.804306: step: 130/470, loss: 0.09761997312307358 2023-01-24 05:27:56.530777: step: 132/470, loss: 0.042754001915454865 2023-01-24 05:27:57.193269: step: 134/470, loss: 0.018591521307826042 2023-01-24 05:27:57.933846: step: 136/470, loss: 0.02833203226327896 2023-01-24 05:27:58.692063: step: 138/470, loss: 0.06880441308021545 2023-01-24 05:27:59.405826: step: 140/470, loss: 0.03585517033934593 2023-01-24 05:28:00.201232: step: 142/470, loss: 0.004842748399823904 2023-01-24 05:28:00.891472: step: 144/470, loss: 0.012239599600434303 2023-01-24 05:28:01.688757: step: 146/470, loss: 0.0006771074840798974 2023-01-24 05:28:02.442193: step: 148/470, loss: 0.005776724312454462 2023-01-24 05:28:03.161002: step: 150/470, loss: 0.0004973204340785742 2023-01-24 05:28:03.833602: step: 152/470, loss: 0.06911028921604156 2023-01-24 05:28:04.630868: step: 154/470, loss: 0.010073547251522541 2023-01-24 05:28:05.352800: step: 156/470, loss: 0.0016500890487805009 2023-01-24 05:28:06.033057: step: 158/470, loss: 0.07199298590421677 2023-01-24 05:28:06.729900: step: 160/470, loss: 0.0059176781214773655 2023-01-24 05:28:07.433341: step: 162/470, loss: 0.006079540122300386 2023-01-24 05:28:08.154426: step: 164/470, loss: 0.16469572484493256 2023-01-24 05:28:08.920928: step: 166/470, loss: 0.08042943477630615 2023-01-24 05:28:09.599403: step: 168/470, loss: 0.07466889917850494 2023-01-24 05:28:10.325441: step: 170/470, loss: 0.02825554646551609 2023-01-24 05:28:11.026895: step: 172/470, loss: 0.07861734926700592 2023-01-24 05:28:11.771064: step: 174/470, loss: 0.012655568309128284 2023-01-24 05:28:12.488991: step: 176/470, loss: 0.4837439954280853 2023-01-24 05:28:13.242020: step: 178/470, loss: 0.015461144037544727 2023-01-24 05:28:13.990052: step: 180/470, loss: 0.04108460992574692 2023-01-24 05:28:14.736139: step: 182/470, loss: 0.0002883071720134467 2023-01-24 05:28:15.373978: step: 184/470, loss: 0.0014073270140215755 2023-01-24 05:28:16.143172: step: 186/470, loss: 0.0024875374510884285 2023-01-24 05:28:16.895296: step: 188/470, loss: 0.08386242389678955 2023-01-24 05:28:17.574377: step: 190/470, loss: 0.004048591013997793 2023-01-24 05:28:18.213877: step: 192/470, loss: 0.0006271903403103352 2023-01-24 05:28:19.032172: step: 194/470, loss: 0.003816502168774605 2023-01-24 05:28:19.803568: step: 196/470, loss: 0.04496389627456665 2023-01-24 05:28:20.557371: step: 198/470, loss: 0.06660241633653641 2023-01-24 05:28:21.290933: step: 200/470, loss: 0.011314080096781254 2023-01-24 05:28:22.090978: step: 202/470, loss: 0.010811764746904373 2023-01-24 05:28:22.784066: step: 204/470, loss: 0.001625021337531507 2023-01-24 05:28:23.484289: step: 206/470, loss: 0.07973671704530716 2023-01-24 05:28:24.281151: step: 208/470, loss: 0.016757629811763763 2023-01-24 05:28:24.990159: step: 210/470, loss: 0.014848452992737293 2023-01-24 05:28:25.747740: step: 212/470, loss: 0.02553386054933071 2023-01-24 05:28:26.429257: step: 214/470, loss: 0.06657985597848892 2023-01-24 05:28:27.218694: step: 216/470, loss: 0.00882027018815279 2023-01-24 05:28:27.971244: step: 218/470, loss: 0.015855491161346436 2023-01-24 05:28:28.709190: step: 220/470, loss: 0.023395488038659096 2023-01-24 05:28:29.398919: step: 222/470, loss: 0.08313391357660294 2023-01-24 05:28:30.111243: step: 224/470, loss: 0.002644822234287858 2023-01-24 05:28:30.797238: step: 226/470, loss: 0.4776703119277954 2023-01-24 05:28:31.542760: step: 228/470, loss: 0.033827830106019974 2023-01-24 05:28:32.296308: step: 230/470, loss: 0.002036722842603922 2023-01-24 05:28:33.058599: step: 232/470, loss: 0.07246463000774384 2023-01-24 05:28:33.897732: step: 234/470, loss: 2.729046583175659 2023-01-24 05:28:34.660136: step: 236/470, loss: 0.0005667444784194231 2023-01-24 05:28:35.398109: step: 238/470, loss: 0.010163289494812489 2023-01-24 05:28:36.122504: step: 240/470, loss: 0.040548261255025864 2023-01-24 05:28:36.873216: step: 242/470, loss: 0.07027406245470047 2023-01-24 05:28:37.680398: step: 244/470, loss: 0.045413631945848465 2023-01-24 05:28:38.390411: step: 246/470, loss: 0.018411103636026382 2023-01-24 05:28:39.040188: step: 248/470, loss: 0.0008637277642264962 2023-01-24 05:28:39.782077: step: 250/470, loss: 0.02072082832455635 2023-01-24 05:28:40.541208: step: 252/470, loss: 0.03055809810757637 2023-01-24 05:28:41.266079: step: 254/470, loss: 0.015467851422727108 2023-01-24 05:28:41.994048: step: 256/470, loss: 0.003622818971052766 2023-01-24 05:28:42.767830: step: 258/470, loss: 0.007900088094174862 2023-01-24 05:28:43.445932: step: 260/470, loss: 0.0008739789482206106 2023-01-24 05:28:44.120360: step: 262/470, loss: 0.07829133421182632 2023-01-24 05:28:44.858258: step: 264/470, loss: 0.013694636523723602 2023-01-24 05:28:45.537185: step: 266/470, loss: 0.15331532061100006 2023-01-24 05:28:46.240019: step: 268/470, loss: 0.008623996749520302 2023-01-24 05:28:46.917529: step: 270/470, loss: 0.014343681745231152 2023-01-24 05:28:47.686637: step: 272/470, loss: 0.00040280522080138326 2023-01-24 05:28:48.379115: step: 274/470, loss: 0.09008922427892685 2023-01-24 05:28:49.125404: step: 276/470, loss: 0.016768047586083412 2023-01-24 05:28:49.909532: step: 278/470, loss: 0.00032845677924342453 2023-01-24 05:28:50.709478: step: 280/470, loss: 0.02407873421907425 2023-01-24 05:28:51.462084: step: 282/470, loss: 0.07986540347337723 2023-01-24 05:28:52.175422: step: 284/470, loss: 0.03831029310822487 2023-01-24 05:28:52.858007: step: 286/470, loss: 0.00768911000341177 2023-01-24 05:28:53.555979: step: 288/470, loss: 0.14820675551891327 2023-01-24 05:28:54.257372: step: 290/470, loss: 0.06754887849092484 2023-01-24 05:28:55.027365: step: 292/470, loss: 0.03733934462070465 2023-01-24 05:28:55.745389: step: 294/470, loss: 0.03799036517739296 2023-01-24 05:28:56.445040: step: 296/470, loss: 0.002790941623970866 2023-01-24 05:28:57.256342: step: 298/470, loss: 0.010808521881699562 2023-01-24 05:28:58.003573: step: 300/470, loss: 0.010266797617077827 2023-01-24 05:28:58.684531: step: 302/470, loss: 0.0021426889579743147 2023-01-24 05:28:59.436982: step: 304/470, loss: 0.006693427916616201 2023-01-24 05:29:00.192065: step: 306/470, loss: 0.012375026941299438 2023-01-24 05:29:00.981369: step: 308/470, loss: 0.0827789232134819 2023-01-24 05:29:01.660015: step: 310/470, loss: 0.07285638898611069 2023-01-24 05:29:02.434735: step: 312/470, loss: 0.0009760952088981867 2023-01-24 05:29:03.131674: step: 314/470, loss: 0.006536416243761778 2023-01-24 05:29:03.994700: step: 316/470, loss: 0.2650093734264374 2023-01-24 05:29:04.702454: step: 318/470, loss: 0.02324749529361725 2023-01-24 05:29:05.487091: step: 320/470, loss: 0.007267648819833994 2023-01-24 05:29:06.194920: step: 322/470, loss: 0.02628829888999462 2023-01-24 05:29:06.895514: step: 324/470, loss: 0.023430757224559784 2023-01-24 05:29:07.605449: step: 326/470, loss: 0.019561942666769028 2023-01-24 05:29:08.305707: step: 328/470, loss: 0.0054289596155285835 2023-01-24 05:29:09.020453: step: 330/470, loss: 0.015765221789479256 2023-01-24 05:29:09.755750: step: 332/470, loss: 0.18133984506130219 2023-01-24 05:29:10.438540: step: 334/470, loss: 0.018263421952724457 2023-01-24 05:29:11.129351: step: 336/470, loss: 0.060671113431453705 2023-01-24 05:29:11.888435: step: 338/470, loss: 0.07180456817150116 2023-01-24 05:29:12.649959: step: 340/470, loss: 0.005747984629124403 2023-01-24 05:29:13.416313: step: 342/470, loss: 0.29718539118766785 2023-01-24 05:29:14.175351: step: 344/470, loss: 0.0482538603246212 2023-01-24 05:29:14.944365: step: 346/470, loss: 0.05792737379670143 2023-01-24 05:29:15.702448: step: 348/470, loss: 0.12915441393852234 2023-01-24 05:29:16.370566: step: 350/470, loss: 0.017134780064225197 2023-01-24 05:29:17.047256: step: 352/470, loss: 0.037275586277246475 2023-01-24 05:29:17.832641: step: 354/470, loss: 0.010397748090326786 2023-01-24 05:29:18.545130: step: 356/470, loss: 0.008629174903035164 2023-01-24 05:29:19.207800: step: 358/470, loss: 0.001150502823293209 2023-01-24 05:29:20.047349: step: 360/470, loss: 0.03983455151319504 2023-01-24 05:29:20.860436: step: 362/470, loss: 0.18434298038482666 2023-01-24 05:29:21.560058: step: 364/470, loss: 0.009704058058559895 2023-01-24 05:29:22.301113: step: 366/470, loss: 0.04657233878970146 2023-01-24 05:29:22.999921: step: 368/470, loss: 14.60478687286377 2023-01-24 05:29:23.732802: step: 370/470, loss: 0.02620551362633705 2023-01-24 05:29:24.431234: step: 372/470, loss: 0.0038664399180561304 2023-01-24 05:29:25.160892: step: 374/470, loss: 0.005592767149209976 2023-01-24 05:29:25.974365: step: 376/470, loss: 0.0027753994800150394 2023-01-24 05:29:26.665393: step: 378/470, loss: 0.017724450677633286 2023-01-24 05:29:27.452717: step: 380/470, loss: 0.015447490848600864 2023-01-24 05:29:28.309974: step: 382/470, loss: 0.03897276520729065 2023-01-24 05:29:29.019703: step: 384/470, loss: 0.0031961435452103615 2023-01-24 05:29:29.725813: step: 386/470, loss: 0.022479455918073654 2023-01-24 05:29:30.443028: step: 388/470, loss: 0.03246815502643585 2023-01-24 05:29:31.178689: step: 390/470, loss: 0.011416290886700153 2023-01-24 05:29:31.919818: step: 392/470, loss: 0.0019335534889250994 2023-01-24 05:29:32.573445: step: 394/470, loss: 0.016625449061393738 2023-01-24 05:29:33.215805: step: 396/470, loss: 0.0012744449777528644 2023-01-24 05:29:33.921629: step: 398/470, loss: 0.02477888949215412 2023-01-24 05:29:34.705957: step: 400/470, loss: 0.2593715786933899 2023-01-24 05:29:35.479439: step: 402/470, loss: 0.019976742565631866 2023-01-24 05:29:36.211316: step: 404/470, loss: 0.07440569996833801 2023-01-24 05:29:36.961433: step: 406/470, loss: 0.06923633068799973 2023-01-24 05:29:37.691504: step: 408/470, loss: 0.0011120573617517948 2023-01-24 05:29:38.396232: step: 410/470, loss: 0.003188740462064743 2023-01-24 05:29:39.131217: step: 412/470, loss: 0.4376097023487091 2023-01-24 05:29:39.827694: step: 414/470, loss: 0.0028341033030301332 2023-01-24 05:29:40.546285: step: 416/470, loss: 0.0008696450968272984 2023-01-24 05:29:41.248934: step: 418/470, loss: 0.008818319998681545 2023-01-24 05:29:42.017674: step: 420/470, loss: 0.0027629744727164507 2023-01-24 05:29:42.709752: step: 422/470, loss: 0.0039850943721830845 2023-01-24 05:29:43.497509: step: 424/470, loss: 0.035751424729824066 2023-01-24 05:29:44.222092: step: 426/470, loss: 0.020366976037621498 2023-01-24 05:29:45.026026: step: 428/470, loss: 0.2410111129283905 2023-01-24 05:29:45.770869: step: 430/470, loss: 0.005729255266487598 2023-01-24 05:29:46.514645: step: 432/470, loss: 0.010410521179437637 2023-01-24 05:29:47.245039: step: 434/470, loss: 0.06469309329986572 2023-01-24 05:29:48.091483: step: 436/470, loss: 0.04472040757536888 2023-01-24 05:29:48.795337: step: 438/470, loss: 0.023935789242386818 2023-01-24 05:29:49.808020: step: 440/470, loss: 0.041207972913980484 2023-01-24 05:29:50.493146: step: 442/470, loss: 0.018505996093153954 2023-01-24 05:29:51.241175: step: 444/470, loss: 0.025313975289463997 2023-01-24 05:29:51.922213: step: 446/470, loss: 0.013848811388015747 2023-01-24 05:29:52.642313: step: 448/470, loss: 0.00033676475868560374 2023-01-24 05:29:53.387789: step: 450/470, loss: 0.012692051008343697 2023-01-24 05:29:54.112759: step: 452/470, loss: 0.01313791237771511 2023-01-24 05:29:54.760652: step: 454/470, loss: 0.0016963921952992678 2023-01-24 05:29:55.471244: step: 456/470, loss: 0.5339106917381287 2023-01-24 05:29:56.227644: step: 458/470, loss: 0.055918145924806595 2023-01-24 05:29:56.875195: step: 460/470, loss: 0.00320792431011796 2023-01-24 05:29:57.662383: step: 462/470, loss: 0.0457182303071022 2023-01-24 05:29:58.471360: step: 464/470, loss: 0.01659577526152134 2023-01-24 05:29:59.300901: step: 466/470, loss: 0.03423323854804039 2023-01-24 05:30:00.145018: step: 468/470, loss: 0.009822395630180836 2023-01-24 05:30:00.837094: step: 470/470, loss: 0.0007459279731847346 2023-01-24 05:30:01.616800: step: 472/470, loss: 0.009417381137609482 2023-01-24 05:30:02.347906: step: 474/470, loss: 0.015447549521923065 2023-01-24 05:30:03.135030: step: 476/470, loss: 0.004266915377229452 2023-01-24 05:30:03.926700: step: 478/470, loss: 0.0016163645777851343 2023-01-24 05:30:04.669648: step: 480/470, loss: 0.007870226167142391 2023-01-24 05:30:05.361418: step: 482/470, loss: 0.04453890770673752 2023-01-24 05:30:06.128003: step: 484/470, loss: 0.08574463427066803 2023-01-24 05:30:06.855951: step: 486/470, loss: 0.0023265713825821877 2023-01-24 05:30:07.636079: step: 488/470, loss: 0.0027136588469147682 2023-01-24 05:30:08.347480: step: 490/470, loss: 0.056579723954200745 2023-01-24 05:30:09.107307: step: 492/470, loss: 0.049596644937992096 2023-01-24 05:30:09.711780: step: 494/470, loss: 0.009921560995280743 2023-01-24 05:30:10.397627: step: 496/470, loss: 0.011929440312087536 2023-01-24 05:30:11.226268: step: 498/470, loss: 0.05545575171709061 2023-01-24 05:30:12.025719: step: 500/470, loss: 0.041103947907686234 2023-01-24 05:30:12.920865: step: 502/470, loss: 0.05899772793054581 2023-01-24 05:30:13.655082: step: 504/470, loss: 0.02625754103064537 2023-01-24 05:30:14.368037: step: 506/470, loss: 1.3242918252944946 2023-01-24 05:30:15.060756: step: 508/470, loss: 0.011036560870707035 2023-01-24 05:30:15.791188: step: 510/470, loss: 0.002730847103521228 2023-01-24 05:30:16.443933: step: 512/470, loss: 0.04828563705086708 2023-01-24 05:30:17.203060: step: 514/470, loss: 0.011057223193347454 2023-01-24 05:30:17.946993: step: 516/470, loss: 0.024130506440997124 2023-01-24 05:30:18.633203: step: 518/470, loss: 0.009844631887972355 2023-01-24 05:30:19.406564: step: 520/470, loss: 0.09269572049379349 2023-01-24 05:30:20.138587: step: 522/470, loss: 0.010625121183693409 2023-01-24 05:30:20.954559: step: 524/470, loss: 0.09826123714447021 2023-01-24 05:30:21.698123: step: 526/470, loss: 0.07059445977210999 2023-01-24 05:30:22.518464: step: 528/470, loss: 0.21039122343063354 2023-01-24 05:30:23.219299: step: 530/470, loss: 0.004249433521181345 2023-01-24 05:30:24.022401: step: 532/470, loss: 0.6244630813598633 2023-01-24 05:30:24.732019: step: 534/470, loss: 0.10188359767198563 2023-01-24 05:30:25.478011: step: 536/470, loss: 0.0008839786169119179 2023-01-24 05:30:26.254159: step: 538/470, loss: 0.009506146423518658 2023-01-24 05:30:26.920274: step: 540/470, loss: 0.026201093569397926 2023-01-24 05:30:27.691634: step: 542/470, loss: 0.02899881824851036 2023-01-24 05:30:28.388213: step: 544/470, loss: 0.01946968585252762 2023-01-24 05:30:29.025116: step: 546/470, loss: 0.002837974112480879 2023-01-24 05:30:29.692047: step: 548/470, loss: 0.006309479475021362 2023-01-24 05:30:30.439332: step: 550/470, loss: 0.025487856939435005 2023-01-24 05:30:31.167189: step: 552/470, loss: 0.044974181801080704 2023-01-24 05:30:32.011561: step: 554/470, loss: 0.08220124244689941 2023-01-24 05:30:32.851382: step: 556/470, loss: 0.023944033309817314 2023-01-24 05:30:33.601012: step: 558/470, loss: 0.06157348304986954 2023-01-24 05:30:34.309802: step: 560/470, loss: 0.03174411877989769 2023-01-24 05:30:35.032949: step: 562/470, loss: 0.001561825512908399 2023-01-24 05:30:35.777583: step: 564/470, loss: 0.004232198931276798 2023-01-24 05:30:36.621920: step: 566/470, loss: 0.012148827314376831 2023-01-24 05:30:37.319364: step: 568/470, loss: 0.012420494109392166 2023-01-24 05:30:38.057520: step: 570/470, loss: 0.03385348618030548 2023-01-24 05:30:38.731163: step: 572/470, loss: 0.11622752249240875 2023-01-24 05:30:39.496277: step: 574/470, loss: 0.023387907072901726 2023-01-24 05:30:40.199000: step: 576/470, loss: 0.011087493039667606 2023-01-24 05:30:40.962116: step: 578/470, loss: 0.026949353516101837 2023-01-24 05:30:41.664732: step: 580/470, loss: 0.007216866593807936 2023-01-24 05:30:42.398789: step: 582/470, loss: 0.5527914762496948 2023-01-24 05:30:43.132063: step: 584/470, loss: 0.5917420387268066 2023-01-24 05:30:43.875977: step: 586/470, loss: 0.08542931079864502 2023-01-24 05:30:44.624013: step: 588/470, loss: 0.10411461442708969 2023-01-24 05:30:45.356308: step: 590/470, loss: 0.003107214579358697 2023-01-24 05:30:46.158129: step: 592/470, loss: 0.034524258226156235 2023-01-24 05:30:46.969274: step: 594/470, loss: 0.028853056952357292 2023-01-24 05:30:47.678273: step: 596/470, loss: 0.05348816141486168 2023-01-24 05:30:48.423981: step: 598/470, loss: 0.041593506932258606 2023-01-24 05:30:49.237668: step: 600/470, loss: 0.0063796937465667725 2023-01-24 05:30:50.000400: step: 602/470, loss: 0.016017813235521317 2023-01-24 05:30:50.763082: step: 604/470, loss: 0.062454525381326675 2023-01-24 05:30:51.529648: step: 606/470, loss: 0.19269520044326782 2023-01-24 05:30:52.225306: step: 608/470, loss: 0.014841437339782715 2023-01-24 05:30:53.088197: step: 610/470, loss: 2.196732759475708 2023-01-24 05:30:53.855223: step: 612/470, loss: 0.011269205249845982 2023-01-24 05:30:54.564193: step: 614/470, loss: 0.000654394447337836 2023-01-24 05:30:55.271649: step: 616/470, loss: 0.0068985833786427975 2023-01-24 05:30:56.048054: step: 618/470, loss: 0.009661390446126461 2023-01-24 05:30:56.743802: step: 620/470, loss: 0.009938615374267101 2023-01-24 05:30:57.543583: step: 622/470, loss: 0.28914228081703186 2023-01-24 05:30:58.355270: step: 624/470, loss: 0.057103097438812256 2023-01-24 05:30:59.277492: step: 626/470, loss: 0.0777493342757225 2023-01-24 05:31:00.012262: step: 628/470, loss: 0.0037567424587905407 2023-01-24 05:31:00.726194: step: 630/470, loss: 0.044795405119657516 2023-01-24 05:31:01.495343: step: 632/470, loss: 0.05516333505511284 2023-01-24 05:31:02.175236: step: 634/470, loss: 0.0026756152510643005 2023-01-24 05:31:02.894386: step: 636/470, loss: 0.049536339938640594 2023-01-24 05:31:03.638718: step: 638/470, loss: 0.020610058680176735 2023-01-24 05:31:04.412295: step: 640/470, loss: 0.0036903752479702234 2023-01-24 05:31:05.142445: step: 642/470, loss: 0.019514787942171097 2023-01-24 05:31:05.890986: step: 644/470, loss: 0.0012103342451155186 2023-01-24 05:31:06.675497: step: 646/470, loss: 0.043970245867967606 2023-01-24 05:31:07.418144: step: 648/470, loss: 0.3032195270061493 2023-01-24 05:31:08.156493: step: 650/470, loss: 0.05885202810168266 2023-01-24 05:31:08.957406: step: 652/470, loss: 0.06101761758327484 2023-01-24 05:31:09.659719: step: 654/470, loss: 0.017636749893426895 2023-01-24 05:31:10.373915: step: 656/470, loss: 0.09666401892900467 2023-01-24 05:31:11.155988: step: 658/470, loss: 0.006696424447000027 2023-01-24 05:31:11.928705: step: 660/470, loss: 0.012530512176454067 2023-01-24 05:31:12.723039: step: 662/470, loss: 0.03560841828584671 2023-01-24 05:31:13.502234: step: 664/470, loss: 0.12582863867282867 2023-01-24 05:31:14.156058: step: 666/470, loss: 0.039586570113897324 2023-01-24 05:31:14.743699: step: 668/470, loss: 0.0001087912532966584 2023-01-24 05:31:15.608219: step: 670/470, loss: 0.027073320001363754 2023-01-24 05:31:16.396562: step: 672/470, loss: 0.004335601814091206 2023-01-24 05:31:17.093650: step: 674/470, loss: 0.03368784487247467 2023-01-24 05:31:17.806631: step: 676/470, loss: 0.03928745165467262 2023-01-24 05:31:18.616663: step: 678/470, loss: 0.0066179316490888596 2023-01-24 05:31:19.397182: step: 680/470, loss: 0.03221479430794716 2023-01-24 05:31:20.226200: step: 682/470, loss: 0.08291277289390564 2023-01-24 05:31:20.965673: step: 684/470, loss: 0.023358231410384178 2023-01-24 05:31:21.720052: step: 686/470, loss: 0.052179984748363495 2023-01-24 05:31:22.440105: step: 688/470, loss: 0.012996486388146877 2023-01-24 05:31:23.190665: step: 690/470, loss: 0.2901443541049957 2023-01-24 05:31:23.947148: step: 692/470, loss: 0.15641692280769348 2023-01-24 05:31:24.709736: step: 694/470, loss: 0.03846118599176407 2023-01-24 05:31:25.499295: step: 696/470, loss: 0.008250097744166851 2023-01-24 05:31:26.255707: step: 698/470, loss: 0.1495591700077057 2023-01-24 05:31:27.050850: step: 700/470, loss: 0.05644164979457855 2023-01-24 05:31:27.712960: step: 702/470, loss: 0.0023439424112439156 2023-01-24 05:31:28.427382: step: 704/470, loss: 0.03498091921210289 2023-01-24 05:31:29.099261: step: 706/470, loss: 0.00011742630158551037 2023-01-24 05:31:29.798124: step: 708/470, loss: 0.011936147697269917 2023-01-24 05:31:30.498433: step: 710/470, loss: 0.013959010131657124 2023-01-24 05:31:31.185259: step: 712/470, loss: 0.003490228671580553 2023-01-24 05:31:31.927039: step: 714/470, loss: 0.03471314162015915 2023-01-24 05:31:32.761246: step: 716/470, loss: 0.04512022063136101 2023-01-24 05:31:33.459745: step: 718/470, loss: 0.013724502176046371 2023-01-24 05:31:34.147880: step: 720/470, loss: 0.04511544108390808 2023-01-24 05:31:34.868678: step: 722/470, loss: 0.04014601930975914 2023-01-24 05:31:35.681496: step: 724/470, loss: 0.0754375234246254 2023-01-24 05:31:36.604269: step: 726/470, loss: 0.06476394087076187 2023-01-24 05:31:37.271270: step: 728/470, loss: 0.027749724686145782 2023-01-24 05:31:38.002281: step: 730/470, loss: 0.259750634431839 2023-01-24 05:31:38.702249: step: 732/470, loss: 0.00022448692470788956 2023-01-24 05:31:39.434074: step: 734/470, loss: 0.057673707604408264 2023-01-24 05:31:40.140641: step: 736/470, loss: 0.012090279720723629 2023-01-24 05:31:40.934782: step: 738/470, loss: 0.0011684439377859235 2023-01-24 05:31:41.686825: step: 740/470, loss: 0.013645489700138569 2023-01-24 05:31:42.440180: step: 742/470, loss: 0.02157679945230484 2023-01-24 05:31:43.186812: step: 744/470, loss: 0.016018304973840714 2023-01-24 05:31:43.862875: step: 746/470, loss: 0.007724686060100794 2023-01-24 05:31:44.569289: step: 748/470, loss: 0.038802552968263626 2023-01-24 05:31:45.332841: step: 750/470, loss: 0.005301786120980978 2023-01-24 05:31:46.074132: step: 752/470, loss: 0.02522115781903267 2023-01-24 05:31:46.854461: step: 754/470, loss: 0.020645877346396446 2023-01-24 05:31:47.549937: step: 756/470, loss: 0.00777442567050457 2023-01-24 05:31:48.386684: step: 758/470, loss: 0.001848795684054494 2023-01-24 05:31:49.240147: step: 760/470, loss: 0.03325757756829262 2023-01-24 05:31:49.919117: step: 762/470, loss: 0.029930531978607178 2023-01-24 05:31:50.604187: step: 764/470, loss: 0.06273278594017029 2023-01-24 05:31:51.386189: step: 766/470, loss: 0.05899015814065933 2023-01-24 05:31:52.217788: step: 768/470, loss: 0.2313128113746643 2023-01-24 05:31:53.022173: step: 770/470, loss: 0.043592121452093124 2023-01-24 05:31:53.692649: step: 772/470, loss: 0.0039254468865692616 2023-01-24 05:31:54.477472: step: 774/470, loss: 1.0532337427139282 2023-01-24 05:31:55.236647: step: 776/470, loss: 0.0696612298488617 2023-01-24 05:31:55.952116: step: 778/470, loss: 0.05012049153447151 2023-01-24 05:31:56.746533: step: 780/470, loss: 0.029199251905083656 2023-01-24 05:31:57.505816: step: 782/470, loss: 0.6384598016738892 2023-01-24 05:31:58.236038: step: 784/470, loss: 0.05911829322576523 2023-01-24 05:31:58.988583: step: 786/470, loss: 0.010864563286304474 2023-01-24 05:31:59.664962: step: 788/470, loss: 0.005859819240868092 2023-01-24 05:32:00.487430: step: 790/470, loss: 0.020966263487935066 2023-01-24 05:32:01.279948: step: 792/470, loss: 0.33560967445373535 2023-01-24 05:32:02.037050: step: 794/470, loss: 0.040173858404159546 2023-01-24 05:32:02.883546: step: 796/470, loss: 0.025398118421435356 2023-01-24 05:32:03.653163: step: 798/470, loss: 0.06342984735965729 2023-01-24 05:32:04.405442: step: 800/470, loss: 0.06393054127693176 2023-01-24 05:32:05.185269: step: 802/470, loss: 0.04026153311133385 2023-01-24 05:32:05.930360: step: 804/470, loss: 0.0187918022274971 2023-01-24 05:32:06.626651: step: 806/470, loss: 0.3132553696632385 2023-01-24 05:32:07.396168: step: 808/470, loss: 0.007414802443236113 2023-01-24 05:32:08.103345: step: 810/470, loss: 0.005097710061818361 2023-01-24 05:32:08.863015: step: 812/470, loss: 0.011615417897701263 2023-01-24 05:32:09.595380: step: 814/470, loss: 0.12536713480949402 2023-01-24 05:32:10.395357: step: 816/470, loss: 0.029747048392891884 2023-01-24 05:32:11.153926: step: 818/470, loss: 0.050025250762701035 2023-01-24 05:32:11.921709: step: 820/470, loss: 0.01923954486846924 2023-01-24 05:32:12.668709: step: 822/470, loss: 0.039691563695669174 2023-01-24 05:32:13.410003: step: 824/470, loss: 0.03583470359444618 2023-01-24 05:32:14.098727: step: 826/470, loss: 0.06700265407562256 2023-01-24 05:32:14.844416: step: 828/470, loss: 0.02268780767917633 2023-01-24 05:32:15.518610: step: 830/470, loss: 0.05540559068322182 2023-01-24 05:32:16.255009: step: 832/470, loss: 0.03642084449529648 2023-01-24 05:32:16.919640: step: 834/470, loss: 0.007380470167845488 2023-01-24 05:32:17.639610: step: 836/470, loss: 0.04133666679263115 2023-01-24 05:32:18.314028: step: 838/470, loss: 0.09914152324199677 2023-01-24 05:32:19.071231: step: 840/470, loss: 0.0013483419315889478 2023-01-24 05:32:19.824288: step: 842/470, loss: 0.03963959217071533 2023-01-24 05:32:20.475565: step: 844/470, loss: 0.041381705552339554 2023-01-24 05:32:21.129281: step: 846/470, loss: 0.016487672924995422 2023-01-24 05:32:21.762782: step: 848/470, loss: 0.0062914639711380005 2023-01-24 05:32:22.432327: step: 850/470, loss: 0.011747756041586399 2023-01-24 05:32:23.168851: step: 852/470, loss: 0.0007929243729449809 2023-01-24 05:32:23.880781: step: 854/470, loss: 0.0006810800405219197 2023-01-24 05:32:24.577121: step: 856/470, loss: 0.002209881553426385 2023-01-24 05:32:25.318217: step: 858/470, loss: 0.028073936700820923 2023-01-24 05:32:25.989757: step: 860/470, loss: 0.16893041133880615 2023-01-24 05:32:26.763467: step: 862/470, loss: 0.0011155976681038737 2023-01-24 05:32:27.476083: step: 864/470, loss: 0.00859520398080349 2023-01-24 05:32:28.227241: step: 866/470, loss: 7.151709723984823e-05 2023-01-24 05:32:29.035314: step: 868/470, loss: 0.01832861453294754 2023-01-24 05:32:29.748138: step: 870/470, loss: 0.047499969601631165 2023-01-24 05:32:30.565433: step: 872/470, loss: 0.003909479361027479 2023-01-24 05:32:31.306148: step: 874/470, loss: 0.25040119886398315 2023-01-24 05:32:32.067758: step: 876/470, loss: 0.06178472191095352 2023-01-24 05:32:32.763279: step: 878/470, loss: 0.0017721912590786815 2023-01-24 05:32:33.501072: step: 880/470, loss: 0.007689218968153 2023-01-24 05:32:34.329357: step: 882/470, loss: 0.033220697194337845 2023-01-24 05:32:35.152832: step: 884/470, loss: 0.008743722923099995 2023-01-24 05:32:35.873499: step: 886/470, loss: 0.0026598607655614614 2023-01-24 05:32:36.633568: step: 888/470, loss: 0.006200912408530712 2023-01-24 05:32:37.311643: step: 890/470, loss: 0.033201370388269424 2023-01-24 05:32:38.027867: step: 892/470, loss: 0.005671496037393808 2023-01-24 05:32:38.766159: step: 894/470, loss: 0.0014364663511514664 2023-01-24 05:32:39.469824: step: 896/470, loss: 0.12019861489534378 2023-01-24 05:32:40.353560: step: 898/470, loss: 0.0630432739853859 2023-01-24 05:32:41.082513: step: 900/470, loss: 0.009634872898459435 2023-01-24 05:32:41.769351: step: 902/470, loss: 0.056970443576574326 2023-01-24 05:32:42.483192: step: 904/470, loss: 0.04602930322289467 2023-01-24 05:32:43.220488: step: 906/470, loss: 0.03439059853553772 2023-01-24 05:32:43.837131: step: 908/470, loss: 0.14804024994373322 2023-01-24 05:32:44.574229: step: 910/470, loss: 0.005356145091354847 2023-01-24 05:32:45.353860: step: 912/470, loss: 0.02320312149822712 2023-01-24 05:32:46.166703: step: 914/470, loss: 0.12080468982458115 2023-01-24 05:32:46.888235: step: 916/470, loss: 0.01958886720240116 2023-01-24 05:32:47.607228: step: 918/470, loss: 0.011003172025084496 2023-01-24 05:32:48.261946: step: 920/470, loss: 0.0002306982351001352 2023-01-24 05:32:49.074845: step: 922/470, loss: 0.06692475080490112 2023-01-24 05:32:49.845934: step: 924/470, loss: 0.030821437016129494 2023-01-24 05:32:50.538511: step: 926/470, loss: 0.03762578219175339 2023-01-24 05:32:51.206415: step: 928/470, loss: 0.0017827172996476293 2023-01-24 05:32:52.004421: step: 930/470, loss: 0.04079880192875862 2023-01-24 05:32:52.840735: step: 932/470, loss: 0.003436343977227807 2023-01-24 05:32:53.616466: step: 934/470, loss: 0.021370133385062218 2023-01-24 05:32:54.359833: step: 936/470, loss: 0.029703887179493904 2023-01-24 05:32:55.066688: step: 938/470, loss: 0.06702406704425812 2023-01-24 05:32:55.715593: step: 940/470, loss: 0.015583197586238384 2023-01-24 05:32:56.347221: step: 942/470, loss: 0.001184674329124391 ================================================== Loss: 0.093 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32907749675745784, 'r': 0.3209598355471221, 'f1': 0.3249679795068844}, 'combined': 0.23945009016296742, 'epoch': 28} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34396327442318714, 'r': 0.34925501710662077, 'f1': 0.34658894827374576}, 'combined': 0.2310592988491638, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3208045916546974, 'r': 0.3183696421924226, 'f1': 0.3195824789245842}, 'combined': 0.23548182657600938, 'epoch': 28} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3442884828721958, 'r': 0.3423022031633178, 'f1': 0.3432924699034238}, 'combined': 0.2288616466022825, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3173738532110092, 'r': 0.3282139468690702, 'f1': 0.3227028917910447}, 'combined': 0.23778107816182242, 'epoch': 28} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.33180501387709066, 'r': 0.34520483174520394, 'f1': 0.3383723138690029}, 'combined': 0.22558154257933521, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27439024390243905, 'r': 0.32142857142857145, 'f1': 0.2960526315789474}, 'combined': 0.19736842105263158, 'epoch': 28} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6333333333333333, 'r': 0.41304347826086957, 'f1': 0.5}, 'combined': 0.3333333333333333, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5384615384615384, 'r': 0.2413793103448276, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32338929219600726, 'r': 0.33811669829222013, 'f1': 0.3305890538033395}, 'combined': 0.24359193438140805, 'epoch': 26} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.348896802572895, 'r': 0.3519161018259297, 'f1': 0.35039994820389364}, 'combined': 0.23359996546926237, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5535714285714286, 'r': 0.2672413793103448, 'f1': 0.36046511627906974}, 'combined': 0.24031007751937983, 'epoch': 26} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:35:30.077910: step: 2/470, loss: 0.2647108733654022 2023-01-24 05:35:30.944114: step: 4/470, loss: 0.02322511561214924 2023-01-24 05:35:31.640220: step: 6/470, loss: 0.02740596979856491 2023-01-24 05:35:32.384616: step: 8/470, loss: 0.013857753947377205 2023-01-24 05:35:33.203452: step: 10/470, loss: 0.005119292065501213 2023-01-24 05:35:33.961466: step: 12/470, loss: 0.00834457017481327 2023-01-24 05:35:34.602217: step: 14/470, loss: 0.0004951293813064694 2023-01-24 05:35:35.347321: step: 16/470, loss: 0.027414359152317047 2023-01-24 05:35:36.290067: step: 18/470, loss: 0.03084324114024639 2023-01-24 05:35:36.897938: step: 20/470, loss: 0.006820783019065857 2023-01-24 05:35:37.745492: step: 22/470, loss: 0.00931725837290287 2023-01-24 05:35:38.494896: step: 24/470, loss: 0.0211730245500803 2023-01-24 05:35:39.290617: step: 26/470, loss: 0.0012839719420298934 2023-01-24 05:35:39.999802: step: 28/470, loss: 0.04594583436846733 2023-01-24 05:35:40.726517: step: 30/470, loss: 0.012252802029252052 2023-01-24 05:35:41.441663: step: 32/470, loss: 0.004927062429487705 2023-01-24 05:35:42.239880: step: 34/470, loss: 0.006308556068688631 2023-01-24 05:35:43.048997: step: 36/470, loss: 0.015124126337468624 2023-01-24 05:35:43.814086: step: 38/470, loss: 0.007813768461346626 2023-01-24 05:35:44.496190: step: 40/470, loss: 0.0029113588389009237 2023-01-24 05:35:45.242951: step: 42/470, loss: 0.011285551823675632 2023-01-24 05:35:45.994974: step: 44/470, loss: 0.3646211326122284 2023-01-24 05:35:46.761402: step: 46/470, loss: 0.006847033277153969 2023-01-24 05:35:47.484924: step: 48/470, loss: 0.06652196496725082 2023-01-24 05:35:48.256002: step: 50/470, loss: 0.2817710041999817 2023-01-24 05:35:49.030047: step: 52/470, loss: 0.0034084455110132694 2023-01-24 05:35:49.784850: step: 54/470, loss: 0.04189238324761391 2023-01-24 05:35:50.439692: step: 56/470, loss: 0.002203726675361395 2023-01-24 05:35:51.259004: step: 58/470, loss: 0.011194828897714615 2023-01-24 05:35:52.012266: step: 60/470, loss: 0.19877992570400238 2023-01-24 05:35:52.737292: step: 62/470, loss: 0.007485152687877417 2023-01-24 05:35:53.429655: step: 64/470, loss: 0.020456980913877487 2023-01-24 05:35:54.134833: step: 66/470, loss: 0.005883743055164814 2023-01-24 05:35:54.864787: step: 68/470, loss: 0.012196771800518036 2023-01-24 05:35:55.548309: step: 70/470, loss: 0.00925607979297638 2023-01-24 05:35:56.184180: step: 72/470, loss: 0.014364490285515785 2023-01-24 05:35:56.929887: step: 74/470, loss: 0.37649011611938477 2023-01-24 05:35:57.697589: step: 76/470, loss: 0.010440860874950886 2023-01-24 05:35:58.508283: step: 78/470, loss: 0.02263338305056095 2023-01-24 05:35:59.254359: step: 80/470, loss: 0.2164899855852127 2023-01-24 05:35:59.991700: step: 82/470, loss: 0.0030306533444672823 2023-01-24 05:36:00.721370: step: 84/470, loss: 0.003299787174910307 2023-01-24 05:36:01.448912: step: 86/470, loss: 0.0003010545624420047 2023-01-24 05:36:02.166842: step: 88/470, loss: 0.3850425183773041 2023-01-24 05:36:02.978859: step: 90/470, loss: 0.01980891264975071 2023-01-24 05:36:03.659485: step: 92/470, loss: 0.021628176793456078 2023-01-24 05:36:04.476075: step: 94/470, loss: 0.0020503345876932144 2023-01-24 05:36:05.247242: step: 96/470, loss: 0.008198212832212448 2023-01-24 05:36:05.932780: step: 98/470, loss: 0.03375517949461937 2023-01-24 05:36:06.595237: step: 100/470, loss: 0.0032718630973249674 2023-01-24 05:36:07.238682: step: 102/470, loss: 0.0006153931026346982 2023-01-24 05:36:07.918642: step: 104/470, loss: 0.006163314450532198 2023-01-24 05:36:08.733959: step: 106/470, loss: 0.055401623249053955 2023-01-24 05:36:09.509559: step: 108/470, loss: 0.012150800786912441 2023-01-24 05:36:10.286288: step: 110/470, loss: 0.13651181757450104 2023-01-24 05:36:11.005230: step: 112/470, loss: 0.02135992981493473 2023-01-24 05:36:11.713392: step: 114/470, loss: 0.4167734980583191 2023-01-24 05:36:12.567810: step: 116/470, loss: 0.013348422944545746 2023-01-24 05:36:13.303066: step: 118/470, loss: 0.003862533252686262 2023-01-24 05:36:13.952657: step: 120/470, loss: 0.0047416831366717815 2023-01-24 05:36:14.629309: step: 122/470, loss: 0.0004611566837411374 2023-01-24 05:36:15.481856: step: 124/470, loss: 0.0077300965785980225 2023-01-24 05:36:16.247922: step: 126/470, loss: 0.005382179748266935 2023-01-24 05:36:16.903850: step: 128/470, loss: 0.004562853369861841 2023-01-24 05:36:17.594068: step: 130/470, loss: 0.04411611706018448 2023-01-24 05:36:18.338052: step: 132/470, loss: 0.013166757300496101 2023-01-24 05:36:19.038035: step: 134/470, loss: 0.005444676149636507 2023-01-24 05:36:19.828069: step: 136/470, loss: 9.606832463759929e-05 2023-01-24 05:36:20.641140: step: 138/470, loss: 0.003493986092507839 2023-01-24 05:36:21.385703: step: 140/470, loss: 0.0017787780379876494 2023-01-24 05:36:22.137917: step: 142/470, loss: 0.040565188974142075 2023-01-24 05:36:22.946375: step: 144/470, loss: 0.01262040063738823 2023-01-24 05:36:23.736774: step: 146/470, loss: 0.01510031707584858 2023-01-24 05:36:24.484730: step: 148/470, loss: 0.0011344999074935913 2023-01-24 05:36:25.280853: step: 150/470, loss: 0.049181923270225525 2023-01-24 05:36:26.006975: step: 152/470, loss: 0.04775718227028847 2023-01-24 05:36:26.709608: step: 154/470, loss: 4.9370075430488214e-05 2023-01-24 05:36:27.413683: step: 156/470, loss: 0.4461440443992615 2023-01-24 05:36:28.119947: step: 158/470, loss: 0.15681709349155426 2023-01-24 05:36:28.812819: step: 160/470, loss: 0.009236715734004974 2023-01-24 05:36:29.613905: step: 162/470, loss: 0.032232243567705154 2023-01-24 05:36:30.433525: step: 164/470, loss: 0.002545413561165333 2023-01-24 05:36:31.153116: step: 166/470, loss: 0.009975194931030273 2023-01-24 05:36:31.898642: step: 168/470, loss: 0.015161970630288124 2023-01-24 05:36:32.629215: step: 170/470, loss: 0.005908914841711521 2023-01-24 05:36:33.359848: step: 172/470, loss: 0.021803176030516624 2023-01-24 05:36:34.143414: step: 174/470, loss: 0.1319359987974167 2023-01-24 05:36:34.854201: step: 176/470, loss: 0.047474365681409836 2023-01-24 05:36:35.643797: step: 178/470, loss: 0.02708449400961399 2023-01-24 05:36:36.349928: step: 180/470, loss: 0.07310396432876587 2023-01-24 05:36:37.054914: step: 182/470, loss: 0.04242819547653198 2023-01-24 05:36:37.752676: step: 184/470, loss: 0.005080712027847767 2023-01-24 05:36:38.482969: step: 186/470, loss: 0.011022936552762985 2023-01-24 05:36:39.128571: step: 188/470, loss: 0.004775905515998602 2023-01-24 05:36:39.857758: step: 190/470, loss: 0.014512178488075733 2023-01-24 05:36:40.532733: step: 192/470, loss: 0.018934812396764755 2023-01-24 05:36:41.274485: step: 194/470, loss: 0.00309011316858232 2023-01-24 05:36:42.003233: step: 196/470, loss: 0.004391052294522524 2023-01-24 05:36:42.751646: step: 198/470, loss: 0.00033165913191623986 2023-01-24 05:36:43.412123: step: 200/470, loss: 0.00046572668361477554 2023-01-24 05:36:44.147950: step: 202/470, loss: 0.013232901692390442 2023-01-24 05:36:44.981275: step: 204/470, loss: 0.1024237647652626 2023-01-24 05:36:45.681944: step: 206/470, loss: 0.01619911752641201 2023-01-24 05:36:46.390226: step: 208/470, loss: 0.0010863811476156116 2023-01-24 05:36:47.109656: step: 210/470, loss: 0.0015397859970107675 2023-01-24 05:36:47.834086: step: 212/470, loss: 0.07132253795862198 2023-01-24 05:36:48.496516: step: 214/470, loss: 0.004997688811272383 2023-01-24 05:36:49.255348: step: 216/470, loss: 0.00376240280456841 2023-01-24 05:36:50.056325: step: 218/470, loss: 0.03204397112131119 2023-01-24 05:36:50.746785: step: 220/470, loss: 0.008753920905292034 2023-01-24 05:36:51.470670: step: 222/470, loss: 0.005575764458626509 2023-01-24 05:36:52.271302: step: 224/470, loss: 0.0023701766040176153 2023-01-24 05:36:52.971699: step: 226/470, loss: 0.005872036796063185 2023-01-24 05:36:53.738387: step: 228/470, loss: 0.023437704890966415 2023-01-24 05:36:54.493932: step: 230/470, loss: 0.010248042643070221 2023-01-24 05:36:55.253109: step: 232/470, loss: 0.02714042365550995 2023-01-24 05:36:56.049711: step: 234/470, loss: 0.01943190023303032 2023-01-24 05:36:56.738860: step: 236/470, loss: 0.00399386091157794 2023-01-24 05:36:57.419475: step: 238/470, loss: 0.0016181677347049117 2023-01-24 05:36:58.256350: step: 240/470, loss: 0.049513738602399826 2023-01-24 05:36:59.003984: step: 242/470, loss: 0.07210097461938858 2023-01-24 05:36:59.703619: step: 244/470, loss: 0.00037181295920163393 2023-01-24 05:37:00.451725: step: 246/470, loss: 0.19889435172080994 2023-01-24 05:37:01.145082: step: 248/470, loss: 0.006400907877832651 2023-01-24 05:37:02.031766: step: 250/470, loss: 0.028619172051548958 2023-01-24 05:37:02.750719: step: 252/470, loss: 0.021511005237698555 2023-01-24 05:37:03.463755: step: 254/470, loss: 0.04519416764378548 2023-01-24 05:37:04.202418: step: 256/470, loss: 0.13093401491641998 2023-01-24 05:37:04.970572: step: 258/470, loss: 0.09740595519542694 2023-01-24 05:37:05.739308: step: 260/470, loss: 0.13155671954154968 2023-01-24 05:37:06.535961: step: 262/470, loss: 0.07691262662410736 2023-01-24 05:37:07.295667: step: 264/470, loss: 0.004110024776309729 2023-01-24 05:37:07.973190: step: 266/470, loss: 0.1348675787448883 2023-01-24 05:37:08.667496: step: 268/470, loss: 0.10196854174137115 2023-01-24 05:37:09.418994: step: 270/470, loss: 0.009700944647192955 2023-01-24 05:37:10.139323: step: 272/470, loss: 0.0025991289876401424 2023-01-24 05:37:10.875430: step: 274/470, loss: 0.0007453685393556952 2023-01-24 05:37:11.525001: step: 276/470, loss: 0.0021520478185266256 2023-01-24 05:37:12.241406: step: 278/470, loss: 0.052726294845342636 2023-01-24 05:37:12.971683: step: 280/470, loss: 0.003794698743149638 2023-01-24 05:37:13.771906: step: 282/470, loss: 0.01943463459610939 2023-01-24 05:37:14.455767: step: 284/470, loss: 0.023348847404122353 2023-01-24 05:37:15.172144: step: 286/470, loss: 0.019955376163125038 2023-01-24 05:37:15.886130: step: 288/470, loss: 0.003484874265268445 2023-01-24 05:37:16.627067: step: 290/470, loss: 0.028693674132227898 2023-01-24 05:37:17.342788: step: 292/470, loss: 0.0033248071558773518 2023-01-24 05:37:18.062638: step: 294/470, loss: 0.03285335376858711 2023-01-24 05:37:18.835599: step: 296/470, loss: 0.01780891977250576 2023-01-24 05:37:19.647223: step: 298/470, loss: 0.05349568650126457 2023-01-24 05:37:20.370943: step: 300/470, loss: 0.5065484642982483 2023-01-24 05:37:21.200008: step: 302/470, loss: 0.03260203078389168 2023-01-24 05:37:21.942040: step: 304/470, loss: 0.03035620041191578 2023-01-24 05:37:22.634585: step: 306/470, loss: 0.006546898279339075 2023-01-24 05:37:23.426119: step: 308/470, loss: 0.002254707971587777 2023-01-24 05:37:24.167414: step: 310/470, loss: 0.00036036467645317316 2023-01-24 05:37:24.842820: step: 312/470, loss: 0.003247553249821067 2023-01-24 05:37:25.514253: step: 314/470, loss: 0.06962236016988754 2023-01-24 05:37:26.243084: step: 316/470, loss: 0.02976413629949093 2023-01-24 05:37:26.919145: step: 318/470, loss: 0.004767751786857843 2023-01-24 05:37:27.661245: step: 320/470, loss: 0.10309252142906189 2023-01-24 05:37:28.452405: step: 322/470, loss: 0.10256746411323547 2023-01-24 05:37:29.271429: step: 324/470, loss: 0.05060182884335518 2023-01-24 05:37:30.013416: step: 326/470, loss: 0.006237236317247152 2023-01-24 05:37:30.742002: step: 328/470, loss: 0.05871773138642311 2023-01-24 05:37:31.462338: step: 330/470, loss: 0.24029061198234558 2023-01-24 05:37:32.163403: step: 332/470, loss: 0.19288605451583862 2023-01-24 05:37:32.898124: step: 334/470, loss: 0.007970509119331837 2023-01-24 05:37:33.666876: step: 336/470, loss: 0.04301189258694649 2023-01-24 05:37:34.395990: step: 338/470, loss: 0.01984056457877159 2023-01-24 05:37:35.117142: step: 340/470, loss: 0.03184615448117256 2023-01-24 05:37:35.808505: step: 342/470, loss: 0.002288240008056164 2023-01-24 05:37:36.561588: step: 344/470, loss: 0.00532893929630518 2023-01-24 05:37:37.310149: step: 346/470, loss: 0.02240307815372944 2023-01-24 05:37:38.083117: step: 348/470, loss: 0.005755066871643066 2023-01-24 05:37:38.798931: step: 350/470, loss: 0.028193417936563492 2023-01-24 05:37:39.460723: step: 352/470, loss: 0.0016564959660172462 2023-01-24 05:37:40.187396: step: 354/470, loss: 0.1647537648677826 2023-01-24 05:37:40.964811: step: 356/470, loss: 0.02808982878923416 2023-01-24 05:37:41.699215: step: 358/470, loss: 0.02180049754679203 2023-01-24 05:37:42.442113: step: 360/470, loss: 0.009425032883882523 2023-01-24 05:37:43.165508: step: 362/470, loss: 0.002624044893309474 2023-01-24 05:37:43.950753: step: 364/470, loss: 0.03871319815516472 2023-01-24 05:37:44.716369: step: 366/470, loss: 0.0012272871099412441 2023-01-24 05:37:45.495802: step: 368/470, loss: 0.03193151205778122 2023-01-24 05:37:46.323255: step: 370/470, loss: 0.005393751431256533 2023-01-24 05:37:46.968533: step: 372/470, loss: 0.007179801352322102 2023-01-24 05:37:47.662439: step: 374/470, loss: 0.02233264036476612 2023-01-24 05:37:48.399041: step: 376/470, loss: 0.019881457090377808 2023-01-24 05:37:49.199951: step: 378/470, loss: 0.004716763272881508 2023-01-24 05:37:49.973047: step: 380/470, loss: 0.010665499605238438 2023-01-24 05:37:50.681516: step: 382/470, loss: 0.021654922515153885 2023-01-24 05:37:51.445139: step: 384/470, loss: 0.017089908942580223 2023-01-24 05:37:52.103159: step: 386/470, loss: 0.004330948460847139 2023-01-24 05:37:52.903196: step: 388/470, loss: 0.004959953483194113 2023-01-24 05:37:53.564312: step: 390/470, loss: 0.0001449349510949105 2023-01-24 05:37:54.236319: step: 392/470, loss: 0.020653393119573593 2023-01-24 05:37:55.009094: step: 394/470, loss: 0.029949171468615532 2023-01-24 05:37:55.700064: step: 396/470, loss: 0.002220702590420842 2023-01-24 05:37:56.463195: step: 398/470, loss: 0.004065732005983591 2023-01-24 05:37:57.175881: step: 400/470, loss: 0.06798814982175827 2023-01-24 05:37:57.979056: step: 402/470, loss: 0.011189811863005161 2023-01-24 05:37:58.656840: step: 404/470, loss: 0.00252006808295846 2023-01-24 05:37:59.408052: step: 406/470, loss: 0.008702918887138367 2023-01-24 05:38:00.105109: step: 408/470, loss: 0.17309211194515228 2023-01-24 05:38:00.854693: step: 410/470, loss: 0.05191047117114067 2023-01-24 05:38:01.611478: step: 412/470, loss: 0.09695277363061905 2023-01-24 05:38:02.293868: step: 414/470, loss: 0.006256232038140297 2023-01-24 05:38:03.018779: step: 416/470, loss: 0.05564850568771362 2023-01-24 05:38:03.800102: step: 418/470, loss: 0.0020740872714668512 2023-01-24 05:38:04.509706: step: 420/470, loss: 0.005247580353170633 2023-01-24 05:38:05.407739: step: 422/470, loss: 0.003924751654267311 2023-01-24 05:38:06.115963: step: 424/470, loss: 0.010796195827424526 2023-01-24 05:38:06.785351: step: 426/470, loss: 0.02733398787677288 2023-01-24 05:38:07.588167: step: 428/470, loss: 0.020288608968257904 2023-01-24 05:38:08.299747: step: 430/470, loss: 0.006797471083700657 2023-01-24 05:38:09.007195: step: 432/470, loss: 0.05538531392812729 2023-01-24 05:38:09.709864: step: 434/470, loss: 0.011707188561558723 2023-01-24 05:38:10.403426: step: 436/470, loss: 0.025928908959031105 2023-01-24 05:38:11.042158: step: 438/470, loss: 0.011687841266393661 2023-01-24 05:38:11.780701: step: 440/470, loss: 0.02929680421948433 2023-01-24 05:38:12.476954: step: 442/470, loss: 0.00014456742792390287 2023-01-24 05:38:13.227535: step: 444/470, loss: 0.018118448555469513 2023-01-24 05:38:13.942648: step: 446/470, loss: 1.0357182025909424 2023-01-24 05:38:14.759949: step: 448/470, loss: 0.035767797380685806 2023-01-24 05:38:15.562816: step: 450/470, loss: 0.012241028249263763 2023-01-24 05:38:16.280471: step: 452/470, loss: 0.02669922076165676 2023-01-24 05:38:16.986399: step: 454/470, loss: 0.0076654767617583275 2023-01-24 05:38:17.741986: step: 456/470, loss: 0.005637957714498043 2023-01-24 05:38:18.472712: step: 458/470, loss: 0.056888647377491 2023-01-24 05:38:19.227112: step: 460/470, loss: 0.020066574215888977 2023-01-24 05:38:19.995923: step: 462/470, loss: 0.36348721385002136 2023-01-24 05:38:20.670325: step: 464/470, loss: 0.029186828061938286 2023-01-24 05:38:21.432448: step: 466/470, loss: 0.013277608901262283 2023-01-24 05:38:22.112667: step: 468/470, loss: 0.19909025728702545 2023-01-24 05:38:22.832917: step: 470/470, loss: 0.004801159258931875 2023-01-24 05:38:23.667838: step: 472/470, loss: 0.012583213858306408 2023-01-24 05:38:24.426664: step: 474/470, loss: 0.07196840643882751 2023-01-24 05:38:25.149226: step: 476/470, loss: 0.024347444996237755 2023-01-24 05:38:25.902886: step: 478/470, loss: 0.020326513797044754 2023-01-24 05:38:26.636512: step: 480/470, loss: 0.011932437308132648 2023-01-24 05:38:27.416441: step: 482/470, loss: 0.02215908281505108 2023-01-24 05:38:28.158119: step: 484/470, loss: 0.0020135114900767803 2023-01-24 05:38:28.856813: step: 486/470, loss: 0.0020898778457194567 2023-01-24 05:38:29.589442: step: 488/470, loss: 0.014602022245526314 2023-01-24 05:38:30.396925: step: 490/470, loss: 0.017223408445715904 2023-01-24 05:38:31.066321: step: 492/470, loss: 0.0020636683329939842 2023-01-24 05:38:31.820966: step: 494/470, loss: 0.01937001757323742 2023-01-24 05:38:32.503586: step: 496/470, loss: 0.0012707647401839495 2023-01-24 05:38:33.203454: step: 498/470, loss: 0.17490433156490326 2023-01-24 05:38:33.945055: step: 500/470, loss: 0.013577910140156746 2023-01-24 05:38:34.673987: step: 502/470, loss: 0.007008845917880535 2023-01-24 05:38:35.385624: step: 504/470, loss: 0.002107697306200862 2023-01-24 05:38:36.136752: step: 506/470, loss: 0.005590823013335466 2023-01-24 05:38:36.887443: step: 508/470, loss: 0.019169259816408157 2023-01-24 05:38:37.555360: step: 510/470, loss: 0.0015802793204784393 2023-01-24 05:38:38.269831: step: 512/470, loss: 0.013873277232050896 2023-01-24 05:38:39.071422: step: 514/470, loss: 0.007271386217325926 2023-01-24 05:38:39.889505: step: 516/470, loss: 0.0057012708857655525 2023-01-24 05:38:40.649510: step: 518/470, loss: 0.0004432882706169039 2023-01-24 05:38:41.323592: step: 520/470, loss: 0.0004786129866261035 2023-01-24 05:38:42.011643: step: 522/470, loss: 0.1716701239347458 2023-01-24 05:38:42.651196: step: 524/470, loss: 0.018754906952381134 2023-01-24 05:38:43.424972: step: 526/470, loss: 0.0010185715509578586 2023-01-24 05:38:44.306838: step: 528/470, loss: 0.012098163366317749 2023-01-24 05:38:45.149700: step: 530/470, loss: 0.04463057965040207 2023-01-24 05:38:45.847044: step: 532/470, loss: 0.001784288208000362 2023-01-24 05:38:46.611958: step: 534/470, loss: 0.040405258536338806 2023-01-24 05:38:47.296887: step: 536/470, loss: 0.016458706930279732 2023-01-24 05:38:48.054184: step: 538/470, loss: 0.037848204374313354 2023-01-24 05:38:48.710971: step: 540/470, loss: 0.13946174085140228 2023-01-24 05:38:49.379583: step: 542/470, loss: 0.15389235317707062 2023-01-24 05:38:50.080363: step: 544/470, loss: 0.0027951474767178297 2023-01-24 05:38:50.771297: step: 546/470, loss: 0.0260777585208416 2023-01-24 05:38:51.450085: step: 548/470, loss: 0.0007106401026248932 2023-01-24 05:38:52.170488: step: 550/470, loss: 0.011294242925941944 2023-01-24 05:38:52.885253: step: 552/470, loss: 0.0024706493131816387 2023-01-24 05:38:53.585658: step: 554/470, loss: 0.0029934581834822893 2023-01-24 05:38:54.354758: step: 556/470, loss: 0.03564335033297539 2023-01-24 05:38:54.968093: step: 558/470, loss: 0.0014540080446749926 2023-01-24 05:38:55.674156: step: 560/470, loss: 0.0077100833877921104 2023-01-24 05:38:56.470927: step: 562/470, loss: 0.0005074184155091643 2023-01-24 05:38:57.305141: step: 564/470, loss: 0.13422176241874695 2023-01-24 05:38:58.104684: step: 566/470, loss: 0.011121930554509163 2023-01-24 05:38:58.867238: step: 568/470, loss: 0.0019553580787032843 2023-01-24 05:38:59.571834: step: 570/470, loss: 0.12414314597845078 2023-01-24 05:39:00.302183: step: 572/470, loss: 0.22418749332427979 2023-01-24 05:39:01.052828: step: 574/470, loss: 0.02621358633041382 2023-01-24 05:39:01.797922: step: 576/470, loss: 0.033430345356464386 2023-01-24 05:39:02.593795: step: 578/470, loss: 0.025795383378863335 2023-01-24 05:39:03.359316: step: 580/470, loss: 0.04664403572678566 2023-01-24 05:39:04.051483: step: 582/470, loss: 0.04867984354496002 2023-01-24 05:39:04.718718: step: 584/470, loss: 0.022736769169569016 2023-01-24 05:39:05.405049: step: 586/470, loss: 0.0002613358956295997 2023-01-24 05:39:06.186289: step: 588/470, loss: 0.016873285174369812 2023-01-24 05:39:06.882471: step: 590/470, loss: 0.013911283574998379 2023-01-24 05:39:07.633927: step: 592/470, loss: 0.0659724697470665 2023-01-24 05:39:08.331394: step: 594/470, loss: 0.008108820766210556 2023-01-24 05:39:09.118215: step: 596/470, loss: 0.02690688520669937 2023-01-24 05:39:09.871346: step: 598/470, loss: 0.05805189535021782 2023-01-24 05:39:10.654152: step: 600/470, loss: 0.08737591654062271 2023-01-24 05:39:11.393376: step: 602/470, loss: 0.02579532191157341 2023-01-24 05:39:12.127056: step: 604/470, loss: 0.009756003506481647 2023-01-24 05:39:12.737074: step: 606/470, loss: 0.0011332191061228514 2023-01-24 05:39:13.511765: step: 608/470, loss: 0.014959428459405899 2023-01-24 05:39:14.285608: step: 610/470, loss: 0.014169977977871895 2023-01-24 05:39:14.969970: step: 612/470, loss: 0.007126861251890659 2023-01-24 05:39:15.651981: step: 614/470, loss: 0.03526424989104271 2023-01-24 05:39:16.401110: step: 616/470, loss: 0.0036206073127686977 2023-01-24 05:39:17.170088: step: 618/470, loss: 0.005215490702539682 2023-01-24 05:39:17.959573: step: 620/470, loss: 0.011563556268811226 2023-01-24 05:39:18.712423: step: 622/470, loss: 0.03009108267724514 2023-01-24 05:39:19.492306: step: 624/470, loss: 0.47227317094802856 2023-01-24 05:39:20.228280: step: 626/470, loss: 0.000962880440056324 2023-01-24 05:39:20.928225: step: 628/470, loss: 0.004227377008646727 2023-01-24 05:39:21.595973: step: 630/470, loss: 0.05415330082178116 2023-01-24 05:39:22.279707: step: 632/470, loss: 0.02446996420621872 2023-01-24 05:39:23.029372: step: 634/470, loss: 0.10332430154085159 2023-01-24 05:39:23.705782: step: 636/470, loss: 0.0059739393182098866 2023-01-24 05:39:24.485450: step: 638/470, loss: 0.0033245279919356108 2023-01-24 05:39:25.207904: step: 640/470, loss: 0.044418323785066605 2023-01-24 05:39:25.874220: step: 642/470, loss: 0.00199850439094007 2023-01-24 05:39:26.651766: step: 644/470, loss: 0.05675657093524933 2023-01-24 05:39:27.409404: step: 646/470, loss: 0.0008988552144728601 2023-01-24 05:39:28.137567: step: 648/470, loss: 0.013591834343969822 2023-01-24 05:39:28.847283: step: 650/470, loss: 0.003277568379417062 2023-01-24 05:39:29.576840: step: 652/470, loss: 0.002477661008015275 2023-01-24 05:39:30.313206: step: 654/470, loss: 0.011621094308793545 2023-01-24 05:39:31.001459: step: 656/470, loss: 0.008883575908839703 2023-01-24 05:39:31.679626: step: 658/470, loss: 0.016700388863682747 2023-01-24 05:39:32.406312: step: 660/470, loss: 0.013254842720925808 2023-01-24 05:39:33.159959: step: 662/470, loss: 0.04380848631262779 2023-01-24 05:39:33.913561: step: 664/470, loss: 0.2254922240972519 2023-01-24 05:39:34.685627: step: 666/470, loss: 0.00549095356836915 2023-01-24 05:39:35.384371: step: 668/470, loss: 0.041242629289627075 2023-01-24 05:39:36.113720: step: 670/470, loss: 0.002172822365537286 2023-01-24 05:39:36.782321: step: 672/470, loss: 0.0004444690130185336 2023-01-24 05:39:37.653751: step: 674/470, loss: 0.21776027977466583 2023-01-24 05:39:38.487792: step: 676/470, loss: 0.04360530152916908 2023-01-24 05:39:39.271049: step: 678/470, loss: 0.01206052303314209 2023-01-24 05:39:40.068906: step: 680/470, loss: 0.1170274093747139 2023-01-24 05:39:40.825535: step: 682/470, loss: 0.05427645891904831 2023-01-24 05:39:41.538116: step: 684/470, loss: 0.025601759552955627 2023-01-24 05:39:42.323751: step: 686/470, loss: 0.027646034955978394 2023-01-24 05:39:43.010867: step: 688/470, loss: 0.18431520462036133 2023-01-24 05:39:43.761698: step: 690/470, loss: 0.0012170057743787766 2023-01-24 05:39:44.502288: step: 692/470, loss: 0.007006289437413216 2023-01-24 05:39:45.205022: step: 694/470, loss: 0.0009528612717986107 2023-01-24 05:39:45.926862: step: 696/470, loss: 0.11783985793590546 2023-01-24 05:39:46.603628: step: 698/470, loss: 0.038966577500104904 2023-01-24 05:39:47.316269: step: 700/470, loss: 0.008607025258243084 2023-01-24 05:39:48.038329: step: 702/470, loss: 0.024364376440644264 2023-01-24 05:39:48.833149: step: 704/470, loss: 0.030996840447187424 2023-01-24 05:39:49.611426: step: 706/470, loss: 0.03350379317998886 2023-01-24 05:39:50.226505: step: 708/470, loss: 0.0056647504679858685 2023-01-24 05:39:50.928765: step: 710/470, loss: 0.011958747170865536 2023-01-24 05:39:51.703400: step: 712/470, loss: 0.07865112274885178 2023-01-24 05:39:52.454993: step: 714/470, loss: 0.03213072568178177 2023-01-24 05:39:53.230159: step: 716/470, loss: 0.04514642804861069 2023-01-24 05:39:53.961311: step: 718/470, loss: 0.004229575861245394 2023-01-24 05:39:54.744850: step: 720/470, loss: 0.025142908096313477 2023-01-24 05:39:55.491360: step: 722/470, loss: 0.16131198406219482 2023-01-24 05:39:56.126435: step: 724/470, loss: 0.0029195209499448538 2023-01-24 05:39:56.868106: step: 726/470, loss: 0.06855201721191406 2023-01-24 05:39:57.550646: step: 728/470, loss: 0.006867049727588892 2023-01-24 05:39:58.303712: step: 730/470, loss: 0.004258542787283659 2023-01-24 05:39:58.971673: step: 732/470, loss: 0.0028415187261998653 2023-01-24 05:39:59.689203: step: 734/470, loss: 0.0037412350066006184 2023-01-24 05:40:00.472586: step: 736/470, loss: 0.0005636032437905669 2023-01-24 05:40:01.191132: step: 738/470, loss: 0.03270193934440613 2023-01-24 05:40:01.904765: step: 740/470, loss: 0.023219116032123566 2023-01-24 05:40:02.626528: step: 742/470, loss: 0.01399591937661171 2023-01-24 05:40:03.314968: step: 744/470, loss: 0.12446845322847366 2023-01-24 05:40:04.002702: step: 746/470, loss: 0.0018124807393178344 2023-01-24 05:40:04.816455: step: 748/470, loss: 0.025428785011172295 2023-01-24 05:40:05.492937: step: 750/470, loss: 0.005672887898981571 2023-01-24 05:40:06.197904: step: 752/470, loss: 0.002514556283131242 2023-01-24 05:40:07.027786: step: 754/470, loss: 0.016503768041729927 2023-01-24 05:40:07.913267: step: 756/470, loss: 0.001229889108799398 2023-01-24 05:40:08.537821: step: 758/470, loss: 0.008788308128714561 2023-01-24 05:40:09.330285: step: 760/470, loss: 0.030882153660058975 2023-01-24 05:40:10.030312: step: 762/470, loss: 0.027840539813041687 2023-01-24 05:40:10.807034: step: 764/470, loss: 0.0054314760491251945 2023-01-24 05:40:11.605079: step: 766/470, loss: 0.021803250536322594 2023-01-24 05:40:12.379740: step: 768/470, loss: 0.030251774936914444 2023-01-24 05:40:13.092549: step: 770/470, loss: 0.10658083856105804 2023-01-24 05:40:13.803706: step: 772/470, loss: 0.08436030149459839 2023-01-24 05:40:14.554139: step: 774/470, loss: 0.0032615482341498137 2023-01-24 05:40:15.233196: step: 776/470, loss: 0.01066649705171585 2023-01-24 05:40:16.005682: step: 778/470, loss: 0.009199859574437141 2023-01-24 05:40:16.772310: step: 780/470, loss: 0.018781311810016632 2023-01-24 05:40:17.480605: step: 782/470, loss: 0.020045407116413116 2023-01-24 05:40:18.226775: step: 784/470, loss: 0.02236953005194664 2023-01-24 05:40:19.012645: step: 786/470, loss: 0.01765470579266548 2023-01-24 05:40:19.754279: step: 788/470, loss: 0.18303386867046356 2023-01-24 05:40:20.579993: step: 790/470, loss: 0.004157851915806532 2023-01-24 05:40:21.264895: step: 792/470, loss: 0.017420660704374313 2023-01-24 05:40:22.035681: step: 794/470, loss: 0.026377053931355476 2023-01-24 05:40:22.715422: step: 796/470, loss: 0.057920631021261215 2023-01-24 05:40:23.466833: step: 798/470, loss: 0.015336936339735985 2023-01-24 05:40:24.201699: step: 800/470, loss: 0.19589649140834808 2023-01-24 05:40:24.831245: step: 802/470, loss: 0.08331488817930222 2023-01-24 05:40:25.598353: step: 804/470, loss: 0.1330551952123642 2023-01-24 05:40:26.319583: step: 806/470, loss: 0.007211843505501747 2023-01-24 05:40:27.048059: step: 808/470, loss: 0.024340027943253517 2023-01-24 05:40:27.839187: step: 810/470, loss: 0.04757057875394821 2023-01-24 05:40:28.542775: step: 812/470, loss: 0.01020081341266632 2023-01-24 05:40:29.257189: step: 814/470, loss: 0.041293397545814514 2023-01-24 05:40:29.988924: step: 816/470, loss: 0.02104293741285801 2023-01-24 05:40:30.797375: step: 818/470, loss: 0.08056701719760895 2023-01-24 05:40:31.621335: step: 820/470, loss: 0.013175521977245808 2023-01-24 05:40:32.342246: step: 822/470, loss: 0.04553241282701492 2023-01-24 05:40:33.092470: step: 824/470, loss: 0.07833508402109146 2023-01-24 05:40:33.862076: step: 826/470, loss: 0.027197131887078285 2023-01-24 05:40:34.609999: step: 828/470, loss: 0.007502416614443064 2023-01-24 05:40:35.374741: step: 830/470, loss: 0.04629238322377205 2023-01-24 05:40:36.079745: step: 832/470, loss: 0.0031321379356086254 2023-01-24 05:40:36.760678: step: 834/470, loss: 0.007129800971597433 2023-01-24 05:40:37.457645: step: 836/470, loss: 0.019213836640119553 2023-01-24 05:40:38.147964: step: 838/470, loss: 0.003881721990182996 2023-01-24 05:40:39.030770: step: 840/470, loss: 0.02087746188044548 2023-01-24 05:40:39.725393: step: 842/470, loss: 0.001440670806914568 2023-01-24 05:40:40.429390: step: 844/470, loss: 0.011802955530583858 2023-01-24 05:40:41.140526: step: 846/470, loss: 0.0013450667029246688 2023-01-24 05:40:41.837813: step: 848/470, loss: 0.03148926421999931 2023-01-24 05:40:42.593962: step: 850/470, loss: 0.08651398122310638 2023-01-24 05:40:43.448873: step: 852/470, loss: 0.03913048282265663 2023-01-24 05:40:44.145868: step: 854/470, loss: 0.04345300793647766 2023-01-24 05:40:44.857429: step: 856/470, loss: 0.0018893154338002205 2023-01-24 05:40:45.503296: step: 858/470, loss: 8.755026647122577e-05 2023-01-24 05:40:46.253503: step: 860/470, loss: 0.02182687819004059 2023-01-24 05:40:46.982286: step: 862/470, loss: 0.013897470198571682 2023-01-24 05:40:47.703705: step: 864/470, loss: 0.00483159814029932 2023-01-24 05:40:48.449765: step: 866/470, loss: 0.009607330895960331 2023-01-24 05:40:49.169433: step: 868/470, loss: 0.015445969067513943 2023-01-24 05:40:49.934530: step: 870/470, loss: 0.032280661165714264 2023-01-24 05:40:50.664561: step: 872/470, loss: 0.09819857031106949 2023-01-24 05:40:51.424554: step: 874/470, loss: 0.022332623600959778 2023-01-24 05:40:52.154058: step: 876/470, loss: 0.05316994711756706 2023-01-24 05:40:52.856217: step: 878/470, loss: 0.09301023185253143 2023-01-24 05:40:53.571167: step: 880/470, loss: 0.014414233155548573 2023-01-24 05:40:54.271292: step: 882/470, loss: 0.7137559056282043 2023-01-24 05:40:55.042229: step: 884/470, loss: 0.020797649398446083 2023-01-24 05:40:55.804364: step: 886/470, loss: 0.05245582386851311 2023-01-24 05:40:56.590711: step: 888/470, loss: 0.09029704332351685 2023-01-24 05:40:57.219907: step: 890/470, loss: 0.007554202806204557 2023-01-24 05:40:57.946103: step: 892/470, loss: 0.03437653183937073 2023-01-24 05:40:58.641993: step: 894/470, loss: 0.0020255944691598415 2023-01-24 05:40:59.394938: step: 896/470, loss: 0.11641070246696472 2023-01-24 05:41:00.149303: step: 898/470, loss: 0.11411511898040771 2023-01-24 05:41:00.857256: step: 900/470, loss: 0.028752509504556656 2023-01-24 05:41:01.564803: step: 902/470, loss: 0.020708488300442696 2023-01-24 05:41:02.315279: step: 904/470, loss: 0.06170295178890228 2023-01-24 05:41:03.299070: step: 906/470, loss: 0.030286438763141632 2023-01-24 05:41:04.090691: step: 908/470, loss: 0.011742115952074528 2023-01-24 05:41:04.802758: step: 910/470, loss: 0.008268856443464756 2023-01-24 05:41:05.524209: step: 912/470, loss: 0.07056379318237305 2023-01-24 05:41:06.166442: step: 914/470, loss: 0.0315357968211174 2023-01-24 05:41:06.928229: step: 916/470, loss: 0.032394226640462875 2023-01-24 05:41:07.690267: step: 918/470, loss: 0.012661329470574856 2023-01-24 05:41:08.378921: step: 920/470, loss: 0.06156188249588013 2023-01-24 05:41:09.144582: step: 922/470, loss: 0.0035831385757774115 2023-01-24 05:41:09.902481: step: 924/470, loss: 0.00764728058129549 2023-01-24 05:41:10.698482: step: 926/470, loss: 0.026822997257113457 2023-01-24 05:41:11.448382: step: 928/470, loss: 0.01422158908098936 2023-01-24 05:41:12.232985: step: 930/470, loss: 0.012633465230464935 2023-01-24 05:41:12.971838: step: 932/470, loss: 0.0021819123066961765 2023-01-24 05:41:13.675696: step: 934/470, loss: 0.09248220920562744 2023-01-24 05:41:14.364683: step: 936/470, loss: 0.019571691751480103 2023-01-24 05:41:15.089998: step: 938/470, loss: 0.37316635251045227 2023-01-24 05:41:15.938876: step: 940/470, loss: 0.016227828338742256 2023-01-24 05:41:16.615115: step: 942/470, loss: 0.004429019521921873 ================================================== Loss: 0.044 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.330404081733869, 'r': 0.3241345545662434, 'f1': 0.3272392916789469}, 'combined': 0.2411236886055398, 'epoch': 29} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.37146259616240523, 'r': 0.3228867182027061, 'f1': 0.34547550095762786}, 'combined': 0.2303170006384185, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3334079971329101, 'r': 0.3251835114351343, 'f1': 0.32924440062692756}, 'combined': 0.24260113730405186, 'epoch': 29} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3819903799794493, 'r': 0.3279975089631233, 'f1': 0.3529409304931694}, 'combined': 0.23529395366211286, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32414667896678967, 'r': 0.3333728652751423, 'f1': 0.3286950420954163}, 'combined': 0.24219634680714883, 'epoch': 29} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36640905493478276, 'r': 0.3297681494413045, 'f1': 0.34712436783295214}, 'combined': 0.23141624522196805, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28289473684210525, 'r': 0.30714285714285716, 'f1': 0.29452054794520544}, 'combined': 0.19634703196347028, 'epoch': 29} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.32608695652173914, 'f1': 0.39473684210526316}, 'combined': 0.2631578947368421, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.2413793103448276, 'f1': 0.32558139534883723}, 'combined': 0.21705426356589147, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32338929219600726, 'r': 0.33811669829222013, 'f1': 0.3305890538033395}, 'combined': 0.24359193438140805, 'epoch': 26} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.348896802572895, 'r': 0.3519161018259297, 'f1': 0.35039994820389364}, 'combined': 0.23359996546926237, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5535714285714286, 'r': 0.2672413793103448, 'f1': 0.36046511627906974}, 'combined': 0.24031007751937983, 'epoch': 26} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:43:50.126805: step: 2/470, loss: 0.0015534983249381185 2023-01-24 05:43:50.817326: step: 4/470, loss: 0.0009303026017732918 2023-01-24 05:43:51.535209: step: 6/470, loss: 0.07198537886142731 2023-01-24 05:43:52.285675: step: 8/470, loss: 0.0074896845035254955 2023-01-24 05:43:53.070099: step: 10/470, loss: 0.0020844112150371075 2023-01-24 05:43:53.851340: step: 12/470, loss: 0.3400344252586365 2023-01-24 05:43:54.558380: step: 14/470, loss: 0.019580742344260216 2023-01-24 05:43:55.286252: step: 16/470, loss: 0.005592713598161936 2023-01-24 05:43:55.999740: step: 18/470, loss: 0.004001240245997906 2023-01-24 05:43:56.789395: step: 20/470, loss: 0.04294969141483307 2023-01-24 05:43:57.520116: step: 22/470, loss: 0.030762318521738052 2023-01-24 05:43:58.251625: step: 24/470, loss: 0.07296521216630936 2023-01-24 05:43:58.928654: step: 26/470, loss: 0.019644813612103462 2023-01-24 05:43:59.688723: step: 28/470, loss: 0.0019523368682712317 2023-01-24 05:44:00.399248: step: 30/470, loss: 0.004232059698551893 2023-01-24 05:44:01.140216: step: 32/470, loss: 0.00470116687938571 2023-01-24 05:44:01.875260: step: 34/470, loss: 3.0470375349977985e-05 2023-01-24 05:44:02.673216: step: 36/470, loss: 0.014174572192132473 2023-01-24 05:44:03.477529: step: 38/470, loss: 0.049307264387607574 2023-01-24 05:44:04.219266: step: 40/470, loss: 0.0007710535428486764 2023-01-24 05:44:04.930759: step: 42/470, loss: 0.004445178434252739 2023-01-24 05:44:05.656468: step: 44/470, loss: 0.006911000236868858 2023-01-24 05:44:06.319326: step: 46/470, loss: 0.007463586516678333 2023-01-24 05:44:07.098887: step: 48/470, loss: 0.00076903315493837 2023-01-24 05:44:07.919223: step: 50/470, loss: 0.03810786083340645 2023-01-24 05:44:08.659173: step: 52/470, loss: 0.029291216284036636 2023-01-24 05:44:09.331108: step: 54/470, loss: 0.00221418053843081 2023-01-24 05:44:10.118173: step: 56/470, loss: 0.0012072960380464792 2023-01-24 05:44:10.897915: step: 58/470, loss: 0.015327895060181618 2023-01-24 05:44:11.621799: step: 60/470, loss: 0.0036242054775357246 2023-01-24 05:44:12.376568: step: 62/470, loss: 0.039196036756038666 2023-01-24 05:44:13.067935: step: 64/470, loss: 0.004683348350226879 2023-01-24 05:44:13.776086: step: 66/470, loss: 0.005313752684742212 2023-01-24 05:44:14.526702: step: 68/470, loss: 0.003183474065735936 2023-01-24 05:44:15.201180: step: 70/470, loss: 0.042856525629758835 2023-01-24 05:44:15.825019: step: 72/470, loss: 0.015468944795429707 2023-01-24 05:44:16.474085: step: 74/470, loss: 0.003673919942229986 2023-01-24 05:44:17.140994: step: 76/470, loss: 0.0050133909098804 2023-01-24 05:44:17.830976: step: 78/470, loss: 0.0003503368643578142 2023-01-24 05:44:18.522563: step: 80/470, loss: 0.00029192844522185624 2023-01-24 05:44:19.299256: step: 82/470, loss: 0.002908646361902356 2023-01-24 05:44:20.078795: step: 84/470, loss: 0.041369758546352386 2023-01-24 05:44:20.858696: step: 86/470, loss: 0.011434613727033138 2023-01-24 05:44:21.582778: step: 88/470, loss: 0.014062466099858284 2023-01-24 05:44:22.320354: step: 90/470, loss: 0.012946287170052528 2023-01-24 05:44:23.051857: step: 92/470, loss: 0.0006630457355640829 2023-01-24 05:44:23.744339: step: 94/470, loss: 0.02848971076309681 2023-01-24 05:44:24.428839: step: 96/470, loss: 0.0033628770615905523 2023-01-24 05:44:25.177851: step: 98/470, loss: 0.02601010911166668 2023-01-24 05:44:25.900120: step: 100/470, loss: 0.13308343291282654 2023-01-24 05:44:26.666969: step: 102/470, loss: 0.02286795899271965 2023-01-24 05:44:27.499489: step: 104/470, loss: 0.006091211922466755 2023-01-24 05:44:28.195584: step: 106/470, loss: 0.0013653002679347992 2023-01-24 05:44:28.964834: step: 108/470, loss: 0.008479597978293896 2023-01-24 05:44:29.703647: step: 110/470, loss: 0.035104185342788696 2023-01-24 05:44:30.381392: step: 112/470, loss: 0.01217788178473711 2023-01-24 05:44:31.062174: step: 114/470, loss: 0.009542414918541908 2023-01-24 05:44:31.783641: step: 116/470, loss: 0.0510413832962513 2023-01-24 05:44:32.421108: step: 118/470, loss: 0.010977623052895069 2023-01-24 05:44:33.119477: step: 120/470, loss: 0.008249633945524693 2023-01-24 05:44:33.864668: step: 122/470, loss: 0.013754217885434628 2023-01-24 05:44:34.655530: step: 124/470, loss: 0.000642959144897759 2023-01-24 05:44:35.407718: step: 126/470, loss: 0.01749761775135994 2023-01-24 05:44:36.096661: step: 128/470, loss: 0.033281996846199036 2023-01-24 05:44:36.859143: step: 130/470, loss: 0.0049248565919697285 2023-01-24 05:44:37.554439: step: 132/470, loss: 0.080209881067276 2023-01-24 05:44:38.219127: step: 134/470, loss: 0.0143387196585536 2023-01-24 05:44:38.942730: step: 136/470, loss: 0.04557420313358307 2023-01-24 05:44:39.670980: step: 138/470, loss: 0.005927293561398983 2023-01-24 05:44:40.319606: step: 140/470, loss: 0.004758467432111502 2023-01-24 05:44:41.123487: step: 142/470, loss: 0.01647905260324478 2023-01-24 05:44:41.835012: step: 144/470, loss: 0.0004897841135971248 2023-01-24 05:44:42.569404: step: 146/470, loss: 0.2584097981452942 2023-01-24 05:44:43.295693: step: 148/470, loss: 0.01904984377324581 2023-01-24 05:44:43.988500: step: 150/470, loss: 0.000754874141421169 2023-01-24 05:44:44.670681: step: 152/470, loss: 0.0361163467168808 2023-01-24 05:44:45.495245: step: 154/470, loss: 0.046868957579135895 2023-01-24 05:44:46.170703: step: 156/470, loss: 0.006643644534051418 2023-01-24 05:44:46.940163: step: 158/470, loss: 0.03360613062977791 2023-01-24 05:44:47.636006: step: 160/470, loss: 0.0265851728618145 2023-01-24 05:44:48.328223: step: 162/470, loss: 0.002698094118386507 2023-01-24 05:44:49.030542: step: 164/470, loss: 0.0029194336384534836 2023-01-24 05:44:49.735809: step: 166/470, loss: 0.00015936991258058697 2023-01-24 05:44:50.477841: step: 168/470, loss: 0.011427337303757668 2023-01-24 05:44:51.195674: step: 170/470, loss: 0.0039392574690282345 2023-01-24 05:44:51.980852: step: 172/470, loss: 0.002686795312911272 2023-01-24 05:44:52.702452: step: 174/470, loss: 0.0013270019553601742 2023-01-24 05:44:53.437327: step: 176/470, loss: 0.0032820103224366903 2023-01-24 05:44:54.133082: step: 178/470, loss: 0.008707202039659023 2023-01-24 05:44:54.855009: step: 180/470, loss: 0.07832004129886627 2023-01-24 05:44:55.540316: step: 182/470, loss: 0.00024435168597847223 2023-01-24 05:44:56.225995: step: 184/470, loss: 0.019537262618541718 2023-01-24 05:44:57.025951: step: 186/470, loss: 0.024349162355065346 2023-01-24 05:44:57.748187: step: 188/470, loss: 0.026107704266905785 2023-01-24 05:44:58.448920: step: 190/470, loss: 0.2534019351005554 2023-01-24 05:44:59.145683: step: 192/470, loss: 0.022054988890886307 2023-01-24 05:44:59.785711: step: 194/470, loss: 0.00017441021918784827 2023-01-24 05:45:00.445993: step: 196/470, loss: 0.0004912808071821928 2023-01-24 05:45:01.161662: step: 198/470, loss: 0.006434681825339794 2023-01-24 05:45:01.949772: step: 200/470, loss: 0.041142772883176804 2023-01-24 05:45:02.701149: step: 202/470, loss: 0.010733246803283691 2023-01-24 05:45:03.375300: step: 204/470, loss: 0.00284022931009531 2023-01-24 05:45:04.126428: step: 206/470, loss: 0.04028492793440819 2023-01-24 05:45:04.987113: step: 208/470, loss: 0.04366816207766533 2023-01-24 05:45:05.655897: step: 210/470, loss: 0.002009680727496743 2023-01-24 05:45:06.371512: step: 212/470, loss: 0.04488565772771835 2023-01-24 05:45:07.112109: step: 214/470, loss: 0.004889700096100569 2023-01-24 05:45:07.844397: step: 216/470, loss: 0.029589461162686348 2023-01-24 05:45:08.611827: step: 218/470, loss: 0.08454018831253052 2023-01-24 05:45:09.354154: step: 220/470, loss: 0.015738148242235184 2023-01-24 05:45:10.085796: step: 222/470, loss: 3.354827404022217 2023-01-24 05:45:10.816110: step: 224/470, loss: 0.0028114912565797567 2023-01-24 05:45:11.510029: step: 226/470, loss: 0.03550800308585167 2023-01-24 05:45:12.220623: step: 228/470, loss: 0.044054701924324036 2023-01-24 05:45:12.935303: step: 230/470, loss: 0.09976823627948761 2023-01-24 05:45:13.697177: step: 232/470, loss: 0.008051842451095581 2023-01-24 05:45:14.470954: step: 234/470, loss: 0.08605451136827469 2023-01-24 05:45:15.238130: step: 236/470, loss: 0.0013634329661726952 2023-01-24 05:45:15.952326: step: 238/470, loss: 0.0263601616024971 2023-01-24 05:45:16.624835: step: 240/470, loss: 0.05547104775905609 2023-01-24 05:45:17.371427: step: 242/470, loss: 0.03622487559914589 2023-01-24 05:45:18.042975: step: 244/470, loss: 0.0007956080371513963 2023-01-24 05:45:18.777047: step: 246/470, loss: 0.0422469861805439 2023-01-24 05:45:19.592989: step: 248/470, loss: 0.019485946744680405 2023-01-24 05:45:20.289763: step: 250/470, loss: 4.62792013422586e-05 2023-01-24 05:45:21.132237: step: 252/470, loss: 0.011375891044735909 2023-01-24 05:45:21.833347: step: 254/470, loss: 0.027469327673316002 2023-01-24 05:45:22.537366: step: 256/470, loss: 0.0730309933423996 2023-01-24 05:45:23.371586: step: 258/470, loss: 0.1251501590013504 2023-01-24 05:45:24.408472: step: 260/470, loss: 0.0181076992303133 2023-01-24 05:45:25.163136: step: 262/470, loss: 0.7937953472137451 2023-01-24 05:45:25.907897: step: 264/470, loss: 0.004769394174218178 2023-01-24 05:45:26.663831: step: 266/470, loss: 0.03229885548353195 2023-01-24 05:45:27.346069: step: 268/470, loss: 0.007303445599973202 2023-01-24 05:45:28.132981: step: 270/470, loss: 0.058055225759744644 2023-01-24 05:45:28.912350: step: 272/470, loss: 0.34732547402381897 2023-01-24 05:45:29.561265: step: 274/470, loss: 0.015425494872033596 2023-01-24 05:45:30.245864: step: 276/470, loss: 0.012530090287327766 2023-01-24 05:45:31.055708: step: 278/470, loss: 0.015414786525070667 2023-01-24 05:45:31.889826: step: 280/470, loss: 0.031069021672010422 2023-01-24 05:45:32.560041: step: 282/470, loss: 0.026498055085539818 2023-01-24 05:45:33.348173: step: 284/470, loss: 0.004659529775381088 2023-01-24 05:45:34.178206: step: 286/470, loss: 0.02883639559149742 2023-01-24 05:45:34.968942: step: 288/470, loss: 0.0019614913035184145 2023-01-24 05:45:35.723410: step: 290/470, loss: 0.021054117009043694 2023-01-24 05:45:36.437503: step: 292/470, loss: 0.0282586757093668 2023-01-24 05:45:37.173445: step: 294/470, loss: 0.05488167703151703 2023-01-24 05:45:38.036843: step: 296/470, loss: 0.0710349828004837 2023-01-24 05:45:38.730036: step: 298/470, loss: 0.028987431898713112 2023-01-24 05:45:39.435601: step: 300/470, loss: 0.017589028924703598 2023-01-24 05:45:40.227839: step: 302/470, loss: 0.03203602880239487 2023-01-24 05:45:40.942960: step: 304/470, loss: 0.024194780737161636 2023-01-24 05:45:41.667312: step: 306/470, loss: 0.0027428928297013044 2023-01-24 05:45:42.311405: step: 308/470, loss: 0.08572112768888474 2023-01-24 05:45:43.180738: step: 310/470, loss: 0.06660182774066925 2023-01-24 05:45:43.935380: step: 312/470, loss: 0.09423809498548508 2023-01-24 05:45:44.729710: step: 314/470, loss: 0.03433975949883461 2023-01-24 05:45:45.584243: step: 316/470, loss: 0.04957527294754982 2023-01-24 05:45:46.296732: step: 318/470, loss: 0.0038216973189264536 2023-01-24 05:45:47.065464: step: 320/470, loss: 0.0018495884723961353 2023-01-24 05:45:47.824907: step: 322/470, loss: 0.14368680119514465 2023-01-24 05:45:48.521665: step: 324/470, loss: 0.002256029052659869 2023-01-24 05:45:49.281234: step: 326/470, loss: 0.017123982310295105 2023-01-24 05:45:49.949763: step: 328/470, loss: 0.18741732835769653 2023-01-24 05:45:50.670897: step: 330/470, loss: 0.014922713860869408 2023-01-24 05:45:51.376622: step: 332/470, loss: 0.03767160698771477 2023-01-24 05:45:52.089305: step: 334/470, loss: 0.05126776546239853 2023-01-24 05:45:52.738067: step: 336/470, loss: 0.01508275605738163 2023-01-24 05:45:53.440890: step: 338/470, loss: 0.00034520160988904536 2023-01-24 05:45:54.175513: step: 340/470, loss: 0.04148537665605545 2023-01-24 05:45:54.824769: step: 342/470, loss: 5.96605495957192e-05 2023-01-24 05:45:55.565092: step: 344/470, loss: 0.005885576829314232 2023-01-24 05:45:56.255958: step: 346/470, loss: 0.008368241600692272 2023-01-24 05:45:56.916253: step: 348/470, loss: 0.015697935596108437 2023-01-24 05:45:57.749217: step: 350/470, loss: 0.019847042858600616 2023-01-24 05:45:58.496445: step: 352/470, loss: 0.02182493358850479 2023-01-24 05:45:59.226858: step: 354/470, loss: 0.01692495495080948 2023-01-24 05:45:59.978288: step: 356/470, loss: 0.011305560357868671 2023-01-24 05:46:00.671349: step: 358/470, loss: 0.0026786348316818476 2023-01-24 05:46:01.393210: step: 360/470, loss: 0.0002164768666261807 2023-01-24 05:46:02.064802: step: 362/470, loss: 0.6910086870193481 2023-01-24 05:46:02.804900: step: 364/470, loss: 0.00040226319106295705 2023-01-24 05:46:03.509839: step: 366/470, loss: 0.016148222610354424 2023-01-24 05:46:04.201661: step: 368/470, loss: 0.027793431654572487 2023-01-24 05:46:04.959265: step: 370/470, loss: 0.006164975464344025 2023-01-24 05:46:05.649410: step: 372/470, loss: 0.01071165595203638 2023-01-24 05:46:06.337321: step: 374/470, loss: 0.0014239969896152616 2023-01-24 05:46:07.047438: step: 376/470, loss: 0.0013824844500049949 2023-01-24 05:46:07.802148: step: 378/470, loss: 0.004417424090206623 2023-01-24 05:46:08.540555: step: 380/470, loss: 0.02893391251564026 2023-01-24 05:46:09.238636: step: 382/470, loss: 0.030614320188760757 2023-01-24 05:46:09.924892: step: 384/470, loss: 0.001032169908285141 2023-01-24 05:46:10.629772: step: 386/470, loss: 0.007282666862010956 2023-01-24 05:46:11.326204: step: 388/470, loss: 0.01349354162812233 2023-01-24 05:46:12.049984: step: 390/470, loss: 0.015124008990824223 2023-01-24 05:46:12.747175: step: 392/470, loss: 0.013127674348652363 2023-01-24 05:46:13.394450: step: 394/470, loss: 0.032045748084783554 2023-01-24 05:46:14.133057: step: 396/470, loss: 0.2856377959251404 2023-01-24 05:46:14.862289: step: 398/470, loss: 0.020067989826202393 2023-01-24 05:46:15.636122: step: 400/470, loss: 0.008606786839663982 2023-01-24 05:46:16.384812: step: 402/470, loss: 0.018849369138479233 2023-01-24 05:46:17.082924: step: 404/470, loss: 0.002814099658280611 2023-01-24 05:46:17.741917: step: 406/470, loss: 0.0020936380606144667 2023-01-24 05:46:18.616924: step: 408/470, loss: 0.011628488078713417 2023-01-24 05:46:19.368575: step: 410/470, loss: 0.007815473712980747 2023-01-24 05:46:20.062329: step: 412/470, loss: 0.002352564362809062 2023-01-24 05:46:20.791089: step: 414/470, loss: 0.03523825854063034 2023-01-24 05:46:21.483974: step: 416/470, loss: 0.006999637931585312 2023-01-24 05:46:22.227888: step: 418/470, loss: 0.024556517601013184 2023-01-24 05:46:22.993479: step: 420/470, loss: 0.0016526913968846202 2023-01-24 05:46:23.729928: step: 422/470, loss: 0.013159074820578098 2023-01-24 05:46:24.386804: step: 424/470, loss: 0.023464815691113472 2023-01-24 05:46:25.155969: step: 426/470, loss: 0.0028531746938824654 2023-01-24 05:46:25.836710: step: 428/470, loss: 0.036497414112091064 2023-01-24 05:46:26.610777: step: 430/470, loss: 0.004592817276716232 2023-01-24 05:46:27.327168: step: 432/470, loss: 0.0013164383126422763 2023-01-24 05:46:28.000267: step: 434/470, loss: 0.00658793468028307 2023-01-24 05:46:28.775650: step: 436/470, loss: 0.514223575592041 2023-01-24 05:46:29.466549: step: 438/470, loss: 0.003137575928121805 2023-01-24 05:46:30.252355: step: 440/470, loss: 0.04794734716415405 2023-01-24 05:46:30.943210: step: 442/470, loss: 0.007328708656132221 2023-01-24 05:46:31.704121: step: 444/470, loss: 0.005198894999921322 2023-01-24 05:46:32.488126: step: 446/470, loss: 0.006771470420062542 2023-01-24 05:46:33.206523: step: 448/470, loss: 0.017400279641151428 2023-01-24 05:46:33.871180: step: 450/470, loss: 0.025920528918504715 2023-01-24 05:46:34.539727: step: 452/470, loss: 0.0040763262659311295 2023-01-24 05:46:35.257296: step: 454/470, loss: 0.02771720290184021 2023-01-24 05:46:36.050108: step: 456/470, loss: 0.007550915703177452 2023-01-24 05:46:36.785930: step: 458/470, loss: 0.021621685475111008 2023-01-24 05:46:37.507152: step: 460/470, loss: 0.022162331268191338 2023-01-24 05:46:38.247215: step: 462/470, loss: 0.0016908899415284395 2023-01-24 05:46:38.943330: step: 464/470, loss: 0.008422368206083775 2023-01-24 05:46:39.658254: step: 466/470, loss: 0.04641232267022133 2023-01-24 05:46:40.367323: step: 468/470, loss: 0.004679789766669273 2023-01-24 05:46:41.122811: step: 470/470, loss: 0.01792057789862156 2023-01-24 05:46:41.901310: step: 472/470, loss: 0.0020182339940220118 2023-01-24 05:46:42.695701: step: 474/470, loss: 0.011343245394527912 2023-01-24 05:46:43.471504: step: 476/470, loss: 0.08873055130243301 2023-01-24 05:46:44.255697: step: 478/470, loss: 0.007021929137408733 2023-01-24 05:46:44.929638: step: 480/470, loss: 0.0010284853633493185 2023-01-24 05:46:45.676313: step: 482/470, loss: 0.002863482804968953 2023-01-24 05:46:46.479466: step: 484/470, loss: 0.00471901660785079 2023-01-24 05:46:47.209259: step: 486/470, loss: 0.1730116307735443 2023-01-24 05:46:48.009346: step: 488/470, loss: 0.005156666971743107 2023-01-24 05:46:48.703798: step: 490/470, loss: 0.0037299960386008024 2023-01-24 05:46:49.374811: step: 492/470, loss: 0.003205210203304887 2023-01-24 05:46:50.182600: step: 494/470, loss: 0.008830721490085125 2023-01-24 05:46:50.907429: step: 496/470, loss: 0.06417898088693619 2023-01-24 05:46:51.702160: step: 498/470, loss: 0.04083579033613205 2023-01-24 05:46:52.504996: step: 500/470, loss: 3.602713108062744 2023-01-24 05:46:53.303403: step: 502/470, loss: 0.004617447033524513 2023-01-24 05:46:54.006266: step: 504/470, loss: 0.028277534991502762 2023-01-24 05:46:54.746092: step: 506/470, loss: 0.03145125135779381 2023-01-24 05:46:55.477761: step: 508/470, loss: 0.014155956916511059 2023-01-24 05:46:56.213679: step: 510/470, loss: 0.022612107917666435 2023-01-24 05:46:56.989402: step: 512/470, loss: 0.009106074459850788 2023-01-24 05:46:57.784245: step: 514/470, loss: 0.027399810031056404 2023-01-24 05:46:58.638835: step: 516/470, loss: 0.014880353584885597 2023-01-24 05:46:59.492812: step: 518/470, loss: 0.002752532484009862 2023-01-24 05:47:00.329355: step: 520/470, loss: 0.01837068982422352 2023-01-24 05:47:01.076629: step: 522/470, loss: 0.0025046353694051504 2023-01-24 05:47:01.825027: step: 524/470, loss: 0.02363811433315277 2023-01-24 05:47:02.619075: step: 526/470, loss: 0.02377389930188656 2023-01-24 05:47:03.357350: step: 528/470, loss: 0.0021252078004181385 2023-01-24 05:47:04.088902: step: 530/470, loss: 0.011902587488293648 2023-01-24 05:47:04.862634: step: 532/470, loss: 0.0007203198038041592 2023-01-24 05:47:05.566143: step: 534/470, loss: 0.015076599083840847 2023-01-24 05:47:06.361487: step: 536/470, loss: 0.021700425073504448 2023-01-24 05:47:07.069454: step: 538/470, loss: 0.08858056366443634 2023-01-24 05:47:07.816732: step: 540/470, loss: 0.02474750392138958 2023-01-24 05:47:08.490879: step: 542/470, loss: 0.009075379930436611 2023-01-24 05:47:09.188034: step: 544/470, loss: 0.014052581042051315 2023-01-24 05:47:09.955801: step: 546/470, loss: 0.005053224507719278 2023-01-24 05:47:10.715502: step: 548/470, loss: 0.00576430419459939 2023-01-24 05:47:11.491933: step: 550/470, loss: 0.005547594279050827 2023-01-24 05:47:12.279317: step: 552/470, loss: 0.007961531169712543 2023-01-24 05:47:13.027137: step: 554/470, loss: 0.0057564605958759785 2023-01-24 05:47:13.744331: step: 556/470, loss: 0.007082703057676554 2023-01-24 05:47:14.553496: step: 558/470, loss: 0.03066232055425644 2023-01-24 05:47:15.277485: step: 560/470, loss: 0.006466844584792852 2023-01-24 05:47:16.096748: step: 562/470, loss: 0.018887002021074295 2023-01-24 05:47:16.874016: step: 564/470, loss: 0.04409300163388252 2023-01-24 05:47:17.587690: step: 566/470, loss: 0.01083281822502613 2023-01-24 05:47:18.255696: step: 568/470, loss: 0.001143494970165193 2023-01-24 05:47:19.072120: step: 570/470, loss: 0.032482851296663284 2023-01-24 05:47:19.768011: step: 572/470, loss: 0.016237609088420868 2023-01-24 05:47:20.435367: step: 574/470, loss: 0.011964457109570503 2023-01-24 05:47:21.349875: step: 576/470, loss: 0.045262232422828674 2023-01-24 05:47:22.153446: step: 578/470, loss: 0.010416206903755665 2023-01-24 05:47:22.889129: step: 580/470, loss: 0.01974216289818287 2023-01-24 05:47:23.686227: step: 582/470, loss: 0.004339354112744331 2023-01-24 05:47:24.372450: step: 584/470, loss: 0.011843382380902767 2023-01-24 05:47:25.050952: step: 586/470, loss: 0.0004325766349211335 2023-01-24 05:47:25.828997: step: 588/470, loss: 0.011639975011348724 2023-01-24 05:47:26.538503: step: 590/470, loss: 0.01775071956217289 2023-01-24 05:47:27.244434: step: 592/470, loss: 0.028062820434570312 2023-01-24 05:47:27.993095: step: 594/470, loss: 0.01377648115158081 2023-01-24 05:47:28.689241: step: 596/470, loss: 0.0007976947817951441 2023-01-24 05:47:29.475135: step: 598/470, loss: 0.03537141531705856 2023-01-24 05:47:30.185986: step: 600/470, loss: 0.035397402942180634 2023-01-24 05:47:30.972165: step: 602/470, loss: 0.002034268341958523 2023-01-24 05:47:31.687447: step: 604/470, loss: 0.01001753006130457 2023-01-24 05:47:32.370700: step: 606/470, loss: 1.0455894470214844 2023-01-24 05:47:33.221415: step: 608/470, loss: 0.027688954025506973 2023-01-24 05:47:34.046217: step: 610/470, loss: 0.40734100341796875 2023-01-24 05:47:34.715321: step: 612/470, loss: 0.041962411254644394 2023-01-24 05:47:35.517208: step: 614/470, loss: 0.02194126509130001 2023-01-24 05:47:36.231592: step: 616/470, loss: 0.03260621055960655 2023-01-24 05:47:36.976918: step: 618/470, loss: 0.005211520008742809 2023-01-24 05:47:37.747516: step: 620/470, loss: 0.018071891739964485 2023-01-24 05:47:38.496197: step: 622/470, loss: 0.012008518911898136 2023-01-24 05:47:39.101851: step: 624/470, loss: 0.03403551131486893 2023-01-24 05:47:39.869132: step: 626/470, loss: 0.07104015350341797 2023-01-24 05:47:40.572127: step: 628/470, loss: 0.046591900289058685 2023-01-24 05:47:41.303439: step: 630/470, loss: 0.012893665581941605 2023-01-24 05:47:42.027540: step: 632/470, loss: 0.045894332230091095 2023-01-24 05:47:42.767473: step: 634/470, loss: 0.05242696404457092 2023-01-24 05:47:43.551932: step: 636/470, loss: 0.007960853166878223 2023-01-24 05:47:44.285797: step: 638/470, loss: 0.42619752883911133 2023-01-24 05:47:45.095747: step: 640/470, loss: 0.029724212363362312 2023-01-24 05:47:45.837306: step: 642/470, loss: 0.04799468442797661 2023-01-24 05:47:46.500938: step: 644/470, loss: 0.02126733399927616 2023-01-24 05:47:47.234474: step: 646/470, loss: 0.0056076874025166035 2023-01-24 05:47:47.968393: step: 648/470, loss: 9.23870102269575e-05 2023-01-24 05:47:48.706196: step: 650/470, loss: 0.009057758376002312 2023-01-24 05:47:49.385459: step: 652/470, loss: 0.02294195629656315 2023-01-24 05:47:50.165125: step: 654/470, loss: 0.06484882533550262 2023-01-24 05:47:50.892487: step: 656/470, loss: 0.03436193987727165 2023-01-24 05:47:51.621631: step: 658/470, loss: 0.051201097667217255 2023-01-24 05:47:52.317218: step: 660/470, loss: 0.03534897416830063 2023-01-24 05:47:53.035499: step: 662/470, loss: 0.0066528706811368465 2023-01-24 05:47:53.820488: step: 664/470, loss: 0.0037193207535892725 2023-01-24 05:47:54.573590: step: 666/470, loss: 0.0020945665892213583 2023-01-24 05:47:55.285045: step: 668/470, loss: 0.000837851723190397 2023-01-24 05:47:55.980605: step: 670/470, loss: 0.03179018944501877 2023-01-24 05:47:56.787437: step: 672/470, loss: 0.009496787562966347 2023-01-24 05:47:57.508081: step: 674/470, loss: 0.3629489243030548 2023-01-24 05:47:58.206422: step: 676/470, loss: 0.025496546179056168 2023-01-24 05:47:58.935522: step: 678/470, loss: 0.03851151093840599 2023-01-24 05:47:59.707224: step: 680/470, loss: 0.0084996921941638 2023-01-24 05:48:00.363862: step: 682/470, loss: 0.007409407291561365 2023-01-24 05:48:01.100687: step: 684/470, loss: 0.03160090744495392 2023-01-24 05:48:01.844978: step: 686/470, loss: 0.05882648006081581 2023-01-24 05:48:02.623046: step: 688/470, loss: 0.08515594899654388 2023-01-24 05:48:03.350813: step: 690/470, loss: 0.0018482680898159742 2023-01-24 05:48:04.053177: step: 692/470, loss: 0.04133886843919754 2023-01-24 05:48:04.808328: step: 694/470, loss: 0.0130581334233284 2023-01-24 05:48:05.530960: step: 696/470, loss: 0.9888176321983337 2023-01-24 05:48:06.370510: step: 698/470, loss: 0.06412041187286377 2023-01-24 05:48:07.081197: step: 700/470, loss: 0.03894779458642006 2023-01-24 05:48:07.716095: step: 702/470, loss: 0.020089661702513695 2023-01-24 05:48:08.376996: step: 704/470, loss: 0.017173096537590027 2023-01-24 05:48:09.122044: step: 706/470, loss: 0.005728584248572588 2023-01-24 05:48:09.850164: step: 708/470, loss: 0.04380139708518982 2023-01-24 05:48:10.560417: step: 710/470, loss: 0.0069374158047139645 2023-01-24 05:48:11.178155: step: 712/470, loss: 0.023490410298109055 2023-01-24 05:48:11.931785: step: 714/470, loss: 0.058307942003011703 2023-01-24 05:48:12.592321: step: 716/470, loss: 0.005838301964104176 2023-01-24 05:48:13.335178: step: 718/470, loss: 0.025616277009248734 2023-01-24 05:48:14.073661: step: 720/470, loss: 0.0021177027374505997 2023-01-24 05:48:14.782078: step: 722/470, loss: 0.07428384572267532 2023-01-24 05:48:15.536299: step: 724/470, loss: 0.14311569929122925 2023-01-24 05:48:16.314204: step: 726/470, loss: 0.007042250130325556 2023-01-24 05:48:17.046180: step: 728/470, loss: 0.0038853702135384083 2023-01-24 05:48:17.788898: step: 730/470, loss: 0.044650763273239136 2023-01-24 05:48:18.435331: step: 732/470, loss: 0.0028803348541259766 2023-01-24 05:48:19.154886: step: 734/470, loss: 0.002466881647706032 2023-01-24 05:48:19.863572: step: 736/470, loss: 0.02306767739355564 2023-01-24 05:48:20.603667: step: 738/470, loss: 0.1385963410139084 2023-01-24 05:48:21.310514: step: 740/470, loss: 0.024905715137720108 2023-01-24 05:48:22.070583: step: 742/470, loss: 0.010221011936664581 2023-01-24 05:48:22.749584: step: 744/470, loss: 0.002917324658483267 2023-01-24 05:48:23.438391: step: 746/470, loss: 0.010492140427231789 2023-01-24 05:48:24.133954: step: 748/470, loss: 0.036397598683834076 2023-01-24 05:48:24.893469: step: 750/470, loss: 0.020658204331994057 2023-01-24 05:48:25.694799: step: 752/470, loss: 0.004349207505583763 2023-01-24 05:48:26.389700: step: 754/470, loss: 0.04205322265625 2023-01-24 05:48:27.086775: step: 756/470, loss: 0.015505963005125523 2023-01-24 05:48:27.726728: step: 758/470, loss: 0.002480144612491131 2023-01-24 05:48:28.444816: step: 760/470, loss: 0.054736074060201645 2023-01-24 05:48:29.192116: step: 762/470, loss: 0.012036191299557686 2023-01-24 05:48:29.862512: step: 764/470, loss: 0.01874772645533085 2023-01-24 05:48:30.583961: step: 766/470, loss: 0.03384104743599892 2023-01-24 05:48:31.303344: step: 768/470, loss: 0.010049944743514061 2023-01-24 05:48:31.975542: step: 770/470, loss: 0.03430037200450897 2023-01-24 05:48:32.681102: step: 772/470, loss: 0.0023346322122961283 2023-01-24 05:48:33.359487: step: 774/470, loss: 0.29529932141304016 2023-01-24 05:48:34.113273: step: 776/470, loss: 0.019421333447098732 2023-01-24 05:48:34.846065: step: 778/470, loss: 0.003084076102823019 2023-01-24 05:48:35.561070: step: 780/470, loss: 0.10002864897251129 2023-01-24 05:48:36.348763: step: 782/470, loss: 0.002307226648554206 2023-01-24 05:48:37.037235: step: 784/470, loss: 0.017811257392168045 2023-01-24 05:48:37.739022: step: 786/470, loss: 0.015748564153909683 2023-01-24 05:48:38.511327: step: 788/470, loss: 0.03216710314154625 2023-01-24 05:48:39.268301: step: 790/470, loss: 0.0009349191677756608 2023-01-24 05:48:40.070613: step: 792/470, loss: 0.004412441980093718 2023-01-24 05:48:40.819265: step: 794/470, loss: 0.006835710722953081 2023-01-24 05:48:41.567301: step: 796/470, loss: 0.008032741025090218 2023-01-24 05:48:42.238978: step: 798/470, loss: 0.0019889273680746555 2023-01-24 05:48:42.961329: step: 800/470, loss: 0.008295338600873947 2023-01-24 05:48:43.735299: step: 802/470, loss: 0.01602669060230255 2023-01-24 05:48:44.521815: step: 804/470, loss: 0.03637155890464783 2023-01-24 05:48:45.216480: step: 806/470, loss: 0.040701597929000854 2023-01-24 05:48:46.036850: step: 808/470, loss: 0.005965395364910364 2023-01-24 05:48:46.745263: step: 810/470, loss: 0.010988143272697926 2023-01-24 05:48:47.550471: step: 812/470, loss: 0.09173979610204697 2023-01-24 05:48:48.211535: step: 814/470, loss: 0.004171342588961124 2023-01-24 05:48:48.943777: step: 816/470, loss: 0.00160531559959054 2023-01-24 05:48:49.734194: step: 818/470, loss: 0.11780795454978943 2023-01-24 05:48:50.474877: step: 820/470, loss: 0.005452371668070555 2023-01-24 05:48:51.154605: step: 822/470, loss: 0.005390825215727091 2023-01-24 05:48:51.847941: step: 824/470, loss: 0.0016363279428333044 2023-01-24 05:48:52.579347: step: 826/470, loss: 0.007646666374057531 2023-01-24 05:48:53.296262: step: 828/470, loss: 0.017727140337228775 2023-01-24 05:48:54.094686: step: 830/470, loss: 0.010663860477507114 2023-01-24 05:48:54.841266: step: 832/470, loss: 0.023380601778626442 2023-01-24 05:48:55.542737: step: 834/470, loss: 0.041295647621154785 2023-01-24 05:48:56.251429: step: 836/470, loss: 0.0017049266025424004 2023-01-24 05:48:57.043177: step: 838/470, loss: 0.006582122761756182 2023-01-24 05:48:57.742911: step: 840/470, loss: 0.021049687638878822 2023-01-24 05:48:58.454075: step: 842/470, loss: 0.01726703532040119 2023-01-24 05:48:59.168699: step: 844/470, loss: 0.012101834639906883 2023-01-24 05:48:59.989141: step: 846/470, loss: 0.030885327607393265 2023-01-24 05:49:00.719132: step: 848/470, loss: 0.030672363936901093 2023-01-24 05:49:01.396836: step: 850/470, loss: 0.6598572731018066 2023-01-24 05:49:02.065353: step: 852/470, loss: 0.0076820398680865765 2023-01-24 05:49:02.746642: step: 854/470, loss: 0.0031191399320960045 2023-01-24 05:49:03.516480: step: 856/470, loss: 0.004457424394786358 2023-01-24 05:49:04.203820: step: 858/470, loss: 0.15405337512493134 2023-01-24 05:49:04.888784: step: 860/470, loss: 0.010198515839874744 2023-01-24 05:49:05.611940: step: 862/470, loss: 0.12781405448913574 2023-01-24 05:49:06.329951: step: 864/470, loss: 0.006552521139383316 2023-01-24 05:49:07.026793: step: 866/470, loss: 0.005595530848950148 2023-01-24 05:49:07.717128: step: 868/470, loss: 0.008519892580807209 2023-01-24 05:49:08.348954: step: 870/470, loss: 0.0002687643573153764 2023-01-24 05:49:09.164745: step: 872/470, loss: 5.414352893829346 2023-01-24 05:49:09.889566: step: 874/470, loss: 0.03722013160586357 2023-01-24 05:49:10.518225: step: 876/470, loss: 0.05762125179171562 2023-01-24 05:49:11.188999: step: 878/470, loss: 0.006857017055153847 2023-01-24 05:49:11.980044: step: 880/470, loss: 0.009326043538749218 2023-01-24 05:49:12.681739: step: 882/470, loss: 0.033087752759456635 2023-01-24 05:49:13.412077: step: 884/470, loss: 0.052600014954805374 2023-01-24 05:49:14.066697: step: 886/470, loss: 0.024000994861125946 2023-01-24 05:49:14.774779: step: 888/470, loss: 0.031389061361551285 2023-01-24 05:49:15.414275: step: 890/470, loss: 0.018143486231565475 2023-01-24 05:49:16.217273: step: 892/470, loss: 0.0009539459133520722 2023-01-24 05:49:16.957473: step: 894/470, loss: 0.01012116763740778 2023-01-24 05:49:17.671414: step: 896/470, loss: 0.0203192550688982 2023-01-24 05:49:18.402252: step: 898/470, loss: 0.021785369142889977 2023-01-24 05:49:19.227104: step: 900/470, loss: 0.0018964793998748064 2023-01-24 05:49:20.003838: step: 902/470, loss: 0.012782180681824684 2023-01-24 05:49:20.716875: step: 904/470, loss: 0.01066429540514946 2023-01-24 05:49:21.525513: step: 906/470, loss: 0.017899200320243835 2023-01-24 05:49:22.222621: step: 908/470, loss: 0.0004175813519395888 2023-01-24 05:49:22.958658: step: 910/470, loss: 0.20423611998558044 2023-01-24 05:49:23.796634: step: 912/470, loss: 0.029853759333491325 2023-01-24 05:49:24.564266: step: 914/470, loss: 0.02101089619100094 2023-01-24 05:49:25.385350: step: 916/470, loss: 0.0083807073533535 2023-01-24 05:49:26.186735: step: 918/470, loss: 0.0004685977182816714 2023-01-24 05:49:26.899752: step: 920/470, loss: 0.006272825412452221 2023-01-24 05:49:27.678268: step: 922/470, loss: 0.007433359045535326 2023-01-24 05:49:28.435897: step: 924/470, loss: 0.02900080941617489 2023-01-24 05:49:29.106013: step: 926/470, loss: 0.0006584687507711351 2023-01-24 05:49:29.825973: step: 928/470, loss: 0.029323289170861244 2023-01-24 05:49:30.531557: step: 930/470, loss: 0.020921828225255013 2023-01-24 05:49:31.180855: step: 932/470, loss: 0.0083458935841918 2023-01-24 05:49:32.012653: step: 934/470, loss: 0.0011282124323770404 2023-01-24 05:49:32.723259: step: 936/470, loss: 0.017477432265877724 2023-01-24 05:49:33.403799: step: 938/470, loss: 0.001543686375953257 2023-01-24 05:49:34.075326: step: 940/470, loss: 0.00018178651225753129 2023-01-24 05:49:34.753390: step: 942/470, loss: 0.00588207645341754 ================================================== Loss: 0.064 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33737889840975765, 'r': 0.3252153328124419, 'f1': 0.3311854693568249}, 'combined': 0.24403139847344993, 'epoch': 30} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36645919943949934, 'r': 0.344633579492094, 'f1': 0.35521144183294046}, 'combined': 0.23680762788862691, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3364675756027315, 'r': 0.32880607482999374, 'f1': 0.3325927090890724}, 'combined': 0.24506831196036913, 'epoch': 30} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36856148628096175, 'r': 0.34448638439133117, 'f1': 0.35611750362599376}, 'combined': 0.23741166908399577, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3236180835970752, 'r': 0.3303601270053476, 'f1': 0.3269543525001379}, 'combined': 0.2409137334211542, 'epoch': 30} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3531791237901832, 'r': 0.34367958924059133, 'f1': 0.3483646079838905}, 'combined': 0.23224307198926028, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2663690476190476, 'r': 0.3196428571428571, 'f1': 0.29058441558441556}, 'combined': 0.19372294372294369, 'epoch': 30} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.2826086956521739, 'f1': 0.3714285714285714}, 'combined': 0.24761904761904757, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4989878542510121, 'r': 0.22368421052631576, 'f1': 0.30889724310776934}, 'combined': 0.20593149540517955, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32338929219600726, 'r': 0.33811669829222013, 'f1': 0.3305890538033395}, 'combined': 0.24359193438140805, 'epoch': 26} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.348896802572895, 'r': 0.3519161018259297, 'f1': 0.35039994820389364}, 'combined': 0.23359996546926237, 'epoch': 26} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5535714285714286, 'r': 0.2672413793103448, 'f1': 0.36046511627906974}, 'combined': 0.24031007751937983, 'epoch': 26} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:52:07.748795: step: 2/470, loss: 0.0004153551999479532 2023-01-24 05:52:08.516743: step: 4/470, loss: 1.0925959348678589 2023-01-24 05:52:09.196759: step: 6/470, loss: 0.0014063325943425298 2023-01-24 05:52:09.832950: step: 8/470, loss: 0.0018835861701518297 2023-01-24 05:52:10.648973: step: 10/470, loss: 0.010510102845728397 2023-01-24 05:52:11.335400: step: 12/470, loss: 0.004608213901519775 2023-01-24 05:52:12.080917: step: 14/470, loss: 0.000766645884141326 2023-01-24 05:52:12.783403: step: 16/470, loss: 0.02919374406337738 2023-01-24 05:52:13.415658: step: 18/470, loss: 0.004164369311183691 2023-01-24 05:52:14.040153: step: 20/470, loss: 0.022466279566287994 2023-01-24 05:52:14.766243: step: 22/470, loss: 0.032227374613285065 2023-01-24 05:52:15.485752: step: 24/470, loss: 0.00730488495901227 2023-01-24 05:52:16.169807: step: 26/470, loss: 0.0010027121752500534 2023-01-24 05:52:16.947107: step: 28/470, loss: 0.002904940862208605 2023-01-24 05:52:17.638263: step: 30/470, loss: 0.07680755108594894 2023-01-24 05:52:18.370647: step: 32/470, loss: 0.007039623335003853 2023-01-24 05:52:19.249829: step: 34/470, loss: 0.25433337688446045 2023-01-24 05:52:19.902932: step: 36/470, loss: 0.017493372783064842 2023-01-24 05:52:20.635628: step: 38/470, loss: 0.5216097235679626 2023-01-24 05:52:21.437209: step: 40/470, loss: 0.017964819446206093 2023-01-24 05:52:22.137076: step: 42/470, loss: 0.009211939759552479 2023-01-24 05:52:22.854805: step: 44/470, loss: 0.045933887362480164 2023-01-24 05:52:23.557907: step: 46/470, loss: 0.08470865339040756 2023-01-24 05:52:24.311776: step: 48/470, loss: 0.023487238213419914 2023-01-24 05:52:25.041036: step: 50/470, loss: 0.009432869963347912 2023-01-24 05:52:25.804864: step: 52/470, loss: 0.008719773031771183 2023-01-24 05:52:26.502642: step: 54/470, loss: 0.003286207327619195 2023-01-24 05:52:27.165685: step: 56/470, loss: 0.005947456229478121 2023-01-24 05:52:27.948330: step: 58/470, loss: 0.0014269596431404352 2023-01-24 05:52:28.766871: step: 60/470, loss: 0.0015790915349498391 2023-01-24 05:52:29.500456: step: 62/470, loss: 0.018821055069565773 2023-01-24 05:52:30.188643: step: 64/470, loss: 3.195769022568129e-05 2023-01-24 05:52:30.898025: step: 66/470, loss: 0.013032572343945503 2023-01-24 05:52:31.646706: step: 68/470, loss: 0.012471756897866726 2023-01-24 05:52:32.368285: step: 70/470, loss: 0.009083814918994904 2023-01-24 05:52:33.031555: step: 72/470, loss: 0.0110135143622756 2023-01-24 05:52:33.769975: step: 74/470, loss: 0.009082856588065624 2023-01-24 05:52:34.466839: step: 76/470, loss: 0.0004449693369679153 2023-01-24 05:52:35.243709: step: 78/470, loss: 0.05842125788331032 2023-01-24 05:52:35.969329: step: 80/470, loss: 0.008214079774916172 2023-01-24 05:52:36.755915: step: 82/470, loss: 0.06217104196548462 2023-01-24 05:52:37.601418: step: 84/470, loss: 0.006593683734536171 2023-01-24 05:52:38.274756: step: 86/470, loss: 0.019678259268403053 2023-01-24 05:52:39.003092: step: 88/470, loss: 0.005078400019556284 2023-01-24 05:52:39.747354: step: 90/470, loss: 0.03903871402144432 2023-01-24 05:52:40.450448: step: 92/470, loss: 0.4988504946231842 2023-01-24 05:52:41.229370: step: 94/470, loss: 0.018317870795726776 2023-01-24 05:52:41.992752: step: 96/470, loss: 0.0024577791336923838 2023-01-24 05:52:42.634412: step: 98/470, loss: 0.0027793124318122864 2023-01-24 05:52:43.286099: step: 100/470, loss: 0.00482197804376483 2023-01-24 05:52:43.952514: step: 102/470, loss: 0.019959578290581703 2023-01-24 05:52:44.677343: step: 104/470, loss: 0.0028080667834728956 2023-01-24 05:52:45.346376: step: 106/470, loss: 0.0010208826279267669 2023-01-24 05:52:46.039577: step: 108/470, loss: 0.030457837507128716 2023-01-24 05:52:46.709677: step: 110/470, loss: 0.0025482738856226206 2023-01-24 05:52:47.429293: step: 112/470, loss: 0.0258177500218153 2023-01-24 05:52:48.157528: step: 114/470, loss: 0.0043304734863340855 2023-01-24 05:52:48.914535: step: 116/470, loss: 0.002764065284281969 2023-01-24 05:52:49.726252: step: 118/470, loss: 0.03494582325220108 2023-01-24 05:52:50.556630: step: 120/470, loss: 0.04635448753833771 2023-01-24 05:52:51.341615: step: 122/470, loss: 0.04545215889811516 2023-01-24 05:52:52.087233: step: 124/470, loss: 0.019159091636538506 2023-01-24 05:52:52.768560: step: 126/470, loss: 0.0013390732929110527 2023-01-24 05:52:53.492164: step: 128/470, loss: 0.03846125304698944 2023-01-24 05:52:54.170490: step: 130/470, loss: 0.008521667681634426 2023-01-24 05:52:54.888721: step: 132/470, loss: 0.01758314110338688 2023-01-24 05:52:55.664540: step: 134/470, loss: 0.028450246900320053 2023-01-24 05:52:56.354532: step: 136/470, loss: 0.03396439552307129 2023-01-24 05:52:57.097507: step: 138/470, loss: 0.004837017506361008 2023-01-24 05:52:57.784634: step: 140/470, loss: 0.022973332554101944 2023-01-24 05:52:58.590779: step: 142/470, loss: 0.019509391859173775 2023-01-24 05:52:59.314183: step: 144/470, loss: 0.05560195818543434 2023-01-24 05:53:00.029450: step: 146/470, loss: 0.04433672875165939 2023-01-24 05:53:00.737425: step: 148/470, loss: 0.8889219164848328 2023-01-24 05:53:01.450572: step: 150/470, loss: 0.006953278090804815 2023-01-24 05:53:02.212505: step: 152/470, loss: 0.0006373568321578205 2023-01-24 05:53:02.900249: step: 154/470, loss: 0.005718933418393135 2023-01-24 05:53:03.674749: step: 156/470, loss: 0.007636924274265766 2023-01-24 05:53:04.383750: step: 158/470, loss: 0.04812711477279663 2023-01-24 05:53:05.144729: step: 160/470, loss: 0.0677870586514473 2023-01-24 05:53:05.856768: step: 162/470, loss: 0.009034083224833012 2023-01-24 05:53:06.753052: step: 164/470, loss: 0.010354172438383102 2023-01-24 05:53:07.445686: step: 166/470, loss: 0.004512510262429714 2023-01-24 05:53:08.218728: step: 168/470, loss: 0.01185314916074276 2023-01-24 05:53:08.988783: step: 170/470, loss: 0.00713575491681695 2023-01-24 05:53:09.631619: step: 172/470, loss: 0.000432536966400221 2023-01-24 05:53:10.320935: step: 174/470, loss: 0.0017447196878492832 2023-01-24 05:53:11.005637: step: 176/470, loss: 0.0017614453099668026 2023-01-24 05:53:11.728059: step: 178/470, loss: 0.12065732479095459 2023-01-24 05:53:12.480890: step: 180/470, loss: 0.00911496952176094 2023-01-24 05:53:13.237248: step: 182/470, loss: 0.050489142537117004 2023-01-24 05:53:13.969524: step: 184/470, loss: 0.0030859841499477625 2023-01-24 05:53:14.711297: step: 186/470, loss: 0.02064261958003044 2023-01-24 05:53:15.461457: step: 188/470, loss: 0.025169501081109047 2023-01-24 05:53:16.338542: step: 190/470, loss: 0.23960715532302856 2023-01-24 05:53:17.089681: step: 192/470, loss: 0.04839571937918663 2023-01-24 05:53:17.905684: step: 194/470, loss: 0.0479017049074173 2023-01-24 05:53:18.567333: step: 196/470, loss: 0.0008035176433622837 2023-01-24 05:53:19.271572: step: 198/470, loss: 0.0024430553894490004 2023-01-24 05:53:20.018863: step: 200/470, loss: 0.0055528427474200726 2023-01-24 05:53:20.774498: step: 202/470, loss: 0.014087006449699402 2023-01-24 05:53:21.474605: step: 204/470, loss: 0.007186644244939089 2023-01-24 05:53:22.215038: step: 206/470, loss: 0.017961587756872177 2023-01-24 05:53:22.894354: step: 208/470, loss: 0.027364250272512436 2023-01-24 05:53:23.551000: step: 210/470, loss: 0.004035270307213068 2023-01-24 05:53:24.236852: step: 212/470, loss: 0.00933856051415205 2023-01-24 05:53:25.026781: step: 214/470, loss: 0.006368427537381649 2023-01-24 05:53:25.733098: step: 216/470, loss: 0.04675932601094246 2023-01-24 05:53:26.463231: step: 218/470, loss: 0.008699115365743637 2023-01-24 05:53:27.226313: step: 220/470, loss: 0.005315710324794054 2023-01-24 05:53:27.879440: step: 222/470, loss: 0.0007220212719403207 2023-01-24 05:53:28.622112: step: 224/470, loss: 4.652983079722617e-06 2023-01-24 05:53:29.420527: step: 226/470, loss: 0.020241660997271538 2023-01-24 05:53:30.054665: step: 228/470, loss: 0.0002864209236577153 2023-01-24 05:53:30.729555: step: 230/470, loss: 0.0019567871931940317 2023-01-24 05:53:31.560684: step: 232/470, loss: 0.011841587722301483 2023-01-24 05:53:32.289635: step: 234/470, loss: 0.09722713381052017 2023-01-24 05:53:33.024484: step: 236/470, loss: 0.02826106734573841 2023-01-24 05:53:33.778508: step: 238/470, loss: 0.04371390491724014 2023-01-24 05:53:34.469999: step: 240/470, loss: 0.02666950784623623 2023-01-24 05:53:35.253700: step: 242/470, loss: 0.02603962831199169 2023-01-24 05:53:35.977409: step: 244/470, loss: 0.0011542732827365398 2023-01-24 05:53:36.729052: step: 246/470, loss: 0.04242149740457535 2023-01-24 05:53:37.514097: step: 248/470, loss: 0.005671241320669651 2023-01-24 05:53:38.276639: step: 250/470, loss: 0.06051589921116829 2023-01-24 05:53:38.908320: step: 252/470, loss: 0.022702636197209358 2023-01-24 05:53:39.618558: step: 254/470, loss: 0.00014661716704722494 2023-01-24 05:53:40.355357: step: 256/470, loss: 0.024693626910448074 2023-01-24 05:53:41.073016: step: 258/470, loss: 0.006267967633903027 2023-01-24 05:53:41.826486: step: 260/470, loss: 0.010373245924711227 2023-01-24 05:53:42.561970: step: 262/470, loss: 0.013863403350114822 2023-01-24 05:53:43.275077: step: 264/470, loss: 0.011223852634429932 2023-01-24 05:53:44.007309: step: 266/470, loss: 0.13165761530399323 2023-01-24 05:53:44.797804: step: 268/470, loss: 0.010476339608430862 2023-01-24 05:53:45.503199: step: 270/470, loss: 0.015369415283203125 2023-01-24 05:53:46.240155: step: 272/470, loss: 0.0017211531521752477 2023-01-24 05:53:46.948421: step: 274/470, loss: 5.57665407541208e-05 2023-01-24 05:53:47.632377: step: 276/470, loss: 0.0009223352535627782 2023-01-24 05:53:48.350364: step: 278/470, loss: 1.025863821269013e-05 2023-01-24 05:53:49.097737: step: 280/470, loss: 2.5038185119628906 2023-01-24 05:53:49.834224: step: 282/470, loss: 0.014256109483540058 2023-01-24 05:53:50.534901: step: 284/470, loss: 0.00037505527143366635 2023-01-24 05:53:51.214435: step: 286/470, loss: 0.002377528930082917 2023-01-24 05:53:51.892292: step: 288/470, loss: 0.025805901736021042 2023-01-24 05:53:52.641791: step: 290/470, loss: 0.1646108329296112 2023-01-24 05:53:53.376413: step: 292/470, loss: 0.004207650665193796 2023-01-24 05:53:54.211687: step: 294/470, loss: 0.006044092588126659 2023-01-24 05:53:54.964892: step: 296/470, loss: 0.0017826639814302325 2023-01-24 05:53:55.642062: step: 298/470, loss: 0.0007332692039199173 2023-01-24 05:53:56.393575: step: 300/470, loss: 0.01916944980621338 2023-01-24 05:53:57.071709: step: 302/470, loss: 0.015175165608525276 2023-01-24 05:53:57.719576: step: 304/470, loss: 0.0002173801331082359 2023-01-24 05:53:58.403187: step: 306/470, loss: 0.010625113733112812 2023-01-24 05:53:59.075156: step: 308/470, loss: 0.00954343844205141 2023-01-24 05:53:59.796714: step: 310/470, loss: 0.08960520476102829 2023-01-24 05:54:00.509673: step: 312/470, loss: 0.008206892758607864 2023-01-24 05:54:01.306964: step: 314/470, loss: 0.0023078385274857283 2023-01-24 05:54:01.989234: step: 316/470, loss: 0.02238607406616211 2023-01-24 05:54:02.730528: step: 318/470, loss: 0.0010710656642913818 2023-01-24 05:54:03.390108: step: 320/470, loss: 5.481565312948078e-05 2023-01-24 05:54:04.160192: step: 322/470, loss: 0.0008377675549127162 2023-01-24 05:54:04.930390: step: 324/470, loss: 0.016996121034026146 2023-01-24 05:54:05.675533: step: 326/470, loss: 0.03880900889635086 2023-01-24 05:54:06.400059: step: 328/470, loss: 0.019622113555669785 2023-01-24 05:54:07.148290: step: 330/470, loss: 0.017126567661762238 2023-01-24 05:54:07.911659: step: 332/470, loss: 0.004321925342082977 2023-01-24 05:54:08.586541: step: 334/470, loss: 0.00889088585972786 2023-01-24 05:54:09.327672: step: 336/470, loss: 0.014416373334825039 2023-01-24 05:54:10.135708: step: 338/470, loss: 0.05695127323269844 2023-01-24 05:54:10.874310: step: 340/470, loss: 0.017850443720817566 2023-01-24 05:54:11.585471: step: 342/470, loss: 0.028859199956059456 2023-01-24 05:54:12.246971: step: 344/470, loss: 0.03185954689979553 2023-01-24 05:54:12.961470: step: 346/470, loss: 0.004228611942380667 2023-01-24 05:54:13.846918: step: 348/470, loss: 0.052207764238119125 2023-01-24 05:54:14.551055: step: 350/470, loss: 0.2492973804473877 2023-01-24 05:54:15.410996: step: 352/470, loss: 0.002187067177146673 2023-01-24 05:54:16.123650: step: 354/470, loss: 0.004828231874853373 2023-01-24 05:54:16.887236: step: 356/470, loss: 0.030843263491988182 2023-01-24 05:54:17.598560: step: 358/470, loss: 0.004479340277612209 2023-01-24 05:54:18.405077: step: 360/470, loss: 0.010255182161927223 2023-01-24 05:54:19.078202: step: 362/470, loss: 0.0042448281310498714 2023-01-24 05:54:19.738166: step: 364/470, loss: 0.0041470276191830635 2023-01-24 05:54:20.388538: step: 366/470, loss: 0.004188673570752144 2023-01-24 05:54:21.162268: step: 368/470, loss: 0.004912644159048796 2023-01-24 05:54:21.863039: step: 370/470, loss: 0.010912226513028145 2023-01-24 05:54:22.707730: step: 372/470, loss: 0.017633339390158653 2023-01-24 05:54:23.455652: step: 374/470, loss: 0.07342100143432617 2023-01-24 05:54:24.198765: step: 376/470, loss: 0.003536728210747242 2023-01-24 05:54:24.879335: step: 378/470, loss: 0.0003867686027660966 2023-01-24 05:54:25.558108: step: 380/470, loss: 0.017503436654806137 2023-01-24 05:54:26.395816: step: 382/470, loss: 0.0033323050010949373 2023-01-24 05:54:27.071036: step: 384/470, loss: 0.0019946997053921223 2023-01-24 05:54:27.784212: step: 386/470, loss: 0.680500328540802 2023-01-24 05:54:28.495932: step: 388/470, loss: 0.01015088427811861 2023-01-24 05:54:29.226234: step: 390/470, loss: 0.0255475752055645 2023-01-24 05:54:29.942187: step: 392/470, loss: 0.011009021662175655 2023-01-24 05:54:30.667950: step: 394/470, loss: 0.0023041116073727608 2023-01-24 05:54:31.418066: step: 396/470, loss: 0.012929693795740604 2023-01-24 05:54:32.123669: step: 398/470, loss: 0.0013820825843140483 2023-01-24 05:54:32.909349: step: 400/470, loss: 0.03162797540426254 2023-01-24 05:54:33.643811: step: 402/470, loss: 0.23524107038974762 2023-01-24 05:54:34.399285: step: 404/470, loss: 0.008160697296261787 2023-01-24 05:54:35.108787: step: 406/470, loss: 0.024923594668507576 2023-01-24 05:54:35.863449: step: 408/470, loss: 0.0012302573304623365 2023-01-24 05:54:36.590769: step: 410/470, loss: 0.039357952773571014 2023-01-24 05:54:37.319460: step: 412/470, loss: 0.02063934877514839 2023-01-24 05:54:38.044249: step: 414/470, loss: 0.019131643697619438 2023-01-24 05:54:38.822519: step: 416/470, loss: 0.029757630079984665 2023-01-24 05:54:39.580895: step: 418/470, loss: 0.03168037533760071 2023-01-24 05:54:40.398778: step: 420/470, loss: 0.007195422891527414 2023-01-24 05:54:41.136142: step: 422/470, loss: 0.04357065260410309 2023-01-24 05:54:41.923148: step: 424/470, loss: 0.032375071197748184 2023-01-24 05:54:42.645833: step: 426/470, loss: 0.021866897121071815 2023-01-24 05:54:43.333993: step: 428/470, loss: 0.021189482882618904 2023-01-24 05:54:44.222867: step: 430/470, loss: 0.05813028663396835 2023-01-24 05:54:44.991810: step: 432/470, loss: 0.059212926775217056 2023-01-24 05:54:45.720204: step: 434/470, loss: 0.012862597592175007 2023-01-24 05:54:46.548205: step: 436/470, loss: 0.0023115493822842836 2023-01-24 05:54:47.254032: step: 438/470, loss: 0.08521554619073868 2023-01-24 05:54:47.959333: step: 440/470, loss: 0.6096815466880798 2023-01-24 05:54:48.716213: step: 442/470, loss: 0.012424707412719727 2023-01-24 05:54:49.480090: step: 444/470, loss: 0.00018685971735976636 2023-01-24 05:54:50.238474: step: 446/470, loss: 0.0018285795813426375 2023-01-24 05:54:50.978183: step: 448/470, loss: 0.15367041528224945 2023-01-24 05:54:51.643865: step: 450/470, loss: 0.15135274827480316 2023-01-24 05:54:52.314818: step: 452/470, loss: 0.0034649712033569813 2023-01-24 05:54:53.099237: step: 454/470, loss: 0.1291857212781906 2023-01-24 05:54:53.878017: step: 456/470, loss: 0.010504554957151413 2023-01-24 05:54:54.567401: step: 458/470, loss: 0.005182147957384586 2023-01-24 05:54:55.360804: step: 460/470, loss: 0.012676097452640533 2023-01-24 05:54:56.068930: step: 462/470, loss: 0.013613465242087841 2023-01-24 05:54:56.831355: step: 464/470, loss: 0.07927101105451584 2023-01-24 05:54:57.649274: step: 466/470, loss: 0.0009735323255881667 2023-01-24 05:54:58.424595: step: 468/470, loss: 0.02965150959789753 2023-01-24 05:54:59.237837: step: 470/470, loss: 0.000922163191717118 2023-01-24 05:55:00.004604: step: 472/470, loss: 0.04083675891160965 2023-01-24 05:55:00.728814: step: 474/470, loss: 0.0027813774067908525 2023-01-24 05:55:01.494029: step: 476/470, loss: 0.021299488842487335 2023-01-24 05:55:02.251975: step: 478/470, loss: 0.006957915611565113 2023-01-24 05:55:03.299831: step: 480/470, loss: 0.01972772739827633 2023-01-24 05:55:03.979563: step: 482/470, loss: 0.006241174414753914 2023-01-24 05:55:04.771263: step: 484/470, loss: 0.014599893242120743 2023-01-24 05:55:05.500300: step: 486/470, loss: 0.005494068842381239 2023-01-24 05:55:06.289489: step: 488/470, loss: 0.015743980184197426 2023-01-24 05:55:07.071169: step: 490/470, loss: 0.009499004110693932 2023-01-24 05:55:07.847663: step: 492/470, loss: 0.00025713659124448895 2023-01-24 05:55:08.718549: step: 494/470, loss: 0.024208668619394302 2023-01-24 05:55:09.524754: step: 496/470, loss: 0.008801139891147614 2023-01-24 05:55:10.268271: step: 498/470, loss: 0.007869623601436615 2023-01-24 05:55:10.957267: step: 500/470, loss: 0.0006736805662512779 2023-01-24 05:55:11.675832: step: 502/470, loss: 0.0008966495515778661 2023-01-24 05:55:12.468336: step: 504/470, loss: 0.03283290937542915 2023-01-24 05:55:13.194617: step: 506/470, loss: 0.013244382105767727 2023-01-24 05:55:13.914126: step: 508/470, loss: 0.09672006964683533 2023-01-24 05:55:14.628389: step: 510/470, loss: 0.03322713449597359 2023-01-24 05:55:15.372600: step: 512/470, loss: 0.004942721221596003 2023-01-24 05:55:16.140223: step: 514/470, loss: 0.02580172009766102 2023-01-24 05:55:16.889897: step: 516/470, loss: 0.042521312832832336 2023-01-24 05:55:17.583942: step: 518/470, loss: 0.050515204668045044 2023-01-24 05:55:18.382550: step: 520/470, loss: 0.15628911554813385 2023-01-24 05:55:19.130046: step: 522/470, loss: 0.0003710964519996196 2023-01-24 05:55:19.811338: step: 524/470, loss: 0.005137452390044928 2023-01-24 05:55:20.542164: step: 526/470, loss: 0.03880662843585014 2023-01-24 05:55:21.315473: step: 528/470, loss: 0.0028388279024511576 2023-01-24 05:55:22.045798: step: 530/470, loss: 0.00452554551884532 2023-01-24 05:55:22.847734: step: 532/470, loss: 0.07652177661657333 2023-01-24 05:55:23.614584: step: 534/470, loss: 0.017907770350575447 2023-01-24 05:55:24.366022: step: 536/470, loss: 0.017264176160097122 2023-01-24 05:55:25.136203: step: 538/470, loss: 0.017725540325045586 2023-01-24 05:55:25.858607: step: 540/470, loss: 0.0008670427487231791 2023-01-24 05:55:26.639201: step: 542/470, loss: 0.001019967021420598 2023-01-24 05:55:27.434630: step: 544/470, loss: 0.0039613074623048306 2023-01-24 05:55:28.247562: step: 546/470, loss: 0.01247483305633068 2023-01-24 05:55:28.933828: step: 548/470, loss: 0.002028008922934532 2023-01-24 05:55:29.656220: step: 550/470, loss: 0.004361606668680906 2023-01-24 05:55:30.345189: step: 552/470, loss: 0.011579647660255432 2023-01-24 05:55:31.044889: step: 554/470, loss: 0.049814943224191666 2023-01-24 05:55:31.729424: step: 556/470, loss: 0.0027671053539961576 2023-01-24 05:55:32.457026: step: 558/470, loss: 0.017059145495295525 2023-01-24 05:55:33.198611: step: 560/470, loss: 0.0030047514010220766 2023-01-24 05:55:33.917491: step: 562/470, loss: 0.011335933580994606 2023-01-24 05:55:34.693656: step: 564/470, loss: 0.03579147905111313 2023-01-24 05:55:35.432698: step: 566/470, loss: 0.021534211933612823 2023-01-24 05:55:36.211387: step: 568/470, loss: 0.03905900940299034 2023-01-24 05:55:36.933598: step: 570/470, loss: 0.0442798025906086 2023-01-24 05:55:37.720093: step: 572/470, loss: 0.001077734399586916 2023-01-24 05:55:38.462911: step: 574/470, loss: 0.030320877209305763 2023-01-24 05:55:39.111604: step: 576/470, loss: 0.00816918071359396 2023-01-24 05:55:39.858458: step: 578/470, loss: 0.013305963017046452 2023-01-24 05:55:40.581659: step: 580/470, loss: 0.009787117131054401 2023-01-24 05:55:41.204360: step: 582/470, loss: 0.007002311293035746 2023-01-24 05:55:41.991981: step: 584/470, loss: 0.035149309784173965 2023-01-24 05:55:42.691862: step: 586/470, loss: 0.00833014864474535 2023-01-24 05:55:43.480871: step: 588/470, loss: 0.020918749272823334 2023-01-24 05:55:44.185694: step: 590/470, loss: 0.14545173943042755 2023-01-24 05:55:44.875664: step: 592/470, loss: 0.03052116557955742 2023-01-24 05:55:45.647406: step: 594/470, loss: 0.012451532296836376 2023-01-24 05:55:46.380593: step: 596/470, loss: 0.15833701193332672 2023-01-24 05:55:47.157013: step: 598/470, loss: 0.0005307383253239095 2023-01-24 05:55:47.866284: step: 600/470, loss: 0.0017066209111362696 2023-01-24 05:55:48.603267: step: 602/470, loss: 0.03470804542303085 2023-01-24 05:55:49.355809: step: 604/470, loss: 0.0007092714658938348 2023-01-24 05:55:50.087641: step: 606/470, loss: 0.001324977376498282 2023-01-24 05:55:50.772015: step: 608/470, loss: 0.024211524054408073 2023-01-24 05:55:51.534754: step: 610/470, loss: 0.00011712490959325805 2023-01-24 05:55:52.282411: step: 612/470, loss: 7.496851139876526e-06 2023-01-24 05:55:53.054813: step: 614/470, loss: 0.07053697854280472 2023-01-24 05:55:53.765098: step: 616/470, loss: 0.05512640252709389 2023-01-24 05:55:54.513910: step: 618/470, loss: 0.03331885486841202 2023-01-24 05:55:55.284415: step: 620/470, loss: 0.0004636533558368683 2023-01-24 05:55:56.036729: step: 622/470, loss: 3.8023954402888194e-06 2023-01-24 05:55:56.734668: step: 624/470, loss: 0.01048552617430687 2023-01-24 05:55:57.401684: step: 626/470, loss: 0.017999017611145973 2023-01-24 05:55:58.058411: step: 628/470, loss: 0.044697657227516174 2023-01-24 05:55:58.920468: step: 630/470, loss: 0.017182713374495506 2023-01-24 05:55:59.606621: step: 632/470, loss: 0.03972567990422249 2023-01-24 05:56:00.337539: step: 634/470, loss: 0.018354123458266258 2023-01-24 05:56:01.067967: step: 636/470, loss: 0.0014592665247619152 2023-01-24 05:56:01.774689: step: 638/470, loss: 0.003360056085512042 2023-01-24 05:56:02.552322: step: 640/470, loss: 0.054131921380758286 2023-01-24 05:56:03.260435: step: 642/470, loss: 0.00018125410133507103 2023-01-24 05:56:03.915176: step: 644/470, loss: 0.00046586460666731 2023-01-24 05:56:04.570881: step: 646/470, loss: 0.003081733826547861 2023-01-24 05:56:05.329522: step: 648/470, loss: 0.0019446099177002907 2023-01-24 05:56:06.096983: step: 650/470, loss: 0.036725327372550964 2023-01-24 05:56:06.878773: step: 652/470, loss: 0.011674618348479271 2023-01-24 05:56:07.708276: step: 654/470, loss: 0.023591268807649612 2023-01-24 05:56:08.499483: step: 656/470, loss: 0.13545000553131104 2023-01-24 05:56:09.136541: step: 658/470, loss: 0.02520488202571869 2023-01-24 05:56:09.863454: step: 660/470, loss: 0.017133207991719246 2023-01-24 05:56:10.533284: step: 662/470, loss: 0.01911136694252491 2023-01-24 05:56:11.203849: step: 664/470, loss: 0.007725501898676157 2023-01-24 05:56:11.918159: step: 666/470, loss: 0.009709802456200123 2023-01-24 05:56:12.704206: step: 668/470, loss: 0.0017183064483106136 2023-01-24 05:56:13.468085: step: 670/470, loss: 0.026015179231762886 2023-01-24 05:56:14.243522: step: 672/470, loss: 0.008718994446098804 2023-01-24 05:56:14.934247: step: 674/470, loss: 0.0018770707538351417 2023-01-24 05:56:15.766895: step: 676/470, loss: 0.007993071340024471 2023-01-24 05:56:16.463984: step: 678/470, loss: 0.0003845719911623746 2023-01-24 05:56:17.282763: step: 680/470, loss: 0.0029241188894957304 2023-01-24 05:56:18.052837: step: 682/470, loss: 0.2095903605222702 2023-01-24 05:56:18.750499: step: 684/470, loss: 0.04601133614778519 2023-01-24 05:56:19.603338: step: 686/470, loss: 0.04851691052317619 2023-01-24 05:56:20.225313: step: 688/470, loss: 0.0033728540875017643 2023-01-24 05:56:20.921129: step: 690/470, loss: 0.0015807858435437083 2023-01-24 05:56:21.765767: step: 692/470, loss: 0.010904110968112946 2023-01-24 05:56:22.475820: step: 694/470, loss: 0.07372532784938812 2023-01-24 05:56:23.207053: step: 696/470, loss: 0.0032748605590313673 2023-01-24 05:56:23.915568: step: 698/470, loss: 0.0914636179804802 2023-01-24 05:56:24.601202: step: 700/470, loss: 0.0038345667999237776 2023-01-24 05:56:25.339094: step: 702/470, loss: 0.09439164400100708 2023-01-24 05:56:26.115376: step: 704/470, loss: 0.12708623707294464 2023-01-24 05:56:26.804745: step: 706/470, loss: 0.02249508537352085 2023-01-24 05:56:27.509164: step: 708/470, loss: 0.024676885455846786 2023-01-24 05:56:28.137883: step: 710/470, loss: 0.00023603100271429867 2023-01-24 05:56:28.864940: step: 712/470, loss: 0.07618135958909988 2023-01-24 05:56:29.585776: step: 714/470, loss: 0.0014070416800677776 2023-01-24 05:56:30.425737: step: 716/470, loss: 0.038883935660123825 2023-01-24 05:56:31.149192: step: 718/470, loss: 0.0010557807981967926 2023-01-24 05:56:31.924238: step: 720/470, loss: 0.00078931002644822 2023-01-24 05:56:32.607997: step: 722/470, loss: 0.0017666048370301723 2023-01-24 05:56:33.398020: step: 724/470, loss: 0.0047446005046367645 2023-01-24 05:56:34.116960: step: 726/470, loss: 0.03437475860118866 2023-01-24 05:56:34.889002: step: 728/470, loss: 0.008103154599666595 2023-01-24 05:56:35.649364: step: 730/470, loss: 0.15290933847427368 2023-01-24 05:56:36.370164: step: 732/470, loss: 0.005006398539990187 2023-01-24 05:56:37.101222: step: 734/470, loss: 0.03129494935274124 2023-01-24 05:56:37.909085: step: 736/470, loss: 0.1397247016429901 2023-01-24 05:56:38.636492: step: 738/470, loss: 0.018076607957482338 2023-01-24 05:56:39.352841: step: 740/470, loss: 0.012959785759449005 2023-01-24 05:56:40.064527: step: 742/470, loss: 0.0037444073241204023 2023-01-24 05:56:40.764494: step: 744/470, loss: 0.05442821606993675 2023-01-24 05:56:41.487951: step: 746/470, loss: 0.012656944803893566 2023-01-24 05:56:42.221215: step: 748/470, loss: 0.002542155794799328 2023-01-24 05:56:42.888025: step: 750/470, loss: 0.20244351029396057 2023-01-24 05:56:43.525918: step: 752/470, loss: 0.006245005410164595 2023-01-24 05:56:44.239769: step: 754/470, loss: 0.06883977353572845 2023-01-24 05:56:44.941127: step: 756/470, loss: 0.3660793900489807 2023-01-24 05:56:45.684856: step: 758/470, loss: 0.01697465591132641 2023-01-24 05:56:46.528341: step: 760/470, loss: 0.0022616013884544373 2023-01-24 05:56:47.240595: step: 762/470, loss: 0.013554520905017853 2023-01-24 05:56:47.997170: step: 764/470, loss: 0.0018667317926883698 2023-01-24 05:56:48.779093: step: 766/470, loss: 0.02620398811995983 2023-01-24 05:56:49.455757: step: 768/470, loss: 0.01824253797531128 2023-01-24 05:56:50.188065: step: 770/470, loss: 0.029288796707987785 2023-01-24 05:56:50.858831: step: 772/470, loss: 0.0019241668051108718 2023-01-24 05:56:51.541223: step: 774/470, loss: 0.009944219142198563 2023-01-24 05:56:52.276275: step: 776/470, loss: 0.02224019728600979 2023-01-24 05:56:52.905130: step: 778/470, loss: 0.0010763780446723104 2023-01-24 05:56:53.747266: step: 780/470, loss: 0.026158837601542473 2023-01-24 05:56:54.498275: step: 782/470, loss: 0.053973760455846786 2023-01-24 05:56:55.256461: step: 784/470, loss: 0.02082950621843338 2023-01-24 05:56:56.009513: step: 786/470, loss: 0.1821128875017166 2023-01-24 05:56:56.681434: step: 788/470, loss: 0.0018857375252991915 2023-01-24 05:56:57.400830: step: 790/470, loss: 0.01713024079799652 2023-01-24 05:56:58.044499: step: 792/470, loss: 0.012649615295231342 2023-01-24 05:56:58.764436: step: 794/470, loss: 0.15321439504623413 2023-01-24 05:56:59.650083: step: 796/470, loss: 0.03709512576460838 2023-01-24 05:57:00.471569: step: 798/470, loss: 0.00017801785725168884 2023-01-24 05:57:01.254972: step: 800/470, loss: 0.02271411009132862 2023-01-24 05:57:01.987733: step: 802/470, loss: 0.005088508129119873 2023-01-24 05:57:02.692412: step: 804/470, loss: 0.03457785025238991 2023-01-24 05:57:03.428048: step: 806/470, loss: 0.007744433358311653 2023-01-24 05:57:04.187668: step: 808/470, loss: 0.0011210455559194088 2023-01-24 05:57:05.024918: step: 810/470, loss: 0.004407666157931089 2023-01-24 05:57:05.861946: step: 812/470, loss: 0.024257738143205643 2023-01-24 05:57:06.662239: step: 814/470, loss: 0.008528418838977814 2023-01-24 05:57:07.391235: step: 816/470, loss: 0.002460848307237029 2023-01-24 05:57:08.196399: step: 818/470, loss: 0.0038715063128620386 2023-01-24 05:57:08.972784: step: 820/470, loss: 0.0031947391107678413 2023-01-24 05:57:09.738558: step: 822/470, loss: 0.021919481456279755 2023-01-24 05:57:10.516009: step: 824/470, loss: 0.00419453764334321 2023-01-24 05:57:11.239112: step: 826/470, loss: 0.18099209666252136 2023-01-24 05:57:11.887618: step: 828/470, loss: 0.0055846464820206165 2023-01-24 05:57:12.689184: step: 830/470, loss: 0.019906627014279366 2023-01-24 05:57:13.372944: step: 832/470, loss: 0.011373650282621384 2023-01-24 05:57:14.091526: step: 834/470, loss: 0.05863653123378754 2023-01-24 05:57:14.774529: step: 836/470, loss: 0.008630829863250256 2023-01-24 05:57:15.533602: step: 838/470, loss: 0.05342869088053703 2023-01-24 05:57:16.382390: step: 840/470, loss: 0.008590944111347198 2023-01-24 05:57:17.183732: step: 842/470, loss: 0.019462179392576218 2023-01-24 05:57:17.940490: step: 844/470, loss: 0.0030498160049319267 2023-01-24 05:57:18.715252: step: 846/470, loss: 0.02540343627333641 2023-01-24 05:57:19.547299: step: 848/470, loss: 0.015893662348389626 2023-01-24 05:57:20.201893: step: 850/470, loss: 0.01733972690999508 2023-01-24 05:57:21.058205: step: 852/470, loss: 0.006161740515381098 2023-01-24 05:57:21.814794: step: 854/470, loss: 0.0038143827114254236 2023-01-24 05:57:22.625016: step: 856/470, loss: 0.08798175305128098 2023-01-24 05:57:23.418997: step: 858/470, loss: 0.008321262896060944 2023-01-24 05:57:24.146010: step: 860/470, loss: 0.0049756853841245174 2023-01-24 05:57:24.927767: step: 862/470, loss: 0.03980034962296486 2023-01-24 05:57:25.651473: step: 864/470, loss: 0.008659124374389648 2023-01-24 05:57:26.428880: step: 866/470, loss: 0.06904779374599457 2023-01-24 05:57:27.100775: step: 868/470, loss: 0.0019073209259659052 2023-01-24 05:57:27.821367: step: 870/470, loss: 0.004029394127428532 2023-01-24 05:57:28.532146: step: 872/470, loss: 0.004503278061747551 2023-01-24 05:57:29.243718: step: 874/470, loss: 0.001992929959669709 2023-01-24 05:57:30.031588: step: 876/470, loss: 0.006583628244698048 2023-01-24 05:57:30.727213: step: 878/470, loss: 0.0009295732015743852 2023-01-24 05:57:31.535877: step: 880/470, loss: 0.0053373072296381 2023-01-24 05:57:32.270895: step: 882/470, loss: 0.009604030288755894 2023-01-24 05:57:33.012612: step: 884/470, loss: 0.002130313077941537 2023-01-24 05:57:33.705158: step: 886/470, loss: 0.04465380311012268 2023-01-24 05:57:34.542398: step: 888/470, loss: 0.10829721391201019 2023-01-24 05:57:35.306838: step: 890/470, loss: 0.03182058036327362 2023-01-24 05:57:36.018925: step: 892/470, loss: 0.013102928176522255 2023-01-24 05:57:36.813217: step: 894/470, loss: 0.02757185697555542 2023-01-24 05:57:37.601113: step: 896/470, loss: 0.013785184361040592 2023-01-24 05:57:38.413896: step: 898/470, loss: 0.013047303073108196 2023-01-24 05:57:39.140844: step: 900/470, loss: 3.3479478588560596e-05 2023-01-24 05:57:39.824360: step: 902/470, loss: 0.015448780730366707 2023-01-24 05:57:40.639939: step: 904/470, loss: 0.017426110804080963 2023-01-24 05:57:41.382414: step: 906/470, loss: 0.011090615764260292 2023-01-24 05:57:42.030791: step: 908/470, loss: 0.0027628943789750338 2023-01-24 05:57:42.802601: step: 910/470, loss: 0.07512981444597244 2023-01-24 05:57:43.487163: step: 912/470, loss: 0.003129625925794244 2023-01-24 05:57:44.086445: step: 914/470, loss: 0.0007694661035202444 2023-01-24 05:57:44.722178: step: 916/470, loss: 0.0655151829123497 2023-01-24 05:57:45.436284: step: 918/470, loss: 0.01531730592250824 2023-01-24 05:57:46.183205: step: 920/470, loss: 0.003190776566043496 2023-01-24 05:57:46.887264: step: 922/470, loss: 0.02201441302895546 2023-01-24 05:57:47.747373: step: 924/470, loss: 0.23225174844264984 2023-01-24 05:57:48.496392: step: 926/470, loss: 0.0298309326171875 2023-01-24 05:57:49.185202: step: 928/470, loss: 0.024859227240085602 2023-01-24 05:57:49.921481: step: 930/470, loss: 0.15923137962818146 2023-01-24 05:57:50.828984: step: 932/470, loss: 0.004126227926462889 2023-01-24 05:57:51.512605: step: 934/470, loss: 0.005694595165550709 2023-01-24 05:57:52.301057: step: 936/470, loss: 0.1984158605337143 2023-01-24 05:57:53.034875: step: 938/470, loss: 0.01643310859799385 2023-01-24 05:57:53.733542: step: 940/470, loss: 0.04566141590476036 2023-01-24 05:57:54.380366: step: 942/470, loss: 0.022141339257359505 ================================================== Loss: 0.041 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33945655727079566, 'r': 0.3201326545798585, 'f1': 0.32951154094450286}, 'combined': 0.2427979775380547, 'epoch': 31} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3516251996304759, 'r': 0.3357344454164063, 'f1': 0.3434961369730079}, 'combined': 0.2289974246486719, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3327250655636606, 'r': 0.31378435974409735, 'f1': 0.3229772609084753}, 'combined': 0.23798324487992914, 'epoch': 31} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3600111252415456, 'r': 0.33958741717495794, 'f1': 0.3495011517683882}, 'combined': 0.23300076784559207, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29464285714285715, 'r': 0.3535714285714286, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 31} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.2826086956521739, 'f1': 0.3611111111111111}, 'combined': 0.24074074074074073, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:00:33.890340: step: 2/470, loss: 0.9720427393913269 2023-01-24 06:00:34.571352: step: 4/470, loss: 0.013476014137268066 2023-01-24 06:00:35.340133: step: 6/470, loss: 0.00015409085608553141 2023-01-24 06:00:36.112773: step: 8/470, loss: 0.005482874345034361 2023-01-24 06:00:36.832835: step: 10/470, loss: 0.02253660000860691 2023-01-24 06:00:37.536498: step: 12/470, loss: 0.0016350456280633807 2023-01-24 06:00:38.300374: step: 14/470, loss: 0.0006504120538011193 2023-01-24 06:00:39.021129: step: 16/470, loss: 0.0015973305562511086 2023-01-24 06:00:39.754233: step: 18/470, loss: 0.25637224316596985 2023-01-24 06:00:40.508253: step: 20/470, loss: 0.02262234315276146 2023-01-24 06:00:41.320096: step: 22/470, loss: 0.001908066333271563 2023-01-24 06:00:41.972696: step: 24/470, loss: 0.0029853819869458675 2023-01-24 06:00:42.750660: step: 26/470, loss: 0.009884890168905258 2023-01-24 06:00:43.496203: step: 28/470, loss: 0.0006165321101434529 2023-01-24 06:00:44.229361: step: 30/470, loss: 0.0001714162644930184 2023-01-24 06:00:44.943548: step: 32/470, loss: 0.0308544784784317 2023-01-24 06:00:45.682317: step: 34/470, loss: 0.027413802221417427 2023-01-24 06:00:46.462026: step: 36/470, loss: 0.1625737100839615 2023-01-24 06:00:47.150407: step: 38/470, loss: 0.006317344959825277 2023-01-24 06:00:47.989305: step: 40/470, loss: 0.003615674562752247 2023-01-24 06:00:48.711639: step: 42/470, loss: 0.08496110886335373 2023-01-24 06:00:49.514265: step: 44/470, loss: 0.0003270620945841074 2023-01-24 06:00:50.248818: step: 46/470, loss: 0.013454585336148739 2023-01-24 06:00:50.916926: step: 48/470, loss: 0.022000696510076523 2023-01-24 06:00:51.564934: step: 50/470, loss: 0.004566211719065905 2023-01-24 06:00:52.351316: step: 52/470, loss: 0.03704002499580383 2023-01-24 06:00:53.004855: step: 54/470, loss: 0.004721718840301037 2023-01-24 06:00:53.758688: step: 56/470, loss: 0.02784247137606144 2023-01-24 06:00:54.456833: step: 58/470, loss: 0.2554512321949005 2023-01-24 06:00:55.175846: step: 60/470, loss: 0.006956116762012243 2023-01-24 06:00:55.875880: step: 62/470, loss: 0.0004784489865414798 2023-01-24 06:00:56.547653: step: 64/470, loss: 0.0018533782567828894 2023-01-24 06:00:57.405976: step: 66/470, loss: 0.14442533254623413 2023-01-24 06:00:58.109984: step: 68/470, loss: 0.0010345384944230318 2023-01-24 06:00:58.889621: step: 70/470, loss: 0.02987985871732235 2023-01-24 06:00:59.678481: step: 72/470, loss: 0.010891645215451717 2023-01-24 06:01:00.479661: step: 74/470, loss: 0.03483196720480919 2023-01-24 06:01:01.310262: step: 76/470, loss: 0.09911884367465973 2023-01-24 06:01:02.115625: step: 78/470, loss: 0.007575498428195715 2023-01-24 06:01:02.927516: step: 80/470, loss: 0.007171786390244961 2023-01-24 06:01:03.687995: step: 82/470, loss: 0.002897205762565136 2023-01-24 06:01:04.391937: step: 84/470, loss: 0.0030799158848822117 2023-01-24 06:01:05.084441: step: 86/470, loss: 0.009569648653268814 2023-01-24 06:01:05.719377: step: 88/470, loss: 0.002280237153172493 2023-01-24 06:01:06.450341: step: 90/470, loss: 0.017339464277029037 2023-01-24 06:01:07.171427: step: 92/470, loss: 0.01658935844898224 2023-01-24 06:01:07.891215: step: 94/470, loss: 0.013809598982334137 2023-01-24 06:01:08.571275: step: 96/470, loss: 0.001008601044304669 2023-01-24 06:01:09.320758: step: 98/470, loss: 0.1749447137117386 2023-01-24 06:01:10.070913: step: 100/470, loss: 0.046566035598516464 2023-01-24 06:01:10.786676: step: 102/470, loss: 0.02105867676436901 2023-01-24 06:01:11.500549: step: 104/470, loss: 0.14165711402893066 2023-01-24 06:01:12.210686: step: 106/470, loss: 0.0024683044757694006 2023-01-24 06:01:12.929268: step: 108/470, loss: 0.00013477614265866578 2023-01-24 06:01:13.653200: step: 110/470, loss: 0.01837342046201229 2023-01-24 06:01:14.426527: step: 112/470, loss: 0.007353122346103191 2023-01-24 06:01:15.184300: step: 114/470, loss: 0.009461408481001854 2023-01-24 06:01:15.905568: step: 116/470, loss: 0.002974409842863679 2023-01-24 06:01:16.634305: step: 118/470, loss: 0.05226290225982666 2023-01-24 06:01:17.366559: step: 120/470, loss: 0.043346941471099854 2023-01-24 06:01:18.121459: step: 122/470, loss: 0.10949388891458511 2023-01-24 06:01:18.846471: step: 124/470, loss: 0.023482829332351685 2023-01-24 06:01:19.625227: step: 126/470, loss: 0.006389224901795387 2023-01-24 06:01:20.337252: step: 128/470, loss: 0.009503074921667576 2023-01-24 06:01:21.128015: step: 130/470, loss: 0.014946824871003628 2023-01-24 06:01:21.858712: step: 132/470, loss: 0.016744105145335197 2023-01-24 06:01:22.560147: step: 134/470, loss: 0.0011202177265658975 2023-01-24 06:01:23.309831: step: 136/470, loss: 0.0005344194360077381 2023-01-24 06:01:24.110691: step: 138/470, loss: 0.053323112428188324 2023-01-24 06:01:24.864139: step: 140/470, loss: 0.7324318289756775 2023-01-24 06:01:25.669675: step: 142/470, loss: 0.04300731047987938 2023-01-24 06:01:26.386865: step: 144/470, loss: 0.01699782721698284 2023-01-24 06:01:27.111701: step: 146/470, loss: 0.0031976874452084303 2023-01-24 06:01:27.754327: step: 148/470, loss: 0.010052609257400036 2023-01-24 06:01:28.473076: step: 150/470, loss: 0.0200212299823761 2023-01-24 06:01:29.194882: step: 152/470, loss: 0.012295265682041645 2023-01-24 06:01:29.971452: step: 154/470, loss: 0.00786919891834259 2023-01-24 06:01:30.737341: step: 156/470, loss: 0.019932325929403305 2023-01-24 06:01:31.648120: step: 158/470, loss: 0.0012513573747128248 2023-01-24 06:01:32.438343: step: 160/470, loss: 0.0011142558651044965 2023-01-24 06:01:33.201885: step: 162/470, loss: 0.021040918305516243 2023-01-24 06:01:33.962936: step: 164/470, loss: 0.002821336267516017 2023-01-24 06:01:34.731319: step: 166/470, loss: 0.006630197633057833 2023-01-24 06:01:35.409764: step: 168/470, loss: 0.004258208908140659 2023-01-24 06:01:36.094575: step: 170/470, loss: 0.03937026113271713 2023-01-24 06:01:36.849197: step: 172/470, loss: 0.0024332425091415644 2023-01-24 06:01:37.539735: step: 174/470, loss: 0.0002705961815081537 2023-01-24 06:01:38.198304: step: 176/470, loss: 0.0043231286108493805 2023-01-24 06:01:38.938996: step: 178/470, loss: 0.10010123252868652 2023-01-24 06:01:39.681544: step: 180/470, loss: 0.003182998625561595 2023-01-24 06:01:40.398801: step: 182/470, loss: 4.15868271375075e-05 2023-01-24 06:01:41.034406: step: 184/470, loss: 0.030571192502975464 2023-01-24 06:01:41.790207: step: 186/470, loss: 0.0007512365118600428 2023-01-24 06:01:42.536061: step: 188/470, loss: 0.0026000277139246464 2023-01-24 06:01:43.229844: step: 190/470, loss: 0.010836091823875904 2023-01-24 06:01:43.993648: step: 192/470, loss: 0.02573336847126484 2023-01-24 06:01:44.777192: step: 194/470, loss: 0.03251959756016731 2023-01-24 06:01:45.437434: step: 196/470, loss: 0.00026101371622644365 2023-01-24 06:01:46.103881: step: 198/470, loss: 0.09524267166852951 2023-01-24 06:01:46.877165: step: 200/470, loss: 0.11690568178892136 2023-01-24 06:01:47.538394: step: 202/470, loss: 0.007767211180180311 2023-01-24 06:01:48.289025: step: 204/470, loss: 0.18378157913684845 2023-01-24 06:01:49.079207: step: 206/470, loss: 0.13678520917892456 2023-01-24 06:01:49.946041: step: 208/470, loss: 0.0026962130796164274 2023-01-24 06:01:50.699247: step: 210/470, loss: 0.0014513169880956411 2023-01-24 06:01:51.368655: step: 212/470, loss: 0.010849079117178917 2023-01-24 06:01:52.116899: step: 214/470, loss: 0.008346461690962315 2023-01-24 06:01:52.886784: step: 216/470, loss: 0.07517098635435104 2023-01-24 06:01:53.613473: step: 218/470, loss: 0.27010560035705566 2023-01-24 06:01:54.383794: step: 220/470, loss: 0.02572491392493248 2023-01-24 06:01:55.074348: step: 222/470, loss: 0.0008853495819494128 2023-01-24 06:01:55.789797: step: 224/470, loss: 0.01680896058678627 2023-01-24 06:01:56.562700: step: 226/470, loss: 0.024884656071662903 2023-01-24 06:01:57.294340: step: 228/470, loss: 0.0003141724446322769 2023-01-24 06:01:57.944816: step: 230/470, loss: 0.06602133810520172 2023-01-24 06:01:58.572543: step: 232/470, loss: 0.014157207682728767 2023-01-24 06:01:59.281819: step: 234/470, loss: 0.00014316238230094314 2023-01-24 06:02:00.021445: step: 236/470, loss: 0.010098121128976345 2023-01-24 06:02:00.787188: step: 238/470, loss: 0.006802602671086788 2023-01-24 06:02:01.570507: step: 240/470, loss: 0.026168648153543472 2023-01-24 06:02:02.208917: step: 242/470, loss: 0.020882943645119667 2023-01-24 06:02:02.837661: step: 244/470, loss: 0.0026823831722140312 2023-01-24 06:02:03.522364: step: 246/470, loss: 0.002364139771088958 2023-01-24 06:02:04.312493: step: 248/470, loss: 0.04574478790163994 2023-01-24 06:02:05.077736: step: 250/470, loss: 0.0015829337062314153 2023-01-24 06:02:05.807768: step: 252/470, loss: 0.018606998026371002 2023-01-24 06:02:06.505790: step: 254/470, loss: 0.00044129794696345925 2023-01-24 06:02:07.222231: step: 256/470, loss: 0.03767343983054161 2023-01-24 06:02:07.968363: step: 258/470, loss: 0.00863537099212408 2023-01-24 06:02:08.684616: step: 260/470, loss: 0.01770273968577385 2023-01-24 06:02:09.395046: step: 262/470, loss: 0.015628566965460777 2023-01-24 06:02:10.089527: step: 264/470, loss: 0.0023462101817131042 2023-01-24 06:02:10.770378: step: 266/470, loss: 0.0092113446444273 2023-01-24 06:02:11.466175: step: 268/470, loss: 0.06474064290523529 2023-01-24 06:02:12.181797: step: 270/470, loss: 8.660142157168593e-06 2023-01-24 06:02:13.005968: step: 272/470, loss: 0.3112047612667084 2023-01-24 06:02:13.834373: step: 274/470, loss: 0.24923990666866302 2023-01-24 06:02:14.544762: step: 276/470, loss: 0.0041430373676121235 2023-01-24 06:02:15.291436: step: 278/470, loss: 0.008526407182216644 2023-01-24 06:02:15.899668: step: 280/470, loss: 0.004656743258237839 2023-01-24 06:02:16.649911: step: 282/470, loss: 0.016585668548941612 2023-01-24 06:02:17.333457: step: 284/470, loss: 3.0181516194716096e-05 2023-01-24 06:02:18.140242: step: 286/470, loss: 0.0011372804874554276 2023-01-24 06:02:18.958247: step: 288/470, loss: 0.022914016619324684 2023-01-24 06:02:19.726592: step: 290/470, loss: 0.06423134356737137 2023-01-24 06:02:20.509385: step: 292/470, loss: 0.002632809802889824 2023-01-24 06:02:21.300519: step: 294/470, loss: 0.04266877844929695 2023-01-24 06:02:22.078653: step: 296/470, loss: 0.03284723311662674 2023-01-24 06:02:22.829686: step: 298/470, loss: 0.007151145022362471 2023-01-24 06:02:23.647827: step: 300/470, loss: 0.1206933856010437 2023-01-24 06:02:24.442218: step: 302/470, loss: 0.010511813685297966 2023-01-24 06:02:25.144817: step: 304/470, loss: 0.011880377307534218 2023-01-24 06:02:25.876123: step: 306/470, loss: 0.28889280557632446 2023-01-24 06:02:26.561445: step: 308/470, loss: 0.008291719481348991 2023-01-24 06:02:27.189971: step: 310/470, loss: 0.0001515456533525139 2023-01-24 06:02:27.905568: step: 312/470, loss: 0.5845152139663696 2023-01-24 06:02:28.573908: step: 314/470, loss: 0.05446061119437218 2023-01-24 06:02:29.270059: step: 316/470, loss: 0.00031370227225124836 2023-01-24 06:02:30.036542: step: 318/470, loss: 0.0016886562807485461 2023-01-24 06:02:30.731009: step: 320/470, loss: 0.007711860351264477 2023-01-24 06:02:31.508586: step: 322/470, loss: 0.0003481293679215014 2023-01-24 06:02:32.213029: step: 324/470, loss: 0.00423666276037693 2023-01-24 06:02:32.997713: step: 326/470, loss: 0.21974371373653412 2023-01-24 06:02:33.742295: step: 328/470, loss: 0.02752552181482315 2023-01-24 06:02:34.484725: step: 330/470, loss: 0.001168736140243709 2023-01-24 06:02:35.242837: step: 332/470, loss: 0.18791626393795013 2023-01-24 06:02:36.020562: step: 334/470, loss: 0.016098229214549065 2023-01-24 06:02:36.639081: step: 336/470, loss: 0.005329936742782593 2023-01-24 06:02:37.408021: step: 338/470, loss: 0.05876723304390907 2023-01-24 06:02:38.105715: step: 340/470, loss: 0.06268248707056046 2023-01-24 06:02:38.836317: step: 342/470, loss: 0.006979916710406542 2023-01-24 06:02:39.562414: step: 344/470, loss: 0.0026592700742185116 2023-01-24 06:02:40.235631: step: 346/470, loss: 0.0027532707899808884 2023-01-24 06:02:41.022904: step: 348/470, loss: 0.05007663369178772 2023-01-24 06:02:41.731518: step: 350/470, loss: 0.04327116161584854 2023-01-24 06:02:42.528493: step: 352/470, loss: 0.0002385459520155564 2023-01-24 06:02:43.285911: step: 354/470, loss: 0.08725601434707642 2023-01-24 06:02:43.947035: step: 356/470, loss: 0.007848616689443588 2023-01-24 06:02:44.630645: step: 358/470, loss: 0.010468382388353348 2023-01-24 06:02:45.330146: step: 360/470, loss: 0.0008415202610194683 2023-01-24 06:02:46.130554: step: 362/470, loss: 0.005082852207124233 2023-01-24 06:02:46.911567: step: 364/470, loss: 0.04120028764009476 2023-01-24 06:02:47.599835: step: 366/470, loss: 0.052009016275405884 2023-01-24 06:02:48.317770: step: 368/470, loss: 8.392710878979415e-05 2023-01-24 06:02:49.072880: step: 370/470, loss: 0.005474635865539312 2023-01-24 06:02:49.752573: step: 372/470, loss: 0.015097817406058311 2023-01-24 06:02:50.536377: step: 374/470, loss: 0.016206717118620872 2023-01-24 06:02:51.288288: step: 376/470, loss: 0.0031522957142442465 2023-01-24 06:02:51.985995: step: 378/470, loss: 0.00015461869770660996 2023-01-24 06:02:52.727746: step: 380/470, loss: 0.0001224875304615125 2023-01-24 06:02:53.427979: step: 382/470, loss: 0.006665257271379232 2023-01-24 06:02:54.246944: step: 384/470, loss: 0.039671264588832855 2023-01-24 06:02:54.966342: step: 386/470, loss: 0.037569403648376465 2023-01-24 06:02:55.715342: step: 388/470, loss: 0.005145507864654064 2023-01-24 06:02:56.505487: step: 390/470, loss: 0.0063809738494455814 2023-01-24 06:02:57.299537: step: 392/470, loss: 0.01172274723649025 2023-01-24 06:02:58.057593: step: 394/470, loss: 0.0012702624080702662 2023-01-24 06:02:58.800198: step: 396/470, loss: 0.007107668090611696 2023-01-24 06:02:59.490511: step: 398/470, loss: 0.014695264399051666 2023-01-24 06:03:00.259817: step: 400/470, loss: 0.018078316003084183 2023-01-24 06:03:01.019715: step: 402/470, loss: 0.027752364054322243 2023-01-24 06:03:01.771562: step: 404/470, loss: 0.028842059895396233 2023-01-24 06:03:02.481272: step: 406/470, loss: 0.03767353668808937 2023-01-24 06:03:03.174991: step: 408/470, loss: 0.030763795599341393 2023-01-24 06:03:03.953443: step: 410/470, loss: 0.05021560564637184 2023-01-24 06:03:04.722696: step: 412/470, loss: 0.040465906262397766 2023-01-24 06:03:05.495099: step: 414/470, loss: 0.04318307340145111 2023-01-24 06:03:06.220433: step: 416/470, loss: 0.003911891486495733 2023-01-24 06:03:06.897486: step: 418/470, loss: 0.005350908264517784 2023-01-24 06:03:07.623097: step: 420/470, loss: 0.00030204097856767476 2023-01-24 06:03:08.280554: step: 422/470, loss: 0.015325166285037994 2023-01-24 06:03:09.019325: step: 424/470, loss: 0.41761523485183716 2023-01-24 06:03:09.717436: step: 426/470, loss: 0.011177667416632175 2023-01-24 06:03:10.501071: step: 428/470, loss: 0.0751306414604187 2023-01-24 06:03:11.277129: step: 430/470, loss: 0.05438210442662239 2023-01-24 06:03:12.051840: step: 432/470, loss: 0.05413680523633957 2023-01-24 06:03:12.727589: step: 434/470, loss: 0.04908444359898567 2023-01-24 06:03:13.518413: step: 436/470, loss: 0.0007037912728264928 2023-01-24 06:03:14.186000: step: 438/470, loss: 0.0084762591868639 2023-01-24 06:03:14.831020: step: 440/470, loss: 0.007872858084738255 2023-01-24 06:03:15.532926: step: 442/470, loss: 0.004519272595643997 2023-01-24 06:03:16.307409: step: 444/470, loss: 0.0006240661023184657 2023-01-24 06:03:17.016936: step: 446/470, loss: 0.25435835123062134 2023-01-24 06:03:17.722743: step: 448/470, loss: 0.009995101019740105 2023-01-24 06:03:18.426874: step: 450/470, loss: 0.0034806779585778713 2023-01-24 06:03:19.160137: step: 452/470, loss: 0.25618600845336914 2023-01-24 06:03:19.857336: step: 454/470, loss: 0.0810975506901741 2023-01-24 06:03:20.649638: step: 456/470, loss: 0.007433571387082338 2023-01-24 06:03:21.347109: step: 458/470, loss: 0.16480611264705658 2023-01-24 06:03:22.078324: step: 460/470, loss: 0.0011335683520883322 2023-01-24 06:03:22.879789: step: 462/470, loss: 0.005519744008779526 2023-01-24 06:03:23.547084: step: 464/470, loss: 0.006193581037223339 2023-01-24 06:03:24.216012: step: 466/470, loss: 0.08350327610969543 2023-01-24 06:03:24.914885: step: 468/470, loss: 0.035261370241642 2023-01-24 06:03:25.673661: step: 470/470, loss: 0.04092838987708092 2023-01-24 06:03:26.322157: step: 472/470, loss: 0.014473150484263897 2023-01-24 06:03:27.030141: step: 474/470, loss: 0.23851221799850464 2023-01-24 06:03:27.730144: step: 476/470, loss: 0.019818585366010666 2023-01-24 06:03:28.462040: step: 478/470, loss: 0.01893843151628971 2023-01-24 06:03:29.179805: step: 480/470, loss: 0.042298175394535065 2023-01-24 06:03:29.931810: step: 482/470, loss: 0.007379506714642048 2023-01-24 06:03:30.706795: step: 484/470, loss: 0.02085047774016857 2023-01-24 06:03:31.403940: step: 486/470, loss: 0.02521711029112339 2023-01-24 06:03:32.166606: step: 488/470, loss: 0.02616407722234726 2023-01-24 06:03:32.888275: step: 490/470, loss: 0.0031149170827120543 2023-01-24 06:03:33.577441: step: 492/470, loss: 0.0020052469335496426 2023-01-24 06:03:34.361964: step: 494/470, loss: 0.0112814512103796 2023-01-24 06:03:35.075550: step: 496/470, loss: 0.00834614410996437 2023-01-24 06:03:35.808847: step: 498/470, loss: 0.0071093300357460976 2023-01-24 06:03:36.542560: step: 500/470, loss: 0.008331052958965302 2023-01-24 06:03:37.255479: step: 502/470, loss: 0.02857259288430214 2023-01-24 06:03:37.902859: step: 504/470, loss: 0.014069000259041786 2023-01-24 06:03:38.576222: step: 506/470, loss: 0.13017906248569489 2023-01-24 06:03:39.452843: step: 508/470, loss: 0.0029538320377469063 2023-01-24 06:03:40.188892: step: 510/470, loss: 0.0012912432430312037 2023-01-24 06:03:40.992539: step: 512/470, loss: 0.0015609815018251538 2023-01-24 06:03:41.771247: step: 514/470, loss: 0.02052444778382778 2023-01-24 06:03:42.433621: step: 516/470, loss: 0.1478673666715622 2023-01-24 06:03:43.147163: step: 518/470, loss: 0.0018792763585224748 2023-01-24 06:03:43.853884: step: 520/470, loss: 0.006700445432215929 2023-01-24 06:03:44.547051: step: 522/470, loss: 0.016489727422595024 2023-01-24 06:03:45.281730: step: 524/470, loss: 0.09012142568826675 2023-01-24 06:03:46.072998: step: 526/470, loss: 0.019330738112330437 2023-01-24 06:03:46.776141: step: 528/470, loss: 0.013795935548841953 2023-01-24 06:03:47.633131: step: 530/470, loss: 0.014351630583405495 2023-01-24 06:03:48.337037: step: 532/470, loss: 0.08632518351078033 2023-01-24 06:03:49.094550: step: 534/470, loss: 0.023600619286298752 2023-01-24 06:03:49.812040: step: 536/470, loss: 0.009278071112930775 2023-01-24 06:03:50.512945: step: 538/470, loss: 0.007990618236362934 2023-01-24 06:03:51.173032: step: 540/470, loss: 8.854016778059304e-05 2023-01-24 06:03:51.838863: step: 542/470, loss: 0.11269865930080414 2023-01-24 06:03:52.527133: step: 544/470, loss: 0.01807483844459057 2023-01-24 06:03:53.371298: step: 546/470, loss: 0.02807825244963169 2023-01-24 06:03:54.042998: step: 548/470, loss: 0.02302536927163601 2023-01-24 06:03:54.775943: step: 550/470, loss: 0.03050990402698517 2023-01-24 06:03:55.534457: step: 552/470, loss: 0.03772333264350891 2023-01-24 06:03:56.328966: step: 554/470, loss: 0.00996860396116972 2023-01-24 06:03:57.043457: step: 556/470, loss: 0.006364746019244194 2023-01-24 06:03:57.774803: step: 558/470, loss: 0.011979727074503899 2023-01-24 06:03:58.517499: step: 560/470, loss: 0.05357194319367409 2023-01-24 06:03:59.302055: step: 562/470, loss: 0.0625770092010498 2023-01-24 06:03:59.987043: step: 564/470, loss: 0.00023006857372820377 2023-01-24 06:04:00.755074: step: 566/470, loss: 0.019672883674502373 2023-01-24 06:04:01.519246: step: 568/470, loss: 0.04599352553486824 2023-01-24 06:04:02.259056: step: 570/470, loss: 0.010658334009349346 2023-01-24 06:04:03.098692: step: 572/470, loss: 0.015365660190582275 2023-01-24 06:04:03.921479: step: 574/470, loss: 0.004618105012923479 2023-01-24 06:04:04.630344: step: 576/470, loss: 0.0076096258126199245 2023-01-24 06:04:05.340341: step: 578/470, loss: 0.020669929683208466 2023-01-24 06:04:06.022058: step: 580/470, loss: 0.001826974330469966 2023-01-24 06:04:06.676974: step: 582/470, loss: 0.1444631665945053 2023-01-24 06:04:07.399812: step: 584/470, loss: 0.0031216128263622522 2023-01-24 06:04:08.148821: step: 586/470, loss: 0.016765842214226723 2023-01-24 06:04:08.947465: step: 588/470, loss: 0.007116939872503281 2023-01-24 06:04:09.652286: step: 590/470, loss: 0.13170292973518372 2023-01-24 06:04:10.409106: step: 592/470, loss: 0.016895027831196785 2023-01-24 06:04:11.099602: step: 594/470, loss: 0.00024278272758238018 2023-01-24 06:04:11.813700: step: 596/470, loss: 0.0030464413575828075 2023-01-24 06:04:12.430086: step: 598/470, loss: 0.0016490904381498694 2023-01-24 06:04:13.229664: step: 600/470, loss: 0.03087281621992588 2023-01-24 06:04:13.987889: step: 602/470, loss: 0.003503436455503106 2023-01-24 06:04:14.653293: step: 604/470, loss: 0.18342113494873047 2023-01-24 06:04:15.362550: step: 606/470, loss: 0.018698526546359062 2023-01-24 06:04:16.109050: step: 608/470, loss: 0.05845620110630989 2023-01-24 06:04:16.846630: step: 610/470, loss: 0.001678596599958837 2023-01-24 06:04:17.558329: step: 612/470, loss: 0.014549157582223415 2023-01-24 06:04:18.256590: step: 614/470, loss: 0.0009063933975994587 2023-01-24 06:04:18.949292: step: 616/470, loss: 0.0022073141299188137 2023-01-24 06:04:19.635477: step: 618/470, loss: 0.012333320453763008 2023-01-24 06:04:20.382179: step: 620/470, loss: 0.09075351059436798 2023-01-24 06:04:21.114628: step: 622/470, loss: 0.029867831617593765 2023-01-24 06:04:21.857767: step: 624/470, loss: 0.09833616763353348 2023-01-24 06:04:22.532633: step: 626/470, loss: 0.0060694171115756035 2023-01-24 06:04:23.153384: step: 628/470, loss: 0.008746202103793621 2023-01-24 06:04:23.801673: step: 630/470, loss: 0.0012253581080585718 2023-01-24 06:04:24.514198: step: 632/470, loss: 0.01028769463300705 2023-01-24 06:04:25.295686: step: 634/470, loss: 0.011649365536868572 2023-01-24 06:04:26.093539: step: 636/470, loss: 0.02281986176967621 2023-01-24 06:04:26.822609: step: 638/470, loss: 0.21459834277629852 2023-01-24 06:04:27.469374: step: 640/470, loss: 0.004545276518911123 2023-01-24 06:04:28.115167: step: 642/470, loss: 3.121816189377569e-05 2023-01-24 06:04:28.769060: step: 644/470, loss: 0.005426404532045126 2023-01-24 06:04:29.624377: step: 646/470, loss: 0.040757469832897186 2023-01-24 06:04:30.293835: step: 648/470, loss: 0.011101129464805126 2023-01-24 06:04:31.084629: step: 650/470, loss: 2.5025525093078613 2023-01-24 06:04:31.773078: step: 652/470, loss: 0.012538554146885872 2023-01-24 06:04:32.524961: step: 654/470, loss: 0.03285971283912659 2023-01-24 06:04:33.394936: step: 656/470, loss: 0.0021789884194731712 2023-01-24 06:04:34.147140: step: 658/470, loss: 0.001402953639626503 2023-01-24 06:04:34.918081: step: 660/470, loss: 0.0017720997566357255 2023-01-24 06:04:35.691693: step: 662/470, loss: 0.039381928741931915 2023-01-24 06:04:36.482092: step: 664/470, loss: 0.01845177263021469 2023-01-24 06:04:37.130522: step: 666/470, loss: 0.029854735359549522 2023-01-24 06:04:37.836987: step: 668/470, loss: 0.003992500249296427 2023-01-24 06:04:38.552801: step: 670/470, loss: 0.004251103848218918 2023-01-24 06:04:39.284387: step: 672/470, loss: 0.010103264823555946 2023-01-24 06:04:40.048976: step: 674/470, loss: 0.0004654536605812609 2023-01-24 06:04:40.785866: step: 676/470, loss: 0.4272131025791168 2023-01-24 06:04:41.477263: step: 678/470, loss: 0.0190906822681427 2023-01-24 06:04:42.283856: step: 680/470, loss: 0.005119283217936754 2023-01-24 06:04:43.010056: step: 682/470, loss: 0.0027894098311662674 2023-01-24 06:04:43.819168: step: 684/470, loss: 0.01846400462090969 2023-01-24 06:04:44.576401: step: 686/470, loss: 0.1110844612121582 2023-01-24 06:04:45.264554: step: 688/470, loss: 0.0023693658877164125 2023-01-24 06:04:45.966945: step: 690/470, loss: 0.003205197863280773 2023-01-24 06:04:46.701389: step: 692/470, loss: 0.017842544242739677 2023-01-24 06:04:47.451535: step: 694/470, loss: 0.018687259405851364 2023-01-24 06:04:48.067369: step: 696/470, loss: 8.786357648205012e-05 2023-01-24 06:04:48.791852: step: 698/470, loss: 0.0007583802798762918 2023-01-24 06:04:49.613841: step: 700/470, loss: 0.20183929800987244 2023-01-24 06:04:50.353187: step: 702/470, loss: 0.00020361962378956378 2023-01-24 06:04:51.141578: step: 704/470, loss: 0.027718501165509224 2023-01-24 06:04:51.828443: step: 706/470, loss: 0.00023348911781795323 2023-01-24 06:04:52.460610: step: 708/470, loss: 0.032941270619630814 2023-01-24 06:04:53.216864: step: 710/470, loss: 0.04147607460618019 2023-01-24 06:04:53.928640: step: 712/470, loss: 6.337544618872926e-05 2023-01-24 06:04:54.654327: step: 714/470, loss: 0.014632035978138447 2023-01-24 06:04:55.335698: step: 716/470, loss: 0.020782971754670143 2023-01-24 06:04:56.005246: step: 718/470, loss: 0.006766524165868759 2023-01-24 06:04:56.764060: step: 720/470, loss: 0.014275794848799706 2023-01-24 06:04:57.528995: step: 722/470, loss: 0.017139313742518425 2023-01-24 06:04:58.245802: step: 724/470, loss: 0.014928764663636684 2023-01-24 06:04:58.926180: step: 726/470, loss: 0.028308287262916565 2023-01-24 06:04:59.660850: step: 728/470, loss: 0.05225411802530289 2023-01-24 06:05:00.364804: step: 730/470, loss: 0.045965153723955154 2023-01-24 06:05:01.161123: step: 732/470, loss: 0.001099904766306281 2023-01-24 06:05:01.979956: step: 734/470, loss: 0.018097948282957077 2023-01-24 06:05:02.726723: step: 736/470, loss: 0.06644519418478012 2023-01-24 06:05:03.423695: step: 738/470, loss: 0.017522266134619713 2023-01-24 06:05:04.222203: step: 740/470, loss: 0.006565961986780167 2023-01-24 06:05:04.926872: step: 742/470, loss: 0.0008447995060123503 2023-01-24 06:05:05.654153: step: 744/470, loss: 0.008136849850416183 2023-01-24 06:05:06.459893: step: 746/470, loss: 0.02021070197224617 2023-01-24 06:05:07.135120: step: 748/470, loss: 0.0010262180585414171 2023-01-24 06:05:07.839495: step: 750/470, loss: 0.028441239148378372 2023-01-24 06:05:08.547339: step: 752/470, loss: 1.7200602087541483e-05 2023-01-24 06:05:09.297137: step: 754/470, loss: 0.005179987754672766 2023-01-24 06:05:10.068940: step: 756/470, loss: 0.2640395760536194 2023-01-24 06:05:10.725647: step: 758/470, loss: 0.001883813296444714 2023-01-24 06:05:11.436620: step: 760/470, loss: 0.024232909083366394 2023-01-24 06:05:12.126921: step: 762/470, loss: 0.031181665137410164 2023-01-24 06:05:12.901322: step: 764/470, loss: 0.13508857786655426 2023-01-24 06:05:13.598677: step: 766/470, loss: 0.02267894335091114 2023-01-24 06:05:14.371649: step: 768/470, loss: 0.01819641701877117 2023-01-24 06:05:15.157169: step: 770/470, loss: 0.03003586456179619 2023-01-24 06:05:15.887357: step: 772/470, loss: 0.02495230734348297 2023-01-24 06:05:16.650646: step: 774/470, loss: 0.016687629744410515 2023-01-24 06:05:17.346862: step: 776/470, loss: 0.001529327011667192 2023-01-24 06:05:18.091360: step: 778/470, loss: 0.05840422213077545 2023-01-24 06:05:18.823364: step: 780/470, loss: 0.003544104751199484 2023-01-24 06:05:19.587495: step: 782/470, loss: 0.00039587877108715475 2023-01-24 06:05:20.471041: step: 784/470, loss: 0.6694744825363159 2023-01-24 06:05:21.234735: step: 786/470, loss: 0.03220684081315994 2023-01-24 06:05:21.964473: step: 788/470, loss: 0.014541038312017918 2023-01-24 06:05:22.632748: step: 790/470, loss: 0.004369522910565138 2023-01-24 06:05:23.317831: step: 792/470, loss: 0.013264582492411137 2023-01-24 06:05:24.051984: step: 794/470, loss: 0.0005037263035774231 2023-01-24 06:05:24.814590: step: 796/470, loss: 0.002712902380153537 2023-01-24 06:05:25.637953: step: 798/470, loss: 0.3494986593723297 2023-01-24 06:05:26.356134: step: 800/470, loss: 0.0015915961703285575 2023-01-24 06:05:27.088112: step: 802/470, loss: 0.013079517520964146 2023-01-24 06:05:27.872962: step: 804/470, loss: 0.004297505598515272 2023-01-24 06:05:28.613062: step: 806/470, loss: 0.40553227066993713 2023-01-24 06:05:29.467080: step: 808/470, loss: 0.034644801169633865 2023-01-24 06:05:30.143120: step: 810/470, loss: 0.008511470630764961 2023-01-24 06:05:30.890776: step: 812/470, loss: 0.09465232491493225 2023-01-24 06:05:31.575798: step: 814/470, loss: 0.001864009303972125 2023-01-24 06:05:32.258166: step: 816/470, loss: 0.03610503673553467 2023-01-24 06:05:33.010564: step: 818/470, loss: 0.03397827968001366 2023-01-24 06:05:33.733387: step: 820/470, loss: 0.003417443251237273 2023-01-24 06:05:34.409150: step: 822/470, loss: 0.020306063815951347 2023-01-24 06:05:35.150488: step: 824/470, loss: 0.06476482003927231 2023-01-24 06:05:35.885812: step: 826/470, loss: 0.002904894994571805 2023-01-24 06:05:36.678813: step: 828/470, loss: 0.004498578608036041 2023-01-24 06:05:37.396307: step: 830/470, loss: 0.03605084493756294 2023-01-24 06:05:38.116603: step: 832/470, loss: 0.011630040593445301 2023-01-24 06:05:38.792639: step: 834/470, loss: 0.01983511820435524 2023-01-24 06:05:39.510961: step: 836/470, loss: 0.002731953514739871 2023-01-24 06:05:40.309510: step: 838/470, loss: 0.009025120176374912 2023-01-24 06:05:41.038407: step: 840/470, loss: 0.19690948724746704 2023-01-24 06:05:41.824077: step: 842/470, loss: 0.0033933157101273537 2023-01-24 06:05:42.520949: step: 844/470, loss: 0.054548464715480804 2023-01-24 06:05:43.261380: step: 846/470, loss: 0.003134024329483509 2023-01-24 06:05:44.046622: step: 848/470, loss: 0.09228435903787613 2023-01-24 06:05:44.741203: step: 850/470, loss: 0.0018799483077600598 2023-01-24 06:05:45.600962: step: 852/470, loss: 0.13064418733119965 2023-01-24 06:05:46.310455: step: 854/470, loss: 0.0057640597224235535 2023-01-24 06:05:47.007028: step: 856/470, loss: 0.00749384006485343 2023-01-24 06:05:47.779867: step: 858/470, loss: 0.0018819028045982122 2023-01-24 06:05:48.513811: step: 860/470, loss: 0.012828037142753601 2023-01-24 06:05:49.232743: step: 862/470, loss: 0.00204385444521904 2023-01-24 06:05:50.007353: step: 864/470, loss: 0.03246442228555679 2023-01-24 06:05:50.722153: step: 866/470, loss: 0.042275648564100266 2023-01-24 06:05:51.356557: step: 868/470, loss: 0.020331397652626038 2023-01-24 06:05:52.040158: step: 870/470, loss: 0.019861869513988495 2023-01-24 06:05:52.711846: step: 872/470, loss: 0.0008119989070110023 2023-01-24 06:05:53.477924: step: 874/470, loss: 0.00033393464400433004 2023-01-24 06:05:54.210116: step: 876/470, loss: 0.001284466008655727 2023-01-24 06:05:54.910491: step: 878/470, loss: 0.0018008254701271653 2023-01-24 06:05:55.669921: step: 880/470, loss: 0.0011693740962073207 2023-01-24 06:05:56.394706: step: 882/470, loss: 0.002523294650018215 2023-01-24 06:05:57.115933: step: 884/470, loss: 0.022206205874681473 2023-01-24 06:05:57.825462: step: 886/470, loss: 0.0008540766430087388 2023-01-24 06:05:58.567787: step: 888/470, loss: 0.0023554093204438686 2023-01-24 06:05:59.318343: step: 890/470, loss: 0.013779774308204651 2023-01-24 06:06:00.120052: step: 892/470, loss: 0.010381845757365227 2023-01-24 06:06:00.859958: step: 894/470, loss: 0.02899681217968464 2023-01-24 06:06:01.651455: step: 896/470, loss: 0.04453825205564499 2023-01-24 06:06:02.366997: step: 898/470, loss: 0.05921311303973198 2023-01-24 06:06:03.293449: step: 900/470, loss: 0.006646065041422844 2023-01-24 06:06:04.027912: step: 902/470, loss: 0.023994415998458862 2023-01-24 06:06:04.802381: step: 904/470, loss: 0.002556569641456008 2023-01-24 06:06:05.539371: step: 906/470, loss: 0.0003309166640974581 2023-01-24 06:06:06.298957: step: 908/470, loss: 0.002325868234038353 2023-01-24 06:06:06.999711: step: 910/470, loss: 0.042233262211084366 2023-01-24 06:06:07.843237: step: 912/470, loss: 6.879373540868983e-05 2023-01-24 06:06:08.639303: step: 914/470, loss: 0.01983904279768467 2023-01-24 06:06:09.446276: step: 916/470, loss: 0.18573109805583954 2023-01-24 06:06:10.151815: step: 918/470, loss: 0.032089248299598694 2023-01-24 06:06:10.956511: step: 920/470, loss: 0.0024486505426466465 2023-01-24 06:06:11.714706: step: 922/470, loss: 0.013104692101478577 2023-01-24 06:06:12.437483: step: 924/470, loss: 0.9032142162322998 2023-01-24 06:06:13.257167: step: 926/470, loss: 0.13171081244945526 2023-01-24 06:06:14.003136: step: 928/470, loss: 0.026930810883641243 2023-01-24 06:06:14.784909: step: 930/470, loss: 0.5041398406028748 2023-01-24 06:06:15.554786: step: 932/470, loss: 0.002014985540881753 2023-01-24 06:06:16.203657: step: 934/470, loss: 0.0011477648513391614 2023-01-24 06:06:16.906497: step: 936/470, loss: 0.01803838089108467 2023-01-24 06:06:17.657634: step: 938/470, loss: 0.0009580638143233955 2023-01-24 06:06:18.426147: step: 940/470, loss: 0.0020920049864798784 2023-01-24 06:06:19.098896: step: 942/470, loss: 0.011814834550023079 ================================================== Loss: 0.048 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3236233278358073, 'r': 0.32853601592439635, 'f1': 0.3260611683468115}, 'combined': 0.2402555977292295, 'epoch': 32} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36621400321697767, 'r': 0.3538894934933294, 'f1': 0.35994628189052574}, 'combined': 0.2399641879270171, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3168338562045088, 'r': 0.3240482893628657, 'f1': 0.320400466218068}, 'combined': 0.2360845540554185, 'epoch': 32} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36820947105447027, 'r': 0.35333947318496284, 'f1': 0.3606212483928963}, 'combined': 0.24041416559526413, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3107107407794379, 'r': 0.33193576291996874, 'f1': 0.3209727468969239}, 'combined': 0.23650623455562814, 'epoch': 32} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3565284448677029, 'r': 0.3599566029914308, 'f1': 0.3582343225943426}, 'combined': 0.2388228817295617, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29375, 'r': 0.3357142857142857, 'f1': 0.31333333333333335}, 'combined': 0.2088888888888889, 'epoch': 32} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5769230769230769, 'r': 0.32608695652173914, 'f1': 0.41666666666666663}, 'combined': 0.27777777777777773, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4989878542510121, 'r': 0.22368421052631576, 'f1': 0.30889724310776934}, 'combined': 0.20593149540517955, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:08:53.936771: step: 2/470, loss: 0.0353984571993351 2023-01-24 06:08:54.603628: step: 4/470, loss: 0.0006110117537900805 2023-01-24 06:08:55.304558: step: 6/470, loss: 0.003695101011544466 2023-01-24 06:08:56.094279: step: 8/470, loss: 0.01956905983388424 2023-01-24 06:08:56.831039: step: 10/470, loss: 0.010541500523686409 2023-01-24 06:08:57.524915: step: 12/470, loss: 0.025140857324004173 2023-01-24 06:08:58.323091: step: 14/470, loss: 0.00231540622189641 2023-01-24 06:08:59.105821: step: 16/470, loss: 0.0011222070315852761 2023-01-24 06:08:59.811032: step: 18/470, loss: 0.003358704037964344 2023-01-24 06:09:00.585243: step: 20/470, loss: 0.001413871650584042 2023-01-24 06:09:01.422555: step: 22/470, loss: 0.001081290072761476 2023-01-24 06:09:02.143635: step: 24/470, loss: 0.011456046253442764 2023-01-24 06:09:02.923129: step: 26/470, loss: 0.004079823382198811 2023-01-24 06:09:03.673649: step: 28/470, loss: 0.0013503070222213864 2023-01-24 06:09:04.356834: step: 30/470, loss: 0.005739379674196243 2023-01-24 06:09:05.081123: step: 32/470, loss: 0.016481278464198112 2023-01-24 06:09:05.819512: step: 34/470, loss: 0.0638788491487503 2023-01-24 06:09:06.510540: step: 36/470, loss: 0.003887937404215336 2023-01-24 06:09:07.324734: step: 38/470, loss: 2.0889205932617188 2023-01-24 06:09:08.049700: step: 40/470, loss: 0.005325763486325741 2023-01-24 06:09:08.729126: step: 42/470, loss: 0.004101912025362253 2023-01-24 06:09:09.453066: step: 44/470, loss: 0.0046531264670193195 2023-01-24 06:09:10.128072: step: 46/470, loss: 0.01230566669255495 2023-01-24 06:09:10.894295: step: 48/470, loss: 3.5447123082121834e-05 2023-01-24 06:09:11.746092: step: 50/470, loss: 0.012923387810587883 2023-01-24 06:09:12.445112: step: 52/470, loss: 0.016211075708270073 2023-01-24 06:09:13.225077: step: 54/470, loss: 0.09570623189210892 2023-01-24 06:09:14.020824: step: 56/470, loss: 0.01940716803073883 2023-01-24 06:09:14.738943: step: 58/470, loss: 0.010799039155244827 2023-01-24 06:09:15.492010: step: 60/470, loss: 0.0031259532552212477 2023-01-24 06:09:16.250524: step: 62/470, loss: 0.0015492573147639632 2023-01-24 06:09:17.015913: step: 64/470, loss: 0.03166002035140991 2023-01-24 06:09:17.710522: step: 66/470, loss: 0.0006916436832398176 2023-01-24 06:09:18.529844: step: 68/470, loss: 0.026572024449706078 2023-01-24 06:09:19.318420: step: 70/470, loss: 0.0100321089848876 2023-01-24 06:09:19.936185: step: 72/470, loss: 0.0022878365125507116 2023-01-24 06:09:20.655997: step: 74/470, loss: 0.0371539369225502 2023-01-24 06:09:21.421927: step: 76/470, loss: 0.05440312996506691 2023-01-24 06:09:22.192772: step: 78/470, loss: 0.007360953837633133 2023-01-24 06:09:22.883629: step: 80/470, loss: 0.012081542983651161 2023-01-24 06:09:23.589894: step: 82/470, loss: 0.0012202973011881113 2023-01-24 06:09:24.335323: step: 84/470, loss: 0.00959607120603323 2023-01-24 06:09:25.138490: step: 86/470, loss: 0.0013847972732037306 2023-01-24 06:09:25.838134: step: 88/470, loss: 0.0005331527790986001 2023-01-24 06:09:26.508100: step: 90/470, loss: 0.0019345465116202831 2023-01-24 06:09:27.235069: step: 92/470, loss: 0.005651640705764294 2023-01-24 06:09:28.076387: step: 94/470, loss: 0.47246649861335754 2023-01-24 06:09:28.795778: step: 96/470, loss: 0.008398247882723808 2023-01-24 06:09:29.478631: step: 98/470, loss: 0.002498921239748597 2023-01-24 06:09:30.187784: step: 100/470, loss: 0.0005510192713700235 2023-01-24 06:09:30.832042: step: 102/470, loss: 0.0007535783806815743 2023-01-24 06:09:31.585317: step: 104/470, loss: 0.00044582068221643567 2023-01-24 06:09:32.337411: step: 106/470, loss: 0.015184991993010044 2023-01-24 06:09:32.979585: step: 108/470, loss: 0.09019918739795685 2023-01-24 06:09:33.739008: step: 110/470, loss: 0.00018215616000816226 2023-01-24 06:09:34.506126: step: 112/470, loss: 0.0013423897325992584 2023-01-24 06:09:35.184122: step: 114/470, loss: 0.004689566791057587 2023-01-24 06:09:35.975753: step: 116/470, loss: 0.001340361312031746 2023-01-24 06:09:36.695772: step: 118/470, loss: 0.007616905961185694 2023-01-24 06:09:37.378331: step: 120/470, loss: 0.03174249827861786 2023-01-24 06:09:38.065301: step: 122/470, loss: 0.0005641809548251331 2023-01-24 06:09:38.775525: step: 124/470, loss: 0.0015309054870158434 2023-01-24 06:09:39.490325: step: 126/470, loss: 0.024390079081058502 2023-01-24 06:09:40.202020: step: 128/470, loss: 0.023455774411559105 2023-01-24 06:09:40.934730: step: 130/470, loss: 0.10771681368350983 2023-01-24 06:09:41.703870: step: 132/470, loss: 9.4806935521774e-05 2023-01-24 06:09:42.383824: step: 134/470, loss: 0.007546218577772379 2023-01-24 06:09:43.034977: step: 136/470, loss: 0.006308150477707386 2023-01-24 06:09:43.794122: step: 138/470, loss: 0.3277882933616638 2023-01-24 06:09:44.589783: step: 140/470, loss: 0.016741199418902397 2023-01-24 06:09:45.321770: step: 142/470, loss: 0.02945149876177311 2023-01-24 06:09:46.010377: step: 144/470, loss: 0.0745350643992424 2023-01-24 06:09:46.694093: step: 146/470, loss: 0.0008599523571319878 2023-01-24 06:09:47.481803: step: 148/470, loss: 0.01641632243990898 2023-01-24 06:09:48.241014: step: 150/470, loss: 0.007939077913761139 2023-01-24 06:09:48.951849: step: 152/470, loss: 0.0034217501524835825 2023-01-24 06:09:49.680725: step: 154/470, loss: 0.0031094097066670656 2023-01-24 06:09:50.351316: step: 156/470, loss: 0.020805522799491882 2023-01-24 06:09:51.103830: step: 158/470, loss: 0.0015209285775199533 2023-01-24 06:09:51.812078: step: 160/470, loss: 0.003026079386472702 2023-01-24 06:09:52.536612: step: 162/470, loss: 0.03428162634372711 2023-01-24 06:09:53.278609: step: 164/470, loss: 0.0027114320546388626 2023-01-24 06:09:54.061744: step: 166/470, loss: 0.027286209166049957 2023-01-24 06:09:54.764108: step: 168/470, loss: 0.00016404094640165567 2023-01-24 06:09:55.522885: step: 170/470, loss: 0.031211018562316895 2023-01-24 06:09:56.199901: step: 172/470, loss: 0.003412870690226555 2023-01-24 06:09:56.924284: step: 174/470, loss: 0.043129902333021164 2023-01-24 06:09:57.608464: step: 176/470, loss: 0.07710176706314087 2023-01-24 06:09:58.385037: step: 178/470, loss: 0.017537254840135574 2023-01-24 06:09:59.034567: step: 180/470, loss: 0.009631001390516758 2023-01-24 06:09:59.676771: step: 182/470, loss: 0.001838182215578854 2023-01-24 06:10:00.366445: step: 184/470, loss: 0.013489598408341408 2023-01-24 06:10:01.158967: step: 186/470, loss: 0.021947944536805153 2023-01-24 06:10:01.928849: step: 188/470, loss: 0.008992105722427368 2023-01-24 06:10:02.699259: step: 190/470, loss: 0.17299090325832367 2023-01-24 06:10:03.392484: step: 192/470, loss: 0.0001017658578348346 2023-01-24 06:10:04.095190: step: 194/470, loss: 0.00856022723019123 2023-01-24 06:10:04.784079: step: 196/470, loss: 0.0008848054567351937 2023-01-24 06:10:05.460894: step: 198/470, loss: 0.004278761800378561 2023-01-24 06:10:06.268989: step: 200/470, loss: 0.0017920746468007565 2023-01-24 06:10:06.980187: step: 202/470, loss: 0.00201309728436172 2023-01-24 06:10:07.694327: step: 204/470, loss: 0.01744202710688114 2023-01-24 06:10:08.418049: step: 206/470, loss: 0.0012796318624168634 2023-01-24 06:10:09.075177: step: 208/470, loss: 4.2182771721854806e-05 2023-01-24 06:10:09.790997: step: 210/470, loss: 0.0001089559227693826 2023-01-24 06:10:10.651716: step: 212/470, loss: 0.019380543380975723 2023-01-24 06:10:11.361002: step: 214/470, loss: 0.020527342334389687 2023-01-24 06:10:12.053257: step: 216/470, loss: 0.0021591780241578817 2023-01-24 06:10:12.771923: step: 218/470, loss: 0.00999562069773674 2023-01-24 06:10:13.582981: step: 220/470, loss: 0.02325505018234253 2023-01-24 06:10:14.261623: step: 222/470, loss: 0.022767867892980576 2023-01-24 06:10:14.986539: step: 224/470, loss: 0.0006900187581777573 2023-01-24 06:10:15.686420: step: 226/470, loss: 0.008171236142516136 2023-01-24 06:10:16.500500: step: 228/470, loss: 0.0029809472616761923 2023-01-24 06:10:17.203016: step: 230/470, loss: 0.002115656156092882 2023-01-24 06:10:17.993500: step: 232/470, loss: 0.003473465796560049 2023-01-24 06:10:18.844835: step: 234/470, loss: 0.040313441306352615 2023-01-24 06:10:19.804637: step: 236/470, loss: 0.027963347733020782 2023-01-24 06:10:20.595105: step: 238/470, loss: 0.011281131766736507 2023-01-24 06:10:21.318891: step: 240/470, loss: 0.00025161568191833794 2023-01-24 06:10:21.980499: step: 242/470, loss: 0.027061283588409424 2023-01-24 06:10:22.770534: step: 244/470, loss: 0.026121623814105988 2023-01-24 06:10:23.485336: step: 246/470, loss: 0.009257947094738483 2023-01-24 06:10:24.166437: step: 248/470, loss: 0.010101567022502422 2023-01-24 06:10:24.927362: step: 250/470, loss: 1.0979067087173462 2023-01-24 06:10:25.610844: step: 252/470, loss: 0.006160234101116657 2023-01-24 06:10:26.347363: step: 254/470, loss: 0.004955607000738382 2023-01-24 06:10:27.053639: step: 256/470, loss: 0.009282803162932396 2023-01-24 06:10:27.719945: step: 258/470, loss: 0.004642096348106861 2023-01-24 06:10:28.454312: step: 260/470, loss: 0.029528679326176643 2023-01-24 06:10:29.146498: step: 262/470, loss: 0.01058510597795248 2023-01-24 06:10:29.810222: step: 264/470, loss: 0.007069876883178949 2023-01-24 06:10:30.542920: step: 266/470, loss: 0.010932988487184048 2023-01-24 06:10:31.234959: step: 268/470, loss: 0.0012139062164351344 2023-01-24 06:10:32.013178: step: 270/470, loss: 0.0031020056921988726 2023-01-24 06:10:32.751905: step: 272/470, loss: 0.019556628540158272 2023-01-24 06:10:33.474733: step: 274/470, loss: 0.003774407086893916 2023-01-24 06:10:34.138122: step: 276/470, loss: 0.002119203330948949 2023-01-24 06:10:34.850040: step: 278/470, loss: 0.006396686192601919 2023-01-24 06:10:35.483377: step: 280/470, loss: 0.0008285631192848086 2023-01-24 06:10:36.173381: step: 282/470, loss: 0.030592042952775955 2023-01-24 06:10:36.844864: step: 284/470, loss: 0.02650538459420204 2023-01-24 06:10:37.575357: step: 286/470, loss: 0.0008941978449001908 2023-01-24 06:10:38.297575: step: 288/470, loss: 0.0010146403219550848 2023-01-24 06:10:39.057616: step: 290/470, loss: 0.002193046733736992 2023-01-24 06:10:39.794789: step: 292/470, loss: 0.013625368475914001 2023-01-24 06:10:40.501059: step: 294/470, loss: 0.00888641644269228 2023-01-24 06:10:41.223845: step: 296/470, loss: 0.0019641213584691286 2023-01-24 06:10:41.979070: step: 298/470, loss: 0.4439093768596649 2023-01-24 06:10:42.662686: step: 300/470, loss: 0.0006960076279938221 2023-01-24 06:10:43.426676: step: 302/470, loss: 0.0003841613361146301 2023-01-24 06:10:44.166640: step: 304/470, loss: 0.11751359701156616 2023-01-24 06:10:44.941975: step: 306/470, loss: 0.0003117234446108341 2023-01-24 06:10:45.655980: step: 308/470, loss: 0.012473184615373611 2023-01-24 06:10:46.458771: step: 310/470, loss: 0.003341148141771555 2023-01-24 06:10:47.251071: step: 312/470, loss: 0.016889702528715134 2023-01-24 06:10:48.040706: step: 314/470, loss: 0.03929717838764191 2023-01-24 06:10:48.784588: step: 316/470, loss: 4.825163341592997e-05 2023-01-24 06:10:49.529123: step: 318/470, loss: 0.48037540912628174 2023-01-24 06:10:50.255439: step: 320/470, loss: 0.010676422156393528 2023-01-24 06:10:51.001206: step: 322/470, loss: 0.00287465937435627 2023-01-24 06:10:51.704521: step: 324/470, loss: 0.0012692613527178764 2023-01-24 06:10:52.379358: step: 326/470, loss: 0.021290870383381844 2023-01-24 06:10:53.104593: step: 328/470, loss: 0.021226149052381516 2023-01-24 06:10:54.039511: step: 330/470, loss: 0.037113256752491 2023-01-24 06:10:54.775671: step: 332/470, loss: 0.0005753267323598266 2023-01-24 06:10:55.559684: step: 334/470, loss: 0.009010824374854565 2023-01-24 06:10:56.354942: step: 336/470, loss: 0.0034445514902472496 2023-01-24 06:10:57.064817: step: 338/470, loss: 0.0003466394846327603 2023-01-24 06:10:57.758529: step: 340/470, loss: 0.0053068287670612335 2023-01-24 06:10:58.404100: step: 342/470, loss: 0.017965000122785568 2023-01-24 06:10:59.185987: step: 344/470, loss: 0.027688566595315933 2023-01-24 06:10:59.912786: step: 346/470, loss: 0.0012403011787682772 2023-01-24 06:11:00.595041: step: 348/470, loss: 2.2492525577545166 2023-01-24 06:11:01.433802: step: 350/470, loss: 0.009851688519120216 2023-01-24 06:11:02.152256: step: 352/470, loss: 0.06665859371423721 2023-01-24 06:11:03.079927: step: 354/470, loss: 0.014958545565605164 2023-01-24 06:11:03.803316: step: 356/470, loss: 0.0006210833671502769 2023-01-24 06:11:04.541311: step: 358/470, loss: 0.001347895129583776 2023-01-24 06:11:05.274127: step: 360/470, loss: 0.0017309411196038127 2023-01-24 06:11:06.052894: step: 362/470, loss: 0.10375341773033142 2023-01-24 06:11:06.754372: step: 364/470, loss: 0.00011790274584200233 2023-01-24 06:11:07.526444: step: 366/470, loss: 0.024094436317682266 2023-01-24 06:11:08.214180: step: 368/470, loss: 0.007840093225240707 2023-01-24 06:11:08.987863: step: 370/470, loss: 0.005822331178933382 2023-01-24 06:11:09.828097: step: 372/470, loss: 0.015622143633663654 2023-01-24 06:11:10.463885: step: 374/470, loss: 0.0032653925009071827 2023-01-24 06:11:11.179038: step: 376/470, loss: 0.0022503521759063005 2023-01-24 06:11:11.937843: step: 378/470, loss: 0.01404476910829544 2023-01-24 06:11:12.703133: step: 380/470, loss: 0.007879259996116161 2023-01-24 06:11:13.407742: step: 382/470, loss: 0.5576677918434143 2023-01-24 06:11:14.099827: step: 384/470, loss: 0.00043255838681943715 2023-01-24 06:11:14.891744: step: 386/470, loss: 0.00029005203396081924 2023-01-24 06:11:15.665994: step: 388/470, loss: 0.0023896731436252594 2023-01-24 06:11:16.329103: step: 390/470, loss: 0.013917365111410618 2023-01-24 06:11:17.071565: step: 392/470, loss: 0.000244982453295961 2023-01-24 06:11:17.858835: step: 394/470, loss: 0.22541584074497223 2023-01-24 06:11:18.613333: step: 396/470, loss: 0.009782961569726467 2023-01-24 06:11:19.374315: step: 398/470, loss: 0.0009558402234688401 2023-01-24 06:11:20.187476: step: 400/470, loss: 0.04024609923362732 2023-01-24 06:11:20.870243: step: 402/470, loss: 0.030957777053117752 2023-01-24 06:11:21.654103: step: 404/470, loss: 0.0017546005547046661 2023-01-24 06:11:22.337843: step: 406/470, loss: 0.0013082153163850307 2023-01-24 06:11:23.120575: step: 408/470, loss: 0.11711828410625458 2023-01-24 06:11:23.830253: step: 410/470, loss: 0.020939351990818977 2023-01-24 06:11:24.518483: step: 412/470, loss: 0.04790695011615753 2023-01-24 06:11:25.199486: step: 414/470, loss: 0.0010895759332925081 2023-01-24 06:11:25.945488: step: 416/470, loss: 0.017710620537400246 2023-01-24 06:11:26.661980: step: 418/470, loss: 0.01992659457027912 2023-01-24 06:11:27.382297: step: 420/470, loss: 0.03188169375061989 2023-01-24 06:11:28.138710: step: 422/470, loss: 0.04130684584379196 2023-01-24 06:11:28.858933: step: 424/470, loss: 0.07215629518032074 2023-01-24 06:11:29.615537: step: 426/470, loss: 0.00011767345131374896 2023-01-24 06:11:30.433004: step: 428/470, loss: 0.000228075819904916 2023-01-24 06:11:31.203121: step: 430/470, loss: 0.0013060378842055798 2023-01-24 06:11:31.910537: step: 432/470, loss: 0.0043433355167508125 2023-01-24 06:11:32.631981: step: 434/470, loss: 0.259304016828537 2023-01-24 06:11:33.389270: step: 436/470, loss: 0.017298951745033264 2023-01-24 06:11:34.181964: step: 438/470, loss: 0.0009541614563204348 2023-01-24 06:11:34.936995: step: 440/470, loss: 0.0033586365170776844 2023-01-24 06:11:35.655230: step: 442/470, loss: 0.005000745877623558 2023-01-24 06:11:36.393617: step: 444/470, loss: 0.02338077686727047 2023-01-24 06:11:37.187554: step: 446/470, loss: 0.10848845541477203 2023-01-24 06:11:37.917881: step: 448/470, loss: 0.07149527221918106 2023-01-24 06:11:38.695727: step: 450/470, loss: 0.005873269401490688 2023-01-24 06:11:39.375095: step: 452/470, loss: 0.018467245623469353 2023-01-24 06:11:40.148246: step: 454/470, loss: 0.004103237763047218 2023-01-24 06:11:40.886848: step: 456/470, loss: 0.00016769995272625238 2023-01-24 06:11:41.673069: step: 458/470, loss: 0.05550146475434303 2023-01-24 06:11:42.350299: step: 460/470, loss: 0.002988182008266449 2023-01-24 06:11:43.080202: step: 462/470, loss: 0.019142037257552147 2023-01-24 06:11:43.792546: step: 464/470, loss: 0.0012444063322618604 2023-01-24 06:11:44.508193: step: 466/470, loss: 0.00499011529609561 2023-01-24 06:11:45.246188: step: 468/470, loss: 0.029499473050236702 2023-01-24 06:11:45.933500: step: 470/470, loss: 0.009594298899173737 2023-01-24 06:11:46.690108: step: 472/470, loss: 0.0007405190262943506 2023-01-24 06:11:47.402900: step: 474/470, loss: 0.006137060932815075 2023-01-24 06:11:48.126069: step: 476/470, loss: 4.0265680581796914e-05 2023-01-24 06:11:48.811925: step: 478/470, loss: 1.632533167139627e-05 2023-01-24 06:11:49.547509: step: 480/470, loss: 0.0258161760866642 2023-01-24 06:11:50.325805: step: 482/470, loss: 0.4235173165798187 2023-01-24 06:11:51.007802: step: 484/470, loss: 0.008250389248132706 2023-01-24 06:11:51.721766: step: 486/470, loss: 0.02013562060892582 2023-01-24 06:11:52.463147: step: 488/470, loss: 0.02626902237534523 2023-01-24 06:11:53.214425: step: 490/470, loss: 0.004141016863286495 2023-01-24 06:11:53.918734: step: 492/470, loss: 0.0033669506665319204 2023-01-24 06:11:54.636920: step: 494/470, loss: 0.7086845636367798 2023-01-24 06:11:55.302142: step: 496/470, loss: 0.0009194430313073099 2023-01-24 06:11:56.036594: step: 498/470, loss: 0.003966304939240217 2023-01-24 06:11:56.733755: step: 500/470, loss: 0.029114434495568275 2023-01-24 06:11:57.432802: step: 502/470, loss: 0.00011125182209070772 2023-01-24 06:11:58.199924: step: 504/470, loss: 0.014639819972217083 2023-01-24 06:11:58.919322: step: 506/470, loss: 0.008477681316435337 2023-01-24 06:11:59.671541: step: 508/470, loss: 0.011175676248967648 2023-01-24 06:12:00.442155: step: 510/470, loss: 0.0035168358590453863 2023-01-24 06:12:01.198766: step: 512/470, loss: 0.013511012308299541 2023-01-24 06:12:01.920511: step: 514/470, loss: 4.397913653519936e-05 2023-01-24 06:12:02.734204: step: 516/470, loss: 0.01750068925321102 2023-01-24 06:12:03.464006: step: 518/470, loss: 0.000392085436033085 2023-01-24 06:12:04.133040: step: 520/470, loss: 0.0018893035594373941 2023-01-24 06:12:04.819835: step: 522/470, loss: 0.006660451181232929 2023-01-24 06:12:05.592181: step: 524/470, loss: 0.02844826504588127 2023-01-24 06:12:06.424431: step: 526/470, loss: 0.009441560134291649 2023-01-24 06:12:07.169606: step: 528/470, loss: 0.006550501566380262 2023-01-24 06:12:08.012868: step: 530/470, loss: 0.06545102596282959 2023-01-24 06:12:08.748502: step: 532/470, loss: 0.017406875267624855 2023-01-24 06:12:09.511972: step: 534/470, loss: 0.016543585807085037 2023-01-24 06:12:10.277139: step: 536/470, loss: 0.01982169970870018 2023-01-24 06:12:11.042979: step: 538/470, loss: 0.004087928682565689 2023-01-24 06:12:11.899065: step: 540/470, loss: 0.017809614539146423 2023-01-24 06:12:12.593275: step: 542/470, loss: 0.016991348937153816 2023-01-24 06:12:13.408515: step: 544/470, loss: 0.002067740773782134 2023-01-24 06:12:14.099750: step: 546/470, loss: 0.07715543359518051 2023-01-24 06:12:14.734770: step: 548/470, loss: 3.090485552093014e-05 2023-01-24 06:12:15.461061: step: 550/470, loss: 0.015211367048323154 2023-01-24 06:12:16.121329: step: 552/470, loss: 0.11407588422298431 2023-01-24 06:12:16.924695: step: 554/470, loss: 0.04695338010787964 2023-01-24 06:12:17.697758: step: 556/470, loss: 0.007329762447625399 2023-01-24 06:12:18.465118: step: 558/470, loss: 0.007562029641121626 2023-01-24 06:12:19.343298: step: 560/470, loss: 0.028891831636428833 2023-01-24 06:12:20.080461: step: 562/470, loss: 0.004206001292914152 2023-01-24 06:12:20.722976: step: 564/470, loss: 0.0011216033017262816 2023-01-24 06:12:21.441675: step: 566/470, loss: 0.03309805318713188 2023-01-24 06:12:22.148883: step: 568/470, loss: 0.0024206622038036585 2023-01-24 06:12:22.901700: step: 570/470, loss: 0.03466884419322014 2023-01-24 06:12:23.574416: step: 572/470, loss: 0.028366921469569206 2023-01-24 06:12:24.272430: step: 574/470, loss: 0.014122523367404938 2023-01-24 06:12:24.963681: step: 576/470, loss: 0.03809332102537155 2023-01-24 06:12:25.731682: step: 578/470, loss: 0.032920315861701965 2023-01-24 06:12:26.453484: step: 580/470, loss: 0.45645061135292053 2023-01-24 06:12:27.181235: step: 582/470, loss: 0.007378171198070049 2023-01-24 06:12:27.963218: step: 584/470, loss: 0.016221599653363228 2023-01-24 06:12:28.620302: step: 586/470, loss: 0.0018637162866070867 2023-01-24 06:12:29.493000: step: 588/470, loss: 0.00019457412417978048 2023-01-24 06:12:30.265167: step: 590/470, loss: 0.0033394538331776857 2023-01-24 06:12:31.001643: step: 592/470, loss: 0.017871206626296043 2023-01-24 06:12:31.701677: step: 594/470, loss: 0.018105637282133102 2023-01-24 06:12:32.446071: step: 596/470, loss: 0.02217262051999569 2023-01-24 06:12:33.237947: step: 598/470, loss: 0.03489815443754196 2023-01-24 06:12:33.900290: step: 600/470, loss: 0.004941632971167564 2023-01-24 06:12:34.568643: step: 602/470, loss: 0.009684709832072258 2023-01-24 06:12:35.282687: step: 604/470, loss: 0.010802370496094227 2023-01-24 06:12:36.068524: step: 606/470, loss: 0.0353492833673954 2023-01-24 06:12:36.985641: step: 608/470, loss: 0.009187396615743637 2023-01-24 06:12:37.692946: step: 610/470, loss: 0.00303685013204813 2023-01-24 06:12:38.432873: step: 612/470, loss: 0.017729606479406357 2023-01-24 06:12:39.186725: step: 614/470, loss: 0.009220817126333714 2023-01-24 06:12:39.927784: step: 616/470, loss: 0.02744259312748909 2023-01-24 06:12:40.618111: step: 618/470, loss: 0.026758696883916855 2023-01-24 06:12:41.410614: step: 620/470, loss: 0.0033123791217803955 2023-01-24 06:12:42.177391: step: 622/470, loss: 0.00663144001737237 2023-01-24 06:12:42.925253: step: 624/470, loss: 0.0016671409830451012 2023-01-24 06:12:43.633625: step: 626/470, loss: 0.0074158660136163235 2023-01-24 06:12:44.314637: step: 628/470, loss: 0.0008439401281066239 2023-01-24 06:12:45.077646: step: 630/470, loss: 5.01374488521833e-05 2023-01-24 06:12:45.761732: step: 632/470, loss: 0.008946064859628677 2023-01-24 06:12:46.462626: step: 634/470, loss: 0.06714008748531342 2023-01-24 06:12:47.157707: step: 636/470, loss: 0.0008762570796534419 2023-01-24 06:12:47.993031: step: 638/470, loss: 0.001907090307213366 2023-01-24 06:12:48.717321: step: 640/470, loss: 0.03064919076859951 2023-01-24 06:12:49.579323: step: 642/470, loss: 0.00023972737835720181 2023-01-24 06:12:50.318079: step: 644/470, loss: 0.00011663758778013289 2023-01-24 06:12:51.035263: step: 646/470, loss: 0.010350205935537815 2023-01-24 06:12:51.807819: step: 648/470, loss: 0.015240548178553581 2023-01-24 06:12:52.538689: step: 650/470, loss: 0.017129601910710335 2023-01-24 06:12:53.274489: step: 652/470, loss: 0.03198548033833504 2023-01-24 06:12:53.912674: step: 654/470, loss: 0.0030342754907906055 2023-01-24 06:12:54.624836: step: 656/470, loss: 7.692570943618193e-05 2023-01-24 06:12:55.348831: step: 658/470, loss: 0.014984884299337864 2023-01-24 06:12:56.124686: step: 660/470, loss: 0.007573677692562342 2023-01-24 06:12:56.825222: step: 662/470, loss: 0.14432427287101746 2023-01-24 06:12:57.542483: step: 664/470, loss: 0.003277366515249014 2023-01-24 06:12:58.180456: step: 666/470, loss: 0.04675723984837532 2023-01-24 06:12:58.946815: step: 668/470, loss: 0.02985813282430172 2023-01-24 06:12:59.710017: step: 670/470, loss: 0.010331113822758198 2023-01-24 06:13:00.426560: step: 672/470, loss: 0.004046509508043528 2023-01-24 06:13:01.204198: step: 674/470, loss: 0.017745012417435646 2023-01-24 06:13:02.025255: step: 676/470, loss: 0.0010939788771793246 2023-01-24 06:13:02.724939: step: 678/470, loss: 0.0016873609274625778 2023-01-24 06:13:03.460004: step: 680/470, loss: 0.015853749588131905 2023-01-24 06:13:04.288590: step: 682/470, loss: 0.0017006437992677093 2023-01-24 06:13:04.968694: step: 684/470, loss: 0.002765461103990674 2023-01-24 06:13:05.692287: step: 686/470, loss: 0.7379614114761353 2023-01-24 06:13:06.421966: step: 688/470, loss: 0.005514780059456825 2023-01-24 06:13:07.077084: step: 690/470, loss: 0.002757574198767543 2023-01-24 06:13:07.817030: step: 692/470, loss: 0.46669384837150574 2023-01-24 06:13:08.486058: step: 694/470, loss: 0.005543508101254702 2023-01-24 06:13:09.171106: step: 696/470, loss: 0.0004790358943864703 2023-01-24 06:13:09.898232: step: 698/470, loss: 0.00039144910988397896 2023-01-24 06:13:10.623600: step: 700/470, loss: 0.01933109760284424 2023-01-24 06:13:11.379841: step: 702/470, loss: 0.0024042355362325907 2023-01-24 06:13:12.013129: step: 704/470, loss: 0.0020206007175147533 2023-01-24 06:13:12.745439: step: 706/470, loss: 0.00844528991729021 2023-01-24 06:13:13.443840: step: 708/470, loss: 0.0017009805887937546 2023-01-24 06:13:14.135095: step: 710/470, loss: 0.008159015327692032 2023-01-24 06:13:14.885730: step: 712/470, loss: 0.001408104319125414 2023-01-24 06:13:15.603615: step: 714/470, loss: 0.008478800766170025 2023-01-24 06:13:16.253306: step: 716/470, loss: 0.021621203050017357 2023-01-24 06:13:17.006611: step: 718/470, loss: 6.314300844678655e-05 2023-01-24 06:13:17.765975: step: 720/470, loss: 0.027302606031298637 2023-01-24 06:13:18.472643: step: 722/470, loss: 0.020021893084049225 2023-01-24 06:13:19.280124: step: 724/470, loss: 0.028122197836637497 2023-01-24 06:13:19.994794: step: 726/470, loss: 0.004987229593098164 2023-01-24 06:13:20.654958: step: 728/470, loss: 0.023552965372800827 2023-01-24 06:13:21.393392: step: 730/470, loss: 0.0023100976832211018 2023-01-24 06:13:22.131222: step: 732/470, loss: 0.014617957174777985 2023-01-24 06:13:22.859864: step: 734/470, loss: 0.012580832466483116 2023-01-24 06:13:23.603009: step: 736/470, loss: 0.0018753198673948646 2023-01-24 06:13:24.283254: step: 738/470, loss: 0.03843626379966736 2023-01-24 06:13:25.104169: step: 740/470, loss: 0.05392017588019371 2023-01-24 06:13:25.846256: step: 742/470, loss: 0.1939995437860489 2023-01-24 06:13:26.542950: step: 744/470, loss: 0.0006773190689273179 2023-01-24 06:13:27.326529: step: 746/470, loss: 0.04806168004870415 2023-01-24 06:13:28.056795: step: 748/470, loss: 0.03759084269404411 2023-01-24 06:13:28.831319: step: 750/470, loss: 0.002831129590049386 2023-01-24 06:13:29.570018: step: 752/470, loss: 0.006249632220715284 2023-01-24 06:13:30.294204: step: 754/470, loss: 0.004659464117139578 2023-01-24 06:13:30.975267: step: 756/470, loss: 0.000623860105406493 2023-01-24 06:13:31.638072: step: 758/470, loss: 0.0027934126555919647 2023-01-24 06:13:32.367630: step: 760/470, loss: 0.0031853329855948687 2023-01-24 06:13:33.064991: step: 762/470, loss: 0.02544858306646347 2023-01-24 06:13:33.793530: step: 764/470, loss: 0.010291761718690395 2023-01-24 06:13:34.536196: step: 766/470, loss: 0.011330293491482735 2023-01-24 06:13:35.386209: step: 768/470, loss: 0.006938802544027567 2023-01-24 06:13:36.101172: step: 770/470, loss: 0.029576266184449196 2023-01-24 06:13:36.834724: step: 772/470, loss: 0.02871057577431202 2023-01-24 06:13:37.575691: step: 774/470, loss: 0.006623897235840559 2023-01-24 06:13:38.204680: step: 776/470, loss: 0.01030951552093029 2023-01-24 06:13:38.897338: step: 778/470, loss: 0.0003881768207065761 2023-01-24 06:13:39.646156: step: 780/470, loss: 0.011595960706472397 2023-01-24 06:13:40.385530: step: 782/470, loss: 0.03303779661655426 2023-01-24 06:13:41.156120: step: 784/470, loss: 0.009521235711872578 2023-01-24 06:13:41.876266: step: 786/470, loss: 0.010765934363007545 2023-01-24 06:13:42.565916: step: 788/470, loss: 0.006151827983558178 2023-01-24 06:13:43.309546: step: 790/470, loss: 0.14049938321113586 2023-01-24 06:13:44.041684: step: 792/470, loss: 0.00779561884701252 2023-01-24 06:13:44.789895: step: 794/470, loss: 0.020738592371344566 2023-01-24 06:13:45.681687: step: 796/470, loss: 0.1423885077238083 2023-01-24 06:13:46.561010: step: 798/470, loss: 0.0193779356777668 2023-01-24 06:13:47.284416: step: 800/470, loss: 0.0032281007152050734 2023-01-24 06:13:47.915490: step: 802/470, loss: 0.00042432613554410636 2023-01-24 06:13:48.679606: step: 804/470, loss: 0.004177960567176342 2023-01-24 06:13:49.406215: step: 806/470, loss: 1.2552907466888428 2023-01-24 06:13:50.034286: step: 808/470, loss: 6.404746090993285e-05 2023-01-24 06:13:50.802757: step: 810/470, loss: 0.031237877905368805 2023-01-24 06:13:51.529620: step: 812/470, loss: 0.0003258582728449255 2023-01-24 06:13:52.173751: step: 814/470, loss: 0.007463258691132069 2023-01-24 06:13:52.919911: step: 816/470, loss: 0.01626862958073616 2023-01-24 06:13:53.711496: step: 818/470, loss: 0.02373459003865719 2023-01-24 06:13:54.439900: step: 820/470, loss: 0.0005793635500594974 2023-01-24 06:13:55.147491: step: 822/470, loss: 0.0021050209179520607 2023-01-24 06:13:55.900649: step: 824/470, loss: 0.17716780304908752 2023-01-24 06:13:56.699028: step: 826/470, loss: 0.036166831851005554 2023-01-24 06:13:57.508904: step: 828/470, loss: 0.024912940338253975 2023-01-24 06:13:58.248650: step: 830/470, loss: 0.007581733167171478 2023-01-24 06:13:58.981097: step: 832/470, loss: 0.007379438728094101 2023-01-24 06:13:59.691879: step: 834/470, loss: 0.08958467096090317 2023-01-24 06:14:00.529173: step: 836/470, loss: 0.0027318422216922045 2023-01-24 06:14:01.260530: step: 838/470, loss: 0.06193634122610092 2023-01-24 06:14:02.002251: step: 840/470, loss: 0.01766875572502613 2023-01-24 06:14:02.785488: step: 842/470, loss: 0.009170843288302422 2023-01-24 06:14:03.530705: step: 844/470, loss: 0.00840882770717144 2023-01-24 06:14:04.213794: step: 846/470, loss: 0.0007808993104845285 2023-01-24 06:14:04.935281: step: 848/470, loss: 0.06595759838819504 2023-01-24 06:14:05.716915: step: 850/470, loss: 0.017137767747044563 2023-01-24 06:14:06.432217: step: 852/470, loss: 0.0002639777958393097 2023-01-24 06:14:07.110777: step: 854/470, loss: 0.04219938814640045 2023-01-24 06:14:07.807793: step: 856/470, loss: 0.022198636084794998 2023-01-24 06:14:08.579365: step: 858/470, loss: 0.005382399074733257 2023-01-24 06:14:09.344663: step: 860/470, loss: 0.034826841205358505 2023-01-24 06:14:10.067594: step: 862/470, loss: 0.02772991545498371 2023-01-24 06:14:10.840710: step: 864/470, loss: 0.007337241433560848 2023-01-24 06:14:11.677660: step: 866/470, loss: 0.004963582381606102 2023-01-24 06:14:12.415954: step: 868/470, loss: 0.000665048137307167 2023-01-24 06:14:13.173164: step: 870/470, loss: 0.01284511387348175 2023-01-24 06:14:13.820993: step: 872/470, loss: 0.00016538244381081313 2023-01-24 06:14:14.512114: step: 874/470, loss: 0.023755772039294243 2023-01-24 06:14:15.156639: step: 876/470, loss: 0.0020838105119764805 2023-01-24 06:14:15.831601: step: 878/470, loss: 0.0033732212614268064 2023-01-24 06:14:16.636702: step: 880/470, loss: 0.08116719126701355 2023-01-24 06:14:17.265158: step: 882/470, loss: 0.0009367589373141527 2023-01-24 06:14:18.057031: step: 884/470, loss: 0.05737480893731117 2023-01-24 06:14:18.778841: step: 886/470, loss: 0.00760510703548789 2023-01-24 06:14:19.558314: step: 888/470, loss: 0.0016693559009581804 2023-01-24 06:14:20.280206: step: 890/470, loss: 0.010982673615217209 2023-01-24 06:14:21.072232: step: 892/470, loss: 0.01113244891166687 2023-01-24 06:14:21.799895: step: 894/470, loss: 0.0017129809129983187 2023-01-24 06:14:22.490178: step: 896/470, loss: 0.5727943181991577 2023-01-24 06:14:23.185539: step: 898/470, loss: 0.002152232686057687 2023-01-24 06:14:23.887445: step: 900/470, loss: 0.5070058107376099 2023-01-24 06:14:24.619416: step: 902/470, loss: 0.04603942856192589 2023-01-24 06:14:25.325075: step: 904/470, loss: 0.06946877390146255 2023-01-24 06:14:25.988504: step: 906/470, loss: 0.0038659495767205954 2023-01-24 06:14:26.703177: step: 908/470, loss: 0.0038466486148536205 2023-01-24 06:14:27.418712: step: 910/470, loss: 0.010113026015460491 2023-01-24 06:14:28.167093: step: 912/470, loss: 0.003725921269506216 2023-01-24 06:14:28.954824: step: 914/470, loss: 0.0018076320411637425 2023-01-24 06:14:29.759454: step: 916/470, loss: 0.043223872780799866 2023-01-24 06:14:30.440149: step: 918/470, loss: 0.0003173082077410072 2023-01-24 06:14:31.148501: step: 920/470, loss: 0.0005944594158791006 2023-01-24 06:14:31.920578: step: 922/470, loss: 0.05763893947005272 2023-01-24 06:14:32.672130: step: 924/470, loss: 0.0007061202195473015 2023-01-24 06:14:33.437103: step: 926/470, loss: 0.004733996000140905 2023-01-24 06:14:34.052158: step: 928/470, loss: 0.001815860508941114 2023-01-24 06:14:34.719525: step: 930/470, loss: 0.0008208305225707591 2023-01-24 06:14:35.405236: step: 932/470, loss: 0.01949726790189743 2023-01-24 06:14:36.139413: step: 934/470, loss: 0.010274535976350307 2023-01-24 06:14:36.855930: step: 936/470, loss: 0.013152135536074638 2023-01-24 06:14:37.629683: step: 938/470, loss: 0.023069579154253006 2023-01-24 06:14:38.280968: step: 940/470, loss: 0.006963435094803572 2023-01-24 06:14:39.033819: step: 942/470, loss: 1.931123188114725e-05 ================================================== Loss: 0.045 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33534524644847996, 'r': 0.32007335666714504, 'f1': 0.3275313766283212}, 'combined': 0.24133890909455247, 'epoch': 33} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3599424833531255, 'r': 0.3419453591854692, 'f1': 0.35071318890817355}, 'combined': 0.23380879260544898, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31787886605998067, 'r': 0.30400559486571205, 'f1': 0.31078748495485986}, 'combined': 0.22900130470358093, 'epoch': 33} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36891805305939873, 'r': 0.3476343192290488, 'f1': 0.3579600910873374}, 'combined': 0.23864006072489155, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3195693277310924, 'r': 0.31835654090858356, 'f1': 0.3189617814806531}, 'combined': 0.23502447056469172, 'epoch': 33} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3548039139688969, 'r': 0.3571920172359952, 'f1': 0.3559939606376953}, 'combined': 0.2373293070917968, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3108108108108108, 'r': 0.32857142857142857, 'f1': 0.3194444444444444}, 'combined': 0.21296296296296294, 'epoch': 33} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5277777777777778, 'r': 0.41304347826086957, 'f1': 0.4634146341463415}, 'combined': 0.3089430894308943, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5384615384615384, 'r': 0.2413793103448276, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3260774491094148, 'r': 0.32422122074636306, 'f1': 0.3251466856961624}, 'combined': 0.2395817684076986, 'epoch': 19} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35730778004987124, 'r': 0.3418473472592518, 'f1': 0.34940662520847365}, 'combined': 0.23293775013898238, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.35714285714285715, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:17:15.061398: step: 2/470, loss: 0.0033803251571953297 2023-01-24 06:17:15.693472: step: 4/470, loss: 0.0006748238811269403 2023-01-24 06:17:16.444881: step: 6/470, loss: 0.005246671847999096 2023-01-24 06:17:17.202961: step: 8/470, loss: 0.051448754966259 2023-01-24 06:17:17.952837: step: 10/470, loss: 0.010346678085625172 2023-01-24 06:17:18.776403: step: 12/470, loss: 0.006094653159379959 2023-01-24 06:17:19.516220: step: 14/470, loss: 5.761585998698138e-05 2023-01-24 06:17:20.270653: step: 16/470, loss: 0.4367545545101166 2023-01-24 06:17:21.059016: step: 18/470, loss: 0.026379667222499847 2023-01-24 06:17:21.871001: step: 20/470, loss: 0.0011629628716036677 2023-01-24 06:17:22.726882: step: 22/470, loss: 0.009967586025595665 2023-01-24 06:17:23.484818: step: 24/470, loss: 0.5176779627799988 2023-01-24 06:17:24.232472: step: 26/470, loss: 0.06595677137374878 2023-01-24 06:17:25.024668: step: 28/470, loss: 0.010950235649943352 2023-01-24 06:17:25.746570: step: 30/470, loss: 0.0005241360631771386 2023-01-24 06:17:26.473473: step: 32/470, loss: 0.007944729179143906 2023-01-24 06:17:27.156183: step: 34/470, loss: 0.0030253706499934196 2023-01-24 06:17:27.861216: step: 36/470, loss: 0.0016459508333355188 2023-01-24 06:17:28.623351: step: 38/470, loss: 0.017528338357806206 2023-01-24 06:17:29.448235: step: 40/470, loss: 0.005726838018745184 2023-01-24 06:17:30.244668: step: 42/470, loss: 0.0029343708883970976 2023-01-24 06:17:31.017297: step: 44/470, loss: 0.0019074846059083939 2023-01-24 06:17:31.819372: step: 46/470, loss: 0.04953285679221153 2023-01-24 06:17:32.559456: step: 48/470, loss: 0.013332260772585869 2023-01-24 06:17:33.311330: step: 50/470, loss: 0.00047797494335100055 2023-01-24 06:17:34.072738: step: 52/470, loss: 0.004910385701805353 2023-01-24 06:17:34.821009: step: 54/470, loss: 0.001362175797112286 2023-01-24 06:17:35.597017: step: 56/470, loss: 0.00200113607570529 2023-01-24 06:17:36.328814: step: 58/470, loss: 0.009997074492275715 2023-01-24 06:17:36.987890: step: 60/470, loss: 9.74019494606182e-05 2023-01-24 06:17:37.725054: step: 62/470, loss: 0.02216336689889431 2023-01-24 06:17:38.514439: step: 64/470, loss: 0.003930054139345884 2023-01-24 06:17:39.323298: step: 66/470, loss: 0.00021434202790260315 2023-01-24 06:17:40.074918: step: 68/470, loss: 0.00011843130050692707 2023-01-24 06:17:40.830921: step: 70/470, loss: 0.044022075831890106 2023-01-24 06:17:41.594228: step: 72/470, loss: 0.060967884957790375 2023-01-24 06:17:42.360057: step: 74/470, loss: 0.0004368863010313362 2023-01-24 06:17:43.066999: step: 76/470, loss: 0.017974242568016052 2023-01-24 06:17:43.778283: step: 78/470, loss: 0.004248203709721565 2023-01-24 06:17:44.486108: step: 80/470, loss: 0.00158304488286376 2023-01-24 06:17:45.176992: step: 82/470, loss: 0.007524224929511547 2023-01-24 06:17:45.830367: step: 84/470, loss: 0.005340985022485256 2023-01-24 06:17:46.501926: step: 86/470, loss: 0.029072092846035957 2023-01-24 06:17:47.240984: step: 88/470, loss: 0.023193301633000374 2023-01-24 06:17:47.990852: step: 90/470, loss: 9.126600343734026e-05 2023-01-24 06:17:48.798385: step: 92/470, loss: 0.0002512194332666695 2023-01-24 06:17:49.538585: step: 94/470, loss: 0.4563930332660675 2023-01-24 06:17:50.293745: step: 96/470, loss: 0.16592247784137726 2023-01-24 06:17:51.089043: step: 98/470, loss: 0.013607371598482132 2023-01-24 06:17:51.768530: step: 100/470, loss: 0.006993814371526241 2023-01-24 06:17:52.446492: step: 102/470, loss: 0.005480475723743439 2023-01-24 06:17:53.191806: step: 104/470, loss: 0.0007254070951603353 2023-01-24 06:17:53.941712: step: 106/470, loss: 0.0031888075172901154 2023-01-24 06:17:54.696154: step: 108/470, loss: 0.0031884340569376945 2023-01-24 06:17:55.390185: step: 110/470, loss: 0.0006328716408461332 2023-01-24 06:17:56.206112: step: 112/470, loss: 0.012160047888755798 2023-01-24 06:17:56.969260: step: 114/470, loss: 0.004218654707074165 2023-01-24 06:17:57.700212: step: 116/470, loss: 0.001394663704559207 2023-01-24 06:17:58.503250: step: 118/470, loss: 0.002457190537825227 2023-01-24 06:17:59.242780: step: 120/470, loss: 0.14293168485164642 2023-01-24 06:17:59.947757: step: 122/470, loss: 0.047695934772491455 2023-01-24 06:18:00.695903: step: 124/470, loss: 0.003226999891921878 2023-01-24 06:18:01.375338: step: 126/470, loss: 0.001519688288681209 2023-01-24 06:18:02.103203: step: 128/470, loss: 0.004431163426488638 2023-01-24 06:18:02.850377: step: 130/470, loss: 0.004296420607715845 2023-01-24 06:18:03.519714: step: 132/470, loss: 0.0012974428245797753 2023-01-24 06:18:04.260689: step: 134/470, loss: 0.027212653309106827 2023-01-24 06:18:04.953132: step: 136/470, loss: 0.02067440375685692 2023-01-24 06:18:05.646365: step: 138/470, loss: 0.0018030147766694427 2023-01-24 06:18:06.411720: step: 140/470, loss: 0.5232383608818054 2023-01-24 06:18:07.137905: step: 142/470, loss: 0.002090280409902334 2023-01-24 06:18:07.950374: step: 144/470, loss: 0.03019622713327408 2023-01-24 06:18:08.727438: step: 146/470, loss: 0.06532511860132217 2023-01-24 06:18:09.458335: step: 148/470, loss: 0.0835709348320961 2023-01-24 06:18:10.167674: step: 150/470, loss: 0.002398706041276455 2023-01-24 06:18:10.836075: step: 152/470, loss: 0.0007716089021414518 2023-01-24 06:18:11.537669: step: 154/470, loss: 0.04274653270840645 2023-01-24 06:18:12.247054: step: 156/470, loss: 0.027724282816052437 2023-01-24 06:18:12.999934: step: 158/470, loss: 0.0108801806345582 2023-01-24 06:18:13.691751: step: 160/470, loss: 0.0027062701992690563 2023-01-24 06:18:14.438217: step: 162/470, loss: 0.001326837227679789 2023-01-24 06:18:15.261718: step: 164/470, loss: 0.000980255426838994 2023-01-24 06:18:15.951195: step: 166/470, loss: 0.004368356894701719 2023-01-24 06:18:16.702977: step: 168/470, loss: 0.0013895826414227486 2023-01-24 06:18:17.404891: step: 170/470, loss: 0.002898984821513295 2023-01-24 06:18:18.181295: step: 172/470, loss: 0.06322634965181351 2023-01-24 06:18:18.872333: step: 174/470, loss: 0.00011356957111274824 2023-01-24 06:18:19.657516: step: 176/470, loss: 0.018166067078709602 2023-01-24 06:18:20.386438: step: 178/470, loss: 0.011182754300534725 2023-01-24 06:18:21.077853: step: 180/470, loss: 0.01945425756275654 2023-01-24 06:18:21.879607: step: 182/470, loss: 0.004951994400471449 2023-01-24 06:18:22.667955: step: 184/470, loss: 0.27803167700767517 2023-01-24 06:18:23.422666: step: 186/470, loss: 0.014578554779291153 2023-01-24 06:18:24.202829: step: 188/470, loss: 0.026163099333643913 2023-01-24 06:18:24.953107: step: 190/470, loss: 0.016671251505613327 2023-01-24 06:18:25.657035: step: 192/470, loss: 0.0024631675332784653 2023-01-24 06:18:26.332021: step: 194/470, loss: 0.017945973202586174 2023-01-24 06:18:27.107380: step: 196/470, loss: 0.019402913749217987 2023-01-24 06:18:27.854312: step: 198/470, loss: 0.009785197675228119 2023-01-24 06:18:28.657532: step: 200/470, loss: 0.011927827261388302 2023-01-24 06:18:29.431715: step: 202/470, loss: 0.015688994899392128 2023-01-24 06:18:30.134839: step: 204/470, loss: 0.5554424524307251 2023-01-24 06:18:30.834524: step: 206/470, loss: 0.004341977182775736 2023-01-24 06:18:31.541311: step: 208/470, loss: 2.5874245693557896e-05 2023-01-24 06:18:32.358412: step: 210/470, loss: 0.03483826667070389 2023-01-24 06:18:33.079118: step: 212/470, loss: 0.004074705298990011 2023-01-24 06:18:33.792617: step: 214/470, loss: 0.003476059529930353 2023-01-24 06:18:34.534163: step: 216/470, loss: 0.00816492922604084 2023-01-24 06:18:35.218523: step: 218/470, loss: 0.022160356864333153 2023-01-24 06:18:35.961816: step: 220/470, loss: 0.008341852575540543 2023-01-24 06:18:36.813717: step: 222/470, loss: 0.0019108172273263335 2023-01-24 06:18:37.532023: step: 224/470, loss: 1.008707046508789 2023-01-24 06:18:38.261540: step: 226/470, loss: 0.00015085958875715733 2023-01-24 06:18:39.030837: step: 228/470, loss: 0.004038154147565365 2023-01-24 06:18:39.857285: step: 230/470, loss: 0.005438428372144699 2023-01-24 06:18:40.639547: step: 232/470, loss: 0.0011504045687615871 2023-01-24 06:18:41.369934: step: 234/470, loss: 0.02474108338356018 2023-01-24 06:18:42.152681: step: 236/470, loss: 0.04079394042491913 2023-01-24 06:18:42.950053: step: 238/470, loss: 0.008551133796572685 2023-01-24 06:18:43.654786: step: 240/470, loss: 0.00603491673246026 2023-01-24 06:18:44.407733: step: 242/470, loss: 0.004793131723999977 2023-01-24 06:18:45.244816: step: 244/470, loss: 0.0072413296438753605 2023-01-24 06:18:45.914369: step: 246/470, loss: 0.023858316242694855 2023-01-24 06:18:46.624681: step: 248/470, loss: 0.008859733119606972 2023-01-24 06:18:47.288209: step: 250/470, loss: 0.006357423961162567 2023-01-24 06:18:47.975627: step: 252/470, loss: 0.024346143007278442 2023-01-24 06:18:48.682855: step: 254/470, loss: 0.018336040899157524 2023-01-24 06:18:49.470520: step: 256/470, loss: 0.010750774294137955 2023-01-24 06:18:50.179785: step: 258/470, loss: 0.0013539177598431706 2023-01-24 06:18:50.987622: step: 260/470, loss: 0.06712424010038376 2023-01-24 06:18:51.701516: step: 262/470, loss: 0.015392746776342392 2023-01-24 06:18:52.526864: step: 264/470, loss: 0.011326344683766365 2023-01-24 06:18:53.247178: step: 266/470, loss: 0.03263202682137489 2023-01-24 06:18:54.006931: step: 268/470, loss: 0.019025059416890144 2023-01-24 06:18:54.766737: step: 270/470, loss: 0.004486290272325277 2023-01-24 06:18:55.554292: step: 272/470, loss: 0.028263265267014503 2023-01-24 06:18:56.260115: step: 274/470, loss: 0.06688546389341354 2023-01-24 06:18:57.039164: step: 276/470, loss: 0.0005725919036194682 2023-01-24 06:18:57.721681: step: 278/470, loss: 0.0008113943040370941 2023-01-24 06:18:58.398705: step: 280/470, loss: 0.00557939475402236 2023-01-24 06:18:59.209967: step: 282/470, loss: 0.026322869583964348 2023-01-24 06:18:59.932455: step: 284/470, loss: 0.0053300210274755955 2023-01-24 06:19:00.705824: step: 286/470, loss: 0.006512013729661703 2023-01-24 06:19:01.371695: step: 288/470, loss: 0.006800213363021612 2023-01-24 06:19:02.059635: step: 290/470, loss: 0.006016398314386606 2023-01-24 06:19:02.834357: step: 292/470, loss: 0.009520920924842358 2023-01-24 06:19:03.625870: step: 294/470, loss: 0.0036107206251472235 2023-01-24 06:19:04.383813: step: 296/470, loss: 0.020766550675034523 2023-01-24 06:19:05.128869: step: 298/470, loss: 0.005723374895751476 2023-01-24 06:19:05.880463: step: 300/470, loss: 0.03173263370990753 2023-01-24 06:19:06.658920: step: 302/470, loss: 0.029550369828939438 2023-01-24 06:19:07.366509: step: 304/470, loss: 0.0009951787069439888 2023-01-24 06:19:08.135757: step: 306/470, loss: 0.04182833433151245 2023-01-24 06:19:09.117205: step: 308/470, loss: 0.03832540661096573 2023-01-24 06:19:09.866777: step: 310/470, loss: 0.11555290967226028 2023-01-24 06:19:10.630486: step: 312/470, loss: 0.02390245907008648 2023-01-24 06:19:11.339783: step: 314/470, loss: 0.0017948574386537075 2023-01-24 06:19:12.104898: step: 316/470, loss: 0.0034852821845561266 2023-01-24 06:19:12.805234: step: 318/470, loss: 0.25961652398109436 2023-01-24 06:19:13.568291: step: 320/470, loss: 0.052189331501722336 2023-01-24 06:19:14.340001: step: 322/470, loss: 0.01036821585148573 2023-01-24 06:19:15.046439: step: 324/470, loss: 0.016944773495197296 2023-01-24 06:19:15.714814: step: 326/470, loss: 1.836652882047929e-05 2023-01-24 06:19:16.442762: step: 328/470, loss: 0.0037974936421960592 2023-01-24 06:19:17.089687: step: 330/470, loss: 0.00011987396283075213 2023-01-24 06:19:17.769875: step: 332/470, loss: 0.007104361429810524 2023-01-24 06:19:18.442318: step: 334/470, loss: 0.009579218924045563 2023-01-24 06:19:19.205917: step: 336/470, loss: 0.0007889914559200406 2023-01-24 06:19:19.893724: step: 338/470, loss: 0.003870560321956873 2023-01-24 06:19:20.661002: step: 340/470, loss: 0.0053961933590471745 2023-01-24 06:19:21.353675: step: 342/470, loss: 0.0044841766357421875 2023-01-24 06:19:22.030598: step: 344/470, loss: 4.063077722094022e-05 2023-01-24 06:19:22.742738: step: 346/470, loss: 0.001006243284791708 2023-01-24 06:19:23.407211: step: 348/470, loss: 0.007106869947165251 2023-01-24 06:19:24.050914: step: 350/470, loss: 0.0015916344709694386 2023-01-24 06:19:24.830575: step: 352/470, loss: 0.027905944734811783 2023-01-24 06:19:25.562252: step: 354/470, loss: 0.003524521365761757 2023-01-24 06:19:26.322938: step: 356/470, loss: 0.0009450623765587807 2023-01-24 06:19:27.018042: step: 358/470, loss: 0.00697721540927887 2023-01-24 06:19:27.728345: step: 360/470, loss: 0.0005012876354157925 2023-01-24 06:19:28.361238: step: 362/470, loss: 4.1443989175604656e-05 2023-01-24 06:19:29.055751: step: 364/470, loss: 0.04660176858305931 2023-01-24 06:19:29.725600: step: 366/470, loss: 2.2212430849322118e-05 2023-01-24 06:19:30.441048: step: 368/470, loss: 0.0106153329834342 2023-01-24 06:19:31.106125: step: 370/470, loss: 0.0111685274168849 2023-01-24 06:19:31.861474: step: 372/470, loss: 0.0011537930695340037 2023-01-24 06:19:32.531546: step: 374/470, loss: 0.06961429864168167 2023-01-24 06:19:33.311507: step: 376/470, loss: 0.0024143445771187544 2023-01-24 06:19:34.037322: step: 378/470, loss: 0.009551659226417542 2023-01-24 06:19:34.753745: step: 380/470, loss: 1.0005992650985718 2023-01-24 06:19:35.567786: step: 382/470, loss: 0.002678003627806902 2023-01-24 06:19:36.384899: step: 384/470, loss: 0.049086350947618484 2023-01-24 06:19:37.203071: step: 386/470, loss: 0.010009855031967163 2023-01-24 06:19:37.978794: step: 388/470, loss: 0.0012464200844988227 2023-01-24 06:19:38.691743: step: 390/470, loss: 4.9415495595894754e-05 2023-01-24 06:19:39.351955: step: 392/470, loss: 0.003669227007776499 2023-01-24 06:19:40.060031: step: 394/470, loss: 0.003987874835729599 2023-01-24 06:19:40.766518: step: 396/470, loss: 0.0017396406037732959 2023-01-24 06:19:41.459359: step: 398/470, loss: 0.025890368968248367 2023-01-24 06:19:42.170563: step: 400/470, loss: 0.01730758510529995 2023-01-24 06:19:42.953694: step: 402/470, loss: 0.004256324376910925 2023-01-24 06:19:43.679951: step: 404/470, loss: 0.24290981888771057 2023-01-24 06:19:44.334923: step: 406/470, loss: 3.430567085160874e-05 2023-01-24 06:19:45.062311: step: 408/470, loss: 0.01224263571202755 2023-01-24 06:19:45.810409: step: 410/470, loss: 0.0011447453871369362 2023-01-24 06:19:46.528699: step: 412/470, loss: 0.024092786014080048 2023-01-24 06:19:47.276464: step: 414/470, loss: 0.0011776899918913841 2023-01-24 06:19:48.019926: step: 416/470, loss: 0.006173381116241217 2023-01-24 06:19:48.743367: step: 418/470, loss: 0.007338505703955889 2023-01-24 06:19:49.466371: step: 420/470, loss: 0.002101232297718525 2023-01-24 06:19:50.177926: step: 422/470, loss: 0.0022618654184043407 2023-01-24 06:19:50.966950: step: 424/470, loss: 0.0012441343860700727 2023-01-24 06:19:51.864410: step: 426/470, loss: 0.0695013627409935 2023-01-24 06:19:52.591103: step: 428/470, loss: 0.23386645317077637 2023-01-24 06:19:53.360994: step: 430/470, loss: 0.0643640011548996 2023-01-24 06:19:54.064248: step: 432/470, loss: 0.00014748272951692343 2023-01-24 06:19:54.778900: step: 434/470, loss: 0.055859826505184174 2023-01-24 06:19:55.506270: step: 436/470, loss: 0.000146715174196288 2023-01-24 06:19:56.215200: step: 438/470, loss: 0.005512524861842394 2023-01-24 06:19:56.922584: step: 440/470, loss: 0.04210149496793747 2023-01-24 06:19:57.656587: step: 442/470, loss: 0.0007326016202569008 2023-01-24 06:19:58.474341: step: 444/470, loss: 0.017042549327015877 2023-01-24 06:19:59.219754: step: 446/470, loss: 0.0027442320715636015 2023-01-24 06:19:59.926840: step: 448/470, loss: 0.000428998377174139 2023-01-24 06:20:00.650570: step: 450/470, loss: 0.009314804337918758 2023-01-24 06:20:01.393211: step: 452/470, loss: 0.0005936333909630775 2023-01-24 06:20:02.171436: step: 454/470, loss: 0.018571410328149796 2023-01-24 06:20:02.867720: step: 456/470, loss: 0.0180160254240036 2023-01-24 06:20:03.587771: step: 458/470, loss: 0.02957836724817753 2023-01-24 06:20:04.291739: step: 460/470, loss: 0.002465310040861368 2023-01-24 06:20:05.024161: step: 462/470, loss: 0.0018986280774697661 2023-01-24 06:20:05.797284: step: 464/470, loss: 0.06108655408024788 2023-01-24 06:20:06.528671: step: 466/470, loss: 0.0003646984987426549 2023-01-24 06:20:07.276726: step: 468/470, loss: 0.017292622476816177 2023-01-24 06:20:07.990804: step: 470/470, loss: 0.01416701264679432 2023-01-24 06:20:08.716068: step: 472/470, loss: 0.00011536870442796499 2023-01-24 06:20:09.443734: step: 474/470, loss: 0.002867184579372406 2023-01-24 06:20:10.292310: step: 476/470, loss: 0.03824557363986969 2023-01-24 06:20:10.999220: step: 478/470, loss: 0.001631454681046307 2023-01-24 06:20:11.717510: step: 480/470, loss: 0.4666403532028198 2023-01-24 06:20:12.475009: step: 482/470, loss: 0.0006429323111660779 2023-01-24 06:20:13.181200: step: 484/470, loss: 0.07074841111898422 2023-01-24 06:20:13.852370: step: 486/470, loss: 0.00920712761580944 2023-01-24 06:20:14.570804: step: 488/470, loss: 0.027392836287617683 2023-01-24 06:20:15.287790: step: 490/470, loss: 0.0031878724694252014 2023-01-24 06:20:15.960648: step: 492/470, loss: 0.009520125575363636 2023-01-24 06:20:16.686923: step: 494/470, loss: 0.014723233878612518 2023-01-24 06:20:17.493541: step: 496/470, loss: 0.01308779139071703 2023-01-24 06:20:18.218317: step: 498/470, loss: 0.017647597938776016 2023-01-24 06:20:18.998814: step: 500/470, loss: 0.0010442689526826143 2023-01-24 06:20:19.738440: step: 502/470, loss: 0.0007168474257923663 2023-01-24 06:20:20.500671: step: 504/470, loss: 0.0017317109741270542 2023-01-24 06:20:21.216253: step: 506/470, loss: 0.0012164206709712744 2023-01-24 06:20:21.977519: step: 508/470, loss: 0.0004420246696099639 2023-01-24 06:20:22.662434: step: 510/470, loss: 0.0015642930520698428 2023-01-24 06:20:23.368929: step: 512/470, loss: 0.020066358149051666 2023-01-24 06:20:24.217883: step: 514/470, loss: 0.3007548451423645 2023-01-24 06:20:24.985339: step: 516/470, loss: 0.4788682460784912 2023-01-24 06:20:25.735903: step: 518/470, loss: 0.011519278399646282 2023-01-24 06:20:26.469418: step: 520/470, loss: 0.01719200238585472 2023-01-24 06:20:27.141332: step: 522/470, loss: 0.01048339158296585 2023-01-24 06:20:27.878508: step: 524/470, loss: 0.020729506388306618 2023-01-24 06:20:28.554858: step: 526/470, loss: 0.020677419379353523 2023-01-24 06:20:29.283506: step: 528/470, loss: 0.18149332702159882 2023-01-24 06:20:30.020426: step: 530/470, loss: 0.013467920944094658 2023-01-24 06:20:30.753984: step: 532/470, loss: 0.02106170728802681 2023-01-24 06:20:31.454180: step: 534/470, loss: 0.0007394176791422069 2023-01-24 06:20:32.187915: step: 536/470, loss: 0.04561655968427658 2023-01-24 06:20:33.001520: step: 538/470, loss: 0.01662643626332283 2023-01-24 06:20:33.731496: step: 540/470, loss: 0.001894438057206571 2023-01-24 06:20:34.398638: step: 542/470, loss: 0.00037416958366520703 2023-01-24 06:20:35.157155: step: 544/470, loss: 0.0077200643718242645 2023-01-24 06:20:35.886026: step: 546/470, loss: 0.010255703702569008 2023-01-24 06:20:36.637981: step: 548/470, loss: 0.053163353353738785 2023-01-24 06:20:37.386824: step: 550/470, loss: 0.039663802832365036 2023-01-24 06:20:38.194976: step: 552/470, loss: 0.00014281453331932425 2023-01-24 06:20:38.883972: step: 554/470, loss: 0.12468361854553223 2023-01-24 06:20:39.673322: step: 556/470, loss: 0.0031782027799636126 2023-01-24 06:20:40.454241: step: 558/470, loss: 0.25668269395828247 2023-01-24 06:20:41.272216: step: 560/470, loss: 0.005150767974555492 2023-01-24 06:20:41.944676: step: 562/470, loss: 0.0024627491366118193 2023-01-24 06:20:42.651147: step: 564/470, loss: 0.005648725666105747 2023-01-24 06:20:43.341415: step: 566/470, loss: 0.014787226915359497 2023-01-24 06:20:44.157453: step: 568/470, loss: 0.18614830076694489 2023-01-24 06:20:44.828100: step: 570/470, loss: 0.0038616762030869722 2023-01-24 06:20:45.534801: step: 572/470, loss: 0.003033594461157918 2023-01-24 06:20:46.221490: step: 574/470, loss: 0.03422601893544197 2023-01-24 06:20:47.036630: step: 576/470, loss: 0.008382627740502357 2023-01-24 06:20:47.769004: step: 578/470, loss: 0.00314133008942008 2023-01-24 06:20:48.456393: step: 580/470, loss: 0.0034324736334383488 2023-01-24 06:20:49.162866: step: 582/470, loss: 0.002011174103245139 2023-01-24 06:20:49.861959: step: 584/470, loss: 0.07423926889896393 2023-01-24 06:20:50.564720: step: 586/470, loss: 0.00035503433900885284 2023-01-24 06:20:51.291493: step: 588/470, loss: 0.015882398933172226 2023-01-24 06:20:52.048333: step: 590/470, loss: 0.03521262854337692 2023-01-24 06:20:52.745518: step: 592/470, loss: 0.06739047169685364 2023-01-24 06:20:53.427639: step: 594/470, loss: 0.001243374776095152 2023-01-24 06:20:54.200031: step: 596/470, loss: 0.0024422684218734503 2023-01-24 06:20:54.969220: step: 598/470, loss: 0.009328119456768036 2023-01-24 06:20:55.681495: step: 600/470, loss: 6.515645509352908e-05 2023-01-24 06:20:56.295576: step: 602/470, loss: 0.009061809629201889 2023-01-24 06:20:56.917763: step: 604/470, loss: 0.0029441039077937603 2023-01-24 06:20:57.663166: step: 606/470, loss: 0.030784372240304947 2023-01-24 06:20:58.378816: step: 608/470, loss: 0.009940296411514282 2023-01-24 06:20:59.079833: step: 610/470, loss: 0.010526066645979881 2023-01-24 06:20:59.901911: step: 612/470, loss: 0.0019004541682079434 2023-01-24 06:21:00.620957: step: 614/470, loss: 0.0026740499306470156 2023-01-24 06:21:01.394976: step: 616/470, loss: 0.0034406818449497223 2023-01-24 06:21:02.172700: step: 618/470, loss: 0.00043804876622743905 2023-01-24 06:21:02.912726: step: 620/470, loss: 0.017875712364912033 2023-01-24 06:21:03.616779: step: 622/470, loss: 0.0026596703100949526 2023-01-24 06:21:04.301890: step: 624/470, loss: 0.0004920915816910565 2023-01-24 06:21:05.027856: step: 626/470, loss: 0.016695676371455193 2023-01-24 06:21:05.797926: step: 628/470, loss: 0.0026145961601287127 2023-01-24 06:21:06.678926: step: 630/470, loss: 0.026421984657645226 2023-01-24 06:21:07.373007: step: 632/470, loss: 0.0005286968080326915 2023-01-24 06:21:08.180555: step: 634/470, loss: 0.06427496671676636 2023-01-24 06:21:08.875701: step: 636/470, loss: 0.0009096733992919326 2023-01-24 06:21:09.633641: step: 638/470, loss: 0.021214094012975693 2023-01-24 06:21:10.295683: step: 640/470, loss: 0.004757567774504423 2023-01-24 06:21:11.056089: step: 642/470, loss: 0.028353553265333176 2023-01-24 06:21:11.941878: step: 644/470, loss: 0.024054067209362984 2023-01-24 06:21:12.761275: step: 646/470, loss: 2.8277341698412783e-05 2023-01-24 06:21:13.494504: step: 648/470, loss: 0.01686178334057331 2023-01-24 06:21:14.162632: step: 650/470, loss: 3.088486846536398e-05 2023-01-24 06:21:14.964425: step: 652/470, loss: 0.0076260752975940704 2023-01-24 06:21:15.689756: step: 654/470, loss: 0.0021921610459685326 2023-01-24 06:21:16.461173: step: 656/470, loss: 0.015146718360483646 2023-01-24 06:21:17.231254: step: 658/470, loss: 1.6248441934585571 2023-01-24 06:21:17.899662: step: 660/470, loss: 0.0009182182257063687 2023-01-24 06:21:18.600583: step: 662/470, loss: 0.022168146446347237 2023-01-24 06:21:19.383645: step: 664/470, loss: 0.009737711399793625 2023-01-24 06:21:20.075007: step: 666/470, loss: 0.05892786756157875 2023-01-24 06:21:20.801350: step: 668/470, loss: 0.04346398264169693 2023-01-24 06:21:21.575048: step: 670/470, loss: 0.028981253504753113 2023-01-24 06:21:22.298478: step: 672/470, loss: 0.00941953994333744 2023-01-24 06:21:23.062757: step: 674/470, loss: 0.07243026047945023 2023-01-24 06:21:23.802471: step: 676/470, loss: 0.02073572389781475 2023-01-24 06:21:24.564438: step: 678/470, loss: 0.0022810224909335375 2023-01-24 06:21:25.257458: step: 680/470, loss: 0.09601109474897385 2023-01-24 06:21:25.951566: step: 682/470, loss: 0.003557687159627676 2023-01-24 06:21:26.647839: step: 684/470, loss: 0.0015952467219904065 2023-01-24 06:21:27.385730: step: 686/470, loss: 0.0002099236153298989 2023-01-24 06:21:28.112930: step: 688/470, loss: 0.376717746257782 2023-01-24 06:21:28.910633: step: 690/470, loss: 0.004120633937418461 2023-01-24 06:21:29.697104: step: 692/470, loss: 0.0018780836835503578 2023-01-24 06:21:30.562474: step: 694/470, loss: 0.5812046527862549 2023-01-24 06:21:31.272656: step: 696/470, loss: 0.002286061178892851 2023-01-24 06:21:32.030901: step: 698/470, loss: 0.013838349841535091 2023-01-24 06:21:32.763622: step: 700/470, loss: 0.0184122733771801 2023-01-24 06:21:33.440162: step: 702/470, loss: 0.0008420947706326842 2023-01-24 06:21:34.106060: step: 704/470, loss: 0.08517462015151978 2023-01-24 06:21:34.804041: step: 706/470, loss: 0.0022694526705890894 2023-01-24 06:21:35.658491: step: 708/470, loss: 0.029612381011247635 2023-01-24 06:21:36.333388: step: 710/470, loss: 0.0010812204563990235 2023-01-24 06:21:37.130930: step: 712/470, loss: 0.0535753071308136 2023-01-24 06:21:37.872849: step: 714/470, loss: 0.008847307413816452 2023-01-24 06:21:38.638967: step: 716/470, loss: 0.03432611748576164 2023-01-24 06:21:39.347742: step: 718/470, loss: 0.006384172476828098 2023-01-24 06:21:40.104813: step: 720/470, loss: 0.004302928224205971 2023-01-24 06:21:40.833620: step: 722/470, loss: 0.01254085823893547 2023-01-24 06:21:41.618075: step: 724/470, loss: 0.007098275702446699 2023-01-24 06:21:42.331231: step: 726/470, loss: 0.018687281757593155 2023-01-24 06:21:43.061793: step: 728/470, loss: 0.08355188369750977 2023-01-24 06:21:43.772224: step: 730/470, loss: 0.00029057872598059475 2023-01-24 06:21:44.531832: step: 732/470, loss: 0.06628356873989105 2023-01-24 06:21:45.214936: step: 734/470, loss: 0.04160122945904732 2023-01-24 06:21:46.010386: step: 736/470, loss: 0.005398217123001814 2023-01-24 06:21:46.797306: step: 738/470, loss: 0.00326385535299778 2023-01-24 06:21:47.595319: step: 740/470, loss: 0.10454016923904419 2023-01-24 06:21:48.322230: step: 742/470, loss: 0.00869603082537651 2023-01-24 06:21:49.085666: step: 744/470, loss: 0.01859329640865326 2023-01-24 06:21:49.827108: step: 746/470, loss: 0.0023927935399115086 2023-01-24 06:21:50.591394: step: 748/470, loss: 0.01185494102537632 2023-01-24 06:21:51.240040: step: 750/470, loss: 0.0039005994331091642 2023-01-24 06:21:51.878042: step: 752/470, loss: 0.001562373130582273 2023-01-24 06:21:52.570048: step: 754/470, loss: 0.14783407747745514 2023-01-24 06:21:53.376798: step: 756/470, loss: 0.0047768522053956985 2023-01-24 06:21:54.144079: step: 758/470, loss: 0.005835649557411671 2023-01-24 06:21:54.915683: step: 760/470, loss: 0.06568383425474167 2023-01-24 06:21:55.693540: step: 762/470, loss: 0.17401570081710815 2023-01-24 06:21:56.417349: step: 764/470, loss: 0.0028607286512851715 2023-01-24 06:21:57.091384: step: 766/470, loss: 0.03020673617720604 2023-01-24 06:21:57.836080: step: 768/470, loss: 0.2988532483577728 2023-01-24 06:21:58.639657: step: 770/470, loss: 0.2755778729915619 2023-01-24 06:21:59.370346: step: 772/470, loss: 0.07244252413511276 2023-01-24 06:22:00.144635: step: 774/470, loss: 0.030048388987779617 2023-01-24 06:22:00.845904: step: 776/470, loss: 0.18164348602294922 2023-01-24 06:22:01.592767: step: 778/470, loss: 0.001993674086406827 2023-01-24 06:22:02.372819: step: 780/470, loss: 0.003424519905820489 2023-01-24 06:22:03.283956: step: 782/470, loss: 0.01581845059990883 2023-01-24 06:22:04.050970: step: 784/470, loss: 0.03147071599960327 2023-01-24 06:22:04.766284: step: 786/470, loss: 0.016147736459970474 2023-01-24 06:22:05.485330: step: 788/470, loss: 0.0013450286351144314 2023-01-24 06:22:06.189565: step: 790/470, loss: 0.013478526845574379 2023-01-24 06:22:06.921931: step: 792/470, loss: 0.003012130269780755 2023-01-24 06:22:07.670883: step: 794/470, loss: 0.010243113152682781 2023-01-24 06:22:08.407132: step: 796/470, loss: 0.030851509422063828 2023-01-24 06:22:09.175408: step: 798/470, loss: 0.015330376103520393 2023-01-24 06:22:09.940333: step: 800/470, loss: 0.0013428201200440526 2023-01-24 06:22:10.665188: step: 802/470, loss: 0.0035743422340601683 2023-01-24 06:22:11.400884: step: 804/470, loss: 0.05729028955101967 2023-01-24 06:22:12.173523: step: 806/470, loss: 0.002276189159601927 2023-01-24 06:22:12.876145: step: 808/470, loss: 0.002368086948990822 2023-01-24 06:22:13.654065: step: 810/470, loss: 0.06536681950092316 2023-01-24 06:22:14.362752: step: 812/470, loss: 0.0029183574952185154 2023-01-24 06:22:15.109766: step: 814/470, loss: 0.0018473371164873242 2023-01-24 06:22:15.870509: step: 816/470, loss: 0.013988809660077095 2023-01-24 06:22:16.733428: step: 818/470, loss: 0.021747639402747154 2023-01-24 06:22:17.413348: step: 820/470, loss: 0.0005320486379787326 2023-01-24 06:22:18.138338: step: 822/470, loss: 0.026045726612210274 2023-01-24 06:22:18.807722: step: 824/470, loss: 0.009219018742442131 2023-01-24 06:22:19.589011: step: 826/470, loss: 0.0010474611772224307 2023-01-24 06:22:20.331629: step: 828/470, loss: 0.00877501629292965 2023-01-24 06:22:21.038156: step: 830/470, loss: 0.0060637411661446095 2023-01-24 06:22:21.802101: step: 832/470, loss: 0.02464357018470764 2023-01-24 06:22:22.586049: step: 834/470, loss: 0.00776535551995039 2023-01-24 06:22:23.269107: step: 836/470, loss: 0.053323835134506226 2023-01-24 06:22:23.982719: step: 838/470, loss: 0.0020373347215354443 2023-01-24 06:22:24.761219: step: 840/470, loss: 0.3750033974647522 2023-01-24 06:22:25.475939: step: 842/470, loss: 0.042009808123111725 2023-01-24 06:22:26.178559: step: 844/470, loss: 0.02685542218387127 2023-01-24 06:22:26.858637: step: 846/470, loss: 0.0003366958990227431 2023-01-24 06:22:27.556669: step: 848/470, loss: 0.036231983453035355 2023-01-24 06:22:28.258923: step: 850/470, loss: 0.01380055584013462 2023-01-24 06:22:29.059778: step: 852/470, loss: 0.04030189290642738 2023-01-24 06:22:29.781724: step: 854/470, loss: 0.007406915538012981 2023-01-24 06:22:30.448140: step: 856/470, loss: 0.0028321263380348682 2023-01-24 06:22:31.159017: step: 858/470, loss: 0.0024023745208978653 2023-01-24 06:22:31.851324: step: 860/470, loss: 0.045990705490112305 2023-01-24 06:22:32.586382: step: 862/470, loss: 0.0008657873258925974 2023-01-24 06:22:33.380221: step: 864/470, loss: 0.03380803018808365 2023-01-24 06:22:34.155176: step: 866/470, loss: 0.016098463907837868 2023-01-24 06:22:34.833589: step: 868/470, loss: 0.41227632761001587 2023-01-24 06:22:35.517095: step: 870/470, loss: 0.0017065646825358272 2023-01-24 06:22:36.300513: step: 872/470, loss: 0.002425319282338023 2023-01-24 06:22:37.060318: step: 874/470, loss: 0.003872190834954381 2023-01-24 06:22:37.775672: step: 876/470, loss: 0.0007468942785635591 2023-01-24 06:22:38.646085: step: 878/470, loss: 0.008748779073357582 2023-01-24 06:22:39.342346: step: 880/470, loss: 0.18740367889404297 2023-01-24 06:22:40.024634: step: 882/470, loss: 0.006937297526746988 2023-01-24 06:22:40.726401: step: 884/470, loss: 0.001644572359509766 2023-01-24 06:22:41.477060: step: 886/470, loss: 0.03953949362039566 2023-01-24 06:22:42.150102: step: 888/470, loss: 0.0010495680617168546 2023-01-24 06:22:42.875013: step: 890/470, loss: 0.004453408066183329 2023-01-24 06:22:43.547095: step: 892/470, loss: 0.03644545376300812 2023-01-24 06:22:44.221276: step: 894/470, loss: 0.0026314761489629745 2023-01-24 06:22:44.959893: step: 896/470, loss: 0.011988071724772453 2023-01-24 06:22:45.729745: step: 898/470, loss: 1.0937950611114502 2023-01-24 06:22:46.454001: step: 900/470, loss: 0.01913582533597946 2023-01-24 06:22:47.219731: step: 902/470, loss: 0.002004404319450259 2023-01-24 06:22:47.966462: step: 904/470, loss: 0.02826942503452301 2023-01-24 06:22:48.645376: step: 906/470, loss: 0.005874123424291611 2023-01-24 06:22:49.364296: step: 908/470, loss: 0.00016057485481724143 2023-01-24 06:22:50.115082: step: 910/470, loss: 0.02106778882443905 2023-01-24 06:22:50.843006: step: 912/470, loss: 0.011593667790293694 2023-01-24 06:22:51.542795: step: 914/470, loss: 0.006512309890240431 2023-01-24 06:22:52.361449: step: 916/470, loss: 0.022902552038431168 2023-01-24 06:22:53.152726: step: 918/470, loss: 0.00646227365359664 2023-01-24 06:22:53.834429: step: 920/470, loss: 0.022378139197826385 2023-01-24 06:22:54.523485: step: 922/470, loss: 0.027927104383707047 2023-01-24 06:22:55.283616: step: 924/470, loss: 0.0021522322203963995 2023-01-24 06:22:56.016138: step: 926/470, loss: 0.013384867459535599 2023-01-24 06:22:56.801367: step: 928/470, loss: 0.001673889346420765 2023-01-24 06:22:57.593528: step: 930/470, loss: 0.3148545026779175 2023-01-24 06:22:58.315512: step: 932/470, loss: 0.03243735432624817 2023-01-24 06:22:58.997118: step: 934/470, loss: 0.008292516693472862 2023-01-24 06:22:59.795714: step: 936/470, loss: 0.00035324000054970384 2023-01-24 06:23:00.543839: step: 938/470, loss: 0.003821393707767129 2023-01-24 06:23:01.281670: step: 940/470, loss: 0.008569333702325821 2023-01-24 06:23:02.001959: step: 942/470, loss: 0.003781524719670415 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35514455782312926, 'r': 0.3302103099304238, 'f1': 0.34222386102917074}, 'combined': 0.2521649502320205, 'epoch': 34} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36324986426135314, 'r': 0.3573121260955425, 'f1': 0.3602565304307942}, 'combined': 0.24017102028719609, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3445066695664782, 'r': 0.3235878585112082, 'f1': 0.3337197679753556}, 'combined': 0.24589877640289357, 'epoch': 34} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.37029654466117873, 'r': 0.3539180436473189, 'f1': 0.3619220898654982}, 'combined': 0.2412813932436654, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33357340616581865, 'r': 0.32534484016931836, 'f1': 0.3294077440331043}, 'combined': 0.24272149560334, 'epoch': 34} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36193447415286695, 'r': 0.3685467385845059, 'f1': 0.36521067949298347}, 'combined': 0.2434737863286556, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2869318181818182, 'r': 0.3607142857142857, 'f1': 0.319620253164557}, 'combined': 0.21308016877637131, 'epoch': 34} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.41304347826086957, 'f1': 0.45238095238095233}, 'combined': 0.3015873015873015, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4572368421052631, 'r': 0.18920145190562612, 'f1': 0.2676508344030808}, 'combined': 0.17843388960205386, 'epoch': 34} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35514455782312926, 'r': 0.3302103099304238, 'f1': 0.34222386102917074}, 'combined': 0.2521649502320205, 'epoch': 34} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36324986426135314, 'r': 0.3573121260955425, 'f1': 0.3602565304307942}, 'combined': 0.24017102028719609, 'epoch': 34} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2869318181818182, 'r': 0.3607142857142857, 'f1': 0.319620253164557}, 'combined': 0.21308016877637131, 'epoch': 34} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:25:41.926488: step: 2/470, loss: 0.02259201370179653 2023-01-24 06:25:42.662789: step: 4/470, loss: 0.002634732285514474 2023-01-24 06:25:43.350767: step: 6/470, loss: 0.01444519218057394 2023-01-24 06:25:44.142240: step: 8/470, loss: 0.007645417936146259 2023-01-24 06:25:44.924449: step: 10/470, loss: 6.119604950072244e-05 2023-01-24 06:25:45.621160: step: 12/470, loss: 0.006398671306669712 2023-01-24 06:25:46.358178: step: 14/470, loss: 1.4735318422317505 2023-01-24 06:25:47.099885: step: 16/470, loss: 0.0003182763757649809 2023-01-24 06:25:47.994867: step: 18/470, loss: 0.014096281491219997 2023-01-24 06:25:48.675305: step: 20/470, loss: 0.14310337603092194 2023-01-24 06:25:49.581576: step: 22/470, loss: 0.00020834009046666324 2023-01-24 06:25:50.312368: step: 24/470, loss: 0.0019535699393600225 2023-01-24 06:25:51.104203: step: 26/470, loss: 0.020988579839468002 2023-01-24 06:25:51.875949: step: 28/470, loss: 0.02080502361059189 2023-01-24 06:25:52.593390: step: 30/470, loss: 0.006822787690907717 2023-01-24 06:25:53.358221: step: 32/470, loss: 0.006888087373226881 2023-01-24 06:25:54.043692: step: 34/470, loss: 0.0055128647945821285 2023-01-24 06:25:54.781875: step: 36/470, loss: 0.0021674029994755983 2023-01-24 06:25:55.478814: step: 38/470, loss: 0.03273850306868553 2023-01-24 06:25:56.219461: step: 40/470, loss: 0.03793059661984444 2023-01-24 06:25:57.172317: step: 42/470, loss: 0.0005464658606797457 2023-01-24 06:25:58.023530: step: 44/470, loss: 7.778897997923195e-05 2023-01-24 06:25:58.716033: step: 46/470, loss: 0.006451576016843319 2023-01-24 06:25:59.491033: step: 48/470, loss: 0.040247056633234024 2023-01-24 06:26:00.202322: step: 50/470, loss: 0.0015183590585365891 2023-01-24 06:26:00.886658: step: 52/470, loss: 0.005788735579699278 2023-01-24 06:26:01.574388: step: 54/470, loss: 0.0011430204613134265 2023-01-24 06:26:02.298962: step: 56/470, loss: 0.0012191730784252286 2023-01-24 06:26:03.021443: step: 58/470, loss: 5.7664259657030925e-05 2023-01-24 06:26:03.924662: step: 60/470, loss: 0.010701543651521206 2023-01-24 06:26:04.607954: step: 62/470, loss: 0.00015700850053690374 2023-01-24 06:26:05.328735: step: 64/470, loss: 0.00028906611260026693 2023-01-24 06:26:06.015684: step: 66/470, loss: 0.00823915097862482 2023-01-24 06:26:06.705892: step: 68/470, loss: 0.028155675157904625 2023-01-24 06:26:07.434164: step: 70/470, loss: 0.0030553205870091915 2023-01-24 06:26:08.072737: step: 72/470, loss: 0.003039710223674774 2023-01-24 06:26:08.785973: step: 74/470, loss: 0.001571728615090251 2023-01-24 06:26:09.449996: step: 76/470, loss: 0.5274283289909363 2023-01-24 06:26:10.167488: step: 78/470, loss: 0.04768010228872299 2023-01-24 06:26:10.888253: step: 80/470, loss: 0.00412197969853878 2023-01-24 06:26:11.607918: step: 82/470, loss: 0.1300504207611084 2023-01-24 06:26:12.291849: step: 84/470, loss: 0.0008046081056818366 2023-01-24 06:26:13.013528: step: 86/470, loss: 0.04525003209710121 2023-01-24 06:26:13.739171: step: 88/470, loss: 0.003112942911684513 2023-01-24 06:26:14.466679: step: 90/470, loss: 0.008025175891816616 2023-01-24 06:26:15.118152: step: 92/470, loss: 0.011606319807469845 2023-01-24 06:26:15.811792: step: 94/470, loss: 0.005697671324014664 2023-01-24 06:26:16.535664: step: 96/470, loss: 0.00157053186558187 2023-01-24 06:26:17.221380: step: 98/470, loss: 0.0029143900610506535 2023-01-24 06:26:17.948836: step: 100/470, loss: 0.0015309115406125784 2023-01-24 06:26:18.678607: step: 102/470, loss: 0.0018614137079566717 2023-01-24 06:26:19.417968: step: 104/470, loss: 0.007174816448241472 2023-01-24 06:26:20.154298: step: 106/470, loss: 0.037225011736154556 2023-01-24 06:26:20.905830: step: 108/470, loss: 0.013661464676260948 2023-01-24 06:26:21.604027: step: 110/470, loss: 0.015359270386397839 2023-01-24 06:26:22.320559: step: 112/470, loss: 0.05280671268701553 2023-01-24 06:26:23.160774: step: 114/470, loss: 0.0423421710729599 2023-01-24 06:26:23.914273: step: 116/470, loss: 0.6965582370758057 2023-01-24 06:26:24.609158: step: 118/470, loss: 0.016391227021813393 2023-01-24 06:26:25.362099: step: 120/470, loss: 0.002424615202471614 2023-01-24 06:26:26.082088: step: 122/470, loss: 0.0439317412674427 2023-01-24 06:26:26.819658: step: 124/470, loss: 0.041528187692165375 2023-01-24 06:26:27.555510: step: 126/470, loss: 0.022909237071871758 2023-01-24 06:26:28.240315: step: 128/470, loss: 0.005902472883462906 2023-01-24 06:26:29.047629: step: 130/470, loss: 0.012232090346515179 2023-01-24 06:26:29.720820: step: 132/470, loss: 0.006282647140324116 2023-01-24 06:26:30.397177: step: 134/470, loss: 0.009130376391112804 2023-01-24 06:26:31.089234: step: 136/470, loss: 0.0018813696224242449 2023-01-24 06:26:31.889767: step: 138/470, loss: 0.20341543853282928 2023-01-24 06:26:32.665132: step: 140/470, loss: 0.0039907037280499935 2023-01-24 06:26:33.414577: step: 142/470, loss: 0.00871391873806715 2023-01-24 06:26:34.198024: step: 144/470, loss: 0.004598352592438459 2023-01-24 06:26:34.822069: step: 146/470, loss: 0.02266603522002697 2023-01-24 06:26:35.568344: step: 148/470, loss: 0.014866764657199383 2023-01-24 06:26:36.269500: step: 150/470, loss: 0.023495275527238846 2023-01-24 06:26:36.977237: step: 152/470, loss: 0.015716491267085075 2023-01-24 06:26:37.737400: step: 154/470, loss: 0.021563276648521423 2023-01-24 06:26:38.413610: step: 156/470, loss: 0.08640297502279282 2023-01-24 06:26:39.085907: step: 158/470, loss: 0.01481255330145359 2023-01-24 06:26:39.781352: step: 160/470, loss: 0.005702580790966749 2023-01-24 06:26:40.585676: step: 162/470, loss: 0.0012152111157774925 2023-01-24 06:26:41.320437: step: 164/470, loss: 0.0035164314322173595 2023-01-24 06:26:41.962752: step: 166/470, loss: 4.3240583181614056e-05 2023-01-24 06:26:42.677291: step: 168/470, loss: 0.049790188670158386 2023-01-24 06:26:43.392659: step: 170/470, loss: 0.0005157435080036521 2023-01-24 06:26:44.133767: step: 172/470, loss: 0.0012744866544380784 2023-01-24 06:26:44.870089: step: 174/470, loss: 0.003838537260890007 2023-01-24 06:26:45.616759: step: 176/470, loss: 0.006663764826953411 2023-01-24 06:26:46.397784: step: 178/470, loss: 0.013853196054697037 2023-01-24 06:26:47.113600: step: 180/470, loss: 0.004837124142795801 2023-01-24 06:26:47.883679: step: 182/470, loss: 0.0024396313820034266 2023-01-24 06:26:48.651767: step: 184/470, loss: 0.009030799381434917 2023-01-24 06:26:49.340718: step: 186/470, loss: 0.010197999887168407 2023-01-24 06:26:50.101426: step: 188/470, loss: 0.1191810742020607 2023-01-24 06:26:50.781319: step: 190/470, loss: 7.335062400670722e-05 2023-01-24 06:26:51.501768: step: 192/470, loss: 0.022936223074793816 2023-01-24 06:26:52.288269: step: 194/470, loss: 0.002525016665458679 2023-01-24 06:26:53.007006: step: 196/470, loss: 0.15407709777355194 2023-01-24 06:26:53.701989: step: 198/470, loss: 0.02182081900537014 2023-01-24 06:26:54.410394: step: 200/470, loss: 0.02167477458715439 2023-01-24 06:26:55.154764: step: 202/470, loss: 0.017393987625837326 2023-01-24 06:26:55.909883: step: 204/470, loss: 0.04078075289726257 2023-01-24 06:26:56.711819: step: 206/470, loss: 0.0022740724962204695 2023-01-24 06:26:57.504499: step: 208/470, loss: 0.011434967629611492 2023-01-24 06:26:58.160185: step: 210/470, loss: 0.010663002729415894 2023-01-24 06:26:58.914355: step: 212/470, loss: 0.03014855459332466 2023-01-24 06:26:59.580161: step: 214/470, loss: 0.018498685210943222 2023-01-24 06:27:00.427884: step: 216/470, loss: 0.03520440682768822 2023-01-24 06:27:01.206914: step: 218/470, loss: 0.008998945355415344 2023-01-24 06:27:02.081771: step: 220/470, loss: 0.038609180599451065 2023-01-24 06:27:02.851940: step: 222/470, loss: 0.0071504549123346806 2023-01-24 06:27:03.533938: step: 224/470, loss: 0.0034373498056083918 2023-01-24 06:27:04.281907: step: 226/470, loss: 0.010733299888670444 2023-01-24 06:27:05.019451: step: 228/470, loss: 0.0056127398274838924 2023-01-24 06:27:05.741918: step: 230/470, loss: 0.006185805890709162 2023-01-24 06:27:06.454397: step: 232/470, loss: 0.007041038013994694 2023-01-24 06:27:07.170108: step: 234/470, loss: 0.12337449938058853 2023-01-24 06:27:07.897362: step: 236/470, loss: 0.005509430076926947 2023-01-24 06:27:08.639264: step: 238/470, loss: 0.46690887212753296 2023-01-24 06:27:09.394659: step: 240/470, loss: 0.004861161578446627 2023-01-24 06:27:10.116432: step: 242/470, loss: 0.025522425770759583 2023-01-24 06:27:10.845269: step: 244/470, loss: 0.00966788548976183 2023-01-24 06:27:11.576673: step: 246/470, loss: 0.003947984892874956 2023-01-24 06:27:12.298740: step: 248/470, loss: 0.00835223589092493 2023-01-24 06:27:13.053858: step: 250/470, loss: 0.0010288195917382836 2023-01-24 06:27:13.798526: step: 252/470, loss: 0.0008441155659966171 2023-01-24 06:27:14.493954: step: 254/470, loss: 0.0017779265763238072 2023-01-24 06:27:15.196833: step: 256/470, loss: 0.0010619647800922394 2023-01-24 06:27:15.897299: step: 258/470, loss: 0.0002834839397110045 2023-01-24 06:27:16.594653: step: 260/470, loss: 0.0008969651535153389 2023-01-24 06:27:17.362651: step: 262/470, loss: 0.0008425424457527697 2023-01-24 06:27:18.042736: step: 264/470, loss: 0.02052401751279831 2023-01-24 06:27:18.790336: step: 266/470, loss: 0.001179807586595416 2023-01-24 06:27:19.592638: step: 268/470, loss: 1.5121781826019287 2023-01-24 06:27:20.348353: step: 270/470, loss: 0.004608070477843285 2023-01-24 06:27:21.108237: step: 272/470, loss: 0.0143381766974926 2023-01-24 06:27:21.818861: step: 274/470, loss: 0.0026956668589264154 2023-01-24 06:27:22.539945: step: 276/470, loss: 0.009074504487216473 2023-01-24 06:27:23.206569: step: 278/470, loss: 0.012543557211756706 2023-01-24 06:27:23.853267: step: 280/470, loss: 0.020108414813876152 2023-01-24 06:27:24.546979: step: 282/470, loss: 0.004175996873527765 2023-01-24 06:27:25.178905: step: 284/470, loss: 0.0059522595256567 2023-01-24 06:27:25.845870: step: 286/470, loss: 0.00022827064094599336 2023-01-24 06:27:26.627064: step: 288/470, loss: 0.007600300945341587 2023-01-24 06:27:27.382520: step: 290/470, loss: 0.00705496221780777 2023-01-24 06:27:28.146451: step: 292/470, loss: 0.06734557449817657 2023-01-24 06:27:28.898200: step: 294/470, loss: 8.682074258103967e-05 2023-01-24 06:27:29.781787: step: 296/470, loss: 0.004848700948059559 2023-01-24 06:27:30.522073: step: 298/470, loss: 0.0014980545965954661 2023-01-24 06:27:31.190299: step: 300/470, loss: 0.0006909299991093576 2023-01-24 06:27:31.946883: step: 302/470, loss: 0.0281376875936985 2023-01-24 06:27:32.642439: step: 304/470, loss: 0.010767661035060883 2023-01-24 06:27:33.379629: step: 306/470, loss: 0.18951472640037537 2023-01-24 06:27:34.083377: step: 308/470, loss: 0.0026056517381221056 2023-01-24 06:27:34.750562: step: 310/470, loss: 0.0158822201192379 2023-01-24 06:27:35.454134: step: 312/470, loss: 0.03624594956636429 2023-01-24 06:27:36.197782: step: 314/470, loss: 0.026492631062865257 2023-01-24 06:27:36.872783: step: 316/470, loss: 0.0021404740400612354 2023-01-24 06:27:37.662972: step: 318/470, loss: 0.0041463132947683334 2023-01-24 06:27:38.436924: step: 320/470, loss: 0.02275344356894493 2023-01-24 06:27:39.224053: step: 322/470, loss: 0.03233027830719948 2023-01-24 06:27:39.927790: step: 324/470, loss: 0.0033383751288056374 2023-01-24 06:27:40.699880: step: 326/470, loss: 0.010370167903602123 2023-01-24 06:27:41.355212: step: 328/470, loss: 0.20740625262260437 2023-01-24 06:27:42.099742: step: 330/470, loss: 0.031531982123851776 2023-01-24 06:27:42.844817: step: 332/470, loss: 0.0020223986357450485 2023-01-24 06:27:43.581016: step: 334/470, loss: 0.0020372618455439806 2023-01-24 06:27:44.264023: step: 336/470, loss: 0.0007067452534101903 2023-01-24 06:27:44.895582: step: 338/470, loss: 0.0010336972773075104 2023-01-24 06:27:45.662251: step: 340/470, loss: 0.00013325613690540195 2023-01-24 06:27:46.411370: step: 342/470, loss: 0.009005560539662838 2023-01-24 06:27:47.141903: step: 344/470, loss: 0.0064828358590602875 2023-01-24 06:27:47.801615: step: 346/470, loss: 0.0001396966545144096 2023-01-24 06:27:48.671979: step: 348/470, loss: 0.16062486171722412 2023-01-24 06:27:49.404418: step: 350/470, loss: 0.2716470956802368 2023-01-24 06:27:50.155957: step: 352/470, loss: 0.05235571414232254 2023-01-24 06:27:50.902306: step: 354/470, loss: 0.029102042317390442 2023-01-24 06:27:51.629775: step: 356/470, loss: 0.01925988309085369 2023-01-24 06:27:52.296375: step: 358/470, loss: 0.0007001806516200304 2023-01-24 06:27:53.012903: step: 360/470, loss: 0.0038126695435494184 2023-01-24 06:27:53.720704: step: 362/470, loss: 0.009832276962697506 2023-01-24 06:27:54.411286: step: 364/470, loss: 0.010709324851632118 2023-01-24 06:27:55.261573: step: 366/470, loss: 0.0702999159693718 2023-01-24 06:27:55.911795: step: 368/470, loss: 0.006159055978059769 2023-01-24 06:27:56.632145: step: 370/470, loss: 6.533000123454258e-05 2023-01-24 06:27:57.396659: step: 372/470, loss: 0.022670604288578033 2023-01-24 06:27:58.210407: step: 374/470, loss: 0.005668703466653824 2023-01-24 06:27:58.904831: step: 376/470, loss: 0.020378923043608665 2023-01-24 06:27:59.636496: step: 378/470, loss: 0.00037373710074461997 2023-01-24 06:28:00.432054: step: 380/470, loss: 0.03276490792632103 2023-01-24 06:28:01.188651: step: 382/470, loss: 0.04409230127930641 2023-01-24 06:28:01.969813: step: 384/470, loss: 0.003761183237656951 2023-01-24 06:28:02.692939: step: 386/470, loss: 0.03813392296433449 2023-01-24 06:28:03.423246: step: 388/470, loss: 0.0055965944193303585 2023-01-24 06:28:04.165747: step: 390/470, loss: 0.003348251339048147 2023-01-24 06:28:04.882673: step: 392/470, loss: 0.015311792492866516 2023-01-24 06:28:05.592450: step: 394/470, loss: 0.00032469138386659324 2023-01-24 06:28:06.415910: step: 396/470, loss: 0.006082088686525822 2023-01-24 06:28:07.270685: step: 398/470, loss: 0.02094237320125103 2023-01-24 06:28:08.005965: step: 400/470, loss: 0.01110068242996931 2023-01-24 06:28:08.684886: step: 402/470, loss: 0.20870746672153473 2023-01-24 06:28:09.428162: step: 404/470, loss: 0.0033058554399758577 2023-01-24 06:28:10.120018: step: 406/470, loss: 0.00017056135402526706 2023-01-24 06:28:10.880258: step: 408/470, loss: 0.0010944005334749818 2023-01-24 06:28:11.601865: step: 410/470, loss: 0.03575807437300682 2023-01-24 06:28:12.282896: step: 412/470, loss: 0.007012884132564068 2023-01-24 06:28:13.084084: step: 414/470, loss: 4.490640276344493e-05 2023-01-24 06:28:13.811730: step: 416/470, loss: 0.05343223735690117 2023-01-24 06:28:14.529880: step: 418/470, loss: 0.007675068452954292 2023-01-24 06:28:15.204234: step: 420/470, loss: 0.0003359982802066952 2023-01-24 06:28:16.044793: step: 422/470, loss: 0.00039737403858453035 2023-01-24 06:28:16.722276: step: 424/470, loss: 0.002490453887730837 2023-01-24 06:28:17.476321: step: 426/470, loss: 0.0012507832143455744 2023-01-24 06:28:18.209938: step: 428/470, loss: 0.003989855293184519 2023-01-24 06:28:18.971363: step: 430/470, loss: 0.0036130514927208424 2023-01-24 06:28:19.674243: step: 432/470, loss: 0.0005295684677548707 2023-01-24 06:28:20.461516: step: 434/470, loss: 0.019468065351247787 2023-01-24 06:28:21.154932: step: 436/470, loss: 0.01111428253352642 2023-01-24 06:28:21.872780: step: 438/470, loss: 0.08234895020723343 2023-01-24 06:28:22.587203: step: 440/470, loss: 0.016692696139216423 2023-01-24 06:28:23.322788: step: 442/470, loss: 0.03166506811976433 2023-01-24 06:28:24.070067: step: 444/470, loss: 0.03169902786612511 2023-01-24 06:28:24.807855: step: 446/470, loss: 0.010146571323275566 2023-01-24 06:28:25.531875: step: 448/470, loss: 0.0064794747158885 2023-01-24 06:28:26.176108: step: 450/470, loss: 0.009995583444833755 2023-01-24 06:28:26.853402: step: 452/470, loss: 0.004090282134711742 2023-01-24 06:28:27.585189: step: 454/470, loss: 0.011358017101883888 2023-01-24 06:28:28.245320: step: 456/470, loss: 0.023835793137550354 2023-01-24 06:28:28.966001: step: 458/470, loss: 0.040863286703825 2023-01-24 06:28:29.770798: step: 460/470, loss: 0.025173354893922806 2023-01-24 06:28:30.461903: step: 462/470, loss: 0.0024334678892046213 2023-01-24 06:28:31.198873: step: 464/470, loss: 0.011209073476493359 2023-01-24 06:28:31.963991: step: 466/470, loss: 0.015448382124304771 2023-01-24 06:28:32.580691: step: 468/470, loss: 0.027771124616265297 2023-01-24 06:28:33.394272: step: 470/470, loss: 0.006685018073767424 2023-01-24 06:28:34.113314: step: 472/470, loss: 0.005859097465872765 2023-01-24 06:28:34.836724: step: 474/470, loss: 0.009597435593605042 2023-01-24 06:28:35.576808: step: 476/470, loss: 0.006935752462595701 2023-01-24 06:28:36.285367: step: 478/470, loss: 0.0011011871974915266 2023-01-24 06:28:37.041507: step: 480/470, loss: 0.008520321920514107 2023-01-24 06:28:37.776948: step: 482/470, loss: 0.031099451705813408 2023-01-24 06:28:38.479195: step: 484/470, loss: 0.001046741963364184 2023-01-24 06:28:39.125607: step: 486/470, loss: 0.001939225709065795 2023-01-24 06:28:39.914947: step: 488/470, loss: 0.027191482484340668 2023-01-24 06:28:40.692909: step: 490/470, loss: 0.004440982360392809 2023-01-24 06:28:41.388282: step: 492/470, loss: 0.00052472302922979 2023-01-24 06:28:42.157804: step: 494/470, loss: 0.005477104801684618 2023-01-24 06:28:42.983462: step: 496/470, loss: 0.006036494392901659 2023-01-24 06:28:43.693224: step: 498/470, loss: 0.006640726700425148 2023-01-24 06:28:44.438743: step: 500/470, loss: 0.024400828406214714 2023-01-24 06:28:45.248899: step: 502/470, loss: 0.002363094361498952 2023-01-24 06:28:45.983708: step: 504/470, loss: 0.007462051697075367 2023-01-24 06:28:46.732850: step: 506/470, loss: 0.005165347829461098 2023-01-24 06:28:47.470251: step: 508/470, loss: 0.0008528852486051619 2023-01-24 06:28:48.203582: step: 510/470, loss: 0.002549446653574705 2023-01-24 06:28:49.006354: step: 512/470, loss: 0.003729384858161211 2023-01-24 06:28:49.786007: step: 514/470, loss: 0.008527263067662716 2023-01-24 06:28:50.507754: step: 516/470, loss: 0.0031432053074240685 2023-01-24 06:28:51.260937: step: 518/470, loss: 0.018408743664622307 2023-01-24 06:28:51.955012: step: 520/470, loss: 0.008144121617078781 2023-01-24 06:28:52.751923: step: 522/470, loss: 0.020345963537693024 2023-01-24 06:28:53.446540: step: 524/470, loss: 0.0073492685332894325 2023-01-24 06:28:54.150690: step: 526/470, loss: 0.009888478554785252 2023-01-24 06:28:54.869061: step: 528/470, loss: 0.002217804081737995 2023-01-24 06:28:55.623521: step: 530/470, loss: 0.6544013619422913 2023-01-24 06:28:56.362753: step: 532/470, loss: 0.007728029508143663 2023-01-24 06:28:57.133777: step: 534/470, loss: 0.0011883730767294765 2023-01-24 06:28:57.828792: step: 536/470, loss: 0.0005980039713904262 2023-01-24 06:28:58.511526: step: 538/470, loss: 0.0516948327422142 2023-01-24 06:28:59.284854: step: 540/470, loss: 0.029013799503445625 2023-01-24 06:29:00.012089: step: 542/470, loss: 0.0037074440624564886 2023-01-24 06:29:00.792744: step: 544/470, loss: 0.00036978485877625644 2023-01-24 06:29:01.555823: step: 546/470, loss: 0.0021586138755083084 2023-01-24 06:29:02.369850: step: 548/470, loss: 0.014409597963094711 2023-01-24 06:29:03.081759: step: 550/470, loss: 0.0010483302175998688 2023-01-24 06:29:03.841572: step: 552/470, loss: 0.026682112365961075 2023-01-24 06:29:04.529482: step: 554/470, loss: 0.00963746290653944 2023-01-24 06:29:05.226843: step: 556/470, loss: 0.03591204062104225 2023-01-24 06:29:06.022037: step: 558/470, loss: 0.013330896385014057 2023-01-24 06:29:06.738458: step: 560/470, loss: 0.008176986128091812 2023-01-24 06:29:07.523668: step: 562/470, loss: 0.00987264420837164 2023-01-24 06:29:08.342005: step: 564/470, loss: 0.018903588876128197 2023-01-24 06:29:09.039652: step: 566/470, loss: 0.00032135986839421093 2023-01-24 06:29:09.776221: step: 568/470, loss: 0.00034836053964681923 2023-01-24 06:29:10.434072: step: 570/470, loss: 0.006845478434115648 2023-01-24 06:29:11.145099: step: 572/470, loss: 0.01886317878961563 2023-01-24 06:29:11.779646: step: 574/470, loss: 0.0015140185132622719 2023-01-24 06:29:12.538948: step: 576/470, loss: 0.0014535411028191447 2023-01-24 06:29:13.261705: step: 578/470, loss: 0.00014838328934274614 2023-01-24 06:29:13.910111: step: 580/470, loss: 0.005177702754735947 2023-01-24 06:29:14.622558: step: 582/470, loss: 0.0012136365985497832 2023-01-24 06:29:15.413742: step: 584/470, loss: 0.001734656747430563 2023-01-24 06:29:16.175610: step: 586/470, loss: 0.0007220085244625807 2023-01-24 06:29:16.817584: step: 588/470, loss: 0.0014062359696254134 2023-01-24 06:29:17.514185: step: 590/470, loss: 5.9928792325081304e-05 2023-01-24 06:29:18.241044: step: 592/470, loss: 0.03473540395498276 2023-01-24 06:29:18.969825: step: 594/470, loss: 0.01883302815258503 2023-01-24 06:29:19.680585: step: 596/470, loss: 0.008976499550044537 2023-01-24 06:29:20.450228: step: 598/470, loss: 8.038515079533681e-05 2023-01-24 06:29:21.331850: step: 600/470, loss: 0.008133098483085632 2023-01-24 06:29:22.130117: step: 602/470, loss: 0.009850953705608845 2023-01-24 06:29:22.925484: step: 604/470, loss: 0.016613174229860306 2023-01-24 06:29:23.598929: step: 606/470, loss: 0.008490724489092827 2023-01-24 06:29:24.300503: step: 608/470, loss: 0.00034214864717796445 2023-01-24 06:29:25.038030: step: 610/470, loss: 0.010509525425732136 2023-01-24 06:29:25.694532: step: 612/470, loss: 0.0069589437916874886 2023-01-24 06:29:26.555391: step: 614/470, loss: 0.11274606734514236 2023-01-24 06:29:27.268027: step: 616/470, loss: 0.009286433458328247 2023-01-24 06:29:28.096701: step: 618/470, loss: 0.11397167295217514 2023-01-24 06:29:28.814663: step: 620/470, loss: 0.04145457595586777 2023-01-24 06:29:29.501111: step: 622/470, loss: 0.03650517016649246 2023-01-24 06:29:30.255054: step: 624/470, loss: 0.004478362388908863 2023-01-24 06:29:31.038589: step: 626/470, loss: 0.06043071672320366 2023-01-24 06:29:31.748637: step: 628/470, loss: 0.055728744715452194 2023-01-24 06:29:32.535109: step: 630/470, loss: 0.09390320628881454 2023-01-24 06:29:33.178012: step: 632/470, loss: 0.0012103930348530412 2023-01-24 06:29:33.894774: step: 634/470, loss: 0.00010801952157635242 2023-01-24 06:29:34.669725: step: 636/470, loss: 0.0020870454609394073 2023-01-24 06:29:35.388402: step: 638/470, loss: 0.0008572909864597023 2023-01-24 06:29:36.184197: step: 640/470, loss: 0.00878854189068079 2023-01-24 06:29:36.888292: step: 642/470, loss: 0.006001758389174938 2023-01-24 06:29:37.616347: step: 644/470, loss: 0.003095379564911127 2023-01-24 06:29:38.451414: step: 646/470, loss: 0.005022485740482807 2023-01-24 06:29:39.245479: step: 648/470, loss: 0.01622042804956436 2023-01-24 06:29:39.990686: step: 650/470, loss: 0.03620656952261925 2023-01-24 06:29:40.761410: step: 652/470, loss: 0.0006067268550395966 2023-01-24 06:29:41.499928: step: 654/470, loss: 0.0015004700981080532 2023-01-24 06:29:42.344255: step: 656/470, loss: 0.003583112731575966 2023-01-24 06:29:43.025221: step: 658/470, loss: 0.0014826322440057993 2023-01-24 06:29:43.768473: step: 660/470, loss: 0.004717591218650341 2023-01-24 06:29:44.520652: step: 662/470, loss: 0.00042355526238679886 2023-01-24 06:29:45.270043: step: 664/470, loss: 0.005252582021057606 2023-01-24 06:29:46.022965: step: 666/470, loss: 0.011925187893211842 2023-01-24 06:29:46.776598: step: 668/470, loss: 0.005225887056440115 2023-01-24 06:29:47.541361: step: 670/470, loss: 0.04948273301124573 2023-01-24 06:29:48.264558: step: 672/470, loss: 0.0006488541257567704 2023-01-24 06:29:48.962906: step: 674/470, loss: 0.004141774959862232 2023-01-24 06:29:49.703382: step: 676/470, loss: 0.10701893270015717 2023-01-24 06:29:50.426575: step: 678/470, loss: 0.0004678604891523719 2023-01-24 06:29:51.069285: step: 680/470, loss: 0.0048322975635528564 2023-01-24 06:29:51.785885: step: 682/470, loss: 0.023693975061178207 2023-01-24 06:29:52.480534: step: 684/470, loss: 0.00018593238200992346 2023-01-24 06:29:53.245933: step: 686/470, loss: 8.329687989316881e-05 2023-01-24 06:29:53.936403: step: 688/470, loss: 0.0013428764650598168 2023-01-24 06:29:54.644367: step: 690/470, loss: 0.01941107213497162 2023-01-24 06:29:55.467978: step: 692/470, loss: 0.007984393276274204 2023-01-24 06:29:56.251054: step: 694/470, loss: 0.4107176661491394 2023-01-24 06:29:57.018929: step: 696/470, loss: 0.16444319486618042 2023-01-24 06:29:57.764962: step: 698/470, loss: 0.010815066285431385 2023-01-24 06:29:58.591837: step: 700/470, loss: 0.04077745974063873 2023-01-24 06:29:59.374418: step: 702/470, loss: 0.01998460479080677 2023-01-24 06:30:00.214420: step: 704/470, loss: 0.0018847326282411814 2023-01-24 06:30:00.966300: step: 706/470, loss: 0.12863512337207794 2023-01-24 06:30:01.692141: step: 708/470, loss: 0.00011070125765400007 2023-01-24 06:30:02.383050: step: 710/470, loss: 0.0005842326791025698 2023-01-24 06:30:03.122337: step: 712/470, loss: 0.000646944681648165 2023-01-24 06:30:03.908472: step: 714/470, loss: 0.005940971430391073 2023-01-24 06:30:04.699892: step: 716/470, loss: 0.00032181438291445374 2023-01-24 06:30:05.404227: step: 718/470, loss: 0.001542411744594574 2023-01-24 06:30:06.156543: step: 720/470, loss: 0.005175524391233921 2023-01-24 06:30:06.876198: step: 722/470, loss: 0.009312089532613754 2023-01-24 06:30:07.630325: step: 724/470, loss: 0.0014529626350849867 2023-01-24 06:30:08.371332: step: 726/470, loss: 0.012921427376568317 2023-01-24 06:30:09.129296: step: 728/470, loss: 0.010379222221672535 2023-01-24 06:30:09.855152: step: 730/470, loss: 0.02943400666117668 2023-01-24 06:30:10.672349: step: 732/470, loss: 0.004278372973203659 2023-01-24 06:30:11.425137: step: 734/470, loss: 0.002038972917944193 2023-01-24 06:30:12.148571: step: 736/470, loss: 0.008259556256234646 2023-01-24 06:30:12.913449: step: 738/470, loss: 0.005812020972371101 2023-01-24 06:30:13.741196: step: 740/470, loss: 0.0011574298841878772 2023-01-24 06:30:14.512791: step: 742/470, loss: 0.038374532014131546 2023-01-24 06:30:15.268949: step: 744/470, loss: 0.002495008986443281 2023-01-24 06:30:16.013424: step: 746/470, loss: 0.009755531325936317 2023-01-24 06:30:16.742093: step: 748/470, loss: 0.12439113855361938 2023-01-24 06:30:17.397271: step: 750/470, loss: 0.0035697193816304207 2023-01-24 06:30:18.115362: step: 752/470, loss: 0.0054044960997998714 2023-01-24 06:30:18.878952: step: 754/470, loss: 0.012278404086828232 2023-01-24 06:30:19.562693: step: 756/470, loss: 0.009794940240681171 2023-01-24 06:30:20.327485: step: 758/470, loss: 0.002220005262643099 2023-01-24 06:30:21.085026: step: 760/470, loss: 0.012325046584010124 2023-01-24 06:30:21.865179: step: 762/470, loss: 0.018408851698040962 2023-01-24 06:30:22.605410: step: 764/470, loss: 0.01817646436393261 2023-01-24 06:30:23.348165: step: 766/470, loss: 0.02506135031580925 2023-01-24 06:30:24.169565: step: 768/470, loss: 0.014540938660502434 2023-01-24 06:30:24.957000: step: 770/470, loss: 0.004760831594467163 2023-01-24 06:30:25.677925: step: 772/470, loss: 0.07413551956415176 2023-01-24 06:30:26.431072: step: 774/470, loss: 0.03643839806318283 2023-01-24 06:30:27.162056: step: 776/470, loss: 0.0018023299053311348 2023-01-24 06:30:27.939666: step: 778/470, loss: 0.020913278684020042 2023-01-24 06:30:28.706378: step: 780/470, loss: 0.02016664668917656 2023-01-24 06:30:29.429155: step: 782/470, loss: 0.01156390830874443 2023-01-24 06:30:30.281657: step: 784/470, loss: 0.013799360953271389 2023-01-24 06:30:31.001980: step: 786/470, loss: 0.0006355499499477446 2023-01-24 06:30:31.723700: step: 788/470, loss: 0.00016411063552368432 2023-01-24 06:30:32.460459: step: 790/470, loss: 0.0022397139109671116 2023-01-24 06:30:33.201364: step: 792/470, loss: 0.028047997504472733 2023-01-24 06:30:34.036513: step: 794/470, loss: 0.0009197811014018953 2023-01-24 06:30:34.831503: step: 796/470, loss: 0.5110117793083191 2023-01-24 06:30:35.592964: step: 798/470, loss: 0.0024509942159056664 2023-01-24 06:30:36.302705: step: 800/470, loss: 0.3866247832775116 2023-01-24 06:30:37.039117: step: 802/470, loss: 0.027848878875374794 2023-01-24 06:30:37.798498: step: 804/470, loss: 0.004700549878180027 2023-01-24 06:30:38.669736: step: 806/470, loss: 0.0064826603047549725 2023-01-24 06:30:39.347322: step: 808/470, loss: 0.000695836846716702 2023-01-24 06:30:40.106376: step: 810/470, loss: 0.0028286667075008154 2023-01-24 06:30:40.860446: step: 812/470, loss: 0.00968991406261921 2023-01-24 06:30:41.585590: step: 814/470, loss: 0.001139800762757659 2023-01-24 06:30:42.268829: step: 816/470, loss: 0.004776866175234318 2023-01-24 06:30:43.004381: step: 818/470, loss: 0.015945924445986748 2023-01-24 06:30:43.655923: step: 820/470, loss: 0.0016348791541531682 2023-01-24 06:30:44.426625: step: 822/470, loss: 0.02290150709450245 2023-01-24 06:30:45.174011: step: 824/470, loss: 0.046208322048187256 2023-01-24 06:30:45.892065: step: 826/470, loss: 0.0024622592609375715 2023-01-24 06:30:46.667498: step: 828/470, loss: 0.011288406327366829 2023-01-24 06:30:47.417756: step: 830/470, loss: 0.042418159544467926 2023-01-24 06:30:48.111396: step: 832/470, loss: 0.0909082442522049 2023-01-24 06:30:48.732459: step: 834/470, loss: 0.000317727419314906 2023-01-24 06:30:49.359798: step: 836/470, loss: 0.0007268586196005344 2023-01-24 06:30:50.147620: step: 838/470, loss: 0.012510606087744236 2023-01-24 06:30:50.890144: step: 840/470, loss: 0.007081144023686647 2023-01-24 06:30:51.555707: step: 842/470, loss: 0.0002773547312244773 2023-01-24 06:30:52.314779: step: 844/470, loss: 0.002572752069681883 2023-01-24 06:30:52.976863: step: 846/470, loss: 0.024069497361779213 2023-01-24 06:30:53.802093: step: 848/470, loss: 0.0006529639358632267 2023-01-24 06:30:54.458248: step: 850/470, loss: 0.008918379433453083 2023-01-24 06:30:55.162054: step: 852/470, loss: 0.005846343468874693 2023-01-24 06:30:55.830998: step: 854/470, loss: 0.011811641044914722 2023-01-24 06:30:56.511179: step: 856/470, loss: 0.002782166237011552 2023-01-24 06:30:57.280162: step: 858/470, loss: 0.0026911317836493254 2023-01-24 06:30:58.030850: step: 860/470, loss: 0.07831226289272308 2023-01-24 06:30:58.777654: step: 862/470, loss: 0.009529894217848778 2023-01-24 06:30:59.455997: step: 864/470, loss: 0.0002590777294244617 2023-01-24 06:31:00.196172: step: 866/470, loss: 0.14221465587615967 2023-01-24 06:31:00.926086: step: 868/470, loss: 0.03858411684632301 2023-01-24 06:31:01.678304: step: 870/470, loss: 0.020872116088867188 2023-01-24 06:31:02.460054: step: 872/470, loss: 0.0001517470518592745 2023-01-24 06:31:03.145024: step: 874/470, loss: 0.0035211762879043818 2023-01-24 06:31:03.882196: step: 876/470, loss: 0.04481646046042442 2023-01-24 06:31:04.585338: step: 878/470, loss: 0.013270605355501175 2023-01-24 06:31:05.296982: step: 880/470, loss: 0.020657042041420937 2023-01-24 06:31:06.059636: step: 882/470, loss: 0.036236584186553955 2023-01-24 06:31:06.761518: step: 884/470, loss: 0.012062986381351948 2023-01-24 06:31:07.474766: step: 886/470, loss: 0.0015210265992209315 2023-01-24 06:31:08.206896: step: 888/470, loss: 0.004456940107047558 2023-01-24 06:31:08.945363: step: 890/470, loss: 0.01179597806185484 2023-01-24 06:31:09.699148: step: 892/470, loss: 0.0002963356382679194 2023-01-24 06:31:10.563659: step: 894/470, loss: 0.001176392543129623 2023-01-24 06:31:11.362847: step: 896/470, loss: 0.0065690902993083 2023-01-24 06:31:12.127572: step: 898/470, loss: 0.008019453845918179 2023-01-24 06:31:12.879804: step: 900/470, loss: 0.002323372755199671 2023-01-24 06:31:13.589025: step: 902/470, loss: 0.036690160632133484 2023-01-24 06:31:14.277876: step: 904/470, loss: 0.01081377174705267 2023-01-24 06:31:14.979644: step: 906/470, loss: 0.014264862053096294 2023-01-24 06:31:15.709913: step: 908/470, loss: 0.014920435845851898 2023-01-24 06:31:16.412321: step: 910/470, loss: 0.030699947848916054 2023-01-24 06:31:17.110150: step: 912/470, loss: 0.0012479553697630763 2023-01-24 06:31:17.907267: step: 914/470, loss: 0.3528791069984436 2023-01-24 06:31:18.626137: step: 916/470, loss: 0.02547892928123474 2023-01-24 06:31:19.399709: step: 918/470, loss: 0.0035649023484438658 2023-01-24 06:31:20.083445: step: 920/470, loss: 0.0018619990441948175 2023-01-24 06:31:20.751355: step: 922/470, loss: 0.0008721183985471725 2023-01-24 06:31:21.467223: step: 924/470, loss: 0.04525361210107803 2023-01-24 06:31:22.140129: step: 926/470, loss: 0.17319992184638977 2023-01-24 06:31:22.809455: step: 928/470, loss: 0.00043553844443522394 2023-01-24 06:31:23.696824: step: 930/470, loss: 0.007656523957848549 2023-01-24 06:31:24.399013: step: 932/470, loss: 0.003974429797381163 2023-01-24 06:31:25.124410: step: 934/470, loss: 0.028335638344287872 2023-01-24 06:31:25.780158: step: 936/470, loss: 0.016558095812797546 2023-01-24 06:31:26.512205: step: 938/470, loss: 0.008354702033102512 2023-01-24 06:31:27.253256: step: 940/470, loss: 0.023873982951045036 2023-01-24 06:31:27.893381: step: 942/470, loss: 0.25000032782554626 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3413303805774278, 'r': 0.32902435167615435, 'f1': 0.33506441223832534}, 'combined': 0.24688956691245023, 'epoch': 35} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.37088273602874755, 'r': 0.3480811076850013, 'f1': 0.35912034995053155}, 'combined': 0.23941356663368765, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33259877635264634, 'r': 0.33133654190728523, 'f1': 0.33196645928733715}, 'combined': 0.2446068647380379, 'epoch': 35} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3702730126404344, 'r': 0.34252921342241915, 'f1': 0.35586118879514805}, 'combined': 0.23724079253009864, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31492132867132866, 'r': 0.3286655232812728, 'f1': 0.3216466680951361}, 'combined': 0.23700280807010027, 'epoch': 35} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35953759945745345, 'r': 0.34917628535205136, 'f1': 0.3542812018045667}, 'combined': 0.23618746786971107, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.275, 'r': 0.3142857142857143, 'f1': 0.29333333333333333}, 'combined': 0.19555555555555554, 'epoch': 35} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5909090909090909, 'r': 0.2826086956521739, 'f1': 0.38235294117647056}, 'combined': 0.2549019607843137, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5897129186602871, 'r': 0.22368421052631576, 'f1': 0.3243421052631579}, 'combined': 0.21622807017543857, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35514455782312926, 'r': 0.3302103099304238, 'f1': 0.34222386102917074}, 'combined': 0.2521649502320205, 'epoch': 34} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36324986426135314, 'r': 0.3573121260955425, 'f1': 0.3602565304307942}, 'combined': 0.24017102028719609, 'epoch': 34} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2869318181818182, 'r': 0.3607142857142857, 'f1': 0.319620253164557}, 'combined': 0.21308016877637131, 'epoch': 34} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:34:01.092305: step: 2/470, loss: 0.00923900119960308 2023-01-24 06:34:01.823899: step: 4/470, loss: 0.012077202089130878 2023-01-24 06:34:02.641939: step: 6/470, loss: 0.017157871276140213 2023-01-24 06:34:03.372783: step: 8/470, loss: 0.0011838817736133933 2023-01-24 06:34:04.087886: step: 10/470, loss: 0.031294677406549454 2023-01-24 06:34:04.801978: step: 12/470, loss: 0.0029603552538901567 2023-01-24 06:34:05.544364: step: 14/470, loss: 0.0066464850679039955 2023-01-24 06:34:06.276943: step: 16/470, loss: 0.00202410900965333 2023-01-24 06:34:07.042408: step: 18/470, loss: 0.007574394345283508 2023-01-24 06:34:07.790847: step: 20/470, loss: 0.39814719557762146 2023-01-24 06:34:08.531456: step: 22/470, loss: 0.0019493503496050835 2023-01-24 06:34:09.358243: step: 24/470, loss: 0.035010937601327896 2023-01-24 06:34:10.155643: step: 26/470, loss: 0.000734560308046639 2023-01-24 06:34:10.946269: step: 28/470, loss: 0.008018039166927338 2023-01-24 06:34:11.692384: step: 30/470, loss: 0.0227291788905859 2023-01-24 06:34:12.401783: step: 32/470, loss: 0.003483888227492571 2023-01-24 06:34:13.076354: step: 34/470, loss: 0.0005610057269223034 2023-01-24 06:34:13.800716: step: 36/470, loss: 0.0045489352196455 2023-01-24 06:34:14.725463: step: 38/470, loss: 0.04132385924458504 2023-01-24 06:34:15.515137: step: 40/470, loss: 0.002870872151106596 2023-01-24 06:34:16.300923: step: 42/470, loss: 0.005469549912959337 2023-01-24 06:34:16.962657: step: 44/470, loss: 0.004644967149943113 2023-01-24 06:34:17.656654: step: 46/470, loss: 0.0010548782302066684 2023-01-24 06:34:18.362815: step: 48/470, loss: 0.008158699609339237 2023-01-24 06:34:19.086680: step: 50/470, loss: 0.0006612870492972434 2023-01-24 06:34:19.789724: step: 52/470, loss: 0.008797750808298588 2023-01-24 06:34:20.543620: step: 54/470, loss: 0.04867735132575035 2023-01-24 06:34:21.309469: step: 56/470, loss: 0.005971251055598259 2023-01-24 06:34:22.036618: step: 58/470, loss: 0.003688129596412182 2023-01-24 06:34:22.722486: step: 60/470, loss: 0.0009264365653507411 2023-01-24 06:34:23.523415: step: 62/470, loss: 0.010077004320919514 2023-01-24 06:34:24.138970: step: 64/470, loss: 0.0002823990071192384 2023-01-24 06:34:24.839081: step: 66/470, loss: 0.007472657132893801 2023-01-24 06:34:25.574898: step: 68/470, loss: 0.12757711112499237 2023-01-24 06:34:26.317444: step: 70/470, loss: 0.0001368361699860543 2023-01-24 06:34:27.011248: step: 72/470, loss: 0.021803027018904686 2023-01-24 06:34:27.730117: step: 74/470, loss: 0.00901093054562807 2023-01-24 06:34:28.498800: step: 76/470, loss: 0.013547614216804504 2023-01-24 06:34:29.247227: step: 78/470, loss: 0.0005593632231466472 2023-01-24 06:34:29.935488: step: 80/470, loss: 0.0004783176409546286 2023-01-24 06:34:30.657155: step: 82/470, loss: 7.210922194644809e-05 2023-01-24 06:34:31.416417: step: 84/470, loss: 0.0192702803760767 2023-01-24 06:34:32.196345: step: 86/470, loss: 0.005496703088283539 2023-01-24 06:34:32.979682: step: 88/470, loss: 0.023109078407287598 2023-01-24 06:34:33.708079: step: 90/470, loss: 0.005734420381486416 2023-01-24 06:34:34.476800: step: 92/470, loss: 0.24637438356876373 2023-01-24 06:34:35.220086: step: 94/470, loss: 0.02152753807604313 2023-01-24 06:34:35.943920: step: 96/470, loss: 0.005265772808343172 2023-01-24 06:34:36.682269: step: 98/470, loss: 0.012245593592524529 2023-01-24 06:34:37.478601: step: 100/470, loss: 0.0001247525360668078 2023-01-24 06:34:38.303461: step: 102/470, loss: 0.0013826676877215505 2023-01-24 06:34:39.039365: step: 104/470, loss: 0.0027163000777363777 2023-01-24 06:34:39.760823: step: 106/470, loss: 0.005290861241519451 2023-01-24 06:34:40.514576: step: 108/470, loss: 0.009316950105130672 2023-01-24 06:34:41.319596: step: 110/470, loss: 0.1678803563117981 2023-01-24 06:34:41.998198: step: 112/470, loss: 0.09275523573160172 2023-01-24 06:34:42.665605: step: 114/470, loss: 0.0027781319804489613 2023-01-24 06:34:43.291214: step: 116/470, loss: 4.896317113889381e-05 2023-01-24 06:34:44.034905: step: 118/470, loss: 2.4704264433239587e-05 2023-01-24 06:34:44.763944: step: 120/470, loss: 0.004806291311979294 2023-01-24 06:34:45.547800: step: 122/470, loss: 0.09023859351873398 2023-01-24 06:34:46.225729: step: 124/470, loss: 0.0018310233717784286 2023-01-24 06:34:46.932425: step: 126/470, loss: 0.0012032542144879699 2023-01-24 06:34:47.653086: step: 128/470, loss: 0.0027267164550721645 2023-01-24 06:34:48.339633: step: 130/470, loss: 0.0002811734448187053 2023-01-24 06:34:49.124747: step: 132/470, loss: 0.00040184182580560446 2023-01-24 06:34:49.850245: step: 134/470, loss: 0.0015739360824227333 2023-01-24 06:34:50.573415: step: 136/470, loss: 0.00714887585490942 2023-01-24 06:34:51.286336: step: 138/470, loss: 0.022787457332015038 2023-01-24 06:34:52.038124: step: 140/470, loss: 0.00430362019687891 2023-01-24 06:34:52.678234: step: 142/470, loss: 3.319919778732583e-05 2023-01-24 06:34:53.341584: step: 144/470, loss: 0.015644496306777 2023-01-24 06:34:54.062134: step: 146/470, loss: 0.019434552639722824 2023-01-24 06:34:54.819070: step: 148/470, loss: 0.001240216544829309 2023-01-24 06:34:55.587882: step: 150/470, loss: 0.001468593254685402 2023-01-24 06:34:56.301041: step: 152/470, loss: 0.005688677076250315 2023-01-24 06:34:57.045436: step: 154/470, loss: 0.03621711581945419 2023-01-24 06:34:57.839498: step: 156/470, loss: 0.002134978072717786 2023-01-24 06:34:58.637146: step: 158/470, loss: 0.0038371176924556494 2023-01-24 06:34:59.389939: step: 160/470, loss: 0.025045614689588547 2023-01-24 06:35:00.141794: step: 162/470, loss: 0.002029221039265394 2023-01-24 06:35:01.088913: step: 164/470, loss: 0.0003728036826942116 2023-01-24 06:35:01.793564: step: 166/470, loss: 0.00826074555516243 2023-01-24 06:35:02.629207: step: 168/470, loss: 0.003485210472717881 2023-01-24 06:35:03.408336: step: 170/470, loss: 0.002386857522651553 2023-01-24 06:35:04.145964: step: 172/470, loss: 0.010757505893707275 2023-01-24 06:35:04.907959: step: 174/470, loss: 0.00275437138043344 2023-01-24 06:35:05.541003: step: 176/470, loss: 0.007767565548419952 2023-01-24 06:35:06.327399: step: 178/470, loss: 0.9939495921134949 2023-01-24 06:35:07.040479: step: 180/470, loss: 0.0013092899462208152 2023-01-24 06:35:07.792137: step: 182/470, loss: 0.003127588424831629 2023-01-24 06:35:08.534928: step: 184/470, loss: 0.0006091459654271603 2023-01-24 06:35:09.236774: step: 186/470, loss: 0.03590903431177139 2023-01-24 06:35:09.973018: step: 188/470, loss: 0.024302396923303604 2023-01-24 06:35:10.758154: step: 190/470, loss: 0.0021148929372429848 2023-01-24 06:35:11.440847: step: 192/470, loss: 0.011187209747731686 2023-01-24 06:35:12.098253: step: 194/470, loss: 0.004178525879979134 2023-01-24 06:35:12.843379: step: 196/470, loss: 0.033812280744314194 2023-01-24 06:35:13.506615: step: 198/470, loss: 0.009949339553713799 2023-01-24 06:35:14.206680: step: 200/470, loss: 0.016883907839655876 2023-01-24 06:35:14.961731: step: 202/470, loss: 0.000815562263596803 2023-01-24 06:35:15.737469: step: 204/470, loss: 0.010811416432261467 2023-01-24 06:35:16.459440: step: 206/470, loss: 0.029568077996373177 2023-01-24 06:35:17.226039: step: 208/470, loss: 0.07659181952476501 2023-01-24 06:35:17.921649: step: 210/470, loss: 0.0033108582720160484 2023-01-24 06:35:18.676488: step: 212/470, loss: 0.011148279532790184 2023-01-24 06:35:19.472154: step: 214/470, loss: 0.07719606906175613 2023-01-24 06:35:20.159567: step: 216/470, loss: 0.00038251784280873835 2023-01-24 06:35:20.824203: step: 218/470, loss: 0.002361322520300746 2023-01-24 06:35:21.556591: step: 220/470, loss: 0.005027166102081537 2023-01-24 06:35:22.382192: step: 222/470, loss: 0.005157764069736004 2023-01-24 06:35:23.047698: step: 224/470, loss: 0.001214994816109538 2023-01-24 06:35:23.918382: step: 226/470, loss: 0.016031332314014435 2023-01-24 06:35:24.572269: step: 228/470, loss: 0.0022524199448525906 2023-01-24 06:35:25.358622: step: 230/470, loss: 0.011084591038525105 2023-01-24 06:35:25.986367: step: 232/470, loss: 0.0007433760329149663 2023-01-24 06:35:26.671348: step: 234/470, loss: 0.000476872461149469 2023-01-24 06:35:27.356141: step: 236/470, loss: 0.007138427346944809 2023-01-24 06:35:28.043291: step: 238/470, loss: 0.010517815127968788 2023-01-24 06:35:28.783640: step: 240/470, loss: 0.007859362289309502 2023-01-24 06:35:29.493548: step: 242/470, loss: 0.002640694146975875 2023-01-24 06:35:30.260453: step: 244/470, loss: 0.0009882624726742506 2023-01-24 06:35:31.008713: step: 246/470, loss: 0.41087213158607483 2023-01-24 06:35:31.693332: step: 248/470, loss: 0.003996912389993668 2023-01-24 06:35:32.437268: step: 250/470, loss: 0.0006755517679266632 2023-01-24 06:35:33.159977: step: 252/470, loss: 0.7485092878341675 2023-01-24 06:35:33.862726: step: 254/470, loss: 0.014115111902356148 2023-01-24 06:35:34.622965: step: 256/470, loss: 0.0011114904191344976 2023-01-24 06:35:35.365390: step: 258/470, loss: 0.003493404248729348 2023-01-24 06:35:36.055623: step: 260/470, loss: 0.005939795169979334 2023-01-24 06:35:36.777156: step: 262/470, loss: 0.002627335721626878 2023-01-24 06:35:37.535194: step: 264/470, loss: 0.005035010632127523 2023-01-24 06:35:38.233284: step: 266/470, loss: 0.0006927988724783063 2023-01-24 06:35:38.997408: step: 268/470, loss: 0.0007211231859400868 2023-01-24 06:35:39.729556: step: 270/470, loss: 0.011166412383317947 2023-01-24 06:35:40.352504: step: 272/470, loss: 0.001943384064361453 2023-01-24 06:35:41.187343: step: 274/470, loss: 0.0034223003312945366 2023-01-24 06:35:41.876138: step: 276/470, loss: 0.0008902418776415288 2023-01-24 06:35:42.540149: step: 278/470, loss: 1.600859104655683e-05 2023-01-24 06:35:43.282264: step: 280/470, loss: 0.009566079825162888 2023-01-24 06:35:43.983833: step: 282/470, loss: 0.014527286402881145 2023-01-24 06:35:44.658176: step: 284/470, loss: 0.0004398828314151615 2023-01-24 06:35:45.404970: step: 286/470, loss: 0.0003213112649973482 2023-01-24 06:35:46.098903: step: 288/470, loss: 0.015395899303257465 2023-01-24 06:35:46.835920: step: 290/470, loss: 0.0060600400902330875 2023-01-24 06:35:47.471874: step: 292/470, loss: 0.043014369904994965 2023-01-24 06:35:48.258309: step: 294/470, loss: 0.0011471402831375599 2023-01-24 06:35:48.999011: step: 296/470, loss: 0.005423126742243767 2023-01-24 06:35:49.672849: step: 298/470, loss: 0.002312835305929184 2023-01-24 06:35:50.480465: step: 300/470, loss: 3.738845043699257e-05 2023-01-24 06:35:51.233377: step: 302/470, loss: 0.0018065288895741105 2023-01-24 06:35:51.974292: step: 304/470, loss: 0.0027574519626796246 2023-01-24 06:35:52.699648: step: 306/470, loss: 0.01440048310905695 2023-01-24 06:35:53.337505: step: 308/470, loss: 0.0008997022523544729 2023-01-24 06:35:53.982989: step: 310/470, loss: 6.702099199173972e-05 2023-01-24 06:35:54.778010: step: 312/470, loss: 0.0026232399977743626 2023-01-24 06:35:55.544971: step: 314/470, loss: 0.024350032210350037 2023-01-24 06:35:56.292708: step: 316/470, loss: 0.001658335910178721 2023-01-24 06:35:57.156375: step: 318/470, loss: 0.17800642549991608 2023-01-24 06:35:58.004159: step: 320/470, loss: 0.007545904256403446 2023-01-24 06:35:58.706050: step: 322/470, loss: 0.005511886440217495 2023-01-24 06:35:59.463778: step: 324/470, loss: 0.00106968788895756 2023-01-24 06:36:00.230797: step: 326/470, loss: 0.009400931186974049 2023-01-24 06:36:01.012522: step: 328/470, loss: 0.09563762694597244 2023-01-24 06:36:01.836076: step: 330/470, loss: 0.032428767532110214 2023-01-24 06:36:02.526017: step: 332/470, loss: 0.0007735613035038114 2023-01-24 06:36:03.249690: step: 334/470, loss: 0.0012304666452109814 2023-01-24 06:36:03.971187: step: 336/470, loss: 0.009064443409442902 2023-01-24 06:36:04.686541: step: 338/470, loss: 0.013549595139920712 2023-01-24 06:36:05.325853: step: 340/470, loss: 0.005491095595061779 2023-01-24 06:36:06.071651: step: 342/470, loss: 0.05642802268266678 2023-01-24 06:36:06.837166: step: 344/470, loss: 0.0018173677381128073 2023-01-24 06:36:07.612600: step: 346/470, loss: 5.139792442321777 2023-01-24 06:36:08.334648: step: 348/470, loss: 0.02769005112349987 2023-01-24 06:36:09.030758: step: 350/470, loss: 0.000412216002587229 2023-01-24 06:36:09.738447: step: 352/470, loss: 0.003857748582959175 2023-01-24 06:36:10.430559: step: 354/470, loss: 2.7695212338585407e-05 2023-01-24 06:36:11.137129: step: 356/470, loss: 0.018569406121969223 2023-01-24 06:36:11.783580: step: 358/470, loss: 0.012955770827829838 2023-01-24 06:36:12.560724: step: 360/470, loss: 0.010661961510777473 2023-01-24 06:36:13.253126: step: 362/470, loss: 0.00027638301253318787 2023-01-24 06:36:13.993132: step: 364/470, loss: 0.007698203437030315 2023-01-24 06:36:14.770298: step: 366/470, loss: 0.04173200950026512 2023-01-24 06:36:15.505947: step: 368/470, loss: 0.024436986073851585 2023-01-24 06:36:16.263699: step: 370/470, loss: 0.01597761921584606 2023-01-24 06:36:16.984646: step: 372/470, loss: 0.0023119584657251835 2023-01-24 06:36:17.720370: step: 374/470, loss: 0.014497130177915096 2023-01-24 06:36:18.470696: step: 376/470, loss: 0.050365082919597626 2023-01-24 06:36:19.266112: step: 378/470, loss: 0.0003797081299126148 2023-01-24 06:36:19.933484: step: 380/470, loss: 0.008779437281191349 2023-01-24 06:36:20.719076: step: 382/470, loss: 0.0028053114656358957 2023-01-24 06:36:21.406621: step: 384/470, loss: 0.03160949423909187 2023-01-24 06:36:22.079263: step: 386/470, loss: 0.0006880006403662264 2023-01-24 06:36:22.835512: step: 388/470, loss: 0.04208315163850784 2023-01-24 06:36:23.524834: step: 390/470, loss: 0.048570651561021805 2023-01-24 06:36:24.199711: step: 392/470, loss: 0.00653655594214797 2023-01-24 06:36:24.981080: step: 394/470, loss: 0.007898114621639252 2023-01-24 06:36:25.634276: step: 396/470, loss: 0.004265481140464544 2023-01-24 06:36:26.338000: step: 398/470, loss: 0.01859181746840477 2023-01-24 06:36:27.067668: step: 400/470, loss: 0.08174117654561996 2023-01-24 06:36:27.818848: step: 402/470, loss: 0.0019423263147473335 2023-01-24 06:36:28.624782: step: 404/470, loss: 0.8433279991149902 2023-01-24 06:36:29.334175: step: 406/470, loss: 0.01536853052675724 2023-01-24 06:36:30.086047: step: 408/470, loss: 0.0003358535177540034 2023-01-24 06:36:30.930249: step: 410/470, loss: 0.0031478151213377714 2023-01-24 06:36:31.649094: step: 412/470, loss: 0.013405255042016506 2023-01-24 06:36:32.439687: step: 414/470, loss: 0.016643131151795387 2023-01-24 06:36:33.294143: step: 416/470, loss: 0.02678093872964382 2023-01-24 06:36:33.979609: step: 418/470, loss: 0.020490285009145737 2023-01-24 06:36:34.710185: step: 420/470, loss: 0.0006113100098446012 2023-01-24 06:36:35.437545: step: 422/470, loss: 0.002850792370736599 2023-01-24 06:36:36.087691: step: 424/470, loss: 0.006809963844716549 2023-01-24 06:36:36.752659: step: 426/470, loss: 0.004680205602198839 2023-01-24 06:36:37.515071: step: 428/470, loss: 0.05235651135444641 2023-01-24 06:36:38.130733: step: 430/470, loss: 0.0019032071577385068 2023-01-24 06:36:38.890253: step: 432/470, loss: 0.002714117057621479 2023-01-24 06:36:39.573714: step: 434/470, loss: 4.397636803332716e-05 2023-01-24 06:36:40.305357: step: 436/470, loss: 0.004602306988090277 2023-01-24 06:36:41.010427: step: 438/470, loss: 0.002536438638344407 2023-01-24 06:36:41.635479: step: 440/470, loss: 0.0079796202480793 2023-01-24 06:36:42.333197: step: 442/470, loss: 0.0008243197808042169 2023-01-24 06:36:43.077448: step: 444/470, loss: 0.5589166283607483 2023-01-24 06:36:43.796383: step: 446/470, loss: 0.0018179480684921145 2023-01-24 06:36:44.444191: step: 448/470, loss: 0.0001185851579066366 2023-01-24 06:36:45.184383: step: 450/470, loss: 0.0011897934600710869 2023-01-24 06:36:45.934185: step: 452/470, loss: 0.00027267372934147716 2023-01-24 06:36:46.681218: step: 454/470, loss: 0.0025439695455133915 2023-01-24 06:36:47.350519: step: 456/470, loss: 0.002703068545088172 2023-01-24 06:36:48.096290: step: 458/470, loss: 0.005375206470489502 2023-01-24 06:36:48.763366: step: 460/470, loss: 0.19524531066417694 2023-01-24 06:36:49.462833: step: 462/470, loss: 0.010033880360424519 2023-01-24 06:36:50.223331: step: 464/470, loss: 0.012586308643221855 2023-01-24 06:36:50.962734: step: 466/470, loss: 0.0472489595413208 2023-01-24 06:36:51.640387: step: 468/470, loss: 0.0008478930103592575 2023-01-24 06:36:52.376437: step: 470/470, loss: 0.11491312831640244 2023-01-24 06:36:53.132378: step: 472/470, loss: 0.009833801537752151 2023-01-24 06:36:53.868155: step: 474/470, loss: 0.0006098474841564894 2023-01-24 06:36:54.560025: step: 476/470, loss: 0.008477217517793179 2023-01-24 06:36:55.338392: step: 478/470, loss: 0.01668260060250759 2023-01-24 06:36:56.017888: step: 480/470, loss: 0.0023414173629134893 2023-01-24 06:36:56.742177: step: 482/470, loss: 0.00018246278341393918 2023-01-24 06:36:57.406902: step: 484/470, loss: 0.12619908154010773 2023-01-24 06:36:58.150639: step: 486/470, loss: 0.2414586991071701 2023-01-24 06:36:58.765349: step: 488/470, loss: 0.010544035583734512 2023-01-24 06:36:59.437903: step: 490/470, loss: 0.005296720191836357 2023-01-24 06:37:00.124841: step: 492/470, loss: 0.00037122564390301704 2023-01-24 06:37:00.856776: step: 494/470, loss: 0.03093797340989113 2023-01-24 06:37:01.613465: step: 496/470, loss: 0.008792520500719547 2023-01-24 06:37:02.354588: step: 498/470, loss: 0.00578495254740119 2023-01-24 06:37:03.063422: step: 500/470, loss: 0.001827099360525608 2023-01-24 06:37:03.825065: step: 502/470, loss: 0.0032651459332555532 2023-01-24 06:37:04.510498: step: 504/470, loss: 0.006231415551155806 2023-01-24 06:37:05.248568: step: 506/470, loss: 0.007668264210224152 2023-01-24 06:37:05.985914: step: 508/470, loss: 0.008598407730460167 2023-01-24 06:37:06.699728: step: 510/470, loss: 0.00026979786343872547 2023-01-24 06:37:07.415436: step: 512/470, loss: 0.0024315055925399065 2023-01-24 06:37:08.105308: step: 514/470, loss: 0.004124908242374659 2023-01-24 06:37:08.807749: step: 516/470, loss: 0.016314754262566566 2023-01-24 06:37:09.505085: step: 518/470, loss: 0.0018118376610800624 2023-01-24 06:37:10.323189: step: 520/470, loss: 0.015621883794665337 2023-01-24 06:37:11.075607: step: 522/470, loss: 0.9467869400978088 2023-01-24 06:37:11.882804: step: 524/470, loss: 0.0005159341963008046 2023-01-24 06:37:12.550163: step: 526/470, loss: 0.0463469959795475 2023-01-24 06:37:13.239696: step: 528/470, loss: 0.006032614037394524 2023-01-24 06:37:13.988043: step: 530/470, loss: 0.0176707673817873 2023-01-24 06:37:14.703000: step: 532/470, loss: 0.0011732151033356786 2023-01-24 06:37:15.420225: step: 534/470, loss: 0.00020226027118042111 2023-01-24 06:37:16.158636: step: 536/470, loss: 0.021612750366330147 2023-01-24 06:37:16.870918: step: 538/470, loss: 0.00013169506564736366 2023-01-24 06:37:17.587149: step: 540/470, loss: 0.003677097614854574 2023-01-24 06:37:18.263134: step: 542/470, loss: 0.0020855306647717953 2023-01-24 06:37:18.984512: step: 544/470, loss: 0.0180249884724617 2023-01-24 06:37:19.665192: step: 546/470, loss: 0.40944904088974 2023-01-24 06:37:20.369705: step: 548/470, loss: 0.03017675317823887 2023-01-24 06:37:21.095546: step: 550/470, loss: 0.0034764918964356184 2023-01-24 06:37:21.867491: step: 552/470, loss: 0.034397684037685394 2023-01-24 06:37:22.657378: step: 554/470, loss: 0.03786651790142059 2023-01-24 06:37:23.375886: step: 556/470, loss: 5.748906914959662e-05 2023-01-24 06:37:24.101880: step: 558/470, loss: 0.010671430267393589 2023-01-24 06:37:24.707476: step: 560/470, loss: 0.004713733680546284 2023-01-24 06:37:25.497012: step: 562/470, loss: 0.009778480976819992 2023-01-24 06:37:26.267663: step: 564/470, loss: 0.030256683006882668 2023-01-24 06:37:27.032138: step: 566/470, loss: 0.16448631882667542 2023-01-24 06:37:27.736292: step: 568/470, loss: 0.031042061746120453 2023-01-24 06:37:28.378443: step: 570/470, loss: 0.0012754781637340784 2023-01-24 06:37:29.095836: step: 572/470, loss: 4.709012864623219e-05 2023-01-24 06:37:29.845422: step: 574/470, loss: 0.0009633488371036947 2023-01-24 06:37:30.530211: step: 576/470, loss: 0.004429661203175783 2023-01-24 06:37:31.274392: step: 578/470, loss: 0.002976109506562352 2023-01-24 06:37:31.973188: step: 580/470, loss: 7.336642738664523e-05 2023-01-24 06:37:32.711729: step: 582/470, loss: 0.002431001979857683 2023-01-24 06:37:33.466317: step: 584/470, loss: 0.0037843480240553617 2023-01-24 06:37:34.135056: step: 586/470, loss: 0.010521448217332363 2023-01-24 06:37:34.891838: step: 588/470, loss: 0.025577588006854057 2023-01-24 06:37:35.678676: step: 590/470, loss: 1.7032889445545152e-05 2023-01-24 06:37:36.356140: step: 592/470, loss: 0.009300955571234226 2023-01-24 06:37:37.042498: step: 594/470, loss: 0.0038660001009702682 2023-01-24 06:37:37.777661: step: 596/470, loss: 0.015712929889559746 2023-01-24 06:37:38.499079: step: 598/470, loss: 0.0012648508418351412 2023-01-24 06:37:39.128857: step: 600/470, loss: 1.5697081835241988e-05 2023-01-24 06:37:39.912353: step: 602/470, loss: 0.004670980852097273 2023-01-24 06:37:40.640334: step: 604/470, loss: 0.0012620283523574471 2023-01-24 06:37:41.510435: step: 606/470, loss: 0.03995297849178314 2023-01-24 06:37:42.246001: step: 608/470, loss: 0.0038986399304121733 2023-01-24 06:37:42.901139: step: 610/470, loss: 0.00033180887112393975 2023-01-24 06:37:43.650234: step: 612/470, loss: 0.2208339273929596 2023-01-24 06:37:44.348646: step: 614/470, loss: 0.005182696972042322 2023-01-24 06:37:45.029087: step: 616/470, loss: 0.05351833254098892 2023-01-24 06:37:45.703527: step: 618/470, loss: 0.002840483095496893 2023-01-24 06:37:46.453129: step: 620/470, loss: 0.02025892585515976 2023-01-24 06:37:47.157156: step: 622/470, loss: 0.0013204108690842986 2023-01-24 06:37:47.920163: step: 624/470, loss: 0.013018092140555382 2023-01-24 06:37:48.634094: step: 626/470, loss: 0.012406433932483196 2023-01-24 06:37:49.395114: step: 628/470, loss: 0.05976390466094017 2023-01-24 06:37:50.127011: step: 630/470, loss: 0.026684027165174484 2023-01-24 06:37:50.825873: step: 632/470, loss: 0.02948051132261753 2023-01-24 06:37:51.553548: step: 634/470, loss: 0.007042787969112396 2023-01-24 06:37:52.228914: step: 636/470, loss: 0.0019455266883596778 2023-01-24 06:37:52.997208: step: 638/470, loss: 0.0003431853256188333 2023-01-24 06:37:53.737080: step: 640/470, loss: 7.0864763983991e-05 2023-01-24 06:37:54.485454: step: 642/470, loss: 0.0003391270583961159 2023-01-24 06:37:55.221021: step: 644/470, loss: 4.537831409834325e-05 2023-01-24 06:37:55.891069: step: 646/470, loss: 0.0003427540068514645 2023-01-24 06:37:56.687139: step: 648/470, loss: 0.0021758859511464834 2023-01-24 06:37:57.395733: step: 650/470, loss: 0.005326189566403627 2023-01-24 06:37:58.173255: step: 652/470, loss: 0.0031783096492290497 2023-01-24 06:37:58.892647: step: 654/470, loss: 0.008024906739592552 2023-01-24 06:37:59.551775: step: 656/470, loss: 0.02158765122294426 2023-01-24 06:38:00.239200: step: 658/470, loss: 0.002388355555012822 2023-01-24 06:38:00.925258: step: 660/470, loss: 0.06495095044374466 2023-01-24 06:38:01.646969: step: 662/470, loss: 8.486651495331898e-05 2023-01-24 06:38:02.386686: step: 664/470, loss: 0.0028081880882382393 2023-01-24 06:38:03.101184: step: 666/470, loss: 0.0018558679148554802 2023-01-24 06:38:03.929982: step: 668/470, loss: 0.06942654401063919 2023-01-24 06:38:04.673739: step: 670/470, loss: 0.07627329230308533 2023-01-24 06:38:05.373162: step: 672/470, loss: 0.001862462260760367 2023-01-24 06:38:06.094454: step: 674/470, loss: 0.05258966609835625 2023-01-24 06:38:06.819261: step: 676/470, loss: 0.0007247717585414648 2023-01-24 06:38:07.496633: step: 678/470, loss: 0.018573222681879997 2023-01-24 06:38:08.273365: step: 680/470, loss: 0.0053161317482590675 2023-01-24 06:38:08.979524: step: 682/470, loss: 0.00019386372878216207 2023-01-24 06:38:09.650123: step: 684/470, loss: 0.13742390275001526 2023-01-24 06:38:10.422225: step: 686/470, loss: 0.0006456512492150068 2023-01-24 06:38:11.195344: step: 688/470, loss: 0.18807780742645264 2023-01-24 06:38:11.970366: step: 690/470, loss: 0.028313491493463516 2023-01-24 06:38:12.677110: step: 692/470, loss: 0.0024669389240443707 2023-01-24 06:38:13.442259: step: 694/470, loss: 0.03522627428174019 2023-01-24 06:38:14.174952: step: 696/470, loss: 0.010675939731299877 2023-01-24 06:38:14.835092: step: 698/470, loss: 0.0003027912462130189 2023-01-24 06:38:15.533535: step: 700/470, loss: 0.002517101587727666 2023-01-24 06:38:16.153596: step: 702/470, loss: 0.014464635401964188 2023-01-24 06:38:16.799101: step: 704/470, loss: 0.0031239360105246305 2023-01-24 06:38:17.501394: step: 706/470, loss: 0.00918420311063528 2023-01-24 06:38:18.210229: step: 708/470, loss: 0.001296902191825211 2023-01-24 06:38:18.959469: step: 710/470, loss: 0.022592980414628983 2023-01-24 06:38:19.737978: step: 712/470, loss: 0.00858447514474392 2023-01-24 06:38:20.464367: step: 714/470, loss: 0.0009968002559617162 2023-01-24 06:38:21.305589: step: 716/470, loss: 0.01551523432135582 2023-01-24 06:38:22.009693: step: 718/470, loss: 0.0007260640268214047 2023-01-24 06:38:22.749622: step: 720/470, loss: 0.35326698422431946 2023-01-24 06:38:23.483240: step: 722/470, loss: 0.01254792045801878 2023-01-24 06:38:24.163724: step: 724/470, loss: 0.0011204167967662215 2023-01-24 06:38:24.968725: step: 726/470, loss: 0.009652595967054367 2023-01-24 06:38:25.747390: step: 728/470, loss: 0.0007612319895997643 2023-01-24 06:38:26.485223: step: 730/470, loss: 0.08440537005662918 2023-01-24 06:38:27.187694: step: 732/470, loss: 0.0013828004011884332 2023-01-24 06:38:27.889483: step: 734/470, loss: 0.03739370405673981 2023-01-24 06:38:28.687330: step: 736/470, loss: 0.009630827233195305 2023-01-24 06:38:29.388172: step: 738/470, loss: 0.002095034345984459 2023-01-24 06:38:30.195232: step: 740/470, loss: 0.010538318194448948 2023-01-24 06:38:30.931106: step: 742/470, loss: 0.014126413501799107 2023-01-24 06:38:31.546383: step: 744/470, loss: 4.5405220589600503e-05 2023-01-24 06:38:32.324030: step: 746/470, loss: 0.9865442514419556 2023-01-24 06:38:33.094496: step: 748/470, loss: 0.7587553262710571 2023-01-24 06:38:33.766095: step: 750/470, loss: 0.004690216854214668 2023-01-24 06:38:34.502152: step: 752/470, loss: 0.016793884336948395 2023-01-24 06:38:35.214904: step: 754/470, loss: 0.003269862150773406 2023-01-24 06:38:35.933884: step: 756/470, loss: 0.018169786781072617 2023-01-24 06:38:36.650223: step: 758/470, loss: 0.02258247695863247 2023-01-24 06:38:37.338316: step: 760/470, loss: 5.565112587646581e-05 2023-01-24 06:38:38.011760: step: 762/470, loss: 0.009303715080022812 2023-01-24 06:38:38.790653: step: 764/470, loss: 0.017047366127371788 2023-01-24 06:38:39.546611: step: 766/470, loss: 0.015494248829782009 2023-01-24 06:38:40.270024: step: 768/470, loss: 0.026896020397543907 2023-01-24 06:38:40.981412: step: 770/470, loss: 0.005921604577451944 2023-01-24 06:38:41.748442: step: 772/470, loss: 0.001984368311241269 2023-01-24 06:38:42.432307: step: 774/470, loss: 0.002783828182145953 2023-01-24 06:38:43.158571: step: 776/470, loss: 0.019098268821835518 2023-01-24 06:38:43.981193: step: 778/470, loss: 0.005505918525159359 2023-01-24 06:38:44.721074: step: 780/470, loss: 0.24571584165096283 2023-01-24 06:38:45.399386: step: 782/470, loss: 0.0006462166784331203 2023-01-24 06:38:46.075398: step: 784/470, loss: 0.0012267071288079023 2023-01-24 06:38:46.749599: step: 786/470, loss: 0.004911855328828096 2023-01-24 06:38:47.466560: step: 788/470, loss: 0.011736730113625526 2023-01-24 06:38:48.179363: step: 790/470, loss: 0.00543177267536521 2023-01-24 06:38:49.101512: step: 792/470, loss: 0.024822987616062164 2023-01-24 06:38:49.874692: step: 794/470, loss: 0.011290385387837887 2023-01-24 06:38:50.621385: step: 796/470, loss: 0.0001863948127720505 2023-01-24 06:38:51.435202: step: 798/470, loss: 0.0004333317338023335 2023-01-24 06:38:52.112032: step: 800/470, loss: 0.0003069574595429003 2023-01-24 06:38:52.840546: step: 802/470, loss: 0.001023111748509109 2023-01-24 06:38:53.605037: step: 804/470, loss: 0.002765122102573514 2023-01-24 06:38:54.297817: step: 806/470, loss: 0.0706728845834732 2023-01-24 06:38:55.121848: step: 808/470, loss: 0.001774181961081922 2023-01-24 06:38:55.866790: step: 810/470, loss: 0.0005504356813617051 2023-01-24 06:38:56.744724: step: 812/470, loss: 0.024189863353967667 2023-01-24 06:38:57.489519: step: 814/470, loss: 0.019002093002200127 2023-01-24 06:38:58.232820: step: 816/470, loss: 0.0006984842475503683 2023-01-24 06:38:58.919458: step: 818/470, loss: 0.0062248483300209045 2023-01-24 06:38:59.695409: step: 820/470, loss: 0.0006082578329369426 2023-01-24 06:39:00.475194: step: 822/470, loss: 0.0046651409938931465 2023-01-24 06:39:01.166217: step: 824/470, loss: 0.0008501000702381134 2023-01-24 06:39:01.822002: step: 826/470, loss: 0.008006863296031952 2023-01-24 06:39:02.544204: step: 828/470, loss: 0.0064245061948895454 2023-01-24 06:39:03.187083: step: 830/470, loss: 0.0001547907741041854 2023-01-24 06:39:03.847842: step: 832/470, loss: 0.0005969268968328834 2023-01-24 06:39:04.587270: step: 834/470, loss: 0.04945269599556923 2023-01-24 06:39:05.402939: step: 836/470, loss: 0.0009366283193230629 2023-01-24 06:39:06.168868: step: 838/470, loss: 0.0020159182604402304 2023-01-24 06:39:06.885359: step: 840/470, loss: 0.14137539267539978 2023-01-24 06:39:07.661977: step: 842/470, loss: 0.012791264802217484 2023-01-24 06:39:08.299943: step: 844/470, loss: 0.0020705685019493103 2023-01-24 06:39:09.062310: step: 846/470, loss: 0.006297953426837921 2023-01-24 06:39:09.835587: step: 848/470, loss: 0.01519366167485714 2023-01-24 06:39:10.632717: step: 850/470, loss: 0.001254754257388413 2023-01-24 06:39:11.455890: step: 852/470, loss: 0.00023478205548599362 2023-01-24 06:39:12.178387: step: 854/470, loss: 0.004564228001981974 2023-01-24 06:39:12.861826: step: 856/470, loss: 0.0048842052929103374 2023-01-24 06:39:13.545406: step: 858/470, loss: 0.020877385511994362 2023-01-24 06:39:14.296510: step: 860/470, loss: 0.18510141968727112 2023-01-24 06:39:15.063971: step: 862/470, loss: 0.011671909131109715 2023-01-24 06:39:15.803077: step: 864/470, loss: 0.02993558533489704 2023-01-24 06:39:16.467451: step: 866/470, loss: 0.0056413402780890465 2023-01-24 06:39:17.186058: step: 868/470, loss: 0.009789801202714443 2023-01-24 06:39:17.956753: step: 870/470, loss: 0.029731469228863716 2023-01-24 06:39:18.698887: step: 872/470, loss: 0.01826680265367031 2023-01-24 06:39:19.424492: step: 874/470, loss: 0.0003056666173506528 2023-01-24 06:39:20.198391: step: 876/470, loss: 0.007411897648125887 2023-01-24 06:39:21.069953: step: 878/470, loss: 0.00305022019892931 2023-01-24 06:39:21.753254: step: 880/470, loss: 5.6819328165147454e-05 2023-01-24 06:39:22.497320: step: 882/470, loss: 0.0015559961320832372 2023-01-24 06:39:23.264837: step: 884/470, loss: 0.03191646188497543 2023-01-24 06:39:24.089319: step: 886/470, loss: 0.00044945150148123503 2023-01-24 06:39:24.784286: step: 888/470, loss: 0.00564240338280797 2023-01-24 06:39:25.506472: step: 890/470, loss: 0.0024683803785592318 2023-01-24 06:39:26.242004: step: 892/470, loss: 0.025083282962441444 2023-01-24 06:39:26.949596: step: 894/470, loss: 0.007105534430593252 2023-01-24 06:39:27.618894: step: 896/470, loss: 0.0048616016283631325 2023-01-24 06:39:28.389002: step: 898/470, loss: 0.03480706736445427 2023-01-24 06:39:29.180991: step: 900/470, loss: 0.0010672089410945773 2023-01-24 06:39:29.961926: step: 902/470, loss: 0.006357176695019007 2023-01-24 06:39:30.698005: step: 904/470, loss: 0.002066312823444605 2023-01-24 06:39:31.392521: step: 906/470, loss: 9.631262946641073e-05 2023-01-24 06:39:32.100206: step: 908/470, loss: 0.001976320054382086 2023-01-24 06:39:33.023341: step: 910/470, loss: 0.00038530846359208226 2023-01-24 06:39:33.774626: step: 912/470, loss: 0.0007559002260677516 2023-01-24 06:39:34.501838: step: 914/470, loss: 0.004526065196841955 2023-01-24 06:39:35.264496: step: 916/470, loss: 0.0027980429586023092 2023-01-24 06:39:36.026714: step: 918/470, loss: 0.001787687069736421 2023-01-24 06:39:36.749997: step: 920/470, loss: 0.0017783924704417586 2023-01-24 06:39:37.482415: step: 922/470, loss: 0.010765299201011658 2023-01-24 06:39:38.349889: step: 924/470, loss: 0.005860594101250172 2023-01-24 06:39:39.102419: step: 926/470, loss: 0.009120491333305836 2023-01-24 06:39:39.918286: step: 928/470, loss: 0.051197804510593414 2023-01-24 06:39:40.818989: step: 930/470, loss: 0.1064954325556755 2023-01-24 06:39:41.534282: step: 932/470, loss: 0.008499711751937866 2023-01-24 06:39:42.242517: step: 934/470, loss: 0.03420386090874672 2023-01-24 06:39:43.011181: step: 936/470, loss: 0.001930433209054172 2023-01-24 06:39:43.742758: step: 938/470, loss: 0.06531298905611038 2023-01-24 06:39:44.410810: step: 940/470, loss: 0.048529766499996185 2023-01-24 06:39:44.999169: step: 942/470, loss: 0.0032194419763982296 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32540717900063254, 'r': 0.32540717900063254, 'f1': 0.32540717900063254}, 'combined': 0.23977371084257132, 'epoch': 36} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3575342246374415, 'r': 0.35340882973777876, 'f1': 0.3554595579567601}, 'combined': 0.23697303863784, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32211879416208694, 'r': 0.32517494970442173, 'f1': 0.3236396572128994}, 'combined': 0.23847132636739954, 'epoch': 36} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35427441644897656, 'r': 0.3478020953792356, 'f1': 0.35100842231383317}, 'combined': 0.23400561487588872, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30395153985507245, 'r': 0.31837049335863377, 'f1': 0.31099397590361444}, 'combined': 0.22915345592897904, 'epoch': 36} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3506173343423147, 'r': 0.3651140125891604, 'f1': 0.3577188630171708}, 'combined': 0.23847924201144716, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25290697674418605, 'r': 0.3107142857142857, 'f1': 0.27884615384615385}, 'combined': 0.1858974358974359, 'epoch': 36} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5588235294117647, 'r': 0.41304347826086957, 'f1': 0.475}, 'combined': 0.31666666666666665, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4633458646616541, 'r': 0.22368421052631576, 'f1': 0.3017135862913096}, 'combined': 0.20114239086087307, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35514455782312926, 'r': 0.3302103099304238, 'f1': 0.34222386102917074}, 'combined': 0.2521649502320205, 'epoch': 34} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36324986426135314, 'r': 0.3573121260955425, 'f1': 0.3602565304307942}, 'combined': 0.24017102028719609, 'epoch': 34} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2869318181818182, 'r': 0.3607142857142857, 'f1': 0.319620253164557}, 'combined': 0.21308016877637131, 'epoch': 34} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:42:19.192260: step: 2/470, loss: 0.0016411297256127 2023-01-24 06:42:20.068495: step: 4/470, loss: 0.016578450798988342 2023-01-24 06:42:20.838674: step: 6/470, loss: 0.00037532756687141955 2023-01-24 06:42:21.561308: step: 8/470, loss: 0.0027909998316317797 2023-01-24 06:42:22.289623: step: 10/470, loss: 0.08526574820280075 2023-01-24 06:42:23.022047: step: 12/470, loss: 0.00015779869863763452 2023-01-24 06:42:23.731339: step: 14/470, loss: 0.0017313596326857805 2023-01-24 06:42:24.477299: step: 16/470, loss: 0.00044223369332030416 2023-01-24 06:42:25.209457: step: 18/470, loss: 0.06428509205579758 2023-01-24 06:42:25.994194: step: 20/470, loss: 0.00421540392562747 2023-01-24 06:42:26.702373: step: 22/470, loss: 2.9487378924386576e-05 2023-01-24 06:42:27.515856: step: 24/470, loss: 0.0027779489755630493 2023-01-24 06:42:28.184159: step: 26/470, loss: 0.00016546357073821127 2023-01-24 06:42:28.837991: step: 28/470, loss: 0.006498188711702824 2023-01-24 06:42:29.548953: step: 30/470, loss: 0.0014709843089804053 2023-01-24 06:42:30.315722: step: 32/470, loss: 0.0002651438699103892 2023-01-24 06:42:31.009428: step: 34/470, loss: 0.7554433941841125 2023-01-24 06:42:31.682017: step: 36/470, loss: 0.00971017125993967 2023-01-24 06:42:32.408316: step: 38/470, loss: 0.002934138523414731 2023-01-24 06:42:33.077155: step: 40/470, loss: 0.00081524538109079 2023-01-24 06:42:33.718922: step: 42/470, loss: 0.003965584561228752 2023-01-24 06:42:34.455374: step: 44/470, loss: 0.26607316732406616 2023-01-24 06:42:35.182219: step: 46/470, loss: 0.37619858980178833 2023-01-24 06:42:35.991272: step: 48/470, loss: 0.20665885508060455 2023-01-24 06:42:36.754157: step: 50/470, loss: 0.0007290860521607101 2023-01-24 06:42:37.513642: step: 52/470, loss: 0.032187577337026596 2023-01-24 06:42:38.213443: step: 54/470, loss: 0.006359893828630447 2023-01-24 06:42:38.969446: step: 56/470, loss: 0.000407334475312382 2023-01-24 06:42:39.747586: step: 58/470, loss: 0.004570923279970884 2023-01-24 06:42:40.451834: step: 60/470, loss: 0.02728239819407463 2023-01-24 06:42:41.286815: step: 62/470, loss: 0.17052745819091797 2023-01-24 06:42:42.021302: step: 64/470, loss: 0.007626243866980076 2023-01-24 06:42:42.729628: step: 66/470, loss: 0.0001989319862332195 2023-01-24 06:42:43.474638: step: 68/470, loss: 0.011886470019817352 2023-01-24 06:42:44.139306: step: 70/470, loss: 0.0021775467321276665 2023-01-24 06:42:44.887000: step: 72/470, loss: 0.0016382178291678429 2023-01-24 06:42:45.670703: step: 74/470, loss: 0.005465012509375811 2023-01-24 06:42:46.457006: step: 76/470, loss: 0.004183304961770773 2023-01-24 06:42:47.175459: step: 78/470, loss: 0.029515385627746582 2023-01-24 06:42:47.929347: step: 80/470, loss: 0.0003540659963618964 2023-01-24 06:42:48.631540: step: 82/470, loss: 0.00043878774158656597 2023-01-24 06:42:49.396239: step: 84/470, loss: 0.0017511650221422315 2023-01-24 06:42:50.111242: step: 86/470, loss: 0.0043872627429664135 2023-01-24 06:42:50.877490: step: 88/470, loss: 0.4596310257911682 2023-01-24 06:42:51.582598: step: 90/470, loss: 0.006851941347122192 2023-01-24 06:42:52.293828: step: 92/470, loss: 0.015803921967744827 2023-01-24 06:42:53.031331: step: 94/470, loss: 0.014629622921347618 2023-01-24 06:42:53.858911: step: 96/470, loss: 0.01233682967722416 2023-01-24 06:42:54.646022: step: 98/470, loss: 0.0026958484668284655 2023-01-24 06:42:55.354948: step: 100/470, loss: 0.005636794026941061 2023-01-24 06:42:56.064981: step: 102/470, loss: 0.00012584813521243632 2023-01-24 06:42:56.831404: step: 104/470, loss: 0.006042279303073883 2023-01-24 06:42:57.480842: step: 106/470, loss: 5.00088935950771e-05 2023-01-24 06:42:58.164933: step: 108/470, loss: 0.0024630522821098566 2023-01-24 06:42:58.921968: step: 110/470, loss: 0.045400720089673996 2023-01-24 06:42:59.648081: step: 112/470, loss: 0.006315591745078564 2023-01-24 06:43:00.424200: step: 114/470, loss: 3.898659269907512e-05 2023-01-24 06:43:01.155438: step: 116/470, loss: 0.0008913876954466105 2023-01-24 06:43:01.923856: step: 118/470, loss: 0.007831503637135029 2023-01-24 06:43:02.739062: step: 120/470, loss: 0.0005501203122548759 2023-01-24 06:43:03.440794: step: 122/470, loss: 0.06952209770679474 2023-01-24 06:43:04.238407: step: 124/470, loss: 0.002455966779962182 2023-01-24 06:43:04.992693: step: 126/470, loss: 0.018089069053530693 2023-01-24 06:43:05.753930: step: 128/470, loss: 0.0002797857450786978 2023-01-24 06:43:06.500267: step: 130/470, loss: 0.0014697941951453686 2023-01-24 06:43:07.293758: step: 132/470, loss: 0.027464300394058228 2023-01-24 06:43:08.077323: step: 134/470, loss: 0.0019714029040187597 2023-01-24 06:43:08.827058: step: 136/470, loss: 0.00593600096181035 2023-01-24 06:43:09.541819: step: 138/470, loss: 0.008636260405182838 2023-01-24 06:43:10.282591: step: 140/470, loss: 0.021064892411231995 2023-01-24 06:43:11.010794: step: 142/470, loss: 2.5779121642699465e-05 2023-01-24 06:43:11.764676: step: 144/470, loss: 1.2797374438378029e-05 2023-01-24 06:43:12.540547: step: 146/470, loss: 0.05897778272628784 2023-01-24 06:43:13.372347: step: 148/470, loss: 0.016995619982481003 2023-01-24 06:43:14.089131: step: 150/470, loss: 0.0013439225731417537 2023-01-24 06:43:14.792326: step: 152/470, loss: 0.056216444820165634 2023-01-24 06:43:15.468707: step: 154/470, loss: 0.0074424357153475285 2023-01-24 06:43:16.210807: step: 156/470, loss: 0.2076435685157776 2023-01-24 06:43:16.926554: step: 158/470, loss: 0.0012701174709945917 2023-01-24 06:43:17.656125: step: 160/470, loss: 0.00368306296877563 2023-01-24 06:43:18.494090: step: 162/470, loss: 0.05626552179455757 2023-01-24 06:43:19.239073: step: 164/470, loss: 0.01470974925905466 2023-01-24 06:43:19.908695: step: 166/470, loss: 0.1756785660982132 2023-01-24 06:43:20.591213: step: 168/470, loss: 0.0030123700853437185 2023-01-24 06:43:21.309044: step: 170/470, loss: 0.002067750785499811 2023-01-24 06:43:22.062197: step: 172/470, loss: 3.085620846832171e-05 2023-01-24 06:43:22.882472: step: 174/470, loss: 0.014414145611226559 2023-01-24 06:43:23.653805: step: 176/470, loss: 0.03165549039840698 2023-01-24 06:43:24.411120: step: 178/470, loss: 0.014181282371282578 2023-01-24 06:43:25.110083: step: 180/470, loss: 1.0798722505569458 2023-01-24 06:43:25.765604: step: 182/470, loss: 0.0001561841054353863 2023-01-24 06:43:26.434039: step: 184/470, loss: 0.015310071408748627 2023-01-24 06:43:27.405224: step: 186/470, loss: 0.022418607026338577 2023-01-24 06:43:28.144303: step: 188/470, loss: 0.07125722616910934 2023-01-24 06:43:28.910736: step: 190/470, loss: 0.013494039885699749 2023-01-24 06:43:29.643780: step: 192/470, loss: 0.015244124457240105 2023-01-24 06:43:30.336989: step: 194/470, loss: 0.01948845013976097 2023-01-24 06:43:31.035391: step: 196/470, loss: 0.00848582573235035 2023-01-24 06:43:31.761788: step: 198/470, loss: 0.049408018589019775 2023-01-24 06:43:32.492739: step: 200/470, loss: 0.003815028350800276 2023-01-24 06:43:33.231445: step: 202/470, loss: 0.05077585205435753 2023-01-24 06:43:34.051818: step: 204/470, loss: 0.0009153272258117795 2023-01-24 06:43:34.824451: step: 206/470, loss: 0.00020322480122558773 2023-01-24 06:43:35.595707: step: 208/470, loss: 0.00020252318063285202 2023-01-24 06:43:36.366185: step: 210/470, loss: 0.0242539644241333 2023-01-24 06:43:37.133001: step: 212/470, loss: 0.029064660891890526 2023-01-24 06:43:37.812389: step: 214/470, loss: 0.0016236408846452832 2023-01-24 06:43:38.533777: step: 216/470, loss: 0.12475449591875076 2023-01-24 06:43:39.168654: step: 218/470, loss: 0.0002371451264480129 2023-01-24 06:43:39.887757: step: 220/470, loss: 0.011168582364916801 2023-01-24 06:43:40.603900: step: 222/470, loss: 0.005518828984349966 2023-01-24 06:43:41.281340: step: 224/470, loss: 0.010224402882158756 2023-01-24 06:43:42.069259: step: 226/470, loss: 0.020215503871440887 2023-01-24 06:43:42.872809: step: 228/470, loss: 0.0007321978337131441 2023-01-24 06:43:43.601516: step: 230/470, loss: 2.0057850633747876e-05 2023-01-24 06:43:44.414811: step: 232/470, loss: 0.04708104953169823 2023-01-24 06:43:45.072171: step: 234/470, loss: 0.0007134419283829629 2023-01-24 06:43:45.782100: step: 236/470, loss: 0.7298058271408081 2023-01-24 06:43:46.442082: step: 238/470, loss: 0.007942304015159607 2023-01-24 06:43:47.161088: step: 240/470, loss: 0.00015898249694146216 2023-01-24 06:43:47.896971: step: 242/470, loss: 0.023740533739328384 2023-01-24 06:43:48.757797: step: 244/470, loss: 0.004855903331190348 2023-01-24 06:43:49.454683: step: 246/470, loss: 0.0001292641827603802 2023-01-24 06:43:50.224778: step: 248/470, loss: 0.0025803775060921907 2023-01-24 06:43:50.943704: step: 250/470, loss: 0.5248620510101318 2023-01-24 06:43:51.690942: step: 252/470, loss: 0.010610873810946941 2023-01-24 06:43:52.356593: step: 254/470, loss: 0.02230040729045868 2023-01-24 06:43:53.072305: step: 256/470, loss: 0.023776421323418617 2023-01-24 06:43:53.770743: step: 258/470, loss: 0.0019985968247056007 2023-01-24 06:43:54.559104: step: 260/470, loss: 0.07004007697105408 2023-01-24 06:43:55.359079: step: 262/470, loss: 0.0001779235026333481 2023-01-24 06:43:56.095684: step: 264/470, loss: 0.00566717516630888 2023-01-24 06:43:56.745544: step: 266/470, loss: 0.00029370139236561954 2023-01-24 06:43:57.464628: step: 268/470, loss: 0.0012920513981953263 2023-01-24 06:43:58.209049: step: 270/470, loss: 0.001267312210984528 2023-01-24 06:43:58.889478: step: 272/470, loss: 0.011984679847955704 2023-01-24 06:43:59.541209: step: 274/470, loss: 0.018051162362098694 2023-01-24 06:44:00.259841: step: 276/470, loss: 0.00023922794207464904 2023-01-24 06:44:01.008514: step: 278/470, loss: 0.016359608620405197 2023-01-24 06:44:01.694767: step: 280/470, loss: 0.011056328192353249 2023-01-24 06:44:02.414112: step: 282/470, loss: 0.01704174093902111 2023-01-24 06:44:03.120830: step: 284/470, loss: 0.0023452085442841053 2023-01-24 06:44:03.780928: step: 286/470, loss: 0.0006594705628231168 2023-01-24 06:44:04.553149: step: 288/470, loss: 0.00027242524083703756 2023-01-24 06:44:05.320395: step: 290/470, loss: 0.006094738841056824 2023-01-24 06:44:06.019941: step: 292/470, loss: 0.0011509230826050043 2023-01-24 06:44:06.790048: step: 294/470, loss: 0.008679484948515892 2023-01-24 06:44:07.566931: step: 296/470, loss: 0.027196824550628662 2023-01-24 06:44:08.218436: step: 298/470, loss: 3.1080591725185513e-05 2023-01-24 06:44:08.970128: step: 300/470, loss: 0.001202079583890736 2023-01-24 06:44:09.691431: step: 302/470, loss: 0.013225025497376919 2023-01-24 06:44:10.471496: step: 304/470, loss: 0.015826869755983353 2023-01-24 06:44:11.129262: step: 306/470, loss: 0.0002728473045863211 2023-01-24 06:44:11.926541: step: 308/470, loss: 0.0064653316512703896 2023-01-24 06:44:12.698051: step: 310/470, loss: 0.005568230524659157 2023-01-24 06:44:13.485497: step: 312/470, loss: 8.150991925504059e-05 2023-01-24 06:44:14.252057: step: 314/470, loss: 0.005682247690856457 2023-01-24 06:44:14.940005: step: 316/470, loss: 0.0011145909084007144 2023-01-24 06:44:15.648060: step: 318/470, loss: 0.0007300268625840545 2023-01-24 06:44:16.360422: step: 320/470, loss: 0.00024394701176788658 2023-01-24 06:44:17.082384: step: 322/470, loss: 0.0013543710811063647 2023-01-24 06:44:17.848184: step: 324/470, loss: 0.015326937660574913 2023-01-24 06:44:18.545616: step: 326/470, loss: 0.004613461904227734 2023-01-24 06:44:19.255785: step: 328/470, loss: 1.1873652510985266e-05 2023-01-24 06:44:19.987106: step: 330/470, loss: 0.0037322877906262875 2023-01-24 06:44:20.668501: step: 332/470, loss: 0.001422520843334496 2023-01-24 06:44:21.368342: step: 334/470, loss: 0.003986767493188381 2023-01-24 06:44:22.098241: step: 336/470, loss: 0.0021966679487377405 2023-01-24 06:44:22.882382: step: 338/470, loss: 0.03222150355577469 2023-01-24 06:44:23.542829: step: 340/470, loss: 0.0010147334542125463 2023-01-24 06:44:24.292714: step: 342/470, loss: 0.010336131788790226 2023-01-24 06:44:24.993930: step: 344/470, loss: 0.006794884335249662 2023-01-24 06:44:25.739774: step: 346/470, loss: 0.009388666599988937 2023-01-24 06:44:26.443926: step: 348/470, loss: 0.004102275241166353 2023-01-24 06:44:27.208148: step: 350/470, loss: 0.00270704529248178 2023-01-24 06:44:27.891351: step: 352/470, loss: 2.3283802875084803e-05 2023-01-24 06:44:28.565053: step: 354/470, loss: 0.000149372877785936 2023-01-24 06:44:29.331259: step: 356/470, loss: 0.015503468923270702 2023-01-24 06:44:29.991911: step: 358/470, loss: 0.013661712408065796 2023-01-24 06:44:30.737043: step: 360/470, loss: 0.029292738065123558 2023-01-24 06:44:31.488132: step: 362/470, loss: 0.07505685836076736 2023-01-24 06:44:32.259302: step: 364/470, loss: 0.005268405191600323 2023-01-24 06:44:32.973456: step: 366/470, loss: 4.039578925585374e-05 2023-01-24 06:44:33.891464: step: 368/470, loss: 0.040998730808496475 2023-01-24 06:44:34.649951: step: 370/470, loss: 0.010451802052557468 2023-01-24 06:44:35.484908: step: 372/470, loss: 0.2076258659362793 2023-01-24 06:44:36.220717: step: 374/470, loss: 0.010212671011686325 2023-01-24 06:44:36.869697: step: 376/470, loss: 0.00308050075545907 2023-01-24 06:44:37.579220: step: 378/470, loss: 2.2757983207702637 2023-01-24 06:44:38.325404: step: 380/470, loss: 0.08818937093019485 2023-01-24 06:44:39.081790: step: 382/470, loss: 0.2878647744655609 2023-01-24 06:44:39.785101: step: 384/470, loss: 0.0031312655191868544 2023-01-24 06:44:40.494014: step: 386/470, loss: 0.002897688653320074 2023-01-24 06:44:41.249711: step: 388/470, loss: 0.02370820753276348 2023-01-24 06:44:42.014680: step: 390/470, loss: 0.005385119933634996 2023-01-24 06:44:42.765236: step: 392/470, loss: 0.002497171750292182 2023-01-24 06:44:43.444152: step: 394/470, loss: 0.006736780051141977 2023-01-24 06:44:44.120532: step: 396/470, loss: 0.0072611235082149506 2023-01-24 06:44:44.829851: step: 398/470, loss: 0.0004087547422386706 2023-01-24 06:44:45.541585: step: 400/470, loss: 0.00022048353275749832 2023-01-24 06:44:46.249262: step: 402/470, loss: 0.0018256985349580646 2023-01-24 06:44:46.920683: step: 404/470, loss: 0.0001288076746277511 2023-01-24 06:44:47.604294: step: 406/470, loss: 0.004111488815397024 2023-01-24 06:44:48.337622: step: 408/470, loss: 0.0005288118845783174 2023-01-24 06:44:49.092470: step: 410/470, loss: 0.0015056623378768563 2023-01-24 06:44:49.959937: step: 412/470, loss: 0.10391915589570999 2023-01-24 06:44:50.669662: step: 414/470, loss: 0.00016978340863715857 2023-01-24 06:44:51.474559: step: 416/470, loss: 0.006940767168998718 2023-01-24 06:44:52.123258: step: 418/470, loss: 0.4740176498889923 2023-01-24 06:44:52.813789: step: 420/470, loss: 0.00903422199189663 2023-01-24 06:44:53.532682: step: 422/470, loss: 1.154094934463501 2023-01-24 06:44:54.311583: step: 424/470, loss: 0.0007773156394250691 2023-01-24 06:44:55.077970: step: 426/470, loss: 0.00015792468911968172 2023-01-24 06:44:55.825353: step: 428/470, loss: 0.0006084730848670006 2023-01-24 06:44:56.547901: step: 430/470, loss: 0.003304409794509411 2023-01-24 06:44:57.306229: step: 432/470, loss: 0.0006498509901575744 2023-01-24 06:44:57.943402: step: 434/470, loss: 0.014365673996508121 2023-01-24 06:44:58.685914: step: 436/470, loss: 0.0002987508487422019 2023-01-24 06:44:59.509770: step: 438/470, loss: 0.03105132095515728 2023-01-24 06:45:00.148284: step: 440/470, loss: 5.6159387895604596e-05 2023-01-24 06:45:00.999457: step: 442/470, loss: 0.0020124197471886873 2023-01-24 06:45:01.684138: step: 444/470, loss: 0.002125130034983158 2023-01-24 06:45:02.410150: step: 446/470, loss: 0.0010889448458328843 2023-01-24 06:45:03.125205: step: 448/470, loss: 3.070883030886762e-05 2023-01-24 06:45:03.818202: step: 450/470, loss: 0.005335522815585136 2023-01-24 06:45:04.590973: step: 452/470, loss: 0.01190522313117981 2023-01-24 06:45:05.291640: step: 454/470, loss: 0.00037295298534445465 2023-01-24 06:45:05.988796: step: 456/470, loss: 0.005229064263403416 2023-01-24 06:45:06.737418: step: 458/470, loss: 0.0013408676022663713 2023-01-24 06:45:07.535280: step: 460/470, loss: 0.0023357283789664507 2023-01-24 06:45:08.263742: step: 462/470, loss: 0.0028918476309627295 2023-01-24 06:45:09.022002: step: 464/470, loss: 0.0005529711488634348 2023-01-24 06:45:09.710499: step: 466/470, loss: 0.00048034434439614415 2023-01-24 06:45:10.460366: step: 468/470, loss: 1.2877992048743181e-05 2023-01-24 06:45:11.206695: step: 470/470, loss: 0.09141544997692108 2023-01-24 06:45:11.943641: step: 472/470, loss: 0.15473003685474396 2023-01-24 06:45:12.730363: step: 474/470, loss: 0.0005995671381242573 2023-01-24 06:45:13.415336: step: 476/470, loss: 0.00010133895557373762 2023-01-24 06:45:14.172397: step: 478/470, loss: 0.006271105259656906 2023-01-24 06:45:14.926037: step: 480/470, loss: 0.02504108101129532 2023-01-24 06:45:15.699932: step: 482/470, loss: 0.0013932195724919438 2023-01-24 06:45:16.494688: step: 484/470, loss: 0.0003886119229719043 2023-01-24 06:45:17.208012: step: 486/470, loss: 0.00025422993348911405 2023-01-24 06:45:17.959541: step: 488/470, loss: 0.0017747258534654975 2023-01-24 06:45:18.633670: step: 490/470, loss: 0.016103271394968033 2023-01-24 06:45:19.314571: step: 492/470, loss: 0.003346665995195508 2023-01-24 06:45:20.077128: step: 494/470, loss: 0.00019028606766369194 2023-01-24 06:45:20.737600: step: 496/470, loss: 6.881119043100625e-05 2023-01-24 06:45:21.518127: step: 498/470, loss: 0.000268188159679994 2023-01-24 06:45:22.275016: step: 500/470, loss: 0.0006737832445651293 2023-01-24 06:45:23.118712: step: 502/470, loss: 0.007682368624955416 2023-01-24 06:45:23.796991: step: 504/470, loss: 1.9424080164753832e-05 2023-01-24 06:45:24.533652: step: 506/470, loss: 7.247896428452805e-05 2023-01-24 06:45:25.215870: step: 508/470, loss: 0.006508438847959042 2023-01-24 06:45:25.965579: step: 510/470, loss: 0.008590683341026306 2023-01-24 06:45:26.683888: step: 512/470, loss: 0.019543835893273354 2023-01-24 06:45:27.447100: step: 514/470, loss: 0.30058515071868896 2023-01-24 06:45:28.148859: step: 516/470, loss: 2.5567267584847286e-05 2023-01-24 06:45:28.857334: step: 518/470, loss: 0.022811856120824814 2023-01-24 06:45:29.650771: step: 520/470, loss: 0.00019056579913012683 2023-01-24 06:45:30.362495: step: 522/470, loss: 0.006042586639523506 2023-01-24 06:45:31.010823: step: 524/470, loss: 0.009766626171767712 2023-01-24 06:45:31.742493: step: 526/470, loss: 0.0004236418753862381 2023-01-24 06:45:32.416622: step: 528/470, loss: 0.0460224524140358 2023-01-24 06:45:33.157970: step: 530/470, loss: 0.03648354113101959 2023-01-24 06:45:33.955278: step: 532/470, loss: 0.026953857392072678 2023-01-24 06:45:34.779365: step: 534/470, loss: 0.001078708446584642 2023-01-24 06:45:35.469869: step: 536/470, loss: 0.0006425505271181464 2023-01-24 06:45:36.141464: step: 538/470, loss: 0.013254445046186447 2023-01-24 06:45:36.874225: step: 540/470, loss: 0.005868022330105305 2023-01-24 06:45:37.613044: step: 542/470, loss: 0.000866345944814384 2023-01-24 06:45:38.300362: step: 544/470, loss: 0.019351843744516373 2023-01-24 06:45:39.036126: step: 546/470, loss: 0.0006408991175703704 2023-01-24 06:45:39.745895: step: 548/470, loss: 0.007142396178096533 2023-01-24 06:45:40.442731: step: 550/470, loss: 0.0009534017299301922 2023-01-24 06:45:41.186281: step: 552/470, loss: 0.0083243353292346 2023-01-24 06:45:41.868225: step: 554/470, loss: 0.007734335493296385 2023-01-24 06:45:42.614778: step: 556/470, loss: 0.0021097231656312943 2023-01-24 06:45:43.403823: step: 558/470, loss: 0.10651249438524246 2023-01-24 06:45:44.195382: step: 560/470, loss: 0.06708988547325134 2023-01-24 06:45:44.896997: step: 562/470, loss: 0.012138977646827698 2023-01-24 06:45:45.596690: step: 564/470, loss: 0.0041181351989507675 2023-01-24 06:45:46.322281: step: 566/470, loss: 0.004253920167684555 2023-01-24 06:45:47.046146: step: 568/470, loss: 0.009514007717370987 2023-01-24 06:45:47.835870: step: 570/470, loss: 1.9646420696517453e-05 2023-01-24 06:45:48.574969: step: 572/470, loss: 0.0004975633346475661 2023-01-24 06:45:49.268105: step: 574/470, loss: 1.6465276075905422e-06 2023-01-24 06:45:49.943700: step: 576/470, loss: 0.03554369881749153 2023-01-24 06:45:50.588887: step: 578/470, loss: 0.0003723879635799676 2023-01-24 06:45:51.380458: step: 580/470, loss: 0.0021262504160404205 2023-01-24 06:45:52.153962: step: 582/470, loss: 0.006091665010899305 2023-01-24 06:45:52.907079: step: 584/470, loss: 0.07040276378393173 2023-01-24 06:45:53.652618: step: 586/470, loss: 0.007872075773775578 2023-01-24 06:45:54.338469: step: 588/470, loss: 0.010333622805774212 2023-01-24 06:45:55.048556: step: 590/470, loss: 0.003944714087992907 2023-01-24 06:45:55.815091: step: 592/470, loss: 0.08302487432956696 2023-01-24 06:45:56.581240: step: 594/470, loss: 0.002667823573574424 2023-01-24 06:45:57.363625: step: 596/470, loss: 0.0011115572415292263 2023-01-24 06:45:58.137401: step: 598/470, loss: 0.0013725311728194356 2023-01-24 06:45:58.824312: step: 600/470, loss: 0.01085622701793909 2023-01-24 06:45:59.545636: step: 602/470, loss: 0.011929565109312534 2023-01-24 06:46:00.318477: step: 604/470, loss: 0.5936955809593201 2023-01-24 06:46:01.047913: step: 606/470, loss: 0.0015288189752027392 2023-01-24 06:46:01.755913: step: 608/470, loss: 0.004490552004426718 2023-01-24 06:46:02.516586: step: 610/470, loss: 0.01219299528747797 2023-01-24 06:46:03.248490: step: 612/470, loss: 0.004125348757952452 2023-01-24 06:46:03.953494: step: 614/470, loss: 0.0011770040728151798 2023-01-24 06:46:04.666847: step: 616/470, loss: 0.0035097640939056873 2023-01-24 06:46:05.432954: step: 618/470, loss: 0.04082076996564865 2023-01-24 06:46:06.089145: step: 620/470, loss: 0.04343542456626892 2023-01-24 06:46:06.855907: step: 622/470, loss: 0.0005687833181582391 2023-01-24 06:46:07.653797: step: 624/470, loss: 0.02319457195699215 2023-01-24 06:46:08.438208: step: 626/470, loss: 0.020851243287324905 2023-01-24 06:46:09.225089: step: 628/470, loss: 0.0018202860374003649 2023-01-24 06:46:09.953405: step: 630/470, loss: 0.006153845693916082 2023-01-24 06:46:10.682624: step: 632/470, loss: 0.004557922948151827 2023-01-24 06:46:11.488992: step: 634/470, loss: 0.018611159175634384 2023-01-24 06:46:12.228415: step: 636/470, loss: 0.0369689054787159 2023-01-24 06:46:13.014511: step: 638/470, loss: 0.002355735981836915 2023-01-24 06:46:13.832814: step: 640/470, loss: 0.0005931655177846551 2023-01-24 06:46:14.559472: step: 642/470, loss: 0.03275495022535324 2023-01-24 06:46:15.274093: step: 644/470, loss: 0.0019692745991051197 2023-01-24 06:46:16.044637: step: 646/470, loss: 0.0005796861951239407 2023-01-24 06:46:16.855516: step: 648/470, loss: 0.0003035140107385814 2023-01-24 06:46:17.701749: step: 650/470, loss: 0.00021470840147230774 2023-01-24 06:46:18.407189: step: 652/470, loss: 0.010880122892558575 2023-01-24 06:46:19.163866: step: 654/470, loss: 2.0693125406978652e-05 2023-01-24 06:46:19.877483: step: 656/470, loss: 0.0021833537612110376 2023-01-24 06:46:20.627370: step: 658/470, loss: 0.010350065305829048 2023-01-24 06:46:21.382084: step: 660/470, loss: 0.00021858404215890914 2023-01-24 06:46:22.068570: step: 662/470, loss: 0.000333769858116284 2023-01-24 06:46:22.798938: step: 664/470, loss: 0.004476209171116352 2023-01-24 06:46:23.585663: step: 666/470, loss: 0.01990194246172905 2023-01-24 06:46:24.336436: step: 668/470, loss: 0.09436644613742828 2023-01-24 06:46:25.054044: step: 670/470, loss: 0.00025006639771163464 2023-01-24 06:46:25.825360: step: 672/470, loss: 0.050850946456193924 2023-01-24 06:46:26.511774: step: 674/470, loss: 0.0038154199719429016 2023-01-24 06:46:27.260339: step: 676/470, loss: 0.0026396666653454304 2023-01-24 06:46:28.040574: step: 678/470, loss: 0.02135513350367546 2023-01-24 06:46:28.763909: step: 680/470, loss: 0.001105017145164311 2023-01-24 06:46:29.599441: step: 682/470, loss: 0.009239214472472668 2023-01-24 06:46:30.394643: step: 684/470, loss: 0.11062158644199371 2023-01-24 06:46:31.137162: step: 686/470, loss: 7.758984429528937e-05 2023-01-24 06:46:31.965776: step: 688/470, loss: 0.007189847994595766 2023-01-24 06:46:32.747042: step: 690/470, loss: 0.004308292642235756 2023-01-24 06:46:33.393339: step: 692/470, loss: 0.0004358371370472014 2023-01-24 06:46:34.067001: step: 694/470, loss: 0.2612048089504242 2023-01-24 06:46:34.809521: step: 696/470, loss: 9.558172314427793e-05 2023-01-24 06:46:35.501745: step: 698/470, loss: 0.00466720899567008 2023-01-24 06:46:36.299535: step: 700/470, loss: 0.0009645888931117952 2023-01-24 06:46:37.129541: step: 702/470, loss: 0.009687711484730244 2023-01-24 06:46:37.921674: step: 704/470, loss: 0.03733493387699127 2023-01-24 06:46:38.654284: step: 706/470, loss: 0.0338343009352684 2023-01-24 06:46:39.459370: step: 708/470, loss: 0.0013095543254166842 2023-01-24 06:46:40.206847: step: 710/470, loss: 0.0019843606278300285 2023-01-24 06:46:40.966917: step: 712/470, loss: 0.0003942087641917169 2023-01-24 06:46:41.721425: step: 714/470, loss: 0.004477455280721188 2023-01-24 06:46:42.469363: step: 716/470, loss: 0.00019188599253538996 2023-01-24 06:46:43.156520: step: 718/470, loss: 0.012036411091685295 2023-01-24 06:46:43.782908: step: 720/470, loss: 0.0012441710568964481 2023-01-24 06:46:44.452711: step: 722/470, loss: 0.02268451265990734 2023-01-24 06:46:45.200767: step: 724/470, loss: 0.04499243572354317 2023-01-24 06:46:45.938900: step: 726/470, loss: 0.000995938084088266 2023-01-24 06:46:46.740405: step: 728/470, loss: 0.002610167022794485 2023-01-24 06:46:47.470387: step: 730/470, loss: 0.001554196118377149 2023-01-24 06:46:48.179901: step: 732/470, loss: 0.0007086883997544646 2023-01-24 06:46:48.879077: step: 734/470, loss: 0.00042572562233544886 2023-01-24 06:46:49.625346: step: 736/470, loss: 0.004255416337400675 2023-01-24 06:46:50.361149: step: 738/470, loss: 0.006742789875715971 2023-01-24 06:46:51.034604: step: 740/470, loss: 0.0009496554266661406 2023-01-24 06:46:51.747232: step: 742/470, loss: 0.001538438955321908 2023-01-24 06:46:52.466231: step: 744/470, loss: 0.002146479906514287 2023-01-24 06:46:53.163518: step: 746/470, loss: 0.00030127508216537535 2023-01-24 06:46:53.809903: step: 748/470, loss: 4.851120593230007e-06 2023-01-24 06:46:54.532689: step: 750/470, loss: 0.22810763120651245 2023-01-24 06:46:55.294906: step: 752/470, loss: 0.0003023869649041444 2023-01-24 06:46:56.079869: step: 754/470, loss: 0.0018333548214286566 2023-01-24 06:46:56.939144: step: 756/470, loss: 0.012261569499969482 2023-01-24 06:46:57.628360: step: 758/470, loss: 3.2106316211866215e-05 2023-01-24 06:46:58.311487: step: 760/470, loss: 0.0017151250503957272 2023-01-24 06:46:59.027464: step: 762/470, loss: 0.08843082189559937 2023-01-24 06:46:59.786292: step: 764/470, loss: 0.0009025583858601749 2023-01-24 06:47:00.564077: step: 766/470, loss: 0.03692952170968056 2023-01-24 06:47:01.309665: step: 768/470, loss: 0.0004031884600408375 2023-01-24 06:47:02.055395: step: 770/470, loss: 0.06433983892202377 2023-01-24 06:47:02.876734: step: 772/470, loss: 0.013428415171802044 2023-01-24 06:47:03.701425: step: 774/470, loss: 0.00031575208413414657 2023-01-24 06:47:04.417820: step: 776/470, loss: 0.00019189363229088485 2023-01-24 06:47:05.125844: step: 778/470, loss: 0.0002521543647162616 2023-01-24 06:47:05.939839: step: 780/470, loss: 0.012361546978354454 2023-01-24 06:47:06.614086: step: 782/470, loss: 0.0014749522088095546 2023-01-24 06:47:07.478317: step: 784/470, loss: 0.004103075712919235 2023-01-24 06:47:08.232256: step: 786/470, loss: 0.015317887999117374 2023-01-24 06:47:08.940366: step: 788/470, loss: 0.007157180458307266 2023-01-24 06:47:09.637000: step: 790/470, loss: 0.03482293710112572 2023-01-24 06:47:10.361277: step: 792/470, loss: 0.0006050001247785985 2023-01-24 06:47:11.097787: step: 794/470, loss: 0.006788891274482012 2023-01-24 06:47:11.906830: step: 796/470, loss: 0.12752631306648254 2023-01-24 06:47:12.603919: step: 798/470, loss: 0.5755633115768433 2023-01-24 06:47:13.300370: step: 800/470, loss: 0.004557878710329533 2023-01-24 06:47:14.092330: step: 802/470, loss: 0.0018064269097521901 2023-01-24 06:47:14.809297: step: 804/470, loss: 0.09993268549442291 2023-01-24 06:47:15.493196: step: 806/470, loss: 0.0003193170123267919 2023-01-24 06:47:16.266907: step: 808/470, loss: 0.031694598495960236 2023-01-24 06:47:16.993227: step: 810/470, loss: 0.0002737323520705104 2023-01-24 06:47:17.727085: step: 812/470, loss: 0.00031162946834228933 2023-01-24 06:47:18.437629: step: 814/470, loss: 0.055558472871780396 2023-01-24 06:47:19.206727: step: 816/470, loss: 0.0006891106604598463 2023-01-24 06:47:19.938572: step: 818/470, loss: 0.012265880592167377 2023-01-24 06:47:20.653412: step: 820/470, loss: 0.005278497468680143 2023-01-24 06:47:21.426786: step: 822/470, loss: 0.0007367177749983966 2023-01-24 06:47:22.224997: step: 824/470, loss: 0.00032776681473478675 2023-01-24 06:47:22.896457: step: 826/470, loss: 0.004107217770069838 2023-01-24 06:47:23.720947: step: 828/470, loss: 0.008525116369128227 2023-01-24 06:47:24.479238: step: 830/470, loss: 0.00033007533056661487 2023-01-24 06:47:25.218249: step: 832/470, loss: 0.0008853072067722678 2023-01-24 06:47:25.939261: step: 834/470, loss: 0.0006735201459378004 2023-01-24 06:47:26.625803: step: 836/470, loss: 0.0024453336372971535 2023-01-24 06:47:27.342202: step: 838/470, loss: 1.662024988036137e-05 2023-01-24 06:47:28.089790: step: 840/470, loss: 0.007163457106798887 2023-01-24 06:47:28.738776: step: 842/470, loss: 8.924589928938076e-05 2023-01-24 06:47:29.487243: step: 844/470, loss: 0.012785697355866432 2023-01-24 06:47:30.242404: step: 846/470, loss: 0.004503779578953981 2023-01-24 06:47:30.946326: step: 848/470, loss: 0.04323597252368927 2023-01-24 06:47:31.780490: step: 850/470, loss: 0.09764706343412399 2023-01-24 06:47:32.605639: step: 852/470, loss: 0.11818161606788635 2023-01-24 06:47:33.343822: step: 854/470, loss: 0.00011157716653542593 2023-01-24 06:47:34.143085: step: 856/470, loss: 0.003554239170625806 2023-01-24 06:47:34.882264: step: 858/470, loss: 0.009540732949972153 2023-01-24 06:47:35.610247: step: 860/470, loss: 0.0023567378520965576 2023-01-24 06:47:36.383313: step: 862/470, loss: 0.0008384129614569247 2023-01-24 06:47:37.132064: step: 864/470, loss: 0.0015519903972744942 2023-01-24 06:47:37.857689: step: 866/470, loss: 0.0001553489564685151 2023-01-24 06:47:38.785602: step: 868/470, loss: 0.018373709172010422 2023-01-24 06:47:39.564710: step: 870/470, loss: 0.0008085042354650795 2023-01-24 06:47:40.280872: step: 872/470, loss: 0.0006269579171203077 2023-01-24 06:47:40.983145: step: 874/470, loss: 0.00702142296358943 2023-01-24 06:47:41.783343: step: 876/470, loss: 0.0382898710668087 2023-01-24 06:47:42.442174: step: 878/470, loss: 0.004564675502479076 2023-01-24 06:47:43.272466: step: 880/470, loss: 0.0015754105988889933 2023-01-24 06:47:44.030842: step: 882/470, loss: 0.00046436249976977706 2023-01-24 06:47:44.803180: step: 884/470, loss: 0.0009858196135610342 2023-01-24 06:47:45.525436: step: 886/470, loss: 1.2100362255296204e-05 2023-01-24 06:47:46.341546: step: 888/470, loss: 0.001151207135990262 2023-01-24 06:47:47.120060: step: 890/470, loss: 0.002776085864752531 2023-01-24 06:47:47.870612: step: 892/470, loss: 0.0019681635312736034 2023-01-24 06:47:48.760849: step: 894/470, loss: 0.0007172105833888054 2023-01-24 06:47:49.429447: step: 896/470, loss: 0.010990173555910587 2023-01-24 06:47:50.187129: step: 898/470, loss: 0.01894933171570301 2023-01-24 06:47:50.916538: step: 900/470, loss: 0.003088920610025525 2023-01-24 06:47:51.622368: step: 902/470, loss: 0.026566803455352783 2023-01-24 06:47:52.338756: step: 904/470, loss: 0.0029794382862746716 2023-01-24 06:47:53.062692: step: 906/470, loss: 0.0001242105645360425 2023-01-24 06:47:53.877880: step: 908/470, loss: 0.10085117816925049 2023-01-24 06:47:54.560942: step: 910/470, loss: 0.00012268772115930915 2023-01-24 06:47:55.286995: step: 912/470, loss: 0.004678426310420036 2023-01-24 06:47:56.070884: step: 914/470, loss: 0.0005872580222785473 2023-01-24 06:47:56.779000: step: 916/470, loss: 0.00040301651461049914 2023-01-24 06:47:57.591207: step: 918/470, loss: 0.0017456887289881706 2023-01-24 06:47:58.302770: step: 920/470, loss: 0.07405664026737213 2023-01-24 06:47:58.989738: step: 922/470, loss: 0.0002474442298989743 2023-01-24 06:47:59.747595: step: 924/470, loss: 0.019610069692134857 2023-01-24 06:48:00.457388: step: 926/470, loss: 0.00045439210953190923 2023-01-24 06:48:01.234243: step: 928/470, loss: 0.004804328549653292 2023-01-24 06:48:01.953814: step: 930/470, loss: 0.02111213654279709 2023-01-24 06:48:02.734478: step: 932/470, loss: 0.04191237688064575 2023-01-24 06:48:03.474207: step: 934/470, loss: 0.007126152515411377 2023-01-24 06:48:04.215892: step: 936/470, loss: 0.0033556444104760885 2023-01-24 06:48:04.900970: step: 938/470, loss: 0.0007148180739022791 2023-01-24 06:48:05.667532: step: 940/470, loss: 0.12642525136470795 2023-01-24 06:48:06.270086: step: 942/470, loss: 0.00044497830094769597 ================================================== Loss: 0.036 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3396016314187758, 'r': 0.3280023347099751, 'f1': 0.3337012169732758}, 'combined': 0.24588510724346638, 'epoch': 37} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3611898473664161, 'r': 0.34695063222985545, 'f1': 0.3539270794693964}, 'combined': 0.23595138631293086, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3194561509504428, 'r': 0.31521290036855837, 'f1': 0.3173203409631905}, 'combined': 0.23381498807814036, 'epoch': 37} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36117282792714595, 'r': 0.34450331279204693, 'f1': 0.3526411863225677}, 'combined': 0.23509412421504508, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3139187731672453, 'r': 0.3210668287232357, 'f1': 0.3174525679871393}, 'combined': 0.23391241851683944, 'epoch': 37} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.350868599552509, 'r': 0.35356758877983596, 'f1': 0.35221292368872553}, 'combined': 0.23480861579248363, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2785714285714286, 'r': 0.2785714285714286, 'f1': 0.2785714285714286}, 'combined': 0.18571428571428572, 'epoch': 37} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.53125, 'r': 0.3695652173913043, 'f1': 0.4358974358974359}, 'combined': 0.29059829059829057, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4633458646616541, 'r': 0.22368421052631576, 'f1': 0.3017135862913096}, 'combined': 0.20114239086087307, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35514455782312926, 'r': 0.3302103099304238, 'f1': 0.34222386102917074}, 'combined': 0.2521649502320205, 'epoch': 34} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36324986426135314, 'r': 0.3573121260955425, 'f1': 0.3602565304307942}, 'combined': 0.24017102028719609, 'epoch': 34} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2869318181818182, 'r': 0.3607142857142857, 'f1': 0.319620253164557}, 'combined': 0.21308016877637131, 'epoch': 34} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:50:43.444857: step: 2/470, loss: 0.003493683412671089 2023-01-24 06:50:44.208522: step: 4/470, loss: 3.1483668863074854e-05 2023-01-24 06:50:44.921224: step: 6/470, loss: 0.0010329708456993103 2023-01-24 06:50:45.663519: step: 8/470, loss: 0.015900876373052597 2023-01-24 06:50:46.423230: step: 10/470, loss: 0.0006670716102235019 2023-01-24 06:50:47.230857: step: 12/470, loss: 0.03588513284921646 2023-01-24 06:50:47.894466: step: 14/470, loss: 0.018479470163583755 2023-01-24 06:50:48.597488: step: 16/470, loss: 0.0064428700134158134 2023-01-24 06:50:49.361591: step: 18/470, loss: 0.008134379982948303 2023-01-24 06:50:50.120647: step: 20/470, loss: 0.04875548183917999 2023-01-24 06:50:50.961560: step: 22/470, loss: 0.007432709448039532 2023-01-24 06:50:51.727326: step: 24/470, loss: 0.015830012038350105 2023-01-24 06:50:52.418341: step: 26/470, loss: 0.0009923202451318502 2023-01-24 06:50:53.177079: step: 28/470, loss: 0.02561793476343155 2023-01-24 06:50:53.924833: step: 30/470, loss: 0.06104350462555885 2023-01-24 06:50:54.659179: step: 32/470, loss: 0.00034792270162142813 2023-01-24 06:50:55.517475: step: 34/470, loss: 0.0635775625705719 2023-01-24 06:50:56.250547: step: 36/470, loss: 0.0034135098103433847 2023-01-24 06:50:56.920518: step: 38/470, loss: 0.0001381459878757596 2023-01-24 06:50:57.677307: step: 40/470, loss: 0.005255894735455513 2023-01-24 06:50:58.472624: step: 42/470, loss: 0.006958745885640383 2023-01-24 06:50:59.140213: step: 44/470, loss: 2.5377163183293305e-05 2023-01-24 06:50:59.859425: step: 46/470, loss: 0.0034183606039732695 2023-01-24 06:51:00.688151: step: 48/470, loss: 0.011098065413534641 2023-01-24 06:51:01.473443: step: 50/470, loss: 0.03030436858534813 2023-01-24 06:51:02.188948: step: 52/470, loss: 0.001068002893589437 2023-01-24 06:51:02.949076: step: 54/470, loss: 0.0028049203101545572 2023-01-24 06:51:03.802797: step: 56/470, loss: 0.0009381828713230789 2023-01-24 06:51:04.541339: step: 58/470, loss: 0.00014905152784194797 2023-01-24 06:51:05.382450: step: 60/470, loss: 0.013133973814547062 2023-01-24 06:51:06.109199: step: 62/470, loss: 0.0014203452738001943 2023-01-24 06:51:06.977987: step: 64/470, loss: 0.0008770119166001678 2023-01-24 06:51:07.812216: step: 66/470, loss: 0.018221968784928322 2023-01-24 06:51:08.523930: step: 68/470, loss: 0.00032925268169492483 2023-01-24 06:51:09.147765: step: 70/470, loss: 8.492947381455451e-05 2023-01-24 06:51:10.089116: step: 72/470, loss: 0.01569739170372486 2023-01-24 06:51:10.863412: step: 74/470, loss: 0.011922664940357208 2023-01-24 06:51:11.586688: step: 76/470, loss: 0.00024971627863124013 2023-01-24 06:51:12.335441: step: 78/470, loss: 0.012681872583925724 2023-01-24 06:51:13.036485: step: 80/470, loss: 0.03299639746546745 2023-01-24 06:51:13.855974: step: 82/470, loss: 0.3435828983783722 2023-01-24 06:51:14.562777: step: 84/470, loss: 0.0480882003903389 2023-01-24 06:51:15.270904: step: 86/470, loss: 0.006340092979371548 2023-01-24 06:51:16.032306: step: 88/470, loss: 0.018705761060118675 2023-01-24 06:51:16.777671: step: 90/470, loss: 0.0008621179731562734 2023-01-24 06:51:17.473163: step: 92/470, loss: 0.022489935159683228 2023-01-24 06:51:18.201066: step: 94/470, loss: 0.008963331580162048 2023-01-24 06:51:19.017640: step: 96/470, loss: 0.04265124723315239 2023-01-24 06:51:19.768980: step: 98/470, loss: 0.022511204704642296 2023-01-24 06:51:20.531914: step: 100/470, loss: 0.002531637204810977 2023-01-24 06:51:21.264446: step: 102/470, loss: 0.0007971610175445676 2023-01-24 06:51:21.935301: step: 104/470, loss: 0.01751614920794964 2023-01-24 06:51:22.716332: step: 106/470, loss: 0.022801943123340607 2023-01-24 06:51:23.476424: step: 108/470, loss: 7.938418275443837e-05 2023-01-24 06:51:24.157945: step: 110/470, loss: 0.0036345114931464195 2023-01-24 06:51:24.934726: step: 112/470, loss: 0.0004885205999016762 2023-01-24 06:51:25.680828: step: 114/470, loss: 0.02976686879992485 2023-01-24 06:51:26.402109: step: 116/470, loss: 0.0012918829452246428 2023-01-24 06:51:27.129488: step: 118/470, loss: 0.030225861817598343 2023-01-24 06:51:27.888723: step: 120/470, loss: 0.0013654425274580717 2023-01-24 06:51:28.636804: step: 122/470, loss: 0.049725260585546494 2023-01-24 06:51:29.393095: step: 124/470, loss: 0.0041059935465455055 2023-01-24 06:51:30.089023: step: 126/470, loss: 0.0016458419850096107 2023-01-24 06:51:30.809365: step: 128/470, loss: 0.0037780962884426117 2023-01-24 06:51:31.522777: step: 130/470, loss: 0.00395290507003665 2023-01-24 06:51:32.290443: step: 132/470, loss: 0.01029009185731411 2023-01-24 06:51:33.013880: step: 134/470, loss: 0.0011309736873954535 2023-01-24 06:51:33.798322: step: 136/470, loss: 0.00018435719539411366 2023-01-24 06:51:34.514525: step: 138/470, loss: 0.0008806980913504958 2023-01-24 06:51:35.245934: step: 140/470, loss: 0.0079522505402565 2023-01-24 06:51:36.072538: step: 142/470, loss: 3.3286640245933086e-05 2023-01-24 06:51:36.848626: step: 144/470, loss: 0.0006921407766640186 2023-01-24 06:51:37.606119: step: 146/470, loss: 0.2176320105791092 2023-01-24 06:51:38.437198: step: 148/470, loss: 0.011884159408509731 2023-01-24 06:51:39.176697: step: 150/470, loss: 0.0032580445986241102 2023-01-24 06:51:39.979338: step: 152/470, loss: 0.0007281180587597191 2023-01-24 06:51:40.886799: step: 154/470, loss: 0.019526991993188858 2023-01-24 06:51:41.613897: step: 156/470, loss: 0.005697279702872038 2023-01-24 06:51:42.405332: step: 158/470, loss: 0.004067837260663509 2023-01-24 06:51:43.139591: step: 160/470, loss: 5.4411604651249945e-05 2023-01-24 06:51:43.835646: step: 162/470, loss: 0.019002696499228477 2023-01-24 06:51:44.594309: step: 164/470, loss: 0.0015450211940333247 2023-01-24 06:51:45.415299: step: 166/470, loss: 0.0039725713431835175 2023-01-24 06:51:46.209053: step: 168/470, loss: 0.011467957869172096 2023-01-24 06:51:46.929941: step: 170/470, loss: 0.027040036395192146 2023-01-24 06:51:47.608702: step: 172/470, loss: 0.00044272729428485036 2023-01-24 06:51:48.416894: step: 174/470, loss: 0.0003731549368239939 2023-01-24 06:51:49.177795: step: 176/470, loss: 0.016215885058045387 2023-01-24 06:51:49.925518: step: 178/470, loss: 0.03532567247748375 2023-01-24 06:51:50.609973: step: 180/470, loss: 0.002269421936944127 2023-01-24 06:51:51.329699: step: 182/470, loss: 0.004340062849223614 2023-01-24 06:51:52.047202: step: 184/470, loss: 0.021665049716830254 2023-01-24 06:51:52.797184: step: 186/470, loss: 0.0015216005267575383 2023-01-24 06:51:53.573463: step: 188/470, loss: 0.03777991607785225 2023-01-24 06:51:54.330650: step: 190/470, loss: 0.0024247351102530956 2023-01-24 06:51:55.114920: step: 192/470, loss: 0.026008278131484985 2023-01-24 06:51:55.916717: step: 194/470, loss: 0.011309238150715828 2023-01-24 06:51:56.668421: step: 196/470, loss: 0.0033743376843631268 2023-01-24 06:51:57.441670: step: 198/470, loss: 0.022116800770163536 2023-01-24 06:51:58.192324: step: 200/470, loss: 6.394281808752567e-05 2023-01-24 06:51:58.877091: step: 202/470, loss: 0.010615077801048756 2023-01-24 06:51:59.612070: step: 204/470, loss: 0.0010532918386161327 2023-01-24 06:52:00.311528: step: 206/470, loss: 0.001243436592631042 2023-01-24 06:52:01.018251: step: 208/470, loss: 0.3090232312679291 2023-01-24 06:52:01.728360: step: 210/470, loss: 0.00770140066742897 2023-01-24 06:52:02.496055: step: 212/470, loss: 0.0004572441102936864 2023-01-24 06:52:03.200465: step: 214/470, loss: 0.0049099307507276535 2023-01-24 06:52:03.873376: step: 216/470, loss: 0.00032190539059229195 2023-01-24 06:52:04.577201: step: 218/470, loss: 0.002639106009155512 2023-01-24 06:52:05.375455: step: 220/470, loss: 0.0019580356311053038 2023-01-24 06:52:06.075134: step: 222/470, loss: 0.0008421125821769238 2023-01-24 06:52:06.827443: step: 224/470, loss: 0.0025131748989224434 2023-01-24 06:52:07.571890: step: 226/470, loss: 0.006898400839418173 2023-01-24 06:52:08.276427: step: 228/470, loss: 0.003961809910833836 2023-01-24 06:52:09.050393: step: 230/470, loss: 0.012256169691681862 2023-01-24 06:52:09.757223: step: 232/470, loss: 0.011719431728124619 2023-01-24 06:52:10.516117: step: 234/470, loss: 0.07077656686306 2023-01-24 06:52:11.282245: step: 236/470, loss: 0.0006426791660487652 2023-01-24 06:52:12.003432: step: 238/470, loss: 1.4502748854283709e-05 2023-01-24 06:52:12.623548: step: 240/470, loss: 0.00010448225657455623 2023-01-24 06:52:13.385232: step: 242/470, loss: 0.008650097995996475 2023-01-24 06:52:14.110881: step: 244/470, loss: 0.00271412986330688 2023-01-24 06:52:14.871965: step: 246/470, loss: 0.008904355578124523 2023-01-24 06:52:15.616692: step: 248/470, loss: 0.01523964386433363 2023-01-24 06:52:16.376407: step: 250/470, loss: 0.0009835069067776203 2023-01-24 06:52:17.088516: step: 252/470, loss: 0.001248181564733386 2023-01-24 06:52:17.899907: step: 254/470, loss: 0.12711653113365173 2023-01-24 06:52:18.686412: step: 256/470, loss: 0.014596930705010891 2023-01-24 06:52:19.527449: step: 258/470, loss: 0.03263501450419426 2023-01-24 06:52:20.228563: step: 260/470, loss: 0.12978902459144592 2023-01-24 06:52:20.971548: step: 262/470, loss: 0.011506453156471252 2023-01-24 06:52:21.769697: step: 264/470, loss: 0.07690394669771194 2023-01-24 06:52:22.492486: step: 266/470, loss: 0.0022593012545257807 2023-01-24 06:52:23.272721: step: 268/470, loss: 0.008098762482404709 2023-01-24 06:52:24.087900: step: 270/470, loss: 0.0018563955090939999 2023-01-24 06:52:24.892312: step: 272/470, loss: 0.0008043406414799392 2023-01-24 06:52:25.640754: step: 274/470, loss: 0.006701835431158543 2023-01-24 06:52:26.373741: step: 276/470, loss: 0.0002937865210697055 2023-01-24 06:52:27.062097: step: 278/470, loss: 0.029652804136276245 2023-01-24 06:52:27.846581: step: 280/470, loss: 0.22939205169677734 2023-01-24 06:52:28.554630: step: 282/470, loss: 0.0002202181494794786 2023-01-24 06:52:29.257944: step: 284/470, loss: 1.331815747107612e-05 2023-01-24 06:52:30.003783: step: 286/470, loss: 0.02264559268951416 2023-01-24 06:52:30.785455: step: 288/470, loss: 0.0002129612839780748 2023-01-24 06:52:31.557714: step: 290/470, loss: 0.005394687410444021 2023-01-24 06:52:32.300474: step: 292/470, loss: 0.025830525904893875 2023-01-24 06:52:33.076279: step: 294/470, loss: 0.0006529740057885647 2023-01-24 06:52:33.809251: step: 296/470, loss: 0.011604727245867252 2023-01-24 06:52:34.546344: step: 298/470, loss: 0.002371342619881034 2023-01-24 06:52:35.365798: step: 300/470, loss: 0.005713804624974728 2023-01-24 06:52:36.110358: step: 302/470, loss: 0.041894592344760895 2023-01-24 06:52:36.886210: step: 304/470, loss: 0.04704660177230835 2023-01-24 06:52:37.654317: step: 306/470, loss: 0.0020737978629767895 2023-01-24 06:52:38.434657: step: 308/470, loss: 0.006861019879579544 2023-01-24 06:52:39.080355: step: 310/470, loss: 0.25099924206733704 2023-01-24 06:52:39.736830: step: 312/470, loss: 0.00043172220466658473 2023-01-24 06:52:40.434164: step: 314/470, loss: 0.015380592085421085 2023-01-24 06:52:41.134194: step: 316/470, loss: 0.0004332458192948252 2023-01-24 06:52:41.927898: step: 318/470, loss: 0.005911845248192549 2023-01-24 06:52:42.717490: step: 320/470, loss: 0.00782099924981594 2023-01-24 06:52:43.465383: step: 322/470, loss: 0.006352236494421959 2023-01-24 06:52:44.238833: step: 324/470, loss: 0.006406526081264019 2023-01-24 06:52:44.939512: step: 326/470, loss: 0.006970448885113001 2023-01-24 06:52:45.686549: step: 328/470, loss: 0.004823725204914808 2023-01-24 06:52:46.479993: step: 330/470, loss: 0.021097727119922638 2023-01-24 06:52:47.264826: step: 332/470, loss: 0.014263940043747425 2023-01-24 06:52:47.992854: step: 334/470, loss: 0.005755257327109575 2023-01-24 06:52:48.715864: step: 336/470, loss: 0.0009758573723956943 2023-01-24 06:52:49.547522: step: 338/470, loss: 5.3987059800419956e-05 2023-01-24 06:52:50.270308: step: 340/470, loss: 0.001099450164474547 2023-01-24 06:52:51.001748: step: 342/470, loss: 0.0010999426012858748 2023-01-24 06:52:51.780747: step: 344/470, loss: 0.02734139934182167 2023-01-24 06:52:52.554941: step: 346/470, loss: 0.005323213990777731 2023-01-24 06:52:53.316621: step: 348/470, loss: 0.0010934954043477774 2023-01-24 06:52:54.059465: step: 350/470, loss: 0.02636098861694336 2023-01-24 06:52:54.845640: step: 352/470, loss: 0.29431140422821045 2023-01-24 06:52:55.664929: step: 354/470, loss: 1.4190295587468427e-06 2023-01-24 06:52:56.369102: step: 356/470, loss: 0.00703160697594285 2023-01-24 06:52:57.068194: step: 358/470, loss: 4.769103725266177e-06 2023-01-24 06:52:57.863642: step: 360/470, loss: 0.008066543377935886 2023-01-24 06:52:58.658533: step: 362/470, loss: 0.008893858641386032 2023-01-24 06:52:59.433077: step: 364/470, loss: 0.0012347385054454207 2023-01-24 06:53:00.282586: step: 366/470, loss: 0.03243786841630936 2023-01-24 06:53:01.117504: step: 368/470, loss: 0.05025627464056015 2023-01-24 06:53:01.910034: step: 370/470, loss: 0.0002960547572001815 2023-01-24 06:53:02.686479: step: 372/470, loss: 0.01112099178135395 2023-01-24 06:53:03.441537: step: 374/470, loss: 5.0827387894969434e-05 2023-01-24 06:53:04.182772: step: 376/470, loss: 0.0254096370190382 2023-01-24 06:53:04.886564: step: 378/470, loss: 1.0762357711791992 2023-01-24 06:53:05.741478: step: 380/470, loss: 0.09362594038248062 2023-01-24 06:53:06.457947: step: 382/470, loss: 0.08472180366516113 2023-01-24 06:53:07.189347: step: 384/470, loss: 5.627515201922506e-05 2023-01-24 06:53:08.025436: step: 386/470, loss: 0.04056701436638832 2023-01-24 06:53:08.844047: step: 388/470, loss: 0.0017334421863779426 2023-01-24 06:53:09.617961: step: 390/470, loss: 0.013753866776823997 2023-01-24 06:53:10.425412: step: 392/470, loss: 0.029546622186899185 2023-01-24 06:53:11.239054: step: 394/470, loss: 0.008471227250993252 2023-01-24 06:53:12.032734: step: 396/470, loss: 0.05549805611371994 2023-01-24 06:53:12.777124: step: 398/470, loss: 4.936423465551343e-06 2023-01-24 06:53:13.590479: step: 400/470, loss: 0.022593067958950996 2023-01-24 06:53:14.406642: step: 402/470, loss: 0.011362415738403797 2023-01-24 06:53:15.119911: step: 404/470, loss: 0.007395919878035784 2023-01-24 06:53:15.792433: step: 406/470, loss: 0.0004373751871753484 2023-01-24 06:53:16.551280: step: 408/470, loss: 0.0032540217507630587 2023-01-24 06:53:17.341323: step: 410/470, loss: 6.207002297742292e-05 2023-01-24 06:53:18.157926: step: 412/470, loss: 0.012555564753711224 2023-01-24 06:53:18.974004: step: 414/470, loss: 0.005541603546589613 2023-01-24 06:53:19.767660: step: 416/470, loss: 1.1564688682556152 2023-01-24 06:53:20.521910: step: 418/470, loss: 0.011560517363250256 2023-01-24 06:53:21.285118: step: 420/470, loss: 0.13285844027996063 2023-01-24 06:53:22.049523: step: 422/470, loss: 0.02132769674062729 2023-01-24 06:53:22.860199: step: 424/470, loss: 3.81058816856239e-05 2023-01-24 06:53:23.623964: step: 426/470, loss: 0.005625903606414795 2023-01-24 06:53:24.353845: step: 428/470, loss: 3.352219209773466e-05 2023-01-24 06:53:25.154188: step: 430/470, loss: 0.0006965019856579602 2023-01-24 06:53:25.894258: step: 432/470, loss: 0.00841616839170456 2023-01-24 06:53:26.606549: step: 434/470, loss: 0.0019366976339370012 2023-01-24 06:53:27.515782: step: 436/470, loss: 0.0008078304235823452 2023-01-24 06:53:28.297346: step: 438/470, loss: 0.008926390670239925 2023-01-24 06:53:29.100834: step: 440/470, loss: 0.012859417125582695 2023-01-24 06:53:29.893095: step: 442/470, loss: 0.00021163160272408277 2023-01-24 06:53:30.607268: step: 444/470, loss: 0.0004545637348201126 2023-01-24 06:53:31.301122: step: 446/470, loss: 4.504845492192544e-06 2023-01-24 06:53:32.136222: step: 448/470, loss: 0.04429243505001068 2023-01-24 06:53:32.853324: step: 450/470, loss: 0.013035489246249199 2023-01-24 06:53:33.661844: step: 452/470, loss: 0.01099762599915266 2023-01-24 06:53:34.455123: step: 454/470, loss: 0.0008165753679350019 2023-01-24 06:53:35.164990: step: 456/470, loss: 0.00030910957138985395 2023-01-24 06:53:35.997630: step: 458/470, loss: 0.004646346438676119 2023-01-24 06:53:36.808013: step: 460/470, loss: 1.028315782546997 2023-01-24 06:53:37.538477: step: 462/470, loss: 0.0014696192229166627 2023-01-24 06:53:38.333370: step: 464/470, loss: 0.05205130949616432 2023-01-24 06:53:39.109646: step: 466/470, loss: 2.820938971126452e-05 2023-01-24 06:53:39.926042: step: 468/470, loss: 0.00012028154014842585 2023-01-24 06:53:40.567248: step: 470/470, loss: 6.529298116220161e-05 2023-01-24 06:53:41.323271: step: 472/470, loss: 0.0006013525417074561 2023-01-24 06:53:42.061618: step: 474/470, loss: 3.899422154063359e-05 2023-01-24 06:53:42.939332: step: 476/470, loss: 0.0051171015948057175 2023-01-24 06:53:43.695039: step: 478/470, loss: 0.0003485481138341129 2023-01-24 06:53:44.416631: step: 480/470, loss: 0.0009719117661006749 2023-01-24 06:53:45.167627: step: 482/470, loss: 0.0005722529022023082 2023-01-24 06:53:45.904114: step: 484/470, loss: 0.0002770486462395638 2023-01-24 06:53:46.619211: step: 486/470, loss: 0.004022694192826748 2023-01-24 06:53:47.364975: step: 488/470, loss: 0.007053688168525696 2023-01-24 06:53:48.087678: step: 490/470, loss: 0.05090930685400963 2023-01-24 06:53:48.798924: step: 492/470, loss: 0.0005236592842265964 2023-01-24 06:53:49.629495: step: 494/470, loss: 0.03422538563609123 2023-01-24 06:53:50.395164: step: 496/470, loss: 0.011507346294820309 2023-01-24 06:53:51.138621: step: 498/470, loss: 0.14850161969661713 2023-01-24 06:53:51.821795: step: 500/470, loss: 0.02185884863138199 2023-01-24 06:53:52.550687: step: 502/470, loss: 0.39024344086647034 2023-01-24 06:53:53.299422: step: 504/470, loss: 5.338906339602545e-05 2023-01-24 06:53:53.989071: step: 506/470, loss: 0.008324535563588142 2023-01-24 06:53:54.707149: step: 508/470, loss: 0.9969122409820557 2023-01-24 06:53:55.460417: step: 510/470, loss: 0.005451219622045755 2023-01-24 06:53:56.127638: step: 512/470, loss: 0.00021868124895263463 2023-01-24 06:53:56.869423: step: 514/470, loss: 0.009522140957415104 2023-01-24 06:53:57.654342: step: 516/470, loss: 0.053997401148080826 2023-01-24 06:53:58.411349: step: 518/470, loss: 0.07220856100320816 2023-01-24 06:53:59.184880: step: 520/470, loss: 0.19266104698181152 2023-01-24 06:53:59.882366: step: 522/470, loss: 0.0018507946515455842 2023-01-24 06:54:00.547790: step: 524/470, loss: 0.01633111573755741 2023-01-24 06:54:01.423351: step: 526/470, loss: 0.5133503079414368 2023-01-24 06:54:02.124761: step: 528/470, loss: 0.2512091100215912 2023-01-24 06:54:02.864978: step: 530/470, loss: 0.00509637501090765 2023-01-24 06:54:03.820721: step: 532/470, loss: 0.05830131098628044 2023-01-24 06:54:04.470758: step: 534/470, loss: 0.002167430240660906 2023-01-24 06:54:05.155654: step: 536/470, loss: 0.00017176716937683523 2023-01-24 06:54:05.944114: step: 538/470, loss: 0.018888715654611588 2023-01-24 06:54:06.678002: step: 540/470, loss: 0.0069089666940271854 2023-01-24 06:54:07.517521: step: 542/470, loss: 0.002944743959233165 2023-01-24 06:54:08.256659: step: 544/470, loss: 0.012334275059401989 2023-01-24 06:54:08.989572: step: 546/470, loss: 0.03784177079796791 2023-01-24 06:54:09.791899: step: 548/470, loss: 0.005058033857494593 2023-01-24 06:54:10.634501: step: 550/470, loss: 0.007854700088500977 2023-01-24 06:54:11.295764: step: 552/470, loss: 3.625124918471556e-06 2023-01-24 06:54:12.078864: step: 554/470, loss: 0.011495105922222137 2023-01-24 06:54:12.765820: step: 556/470, loss: 0.0006342419073916972 2023-01-24 06:54:13.528162: step: 558/470, loss: 0.0004920351784676313 2023-01-24 06:54:14.328924: step: 560/470, loss: 0.0354156494140625 2023-01-24 06:54:15.207136: step: 562/470, loss: 0.03654221445322037 2023-01-24 06:54:15.930263: step: 564/470, loss: 0.005547667853534222 2023-01-24 06:54:16.595529: step: 566/470, loss: 0.005937586072832346 2023-01-24 06:54:17.308547: step: 568/470, loss: 0.0038067607674747705 2023-01-24 06:54:18.038127: step: 570/470, loss: 0.0002508886100258678 2023-01-24 06:54:18.801805: step: 572/470, loss: 0.021944653242826462 2023-01-24 06:54:19.536743: step: 574/470, loss: 3.4421158488839865e-05 2023-01-24 06:54:20.185686: step: 576/470, loss: 0.005076797213405371 2023-01-24 06:54:20.957359: step: 578/470, loss: 0.026614626869559288 2023-01-24 06:54:21.689279: step: 580/470, loss: 0.0064454590901732445 2023-01-24 06:54:22.465640: step: 582/470, loss: 0.009054692462086678 2023-01-24 06:54:23.254214: step: 584/470, loss: 0.0006180154159665108 2023-01-24 06:54:23.958883: step: 586/470, loss: 0.03550654277205467 2023-01-24 06:54:24.789135: step: 588/470, loss: 0.006163349840790033 2023-01-24 06:54:25.529109: step: 590/470, loss: 0.019109375774860382 2023-01-24 06:54:26.235130: step: 592/470, loss: 0.03721586987376213 2023-01-24 06:54:26.908414: step: 594/470, loss: 3.0975337722338736e-05 2023-01-24 06:54:27.728066: step: 596/470, loss: 0.0007861484191380441 2023-01-24 06:54:28.456408: step: 598/470, loss: 0.009015318937599659 2023-01-24 06:54:29.219538: step: 600/470, loss: 0.0021047855261713266 2023-01-24 06:54:29.939150: step: 602/470, loss: 4.80856433568988e-05 2023-01-24 06:54:30.695313: step: 604/470, loss: 0.00718892365694046 2023-01-24 06:54:31.404787: step: 606/470, loss: 0.05739133059978485 2023-01-24 06:54:32.153494: step: 608/470, loss: 0.00620870478451252 2023-01-24 06:54:32.889212: step: 610/470, loss: 0.01584588550031185 2023-01-24 06:54:33.654452: step: 612/470, loss: 0.028577158227562904 2023-01-24 06:54:34.370152: step: 614/470, loss: 0.00660181138664484 2023-01-24 06:54:35.070157: step: 616/470, loss: 0.01830855756998062 2023-01-24 06:54:35.755308: step: 618/470, loss: 0.010931288823485374 2023-01-24 06:54:36.548592: step: 620/470, loss: 0.20352265238761902 2023-01-24 06:54:37.324421: step: 622/470, loss: 0.01606188900768757 2023-01-24 06:54:38.099239: step: 624/470, loss: 0.0026829990092664957 2023-01-24 06:54:38.858032: step: 626/470, loss: 0.0006494335830211639 2023-01-24 06:54:39.576471: step: 628/470, loss: 0.019799262285232544 2023-01-24 06:54:40.403358: step: 630/470, loss: 0.06131910905241966 2023-01-24 06:54:41.038706: step: 632/470, loss: 6.267506978474557e-05 2023-01-24 06:54:41.794143: step: 634/470, loss: 0.012804257683455944 2023-01-24 06:54:42.505817: step: 636/470, loss: 0.0005007492727600038 2023-01-24 06:54:43.265317: step: 638/470, loss: 0.001479864353314042 2023-01-24 06:54:43.916678: step: 640/470, loss: 0.0061265756376087666 2023-01-24 06:54:44.580335: step: 642/470, loss: 0.0012702817330136895 2023-01-24 06:54:45.382561: step: 644/470, loss: 0.00014075188664719462 2023-01-24 06:54:46.090274: step: 646/470, loss: 0.0016734458040446043 2023-01-24 06:54:46.847398: step: 648/470, loss: 0.010227406397461891 2023-01-24 06:54:47.519060: step: 650/470, loss: 0.029626229777932167 2023-01-24 06:54:48.345440: step: 652/470, loss: 0.022710563614964485 2023-01-24 06:54:49.193597: step: 654/470, loss: 0.009011475369334221 2023-01-24 06:54:49.959388: step: 656/470, loss: 0.004524306394159794 2023-01-24 06:54:50.711748: step: 658/470, loss: 0.007571091875433922 2023-01-24 06:54:51.461467: step: 660/470, loss: 0.032529208809137344 2023-01-24 06:54:52.202436: step: 662/470, loss: 0.01610148325562477 2023-01-24 06:54:52.921669: step: 664/470, loss: 0.02389807626605034 2023-01-24 06:54:53.729256: step: 666/470, loss: 0.02644765004515648 2023-01-24 06:54:54.448680: step: 668/470, loss: 0.008968241512775421 2023-01-24 06:54:55.214224: step: 670/470, loss: 0.005838526878505945 2023-01-24 06:54:56.039952: step: 672/470, loss: 0.000868885894306004 2023-01-24 06:54:56.832547: step: 674/470, loss: 0.015672659501433372 2023-01-24 06:54:57.663303: step: 676/470, loss: 0.005507215391844511 2023-01-24 06:54:58.388060: step: 678/470, loss: 0.04494674503803253 2023-01-24 06:54:59.154652: step: 680/470, loss: 0.027755441144108772 2023-01-24 06:54:59.913135: step: 682/470, loss: 0.003982035908848047 2023-01-24 06:55:00.629249: step: 684/470, loss: 0.0009073261171579361 2023-01-24 06:55:01.408854: step: 686/470, loss: 0.004845160525292158 2023-01-24 06:55:02.115537: step: 688/470, loss: 0.0002821074740495533 2023-01-24 06:55:02.995436: step: 690/470, loss: 0.007127921562641859 2023-01-24 06:55:03.715776: step: 692/470, loss: 0.0001395836443407461 2023-01-24 06:55:04.395376: step: 694/470, loss: 0.0343923382461071 2023-01-24 06:55:05.117120: step: 696/470, loss: 0.06720244139432907 2023-01-24 06:55:05.855008: step: 698/470, loss: 0.02562553994357586 2023-01-24 06:55:06.610347: step: 700/470, loss: 0.07589274644851685 2023-01-24 06:55:07.407822: step: 702/470, loss: 0.03103337623178959 2023-01-24 06:55:08.091708: step: 704/470, loss: 0.1309605836868286 2023-01-24 06:55:08.838731: step: 706/470, loss: 0.04467151314020157 2023-01-24 06:55:09.569813: step: 708/470, loss: 0.0040990193374454975 2023-01-24 06:55:10.261171: step: 710/470, loss: 0.004827653989195824 2023-01-24 06:55:11.016255: step: 712/470, loss: 0.18793350458145142 2023-01-24 06:55:11.779004: step: 714/470, loss: 0.0711977481842041 2023-01-24 06:55:12.481034: step: 716/470, loss: 0.0011849101865664124 2023-01-24 06:55:13.177140: step: 718/470, loss: 0.038444213569164276 2023-01-24 06:55:13.921608: step: 720/470, loss: 0.01529417559504509 2023-01-24 06:55:14.685691: step: 722/470, loss: 0.02028188854455948 2023-01-24 06:55:15.441766: step: 724/470, loss: 0.0011991052888333797 2023-01-24 06:55:16.271344: step: 726/470, loss: 0.007202590350061655 2023-01-24 06:55:17.056005: step: 728/470, loss: 0.03086160123348236 2023-01-24 06:55:17.788901: step: 730/470, loss: 0.005256262607872486 2023-01-24 06:55:18.581847: step: 732/470, loss: 0.07266030460596085 2023-01-24 06:55:19.407619: step: 734/470, loss: 0.003284410573542118 2023-01-24 06:55:20.168006: step: 736/470, loss: 0.018563855439424515 2023-01-24 06:55:20.877913: step: 738/470, loss: 2.060541373793967e-05 2023-01-24 06:55:21.724086: step: 740/470, loss: 0.004683589097112417 2023-01-24 06:55:22.352346: step: 742/470, loss: 0.00043677486246451735 2023-01-24 06:55:22.998034: step: 744/470, loss: 0.0007822015904821455 2023-01-24 06:55:23.763048: step: 746/470, loss: 0.0006057433784008026 2023-01-24 06:55:24.525715: step: 748/470, loss: 0.053732726722955704 2023-01-24 06:55:25.194076: step: 750/470, loss: 0.009779625572264194 2023-01-24 06:55:25.941699: step: 752/470, loss: 0.0024724539835006 2023-01-24 06:55:26.662564: step: 754/470, loss: 0.8649208545684814 2023-01-24 06:55:27.425030: step: 756/470, loss: 0.005285963881760836 2023-01-24 06:55:28.192359: step: 758/470, loss: 0.01545005477964878 2023-01-24 06:55:28.883221: step: 760/470, loss: 0.011997690424323082 2023-01-24 06:55:29.657969: step: 762/470, loss: 0.0027697875630110502 2023-01-24 06:55:30.381505: step: 764/470, loss: 0.000867619295604527 2023-01-24 06:55:31.100109: step: 766/470, loss: 0.0008559745037928224 2023-01-24 06:55:31.803206: step: 768/470, loss: 0.002753552980720997 2023-01-24 06:55:32.589640: step: 770/470, loss: 0.08743966370820999 2023-01-24 06:55:33.307049: step: 772/470, loss: 0.8679112792015076 2023-01-24 06:55:34.094928: step: 774/470, loss: 0.0008498340612277389 2023-01-24 06:55:34.833556: step: 776/470, loss: 0.012493046931922436 2023-01-24 06:55:35.537511: step: 778/470, loss: 0.0022574099712073803 2023-01-24 06:55:36.316748: step: 780/470, loss: 0.09348601847887039 2023-01-24 06:55:37.097132: step: 782/470, loss: 0.09230636805295944 2023-01-24 06:55:37.792980: step: 784/470, loss: 0.0021880273707211018 2023-01-24 06:55:38.590973: step: 786/470, loss: 0.003932863939553499 2023-01-24 06:55:39.373395: step: 788/470, loss: 0.009727765806019306 2023-01-24 06:55:40.181713: step: 790/470, loss: 0.002854890888556838 2023-01-24 06:55:41.076220: step: 792/470, loss: 0.028947800397872925 2023-01-24 06:55:41.878362: step: 794/470, loss: 0.004353455267846584 2023-01-24 06:55:42.606413: step: 796/470, loss: 0.016604196280241013 2023-01-24 06:55:43.341626: step: 798/470, loss: 0.0022141074296087027 2023-01-24 06:55:44.085342: step: 800/470, loss: 0.0014762524515390396 2023-01-24 06:55:44.850463: step: 802/470, loss: 0.008055765181779861 2023-01-24 06:55:45.569935: step: 804/470, loss: 0.0005244429339654744 2023-01-24 06:55:46.297487: step: 806/470, loss: 0.01423501968383789 2023-01-24 06:55:46.949782: step: 808/470, loss: 0.0004736521514132619 2023-01-24 06:55:47.683212: step: 810/470, loss: 0.0029225496109575033 2023-01-24 06:55:48.431429: step: 812/470, loss: 0.006569644436240196 2023-01-24 06:55:49.230031: step: 814/470, loss: 0.003673528553918004 2023-01-24 06:55:50.007039: step: 816/470, loss: 0.0015710997395217419 2023-01-24 06:55:50.837578: step: 818/470, loss: 0.006185244768857956 2023-01-24 06:55:51.638752: step: 820/470, loss: 0.014171771705150604 2023-01-24 06:55:52.348922: step: 822/470, loss: 0.006676795426756144 2023-01-24 06:55:53.083178: step: 824/470, loss: 0.000639638863503933 2023-01-24 06:55:53.778237: step: 826/470, loss: 0.036543309688568115 2023-01-24 06:55:54.482864: step: 828/470, loss: 0.00471093412488699 2023-01-24 06:55:55.166890: step: 830/470, loss: 0.0005620458978228271 2023-01-24 06:55:55.874616: step: 832/470, loss: 0.04118078574538231 2023-01-24 06:55:56.650141: step: 834/470, loss: 0.0009206313407048583 2023-01-24 06:55:57.468962: step: 836/470, loss: 0.006762698758393526 2023-01-24 06:55:58.213064: step: 838/470, loss: 0.01615045592188835 2023-01-24 06:55:59.006732: step: 840/470, loss: 0.00715272594243288 2023-01-24 06:55:59.762836: step: 842/470, loss: 0.028812158852815628 2023-01-24 06:56:00.544454: step: 844/470, loss: 0.0012455545365810394 2023-01-24 06:56:01.274630: step: 846/470, loss: 0.027298571541905403 2023-01-24 06:56:01.971819: step: 848/470, loss: 0.017294684424996376 2023-01-24 06:56:02.649970: step: 850/470, loss: 0.0035474118776619434 2023-01-24 06:56:03.406508: step: 852/470, loss: 0.005111002828925848 2023-01-24 06:56:04.161298: step: 854/470, loss: 0.0006961169419810176 2023-01-24 06:56:04.919734: step: 856/470, loss: 0.036905642598867416 2023-01-24 06:56:05.685738: step: 858/470, loss: 0.0012904554605484009 2023-01-24 06:56:06.448218: step: 860/470, loss: 0.058939360082149506 2023-01-24 06:56:07.206318: step: 862/470, loss: 0.056269776076078415 2023-01-24 06:56:07.917786: step: 864/470, loss: 0.0005217547295615077 2023-01-24 06:56:08.623289: step: 866/470, loss: 0.0017588756745681167 2023-01-24 06:56:09.384927: step: 868/470, loss: 0.0008544818265363574 2023-01-24 06:56:10.087958: step: 870/470, loss: 0.0433480478823185 2023-01-24 06:56:10.861049: step: 872/470, loss: 0.030004724860191345 2023-01-24 06:56:11.607680: step: 874/470, loss: 0.002005601767450571 2023-01-24 06:56:12.259959: step: 876/470, loss: 0.00046739837853237987 2023-01-24 06:56:12.958125: step: 878/470, loss: 0.00015845979214645922 2023-01-24 06:56:13.693053: step: 880/470, loss: 0.0015988233499228954 2023-01-24 06:56:14.407655: step: 882/470, loss: 0.0655452087521553 2023-01-24 06:56:15.219396: step: 884/470, loss: 0.08574583381414413 2023-01-24 06:56:16.018602: step: 886/470, loss: 1.4678356647491455 2023-01-24 06:56:16.758004: step: 888/470, loss: 0.00045837866491638124 2023-01-24 06:56:17.521522: step: 890/470, loss: 0.18071959912776947 2023-01-24 06:56:18.258802: step: 892/470, loss: 0.016763942316174507 2023-01-24 06:56:19.042896: step: 894/470, loss: 0.01770934835076332 2023-01-24 06:56:19.784181: step: 896/470, loss: 0.01703697070479393 2023-01-24 06:56:20.533376: step: 898/470, loss: 0.003155779093503952 2023-01-24 06:56:21.248848: step: 900/470, loss: 0.01867401972413063 2023-01-24 06:56:22.066436: step: 902/470, loss: 0.08417651057243347 2023-01-24 06:56:22.860637: step: 904/470, loss: 0.00018019463459495455 2023-01-24 06:56:23.620228: step: 906/470, loss: 0.016756407916545868 2023-01-24 06:56:24.400197: step: 908/470, loss: 0.02904907800257206 2023-01-24 06:56:25.094713: step: 910/470, loss: 0.0007967533310875297 2023-01-24 06:56:25.821761: step: 912/470, loss: 5.6903561926446855e-05 2023-01-24 06:56:26.571074: step: 914/470, loss: 0.0034690299071371555 2023-01-24 06:56:27.316082: step: 916/470, loss: 0.005849133711308241 2023-01-24 06:56:28.180650: step: 918/470, loss: 0.004884378984570503 2023-01-24 06:56:28.855056: step: 920/470, loss: 0.00029265874763950706 2023-01-24 06:56:29.533369: step: 922/470, loss: 0.0014200083678588271 2023-01-24 06:56:30.274845: step: 924/470, loss: 0.004363223910331726 2023-01-24 06:56:31.081061: step: 926/470, loss: 0.04603644460439682 2023-01-24 06:56:31.826276: step: 928/470, loss: 0.012013577856123447 2023-01-24 06:56:32.558839: step: 930/470, loss: 0.011867745779454708 2023-01-24 06:56:33.264412: step: 932/470, loss: 0.014655977487564087 2023-01-24 06:56:33.985086: step: 934/470, loss: 0.0016832905821502209 2023-01-24 06:56:34.723990: step: 936/470, loss: 0.003247485961765051 2023-01-24 06:56:35.451817: step: 938/470, loss: 0.015403217636048794 2023-01-24 06:56:36.185728: step: 940/470, loss: 0.00028605852276086807 2023-01-24 06:56:36.871974: step: 942/470, loss: 0.0061555225402116776 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32733147870649143, 'r': 0.3341638245618451, 'f1': 0.3307123672189528}, 'combined': 0.24368279689817574, 'epoch': 38} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3538344880358182, 'r': 0.3432874984885967, 'f1': 0.3484812088122406}, 'combined': 0.232320805874827, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3274574736254414, 'r': 0.33677789507588096, 'f1': 0.3320522931805225}, 'combined': 0.2446701107645955, 'epoch': 38} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3589636014498774, 'r': 0.34653793832276625, 'f1': 0.3526413462384314}, 'combined': 0.2350942308256209, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3200345179511846, 'r': 0.34432556295696715, 'f1': 0.3317359628488513}, 'combined': 0.24443702525704833, 'epoch': 38} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.34659418041784995, 'r': 0.3539259803882276, 'f1': 0.3502217122775991}, 'combined': 0.23348114151839933, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2804878048780488, 'r': 0.32857142857142857, 'f1': 0.3026315789473685}, 'combined': 0.2017543859649123, 'epoch': 38} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6071428571428571, 'r': 0.3695652173913043, 'f1': 0.45945945945945943}, 'combined': 0.3063063063063063, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5405701754385964, 'r': 0.22368421052631576, 'f1': 0.3164313222079589}, 'combined': 0.2109542148053059, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35514455782312926, 'r': 0.3302103099304238, 'f1': 0.34222386102917074}, 'combined': 0.2521649502320205, 'epoch': 34} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36324986426135314, 'r': 0.3573121260955425, 'f1': 0.3602565304307942}, 'combined': 0.24017102028719609, 'epoch': 34} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2869318181818182, 'r': 0.3607142857142857, 'f1': 0.319620253164557}, 'combined': 0.21308016877637131, 'epoch': 34} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:59:14.370747: step: 2/470, loss: 0.0007795958081260324 2023-01-24 06:59:15.143979: step: 4/470, loss: 0.008938697166740894 2023-01-24 06:59:15.942951: step: 6/470, loss: 0.004093769006431103 2023-01-24 06:59:16.701806: step: 8/470, loss: 0.012828747741878033 2023-01-24 06:59:17.426626: step: 10/470, loss: 0.00028150121215730906 2023-01-24 06:59:18.107644: step: 12/470, loss: 4.195710062049329e-05 2023-01-24 06:59:18.845553: step: 14/470, loss: 0.3589991629123688 2023-01-24 06:59:19.520187: step: 16/470, loss: 0.00017055154603440315 2023-01-24 06:59:20.254213: step: 18/470, loss: 0.0029144282452762127 2023-01-24 06:59:20.944070: step: 20/470, loss: 0.007652122061699629 2023-01-24 06:59:21.663576: step: 22/470, loss: 0.03987063094973564 2023-01-24 06:59:22.399811: step: 24/470, loss: 0.032152093946933746 2023-01-24 06:59:23.092023: step: 26/470, loss: 0.20022591948509216 2023-01-24 06:59:23.858465: step: 28/470, loss: 0.17966990172863007 2023-01-24 06:59:24.584465: step: 30/470, loss: 0.001606591627933085 2023-01-24 06:59:25.340171: step: 32/470, loss: 0.012557115405797958 2023-01-24 06:59:25.996408: step: 34/470, loss: 0.003112519159913063 2023-01-24 06:59:26.819122: step: 36/470, loss: 0.08968937397003174 2023-01-24 06:59:27.600023: step: 38/470, loss: 5.763117587775923e-05 2023-01-24 06:59:28.406076: step: 40/470, loss: 0.02070966176688671 2023-01-24 06:59:29.117838: step: 42/470, loss: 0.000932761759031564 2023-01-24 06:59:29.814009: step: 44/470, loss: 0.020521540194749832 2023-01-24 06:59:30.503023: step: 46/470, loss: 0.004408894572407007 2023-01-24 06:59:31.264718: step: 48/470, loss: 0.6617658734321594 2023-01-24 06:59:31.954115: step: 50/470, loss: 0.00338426954112947 2023-01-24 06:59:32.726001: step: 52/470, loss: 0.09528716653585434 2023-01-24 06:59:33.448601: step: 54/470, loss: 0.002500073052942753 2023-01-24 06:59:34.195242: step: 56/470, loss: 0.023632550612092018 2023-01-24 06:59:34.913788: step: 58/470, loss: 0.02701537311077118 2023-01-24 06:59:35.622224: step: 60/470, loss: 0.0013945092214271426 2023-01-24 06:59:36.401868: step: 62/470, loss: 0.0031489867251366377 2023-01-24 06:59:37.081459: step: 64/470, loss: 0.0029364058282226324 2023-01-24 06:59:37.791824: step: 66/470, loss: 9.198818588629365e-05 2023-01-24 06:59:38.478715: step: 68/470, loss: 0.011578256264328957 2023-01-24 06:59:39.184434: step: 70/470, loss: 1.6287378912238637e-06 2023-01-24 06:59:39.876304: step: 72/470, loss: 0.013432585634291172 2023-01-24 06:59:40.585403: step: 74/470, loss: 4.559730768960435e-06 2023-01-24 06:59:41.389404: step: 76/470, loss: 0.01460875291377306 2023-01-24 06:59:42.084722: step: 78/470, loss: 0.007790104486048222 2023-01-24 06:59:42.890751: step: 80/470, loss: 0.001107382820919156 2023-01-24 06:59:43.734147: step: 82/470, loss: 0.15270131826400757 2023-01-24 06:59:44.456714: step: 84/470, loss: 0.0016121534863486886 2023-01-24 06:59:45.158321: step: 86/470, loss: 0.013319252990186214 2023-01-24 06:59:45.904952: step: 88/470, loss: 0.001663623726926744 2023-01-24 06:59:46.779994: step: 90/470, loss: 5.101099668536335e-05 2023-01-24 06:59:47.540477: step: 92/470, loss: 0.3214491605758667 2023-01-24 06:59:48.212248: step: 94/470, loss: 0.0008662448963150382 2023-01-24 06:59:48.990743: step: 96/470, loss: 0.002882634988054633 2023-01-24 06:59:49.763140: step: 98/470, loss: 0.041424382477998734 2023-01-24 06:59:50.511847: step: 100/470, loss: 0.00402632774785161 2023-01-24 06:59:51.284767: step: 102/470, loss: 0.018145432695746422 2023-01-24 06:59:51.991208: step: 104/470, loss: 0.0006512062391266227 2023-01-24 06:59:52.692821: step: 106/470, loss: 0.0001297965063713491 2023-01-24 06:59:53.477889: step: 108/470, loss: 0.13866417109966278 2023-01-24 06:59:54.191079: step: 110/470, loss: 0.0011843107640743256 2023-01-24 06:59:55.044920: step: 112/470, loss: 0.0004619006940629333 2023-01-24 06:59:55.799652: step: 114/470, loss: 0.03009113296866417 2023-01-24 06:59:56.497379: step: 116/470, loss: 0.0007518218480981886 2023-01-24 06:59:57.187994: step: 118/470, loss: 0.00045790887088514864 2023-01-24 06:59:57.881049: step: 120/470, loss: 0.005708106327801943 2023-01-24 06:59:58.628342: step: 122/470, loss: 0.00396349374204874 2023-01-24 06:59:59.300509: step: 124/470, loss: 0.0009232903830707073 2023-01-24 07:00:00.117866: step: 126/470, loss: 0.012349043041467667 2023-01-24 07:00:00.915794: step: 128/470, loss: 0.01509046833962202 2023-01-24 07:00:01.661903: step: 130/470, loss: 0.0012966676149517298 2023-01-24 07:00:02.382436: step: 132/470, loss: 0.0010874420404434204 2023-01-24 07:00:03.125719: step: 134/470, loss: 0.001199746155180037 2023-01-24 07:00:03.748986: step: 136/470, loss: 0.04231145977973938 2023-01-24 07:00:04.470718: step: 138/470, loss: 0.0131779033690691 2023-01-24 07:00:05.172308: step: 140/470, loss: 0.7903496623039246 2023-01-24 07:00:05.971286: step: 142/470, loss: 0.00014183785242494196 2023-01-24 07:00:06.695589: step: 144/470, loss: 0.00482182577252388 2023-01-24 07:00:07.402425: step: 146/470, loss: 0.026487575843930244 2023-01-24 07:00:08.176884: step: 148/470, loss: 0.0030447212047874928 2023-01-24 07:00:08.906914: step: 150/470, loss: 6.557068263646215e-05 2023-01-24 07:00:09.622053: step: 152/470, loss: 0.0004728731291834265 2023-01-24 07:00:10.423252: step: 154/470, loss: 0.0145382359623909 2023-01-24 07:00:11.100088: step: 156/470, loss: 0.0035765781067311764 2023-01-24 07:00:11.830940: step: 158/470, loss: 0.0564517006278038 2023-01-24 07:00:12.535108: step: 160/470, loss: 0.0001131351527874358 2023-01-24 07:00:13.239555: step: 162/470, loss: 0.005984265822917223 2023-01-24 07:00:13.907868: step: 164/470, loss: 0.012996729463338852 2023-01-24 07:00:14.566207: step: 166/470, loss: 0.03707212582230568 2023-01-24 07:00:15.387699: step: 168/470, loss: 0.08814679831266403 2023-01-24 07:00:16.179906: step: 170/470, loss: 0.01129702664911747 2023-01-24 07:00:16.861723: step: 172/470, loss: 4.848052776651457e-05 2023-01-24 07:00:17.664024: step: 174/470, loss: 0.08441205322742462 2023-01-24 07:00:18.402002: step: 176/470, loss: 0.005867713131010532 2023-01-24 07:00:19.123896: step: 178/470, loss: 0.00030190395773388445 2023-01-24 07:00:19.856809: step: 180/470, loss: 0.009309839457273483 2023-01-24 07:00:20.587706: step: 182/470, loss: 0.005093792919069529 2023-01-24 07:00:21.347289: step: 184/470, loss: 0.0002611110976431519 2023-01-24 07:00:22.084614: step: 186/470, loss: 0.0002226200158474967 2023-01-24 07:00:22.850270: step: 188/470, loss: 0.015231126919388771 2023-01-24 07:00:23.511992: step: 190/470, loss: 0.045069869607686996 2023-01-24 07:00:24.184298: step: 192/470, loss: 0.15897539258003235 2023-01-24 07:00:24.893458: step: 194/470, loss: 0.003946701996028423 2023-01-24 07:00:25.573087: step: 196/470, loss: 0.0025108088739216328 2023-01-24 07:00:26.311671: step: 198/470, loss: 0.0012698525097221136 2023-01-24 07:00:27.102112: step: 200/470, loss: 0.6876391172409058 2023-01-24 07:00:27.835575: step: 202/470, loss: 0.004398363176733255 2023-01-24 07:00:28.676438: step: 204/470, loss: 0.01833234541118145 2023-01-24 07:00:29.335117: step: 206/470, loss: 0.0011467249132692814 2023-01-24 07:00:30.035518: step: 208/470, loss: 0.00026933234767057 2023-01-24 07:00:30.790834: step: 210/470, loss: 0.00012024387979181483 2023-01-24 07:00:31.478245: step: 212/470, loss: 0.024397404864430428 2023-01-24 07:00:32.216053: step: 214/470, loss: 0.0003621695504989475 2023-01-24 07:00:32.910195: step: 216/470, loss: 0.0002858054649550468 2023-01-24 07:00:33.680566: step: 218/470, loss: 1.0421897172927856 2023-01-24 07:00:34.448139: step: 220/470, loss: 0.007457996252924204 2023-01-24 07:00:35.155612: step: 222/470, loss: 0.004006984643638134 2023-01-24 07:00:35.795243: step: 224/470, loss: 0.0020408506970852613 2023-01-24 07:00:36.451273: step: 226/470, loss: 0.007179014850407839 2023-01-24 07:00:37.192317: step: 228/470, loss: 0.0010738419368863106 2023-01-24 07:00:37.924181: step: 230/470, loss: 0.0004274783132132143 2023-01-24 07:00:38.637707: step: 232/470, loss: 0.0001489683927502483 2023-01-24 07:00:39.430044: step: 234/470, loss: 0.00039236308657564223 2023-01-24 07:00:40.132693: step: 236/470, loss: 0.0004236626555211842 2023-01-24 07:00:40.923353: step: 238/470, loss: 0.003484722226858139 2023-01-24 07:00:41.637724: step: 240/470, loss: 0.3052813410758972 2023-01-24 07:00:42.327703: step: 242/470, loss: 0.0012881134171038866 2023-01-24 07:00:42.990721: step: 244/470, loss: 0.0002472828491590917 2023-01-24 07:00:43.653224: step: 246/470, loss: 0.002726994687691331 2023-01-24 07:00:44.405980: step: 248/470, loss: 0.06096314638853073 2023-01-24 07:00:45.112786: step: 250/470, loss: 0.006045083049684763 2023-01-24 07:00:45.756405: step: 252/470, loss: 0.003386293537914753 2023-01-24 07:00:46.498222: step: 254/470, loss: 0.0014597978442907333 2023-01-24 07:00:47.224417: step: 256/470, loss: 0.022637123242020607 2023-01-24 07:00:48.076905: step: 258/470, loss: 0.0008558848057873547 2023-01-24 07:00:48.844476: step: 260/470, loss: 0.0035284487530589104 2023-01-24 07:00:49.635870: step: 262/470, loss: 4.839608664042316e-06 2023-01-24 07:00:50.392152: step: 264/470, loss: 0.0002396140480414033 2023-01-24 07:00:51.116883: step: 266/470, loss: 0.0001242422586074099 2023-01-24 07:00:51.864161: step: 268/470, loss: 0.006238142028450966 2023-01-24 07:00:52.566941: step: 270/470, loss: 0.001572537119500339 2023-01-24 07:00:53.219600: step: 272/470, loss: 0.0011656886199489236 2023-01-24 07:00:54.038849: step: 274/470, loss: 0.047970063984394073 2023-01-24 07:00:54.828745: step: 276/470, loss: 0.017633995041251183 2023-01-24 07:00:55.572672: step: 278/470, loss: 0.00019742750737350434 2023-01-24 07:00:56.286763: step: 280/470, loss: 0.12787386775016785 2023-01-24 07:00:57.050348: step: 282/470, loss: 0.009434567764401436 2023-01-24 07:00:57.770329: step: 284/470, loss: 0.013597944751381874 2023-01-24 07:00:58.510599: step: 286/470, loss: 0.00370029010809958 2023-01-24 07:00:59.216183: step: 288/470, loss: 0.04034204035997391 2023-01-24 07:00:59.970330: step: 290/470, loss: 0.0010682768188416958 2023-01-24 07:01:00.716600: step: 292/470, loss: 0.003589589847251773 2023-01-24 07:01:01.484801: step: 294/470, loss: 0.0023420064244419336 2023-01-24 07:01:02.230354: step: 296/470, loss: 0.2466408759355545 2023-01-24 07:01:03.030835: step: 298/470, loss: 0.028866639360785484 2023-01-24 07:01:03.765484: step: 300/470, loss: 0.002564261667430401 2023-01-24 07:01:04.512199: step: 302/470, loss: 0.02908160910010338 2023-01-24 07:01:05.307131: step: 304/470, loss: 0.01281247939914465 2023-01-24 07:01:06.001587: step: 306/470, loss: 0.0089055011048913 2023-01-24 07:01:06.735756: step: 308/470, loss: 0.0027301160153001547 2023-01-24 07:01:07.457280: step: 310/470, loss: 0.0009317622752860188 2023-01-24 07:01:08.141741: step: 312/470, loss: 0.0025892360135912895 2023-01-24 07:01:08.917282: step: 314/470, loss: 0.00038833668804727495 2023-01-24 07:01:09.656860: step: 316/470, loss: 0.011556989513337612 2023-01-24 07:01:10.356064: step: 318/470, loss: 0.0006725000566802919 2023-01-24 07:01:11.081884: step: 320/470, loss: 0.005721176974475384 2023-01-24 07:01:11.794881: step: 322/470, loss: 0.000432559143519029 2023-01-24 07:01:12.578315: step: 324/470, loss: 0.026360701769590378 2023-01-24 07:01:13.386652: step: 326/470, loss: 0.002206456381827593 2023-01-24 07:01:14.090078: step: 328/470, loss: 0.0021658348850905895 2023-01-24 07:01:14.808762: step: 330/470, loss: 0.0009932523826137185 2023-01-24 07:01:15.538520: step: 332/470, loss: 0.01891978457570076 2023-01-24 07:01:16.296644: step: 334/470, loss: 0.0009264256223104894 2023-01-24 07:01:17.084286: step: 336/470, loss: 0.03548232465982437 2023-01-24 07:01:17.796023: step: 338/470, loss: 0.0002263460773974657 2023-01-24 07:01:18.429892: step: 340/470, loss: 0.0013739075511693954 2023-01-24 07:01:19.184633: step: 342/470, loss: 0.29935917258262634 2023-01-24 07:01:19.975679: step: 344/470, loss: 0.007482586428523064 2023-01-24 07:01:20.621107: step: 346/470, loss: 0.012100168503820896 2023-01-24 07:01:21.334639: step: 348/470, loss: 0.00046900202869437635 2023-01-24 07:01:22.104457: step: 350/470, loss: 0.0212935209274292 2023-01-24 07:01:22.848165: step: 352/470, loss: 0.000681842677295208 2023-01-24 07:01:23.616566: step: 354/470, loss: 0.006671892944723368 2023-01-24 07:01:24.409252: step: 356/470, loss: 0.024509701877832413 2023-01-24 07:01:25.115085: step: 358/470, loss: 0.0007479641353711486 2023-01-24 07:01:25.858160: step: 360/470, loss: 0.06951668113470078 2023-01-24 07:01:26.650314: step: 362/470, loss: 0.02821964956820011 2023-01-24 07:01:27.354853: step: 364/470, loss: 0.005078599322587252 2023-01-24 07:01:28.130667: step: 366/470, loss: 0.021827857941389084 2023-01-24 07:01:28.891066: step: 368/470, loss: 0.0009424221352674067 2023-01-24 07:01:29.606471: step: 370/470, loss: 0.38306307792663574 2023-01-24 07:01:30.353041: step: 372/470, loss: 0.0023595362436026335 2023-01-24 07:01:31.092262: step: 374/470, loss: 0.01760093681514263 2023-01-24 07:01:31.773301: step: 376/470, loss: 0.0019117107149213552 2023-01-24 07:01:32.474391: step: 378/470, loss: 0.010603736154735088 2023-01-24 07:01:33.185275: step: 380/470, loss: 0.0070642814971506596 2023-01-24 07:01:33.951265: step: 382/470, loss: 0.0003894304682034999 2023-01-24 07:01:34.683989: step: 384/470, loss: 0.003691543824970722 2023-01-24 07:01:35.415299: step: 386/470, loss: 0.07347030937671661 2023-01-24 07:01:36.181682: step: 388/470, loss: 0.0005422882386483252 2023-01-24 07:01:36.881758: step: 390/470, loss: 0.10034418106079102 2023-01-24 07:01:37.516214: step: 392/470, loss: 0.0036059198901057243 2023-01-24 07:01:38.311572: step: 394/470, loss: 0.000504388939589262 2023-01-24 07:01:38.974763: step: 396/470, loss: 0.01450659055262804 2023-01-24 07:01:39.670272: step: 398/470, loss: 0.0029942444525659084 2023-01-24 07:01:40.349765: step: 400/470, loss: 0.0008541368297301233 2023-01-24 07:01:41.058018: step: 402/470, loss: 0.017634334042668343 2023-01-24 07:01:41.741443: step: 404/470, loss: 0.0022887280210852623 2023-01-24 07:01:42.486657: step: 406/470, loss: 0.03916192799806595 2023-01-24 07:01:43.204755: step: 408/470, loss: 0.16810278594493866 2023-01-24 07:01:43.928928: step: 410/470, loss: 0.0020672930404543877 2023-01-24 07:01:44.684859: step: 412/470, loss: 0.01781904324889183 2023-01-24 07:01:45.349734: step: 414/470, loss: 0.001842794707044959 2023-01-24 07:01:46.086797: step: 416/470, loss: 0.021762700751423836 2023-01-24 07:01:46.804036: step: 418/470, loss: 0.013885277323424816 2023-01-24 07:01:47.446017: step: 420/470, loss: 0.0006842563161626458 2023-01-24 07:01:48.148679: step: 422/470, loss: 0.0057321698404848576 2023-01-24 07:01:48.814626: step: 424/470, loss: 0.009932457469403744 2023-01-24 07:01:49.632606: step: 426/470, loss: 0.008049868047237396 2023-01-24 07:01:50.333645: step: 428/470, loss: 0.005516226403415203 2023-01-24 07:01:51.102407: step: 430/470, loss: 0.0049128164537250996 2023-01-24 07:01:51.923468: step: 432/470, loss: 0.1927299201488495 2023-01-24 07:01:52.626527: step: 434/470, loss: 0.007491968106478453 2023-01-24 07:01:53.336714: step: 436/470, loss: 0.009216003119945526 2023-01-24 07:01:54.009159: step: 438/470, loss: 0.00041842067730613053 2023-01-24 07:01:54.717864: step: 440/470, loss: 0.0009216439793817699 2023-01-24 07:01:55.442717: step: 442/470, loss: 0.14297710359096527 2023-01-24 07:01:56.203233: step: 444/470, loss: 0.04162221401929855 2023-01-24 07:01:56.960733: step: 446/470, loss: 0.01259060762822628 2023-01-24 07:01:57.726531: step: 448/470, loss: 0.02452508918941021 2023-01-24 07:01:58.444272: step: 450/470, loss: 0.008215454407036304 2023-01-24 07:01:59.186681: step: 452/470, loss: 9.857612894847989e-05 2023-01-24 07:01:59.907894: step: 454/470, loss: 0.004926904104650021 2023-01-24 07:02:00.605010: step: 456/470, loss: 0.00019339239224791527 2023-01-24 07:02:01.263705: step: 458/470, loss: 0.0010940470965579152 2023-01-24 07:02:02.038943: step: 460/470, loss: 0.0007743262685835361 2023-01-24 07:02:02.874146: step: 462/470, loss: 0.00038364637293852866 2023-01-24 07:02:03.663007: step: 464/470, loss: 0.001179278246127069 2023-01-24 07:02:04.424616: step: 466/470, loss: 0.007702074479311705 2023-01-24 07:02:05.197412: step: 468/470, loss: 0.00043093261774629354 2023-01-24 07:02:05.930109: step: 470/470, loss: 0.05112699419260025 2023-01-24 07:02:06.722936: step: 472/470, loss: 0.015505004674196243 2023-01-24 07:02:07.454118: step: 474/470, loss: 0.0007698743138462305 2023-01-24 07:02:08.227797: step: 476/470, loss: 0.15226824581623077 2023-01-24 07:02:08.933755: step: 478/470, loss: 0.00011416709457989782 2023-01-24 07:02:09.622375: step: 480/470, loss: 0.009174869395792484 2023-01-24 07:02:10.389621: step: 482/470, loss: 0.06859564036130905 2023-01-24 07:02:11.192115: step: 484/470, loss: 0.0012576787266880274 2023-01-24 07:02:11.896763: step: 486/470, loss: 0.0073244026862084866 2023-01-24 07:02:12.653792: step: 488/470, loss: 0.0008550825295969844 2023-01-24 07:02:13.383070: step: 490/470, loss: 0.05312467738986015 2023-01-24 07:02:14.104100: step: 492/470, loss: 0.022805843502283096 2023-01-24 07:02:14.862264: step: 494/470, loss: 0.005543670151382685 2023-01-24 07:02:15.568458: step: 496/470, loss: 0.0007484982488676906 2023-01-24 07:02:16.270328: step: 498/470, loss: 0.06718970835208893 2023-01-24 07:02:17.016516: step: 500/470, loss: 0.013428159058094025 2023-01-24 07:02:17.755375: step: 502/470, loss: 0.027854073792696 2023-01-24 07:02:18.481920: step: 504/470, loss: 0.010355038568377495 2023-01-24 07:02:19.281986: step: 506/470, loss: 0.02420848049223423 2023-01-24 07:02:19.992450: step: 508/470, loss: 2.1866453607799485e-05 2023-01-24 07:02:20.716867: step: 510/470, loss: 0.010722950100898743 2023-01-24 07:02:21.424940: step: 512/470, loss: 0.0001193592615891248 2023-01-24 07:02:22.191283: step: 514/470, loss: 0.4225482940673828 2023-01-24 07:02:22.960091: step: 516/470, loss: 0.015295075252652168 2023-01-24 07:02:23.729621: step: 518/470, loss: 0.32571691274642944 2023-01-24 07:02:24.460390: step: 520/470, loss: 0.0004058307677041739 2023-01-24 07:02:25.230901: step: 522/470, loss: 0.019950976595282555 2023-01-24 07:02:25.992563: step: 524/470, loss: 0.014127479866147041 2023-01-24 07:02:26.783295: step: 526/470, loss: 0.000844079302623868 2023-01-24 07:02:27.455762: step: 528/470, loss: 0.0007464765221811831 2023-01-24 07:02:28.222263: step: 530/470, loss: 0.0009180636261589825 2023-01-24 07:02:28.943996: step: 532/470, loss: 0.0004960569203831255 2023-01-24 07:02:29.645962: step: 534/470, loss: 0.0015324270352721214 2023-01-24 07:02:30.487349: step: 536/470, loss: 0.74675053358078 2023-01-24 07:02:31.144679: step: 538/470, loss: 0.0016997962957248092 2023-01-24 07:02:31.897225: step: 540/470, loss: 0.03810073807835579 2023-01-24 07:02:32.618932: step: 542/470, loss: 0.02312638983130455 2023-01-24 07:02:33.428838: step: 544/470, loss: 0.20076841115951538 2023-01-24 07:02:34.161873: step: 546/470, loss: 0.044742703437805176 2023-01-24 07:02:34.885299: step: 548/470, loss: 0.033179204910993576 2023-01-24 07:02:35.648244: step: 550/470, loss: 0.048377711325883865 2023-01-24 07:02:36.447988: step: 552/470, loss: 0.006665708031505346 2023-01-24 07:02:37.240192: step: 554/470, loss: 0.011248448863625526 2023-01-24 07:02:37.954231: step: 556/470, loss: 0.010977053083479404 2023-01-24 07:02:38.727420: step: 558/470, loss: 0.007646726910024881 2023-01-24 07:02:39.466546: step: 560/470, loss: 0.0020836268085986376 2023-01-24 07:02:40.160605: step: 562/470, loss: 0.00017479869711678475 2023-01-24 07:02:40.918318: step: 564/470, loss: 0.002762383548542857 2023-01-24 07:02:41.650857: step: 566/470, loss: 0.0010429834946990013 2023-01-24 07:02:42.376725: step: 568/470, loss: 0.02210627682507038 2023-01-24 07:02:43.119795: step: 570/470, loss: 3.689844015752897e-05 2023-01-24 07:02:43.830605: step: 572/470, loss: 0.06625575572252274 2023-01-24 07:02:44.504560: step: 574/470, loss: 0.006409522611647844 2023-01-24 07:02:45.350358: step: 576/470, loss: 0.0005805494729429483 2023-01-24 07:02:46.129782: step: 578/470, loss: 0.012947368435561657 2023-01-24 07:02:46.897942: step: 580/470, loss: 0.00042535990360192955 2023-01-24 07:02:47.577117: step: 582/470, loss: 0.00019787903875112534 2023-01-24 07:02:48.267017: step: 584/470, loss: 0.14290878176689148 2023-01-24 07:02:49.093216: step: 586/470, loss: 0.013853715732693672 2023-01-24 07:02:49.837113: step: 588/470, loss: 0.030745120719075203 2023-01-24 07:02:50.518258: step: 590/470, loss: 0.0006633042357861996 2023-01-24 07:02:51.252418: step: 592/470, loss: 0.022054284811019897 2023-01-24 07:02:52.078981: step: 594/470, loss: 0.0035873970482498407 2023-01-24 07:02:52.870132: step: 596/470, loss: 0.001877595204859972 2023-01-24 07:02:53.634781: step: 598/470, loss: 0.021728595718741417 2023-01-24 07:02:54.405182: step: 600/470, loss: 0.0009449566714465618 2023-01-24 07:02:55.156900: step: 602/470, loss: 0.012191184796392918 2023-01-24 07:02:55.864344: step: 604/470, loss: 0.21409592032432556 2023-01-24 07:02:56.532418: step: 606/470, loss: 0.0021795283537358046 2023-01-24 07:02:57.313861: step: 608/470, loss: 0.08947306126356125 2023-01-24 07:02:58.051375: step: 610/470, loss: 0.027726903557777405 2023-01-24 07:02:58.805586: step: 612/470, loss: 0.004544347990304232 2023-01-24 07:02:59.524379: step: 614/470, loss: 0.006356291007250547 2023-01-24 07:03:00.333580: step: 616/470, loss: 0.0020529376342892647 2023-01-24 07:03:01.111763: step: 618/470, loss: 0.0441039502620697 2023-01-24 07:03:01.802830: step: 620/470, loss: 0.05852342024445534 2023-01-24 07:03:02.523030: step: 622/470, loss: 0.0003269554581493139 2023-01-24 07:03:03.257605: step: 624/470, loss: 0.015510090626776218 2023-01-24 07:03:03.951393: step: 626/470, loss: 0.00037635324406437576 2023-01-24 07:03:04.698370: step: 628/470, loss: 0.002281700260937214 2023-01-24 07:03:05.400055: step: 630/470, loss: 0.010925859212875366 2023-01-24 07:03:06.149247: step: 632/470, loss: 0.003865706268697977 2023-01-24 07:03:06.888382: step: 634/470, loss: 0.0007639245595782995 2023-01-24 07:03:07.669644: step: 636/470, loss: 0.04122069478034973 2023-01-24 07:03:08.387091: step: 638/470, loss: 0.013298324309289455 2023-01-24 07:03:09.088767: step: 640/470, loss: 0.007208535913378 2023-01-24 07:03:09.888936: step: 642/470, loss: 0.5793889760971069 2023-01-24 07:03:10.661061: step: 644/470, loss: 0.010782578960061073 2023-01-24 07:03:11.368425: step: 646/470, loss: 0.003287682542577386 2023-01-24 07:03:12.072204: step: 648/470, loss: 0.005844367202371359 2023-01-24 07:03:12.763894: step: 650/470, loss: 0.00159078452270478 2023-01-24 07:03:13.459366: step: 652/470, loss: 0.007373278960585594 2023-01-24 07:03:14.113541: step: 654/470, loss: 0.0001302184391533956 2023-01-24 07:03:14.827462: step: 656/470, loss: 0.002802535193040967 2023-01-24 07:03:15.579380: step: 658/470, loss: 0.028815504163503647 2023-01-24 07:03:16.368017: step: 660/470, loss: 0.0023840556386858225 2023-01-24 07:03:17.116378: step: 662/470, loss: 0.00024311590823344886 2023-01-24 07:03:17.808032: step: 664/470, loss: 1.6967294868663885e-05 2023-01-24 07:03:18.483008: step: 666/470, loss: 0.00038249947829172015 2023-01-24 07:03:19.280747: step: 668/470, loss: 0.0011958489194512367 2023-01-24 07:03:20.102879: step: 670/470, loss: 0.0026525375433266163 2023-01-24 07:03:20.780545: step: 672/470, loss: 0.0011103339493274689 2023-01-24 07:03:21.508989: step: 674/470, loss: 7.855349394958466e-05 2023-01-24 07:03:22.306887: step: 676/470, loss: 0.036018408834934235 2023-01-24 07:03:23.109214: step: 678/470, loss: 0.03961295261979103 2023-01-24 07:03:23.805892: step: 680/470, loss: 0.009320229291915894 2023-01-24 07:03:24.500875: step: 682/470, loss: 0.0022059644106775522 2023-01-24 07:03:25.297002: step: 684/470, loss: 0.07972223311662674 2023-01-24 07:03:26.057214: step: 686/470, loss: 0.003704060334712267 2023-01-24 07:03:26.855892: step: 688/470, loss: 0.5761838555335999 2023-01-24 07:03:27.588216: step: 690/470, loss: 0.002108166925609112 2023-01-24 07:03:28.358533: step: 692/470, loss: 0.001077714143320918 2023-01-24 07:03:29.116745: step: 694/470, loss: 0.03051694482564926 2023-01-24 07:03:29.834836: step: 696/470, loss: 2.4262013539555483e-05 2023-01-24 07:03:30.564617: step: 698/470, loss: 0.009325804188847542 2023-01-24 07:03:31.354170: step: 700/470, loss: 0.0015849667834118009 2023-01-24 07:03:32.083261: step: 702/470, loss: 0.005560775753110647 2023-01-24 07:03:32.890276: step: 704/470, loss: 0.05311994254589081 2023-01-24 07:03:33.674164: step: 706/470, loss: 0.00885532982647419 2023-01-24 07:03:34.425395: step: 708/470, loss: 0.025782722979784012 2023-01-24 07:03:35.113975: step: 710/470, loss: 0.00032764862407930195 2023-01-24 07:03:35.821590: step: 712/470, loss: 0.0006452351808547974 2023-01-24 07:03:36.587154: step: 714/470, loss: 0.018562477082014084 2023-01-24 07:03:37.360214: step: 716/470, loss: 0.004105889238417149 2023-01-24 07:03:38.104363: step: 718/470, loss: 0.004994503688067198 2023-01-24 07:03:38.827643: step: 720/470, loss: 0.015740545466542244 2023-01-24 07:03:39.572848: step: 722/470, loss: 0.002721569035202265 2023-01-24 07:03:40.342612: step: 724/470, loss: 0.0018496755510568619 2023-01-24 07:03:41.043249: step: 726/470, loss: 0.029428904876112938 2023-01-24 07:03:41.857872: step: 728/470, loss: 0.0025027492083609104 2023-01-24 07:03:42.580004: step: 730/470, loss: 0.0005519500700756907 2023-01-24 07:03:43.313818: step: 732/470, loss: 0.00124834175221622 2023-01-24 07:03:44.068814: step: 734/470, loss: 0.025663498789072037 2023-01-24 07:03:44.807770: step: 736/470, loss: 0.00022738105326425284 2023-01-24 07:03:45.685241: step: 738/470, loss: 0.04520634189248085 2023-01-24 07:03:46.422225: step: 740/470, loss: 0.00048320789937861264 2023-01-24 07:03:47.231388: step: 742/470, loss: 0.001786353881470859 2023-01-24 07:03:47.943167: step: 744/470, loss: 0.011439421214163303 2023-01-24 07:03:48.709940: step: 746/470, loss: 0.035814620554447174 2023-01-24 07:03:49.438383: step: 748/470, loss: 0.46565985679626465 2023-01-24 07:03:50.397910: step: 750/470, loss: 0.0016404861817136407 2023-01-24 07:03:51.195601: step: 752/470, loss: 0.006969146430492401 2023-01-24 07:03:51.991957: step: 754/470, loss: 0.011821585707366467 2023-01-24 07:03:52.807839: step: 756/470, loss: 0.0448044054210186 2023-01-24 07:03:53.595124: step: 758/470, loss: 0.0014806183753535151 2023-01-24 07:03:54.340256: step: 760/470, loss: 0.25201913714408875 2023-01-24 07:03:55.032369: step: 762/470, loss: 0.026176057755947113 2023-01-24 07:03:55.755160: step: 764/470, loss: 0.0018919870490208268 2023-01-24 07:03:56.501310: step: 766/470, loss: 6.11542709521018e-05 2023-01-24 07:03:57.351994: step: 768/470, loss: 0.06037821248173714 2023-01-24 07:03:58.094650: step: 770/470, loss: 0.005792189389467239 2023-01-24 07:03:58.832018: step: 772/470, loss: 0.00010853780258912593 2023-01-24 07:03:59.532727: step: 774/470, loss: 3.3160622479044832e-06 2023-01-24 07:04:00.340452: step: 776/470, loss: 0.048592712730169296 2023-01-24 07:04:01.099448: step: 778/470, loss: 0.0152989961206913 2023-01-24 07:04:01.831677: step: 780/470, loss: 0.027926130220294 2023-01-24 07:04:02.542333: step: 782/470, loss: 0.0008958295802585781 2023-01-24 07:04:03.177272: step: 784/470, loss: 0.00017820145876612514 2023-01-24 07:04:03.913079: step: 786/470, loss: 0.028859004378318787 2023-01-24 07:04:04.608985: step: 788/470, loss: 0.009325975552201271 2023-01-24 07:04:05.267541: step: 790/470, loss: 0.018157802522182465 2023-01-24 07:04:05.986652: step: 792/470, loss: 5.18089764227625e-05 2023-01-24 07:04:06.768688: step: 794/470, loss: 0.0016927659744396806 2023-01-24 07:04:07.495739: step: 796/470, loss: 0.004147370811551809 2023-01-24 07:04:08.208536: step: 798/470, loss: 0.002366115804761648 2023-01-24 07:04:08.939555: step: 800/470, loss: 0.08526662737131119 2023-01-24 07:04:09.610084: step: 802/470, loss: 0.02207419089972973 2023-01-24 07:04:10.438669: step: 804/470, loss: 0.00026135783991776407 2023-01-24 07:04:11.111784: step: 806/470, loss: 0.017718037590384483 2023-01-24 07:04:11.897726: step: 808/470, loss: 0.009163436479866505 2023-01-24 07:04:12.624914: step: 810/470, loss: 0.049685999751091 2023-01-24 07:04:13.428421: step: 812/470, loss: 0.004708106629550457 2023-01-24 07:04:14.144510: step: 814/470, loss: 0.01260793674737215 2023-01-24 07:04:14.846935: step: 816/470, loss: 0.7914705276489258 2023-01-24 07:04:15.597661: step: 818/470, loss: 0.007497282233089209 2023-01-24 07:04:16.375648: step: 820/470, loss: 0.0009469312499277294 2023-01-24 07:04:17.153179: step: 822/470, loss: 0.001793618779629469 2023-01-24 07:04:17.868156: step: 824/470, loss: 0.025681249797344208 2023-01-24 07:04:18.625866: step: 826/470, loss: 0.037340566515922546 2023-01-24 07:04:19.386173: step: 828/470, loss: 0.00744800828397274 2023-01-24 07:04:20.161235: step: 830/470, loss: 1.3749153367825784e-05 2023-01-24 07:04:20.863799: step: 832/470, loss: 0.014599119313061237 2023-01-24 07:04:21.677293: step: 834/470, loss: 0.0006064804038032889 2023-01-24 07:04:22.446373: step: 836/470, loss: 0.0008935255464166403 2023-01-24 07:04:23.211017: step: 838/470, loss: 0.003527364693582058 2023-01-24 07:04:24.050385: step: 840/470, loss: 0.0005473060882650316 2023-01-24 07:04:24.903568: step: 842/470, loss: 0.10252714157104492 2023-01-24 07:04:25.649782: step: 844/470, loss: 0.004621810279786587 2023-01-24 07:04:26.541506: step: 846/470, loss: 0.0056939744390547276 2023-01-24 07:04:27.258243: step: 848/470, loss: 0.021790560334920883 2023-01-24 07:04:28.011798: step: 850/470, loss: 0.011583208106458187 2023-01-24 07:04:28.732139: step: 852/470, loss: 0.002285032533109188 2023-01-24 07:04:29.453874: step: 854/470, loss: 0.0009343082783743739 2023-01-24 07:04:30.212498: step: 856/470, loss: 0.006179484073072672 2023-01-24 07:04:30.915304: step: 858/470, loss: 0.007022823207080364 2023-01-24 07:04:31.691762: step: 860/470, loss: 0.01619824767112732 2023-01-24 07:04:32.426393: step: 862/470, loss: 0.0002929774345830083 2023-01-24 07:04:33.159588: step: 864/470, loss: 0.01771281473338604 2023-01-24 07:04:33.852421: step: 866/470, loss: 1.254774360859301e-05 2023-01-24 07:04:34.627447: step: 868/470, loss: 0.001473193638958037 2023-01-24 07:04:35.367421: step: 870/470, loss: 0.001364637166261673 2023-01-24 07:04:36.036387: step: 872/470, loss: 0.0011659307638183236 2023-01-24 07:04:36.720945: step: 874/470, loss: 0.0024868298787623644 2023-01-24 07:04:37.334215: step: 876/470, loss: 0.0011807549744844437 2023-01-24 07:04:38.002585: step: 878/470, loss: 0.00360662373714149 2023-01-24 07:04:38.784125: step: 880/470, loss: 0.0015713890315964818 2023-01-24 07:04:39.463351: step: 882/470, loss: 6.543661584146321e-05 2023-01-24 07:04:40.190126: step: 884/470, loss: 0.004186991136521101 2023-01-24 07:04:40.917518: step: 886/470, loss: 0.0009998814202845097 2023-01-24 07:04:41.665802: step: 888/470, loss: 0.0374150276184082 2023-01-24 07:04:42.544933: step: 890/470, loss: 0.01954301819205284 2023-01-24 07:04:43.296525: step: 892/470, loss: 0.0002561210421845317 2023-01-24 07:04:44.075793: step: 894/470, loss: 0.0005132320802658796 2023-01-24 07:04:44.838274: step: 896/470, loss: 0.07816541939973831 2023-01-24 07:04:45.631666: step: 898/470, loss: 0.014139272272586823 2023-01-24 07:04:46.365836: step: 900/470, loss: 0.00017748454411048442 2023-01-24 07:04:47.194633: step: 902/470, loss: 0.0016783780883997679 2023-01-24 07:04:47.987807: step: 904/470, loss: 0.022887928411364555 2023-01-24 07:04:48.733130: step: 906/470, loss: 0.0025164291728287935 2023-01-24 07:04:49.493136: step: 908/470, loss: 0.0015736209461465478 2023-01-24 07:04:50.214474: step: 910/470, loss: 5.445835540740518e-06 2023-01-24 07:04:51.009992: step: 912/470, loss: 0.003006444312632084 2023-01-24 07:04:51.813516: step: 914/470, loss: 0.05807241424918175 2023-01-24 07:04:52.681028: step: 916/470, loss: 0.008495689369738102 2023-01-24 07:04:53.375146: step: 918/470, loss: 0.0016545577673241496 2023-01-24 07:04:54.117171: step: 920/470, loss: 0.0005608369829133153 2023-01-24 07:04:54.865213: step: 922/470, loss: 0.1146390438079834 2023-01-24 07:04:55.623567: step: 924/470, loss: 0.00022846035426482558 2023-01-24 07:04:56.289329: step: 926/470, loss: 0.14736011624336243 2023-01-24 07:04:56.953328: step: 928/470, loss: 0.00010952012962661684 2023-01-24 07:04:57.747589: step: 930/470, loss: 0.3251962661743164 2023-01-24 07:04:58.564397: step: 932/470, loss: 0.005167566705495119 2023-01-24 07:04:59.298779: step: 934/470, loss: 0.03677722439169884 2023-01-24 07:05:00.167034: step: 936/470, loss: 0.00461982935667038 2023-01-24 07:05:00.950856: step: 938/470, loss: 7.254021329572424e-05 2023-01-24 07:05:01.707024: step: 940/470, loss: 0.014099986292421818 2023-01-24 07:05:02.352243: step: 942/470, loss: 0.00016049730766098946 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3249765568011896, 'r': 0.3243599029932177, 'f1': 0.32466793708912767}, 'combined': 0.2392290062761993, 'epoch': 39} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36733517555532763, 'r': 0.34932162367713365, 'f1': 0.3581020094866624}, 'combined': 0.2387346729911082, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3288131042448772, 'r': 0.335052442086336, 'f1': 0.3319034529689832}, 'combined': 0.2445604390297771, 'epoch': 39} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3671483044937429, 'r': 0.34561364432632147, 'f1': 0.3560556613168641}, 'combined': 0.23737044087790934, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31500826228477613, 'r': 0.3383200691711258, 'f1': 0.3262482643242146}, 'combined': 0.24039345792310549, 'epoch': 39} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.359973345430056, 'r': 0.35720431969597866, 'f1': 0.35858348695349207}, 'combined': 0.23905565796899467, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2441860465116279, 'r': 0.3, 'f1': 0.26923076923076916}, 'combined': 0.17948717948717943, 'epoch': 39} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5769230769230769, 'r': 0.32608695652173914, 'f1': 0.41666666666666663}, 'combined': 0.27777777777777773, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4989878542510121, 'r': 0.22368421052631576, 'f1': 0.30889724310776934}, 'combined': 0.20593149540517955, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35514455782312926, 'r': 0.3302103099304238, 'f1': 0.34222386102917074}, 'combined': 0.2521649502320205, 'epoch': 34} Test for Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.36324986426135314, 'r': 0.3573121260955425, 'f1': 0.3602565304307942}, 'combined': 0.24017102028719609, 'epoch': 34} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2869318181818182, 'r': 0.3607142857142857, 'f1': 0.319620253164557}, 'combined': 0.21308016877637131, 'epoch': 34} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33522071999085, 'r': 0.31931840879583817, 'f1': 0.3270763876295563}, 'combined': 0.24100365404283097, 'epoch': 17} Test for Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.3666114489031648, 'r': 0.31126722055912937, 'f1': 0.3366800929604727}, 'combined': 0.22445339530698175, 'epoch': 17} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.65625, 'r': 0.45652173913043476, 'f1': 0.5384615384615383}, 'combined': 0.35897435897435886, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245705997242647, 'r': 0.31533234736019644, 'f1': 0.31988478740870746}, 'combined': 0.2357045801958897, 'epoch': 31} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.35151688133309544, 'r': 0.34577093231130446, 'f1': 0.34862023228672484}, 'combined': 0.23241348819114985, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6153846153846154, 'r': 0.27586206896551724, 'f1': 0.380952380952381}, 'combined': 0.25396825396825395, 'epoch': 31}