Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> 
name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> 
name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> 
name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: 
torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: 
torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: 
torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:38:51.867934: step: 2/464, loss: 25.893234252929688 2023-01-22 09:38:52.478217: step: 4/464, loss: 7.9878716468811035 2023-01-22 09:38:53.057680: step: 6/464, loss: 34.16461181640625 2023-01-22 09:38:53.737161: step: 8/464, loss: 18.327600479125977 2023-01-22 09:38:54.346733: step: 10/464, loss: 23.868892669677734 2023-01-22 09:38:54.980281: step: 12/464, loss: 7.332606315612793 2023-01-22 09:38:55.635801: step: 14/464, loss: 13.878583908081055 2023-01-22 09:38:56.253409: step: 16/464, loss: 22.151412963867188 2023-01-22 09:38:56.879896: step: 18/464, loss: 14.98178482055664 2023-01-22 09:38:57.594860: step: 20/464, loss: 5.897348403930664 2023-01-22 09:38:58.198152: step: 22/464, loss: 10.577079772949219 2023-01-22 09:38:58.814652: step: 24/464, loss: 21.333738327026367 2023-01-22 09:38:59.448423: step: 26/464, loss: 6.832368850708008 2023-01-22 09:39:00.076927: step: 28/464, loss: 8.707086563110352 2023-01-22 09:39:00.724018: step: 30/464, loss: 21.255126953125 2023-01-22 09:39:01.422053: step: 32/464, loss: 23.188125610351562 2023-01-22 09:39:02.020927: step: 34/464, loss: 11.354782104492188 2023-01-22 09:39:02.701329: step: 36/464, loss: 9.144607543945312 2023-01-22 09:39:03.337459: step: 38/464, loss: 9.938633918762207 2023-01-22 09:39:04.022295: step: 40/464, loss: 9.579240798950195 2023-01-22 09:39:04.643715: step: 42/464, loss: 7.6820969581604 2023-01-22 09:39:05.237757: step: 44/464, loss: 8.154853820800781 2023-01-22 09:39:05.869527: step: 46/464, loss: 17.464797973632812 2023-01-22 09:39:06.470954: step: 48/464, loss: 24.921092987060547 2023-01-22 09:39:07.147635: step: 50/464, loss: 10.282855987548828 2023-01-22 09:39:07.811186: step: 52/464, loss: 9.140432357788086 2023-01-22 09:39:08.442009: step: 54/464, loss: 9.383966445922852 2023-01-22 09:39:09.059004: step: 56/464, loss: 41.08988571166992 2023-01-22 09:39:09.621655: step: 
58/464, loss: 16.880178451538086 2023-01-22 09:39:10.203792: step: 60/464, loss: 5.725705146789551 2023-01-22 09:39:10.757634: step: 62/464, loss: 16.076332092285156 2023-01-22 09:39:11.407157: step: 64/464, loss: 23.628440856933594 2023-01-22 09:39:12.068630: step: 66/464, loss: 24.215709686279297 2023-01-22 09:39:12.696840: step: 68/464, loss: 4.20828914642334 2023-01-22 09:39:13.323122: step: 70/464, loss: 8.635614395141602 2023-01-22 09:39:13.901257: step: 72/464, loss: 25.89919662475586 2023-01-22 09:39:14.495541: step: 74/464, loss: 6.081369400024414 2023-01-22 09:39:15.093141: step: 76/464, loss: 8.278801918029785 2023-01-22 09:39:15.713697: step: 78/464, loss: 18.827049255371094 2023-01-22 09:39:16.280162: step: 80/464, loss: 15.493717193603516 2023-01-22 09:39:16.907944: step: 82/464, loss: 14.888334274291992 2023-01-22 09:39:17.523811: step: 84/464, loss: 7.287348747253418 2023-01-22 09:39:18.133515: step: 86/464, loss: 4.9151458740234375 2023-01-22 09:39:18.821824: step: 88/464, loss: 19.12151336669922 2023-01-22 09:39:19.393326: step: 90/464, loss: 10.497282981872559 2023-01-22 09:39:19.976277: step: 92/464, loss: 21.297636032104492 2023-01-22 09:39:20.693122: step: 94/464, loss: 15.331517219543457 2023-01-22 09:39:21.328040: step: 96/464, loss: 23.956851959228516 2023-01-22 09:39:21.979782: step: 98/464, loss: 11.051862716674805 2023-01-22 09:39:22.582539: step: 100/464, loss: 10.751426696777344 2023-01-22 09:39:23.177641: step: 102/464, loss: 13.166668891906738 2023-01-22 09:39:23.729708: step: 104/464, loss: 13.671867370605469 2023-01-22 09:39:24.328180: step: 106/464, loss: 14.124359130859375 2023-01-22 09:39:24.990141: step: 108/464, loss: 25.87872314453125 2023-01-22 09:39:25.632091: step: 110/464, loss: 5.732607364654541 2023-01-22 09:39:26.244008: step: 112/464, loss: 11.07235336303711 2023-01-22 09:39:26.917840: step: 114/464, loss: 9.082109451293945 2023-01-22 09:39:27.557123: step: 116/464, loss: 12.189888000488281 2023-01-22 09:39:28.169607: step: 118/464, loss: 11.501178741455078 2023-01-22 09:39:28.810203: step: 120/464, loss: 19.088016510009766 2023-01-22 09:39:29.493668: step: 122/464, loss: 14.197904586791992 2023-01-22 09:39:30.075288: step: 124/464, loss: 4.7059149742126465 2023-01-22 09:39:30.677877: step: 126/464, loss: 7.997165679931641 2023-01-22 09:39:31.252302: step: 128/464, loss: 11.891801834106445 2023-01-22 09:39:31.863681: step: 130/464, loss: 6.820798397064209 2023-01-22 09:39:32.569942: step: 132/464, loss: 9.701921463012695 2023-01-22 09:39:33.192066: step: 134/464, loss: 6.925141334533691 2023-01-22 09:39:33.771075: step: 136/464, loss: 13.604503631591797 2023-01-22 09:39:34.399821: step: 138/464, loss: 19.978195190429688 2023-01-22 09:39:35.079831: step: 140/464, loss: 6.376090049743652 2023-01-22 09:39:35.699062: step: 142/464, loss: 7.691537857055664 2023-01-22 09:39:36.354440: step: 144/464, loss: 5.879671573638916 2023-01-22 09:39:36.993074: step: 146/464, loss: 9.334442138671875 2023-01-22 09:39:37.673732: step: 148/464, loss: 5.401294708251953 2023-01-22 09:39:38.275422: step: 150/464, loss: 13.273971557617188 2023-01-22 09:39:38.933243: step: 152/464, loss: 12.276399612426758 2023-01-22 09:39:39.509241: step: 154/464, loss: 6.026156425476074 2023-01-22 09:39:40.176849: step: 156/464, loss: 16.545625686645508 2023-01-22 09:39:40.807631: step: 158/464, loss: 16.53818130493164 2023-01-22 09:39:41.510882: step: 160/464, loss: 8.795269012451172 2023-01-22 09:39:42.254333: step: 162/464, loss: 7.852406024932861 2023-01-22 09:39:42.880849: 
step: 164/464, loss: 4.57177734375 2023-01-22 09:39:43.591950: step: 166/464, loss: 4.372272491455078 2023-01-22 09:39:44.147584: step: 168/464, loss: 14.673152923583984 2023-01-22 09:39:44.745701: step: 170/464, loss: 2.6895434856414795 2023-01-22 09:39:45.348836: step: 172/464, loss: 8.476045608520508 2023-01-22 09:39:46.002710: step: 174/464, loss: 11.554985046386719 2023-01-22 09:39:46.593063: step: 176/464, loss: 4.613548755645752 2023-01-22 09:39:47.266269: step: 178/464, loss: 17.155364990234375 2023-01-22 09:39:47.868458: step: 180/464, loss: 13.597175598144531 2023-01-22 09:39:48.523826: step: 182/464, loss: 13.538841247558594 2023-01-22 09:39:49.088043: step: 184/464, loss: 4.875265121459961 2023-01-22 09:39:49.653636: step: 186/464, loss: 7.670773983001709 2023-01-22 09:39:50.237312: step: 188/464, loss: 10.10245418548584 2023-01-22 09:39:50.879499: step: 190/464, loss: 15.357199668884277 2023-01-22 09:39:51.462831: step: 192/464, loss: 14.560921669006348 2023-01-22 09:39:52.100815: step: 194/464, loss: 7.032919406890869 2023-01-22 09:39:52.753571: step: 196/464, loss: 10.493429183959961 2023-01-22 09:39:53.395274: step: 198/464, loss: 12.067194938659668 2023-01-22 09:39:54.038523: step: 200/464, loss: 15.097469329833984 2023-01-22 09:39:54.636672: step: 202/464, loss: 5.528450012207031 2023-01-22 09:39:55.237414: step: 204/464, loss: 10.181692123413086 2023-01-22 09:39:55.859775: step: 206/464, loss: 10.08667278289795 2023-01-22 09:39:56.461452: step: 208/464, loss: 8.108156204223633 2023-01-22 09:39:57.101633: step: 210/464, loss: 3.823437213897705 2023-01-22 09:39:57.719307: step: 212/464, loss: 3.2625980377197266 2023-01-22 09:39:58.339805: step: 214/464, loss: 9.450902938842773 2023-01-22 09:39:58.942352: step: 216/464, loss: 7.178184509277344 2023-01-22 09:39:59.551343: step: 218/464, loss: 15.258796691894531 2023-01-22 09:40:00.242427: step: 220/464, loss: 13.636726379394531 2023-01-22 09:40:00.937553: step: 222/464, loss: 2.666865348815918 2023-01-22 09:40:01.664408: step: 224/464, loss: 10.846983909606934 2023-01-22 09:40:02.321286: step: 226/464, loss: 6.188336372375488 2023-01-22 09:40:02.923765: step: 228/464, loss: 5.240720748901367 2023-01-22 09:40:03.559509: step: 230/464, loss: 12.24063491821289 2023-01-22 09:40:04.147782: step: 232/464, loss: 4.9737372398376465 2023-01-22 09:40:04.759353: step: 234/464, loss: 3.9984071254730225 2023-01-22 09:40:05.384218: step: 236/464, loss: 7.2288618087768555 2023-01-22 09:40:05.971778: step: 238/464, loss: 13.265220642089844 2023-01-22 09:40:06.590564: step: 240/464, loss: 3.304727077484131 2023-01-22 09:40:07.233139: step: 242/464, loss: 7.927270889282227 2023-01-22 09:40:07.831302: step: 244/464, loss: 6.870214462280273 2023-01-22 09:40:08.431275: step: 246/464, loss: 15.09991455078125 2023-01-22 09:40:09.067799: step: 248/464, loss: 4.424330234527588 2023-01-22 09:40:09.763098: step: 250/464, loss: 20.561283111572266 2023-01-22 09:40:10.400361: step: 252/464, loss: 16.219852447509766 2023-01-22 09:40:11.049683: step: 254/464, loss: 9.015839576721191 2023-01-22 09:40:11.643804: step: 256/464, loss: 6.264186859130859 2023-01-22 09:40:12.312305: step: 258/464, loss: 3.0174646377563477 2023-01-22 09:40:13.066844: step: 260/464, loss: 13.42202377319336 2023-01-22 09:40:13.759496: step: 262/464, loss: 5.563702583312988 2023-01-22 09:40:14.420906: step: 264/464, loss: 13.135787010192871 2023-01-22 09:40:14.989216: step: 266/464, loss: 6.930896282196045 2023-01-22 09:40:15.651230: step: 268/464, loss: 5.268439769744873 2023-01-22 
09:40:16.212101: step: 270/464, loss: 4.526968002319336 2023-01-22 09:40:16.822381: step: 272/464, loss: 1.6564035415649414 2023-01-22 09:40:17.485499: step: 274/464, loss: 2.870774507522583 2023-01-22 09:40:18.071970: step: 276/464, loss: 9.916946411132812 2023-01-22 09:40:18.777391: step: 278/464, loss: 3.924294948577881 2023-01-22 09:40:19.389942: step: 280/464, loss: 10.471038818359375 2023-01-22 09:40:20.068844: step: 282/464, loss: 10.573553085327148 2023-01-22 09:40:20.707286: step: 284/464, loss: 13.430482864379883 2023-01-22 09:40:21.464955: step: 286/464, loss: 2.6837880611419678 2023-01-22 09:40:22.070856: step: 288/464, loss: 9.279662132263184 2023-01-22 09:40:22.654087: step: 290/464, loss: 15.841981887817383 2023-01-22 09:40:23.226597: step: 292/464, loss: 10.159821510314941 2023-01-22 09:40:23.807384: step: 294/464, loss: 4.929985046386719 2023-01-22 09:40:24.500263: step: 296/464, loss: 5.215975761413574 2023-01-22 09:40:25.115314: step: 298/464, loss: 3.1540944576263428 2023-01-22 09:40:25.719208: step: 300/464, loss: 11.945456504821777 2023-01-22 09:40:26.278817: step: 302/464, loss: 4.5313920974731445 2023-01-22 09:40:26.892981: step: 304/464, loss: 8.657771110534668 2023-01-22 09:40:27.535017: step: 306/464, loss: 2.949284553527832 2023-01-22 09:40:28.100108: step: 308/464, loss: 2.8570871353149414 2023-01-22 09:40:28.717236: step: 310/464, loss: 9.278157234191895 2023-01-22 09:40:29.339934: step: 312/464, loss: 6.604586601257324 2023-01-22 09:40:29.987460: step: 314/464, loss: 9.309601783752441 2023-01-22 09:40:30.592344: step: 316/464, loss: 4.563741207122803 2023-01-22 09:40:31.234906: step: 318/464, loss: 5.5995283126831055 2023-01-22 09:40:31.909532: step: 320/464, loss: 1.8042103052139282 2023-01-22 09:40:32.582098: step: 322/464, loss: 22.809059143066406 2023-01-22 09:40:33.156113: step: 324/464, loss: 5.388643741607666 2023-01-22 09:40:33.789221: step: 326/464, loss: 5.1678266525268555 2023-01-22 09:40:34.411333: step: 328/464, loss: 11.327838897705078 2023-01-22 09:40:35.030997: step: 330/464, loss: 7.059610843658447 2023-01-22 09:40:35.706661: step: 332/464, loss: 23.0896053314209 2023-01-22 09:40:36.349360: step: 334/464, loss: 4.956096172332764 2023-01-22 09:40:37.026440: step: 336/464, loss: 13.520605087280273 2023-01-22 09:40:37.713111: step: 338/464, loss: 6.89166259765625 2023-01-22 09:40:38.368322: step: 340/464, loss: 3.214412212371826 2023-01-22 09:40:38.948622: step: 342/464, loss: 2.595851421356201 2023-01-22 09:40:39.570118: step: 344/464, loss: 18.70476531982422 2023-01-22 09:40:40.204970: step: 346/464, loss: 6.768799781799316 2023-01-22 09:40:40.843194: step: 348/464, loss: 5.832672119140625 2023-01-22 09:40:41.499445: step: 350/464, loss: 6.987588882446289 2023-01-22 09:40:42.237682: step: 352/464, loss: 31.79225730895996 2023-01-22 09:40:42.955216: step: 354/464, loss: 6.638126373291016 2023-01-22 09:40:43.552096: step: 356/464, loss: 8.499835968017578 2023-01-22 09:40:44.193543: step: 358/464, loss: 2.6248958110809326 2023-01-22 09:40:44.878082: step: 360/464, loss: 23.990123748779297 2023-01-22 09:40:45.596367: step: 362/464, loss: 7.547783374786377 2023-01-22 09:40:46.209766: step: 364/464, loss: 1.7016931772232056 2023-01-22 09:40:46.745252: step: 366/464, loss: 1.898937463760376 2023-01-22 09:40:47.376927: step: 368/464, loss: 3.6756973266601562 2023-01-22 09:40:48.020709: step: 370/464, loss: 6.603666305541992 2023-01-22 09:40:48.638286: step: 372/464, loss: 3.9010848999023438 2023-01-22 09:40:49.250927: step: 374/464, loss: 
7.395991325378418 2023-01-22 09:40:49.870663: step: 376/464, loss: 5.332321643829346 2023-01-22 09:40:50.562107: step: 378/464, loss: 4.313864707946777 2023-01-22 09:40:51.114433: step: 380/464, loss: 4.549687385559082 2023-01-22 09:40:51.688040: step: 382/464, loss: 7.73099422454834 2023-01-22 09:40:52.226053: step: 384/464, loss: 1.9287822246551514 2023-01-22 09:40:52.926811: step: 386/464, loss: 2.0085694789886475 2023-01-22 09:40:53.531979: step: 388/464, loss: 3.305400848388672 2023-01-22 09:40:54.259484: step: 390/464, loss: 3.4623374938964844 2023-01-22 09:40:54.903932: step: 392/464, loss: 5.293943405151367 2023-01-22 09:40:55.545772: step: 394/464, loss: 5.717831134796143 2023-01-22 09:40:56.137848: step: 396/464, loss: 1.6635620594024658 2023-01-22 09:40:56.779319: step: 398/464, loss: 0.9851242303848267 2023-01-22 09:40:57.374802: step: 400/464, loss: 1.1627798080444336 2023-01-22 09:40:58.015709: step: 402/464, loss: 2.4061439037323 2023-01-22 09:40:58.615927: step: 404/464, loss: 3.4080448150634766 2023-01-22 09:40:59.273321: step: 406/464, loss: 6.438791751861572 2023-01-22 09:40:59.927685: step: 408/464, loss: 1.6807105541229248 2023-01-22 09:41:00.538950: step: 410/464, loss: 2.9713494777679443 2023-01-22 09:41:01.173001: step: 412/464, loss: 1.5428835153579712 2023-01-22 09:41:01.847600: step: 414/464, loss: 1.4319688081741333 2023-01-22 09:41:02.478192: step: 416/464, loss: 1.5449755191802979 2023-01-22 09:41:03.113187: step: 418/464, loss: 2.1071863174438477 2023-01-22 09:41:03.687505: step: 420/464, loss: 1.7880160808563232 2023-01-22 09:41:04.314589: step: 422/464, loss: 3.24936580657959 2023-01-22 09:41:04.931149: step: 424/464, loss: 3.7851812839508057 2023-01-22 09:41:05.551181: step: 426/464, loss: 3.3374154567718506 2023-01-22 09:41:06.182982: step: 428/464, loss: 1.0898224115371704 2023-01-22 09:41:06.780737: step: 430/464, loss: 1.4387747049331665 2023-01-22 09:41:07.468877: step: 432/464, loss: 1.4988466501235962 2023-01-22 09:41:08.119292: step: 434/464, loss: 4.100554466247559 2023-01-22 09:41:08.769082: step: 436/464, loss: 5.842254638671875 2023-01-22 09:41:09.419790: step: 438/464, loss: 1.7818788290023804 2023-01-22 09:41:10.130915: step: 440/464, loss: 1.24330472946167 2023-01-22 09:41:10.849332: step: 442/464, loss: 1.083603024482727 2023-01-22 09:41:11.460515: step: 444/464, loss: 3.9309680461883545 2023-01-22 09:41:12.056438: step: 446/464, loss: 1.6359953880310059 2023-01-22 09:41:12.701990: step: 448/464, loss: 8.672527313232422 2023-01-22 09:41:13.300957: step: 450/464, loss: 1.3632621765136719 2023-01-22 09:41:13.939443: step: 452/464, loss: 1.7263782024383545 2023-01-22 09:41:14.638230: step: 454/464, loss: 3.074094772338867 2023-01-22 09:41:15.290913: step: 456/464, loss: 1.5102462768554688 2023-01-22 09:41:15.900685: step: 458/464, loss: 3.697157621383667 2023-01-22 09:41:16.553248: step: 460/464, loss: 6.106800079345703 2023-01-22 09:41:17.218248: step: 462/464, loss: 0.9169987440109253 2023-01-22 09:41:17.899788: step: 464/464, loss: 6.377109527587891 2023-01-22 09:41:18.488856: step: 466/464, loss: 0.525560200214386 2023-01-22 09:41:19.092492: step: 468/464, loss: 2.8090384006500244 2023-01-22 09:41:19.719493: step: 470/464, loss: 1.1070313453674316 2023-01-22 09:41:20.362703: step: 472/464, loss: 1.8549484014511108 2023-01-22 09:41:20.959108: step: 474/464, loss: 8.1439208984375 2023-01-22 09:41:21.627329: step: 476/464, loss: 2.7541751861572266 2023-01-22 09:41:22.251634: step: 478/464, loss: 2.8204009532928467 2023-01-22 09:41:22.842400: 
step: 480/464, loss: 1.782137155532837 2023-01-22 09:41:23.468209: step: 482/464, loss: 0.5694196820259094 2023-01-22 09:41:24.104943: step: 484/464, loss: 2.7259013652801514 2023-01-22 09:41:24.733345: step: 486/464, loss: 0.5797066688537598 2023-01-22 09:41:25.380500: step: 488/464, loss: 1.6275838613510132 2023-01-22 09:41:26.020421: step: 490/464, loss: 2.965346574783325 2023-01-22 09:41:26.679032: step: 492/464, loss: 5.800957679748535 2023-01-22 09:41:27.318859: step: 494/464, loss: 8.127657890319824 2023-01-22 09:41:27.928088: step: 496/464, loss: 1.8809348344802856 2023-01-22 09:41:28.549356: step: 498/464, loss: 0.6403765082359314 2023-01-22 09:41:29.216332: step: 500/464, loss: 0.5952467918395996 2023-01-22 09:41:29.774013: step: 502/464, loss: 6.675691604614258 2023-01-22 09:41:30.426738: step: 504/464, loss: 1.6062860488891602 2023-01-22 09:41:31.032112: step: 506/464, loss: 8.55487060546875 2023-01-22 09:41:31.670569: step: 508/464, loss: 4.537815093994141 2023-01-22 09:41:32.245398: step: 510/464, loss: 2.048262596130371 2023-01-22 09:41:32.881144: step: 512/464, loss: 3.5662026405334473 2023-01-22 09:41:33.514670: step: 514/464, loss: 0.9425341486930847 2023-01-22 09:41:34.192113: step: 516/464, loss: 0.9256479740142822 2023-01-22 09:41:34.880638: step: 518/464, loss: 2.127551794052124 2023-01-22 09:41:35.518140: step: 520/464, loss: 3.395993709564209 2023-01-22 09:41:36.166827: step: 522/464, loss: 2.3432466983795166 2023-01-22 09:41:36.843693: step: 524/464, loss: 1.383481502532959 2023-01-22 09:41:37.418814: step: 526/464, loss: 3.4100661277770996 2023-01-22 09:41:38.141721: step: 528/464, loss: 2.922839641571045 2023-01-22 09:41:38.722575: step: 530/464, loss: 1.9120745658874512 2023-01-22 09:41:39.427175: step: 532/464, loss: 1.312376618385315 2023-01-22 09:41:40.064176: step: 534/464, loss: 3.5674753189086914 2023-01-22 09:41:40.655657: step: 536/464, loss: 3.0240063667297363 2023-01-22 09:41:41.291358: step: 538/464, loss: 1.7374709844589233 2023-01-22 09:41:41.913828: step: 540/464, loss: 0.6851668953895569 2023-01-22 09:41:42.548023: step: 542/464, loss: 4.37807035446167 2023-01-22 09:41:43.203424: step: 544/464, loss: 1.2854983806610107 2023-01-22 09:41:43.880173: step: 546/464, loss: 6.369767189025879 2023-01-22 09:41:44.491153: step: 548/464, loss: 2.6160709857940674 2023-01-22 09:41:45.081740: step: 550/464, loss: 4.167600154876709 2023-01-22 09:41:45.681184: step: 552/464, loss: 1.1332805156707764 2023-01-22 09:41:46.293953: step: 554/464, loss: 0.7444527745246887 2023-01-22 09:41:46.895875: step: 556/464, loss: 12.333288192749023 2023-01-22 09:41:47.512863: step: 558/464, loss: 8.097074508666992 2023-01-22 09:41:48.166956: step: 560/464, loss: 2.7238166332244873 2023-01-22 09:41:48.751634: step: 562/464, loss: 3.192190170288086 2023-01-22 09:41:49.370266: step: 564/464, loss: 0.49808821082115173 2023-01-22 09:41:50.012205: step: 566/464, loss: 1.5350732803344727 2023-01-22 09:41:50.667531: step: 568/464, loss: 2.190887928009033 2023-01-22 09:41:51.352678: step: 570/464, loss: 1.2030106782913208 2023-01-22 09:41:52.025361: step: 572/464, loss: 1.8200008869171143 2023-01-22 09:41:52.639896: step: 574/464, loss: 1.8087153434753418 2023-01-22 09:41:53.241846: step: 576/464, loss: 0.961233377456665 2023-01-22 09:41:53.932411: step: 578/464, loss: 1.1750059127807617 2023-01-22 09:41:54.540626: step: 580/464, loss: 3.103426933288574 2023-01-22 09:41:55.133127: step: 582/464, loss: 8.684738159179688 2023-01-22 09:41:55.732142: step: 584/464, loss: 3.7423057556152344 
2023-01-22 09:41:56.347256: step: 586/464, loss: 7.903364658355713 2023-01-22 09:41:56.993691: step: 588/464, loss: 2.912686586380005 2023-01-22 09:41:57.657665: step: 590/464, loss: 2.24096941947937 2023-01-22 09:41:58.283467: step: 592/464, loss: 2.5934605598449707 2023-01-22 09:41:58.875613: step: 594/464, loss: 2.838994264602661 2023-01-22 09:41:59.484019: step: 596/464, loss: 2.592299699783325 2023-01-22 09:42:00.093644: step: 598/464, loss: 2.6744465827941895 2023-01-22 09:42:00.745194: step: 600/464, loss: 3.6681036949157715 2023-01-22 09:42:01.330938: step: 602/464, loss: 1.1927385330200195 2023-01-22 09:42:01.927911: step: 604/464, loss: 0.988146960735321 2023-01-22 09:42:02.530654: step: 606/464, loss: 0.9232864379882812 2023-01-22 09:42:03.144198: step: 608/464, loss: 2.6107113361358643 2023-01-22 09:42:03.742719: step: 610/464, loss: 6.5684614181518555 2023-01-22 09:42:04.394170: step: 612/464, loss: 2.9056572914123535 2023-01-22 09:42:05.041525: step: 614/464, loss: 1.2010129690170288 2023-01-22 09:42:05.704463: step: 616/464, loss: 2.280693292617798 2023-01-22 09:42:06.321665: step: 618/464, loss: 1.2569115161895752 2023-01-22 09:42:06.915551: step: 620/464, loss: 0.6354900002479553 2023-01-22 09:42:07.608835: step: 622/464, loss: 6.259234428405762 2023-01-22 09:42:08.279885: step: 624/464, loss: 1.5421119928359985 2023-01-22 09:42:08.796274: step: 626/464, loss: 1.037724256515503 2023-01-22 09:42:09.413521: step: 628/464, loss: 3.7696783542633057 2023-01-22 09:42:10.030245: step: 630/464, loss: 7.192898750305176 2023-01-22 09:42:10.675725: step: 632/464, loss: 3.1449084281921387 2023-01-22 09:42:11.440112: step: 634/464, loss: 3.239497423171997 2023-01-22 09:42:12.186374: step: 636/464, loss: 3.8216047286987305 2023-01-22 09:42:12.840911: step: 638/464, loss: 1.120566725730896 2023-01-22 09:42:13.480645: step: 640/464, loss: 0.9647707939147949 2023-01-22 09:42:14.156491: step: 642/464, loss: 3.0738391876220703 2023-01-22 09:42:14.889137: step: 644/464, loss: 0.7195495963096619 2023-01-22 09:42:15.491722: step: 646/464, loss: 4.335334300994873 2023-01-22 09:42:16.094820: step: 648/464, loss: 1.711287498474121 2023-01-22 09:42:16.764945: step: 650/464, loss: 1.3882620334625244 2023-01-22 09:42:17.374511: step: 652/464, loss: 2.3732476234436035 2023-01-22 09:42:17.997722: step: 654/464, loss: 0.8575702905654907 2023-01-22 09:42:18.613891: step: 656/464, loss: 0.5496256947517395 2023-01-22 09:42:19.256432: step: 658/464, loss: 1.9140018224716187 2023-01-22 09:42:19.801581: step: 660/464, loss: 2.0099096298217773 2023-01-22 09:42:20.481023: step: 662/464, loss: 2.6142263412475586 2023-01-22 09:42:21.159898: step: 664/464, loss: 5.653798580169678 2023-01-22 09:42:21.756961: step: 666/464, loss: 1.4585859775543213 2023-01-22 09:42:22.334573: step: 668/464, loss: 4.889749526977539 2023-01-22 09:42:22.869067: step: 670/464, loss: 2.410616159439087 2023-01-22 09:42:23.464651: step: 672/464, loss: 0.8650250434875488 2023-01-22 09:42:24.032284: step: 674/464, loss: 1.1738193035125732 2023-01-22 09:42:24.597128: step: 676/464, loss: 3.919182062149048 2023-01-22 09:42:25.230221: step: 678/464, loss: 3.5058436393737793 2023-01-22 09:42:25.783259: step: 680/464, loss: 0.35765209794044495 2023-01-22 09:42:26.398285: step: 682/464, loss: 6.447207450866699 2023-01-22 09:42:27.007398: step: 684/464, loss: 2.850321054458618 2023-01-22 09:42:27.586019: step: 686/464, loss: 6.106337070465088 2023-01-22 09:42:28.277854: step: 688/464, loss: 0.6156391501426697 2023-01-22 09:42:28.888390: step: 
690/464, loss: 5.986627578735352 2023-01-22 09:42:29.489291: step: 692/464, loss: 1.725941777229309 2023-01-22 09:42:30.142119: step: 694/464, loss: 3.855546236038208 2023-01-22 09:42:30.796264: step: 696/464, loss: 12.777762413024902 2023-01-22 09:42:31.459669: step: 698/464, loss: 0.6011559963226318 2023-01-22 09:42:32.191302: step: 700/464, loss: 16.054662704467773 2023-01-22 09:42:32.806533: step: 702/464, loss: 2.418577194213867 2023-01-22 09:42:33.421234: step: 704/464, loss: 1.305466651916504 2023-01-22 09:42:34.061064: step: 706/464, loss: 5.697353363037109 2023-01-22 09:42:34.711481: step: 708/464, loss: 1.6900533437728882 2023-01-22 09:42:35.477480: step: 710/464, loss: 0.9690849781036377 2023-01-22 09:42:36.089187: step: 712/464, loss: 0.8801342844963074 2023-01-22 09:42:36.749659: step: 714/464, loss: 2.1792311668395996 2023-01-22 09:42:37.429771: step: 716/464, loss: 4.735498905181885 2023-01-22 09:42:38.222291: step: 718/464, loss: 0.9284535646438599 2023-01-22 09:42:38.835909: step: 720/464, loss: 2.369762420654297 2023-01-22 09:42:39.463716: step: 722/464, loss: 10.625313758850098 2023-01-22 09:42:40.165415: step: 724/464, loss: 2.382413864135742 2023-01-22 09:42:40.792971: step: 726/464, loss: 7.8514180183410645 2023-01-22 09:42:41.429233: step: 728/464, loss: 1.2403920888900757 2023-01-22 09:42:42.093961: step: 730/464, loss: 2.554053544998169 2023-01-22 09:42:42.653858: step: 732/464, loss: 1.515782356262207 2023-01-22 09:42:43.285503: step: 734/464, loss: 2.2663307189941406 2023-01-22 09:42:43.923407: step: 736/464, loss: 4.186933994293213 2023-01-22 09:42:44.536462: step: 738/464, loss: 0.7283748984336853 2023-01-22 09:42:45.204870: step: 740/464, loss: 3.5390701293945312 2023-01-22 09:42:45.821560: step: 742/464, loss: 0.6066951751708984 2023-01-22 09:42:46.404962: step: 744/464, loss: 2.166688919067383 2023-01-22 09:42:46.953988: step: 746/464, loss: 1.1567081212997437 2023-01-22 09:42:47.572770: step: 748/464, loss: 1.163918137550354 2023-01-22 09:42:48.111665: step: 750/464, loss: 0.7749082446098328 2023-01-22 09:42:48.748682: step: 752/464, loss: 0.47420597076416016 2023-01-22 09:42:49.353545: step: 754/464, loss: 1.50184166431427 2023-01-22 09:42:49.945827: step: 756/464, loss: 6.195437431335449 2023-01-22 09:42:50.600708: step: 758/464, loss: 1.8309895992279053 2023-01-22 09:42:51.223457: step: 760/464, loss: 5.113033294677734 2023-01-22 09:42:51.904910: step: 762/464, loss: 0.9480839371681213 2023-01-22 09:42:52.561126: step: 764/464, loss: 5.2821478843688965 2023-01-22 09:42:53.204203: step: 766/464, loss: 0.63105708360672 2023-01-22 09:42:53.812654: step: 768/464, loss: 8.196881294250488 2023-01-22 09:42:54.390187: step: 770/464, loss: 0.6351608037948608 2023-01-22 09:42:54.993154: step: 772/464, loss: 1.5293173789978027 2023-01-22 09:42:55.569491: step: 774/464, loss: 1.171482801437378 2023-01-22 09:42:56.165434: step: 776/464, loss: 10.900691986083984 2023-01-22 09:42:56.812662: step: 778/464, loss: 4.51688814163208 2023-01-22 09:42:57.451873: step: 780/464, loss: 6.444057941436768 2023-01-22 09:42:58.032789: step: 782/464, loss: 2.60901141166687 2023-01-22 09:42:58.640694: step: 784/464, loss: 0.8947842717170715 2023-01-22 09:42:59.279284: step: 786/464, loss: 1.6689441204071045 2023-01-22 09:42:59.933799: step: 788/464, loss: 1.3592464923858643 2023-01-22 09:43:00.541673: step: 790/464, loss: 0.7408801317214966 2023-01-22 09:43:01.195302: step: 792/464, loss: 0.40166574716567993 2023-01-22 09:43:01.858859: step: 794/464, loss: 2.8900511264801025 
2023-01-22 09:43:02.475882: step: 796/464, loss: 5.074830532073975 2023-01-22 09:43:03.137202: step: 798/464, loss: 3.0470948219299316 2023-01-22 09:43:03.716987: step: 800/464, loss: 1.3238725662231445 2023-01-22 09:43:04.352436: step: 802/464, loss: 2.3110499382019043 2023-01-22 09:43:04.971429: step: 804/464, loss: 4.000260829925537 2023-01-22 09:43:05.533675: step: 806/464, loss: 5.249054431915283 2023-01-22 09:43:06.200858: step: 808/464, loss: 1.7909568548202515 2023-01-22 09:43:06.812381: step: 810/464, loss: 2.6298608779907227 2023-01-22 09:43:07.417239: step: 812/464, loss: 2.052001714706421 2023-01-22 09:43:08.011117: step: 814/464, loss: 7.3221211433410645 2023-01-22 09:43:08.660091: step: 816/464, loss: 1.4111716747283936 2023-01-22 09:43:09.395518: step: 818/464, loss: 1.5203192234039307 2023-01-22 09:43:09.959788: step: 820/464, loss: 1.7173218727111816 2023-01-22 09:43:10.564057: step: 822/464, loss: 4.458634376525879 2023-01-22 09:43:11.164822: step: 824/464, loss: 0.8378711938858032 2023-01-22 09:43:11.765838: step: 826/464, loss: 0.9857144355773926 2023-01-22 09:43:12.472014: step: 828/464, loss: 1.1376981735229492 2023-01-22 09:43:13.131084: step: 830/464, loss: 2.9880220890045166 2023-01-22 09:43:13.655668: step: 832/464, loss: 1.2170941829681396 2023-01-22 09:43:14.249678: step: 834/464, loss: 0.9180886745452881 2023-01-22 09:43:14.896079: step: 836/464, loss: 3.690925121307373 2023-01-22 09:43:15.582344: step: 838/464, loss: 2.100409746170044 2023-01-22 09:43:16.193438: step: 840/464, loss: 2.7280213832855225 2023-01-22 09:43:16.792429: step: 842/464, loss: 0.4261971116065979 2023-01-22 09:43:17.368436: step: 844/464, loss: 1.1917791366577148 2023-01-22 09:43:18.067669: step: 846/464, loss: 2.6262195110321045 2023-01-22 09:43:18.728492: step: 848/464, loss: 2.6037867069244385 2023-01-22 09:43:19.358655: step: 850/464, loss: 5.234838485717773 2023-01-22 09:43:19.958374: step: 852/464, loss: 1.619897723197937 2023-01-22 09:43:20.675177: step: 854/464, loss: 1.9957997798919678 2023-01-22 09:43:21.262513: step: 856/464, loss: 1.6528217792510986 2023-01-22 09:43:21.869798: step: 858/464, loss: 2.4378433227539062 2023-01-22 09:43:22.459410: step: 860/464, loss: 0.8490089178085327 2023-01-22 09:43:23.082652: step: 862/464, loss: 0.8172425031661987 2023-01-22 09:43:23.652531: step: 864/464, loss: 1.6703541278839111 2023-01-22 09:43:24.267525: step: 866/464, loss: 1.3778300285339355 2023-01-22 09:43:24.842165: step: 868/464, loss: 2.208710193634033 2023-01-22 09:43:25.479816: step: 870/464, loss: 2.3513078689575195 2023-01-22 09:43:26.119013: step: 872/464, loss: 1.8490746021270752 2023-01-22 09:43:26.758672: step: 874/464, loss: 0.4844001531600952 2023-01-22 09:43:27.448621: step: 876/464, loss: 0.6247026324272156 2023-01-22 09:43:28.059040: step: 878/464, loss: 3.3467068672180176 2023-01-22 09:43:28.782883: step: 880/464, loss: 0.645189106464386 2023-01-22 09:43:29.454007: step: 882/464, loss: 2.4063289165496826 2023-01-22 09:43:30.026107: step: 884/464, loss: 1.1080090999603271 2023-01-22 09:43:30.637857: step: 886/464, loss: 1.222339153289795 2023-01-22 09:43:31.332911: step: 888/464, loss: 1.995471715927124 2023-01-22 09:43:31.962484: step: 890/464, loss: 0.5842683911323547 2023-01-22 09:43:32.586589: step: 892/464, loss: 2.0001533031463623 2023-01-22 09:43:33.203871: step: 894/464, loss: 3.2898120880126953 2023-01-22 09:43:33.863872: step: 896/464, loss: 1.5262447595596313 2023-01-22 09:43:34.508722: step: 898/464, loss: 1.5730087757110596 2023-01-22 09:43:35.151279: 
step: 900/464, loss: 0.9650188088417053 2023-01-22 09:43:35.771624: step: 902/464, loss: 2.3240749835968018 2023-01-22 09:43:36.384617: step: 904/464, loss: 5.325174808502197 2023-01-22 09:43:36.943498: step: 906/464, loss: 1.3020225763320923 2023-01-22 09:43:37.561647: step: 908/464, loss: 2.313237190246582 2023-01-22 09:43:38.239254: step: 910/464, loss: 3.4341206550598145 2023-01-22 09:43:38.870687: step: 912/464, loss: 5.504545211791992 2023-01-22 09:43:39.492153: step: 914/464, loss: 2.7761149406433105 2023-01-22 09:43:40.119814: step: 916/464, loss: 2.362014055252075 2023-01-22 09:43:40.678869: step: 918/464, loss: 0.7432173490524292 2023-01-22 09:43:41.245498: step: 920/464, loss: 9.008573532104492 2023-01-22 09:43:41.896691: step: 922/464, loss: 0.6847949624061584 2023-01-22 09:43:42.530749: step: 924/464, loss: 0.9933121204376221 2023-01-22 09:43:43.172463: step: 926/464, loss: 0.7144534587860107 2023-01-22 09:43:43.819837: step: 928/464, loss: 3.1983590126037598 2023-01-22 09:43:44.320464: step: 930/464, loss: 0.08428919315338135
==================================================
Loss: 6.003
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2572959901421221, 'r': 0.1104086762991715, 'f1': 0.15451372952979292}, 'combined': 0.11385222175879478, 'epoch': 0}
Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3540975798921719, 'r': 0.060313324047567736, 'f1': 0.10307065705781529}, 'combined': 0.06728965175795194, 'epoch': 0}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2548415383852656, 'r': 0.10887370070901706, 'f1': 0.15256738456972194}, 'combined': 0.11241807284084773, 'epoch': 0}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.35189993564993566, 'r': 0.059616747339961626, 'f1': 0.10196004400193906}, 'combined': 0.06656458831214675, 'epoch': 0}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2592295198403994, 'r': 0.09506715850479676, 'f1': 0.1391162568438105}, 'combined': 0.10250671556912351, 'epoch': 0}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.37378750184284243, 'r': 0.058190362008501105, 'f1': 0.10070344740615406}, 'combined': 0.06574421955013167, 'epoch': 0}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33333333333333337, 'r': 0.13333333333333333, 'f1': 0.1904761904761905}, 'combined': 0.12698412698412698, 'epoch': 0}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.10344827586206896, 'f1': 0.17142857142857143}, 'combined': 0.11428571428571428, 'epoch': 0}
New best chinese model...
New best korean model...
New best russian model...
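Editor's note on the summary numbers above: within each block, every 'f1' is the usual harmonic mean of the listed 'p' and 'r', and the 'combined' value matches the product of the template F1 and the slot F1 (e.g. 0.7368421 x 0.1545137 = 0.1138522 for Dev Chinese). This is inferred from the logged values only, not read from train.py; the short sketch below just reproduces that arithmetic so the blocks are easier to interpret.

    # Minimal sketch, inferred from the logged numbers (not the repository's code).
    def f1(p, r):
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    def combined(template, slot):
        # The logged 'combined' values are consistent with template_f1 * slot_f1.
        return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

    # Dev Chinese, epoch 0 (values copied from the block above)
    template = {"p": 1.0, "r": 0.5833333333333334}
    slot = {"p": 0.2572959901421221, "r": 0.1104086762991715}
    print(combined(template, slot))  # ~0.11385222175879478, matching the logged 'combined'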
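Editor's note on the classification heads: the parameter dump at the top of this log shows that each name2classifier.<role>-ffn entry is a two-layer feed-forward head mapping the 1024-dim XLM-R representation through a 350-dim hidden layer (matching --role_hidden_num 350) to 2 logits, while irrealis_classifier follows the same pattern with a 1127-dim input and 7 output classes. The sketch below is a hypothetical reconstruction from those shapes alone; the activation between the layers and the example role names are assumptions, not taken from the training code.

    import torch
    import torch.nn as nn

    class TwoLayerFFN(nn.Module):
        # Hypothetical reconstruction of a head whose shapes appear in the log:
        # layers.0: Linear(in_dim -> hidden), layers.1: Linear(hidden -> out_dim).
        def __init__(self, in_dim=1024, hidden=350, out_dim=2):
            super().__init__()
            self.layers = nn.ModuleList([
                nn.Linear(in_dim, hidden),   # e.g. weight shape [350, 1024]
                nn.Linear(hidden, out_dim),  # e.g. weight shape [2, 350]
            ])

        def forward(self, x):
            # ReLU between the layers is an assumption; the log does not show it.
            return self.layers[1](torch.relu(self.layers[0](x)))

    # One head per role/slot name, mirroring the "name2classifier.<role>-ffn" keys
    # (illustrative subset of role names only).
    name2classifier = nn.ModuleDict({
        f"{role}-ffn": TwoLayerFFN() for role in ["victim", "perpetrator", "response"]
    })

    # The irrealis head uses a 1127-dim input and 7 classes ([350, 1127] and [7, 350]).
    irrealis_classifier = TwoLayerFFN(in_dim=1127, hidden=350, out_dim=7)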
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2572959901421221, 'r': 0.1104086762991715, 'f1': 0.15451372952979292}, 'combined': 0.11385222175879478, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3540975798921719, 'r': 0.060313324047567736, 'f1': 0.10307065705781529}, 'combined': 0.06728965175795194, 'epoch': 0}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33333333333333337, 'r': 0.13333333333333333, 'f1': 0.1904761904761905}, 'combined': 0.12698412698412698, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2548415383852656, 'r': 0.10887370070901706, 'f1': 0.15256738456972194}, 'combined': 0.11241807284084773, 'epoch': 0}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.35189993564993566, 'r': 0.059616747339961626, 'f1': 0.10196004400193906}, 'combined': 0.06656458831214675, 'epoch': 0}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2592295198403994, 'r': 0.09506715850479676, 'f1': 0.1391162568438105}, 'combined': 0.10250671556912351, 'epoch': 0}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.37378750184284243, 'r': 0.058190362008501105, 'f1': 0.10070344740615406}, 'combined': 0.06574421955013167, 'epoch': 0}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.10344827586206896, 'f1': 0.17142857142857143}, 'combined': 0.11428571428571428, 'epoch': 0}
****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:46:33.642513: step: 2/464, loss: 2.4085278511047363 2023-01-22 09:46:34.207420: step: 4/464, loss: 4.375063896179199 2023-01-22 09:46:34.852444: step: 6/464, loss: 1.7053239345550537 2023-01-22 09:46:35.539598: step: 8/464, loss: 2.6834206581115723 2023-01-22 09:46:36.163859: step: 10/464, loss: 5.249751091003418 2023-01-22 09:46:36.808996: step: 12/464, loss: 0.7104110717773438 2023-01-22 09:46:37.431476: step: 14/464, loss: 3.3045058250427246 2023-01-22 09:46:38.015500: step: 16/464, loss: 2.128939151763916 2023-01-22 09:46:38.624604: step: 18/464, loss: 1.1264522075653076 2023-01-22 09:46:39.196404: step: 20/464, loss: 1.1155978441238403 2023-01-22 09:46:39.842047: step: 22/464, loss: 5.055933952331543 2023-01-22 09:46:40.477809: step: 24/464, loss: 1.4645519256591797 2023-01-22 09:46:41.137522: step: 26/464, loss: 1.1370962858200073 2023-01-22 09:46:41.726122: step: 28/464, loss: 0.4650828242301941 2023-01-22 09:46:42.446580: step: 30/464, loss: 0.6294041872024536 2023-01-22 09:46:43.087424: step: 32/464, loss: 1.4798829555511475 2023-01-22 09:46:43.726664: step: 34/464, loss: 6.095230579376221 2023-01-22 09:46:44.324983: step: 36/464, loss: 0.5478473901748657 2023-01-22 09:46:45.005629: step: 38/464, loss: 2.8686866760253906
2023-01-22 09:46:45.615795: step: 40/464, loss: 1.9995484352111816 2023-01-22 09:46:46.269309: step: 42/464, loss: 2.31650972366333 2023-01-22 09:46:46.926834: step: 44/464, loss: 1.5859663486480713 2023-01-22 09:46:47.505814: step: 46/464, loss: 1.5173289775848389 2023-01-22 09:46:48.166099: step: 48/464, loss: 6.074652671813965 2023-01-22 09:46:48.813423: step: 50/464, loss: 1.6784247159957886 2023-01-22 09:46:49.435522: step: 52/464, loss: 1.4369029998779297 2023-01-22 09:46:50.033326: step: 54/464, loss: 2.3493897914886475 2023-01-22 09:46:50.636317: step: 56/464, loss: 2.295942544937134 2023-01-22 09:46:51.274707: step: 58/464, loss: 1.5282453298568726 2023-01-22 09:46:51.920572: step: 60/464, loss: 1.5331041812896729 2023-01-22 09:46:52.532047: step: 62/464, loss: 3.563119888305664 2023-01-22 09:46:53.255681: step: 64/464, loss: 1.34977126121521 2023-01-22 09:46:53.888580: step: 66/464, loss: 0.8964823484420776 2023-01-22 09:46:54.504554: step: 68/464, loss: 0.8326548933982849 2023-01-22 09:46:55.108187: step: 70/464, loss: 0.6099372506141663 2023-01-22 09:46:55.661597: step: 72/464, loss: 0.5478998422622681 2023-01-22 09:46:56.349979: step: 74/464, loss: 4.812145709991455 2023-01-22 09:46:56.912633: step: 76/464, loss: 1.0465631484985352 2023-01-22 09:46:57.548983: step: 78/464, loss: 0.7286931276321411 2023-01-22 09:46:58.165010: step: 80/464, loss: 4.54271125793457 2023-01-22 09:46:58.797357: step: 82/464, loss: 2.443305253982544 2023-01-22 09:46:59.407628: step: 84/464, loss: 1.6770225763320923 2023-01-22 09:46:59.962295: step: 86/464, loss: 0.9204431772232056 2023-01-22 09:47:00.625009: step: 88/464, loss: 0.9454526305198669 2023-01-22 09:47:01.238916: step: 90/464, loss: 2.1437172889709473 2023-01-22 09:47:01.928466: step: 92/464, loss: 1.6663503646850586 2023-01-22 09:47:02.540015: step: 94/464, loss: 4.770730972290039 2023-01-22 09:47:03.169885: step: 96/464, loss: 1.5186935663223267 2023-01-22 09:47:03.799884: step: 98/464, loss: 0.8113821744918823 2023-01-22 09:47:04.453581: step: 100/464, loss: 1.9256279468536377 2023-01-22 09:47:05.055360: step: 102/464, loss: 1.964971899986267 2023-01-22 09:47:05.652754: step: 104/464, loss: 1.2278070449829102 2023-01-22 09:47:06.290826: step: 106/464, loss: 2.1178746223449707 2023-01-22 09:47:06.979567: step: 108/464, loss: 1.0850036144256592 2023-01-22 09:47:07.549341: step: 110/464, loss: 3.908975601196289 2023-01-22 09:47:08.208980: step: 112/464, loss: 8.245256423950195 2023-01-22 09:47:08.836595: step: 114/464, loss: 0.6226215362548828 2023-01-22 09:47:09.492721: step: 116/464, loss: 1.4349032640457153 2023-01-22 09:47:10.115906: step: 118/464, loss: 1.4965016841888428 2023-01-22 09:47:10.726132: step: 120/464, loss: 5.736588954925537 2023-01-22 09:47:11.339694: step: 122/464, loss: 0.5515249371528625 2023-01-22 09:47:11.975604: step: 124/464, loss: 2.035945415496826 2023-01-22 09:47:12.612981: step: 126/464, loss: 0.27566272020339966 2023-01-22 09:47:13.240237: step: 128/464, loss: 1.9847958087921143 2023-01-22 09:47:13.888095: step: 130/464, loss: 1.4272325038909912 2023-01-22 09:47:14.524723: step: 132/464, loss: 0.5434401035308838 2023-01-22 09:47:15.130370: step: 134/464, loss: 4.213326930999756 2023-01-22 09:47:15.686714: step: 136/464, loss: 1.4150797128677368 2023-01-22 09:47:16.298753: step: 138/464, loss: 2.063068389892578 2023-01-22 09:47:16.968197: step: 140/464, loss: 1.975632667541504 2023-01-22 09:47:17.571916: step: 142/464, loss: 1.9226182699203491 2023-01-22 09:47:18.256191: step: 144/464, loss: 1.3040951490402222 
2023-01-22 09:47:18.894661: step: 146/464, loss: 2.4764351844787598 2023-01-22 09:47:19.487130: step: 148/464, loss: 0.8127474188804626 2023-01-22 09:47:20.043372: step: 150/464, loss: 0.4643832743167877 2023-01-22 09:47:20.625149: step: 152/464, loss: 1.3815553188323975 2023-01-22 09:47:21.373936: step: 154/464, loss: 1.9366369247436523 2023-01-22 09:47:22.015716: step: 156/464, loss: 0.44186994433403015 2023-01-22 09:47:22.653946: step: 158/464, loss: 1.2369760274887085 2023-01-22 09:47:23.281569: step: 160/464, loss: 3.2742278575897217 2023-01-22 09:47:23.837091: step: 162/464, loss: 0.3232155740261078 2023-01-22 09:47:24.489911: step: 164/464, loss: 13.85115909576416 2023-01-22 09:47:25.094810: step: 166/464, loss: 0.5114213824272156 2023-01-22 09:47:25.772010: step: 168/464, loss: 1.908266544342041 2023-01-22 09:47:26.400958: step: 170/464, loss: 5.393420219421387 2023-01-22 09:47:27.034262: step: 172/464, loss: 0.997491180896759 2023-01-22 09:47:27.686002: step: 174/464, loss: 2.017730951309204 2023-01-22 09:47:28.344299: step: 176/464, loss: 3.1283817291259766 2023-01-22 09:47:28.985617: step: 178/464, loss: 2.6031904220581055 2023-01-22 09:47:29.672099: step: 180/464, loss: 0.28233417868614197 2023-01-22 09:47:30.318210: step: 182/464, loss: 1.887563705444336 2023-01-22 09:47:30.957399: step: 184/464, loss: 1.3217190504074097 2023-01-22 09:47:31.516523: step: 186/464, loss: 2.251591205596924 2023-01-22 09:47:32.107482: step: 188/464, loss: 1.1629953384399414 2023-01-22 09:47:32.668549: step: 190/464, loss: 1.2898328304290771 2023-01-22 09:47:33.273352: step: 192/464, loss: 5.701601028442383 2023-01-22 09:47:33.856156: step: 194/464, loss: 2.956191062927246 2023-01-22 09:47:34.449710: step: 196/464, loss: 1.9662350416183472 2023-01-22 09:47:35.027781: step: 198/464, loss: 4.0824055671691895 2023-01-22 09:47:35.658500: step: 200/464, loss: 1.9693928956985474 2023-01-22 09:47:36.285711: step: 202/464, loss: 2.0761656761169434 2023-01-22 09:47:36.917327: step: 204/464, loss: 4.071382999420166 2023-01-22 09:47:37.523678: step: 206/464, loss: 0.8178318738937378 2023-01-22 09:47:38.152093: step: 208/464, loss: 6.27664852142334 2023-01-22 09:47:38.790772: step: 210/464, loss: 2.5243842601776123 2023-01-22 09:47:39.411154: step: 212/464, loss: 1.3201401233673096 2023-01-22 09:47:40.053574: step: 214/464, loss: 0.9902519583702087 2023-01-22 09:47:40.692448: step: 216/464, loss: 3.904649496078491 2023-01-22 09:47:41.310481: step: 218/464, loss: 0.5062090158462524 2023-01-22 09:47:41.912015: step: 220/464, loss: 1.3447294235229492 2023-01-22 09:47:42.669710: step: 222/464, loss: 0.7208210229873657 2023-01-22 09:47:43.277336: step: 224/464, loss: 0.9612250328063965 2023-01-22 09:47:43.860616: step: 226/464, loss: 1.8179367780685425 2023-01-22 09:47:44.490127: step: 228/464, loss: 0.6544939279556274 2023-01-22 09:47:45.151075: step: 230/464, loss: 4.567896366119385 2023-01-22 09:47:45.777241: step: 232/464, loss: 1.5239843130111694 2023-01-22 09:47:46.392892: step: 234/464, loss: 1.2557464838027954 2023-01-22 09:47:47.010984: step: 236/464, loss: 0.5024946331977844 2023-01-22 09:47:47.610983: step: 238/464, loss: 3.551517963409424 2023-01-22 09:47:48.250406: step: 240/464, loss: 0.9418332576751709 2023-01-22 09:47:48.830136: step: 242/464, loss: 1.2138640880584717 2023-01-22 09:47:49.429567: step: 244/464, loss: 1.2311737537384033 2023-01-22 09:47:50.046509: step: 246/464, loss: 1.031320571899414 2023-01-22 09:47:50.669690: step: 248/464, loss: 1.4449968338012695 2023-01-22 09:47:51.321603: 
step: 250/464, loss: 0.6216345429420471 2023-01-22 09:47:51.888384: step: 252/464, loss: 1.7167075872421265 2023-01-22 09:47:52.529249: step: 254/464, loss: 1.0585178136825562 2023-01-22 09:47:53.112336: step: 256/464, loss: 0.9872919917106628 2023-01-22 09:47:53.704566: step: 258/464, loss: 2.103480100631714 2023-01-22 09:47:54.310296: step: 260/464, loss: 3.0257513523101807 2023-01-22 09:47:54.900935: step: 262/464, loss: 0.34053394198417664 2023-01-22 09:47:55.620016: step: 264/464, loss: 3.0975921154022217 2023-01-22 09:47:56.248625: step: 266/464, loss: 1.6566007137298584 2023-01-22 09:47:56.882488: step: 268/464, loss: 1.2498109340667725 2023-01-22 09:47:57.475323: step: 270/464, loss: 0.18298912048339844 2023-01-22 09:47:58.117385: step: 272/464, loss: 2.479445457458496 2023-01-22 09:47:58.809762: step: 274/464, loss: 2.039584159851074 2023-01-22 09:47:59.405774: step: 276/464, loss: 3.011651039123535 2023-01-22 09:48:00.068690: step: 278/464, loss: 1.4087164402008057 2023-01-22 09:48:00.726189: step: 280/464, loss: 1.6842014789581299 2023-01-22 09:48:01.410525: step: 282/464, loss: 2.3197803497314453 2023-01-22 09:48:02.056301: step: 284/464, loss: 3.6316616535186768 2023-01-22 09:48:02.690962: step: 286/464, loss: 2.2884769439697266 2023-01-22 09:48:03.308354: step: 288/464, loss: 1.5190672874450684 2023-01-22 09:48:03.937881: step: 290/464, loss: 0.6203249096870422 2023-01-22 09:48:04.514975: step: 292/464, loss: 1.1577428579330444 2023-01-22 09:48:05.102733: step: 294/464, loss: 0.1644885390996933 2023-01-22 09:48:05.693239: step: 296/464, loss: 5.212287425994873 2023-01-22 09:48:06.356607: step: 298/464, loss: 1.941894769668579 2023-01-22 09:48:07.088496: step: 300/464, loss: 1.883209228515625 2023-01-22 09:48:07.755388: step: 302/464, loss: 3.0236852169036865 2023-01-22 09:48:08.441674: step: 304/464, loss: 1.6562798023223877 2023-01-22 09:48:09.089877: step: 306/464, loss: 1.2389827966690063 2023-01-22 09:48:09.710010: step: 308/464, loss: 5.742254734039307 2023-01-22 09:48:10.349531: step: 310/464, loss: 3.27360463142395 2023-01-22 09:48:10.961769: step: 312/464, loss: 4.364145278930664 2023-01-22 09:48:11.631462: step: 314/464, loss: 6.163469314575195 2023-01-22 09:48:12.331233: step: 316/464, loss: 7.255561828613281 2023-01-22 09:48:12.969951: step: 318/464, loss: 0.9108213782310486 2023-01-22 09:48:13.590977: step: 320/464, loss: 0.9451639652252197 2023-01-22 09:48:14.174783: step: 322/464, loss: 1.8270184993743896 2023-01-22 09:48:14.771388: step: 324/464, loss: 0.9697843194007874 2023-01-22 09:48:15.415784: step: 326/464, loss: 3.6913609504699707 2023-01-22 09:48:15.959437: step: 328/464, loss: 4.866645812988281 2023-01-22 09:48:16.633031: step: 330/464, loss: 0.8567318320274353 2023-01-22 09:48:17.280876: step: 332/464, loss: 2.2915091514587402 2023-01-22 09:48:17.887474: step: 334/464, loss: 3.5241568088531494 2023-01-22 09:48:18.527598: step: 336/464, loss: 0.7516891360282898 2023-01-22 09:48:19.148940: step: 338/464, loss: 1.6842435598373413 2023-01-22 09:48:19.764467: step: 340/464, loss: 2.723252296447754 2023-01-22 09:48:20.431296: step: 342/464, loss: 1.7390908002853394 2023-01-22 09:48:21.177971: step: 344/464, loss: 3.8381030559539795 2023-01-22 09:48:21.822866: step: 346/464, loss: 0.8049541711807251 2023-01-22 09:48:22.384995: step: 348/464, loss: 0.7041277885437012 2023-01-22 09:48:23.102015: step: 350/464, loss: 2.2053818702697754 2023-01-22 09:48:23.774747: step: 352/464, loss: 8.421344757080078 2023-01-22 09:48:24.379434: step: 354/464, loss: 
0.6538692116737366 2023-01-22 09:48:24.951505: step: 356/464, loss: 0.3336998224258423 2023-01-22 09:48:25.635266: step: 358/464, loss: 0.8734704256057739 2023-01-22 09:48:26.226392: step: 360/464, loss: 1.0664860010147095 2023-01-22 09:48:26.832536: step: 362/464, loss: 0.6072345972061157 2023-01-22 09:48:27.449114: step: 364/464, loss: 0.42965126037597656 2023-01-22 09:48:28.020181: step: 366/464, loss: 2.433159112930298 2023-01-22 09:48:28.646336: step: 368/464, loss: 1.0689464807510376 2023-01-22 09:48:29.226116: step: 370/464, loss: 0.9118122458457947 2023-01-22 09:48:29.908955: step: 372/464, loss: 1.2723240852355957 2023-01-22 09:48:30.609412: step: 374/464, loss: 2.288015842437744 2023-01-22 09:48:31.203127: step: 376/464, loss: 1.927898645401001 2023-01-22 09:48:31.819051: step: 378/464, loss: 2.154618740081787 2023-01-22 09:48:32.455317: step: 380/464, loss: 2.2089216709136963 2023-01-22 09:48:33.127858: step: 382/464, loss: 3.4999170303344727 2023-01-22 09:48:33.816806: step: 384/464, loss: 0.4415696859359741 2023-01-22 09:48:34.428271: step: 386/464, loss: 5.115442752838135 2023-01-22 09:48:35.071907: step: 388/464, loss: 1.9158753156661987 2023-01-22 09:48:35.688044: step: 390/464, loss: 1.7842466831207275 2023-01-22 09:48:36.354139: step: 392/464, loss: 0.6219794750213623 2023-01-22 09:48:37.061096: step: 394/464, loss: 1.2650036811828613 2023-01-22 09:48:37.691721: step: 396/464, loss: 2.0196750164031982 2023-01-22 09:48:38.408341: step: 398/464, loss: 1.8090322017669678 2023-01-22 09:48:39.060693: step: 400/464, loss: 1.7102181911468506 2023-01-22 09:48:39.725997: step: 402/464, loss: 4.04551362991333 2023-01-22 09:48:40.381226: step: 404/464, loss: 0.5374937653541565 2023-01-22 09:48:40.998737: step: 406/464, loss: 1.2260247468948364 2023-01-22 09:48:41.662007: step: 408/464, loss: 0.8174344301223755 2023-01-22 09:48:42.350080: step: 410/464, loss: 1.277963399887085 2023-01-22 09:48:42.956866: step: 412/464, loss: 1.2115471363067627 2023-01-22 09:48:43.608911: step: 414/464, loss: 1.9816060066223145 2023-01-22 09:48:44.249910: step: 416/464, loss: 3.267216682434082 2023-01-22 09:48:44.897118: step: 418/464, loss: 1.381102204322815 2023-01-22 09:48:45.512072: step: 420/464, loss: 1.9319000244140625 2023-01-22 09:48:46.170959: step: 422/464, loss: 0.7982077598571777 2023-01-22 09:48:46.824917: step: 424/464, loss: 1.7125730514526367 2023-01-22 09:48:47.414602: step: 426/464, loss: 1.6744165420532227 2023-01-22 09:48:48.076424: step: 428/464, loss: 1.3618673086166382 2023-01-22 09:48:48.680622: step: 430/464, loss: 0.8129212260246277 2023-01-22 09:48:49.226152: step: 432/464, loss: 1.3697012662887573 2023-01-22 09:48:49.798186: step: 434/464, loss: 1.0622531175613403 2023-01-22 09:48:50.365842: step: 436/464, loss: 1.1265355348587036 2023-01-22 09:48:51.029008: step: 438/464, loss: 6.394579887390137 2023-01-22 09:48:51.641404: step: 440/464, loss: 1.1750330924987793 2023-01-22 09:48:52.229513: step: 442/464, loss: 3.9063398838043213 2023-01-22 09:48:52.848292: step: 444/464, loss: 7.915856838226318 2023-01-22 09:48:53.437240: step: 446/464, loss: 2.0682547092437744 2023-01-22 09:48:54.028868: step: 448/464, loss: 1.599812626838684 2023-01-22 09:48:54.614458: step: 450/464, loss: 0.47663643956184387 2023-01-22 09:48:55.310204: step: 452/464, loss: 1.7338075637817383 2023-01-22 09:48:55.943832: step: 454/464, loss: 1.2220450639724731 2023-01-22 09:48:56.662929: step: 456/464, loss: 0.8941260576248169 2023-01-22 09:48:57.318795: step: 458/464, loss: 1.4423011541366577 2023-01-22 
09:48:57.951834: step: 460/464, loss: 1.9423093795776367 2023-01-22 09:48:58.714239: step: 462/464, loss: 5.917514801025391 2023-01-22 09:48:59.340722: step: 464/464, loss: 0.6594303250312805 2023-01-22 09:48:59.965373: step: 466/464, loss: 5.047465801239014 2023-01-22 09:49:00.603416: step: 468/464, loss: 6.084995746612549 2023-01-22 09:49:01.195296: step: 470/464, loss: 0.6543664336204529 2023-01-22 09:49:01.786722: step: 472/464, loss: 1.768980860710144 2023-01-22 09:49:02.475882: step: 474/464, loss: 1.1559669971466064 2023-01-22 09:49:03.039363: step: 476/464, loss: 1.564383625984192 2023-01-22 09:49:03.670897: step: 478/464, loss: 0.354350745677948 2023-01-22 09:49:04.369171: step: 480/464, loss: 7.63833475112915 2023-01-22 09:49:04.965698: step: 482/464, loss: 3.8567113876342773 2023-01-22 09:49:05.602494: step: 484/464, loss: 3.6513772010803223 2023-01-22 09:49:06.194881: step: 486/464, loss: 1.1392251253128052 2023-01-22 09:49:06.851555: step: 488/464, loss: 0.7947462201118469 2023-01-22 09:49:07.513967: step: 490/464, loss: 0.6445846557617188 2023-01-22 09:49:08.155438: step: 492/464, loss: 0.7971004247665405 2023-01-22 09:49:08.766693: step: 494/464, loss: 2.8422393798828125 2023-01-22 09:49:09.373347: step: 496/464, loss: 1.5413612127304077 2023-01-22 09:49:10.060825: step: 498/464, loss: 1.6812660694122314 2023-01-22 09:49:10.647087: step: 500/464, loss: 0.44464367628097534 2023-01-22 09:49:11.294478: step: 502/464, loss: 1.7780667543411255 2023-01-22 09:49:11.921250: step: 504/464, loss: 3.0602879524230957 2023-01-22 09:49:12.628554: step: 506/464, loss: 2.5655436515808105 2023-01-22 09:49:13.227607: step: 508/464, loss: 0.34945783019065857 2023-01-22 09:49:13.913415: step: 510/464, loss: 1.001344919204712 2023-01-22 09:49:14.567769: step: 512/464, loss: 0.6395118236541748 2023-01-22 09:49:15.236310: step: 514/464, loss: 1.4813051223754883 2023-01-22 09:49:15.845530: step: 516/464, loss: 1.1046370267868042 2023-01-22 09:49:16.492696: step: 518/464, loss: 0.7151631712913513 2023-01-22 09:49:17.115399: step: 520/464, loss: 1.0134127140045166 2023-01-22 09:49:17.699432: step: 522/464, loss: 1.9913588762283325 2023-01-22 09:49:18.404629: step: 524/464, loss: 6.719761848449707 2023-01-22 09:49:19.031075: step: 526/464, loss: 0.5916548371315002 2023-01-22 09:49:19.643062: step: 528/464, loss: 1.092638373374939 2023-01-22 09:49:20.258887: step: 530/464, loss: 0.8220130801200867 2023-01-22 09:49:20.953871: step: 532/464, loss: 0.7773857116699219 2023-01-22 09:49:21.522277: step: 534/464, loss: 1.0024209022521973 2023-01-22 09:49:22.142092: step: 536/464, loss: 2.910573959350586 2023-01-22 09:49:22.776798: step: 538/464, loss: 3.8845930099487305 2023-01-22 09:49:23.389549: step: 540/464, loss: 0.8535181879997253 2023-01-22 09:49:23.964778: step: 542/464, loss: 0.4238739609718323 2023-01-22 09:49:24.629043: step: 544/464, loss: 1.4098758697509766 2023-01-22 09:49:25.253285: step: 546/464, loss: 0.9261318445205688 2023-01-22 09:49:25.852032: step: 548/464, loss: 0.5454732179641724 2023-01-22 09:49:26.398996: step: 550/464, loss: 0.25435954332351685 2023-01-22 09:49:26.923676: step: 552/464, loss: 0.3379531502723694 2023-01-22 09:49:27.615963: step: 554/464, loss: 0.7175856828689575 2023-01-22 09:49:28.248502: step: 556/464, loss: 2.1935458183288574 2023-01-22 09:49:28.891698: step: 558/464, loss: 2.169313907623291 2023-01-22 09:49:29.470499: step: 560/464, loss: 1.6759428977966309 2023-01-22 09:49:30.071320: step: 562/464, loss: 0.8559223413467407 2023-01-22 09:49:30.695376: step: 
564/464, loss: 1.2192752361297607 2023-01-22 09:49:31.335814: step: 566/464, loss: 6.734214782714844 2023-01-22 09:49:31.998011: step: 568/464, loss: 1.4761178493499756 2023-01-22 09:49:32.651928: step: 570/464, loss: 0.6868404150009155 2023-01-22 09:49:33.251013: step: 572/464, loss: 0.4026981592178345 2023-01-22 09:49:33.940503: step: 574/464, loss: 1.4963315725326538 2023-01-22 09:49:34.636826: step: 576/464, loss: 0.5705739259719849 2023-01-22 09:49:35.281794: step: 578/464, loss: 0.6995615363121033 2023-01-22 09:49:35.870792: step: 580/464, loss: 2.8022780418395996 2023-01-22 09:49:36.515684: step: 582/464, loss: 3.60697340965271 2023-01-22 09:49:37.151494: step: 584/464, loss: 3.5830140113830566 2023-01-22 09:49:37.816438: step: 586/464, loss: 1.1568769216537476 2023-01-22 09:49:38.414099: step: 588/464, loss: 0.32793667912483215 2023-01-22 09:49:39.012191: step: 590/464, loss: 0.7447863817214966 2023-01-22 09:49:39.666692: step: 592/464, loss: 1.2953349351882935 2023-01-22 09:49:40.249390: step: 594/464, loss: 6.4309587478637695 2023-01-22 09:49:40.859324: step: 596/464, loss: 1.7616764307022095 2023-01-22 09:49:41.567733: step: 598/464, loss: 1.6291272640228271 2023-01-22 09:49:42.251194: step: 600/464, loss: 2.5082497596740723 2023-01-22 09:49:42.828982: step: 602/464, loss: 3.272620916366577 2023-01-22 09:49:43.424234: step: 604/464, loss: 0.9270907640457153 2023-01-22 09:49:44.046374: step: 606/464, loss: 0.6672835350036621 2023-01-22 09:49:44.640135: step: 608/464, loss: 0.9879858493804932 2023-01-22 09:49:45.189730: step: 610/464, loss: 0.5429264307022095 2023-01-22 09:49:45.856058: step: 612/464, loss: 2.867297887802124 2023-01-22 09:49:46.489323: step: 614/464, loss: 0.9067877531051636 2023-01-22 09:49:47.247253: step: 616/464, loss: 0.9723592400550842 2023-01-22 09:49:47.920813: step: 618/464, loss: 1.9591346979141235 2023-01-22 09:49:48.545680: step: 620/464, loss: 3.082007884979248 2023-01-22 09:49:49.181753: step: 622/464, loss: 0.9543931484222412 2023-01-22 09:49:49.824087: step: 624/464, loss: 1.2434319257736206 2023-01-22 09:49:50.522817: step: 626/464, loss: 0.28026509284973145 2023-01-22 09:49:51.215168: step: 628/464, loss: 1.2699750661849976 2023-01-22 09:49:51.852598: step: 630/464, loss: 10.263710975646973 2023-01-22 09:49:52.489718: step: 632/464, loss: 3.928663730621338 2023-01-22 09:49:53.096939: step: 634/464, loss: 0.27222734689712524 2023-01-22 09:49:53.764335: step: 636/464, loss: 0.46683162450790405 2023-01-22 09:49:54.312795: step: 638/464, loss: 0.9551849365234375 2023-01-22 09:49:54.955845: step: 640/464, loss: 2.2991268634796143 2023-01-22 09:49:55.572080: step: 642/464, loss: 0.5585823059082031 2023-01-22 09:49:56.206350: step: 644/464, loss: 1.1316943168640137 2023-01-22 09:49:56.937280: step: 646/464, loss: 1.7934308052062988 2023-01-22 09:49:57.529573: step: 648/464, loss: 0.7643765807151794 2023-01-22 09:49:58.185583: step: 650/464, loss: 0.8959051370620728 2023-01-22 09:49:58.814363: step: 652/464, loss: 1.3295584917068481 2023-01-22 09:49:59.454943: step: 654/464, loss: 0.3932426869869232 2023-01-22 09:50:00.116897: step: 656/464, loss: 0.37686648964881897 2023-01-22 09:50:00.748595: step: 658/464, loss: 1.5109634399414062 2023-01-22 09:50:01.391178: step: 660/464, loss: 1.0998424291610718 2023-01-22 09:50:02.045049: step: 662/464, loss: 4.340769290924072 2023-01-22 09:50:02.621557: step: 664/464, loss: 1.1464810371398926 2023-01-22 09:50:03.177284: step: 666/464, loss: 1.1062222719192505 2023-01-22 09:50:03.835285: step: 668/464, loss: 
1.909510612487793 2023-01-22 09:50:04.454881: step: 670/464, loss: 3.512485980987549 2023-01-22 09:50:05.059286: step: 672/464, loss: 0.8809099197387695 2023-01-22 09:50:05.702725: step: 674/464, loss: 1.7871596813201904 2023-01-22 09:50:06.299407: step: 676/464, loss: 0.6380197405815125 2023-01-22 09:50:06.966616: step: 678/464, loss: 0.8312307596206665 2023-01-22 09:50:07.571239: step: 680/464, loss: 1.8398313522338867 2023-01-22 09:50:08.236042: step: 682/464, loss: 2.106290340423584 2023-01-22 09:50:08.820597: step: 684/464, loss: 0.15295329689979553 2023-01-22 09:50:09.425539: step: 686/464, loss: 1.5513062477111816 2023-01-22 09:50:10.064619: step: 688/464, loss: 1.4429486989974976 2023-01-22 09:50:10.708693: step: 690/464, loss: 2.961740493774414 2023-01-22 09:50:11.282666: step: 692/464, loss: 1.389230728149414 2023-01-22 09:50:11.929278: step: 694/464, loss: 0.4239702820777893 2023-01-22 09:50:12.551812: step: 696/464, loss: 1.065203309059143 2023-01-22 09:50:13.165523: step: 698/464, loss: 1.3233675956726074 2023-01-22 09:50:13.773968: step: 700/464, loss: 0.9995968341827393 2023-01-22 09:50:14.421209: step: 702/464, loss: 0.5759196281433105 2023-01-22 09:50:15.106386: step: 704/464, loss: 2.6891186237335205 2023-01-22 09:50:15.785746: step: 706/464, loss: 5.920416355133057 2023-01-22 09:50:16.390935: step: 708/464, loss: 0.6381198763847351 2023-01-22 09:50:16.999448: step: 710/464, loss: 1.4230495691299438 2023-01-22 09:50:17.631813: step: 712/464, loss: 2.112436294555664 2023-01-22 09:50:18.253638: step: 714/464, loss: 0.27687883377075195 2023-01-22 09:50:18.797022: step: 716/464, loss: 0.7682527303695679 2023-01-22 09:50:19.424113: step: 718/464, loss: 0.506624698638916 2023-01-22 09:50:20.018290: step: 720/464, loss: 3.78389310836792 2023-01-22 09:50:20.594134: step: 722/464, loss: 2.2654786109924316 2023-01-22 09:50:21.175891: step: 724/464, loss: 0.934356689453125 2023-01-22 09:50:21.803655: step: 726/464, loss: 0.2805936932563782 2023-01-22 09:50:22.414083: step: 728/464, loss: 3.379147529602051 2023-01-22 09:50:23.069688: step: 730/464, loss: 1.473331332206726 2023-01-22 09:50:23.682211: step: 732/464, loss: 1.9181344509124756 2023-01-22 09:50:24.377184: step: 734/464, loss: 1.289449691772461 2023-01-22 09:50:24.914936: step: 736/464, loss: 0.7308832406997681 2023-01-22 09:50:25.570689: step: 738/464, loss: 1.2970857620239258 2023-01-22 09:50:26.151119: step: 740/464, loss: 1.1372034549713135 2023-01-22 09:50:26.876453: step: 742/464, loss: 9.448132514953613 2023-01-22 09:50:27.532349: step: 744/464, loss: 15.97134017944336 2023-01-22 09:50:28.171075: step: 746/464, loss: 0.32879093289375305 2023-01-22 09:50:28.833033: step: 748/464, loss: 1.876343011856079 2023-01-22 09:50:29.430204: step: 750/464, loss: 2.1643118858337402 2023-01-22 09:50:30.007023: step: 752/464, loss: 4.110321044921875 2023-01-22 09:50:30.601214: step: 754/464, loss: 2.606478452682495 2023-01-22 09:50:31.254121: step: 756/464, loss: 0.7163191437721252 2023-01-22 09:50:31.851462: step: 758/464, loss: 1.9252287149429321 2023-01-22 09:50:32.447650: step: 760/464, loss: 0.46068650484085083 2023-01-22 09:50:33.086928: step: 762/464, loss: 1.9304959774017334 2023-01-22 09:50:33.755238: step: 764/464, loss: 0.5159561038017273 2023-01-22 09:50:34.385658: step: 766/464, loss: 2.7147603034973145 2023-01-22 09:50:34.996708: step: 768/464, loss: 1.8933334350585938 2023-01-22 09:50:35.692588: step: 770/464, loss: 1.7268967628479004 2023-01-22 09:50:36.335434: step: 772/464, loss: 0.45652639865875244 2023-01-22 
09:50:36.903631: step: 774/464, loss: 2.0273325443267822 2023-01-22 09:50:37.575731: step: 776/464, loss: 2.12650203704834 2023-01-22 09:50:38.250644: step: 778/464, loss: 1.441702961921692 2023-01-22 09:50:38.904835: step: 780/464, loss: 0.38216400146484375 2023-01-22 09:50:39.560725: step: 782/464, loss: 2.160226821899414 2023-01-22 09:50:40.246044: step: 784/464, loss: 0.460218608379364 2023-01-22 09:50:40.920226: step: 786/464, loss: 0.9313029050827026 2023-01-22 09:50:41.586500: step: 788/464, loss: 8.107101440429688 2023-01-22 09:50:42.222616: step: 790/464, loss: 1.3128178119659424 2023-01-22 09:50:42.850908: step: 792/464, loss: 0.8122116923332214 2023-01-22 09:50:43.438038: step: 794/464, loss: 0.34943875670433044 2023-01-22 09:50:44.071236: step: 796/464, loss: 0.8123091459274292 2023-01-22 09:50:44.706590: step: 798/464, loss: 2.5083839893341064 2023-01-22 09:50:45.353202: step: 800/464, loss: 1.1529542207717896 2023-01-22 09:50:45.963022: step: 802/464, loss: 1.4196804761886597 2023-01-22 09:50:46.535050: step: 804/464, loss: 0.8466091752052307 2023-01-22 09:50:47.172337: step: 806/464, loss: 2.2197837829589844 2023-01-22 09:50:47.800590: step: 808/464, loss: 1.2632802724838257 2023-01-22 09:50:48.462984: step: 810/464, loss: 0.4522426128387451 2023-01-22 09:50:49.065057: step: 812/464, loss: 0.1866062730550766 2023-01-22 09:50:49.621546: step: 814/464, loss: 1.1612019538879395 2023-01-22 09:50:50.235625: step: 816/464, loss: 3.2337965965270996 2023-01-22 09:50:50.851667: step: 818/464, loss: 1.704648494720459 2023-01-22 09:50:51.535738: step: 820/464, loss: 1.4136182069778442 2023-01-22 09:50:52.192260: step: 822/464, loss: 0.6175124645233154 2023-01-22 09:50:52.837486: step: 824/464, loss: 3.2209134101867676 2023-01-22 09:50:53.443171: step: 826/464, loss: 0.3057088851928711 2023-01-22 09:50:54.051952: step: 828/464, loss: 0.34447264671325684 2023-01-22 09:50:54.770060: step: 830/464, loss: 0.23444117605686188 2023-01-22 09:50:55.404439: step: 832/464, loss: 1.7572524547576904 2023-01-22 09:50:56.039046: step: 834/464, loss: 1.0998787879943848 2023-01-22 09:50:56.678506: step: 836/464, loss: 1.7545276880264282 2023-01-22 09:50:57.240122: step: 838/464, loss: 0.498182088136673 2023-01-22 09:50:57.877822: step: 840/464, loss: 0.8405647277832031 2023-01-22 09:50:58.505754: step: 842/464, loss: 0.36363208293914795 2023-01-22 09:50:59.158125: step: 844/464, loss: 3.207554340362549 2023-01-22 09:50:59.716871: step: 846/464, loss: 0.6012241840362549 2023-01-22 09:51:00.371699: step: 848/464, loss: 0.46905815601348877 2023-01-22 09:51:00.966379: step: 850/464, loss: 0.22596511244773865 2023-01-22 09:51:01.605253: step: 852/464, loss: 1.1187995672225952 2023-01-22 09:51:02.232164: step: 854/464, loss: 8.382316589355469 2023-01-22 09:51:02.834609: step: 856/464, loss: 0.8991714715957642 2023-01-22 09:51:03.486220: step: 858/464, loss: 1.5431783199310303 2023-01-22 09:51:04.228251: step: 860/464, loss: 1.9879878759384155 2023-01-22 09:51:04.872762: step: 862/464, loss: 4.223198890686035 2023-01-22 09:51:05.409782: step: 864/464, loss: 1.4735733270645142 2023-01-22 09:51:06.037992: step: 866/464, loss: 1.8661390542984009 2023-01-22 09:51:06.651242: step: 868/464, loss: 2.429267406463623 2023-01-22 09:51:07.346526: step: 870/464, loss: 2.5032222270965576 2023-01-22 09:51:07.943369: step: 872/464, loss: 3.873025417327881 2023-01-22 09:51:08.612795: step: 874/464, loss: 9.972686767578125 2023-01-22 09:51:09.183832: step: 876/464, loss: 0.4837803840637207 2023-01-22 09:51:09.768798: step: 
878/464, loss: 1.799896240234375 2023-01-22 09:51:10.463192: step: 880/464, loss: 0.8232937455177307 2023-01-22 09:51:11.032289: step: 882/464, loss: 4.3672332763671875 2023-01-22 09:51:11.651546: step: 884/464, loss: 1.8989098072052002 2023-01-22 09:51:12.281281: step: 886/464, loss: 1.4259133338928223 2023-01-22 09:51:12.884108: step: 888/464, loss: 1.350890874862671 2023-01-22 09:51:13.526712: step: 890/464, loss: 1.217652440071106 2023-01-22 09:51:14.146014: step: 892/464, loss: 2.2741332054138184 2023-01-22 09:51:14.776155: step: 894/464, loss: 1.9730730056762695 2023-01-22 09:51:15.456364: step: 896/464, loss: 1.6086657047271729 2023-01-22 09:51:16.087538: step: 898/464, loss: 0.7476229667663574 2023-01-22 09:51:16.675962: step: 900/464, loss: 1.049003005027771 2023-01-22 09:51:17.398698: step: 902/464, loss: 1.2468827962875366 2023-01-22 09:51:18.043609: step: 904/464, loss: 0.4603670835494995 2023-01-22 09:51:18.680852: step: 906/464, loss: 0.921544075012207 2023-01-22 09:51:19.310267: step: 908/464, loss: 1.3425829410552979 2023-01-22 09:51:19.940545: step: 910/464, loss: 5.488288879394531 2023-01-22 09:51:20.521705: step: 912/464, loss: 0.4062989354133606 2023-01-22 09:51:21.099688: step: 914/464, loss: 2.260221481323242 2023-01-22 09:51:21.794649: step: 916/464, loss: 0.9517034292221069 2023-01-22 09:51:22.384795: step: 918/464, loss: 1.0265724658966064 2023-01-22 09:51:23.000549: step: 920/464, loss: 0.3605181574821472 2023-01-22 09:51:23.631993: step: 922/464, loss: 0.7988200187683105 2023-01-22 09:51:24.281292: step: 924/464, loss: 0.6456014513969421 2023-01-22 09:51:24.902998: step: 926/464, loss: 0.7226334810256958 2023-01-22 09:51:25.572951: step: 928/464, loss: 4.191205024719238 2023-01-22 09:51:26.074172: step: 930/464, loss: 0.06958475708961487 ================================================== Loss: 1.974 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.237717219472338, 'r': 0.23411544341972682, 'f1': 0.23590258420919039}, 'combined': 0.1738229567857192, 'epoch': 1} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2760115860797881, 'r': 0.2296294738184463, 'f1': 0.2506932300634466}, 'combined': 0.16366501030048847, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.23584435925638375, 'r': 0.2465645574044012, 'f1': 0.2410853450176367}, 'combined': 0.1776418331708902, 'epoch': 1} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28538313969940676, 'r': 0.23459017722079673, 'f1': 0.25750583400872956}, 'combined': 0.16811261702124314, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24524492853708318, 'r': 0.23316847372275712, 'f1': 0.23905427985556457}, 'combined': 0.17614525884094231, 'epoch': 1} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28856785086288, 'r': 0.23477604762581425, 'f1': 0.258907459103303}, 'combined': 0.1690276676011201, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21164021164021163, 'r': 0.21768707482993196, 'f1': 0.2146210596914822}, 'combined': 0.14308070646098814, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.1810344827586207, 'r': 0.22826086956521738, 
'f1': 0.2019230769230769}, 'combined': 0.10096153846153845, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.15517241379310345, 'f1': 0.23076923076923075}, 'combined': 0.15384615384615383, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.237717219472338, 'r': 0.23411544341972682, 'f1': 0.23590258420919039}, 'combined': 0.1738229567857192, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2760115860797881, 'r': 0.2296294738184463, 'f1': 0.2506932300634466}, 'combined': 0.16366501030048847, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21164021164021163, 'r': 0.21768707482993196, 'f1': 0.2146210596914822}, 'combined': 0.14308070646098814, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.23584435925638375, 'r': 0.2465645574044012, 'f1': 0.2410853450176367}, 'combined': 0.1776418331708902, 'epoch': 1} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28538313969940676, 'r': 0.23459017722079673, 'f1': 0.25750583400872956}, 'combined': 0.16811261702124314, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.1810344827586207, 'r': 0.22826086956521738, 'f1': 0.2019230769230769}, 'combined': 0.10096153846153845, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24524492853708318, 'r': 0.23316847372275712, 'f1': 0.23905427985556457}, 'combined': 0.17614525884094231, 'epoch': 1} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28856785086288, 'r': 0.23477604762581425, 'f1': 0.258907459103303}, 'combined': 0.1690276676011201, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.15517241379310345, 'f1': 0.23076923076923075}, 'combined': 0.15384615384615383, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:54:19.118862: step: 2/464, loss: 2.4187142848968506 2023-01-22 09:54:19.797043: step: 4/464, loss: 1.1150403022766113 2023-01-22 09:54:20.421062: step: 6/464, loss: 0.42951124906539917 2023-01-22 09:54:21.024716: step: 8/464, loss: 0.3977397382259369 2023-01-22 09:54:21.603769: step: 10/464, loss: 0.8529415726661682 2023-01-22 09:54:22.196675: step: 12/464, loss: 1.3303990364074707 2023-01-22 09:54:22.781124: step: 14/464, loss: 1.2783358097076416 2023-01-22 09:54:23.426240: step: 16/464, loss: 3.2577548027038574 2023-01-22 09:54:24.080288: step: 18/464, loss: 4.058514595031738 2023-01-22 09:54:24.733618: step: 20/464, loss: 3.979752540588379 2023-01-22 09:54:25.468295: step: 22/464, loss: 0.47690603137016296 2023-01-22 09:54:26.147290: step: 24/464, loss: 13.170414924621582 2023-01-22 09:54:26.750010: step: 26/464, loss: 2.2249720096588135 
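The epoch-1 summary above reports, per language, a template block and a slot block (each with p/r/f1) and a single 'combined' figure. The logged numbers are consistent with each f1 being the usual harmonic mean of precision and recall, and with 'combined' being the product of the template and slot f1 values. The Python sketch below only re-checks that relationship against the logged epoch-1 Dev Chinese entry; the helper is illustrative and is not code from train.py.

import math

def f1(p, r):
    # Harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) else 0.0

# Values copied from the epoch-1 Dev Chinese line above.
dev_chinese_epoch1 = {
    'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
    'slot': {'p': 0.237717219472338, 'r': 0.23411544341972682,
             'f1': 0.23590258420919039},
    'combined': 0.1738229567857192,
}

tmpl, slot = dev_chinese_epoch1['template'], dev_chinese_epoch1['slot']
assert math.isclose(f1(tmpl['p'], tmpl['r']), tmpl['f1'])
assert math.isclose(f1(slot['p'], slot['r']), slot['f1'])
# 'combined' equals template_f1 * slot_f1 for this entry (and is consistent
# with the other summary entries spot-checked in this log).
assert math.isclose(tmpl['f1'] * slot['f1'], dev_chinese_epoch1['combined'])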
2023-01-22 09:54:27.330629: step: 28/464, loss: 5.017965316772461 2023-01-22 09:54:28.027138: step: 30/464, loss: 1.719996690750122 2023-01-22 09:54:28.692386: step: 32/464, loss: 0.8921970129013062 2023-01-22 09:54:29.300821: step: 34/464, loss: 2.103689670562744 2023-01-22 09:54:29.940407: step: 36/464, loss: 1.3535950183868408 2023-01-22 09:54:30.611483: step: 38/464, loss: 0.5213176012039185 2023-01-22 09:54:31.249682: step: 40/464, loss: 2.4116697311401367 2023-01-22 09:54:31.873741: step: 42/464, loss: 0.6674009561538696 2023-01-22 09:54:32.510976: step: 44/464, loss: 0.758773684501648 2023-01-22 09:54:33.150211: step: 46/464, loss: 1.5063775777816772 2023-01-22 09:54:33.904766: step: 48/464, loss: 0.4901307225227356 2023-01-22 09:54:34.433316: step: 50/464, loss: 1.0143728256225586 2023-01-22 09:54:34.972056: step: 52/464, loss: 0.5364481806755066 2023-01-22 09:54:35.586355: step: 54/464, loss: 0.7740236520767212 2023-01-22 09:54:36.235211: step: 56/464, loss: 1.2117143869400024 2023-01-22 09:54:36.862591: step: 58/464, loss: 1.6247572898864746 2023-01-22 09:54:37.523075: step: 60/464, loss: 7.515179634094238 2023-01-22 09:54:38.184756: step: 62/464, loss: 1.646538496017456 2023-01-22 09:54:38.853825: step: 64/464, loss: 0.550868809223175 2023-01-22 09:54:39.511449: step: 66/464, loss: 0.6326333284378052 2023-01-22 09:54:40.177664: step: 68/464, loss: 1.768183708190918 2023-01-22 09:54:40.789249: step: 70/464, loss: 1.3389290571212769 2023-01-22 09:54:41.368842: step: 72/464, loss: 2.8966360092163086 2023-01-22 09:54:41.985798: step: 74/464, loss: 2.0588395595550537 2023-01-22 09:54:42.710132: step: 76/464, loss: 0.4368211627006531 2023-01-22 09:54:43.366184: step: 78/464, loss: 1.182746410369873 2023-01-22 09:54:43.984246: step: 80/464, loss: 1.4138734340667725 2023-01-22 09:54:44.646487: step: 82/464, loss: 0.3119286298751831 2023-01-22 09:54:45.268377: step: 84/464, loss: 8.67248249053955 2023-01-22 09:54:45.879013: step: 86/464, loss: 0.36811041831970215 2023-01-22 09:54:46.510994: step: 88/464, loss: 0.8464750051498413 2023-01-22 09:54:47.183997: step: 90/464, loss: 0.6472318768501282 2023-01-22 09:54:47.789703: step: 92/464, loss: 1.5341274738311768 2023-01-22 09:54:48.407645: step: 94/464, loss: 1.2942731380462646 2023-01-22 09:54:49.035568: step: 96/464, loss: 0.365266352891922 2023-01-22 09:54:49.695253: step: 98/464, loss: 0.9241402745246887 2023-01-22 09:54:50.274898: step: 100/464, loss: 0.2900221347808838 2023-01-22 09:54:50.924290: step: 102/464, loss: 0.7217608094215393 2023-01-22 09:54:51.556803: step: 104/464, loss: 0.29744741320610046 2023-01-22 09:54:52.198779: step: 106/464, loss: 1.191608190536499 2023-01-22 09:54:52.782620: step: 108/464, loss: 3.7245192527770996 2023-01-22 09:54:53.400470: step: 110/464, loss: 2.004127025604248 2023-01-22 09:54:54.010042: step: 112/464, loss: 2.8814492225646973 2023-01-22 09:54:54.623373: step: 114/464, loss: 0.2999536991119385 2023-01-22 09:54:55.295663: step: 116/464, loss: 0.24473756551742554 2023-01-22 09:54:55.886066: step: 118/464, loss: 2.501936197280884 2023-01-22 09:54:56.434008: step: 120/464, loss: 0.9501038193702698 2023-01-22 09:54:57.055569: step: 122/464, loss: 0.9306862950325012 2023-01-22 09:54:57.689644: step: 124/464, loss: 1.8798160552978516 2023-01-22 09:54:58.342737: step: 126/464, loss: 4.3524956703186035 2023-01-22 09:54:58.999446: step: 128/464, loss: 1.222733974456787 2023-01-22 09:54:59.678569: step: 130/464, loss: 0.6787784099578857 2023-01-22 09:55:00.348157: step: 132/464, loss: 
0.28239545226097107 2023-01-22 09:55:00.921265: step: 134/464, loss: 4.137892723083496 2023-01-22 09:55:01.563898: step: 136/464, loss: 3.7175168991088867 2023-01-22 09:55:02.224431: step: 138/464, loss: 0.6110906600952148 2023-01-22 09:55:02.830170: step: 140/464, loss: 3.6410927772521973 2023-01-22 09:55:03.455118: step: 142/464, loss: 3.896022319793701 2023-01-22 09:55:04.061649: step: 144/464, loss: 0.6825583577156067 2023-01-22 09:55:04.703351: step: 146/464, loss: 1.4839084148406982 2023-01-22 09:55:05.254079: step: 148/464, loss: 0.5772268772125244 2023-01-22 09:55:05.866155: step: 150/464, loss: 1.6391181945800781 2023-01-22 09:55:06.496964: step: 152/464, loss: 1.1953972578048706 2023-01-22 09:55:07.124018: step: 154/464, loss: 2.2237143516540527 2023-01-22 09:55:07.699952: step: 156/464, loss: 0.38013097643852234 2023-01-22 09:55:08.328948: step: 158/464, loss: 1.0890448093414307 2023-01-22 09:55:08.920061: step: 160/464, loss: 0.47497808933258057 2023-01-22 09:55:09.540452: step: 162/464, loss: 1.2158253192901611 2023-01-22 09:55:10.163351: step: 164/464, loss: 0.6530698537826538 2023-01-22 09:55:10.773264: step: 166/464, loss: 0.9841471910476685 2023-01-22 09:55:11.343950: step: 168/464, loss: 3.2851874828338623 2023-01-22 09:55:12.127554: step: 170/464, loss: 2.146338939666748 2023-01-22 09:55:12.722658: step: 172/464, loss: 0.4231458902359009 2023-01-22 09:55:13.426330: step: 174/464, loss: 0.24826262891292572 2023-01-22 09:55:14.032938: step: 176/464, loss: 1.625036597251892 2023-01-22 09:55:14.685950: step: 178/464, loss: 0.5881162881851196 2023-01-22 09:55:15.313169: step: 180/464, loss: 3.509032964706421 2023-01-22 09:55:16.028406: step: 182/464, loss: 1.0909960269927979 2023-01-22 09:55:16.633403: step: 184/464, loss: 3.288181781768799 2023-01-22 09:55:17.356506: step: 186/464, loss: 1.0657768249511719 2023-01-22 09:55:17.959820: step: 188/464, loss: 0.6416198015213013 2023-01-22 09:55:18.518477: step: 190/464, loss: 2.416868209838867 2023-01-22 09:55:19.113474: step: 192/464, loss: 1.6623948812484741 2023-01-22 09:55:19.714942: step: 194/464, loss: 0.4083830416202545 2023-01-22 09:55:20.336894: step: 196/464, loss: 0.6389298439025879 2023-01-22 09:55:20.905322: step: 198/464, loss: 1.5056620836257935 2023-01-22 09:55:21.578493: step: 200/464, loss: 0.6298701763153076 2023-01-22 09:55:22.175785: step: 202/464, loss: 1.815793514251709 2023-01-22 09:55:22.798233: step: 204/464, loss: 0.5246171951293945 2023-01-22 09:55:23.387616: step: 206/464, loss: 0.44010719656944275 2023-01-22 09:55:24.033857: step: 208/464, loss: 0.8312480449676514 2023-01-22 09:55:24.657595: step: 210/464, loss: 0.25231248140335083 2023-01-22 09:55:25.241422: step: 212/464, loss: 1.9889405965805054 2023-01-22 09:55:25.873220: step: 214/464, loss: 0.5998648405075073 2023-01-22 09:55:26.528503: step: 216/464, loss: 2.334963083267212 2023-01-22 09:55:27.081970: step: 218/464, loss: 3.4502251148223877 2023-01-22 09:55:27.849226: step: 220/464, loss: 1.4894522428512573 2023-01-22 09:55:28.383028: step: 222/464, loss: 0.4923945367336273 2023-01-22 09:55:29.098540: step: 224/464, loss: 2.6111702919006348 2023-01-22 09:55:29.707022: step: 226/464, loss: 0.5539352297782898 2023-01-22 09:55:30.342222: step: 228/464, loss: 0.38164007663726807 2023-01-22 09:55:31.052236: step: 230/464, loss: 6.053883075714111 2023-01-22 09:55:31.610981: step: 232/464, loss: 1.0229780673980713 2023-01-22 09:55:32.248312: step: 234/464, loss: 0.270619660615921 2023-01-22 09:55:32.831008: step: 236/464, loss: 0.4925817549228668 
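Each epoch header above repeats the full launch command. The stub below mirrors those flags with argparse so the run's settings can be reconstructed programmatically; the flag names and values come from the logged command, while the types and the use of defaults are assumptions rather than train.py's actual argument parser.

import argparse

# Flags as they appear in the logged command; types/defaults are assumed.
parser = argparse.ArgumentParser(description="settings of the logged run")
parser.add_argument("--model_name", default="slot")
parser.add_argument("--xlmr_model_name", default="xlm-roberta-large")
parser.add_argument("--batch_size", type=int, default=16)
parser.add_argument("--xlmr_learning_rate", type=float, default=2e-5)
parser.add_argument("--max_epoch", type=int, default=40)
parser.add_argument("--event_hidden_num", type=int, default=450)
parser.add_argument("--role_hidden_num", type=int, default=350)
parser.add_argument("--p1_data_weight", type=float, default=0.1)
parser.add_argument("--learning_rate", type=float, default=9e-4)
args = parser.parse_args([])  # defaults reproduce the logged run's settings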
2023-01-22 09:55:33.465332: step: 238/464, loss: 0.16969110071659088 2023-01-22 09:55:34.117087: step: 240/464, loss: 1.587585687637329 2023-01-22 09:55:34.751952: step: 242/464, loss: 0.6882718801498413 2023-01-22 09:55:35.342783: step: 244/464, loss: 0.5376545786857605 2023-01-22 09:55:35.967446: step: 246/464, loss: 0.8686901330947876 2023-01-22 09:55:36.621928: step: 248/464, loss: 1.1710368394851685 2023-01-22 09:55:37.330232: step: 250/464, loss: 0.6487290263175964 2023-01-22 09:55:37.912859: step: 252/464, loss: 1.1861798763275146 2023-01-22 09:55:38.522462: step: 254/464, loss: 1.1853152513504028 2023-01-22 09:55:39.181152: step: 256/464, loss: 0.9062669277191162 2023-01-22 09:55:39.782489: step: 258/464, loss: 1.076135277748108 2023-01-22 09:55:40.395583: step: 260/464, loss: 1.5580992698669434 2023-01-22 09:55:40.993756: step: 262/464, loss: 1.5502890348434448 2023-01-22 09:55:41.616788: step: 264/464, loss: 2.189706325531006 2023-01-22 09:55:42.225662: step: 266/464, loss: 3.4544577598571777 2023-01-22 09:55:42.851188: step: 268/464, loss: 2.041504144668579 2023-01-22 09:55:43.482037: step: 270/464, loss: 0.7947772145271301 2023-01-22 09:55:44.148423: step: 272/464, loss: 0.48068466782569885 2023-01-22 09:55:44.766326: step: 274/464, loss: 1.1678788661956787 2023-01-22 09:55:45.497149: step: 276/464, loss: 1.2527238130569458 2023-01-22 09:55:46.147838: step: 278/464, loss: 1.8058478832244873 2023-01-22 09:55:46.731279: step: 280/464, loss: 0.164068803191185 2023-01-22 09:55:47.338081: step: 282/464, loss: 0.997909665107727 2023-01-22 09:55:47.954992: step: 284/464, loss: 0.9107556343078613 2023-01-22 09:55:48.535493: step: 286/464, loss: 0.9681934118270874 2023-01-22 09:55:49.206084: step: 288/464, loss: 2.2107839584350586 2023-01-22 09:55:49.835274: step: 290/464, loss: 0.7348326444625854 2023-01-22 09:55:50.489480: step: 292/464, loss: 1.357094407081604 2023-01-22 09:55:51.096293: step: 294/464, loss: 1.6372148990631104 2023-01-22 09:55:51.756691: step: 296/464, loss: 3.5598602294921875 2023-01-22 09:55:52.389622: step: 298/464, loss: 0.6690852046012878 2023-01-22 09:55:53.014396: step: 300/464, loss: 2.2038683891296387 2023-01-22 09:55:53.687706: step: 302/464, loss: 0.31484052538871765 2023-01-22 09:55:54.290564: step: 304/464, loss: 0.4014740586280823 2023-01-22 09:55:54.896527: step: 306/464, loss: 0.7648613452911377 2023-01-22 09:55:55.463354: step: 308/464, loss: 0.6005369424819946 2023-01-22 09:55:56.089297: step: 310/464, loss: 1.0281970500946045 2023-01-22 09:55:56.787685: step: 312/464, loss: 1.3290072679519653 2023-01-22 09:55:57.446629: step: 314/464, loss: 2.8620059490203857 2023-01-22 09:55:58.052127: step: 316/464, loss: 2.0859313011169434 2023-01-22 09:55:58.710913: step: 318/464, loss: 0.7931711673736572 2023-01-22 09:55:59.311273: step: 320/464, loss: 2.7562625408172607 2023-01-22 09:55:59.941859: step: 322/464, loss: 0.9217475056648254 2023-01-22 09:56:00.633958: step: 324/464, loss: 2.597505569458008 2023-01-22 09:56:01.208823: step: 326/464, loss: 0.5340583324432373 2023-01-22 09:56:01.827786: step: 328/464, loss: 0.7224334478378296 2023-01-22 09:56:02.472112: step: 330/464, loss: 1.8008909225463867 2023-01-22 09:56:03.135184: step: 332/464, loss: 1.970460057258606 2023-01-22 09:56:03.742687: step: 334/464, loss: 1.087660789489746 2023-01-22 09:56:04.353592: step: 336/464, loss: 0.8673958778381348 2023-01-22 09:56:04.978748: step: 338/464, loss: 0.8990617394447327 2023-01-22 09:56:05.628905: step: 340/464, loss: 1.4432100057601929 2023-01-22 
09:56:06.228807: step: 342/464, loss: 0.6929177641868591 2023-01-22 09:56:06.768528: step: 344/464, loss: 0.22175689041614532 2023-01-22 09:56:07.372939: step: 346/464, loss: 1.3276338577270508 2023-01-22 09:56:07.938543: step: 348/464, loss: 3.061007499694824 2023-01-22 09:56:08.563547: step: 350/464, loss: 0.39631396532058716 2023-01-22 09:56:09.141939: step: 352/464, loss: 0.8200819492340088 2023-01-22 09:56:09.733394: step: 354/464, loss: 0.6805083751678467 2023-01-22 09:56:10.378353: step: 356/464, loss: 3.4740169048309326 2023-01-22 09:56:11.101766: step: 358/464, loss: 7.517362117767334 2023-01-22 09:56:11.801495: step: 360/464, loss: 0.15741147100925446 2023-01-22 09:56:12.441077: step: 362/464, loss: 0.9822812080383301 2023-01-22 09:56:13.041549: step: 364/464, loss: 1.2826683521270752 2023-01-22 09:56:13.611705: step: 366/464, loss: 1.1188969612121582 2023-01-22 09:56:14.263801: step: 368/464, loss: 7.362642288208008 2023-01-22 09:56:14.950189: step: 370/464, loss: 0.9471052885055542 2023-01-22 09:56:15.605721: step: 372/464, loss: 4.690070629119873 2023-01-22 09:56:16.273744: step: 374/464, loss: 1.3145390748977661 2023-01-22 09:56:16.914628: step: 376/464, loss: 0.794826865196228 2023-01-22 09:56:17.548970: step: 378/464, loss: 1.242023229598999 2023-01-22 09:56:18.127935: step: 380/464, loss: 5.607382774353027 2023-01-22 09:56:18.755640: step: 382/464, loss: 0.3236657381057739 2023-01-22 09:56:19.436718: step: 384/464, loss: 0.6186391711235046 2023-01-22 09:56:20.229803: step: 386/464, loss: 4.379059791564941 2023-01-22 09:56:20.899039: step: 388/464, loss: 2.559650421142578 2023-01-22 09:56:21.547830: step: 390/464, loss: 0.7290323972702026 2023-01-22 09:56:22.123488: step: 392/464, loss: 1.9199469089508057 2023-01-22 09:56:22.754901: step: 394/464, loss: 8.768730163574219 2023-01-22 09:56:23.347750: step: 396/464, loss: 7.07295036315918 2023-01-22 09:56:24.081219: step: 398/464, loss: 3.395118236541748 2023-01-22 09:56:24.773548: step: 400/464, loss: 1.324446201324463 2023-01-22 09:56:25.390771: step: 402/464, loss: 1.6456454992294312 2023-01-22 09:56:26.023674: step: 404/464, loss: 2.236362934112549 2023-01-22 09:56:26.627738: step: 406/464, loss: 1.138903021812439 2023-01-22 09:56:27.245160: step: 408/464, loss: 1.8457611799240112 2023-01-22 09:56:27.852749: step: 410/464, loss: 7.671128273010254 2023-01-22 09:56:28.493110: step: 412/464, loss: 1.9276256561279297 2023-01-22 09:56:29.122302: step: 414/464, loss: 0.4018891453742981 2023-01-22 09:56:29.773756: step: 416/464, loss: 1.0885846614837646 2023-01-22 09:56:30.419753: step: 418/464, loss: 1.0276798009872437 2023-01-22 09:56:31.036394: step: 420/464, loss: 1.6159963607788086 2023-01-22 09:56:31.662776: step: 422/464, loss: 2.1850693225860596 2023-01-22 09:56:32.281339: step: 424/464, loss: 0.8571640253067017 2023-01-22 09:56:32.934584: step: 426/464, loss: 0.421733021736145 2023-01-22 09:56:33.492219: step: 428/464, loss: 1.596917748451233 2023-01-22 09:56:34.078821: step: 430/464, loss: 1.4006236791610718 2023-01-22 09:56:34.635986: step: 432/464, loss: 0.28295522928237915 2023-01-22 09:56:35.236001: step: 434/464, loss: 4.537855625152588 2023-01-22 09:56:35.811620: step: 436/464, loss: 0.3357005715370178 2023-01-22 09:56:36.438051: step: 438/464, loss: 0.3452349901199341 2023-01-22 09:56:37.105000: step: 440/464, loss: 0.5913131237030029 2023-01-22 09:56:37.631684: step: 442/464, loss: 0.19197748601436615 2023-01-22 09:56:38.294209: step: 444/464, loss: 0.9170107245445251 2023-01-22 09:56:39.006381: step: 446/464, 
loss: 1.5506609678268433 2023-01-22 09:56:39.614794: step: 448/464, loss: 1.7220441102981567 2023-01-22 09:56:40.216370: step: 450/464, loss: 0.3225691020488739 2023-01-22 09:56:40.820717: step: 452/464, loss: 0.25700774788856506 2023-01-22 09:56:41.552848: step: 454/464, loss: 0.4294798672199249 2023-01-22 09:56:42.137653: step: 456/464, loss: 0.9342938661575317 2023-01-22 09:56:42.796179: step: 458/464, loss: 2.666210412979126 2023-01-22 09:56:43.433656: step: 460/464, loss: 1.6266505718231201 2023-01-22 09:56:44.172643: step: 462/464, loss: 1.9788882732391357 2023-01-22 09:56:44.748722: step: 464/464, loss: 1.51228928565979 2023-01-22 09:56:45.386605: step: 466/464, loss: 0.583839476108551 2023-01-22 09:56:45.993874: step: 468/464, loss: 0.7826869487762451 2023-01-22 09:56:46.570648: step: 470/464, loss: 0.9563395977020264 2023-01-22 09:56:47.211766: step: 472/464, loss: 1.4059982299804688 2023-01-22 09:56:47.824156: step: 474/464, loss: 0.27404946088790894 2023-01-22 09:56:48.474622: step: 476/464, loss: 1.0723012685775757 2023-01-22 09:56:49.095682: step: 478/464, loss: 0.6428566575050354 2023-01-22 09:56:49.714850: step: 480/464, loss: 1.2714120149612427 2023-01-22 09:56:50.376886: step: 482/464, loss: 1.4510433673858643 2023-01-22 09:56:51.003624: step: 484/464, loss: 0.9711257219314575 2023-01-22 09:56:51.612640: step: 486/464, loss: 0.8307426571846008 2023-01-22 09:56:52.218928: step: 488/464, loss: 1.4742562770843506 2023-01-22 09:56:52.844733: step: 490/464, loss: 1.3365199565887451 2023-01-22 09:56:53.475886: step: 492/464, loss: 0.7465637922286987 2023-01-22 09:56:54.123303: step: 494/464, loss: 0.24693168699741364 2023-01-22 09:56:54.769513: step: 496/464, loss: 0.6171448230743408 2023-01-22 09:56:55.391851: step: 498/464, loss: 1.3459293842315674 2023-01-22 09:56:56.040706: step: 500/464, loss: 1.4232227802276611 2023-01-22 09:56:56.713446: step: 502/464, loss: 0.6017249226570129 2023-01-22 09:56:57.340141: step: 504/464, loss: 0.8567928075790405 2023-01-22 09:56:57.988318: step: 506/464, loss: 0.2506132125854492 2023-01-22 09:56:58.558955: step: 508/464, loss: 0.15811972320079803 2023-01-22 09:56:59.161364: step: 510/464, loss: 2.0057129859924316 2023-01-22 09:56:59.813147: step: 512/464, loss: 8.11188793182373 2023-01-22 09:57:00.492978: step: 514/464, loss: 1.6561710834503174 2023-01-22 09:57:01.164486: step: 516/464, loss: 0.8971422910690308 2023-01-22 09:57:01.792235: step: 518/464, loss: 0.5069814920425415 2023-01-22 09:57:02.393462: step: 520/464, loss: 1.3453993797302246 2023-01-22 09:57:03.038051: step: 522/464, loss: 2.265744924545288 2023-01-22 09:57:03.670379: step: 524/464, loss: 1.7987972497940063 2023-01-22 09:57:04.300442: step: 526/464, loss: 1.4300066232681274 2023-01-22 09:57:04.925819: step: 528/464, loss: 1.3526700735092163 2023-01-22 09:57:05.574023: step: 530/464, loss: 2.729444980621338 2023-01-22 09:57:06.215155: step: 532/464, loss: 2.88145112991333 2023-01-22 09:57:06.801741: step: 534/464, loss: 0.34708070755004883 2023-01-22 09:57:07.417743: step: 536/464, loss: 10.343750953674316 2023-01-22 09:57:08.095414: step: 538/464, loss: 0.594245433807373 2023-01-22 09:57:08.663922: step: 540/464, loss: 0.765412449836731 2023-01-22 09:57:09.297197: step: 542/464, loss: 1.2842626571655273 2023-01-22 09:57:09.909465: step: 544/464, loss: 2.3602559566497803 2023-01-22 09:57:10.513187: step: 546/464, loss: 1.1977283954620361 2023-01-22 09:57:11.178150: step: 548/464, loss: 0.5755077600479126 2023-01-22 09:57:11.794752: step: 550/464, loss: 3.6580703258514404 
2023-01-22 09:57:12.442330: step: 552/464, loss: 0.8802728652954102 2023-01-22 09:57:13.081785: step: 554/464, loss: 0.2585405707359314 2023-01-22 09:57:13.735430: step: 556/464, loss: 0.6584972739219666 2023-01-22 09:57:14.367541: step: 558/464, loss: 0.42822739481925964 2023-01-22 09:57:15.055675: step: 560/464, loss: 0.7816445827484131 2023-01-22 09:57:15.657571: step: 562/464, loss: 1.3169465065002441 2023-01-22 09:57:16.272001: step: 564/464, loss: 3.784709930419922 2023-01-22 09:57:16.868013: step: 566/464, loss: 1.1701374053955078 2023-01-22 09:57:17.505348: step: 568/464, loss: 3.610154628753662 2023-01-22 09:57:18.115226: step: 570/464, loss: 0.28687018156051636 2023-01-22 09:57:18.760605: step: 572/464, loss: 0.5567400455474854 2023-01-22 09:57:19.338587: step: 574/464, loss: 0.470517098903656 2023-01-22 09:57:20.016467: step: 576/464, loss: 2.1874237060546875 2023-01-22 09:57:20.602763: step: 578/464, loss: 0.5025588870048523 2023-01-22 09:57:21.185567: step: 580/464, loss: 1.1785072088241577 2023-01-22 09:57:21.823083: step: 582/464, loss: 0.6726641654968262 2023-01-22 09:57:22.441986: step: 584/464, loss: 0.5943033695220947 2023-01-22 09:57:23.120795: step: 586/464, loss: 0.500882625579834 2023-01-22 09:57:23.809177: step: 588/464, loss: 1.7816160917282104 2023-01-22 09:57:24.464951: step: 590/464, loss: 2.4452710151672363 2023-01-22 09:57:25.054575: step: 592/464, loss: 2.792174816131592 2023-01-22 09:57:25.643821: step: 594/464, loss: 7.6662163734436035 2023-01-22 09:57:26.400930: step: 596/464, loss: 0.7320204973220825 2023-01-22 09:57:26.992306: step: 598/464, loss: 0.4488712549209595 2023-01-22 09:57:27.590660: step: 600/464, loss: 0.5410355925559998 2023-01-22 09:57:28.208352: step: 602/464, loss: 0.4308883249759674 2023-01-22 09:57:28.824534: step: 604/464, loss: 0.9670007824897766 2023-01-22 09:57:29.510952: step: 606/464, loss: 0.26769569516181946 2023-01-22 09:57:30.160486: step: 608/464, loss: 1.1745327711105347 2023-01-22 09:57:30.780628: step: 610/464, loss: 2.1340060234069824 2023-01-22 09:57:31.432595: step: 612/464, loss: 1.5466679334640503 2023-01-22 09:57:32.111318: step: 614/464, loss: 1.054890513420105 2023-01-22 09:57:32.783631: step: 616/464, loss: 0.4689290523529053 2023-01-22 09:57:33.408293: step: 618/464, loss: 3.1759071350097656 2023-01-22 09:57:33.993584: step: 620/464, loss: 0.8746719360351562 2023-01-22 09:57:34.587577: step: 622/464, loss: 0.40230920910835266 2023-01-22 09:57:35.169568: step: 624/464, loss: 1.1251157522201538 2023-01-22 09:57:35.817300: step: 626/464, loss: 0.9146066904067993 2023-01-22 09:57:36.432724: step: 628/464, loss: 1.6216715574264526 2023-01-22 09:57:37.120396: step: 630/464, loss: 1.0551037788391113 2023-01-22 09:57:37.768895: step: 632/464, loss: 0.9549449682235718 2023-01-22 09:57:38.367017: step: 634/464, loss: 3.389556884765625 2023-01-22 09:57:38.950912: step: 636/464, loss: 1.8385090827941895 2023-01-22 09:57:39.595201: step: 638/464, loss: 1.2338461875915527 2023-01-22 09:57:40.224879: step: 640/464, loss: 1.8416494131088257 2023-01-22 09:57:40.809312: step: 642/464, loss: 1.1865665912628174 2023-01-22 09:57:41.448858: step: 644/464, loss: 0.439983993768692 2023-01-22 09:57:42.086001: step: 646/464, loss: 0.5343146324157715 2023-01-22 09:57:42.808924: step: 648/464, loss: 0.8855712413787842 2023-01-22 09:57:43.381386: step: 650/464, loss: 1.6070959568023682 2023-01-22 09:57:44.031993: step: 652/464, loss: 1.1842074394226074 2023-01-22 09:57:44.726682: step: 654/464, loss: 2.822164297103882 2023-01-22 
09:57:45.284754: step: 656/464, loss: 0.7699689269065857 2023-01-22 09:57:45.927809: step: 658/464, loss: 24.550432205200195 2023-01-22 09:57:46.599157: step: 660/464, loss: 1.128835916519165 2023-01-22 09:57:47.307742: step: 662/464, loss: 1.0747103691101074 2023-01-22 09:57:47.897328: step: 664/464, loss: 1.582950472831726 2023-01-22 09:57:48.600569: step: 666/464, loss: 0.4008786976337433 2023-01-22 09:57:49.244859: step: 668/464, loss: 0.841454803943634 2023-01-22 09:57:49.857597: step: 670/464, loss: 1.05251145362854 2023-01-22 09:57:50.535740: step: 672/464, loss: 0.6999272108078003 2023-01-22 09:57:51.152696: step: 674/464, loss: 0.5895771980285645 2023-01-22 09:57:51.802221: step: 676/464, loss: 3.9432730674743652 2023-01-22 09:57:52.450377: step: 678/464, loss: 0.5963733196258545 2023-01-22 09:57:53.112218: step: 680/464, loss: 1.180455207824707 2023-01-22 09:57:53.690251: step: 682/464, loss: 0.7794288992881775 2023-01-22 09:57:54.367380: step: 684/464, loss: 0.4899601936340332 2023-01-22 09:57:54.957107: step: 686/464, loss: 0.9549975991249084 2023-01-22 09:57:55.610087: step: 688/464, loss: 0.47775551676750183 2023-01-22 09:57:56.187442: step: 690/464, loss: 4.92923641204834 2023-01-22 09:57:56.841773: step: 692/464, loss: 0.21919465065002441 2023-01-22 09:57:57.474592: step: 694/464, loss: 2.0028936862945557 2023-01-22 09:57:58.120580: step: 696/464, loss: 3.0337226390838623 2023-01-22 09:57:58.696830: step: 698/464, loss: 1.7912003993988037 2023-01-22 09:57:59.327883: step: 700/464, loss: 0.4943981468677521 2023-01-22 09:58:00.031272: step: 702/464, loss: 1.810803771018982 2023-01-22 09:58:00.732244: step: 704/464, loss: 1.6248208284378052 2023-01-22 09:58:01.413060: step: 706/464, loss: 3.5441529750823975 2023-01-22 09:58:02.058685: step: 708/464, loss: 3.9152140617370605 2023-01-22 09:58:02.661053: step: 710/464, loss: 1.0746217966079712 2023-01-22 09:58:03.244502: step: 712/464, loss: 1.0703041553497314 2023-01-22 09:58:03.856545: step: 714/464, loss: 1.0361018180847168 2023-01-22 09:58:04.487333: step: 716/464, loss: 0.34002020955085754 2023-01-22 09:58:05.118271: step: 718/464, loss: 1.2165015935897827 2023-01-22 09:58:05.713365: step: 720/464, loss: 0.5235038995742798 2023-01-22 09:58:06.311621: step: 722/464, loss: 0.7303531169891357 2023-01-22 09:58:06.919137: step: 724/464, loss: 5.70162296295166 2023-01-22 09:58:07.552126: step: 726/464, loss: 0.8262537717819214 2023-01-22 09:58:08.241545: step: 728/464, loss: 0.32231605052948 2023-01-22 09:58:08.830267: step: 730/464, loss: 7.0202178955078125 2023-01-22 09:58:09.530944: step: 732/464, loss: 1.515478491783142 2023-01-22 09:58:10.171196: step: 734/464, loss: 3.805112600326538 2023-01-22 09:58:10.799833: step: 736/464, loss: 1.0174000263214111 2023-01-22 09:58:11.353158: step: 738/464, loss: 1.1117806434631348 2023-01-22 09:58:12.012952: step: 740/464, loss: 1.7839009761810303 2023-01-22 09:58:12.615868: step: 742/464, loss: 5.611761569976807 2023-01-22 09:58:13.228974: step: 744/464, loss: 3.942810535430908 2023-01-22 09:58:13.818160: step: 746/464, loss: 1.0222467184066772 2023-01-22 09:58:14.520141: step: 748/464, loss: 0.8140878677368164 2023-01-22 09:58:15.175579: step: 750/464, loss: 0.8113280534744263 2023-01-22 09:58:15.831755: step: 752/464, loss: 0.7970805168151855 2023-01-22 09:58:16.417936: step: 754/464, loss: 0.326967716217041 2023-01-22 09:58:17.088211: step: 756/464, loss: 0.513617753982544 2023-01-22 09:58:17.709685: step: 758/464, loss: 4.979618072509766 2023-01-22 09:58:18.307743: step: 760/464, 
loss: 3.6950440406799316 2023-01-22 09:58:18.917863: step: 762/464, loss: 0.11817652732133865 2023-01-22 09:58:19.464951: step: 764/464, loss: 1.2285819053649902 2023-01-22 09:58:20.093205: step: 766/464, loss: 0.31264907121658325 2023-01-22 09:58:20.689895: step: 768/464, loss: 3.249337673187256 2023-01-22 09:58:21.292105: step: 770/464, loss: 2.1495447158813477 2023-01-22 09:58:21.928770: step: 772/464, loss: 0.2141517996788025 2023-01-22 09:58:22.620101: step: 774/464, loss: 0.7330082058906555 2023-01-22 09:58:23.280149: step: 776/464, loss: 2.1982250213623047 2023-01-22 09:58:23.907990: step: 778/464, loss: 2.4884581565856934 2023-01-22 09:58:24.536498: step: 780/464, loss: 1.1558810472488403 2023-01-22 09:58:25.199038: step: 782/464, loss: 1.8573219776153564 2023-01-22 09:58:25.818581: step: 784/464, loss: 2.610272169113159 2023-01-22 09:58:26.531732: step: 786/464, loss: 1.9637396335601807 2023-01-22 09:58:27.150496: step: 788/464, loss: 1.42550528049469 2023-01-22 09:58:27.789027: step: 790/464, loss: 1.9258475303649902 2023-01-22 09:58:28.358392: step: 792/464, loss: 0.8880046010017395 2023-01-22 09:58:28.974617: step: 794/464, loss: 1.1594154834747314 2023-01-22 09:58:29.591752: step: 796/464, loss: 1.5943104028701782 2023-01-22 09:58:30.213832: step: 798/464, loss: 0.8055059909820557 2023-01-22 09:58:30.804726: step: 800/464, loss: 0.7145674228668213 2023-01-22 09:58:31.410264: step: 802/464, loss: 0.6078277826309204 2023-01-22 09:58:32.009653: step: 804/464, loss: 2.397320032119751 2023-01-22 09:58:32.644923: step: 806/464, loss: 0.8714660406112671 2023-01-22 09:58:33.323021: step: 808/464, loss: 2.104137897491455 2023-01-22 09:58:33.896969: step: 810/464, loss: 0.40223950147628784 2023-01-22 09:58:34.528073: step: 812/464, loss: 1.0274685621261597 2023-01-22 09:58:35.083913: step: 814/464, loss: 2.062959909439087 2023-01-22 09:58:35.740808: step: 816/464, loss: 0.6237020492553711 2023-01-22 09:58:36.411310: step: 818/464, loss: 0.4401100277900696 2023-01-22 09:58:37.132506: step: 820/464, loss: 1.614302635192871 2023-01-22 09:58:37.782989: step: 822/464, loss: 0.7584272027015686 2023-01-22 09:58:38.361192: step: 824/464, loss: 1.8146684169769287 2023-01-22 09:58:38.974182: step: 826/464, loss: 1.495345115661621 2023-01-22 09:58:39.575456: step: 828/464, loss: 1.7539260387420654 2023-01-22 09:58:40.288257: step: 830/464, loss: 13.450841903686523 2023-01-22 09:58:40.955144: step: 832/464, loss: 0.9826687574386597 2023-01-22 09:58:41.615340: step: 834/464, loss: 0.8282465934753418 2023-01-22 09:58:42.265137: step: 836/464, loss: 1.085193157196045 2023-01-22 09:58:42.895925: step: 838/464, loss: 0.7056786417961121 2023-01-22 09:58:43.520764: step: 840/464, loss: 8.448920249938965 2023-01-22 09:58:44.215024: step: 842/464, loss: 1.2861590385437012 2023-01-22 09:58:44.871512: step: 844/464, loss: 6.431674003601074 2023-01-22 09:58:45.484969: step: 846/464, loss: 1.2567325830459595 2023-01-22 09:58:46.079496: step: 848/464, loss: 0.5662977695465088 2023-01-22 09:58:46.721950: step: 850/464, loss: 1.2586541175842285 2023-01-22 09:58:47.479721: step: 852/464, loss: 1.3401498794555664 2023-01-22 09:58:48.144865: step: 854/464, loss: 0.34222355484962463 2023-01-22 09:58:48.795751: step: 856/464, loss: 2.7347874641418457 2023-01-22 09:58:49.483600: step: 858/464, loss: 0.3046620488166809 2023-01-22 09:58:50.083747: step: 860/464, loss: 0.5517042279243469 2023-01-22 09:58:50.679452: step: 862/464, loss: 0.48601213097572327 2023-01-22 09:58:51.332129: step: 864/464, loss: 7.9639692306518555 
2023-01-22 09:58:51.927665: step: 866/464, loss: 0.9728195667266846 2023-01-22 09:58:52.536997: step: 868/464, loss: 0.6252280473709106 2023-01-22 09:58:53.145755: step: 870/464, loss: 1.1837365627288818 2023-01-22 09:58:53.770398: step: 872/464, loss: 3.1168296337127686 2023-01-22 09:58:54.455857: step: 874/464, loss: 0.8699135184288025 2023-01-22 09:58:55.150967: step: 876/464, loss: 0.5066431164741516 2023-01-22 09:58:55.784883: step: 878/464, loss: 1.5564488172531128 2023-01-22 09:58:56.464233: step: 880/464, loss: 1.1878808736801147 2023-01-22 09:58:57.059820: step: 882/464, loss: 2.0188775062561035 2023-01-22 09:58:57.677078: step: 884/464, loss: 0.8318590521812439 2023-01-22 09:58:58.296743: step: 886/464, loss: 0.8224694132804871 2023-01-22 09:58:58.924225: step: 888/464, loss: 0.2953476309776306 2023-01-22 09:58:59.558356: step: 890/464, loss: 0.8902751207351685 2023-01-22 09:59:00.155645: step: 892/464, loss: 1.3326923847198486 2023-01-22 09:59:00.717385: step: 894/464, loss: 0.45923858880996704 2023-01-22 09:59:01.344848: step: 896/464, loss: 1.2682569026947021 2023-01-22 09:59:02.041994: step: 898/464, loss: 3.1980276107788086 2023-01-22 09:59:02.645471: step: 900/464, loss: 0.6890709400177002 2023-01-22 09:59:03.304217: step: 902/464, loss: 0.5486746430397034 2023-01-22 09:59:03.957316: step: 904/464, loss: 4.070559978485107 2023-01-22 09:59:04.633997: step: 906/464, loss: 1.2787551879882812 2023-01-22 09:59:05.277862: step: 908/464, loss: 0.7099980115890503 2023-01-22 09:59:05.914404: step: 910/464, loss: 0.9993118643760681 2023-01-22 09:59:06.545335: step: 912/464, loss: 0.677141547203064 2023-01-22 09:59:07.182446: step: 914/464, loss: 0.28165921568870544 2023-01-22 09:59:07.938058: step: 916/464, loss: 3.867194652557373 2023-01-22 09:59:08.546316: step: 918/464, loss: 1.677793025970459 2023-01-22 09:59:09.181097: step: 920/464, loss: 2.707850456237793 2023-01-22 09:59:09.775351: step: 922/464, loss: 7.062939643859863 2023-01-22 09:59:10.373725: step: 924/464, loss: 2.674006938934326 2023-01-22 09:59:11.000600: step: 926/464, loss: 1.025741696357727 2023-01-22 09:59:11.650340: step: 928/464, loss: 1.2850465774536133 2023-01-22 09:59:12.173197: step: 930/464, loss: 0.13279880583286285 ================================================== Loss: 1.699 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25905497432939684, 'r': 0.25513731876297874, 'f1': 0.25708122214402995}, 'combined': 0.1894282689482326, 'epoch': 2} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2886886520028654, 'r': 0.21697955512589334, 'f1': 0.24774954959952863}, 'combined': 0.16174322927223111, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26572393529360744, 'r': 0.27577020883967956, 'f1': 0.27065387843449074}, 'combined': 0.19942917358330894, 'epoch': 2} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30365922452928923, 'r': 0.22266487591515252, 'f1': 0.2569300683484203}, 'combined': 0.16773672855907232, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2638232246476927, 'r': 0.2578385768295976, 'f1': 0.2607965719748702}, 'combined': 0.19216589513937804, 'epoch': 2} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 
0.30303842366654554, 'r': 0.20693274576679782, 'f1': 0.2459298754156606}, 'combined': 0.16055525545271107, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20271787960467205, 'r': 0.30697278911564624, 'f1': 0.24418290043290042}, 'combined': 0.16278860028860026, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2196969696969697, 'r': 0.31521739130434784, 'f1': 0.25892857142857145}, 'combined': 0.12946428571428573, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38333333333333336, 'r': 0.19827586206896552, 'f1': 0.2613636363636364}, 'combined': 0.17424242424242425, 'epoch': 2} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25905497432939684, 'r': 0.25513731876297874, 'f1': 0.25708122214402995}, 'combined': 0.1894282689482326, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2886886520028654, 'r': 0.21697955512589334, 'f1': 0.24774954959952863}, 'combined': 0.16174322927223111, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20271787960467205, 'r': 0.30697278911564624, 'f1': 0.24418290043290042}, 'combined': 0.16278860028860026, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26572393529360744, 'r': 0.27577020883967956, 'f1': 0.27065387843449074}, 'combined': 0.19942917358330894, 'epoch': 2} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30365922452928923, 'r': 0.22266487591515252, 'f1': 0.2569300683484203}, 'combined': 0.16773672855907232, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2196969696969697, 'r': 0.31521739130434784, 'f1': 0.25892857142857145}, 'combined': 0.12946428571428573, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2638232246476927, 'r': 0.2578385768295976, 'f1': 0.2607965719748702}, 'combined': 0.19216589513937804, 'epoch': 2} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30303842366654554, 'r': 0.20693274576679782, 'f1': 0.2459298754156606}, 'combined': 0.16055525545271107, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38333333333333336, 'r': 0.19827586206896552, 'f1': 0.2613636363636364}, 'combined': 0.17424242424242425, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:02:18.315513: step: 2/464, loss: 0.760606050491333 2023-01-22 10:02:18.929008: step: 4/464, loss: 1.206289529800415 2023-01-22 10:02:19.580713: step: 6/464, loss: 0.6236059069633484 2023-01-22 10:02:20.147375: step: 8/464, loss: 0.686949610710144 2023-01-22 10:02:20.761031: step: 10/464, loss: 4.821654796600342 2023-01-22 10:02:21.362388: step: 12/464, loss: 
3.201460123062134 2023-01-22 10:02:21.956403: step: 14/464, loss: 1.308962106704712 2023-01-22 10:02:22.605572: step: 16/464, loss: 1.7753371000289917 2023-01-22 10:02:23.173778: step: 18/464, loss: 1.2878186702728271 2023-01-22 10:02:23.771254: step: 20/464, loss: 0.9418953657150269 2023-01-22 10:02:24.358763: step: 22/464, loss: 0.35510966181755066 2023-01-22 10:02:24.981530: step: 24/464, loss: 0.8940156698226929 2023-01-22 10:02:25.593448: step: 26/464, loss: 1.737830400466919 2023-01-22 10:02:26.144062: step: 28/464, loss: 1.108651041984558 2023-01-22 10:02:26.773002: step: 30/464, loss: 0.2774873971939087 2023-01-22 10:02:27.395171: step: 32/464, loss: 1.181756615638733 2023-01-22 10:02:28.038276: step: 34/464, loss: 0.8835384845733643 2023-01-22 10:02:28.721606: step: 36/464, loss: 0.2640799582004547 2023-01-22 10:02:29.450279: step: 38/464, loss: 0.9748669266700745 2023-01-22 10:02:30.107184: step: 40/464, loss: 1.3259875774383545 2023-01-22 10:02:30.727281: step: 42/464, loss: 1.2881635427474976 2023-01-22 10:02:31.364618: step: 44/464, loss: 0.8446848392486572 2023-01-22 10:02:31.988578: step: 46/464, loss: 0.39714711904525757 2023-01-22 10:02:32.609500: step: 48/464, loss: 0.3975988030433655 2023-01-22 10:02:33.237146: step: 50/464, loss: 1.5401337146759033 2023-01-22 10:02:33.960332: step: 52/464, loss: 1.0109367370605469 2023-01-22 10:02:34.674313: step: 54/464, loss: 1.12022864818573 2023-01-22 10:02:35.292527: step: 56/464, loss: 1.0091583728790283 2023-01-22 10:02:35.945117: step: 58/464, loss: 1.8323918581008911 2023-01-22 10:02:36.570545: step: 60/464, loss: 2.5795531272888184 2023-01-22 10:02:37.197958: step: 62/464, loss: 0.4858267903327942 2023-01-22 10:02:37.923702: step: 64/464, loss: 0.16328811645507812 2023-01-22 10:02:38.554819: step: 66/464, loss: 0.4784179925918579 2023-01-22 10:02:39.202055: step: 68/464, loss: 1.4395607709884644 2023-01-22 10:02:39.841883: step: 70/464, loss: 0.24091686308383942 2023-01-22 10:02:40.411062: step: 72/464, loss: 1.3440728187561035 2023-01-22 10:02:41.019981: step: 74/464, loss: 0.4796639084815979 2023-01-22 10:02:41.599932: step: 76/464, loss: 1.3922030925750732 2023-01-22 10:02:42.192906: step: 78/464, loss: 1.6975597143173218 2023-01-22 10:02:42.923935: step: 80/464, loss: 0.6518625020980835 2023-01-22 10:02:43.470179: step: 82/464, loss: 0.5299268960952759 2023-01-22 10:02:44.092382: step: 84/464, loss: 0.3443664312362671 2023-01-22 10:02:44.683484: step: 86/464, loss: 1.5084147453308105 2023-01-22 10:02:45.296020: step: 88/464, loss: 0.21632380783557892 2023-01-22 10:02:45.900645: step: 90/464, loss: 0.5657486319541931 2023-01-22 10:02:46.488202: step: 92/464, loss: 1.2559709548950195 2023-01-22 10:02:47.129763: step: 94/464, loss: 0.5930248498916626 2023-01-22 10:02:47.730556: step: 96/464, loss: 0.35066333413124084 2023-01-22 10:02:48.292207: step: 98/464, loss: 1.4469847679138184 2023-01-22 10:02:48.950632: step: 100/464, loss: 1.8228155374526978 2023-01-22 10:02:49.653593: step: 102/464, loss: 0.4205220639705658 2023-01-22 10:02:50.259572: step: 104/464, loss: 0.4687854051589966 2023-01-22 10:02:50.954420: step: 106/464, loss: 0.39240720868110657 2023-01-22 10:02:51.522984: step: 108/464, loss: 1.0675050020217896 2023-01-22 10:02:52.173889: step: 110/464, loss: 1.3701432943344116 2023-01-22 10:02:52.868210: step: 112/464, loss: 0.5170474648475647 2023-01-22 10:02:53.502611: step: 114/464, loss: 1.0686296224594116 2023-01-22 10:02:54.239507: step: 116/464, loss: 0.533559262752533 2023-01-22 10:02:54.871974: step: 118/464, 
loss: 0.2963751554489136 2023-01-22 10:02:55.497812: step: 120/464, loss: 0.8292972445487976 2023-01-22 10:02:56.108618: step: 122/464, loss: 1.3835995197296143 2023-01-22 10:02:56.672553: step: 124/464, loss: 0.8213170766830444 2023-01-22 10:02:57.344191: step: 126/464, loss: 0.2727380692958832 2023-01-22 10:02:57.980422: step: 128/464, loss: 0.8483532071113586 2023-01-22 10:02:58.544599: step: 130/464, loss: 0.5043916702270508 2023-01-22 10:02:59.241011: step: 132/464, loss: 1.355796217918396 2023-01-22 10:02:59.885259: step: 134/464, loss: 0.757931649684906 2023-01-22 10:03:00.588058: step: 136/464, loss: 1.686948299407959 2023-01-22 10:03:01.323251: step: 138/464, loss: 0.601470410823822 2023-01-22 10:03:01.942667: step: 140/464, loss: 0.8765469789505005 2023-01-22 10:03:02.545980: step: 142/464, loss: 3.985328197479248 2023-01-22 10:03:03.212701: step: 144/464, loss: 1.0359535217285156 2023-01-22 10:03:03.818267: step: 146/464, loss: 2.8616089820861816 2023-01-22 10:03:04.482880: step: 148/464, loss: 0.5264968276023865 2023-01-22 10:03:05.134772: step: 150/464, loss: 0.31478896737098694 2023-01-22 10:03:05.766822: step: 152/464, loss: 0.9130817651748657 2023-01-22 10:03:06.401136: step: 154/464, loss: 14.749269485473633 2023-01-22 10:03:06.990309: step: 156/464, loss: 1.5816093683242798 2023-01-22 10:03:07.598397: step: 158/464, loss: 0.45651036500930786 2023-01-22 10:03:08.218127: step: 160/464, loss: 0.591182291507721 2023-01-22 10:03:08.902555: step: 162/464, loss: 0.930648684501648 2023-01-22 10:03:09.530242: step: 164/464, loss: 2.3614890575408936 2023-01-22 10:03:10.155965: step: 166/464, loss: 1.4386388063430786 2023-01-22 10:03:10.717915: step: 168/464, loss: 0.519232988357544 2023-01-22 10:03:11.410803: step: 170/464, loss: 2.4041099548339844 2023-01-22 10:03:12.118734: step: 172/464, loss: 1.3577077388763428 2023-01-22 10:03:12.701564: step: 174/464, loss: 0.6223466396331787 2023-01-22 10:03:13.449696: step: 176/464, loss: 0.2883419990539551 2023-01-22 10:03:14.043850: step: 178/464, loss: 0.300167977809906 2023-01-22 10:03:14.685019: step: 180/464, loss: 0.3131827712059021 2023-01-22 10:03:15.309663: step: 182/464, loss: 2.6632723808288574 2023-01-22 10:03:15.949038: step: 184/464, loss: 1.6046333312988281 2023-01-22 10:03:16.573956: step: 186/464, loss: 1.1761341094970703 2023-01-22 10:03:17.200769: step: 188/464, loss: 2.0994763374328613 2023-01-22 10:03:17.867000: step: 190/464, loss: 0.5785619020462036 2023-01-22 10:03:18.533577: step: 192/464, loss: 0.3165262043476105 2023-01-22 10:03:19.124411: step: 194/464, loss: 0.565475344657898 2023-01-22 10:03:19.751174: step: 196/464, loss: 0.776207685470581 2023-01-22 10:03:20.387772: step: 198/464, loss: 0.8251913189888 2023-01-22 10:03:20.988562: step: 200/464, loss: 0.42415738105773926 2023-01-22 10:03:21.592184: step: 202/464, loss: 0.7484412789344788 2023-01-22 10:03:22.225892: step: 204/464, loss: 0.5022807717323303 2023-01-22 10:03:22.785852: step: 206/464, loss: 0.808050274848938 2023-01-22 10:03:23.430977: step: 208/464, loss: 1.0661100149154663 2023-01-22 10:03:23.974115: step: 210/464, loss: 1.1805107593536377 2023-01-22 10:03:24.595769: step: 212/464, loss: 1.4876399040222168 2023-01-22 10:03:25.238066: step: 214/464, loss: 1.7432225942611694 2023-01-22 10:03:25.824112: step: 216/464, loss: 0.44966259598731995 2023-01-22 10:03:26.500151: step: 218/464, loss: 0.4931964874267578 2023-01-22 10:03:27.123995: step: 220/464, loss: 0.9156018495559692 2023-01-22 10:03:27.780539: step: 222/464, loss: 1.7440860271453857 
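The "New best chinese/korean/russian model..." messages in the epoch-2 summary above appear when an epoch's Dev 'combined' score beats the best seen so far (Dev Chinese, for instance, improves from 0.1738 in epoch 1 to 0.1894 in epoch 2). A minimal sketch of such a per-language tracker follows; the dict and function are illustrative, not train.py's implementation.

# Per-language best dev 'combined' score seen so far (illustrative only).
best = {}

def maybe_update_best(language, dev_combined):
    # Return True (i.e. "New best ... model") when the dev score improves.
    if dev_combined > best.get(language, float("-inf")):
        best[language] = dev_combined
        return True
    return False

# Logged Dev Chinese 'combined' values:
assert maybe_update_best("chinese", 0.1738229567857192)  # epoch 1 -> new best
assert maybe_update_best("chinese", 0.1894282689482326)  # epoch 2 -> new best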
2023-01-22 10:03:28.451895: step: 224/464, loss: 1.1128723621368408 2023-01-22 10:03:29.036618: step: 226/464, loss: 0.9241154193878174 2023-01-22 10:03:29.584881: step: 228/464, loss: 1.0193665027618408 2023-01-22 10:03:30.429037: step: 230/464, loss: 3.656179904937744 2023-01-22 10:03:31.062753: step: 232/464, loss: 2.219571590423584 2023-01-22 10:03:31.687526: step: 234/464, loss: 0.6206840872764587 2023-01-22 10:03:32.353003: step: 236/464, loss: 0.8857594132423401 2023-01-22 10:03:33.021482: step: 238/464, loss: 2.9234108924865723 2023-01-22 10:03:33.655619: step: 240/464, loss: 1.119886875152588 2023-01-22 10:03:34.230204: step: 242/464, loss: 0.18582743406295776 2023-01-22 10:03:34.890910: step: 244/464, loss: 2.426466941833496 2023-01-22 10:03:35.560334: step: 246/464, loss: 0.4373409152030945 2023-01-22 10:03:36.182550: step: 248/464, loss: 1.3089731931686401 2023-01-22 10:03:36.783105: step: 250/464, loss: 1.0849674940109253 2023-01-22 10:03:37.402349: step: 252/464, loss: 0.937401294708252 2023-01-22 10:03:38.057789: step: 254/464, loss: 0.90260910987854 2023-01-22 10:03:38.775215: step: 256/464, loss: 0.1584082692861557 2023-01-22 10:03:39.413483: step: 258/464, loss: 2.6713130474090576 2023-01-22 10:03:40.083756: step: 260/464, loss: 0.5284579396247864 2023-01-22 10:03:40.709956: step: 262/464, loss: 7.9691290855407715 2023-01-22 10:03:41.387257: step: 264/464, loss: 0.2675029933452606 2023-01-22 10:03:41.992627: step: 266/464, loss: 1.0456719398498535 2023-01-22 10:03:42.650333: step: 268/464, loss: 1.667466163635254 2023-01-22 10:03:43.238693: step: 270/464, loss: 3.5396268367767334 2023-01-22 10:03:43.869137: step: 272/464, loss: 1.0372648239135742 2023-01-22 10:03:44.545210: step: 274/464, loss: 1.1810011863708496 2023-01-22 10:03:45.131623: step: 276/464, loss: 1.4830915927886963 2023-01-22 10:03:45.720285: step: 278/464, loss: 0.495393842458725 2023-01-22 10:03:46.329500: step: 280/464, loss: 0.6834374070167542 2023-01-22 10:03:46.894450: step: 282/464, loss: 1.3428385257720947 2023-01-22 10:03:47.527245: step: 284/464, loss: 4.434447288513184 2023-01-22 10:03:48.107102: step: 286/464, loss: 0.7110435962677002 2023-01-22 10:03:48.751932: step: 288/464, loss: 1.4327901601791382 2023-01-22 10:03:49.368314: step: 290/464, loss: 1.5169810056686401 2023-01-22 10:03:49.954291: step: 292/464, loss: 0.9470404386520386 2023-01-22 10:03:50.568544: step: 294/464, loss: 0.6485475897789001 2023-01-22 10:03:51.208651: step: 296/464, loss: 0.8116719126701355 2023-01-22 10:03:51.847279: step: 298/464, loss: 0.4988217055797577 2023-01-22 10:03:52.498999: step: 300/464, loss: 0.7799360752105713 2023-01-22 10:03:53.166630: step: 302/464, loss: 0.10422151535749435 2023-01-22 10:03:53.789855: step: 304/464, loss: 4.046075820922852 2023-01-22 10:03:54.358925: step: 306/464, loss: 1.3420652151107788 2023-01-22 10:03:54.972682: step: 308/464, loss: 4.418938159942627 2023-01-22 10:03:55.608149: step: 310/464, loss: 0.7048822641372681 2023-01-22 10:03:56.263933: step: 312/464, loss: 0.4731024503707886 2023-01-22 10:03:56.865115: step: 314/464, loss: 0.5880229473114014 2023-01-22 10:03:57.492288: step: 316/464, loss: 0.9250689148902893 2023-01-22 10:03:58.052331: step: 318/464, loss: 1.7018532752990723 2023-01-22 10:03:58.663188: step: 320/464, loss: 0.2926866114139557 2023-01-22 10:03:59.264015: step: 322/464, loss: 0.2836105525493622 2023-01-22 10:03:59.812720: step: 324/464, loss: 1.6393259763717651 2023-01-22 10:04:00.437230: step: 326/464, loss: 1.966665267944336 2023-01-22 10:04:01.029263: 
step: 328/464, loss: 1.082220196723938 2023-01-22 10:04:01.616216: step: 330/464, loss: 0.8523777723312378 2023-01-22 10:04:02.168950: step: 332/464, loss: 0.9466300010681152 2023-01-22 10:04:02.804991: step: 334/464, loss: 0.7340304851531982 2023-01-22 10:04:03.456066: step: 336/464, loss: 0.5134017467498779 2023-01-22 10:04:04.069858: step: 338/464, loss: 4.246328353881836 2023-01-22 10:04:04.756925: step: 340/464, loss: 1.5157082080841064 2023-01-22 10:04:05.375937: step: 342/464, loss: 0.7334668636322021 2023-01-22 10:04:05.956023: step: 344/464, loss: 1.2602307796478271 2023-01-22 10:04:06.579339: step: 346/464, loss: 0.20811858773231506 2023-01-22 10:04:07.135051: step: 348/464, loss: 2.6614458560943604 2023-01-22 10:04:07.759206: step: 350/464, loss: 1.1471977233886719 2023-01-22 10:04:08.408145: step: 352/464, loss: 0.416929692029953 2023-01-22 10:04:09.017462: step: 354/464, loss: 1.1446795463562012 2023-01-22 10:04:09.734490: step: 356/464, loss: 6.461057662963867 2023-01-22 10:04:10.357062: step: 358/464, loss: 1.036741018295288 2023-01-22 10:04:11.049701: step: 360/464, loss: 1.5848876237869263 2023-01-22 10:04:11.659981: step: 362/464, loss: 1.4145913124084473 2023-01-22 10:04:12.365474: step: 364/464, loss: 0.7970523834228516 2023-01-22 10:04:13.018918: step: 366/464, loss: 1.8541629314422607 2023-01-22 10:04:13.667151: step: 368/464, loss: 1.0937998294830322 2023-01-22 10:04:14.301288: step: 370/464, loss: 0.8395752310752869 2023-01-22 10:04:14.891298: step: 372/464, loss: 0.637897253036499 2023-01-22 10:04:15.466078: step: 374/464, loss: 1.4895851612091064 2023-01-22 10:04:16.113301: step: 376/464, loss: 0.5075844526290894 2023-01-22 10:04:16.749097: step: 378/464, loss: 0.44364073872566223 2023-01-22 10:04:17.414134: step: 380/464, loss: 0.4025718867778778 2023-01-22 10:04:17.966184: step: 382/464, loss: 4.496883869171143 2023-01-22 10:04:18.687207: step: 384/464, loss: 0.6705805659294128 2023-01-22 10:04:19.286239: step: 386/464, loss: 1.7309107780456543 2023-01-22 10:04:19.859729: step: 388/464, loss: 1.173380732536316 2023-01-22 10:04:20.453288: step: 390/464, loss: 0.8357263803482056 2023-01-22 10:04:21.064824: step: 392/464, loss: 3.84114146232605 2023-01-22 10:04:21.648543: step: 394/464, loss: 0.624237596988678 2023-01-22 10:04:22.259608: step: 396/464, loss: 11.576501846313477 2023-01-22 10:04:22.898580: step: 398/464, loss: 0.8219079971313477 2023-01-22 10:04:23.567310: step: 400/464, loss: 0.5659942030906677 2023-01-22 10:04:24.177444: step: 402/464, loss: 0.94480961561203 2023-01-22 10:04:24.812257: step: 404/464, loss: 1.4206219911575317 2023-01-22 10:04:25.414953: step: 406/464, loss: 0.8018251061439514 2023-01-22 10:04:26.064016: step: 408/464, loss: 1.2566442489624023 2023-01-22 10:04:26.662486: step: 410/464, loss: 1.8468358516693115 2023-01-22 10:04:27.284629: step: 412/464, loss: 0.3620869815349579 2023-01-22 10:04:27.864813: step: 414/464, loss: 0.9323358535766602 2023-01-22 10:04:28.447745: step: 416/464, loss: 0.6399459838867188 2023-01-22 10:04:29.186436: step: 418/464, loss: 4.124539375305176 2023-01-22 10:04:29.817445: step: 420/464, loss: 3.8888773918151855 2023-01-22 10:04:30.468147: step: 422/464, loss: 1.2105002403259277 2023-01-22 10:04:31.126352: step: 424/464, loss: 0.8193405270576477 2023-01-22 10:04:31.706139: step: 426/464, loss: 0.2333287000656128 2023-01-22 10:04:32.328172: step: 428/464, loss: 1.331892967224121 2023-01-22 10:04:32.957408: step: 430/464, loss: 1.5995988845825195 2023-01-22 10:04:33.599169: step: 432/464, loss: 
3.416703224182129 2023-01-22 10:04:34.323950: step: 434/464, loss: 0.3051130771636963 2023-01-22 10:04:34.945099: step: 436/464, loss: 2.9975223541259766 2023-01-22 10:04:35.585895: step: 438/464, loss: 0.33027902245521545 2023-01-22 10:04:36.224393: step: 440/464, loss: 0.8280506730079651 2023-01-22 10:04:36.888345: step: 442/464, loss: 0.4559107720851898 2023-01-22 10:04:37.493851: step: 444/464, loss: 1.1207265853881836 2023-01-22 10:04:38.161533: step: 446/464, loss: 0.6139945387840271 2023-01-22 10:04:38.778585: step: 448/464, loss: 1.029955267906189 2023-01-22 10:04:39.417683: step: 450/464, loss: 1.5795459747314453 2023-01-22 10:04:40.042415: step: 452/464, loss: 1.4873511791229248 2023-01-22 10:04:40.700617: step: 454/464, loss: 0.7497228384017944 2023-01-22 10:04:41.340641: step: 456/464, loss: 0.6450871229171753 2023-01-22 10:04:42.045295: step: 458/464, loss: 0.9673238396644592 2023-01-22 10:04:42.667657: step: 460/464, loss: 0.5542375445365906 2023-01-22 10:04:43.319514: step: 462/464, loss: 3.1521575450897217 2023-01-22 10:04:44.003965: step: 464/464, loss: 0.9212895631790161 2023-01-22 10:04:44.606906: step: 466/464, loss: 0.3986515402793884 2023-01-22 10:04:45.216183: step: 468/464, loss: 3.097533941268921 2023-01-22 10:04:45.799303: step: 470/464, loss: 1.3664813041687012 2023-01-22 10:04:46.411264: step: 472/464, loss: 0.2985018193721771 2023-01-22 10:04:47.045680: step: 474/464, loss: 0.9037873148918152 2023-01-22 10:04:47.684808: step: 476/464, loss: 0.7243385314941406 2023-01-22 10:04:48.306421: step: 478/464, loss: 0.7075210809707642 2023-01-22 10:04:48.914319: step: 480/464, loss: 0.609467089176178 2023-01-22 10:04:49.511900: step: 482/464, loss: 1.2596162557601929 2023-01-22 10:04:50.071312: step: 484/464, loss: 0.3639557361602783 2023-01-22 10:04:50.682292: step: 486/464, loss: 2.3972129821777344 2023-01-22 10:04:51.396033: step: 488/464, loss: 1.2693873643875122 2023-01-22 10:04:52.014474: step: 490/464, loss: 2.1535725593566895 2023-01-22 10:04:52.629140: step: 492/464, loss: 1.125908374786377 2023-01-22 10:04:53.265355: step: 494/464, loss: 0.8671747446060181 2023-01-22 10:04:53.938383: step: 496/464, loss: 0.9024983048439026 2023-01-22 10:04:54.657252: step: 498/464, loss: 3.1360628604888916 2023-01-22 10:04:55.264875: step: 500/464, loss: 1.1961506605148315 2023-01-22 10:04:55.893817: step: 502/464, loss: 1.9053184986114502 2023-01-22 10:04:56.516986: step: 504/464, loss: 1.222051739692688 2023-01-22 10:04:57.167745: step: 506/464, loss: 0.6592516303062439 2023-01-22 10:04:57.740660: step: 508/464, loss: 2.705397129058838 2023-01-22 10:04:58.367828: step: 510/464, loss: 1.3199049234390259 2023-01-22 10:04:58.989764: step: 512/464, loss: 0.7428873777389526 2023-01-22 10:04:59.632251: step: 514/464, loss: 0.8885185718536377 2023-01-22 10:05:00.277165: step: 516/464, loss: 1.146332859992981 2023-01-22 10:05:00.992531: step: 518/464, loss: 1.469440221786499 2023-01-22 10:05:01.613037: step: 520/464, loss: 1.943987488746643 2023-01-22 10:05:02.238879: step: 522/464, loss: 0.3035939037799835 2023-01-22 10:05:02.841109: step: 524/464, loss: 0.741900622844696 2023-01-22 10:05:03.502953: step: 526/464, loss: 0.20480138063430786 2023-01-22 10:05:04.094487: step: 528/464, loss: 0.4008975028991699 2023-01-22 10:05:04.666186: step: 530/464, loss: 0.5813942551612854 2023-01-22 10:05:05.231351: step: 532/464, loss: 0.5860927104949951 2023-01-22 10:05:05.825536: step: 534/464, loss: 0.44807976484298706 2023-01-22 10:05:06.523792: step: 536/464, loss: 0.6833713054656982 
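The single "Loss:" figure printed at each epoch boundary (1.330 for this epoch, further below) is presumably an aggregate of the per-step losses above, most plausibly their arithmetic mean; that is an inference from the log format, not something the log itself states. A one-line sketch of that presumed aggregation:

def epoch_loss(step_losses):
    # Presumed aggregation behind the end-of-epoch "Loss: x.xxx" line
    # (arithmetic mean of the per-step losses; not confirmed by the log itself).
    return sum(step_losses) / len(step_losses)
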
2023-01-22 10:05:07.191351: step: 538/464, loss: 0.35068416595458984 2023-01-22 10:05:07.817405: step: 540/464, loss: 0.6230434775352478 2023-01-22 10:05:08.502018: step: 542/464, loss: 1.5239896774291992 2023-01-22 10:05:09.077742: step: 544/464, loss: 2.0269064903259277 2023-01-22 10:05:09.723640: step: 546/464, loss: 1.0461208820343018 2023-01-22 10:05:10.394683: step: 548/464, loss: 0.7397414445877075 2023-01-22 10:05:11.122366: step: 550/464, loss: 9.873390197753906 2023-01-22 10:05:11.763017: step: 552/464, loss: 0.9010847806930542 2023-01-22 10:05:12.425605: step: 554/464, loss: 0.3523099422454834 2023-01-22 10:05:13.155625: step: 556/464, loss: 1.7873268127441406 2023-01-22 10:05:13.779601: step: 558/464, loss: 1.1752336025238037 2023-01-22 10:05:14.436797: step: 560/464, loss: 1.152069091796875 2023-01-22 10:05:15.067540: step: 562/464, loss: 2.558657646179199 2023-01-22 10:05:15.650839: step: 564/464, loss: 2.342944622039795 2023-01-22 10:05:16.239917: step: 566/464, loss: 0.7745879292488098 2023-01-22 10:05:16.881903: step: 568/464, loss: 1.1222927570343018 2023-01-22 10:05:17.434383: step: 570/464, loss: 0.8955410122871399 2023-01-22 10:05:18.102067: step: 572/464, loss: 0.38107162714004517 2023-01-22 10:05:18.721137: step: 574/464, loss: 1.0901833772659302 2023-01-22 10:05:19.335973: step: 576/464, loss: 5.22351598739624 2023-01-22 10:05:19.980260: step: 578/464, loss: 0.7503743171691895 2023-01-22 10:05:20.615664: step: 580/464, loss: 1.6508920192718506 2023-01-22 10:05:21.253220: step: 582/464, loss: 1.1056047677993774 2023-01-22 10:05:21.882341: step: 584/464, loss: 1.5326659679412842 2023-01-22 10:05:22.549561: step: 586/464, loss: 0.6887844800949097 2023-01-22 10:05:23.186192: step: 588/464, loss: 1.729138970375061 2023-01-22 10:05:23.766909: step: 590/464, loss: 0.5543575882911682 2023-01-22 10:05:24.423856: step: 592/464, loss: 0.8567236661911011 2023-01-22 10:05:25.117083: step: 594/464, loss: 2.516667366027832 2023-01-22 10:05:25.701234: step: 596/464, loss: 3.135136127471924 2023-01-22 10:05:26.325465: step: 598/464, loss: 0.6818070411682129 2023-01-22 10:05:26.968615: step: 600/464, loss: 1.3219012022018433 2023-01-22 10:05:27.569863: step: 602/464, loss: 0.669220507144928 2023-01-22 10:05:28.208681: step: 604/464, loss: 0.6591908931732178 2023-01-22 10:05:28.874231: step: 606/464, loss: 0.9920374751091003 2023-01-22 10:05:29.469424: step: 608/464, loss: 0.8480632305145264 2023-01-22 10:05:30.123810: step: 610/464, loss: 1.539106011390686 2023-01-22 10:05:30.777222: step: 612/464, loss: 2.172405242919922 2023-01-22 10:05:31.516272: step: 614/464, loss: 0.5282004475593567 2023-01-22 10:05:32.164380: step: 616/464, loss: 0.40630924701690674 2023-01-22 10:05:32.936346: step: 618/464, loss: 0.6823827028274536 2023-01-22 10:05:33.554591: step: 620/464, loss: 0.4420427083969116 2023-01-22 10:05:34.180655: step: 622/464, loss: 0.8073464035987854 2023-01-22 10:05:34.802987: step: 624/464, loss: 0.20690055191516876 2023-01-22 10:05:35.463985: step: 626/464, loss: 0.7722306847572327 2023-01-22 10:05:36.070182: step: 628/464, loss: 1.8317077159881592 2023-01-22 10:05:36.694151: step: 630/464, loss: 1.550157904624939 2023-01-22 10:05:37.306491: step: 632/464, loss: 0.8743520379066467 2023-01-22 10:05:38.002371: step: 634/464, loss: 1.5449718236923218 2023-01-22 10:05:38.656221: step: 636/464, loss: 1.3520747423171997 2023-01-22 10:05:39.288848: step: 638/464, loss: 2.3713107109069824 2023-01-22 10:05:39.921193: step: 640/464, loss: 0.19988097250461578 2023-01-22 
10:05:40.585964: step: 642/464, loss: 1.0374170541763306 2023-01-22 10:05:41.224021: step: 644/464, loss: 1.6740143299102783 2023-01-22 10:05:41.928292: step: 646/464, loss: 0.5114726424217224 2023-01-22 10:05:42.551702: step: 648/464, loss: 0.7445206046104431 2023-01-22 10:05:43.149188: step: 650/464, loss: 1.1598302125930786 2023-01-22 10:05:43.748114: step: 652/464, loss: 0.36706048250198364 2023-01-22 10:05:44.330338: step: 654/464, loss: 3.273355007171631 2023-01-22 10:05:44.939342: step: 656/464, loss: 1.2574751377105713 2023-01-22 10:05:45.508708: step: 658/464, loss: 1.3063020706176758 2023-01-22 10:05:46.118185: step: 660/464, loss: 0.4659620225429535 2023-01-22 10:05:46.750370: step: 662/464, loss: 1.2157269716262817 2023-01-22 10:05:47.415486: step: 664/464, loss: 0.5862348675727844 2023-01-22 10:05:48.009066: step: 666/464, loss: 0.717773973941803 2023-01-22 10:05:48.693401: step: 668/464, loss: 0.49368932843208313 2023-01-22 10:05:49.495244: step: 670/464, loss: 0.4888678789138794 2023-01-22 10:05:50.071937: step: 672/464, loss: 0.3790941536426544 2023-01-22 10:05:50.728995: step: 674/464, loss: 1.465955138206482 2023-01-22 10:05:51.411007: step: 676/464, loss: 1.2423797845840454 2023-01-22 10:05:52.007380: step: 678/464, loss: 0.29232341051101685 2023-01-22 10:05:52.652951: step: 680/464, loss: 0.6101855039596558 2023-01-22 10:05:53.296024: step: 682/464, loss: 0.9923334717750549 2023-01-22 10:05:53.928445: step: 684/464, loss: 1.0098955631256104 2023-01-22 10:05:54.602329: step: 686/464, loss: 0.7436755299568176 2023-01-22 10:05:55.236612: step: 688/464, loss: 1.1025340557098389 2023-01-22 10:05:55.823005: step: 690/464, loss: 0.6141563653945923 2023-01-22 10:05:56.456925: step: 692/464, loss: 5.098584175109863 2023-01-22 10:05:57.101186: step: 694/464, loss: 0.9757270812988281 2023-01-22 10:05:57.772002: step: 696/464, loss: 3.4678096771240234 2023-01-22 10:05:58.372792: step: 698/464, loss: 0.35483089089393616 2023-01-22 10:05:58.972550: step: 700/464, loss: 0.9248161911964417 2023-01-22 10:05:59.578848: step: 702/464, loss: 0.3786623775959015 2023-01-22 10:06:00.270859: step: 704/464, loss: 1.2063881158828735 2023-01-22 10:06:00.901917: step: 706/464, loss: 0.7255852222442627 2023-01-22 10:06:01.474317: step: 708/464, loss: 0.9540723562240601 2023-01-22 10:06:02.090736: step: 710/464, loss: 0.387376993894577 2023-01-22 10:06:02.750877: step: 712/464, loss: 0.412129670381546 2023-01-22 10:06:03.384910: step: 714/464, loss: 4.866971969604492 2023-01-22 10:06:03.975310: step: 716/464, loss: 0.6999180912971497 2023-01-22 10:06:04.566547: step: 718/464, loss: 3.2527616024017334 2023-01-22 10:06:05.267198: step: 720/464, loss: 0.9839757680892944 2023-01-22 10:06:05.916467: step: 722/464, loss: 3.7680652141571045 2023-01-22 10:06:06.568541: step: 724/464, loss: 1.1528985500335693 2023-01-22 10:06:07.287036: step: 726/464, loss: 0.2059568166732788 2023-01-22 10:06:07.942412: step: 728/464, loss: 0.9154174327850342 2023-01-22 10:06:08.584711: step: 730/464, loss: 0.508955180644989 2023-01-22 10:06:09.244777: step: 732/464, loss: 0.45395326614379883 2023-01-22 10:06:09.912964: step: 734/464, loss: 1.9029622077941895 2023-01-22 10:06:10.534232: step: 736/464, loss: 0.24253416061401367 2023-01-22 10:06:11.150006: step: 738/464, loss: 3.9075145721435547 2023-01-22 10:06:11.749702: step: 740/464, loss: 1.4352221488952637 2023-01-22 10:06:12.394961: step: 742/464, loss: 1.3655582666397095 2023-01-22 10:06:13.011446: step: 744/464, loss: 0.7305575609207153 2023-01-22 10:06:13.682911: 
step: 746/464, loss: 0.31875115633010864 2023-01-22 10:06:14.309210: step: 748/464, loss: 0.8504392504692078 2023-01-22 10:06:14.997139: step: 750/464, loss: 1.7576044797897339 2023-01-22 10:06:15.697249: step: 752/464, loss: 1.8157269954681396 2023-01-22 10:06:16.329842: step: 754/464, loss: 0.5760119557380676 2023-01-22 10:06:16.982514: step: 756/464, loss: 0.46522757411003113 2023-01-22 10:06:17.602944: step: 758/464, loss: 0.4888097643852234 2023-01-22 10:06:18.185250: step: 760/464, loss: 0.8452578186988831 2023-01-22 10:06:18.905820: step: 762/464, loss: 0.5606443285942078 2023-01-22 10:06:19.593190: step: 764/464, loss: 1.1209548711776733 2023-01-22 10:06:20.253035: step: 766/464, loss: 0.8494024276733398 2023-01-22 10:06:20.848803: step: 768/464, loss: 2.736093521118164 2023-01-22 10:06:21.432201: step: 770/464, loss: 1.3247060775756836 2023-01-22 10:06:22.095253: step: 772/464, loss: 2.002469778060913 2023-01-22 10:06:22.684448: step: 774/464, loss: 3.2550501823425293 2023-01-22 10:06:23.284149: step: 776/464, loss: 0.8077048063278198 2023-01-22 10:06:23.879433: step: 778/464, loss: 0.4682812988758087 2023-01-22 10:06:24.438384: step: 780/464, loss: 1.0419408082962036 2023-01-22 10:06:25.141487: step: 782/464, loss: 1.3942831754684448 2023-01-22 10:06:25.671188: step: 784/464, loss: 4.118315696716309 2023-01-22 10:06:26.314333: step: 786/464, loss: 0.7110327482223511 2023-01-22 10:06:26.902280: step: 788/464, loss: 0.537163257598877 2023-01-22 10:06:27.571280: step: 790/464, loss: 3.044668436050415 2023-01-22 10:06:28.162857: step: 792/464, loss: 2.6408910751342773 2023-01-22 10:06:28.772464: step: 794/464, loss: 0.8422490358352661 2023-01-22 10:06:29.390432: step: 796/464, loss: 1.2086379528045654 2023-01-22 10:06:30.141257: step: 798/464, loss: 2.6411752700805664 2023-01-22 10:06:30.745950: step: 800/464, loss: 1.6432220935821533 2023-01-22 10:06:31.355890: step: 802/464, loss: 0.5683345794677734 2023-01-22 10:06:32.014663: step: 804/464, loss: 1.1452221870422363 2023-01-22 10:06:32.610650: step: 806/464, loss: 2.8372998237609863 2023-01-22 10:06:33.232477: step: 808/464, loss: 0.8040033578872681 2023-01-22 10:06:33.810268: step: 810/464, loss: 2.037003755569458 2023-01-22 10:06:34.462626: step: 812/464, loss: 1.8354355096817017 2023-01-22 10:06:35.099772: step: 814/464, loss: 0.59405517578125 2023-01-22 10:06:35.820942: step: 816/464, loss: 2.1018495559692383 2023-01-22 10:06:36.476352: step: 818/464, loss: 0.8677703142166138 2023-01-22 10:06:37.109627: step: 820/464, loss: 0.5010039806365967 2023-01-22 10:06:37.724595: step: 822/464, loss: 0.891311764717102 2023-01-22 10:06:38.330784: step: 824/464, loss: 0.5709667205810547 2023-01-22 10:06:38.977435: step: 826/464, loss: 0.16370205581188202 2023-01-22 10:06:39.572691: step: 828/464, loss: 0.8301252126693726 2023-01-22 10:06:40.212117: step: 830/464, loss: 0.792355477809906 2023-01-22 10:06:40.872629: step: 832/464, loss: 0.8665425777435303 2023-01-22 10:06:41.461588: step: 834/464, loss: 0.21131619811058044 2023-01-22 10:06:42.167146: step: 836/464, loss: 8.229231834411621 2023-01-22 10:06:42.793070: step: 838/464, loss: 0.24403637647628784 2023-01-22 10:06:43.438066: step: 840/464, loss: 1.594043493270874 2023-01-22 10:06:44.027425: step: 842/464, loss: 0.7965450882911682 2023-01-22 10:06:44.612722: step: 844/464, loss: 1.0780465602874756 2023-01-22 10:06:45.250597: step: 846/464, loss: 1.3620350360870361 2023-01-22 10:06:45.800706: step: 848/464, loss: 0.4115268886089325 2023-01-22 10:06:46.460863: step: 850/464, loss: 
2.11138916015625 2023-01-22 10:06:47.139758: step: 852/464, loss: 0.945903480052948 2023-01-22 10:06:47.731505: step: 854/464, loss: 1.8096015453338623 2023-01-22 10:06:48.402093: step: 856/464, loss: 2.286001682281494 2023-01-22 10:06:49.030582: step: 858/464, loss: 0.39916497468948364 2023-01-22 10:06:49.626643: step: 860/464, loss: 1.914172649383545 2023-01-22 10:06:50.272473: step: 862/464, loss: 1.116877794265747 2023-01-22 10:06:50.921919: step: 864/464, loss: 0.7338138222694397 2023-01-22 10:06:51.541007: step: 866/464, loss: 1.5705674886703491 2023-01-22 10:06:52.227682: step: 868/464, loss: 3.6879262924194336 2023-01-22 10:06:52.860925: step: 870/464, loss: 0.6445427536964417 2023-01-22 10:06:53.478074: step: 872/464, loss: 1.250487208366394 2023-01-22 10:06:54.071969: step: 874/464, loss: 2.3814239501953125 2023-01-22 10:06:54.723776: step: 876/464, loss: 0.8483049869537354 2023-01-22 10:06:55.446859: step: 878/464, loss: 0.6505409479141235 2023-01-22 10:06:56.055248: step: 880/464, loss: 1.3916373252868652 2023-01-22 10:06:56.707210: step: 882/464, loss: 0.3472626805305481 2023-01-22 10:06:57.327987: step: 884/464, loss: 0.6810139417648315 2023-01-22 10:06:57.890338: step: 886/464, loss: 0.9531253576278687 2023-01-22 10:06:58.500887: step: 888/464, loss: 2.4580156803131104 2023-01-22 10:06:59.111781: step: 890/464, loss: 0.4591882824897766 2023-01-22 10:06:59.711453: step: 892/464, loss: 0.8312535285949707 2023-01-22 10:07:00.322083: step: 894/464, loss: 0.5686916708946228 2023-01-22 10:07:00.972951: step: 896/464, loss: 0.17750242352485657 2023-01-22 10:07:01.520368: step: 898/464, loss: 1.004361867904663 2023-01-22 10:07:02.137415: step: 900/464, loss: 2.430652618408203 2023-01-22 10:07:02.676276: step: 902/464, loss: 1.3346503973007202 2023-01-22 10:07:03.345702: step: 904/464, loss: 1.2361981868743896 2023-01-22 10:07:03.978148: step: 906/464, loss: 0.4384477436542511 2023-01-22 10:07:04.619005: step: 908/464, loss: 0.49424174427986145 2023-01-22 10:07:05.269321: step: 910/464, loss: 0.32670140266418457 2023-01-22 10:07:05.848048: step: 912/464, loss: 1.3756822347640991 2023-01-22 10:07:06.512897: step: 914/464, loss: 5.016775131225586 2023-01-22 10:07:07.157186: step: 916/464, loss: 0.8426229953765869 2023-01-22 10:07:07.779145: step: 918/464, loss: 5.336833953857422 2023-01-22 10:07:08.395112: step: 920/464, loss: 1.2759982347488403 2023-01-22 10:07:08.977639: step: 922/464, loss: 1.1790809631347656 2023-01-22 10:07:09.574840: step: 924/464, loss: 2.6815872192382812 2023-01-22 10:07:10.219237: step: 926/464, loss: 0.760179877281189 2023-01-22 10:07:10.818493: step: 928/464, loss: 0.6275213360786438 2023-01-22 10:07:11.319251: step: 930/464, loss: 0.2987763285636902 ================================================== Loss: 1.330 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27162038269436156, 'r': 0.23972556502949333, 'f1': 0.2546782662687575}, 'combined': 0.18765766988224236, 'epoch': 3} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.289232787875879, 'r': 0.23828536102067832, 'f1': 0.26129883653172975}, 'combined': 0.17058887773574066, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26916768127705626, 'r': 0.25285448847238623, 'f1': 0.26075619123714827}, 'combined': 0.19213614091158293, 'epoch': 3} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 
'f1': 0.6528497409326425}, 'slot': {'p': 0.2966583078843843, 'r': 0.2386874642335826, 'f1': 0.26453414948104226}, 'combined': 0.17270105095653535, 'epoch': 3}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2819480885931225, 'r': 0.236558718270366, 'f1': 0.2572667420118502}, 'combined': 0.1895649677982054, 'epoch': 3}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2961399290424104, 'r': 0.22332754281913883, 'f1': 0.25463077580843235}, 'combined': 0.16623563602001282, 'epoch': 3}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26822916666666663, 'r': 0.2452380952380952, 'f1': 0.2562189054726368}, 'combined': 0.17081260364842454, 'epoch': 3}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 3}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.475, 'r': 0.16379310344827586, 'f1': 0.24358974358974356}, 'combined': 0.16239316239316237, 'epoch': 3}
New best chinese model...
New best korean model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27162038269436156, 'r': 0.23972556502949333, 'f1': 0.2546782662687575}, 'combined': 0.18765766988224236, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.289232787875879, 'r': 0.23828536102067832, 'f1': 0.26129883653172975}, 'combined': 0.17058887773574066, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26822916666666663, 'r': 0.2452380952380952, 'f1': 0.2562189054726368}, 'combined': 0.17081260364842454, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26916768127705626, 'r': 0.25285448847238623, 'f1': 0.26075619123714827}, 'combined': 0.19213614091158293, 'epoch': 3}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2966583078843843, 'r': 0.2386874642335826, 'f1': 0.26453414948104226}, 'combined': 0.17270105095653535, 'epoch': 3}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 3}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2638232246476927, 'r': 0.2578385768295976, 'f1': 0.2607965719748702}, 'combined': 0.19216589513937804, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30303842366654554, 'r': 0.20693274576679782, 'f1': 0.2459298754156606}, 'combined': 0.16055525545271107, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38333333333333336, 'r': 0.19827586206896552, 'f1': 0.2613636363636364}, 'combined': 0.17424242424242425, 'epoch': 2}
******************************
Epoch: 4
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450
--role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:10:06.541154: step: 2/464, loss: 0.5093468427658081 2023-01-22 10:10:07.193219: step: 4/464, loss: 0.3172636330127716 2023-01-22 10:10:07.809373: step: 6/464, loss: 0.5387977361679077 2023-01-22 10:10:08.506647: step: 8/464, loss: 0.3183968663215637 2023-01-22 10:10:09.108689: step: 10/464, loss: 0.4791586399078369 2023-01-22 10:10:09.768302: step: 12/464, loss: 0.463077187538147 2023-01-22 10:10:10.444435: step: 14/464, loss: 0.6189804673194885 2023-01-22 10:10:11.072971: step: 16/464, loss: 1.5961620807647705 2023-01-22 10:10:11.692570: step: 18/464, loss: 1.0718073844909668 2023-01-22 10:10:12.345688: step: 20/464, loss: 2.087601661682129 2023-01-22 10:10:12.947115: step: 22/464, loss: 1.253535270690918 2023-01-22 10:10:13.561801: step: 24/464, loss: 0.6115002632141113 2023-01-22 10:10:14.171923: step: 26/464, loss: 1.6747785806655884 2023-01-22 10:10:14.703309: step: 28/464, loss: 0.09929452836513519 2023-01-22 10:10:15.324537: step: 30/464, loss: 0.4556482434272766 2023-01-22 10:10:15.951738: step: 32/464, loss: 0.3613569438457489 2023-01-22 10:10:16.590420: step: 34/464, loss: 0.9141637086868286 2023-01-22 10:10:17.259970: step: 36/464, loss: 0.960303544998169 2023-01-22 10:10:17.870615: step: 38/464, loss: 0.6171204447746277 2023-01-22 10:10:18.450538: step: 40/464, loss: 0.3142845630645752 2023-01-22 10:10:19.094861: step: 42/464, loss: 0.7349892258644104 2023-01-22 10:10:19.695545: step: 44/464, loss: 0.2745680510997772 2023-01-22 10:10:20.355564: step: 46/464, loss: 0.3958454728126526 2023-01-22 10:10:20.959348: step: 48/464, loss: 0.395162969827652 2023-01-22 10:10:21.611054: step: 50/464, loss: 0.5048075318336487 2023-01-22 10:10:22.230450: step: 52/464, loss: 0.4473804831504822 2023-01-22 10:10:22.853077: step: 54/464, loss: 0.28719592094421387 2023-01-22 10:10:23.459231: step: 56/464, loss: 1.0184855461120605 2023-01-22 10:10:24.109178: step: 58/464, loss: 0.29800641536712646 2023-01-22 10:10:24.722364: step: 60/464, loss: 2.3515331745147705 2023-01-22 10:10:25.294953: step: 62/464, loss: 9.85141658782959 2023-01-22 10:10:25.964320: step: 64/464, loss: 0.6098653674125671 2023-01-22 10:10:26.632631: step: 66/464, loss: 0.5678577423095703 2023-01-22 10:10:27.224639: step: 68/464, loss: 0.693811297416687 2023-01-22 10:10:27.867961: step: 70/464, loss: 0.8950579762458801 2023-01-22 10:10:28.405764: step: 72/464, loss: 1.755286693572998 2023-01-22 10:10:28.986571: step: 74/464, loss: 1.2793998718261719 2023-01-22 10:10:29.629135: step: 76/464, loss: 0.23721827566623688 2023-01-22 10:10:30.204022: step: 78/464, loss: 1.0371506214141846 2023-01-22 10:10:30.809845: step: 80/464, loss: 0.9888033866882324 2023-01-22 10:10:31.453658: step: 82/464, loss: 0.5146254301071167 2023-01-22 10:10:32.100900: step: 84/464, loss: 0.8823146820068359 2023-01-22 10:10:32.628018: step: 86/464, loss: 0.6790046095848083 2023-01-22 10:10:33.365762: step: 88/464, loss: 0.34398648142814636 2023-01-22 10:10:34.036186: step: 90/464, loss: 0.4917071759700775 2023-01-22 10:10:34.626109: step: 92/464, loss: 1.7098023891448975 2023-01-22 10:10:35.266056: step: 94/464, loss: 0.45337536931037903 2023-01-22 10:10:35.952839: step: 96/464, loss: 1.211728572845459 2023-01-22 10:10:36.606769: step: 98/464, loss: 1.021390676498413 2023-01-22 10:10:37.240872: step: 100/464, loss: 0.7689708471298218 2023-01-22 10:10:37.941158: step: 102/464, loss: 1.190798282623291 2023-01-22 10:10:38.532110: step: 104/464, loss: 0.3627750873565674 2023-01-22 
10:10:39.147308: step: 106/464, loss: 0.26812222599983215 2023-01-22 10:10:39.844949: step: 108/464, loss: 0.6177099347114563 2023-01-22 10:10:40.499394: step: 110/464, loss: 0.4237760007381439 2023-01-22 10:10:41.090594: step: 112/464, loss: 0.2965877056121826 2023-01-22 10:10:41.716360: step: 114/464, loss: 0.3406103849411011 2023-01-22 10:10:42.343986: step: 116/464, loss: 1.0979301929473877 2023-01-22 10:10:42.968910: step: 118/464, loss: 0.29808324575424194 2023-01-22 10:10:43.578229: step: 120/464, loss: 0.48130905628204346 2023-01-22 10:10:44.190557: step: 122/464, loss: 0.3830515444278717 2023-01-22 10:10:44.836665: step: 124/464, loss: 1.2250969409942627 2023-01-22 10:10:45.509121: step: 126/464, loss: 1.5770454406738281 2023-01-22 10:10:46.148061: step: 128/464, loss: 0.8493466973304749 2023-01-22 10:10:46.764077: step: 130/464, loss: 0.262433797121048 2023-01-22 10:10:47.380444: step: 132/464, loss: 0.8228477239608765 2023-01-22 10:10:47.991369: step: 134/464, loss: 0.5960792303085327 2023-01-22 10:10:48.553392: step: 136/464, loss: 3.4240593910217285 2023-01-22 10:10:49.127890: step: 138/464, loss: 1.304851770401001 2023-01-22 10:10:49.795849: step: 140/464, loss: 0.8198272585868835 2023-01-22 10:10:50.420346: step: 142/464, loss: 0.650009036064148 2023-01-22 10:10:51.084199: step: 144/464, loss: 0.6824265718460083 2023-01-22 10:10:51.677047: step: 146/464, loss: 0.849699079990387 2023-01-22 10:10:52.342649: step: 148/464, loss: 0.5839065313339233 2023-01-22 10:10:52.947930: step: 150/464, loss: 0.509751558303833 2023-01-22 10:10:53.621758: step: 152/464, loss: 0.324587881565094 2023-01-22 10:10:54.301603: step: 154/464, loss: 0.4301684498786926 2023-01-22 10:10:54.949854: step: 156/464, loss: 0.7258855700492859 2023-01-22 10:10:55.622503: step: 158/464, loss: 0.43304145336151123 2023-01-22 10:10:56.275959: step: 160/464, loss: 0.4341854155063629 2023-01-22 10:10:56.921197: step: 162/464, loss: 1.0745781660079956 2023-01-22 10:10:57.513856: step: 164/464, loss: 2.8442575931549072 2023-01-22 10:10:58.103953: step: 166/464, loss: 0.29139086604118347 2023-01-22 10:10:58.787742: step: 168/464, loss: 2.0749263763427734 2023-01-22 10:10:59.411043: step: 170/464, loss: 1.3194022178649902 2023-01-22 10:10:59.992767: step: 172/464, loss: 3.8606531620025635 2023-01-22 10:11:00.606158: step: 174/464, loss: 0.9364582896232605 2023-01-22 10:11:01.187124: step: 176/464, loss: 1.4038944244384766 2023-01-22 10:11:01.770159: step: 178/464, loss: 0.5467737913131714 2023-01-22 10:11:02.332871: step: 180/464, loss: 0.7304741740226746 2023-01-22 10:11:02.928473: step: 182/464, loss: 1.0861698389053345 2023-01-22 10:11:03.521782: step: 184/464, loss: 0.2547362148761749 2023-01-22 10:11:04.149867: step: 186/464, loss: 0.12271208316087723 2023-01-22 10:11:04.807321: step: 188/464, loss: 0.7968823313713074 2023-01-22 10:11:05.455138: step: 190/464, loss: 0.2650519609451294 2023-01-22 10:11:06.015979: step: 192/464, loss: 0.4801476299762726 2023-01-22 10:11:06.620678: step: 194/464, loss: 2.0805039405822754 2023-01-22 10:11:07.291744: step: 196/464, loss: 0.9843195676803589 2023-01-22 10:11:08.007840: step: 198/464, loss: 1.461974024772644 2023-01-22 10:11:08.614333: step: 200/464, loss: 0.3458203971385956 2023-01-22 10:11:09.256674: step: 202/464, loss: 3.31097149848938 2023-01-22 10:11:09.829805: step: 204/464, loss: 0.3172132670879364 2023-01-22 10:11:10.455083: step: 206/464, loss: 0.8039501905441284 2023-01-22 10:11:11.065884: step: 208/464, loss: 1.0592734813690186 2023-01-22 10:11:11.658395: 
step: 210/464, loss: 0.6010752320289612 2023-01-22 10:11:12.283122: step: 212/464, loss: 3.5686473846435547 2023-01-22 10:11:12.889203: step: 214/464, loss: 0.6694086790084839 2023-01-22 10:11:13.470476: step: 216/464, loss: 0.46622198820114136 2023-01-22 10:11:14.106700: step: 218/464, loss: 0.5628550052642822 2023-01-22 10:11:14.755019: step: 220/464, loss: 0.3373853266239166 2023-01-22 10:11:15.332609: step: 222/464, loss: 0.2996734082698822 2023-01-22 10:11:15.960522: step: 224/464, loss: 0.7901242971420288 2023-01-22 10:11:16.554880: step: 226/464, loss: 1.8366661071777344 2023-01-22 10:11:17.158040: step: 228/464, loss: 0.2719947099685669 2023-01-22 10:11:17.722987: step: 230/464, loss: 0.48229557275772095 2023-01-22 10:11:18.448880: step: 232/464, loss: 1.682943344116211 2023-01-22 10:11:19.050324: step: 234/464, loss: 0.5389991402626038 2023-01-22 10:11:19.715632: step: 236/464, loss: 1.144383192062378 2023-01-22 10:11:20.352460: step: 238/464, loss: 1.1018505096435547 2023-01-22 10:11:20.949867: step: 240/464, loss: 0.7944072484970093 2023-01-22 10:11:21.568497: step: 242/464, loss: 0.5787885189056396 2023-01-22 10:11:22.157616: step: 244/464, loss: 0.7037353515625 2023-01-22 10:11:22.741338: step: 246/464, loss: 0.9548017978668213 2023-01-22 10:11:23.362290: step: 248/464, loss: 0.4501967132091522 2023-01-22 10:11:23.993567: step: 250/464, loss: 0.16243723034858704 2023-01-22 10:11:24.701786: step: 252/464, loss: 1.9847934246063232 2023-01-22 10:11:25.311506: step: 254/464, loss: 0.5167011022567749 2023-01-22 10:11:25.953085: step: 256/464, loss: 0.6174933910369873 2023-01-22 10:11:26.665876: step: 258/464, loss: 2.048726797103882 2023-01-22 10:11:27.317959: step: 260/464, loss: 2.179675579071045 2023-01-22 10:11:27.963867: step: 262/464, loss: 4.515833377838135 2023-01-22 10:11:28.598096: step: 264/464, loss: 0.4232323467731476 2023-01-22 10:11:29.188889: step: 266/464, loss: 2.2544357776641846 2023-01-22 10:11:29.793714: step: 268/464, loss: 2.305783748626709 2023-01-22 10:11:30.397698: step: 270/464, loss: 0.31464582681655884 2023-01-22 10:11:31.010855: step: 272/464, loss: 3.5395264625549316 2023-01-22 10:11:31.614119: step: 274/464, loss: 0.4365318715572357 2023-01-22 10:11:32.266134: step: 276/464, loss: 1.1285804510116577 2023-01-22 10:11:32.971894: step: 278/464, loss: 1.1635280847549438 2023-01-22 10:11:33.598542: step: 280/464, loss: 0.29192355275154114 2023-01-22 10:11:34.243982: step: 282/464, loss: 0.5533744096755981 2023-01-22 10:11:34.885692: step: 284/464, loss: 0.31066614389419556 2023-01-22 10:11:35.534381: step: 286/464, loss: 0.7738747596740723 2023-01-22 10:11:36.226196: step: 288/464, loss: 1.1949348449707031 2023-01-22 10:11:36.857765: step: 290/464, loss: 0.6176042556762695 2023-01-22 10:11:37.451904: step: 292/464, loss: 2.0230178833007812 2023-01-22 10:11:38.091072: step: 294/464, loss: 1.5586206912994385 2023-01-22 10:11:38.741409: step: 296/464, loss: 0.7693490982055664 2023-01-22 10:11:39.306008: step: 298/464, loss: 0.6012693643569946 2023-01-22 10:11:39.921937: step: 300/464, loss: 1.4037246704101562 2023-01-22 10:11:40.504220: step: 302/464, loss: 0.4873807430267334 2023-01-22 10:11:41.103676: step: 304/464, loss: 0.6122986078262329 2023-01-22 10:11:41.787015: step: 306/464, loss: 1.4132821559906006 2023-01-22 10:11:42.449498: step: 308/464, loss: 0.44583189487457275 2023-01-22 10:11:43.144392: step: 310/464, loss: 0.8842347860336304 2023-01-22 10:11:43.725235: step: 312/464, loss: 0.3857048749923706 2023-01-22 10:11:44.332975: step: 314/464, loss: 
0.2515755295753479 2023-01-22 10:11:44.904440: step: 316/464, loss: 0.999019205570221 2023-01-22 10:11:45.523256: step: 318/464, loss: 0.33942168951034546 2023-01-22 10:11:46.120260: step: 320/464, loss: 0.5291414260864258 2023-01-22 10:11:46.803969: step: 322/464, loss: 0.3920437693595886 2023-01-22 10:11:47.413004: step: 324/464, loss: 1.3074945211410522 2023-01-22 10:11:48.090177: step: 326/464, loss: 0.23588593304157257 2023-01-22 10:11:48.729329: step: 328/464, loss: 0.4870832562446594 2023-01-22 10:11:49.329930: step: 330/464, loss: 1.0733529329299927 2023-01-22 10:11:49.937263: step: 332/464, loss: 0.29912006855010986 2023-01-22 10:11:50.625944: step: 334/464, loss: 1.0160815715789795 2023-01-22 10:11:51.263248: step: 336/464, loss: 0.8775022625923157 2023-01-22 10:11:51.858483: step: 338/464, loss: 1.2015305757522583 2023-01-22 10:11:52.497322: step: 340/464, loss: 0.6995279788970947 2023-01-22 10:11:53.097784: step: 342/464, loss: 0.4498124420642853 2023-01-22 10:11:53.681554: step: 344/464, loss: 0.1893487572669983 2023-01-22 10:11:54.314471: step: 346/464, loss: 0.6980522871017456 2023-01-22 10:11:54.881497: step: 348/464, loss: 1.2606297731399536 2023-01-22 10:11:55.618241: step: 350/464, loss: 3.804093599319458 2023-01-22 10:11:56.182038: step: 352/464, loss: 0.7864630222320557 2023-01-22 10:11:56.801156: step: 354/464, loss: 0.36259323358535767 2023-01-22 10:11:57.438603: step: 356/464, loss: 0.5102464556694031 2023-01-22 10:11:58.052315: step: 358/464, loss: 0.34992679953575134 2023-01-22 10:11:58.632329: step: 360/464, loss: 0.4517691731452942 2023-01-22 10:11:59.347874: step: 362/464, loss: 0.6049309968948364 2023-01-22 10:12:00.024693: step: 364/464, loss: 0.6882205605506897 2023-01-22 10:12:00.683152: step: 366/464, loss: 1.3858107328414917 2023-01-22 10:12:01.251589: step: 368/464, loss: 0.28278064727783203 2023-01-22 10:12:01.908918: step: 370/464, loss: 0.41853728890419006 2023-01-22 10:12:02.550532: step: 372/464, loss: 1.2912614345550537 2023-01-22 10:12:03.199879: step: 374/464, loss: 0.9934861063957214 2023-01-22 10:12:03.829033: step: 376/464, loss: 1.2262226343154907 2023-01-22 10:12:04.509890: step: 378/464, loss: 0.5085292458534241 2023-01-22 10:12:05.184772: step: 380/464, loss: 0.6032164096832275 2023-01-22 10:12:05.816257: step: 382/464, loss: 1.4110809564590454 2023-01-22 10:12:06.425405: step: 384/464, loss: 0.8371944427490234 2023-01-22 10:12:07.118892: step: 386/464, loss: 0.6973156332969666 2023-01-22 10:12:07.752044: step: 388/464, loss: 0.9601186513900757 2023-01-22 10:12:08.348073: step: 390/464, loss: 1.5634307861328125 2023-01-22 10:12:08.969281: step: 392/464, loss: 0.2142696976661682 2023-01-22 10:12:09.579599: step: 394/464, loss: 1.0404516458511353 2023-01-22 10:12:10.210201: step: 396/464, loss: 1.0398080348968506 2023-01-22 10:12:10.796247: step: 398/464, loss: 0.3107651472091675 2023-01-22 10:12:11.419456: step: 400/464, loss: 0.8754726648330688 2023-01-22 10:12:12.068461: step: 402/464, loss: 3.0080599784851074 2023-01-22 10:12:12.751219: step: 404/464, loss: 2.916818380355835 2023-01-22 10:12:13.346257: step: 406/464, loss: 0.6109200119972229 2023-01-22 10:12:13.907157: step: 408/464, loss: 1.4414656162261963 2023-01-22 10:12:14.575652: step: 410/464, loss: 1.5391944646835327 2023-01-22 10:12:15.208763: step: 412/464, loss: 1.1875689029693604 2023-01-22 10:12:15.843210: step: 414/464, loss: 0.6385482549667358 2023-01-22 10:12:16.451294: step: 416/464, loss: 0.31985437870025635 2023-01-22 10:12:17.047662: step: 418/464, loss: 
1.223050594329834 2023-01-22 10:12:17.765525: step: 420/464, loss: 0.47759753465652466 2023-01-22 10:12:18.372781: step: 422/464, loss: 0.7822179794311523 2023-01-22 10:12:18.971692: step: 424/464, loss: 0.6694520115852356 2023-01-22 10:12:19.603479: step: 426/464, loss: 0.48283255100250244 2023-01-22 10:12:20.245335: step: 428/464, loss: 0.5288424491882324 2023-01-22 10:12:20.899558: step: 430/464, loss: 0.6541222333908081 2023-01-22 10:12:21.488497: step: 432/464, loss: 1.5048766136169434 2023-01-22 10:12:22.070254: step: 434/464, loss: 0.977016031742096 2023-01-22 10:12:22.622211: step: 436/464, loss: 1.579056978225708 2023-01-22 10:12:23.246380: step: 438/464, loss: 1.9080028533935547 2023-01-22 10:12:23.854036: step: 440/464, loss: 1.2760496139526367 2023-01-22 10:12:24.450784: step: 442/464, loss: 3.4610836505889893 2023-01-22 10:12:25.035178: step: 444/464, loss: 0.7282527089118958 2023-01-22 10:12:25.739380: step: 446/464, loss: 3.853248357772827 2023-01-22 10:12:26.335915: step: 448/464, loss: 0.40053898096084595 2023-01-22 10:12:26.975205: step: 450/464, loss: 0.9175394177436829 2023-01-22 10:12:27.682218: step: 452/464, loss: 0.48572227358818054 2023-01-22 10:12:28.273271: step: 454/464, loss: 0.22067205607891083 2023-01-22 10:12:28.940198: step: 456/464, loss: 0.6411260366439819 2023-01-22 10:12:29.567674: step: 458/464, loss: 1.13019859790802 2023-01-22 10:12:30.187247: step: 460/464, loss: 0.9006060361862183 2023-01-22 10:12:30.782437: step: 462/464, loss: 0.8046503067016602 2023-01-22 10:12:31.419238: step: 464/464, loss: 1.4703752994537354 2023-01-22 10:12:32.089415: step: 466/464, loss: 0.3552650809288025 2023-01-22 10:12:32.694291: step: 468/464, loss: 1.3312273025512695 2023-01-22 10:12:33.383544: step: 470/464, loss: 0.3936591148376465 2023-01-22 10:12:34.044239: step: 472/464, loss: 0.6073078513145447 2023-01-22 10:12:34.730809: step: 474/464, loss: 0.47256404161453247 2023-01-22 10:12:35.350890: step: 476/464, loss: 0.3240320384502411 2023-01-22 10:12:35.993145: step: 478/464, loss: 3.5507354736328125 2023-01-22 10:12:36.614056: step: 480/464, loss: 0.995995283126831 2023-01-22 10:12:37.241519: step: 482/464, loss: 0.6410576701164246 2023-01-22 10:12:37.841941: step: 484/464, loss: 0.8179224729537964 2023-01-22 10:12:38.417594: step: 486/464, loss: 0.30543074011802673 2023-01-22 10:12:39.021684: step: 488/464, loss: 0.6726969480514526 2023-01-22 10:12:39.713174: step: 490/464, loss: 0.44736555218696594 2023-01-22 10:12:40.365194: step: 492/464, loss: 0.7336754202842712 2023-01-22 10:12:41.059280: step: 494/464, loss: 0.8342028260231018 2023-01-22 10:12:41.654110: step: 496/464, loss: 2.2336502075195312 2023-01-22 10:12:42.310810: step: 498/464, loss: 0.5034320950508118 2023-01-22 10:12:43.014417: step: 500/464, loss: 1.1821582317352295 2023-01-22 10:12:43.680332: step: 502/464, loss: 0.4648742377758026 2023-01-22 10:12:44.276113: step: 504/464, loss: 3.733365774154663 2023-01-22 10:12:44.903607: step: 506/464, loss: 1.5545661449432373 2023-01-22 10:12:45.552754: step: 508/464, loss: 0.7909056544303894 2023-01-22 10:12:46.182324: step: 510/464, loss: 0.8181418180465698 2023-01-22 10:12:46.795374: step: 512/464, loss: 1.5901449918746948 2023-01-22 10:12:47.399745: step: 514/464, loss: 0.7300226092338562 2023-01-22 10:12:48.042192: step: 516/464, loss: 2.1893913745880127 2023-01-22 10:12:48.727342: step: 518/464, loss: 0.2126644104719162 2023-01-22 10:12:49.369984: step: 520/464, loss: 0.6104111671447754 2023-01-22 10:12:50.018296: step: 522/464, loss: 
0.49161142110824585 2023-01-22 10:12:50.620063: step: 524/464, loss: 1.7882384061813354 2023-01-22 10:12:51.310574: step: 526/464, loss: 3.518047332763672 2023-01-22 10:12:51.964423: step: 528/464, loss: 0.8232895731925964 2023-01-22 10:12:52.642842: step: 530/464, loss: 0.8883892893791199 2023-01-22 10:12:53.306409: step: 532/464, loss: 0.3270670175552368 2023-01-22 10:12:53.934177: step: 534/464, loss: 2.1954493522644043 2023-01-22 10:12:54.548469: step: 536/464, loss: 0.18684567511081696 2023-01-22 10:12:55.263208: step: 538/464, loss: 2.936201333999634 2023-01-22 10:12:55.884092: step: 540/464, loss: 1.008811593055725 2023-01-22 10:12:56.482044: step: 542/464, loss: 0.8713910579681396 2023-01-22 10:12:57.188858: step: 544/464, loss: 1.9630047082901 2023-01-22 10:12:57.846526: step: 546/464, loss: 1.2913490533828735 2023-01-22 10:12:58.445555: step: 548/464, loss: 0.835235595703125 2023-01-22 10:12:59.085145: step: 550/464, loss: 1.656591534614563 2023-01-22 10:12:59.896087: step: 552/464, loss: 0.384236603975296 2023-01-22 10:13:00.497297: step: 554/464, loss: 1.2251346111297607 2023-01-22 10:13:01.124339: step: 556/464, loss: 0.3945186138153076 2023-01-22 10:13:01.796538: step: 558/464, loss: 0.5363366603851318 2023-01-22 10:13:02.383769: step: 560/464, loss: 0.7180920243263245 2023-01-22 10:13:02.980082: step: 562/464, loss: 8.346687316894531 2023-01-22 10:13:03.627618: step: 564/464, loss: 1.5861282348632812 2023-01-22 10:13:04.211744: step: 566/464, loss: 0.1805248260498047 2023-01-22 10:13:04.833668: step: 568/464, loss: 0.3826966881752014 2023-01-22 10:13:05.477084: step: 570/464, loss: 1.587929368019104 2023-01-22 10:13:06.072962: step: 572/464, loss: 0.8956756591796875 2023-01-22 10:13:06.782737: step: 574/464, loss: 0.4481387734413147 2023-01-22 10:13:07.485215: step: 576/464, loss: 1.6725045442581177 2023-01-22 10:13:08.155519: step: 578/464, loss: 0.6068788766860962 2023-01-22 10:13:08.849114: step: 580/464, loss: 1.715501308441162 2023-01-22 10:13:09.489989: step: 582/464, loss: 1.3745126724243164 2023-01-22 10:13:10.079616: step: 584/464, loss: 0.45005327463150024 2023-01-22 10:13:10.609989: step: 586/464, loss: 0.4800697863101959 2023-01-22 10:13:11.294449: step: 588/464, loss: 0.12299307435750961 2023-01-22 10:13:11.831393: step: 590/464, loss: 1.523522138595581 2023-01-22 10:13:12.441460: step: 592/464, loss: 0.9146558046340942 2023-01-22 10:13:13.073477: step: 594/464, loss: 1.3624142408370972 2023-01-22 10:13:13.675672: step: 596/464, loss: 0.13913631439208984 2023-01-22 10:13:14.299284: step: 598/464, loss: 0.4703514575958252 2023-01-22 10:13:14.925082: step: 600/464, loss: 1.6409592628479004 2023-01-22 10:13:15.571721: step: 602/464, loss: 0.6029088497161865 2023-01-22 10:13:16.143231: step: 604/464, loss: 0.4723614752292633 2023-01-22 10:13:16.820218: step: 606/464, loss: 1.2699775695800781 2023-01-22 10:13:17.502268: step: 608/464, loss: 1.0261311531066895 2023-01-22 10:13:18.125899: step: 610/464, loss: 1.0511482954025269 2023-01-22 10:13:18.776115: step: 612/464, loss: 0.19793467223644257 2023-01-22 10:13:19.383867: step: 614/464, loss: 2.3930282592773438 2023-01-22 10:13:19.952818: step: 616/464, loss: 0.9835858345031738 2023-01-22 10:13:20.544780: step: 618/464, loss: 0.8501909375190735 2023-01-22 10:13:21.167051: step: 620/464, loss: 0.7180901765823364 2023-01-22 10:13:21.864434: step: 622/464, loss: 0.4181664288043976 2023-01-22 10:13:22.496783: step: 624/464, loss: 0.4150451719760895 2023-01-22 10:13:23.066377: step: 626/464, loss: 0.8431905508041382 
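The evaluation blocks in this log (the epoch 3 summary above and the epoch 4 summary below) report, per language, a template score and a slot score as precision/recall/F1 dictionaries plus a single 'combined' value. For every entry printed here the combined value equals the product of the template F1 and the slot F1, e.g. 0.7368421052631579 * 0.2546782662687575 ≈ 0.18765766988224236 for Dev Chinese at epoch 3. This is an observation about the logged numbers, not a claim about how the scorer is actually implemented, but it lets the combined figure be reproduced as:

def combined_score(template_f1, slot_f1):
    # Matches the 'combined' values in the evaluation summaries, e.g.
    # combined_score(0.7368421052631579, 0.2546782662687575) -> ~0.18765766988224236
    return template_f1 * slot_f1
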
2023-01-22 10:13:23.724780: step: 628/464, loss: 0.3675621449947357 2023-01-22 10:13:24.284934: step: 630/464, loss: 0.41591790318489075 2023-01-22 10:13:24.857530: step: 632/464, loss: 0.8589701652526855 2023-01-22 10:13:25.424425: step: 634/464, loss: 1.0192818641662598 2023-01-22 10:13:26.033148: step: 636/464, loss: 2.393925428390503 2023-01-22 10:13:26.626080: step: 638/464, loss: 0.49664920568466187 2023-01-22 10:13:27.254828: step: 640/464, loss: 1.0956499576568604 2023-01-22 10:13:27.907243: step: 642/464, loss: 0.6687727570533752 2023-01-22 10:13:28.523713: step: 644/464, loss: 1.0667164325714111 2023-01-22 10:13:29.256819: step: 646/464, loss: 3.0677218437194824 2023-01-22 10:13:29.839077: step: 648/464, loss: 1.1712805032730103 2023-01-22 10:13:30.455836: step: 650/464, loss: 0.5602961182594299 2023-01-22 10:13:31.103668: step: 652/464, loss: 1.0304670333862305 2023-01-22 10:13:31.713062: step: 654/464, loss: 0.3069724440574646 2023-01-22 10:13:32.372329: step: 656/464, loss: 1.068381905555725 2023-01-22 10:13:32.936590: step: 658/464, loss: 1.361100673675537 2023-01-22 10:13:33.524254: step: 660/464, loss: 0.9713236689567566 2023-01-22 10:13:34.134117: step: 662/464, loss: 0.4035855233669281 2023-01-22 10:13:34.749519: step: 664/464, loss: 1.217246651649475 2023-01-22 10:13:35.432342: step: 666/464, loss: 1.0265988111495972 2023-01-22 10:13:36.057535: step: 668/464, loss: 1.0197653770446777 2023-01-22 10:13:36.680317: step: 670/464, loss: 0.36315852403640747 2023-01-22 10:13:37.285157: step: 672/464, loss: 0.7223759889602661 2023-01-22 10:13:37.922077: step: 674/464, loss: 1.1136391162872314 2023-01-22 10:13:38.547328: step: 676/464, loss: 0.37483763694763184 2023-01-22 10:13:39.236568: step: 678/464, loss: 0.7622231841087341 2023-01-22 10:13:39.876046: step: 680/464, loss: 1.3584575653076172 2023-01-22 10:13:40.454243: step: 682/464, loss: 3.232109546661377 2023-01-22 10:13:41.104440: step: 684/464, loss: 1.17849862575531 2023-01-22 10:13:41.746221: step: 686/464, loss: 0.5659350752830505 2023-01-22 10:13:42.408912: step: 688/464, loss: 0.8451026082038879 2023-01-22 10:13:42.999882: step: 690/464, loss: 8.128095626831055 2023-01-22 10:13:43.585828: step: 692/464, loss: 0.3531476557254791 2023-01-22 10:13:44.206000: step: 694/464, loss: 0.9488494396209717 2023-01-22 10:13:44.706086: step: 696/464, loss: 0.7125240564346313 2023-01-22 10:13:45.343870: step: 698/464, loss: 0.43225884437561035 2023-01-22 10:13:45.982480: step: 700/464, loss: 1.4727071523666382 2023-01-22 10:13:46.597991: step: 702/464, loss: 0.9909815788269043 2023-01-22 10:13:47.175431: step: 704/464, loss: 0.717685878276825 2023-01-22 10:13:47.883841: step: 706/464, loss: 0.8827025890350342 2023-01-22 10:13:48.565505: step: 708/464, loss: 0.8928143978118896 2023-01-22 10:13:49.210137: step: 710/464, loss: 0.45587989687919617 2023-01-22 10:13:49.862166: step: 712/464, loss: 0.3754402697086334 2023-01-22 10:13:50.480075: step: 714/464, loss: 6.441854000091553 2023-01-22 10:13:51.143186: step: 716/464, loss: 0.9459872245788574 2023-01-22 10:13:51.818985: step: 718/464, loss: 2.9799163341522217 2023-01-22 10:13:52.402008: step: 720/464, loss: 0.3675011098384857 2023-01-22 10:13:53.012284: step: 722/464, loss: 0.39418911933898926 2023-01-22 10:13:53.709577: step: 724/464, loss: 0.24418479204177856 2023-01-22 10:13:54.327979: step: 726/464, loss: 3.739204168319702 2023-01-22 10:13:54.999310: step: 728/464, loss: 0.9147809743881226 2023-01-22 10:13:55.646969: step: 730/464, loss: 0.7824108600616455 2023-01-22 
10:13:56.191990: step: 732/464, loss: 0.20895031094551086 2023-01-22 10:13:56.794387: step: 734/464, loss: 4.588395118713379 2023-01-22 10:13:57.423059: step: 736/464, loss: 1.1733981370925903 2023-01-22 10:13:58.023003: step: 738/464, loss: 0.8675525784492493 2023-01-22 10:13:58.675948: step: 740/464, loss: 0.52099609375 2023-01-22 10:13:59.302880: step: 742/464, loss: 0.5001940727233887 2023-01-22 10:13:59.970973: step: 744/464, loss: 0.8723838329315186 2023-01-22 10:14:00.638662: step: 746/464, loss: 6.227485656738281 2023-01-22 10:14:01.250598: step: 748/464, loss: 1.8690444231033325 2023-01-22 10:14:01.903374: step: 750/464, loss: 0.33338722586631775 2023-01-22 10:14:02.518313: step: 752/464, loss: 0.8549349904060364 2023-01-22 10:14:03.083369: step: 754/464, loss: 0.18853016197681427 2023-01-22 10:14:03.692095: step: 756/464, loss: 0.5232789516448975 2023-01-22 10:14:04.322898: step: 758/464, loss: 0.5195374488830566 2023-01-22 10:14:04.982043: step: 760/464, loss: 1.1466377973556519 2023-01-22 10:14:05.585978: step: 762/464, loss: 2.1955342292785645 2023-01-22 10:14:06.222998: step: 764/464, loss: 1.1806930303573608 2023-01-22 10:14:06.825363: step: 766/464, loss: 0.9567260146141052 2023-01-22 10:14:07.439158: step: 768/464, loss: 0.49507617950439453 2023-01-22 10:14:08.053735: step: 770/464, loss: 0.4921751022338867 2023-01-22 10:14:08.607148: step: 772/464, loss: 0.3551686406135559 2023-01-22 10:14:09.205964: step: 774/464, loss: 4.9337310791015625 2023-01-22 10:14:09.794127: step: 776/464, loss: 1.8172357082366943 2023-01-22 10:14:10.410472: step: 778/464, loss: 0.5766383409500122 2023-01-22 10:14:11.255284: step: 780/464, loss: 0.9690201878547668 2023-01-22 10:14:11.907478: step: 782/464, loss: 0.39389437437057495 2023-01-22 10:14:12.561873: step: 784/464, loss: 0.7511005401611328 2023-01-22 10:14:13.303592: step: 786/464, loss: 0.2664196491241455 2023-01-22 10:14:13.930634: step: 788/464, loss: 5.700353622436523 2023-01-22 10:14:14.576956: step: 790/464, loss: 0.8287971615791321 2023-01-22 10:14:15.226997: step: 792/464, loss: 2.5748291015625 2023-01-22 10:14:15.840485: step: 794/464, loss: 0.9550393223762512 2023-01-22 10:14:16.403648: step: 796/464, loss: 0.7970628142356873 2023-01-22 10:14:17.045058: step: 798/464, loss: 3.856137752532959 2023-01-22 10:14:17.684376: step: 800/464, loss: 0.6149031519889832 2023-01-22 10:14:18.282759: step: 802/464, loss: 0.25857508182525635 2023-01-22 10:14:18.920191: step: 804/464, loss: 0.5109097361564636 2023-01-22 10:14:19.564019: step: 806/464, loss: 2.601020336151123 2023-01-22 10:14:20.134710: step: 808/464, loss: 0.8694771528244019 2023-01-22 10:14:20.754522: step: 810/464, loss: 2.0033280849456787 2023-01-22 10:14:21.408275: step: 812/464, loss: 0.6385647058486938 2023-01-22 10:14:22.042361: step: 814/464, loss: 0.7749675512313843 2023-01-22 10:14:22.677542: step: 816/464, loss: 2.016286849975586 2023-01-22 10:14:23.358922: step: 818/464, loss: 0.6855290532112122 2023-01-22 10:14:23.995975: step: 820/464, loss: 0.5522828102111816 2023-01-22 10:14:24.616015: step: 822/464, loss: 0.7204458713531494 2023-01-22 10:14:25.212095: step: 824/464, loss: 9.690481185913086 2023-01-22 10:14:25.807304: step: 826/464, loss: 5.666155815124512 2023-01-22 10:14:26.420959: step: 828/464, loss: 0.40514591336250305 2023-01-22 10:14:27.124788: step: 830/464, loss: 1.7112529277801514 2023-01-22 10:14:27.733591: step: 832/464, loss: 0.20129477977752686 2023-01-22 10:14:28.363190: step: 834/464, loss: 0.801225483417511 2023-01-22 10:14:29.146091: step: 
836/464, loss: 0.6476247310638428 2023-01-22 10:14:29.818647: step: 838/464, loss: 0.4347991347312927 2023-01-22 10:14:30.443662: step: 840/464, loss: 1.338456153869629 2023-01-22 10:14:31.065285: step: 842/464, loss: 1.0563338994979858 2023-01-22 10:14:31.692261: step: 844/464, loss: 0.5551201105117798 2023-01-22 10:14:32.357002: step: 846/464, loss: 0.4709798991680145 2023-01-22 10:14:32.983752: step: 848/464, loss: 0.2790720462799072 2023-01-22 10:14:33.623052: step: 850/464, loss: 0.9369035363197327 2023-01-22 10:14:34.291950: step: 852/464, loss: 0.46123066544532776 2023-01-22 10:14:34.907241: step: 854/464, loss: 1.2389100790023804 2023-01-22 10:14:35.582745: step: 856/464, loss: 1.399316430091858 2023-01-22 10:14:36.223241: step: 858/464, loss: 1.239919662475586 2023-01-22 10:14:36.819699: step: 860/464, loss: 4.345308303833008 2023-01-22 10:14:37.498050: step: 862/464, loss: 0.33345890045166016 2023-01-22 10:14:38.096504: step: 864/464, loss: 0.2196376919746399 2023-01-22 10:14:38.715097: step: 866/464, loss: 0.7692931890487671 2023-01-22 10:14:39.375150: step: 868/464, loss: 1.874446988105774 2023-01-22 10:14:40.009030: step: 870/464, loss: 1.640389084815979 2023-01-22 10:14:40.559831: step: 872/464, loss: 0.6178362965583801 2023-01-22 10:14:41.150986: step: 874/464, loss: 1.367974042892456 2023-01-22 10:14:41.823886: step: 876/464, loss: 0.2461293339729309 2023-01-22 10:14:42.509313: step: 878/464, loss: 0.5997183918952942 2023-01-22 10:14:43.081297: step: 880/464, loss: 0.7863847017288208 2023-01-22 10:14:43.723053: step: 882/464, loss: 0.7832212448120117 2023-01-22 10:14:44.324450: step: 884/464, loss: 0.4306342899799347 2023-01-22 10:14:44.961344: step: 886/464, loss: 0.5308839678764343 2023-01-22 10:14:45.574517: step: 888/464, loss: 0.610247790813446 2023-01-22 10:14:46.183643: step: 890/464, loss: 0.8606748580932617 2023-01-22 10:14:46.849954: step: 892/464, loss: 1.7569546699523926 2023-01-22 10:14:47.456950: step: 894/464, loss: 0.2615882158279419 2023-01-22 10:14:48.118255: step: 896/464, loss: 0.26798972487449646 2023-01-22 10:14:48.725344: step: 898/464, loss: 1.191440224647522 2023-01-22 10:14:49.345410: step: 900/464, loss: 0.333391398191452 2023-01-22 10:14:49.939557: step: 902/464, loss: 0.4805077612400055 2023-01-22 10:14:50.524055: step: 904/464, loss: 1.0383741855621338 2023-01-22 10:14:51.233843: step: 906/464, loss: 1.8628451824188232 2023-01-22 10:14:51.852598: step: 908/464, loss: 1.6816130876541138 2023-01-22 10:14:52.498422: step: 910/464, loss: 0.8756980895996094 2023-01-22 10:14:53.224163: step: 912/464, loss: 0.6809302568435669 2023-01-22 10:14:53.842739: step: 914/464, loss: 0.9276522994041443 2023-01-22 10:14:54.491809: step: 916/464, loss: 4.850551128387451 2023-01-22 10:14:55.050106: step: 918/464, loss: 0.8563334941864014 2023-01-22 10:14:55.643105: step: 920/464, loss: 0.20570218563079834 2023-01-22 10:14:56.231895: step: 922/464, loss: 4.263764381408691 2023-01-22 10:14:56.967760: step: 924/464, loss: 2.06984281539917 2023-01-22 10:14:57.595678: step: 926/464, loss: 0.24161413311958313 2023-01-22 10:14:58.217289: step: 928/464, loss: 0.3930075764656067 2023-01-22 10:14:58.789284: step: 930/464, loss: 0.3355711102485657 ================================================== Loss: 1.110 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30009433962264154, 'r': 0.3018026565464896, 'f1': 0.3009460737937559}, 'combined': 0.22174973858487274, 'epoch': 4} Test Chinese: {'template': 
{'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2955621429043047, 'r': 0.27902150921883445, 'f1': 0.28705374709629977}, 'combined': 0.1874029644255636, 'epoch': 4}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2967885147601476, 'r': 0.3052360056925996, 'f1': 0.30095299345182414}, 'combined': 0.22175483728029147, 'epoch': 4}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3028972827204988, 'r': 0.27591745129360407, 'f1': 0.2887785722813406}, 'combined': 0.18852901610077155, 'epoch': 4}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25166666666666665, 'r': 0.35952380952380947, 'f1': 0.296078431372549}, 'combined': 0.19738562091503267, 'epoch': 4}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 4}
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30009433962264154, 'r': 0.3018026565464896, 'f1': 0.3009460737937559}, 'combined': 0.22174973858487274, 'epoch': 4}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2955621429043047, 'r': 0.27902150921883445, 'f1': 0.28705374709629977}, 'combined': 0.1874029644255636, 'epoch': 4}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25166666666666665, 'r': 0.35952380952380947, 'f1': 0.296078431372549}, 'combined': 0.19738562091503267, 'epoch': 4}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2967885147601476, 'r': 0.3052360056925996, 'f1': 0.30095299345182414}, 'combined': 0.22175483728029147, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3028972827204988, 'r':
0.27591745129360407, 'f1': 0.2887785722813406}, 'combined': 0.18852901610077155, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:18:09.034843: step: 2/464, loss: 0.3140789270401001 2023-01-22 10:18:09.774961: step: 4/464, loss: 0.5168225169181824 2023-01-22 10:18:10.389697: step: 6/464, loss: 0.41354134678840637 2023-01-22 10:18:10.970295: step: 8/464, loss: 0.481793075799942 2023-01-22 10:18:11.545857: step: 10/464, loss: 0.40146562457084656 2023-01-22 10:18:12.293824: step: 12/464, loss: 0.2971474528312683 2023-01-22 10:18:12.925785: step: 14/464, loss: 0.2346367985010147 2023-01-22 10:18:13.549907: step: 16/464, loss: 1.0591741800308228 2023-01-22 10:18:14.140522: step: 18/464, loss: 0.5679463744163513 2023-01-22 10:18:14.709453: step: 20/464, loss: 0.1803908348083496 2023-01-22 10:18:15.323932: step: 22/464, loss: 0.9101299047470093 2023-01-22 10:18:15.994478: step: 24/464, loss: 0.6895893812179565 2023-01-22 10:18:16.597989: step: 26/464, loss: 0.9380605220794678 2023-01-22 10:18:17.182320: step: 28/464, loss: 1.3286418914794922 2023-01-22 10:18:17.830038: step: 30/464, loss: 0.17582368850708008 2023-01-22 10:18:18.481296: step: 32/464, loss: 0.3735285699367523 2023-01-22 10:18:19.113117: step: 34/464, loss: 1.2204426527023315 2023-01-22 10:18:19.882457: step: 36/464, loss: 1.5764824151992798 2023-01-22 10:18:20.557664: step: 38/464, loss: 0.26649218797683716 2023-01-22 10:18:21.194616: step: 40/464, loss: 0.17335833609104156 2023-01-22 10:18:21.783032: step: 42/464, loss: 0.6150107979774475 2023-01-22 10:18:22.469487: step: 44/464, loss: 1.0510187149047852 2023-01-22 10:18:23.082745: step: 46/464, loss: 0.301383912563324 2023-01-22 10:18:23.763692: step: 48/464, loss: 0.20942439138889313 2023-01-22 10:18:24.328748: step: 50/464, loss: 1.81709623336792 2023-01-22 10:18:24.969388: step: 52/464, loss: 0.25536850094795227 2023-01-22 10:18:25.574980: step: 54/464, loss: 0.4041978716850281 2023-01-22 10:18:26.172218: step: 56/464, loss: 0.3635302186012268 2023-01-22 10:18:26.843517: step: 58/464, loss: 0.16067996621131897 2023-01-22 10:18:27.429856: step: 60/464, loss: 0.16115136444568634 2023-01-22 10:18:28.030344: step: 62/464, loss: 0.35865315794944763 2023-01-22 10:18:28.594108: step: 64/464, loss: 1.2715744972229004 2023-01-22 10:18:29.185137: step: 66/464, loss: 1.9927908182144165 2023-01-22 10:18:29.862915: step: 68/464, loss: 0.31118354201316833 2023-01-22 10:18:30.503110: step: 70/464, loss: 0.34601879119873047 2023-01-22 10:18:31.171784: step: 72/464, loss: 1.2204174995422363 2023-01-22 10:18:31.700090: step: 74/464, loss: 0.5587912201881409 2023-01-22 10:18:32.305268: step: 76/464, loss: 0.6392359137535095 2023-01-22 10:18:33.003599: step: 78/464, loss: 0.5543754696846008 2023-01-22 10:18:33.601343: step: 80/464, loss: 1.1867753267288208 2023-01-22 10:18:34.247243: step: 82/464, loss: 0.7671768665313721 2023-01-22 10:18:34.889428: step: 84/464, loss: 0.17810527980327606 2023-01-22 10:18:35.528844: step: 86/464, loss: 0.910143256187439 2023-01-22 10:18:36.161761: step: 88/464, loss: 0.7807407975196838 2023-01-22 10:18:36.774469: step: 90/464, 
loss: 1.1668219566345215 2023-01-22 10:18:37.346204: step: 92/464, loss: 0.11305569857358932 2023-01-22 10:18:38.035574: step: 94/464, loss: 0.4058632254600525 2023-01-22 10:18:38.718391: step: 96/464, loss: 0.36806875467300415 2023-01-22 10:18:39.376709: step: 98/464, loss: 0.4692281186580658 2023-01-22 10:18:39.938580: step: 100/464, loss: 0.42481958866119385 2023-01-22 10:18:40.537833: step: 102/464, loss: 0.2551076412200928 2023-01-22 10:18:41.127179: step: 104/464, loss: 0.24660855531692505 2023-01-22 10:18:41.817273: step: 106/464, loss: 0.47354790568351746 2023-01-22 10:18:42.476290: step: 108/464, loss: 0.47715210914611816 2023-01-22 10:18:43.184760: step: 110/464, loss: 0.22332307696342468 2023-01-22 10:18:43.805385: step: 112/464, loss: 0.1295851618051529 2023-01-22 10:18:44.479436: step: 114/464, loss: 0.808097779750824 2023-01-22 10:18:45.074431: step: 116/464, loss: 2.0270867347717285 2023-01-22 10:18:45.744551: step: 118/464, loss: 1.2771189212799072 2023-01-22 10:18:46.487066: step: 120/464, loss: 0.3275717496871948 2023-01-22 10:18:47.091673: step: 122/464, loss: 0.4346131682395935 2023-01-22 10:18:47.710753: step: 124/464, loss: 1.078847050666809 2023-01-22 10:18:48.331247: step: 126/464, loss: 1.6871562004089355 2023-01-22 10:18:48.950311: step: 128/464, loss: 0.5023834109306335 2023-01-22 10:18:49.598439: step: 130/464, loss: 0.4305103123188019 2023-01-22 10:18:50.258339: step: 132/464, loss: 0.19542160630226135 2023-01-22 10:18:50.883098: step: 134/464, loss: 0.7721800804138184 2023-01-22 10:18:51.484095: step: 136/464, loss: 1.2299044132232666 2023-01-22 10:18:52.005134: step: 138/464, loss: 0.883644163608551 2023-01-22 10:18:52.632409: step: 140/464, loss: 0.7875983715057373 2023-01-22 10:18:53.190135: step: 142/464, loss: 0.19403350353240967 2023-01-22 10:18:53.819574: step: 144/464, loss: 0.787096381187439 2023-01-22 10:18:54.510595: step: 146/464, loss: 0.6089295148849487 2023-01-22 10:18:55.131473: step: 148/464, loss: 0.6090214252471924 2023-01-22 10:18:55.693022: step: 150/464, loss: 1.256813645362854 2023-01-22 10:18:56.292662: step: 152/464, loss: 0.6920052170753479 2023-01-22 10:18:56.880410: step: 154/464, loss: 1.9848008155822754 2023-01-22 10:18:57.493599: step: 156/464, loss: 0.5117339491844177 2023-01-22 10:18:58.191291: step: 158/464, loss: 0.5870523452758789 2023-01-22 10:18:58.845491: step: 160/464, loss: 1.0715384483337402 2023-01-22 10:18:59.481338: step: 162/464, loss: 0.8279769420623779 2023-01-22 10:19:00.140969: step: 164/464, loss: 1.3789533376693726 2023-01-22 10:19:00.755102: step: 166/464, loss: 1.1194424629211426 2023-01-22 10:19:01.310436: step: 168/464, loss: 1.0870202779769897 2023-01-22 10:19:01.960743: step: 170/464, loss: 0.4928306043148041 2023-01-22 10:19:02.527406: step: 172/464, loss: 0.7663793563842773 2023-01-22 10:19:03.196277: step: 174/464, loss: 0.753650963306427 2023-01-22 10:19:03.806402: step: 176/464, loss: 0.362321674823761 2023-01-22 10:19:04.396811: step: 178/464, loss: 1.0201119184494019 2023-01-22 10:19:05.034137: step: 180/464, loss: 2.436824083328247 2023-01-22 10:19:05.679510: step: 182/464, loss: 0.6812862157821655 2023-01-22 10:19:06.328678: step: 184/464, loss: 0.7665892243385315 2023-01-22 10:19:06.955805: step: 186/464, loss: 3.6190996170043945 2023-01-22 10:19:07.557427: step: 188/464, loss: 1.214016318321228 2023-01-22 10:19:08.198716: step: 190/464, loss: 1.0860960483551025 2023-01-22 10:19:08.845538: step: 192/464, loss: 0.4875420928001404 2023-01-22 10:19:09.447128: step: 194/464, loss: 
0.17811265587806702 2023-01-22 10:19:10.155918: step: 196/464, loss: 0.43976664543151855 2023-01-22 10:19:10.886279: step: 198/464, loss: 0.7189344763755798 2023-01-22 10:19:11.515739: step: 200/464, loss: 0.6835828423500061 2023-01-22 10:19:12.166689: step: 202/464, loss: 0.5588489174842834 2023-01-22 10:19:12.814561: step: 204/464, loss: 0.6121894717216492 2023-01-22 10:19:13.453721: step: 206/464, loss: 1.1947513818740845 2023-01-22 10:19:14.064159: step: 208/464, loss: 0.6017618775367737 2023-01-22 10:19:14.666402: step: 210/464, loss: 0.4701478183269501 2023-01-22 10:19:15.315261: step: 212/464, loss: 0.9148519039154053 2023-01-22 10:19:15.980852: step: 214/464, loss: 2.0866644382476807 2023-01-22 10:19:16.514513: step: 216/464, loss: 0.6283119320869446 2023-01-22 10:19:17.188481: step: 218/464, loss: 0.5661494731903076 2023-01-22 10:19:17.788593: step: 220/464, loss: 0.5721613764762878 2023-01-22 10:19:18.400788: step: 222/464, loss: 1.6528464555740356 2023-01-22 10:19:19.038818: step: 224/464, loss: 1.1147488355636597 2023-01-22 10:19:19.671414: step: 226/464, loss: 0.4663833677768707 2023-01-22 10:19:20.289822: step: 228/464, loss: 0.6291152834892273 2023-01-22 10:19:20.936281: step: 230/464, loss: 0.9880251884460449 2023-01-22 10:19:21.585977: step: 232/464, loss: 0.8147752285003662 2023-01-22 10:19:22.197152: step: 234/464, loss: 6.869061470031738 2023-01-22 10:19:22.909797: step: 236/464, loss: 0.6083580255508423 2023-01-22 10:19:23.562402: step: 238/464, loss: 0.4468768537044525 2023-01-22 10:19:24.259378: step: 240/464, loss: 0.6752504706382751 2023-01-22 10:19:24.897075: step: 242/464, loss: 0.5524731278419495 2023-01-22 10:19:25.555612: step: 244/464, loss: 1.4635412693023682 2023-01-22 10:19:26.166824: step: 246/464, loss: 0.24824781715869904 2023-01-22 10:19:26.803771: step: 248/464, loss: 0.8254262208938599 2023-01-22 10:19:27.381260: step: 250/464, loss: 1.0828063488006592 2023-01-22 10:19:27.999361: step: 252/464, loss: 1.3140785694122314 2023-01-22 10:19:28.621079: step: 254/464, loss: 0.3395722806453705 2023-01-22 10:19:29.245530: step: 256/464, loss: 0.39691609144210815 2023-01-22 10:19:29.888380: step: 258/464, loss: 1.0010672807693481 2023-01-22 10:19:30.592710: step: 260/464, loss: 0.33052146434783936 2023-01-22 10:19:31.214945: step: 262/464, loss: 2.2361369132995605 2023-01-22 10:19:31.794863: step: 264/464, loss: 0.15512794256210327 2023-01-22 10:19:32.422373: step: 266/464, loss: 0.42286238074302673 2023-01-22 10:19:32.984539: step: 268/464, loss: 0.4806721806526184 2023-01-22 10:19:33.652920: step: 270/464, loss: 1.3433846235275269 2023-01-22 10:19:34.373887: step: 272/464, loss: 0.27988773584365845 2023-01-22 10:19:35.021836: step: 274/464, loss: 0.4252055287361145 2023-01-22 10:19:35.664303: step: 276/464, loss: 0.4951595366001129 2023-01-22 10:19:36.214364: step: 278/464, loss: 0.18977151811122894 2023-01-22 10:19:36.802192: step: 280/464, loss: 0.44850343465805054 2023-01-22 10:19:37.480991: step: 282/464, loss: 1.0554685592651367 2023-01-22 10:19:38.138374: step: 284/464, loss: 1.7502734661102295 2023-01-22 10:19:38.740519: step: 286/464, loss: 0.28199800848960876 2023-01-22 10:19:39.331722: step: 288/464, loss: 0.3071030080318451 2023-01-22 10:19:39.927466: step: 290/464, loss: 1.5086228847503662 2023-01-22 10:19:40.575148: step: 292/464, loss: 0.7221007347106934 2023-01-22 10:19:41.152511: step: 294/464, loss: 0.6204640865325928 2023-01-22 10:19:41.819390: step: 296/464, loss: 0.6025323271751404 2023-01-22 10:19:42.393702: step: 298/464, loss: 
0.6046009063720703 2023-01-22 10:19:43.033483: step: 300/464, loss: 0.8346729278564453 2023-01-22 10:19:43.687963: step: 302/464, loss: 1.0257431268692017 2023-01-22 10:19:44.427228: step: 304/464, loss: 2.040140151977539 2023-01-22 10:19:45.140227: step: 306/464, loss: 1.0125569105148315 2023-01-22 10:19:45.741383: step: 308/464, loss: 0.6352773904800415 2023-01-22 10:19:46.355273: step: 310/464, loss: 0.3539637625217438 2023-01-22 10:19:46.988976: step: 312/464, loss: 0.8425735831260681 2023-01-22 10:19:47.630932: step: 314/464, loss: 0.39707064628601074 2023-01-22 10:19:48.331999: step: 316/464, loss: 1.3782291412353516 2023-01-22 10:19:48.974506: step: 318/464, loss: 1.362913727760315 2023-01-22 10:19:49.615487: step: 320/464, loss: 0.5295073390007019 2023-01-22 10:19:50.237270: step: 322/464, loss: 1.2126305103302002 2023-01-22 10:19:50.912634: step: 324/464, loss: 0.2630016505718231 2023-01-22 10:19:51.561409: step: 326/464, loss: 1.073133945465088 2023-01-22 10:19:52.173462: step: 328/464, loss: 0.4515211880207062 2023-01-22 10:19:52.824263: step: 330/464, loss: 1.6365458965301514 2023-01-22 10:19:53.494694: step: 332/464, loss: 0.25209635496139526 2023-01-22 10:19:54.202504: step: 334/464, loss: 0.41742879152297974 2023-01-22 10:19:54.837552: step: 336/464, loss: 1.067553162574768 2023-01-22 10:19:55.460935: step: 338/464, loss: 0.4192078113555908 2023-01-22 10:19:56.061408: step: 340/464, loss: 1.813993215560913 2023-01-22 10:19:56.603593: step: 342/464, loss: 1.2661583423614502 2023-01-22 10:19:57.224065: step: 344/464, loss: 1.9221551418304443 2023-01-22 10:19:57.879539: step: 346/464, loss: 1.029069185256958 2023-01-22 10:19:58.491396: step: 348/464, loss: 0.9227486848831177 2023-01-22 10:19:59.070700: step: 350/464, loss: 0.9554811120033264 2023-01-22 10:19:59.820089: step: 352/464, loss: 0.9297544956207275 2023-01-22 10:20:00.427243: step: 354/464, loss: 0.6628623008728027 2023-01-22 10:20:00.962250: step: 356/464, loss: 0.4751133322715759 2023-01-22 10:20:01.601248: step: 358/464, loss: 0.9389272928237915 2023-01-22 10:20:02.215474: step: 360/464, loss: 1.0912551879882812 2023-01-22 10:20:02.887724: step: 362/464, loss: 0.8685484528541565 2023-01-22 10:20:03.559561: step: 364/464, loss: 1.7897229194641113 2023-01-22 10:20:04.174684: step: 366/464, loss: 0.3098595142364502 2023-01-22 10:20:04.774172: step: 368/464, loss: 0.6312590837478638 2023-01-22 10:20:05.399285: step: 370/464, loss: 0.5151140689849854 2023-01-22 10:20:05.976323: step: 372/464, loss: 0.6841112375259399 2023-01-22 10:20:06.604281: step: 374/464, loss: 1.679078459739685 2023-01-22 10:20:07.224472: step: 376/464, loss: 0.6533408164978027 2023-01-22 10:20:07.854975: step: 378/464, loss: 1.0430176258087158 2023-01-22 10:20:08.485533: step: 380/464, loss: 0.39996644854545593 2023-01-22 10:20:09.077874: step: 382/464, loss: 0.7073900699615479 2023-01-22 10:20:09.745397: step: 384/464, loss: 4.618206977844238 2023-01-22 10:20:10.341220: step: 386/464, loss: 0.7551769614219666 2023-01-22 10:20:10.920069: step: 388/464, loss: 0.7826072573661804 2023-01-22 10:20:11.596913: step: 390/464, loss: 0.7515690326690674 2023-01-22 10:20:12.210975: step: 392/464, loss: 0.999121367931366 2023-01-22 10:20:12.878808: step: 394/464, loss: 0.9337067604064941 2023-01-22 10:20:13.552257: step: 396/464, loss: 0.9681384563446045 2023-01-22 10:20:14.138831: step: 398/464, loss: 1.0782345533370972 2023-01-22 10:20:14.780746: step: 400/464, loss: 1.4214262962341309 2023-01-22 10:20:15.345725: step: 402/464, loss: 0.7020053863525391 
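Note on the log format above: each training step is recorded as "<timestamp>: step: k/464, loss: <value>", and every epoch closes with a "Loss:" summary line that presumably reports the average of these per-step losses (this cannot be fully verified from this excerpt alone, since parts of each epoch fall outside it). A minimal sketch of that logging pattern, using hypothetical helper names that are not taken from train.py:

import datetime

def log_step(step_idx, total_steps, loss_value):
    # Mirrors the "<timestamp>: step: k/464, loss: x" lines seen in this log.
    print(f"{datetime.datetime.now()}: step: {step_idx}/{total_steps}, loss: {loss_value}")

def log_epoch_summary(epoch_losses):
    # Assumption: the end-of-epoch "Loss: 0.850" style line is the mean of the logged step losses.
    print("=" * 50)
    print(f"Loss: {sum(epoch_losses) / len(epoch_losses):.3f}")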
2023-01-22 10:20:15.937473: step: 404/464, loss: 0.4647718071937561 2023-01-22 10:20:16.596648: step: 406/464, loss: 4.217225074768066 2023-01-22 10:20:17.174089: step: 408/464, loss: 1.2776316404342651 2023-01-22 10:20:17.804554: step: 410/464, loss: 0.26782044768333435 2023-01-22 10:20:18.363167: step: 412/464, loss: 0.6994425654411316 2023-01-22 10:20:19.006480: step: 414/464, loss: 0.5873758792877197 2023-01-22 10:20:19.566938: step: 416/464, loss: 0.48875847458839417 2023-01-22 10:20:20.173045: step: 418/464, loss: 0.8659118413925171 2023-01-22 10:20:20.779843: step: 420/464, loss: 1.0143709182739258 2023-01-22 10:20:21.377889: step: 422/464, loss: 1.1839038133621216 2023-01-22 10:20:22.003308: step: 424/464, loss: 0.23417912423610687 2023-01-22 10:20:22.657110: step: 426/464, loss: 0.3920256495475769 2023-01-22 10:20:23.250596: step: 428/464, loss: 0.27268993854522705 2023-01-22 10:20:23.866286: step: 430/464, loss: 0.26965102553367615 2023-01-22 10:20:24.486592: step: 432/464, loss: 1.9450130462646484 2023-01-22 10:20:25.127224: step: 434/464, loss: 0.28021562099456787 2023-01-22 10:20:25.670499: step: 436/464, loss: 1.2575656175613403 2023-01-22 10:20:26.297237: step: 438/464, loss: 0.8403143286705017 2023-01-22 10:20:26.912781: step: 440/464, loss: 4.10808801651001 2023-01-22 10:20:27.551027: step: 442/464, loss: 0.5153523683547974 2023-01-22 10:20:28.169481: step: 444/464, loss: 0.261572927236557 2023-01-22 10:20:28.779103: step: 446/464, loss: 0.25158095359802246 2023-01-22 10:20:29.451668: step: 448/464, loss: 0.8015758395195007 2023-01-22 10:20:30.113362: step: 450/464, loss: 1.1201467514038086 2023-01-22 10:20:30.736540: step: 452/464, loss: 1.856890320777893 2023-01-22 10:20:31.359879: step: 454/464, loss: 1.2516084909439087 2023-01-22 10:20:31.982709: step: 456/464, loss: 0.6944349408149719 2023-01-22 10:20:32.595744: step: 458/464, loss: 3.0015387535095215 2023-01-22 10:20:33.190904: step: 460/464, loss: 0.2708805501461029 2023-01-22 10:20:33.872114: step: 462/464, loss: 0.2695115804672241 2023-01-22 10:20:34.462233: step: 464/464, loss: 0.16175496578216553 2023-01-22 10:20:35.092655: step: 466/464, loss: 0.9598912000656128 2023-01-22 10:20:35.652508: step: 468/464, loss: 0.7443088293075562 2023-01-22 10:20:36.280289: step: 470/464, loss: 0.40656954050064087 2023-01-22 10:20:36.953957: step: 472/464, loss: 1.2153903245925903 2023-01-22 10:20:37.574390: step: 474/464, loss: 0.4018632173538208 2023-01-22 10:20:38.130801: step: 476/464, loss: 1.0047473907470703 2023-01-22 10:20:38.754959: step: 478/464, loss: 0.20961833000183105 2023-01-22 10:20:39.378125: step: 480/464, loss: 0.3342295289039612 2023-01-22 10:20:39.969551: step: 482/464, loss: 0.5498560667037964 2023-01-22 10:20:40.651790: step: 484/464, loss: 0.690646767616272 2023-01-22 10:20:41.334733: step: 486/464, loss: 0.10926651954650879 2023-01-22 10:20:41.986794: step: 488/464, loss: 0.8070119619369507 2023-01-22 10:20:42.728209: step: 490/464, loss: 0.2085690051317215 2023-01-22 10:20:43.372300: step: 492/464, loss: 1.0676285028457642 2023-01-22 10:20:43.977379: step: 494/464, loss: 0.781734049320221 2023-01-22 10:20:44.586808: step: 496/464, loss: 0.7987548112869263 2023-01-22 10:20:45.175332: step: 498/464, loss: 0.43733590841293335 2023-01-22 10:20:45.798269: step: 500/464, loss: 0.9267998933792114 2023-01-22 10:20:46.448628: step: 502/464, loss: 0.6473803520202637 2023-01-22 10:20:47.078893: step: 504/464, loss: 0.3214409053325653 2023-01-22 10:20:47.751215: step: 506/464, loss: 0.14971300959587097 2023-01-22 
10:20:48.396195: step: 508/464, loss: 0.7291408777236938 2023-01-22 10:20:48.997621: step: 510/464, loss: 1.3302973508834839 2023-01-22 10:20:49.607881: step: 512/464, loss: 0.25704845786094666 2023-01-22 10:20:50.224078: step: 514/464, loss: 0.8286025524139404 2023-01-22 10:20:50.836245: step: 516/464, loss: 0.8494953513145447 2023-01-22 10:20:51.490479: step: 518/464, loss: 0.7644921541213989 2023-01-22 10:20:52.165229: step: 520/464, loss: 1.463140845298767 2023-01-22 10:20:52.797399: step: 522/464, loss: 0.38025563955307007 2023-01-22 10:20:53.417941: step: 524/464, loss: 0.5238245129585266 2023-01-22 10:20:54.061481: step: 526/464, loss: 0.9393330216407776 2023-01-22 10:20:54.688240: step: 528/464, loss: 0.7470495700836182 2023-01-22 10:20:55.350431: step: 530/464, loss: 2.6326611042022705 2023-01-22 10:20:55.941432: step: 532/464, loss: 0.3551231920719147 2023-01-22 10:20:56.529879: step: 534/464, loss: 0.4791729748249054 2023-01-22 10:20:57.216879: step: 536/464, loss: 3.0031039714813232 2023-01-22 10:20:57.827929: step: 538/464, loss: 0.38098952174186707 2023-01-22 10:20:58.444059: step: 540/464, loss: 0.855867862701416 2023-01-22 10:20:59.076827: step: 542/464, loss: 1.410219669342041 2023-01-22 10:20:59.737909: step: 544/464, loss: 0.7034099102020264 2023-01-22 10:21:00.404097: step: 546/464, loss: 1.93264901638031 2023-01-22 10:21:01.037990: step: 548/464, loss: 1.4344180822372437 2023-01-22 10:21:01.718449: step: 550/464, loss: 0.9101685285568237 2023-01-22 10:21:02.360336: step: 552/464, loss: 0.23653540015220642 2023-01-22 10:21:02.948589: step: 554/464, loss: 0.6157763600349426 2023-01-22 10:21:03.593274: step: 556/464, loss: 0.8293452262878418 2023-01-22 10:21:04.238773: step: 558/464, loss: 0.8490480780601501 2023-01-22 10:21:04.875486: step: 560/464, loss: 0.24026226997375488 2023-01-22 10:21:05.509752: step: 562/464, loss: 0.83760666847229 2023-01-22 10:21:06.148782: step: 564/464, loss: 0.6933444142341614 2023-01-22 10:21:06.701039: step: 566/464, loss: 0.6754300594329834 2023-01-22 10:21:07.360619: step: 568/464, loss: 1.0457748174667358 2023-01-22 10:21:08.042466: step: 570/464, loss: 2.0500330924987793 2023-01-22 10:21:08.730383: step: 572/464, loss: 0.4848044514656067 2023-01-22 10:21:09.375244: step: 574/464, loss: 0.38042065501213074 2023-01-22 10:21:10.054834: step: 576/464, loss: 0.8919004797935486 2023-01-22 10:21:10.705593: step: 578/464, loss: 0.5954698920249939 2023-01-22 10:21:11.334364: step: 580/464, loss: 0.5718483328819275 2023-01-22 10:21:11.927167: step: 582/464, loss: 0.4611566662788391 2023-01-22 10:21:12.564292: step: 584/464, loss: 0.8591204881668091 2023-01-22 10:21:13.138761: step: 586/464, loss: 0.7177731990814209 2023-01-22 10:21:13.738472: step: 588/464, loss: 2.1465821266174316 2023-01-22 10:21:14.398180: step: 590/464, loss: 3.7014565467834473 2023-01-22 10:21:15.063208: step: 592/464, loss: 1.2863422632217407 2023-01-22 10:21:15.685576: step: 594/464, loss: 1.9341545104980469 2023-01-22 10:21:16.304119: step: 596/464, loss: 0.5215336084365845 2023-01-22 10:21:16.926421: step: 598/464, loss: 0.18646299839019775 2023-01-22 10:21:17.668041: step: 600/464, loss: 2.8123488426208496 2023-01-22 10:21:18.302720: step: 602/464, loss: 0.28051960468292236 2023-01-22 10:21:18.974280: step: 604/464, loss: 1.1275231838226318 2023-01-22 10:21:19.622700: step: 606/464, loss: 0.9808369874954224 2023-01-22 10:21:20.298452: step: 608/464, loss: 0.6259108781814575 2023-01-22 10:21:20.893801: step: 610/464, loss: 0.3483581244945526 2023-01-22 10:21:21.517400: 
step: 612/464, loss: 0.32177531719207764 2023-01-22 10:21:22.181836: step: 614/464, loss: 0.5979961156845093 2023-01-22 10:21:22.756185: step: 616/464, loss: 0.26392918825149536 2023-01-22 10:21:23.358388: step: 618/464, loss: 0.785770058631897 2023-01-22 10:21:23.986474: step: 620/464, loss: 1.825857400894165 2023-01-22 10:21:24.647094: step: 622/464, loss: 0.4133148789405823 2023-01-22 10:21:25.276620: step: 624/464, loss: 0.19767507910728455 2023-01-22 10:21:25.963887: step: 626/464, loss: 0.2690700590610504 2023-01-22 10:21:26.605943: step: 628/464, loss: 0.8146762251853943 2023-01-22 10:21:27.229604: step: 630/464, loss: 1.2115219831466675 2023-01-22 10:21:27.944162: step: 632/464, loss: 1.1510913372039795 2023-01-22 10:21:28.580972: step: 634/464, loss: 0.7050117254257202 2023-01-22 10:21:29.164149: step: 636/464, loss: 0.6447600722312927 2023-01-22 10:21:29.801922: step: 638/464, loss: 0.751822292804718 2023-01-22 10:21:30.427198: step: 640/464, loss: 1.283268928527832 2023-01-22 10:21:31.062283: step: 642/464, loss: 2.2203149795532227 2023-01-22 10:21:31.730916: step: 644/464, loss: 0.4357527196407318 2023-01-22 10:21:32.366270: step: 646/464, loss: 0.6004408001899719 2023-01-22 10:21:32.954242: step: 648/464, loss: 0.4118346571922302 2023-01-22 10:21:33.566618: step: 650/464, loss: 1.280852198600769 2023-01-22 10:21:34.249764: step: 652/464, loss: 0.7061904668807983 2023-01-22 10:21:34.821575: step: 654/464, loss: 0.7301682233810425 2023-01-22 10:21:35.367345: step: 656/464, loss: 0.27377596497535706 2023-01-22 10:21:36.015420: step: 658/464, loss: 6.4098052978515625 2023-01-22 10:21:36.717183: step: 660/464, loss: 0.6805958151817322 2023-01-22 10:21:37.379271: step: 662/464, loss: 1.777627944946289 2023-01-22 10:21:38.027873: step: 664/464, loss: 0.647487223148346 2023-01-22 10:21:38.689961: step: 666/464, loss: 1.2705881595611572 2023-01-22 10:21:39.294659: step: 668/464, loss: 0.36145371198654175 2023-01-22 10:21:39.898412: step: 670/464, loss: 1.693847894668579 2023-01-22 10:21:40.531245: step: 672/464, loss: 2.836374044418335 2023-01-22 10:21:41.137228: step: 674/464, loss: 0.3379068672657013 2023-01-22 10:21:41.701834: step: 676/464, loss: 0.2719695270061493 2023-01-22 10:21:42.364148: step: 678/464, loss: 2.592514991760254 2023-01-22 10:21:43.111294: step: 680/464, loss: 0.5013712048530579 2023-01-22 10:21:43.745614: step: 682/464, loss: 1.502289056777954 2023-01-22 10:21:44.372642: step: 684/464, loss: 0.6350601315498352 2023-01-22 10:21:45.011860: step: 686/464, loss: 0.3374522924423218 2023-01-22 10:21:45.666353: step: 688/464, loss: 1.4543397426605225 2023-01-22 10:21:46.301127: step: 690/464, loss: 1.5196514129638672 2023-01-22 10:21:46.931842: step: 692/464, loss: 0.31878867745399475 2023-01-22 10:21:47.567089: step: 694/464, loss: 0.4354569911956787 2023-01-22 10:21:48.269300: step: 696/464, loss: 0.707033097743988 2023-01-22 10:21:48.913846: step: 698/464, loss: 0.7893030643463135 2023-01-22 10:21:49.525667: step: 700/464, loss: 1.016629695892334 2023-01-22 10:21:50.184636: step: 702/464, loss: 0.307487428188324 2023-01-22 10:21:50.815478: step: 704/464, loss: 1.902664303779602 2023-01-22 10:21:51.467682: step: 706/464, loss: 0.557648241519928 2023-01-22 10:21:52.038415: step: 708/464, loss: 0.23311075568199158 2023-01-22 10:21:52.632279: step: 710/464, loss: 0.310183584690094 2023-01-22 10:21:53.268945: step: 712/464, loss: 0.36607521772384644 2023-01-22 10:21:53.873723: step: 714/464, loss: 0.2555963397026062 2023-01-22 10:21:54.499579: step: 716/464, loss: 
0.9427339434623718 2023-01-22 10:21:55.146793: step: 718/464, loss: 1.401888132095337 2023-01-22 10:21:55.785442: step: 720/464, loss: 0.6344363689422607 2023-01-22 10:21:56.458380: step: 722/464, loss: 1.5003769397735596 2023-01-22 10:21:57.078486: step: 724/464, loss: 1.5138165950775146 2023-01-22 10:21:57.667198: step: 726/464, loss: 0.7028983235359192 2023-01-22 10:21:58.332996: step: 728/464, loss: 0.4118358790874481 2023-01-22 10:21:58.936813: step: 730/464, loss: 0.7308695316314697 2023-01-22 10:21:59.504831: step: 732/464, loss: 0.8298178911209106 2023-01-22 10:22:00.100110: step: 734/464, loss: 0.1830146610736847 2023-01-22 10:22:00.769217: step: 736/464, loss: 0.5564534068107605 2023-01-22 10:22:01.371849: step: 738/464, loss: 0.6699367165565491 2023-01-22 10:22:01.993193: step: 740/464, loss: 0.6066685318946838 2023-01-22 10:22:02.651597: step: 742/464, loss: 0.4273430407047272 2023-01-22 10:22:03.304700: step: 744/464, loss: 0.9662275910377502 2023-01-22 10:22:03.847715: step: 746/464, loss: 0.7614527940750122 2023-01-22 10:22:04.488059: step: 748/464, loss: 0.4929681718349457 2023-01-22 10:22:05.104258: step: 750/464, loss: 0.6777883768081665 2023-01-22 10:22:05.739272: step: 752/464, loss: 0.7379733920097351 2023-01-22 10:22:06.380567: step: 754/464, loss: 0.9559131264686584 2023-01-22 10:22:06.986846: step: 756/464, loss: 1.5169222354888916 2023-01-22 10:22:07.626343: step: 758/464, loss: 0.5358415246009827 2023-01-22 10:22:08.290765: step: 760/464, loss: 0.8351231813430786 2023-01-22 10:22:08.911352: step: 762/464, loss: 0.649901807308197 2023-01-22 10:22:09.509374: step: 764/464, loss: 0.19484145939350128 2023-01-22 10:22:10.194656: step: 766/464, loss: 0.13287372887134552 2023-01-22 10:22:10.816502: step: 768/464, loss: 0.17175209522247314 2023-01-22 10:22:11.426782: step: 770/464, loss: 1.7275898456573486 2023-01-22 10:22:12.134990: step: 772/464, loss: 0.49211055040359497 2023-01-22 10:22:12.742559: step: 774/464, loss: 0.42846372723579407 2023-01-22 10:22:13.416687: step: 776/464, loss: 0.6717638969421387 2023-01-22 10:22:14.035613: step: 778/464, loss: 0.29499301314353943 2023-01-22 10:22:14.663621: step: 780/464, loss: 0.5855064988136292 2023-01-22 10:22:15.279819: step: 782/464, loss: 0.24147354066371918 2023-01-22 10:22:15.844187: step: 784/464, loss: 0.8271307945251465 2023-01-22 10:22:16.478067: step: 786/464, loss: 0.5050127506256104 2023-01-22 10:22:17.107112: step: 788/464, loss: 0.4337732195854187 2023-01-22 10:22:17.714924: step: 790/464, loss: 0.6392048001289368 2023-01-22 10:22:18.366701: step: 792/464, loss: 2.8629088401794434 2023-01-22 10:22:18.957248: step: 794/464, loss: 0.2869815528392792 2023-01-22 10:22:19.609384: step: 796/464, loss: 1.8846849203109741 2023-01-22 10:22:20.202008: step: 798/464, loss: 2.81234073638916 2023-01-22 10:22:20.752630: step: 800/464, loss: 0.3832673132419586 2023-01-22 10:22:21.382088: step: 802/464, loss: 0.5510661005973816 2023-01-22 10:22:22.013121: step: 804/464, loss: 0.5066255331039429 2023-01-22 10:22:22.717665: step: 806/464, loss: 0.6949787139892578 2023-01-22 10:22:23.338724: step: 808/464, loss: 0.3027384281158447 2023-01-22 10:22:23.957177: step: 810/464, loss: 0.33572134375572205 2023-01-22 10:22:24.559837: step: 812/464, loss: 0.22994890809059143 2023-01-22 10:22:25.214986: step: 814/464, loss: 0.2857647240161896 2023-01-22 10:22:25.881615: step: 816/464, loss: 1.0291653871536255 2023-01-22 10:22:26.514951: step: 818/464, loss: 0.9672958850860596 2023-01-22 10:22:27.134540: step: 820/464, loss: 
0.20487524569034576 2023-01-22 10:22:27.759537: step: 822/464, loss: 0.283672958612442 2023-01-22 10:22:28.354634: step: 824/464, loss: 0.361631840467453 2023-01-22 10:22:29.017090: step: 826/464, loss: 1.1465498208999634 2023-01-22 10:22:29.676241: step: 828/464, loss: 0.4269474446773529 2023-01-22 10:22:30.265730: step: 830/464, loss: 0.15169720351696014 2023-01-22 10:22:30.953767: step: 832/464, loss: 0.7894524335861206 2023-01-22 10:22:31.585552: step: 834/464, loss: 0.31384778022766113 2023-01-22 10:22:32.221718: step: 836/464, loss: 0.25070080161094666 2023-01-22 10:22:32.827285: step: 838/464, loss: 1.0677363872528076 2023-01-22 10:22:33.475386: step: 840/464, loss: 0.20605960488319397 2023-01-22 10:22:34.144846: step: 842/464, loss: 0.5931063294410706 2023-01-22 10:22:34.788567: step: 844/464, loss: 0.7979549169540405 2023-01-22 10:22:35.430928: step: 846/464, loss: 0.42603933811187744 2023-01-22 10:22:36.084158: step: 848/464, loss: 0.4495619535446167 2023-01-22 10:22:36.680077: step: 850/464, loss: 0.15042515099048615 2023-01-22 10:22:37.350987: step: 852/464, loss: 0.997209906578064 2023-01-22 10:22:38.020483: step: 854/464, loss: 0.8001546859741211 2023-01-22 10:22:38.627781: step: 856/464, loss: 0.37791669368743896 2023-01-22 10:22:39.252024: step: 858/464, loss: 0.3678218722343445 2023-01-22 10:22:39.876753: step: 860/464, loss: 0.22284124791622162 2023-01-22 10:22:40.559423: step: 862/464, loss: 1.3511481285095215 2023-01-22 10:22:41.208406: step: 864/464, loss: 1.718711256980896 2023-01-22 10:22:41.883573: step: 866/464, loss: 0.7483352422714233 2023-01-22 10:22:42.606912: step: 868/464, loss: 0.3021269142627716 2023-01-22 10:22:43.222790: step: 870/464, loss: 0.4957874119281769 2023-01-22 10:22:43.772835: step: 872/464, loss: 0.546061635017395 2023-01-22 10:22:44.401320: step: 874/464, loss: 1.8420979976654053 2023-01-22 10:22:45.059860: step: 876/464, loss: 2.1327435970306396 2023-01-22 10:22:45.706846: step: 878/464, loss: 0.83139967918396 2023-01-22 10:22:46.325601: step: 880/464, loss: 0.2847437560558319 2023-01-22 10:22:46.990024: step: 882/464, loss: 1.017668604850769 2023-01-22 10:22:47.608691: step: 884/464, loss: 0.5310131311416626 2023-01-22 10:22:48.316624: step: 886/464, loss: 0.13240331411361694 2023-01-22 10:22:49.009439: step: 888/464, loss: 1.3001729249954224 2023-01-22 10:22:49.617565: step: 890/464, loss: 0.9886908531188965 2023-01-22 10:22:50.268872: step: 892/464, loss: 1.2750895023345947 2023-01-22 10:22:50.898468: step: 894/464, loss: 0.9979209303855896 2023-01-22 10:22:51.543722: step: 896/464, loss: 0.6268845796585083 2023-01-22 10:22:52.200778: step: 898/464, loss: 0.674816906452179 2023-01-22 10:22:52.833993: step: 900/464, loss: 0.2394643872976303 2023-01-22 10:22:53.450747: step: 902/464, loss: 1.133461356163025 2023-01-22 10:22:54.099942: step: 904/464, loss: 0.8986351490020752 2023-01-22 10:22:54.711954: step: 906/464, loss: 1.2165088653564453 2023-01-22 10:22:55.306451: step: 908/464, loss: 0.5075541734695435 2023-01-22 10:22:55.977028: step: 910/464, loss: 0.42147156596183777 2023-01-22 10:22:56.580871: step: 912/464, loss: 1.6376748085021973 2023-01-22 10:22:57.211863: step: 914/464, loss: 0.8599750995635986 2023-01-22 10:22:57.797938: step: 916/464, loss: 1.6517157554626465 2023-01-22 10:22:58.495413: step: 918/464, loss: 0.7676430344581604 2023-01-22 10:22:59.049709: step: 920/464, loss: 0.538670539855957 2023-01-22 10:22:59.660519: step: 922/464, loss: 1.3265511989593506 2023-01-22 10:23:00.320157: step: 924/464, loss: 
0.19481389224529266 2023-01-22 10:23:00.937151: step: 926/464, loss: 0.24411243200302124 2023-01-22 10:23:01.533215: step: 928/464, loss: 0.18539859354496002 2023-01-22 10:23:02.025558: step: 930/464, loss: 0.4154171943664551
==================================================
Loss: 0.850
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26830577783007764, 'r': 0.3084500135281385, 'f1': 0.2869808055380742}, 'combined': 0.2114595409227915, 'epoch': 5}
Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2678291574334983, 'r': 0.280360613423506, 'f1': 0.27395165274013583}, 'combined': 0.17884926551946692, 'epoch': 5}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2652954799107143, 'r': 0.3215702786796537, 'f1': 0.2907347725048923}, 'combined': 0.21422562184571012, 'epoch': 5}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2838815113234134, 'r': 0.2854441618444597, 'f1': 0.28466069204982714}, 'combined': 0.18584065905843636, 'epoch': 5}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2673109415054766, 'r': 0.3103439529220779, 'f1': 0.28722455239764616}, 'combined': 0.2116391438719498, 'epoch': 5}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.27701988258084803, 'r': 0.27041206886791036, 'f1': 0.27367609569732804}, 'combined': 0.17866936817545767, 'epoch': 5}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22733918128654967, 'r': 0.3702380952380952, 'f1': 0.2817028985507246}, 'combined': 0.18780193236714973, 'epoch': 5}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22727272727272727, 'r': 0.32608695652173914, 'f1': 0.26785714285714285}, 'combined': 0.13392857142857142, 'epoch': 5}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40476190476190477, 'r': 0.29310344827586204, 'f1': 0.34}, 'combined': 0.22666666666666668, 'epoch': 5}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30009433962264154, 'r': 0.3018026565464896, 'f1': 0.3009460737937559}, 'combined': 0.22174973858487274, 'epoch': 4}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2955621429043047, 'r': 0.27902150921883445, 'f1': 0.28705374709629977}, 'combined': 0.1874029644255636, 'epoch': 4}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25166666666666665, 'r': 0.35952380952380947, 'f1': 0.296078431372549}, 'combined': 0.19738562091503267, 'epoch': 4}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p':
0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2967885147601476, 'r': 0.3052360056925996, 'f1': 0.30095299345182414}, 'combined': 0.22175483728029147, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3028972827204988, 'r': 0.27591745129360407, 'f1': 0.2887785722813406}, 'combined': 0.18852901610077155, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:25:42.638332: step: 2/464, loss: 0.3751084506511688 2023-01-22 10:25:43.257903: step: 4/464, loss: 0.8991535305976868 2023-01-22 10:25:43.874827: step: 6/464, loss: 0.8244770169258118 2023-01-22 10:25:44.483426: step: 8/464, loss: 0.34533822536468506 2023-01-22 10:25:45.125677: step: 10/464, loss: 0.24936716258525848 2023-01-22 10:25:45.772987: step: 12/464, loss: 0.6579227447509766 2023-01-22 10:25:46.379061: step: 14/464, loss: 0.45752012729644775 2023-01-22 10:25:46.985754: step: 16/464, loss: 0.6975422501564026 2023-01-22 10:25:47.529011: step: 18/464, loss: 0.2402292788028717 2023-01-22 10:25:48.132057: step: 20/464, loss: 0.5649269819259644 2023-01-22 10:25:48.741435: step: 22/464, loss: 1.5623259544372559 2023-01-22 10:25:49.408122: step: 24/464, loss: 0.45161202549934387 2023-01-22 10:25:50.098535: step: 26/464, loss: 0.4018263816833496 2023-01-22 10:25:50.669133: step: 28/464, loss: 0.5523669719696045 2023-01-22 10:25:51.307865: step: 30/464, loss: 3.6010148525238037 2023-01-22 10:25:51.919676: step: 32/464, loss: 0.7040104269981384 2023-01-22 10:25:52.562791: step: 34/464, loss: 1.5320963859558105 2023-01-22 10:25:53.169305: step: 36/464, loss: 0.5989647507667542 2023-01-22 10:25:53.873222: step: 38/464, loss: 0.3131082057952881 2023-01-22 10:25:54.499020: step: 40/464, loss: 2.0335254669189453 2023-01-22 10:25:55.100780: step: 42/464, loss: 1.0707502365112305 2023-01-22 10:25:55.808472: step: 44/464, loss: 0.6073818206787109 2023-01-22 10:25:56.467520: step: 46/464, loss: 0.2975615859031677 2023-01-22 10:25:57.101658: step: 48/464, loss: 0.6314176917076111 2023-01-22 10:25:57.711585: step: 50/464, loss: 0.39682334661483765 2023-01-22 10:25:58.327271: step: 52/464, loss: 1.36899733543396 2023-01-22 10:25:58.993367: step: 54/464, loss: 0.5601557493209839 2023-01-22 10:25:59.611138: step: 56/464, loss: 0.4270496368408203 2023-01-22 10:26:00.206771: step: 58/464, loss: 0.6895309090614319 2023-01-22 10:26:00.812806: step: 60/464, loss: 0.9848753809928894 2023-01-22 10:26:01.563374: step: 62/464, loss: 0.35673847794532776 2023-01-22 10:26:02.204512: step: 64/464, loss: 0.5678101778030396 2023-01-22 10:26:02.771997: step: 66/464, loss: 0.17788580060005188 2023-01-22 10:26:03.432439: step: 68/464, loss: 0.21318335831165314 2023-01-22 10:26:04.026403: step: 70/464, loss: 0.1260576993227005 2023-01-22 10:26:04.675245: step: 72/464, loss: 0.21788544952869415 2023-01-22 10:26:05.321622: step: 74/464, loss: 0.25507208704948425 2023-01-22 10:26:05.878130: 
step: 76/464, loss: 0.387909859418869 2023-01-22 10:26:06.441822: step: 78/464, loss: 6.739643096923828 2023-01-22 10:26:07.062371: step: 80/464, loss: 1.3248546123504639 2023-01-22 10:26:07.671677: step: 82/464, loss: 0.20794707536697388 2023-01-22 10:26:08.234668: step: 84/464, loss: 0.36006394028663635 2023-01-22 10:26:08.850044: step: 86/464, loss: 0.3022036552429199 2023-01-22 10:26:09.439925: step: 88/464, loss: 0.21739986538887024 2023-01-22 10:26:10.030567: step: 90/464, loss: 0.35528916120529175 2023-01-22 10:26:10.671337: step: 92/464, loss: 0.5038557052612305 2023-01-22 10:26:11.270345: step: 94/464, loss: 0.6198413372039795 2023-01-22 10:26:11.964674: step: 96/464, loss: 0.5114516615867615 2023-01-22 10:26:12.548123: step: 98/464, loss: 1.2284324169158936 2023-01-22 10:26:13.224792: step: 100/464, loss: 1.3396265506744385 2023-01-22 10:26:13.975187: step: 102/464, loss: 0.38964733481407166 2023-01-22 10:26:14.632764: step: 104/464, loss: 1.7171967029571533 2023-01-22 10:26:15.251100: step: 106/464, loss: 0.1736004650592804 2023-01-22 10:26:15.889638: step: 108/464, loss: 0.13742183148860931 2023-01-22 10:26:16.431871: step: 110/464, loss: 0.6270983815193176 2023-01-22 10:26:17.073179: step: 112/464, loss: 0.7468340396881104 2023-01-22 10:26:17.744860: step: 114/464, loss: 0.22080263495445251 2023-01-22 10:26:18.343566: step: 116/464, loss: 0.24238267540931702 2023-01-22 10:26:19.005731: step: 118/464, loss: 0.4134461283683777 2023-01-22 10:26:19.642363: step: 120/464, loss: 0.24008819460868835 2023-01-22 10:26:20.256178: step: 122/464, loss: 0.34048280119895935 2023-01-22 10:26:20.848130: step: 124/464, loss: 0.2101212739944458 2023-01-22 10:26:21.468051: step: 126/464, loss: 0.31376171112060547 2023-01-22 10:26:22.127229: step: 128/464, loss: 0.6731464862823486 2023-01-22 10:26:22.747211: step: 130/464, loss: 0.5460286140441895 2023-01-22 10:26:23.369610: step: 132/464, loss: 0.8231890797615051 2023-01-22 10:26:24.017838: step: 134/464, loss: 0.27242153882980347 2023-01-22 10:26:24.631911: step: 136/464, loss: 0.16866791248321533 2023-01-22 10:26:25.289106: step: 138/464, loss: 0.11519985646009445 2023-01-22 10:26:25.931383: step: 140/464, loss: 0.4271545112133026 2023-01-22 10:26:26.604425: step: 142/464, loss: 0.34215304255485535 2023-01-22 10:26:27.267427: step: 144/464, loss: 0.24908070266246796 2023-01-22 10:26:27.941243: step: 146/464, loss: 0.572262167930603 2023-01-22 10:26:28.527596: step: 148/464, loss: 0.8973062038421631 2023-01-22 10:26:29.122897: step: 150/464, loss: 0.3133440315723419 2023-01-22 10:26:29.895234: step: 152/464, loss: 0.24786102771759033 2023-01-22 10:26:30.539399: step: 154/464, loss: 0.298935204744339 2023-01-22 10:26:31.171709: step: 156/464, loss: 0.28106117248535156 2023-01-22 10:26:31.797866: step: 158/464, loss: 0.4187696874141693 2023-01-22 10:26:32.404313: step: 160/464, loss: 2.7460684776306152 2023-01-22 10:26:33.035200: step: 162/464, loss: 1.8586090803146362 2023-01-22 10:26:33.699341: step: 164/464, loss: 1.2428754568099976 2023-01-22 10:26:34.322958: step: 166/464, loss: 2.1811106204986572 2023-01-22 10:26:34.980062: step: 168/464, loss: 1.0358861684799194 2023-01-22 10:26:35.625764: step: 170/464, loss: 0.3583020865917206 2023-01-22 10:26:36.237590: step: 172/464, loss: 0.6705204248428345 2023-01-22 10:26:36.789272: step: 174/464, loss: 0.3304373621940613 2023-01-22 10:26:37.415462: step: 176/464, loss: 0.9637370705604553 2023-01-22 10:26:38.053026: step: 178/464, loss: 0.09790074080228806 2023-01-22 10:26:38.703297: step: 180/464, 
loss: 0.2238306850194931 2023-01-22 10:26:39.291264: step: 182/464, loss: 1.275614619255066 2023-01-22 10:26:39.888450: step: 184/464, loss: 0.7819212079048157 2023-01-22 10:26:40.486590: step: 186/464, loss: 0.9053548574447632 2023-01-22 10:26:41.104739: step: 188/464, loss: 0.3270997107028961 2023-01-22 10:26:41.770765: step: 190/464, loss: 0.5571175813674927 2023-01-22 10:26:42.369314: step: 192/464, loss: 1.6119208335876465 2023-01-22 10:26:42.945177: step: 194/464, loss: 0.13971984386444092 2023-01-22 10:26:43.549953: step: 196/464, loss: 0.4664154648780823 2023-01-22 10:26:44.172575: step: 198/464, loss: 0.6606048941612244 2023-01-22 10:26:44.704030: step: 200/464, loss: 0.46219271421432495 2023-01-22 10:26:45.281217: step: 202/464, loss: 0.8494325876235962 2023-01-22 10:26:45.932909: step: 204/464, loss: 1.4003181457519531 2023-01-22 10:26:46.559247: step: 206/464, loss: 0.7771140933036804 2023-01-22 10:26:47.227830: step: 208/464, loss: 0.4586014151573181 2023-01-22 10:26:47.792307: step: 210/464, loss: 0.303373247385025 2023-01-22 10:26:48.437154: step: 212/464, loss: 0.2954310178756714 2023-01-22 10:26:49.156855: step: 214/464, loss: 0.1799723505973816 2023-01-22 10:26:49.760857: step: 216/464, loss: 0.6660267114639282 2023-01-22 10:26:50.358430: step: 218/464, loss: 1.0787197351455688 2023-01-22 10:26:51.022066: step: 220/464, loss: 0.3768312633037567 2023-01-22 10:26:51.621492: step: 222/464, loss: 0.40435791015625 2023-01-22 10:26:52.198545: step: 224/464, loss: 3.5142626762390137 2023-01-22 10:26:52.812714: step: 226/464, loss: 0.37316417694091797 2023-01-22 10:26:53.442683: step: 228/464, loss: 0.5245544910430908 2023-01-22 10:26:54.109298: step: 230/464, loss: 0.6997588276863098 2023-01-22 10:26:54.744589: step: 232/464, loss: 2.2032132148742676 2023-01-22 10:26:55.526476: step: 234/464, loss: 0.3766220510005951 2023-01-22 10:26:56.116462: step: 236/464, loss: 0.5885345339775085 2023-01-22 10:26:56.757187: step: 238/464, loss: 0.5886207818984985 2023-01-22 10:26:57.392976: step: 240/464, loss: 1.0273098945617676 2023-01-22 10:26:58.016287: step: 242/464, loss: 1.1725513935089111 2023-01-22 10:26:58.660557: step: 244/464, loss: 1.0625218152999878 2023-01-22 10:26:59.218163: step: 246/464, loss: 0.33822906017303467 2023-01-22 10:26:59.871202: step: 248/464, loss: 0.5773215293884277 2023-01-22 10:27:00.464328: step: 250/464, loss: 0.3922623097896576 2023-01-22 10:27:01.084469: step: 252/464, loss: 3.0866641998291016 2023-01-22 10:27:01.735407: step: 254/464, loss: 0.3755728006362915 2023-01-22 10:27:02.352602: step: 256/464, loss: 0.7650343179702759 2023-01-22 10:27:02.952462: step: 258/464, loss: 0.58442223072052 2023-01-22 10:27:03.545524: step: 260/464, loss: 0.4950478672981262 2023-01-22 10:27:04.233717: step: 262/464, loss: 0.7851177453994751 2023-01-22 10:27:04.931246: step: 264/464, loss: 0.22636084258556366 2023-01-22 10:27:05.559189: step: 266/464, loss: 0.2058066874742508 2023-01-22 10:27:06.162621: step: 268/464, loss: 0.27423572540283203 2023-01-22 10:27:06.771731: step: 270/464, loss: 0.6314135789871216 2023-01-22 10:27:07.479033: step: 272/464, loss: 1.1377860307693481 2023-01-22 10:27:08.089750: step: 274/464, loss: 1.4917114973068237 2023-01-22 10:27:08.745002: step: 276/464, loss: 0.4098925292491913 2023-01-22 10:27:09.365921: step: 278/464, loss: 0.2656930685043335 2023-01-22 10:27:09.966517: step: 280/464, loss: 0.7664421796798706 2023-01-22 10:27:10.554048: step: 282/464, loss: 1.195511817932129 2023-01-22 10:27:11.239052: step: 284/464, loss: 
0.8020053505897522 2023-01-22 10:27:11.831102: step: 286/464, loss: 0.9370518922805786 2023-01-22 10:27:12.505192: step: 288/464, loss: 0.5342237949371338 2023-01-22 10:27:13.144804: step: 290/464, loss: 0.8717308044433594 2023-01-22 10:27:13.750736: step: 292/464, loss: 0.27026355266571045 2023-01-22 10:27:14.472579: step: 294/464, loss: 7.652609825134277 2023-01-22 10:27:15.164858: step: 296/464, loss: 1.2849352359771729 2023-01-22 10:27:15.838852: step: 298/464, loss: 0.43978744745254517 2023-01-22 10:27:16.547362: step: 300/464, loss: 0.5479200482368469 2023-01-22 10:27:17.203173: step: 302/464, loss: 0.3849724233150482 2023-01-22 10:27:17.817639: step: 304/464, loss: 0.41675111651420593 2023-01-22 10:27:18.441181: step: 306/464, loss: 0.8829919099807739 2023-01-22 10:27:19.095794: step: 308/464, loss: 0.2806163728237152 2023-01-22 10:27:19.722675: step: 310/464, loss: 0.23910051584243774 2023-01-22 10:27:20.364911: step: 312/464, loss: 0.46648287773132324 2023-01-22 10:27:21.021063: step: 314/464, loss: 0.4761250615119934 2023-01-22 10:27:21.678384: step: 316/464, loss: 0.15457160770893097 2023-01-22 10:27:22.330997: step: 318/464, loss: 0.7718314528465271 2023-01-22 10:27:22.915086: step: 320/464, loss: 0.16734570264816284 2023-01-22 10:27:23.585070: step: 322/464, loss: 1.4428234100341797 2023-01-22 10:27:24.189377: step: 324/464, loss: 0.5153453946113586 2023-01-22 10:27:24.783871: step: 326/464, loss: 0.41749900579452515 2023-01-22 10:27:25.412545: step: 328/464, loss: 3.295747756958008 2023-01-22 10:27:26.029675: step: 330/464, loss: 0.4043913185596466 2023-01-22 10:27:26.629872: step: 332/464, loss: 4.637999534606934 2023-01-22 10:27:27.244032: step: 334/464, loss: 1.2028005123138428 2023-01-22 10:27:27.890395: step: 336/464, loss: 0.6247386932373047 2023-01-22 10:27:28.513380: step: 338/464, loss: 0.3917686343193054 2023-01-22 10:27:29.126331: step: 340/464, loss: 0.8415156602859497 2023-01-22 10:27:29.720495: step: 342/464, loss: 0.6987343430519104 2023-01-22 10:27:30.368996: step: 344/464, loss: 0.7762897610664368 2023-01-22 10:27:31.019054: step: 346/464, loss: 1.5636210441589355 2023-01-22 10:27:31.653028: step: 348/464, loss: 1.3135454654693604 2023-01-22 10:27:32.233995: step: 350/464, loss: 0.2784609794616699 2023-01-22 10:27:32.829131: step: 352/464, loss: 0.46578752994537354 2023-01-22 10:27:33.419788: step: 354/464, loss: 0.8856964111328125 2023-01-22 10:27:34.071004: step: 356/464, loss: 1.1614477634429932 2023-01-22 10:27:34.684290: step: 358/464, loss: 0.5184161067008972 2023-01-22 10:27:35.337294: step: 360/464, loss: 0.844288170337677 2023-01-22 10:27:35.937719: step: 362/464, loss: 0.4373282194137573 2023-01-22 10:27:36.531611: step: 364/464, loss: 1.5751210451126099 2023-01-22 10:27:37.211179: step: 366/464, loss: 0.9275869131088257 2023-01-22 10:27:37.747524: step: 368/464, loss: 0.567407488822937 2023-01-22 10:27:38.350884: step: 370/464, loss: 0.40654146671295166 2023-01-22 10:27:39.052105: step: 372/464, loss: 1.1315515041351318 2023-01-22 10:27:39.686458: step: 374/464, loss: 3.533780336380005 2023-01-22 10:27:40.406132: step: 376/464, loss: 0.2930464744567871 2023-01-22 10:27:41.028016: step: 378/464, loss: 0.4550985097885132 2023-01-22 10:27:41.635246: step: 380/464, loss: 0.45705240964889526 2023-01-22 10:27:42.243242: step: 382/464, loss: 0.24261905252933502 2023-01-22 10:27:42.849774: step: 384/464, loss: 0.7945497632026672 2023-01-22 10:27:43.425413: step: 386/464, loss: 0.4450487792491913 2023-01-22 10:27:44.023739: step: 388/464, loss: 
0.2671772241592407 2023-01-22 10:27:44.663328: step: 390/464, loss: 0.27833473682403564 2023-01-22 10:27:45.322697: step: 392/464, loss: 0.7007308602333069 2023-01-22 10:27:45.975598: step: 394/464, loss: 1.0011887550354004 2023-01-22 10:27:46.647789: step: 396/464, loss: 0.3089820444583893 2023-01-22 10:27:47.283135: step: 398/464, loss: 0.991563081741333 2023-01-22 10:27:47.911974: step: 400/464, loss: 1.2187684774398804 2023-01-22 10:27:48.469780: step: 402/464, loss: 0.5915836095809937 2023-01-22 10:27:49.131380: step: 404/464, loss: 0.9523316025733948 2023-01-22 10:27:49.711716: step: 406/464, loss: 1.4720518589019775 2023-01-22 10:27:50.316322: step: 408/464, loss: 0.2306549847126007 2023-01-22 10:27:50.981746: step: 410/464, loss: 0.5047092437744141 2023-01-22 10:27:51.566260: step: 412/464, loss: 0.4012000858783722 2023-01-22 10:27:52.168804: step: 414/464, loss: 1.0264873504638672 2023-01-22 10:27:52.782756: step: 416/464, loss: 0.4799194037914276 2023-01-22 10:27:53.439638: step: 418/464, loss: 0.6494021415710449 2023-01-22 10:27:54.077281: step: 420/464, loss: 1.1561380624771118 2023-01-22 10:27:54.677566: step: 422/464, loss: 0.3921971321105957 2023-01-22 10:27:55.264477: step: 424/464, loss: 0.6223727464675903 2023-01-22 10:27:55.901988: step: 426/464, loss: 0.4728529751300812 2023-01-22 10:27:56.642399: step: 428/464, loss: 0.3983856439590454 2023-01-22 10:27:57.246139: step: 430/464, loss: 0.37754401564598083 2023-01-22 10:27:57.859212: step: 432/464, loss: 0.5962048768997192 2023-01-22 10:27:58.481096: step: 434/464, loss: 0.258344441652298 2023-01-22 10:27:59.223305: step: 436/464, loss: 0.604759156703949 2023-01-22 10:27:59.865978: step: 438/464, loss: 0.5698238611221313 2023-01-22 10:28:00.531677: step: 440/464, loss: 0.9591268301010132 2023-01-22 10:28:01.127255: step: 442/464, loss: 0.13089722394943237 2023-01-22 10:28:01.773881: step: 444/464, loss: 0.37283504009246826 2023-01-22 10:28:02.430919: step: 446/464, loss: 0.8682311773300171 2023-01-22 10:28:03.024942: step: 448/464, loss: 0.28524962067604065 2023-01-22 10:28:03.633239: step: 450/464, loss: 1.1062726974487305 2023-01-22 10:28:04.234413: step: 452/464, loss: 4.481321334838867 2023-01-22 10:28:04.899890: step: 454/464, loss: 1.0563178062438965 2023-01-22 10:28:05.539089: step: 456/464, loss: 0.2760435938835144 2023-01-22 10:28:06.173379: step: 458/464, loss: 0.24600005149841309 2023-01-22 10:28:06.779567: step: 460/464, loss: 0.2210104614496231 2023-01-22 10:28:07.398384: step: 462/464, loss: 1.7358520030975342 2023-01-22 10:28:07.984938: step: 464/464, loss: 4.569807529449463 2023-01-22 10:28:08.591859: step: 466/464, loss: 0.37099695205688477 2023-01-22 10:28:09.265349: step: 468/464, loss: 0.5953925251960754 2023-01-22 10:28:09.877652: step: 470/464, loss: 0.3241918385028839 2023-01-22 10:28:10.451532: step: 472/464, loss: 0.5551413893699646 2023-01-22 10:28:11.100951: step: 474/464, loss: 0.4248945116996765 2023-01-22 10:28:11.692196: step: 476/464, loss: 0.6409351825714111 2023-01-22 10:28:12.351169: step: 478/464, loss: 0.18332374095916748 2023-01-22 10:28:12.969272: step: 480/464, loss: 0.6044588685035706 2023-01-22 10:28:13.665550: step: 482/464, loss: 0.5401633977890015 2023-01-22 10:28:14.285397: step: 484/464, loss: 0.6177085638046265 2023-01-22 10:28:14.947706: step: 486/464, loss: 0.361588716506958 2023-01-22 10:28:15.615337: step: 488/464, loss: 0.4207829535007477 2023-01-22 10:28:16.218367: step: 490/464, loss: 0.5759197473526001 2023-01-22 10:28:16.828165: step: 492/464, loss: 
0.22131508588790894 2023-01-22 10:28:17.446825: step: 494/464, loss: 1.4635684490203857 2023-01-22 10:28:18.058900: step: 496/464, loss: 0.40518543124198914 2023-01-22 10:28:18.641292: step: 498/464, loss: 1.0234875679016113 2023-01-22 10:28:19.295607: step: 500/464, loss: 2.552335739135742 2023-01-22 10:28:19.926703: step: 502/464, loss: 1.8648881912231445 2023-01-22 10:28:20.578593: step: 504/464, loss: 0.7620370388031006 2023-01-22 10:28:21.196685: step: 506/464, loss: 0.42183205485343933 2023-01-22 10:28:21.808583: step: 508/464, loss: 0.18581755459308624 2023-01-22 10:28:22.375950: step: 510/464, loss: 0.7951881289482117 2023-01-22 10:28:23.080557: step: 512/464, loss: 0.6534091234207153 2023-01-22 10:28:23.669374: step: 514/464, loss: 0.37720787525177 2023-01-22 10:28:24.361227: step: 516/464, loss: 0.39885827898979187 2023-01-22 10:28:24.980227: step: 518/464, loss: 1.3286685943603516 2023-01-22 10:28:25.652994: step: 520/464, loss: 0.5616076588630676 2023-01-22 10:28:26.300055: step: 522/464, loss: 0.16582699120044708 2023-01-22 10:28:26.949715: step: 524/464, loss: 0.18395079672336578 2023-01-22 10:28:27.542697: step: 526/464, loss: 0.9664916396141052 2023-01-22 10:28:28.125446: step: 528/464, loss: 0.26636892557144165 2023-01-22 10:28:28.703508: step: 530/464, loss: 1.677715539932251 2023-01-22 10:28:29.259775: step: 532/464, loss: 0.21337102353572845 2023-01-22 10:28:29.906065: step: 534/464, loss: 0.9010314345359802 2023-01-22 10:28:30.541594: step: 536/464, loss: 0.503167450428009 2023-01-22 10:28:31.142195: step: 538/464, loss: 0.18851390480995178 2023-01-22 10:28:31.719209: step: 540/464, loss: 0.16893097758293152 2023-01-22 10:28:32.351096: step: 542/464, loss: 0.21447324752807617 2023-01-22 10:28:32.984066: step: 544/464, loss: 0.4860786199569702 2023-01-22 10:28:33.679151: step: 546/464, loss: 0.9197517037391663 2023-01-22 10:28:34.380766: step: 548/464, loss: 0.9261701107025146 2023-01-22 10:28:35.086184: step: 550/464, loss: 1.1221946477890015 2023-01-22 10:28:35.701940: step: 552/464, loss: 0.8768857717514038 2023-01-22 10:28:36.372764: step: 554/464, loss: 0.7108152508735657 2023-01-22 10:28:37.065809: step: 556/464, loss: 0.24862165749073029 2023-01-22 10:28:37.714993: step: 558/464, loss: 0.7068761587142944 2023-01-22 10:28:38.330705: step: 560/464, loss: 1.691831111907959 2023-01-22 10:28:39.021941: step: 562/464, loss: 0.4378052353858948 2023-01-22 10:28:39.715314: step: 564/464, loss: 0.2569923400878906 2023-01-22 10:28:40.348251: step: 566/464, loss: 0.7471778988838196 2023-01-22 10:28:41.028104: step: 568/464, loss: 1.2964478731155396 2023-01-22 10:28:41.670357: step: 570/464, loss: 0.366839736700058 2023-01-22 10:28:42.325719: step: 572/464, loss: 0.4140085279941559 2023-01-22 10:28:42.880134: step: 574/464, loss: 0.33276864886283875 2023-01-22 10:28:43.587449: step: 576/464, loss: 0.5633888840675354 2023-01-22 10:28:44.203073: step: 578/464, loss: 0.847203254699707 2023-01-22 10:28:44.770803: step: 580/464, loss: 0.5664862394332886 2023-01-22 10:28:45.446220: step: 582/464, loss: 0.9577569365501404 2023-01-22 10:28:46.068389: step: 584/464, loss: 0.4908193051815033 2023-01-22 10:28:46.797254: step: 586/464, loss: 0.28701263666152954 2023-01-22 10:28:47.422143: step: 588/464, loss: 0.18440507352352142 2023-01-22 10:28:48.048053: step: 590/464, loss: 2.0263559818267822 2023-01-22 10:28:48.656298: step: 592/464, loss: 0.5223295092582703 2023-01-22 10:28:49.289798: step: 594/464, loss: 0.6685274839401245 2023-01-22 10:28:49.952914: step: 596/464, loss: 
0.8350842595100403 2023-01-22 10:28:50.533043: step: 598/464, loss: 0.297424852848053 2023-01-22 10:28:51.180033: step: 600/464, loss: 0.24484041333198547 2023-01-22 10:28:51.766057: step: 602/464, loss: 1.6383540630340576 2023-01-22 10:28:52.440545: step: 604/464, loss: 1.1496939659118652 2023-01-22 10:28:53.098559: step: 606/464, loss: 1.071155309677124 2023-01-22 10:28:53.720664: step: 608/464, loss: 0.5096967220306396 2023-01-22 10:28:54.319026: step: 610/464, loss: 1.3994693756103516 2023-01-22 10:28:54.994308: step: 612/464, loss: 1.5042353868484497 2023-01-22 10:28:55.574812: step: 614/464, loss: 0.18285751342773438 2023-01-22 10:28:56.162955: step: 616/464, loss: 0.4884994328022003 2023-01-22 10:28:56.798307: step: 618/464, loss: 0.25369390845298767 2023-01-22 10:28:57.365881: step: 620/464, loss: 0.2683863639831543 2023-01-22 10:28:57.989299: step: 622/464, loss: 0.5101136565208435 2023-01-22 10:28:58.551136: step: 624/464, loss: 0.21888434886932373 2023-01-22 10:28:59.181050: step: 626/464, loss: 0.7710225582122803 2023-01-22 10:28:59.756131: step: 628/464, loss: 0.8689543604850769 2023-01-22 10:29:00.353788: step: 630/464, loss: 5.544312000274658 2023-01-22 10:29:00.971164: step: 632/464, loss: 0.41301852464675903 2023-01-22 10:29:01.599275: step: 634/464, loss: 0.23599055409431458 2023-01-22 10:29:02.288780: step: 636/464, loss: 0.3407132923603058 2023-01-22 10:29:02.893406: step: 638/464, loss: 1.0151219367980957 2023-01-22 10:29:03.513174: step: 640/464, loss: 0.36375269293785095 2023-01-22 10:29:04.100896: step: 642/464, loss: 1.4476675987243652 2023-01-22 10:29:04.704540: step: 644/464, loss: 0.49768805503845215 2023-01-22 10:29:05.308388: step: 646/464, loss: 1.441270351409912 2023-01-22 10:29:06.054323: step: 648/464, loss: 0.802528440952301 2023-01-22 10:29:06.678095: step: 650/464, loss: 0.31411662697792053 2023-01-22 10:29:07.246460: step: 652/464, loss: 0.21255828440189362 2023-01-22 10:29:07.825666: step: 654/464, loss: 0.6642628312110901 2023-01-22 10:29:08.518863: step: 656/464, loss: 0.27438193559646606 2023-01-22 10:29:09.177930: step: 658/464, loss: 0.16417086124420166 2023-01-22 10:29:09.810161: step: 660/464, loss: 0.2855834364891052 2023-01-22 10:29:10.417078: step: 662/464, loss: 0.450242280960083 2023-01-22 10:29:11.011721: step: 664/464, loss: 0.5720327496528625 2023-01-22 10:29:11.645437: step: 666/464, loss: 1.0651787519454956 2023-01-22 10:29:12.256163: step: 668/464, loss: 0.4257969558238983 2023-01-22 10:29:12.933323: step: 670/464, loss: 0.6946537494659424 2023-01-22 10:29:13.567497: step: 672/464, loss: 0.3971732258796692 2023-01-22 10:29:14.277621: step: 674/464, loss: 0.17086222767829895 2023-01-22 10:29:14.803918: step: 676/464, loss: 0.6342722177505493 2023-01-22 10:29:15.445787: step: 678/464, loss: 0.3377092480659485 2023-01-22 10:29:16.043895: step: 680/464, loss: 0.13712920248508453 2023-01-22 10:29:16.674532: step: 682/464, loss: 0.5758203268051147 2023-01-22 10:29:17.276545: step: 684/464, loss: 0.5807439088821411 2023-01-22 10:29:17.965127: step: 686/464, loss: 0.9199244379997253 2023-01-22 10:29:18.591603: step: 688/464, loss: 0.15518754720687866 2023-01-22 10:29:19.229588: step: 690/464, loss: 0.343379408121109 2023-01-22 10:29:19.815570: step: 692/464, loss: 0.5740495920181274 2023-01-22 10:29:20.420483: step: 694/464, loss: 0.15535210072994232 2023-01-22 10:29:21.002199: step: 696/464, loss: 0.45480844378471375 2023-01-22 10:29:21.668847: step: 698/464, loss: 0.41847825050354004 2023-01-22 10:29:22.254806: step: 700/464, loss: 
0.4854355454444885 2023-01-22 10:29:22.918193: step: 702/464, loss: 0.3150523900985718 2023-01-22 10:29:23.583169: step: 704/464, loss: 0.6841596961021423 2023-01-22 10:29:24.189736: step: 706/464, loss: 2.1344518661499023 2023-01-22 10:29:24.882242: step: 708/464, loss: 0.7121348977088928 2023-01-22 10:29:25.391275: step: 710/464, loss: 0.12060072273015976 2023-01-22 10:29:26.033183: step: 712/464, loss: 1.5457645654678345 2023-01-22 10:29:26.703712: step: 714/464, loss: 0.530612587928772 2023-01-22 10:29:27.334540: step: 716/464, loss: 0.3051770329475403 2023-01-22 10:29:28.058810: step: 718/464, loss: 0.6609290838241577 2023-01-22 10:29:28.756806: step: 720/464, loss: 0.9934588670730591 2023-01-22 10:29:29.409355: step: 722/464, loss: 1.0542535781860352 2023-01-22 10:29:29.976863: step: 724/464, loss: 0.2150609791278839 2023-01-22 10:29:30.589648: step: 726/464, loss: 0.07730886340141296 2023-01-22 10:29:31.132398: step: 728/464, loss: 2.659024715423584 2023-01-22 10:29:31.787968: step: 730/464, loss: 0.14896048605442047 2023-01-22 10:29:32.401145: step: 732/464, loss: 0.49377694725990295 2023-01-22 10:29:33.002915: step: 734/464, loss: 0.36152413487434387 2023-01-22 10:29:33.638614: step: 736/464, loss: 0.44300612807273865 2023-01-22 10:29:34.267591: step: 738/464, loss: 0.38425812125205994 2023-01-22 10:29:34.922341: step: 740/464, loss: 0.18084846436977386 2023-01-22 10:29:35.527827: step: 742/464, loss: 0.7159069180488586 2023-01-22 10:29:36.207189: step: 744/464, loss: 0.2239290177822113 2023-01-22 10:29:36.889473: step: 746/464, loss: 0.21971198916435242 2023-01-22 10:29:37.541553: step: 748/464, loss: 0.1720772087574005 2023-01-22 10:29:38.185683: step: 750/464, loss: 1.9814131259918213 2023-01-22 10:29:38.802744: step: 752/464, loss: 0.7665748596191406 2023-01-22 10:29:39.468841: step: 754/464, loss: 0.5029324293136597 2023-01-22 10:29:40.136813: step: 756/464, loss: 0.35377073287963867 2023-01-22 10:29:40.838685: step: 758/464, loss: 0.6000191569328308 2023-01-22 10:29:41.470585: step: 760/464, loss: 0.7425803542137146 2023-01-22 10:29:42.166289: step: 762/464, loss: 0.19316062331199646 2023-01-22 10:29:42.803169: step: 764/464, loss: 0.5338562726974487 2023-01-22 10:29:43.383218: step: 766/464, loss: 0.33601799607276917 2023-01-22 10:29:44.030743: step: 768/464, loss: 0.46154558658599854 2023-01-22 10:29:44.669442: step: 770/464, loss: 0.3042210042476654 2023-01-22 10:29:45.340819: step: 772/464, loss: 0.4761200249195099 2023-01-22 10:29:46.115146: step: 774/464, loss: 0.559170126914978 2023-01-22 10:29:46.716396: step: 776/464, loss: 0.5083188414573669 2023-01-22 10:29:47.278862: step: 778/464, loss: 0.13107992708683014 2023-01-22 10:29:47.904607: step: 780/464, loss: 0.8679192662239075 2023-01-22 10:29:48.457101: step: 782/464, loss: 4.753159523010254 2023-01-22 10:29:49.043375: step: 784/464, loss: 0.3700558543205261 2023-01-22 10:29:49.632159: step: 786/464, loss: 0.7138491272926331 2023-01-22 10:29:50.222778: step: 788/464, loss: 0.270452082157135 2023-01-22 10:29:50.829263: step: 790/464, loss: 0.15871131420135498 2023-01-22 10:29:51.471258: step: 792/464, loss: 0.501331090927124 2023-01-22 10:29:52.037951: step: 794/464, loss: 0.5413829684257507 2023-01-22 10:29:52.706607: step: 796/464, loss: 0.3064698874950409 2023-01-22 10:29:53.318088: step: 798/464, loss: 0.557877779006958 2023-01-22 10:29:53.911542: step: 800/464, loss: 0.45851510763168335 2023-01-22 10:29:54.546142: step: 802/464, loss: 0.6986446976661682 2023-01-22 10:29:55.166884: step: 804/464, loss: 
0.48981723189353943 2023-01-22 10:29:55.750037: step: 806/464, loss: 0.5766662359237671 2023-01-22 10:29:56.328614: step: 808/464, loss: 0.21251314878463745 2023-01-22 10:29:56.889304: step: 810/464, loss: 0.16906926035881042 2023-01-22 10:29:57.501473: step: 812/464, loss: 0.42454585433006287 2023-01-22 10:29:58.156495: step: 814/464, loss: 0.5092841982841492 2023-01-22 10:29:58.697783: step: 816/464, loss: 0.31051722168922424 2023-01-22 10:29:59.315832: step: 818/464, loss: 1.3641095161437988 2023-01-22 10:30:00.001996: step: 820/464, loss: 13.601166725158691 2023-01-22 10:30:00.629101: step: 822/464, loss: 3.046140670776367 2023-01-22 10:30:01.306769: step: 824/464, loss: 0.29892173409461975 2023-01-22 10:30:01.938665: step: 826/464, loss: 0.2666424810886383 2023-01-22 10:30:02.544996: step: 828/464, loss: 0.5248275995254517 2023-01-22 10:30:03.163158: step: 830/464, loss: 0.6012407541275024 2023-01-22 10:30:03.830966: step: 832/464, loss: 1.2032874822616577 2023-01-22 10:30:04.534527: step: 834/464, loss: 2.7928154468536377 2023-01-22 10:30:05.224902: step: 836/464, loss: 0.5200483798980713 2023-01-22 10:30:05.857752: step: 838/464, loss: 1.0235695838928223 2023-01-22 10:30:06.419753: step: 840/464, loss: 1.1554210186004639 2023-01-22 10:30:07.070487: step: 842/464, loss: 0.6198804378509521 2023-01-22 10:30:07.718428: step: 844/464, loss: 0.7470586895942688 2023-01-22 10:30:08.346696: step: 846/464, loss: 0.3621271848678589 2023-01-22 10:30:09.051259: step: 848/464, loss: 0.42628931999206543 2023-01-22 10:30:09.646960: step: 850/464, loss: 0.5762926936149597 2023-01-22 10:30:10.249342: step: 852/464, loss: 1.4255084991455078 2023-01-22 10:30:10.985296: step: 854/464, loss: 0.3030523955821991 2023-01-22 10:30:11.613195: step: 856/464, loss: 0.26940327882766724 2023-01-22 10:30:12.242799: step: 858/464, loss: 0.9834439158439636 2023-01-22 10:30:12.901137: step: 860/464, loss: 0.8142734169960022 2023-01-22 10:30:13.511922: step: 862/464, loss: 0.23037023842334747 2023-01-22 10:30:14.130321: step: 864/464, loss: 1.6187280416488647 2023-01-22 10:30:14.806299: step: 866/464, loss: 1.5179880857467651 2023-01-22 10:30:15.437003: step: 868/464, loss: 0.785308837890625 2023-01-22 10:30:16.057673: step: 870/464, loss: 0.8136089444160461 2023-01-22 10:30:16.789610: step: 872/464, loss: 2.153433322906494 2023-01-22 10:30:17.421614: step: 874/464, loss: 0.34129735827445984 2023-01-22 10:30:18.039476: step: 876/464, loss: 0.4100603759288788 2023-01-22 10:30:18.647754: step: 878/464, loss: 1.1842362880706787 2023-01-22 10:30:19.292759: step: 880/464, loss: 0.5711566805839539 2023-01-22 10:30:19.882581: step: 882/464, loss: 0.3342677652835846 2023-01-22 10:30:20.483032: step: 884/464, loss: 0.9091795682907104 2023-01-22 10:30:21.139882: step: 886/464, loss: 0.49165284633636475 2023-01-22 10:30:21.826223: step: 888/464, loss: 0.9058147072792053 2023-01-22 10:30:22.445472: step: 890/464, loss: 0.14935161173343658 2023-01-22 10:30:23.063934: step: 892/464, loss: 0.3612709939479828 2023-01-22 10:30:23.671799: step: 894/464, loss: 0.23926648497581482 2023-01-22 10:30:24.249424: step: 896/464, loss: 0.18639202415943146 2023-01-22 10:30:24.864220: step: 898/464, loss: 0.6763578653335571 2023-01-22 10:30:25.495616: step: 900/464, loss: 1.2428151369094849 2023-01-22 10:30:26.162185: step: 902/464, loss: 0.260333389043808 2023-01-22 10:30:26.770663: step: 904/464, loss: 0.40388208627700806 2023-01-22 10:30:27.397736: step: 906/464, loss: 1.1095035076141357 2023-01-22 10:30:28.008228: step: 908/464, loss: 
1.2671597003936768 2023-01-22 10:30:28.545918: step: 910/464, loss: 0.5920587778091431 2023-01-22 10:30:29.149450: step: 912/464, loss: 1.0389655828475952 2023-01-22 10:30:29.757587: step: 914/464, loss: 0.8073325157165527 2023-01-22 10:30:30.334037: step: 916/464, loss: 0.3246050179004669 2023-01-22 10:30:30.960163: step: 918/464, loss: 0.20470765233039856 2023-01-22 10:30:31.603181: step: 920/464, loss: 0.7123910188674927 2023-01-22 10:30:32.237361: step: 922/464, loss: 0.13054336607456207 2023-01-22 10:30:32.871545: step: 924/464, loss: 0.4696577191352844 2023-01-22 10:30:33.519355: step: 926/464, loss: 1.0713528394699097 2023-01-22 10:30:34.080492: step: 928/464, loss: 0.5203782916069031 2023-01-22 10:30:34.578220: step: 930/464, loss: 0.20702479779720306 ================================================== Loss: 0.777 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2662921186245096, 'r': 0.3127795473027921, 'f1': 0.28766984542508106}, 'combined': 0.21196725452374393, 'epoch': 6} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2822796165112715, 'r': 0.30584607990808405, 'f1': 0.2935906887713004}, 'combined': 0.19167060510457953, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2684084722442192, 'r': 0.33410997683530896, 'f1': 0.29767702078141645}, 'combined': 0.2193409626810437, 'epoch': 6} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2947206499588104, 'r': 0.304454542984973, 'f1': 0.29950853055380916}, 'combined': 0.19553406657917075, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2824252010406812, 'r': 0.32369036324207107, 'f1': 0.30165308829101933}, 'combined': 0.22227069663548793, 'epoch': 6} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2824717200483764, 'r': 0.29387424819711816, 'f1': 0.288060189329909}, 'combined': 0.18806001997703903, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26543209876543206, 'r': 0.4095238095238095, 'f1': 0.3220973782771535}, 'combined': 0.21473158551810234, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.40217391304347827, 'f1': 0.30833333333333335}, 'combined': 0.15416666666666667, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40476190476190477, 'r': 0.29310344827586204, 'f1': 0.34}, 'combined': 0.22666666666666668, 'epoch': 6} New best chinese model... New best russian model... 
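A minimal sketch of how the summary figures above relate to one another, assuming the scorer uses the standard F1 = 2*p*r/(p+r) for the 'template' and 'slot' blocks and multiplies the two F1 values to get 'combined'. This is an assumption read off the logged numbers rather than taken from train.py (for Dev Chinese at epoch 6, 0.7368421052631579 * 0.28766984542508106 ≈ 0.21196725452374393), and the helper name below is illustrative:

    # Sketch only, not code from train.py. Assumes 'combined' = template_f1 * slot_f1,
    # which is consistent with the values printed above.
    def f1(p, r):
        # Standard F1 from precision and recall; 0.0 when both are zero.
        return 2 * p * r / (p + r) if p + r > 0 else 0.0

    # Dev Chinese, epoch 6 (precision/recall copied from the block above)
    template_f1 = f1(1.0, 0.5833333333333334)             # ~0.7368421052631579
    slot_f1 = f1(0.2662921186245096, 0.3127795473027921)  # ~0.28766984542508106
    combined = template_f1 * slot_f1                      # ~0.21196725452374393
    print(template_f1, slot_f1, combined)

The same product reproduces the Test and Sample rows as well (e.g. 0.6528497409326425 * 0.2935906887713004 ≈ 0.19167060510457953 for Test Chinese), so a per-language 'combined' score moves whenever either the template-level match or the slot-level extraction changes.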
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2662921186245096, 'r': 0.3127795473027921, 'f1': 0.28766984542508106}, 'combined': 0.21196725452374393, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2822796165112715, 'r': 0.30584607990808405, 'f1': 0.2935906887713004}, 'combined': 0.19167060510457953, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26543209876543206, 'r': 0.4095238095238095, 'f1': 0.3220973782771535}, 'combined': 0.21473158551810234, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2824252010406812, 'r': 0.32369036324207107, 'f1': 0.30165308829101933}, 'combined': 0.22227069663548793, 'epoch': 6} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2824717200483764, 'r': 0.29387424819711816, 'f1': 0.288060189329909}, 'combined': 0.18806001997703903, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40476190476190477, 'r': 0.29310344827586204, 'f1': 0.34}, 'combined': 0.22666666666666668, 'epoch': 6} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:33:31.528978: step: 2/464, loss: 0.5017930269241333 2023-01-22 10:33:32.171280: step: 4/464, loss: 0.126792773604393 2023-01-22 10:33:32.808476: step: 6/464, loss: 0.6471153497695923 2023-01-22 10:33:33.490226: step: 8/464, loss: 0.17067140340805054 2023-01-22 10:33:34.069581: step: 10/464, loss: 0.15195642411708832 2023-01-22 10:33:34.722493: step: 12/464, loss: 0.5397238731384277 2023-01-22 10:33:35.335303: step: 14/464, loss: 0.2516040802001953 2023-01-22 10:33:35.963394: step: 16/464, loss: 0.3369365334510803 2023-01-22 10:33:36.558962: step: 18/464, loss: 0.5769916772842407 2023-01-22 10:33:37.194806: step: 20/464, loss: 0.2530610263347626 2023-01-22 10:33:37.943137: step: 22/464, loss: 0.7644116878509521 2023-01-22 10:33:38.575177: step: 24/464, loss: 0.662712812423706 2023-01-22 10:33:39.188555: step: 26/464, loss: 0.6387485265731812 2023-01-22 10:33:39.799846: step: 28/464, loss: 0.19953793287277222 2023-01-22 10:33:40.415502: step: 30/464, loss: 0.3266963064670563 2023-01-22 10:33:41.024938: step: 32/464, loss: 0.3829626142978668 2023-01-22 10:33:41.710894: step: 34/464, loss: 0.29524633288383484 2023-01-22 10:33:42.376312: step: 36/464, loss: 0.5962638258934021 2023-01-22 
10:33:43.047704: step: 38/464, loss: 0.09318985044956207 2023-01-22 10:33:43.689574: step: 40/464, loss: 0.09047455340623856 2023-01-22 10:33:44.287587: step: 42/464, loss: 0.23816466331481934 2023-01-22 10:33:44.930557: step: 44/464, loss: 0.22475215792655945 2023-01-22 10:33:45.547234: step: 46/464, loss: 3.0335137844085693 2023-01-22 10:33:46.184920: step: 48/464, loss: 1.1206142902374268 2023-01-22 10:33:46.877868: step: 50/464, loss: 0.2682996094226837 2023-01-22 10:33:47.468074: step: 52/464, loss: 0.4726658761501312 2023-01-22 10:33:48.067771: step: 54/464, loss: 0.5618013143539429 2023-01-22 10:33:48.683195: step: 56/464, loss: 1.9542077779769897 2023-01-22 10:33:49.314381: step: 58/464, loss: 0.4544164836406708 2023-01-22 10:33:49.887825: step: 60/464, loss: 1.5395740270614624 2023-01-22 10:33:50.495307: step: 62/464, loss: 1.0734786987304688 2023-01-22 10:33:51.071624: step: 64/464, loss: 0.20159535109996796 2023-01-22 10:33:51.755868: step: 66/464, loss: 0.8156777024269104 2023-01-22 10:33:52.349021: step: 68/464, loss: 2.720673084259033 2023-01-22 10:33:52.954978: step: 70/464, loss: 0.6011072397232056 2023-01-22 10:33:53.732046: step: 72/464, loss: 0.6634661555290222 2023-01-22 10:33:54.336457: step: 74/464, loss: 1.25831937789917 2023-01-22 10:33:55.137875: step: 76/464, loss: 0.3787534534931183 2023-01-22 10:33:55.770945: step: 78/464, loss: 0.45780324935913086 2023-01-22 10:33:56.365904: step: 80/464, loss: 0.15130579471588135 2023-01-22 10:33:57.068634: step: 82/464, loss: 0.1392175853252411 2023-01-22 10:33:57.622426: step: 84/464, loss: 0.6962271332740784 2023-01-22 10:33:58.281289: step: 86/464, loss: 0.7196193933486938 2023-01-22 10:33:58.958865: step: 88/464, loss: 0.30974024534225464 2023-01-22 10:33:59.585605: step: 90/464, loss: 0.5273309946060181 2023-01-22 10:34:00.252578: step: 92/464, loss: 1.1774016618728638 2023-01-22 10:34:00.915403: step: 94/464, loss: 0.23848721385002136 2023-01-22 10:34:01.532497: step: 96/464, loss: 0.4920649230480194 2023-01-22 10:34:02.182556: step: 98/464, loss: 0.4335693418979645 2023-01-22 10:34:02.781486: step: 100/464, loss: 0.682791531085968 2023-01-22 10:34:03.467308: step: 102/464, loss: 0.5497469902038574 2023-01-22 10:34:04.109931: step: 104/464, loss: 0.4676864743232727 2023-01-22 10:34:04.665628: step: 106/464, loss: 0.16365735232830048 2023-01-22 10:34:05.320772: step: 108/464, loss: 0.5850328207015991 2023-01-22 10:34:05.916911: step: 110/464, loss: 0.2704683840274811 2023-01-22 10:34:06.567104: step: 112/464, loss: 0.09844101220369339 2023-01-22 10:34:07.269537: step: 114/464, loss: 0.2614676058292389 2023-01-22 10:34:07.933217: step: 116/464, loss: 0.1152581125497818 2023-01-22 10:34:08.553965: step: 118/464, loss: 0.8495705723762512 2023-01-22 10:34:09.219380: step: 120/464, loss: 0.5793513059616089 2023-01-22 10:34:09.848538: step: 122/464, loss: 0.21496832370758057 2023-01-22 10:34:10.398555: step: 124/464, loss: 0.3885643780231476 2023-01-22 10:34:11.045184: step: 126/464, loss: 1.2978103160858154 2023-01-22 10:34:11.637506: step: 128/464, loss: 0.6677225828170776 2023-01-22 10:34:12.267875: step: 130/464, loss: 1.0965478420257568 2023-01-22 10:34:12.910017: step: 132/464, loss: 0.2660011351108551 2023-01-22 10:34:13.507543: step: 134/464, loss: 0.17295731604099274 2023-01-22 10:34:14.147362: step: 136/464, loss: 0.13129302859306335 2023-01-22 10:34:14.738018: step: 138/464, loss: 0.24394181370735168 2023-01-22 10:34:15.350991: step: 140/464, loss: 0.4994749426841736 2023-01-22 10:34:15.965619: step: 142/464, loss: 
0.878116250038147 2023-01-22 10:34:16.527195: step: 144/464, loss: 0.31296879053115845 2023-01-22 10:34:17.114183: step: 146/464, loss: 0.15763776004314423 2023-01-22 10:34:17.731498: step: 148/464, loss: 0.25190410017967224 2023-01-22 10:34:18.383140: step: 150/464, loss: 1.953006386756897 2023-01-22 10:34:19.014027: step: 152/464, loss: 0.582210123538971 2023-01-22 10:34:19.665149: step: 154/464, loss: 4.223416328430176 2023-01-22 10:34:20.331074: step: 156/464, loss: 0.4259297549724579 2023-01-22 10:34:20.953817: step: 158/464, loss: 1.2600336074829102 2023-01-22 10:34:21.583902: step: 160/464, loss: 0.362914502620697 2023-01-22 10:34:22.159018: step: 162/464, loss: 0.26163020730018616 2023-01-22 10:34:22.818468: step: 164/464, loss: 0.45940202474594116 2023-01-22 10:34:23.458357: step: 166/464, loss: 0.4737444221973419 2023-01-22 10:34:24.121467: step: 168/464, loss: 0.425026535987854 2023-01-22 10:34:24.789781: step: 170/464, loss: 0.42416107654571533 2023-01-22 10:34:25.433210: step: 172/464, loss: 0.32647231221199036 2023-01-22 10:34:26.121248: step: 174/464, loss: 0.21015892922878265 2023-01-22 10:34:26.744607: step: 176/464, loss: 0.19564324617385864 2023-01-22 10:34:27.439147: step: 178/464, loss: 1.4059756994247437 2023-01-22 10:34:28.186805: step: 180/464, loss: 0.26178687810897827 2023-01-22 10:34:28.825327: step: 182/464, loss: 0.19771495461463928 2023-01-22 10:34:29.454262: step: 184/464, loss: 0.6399397850036621 2023-01-22 10:34:30.015748: step: 186/464, loss: 0.283091276884079 2023-01-22 10:34:30.684589: step: 188/464, loss: 0.25623446702957153 2023-01-22 10:34:31.384861: step: 190/464, loss: 0.27372244000434875 2023-01-22 10:34:32.004720: step: 192/464, loss: 0.18834808468818665 2023-01-22 10:34:32.593196: step: 194/464, loss: 0.09465673565864563 2023-01-22 10:34:33.166869: step: 196/464, loss: 0.4658953845500946 2023-01-22 10:34:33.803319: step: 198/464, loss: 0.32783472537994385 2023-01-22 10:34:34.434863: step: 200/464, loss: 0.7937031984329224 2023-01-22 10:34:35.044089: step: 202/464, loss: 0.13799446821212769 2023-01-22 10:34:35.658924: step: 204/464, loss: 0.3625771403312683 2023-01-22 10:34:36.282868: step: 206/464, loss: 0.3306332528591156 2023-01-22 10:34:36.942332: step: 208/464, loss: 0.2596589922904968 2023-01-22 10:34:37.531239: step: 210/464, loss: 0.2501373589038849 2023-01-22 10:34:38.128302: step: 212/464, loss: 0.42752233147621155 2023-01-22 10:34:38.755390: step: 214/464, loss: 0.2059427797794342 2023-01-22 10:34:39.372401: step: 216/464, loss: 1.4278908967971802 2023-01-22 10:34:40.122344: step: 218/464, loss: 0.21744832396507263 2023-01-22 10:34:40.744923: step: 220/464, loss: 0.6722741723060608 2023-01-22 10:34:41.318466: step: 222/464, loss: 0.7491254210472107 2023-01-22 10:34:42.013717: step: 224/464, loss: 0.4034697711467743 2023-01-22 10:34:42.618908: step: 226/464, loss: 0.5779275894165039 2023-01-22 10:34:43.239892: step: 228/464, loss: 0.1363086998462677 2023-01-22 10:34:43.902408: step: 230/464, loss: 0.7873183488845825 2023-01-22 10:34:44.481649: step: 232/464, loss: 0.16622385382652283 2023-01-22 10:34:45.081310: step: 234/464, loss: 0.5206785798072815 2023-01-22 10:34:45.688547: step: 236/464, loss: 0.15616653859615326 2023-01-22 10:34:46.277793: step: 238/464, loss: 0.2109265774488449 2023-01-22 10:34:46.887154: step: 240/464, loss: 0.5359236598014832 2023-01-22 10:34:47.508444: step: 242/464, loss: 2.9070255756378174 2023-01-22 10:34:48.141864: step: 244/464, loss: 0.3161059617996216 2023-01-22 10:34:48.706636: step: 246/464, loss: 
0.27038678526878357 2023-01-22 10:34:49.356114: step: 248/464, loss: 3.062499761581421 2023-01-22 10:34:49.970147: step: 250/464, loss: 0.4249669015407562 2023-01-22 10:34:50.546430: step: 252/464, loss: 1.4178982973098755 2023-01-22 10:34:51.103356: step: 254/464, loss: 0.5252469778060913 2023-01-22 10:34:51.791744: step: 256/464, loss: 0.22781912982463837 2023-01-22 10:34:52.401019: step: 258/464, loss: 0.12805120646953583 2023-01-22 10:34:53.033694: step: 260/464, loss: 0.7952200174331665 2023-01-22 10:34:53.682719: step: 262/464, loss: 4.063023567199707 2023-01-22 10:34:54.366898: step: 264/464, loss: 0.8507735133171082 2023-01-22 10:34:55.046486: step: 266/464, loss: 0.31542882323265076 2023-01-22 10:34:55.675114: step: 268/464, loss: 0.13520748913288116 2023-01-22 10:34:56.321910: step: 270/464, loss: 0.3633846938610077 2023-01-22 10:34:56.939663: step: 272/464, loss: 0.47487807273864746 2023-01-22 10:34:57.548969: step: 274/464, loss: 0.3144719898700714 2023-01-22 10:34:58.214333: step: 276/464, loss: 0.1806526780128479 2023-01-22 10:34:58.936523: step: 278/464, loss: 0.490356981754303 2023-01-22 10:34:59.641880: step: 280/464, loss: 0.217569962143898 2023-01-22 10:35:00.306751: step: 282/464, loss: 0.1519968956708908 2023-01-22 10:35:00.931906: step: 284/464, loss: 0.24375273287296295 2023-01-22 10:35:01.477936: step: 286/464, loss: 0.24348284304141998 2023-01-22 10:35:02.132101: step: 288/464, loss: 0.5655607581138611 2023-01-22 10:35:02.723124: step: 290/464, loss: 0.25451746582984924 2023-01-22 10:35:03.358579: step: 292/464, loss: 0.3051149249076843 2023-01-22 10:35:03.932387: step: 294/464, loss: 0.3064817190170288 2023-01-22 10:35:04.541092: step: 296/464, loss: 0.1428401917219162 2023-01-22 10:35:05.154672: step: 298/464, loss: 1.3966853618621826 2023-01-22 10:35:05.840281: step: 300/464, loss: 0.22358113527297974 2023-01-22 10:35:06.459122: step: 302/464, loss: 0.09851841628551483 2023-01-22 10:35:07.110884: step: 304/464, loss: 0.10326429456472397 2023-01-22 10:35:07.707651: step: 306/464, loss: 0.25323233008384705 2023-01-22 10:35:08.364020: step: 308/464, loss: 0.9947264790534973 2023-01-22 10:35:08.978931: step: 310/464, loss: 0.516456663608551 2023-01-22 10:35:09.613139: step: 312/464, loss: 2.4057650566101074 2023-01-22 10:35:10.288888: step: 314/464, loss: 0.6443356871604919 2023-01-22 10:35:10.950593: step: 316/464, loss: 0.5381366014480591 2023-01-22 10:35:11.599770: step: 318/464, loss: 0.8102694749832153 2023-01-22 10:35:12.223935: step: 320/464, loss: 2.134423017501831 2023-01-22 10:35:12.816847: step: 322/464, loss: 0.17424273490905762 2023-01-22 10:35:13.521550: step: 324/464, loss: 0.2220447063446045 2023-01-22 10:35:14.136709: step: 326/464, loss: 0.3784591853618622 2023-01-22 10:35:14.689374: step: 328/464, loss: 1.0891444683074951 2023-01-22 10:35:15.312355: step: 330/464, loss: 0.3071114718914032 2023-01-22 10:35:15.907331: step: 332/464, loss: 0.33946919441223145 2023-01-22 10:35:16.551507: step: 334/464, loss: 0.7581555247306824 2023-01-22 10:35:17.117165: step: 336/464, loss: 0.3212588429450989 2023-01-22 10:35:17.719152: step: 338/464, loss: 2.019733428955078 2023-01-22 10:35:18.360753: step: 340/464, loss: 0.4056606590747833 2023-01-22 10:35:18.982019: step: 342/464, loss: 0.6060588955879211 2023-01-22 10:35:19.605308: step: 344/464, loss: 0.4233732521533966 2023-01-22 10:35:20.275322: step: 346/464, loss: 0.4451332986354828 2023-01-22 10:35:20.874939: step: 348/464, loss: 1.2351199388504028 2023-01-22 10:35:21.531870: step: 350/464, loss: 
0.27783745527267456 2023-01-22 10:35:22.193402: step: 352/464, loss: 0.43464934825897217 2023-01-22 10:35:22.813116: step: 354/464, loss: 0.17238818109035492 2023-01-22 10:35:23.505145: step: 356/464, loss: 0.7823545336723328 2023-01-22 10:35:24.165046: step: 358/464, loss: 0.7749119997024536 2023-01-22 10:35:24.759507: step: 360/464, loss: 0.758780837059021 2023-01-22 10:35:25.387906: step: 362/464, loss: 0.22388920187950134 2023-01-22 10:35:25.978152: step: 364/464, loss: 0.2760586738586426 2023-01-22 10:35:26.658196: step: 366/464, loss: 0.3953625559806824 2023-01-22 10:35:27.329559: step: 368/464, loss: 0.6412069201469421 2023-01-22 10:35:28.042640: step: 370/464, loss: 1.1693072319030762 2023-01-22 10:35:28.712667: step: 372/464, loss: 0.2744199335575104 2023-01-22 10:35:29.328643: step: 374/464, loss: 0.8128865957260132 2023-01-22 10:35:29.964527: step: 376/464, loss: 0.328846275806427 2023-01-22 10:35:30.653053: step: 378/464, loss: 0.9959462881088257 2023-01-22 10:35:31.194578: step: 380/464, loss: 0.6619890928268433 2023-01-22 10:35:31.754057: step: 382/464, loss: 0.21763145923614502 2023-01-22 10:35:32.381229: step: 384/464, loss: 0.30981579422950745 2023-01-22 10:35:33.064806: step: 386/464, loss: 0.24547596275806427 2023-01-22 10:35:33.700863: step: 388/464, loss: 0.48541855812072754 2023-01-22 10:35:34.322555: step: 390/464, loss: 0.2570076584815979 2023-01-22 10:35:34.954522: step: 392/464, loss: 0.43805766105651855 2023-01-22 10:35:35.615187: step: 394/464, loss: 0.3368385434150696 2023-01-22 10:35:36.171157: step: 396/464, loss: 0.31896984577178955 2023-01-22 10:35:36.833475: step: 398/464, loss: 0.4800797998905182 2023-01-22 10:35:37.438096: step: 400/464, loss: 0.4680897295475006 2023-01-22 10:35:38.103235: step: 402/464, loss: 0.4075247049331665 2023-01-22 10:35:38.733827: step: 404/464, loss: 0.3742852807044983 2023-01-22 10:35:39.423395: step: 406/464, loss: 0.6806045174598694 2023-01-22 10:35:39.990353: step: 408/464, loss: 0.34732770919799805 2023-01-22 10:35:40.675645: step: 410/464, loss: 0.5915437936782837 2023-01-22 10:35:41.361903: step: 412/464, loss: 0.5556359887123108 2023-01-22 10:35:42.023177: step: 414/464, loss: 0.676139771938324 2023-01-22 10:35:42.627682: step: 416/464, loss: 0.20332291722297668 2023-01-22 10:35:43.238211: step: 418/464, loss: 0.34385713934898376 2023-01-22 10:35:43.912411: step: 420/464, loss: 0.9345203042030334 2023-01-22 10:35:44.446626: step: 422/464, loss: 0.31550469994544983 2023-01-22 10:35:45.137615: step: 424/464, loss: 0.47289949655532837 2023-01-22 10:35:45.748053: step: 426/464, loss: 0.098211370408535 2023-01-22 10:35:46.339899: step: 428/464, loss: 0.1142539530992508 2023-01-22 10:35:46.868695: step: 430/464, loss: 0.4738588035106659 2023-01-22 10:35:47.493216: step: 432/464, loss: 0.3524607717990875 2023-01-22 10:35:48.078035: step: 434/464, loss: 0.0996241495013237 2023-01-22 10:35:48.617760: step: 436/464, loss: 0.22331421077251434 2023-01-22 10:35:49.252421: step: 438/464, loss: 0.9253294467926025 2023-01-22 10:35:49.900117: step: 440/464, loss: 0.3071550130844116 2023-01-22 10:35:50.476287: step: 442/464, loss: 0.44775813817977905 2023-01-22 10:35:51.084892: step: 444/464, loss: 5.230406761169434 2023-01-22 10:35:51.702661: step: 446/464, loss: 0.38780683279037476 2023-01-22 10:35:52.357188: step: 448/464, loss: 0.16223810613155365 2023-01-22 10:35:53.079956: step: 450/464, loss: 0.394161194562912 2023-01-22 10:35:53.668432: step: 452/464, loss: 1.3057899475097656 2023-01-22 10:35:54.246609: step: 454/464, loss: 
2.0477466583251953 2023-01-22 10:35:54.919546: step: 456/464, loss: 0.2690102756023407 2023-01-22 10:35:55.604257: step: 458/464, loss: 0.4495788812637329 2023-01-22 10:35:56.199154: step: 460/464, loss: 2.760188341140747 2023-01-22 10:35:56.876186: step: 462/464, loss: 0.1652659773826599 2023-01-22 10:35:57.579133: step: 464/464, loss: 0.8136246204376221 2023-01-22 10:35:58.169881: step: 466/464, loss: 0.64471435546875 2023-01-22 10:35:58.802762: step: 468/464, loss: 1.094394564628601 2023-01-22 10:35:59.457874: step: 470/464, loss: 0.3615909814834595 2023-01-22 10:36:00.129345: step: 472/464, loss: 0.12893834710121155 2023-01-22 10:36:00.797775: step: 474/464, loss: 0.3952723443508148 2023-01-22 10:36:01.422132: step: 476/464, loss: 0.6580344438552856 2023-01-22 10:36:02.131145: step: 478/464, loss: 0.6497165560722351 2023-01-22 10:36:02.714369: step: 480/464, loss: 0.45428207516670227 2023-01-22 10:36:03.290429: step: 482/464, loss: 0.20757251977920532 2023-01-22 10:36:03.852450: step: 484/464, loss: 0.4601346552371979 2023-01-22 10:36:04.478174: step: 486/464, loss: 0.8326131701469421 2023-01-22 10:36:05.159526: step: 488/464, loss: 0.3128070831298828 2023-01-22 10:36:05.813336: step: 490/464, loss: 0.519420862197876 2023-01-22 10:36:06.450223: step: 492/464, loss: 0.44036027789115906 2023-01-22 10:36:07.054258: step: 494/464, loss: 0.20831894874572754 2023-01-22 10:36:07.690686: step: 496/464, loss: 0.6344039440155029 2023-01-22 10:36:08.315692: step: 498/464, loss: 0.98247891664505 2023-01-22 10:36:08.908943: step: 500/464, loss: 0.3340934216976166 2023-01-22 10:36:09.551310: step: 502/464, loss: 0.3000660538673401 2023-01-22 10:36:10.203362: step: 504/464, loss: 0.2559548616409302 2023-01-22 10:36:10.875369: step: 506/464, loss: 0.9728012084960938 2023-01-22 10:36:11.517732: step: 508/464, loss: 1.2932955026626587 2023-01-22 10:36:12.211786: step: 510/464, loss: 0.6466187834739685 2023-01-22 10:36:12.730888: step: 512/464, loss: 0.1755853146314621 2023-01-22 10:36:13.325183: step: 514/464, loss: 0.22444133460521698 2023-01-22 10:36:13.908184: step: 516/464, loss: 0.14731839299201965 2023-01-22 10:36:14.514839: step: 518/464, loss: 0.32209834456443787 2023-01-22 10:36:15.151591: step: 520/464, loss: 0.19032403826713562 2023-01-22 10:36:15.689629: step: 522/464, loss: 0.3677624464035034 2023-01-22 10:36:16.352504: step: 524/464, loss: 1.061833143234253 2023-01-22 10:36:16.952819: step: 526/464, loss: 0.13024339079856873 2023-01-22 10:36:17.566842: step: 528/464, loss: 0.40342581272125244 2023-01-22 10:36:18.154197: step: 530/464, loss: 0.12615080177783966 2023-01-22 10:36:18.738259: step: 532/464, loss: 0.3409889340400696 2023-01-22 10:36:19.430541: step: 534/464, loss: 0.3384639024734497 2023-01-22 10:36:20.089188: step: 536/464, loss: 0.2110147923231125 2023-01-22 10:36:20.693887: step: 538/464, loss: 2.6247472763061523 2023-01-22 10:36:21.306527: step: 540/464, loss: 0.30893072485923767 2023-01-22 10:36:21.916181: step: 542/464, loss: 0.2871909439563751 2023-01-22 10:36:22.538436: step: 544/464, loss: 0.7530099153518677 2023-01-22 10:36:23.186546: step: 546/464, loss: 0.7604941129684448 2023-01-22 10:36:23.823715: step: 548/464, loss: 0.5688288807868958 2023-01-22 10:36:24.442097: step: 550/464, loss: 0.5931585431098938 2023-01-22 10:36:25.073776: step: 552/464, loss: 0.3792456388473511 2023-01-22 10:36:25.713590: step: 554/464, loss: 0.5893974304199219 2023-01-22 10:36:26.353779: step: 556/464, loss: 0.23130621016025543 2023-01-22 10:36:27.007363: step: 558/464, loss: 
1.0410833358764648 2023-01-22 10:36:27.690803: step: 560/464, loss: 0.344408243894577 2023-01-22 10:36:28.425620: step: 562/464, loss: 0.7938024401664734 2023-01-22 10:36:29.048621: step: 564/464, loss: 1.2642261981964111 2023-01-22 10:36:29.706335: step: 566/464, loss: 0.3455996811389923 2023-01-22 10:36:30.316528: step: 568/464, loss: 0.5413110256195068 2023-01-22 10:36:30.986206: step: 570/464, loss: 0.1074780598282814 2023-01-22 10:36:31.553032: step: 572/464, loss: 0.6459642648696899 2023-01-22 10:36:32.176633: step: 574/464, loss: 0.5658280849456787 2023-01-22 10:36:32.864397: step: 576/464, loss: 0.42404305934906006 2023-01-22 10:36:33.488682: step: 578/464, loss: 0.18226858973503113 2023-01-22 10:36:34.152327: step: 580/464, loss: 2.8185646533966064 2023-01-22 10:36:34.780651: step: 582/464, loss: 0.381437748670578 2023-01-22 10:36:35.503725: step: 584/464, loss: 1.764461636543274 2023-01-22 10:36:36.154807: step: 586/464, loss: 0.28118273615837097 2023-01-22 10:36:36.785939: step: 588/464, loss: 0.4842296838760376 2023-01-22 10:36:37.393089: step: 590/464, loss: 0.3451806604862213 2023-01-22 10:36:38.051822: step: 592/464, loss: 0.38013893365859985 2023-01-22 10:36:38.580177: step: 594/464, loss: 0.2910463213920593 2023-01-22 10:36:39.250045: step: 596/464, loss: 0.2851117253303528 2023-01-22 10:36:39.860942: step: 598/464, loss: 0.8281416893005371 2023-01-22 10:36:40.511867: step: 600/464, loss: 1.1513570547103882 2023-01-22 10:36:41.137807: step: 602/464, loss: 0.38009434938430786 2023-01-22 10:36:41.816981: step: 604/464, loss: 0.43365761637687683 2023-01-22 10:36:42.475330: step: 606/464, loss: 0.8094660043716431 2023-01-22 10:36:43.087323: step: 608/464, loss: 0.24837060272693634 2023-01-22 10:36:43.724993: step: 610/464, loss: 0.9438183307647705 2023-01-22 10:36:44.326550: step: 612/464, loss: 0.4907517731189728 2023-01-22 10:36:44.951324: step: 614/464, loss: 0.30295583605766296 2023-01-22 10:36:45.538367: step: 616/464, loss: 0.1532827913761139 2023-01-22 10:36:46.199400: step: 618/464, loss: 0.47530195116996765 2023-01-22 10:36:46.777626: step: 620/464, loss: 0.32910770177841187 2023-01-22 10:36:47.396749: step: 622/464, loss: 0.25522682070732117 2023-01-22 10:36:48.051455: step: 624/464, loss: 0.5116230249404907 2023-01-22 10:36:48.616710: step: 626/464, loss: 0.3476697504520416 2023-01-22 10:36:49.335979: step: 628/464, loss: 0.1793689727783203 2023-01-22 10:36:49.959482: step: 630/464, loss: 0.7922105193138123 2023-01-22 10:36:50.569536: step: 632/464, loss: 0.3281751275062561 2023-01-22 10:36:51.198821: step: 634/464, loss: 0.13465668261051178 2023-01-22 10:36:51.842182: step: 636/464, loss: 0.7905817031860352 2023-01-22 10:36:52.402074: step: 638/464, loss: 1.7722148895263672 2023-01-22 10:36:53.044501: step: 640/464, loss: 0.4341278076171875 2023-01-22 10:36:53.632666: step: 642/464, loss: 0.5807147026062012 2023-01-22 10:36:54.307694: step: 644/464, loss: 0.25977441668510437 2023-01-22 10:36:54.920972: step: 646/464, loss: 0.6894766092300415 2023-01-22 10:36:55.506576: step: 648/464, loss: 0.36047056317329407 2023-01-22 10:36:56.042485: step: 650/464, loss: 0.6689058542251587 2023-01-22 10:36:56.687250: step: 652/464, loss: 0.8208218216896057 2023-01-22 10:36:57.331090: step: 654/464, loss: 0.21951250731945038 2023-01-22 10:36:57.992717: step: 656/464, loss: 0.6928043365478516 2023-01-22 10:36:58.612078: step: 658/464, loss: 0.7265662550926208 2023-01-22 10:36:59.246452: step: 660/464, loss: 0.390576034784317 2023-01-22 10:36:59.916375: step: 662/464, loss: 
0.728776752948761 2023-01-22 10:37:00.505973: step: 664/464, loss: 0.3489152193069458 2023-01-22 10:37:01.129325: step: 666/464, loss: 0.29477861523628235 2023-01-22 10:37:01.664685: step: 668/464, loss: 0.95859295129776 2023-01-22 10:37:02.253082: step: 670/464, loss: 0.4374726116657257 2023-01-22 10:37:02.849638: step: 672/464, loss: 0.29404279589653015 2023-01-22 10:37:03.450030: step: 674/464, loss: 0.20971252024173737 2023-01-22 10:37:04.054202: step: 676/464, loss: 0.49006038904190063 2023-01-22 10:37:04.744307: step: 678/464, loss: 0.49993690848350525 2023-01-22 10:37:05.411015: step: 680/464, loss: 0.0939859002828598 2023-01-22 10:37:05.992567: step: 682/464, loss: 0.21504995226860046 2023-01-22 10:37:06.597473: step: 684/464, loss: 0.27052539587020874 2023-01-22 10:37:07.229724: step: 686/464, loss: 0.4174782335758209 2023-01-22 10:37:07.853616: step: 688/464, loss: 0.28174564242362976 2023-01-22 10:37:08.442225: step: 690/464, loss: 0.7273067235946655 2023-01-22 10:37:09.042711: step: 692/464, loss: 0.2717307209968567 2023-01-22 10:37:09.685090: step: 694/464, loss: 0.5884332060813904 2023-01-22 10:37:10.279180: step: 696/464, loss: 0.15132024884223938 2023-01-22 10:37:10.972661: step: 698/464, loss: 1.1313226222991943 2023-01-22 10:37:11.628087: step: 700/464, loss: 0.2777230143547058 2023-01-22 10:37:12.333952: step: 702/464, loss: 0.385998010635376 2023-01-22 10:37:12.949948: step: 704/464, loss: 0.6033426523208618 2023-01-22 10:37:13.538798: step: 706/464, loss: 0.635158896446228 2023-01-22 10:37:14.158358: step: 708/464, loss: 0.6942959427833557 2023-01-22 10:37:14.787053: step: 710/464, loss: 0.2962024211883545 2023-01-22 10:37:15.332854: step: 712/464, loss: 0.5487346053123474 2023-01-22 10:37:15.958091: step: 714/464, loss: 1.2508326768875122 2023-01-22 10:37:16.519120: step: 716/464, loss: 0.24779725074768066 2023-01-22 10:37:17.131712: step: 718/464, loss: 0.41226813197135925 2023-01-22 10:37:17.803926: step: 720/464, loss: 0.23897162079811096 2023-01-22 10:37:18.410515: step: 722/464, loss: 2.119680404663086 2023-01-22 10:37:19.048248: step: 724/464, loss: 5.4505295753479 2023-01-22 10:37:19.659268: step: 726/464, loss: 0.42558956146240234 2023-01-22 10:37:20.336410: step: 728/464, loss: 1.017122507095337 2023-01-22 10:37:20.975690: step: 730/464, loss: 1.044908881187439 2023-01-22 10:37:21.692516: step: 732/464, loss: 0.33385512232780457 2023-01-22 10:37:22.332467: step: 734/464, loss: 0.29980790615081787 2023-01-22 10:37:22.926835: step: 736/464, loss: 0.31169456243515015 2023-01-22 10:37:23.605767: step: 738/464, loss: 0.1868811845779419 2023-01-22 10:37:24.247365: step: 740/464, loss: 0.3444088101387024 2023-01-22 10:37:24.911448: step: 742/464, loss: 0.8411497473716736 2023-01-22 10:37:25.494969: step: 744/464, loss: 0.2264484465122223 2023-01-22 10:37:26.126135: step: 746/464, loss: 0.9234243631362915 2023-01-22 10:37:26.748977: step: 748/464, loss: 0.4514959454536438 2023-01-22 10:37:27.333146: step: 750/464, loss: 0.6468408107757568 2023-01-22 10:37:27.943811: step: 752/464, loss: 1.5808587074279785 2023-01-22 10:37:28.545274: step: 754/464, loss: 0.9604551792144775 2023-01-22 10:37:29.149153: step: 756/464, loss: 0.7801356315612793 2023-01-22 10:37:29.760700: step: 758/464, loss: 0.9812237024307251 2023-01-22 10:37:30.355225: step: 760/464, loss: 1.0901546478271484 2023-01-22 10:37:30.969295: step: 762/464, loss: 0.48776379227638245 2023-01-22 10:37:31.586222: step: 764/464, loss: 0.34306395053863525 2023-01-22 10:37:32.226859: step: 766/464, loss: 
0.42013850808143616 2023-01-22 10:37:32.844463: step: 768/464, loss: 0.15818996727466583 2023-01-22 10:37:33.623894: step: 770/464, loss: 0.7389655709266663 2023-01-22 10:37:34.300295: step: 772/464, loss: 0.5518840551376343 2023-01-22 10:37:34.961717: step: 774/464, loss: 0.4506901800632477 2023-01-22 10:37:35.599555: step: 776/464, loss: 0.24919630587100983 2023-01-22 10:37:36.209450: step: 778/464, loss: 0.33095237612724304 2023-01-22 10:37:36.825277: step: 780/464, loss: 0.397079735994339 2023-01-22 10:37:37.434329: step: 782/464, loss: 0.14219141006469727 2023-01-22 10:37:38.076772: step: 784/464, loss: 0.7468022108078003 2023-01-22 10:37:38.844359: step: 786/464, loss: 0.26718056201934814 2023-01-22 10:37:39.522107: step: 788/464, loss: 0.5908616781234741 2023-01-22 10:37:40.174718: step: 790/464, loss: 0.34508705139160156 2023-01-22 10:37:40.833119: step: 792/464, loss: 0.2724108099937439 2023-01-22 10:37:41.418595: step: 794/464, loss: 0.2742052376270294 2023-01-22 10:37:41.977917: step: 796/464, loss: 0.1287633329629898 2023-01-22 10:37:42.600899: step: 798/464, loss: 0.8055970668792725 2023-01-22 10:37:43.239372: step: 800/464, loss: 0.2703949511051178 2023-01-22 10:37:43.878571: step: 802/464, loss: 0.17353391647338867 2023-01-22 10:37:44.542534: step: 804/464, loss: 0.8829239010810852 2023-01-22 10:37:45.143561: step: 806/464, loss: 0.2704838216304779 2023-01-22 10:37:45.768652: step: 808/464, loss: 0.20319268107414246 2023-01-22 10:37:46.499371: step: 810/464, loss: 0.2762671709060669 2023-01-22 10:37:47.213945: step: 812/464, loss: 0.8066801428794861 2023-01-22 10:37:47.846622: step: 814/464, loss: 0.4635451138019562 2023-01-22 10:37:48.465874: step: 816/464, loss: 0.48344099521636963 2023-01-22 10:37:49.116621: step: 818/464, loss: 1.341935396194458 2023-01-22 10:37:49.706624: step: 820/464, loss: 0.2656901180744171 2023-01-22 10:37:50.305263: step: 822/464, loss: 0.21394850313663483 2023-01-22 10:37:50.939456: step: 824/464, loss: 0.6438600420951843 2023-01-22 10:37:51.566925: step: 826/464, loss: 0.5125842690467834 2023-01-22 10:37:52.228345: step: 828/464, loss: 0.7177165746688843 2023-01-22 10:37:52.857615: step: 830/464, loss: 1.0645838975906372 2023-01-22 10:37:53.496997: step: 832/464, loss: 0.374090313911438 2023-01-22 10:37:54.108616: step: 834/464, loss: 0.9594247341156006 2023-01-22 10:37:54.686324: step: 836/464, loss: 1.5267280340194702 2023-01-22 10:37:55.308810: step: 838/464, loss: 0.12863865494728088 2023-01-22 10:37:55.937520: step: 840/464, loss: 0.5739308595657349 2023-01-22 10:37:56.563887: step: 842/464, loss: 0.41546162962913513 2023-01-22 10:37:57.189155: step: 844/464, loss: 0.4548647105693817 2023-01-22 10:37:57.852100: step: 846/464, loss: 0.3953627049922943 2023-01-22 10:37:58.458859: step: 848/464, loss: 0.6522714495658875 2023-01-22 10:37:59.110251: step: 850/464, loss: 0.42764395475387573 2023-01-22 10:37:59.775122: step: 852/464, loss: 0.47073379158973694 2023-01-22 10:38:00.354341: step: 854/464, loss: 0.48062995076179504 2023-01-22 10:38:01.001710: step: 856/464, loss: 0.22322116792201996 2023-01-22 10:38:01.528437: step: 858/464, loss: 0.31415146589279175 2023-01-22 10:38:02.129745: step: 860/464, loss: 0.3948202133178711 2023-01-22 10:38:02.726317: step: 862/464, loss: 0.3536752462387085 2023-01-22 10:38:03.350004: step: 864/464, loss: 0.4739671051502228 2023-01-22 10:38:03.907577: step: 866/464, loss: 0.8376191854476929 2023-01-22 10:38:04.518016: step: 868/464, loss: 1.064820408821106 2023-01-22 10:38:05.166651: step: 870/464, loss: 
0.45677515864372253 2023-01-22 10:38:05.806154: step: 872/464, loss: 0.4026552140712738 2023-01-22 10:38:06.438012: step: 874/464, loss: 0.8032701015472412 2023-01-22 10:38:07.054048: step: 876/464, loss: 0.14895497262477875 2023-01-22 10:38:07.685191: step: 878/464, loss: 1.5880987644195557 2023-01-22 10:38:08.335705: step: 880/464, loss: 0.3673778176307678 2023-01-22 10:38:08.916610: step: 882/464, loss: 0.43994849920272827 2023-01-22 10:38:09.593041: step: 884/464, loss: 1.6903603076934814 2023-01-22 10:38:10.208745: step: 886/464, loss: 0.43105217814445496 2023-01-22 10:38:10.859540: step: 888/464, loss: 0.4356974959373474 2023-01-22 10:38:11.493846: step: 890/464, loss: 0.5950191617012024 2023-01-22 10:38:12.135476: step: 892/464, loss: 0.3932482898235321 2023-01-22 10:38:12.732229: step: 894/464, loss: 0.19387339055538177 2023-01-22 10:38:13.340887: step: 896/464, loss: 0.3198925256729126 2023-01-22 10:38:13.948104: step: 898/464, loss: 0.4456925392150879 2023-01-22 10:38:14.675325: step: 900/464, loss: 0.17775744199752808 2023-01-22 10:38:15.283805: step: 902/464, loss: 0.40187326073646545 2023-01-22 10:38:15.853408: step: 904/464, loss: 0.18394802510738373 2023-01-22 10:38:16.478270: step: 906/464, loss: 0.38583412766456604 2023-01-22 10:38:17.123206: step: 908/464, loss: 0.2537674903869629 2023-01-22 10:38:17.788658: step: 910/464, loss: 0.318142294883728 2023-01-22 10:38:18.520734: step: 912/464, loss: 0.6849666833877563 2023-01-22 10:38:19.159403: step: 914/464, loss: 0.23542797565460205 2023-01-22 10:38:19.843495: step: 916/464, loss: 1.5675033330917358 2023-01-22 10:38:20.471015: step: 918/464, loss: 1.523693561553955 2023-01-22 10:38:21.049445: step: 920/464, loss: 0.32572609186172485 2023-01-22 10:38:21.654243: step: 922/464, loss: 0.35879573225975037 2023-01-22 10:38:22.279588: step: 924/464, loss: 0.34379321336746216 2023-01-22 10:38:22.932279: step: 926/464, loss: 0.3060515522956848 2023-01-22 10:38:23.549264: step: 928/464, loss: 0.1787095069885254 2023-01-22 10:38:24.055811: step: 930/464, loss: 0.1375858634710312 ================================================== Loss: 0.586 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29396313012384445, 'r': 0.3045614213427307, 'f1': 0.29916844184085567}, 'combined': 0.2204399045143147, 'epoch': 7} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3124162270893963, 'r': 0.2831541011361195, 'f1': 0.2970662968566803}, 'combined': 0.19393965494270318, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2924276728579739, 'r': 0.31795266897081415, 'f1': 0.30465646645021643}, 'combined': 0.2244837121212121, 'epoch': 7} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3221670363225695, 'r': 0.28282064896636955, 'f1': 0.3012143635446224}, 'combined': 0.19664771920529753, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30749315693430657, 'r': 0.31974620493358635, 'f1': 0.31350000000000006}, 'combined': 0.23100000000000004, 'epoch': 7} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31591739826469223, 'r': 0.27186286199291865, 'f1': 0.2922391711758198}, 'combined': 0.19078826719250414, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 
'f1': 0.6666666666666666}, 'slot': {'p': 0.2688923395445134, 'r': 0.3534013605442176, 'f1': 0.30540858318636094}, 'combined': 0.2036057221242406, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42105263157894735, 'r': 0.27586206896551724, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 7} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2662921186245096, 'r': 0.3127795473027921, 'f1': 0.28766984542508106}, 'combined': 0.21196725452374393, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2822796165112715, 'r': 0.30584607990808405, 'f1': 0.2935906887713004}, 'combined': 0.19167060510457953, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26543209876543206, 'r': 0.4095238095238095, 'f1': 0.3220973782771535}, 'combined': 0.21473158551810234, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30749315693430657, 'r': 0.31974620493358635, 'f1': 0.31350000000000006}, 'combined': 0.23100000000000004, 'epoch': 7} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31591739826469223, 'r': 0.27186286199291865, 'f1': 0.2922391711758198}, 'combined': 0.19078826719250414, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42105263157894735, 'r': 0.27586206896551724, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 7} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:41:08.413488: step: 2/464, loss: 0.5256147980690002 2023-01-22 10:41:09.052060: step: 4/464, loss: 0.16271987557411194 2023-01-22 10:41:09.636036: step: 6/464, loss: 0.24483849108219147 2023-01-22 10:41:10.260503: step: 8/464, loss: 0.11545782536268234 2023-01-22 10:41:10.895222: step: 10/464, loss: 0.4342447817325592 2023-01-22 10:41:11.537584: step: 12/464, loss: 0.13910457491874695 2023-01-22 10:41:12.111549: step: 14/464, loss: 0.20827490091323853 2023-01-22 10:41:12.709380: step: 16/464, loss: 0.2899113893508911 2023-01-22 10:41:13.332983: step: 18/464, loss: 1.2569013833999634 2023-01-22 10:41:13.946130: step: 
20/464, loss: 0.6739532947540283 2023-01-22 10:41:14.558112: step: 22/464, loss: 0.6435673832893372 2023-01-22 10:41:15.112969: step: 24/464, loss: 0.2875503897666931 2023-01-22 10:41:15.745454: step: 26/464, loss: 0.6059324145317078 2023-01-22 10:41:16.431494: step: 28/464, loss: 0.37544775009155273 2023-01-22 10:41:17.085658: step: 30/464, loss: 0.6912835836410522 2023-01-22 10:41:17.682366: step: 32/464, loss: 0.20572799444198608 2023-01-22 10:41:18.203196: step: 34/464, loss: 0.49429890513420105 2023-01-22 10:41:18.882142: step: 36/464, loss: 0.45233234763145447 2023-01-22 10:41:19.562777: step: 38/464, loss: 0.16969938576221466 2023-01-22 10:41:20.227636: step: 40/464, loss: 0.11263077706098557 2023-01-22 10:41:20.854506: step: 42/464, loss: 0.5201073288917542 2023-01-22 10:41:21.519508: step: 44/464, loss: 0.3596522808074951 2023-01-22 10:41:22.106950: step: 46/464, loss: 0.4956699013710022 2023-01-22 10:41:22.789735: step: 48/464, loss: 0.17830289900302887 2023-01-22 10:41:23.409804: step: 50/464, loss: 0.30615612864494324 2023-01-22 10:41:24.001350: step: 52/464, loss: 0.20722505450248718 2023-01-22 10:41:24.637954: step: 54/464, loss: 0.13355082273483276 2023-01-22 10:41:25.247460: step: 56/464, loss: 0.3708045184612274 2023-01-22 10:41:25.890408: step: 58/464, loss: 0.3382740020751953 2023-01-22 10:41:26.484542: step: 60/464, loss: 0.2672390639781952 2023-01-22 10:41:27.159886: step: 62/464, loss: 0.3419777750968933 2023-01-22 10:41:27.802304: step: 64/464, loss: 0.22744600474834442 2023-01-22 10:41:28.425212: step: 66/464, loss: 0.5304995179176331 2023-01-22 10:41:29.034212: step: 68/464, loss: 0.5001859068870544 2023-01-22 10:41:29.643128: step: 70/464, loss: 1.7130024433135986 2023-01-22 10:41:30.231252: step: 72/464, loss: 0.48129764199256897 2023-01-22 10:41:30.842189: step: 74/464, loss: 0.40763619542121887 2023-01-22 10:41:31.491263: step: 76/464, loss: 0.3825840950012207 2023-01-22 10:41:32.143259: step: 78/464, loss: 0.35622477531433105 2023-01-22 10:41:32.696533: step: 80/464, loss: 3.721479654312134 2023-01-22 10:41:33.329458: step: 82/464, loss: 1.761260986328125 2023-01-22 10:41:34.052013: step: 84/464, loss: 0.5433917045593262 2023-01-22 10:41:34.661782: step: 86/464, loss: 3.278985023498535 2023-01-22 10:41:35.243050: step: 88/464, loss: 0.36821499466896057 2023-01-22 10:41:35.869376: step: 90/464, loss: 0.6947041749954224 2023-01-22 10:41:36.532349: step: 92/464, loss: 0.18669536709785461 2023-01-22 10:41:37.115352: step: 94/464, loss: 0.603237509727478 2023-01-22 10:41:37.755813: step: 96/464, loss: 0.5378149747848511 2023-01-22 10:41:38.379953: step: 98/464, loss: 0.9626664519309998 2023-01-22 10:41:38.973818: step: 100/464, loss: 0.14888140559196472 2023-01-22 10:41:39.608181: step: 102/464, loss: 0.12726211547851562 2023-01-22 10:41:40.201473: step: 104/464, loss: 0.18909582495689392 2023-01-22 10:41:40.805225: step: 106/464, loss: 0.2111038863658905 2023-01-22 10:41:41.390556: step: 108/464, loss: 0.28433895111083984 2023-01-22 10:41:41.985694: step: 110/464, loss: 0.1759897619485855 2023-01-22 10:41:42.516284: step: 112/464, loss: 1.5583981275558472 2023-01-22 10:41:43.131820: step: 114/464, loss: 6.815537929534912 2023-01-22 10:41:43.720273: step: 116/464, loss: 1.2100276947021484 2023-01-22 10:41:44.323088: step: 118/464, loss: 0.5964804887771606 2023-01-22 10:41:44.987751: step: 120/464, loss: 0.2303905189037323 2023-01-22 10:41:45.557185: step: 122/464, loss: 0.14260776340961456 2023-01-22 10:41:46.123537: step: 124/464, loss: 0.147199809551239 
2023-01-22 10:41:46.697322: step: 126/464, loss: 0.3138817846775055 2023-01-22 10:41:47.320482: step: 128/464, loss: 0.6843975186347961 2023-01-22 10:41:47.939694: step: 130/464, loss: 0.1680045872926712 2023-01-22 10:41:48.585625: step: 132/464, loss: 0.20160116255283356 2023-01-22 10:41:49.295844: step: 134/464, loss: 0.4476246237754822 2023-01-22 10:41:49.962380: step: 136/464, loss: 0.7357347011566162 2023-01-22 10:41:50.593574: step: 138/464, loss: 0.2661999464035034 2023-01-22 10:41:51.219850: step: 140/464, loss: 0.5534363985061646 2023-01-22 10:41:51.874840: step: 142/464, loss: 0.1794956773519516 2023-01-22 10:41:52.486202: step: 144/464, loss: 0.22185489535331726 2023-01-22 10:41:53.186160: step: 146/464, loss: 0.771228551864624 2023-01-22 10:41:53.815535: step: 148/464, loss: 1.5506646633148193 2023-01-22 10:41:54.478163: step: 150/464, loss: 0.5256957411766052 2023-01-22 10:41:55.155409: step: 152/464, loss: 0.30285948514938354 2023-01-22 10:41:55.836191: step: 154/464, loss: 0.6013203263282776 2023-01-22 10:41:56.480006: step: 156/464, loss: 0.38056620955467224 2023-01-22 10:41:57.113158: step: 158/464, loss: 0.1869347244501114 2023-01-22 10:41:57.768502: step: 160/464, loss: 0.6900219321250916 2023-01-22 10:41:58.424291: step: 162/464, loss: 0.124812051653862 2023-01-22 10:41:58.992033: step: 164/464, loss: 0.5109988451004028 2023-01-22 10:41:59.621015: step: 166/464, loss: 0.2805618643760681 2023-01-22 10:42:00.230298: step: 168/464, loss: 0.6953806281089783 2023-01-22 10:42:00.820464: step: 170/464, loss: 1.1952989101409912 2023-01-22 10:42:01.443772: step: 172/464, loss: 0.3715342581272125 2023-01-22 10:42:02.019905: step: 174/464, loss: 0.294126957654953 2023-01-22 10:42:02.712983: step: 176/464, loss: 0.9353902339935303 2023-01-22 10:42:03.362852: step: 178/464, loss: 0.13832412660121918 2023-01-22 10:42:04.081615: step: 180/464, loss: 0.29111751914024353 2023-01-22 10:42:04.660830: step: 182/464, loss: 0.9309651851654053 2023-01-22 10:42:05.312342: step: 184/464, loss: 0.5732741355895996 2023-01-22 10:42:05.941836: step: 186/464, loss: 2.244755744934082 2023-01-22 10:42:06.530878: step: 188/464, loss: 0.6004652976989746 2023-01-22 10:42:07.162770: step: 190/464, loss: 0.3898637592792511 2023-01-22 10:42:07.960764: step: 192/464, loss: 0.27575916051864624 2023-01-22 10:42:08.608816: step: 194/464, loss: 0.189696803689003 2023-01-22 10:42:09.283840: step: 196/464, loss: 0.34838491678237915 2023-01-22 10:42:09.910040: step: 198/464, loss: 0.3968013525009155 2023-01-22 10:42:10.538783: step: 200/464, loss: 0.3276688754558563 2023-01-22 10:42:11.135434: step: 202/464, loss: 0.22431330382823944 2023-01-22 10:42:11.825475: step: 204/464, loss: 0.18088360130786896 2023-01-22 10:42:12.423044: step: 206/464, loss: 0.3663378953933716 2023-01-22 10:42:13.039807: step: 208/464, loss: 0.7812042832374573 2023-01-22 10:42:13.702346: step: 210/464, loss: 0.11790280789136887 2023-01-22 10:42:14.311673: step: 212/464, loss: 0.6330567002296448 2023-01-22 10:42:14.888023: step: 214/464, loss: 0.20008674263954163 2023-01-22 10:42:15.492326: step: 216/464, loss: 0.29765304923057556 2023-01-22 10:42:16.120023: step: 218/464, loss: 0.4665645658969879 2023-01-22 10:42:16.817657: step: 220/464, loss: 1.2618739604949951 2023-01-22 10:42:17.451460: step: 222/464, loss: 0.2906162142753601 2023-01-22 10:42:18.054090: step: 224/464, loss: 0.20724472403526306 2023-01-22 10:42:18.653181: step: 226/464, loss: 0.3578889071941376 2023-01-22 10:42:19.250276: step: 228/464, loss: 0.4452539086341858 
2023-01-22 10:42:19.903229: step: 230/464, loss: 0.19588086009025574 2023-01-22 10:42:20.550676: step: 232/464, loss: 0.6037926077842712 2023-01-22 10:42:21.233479: step: 234/464, loss: 4.56406307220459 2023-01-22 10:42:21.940631: step: 236/464, loss: 0.6469090580940247 2023-01-22 10:42:22.559433: step: 238/464, loss: 0.6941478848457336 2023-01-22 10:42:23.220769: step: 240/464, loss: 0.3625757694244385 2023-01-22 10:42:23.851471: step: 242/464, loss: 0.6837671399116516 2023-01-22 10:42:24.549301: step: 244/464, loss: 0.07192370295524597 2023-01-22 10:42:25.187162: step: 246/464, loss: 0.5115048289299011 2023-01-22 10:42:25.771287: step: 248/464, loss: 0.26653966307640076 2023-01-22 10:42:26.400825: step: 250/464, loss: 0.12437698245048523 2023-01-22 10:42:27.022931: step: 252/464, loss: 0.18795190751552582 2023-01-22 10:42:27.624948: step: 254/464, loss: 0.47079092264175415 2023-01-22 10:42:28.209545: step: 256/464, loss: 0.10718715935945511 2023-01-22 10:42:28.869582: step: 258/464, loss: 0.1507108211517334 2023-01-22 10:42:29.458125: step: 260/464, loss: 2.347313642501831 2023-01-22 10:42:30.157736: step: 262/464, loss: 1.1006702184677124 2023-01-22 10:42:30.743361: step: 264/464, loss: 0.4398345947265625 2023-01-22 10:42:31.411652: step: 266/464, loss: 0.6085986495018005 2023-01-22 10:42:32.089331: step: 268/464, loss: 0.2565450966358185 2023-01-22 10:42:32.727184: step: 270/464, loss: 0.5254449844360352 2023-01-22 10:42:33.423653: step: 272/464, loss: 0.1736554354429245 2023-01-22 10:42:34.134681: step: 274/464, loss: 0.44962701201438904 2023-01-22 10:42:34.791248: step: 276/464, loss: 0.3792150914669037 2023-01-22 10:42:35.400279: step: 278/464, loss: 0.26783522963523865 2023-01-22 10:42:36.012437: step: 280/464, loss: 0.34746065735816956 2023-01-22 10:42:36.614960: step: 282/464, loss: 0.8172089457511902 2023-01-22 10:42:37.272494: step: 284/464, loss: 0.23747019469738007 2023-01-22 10:42:37.910813: step: 286/464, loss: 0.9747133255004883 2023-01-22 10:42:38.533449: step: 288/464, loss: 0.45197030901908875 2023-01-22 10:42:39.175182: step: 290/464, loss: 0.4176959693431854 2023-01-22 10:42:39.783916: step: 292/464, loss: 0.1349785178899765 2023-01-22 10:42:40.318326: step: 294/464, loss: 0.23899558186531067 2023-01-22 10:42:40.900012: step: 296/464, loss: 0.6888447999954224 2023-01-22 10:42:41.499397: step: 298/464, loss: 0.30062979459762573 2023-01-22 10:42:42.240510: step: 300/464, loss: 0.16735124588012695 2023-01-22 10:42:42.803071: step: 302/464, loss: 0.4858199954032898 2023-01-22 10:42:43.402077: step: 304/464, loss: 0.8488729000091553 2023-01-22 10:42:44.011387: step: 306/464, loss: 0.36384105682373047 2023-01-22 10:42:44.638128: step: 308/464, loss: 0.23121513426303864 2023-01-22 10:42:45.273204: step: 310/464, loss: 0.2734513282775879 2023-01-22 10:42:45.889586: step: 312/464, loss: 0.17578211426734924 2023-01-22 10:42:46.501955: step: 314/464, loss: 0.3193581998348236 2023-01-22 10:42:47.227938: step: 316/464, loss: 0.4633854031562805 2023-01-22 10:42:47.839151: step: 318/464, loss: 0.3597419857978821 2023-01-22 10:42:48.439217: step: 320/464, loss: 0.1768357753753662 2023-01-22 10:42:49.037956: step: 322/464, loss: 0.9580966234207153 2023-01-22 10:42:49.623771: step: 324/464, loss: 0.8831745386123657 2023-01-22 10:42:50.223749: step: 326/464, loss: 0.656175971031189 2023-01-22 10:42:50.886011: step: 328/464, loss: 0.12558133900165558 2023-01-22 10:42:51.555021: step: 330/464, loss: 0.2244042605161667 2023-01-22 10:42:52.129503: step: 332/464, loss: 0.1423230618238449 
2023-01-22 10:42:52.745607: step: 334/464, loss: 0.2261742502450943 2023-01-22 10:42:53.413379: step: 336/464, loss: 0.16952291131019592 2023-01-22 10:42:54.088459: step: 338/464, loss: 0.2626737654209137 2023-01-22 10:42:54.720092: step: 340/464, loss: 0.9168149828910828 2023-01-22 10:42:55.314460: step: 342/464, loss: 0.3371291756629944 2023-01-22 10:42:55.930209: step: 344/464, loss: 0.5078762769699097 2023-01-22 10:42:56.562736: step: 346/464, loss: 0.5057971477508545 2023-01-22 10:42:57.162443: step: 348/464, loss: 0.5341809391975403 2023-01-22 10:42:57.763657: step: 350/464, loss: 0.07459886372089386 2023-01-22 10:42:58.324925: step: 352/464, loss: 0.1680305004119873 2023-01-22 10:42:58.937354: step: 354/464, loss: 0.21369802951812744 2023-01-22 10:42:59.553271: step: 356/464, loss: 0.4864957630634308 2023-01-22 10:43:00.247064: step: 358/464, loss: 0.8802257180213928 2023-01-22 10:43:00.866222: step: 360/464, loss: 0.188669815659523 2023-01-22 10:43:01.543105: step: 362/464, loss: 0.14713504910469055 2023-01-22 10:43:02.188741: step: 364/464, loss: 0.2680954337120056 2023-01-22 10:43:02.820575: step: 366/464, loss: 0.4230412542819977 2023-01-22 10:43:03.441015: step: 368/464, loss: 0.41321080923080444 2023-01-22 10:43:04.126174: step: 370/464, loss: 0.3964707851409912 2023-01-22 10:43:04.724808: step: 372/464, loss: 0.5696877837181091 2023-01-22 10:43:05.365641: step: 374/464, loss: 0.30090492963790894 2023-01-22 10:43:05.950039: step: 376/464, loss: 1.4616659879684448 2023-01-22 10:43:06.575942: step: 378/464, loss: 0.12314710021018982 2023-01-22 10:43:07.237363: step: 380/464, loss: 0.4488513171672821 2023-01-22 10:43:07.908133: step: 382/464, loss: 0.23523855209350586 2023-01-22 10:43:08.516116: step: 384/464, loss: 0.3382420241832733 2023-01-22 10:43:09.205333: step: 386/464, loss: 0.24432437121868134 2023-01-22 10:43:09.859463: step: 388/464, loss: 0.6148150563240051 2023-01-22 10:43:10.463898: step: 390/464, loss: 0.11040592193603516 2023-01-22 10:43:11.075630: step: 392/464, loss: 0.5171048045158386 2023-01-22 10:43:11.715990: step: 394/464, loss: 0.631478488445282 2023-01-22 10:43:12.378125: step: 396/464, loss: 1.022544264793396 2023-01-22 10:43:13.060066: step: 398/464, loss: 0.0752885639667511 2023-01-22 10:43:13.670236: step: 400/464, loss: 0.37636426091194153 2023-01-22 10:43:14.261829: step: 402/464, loss: 0.8598151206970215 2023-01-22 10:43:14.895593: step: 404/464, loss: 0.5534162521362305 2023-01-22 10:43:15.538422: step: 406/464, loss: 0.7628689408302307 2023-01-22 10:43:16.138319: step: 408/464, loss: 0.41472098231315613 2023-01-22 10:43:16.768033: step: 410/464, loss: 0.31681686639785767 2023-01-22 10:43:17.350074: step: 412/464, loss: 0.6400054097175598 2023-01-22 10:43:17.986734: step: 414/464, loss: 0.48053276538848877 2023-01-22 10:43:18.646717: step: 416/464, loss: 0.07062732428312302 2023-01-22 10:43:19.260780: step: 418/464, loss: 0.37162572145462036 2023-01-22 10:43:19.878629: step: 420/464, loss: 0.10916796326637268 2023-01-22 10:43:20.512196: step: 422/464, loss: 0.29724493622779846 2023-01-22 10:43:21.160210: step: 424/464, loss: 0.16576127707958221 2023-01-22 10:43:21.926054: step: 426/464, loss: 0.15553560853004456 2023-01-22 10:43:22.500158: step: 428/464, loss: 0.4601963460445404 2023-01-22 10:43:23.078105: step: 430/464, loss: 0.9218921065330505 2023-01-22 10:43:23.707585: step: 432/464, loss: 1.5235856771469116 2023-01-22 10:43:24.366289: step: 434/464, loss: 0.3329862654209137 2023-01-22 10:43:24.995879: step: 436/464, loss: 0.2956153154373169 
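
Each epoch in this log closes with a single "Loss:" line (0.537 for epoch 8 below, 0.442 for epoch 9). That figure is consistent with an aggregate of the per-step losses; a minimal sketch, assuming it is the plain arithmetic mean (the exact reduction is not stated anywhere in the log), would be:

    def epoch_loss(step_losses):
        """Aggregate per-step losses into one epoch-level figure.

        Assumption: the "Loss:" line printed after the last step record is the
        arithmetic mean of the step losses; train.py may weight or smooth them
        differently.
        """
        losses = [loss for _, loss in step_losses]
        return sum(losses) / len(losses)
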
2023-01-22 10:43:25.629034: step: 438/464, loss: 0.7929648756980896 2023-01-22 10:43:26.274233: step: 440/464, loss: 0.24007189273834229 2023-01-22 10:43:26.921182: step: 442/464, loss: 0.4005393981933594 2023-01-22 10:43:27.563621: step: 444/464, loss: 1.0202969312667847 2023-01-22 10:43:28.213137: step: 446/464, loss: 0.5497901439666748 2023-01-22 10:43:28.844091: step: 448/464, loss: 0.406026691198349 2023-01-22 10:43:29.454432: step: 450/464, loss: 0.4039490818977356 2023-01-22 10:43:30.106028: step: 452/464, loss: 0.6189019083976746 2023-01-22 10:43:30.680066: step: 454/464, loss: 0.8497124910354614 2023-01-22 10:43:31.289464: step: 456/464, loss: 0.23253557085990906 2023-01-22 10:43:31.831596: step: 458/464, loss: 0.254323273897171 2023-01-22 10:43:32.470889: step: 460/464, loss: 0.2543869912624359 2023-01-22 10:43:33.132997: step: 462/464, loss: 1.3793638944625854 2023-01-22 10:43:33.707405: step: 464/464, loss: 0.15979808568954468 2023-01-22 10:43:34.304833: step: 466/464, loss: 0.3608567714691162 2023-01-22 10:43:34.956567: step: 468/464, loss: 0.2662655711174011 2023-01-22 10:43:35.649389: step: 470/464, loss: 0.15268206596374512 2023-01-22 10:43:36.292204: step: 472/464, loss: 0.4905347526073456 2023-01-22 10:43:36.917656: step: 474/464, loss: 1.036557912826538 2023-01-22 10:43:37.531189: step: 476/464, loss: 0.8116432428359985 2023-01-22 10:43:38.184290: step: 478/464, loss: 0.47300270199775696 2023-01-22 10:43:39.034298: step: 480/464, loss: 0.6200789213180542 2023-01-22 10:43:39.644576: step: 482/464, loss: 0.12582731246948242 2023-01-22 10:43:40.315807: step: 484/464, loss: 0.2855589985847473 2023-01-22 10:43:40.990595: step: 486/464, loss: 1.0768077373504639 2023-01-22 10:43:41.692674: step: 488/464, loss: 0.8582307696342468 2023-01-22 10:43:42.400116: step: 490/464, loss: 0.1929677426815033 2023-01-22 10:43:43.046008: step: 492/464, loss: 0.7221046090126038 2023-01-22 10:43:43.632954: step: 494/464, loss: 0.2424413561820984 2023-01-22 10:43:44.318490: step: 496/464, loss: 0.3820532560348511 2023-01-22 10:43:44.963722: step: 498/464, loss: 0.6332082152366638 2023-01-22 10:43:45.556347: step: 500/464, loss: 0.4443873465061188 2023-01-22 10:43:46.192803: step: 502/464, loss: 0.9196009039878845 2023-01-22 10:43:46.721833: step: 504/464, loss: 0.22076547145843506 2023-01-22 10:43:47.372731: step: 506/464, loss: 0.778414249420166 2023-01-22 10:43:47.996521: step: 508/464, loss: 0.3775312900543213 2023-01-22 10:43:48.580444: step: 510/464, loss: 0.14615599811077118 2023-01-22 10:43:49.189548: step: 512/464, loss: 0.1535091996192932 2023-01-22 10:43:49.800132: step: 514/464, loss: 0.9353958368301392 2023-01-22 10:43:50.502829: step: 516/464, loss: 0.7229629158973694 2023-01-22 10:43:51.220031: step: 518/464, loss: 1.0084935426712036 2023-01-22 10:43:51.821523: step: 520/464, loss: 0.3375189006328583 2023-01-22 10:43:52.487200: step: 522/464, loss: 0.3442927896976471 2023-01-22 10:43:53.164103: step: 524/464, loss: 0.19687649607658386 2023-01-22 10:43:53.823670: step: 526/464, loss: 0.8136903047561646 2023-01-22 10:43:54.478191: step: 528/464, loss: 1.1159623861312866 2023-01-22 10:43:55.146617: step: 530/464, loss: 0.4194756746292114 2023-01-22 10:43:55.794071: step: 532/464, loss: 0.1660752147436142 2023-01-22 10:43:56.409566: step: 534/464, loss: 0.29503533244132996 2023-01-22 10:43:57.047564: step: 536/464, loss: 0.4381676912307739 2023-01-22 10:43:57.706421: step: 538/464, loss: 0.46901679039001465 2023-01-22 10:43:58.473937: step: 540/464, loss: 0.5324099063873291 2023-01-22 
10:43:59.088219: step: 542/464, loss: 0.4037749469280243 2023-01-22 10:43:59.813681: step: 544/464, loss: 0.383460134267807 2023-01-22 10:44:00.499785: step: 546/464, loss: 0.6488150954246521 2023-01-22 10:44:01.097547: step: 548/464, loss: 0.15714922547340393 2023-01-22 10:44:01.784855: step: 550/464, loss: 2.6088953018188477 2023-01-22 10:44:02.413600: step: 552/464, loss: 0.14634662866592407 2023-01-22 10:44:03.012604: step: 554/464, loss: 0.2663712501525879 2023-01-22 10:44:03.643931: step: 556/464, loss: 0.31006959080696106 2023-01-22 10:44:04.270367: step: 558/464, loss: 0.552808403968811 2023-01-22 10:44:04.850556: step: 560/464, loss: 0.6006173491477966 2023-01-22 10:44:05.448079: step: 562/464, loss: 0.5467162132263184 2023-01-22 10:44:06.030106: step: 564/464, loss: 0.3103739023208618 2023-01-22 10:44:06.713449: step: 566/464, loss: 0.3589913249015808 2023-01-22 10:44:07.337205: step: 568/464, loss: 0.2639947831630707 2023-01-22 10:44:07.969243: step: 570/464, loss: 1.988520622253418 2023-01-22 10:44:08.568688: step: 572/464, loss: 0.44152554869651794 2023-01-22 10:44:09.194351: step: 574/464, loss: 0.4914637804031372 2023-01-22 10:44:09.830461: step: 576/464, loss: 0.22426076233386993 2023-01-22 10:44:10.450400: step: 578/464, loss: 0.79241943359375 2023-01-22 10:44:11.082033: step: 580/464, loss: 0.28034767508506775 2023-01-22 10:44:11.755249: step: 582/464, loss: 0.535868227481842 2023-01-22 10:44:12.449340: step: 584/464, loss: 0.3306492567062378 2023-01-22 10:44:13.063118: step: 586/464, loss: 0.3533237874507904 2023-01-22 10:44:13.708878: step: 588/464, loss: 0.10611508041620255 2023-01-22 10:44:14.301031: step: 590/464, loss: 0.26176005601882935 2023-01-22 10:44:14.952486: step: 592/464, loss: 0.20076461136341095 2023-01-22 10:44:15.594111: step: 594/464, loss: 0.46523594856262207 2023-01-22 10:44:16.297542: step: 596/464, loss: 0.32261672616004944 2023-01-22 10:44:16.953109: step: 598/464, loss: 0.2716301381587982 2023-01-22 10:44:17.548489: step: 600/464, loss: 0.3709212839603424 2023-01-22 10:44:18.156140: step: 602/464, loss: 0.5088427662849426 2023-01-22 10:44:18.785992: step: 604/464, loss: 2.6977601051330566 2023-01-22 10:44:19.337098: step: 606/464, loss: 0.16669407486915588 2023-01-22 10:44:20.003997: step: 608/464, loss: 0.5212874412536621 2023-01-22 10:44:20.629548: step: 610/464, loss: 0.37637704610824585 2023-01-22 10:44:21.266654: step: 612/464, loss: 1.7240705490112305 2023-01-22 10:44:21.862301: step: 614/464, loss: 0.829563558101654 2023-01-22 10:44:22.458078: step: 616/464, loss: 0.166648268699646 2023-01-22 10:44:23.138529: step: 618/464, loss: 0.1423303782939911 2023-01-22 10:44:23.826610: step: 620/464, loss: 0.16027969121932983 2023-01-22 10:44:24.469790: step: 622/464, loss: 0.24504999816417694 2023-01-22 10:44:25.042964: step: 624/464, loss: 0.2669730484485626 2023-01-22 10:44:25.688898: step: 626/464, loss: 0.367501437664032 2023-01-22 10:44:26.289518: step: 628/464, loss: 0.1951684206724167 2023-01-22 10:44:26.911422: step: 630/464, loss: 0.29548734426498413 2023-01-22 10:44:27.519693: step: 632/464, loss: 0.10060364007949829 2023-01-22 10:44:28.172679: step: 634/464, loss: 0.2757560610771179 2023-01-22 10:44:28.870902: step: 636/464, loss: 0.1910465508699417 2023-01-22 10:44:29.471493: step: 638/464, loss: 0.4311179220676422 2023-01-22 10:44:30.117669: step: 640/464, loss: 0.45955127477645874 2023-01-22 10:44:30.715032: step: 642/464, loss: 0.5652684569358826 2023-01-22 10:44:31.376337: step: 644/464, loss: 1.5861278772354126 2023-01-22 
10:44:31.954199: step: 646/464, loss: 1.1334205865859985 2023-01-22 10:44:32.582034: step: 648/464, loss: 0.27795305848121643 2023-01-22 10:44:33.165982: step: 650/464, loss: 0.3862375319004059 2023-01-22 10:44:33.791689: step: 652/464, loss: 1.0548150539398193 2023-01-22 10:44:34.418713: step: 654/464, loss: 0.6420807242393494 2023-01-22 10:44:35.076231: step: 656/464, loss: 0.45009008049964905 2023-01-22 10:44:35.727032: step: 658/464, loss: 0.5131018161773682 2023-01-22 10:44:36.366317: step: 660/464, loss: 0.24536296725273132 2023-01-22 10:44:37.005346: step: 662/464, loss: 0.5035173296928406 2023-01-22 10:44:37.647362: step: 664/464, loss: 0.7572827935218811 2023-01-22 10:44:38.291646: step: 666/464, loss: 0.17303897440433502 2023-01-22 10:44:38.882120: step: 668/464, loss: 0.5711219310760498 2023-01-22 10:44:39.496590: step: 670/464, loss: 0.6885030269622803 2023-01-22 10:44:40.107445: step: 672/464, loss: 0.770183801651001 2023-01-22 10:44:40.778151: step: 674/464, loss: 0.20264945924282074 2023-01-22 10:44:41.412234: step: 676/464, loss: 0.25271549820899963 2023-01-22 10:44:42.065490: step: 678/464, loss: 0.26536649465560913 2023-01-22 10:44:42.662084: step: 680/464, loss: 0.1744324117898941 2023-01-22 10:44:43.268203: step: 682/464, loss: 0.25122275948524475 2023-01-22 10:44:43.911582: step: 684/464, loss: 0.5525637269020081 2023-01-22 10:44:44.542218: step: 686/464, loss: 0.37694841623306274 2023-01-22 10:44:45.132684: step: 688/464, loss: 1.0671063661575317 2023-01-22 10:44:45.779683: step: 690/464, loss: 0.2689855098724365 2023-01-22 10:44:46.442444: step: 692/464, loss: 0.2012881338596344 2023-01-22 10:44:47.034245: step: 694/464, loss: 0.6586238145828247 2023-01-22 10:44:47.638850: step: 696/464, loss: 2.0456178188323975 2023-01-22 10:44:48.250273: step: 698/464, loss: 0.3495972454547882 2023-01-22 10:44:48.845784: step: 700/464, loss: 0.22533591091632843 2023-01-22 10:44:49.496120: step: 702/464, loss: 0.6673823595046997 2023-01-22 10:44:50.078299: step: 704/464, loss: 0.31505724787712097 2023-01-22 10:44:50.644994: step: 706/464, loss: 0.17165252566337585 2023-01-22 10:44:51.218526: step: 708/464, loss: 1.0771108865737915 2023-01-22 10:44:51.893738: step: 710/464, loss: 0.32624274492263794 2023-01-22 10:44:52.499863: step: 712/464, loss: 0.19555053114891052 2023-01-22 10:44:53.246283: step: 714/464, loss: 0.499306321144104 2023-01-22 10:44:53.839612: step: 716/464, loss: 0.431405246257782 2023-01-22 10:44:54.496114: step: 718/464, loss: 0.19072787463665009 2023-01-22 10:44:55.119863: step: 720/464, loss: 0.16556185483932495 2023-01-22 10:44:55.819837: step: 722/464, loss: 0.5962976217269897 2023-01-22 10:44:56.482972: step: 724/464, loss: 0.297981321811676 2023-01-22 10:44:57.063446: step: 726/464, loss: 0.13247878849506378 2023-01-22 10:44:57.687769: step: 728/464, loss: 0.3829472064971924 2023-01-22 10:44:58.322094: step: 730/464, loss: 0.09066735953092575 2023-01-22 10:44:59.009897: step: 732/464, loss: 0.6439650058746338 2023-01-22 10:44:59.647744: step: 734/464, loss: 0.20518773794174194 2023-01-22 10:45:00.297159: step: 736/464, loss: 0.6721473336219788 2023-01-22 10:45:00.871842: step: 738/464, loss: 1.1977266073226929 2023-01-22 10:45:01.482209: step: 740/464, loss: 0.23774825036525726 2023-01-22 10:45:02.143919: step: 742/464, loss: 0.28592649102211 2023-01-22 10:45:02.819670: step: 744/464, loss: 0.3035857081413269 2023-01-22 10:45:03.495736: step: 746/464, loss: 0.8129310011863708 2023-01-22 10:45:04.160194: step: 748/464, loss: 0.3011573851108551 2023-01-22 
10:45:04.787260: step: 750/464, loss: 0.3996354043483734 2023-01-22 10:45:05.440551: step: 752/464, loss: 0.3707866668701172 2023-01-22 10:45:06.130913: step: 754/464, loss: 0.1652347296476364 2023-01-22 10:45:06.751390: step: 756/464, loss: 0.18049731850624084 2023-01-22 10:45:07.438843: step: 758/464, loss: 1.1612261533737183 2023-01-22 10:45:08.052492: step: 760/464, loss: 0.2037789523601532 2023-01-22 10:45:08.711459: step: 762/464, loss: 0.2417970597743988 2023-01-22 10:45:09.354955: step: 764/464, loss: 0.2190207988023758 2023-01-22 10:45:10.088040: step: 766/464, loss: 0.7531817555427551 2023-01-22 10:45:10.691119: step: 768/464, loss: 0.36380305886268616 2023-01-22 10:45:11.327218: step: 770/464, loss: 0.15014781057834625 2023-01-22 10:45:11.984762: step: 772/464, loss: 0.1852748692035675 2023-01-22 10:45:12.632865: step: 774/464, loss: 0.6573358178138733 2023-01-22 10:45:13.306864: step: 776/464, loss: 0.5150532126426697 2023-01-22 10:45:13.961496: step: 778/464, loss: 0.539428174495697 2023-01-22 10:45:14.561431: step: 780/464, loss: 0.07932940870523453 2023-01-22 10:45:15.158231: step: 782/464, loss: 0.27510306239128113 2023-01-22 10:45:15.807356: step: 784/464, loss: 0.2898877263069153 2023-01-22 10:45:16.394488: step: 786/464, loss: 0.16442044079303741 2023-01-22 10:45:17.062105: step: 788/464, loss: 0.24723902344703674 2023-01-22 10:45:17.672111: step: 790/464, loss: 0.6519625782966614 2023-01-22 10:45:18.299197: step: 792/464, loss: 0.451835036277771 2023-01-22 10:45:18.940034: step: 794/464, loss: 0.7727434635162354 2023-01-22 10:45:19.584373: step: 796/464, loss: 4.483155250549316 2023-01-22 10:45:20.204238: step: 798/464, loss: 0.3991261124610901 2023-01-22 10:45:20.825900: step: 800/464, loss: 1.022581696510315 2023-01-22 10:45:21.447299: step: 802/464, loss: 0.5598032474517822 2023-01-22 10:45:22.077212: step: 804/464, loss: 0.21302887797355652 2023-01-22 10:45:22.740492: step: 806/464, loss: 0.4852057099342346 2023-01-22 10:45:23.320095: step: 808/464, loss: 0.5502801537513733 2023-01-22 10:45:23.991777: step: 810/464, loss: 0.3856951892375946 2023-01-22 10:45:24.614535: step: 812/464, loss: 0.41724491119384766 2023-01-22 10:45:25.234533: step: 814/464, loss: 0.4339942932128906 2023-01-22 10:45:25.868730: step: 816/464, loss: 0.19563280045986176 2023-01-22 10:45:26.543567: step: 818/464, loss: 0.13324302434921265 2023-01-22 10:45:27.210596: step: 820/464, loss: 0.41319727897644043 2023-01-22 10:45:27.812090: step: 822/464, loss: 0.867550253868103 2023-01-22 10:45:28.397973: step: 824/464, loss: 0.7590504884719849 2023-01-22 10:45:29.025255: step: 826/464, loss: 1.368444800376892 2023-01-22 10:45:29.632264: step: 828/464, loss: 0.4613424241542816 2023-01-22 10:45:30.362968: step: 830/464, loss: 0.18761426210403442 2023-01-22 10:45:30.947005: step: 832/464, loss: 0.1387253850698471 2023-01-22 10:45:31.591618: step: 834/464, loss: 0.35486987233161926 2023-01-22 10:45:32.211531: step: 836/464, loss: 0.6135989427566528 2023-01-22 10:45:32.840782: step: 838/464, loss: 0.1506817638874054 2023-01-22 10:45:33.462517: step: 840/464, loss: 0.6843259334564209 2023-01-22 10:45:34.124591: step: 842/464, loss: 0.41133636236190796 2023-01-22 10:45:34.783900: step: 844/464, loss: 0.3200354278087616 2023-01-22 10:45:35.372884: step: 846/464, loss: 1.0006072521209717 2023-01-22 10:45:36.056692: step: 848/464, loss: 0.16702058911323547 2023-01-22 10:45:36.631035: step: 850/464, loss: 0.9346050024032593 2023-01-22 10:45:37.274651: step: 852/464, loss: 0.42332419753074646 2023-01-22 
10:45:37.906298: step: 854/464, loss: 0.4530613422393799 2023-01-22 10:45:38.587296: step: 856/464, loss: 0.6461949348449707 2023-01-22 10:45:39.309549: step: 858/464, loss: 0.7953711748123169 2023-01-22 10:45:39.874477: step: 860/464, loss: 0.26844945549964905 2023-01-22 10:45:40.450993: step: 862/464, loss: 0.5950400233268738 2023-01-22 10:45:41.049466: step: 864/464, loss: 0.17732596397399902 2023-01-22 10:45:41.680330: step: 866/464, loss: 0.12965376675128937 2023-01-22 10:45:42.303540: step: 868/464, loss: 0.29925432801246643 2023-01-22 10:45:42.889452: step: 870/464, loss: 0.30027610063552856 2023-01-22 10:45:43.591291: step: 872/464, loss: 0.12318747490644455 2023-01-22 10:45:44.202317: step: 874/464, loss: 0.768347442150116 2023-01-22 10:45:44.866094: step: 876/464, loss: 0.46050161123275757 2023-01-22 10:45:45.489049: step: 878/464, loss: 0.492687463760376 2023-01-22 10:45:46.111692: step: 880/464, loss: 0.7648109793663025 2023-01-22 10:45:46.712953: step: 882/464, loss: 0.16960103809833527 2023-01-22 10:45:47.304361: step: 884/464, loss: 0.229720339179039 2023-01-22 10:45:47.900902: step: 886/464, loss: 0.21732476353645325 2023-01-22 10:45:48.502519: step: 888/464, loss: 0.9167367815971375 2023-01-22 10:45:49.052641: step: 890/464, loss: 0.23118619620800018 2023-01-22 10:45:49.636879: step: 892/464, loss: 0.5303144454956055 2023-01-22 10:45:50.310413: step: 894/464, loss: 0.4033811092376709 2023-01-22 10:45:50.957993: step: 896/464, loss: 0.0627986267209053 2023-01-22 10:45:51.576959: step: 898/464, loss: 8.275155067443848 2023-01-22 10:45:52.269754: step: 900/464, loss: 0.125852569937706 2023-01-22 10:45:52.871585: step: 902/464, loss: 0.6692149043083191 2023-01-22 10:45:53.544009: step: 904/464, loss: 0.16022256016731262 2023-01-22 10:45:54.133577: step: 906/464, loss: 0.6760783791542053 2023-01-22 10:45:54.713209: step: 908/464, loss: 0.3508135676383972 2023-01-22 10:45:55.396248: step: 910/464, loss: 1.5788806676864624 2023-01-22 10:45:56.028223: step: 912/464, loss: 0.07519754767417908 2023-01-22 10:45:56.636310: step: 914/464, loss: 0.4660152494907379 2023-01-22 10:45:57.286016: step: 916/464, loss: 0.8508497476577759 2023-01-22 10:45:57.841054: step: 918/464, loss: 0.10930764675140381 2023-01-22 10:45:58.492873: step: 920/464, loss: 0.6356523633003235 2023-01-22 10:45:59.131182: step: 922/464, loss: 0.36704057455062866 2023-01-22 10:45:59.762962: step: 924/464, loss: 1.3801870346069336 2023-01-22 10:46:00.369751: step: 926/464, loss: 0.44619035720825195 2023-01-22 10:46:01.000934: step: 928/464, loss: 0.3960069715976715 2023-01-22 10:46:01.510899: step: 930/464, loss: 0.07545240968465805 ================================================== Loss: 0.537 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2704345792554957, 'r': 0.3510004785782904, 'f1': 0.30549504906814046}, 'combined': 0.22510161510284032, 'epoch': 8} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2924397665517951, 'r': 0.3112375476891924, 'f1': 0.30154598704050756}, 'combined': 0.19686421951867333, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27063948948391026, 'r': 0.36410511962825876, 'f1': 0.3104909353464278}, 'combined': 0.2287827944657889, 'epoch': 8} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30374786247449376, 'r': 
0.3135191631974784, 'f1': 0.3085561732780249}, 'combined': 0.2014408177877261, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27695471606886457, 'r': 0.3568353931892961, 'f1': 0.3118611147773782}, 'combined': 0.22979240036227866, 'epoch': 8} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3025890038037988, 'r': 0.30397957826980887, 'f1': 0.30328269707249156}, 'combined': 0.1979980302131292, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2251243781094527, 'r': 0.4309523809523809, 'f1': 0.29575163398692805}, 'combined': 0.19716775599128536, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23529411764705882, 'r': 0.5217391304347826, 'f1': 0.32432432432432434}, 'combined': 0.16216216216216217, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.27586206896551724, 'f1': 0.28070175438596495}, 'combined': 0.18713450292397663, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2662921186245096, 'r': 0.3127795473027921, 'f1': 0.28766984542508106}, 'combined': 0.21196725452374393, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2822796165112715, 'r': 0.30584607990808405, 'f1': 0.2935906887713004}, 'combined': 0.19167060510457953, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26543209876543206, 'r': 0.4095238095238095, 'f1': 0.3220973782771535}, 'combined': 0.21473158551810234, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30749315693430657, 'r': 0.31974620493358635, 'f1': 0.31350000000000006}, 'combined': 0.23100000000000004, 'epoch': 7} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31591739826469223, 'r': 0.27186286199291865, 'f1': 0.2922391711758198}, 'combined': 0.19078826719250414, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42105263157894735, 'r': 0.27586206896551724, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 7} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:48:39.993677: step: 2/464, loss: 
0.19961535930633545 2023-01-22 10:48:40.583407: step: 4/464, loss: 0.19874349236488342 2023-01-22 10:48:41.231996: step: 6/464, loss: 0.7833148241043091 2023-01-22 10:48:41.867749: step: 8/464, loss: 0.2644481658935547 2023-01-22 10:48:42.505775: step: 10/464, loss: 0.520695686340332 2023-01-22 10:48:43.169943: step: 12/464, loss: 0.38046133518218994 2023-01-22 10:48:43.838874: step: 14/464, loss: 0.16472403705120087 2023-01-22 10:48:44.446142: step: 16/464, loss: 0.27650564908981323 2023-01-22 10:48:45.086652: step: 18/464, loss: 0.1332404911518097 2023-01-22 10:48:45.774110: step: 20/464, loss: 0.3479761481285095 2023-01-22 10:48:46.379026: step: 22/464, loss: 0.33198633790016174 2023-01-22 10:48:46.990685: step: 24/464, loss: 0.20194487273693085 2023-01-22 10:48:47.728973: step: 26/464, loss: 0.1725732982158661 2023-01-22 10:48:48.355838: step: 28/464, loss: 0.7356686592102051 2023-01-22 10:48:48.960989: step: 30/464, loss: 0.3056510090827942 2023-01-22 10:48:49.578511: step: 32/464, loss: 0.629055917263031 2023-01-22 10:48:50.225280: step: 34/464, loss: 0.09917306154966354 2023-01-22 10:48:50.855929: step: 36/464, loss: 0.4345434308052063 2023-01-22 10:48:51.514786: step: 38/464, loss: 0.2141372114419937 2023-01-22 10:48:52.078671: step: 40/464, loss: 0.20127037167549133 2023-01-22 10:48:52.754739: step: 42/464, loss: 0.08401795476675034 2023-01-22 10:48:53.369467: step: 44/464, loss: 0.5465341806411743 2023-01-22 10:48:53.961241: step: 46/464, loss: 0.10158457607030869 2023-01-22 10:48:54.538272: step: 48/464, loss: 0.4749198853969574 2023-01-22 10:48:55.189825: step: 50/464, loss: 0.5016505718231201 2023-01-22 10:48:55.878545: step: 52/464, loss: 0.0881711021065712 2023-01-22 10:48:56.532850: step: 54/464, loss: 0.24765042960643768 2023-01-22 10:48:57.134581: step: 56/464, loss: 0.11251507699489594 2023-01-22 10:48:57.791623: step: 58/464, loss: 0.2939249873161316 2023-01-22 10:48:58.444621: step: 60/464, loss: 0.4889867305755615 2023-01-22 10:48:59.018182: step: 62/464, loss: 1.0417712926864624 2023-01-22 10:48:59.628589: step: 64/464, loss: 0.14317934215068817 2023-01-22 10:49:00.222920: step: 66/464, loss: 0.1017264723777771 2023-01-22 10:49:00.909766: step: 68/464, loss: 0.1042710468173027 2023-01-22 10:49:01.533377: step: 70/464, loss: 0.442725270986557 2023-01-22 10:49:02.119628: step: 72/464, loss: 0.14025932550430298 2023-01-22 10:49:02.728326: step: 74/464, loss: 0.3430282771587372 2023-01-22 10:49:03.332790: step: 76/464, loss: 1.3112821578979492 2023-01-22 10:49:04.010082: step: 78/464, loss: 0.4583187699317932 2023-01-22 10:49:04.633332: step: 80/464, loss: 0.319332093000412 2023-01-22 10:49:05.305495: step: 82/464, loss: 0.6261417865753174 2023-01-22 10:49:06.038591: step: 84/464, loss: 0.2486996352672577 2023-01-22 10:49:06.636666: step: 86/464, loss: 0.09624868631362915 2023-01-22 10:49:07.204228: step: 88/464, loss: 0.2241324484348297 2023-01-22 10:49:07.793789: step: 90/464, loss: 0.8514329791069031 2023-01-22 10:49:08.416574: step: 92/464, loss: 0.12544073164463043 2023-01-22 10:49:09.017952: step: 94/464, loss: 0.20031552016735077 2023-01-22 10:49:09.648657: step: 96/464, loss: 0.1628793179988861 2023-01-22 10:49:10.327159: step: 98/464, loss: 0.3327937424182892 2023-01-22 10:49:10.933244: step: 100/464, loss: 0.10830485820770264 2023-01-22 10:49:11.503235: step: 102/464, loss: 0.16933751106262207 2023-01-22 10:49:12.090564: step: 104/464, loss: 0.3470763564109802 2023-01-22 10:49:12.716085: step: 106/464, loss: 0.0833381935954094 2023-01-22 10:49:13.372765: step: 
108/464, loss: 0.1391914188861847 2023-01-22 10:49:13.967499: step: 110/464, loss: 0.1907893568277359 2023-01-22 10:49:14.603859: step: 112/464, loss: 0.11749202758073807 2023-01-22 10:49:15.226695: step: 114/464, loss: 0.5847851037979126 2023-01-22 10:49:15.855376: step: 116/464, loss: 0.13662198185920715 2023-01-22 10:49:16.472104: step: 118/464, loss: 0.679173469543457 2023-01-22 10:49:17.147042: step: 120/464, loss: 0.1345047503709793 2023-01-22 10:49:17.818174: step: 122/464, loss: 0.07206592708826065 2023-01-22 10:49:18.538240: step: 124/464, loss: 0.1326543688774109 2023-01-22 10:49:19.124076: step: 126/464, loss: 0.16537462174892426 2023-01-22 10:49:19.721856: step: 128/464, loss: 0.08092430233955383 2023-01-22 10:49:20.347633: step: 130/464, loss: 0.18472331762313843 2023-01-22 10:49:20.974211: step: 132/464, loss: 0.42979902029037476 2023-01-22 10:49:21.564194: step: 134/464, loss: 2.8610308170318604 2023-01-22 10:49:22.200885: step: 136/464, loss: 0.5815833210945129 2023-01-22 10:49:22.836615: step: 138/464, loss: 0.41417253017425537 2023-01-22 10:49:23.449675: step: 140/464, loss: 0.16000278294086456 2023-01-22 10:49:24.101425: step: 142/464, loss: 0.1544022113084793 2023-01-22 10:49:24.721239: step: 144/464, loss: 0.2434360384941101 2023-01-22 10:49:25.339878: step: 146/464, loss: 0.1725931018590927 2023-01-22 10:49:25.989140: step: 148/464, loss: 0.2619199752807617 2023-01-22 10:49:26.620018: step: 150/464, loss: 0.2151547074317932 2023-01-22 10:49:27.269278: step: 152/464, loss: 0.389448881149292 2023-01-22 10:49:27.913052: step: 154/464, loss: 0.1410638689994812 2023-01-22 10:49:28.537130: step: 156/464, loss: 2.4310078620910645 2023-01-22 10:49:29.177226: step: 158/464, loss: 0.2114315629005432 2023-01-22 10:49:29.837578: step: 160/464, loss: 0.3406426012516022 2023-01-22 10:49:30.467591: step: 162/464, loss: 0.10214785486459732 2023-01-22 10:49:31.094540: step: 164/464, loss: 0.31256258487701416 2023-01-22 10:49:31.683422: step: 166/464, loss: 0.1485060155391693 2023-01-22 10:49:32.250483: step: 168/464, loss: 0.15247027575969696 2023-01-22 10:49:32.869033: step: 170/464, loss: 0.12883232533931732 2023-01-22 10:49:33.513983: step: 172/464, loss: 0.2534656226634979 2023-01-22 10:49:34.174778: step: 174/464, loss: 0.4512403905391693 2023-01-22 10:49:34.780588: step: 176/464, loss: 0.5365868806838989 2023-01-22 10:49:35.423679: step: 178/464, loss: 0.3706887364387512 2023-01-22 10:49:36.043052: step: 180/464, loss: 0.18491411209106445 2023-01-22 10:49:36.614771: step: 182/464, loss: 1.0071208477020264 2023-01-22 10:49:37.262361: step: 184/464, loss: 0.2296251654624939 2023-01-22 10:49:37.875717: step: 186/464, loss: 0.31761446595191956 2023-01-22 10:49:38.510979: step: 188/464, loss: 0.41353002190589905 2023-01-22 10:49:39.122692: step: 190/464, loss: 0.22576896846294403 2023-01-22 10:49:39.759284: step: 192/464, loss: 0.8026258945465088 2023-01-22 10:49:40.431132: step: 194/464, loss: 0.8088206052780151 2023-01-22 10:49:41.074392: step: 196/464, loss: 0.1576479822397232 2023-01-22 10:49:41.707947: step: 198/464, loss: 0.9424635171890259 2023-01-22 10:49:42.323725: step: 200/464, loss: 0.14036808907985687 2023-01-22 10:49:42.952524: step: 202/464, loss: 0.13937592506408691 2023-01-22 10:49:43.599561: step: 204/464, loss: 0.2324567437171936 2023-01-22 10:49:44.237446: step: 206/464, loss: 0.8766568303108215 2023-01-22 10:49:44.860531: step: 208/464, loss: 0.3298742175102234 2023-01-22 10:49:45.493509: step: 210/464, loss: 0.7993811368942261 2023-01-22 10:49:46.123864: step: 
212/464, loss: 0.9075860381126404 2023-01-22 10:49:46.768803: step: 214/464, loss: 0.12574732303619385 2023-01-22 10:49:47.368424: step: 216/464, loss: 0.17546413838863373 2023-01-22 10:49:48.052168: step: 218/464, loss: 0.18658989667892456 2023-01-22 10:49:48.753673: step: 220/464, loss: 1.1319619417190552 2023-01-22 10:49:49.396748: step: 222/464, loss: 0.21586492657661438 2023-01-22 10:49:50.039275: step: 224/464, loss: 0.27885472774505615 2023-01-22 10:49:50.639439: step: 226/464, loss: 0.3149697780609131 2023-01-22 10:49:51.285952: step: 228/464, loss: 0.21803054213523865 2023-01-22 10:49:51.944619: step: 230/464, loss: 0.19087247550487518 2023-01-22 10:49:52.563889: step: 232/464, loss: 0.13294973969459534 2023-01-22 10:49:53.162557: step: 234/464, loss: 0.0896846204996109 2023-01-22 10:49:53.864056: step: 236/464, loss: 0.7859571576118469 2023-01-22 10:49:54.501165: step: 238/464, loss: 0.27156299352645874 2023-01-22 10:49:55.083795: step: 240/464, loss: 1.2523858547210693 2023-01-22 10:49:55.706435: step: 242/464, loss: 0.07696082442998886 2023-01-22 10:49:56.401986: step: 244/464, loss: 0.1793721616268158 2023-01-22 10:49:56.977989: step: 246/464, loss: 0.21266333758831024 2023-01-22 10:49:57.573977: step: 248/464, loss: 0.5519155859947205 2023-01-22 10:49:58.175130: step: 250/464, loss: 0.16949373483657837 2023-01-22 10:49:58.750282: step: 252/464, loss: 0.21048606932163239 2023-01-22 10:49:59.289740: step: 254/464, loss: 0.07983586937189102 2023-01-22 10:49:59.876567: step: 256/464, loss: 0.7817102074623108 2023-01-22 10:50:00.517897: step: 258/464, loss: 0.4482273459434509 2023-01-22 10:50:01.113793: step: 260/464, loss: 0.730652391910553 2023-01-22 10:50:01.681433: step: 262/464, loss: 0.3141850531101227 2023-01-22 10:50:02.338647: step: 264/464, loss: 0.2017117440700531 2023-01-22 10:50:02.969879: step: 266/464, loss: 0.2721196115016937 2023-01-22 10:50:03.548540: step: 268/464, loss: 0.6391931176185608 2023-01-22 10:50:04.236001: step: 270/464, loss: 0.7095168828964233 2023-01-22 10:50:04.847185: step: 272/464, loss: 0.2975116968154907 2023-01-22 10:50:05.491335: step: 274/464, loss: 0.08494570851325989 2023-01-22 10:50:06.073321: step: 276/464, loss: 0.21788422763347626 2023-01-22 10:50:06.633058: step: 278/464, loss: 0.32937175035476685 2023-01-22 10:50:07.233388: step: 280/464, loss: 0.3043481707572937 2023-01-22 10:50:07.915861: step: 282/464, loss: 0.2767504155635834 2023-01-22 10:50:08.647781: step: 284/464, loss: 0.455371618270874 2023-01-22 10:50:09.292308: step: 286/464, loss: 0.8453356623649597 2023-01-22 10:50:09.890739: step: 288/464, loss: 0.08785227686166763 2023-01-22 10:50:10.479852: step: 290/464, loss: 0.4613943099975586 2023-01-22 10:50:11.073806: step: 292/464, loss: 0.08880679309368134 2023-01-22 10:50:11.763110: step: 294/464, loss: 0.23107773065567017 2023-01-22 10:50:12.424181: step: 296/464, loss: 0.13927944004535675 2023-01-22 10:50:13.042465: step: 298/464, loss: 0.4584111273288727 2023-01-22 10:50:13.687073: step: 300/464, loss: 0.09672047942876816 2023-01-22 10:50:14.415120: step: 302/464, loss: 0.5475522875785828 2023-01-22 10:50:15.034788: step: 304/464, loss: 0.36058226227760315 2023-01-22 10:50:15.648186: step: 306/464, loss: 0.20680579543113708 2023-01-22 10:50:16.272637: step: 308/464, loss: 0.4903566539287567 2023-01-22 10:50:17.096893: step: 310/464, loss: 0.36605599522590637 2023-01-22 10:50:17.688747: step: 312/464, loss: 0.13913631439208984 2023-01-22 10:50:18.300967: step: 314/464, loss: 0.22459517419338226 2023-01-22 10:50:18.944765: 
step: 316/464, loss: 0.35183778405189514 2023-01-22 10:50:19.567193: step: 318/464, loss: 1.192873239517212 2023-01-22 10:50:20.150507: step: 320/464, loss: 0.1527167409658432 2023-01-22 10:50:20.766162: step: 322/464, loss: 0.42959147691726685 2023-01-22 10:50:21.352565: step: 324/464, loss: 0.3765203356742859 2023-01-22 10:50:21.934397: step: 326/464, loss: 0.21764370799064636 2023-01-22 10:50:22.541028: step: 328/464, loss: 0.23091094195842743 2023-01-22 10:50:23.132289: step: 330/464, loss: 0.22147002816200256 2023-01-22 10:50:23.771481: step: 332/464, loss: 0.3773364722728729 2023-01-22 10:50:24.356735: step: 334/464, loss: 0.06285819411277771 2023-01-22 10:50:24.959287: step: 336/464, loss: 5.5828537940979 2023-01-22 10:50:25.554023: step: 338/464, loss: 0.1912716180086136 2023-01-22 10:50:26.220419: step: 340/464, loss: 0.8449513912200928 2023-01-22 10:50:26.858087: step: 342/464, loss: 0.2755206823348999 2023-01-22 10:50:27.585120: step: 344/464, loss: 0.2016700953245163 2023-01-22 10:50:28.210530: step: 346/464, loss: 0.2179202437400818 2023-01-22 10:50:28.855076: step: 348/464, loss: 0.07725241780281067 2023-01-22 10:50:29.462401: step: 350/464, loss: 0.17912504076957703 2023-01-22 10:50:30.113757: step: 352/464, loss: 0.16181229054927826 2023-01-22 10:50:30.696287: step: 354/464, loss: 0.16142886877059937 2023-01-22 10:50:31.379381: step: 356/464, loss: 0.1609855741262436 2023-01-22 10:50:32.023325: step: 358/464, loss: 0.5756404399871826 2023-01-22 10:50:32.665474: step: 360/464, loss: 0.36991944909095764 2023-01-22 10:50:33.280654: step: 362/464, loss: 0.22064253687858582 2023-01-22 10:50:33.861937: step: 364/464, loss: 0.4196416139602661 2023-01-22 10:50:34.483000: step: 366/464, loss: 0.1866072416305542 2023-01-22 10:50:35.096817: step: 368/464, loss: 0.0899113342165947 2023-01-22 10:50:35.704884: step: 370/464, loss: 0.15155668556690216 2023-01-22 10:50:36.337731: step: 372/464, loss: 0.22216102480888367 2023-01-22 10:50:36.996592: step: 374/464, loss: 0.20138542354106903 2023-01-22 10:50:37.537538: step: 376/464, loss: 0.5346720218658447 2023-01-22 10:50:38.233387: step: 378/464, loss: 0.3250393867492676 2023-01-22 10:50:38.831929: step: 380/464, loss: 0.46061569452285767 2023-01-22 10:50:39.467052: step: 382/464, loss: 0.10293516516685486 2023-01-22 10:50:40.084918: step: 384/464, loss: 0.13653913140296936 2023-01-22 10:50:40.693174: step: 386/464, loss: 0.17543986439704895 2023-01-22 10:50:41.299773: step: 388/464, loss: 0.07186532020568848 2023-01-22 10:50:41.918738: step: 390/464, loss: 0.3736836910247803 2023-01-22 10:50:42.523180: step: 392/464, loss: 1.7982009649276733 2023-01-22 10:50:43.216471: step: 394/464, loss: 0.05094000697135925 2023-01-22 10:50:43.822547: step: 396/464, loss: 0.08247574418783188 2023-01-22 10:50:44.421218: step: 398/464, loss: 0.7131425142288208 2023-01-22 10:50:45.070576: step: 400/464, loss: 0.7239433526992798 2023-01-22 10:50:45.675179: step: 402/464, loss: 0.20473116636276245 2023-01-22 10:50:46.314597: step: 404/464, loss: 0.3672104775905609 2023-01-22 10:50:46.977213: step: 406/464, loss: 0.18884846568107605 2023-01-22 10:50:47.601447: step: 408/464, loss: 0.2608042359352112 2023-01-22 10:50:48.252207: step: 410/464, loss: 0.17296145856380463 2023-01-22 10:50:48.906349: step: 412/464, loss: 0.19148777425289154 2023-01-22 10:50:49.550740: step: 414/464, loss: 0.7486370801925659 2023-01-22 10:50:50.191716: step: 416/464, loss: 0.1606966108083725 2023-01-22 10:50:50.858810: step: 418/464, loss: 0.1943890005350113 2023-01-22 
10:50:51.515529: step: 420/464, loss: 0.2487289309501648 2023-01-22 10:50:52.139125: step: 422/464, loss: 0.6002469062805176 2023-01-22 10:50:52.850234: step: 424/464, loss: 0.9607329368591309 2023-01-22 10:50:53.420241: step: 426/464, loss: 0.27625828981399536 2023-01-22 10:50:54.080723: step: 428/464, loss: 1.326363444328308 2023-01-22 10:50:54.726090: step: 430/464, loss: 0.5038409233093262 2023-01-22 10:50:55.290596: step: 432/464, loss: 0.4234456717967987 2023-01-22 10:50:55.931603: step: 434/464, loss: 0.702119767665863 2023-01-22 10:50:56.589124: step: 436/464, loss: 0.17340542376041412 2023-01-22 10:50:57.109804: step: 438/464, loss: 0.6242377161979675 2023-01-22 10:50:57.747550: step: 440/464, loss: 0.16712586581707 2023-01-22 10:50:58.387305: step: 442/464, loss: 0.14836061000823975 2023-01-22 10:50:59.044784: step: 444/464, loss: 0.3254392743110657 2023-01-22 10:50:59.648158: step: 446/464, loss: 0.22568395733833313 2023-01-22 10:51:00.288318: step: 448/464, loss: 0.4411729574203491 2023-01-22 10:51:00.907107: step: 450/464, loss: 0.5406442284584045 2023-01-22 10:51:01.478431: step: 452/464, loss: 0.16906428337097168 2023-01-22 10:51:02.142880: step: 454/464, loss: 0.03588523343205452 2023-01-22 10:51:02.734165: step: 456/464, loss: 0.06504597514867783 2023-01-22 10:51:03.370256: step: 458/464, loss: 0.17932981252670288 2023-01-22 10:51:04.067605: step: 460/464, loss: 0.3948647975921631 2023-01-22 10:51:04.666649: step: 462/464, loss: 0.2129187285900116 2023-01-22 10:51:05.249994: step: 464/464, loss: 0.2704509198665619 2023-01-22 10:51:05.872151: step: 466/464, loss: 0.2650047540664673 2023-01-22 10:51:06.489479: step: 468/464, loss: 0.3367736339569092 2023-01-22 10:51:07.075601: step: 470/464, loss: 0.27748000621795654 2023-01-22 10:51:07.696454: step: 472/464, loss: 0.11794216930866241 2023-01-22 10:51:08.274543: step: 474/464, loss: 0.2931903600692749 2023-01-22 10:51:08.906314: step: 476/464, loss: 0.18659132719039917 2023-01-22 10:51:09.542442: step: 478/464, loss: 0.29165756702423096 2023-01-22 10:51:10.213962: step: 480/464, loss: 0.5868270397186279 2023-01-22 10:51:10.862857: step: 482/464, loss: 0.42066115140914917 2023-01-22 10:51:11.470902: step: 484/464, loss: 0.8886086344718933 2023-01-22 10:51:12.171412: step: 486/464, loss: 0.5854116678237915 2023-01-22 10:51:12.801160: step: 488/464, loss: 0.1770869493484497 2023-01-22 10:51:13.402265: step: 490/464, loss: 0.29514598846435547 2023-01-22 10:51:14.026721: step: 492/464, loss: 0.5091790556907654 2023-01-22 10:51:14.673799: step: 494/464, loss: 0.566424548625946 2023-01-22 10:51:15.278852: step: 496/464, loss: 0.09062954783439636 2023-01-22 10:51:15.932632: step: 498/464, loss: 0.19813255965709686 2023-01-22 10:51:16.591356: step: 500/464, loss: 0.17183098196983337 2023-01-22 10:51:17.213494: step: 502/464, loss: 0.7004691958427429 2023-01-22 10:51:17.860684: step: 504/464, loss: 0.2983857989311218 2023-01-22 10:51:18.519733: step: 506/464, loss: 0.4526219069957733 2023-01-22 10:51:19.112684: step: 508/464, loss: 0.5289148092269897 2023-01-22 10:51:19.730002: step: 510/464, loss: 0.19427700340747833 2023-01-22 10:51:20.342153: step: 512/464, loss: 0.49163126945495605 2023-01-22 10:51:20.985103: step: 514/464, loss: 0.13287903368473053 2023-01-22 10:51:21.643338: step: 516/464, loss: 0.40076348185539246 2023-01-22 10:51:22.245155: step: 518/464, loss: 0.7974677681922913 2023-01-22 10:51:22.869358: step: 520/464, loss: 0.5654568672180176 2023-01-22 10:51:23.497249: step: 522/464, loss: 0.8992674946784973 2023-01-22 
10:51:24.139646: step: 524/464, loss: 0.6453098654747009 2023-01-22 10:51:24.754496: step: 526/464, loss: 0.15305867791175842 2023-01-22 10:51:25.383082: step: 528/464, loss: 0.7035385370254517 2023-01-22 10:51:26.038584: step: 530/464, loss: 0.4323200583457947 2023-01-22 10:51:26.659286: step: 532/464, loss: 0.77094566822052 2023-01-22 10:51:27.331031: step: 534/464, loss: 0.1831674575805664 2023-01-22 10:51:28.048670: step: 536/464, loss: 2.2393898963928223 2023-01-22 10:51:28.689917: step: 538/464, loss: 1.6402591466903687 2023-01-22 10:51:29.379646: step: 540/464, loss: 0.15160660445690155 2023-01-22 10:51:30.026622: step: 542/464, loss: 0.16926424205303192 2023-01-22 10:51:30.695266: step: 544/464, loss: 0.15502096712589264 2023-01-22 10:51:31.388899: step: 546/464, loss: 0.5304486751556396 2023-01-22 10:51:32.057285: step: 548/464, loss: 0.821464478969574 2023-01-22 10:51:32.687350: step: 550/464, loss: 0.132523313164711 2023-01-22 10:51:33.317970: step: 552/464, loss: 0.11264272779226303 2023-01-22 10:51:33.936661: step: 554/464, loss: 0.13352425396442413 2023-01-22 10:51:34.582079: step: 556/464, loss: 0.7259401679039001 2023-01-22 10:51:35.182718: step: 558/464, loss: 0.24717287719249725 2023-01-22 10:51:35.785350: step: 560/464, loss: 0.2604392468929291 2023-01-22 10:51:36.383566: step: 562/464, loss: 0.10367932915687561 2023-01-22 10:51:37.061298: step: 564/464, loss: 0.4997091591358185 2023-01-22 10:51:37.684268: step: 566/464, loss: 0.34742504358291626 2023-01-22 10:51:38.294997: step: 568/464, loss: 0.1672915518283844 2023-01-22 10:51:38.920663: step: 570/464, loss: 0.2621702253818512 2023-01-22 10:51:39.600152: step: 572/464, loss: 0.13340547680854797 2023-01-22 10:51:40.221249: step: 574/464, loss: 1.081810474395752 2023-01-22 10:51:40.907074: step: 576/464, loss: 0.17170488834381104 2023-01-22 10:51:41.554216: step: 578/464, loss: 0.28923726081848145 2023-01-22 10:51:42.170031: step: 580/464, loss: 0.20817041397094727 2023-01-22 10:51:42.809002: step: 582/464, loss: 0.10076570510864258 2023-01-22 10:51:43.465190: step: 584/464, loss: 0.8717387914657593 2023-01-22 10:51:44.180021: step: 586/464, loss: 0.09899066388607025 2023-01-22 10:51:44.803404: step: 588/464, loss: 0.5495445132255554 2023-01-22 10:51:45.459157: step: 590/464, loss: 0.3652946949005127 2023-01-22 10:51:46.072467: step: 592/464, loss: 0.2133476883172989 2023-01-22 10:51:46.695473: step: 594/464, loss: 0.16517765820026398 2023-01-22 10:51:47.323805: step: 596/464, loss: 0.49846720695495605 2023-01-22 10:51:47.915016: step: 598/464, loss: 0.8974509835243225 2023-01-22 10:51:48.581243: step: 600/464, loss: 0.818812370300293 2023-01-22 10:51:49.217922: step: 602/464, loss: 1.0239721536636353 2023-01-22 10:51:49.828649: step: 604/464, loss: 0.5344446301460266 2023-01-22 10:51:50.433195: step: 606/464, loss: 0.43328937888145447 2023-01-22 10:51:51.062008: step: 608/464, loss: 0.2958345413208008 2023-01-22 10:51:51.639773: step: 610/464, loss: 0.18794173002243042 2023-01-22 10:51:52.288099: step: 612/464, loss: 0.3947238028049469 2023-01-22 10:51:52.959677: step: 614/464, loss: 1.7605561017990112 2023-01-22 10:51:53.555896: step: 616/464, loss: 0.5139614343643188 2023-01-22 10:51:54.163710: step: 618/464, loss: 0.16109994053840637 2023-01-22 10:51:54.803414: step: 620/464, loss: 0.5573995113372803 2023-01-22 10:51:55.422512: step: 622/464, loss: 0.18353329598903656 2023-01-22 10:51:56.053963: step: 624/464, loss: 0.4768543541431427 2023-01-22 10:51:56.655444: step: 626/464, loss: 0.3241148293018341 2023-01-22 
10:51:57.283222: step: 628/464, loss: 0.19876053929328918 2023-01-22 10:51:57.847199: step: 630/464, loss: 0.3442919850349426 2023-01-22 10:51:58.439508: step: 632/464, loss: 0.6632809638977051 2023-01-22 10:51:59.115538: step: 634/464, loss: 0.852631688117981 2023-01-22 10:51:59.733257: step: 636/464, loss: 0.45948293805122375 2023-01-22 10:52:00.446518: step: 638/464, loss: 0.3417765200138092 2023-01-22 10:52:01.168431: step: 640/464, loss: 0.156797856092453 2023-01-22 10:52:01.787413: step: 642/464, loss: 0.19794075191020966 2023-01-22 10:52:02.417699: step: 644/464, loss: 0.31666621565818787 2023-01-22 10:52:03.064570: step: 646/464, loss: 0.24088281393051147 2023-01-22 10:52:03.688931: step: 648/464, loss: 1.0673682689666748 2023-01-22 10:52:04.351586: step: 650/464, loss: 0.1969476342201233 2023-01-22 10:52:04.960505: step: 652/464, loss: 0.6506825685501099 2023-01-22 10:52:05.592125: step: 654/464, loss: 0.4994967579841614 2023-01-22 10:52:06.219249: step: 656/464, loss: 0.3196776211261749 2023-01-22 10:52:06.833279: step: 658/464, loss: 0.19478558003902435 2023-01-22 10:52:07.462722: step: 660/464, loss: 0.3909696638584137 2023-01-22 10:52:08.059808: step: 662/464, loss: 1.1307225227355957 2023-01-22 10:52:08.733115: step: 664/464, loss: 0.3461707532405853 2023-01-22 10:52:09.373110: step: 666/464, loss: 0.4930865168571472 2023-01-22 10:52:10.074813: step: 668/464, loss: 0.10782203823328018 2023-01-22 10:52:10.731766: step: 670/464, loss: 0.2896060645580292 2023-01-22 10:52:11.361340: step: 672/464, loss: 0.2806074619293213 2023-01-22 10:52:12.066818: step: 674/464, loss: 0.38923388719558716 2023-01-22 10:52:12.798691: step: 676/464, loss: 0.40678125619888306 2023-01-22 10:52:13.466092: step: 678/464, loss: 0.3723219037055969 2023-01-22 10:52:14.109439: step: 680/464, loss: 0.21378019452095032 2023-01-22 10:52:14.718710: step: 682/464, loss: 0.09969107806682587 2023-01-22 10:52:15.321669: step: 684/464, loss: 0.5234864354133606 2023-01-22 10:52:15.955489: step: 686/464, loss: 0.48588019609451294 2023-01-22 10:52:16.573720: step: 688/464, loss: 0.16970089077949524 2023-01-22 10:52:17.214020: step: 690/464, loss: 0.3834255337715149 2023-01-22 10:52:17.825059: step: 692/464, loss: 0.7420527935028076 2023-01-22 10:52:18.419803: step: 694/464, loss: 3.329572916030884 2023-01-22 10:52:19.049638: step: 696/464, loss: 0.696709930896759 2023-01-22 10:52:19.629718: step: 698/464, loss: 0.4788230061531067 2023-01-22 10:52:20.243666: step: 700/464, loss: 0.5059593319892883 2023-01-22 10:52:20.866581: step: 702/464, loss: 0.40401121973991394 2023-01-22 10:52:21.511537: step: 704/464, loss: 0.14822030067443848 2023-01-22 10:52:22.140502: step: 706/464, loss: 0.2039600908756256 2023-01-22 10:52:22.780905: step: 708/464, loss: 0.5814012885093689 2023-01-22 10:52:23.397232: step: 710/464, loss: 0.14190104603767395 2023-01-22 10:52:23.991079: step: 712/464, loss: 0.5267803072929382 2023-01-22 10:52:24.639910: step: 714/464, loss: 0.28079739212989807 2023-01-22 10:52:25.206509: step: 716/464, loss: 0.2747531533241272 2023-01-22 10:52:25.939618: step: 718/464, loss: 0.4952031075954437 2023-01-22 10:52:26.609475: step: 720/464, loss: 0.4949815571308136 2023-01-22 10:52:27.188692: step: 722/464, loss: 0.39597049355506897 2023-01-22 10:52:27.890373: step: 724/464, loss: 0.6801155805587769 2023-01-22 10:52:28.502250: step: 726/464, loss: 0.5496456027030945 2023-01-22 10:52:29.189594: step: 728/464, loss: 0.2377110719680786 2023-01-22 10:52:29.842815: step: 730/464, loss: 1.3523091077804565 2023-01-22 
10:52:30.423865: step: 732/464, loss: 0.1813611090183258 2023-01-22 10:52:31.039131: step: 734/464, loss: 0.05107222497463226 2023-01-22 10:52:31.758471: step: 736/464, loss: 0.13555821776390076 2023-01-22 10:52:32.357004: step: 738/464, loss: 1.779261827468872 2023-01-22 10:52:33.009764: step: 740/464, loss: 0.20782986283302307 2023-01-22 10:52:33.643718: step: 742/464, loss: 1.2898242473602295 2023-01-22 10:52:34.288235: step: 744/464, loss: 0.32072851061820984 2023-01-22 10:52:34.918339: step: 746/464, loss: 0.2118932455778122 2023-01-22 10:52:35.547490: step: 748/464, loss: 0.34228479862213135 2023-01-22 10:52:36.179775: step: 750/464, loss: 0.20209060609340668 2023-01-22 10:52:36.812117: step: 752/464, loss: 0.16558676958084106 2023-01-22 10:52:37.450343: step: 754/464, loss: 0.5465676188468933 2023-01-22 10:52:38.077220: step: 756/464, loss: 0.2919003963470459 2023-01-22 10:52:38.745587: step: 758/464, loss: 0.7971205711364746 2023-01-22 10:52:39.370601: step: 760/464, loss: 0.1666930615901947 2023-01-22 10:52:39.963722: step: 762/464, loss: 0.5473403930664062 2023-01-22 10:52:40.596789: step: 764/464, loss: 0.1011856272816658 2023-01-22 10:52:41.228454: step: 766/464, loss: 0.5450992584228516 2023-01-22 10:52:41.927523: step: 768/464, loss: 0.4951463043689728 2023-01-22 10:52:42.552555: step: 770/464, loss: 0.15516912937164307 2023-01-22 10:52:43.155931: step: 772/464, loss: 0.37668851017951965 2023-01-22 10:52:43.780741: step: 774/464, loss: 0.5910404324531555 2023-01-22 10:52:44.400274: step: 776/464, loss: 0.37189438939094543 2023-01-22 10:52:45.064363: step: 778/464, loss: 0.910901665687561 2023-01-22 10:52:45.677866: step: 780/464, loss: 0.17977401614189148 2023-01-22 10:52:46.297524: step: 782/464, loss: 0.6486947536468506 2023-01-22 10:52:46.861216: step: 784/464, loss: 0.20105552673339844 2023-01-22 10:52:47.489253: step: 786/464, loss: 0.20137327909469604 2023-01-22 10:52:48.144921: step: 788/464, loss: 0.17438651621341705 2023-01-22 10:52:48.764284: step: 790/464, loss: 0.19812040030956268 2023-01-22 10:52:49.389241: step: 792/464, loss: 0.36395344138145447 2023-01-22 10:52:50.008667: step: 794/464, loss: 0.13106000423431396 2023-01-22 10:52:50.600959: step: 796/464, loss: 0.300517737865448 2023-01-22 10:52:51.234636: step: 798/464, loss: 0.606153666973114 2023-01-22 10:52:51.871687: step: 800/464, loss: 0.12423531711101532 2023-01-22 10:52:52.465328: step: 802/464, loss: 0.6810371279716492 2023-01-22 10:52:53.102817: step: 804/464, loss: 0.264427125453949 2023-01-22 10:52:53.701794: step: 806/464, loss: 0.11184506118297577 2023-01-22 10:52:54.332113: step: 808/464, loss: 0.1165018230676651 2023-01-22 10:52:55.004529: step: 810/464, loss: 0.22094683349132538 2023-01-22 10:52:55.655987: step: 812/464, loss: 0.2403636872768402 2023-01-22 10:52:56.276709: step: 814/464, loss: 0.25624537467956543 2023-01-22 10:52:56.939513: step: 816/464, loss: 0.18656274676322937 2023-01-22 10:52:57.610239: step: 818/464, loss: 0.19701141119003296 2023-01-22 10:52:58.303483: step: 820/464, loss: 0.17605522274971008 2023-01-22 10:52:58.920785: step: 822/464, loss: 0.17397205531597137 2023-01-22 10:52:59.528280: step: 824/464, loss: 0.1863817274570465 2023-01-22 10:53:00.246072: step: 826/464, loss: 0.16790562868118286 2023-01-22 10:53:00.848655: step: 828/464, loss: 0.17844824492931366 2023-01-22 10:53:01.479029: step: 830/464, loss: 0.18206892907619476 2023-01-22 10:53:02.136787: step: 832/464, loss: 0.2522604465484619 2023-01-22 10:53:02.785341: step: 834/464, loss: 0.16825295984745026 
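
The per-language summaries printed at the end of each epoch (at the end of epoch 8 above, and again after the epoch 9 steps below) report precision 'p', recall 'r', and 'f1' at the template and slot levels plus a 'combined' score. The printed numbers are consistent with the standard F1 formula and with 'combined' being the product of the template and slot F1 scores; a hedged check against one value from this log (function name illustrative, not from train.py):

    def f1(p, r):
        """Standard harmonic mean of precision and recall."""
        return 2 * p * r / (p + r) if p + r else 0.0

    # Dev Chinese, epoch 8, as printed in this log:
    template_f1 = f1(1.0, 0.5833333333333334)    # ~0.736842105..., matching 'f1'
    combined = template_f1 * 0.30549504906814046  # slot f1 from the same block
    print(combined)                               # ~0.2251016..., matching 'combined'
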
2023-01-22 10:53:03.420387: step: 836/464, loss: 0.2886344790458679 2023-01-22 10:53:04.048959: step: 838/464, loss: 0.2774253785610199 2023-01-22 10:53:04.690685: step: 840/464, loss: 0.6883927583694458 2023-01-22 10:53:05.278045: step: 842/464, loss: 0.4036794900894165 2023-01-22 10:53:05.911085: step: 844/464, loss: 1.1966632604599 2023-01-22 10:53:06.537796: step: 846/464, loss: 0.34209907054901123 2023-01-22 10:53:07.173048: step: 848/464, loss: 0.7691254019737244 2023-01-22 10:53:07.845025: step: 850/464, loss: 0.41199547052383423 2023-01-22 10:53:08.464783: step: 852/464, loss: 0.17533652484416962 2023-01-22 10:53:09.137891: step: 854/464, loss: 0.17096589505672455 2023-01-22 10:53:09.793095: step: 856/464, loss: 0.2661553919315338 2023-01-22 10:53:10.421809: step: 858/464, loss: 0.17946060001850128 2023-01-22 10:53:10.993333: step: 860/464, loss: 0.946364164352417 2023-01-22 10:53:11.614703: step: 862/464, loss: 0.7228373885154724 2023-01-22 10:53:12.258711: step: 864/464, loss: 0.3066113293170929 2023-01-22 10:53:12.908144: step: 866/464, loss: 0.5188855528831482 2023-01-22 10:53:13.540004: step: 868/464, loss: 0.34665554761886597 2023-01-22 10:53:14.136927: step: 870/464, loss: 0.16440880298614502 2023-01-22 10:53:14.770794: step: 872/464, loss: 0.36245080828666687 2023-01-22 10:53:15.449553: step: 874/464, loss: 12.406877517700195 2023-01-22 10:53:16.072017: step: 876/464, loss: 0.190341517329216 2023-01-22 10:53:16.766347: step: 878/464, loss: 0.2363278865814209 2023-01-22 10:53:17.402342: step: 880/464, loss: 0.29699596762657166 2023-01-22 10:53:18.042930: step: 882/464, loss: 6.164422988891602 2023-01-22 10:53:18.687110: step: 884/464, loss: 0.2677531838417053 2023-01-22 10:53:19.427357: step: 886/464, loss: 0.7761355638504028 2023-01-22 10:53:20.026540: step: 888/464, loss: 0.44219255447387695 2023-01-22 10:53:20.625871: step: 890/464, loss: 0.6220502257347107 2023-01-22 10:53:21.234222: step: 892/464, loss: 0.3263578712940216 2023-01-22 10:53:21.782591: step: 894/464, loss: 1.4174706935882568 2023-01-22 10:53:22.442302: step: 896/464, loss: 0.4406396448612213 2023-01-22 10:53:23.076401: step: 898/464, loss: 0.28490161895751953 2023-01-22 10:53:23.688464: step: 900/464, loss: 0.25278356671333313 2023-01-22 10:53:24.421809: step: 902/464, loss: 0.22905734181404114 2023-01-22 10:53:24.996838: step: 904/464, loss: 0.11674833297729492 2023-01-22 10:53:25.623917: step: 906/464, loss: 0.2179863154888153 2023-01-22 10:53:26.296566: step: 908/464, loss: 0.31769436597824097 2023-01-22 10:53:26.924036: step: 910/464, loss: 0.5596010684967041 2023-01-22 10:53:27.536317: step: 912/464, loss: 0.07528844475746155 2023-01-22 10:53:28.160856: step: 914/464, loss: 0.30073490738868713 2023-01-22 10:53:28.758070: step: 916/464, loss: 0.4311095178127289 2023-01-22 10:53:29.413110: step: 918/464, loss: 0.10860362648963928 2023-01-22 10:53:30.048088: step: 920/464, loss: 0.23732000589370728 2023-01-22 10:53:30.681458: step: 922/464, loss: 0.5516647100448608 2023-01-22 10:53:31.456995: step: 924/464, loss: 0.28733107447624207 2023-01-22 10:53:32.054225: step: 926/464, loss: 0.10183804482221603 2023-01-22 10:53:32.740358: step: 928/464, loss: 0.15522173047065735 2023-01-22 10:53:33.195055: step: 930/464, loss: 0.421254426240921 ================================================== Loss: 0.442 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2753113026819924, 'r': 0.3181491144845035, 'f1': 0.2951841255868545}, 'combined': 
0.21750409253768224, 'epoch': 9} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2766998413594423, 'r': 0.3140162419831469, 'f1': 0.29417937581575426}, 'combined': 0.19205492928904164, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2714514014571442, 'r': 0.336352495543672, 'f1': 0.30043689008731383}, 'combined': 0.22137455059065228, 'epoch': 9} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29110252180831975, 'r': 0.31593970944884614, 'f1': 0.3030130077424041}, 'combined': 0.1978219636038493, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28487491086284766, 'r': 0.3248763214963405, 'f1': 0.303563513171226}, 'combined': 0.2236783781261665, 'epoch': 9} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2834185544035629, 'r': 0.30291983108270715, 'f1': 0.2928448921331714}, 'combined': 0.19118371196258857, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22798742138364778, 'r': 0.3452380952380952, 'f1': 0.2746212121212121}, 'combined': 0.18308080808080807, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2222222222222222, 'r': 0.43478260869565216, 'f1': 0.29411764705882354}, 'combined': 0.14705882352941177, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47368421052631576, 'r': 0.3103448275862069, 'f1': 0.375}, 'combined': 0.25, 'epoch': 9} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2662921186245096, 'r': 0.3127795473027921, 'f1': 0.28766984542508106}, 'combined': 0.21196725452374393, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2822796165112715, 'r': 0.30584607990808405, 'f1': 0.2935906887713004}, 'combined': 0.19167060510457953, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26543209876543206, 'r': 0.4095238095238095, 'f1': 0.3220973782771535}, 'combined': 0.21473158551810234, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28487491086284766, 'r': 0.3248763214963405, 'f1': 0.303563513171226}, 'combined': 0.2236783781261665, 'epoch': 9} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2834185544035629, 'r': 
0.30291983108270715, 'f1': 0.2928448921331714}, 'combined': 0.19118371196258857, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47368421052631576, 'r': 0.3103448275862069, 'f1': 0.375}, 'combined': 0.25, 'epoch': 9} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:56:20.008154: step: 2/464, loss: 0.28201234340667725 2023-01-22 10:56:20.690286: step: 4/464, loss: 0.11395406723022461 2023-01-22 10:56:21.267447: step: 6/464, loss: 0.09517635405063629 2023-01-22 10:56:21.956878: step: 8/464, loss: 0.08937979489564896 2023-01-22 10:56:22.568448: step: 10/464, loss: 0.5818873047828674 2023-01-22 10:56:23.201540: step: 12/464, loss: 0.2057216912508011 2023-01-22 10:56:24.003585: step: 14/464, loss: 1.59352707862854 2023-01-22 10:56:24.640939: step: 16/464, loss: 0.37595269083976746 2023-01-22 10:56:25.205749: step: 18/464, loss: 0.08871912211179733 2023-01-22 10:56:25.881533: step: 20/464, loss: 0.29423627257347107 2023-01-22 10:56:26.526140: step: 22/464, loss: 0.27823805809020996 2023-01-22 10:56:27.182316: step: 24/464, loss: 0.149687722325325 2023-01-22 10:56:27.777241: step: 26/464, loss: 0.198866605758667 2023-01-22 10:56:28.397808: step: 28/464, loss: 0.49141961336135864 2023-01-22 10:56:28.999969: step: 30/464, loss: 0.11584275215864182 2023-01-22 10:56:29.637059: step: 32/464, loss: 0.1265438348054886 2023-01-22 10:56:30.230822: step: 34/464, loss: 0.2609385848045349 2023-01-22 10:56:30.923452: step: 36/464, loss: 0.8702123761177063 2023-01-22 10:56:31.610105: step: 38/464, loss: 0.11494985222816467 2023-01-22 10:56:32.194540: step: 40/464, loss: 0.17757941782474518 2023-01-22 10:56:32.806519: step: 42/464, loss: 0.06117549166083336 2023-01-22 10:56:33.398559: step: 44/464, loss: 0.5850616693496704 2023-01-22 10:56:33.973513: step: 46/464, loss: 0.1942889392375946 2023-01-22 10:56:34.767909: step: 48/464, loss: 0.7532861232757568 2023-01-22 10:56:35.409181: step: 50/464, loss: 0.46198928356170654 2023-01-22 10:56:36.059188: step: 52/464, loss: 0.08040419965982437 2023-01-22 10:56:36.758721: step: 54/464, loss: 0.1533208042383194 2023-01-22 10:56:37.377324: step: 56/464, loss: 0.11138932406902313 2023-01-22 10:56:38.018779: step: 58/464, loss: 0.19376027584075928 2023-01-22 10:56:38.757457: step: 60/464, loss: 0.6468014717102051 2023-01-22 10:56:39.332093: step: 62/464, loss: 0.3951880931854248 2023-01-22 10:56:39.946974: step: 64/464, loss: 0.0764017328619957 2023-01-22 10:56:40.608120: step: 66/464, loss: 0.1439065784215927 2023-01-22 10:56:41.216660: step: 68/464, loss: 0.17877231538295746 2023-01-22 10:56:41.884086: step: 70/464, loss: 0.20332950353622437 2023-01-22 10:56:42.502507: step: 72/464, loss: 0.12935759127140045 2023-01-22 10:56:43.198474: step: 74/464, loss: 0.40820690989494324 2023-01-22 10:56:43.867789: step: 76/464, loss: 0.15177741646766663 2023-01-22 10:56:44.535201: step: 78/464, loss: 0.3033875823020935 2023-01-22 10:56:45.174890: step: 80/464, loss: 0.24807725846767426 2023-01-22 10:56:45.895623: step: 82/464, loss: 0.25280049443244934 2023-01-22 10:56:46.493190: step: 84/464, loss: 0.22164084017276764 2023-01-22 10:56:47.030515: step: 86/464, loss: 0.0625792145729065 2023-01-22 10:56:47.683137: step: 88/464, loss: 0.3868742287158966 2023-01-22 10:56:48.291456: step: 90/464, loss: 
0.10269523411989212 2023-01-22 10:56:48.882332: step: 92/464, loss: 0.19511628150939941 2023-01-22 10:56:49.473399: step: 94/464, loss: 0.22719942033290863 2023-01-22 10:56:50.056918: step: 96/464, loss: 0.16706952452659607 2023-01-22 10:56:50.675200: step: 98/464, loss: 0.3117726147174835 2023-01-22 10:56:51.331182: step: 100/464, loss: 0.12993161380290985 2023-01-22 10:56:51.960981: step: 102/464, loss: 0.28551098704338074 2023-01-22 10:56:52.562984: step: 104/464, loss: 0.4209898114204407 2023-01-22 10:56:53.183493: step: 106/464, loss: 0.114765964448452 2023-01-22 10:56:53.878703: step: 108/464, loss: 0.2144257128238678 2023-01-22 10:56:54.510004: step: 110/464, loss: 0.09205218404531479 2023-01-22 10:56:55.109155: step: 112/464, loss: 0.18294711410999298 2023-01-22 10:56:55.680441: step: 114/464, loss: 0.23293060064315796 2023-01-22 10:56:56.301624: step: 116/464, loss: 1.5510797500610352 2023-01-22 10:56:56.888005: step: 118/464, loss: 0.08900728076696396 2023-01-22 10:56:57.515098: step: 120/464, loss: 0.3488987982273102 2023-01-22 10:56:58.147207: step: 122/464, loss: 0.16980993747711182 2023-01-22 10:56:58.803607: step: 124/464, loss: 0.2912577986717224 2023-01-22 10:56:59.448659: step: 126/464, loss: 0.17310898005962372 2023-01-22 10:57:00.022645: step: 128/464, loss: 0.3511984944343567 2023-01-22 10:57:00.739467: step: 130/464, loss: 0.4029984474182129 2023-01-22 10:57:01.309562: step: 132/464, loss: 0.13000822067260742 2023-01-22 10:57:02.047493: step: 134/464, loss: 0.0925740897655487 2023-01-22 10:57:02.690577: step: 136/464, loss: 0.10538043081760406 2023-01-22 10:57:03.331320: step: 138/464, loss: 0.09516558796167374 2023-01-22 10:57:03.921586: step: 140/464, loss: 1.171372652053833 2023-01-22 10:57:04.529604: step: 142/464, loss: 0.13884663581848145 2023-01-22 10:57:05.155127: step: 144/464, loss: 0.12954020500183105 2023-01-22 10:57:05.764723: step: 146/464, loss: 0.41415587067604065 2023-01-22 10:57:06.398340: step: 148/464, loss: 0.17221221327781677 2023-01-22 10:57:07.000407: step: 150/464, loss: 0.4241466224193573 2023-01-22 10:57:07.711648: step: 152/464, loss: 0.14755919575691223 2023-01-22 10:57:08.372040: step: 154/464, loss: 0.1533004343509674 2023-01-22 10:57:08.997989: step: 156/464, loss: 0.2734523415565491 2023-01-22 10:57:09.667233: step: 158/464, loss: 0.09559568762779236 2023-01-22 10:57:10.291648: step: 160/464, loss: 0.15946142375469208 2023-01-22 10:57:10.949147: step: 162/464, loss: 0.18343742191791534 2023-01-22 10:57:11.553232: step: 164/464, loss: 0.12799851596355438 2023-01-22 10:57:12.212677: step: 166/464, loss: 0.0605124905705452 2023-01-22 10:57:12.823992: step: 168/464, loss: 0.12303553521633148 2023-01-22 10:57:13.402839: step: 170/464, loss: 0.05820485204458237 2023-01-22 10:57:13.966945: step: 172/464, loss: 0.6615782976150513 2023-01-22 10:57:14.557227: step: 174/464, loss: 0.21617081761360168 2023-01-22 10:57:15.133911: step: 176/464, loss: 0.28681764006614685 2023-01-22 10:57:15.890828: step: 178/464, loss: 0.10227718204259872 2023-01-22 10:57:16.537101: step: 180/464, loss: 0.26523256301879883 2023-01-22 10:57:17.086879: step: 182/464, loss: 0.16301316022872925 2023-01-22 10:57:17.732628: step: 184/464, loss: 0.2153080403804779 2023-01-22 10:57:18.414969: step: 186/464, loss: 0.2818724513053894 2023-01-22 10:57:19.093554: step: 188/464, loss: 0.0786951556801796 2023-01-22 10:57:19.719989: step: 190/464, loss: 0.6118411421775818 2023-01-22 10:57:20.325065: step: 192/464, loss: 0.10918118059635162 2023-01-22 10:57:20.897035: step: 
194/464, loss: 0.2923741638660431 2023-01-22 10:57:21.495368: step: 196/464, loss: 0.15965557098388672 2023-01-22 10:57:22.165771: step: 198/464, loss: 0.24203696846961975 2023-01-22 10:57:22.742977: step: 200/464, loss: 0.4994286000728607 2023-01-22 10:57:23.351515: step: 202/464, loss: 0.3190830647945404 2023-01-22 10:57:24.004103: step: 204/464, loss: 0.23423121869564056 2023-01-22 10:57:24.641140: step: 206/464, loss: 0.19695164263248444 2023-01-22 10:57:25.260055: step: 208/464, loss: 0.16323286294937134 2023-01-22 10:57:25.848428: step: 210/464, loss: 0.5013033151626587 2023-01-22 10:57:26.473751: step: 212/464, loss: 0.2052978128194809 2023-01-22 10:57:27.097103: step: 214/464, loss: 0.11153028160333633 2023-01-22 10:57:27.721368: step: 216/464, loss: 0.4214012622833252 2023-01-22 10:57:28.364348: step: 218/464, loss: 0.45658013224601746 2023-01-22 10:57:29.039486: step: 220/464, loss: 0.36566397547721863 2023-01-22 10:57:29.668660: step: 222/464, loss: 0.42506176233291626 2023-01-22 10:57:30.266591: step: 224/464, loss: 0.05392969027161598 2023-01-22 10:57:30.847192: step: 226/464, loss: 0.2593022882938385 2023-01-22 10:57:31.498129: step: 228/464, loss: 0.3885348439216614 2023-01-22 10:57:32.083866: step: 230/464, loss: 0.5690717697143555 2023-01-22 10:57:32.697738: step: 232/464, loss: 0.4385407567024231 2023-01-22 10:57:33.306388: step: 234/464, loss: 0.15703076124191284 2023-01-22 10:57:33.869356: step: 236/464, loss: 0.6849433779716492 2023-01-22 10:57:34.514443: step: 238/464, loss: 2.7772321701049805 2023-01-22 10:57:35.175273: step: 240/464, loss: 0.2341809868812561 2023-01-22 10:57:35.865303: step: 242/464, loss: 0.16340501606464386 2023-01-22 10:57:36.484392: step: 244/464, loss: 0.5387740731239319 2023-01-22 10:57:37.122918: step: 246/464, loss: 0.11686232686042786 2023-01-22 10:57:37.724325: step: 248/464, loss: 0.2346784472465515 2023-01-22 10:57:38.407342: step: 250/464, loss: 0.5751308798789978 2023-01-22 10:57:39.006925: step: 252/464, loss: 0.187391459941864 2023-01-22 10:57:39.608366: step: 254/464, loss: 0.3938005268573761 2023-01-22 10:57:40.241609: step: 256/464, loss: 0.16256101429462433 2023-01-22 10:57:40.837556: step: 258/464, loss: 0.16977809369564056 2023-01-22 10:57:41.452783: step: 260/464, loss: 0.13777224719524384 2023-01-22 10:57:42.084424: step: 262/464, loss: 0.6039695143699646 2023-01-22 10:57:42.651706: step: 264/464, loss: 0.04727890342473984 2023-01-22 10:57:43.241034: step: 266/464, loss: 0.3389414846897125 2023-01-22 10:57:43.821848: step: 268/464, loss: 0.505021333694458 2023-01-22 10:57:44.455274: step: 270/464, loss: 0.09833864122629166 2023-01-22 10:57:45.141867: step: 272/464, loss: 0.40759316086769104 2023-01-22 10:57:45.742478: step: 274/464, loss: 0.3481176495552063 2023-01-22 10:57:46.335098: step: 276/464, loss: 0.17788687348365784 2023-01-22 10:57:46.917788: step: 278/464, loss: 0.22429144382476807 2023-01-22 10:57:47.523264: step: 280/464, loss: 0.2772428095340729 2023-01-22 10:57:48.113378: step: 282/464, loss: 0.04167911782860756 2023-01-22 10:57:48.759428: step: 284/464, loss: 1.6752103567123413 2023-01-22 10:57:49.391557: step: 286/464, loss: 0.13085174560546875 2023-01-22 10:57:50.026170: step: 288/464, loss: 0.3987504541873932 2023-01-22 10:57:50.715812: step: 290/464, loss: 0.4655897915363312 2023-01-22 10:57:51.339828: step: 292/464, loss: 0.4385165274143219 2023-01-22 10:57:51.950966: step: 294/464, loss: 0.11675146967172623 2023-01-22 10:57:52.569006: step: 296/464, loss: 0.4354645311832428 2023-01-22 10:57:53.142385: 
step: 298/464, loss: 0.6538931727409363 2023-01-22 10:57:53.773258: step: 300/464, loss: 0.18928231298923492 2023-01-22 10:57:54.414408: step: 302/464, loss: 0.2807188034057617 2023-01-22 10:57:54.996157: step: 304/464, loss: 0.24691380560398102 2023-01-22 10:57:55.607199: step: 306/464, loss: 0.15931986272335052 2023-01-22 10:57:56.218184: step: 308/464, loss: 0.17005343735218048 2023-01-22 10:57:56.889065: step: 310/464, loss: 0.38724708557128906 2023-01-22 10:57:57.475609: step: 312/464, loss: 0.07521989941596985 2023-01-22 10:57:58.137762: step: 314/464, loss: 0.4269282817840576 2023-01-22 10:57:58.720673: step: 316/464, loss: 0.5009620785713196 2023-01-22 10:57:59.316758: step: 318/464, loss: 0.15193048119544983 2023-01-22 10:57:59.890342: step: 320/464, loss: 0.14470677077770233 2023-01-22 10:58:00.542223: step: 322/464, loss: 0.17145533859729767 2023-01-22 10:58:01.150362: step: 324/464, loss: 0.18966177105903625 2023-01-22 10:58:01.813038: step: 326/464, loss: 0.2075006663799286 2023-01-22 10:58:02.439219: step: 328/464, loss: 0.16791434586048126 2023-01-22 10:58:03.051007: step: 330/464, loss: 0.19567112624645233 2023-01-22 10:58:03.720583: step: 332/464, loss: 0.6014448404312134 2023-01-22 10:58:04.350991: step: 334/464, loss: 0.30597642064094543 2023-01-22 10:58:04.957516: step: 336/464, loss: 0.12940239906311035 2023-01-22 10:58:05.610232: step: 338/464, loss: 0.25960540771484375 2023-01-22 10:58:06.216154: step: 340/464, loss: 0.09363491088151932 2023-01-22 10:58:06.849085: step: 342/464, loss: 0.0951174795627594 2023-01-22 10:58:07.432709: step: 344/464, loss: 0.6307440400123596 2023-01-22 10:58:08.032983: step: 346/464, loss: 0.23237261176109314 2023-01-22 10:58:08.624700: step: 348/464, loss: 0.20420436561107635 2023-01-22 10:58:09.206438: step: 350/464, loss: 0.29692232608795166 2023-01-22 10:58:09.892689: step: 352/464, loss: 0.22677010297775269 2023-01-22 10:58:10.611082: step: 354/464, loss: 0.24775464832782745 2023-01-22 10:58:11.219132: step: 356/464, loss: 0.16239655017852783 2023-01-22 10:58:11.878236: step: 358/464, loss: 0.5020847320556641 2023-01-22 10:58:12.511510: step: 360/464, loss: 0.3233250081539154 2023-01-22 10:58:13.154024: step: 362/464, loss: 0.05828201398253441 2023-01-22 10:58:13.810763: step: 364/464, loss: 0.22994956374168396 2023-01-22 10:58:14.471371: step: 366/464, loss: 0.18350572884082794 2023-01-22 10:58:15.067364: step: 368/464, loss: 0.330773264169693 2023-01-22 10:58:15.697694: step: 370/464, loss: 1.7163708209991455 2023-01-22 10:58:16.316905: step: 372/464, loss: 0.15031535923480988 2023-01-22 10:58:16.922772: step: 374/464, loss: 0.13070261478424072 2023-01-22 10:58:17.545463: step: 376/464, loss: 4.782593250274658 2023-01-22 10:58:18.173089: step: 378/464, loss: 0.19723346829414368 2023-01-22 10:58:18.876298: step: 380/464, loss: 0.21597571671009064 2023-01-22 10:58:19.558911: step: 382/464, loss: 0.1993137151002884 2023-01-22 10:58:20.146675: step: 384/464, loss: 0.4163748025894165 2023-01-22 10:58:20.748729: step: 386/464, loss: 0.5400003790855408 2023-01-22 10:58:21.380207: step: 388/464, loss: 0.0977616235613823 2023-01-22 10:58:21.997322: step: 390/464, loss: 0.2727479934692383 2023-01-22 10:58:22.640495: step: 392/464, loss: 0.4199751317501068 2023-01-22 10:58:23.267538: step: 394/464, loss: 0.34497177600860596 2023-01-22 10:58:23.880541: step: 396/464, loss: 0.47766903042793274 2023-01-22 10:58:24.530738: step: 398/464, loss: 0.37423473596572876 2023-01-22 10:58:25.141522: step: 400/464, loss: 0.1728307157754898 2023-01-22 
10:58:25.776002: step: 402/464, loss: 0.5170645117759705 2023-01-22 10:58:26.362755: step: 404/464, loss: 0.3612293004989624 2023-01-22 10:58:27.052776: step: 406/464, loss: 0.31947973370552063 2023-01-22 10:58:27.707914: step: 408/464, loss: 0.11924263834953308 2023-01-22 10:58:28.303622: step: 410/464, loss: 0.52806156873703 2023-01-22 10:58:28.899061: step: 412/464, loss: 0.45087093114852905 2023-01-22 10:58:29.536344: step: 414/464, loss: 0.07716657966375351 2023-01-22 10:58:30.077612: step: 416/464, loss: 0.22068598866462708 2023-01-22 10:58:30.686502: step: 418/464, loss: 0.2416449636220932 2023-01-22 10:58:31.318778: step: 420/464, loss: 0.23852917551994324 2023-01-22 10:58:31.920638: step: 422/464, loss: 0.4021141231060028 2023-01-22 10:58:32.544178: step: 424/464, loss: 0.2847093343734741 2023-01-22 10:58:33.175536: step: 426/464, loss: 0.1429942548274994 2023-01-22 10:58:33.826405: step: 428/464, loss: 0.47880011796951294 2023-01-22 10:58:34.443696: step: 430/464, loss: 0.9428687691688538 2023-01-22 10:58:35.081054: step: 432/464, loss: 0.621206521987915 2023-01-22 10:58:35.709877: step: 434/464, loss: 0.1105286180973053 2023-01-22 10:58:36.390298: step: 436/464, loss: 0.13531525433063507 2023-01-22 10:58:37.066820: step: 438/464, loss: 0.3364936113357544 2023-01-22 10:58:37.664454: step: 440/464, loss: 0.12852786481380463 2023-01-22 10:58:38.271167: step: 442/464, loss: 0.25219330191612244 2023-01-22 10:58:38.853798: step: 444/464, loss: 1.138298749923706 2023-01-22 10:58:39.501295: step: 446/464, loss: 0.24194155633449554 2023-01-22 10:58:40.114420: step: 448/464, loss: 1.1671202182769775 2023-01-22 10:58:40.781171: step: 450/464, loss: 0.10700088739395142 2023-01-22 10:58:41.408952: step: 452/464, loss: 0.28190112113952637 2023-01-22 10:58:41.998951: step: 454/464, loss: 0.31971225142478943 2023-01-22 10:58:42.647479: step: 456/464, loss: 0.9074978828430176 2023-01-22 10:58:43.284960: step: 458/464, loss: 0.13902981579303741 2023-01-22 10:58:43.900801: step: 460/464, loss: 0.20467960834503174 2023-01-22 10:58:44.465167: step: 462/464, loss: 0.04869261011481285 2023-01-22 10:58:45.097665: step: 464/464, loss: 0.17675481736660004 2023-01-22 10:58:45.720285: step: 466/464, loss: 0.9886869788169861 2023-01-22 10:58:46.398758: step: 468/464, loss: 0.11232612282037735 2023-01-22 10:58:46.986708: step: 470/464, loss: 0.06668158620595932 2023-01-22 10:58:47.570494: step: 472/464, loss: 0.11958461254835129 2023-01-22 10:58:48.155558: step: 474/464, loss: 0.6422286033630371 2023-01-22 10:58:48.790308: step: 476/464, loss: 0.0932425931096077 2023-01-22 10:58:49.406697: step: 478/464, loss: 0.23732468485832214 2023-01-22 10:58:50.037678: step: 480/464, loss: 0.21387946605682373 2023-01-22 10:58:50.696437: step: 482/464, loss: 0.25116702914237976 2023-01-22 10:58:51.336017: step: 484/464, loss: 0.30064210295677185 2023-01-22 10:58:51.950275: step: 486/464, loss: 0.35024961829185486 2023-01-22 10:58:52.587304: step: 488/464, loss: 0.24333778023719788 2023-01-22 10:58:53.259738: step: 490/464, loss: 0.14430415630340576 2023-01-22 10:58:53.901017: step: 492/464, loss: 0.2119111269712448 2023-01-22 10:58:54.545081: step: 494/464, loss: 0.07665427029132843 2023-01-22 10:58:55.203354: step: 496/464, loss: 0.20361050963401794 2023-01-22 10:58:55.838029: step: 498/464, loss: 0.2889696955680847 2023-01-22 10:58:56.461660: step: 500/464, loss: 0.14266535639762878 2023-01-22 10:58:57.043176: step: 502/464, loss: 0.8530055284500122 2023-01-22 10:58:57.665257: step: 504/464, loss: 0.9472627639770508 
2023-01-22 10:58:58.279829: step: 506/464, loss: 0.20206870138645172 2023-01-22 10:58:59.024065: step: 508/464, loss: 0.10462598502635956 2023-01-22 10:58:59.646981: step: 510/464, loss: 0.147408589720726 2023-01-22 10:59:00.231129: step: 512/464, loss: 0.24323834478855133 2023-01-22 10:59:00.872787: step: 514/464, loss: 0.16047310829162598 2023-01-22 10:59:01.530508: step: 516/464, loss: 0.15635882318019867 2023-01-22 10:59:02.193536: step: 518/464, loss: 0.4227506220340729 2023-01-22 10:59:02.846730: step: 520/464, loss: 0.2195674329996109 2023-01-22 10:59:03.461217: step: 522/464, loss: 0.11909165978431702 2023-01-22 10:59:04.049550: step: 524/464, loss: 0.18910080194473267 2023-01-22 10:59:04.657109: step: 526/464, loss: 0.3546684682369232 2023-01-22 10:59:05.300406: step: 528/464, loss: 0.12872424721717834 2023-01-22 10:59:05.909402: step: 530/464, loss: 0.07886382937431335 2023-01-22 10:59:06.529002: step: 532/464, loss: 0.16459733247756958 2023-01-22 10:59:07.156365: step: 534/464, loss: 0.1703285276889801 2023-01-22 10:59:07.788541: step: 536/464, loss: 0.6366952061653137 2023-01-22 10:59:08.450194: step: 538/464, loss: 0.09655551612377167 2023-01-22 10:59:09.037360: step: 540/464, loss: 0.21718533337116241 2023-01-22 10:59:09.666285: step: 542/464, loss: 0.17979641258716583 2023-01-22 10:59:10.264898: step: 544/464, loss: 0.37652286887168884 2023-01-22 10:59:10.841534: step: 546/464, loss: 0.08691912144422531 2023-01-22 10:59:11.465209: step: 548/464, loss: 0.3053312599658966 2023-01-22 10:59:12.189635: step: 550/464, loss: 0.28237849473953247 2023-01-22 10:59:12.872430: step: 552/464, loss: 0.594807505607605 2023-01-22 10:59:13.486000: step: 554/464, loss: 0.6040641665458679 2023-01-22 10:59:14.112414: step: 556/464, loss: 0.17427340149879456 2023-01-22 10:59:14.758970: step: 558/464, loss: 0.34472134709358215 2023-01-22 10:59:15.352381: step: 560/464, loss: 0.2316531389951706 2023-01-22 10:59:16.002468: step: 562/464, loss: 0.6255875825881958 2023-01-22 10:59:16.571912: step: 564/464, loss: 0.24147167801856995 2023-01-22 10:59:17.202002: step: 566/464, loss: 0.6458610892295837 2023-01-22 10:59:17.845751: step: 568/464, loss: 0.47449079155921936 2023-01-22 10:59:18.412388: step: 570/464, loss: 0.42779847979545593 2023-01-22 10:59:19.066144: step: 572/464, loss: 0.18389077484607697 2023-01-22 10:59:19.659363: step: 574/464, loss: 0.1575031727552414 2023-01-22 10:59:20.300890: step: 576/464, loss: 0.25704607367515564 2023-01-22 10:59:20.940945: step: 578/464, loss: 0.1292218267917633 2023-01-22 10:59:21.621363: step: 580/464, loss: 0.5134697556495667 2023-01-22 10:59:22.241512: step: 582/464, loss: 0.3436373472213745 2023-01-22 10:59:22.846775: step: 584/464, loss: 0.25400784611701965 2023-01-22 10:59:23.440628: step: 586/464, loss: 0.35351771116256714 2023-01-22 10:59:24.052180: step: 588/464, loss: 0.15195152163505554 2023-01-22 10:59:24.664881: step: 590/464, loss: 1.1360224485397339 2023-01-22 10:59:25.237126: step: 592/464, loss: 0.13217885792255402 2023-01-22 10:59:25.846185: step: 594/464, loss: 0.2512986361980438 2023-01-22 10:59:26.480716: step: 596/464, loss: 0.2633204162120819 2023-01-22 10:59:27.108404: step: 598/464, loss: 0.07968783378601074 2023-01-22 10:59:27.729831: step: 600/464, loss: 0.16802997887134552 2023-01-22 10:59:28.414588: step: 602/464, loss: 0.11943110078573227 2023-01-22 10:59:29.071228: step: 604/464, loss: 0.2673475444316864 2023-01-22 10:59:29.714083: step: 606/464, loss: 0.11374931782484055 2023-01-22 10:59:30.362599: step: 608/464, loss: 
1.5972604751586914 2023-01-22 10:59:30.977759: step: 610/464, loss: 0.14802365005016327 2023-01-22 10:59:31.630430: step: 612/464, loss: 0.09203001856803894 2023-01-22 10:59:32.236253: step: 614/464, loss: 0.1717901974916458 2023-01-22 10:59:32.838684: step: 616/464, loss: 0.34820762276649475 2023-01-22 10:59:33.439119: step: 618/464, loss: 0.14696389436721802 2023-01-22 10:59:34.036710: step: 620/464, loss: 0.19122423231601715 2023-01-22 10:59:34.688829: step: 622/464, loss: 0.1879727691411972 2023-01-22 10:59:35.277631: step: 624/464, loss: 0.18596497178077698 2023-01-22 10:59:35.919353: step: 626/464, loss: 0.4350574314594269 2023-01-22 10:59:36.592334: step: 628/464, loss: 0.1632673740386963 2023-01-22 10:59:37.187053: step: 630/464, loss: 0.1334041804075241 2023-01-22 10:59:37.790154: step: 632/464, loss: 0.1441114842891693 2023-01-22 10:59:38.402521: step: 634/464, loss: 0.3799474835395813 2023-01-22 10:59:39.034583: step: 636/464, loss: 0.3114156424999237 2023-01-22 10:59:39.638786: step: 638/464, loss: 0.23049066960811615 2023-01-22 10:59:40.248148: step: 640/464, loss: 0.3577210307121277 2023-01-22 10:59:40.854215: step: 642/464, loss: 0.15999066829681396 2023-01-22 10:59:41.473393: step: 644/464, loss: 0.20487497746944427 2023-01-22 10:59:42.090619: step: 646/464, loss: 0.2615987956523895 2023-01-22 10:59:42.781990: step: 648/464, loss: 0.49903854727745056 2023-01-22 10:59:43.427191: step: 650/464, loss: 0.2588825821876526 2023-01-22 10:59:44.003962: step: 652/464, loss: 0.8313612937927246 2023-01-22 10:59:44.623737: step: 654/464, loss: 0.0567249171435833 2023-01-22 10:59:45.282688: step: 656/464, loss: 0.11456841230392456 2023-01-22 10:59:45.877958: step: 658/464, loss: 0.33578962087631226 2023-01-22 10:59:46.517049: step: 660/464, loss: 0.887792706489563 2023-01-22 10:59:47.131406: step: 662/464, loss: 0.1429959535598755 2023-01-22 10:59:47.718499: step: 664/464, loss: 0.9350042343139648 2023-01-22 10:59:48.354367: step: 666/464, loss: 0.16369758546352386 2023-01-22 10:59:48.973484: step: 668/464, loss: 0.24192282557487488 2023-01-22 10:59:49.606235: step: 670/464, loss: 0.22956036031246185 2023-01-22 10:59:50.254705: step: 672/464, loss: 0.5422670245170593 2023-01-22 10:59:50.842723: step: 674/464, loss: 0.3296627998352051 2023-01-22 10:59:51.431539: step: 676/464, loss: 0.15746179223060608 2023-01-22 10:59:52.094674: step: 678/464, loss: 0.37195703387260437 2023-01-22 10:59:52.763967: step: 680/464, loss: 0.6448568105697632 2023-01-22 10:59:53.344331: step: 682/464, loss: 0.4757457971572876 2023-01-22 10:59:53.957083: step: 684/464, loss: 0.16026271879673004 2023-01-22 10:59:54.584443: step: 686/464, loss: 0.34827935695648193 2023-01-22 10:59:55.249471: step: 688/464, loss: 0.4063800871372223 2023-01-22 10:59:55.896676: step: 690/464, loss: 0.1765885055065155 2023-01-22 10:59:56.526482: step: 692/464, loss: 0.0919233188033104 2023-01-22 10:59:57.210921: step: 694/464, loss: 0.3090978264808655 2023-01-22 10:59:57.829983: step: 696/464, loss: 0.06764783710241318 2023-01-22 10:59:58.559296: step: 698/464, loss: 0.2639875113964081 2023-01-22 10:59:59.157010: step: 700/464, loss: 0.13689519464969635 2023-01-22 10:59:59.794806: step: 702/464, loss: 0.38917234539985657 2023-01-22 11:00:00.439032: step: 704/464, loss: 0.12273726612329483 2023-01-22 11:00:01.038894: step: 706/464, loss: 0.21885795891284943 2023-01-22 11:00:01.661600: step: 708/464, loss: 0.2251049429178238 2023-01-22 11:00:02.251951: step: 710/464, loss: 0.1878732144832611 2023-01-22 11:00:02.849356: step: 712/464, 
loss: 0.8885036110877991 2023-01-22 11:00:04.139239: step: 714/464, loss: 0.0979180634021759 2023-01-22 11:00:04.721893: step: 716/464, loss: 0.32866814732551575 2023-01-22 11:00:05.308182: step: 718/464, loss: 0.09184763580560684 2023-01-22 11:00:05.908145: step: 720/464, loss: 0.26306676864624023 2023-01-22 11:00:06.508534: step: 722/464, loss: 0.08239500224590302 2023-01-22 11:00:07.115249: step: 724/464, loss: 0.06577450037002563 2023-01-22 11:00:07.839022: step: 726/464, loss: 0.38353481888771057 2023-01-22 11:00:08.514919: step: 728/464, loss: 0.23454375565052032 2023-01-22 11:00:09.152484: step: 730/464, loss: 0.13665996491909027 2023-01-22 11:00:09.739108: step: 732/464, loss: 0.227525994181633 2023-01-22 11:00:10.388101: step: 734/464, loss: 0.9282320737838745 2023-01-22 11:00:11.062808: step: 736/464, loss: 0.2728773057460785 2023-01-22 11:00:11.658013: step: 738/464, loss: 0.2511763870716095 2023-01-22 11:00:12.420744: step: 740/464, loss: 2.0587427616119385 2023-01-22 11:00:13.061637: step: 742/464, loss: 0.24321237206459045 2023-01-22 11:00:13.677499: step: 744/464, loss: 0.15974242985248566 2023-01-22 11:00:14.331899: step: 746/464, loss: 0.6454801559448242 2023-01-22 11:00:14.908041: step: 748/464, loss: 0.4255438446998596 2023-01-22 11:00:15.551455: step: 750/464, loss: 0.17316550016403198 2023-01-22 11:00:16.176907: step: 752/464, loss: 0.9697102308273315 2023-01-22 11:00:16.798243: step: 754/464, loss: 0.19061174988746643 2023-01-22 11:00:17.409508: step: 756/464, loss: 0.749290406703949 2023-01-22 11:00:18.060825: step: 758/464, loss: 0.23312531411647797 2023-01-22 11:00:18.641819: step: 760/464, loss: 0.5433177947998047 2023-01-22 11:00:19.272012: step: 762/464, loss: 0.3394450545310974 2023-01-22 11:00:19.936097: step: 764/464, loss: 0.24980996549129486 2023-01-22 11:00:20.514839: step: 766/464, loss: 0.1455468237400055 2023-01-22 11:00:21.166342: step: 768/464, loss: 0.24350574612617493 2023-01-22 11:00:21.826719: step: 770/464, loss: 0.8219181299209595 2023-01-22 11:00:22.439868: step: 772/464, loss: 0.16108842194080353 2023-01-22 11:00:23.050410: step: 774/464, loss: 0.09629150480031967 2023-01-22 11:00:23.681689: step: 776/464, loss: 0.2186581939458847 2023-01-22 11:00:24.281465: step: 778/464, loss: 0.11710034310817719 2023-01-22 11:00:24.956921: step: 780/464, loss: 0.13322243094444275 2023-01-22 11:00:25.536047: step: 782/464, loss: 0.3277522027492523 2023-01-22 11:00:26.176619: step: 784/464, loss: 0.2634742856025696 2023-01-22 11:00:26.920843: step: 786/464, loss: 0.21504506468772888 2023-01-22 11:00:27.585596: step: 788/464, loss: 0.21060091257095337 2023-01-22 11:00:28.265775: step: 790/464, loss: 0.13833431899547577 2023-01-22 11:00:28.920898: step: 792/464, loss: 0.26143184304237366 2023-01-22 11:00:29.564667: step: 794/464, loss: 0.11014603078365326 2023-01-22 11:00:30.241682: step: 796/464, loss: 0.4107128381729126 2023-01-22 11:00:30.798313: step: 798/464, loss: 0.14051704108715057 2023-01-22 11:00:31.341741: step: 800/464, loss: 0.5600093603134155 2023-01-22 11:00:31.899386: step: 802/464, loss: 0.20583002269268036 2023-01-22 11:00:32.492239: step: 804/464, loss: 0.07505656778812408 2023-01-22 11:00:33.122066: step: 806/464, loss: 0.10342224687337875 2023-01-22 11:00:33.690840: step: 808/464, loss: 0.07260114699602127 2023-01-22 11:00:34.306440: step: 810/464, loss: 0.3308141529560089 2023-01-22 11:00:34.964740: step: 812/464, loss: 0.4845127761363983 2023-01-22 11:00:35.673226: step: 814/464, loss: 0.14502455294132233 2023-01-22 11:00:36.341432: step: 
816/464, loss: 0.41885289549827576 2023-01-22 11:00:36.995232: step: 818/464, loss: 0.1776328831911087 2023-01-22 11:00:37.663694: step: 820/464, loss: 1.3490067720413208 2023-01-22 11:00:38.311966: step: 822/464, loss: 0.7090396881103516 2023-01-22 11:00:38.938592: step: 824/464, loss: 0.05094294250011444 2023-01-22 11:00:39.625546: step: 826/464, loss: 0.1734030693769455 2023-01-22 11:00:40.263488: step: 828/464, loss: 0.9050090909004211 2023-01-22 11:00:40.884910: step: 830/464, loss: 0.10672441869974136 2023-01-22 11:00:41.508064: step: 832/464, loss: 0.14033271372318268 2023-01-22 11:00:42.178968: step: 834/464, loss: 0.4053362309932709 2023-01-22 11:00:42.755080: step: 836/464, loss: 0.14464549720287323 2023-01-22 11:00:43.435697: step: 838/464, loss: 0.39528706669807434 2023-01-22 11:00:44.045248: step: 840/464, loss: 0.4757036566734314 2023-01-22 11:00:44.631680: step: 842/464, loss: 0.13344378769397736 2023-01-22 11:00:45.278222: step: 844/464, loss: 0.20037075877189636 2023-01-22 11:00:45.932706: step: 846/464, loss: 1.9156296253204346 2023-01-22 11:00:46.585863: step: 848/464, loss: 0.2232384830713272 2023-01-22 11:00:47.239410: step: 850/464, loss: 0.41045475006103516 2023-01-22 11:00:47.867392: step: 852/464, loss: 1.0136959552764893 2023-01-22 11:00:48.460264: step: 854/464, loss: 0.2570632994174957 2023-01-22 11:00:49.064469: step: 856/464, loss: 5.904565811157227 2023-01-22 11:00:49.626375: step: 858/464, loss: 0.7196681499481201 2023-01-22 11:00:50.289674: step: 860/464, loss: 0.39808255434036255 2023-01-22 11:00:50.884086: step: 862/464, loss: 0.5597915649414062 2023-01-22 11:00:51.455590: step: 864/464, loss: 0.07103001326322556 2023-01-22 11:00:52.149854: step: 866/464, loss: 0.2170611023902893 2023-01-22 11:00:52.815049: step: 868/464, loss: 0.3302440643310547 2023-01-22 11:00:53.509868: step: 870/464, loss: 0.16965359449386597 2023-01-22 11:00:54.120224: step: 872/464, loss: 0.45563748478889465 2023-01-22 11:00:54.778903: step: 874/464, loss: 0.1041213721036911 2023-01-22 11:00:55.400275: step: 876/464, loss: 0.19837379455566406 2023-01-22 11:00:56.021842: step: 878/464, loss: 0.26904308795928955 2023-01-22 11:00:56.648108: step: 880/464, loss: 0.4328576326370239 2023-01-22 11:00:57.298154: step: 882/464, loss: 0.26968351006507874 2023-01-22 11:00:57.968168: step: 884/464, loss: 0.8352260589599609 2023-01-22 11:00:58.635696: step: 886/464, loss: 0.0773007869720459 2023-01-22 11:00:59.283745: step: 888/464, loss: 0.12449493259191513 2023-01-22 11:00:59.960236: step: 890/464, loss: 1.1425083875656128 2023-01-22 11:01:00.592492: step: 892/464, loss: 0.5259579420089722 2023-01-22 11:01:01.142652: step: 894/464, loss: 0.09961270540952682 2023-01-22 11:01:01.761260: step: 896/464, loss: 0.3245554566383362 2023-01-22 11:01:02.397791: step: 898/464, loss: 0.5239310264587402 2023-01-22 11:01:03.049689: step: 900/464, loss: 0.2951996922492981 2023-01-22 11:01:03.624180: step: 902/464, loss: 0.17997859418392181 2023-01-22 11:01:04.240590: step: 904/464, loss: 0.2505132257938385 2023-01-22 11:01:04.797401: step: 906/464, loss: 0.14798392355442047 2023-01-22 11:01:05.452051: step: 908/464, loss: 1.538906455039978 2023-01-22 11:01:06.090478: step: 910/464, loss: 0.38162997364997864 2023-01-22 11:01:06.702628: step: 912/464, loss: 0.15071247518062592 2023-01-22 11:01:07.424152: step: 914/464, loss: 0.231694296002388 2023-01-22 11:01:08.045446: step: 916/464, loss: 0.20453636348247528 2023-01-22 11:01:08.615144: step: 918/464, loss: 0.34366247057914734 2023-01-22 11:01:09.337655: 
step: 920/464, loss: 0.12969070672988892 2023-01-22 11:01:09.924542: step: 922/464, loss: 0.21827757358551025 2023-01-22 11:01:10.537043: step: 924/464, loss: 0.20832978188991547 2023-01-22 11:01:11.119438: step: 926/464, loss: 0.1685469001531601 2023-01-22 11:01:11.781543: step: 928/464, loss: 0.20983704924583435 2023-01-22 11:01:12.425801: step: 930/464, loss: 0.025569764897227287 ================================================== Loss: 0.343 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.273544610151753, 'r': 0.33064120809614167, 'f1': 0.2993950458190149}, 'combined': 0.22060687586664254, 'epoch': 10} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.27720597169423244, 'r': 0.3041883711428905, 'f1': 0.2900710474383606}, 'combined': 0.18937280817219396, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2704563938938939, 'r': 0.34179119228336496, 'f1': 0.3019680776753283}, 'combined': 0.22250279407655768, 'epoch': 10} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2884953334845766, 'r': 0.3120730053671545, 'f1': 0.2998213523112759}, 'combined': 0.19573829218249097, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28140213854849433, 'r': 0.3406727977114599, 'f1': 0.3082138444531148}, 'combined': 0.22710493801808457, 'epoch': 10} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28910246423910374, 'r': 0.3053113340172153, 'f1': 0.2969859020212161}, 'combined': 0.19388716919519808, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24707602339181284, 'r': 0.40238095238095234, 'f1': 0.30615942028985504}, 'combined': 0.20410628019323668, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22916666666666666, 'r': 0.358695652173913, 'f1': 0.2796610169491525}, 'combined': 0.13983050847457626, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42105263157894735, 'r': 0.27586206896551724, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2662921186245096, 'r': 0.3127795473027921, 'f1': 0.28766984542508106}, 'combined': 0.21196725452374393, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2822796165112715, 'r': 0.30584607990808405, 'f1': 0.2935906887713004}, 'combined': 0.19167060510457953, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26543209876543206, 'r': 0.4095238095238095, 'f1': 0.3220973782771535}, 'combined': 0.21473158551810234, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2922339997070884, 'r': 0.315523995888678, 'f1': 0.3034327478710462}, 'combined': 0.22358202474708666, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 
0.3099448839945791, 'r': 0.28150957353636086, 'f1': 0.2950436876486859}, 'combined': 0.19261919504525607, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3382352941176471, 'r': 0.5, 'f1': 0.4035087719298246}, 'combined': 0.2017543859649123, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28487491086284766, 'r': 0.3248763214963405, 'f1': 0.303563513171226}, 'combined': 0.2236783781261665, 'epoch': 9} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2834185544035629, 'r': 0.30291983108270715, 'f1': 0.2928448921331714}, 'combined': 0.19118371196258857, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47368421052631576, 'r': 0.3103448275862069, 'f1': 0.375}, 'combined': 0.25, 'epoch': 9} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:03:52.547198: step: 2/464, loss: 0.12749086320400238 2023-01-22 11:03:53.205884: step: 4/464, loss: 0.31675276160240173 2023-01-22 11:03:53.784771: step: 6/464, loss: 0.0454997643828392 2023-01-22 11:03:54.409099: step: 8/464, loss: 0.16314424574375153 2023-01-22 11:03:55.009462: step: 10/464, loss: 0.16916592419147491 2023-01-22 11:03:55.614362: step: 12/464, loss: 0.3637135326862335 2023-01-22 11:03:56.116072: step: 14/464, loss: 0.25179359316825867 2023-01-22 11:03:56.712449: step: 16/464, loss: 0.054944101721048355 2023-01-22 11:03:57.289822: step: 18/464, loss: 0.17395174503326416 2023-01-22 11:03:57.925338: step: 20/464, loss: 0.18405458331108093 2023-01-22 11:03:58.608383: step: 22/464, loss: 0.09289663285017014 2023-01-22 11:03:59.197605: step: 24/464, loss: 0.30236729979515076 2023-01-22 11:03:59.835866: step: 26/464, loss: 0.3274882137775421 2023-01-22 11:04:00.487761: step: 28/464, loss: 0.13694429397583008 2023-01-22 11:04:01.116580: step: 30/464, loss: 0.10084377229213715 2023-01-22 11:04:01.699408: step: 32/464, loss: 0.10855576395988464 2023-01-22 11:04:02.358319: step: 34/464, loss: 0.11531636863946915 2023-01-22 11:04:02.986613: step: 36/464, loss: 0.3022859990596771 2023-01-22 11:04:03.566379: step: 38/464, loss: 0.1443987339735031 2023-01-22 11:04:04.177977: step: 40/464, loss: 0.7737499475479126 2023-01-22 11:04:04.766912: step: 42/464, loss: 0.24089673161506653 2023-01-22 11:04:05.408027: step: 44/464, loss: 0.16677124798297882 2023-01-22 11:04:05.989065: step: 46/464, loss: 0.16070647537708282 2023-01-22 11:04:06.557143: step: 48/464, loss: 0.09703407436609268 2023-01-22 11:04:07.308409: step: 50/464, loss: 0.1399368792772293 2023-01-22 11:04:07.930424: step: 52/464, loss: 0.2855876386165619 2023-01-22 11:04:08.532980: step: 54/464, loss: 0.083491250872612 2023-01-22 11:04:09.298651: step: 56/464, loss: 0.27693670988082886 2023-01-22 11:04:10.048557: step: 58/464, loss: 0.9555200338363647 2023-01-22 11:04:10.616548: step: 60/464, loss: 0.14794579148292542 2023-01-22 11:04:11.166352: step: 62/464, loss: 0.14458629488945007 2023-01-22 11:04:11.821049: step: 64/464, loss: 0.09685725718736649 2023-01-22 11:04:12.520512: step: 66/464, loss: 0.17956098914146423 2023-01-22 11:04:13.087769: step: 68/464, loss: 0.4257870018482208 2023-01-22 11:04:13.742578: step: 70/464, loss: 
0.18782329559326172 2023-01-22 11:04:14.383653: step: 72/464, loss: 0.22681228816509247 2023-01-22 11:04:14.948089: step: 74/464, loss: 0.16160351037979126 2023-01-22 11:04:15.618402: step: 76/464, loss: 0.12337357550859451 2023-01-22 11:04:16.223107: step: 78/464, loss: 0.4088905155658722 2023-01-22 11:04:16.837615: step: 80/464, loss: 0.13035966455936432 2023-01-22 11:04:17.461736: step: 82/464, loss: 0.03869122639298439 2023-01-22 11:04:18.063970: step: 84/464, loss: 0.14494037628173828 2023-01-22 11:04:18.656951: step: 86/464, loss: 0.35110121965408325 2023-01-22 11:04:19.264057: step: 88/464, loss: 0.03901585564017296 2023-01-22 11:04:19.907278: step: 90/464, loss: 0.11286500841379166 2023-01-22 11:04:20.548249: step: 92/464, loss: 0.14447273313999176 2023-01-22 11:04:21.193752: step: 94/464, loss: 0.2669197916984558 2023-01-22 11:04:21.779636: step: 96/464, loss: 0.038198988884687424 2023-01-22 11:04:22.450246: step: 98/464, loss: 0.2722930610179901 2023-01-22 11:04:23.008225: step: 100/464, loss: 0.08510913699865341 2023-01-22 11:04:23.627766: step: 102/464, loss: 0.1735830307006836 2023-01-22 11:04:24.256869: step: 104/464, loss: 0.24530547857284546 2023-01-22 11:04:24.887980: step: 106/464, loss: 0.15999534726142883 2023-01-22 11:04:25.483648: step: 108/464, loss: 1.8271517753601074 2023-01-22 11:04:26.099229: step: 110/464, loss: 0.5154167413711548 2023-01-22 11:04:26.756700: step: 112/464, loss: 0.19308911263942719 2023-01-22 11:04:27.407625: step: 114/464, loss: 0.23946955800056458 2023-01-22 11:04:28.050382: step: 116/464, loss: 0.08525333553552628 2023-01-22 11:04:28.707295: step: 118/464, loss: 0.45863062143325806 2023-01-22 11:04:29.350886: step: 120/464, loss: 0.5794384479522705 2023-01-22 11:04:30.006782: step: 122/464, loss: 0.43312111496925354 2023-01-22 11:04:30.618159: step: 124/464, loss: 0.23851554095745087 2023-01-22 11:04:31.240509: step: 126/464, loss: 0.039798635989427567 2023-01-22 11:04:31.877444: step: 128/464, loss: 0.4510522484779358 2023-01-22 11:04:32.521410: step: 130/464, loss: 0.15508149564266205 2023-01-22 11:04:33.129271: step: 132/464, loss: 0.11749690026044846 2023-01-22 11:04:33.751343: step: 134/464, loss: 0.10931003838777542 2023-01-22 11:04:34.394042: step: 136/464, loss: 0.22598673403263092 2023-01-22 11:04:35.071065: step: 138/464, loss: 0.3516949415206909 2023-01-22 11:04:35.738113: step: 140/464, loss: 0.17344337701797485 2023-01-22 11:04:36.340542: step: 142/464, loss: 0.15874630212783813 2023-01-22 11:04:36.941521: step: 144/464, loss: 1.534786343574524 2023-01-22 11:04:37.634484: step: 146/464, loss: 0.7723768949508667 2023-01-22 11:04:38.210172: step: 148/464, loss: 0.3668593466281891 2023-01-22 11:04:38.856004: step: 150/464, loss: 0.14745070040225983 2023-01-22 11:04:39.487444: step: 152/464, loss: 0.1011207103729248 2023-01-22 11:04:40.161089: step: 154/464, loss: 0.25891536474227905 2023-01-22 11:04:40.816842: step: 156/464, loss: 0.051904287189245224 2023-01-22 11:04:41.451676: step: 158/464, loss: 0.3749949336051941 2023-01-22 11:04:42.051505: step: 160/464, loss: 0.10414428263902664 2023-01-22 11:04:42.688298: step: 162/464, loss: 0.0400250181555748 2023-01-22 11:04:43.268003: step: 164/464, loss: 0.19313499331474304 2023-01-22 11:04:43.943256: step: 166/464, loss: 1.6297383308410645 2023-01-22 11:04:44.523342: step: 168/464, loss: 0.21383130550384521 2023-01-22 11:04:45.175349: step: 170/464, loss: 0.3675953149795532 2023-01-22 11:04:45.824475: step: 172/464, loss: 0.0551636628806591 2023-01-22 11:04:46.438269: step: 174/464, 
loss: 0.2502910792827606 2023-01-22 11:04:47.067069: step: 176/464, loss: 0.1080092266201973 2023-01-22 11:04:47.648847: step: 178/464, loss: 0.2360805720090866 2023-01-22 11:04:48.278052: step: 180/464, loss: 0.16856279969215393 2023-01-22 11:04:48.848630: step: 182/464, loss: 0.08450154960155487 2023-01-22 11:04:49.647843: step: 184/464, loss: 0.09318307787179947 2023-01-22 11:04:50.343825: step: 186/464, loss: 0.22691823542118073 2023-01-22 11:04:51.020018: step: 188/464, loss: 0.10292818397283554 2023-01-22 11:04:51.685054: step: 190/464, loss: 0.201989084482193 2023-01-22 11:04:52.312601: step: 192/464, loss: 1.099289894104004 2023-01-22 11:04:52.897330: step: 194/464, loss: 0.2058786004781723 2023-01-22 11:04:53.429435: step: 196/464, loss: 0.15418343245983124 2023-01-22 11:04:54.027215: step: 198/464, loss: 0.23155158758163452 2023-01-22 11:04:54.681883: step: 200/464, loss: 0.4182059168815613 2023-01-22 11:04:55.307512: step: 202/464, loss: 0.14667606353759766 2023-01-22 11:04:55.903042: step: 204/464, loss: 3.515292167663574 2023-01-22 11:04:56.523487: step: 206/464, loss: 0.16485071182250977 2023-01-22 11:04:57.173314: step: 208/464, loss: 0.04763878136873245 2023-01-22 11:04:57.812686: step: 210/464, loss: 0.1819470077753067 2023-01-22 11:04:58.455840: step: 212/464, loss: 0.2851487100124359 2023-01-22 11:04:59.042914: step: 214/464, loss: 0.18764753639698029 2023-01-22 11:04:59.648058: step: 216/464, loss: 0.2979755997657776 2023-01-22 11:05:00.280705: step: 218/464, loss: 0.17477896809577942 2023-01-22 11:05:00.882700: step: 220/464, loss: 0.12481864541769028 2023-01-22 11:05:01.533757: step: 222/464, loss: 0.5897207260131836 2023-01-22 11:05:02.129509: step: 224/464, loss: 0.6215092539787292 2023-01-22 11:05:02.743098: step: 226/464, loss: 0.1567331850528717 2023-01-22 11:05:03.344257: step: 228/464, loss: 0.19493567943572998 2023-01-22 11:05:03.925933: step: 230/464, loss: 0.2208445817232132 2023-01-22 11:05:04.537433: step: 232/464, loss: 0.2621890902519226 2023-01-22 11:05:05.134732: step: 234/464, loss: 0.21368613839149475 2023-01-22 11:05:05.775576: step: 236/464, loss: 0.1271626353263855 2023-01-22 11:05:06.366450: step: 238/464, loss: 0.7963235378265381 2023-01-22 11:05:06.989568: step: 240/464, loss: 0.19868683815002441 2023-01-22 11:05:07.544659: step: 242/464, loss: 0.2507416009902954 2023-01-22 11:05:08.186971: step: 244/464, loss: 0.07598677277565002 2023-01-22 11:05:08.813574: step: 246/464, loss: 0.8013763427734375 2023-01-22 11:05:09.455229: step: 248/464, loss: 0.3838903605937958 2023-01-22 11:05:10.137438: step: 250/464, loss: 0.13968589901924133 2023-01-22 11:05:10.749749: step: 252/464, loss: 1.1072745323181152 2023-01-22 11:05:11.341537: step: 254/464, loss: 0.11604411900043488 2023-01-22 11:05:11.983688: step: 256/464, loss: 0.7088308930397034 2023-01-22 11:05:12.620238: step: 258/464, loss: 0.23516815900802612 2023-01-22 11:05:13.311041: step: 260/464, loss: 0.14172761142253876 2023-01-22 11:05:13.907502: step: 262/464, loss: 0.3576776385307312 2023-01-22 11:05:14.540279: step: 264/464, loss: 0.17829737067222595 2023-01-22 11:05:15.157573: step: 266/464, loss: 0.12431307137012482 2023-01-22 11:05:15.729997: step: 268/464, loss: 0.5392585396766663 2023-01-22 11:05:16.296563: step: 270/464, loss: 0.09986944496631622 2023-01-22 11:05:16.897824: step: 272/464, loss: 0.16034409403800964 2023-01-22 11:05:17.523254: step: 274/464, loss: 0.3127243220806122 2023-01-22 11:05:18.154393: step: 276/464, loss: 0.5250251889228821 2023-01-22 11:05:18.785979: step: 
278/464, loss: 0.12543682754039764 2023-01-22 11:05:19.411275: step: 280/464, loss: 0.40157195925712585 2023-01-22 11:05:20.038573: step: 282/464, loss: 0.09980019181966782 2023-01-22 11:05:20.676276: step: 284/464, loss: 4.3395771980285645 2023-01-22 11:05:21.261466: step: 286/464, loss: 0.057638343423604965 2023-01-22 11:05:21.926813: step: 288/464, loss: 0.1725718080997467 2023-01-22 11:05:22.564523: step: 290/464, loss: 0.2406628280878067 2023-01-22 11:05:23.184380: step: 292/464, loss: 0.2707064151763916 2023-01-22 11:05:23.819042: step: 294/464, loss: 0.10121627897024155 2023-01-22 11:05:24.445706: step: 296/464, loss: 0.0865129828453064 2023-01-22 11:05:25.027751: step: 298/464, loss: 0.08518489450216293 2023-01-22 11:05:25.661755: step: 300/464, loss: 0.2202875018119812 2023-01-22 11:05:26.219383: step: 302/464, loss: 0.043326690793037415 2023-01-22 11:05:26.898819: step: 304/464, loss: 0.21041861176490784 2023-01-22 11:05:27.565005: step: 306/464, loss: 0.12960094213485718 2023-01-22 11:05:28.215652: step: 308/464, loss: 0.20338009297847748 2023-01-22 11:05:28.991294: step: 310/464, loss: 0.11522030830383301 2023-01-22 11:05:29.679771: step: 312/464, loss: 0.11141736805438995 2023-01-22 11:05:30.370687: step: 314/464, loss: 0.06860148161649704 2023-01-22 11:05:31.016559: step: 316/464, loss: 0.3496860861778259 2023-01-22 11:05:31.634189: step: 318/464, loss: 0.1790461540222168 2023-01-22 11:05:32.308911: step: 320/464, loss: 0.6583364009857178 2023-01-22 11:05:32.905338: step: 322/464, loss: 0.2994745969772339 2023-01-22 11:05:33.601073: step: 324/464, loss: 0.22190432250499725 2023-01-22 11:05:34.212247: step: 326/464, loss: 0.08175873756408691 2023-01-22 11:05:34.827091: step: 328/464, loss: 0.16329525411128998 2023-01-22 11:05:35.370843: step: 330/464, loss: 0.038639314472675323 2023-01-22 11:05:35.956182: step: 332/464, loss: 0.20517034828662872 2023-01-22 11:05:36.628039: step: 334/464, loss: 0.2505493760108948 2023-01-22 11:05:37.187711: step: 336/464, loss: 0.1626298874616623 2023-01-22 11:05:37.798785: step: 338/464, loss: 0.24210214614868164 2023-01-22 11:05:38.375773: step: 340/464, loss: 0.46474650502204895 2023-01-22 11:05:39.043305: step: 342/464, loss: 0.05986591428518295 2023-01-22 11:05:39.624673: step: 344/464, loss: 0.38242480158805847 2023-01-22 11:05:40.253157: step: 346/464, loss: 1.0713443756103516 2023-01-22 11:05:40.898706: step: 348/464, loss: 0.27796074748039246 2023-01-22 11:05:41.553374: step: 350/464, loss: 0.2751466929912567 2023-01-22 11:05:42.227225: step: 352/464, loss: 0.3686355650424957 2023-01-22 11:05:42.867876: step: 354/464, loss: 0.062489546835422516 2023-01-22 11:05:43.521520: step: 356/464, loss: 0.1203119084239006 2023-01-22 11:05:44.145081: step: 358/464, loss: 0.0970081016421318 2023-01-22 11:05:44.771961: step: 360/464, loss: 0.09083372354507446 2023-01-22 11:05:45.437473: step: 362/464, loss: 0.11161081492900848 2023-01-22 11:05:46.084212: step: 364/464, loss: 0.43868952989578247 2023-01-22 11:05:46.777860: step: 366/464, loss: 0.14594672620296478 2023-01-22 11:05:47.397790: step: 368/464, loss: 0.23021414875984192 2023-01-22 11:05:48.004925: step: 370/464, loss: 0.26811346411705017 2023-01-22 11:05:48.585723: step: 372/464, loss: 0.1601974219083786 2023-01-22 11:05:49.272204: step: 374/464, loss: 0.1352207511663437 2023-01-22 11:05:49.906429: step: 376/464, loss: 0.11734617501497269 2023-01-22 11:05:50.523873: step: 378/464, loss: 0.8292566537857056 2023-01-22 11:05:51.111018: step: 380/464, loss: 0.13411815464496613 2023-01-22 
11:05:51.771269: step: 382/464, loss: 0.0297938734292984 2023-01-22 11:05:52.383043: step: 384/464, loss: 0.47775954008102417 2023-01-22 11:05:53.015749: step: 386/464, loss: 0.25933510065078735 2023-01-22 11:05:53.674626: step: 388/464, loss: 0.43880245089530945 2023-01-22 11:05:54.291747: step: 390/464, loss: 0.15018054842948914 2023-01-22 11:05:54.894130: step: 392/464, loss: 1.495927333831787 2023-01-22 11:05:55.471363: step: 394/464, loss: 0.14321881532669067 2023-01-22 11:05:56.101220: step: 396/464, loss: 0.4615509510040283 2023-01-22 11:05:56.736077: step: 398/464, loss: 0.2685154676437378 2023-01-22 11:05:57.362186: step: 400/464, loss: 0.35869333148002625 2023-01-22 11:05:58.037248: step: 402/464, loss: 0.49212753772735596 2023-01-22 11:05:58.616810: step: 404/464, loss: 0.21866217255592346 2023-01-22 11:05:59.253020: step: 406/464, loss: 0.216976597905159 2023-01-22 11:05:59.862235: step: 408/464, loss: 0.6453323364257812 2023-01-22 11:06:00.444593: step: 410/464, loss: 0.0943211019039154 2023-01-22 11:06:01.063296: step: 412/464, loss: 0.0908517837524414 2023-01-22 11:06:01.656101: step: 414/464, loss: 0.17199210822582245 2023-01-22 11:06:02.319009: step: 416/464, loss: 0.08901306241750717 2023-01-22 11:06:02.903339: step: 418/464, loss: 0.05542987212538719 2023-01-22 11:06:03.508557: step: 420/464, loss: 0.17803852260112762 2023-01-22 11:06:04.097160: step: 422/464, loss: 0.09037181735038757 2023-01-22 11:06:04.763673: step: 424/464, loss: 0.2514219284057617 2023-01-22 11:06:05.425517: step: 426/464, loss: 0.18447107076644897 2023-01-22 11:06:06.104700: step: 428/464, loss: 0.09749721735715866 2023-01-22 11:06:06.678903: step: 430/464, loss: 0.8195016980171204 2023-01-22 11:06:07.278309: step: 432/464, loss: 0.07239938527345657 2023-01-22 11:06:07.821299: step: 434/464, loss: 0.1674031764268875 2023-01-22 11:06:08.490977: step: 436/464, loss: 0.19262349605560303 2023-01-22 11:06:09.130152: step: 438/464, loss: 0.42374101281166077 2023-01-22 11:06:09.758440: step: 440/464, loss: 0.12468132376670837 2023-01-22 11:06:10.409384: step: 442/464, loss: 0.18052774667739868 2023-01-22 11:06:11.008958: step: 444/464, loss: 0.15312236547470093 2023-01-22 11:06:11.628289: step: 446/464, loss: 0.19045044481754303 2023-01-22 11:06:12.223363: step: 448/464, loss: 0.15186341106891632 2023-01-22 11:06:12.860318: step: 450/464, loss: 0.12853415310382843 2023-01-22 11:06:13.527148: step: 452/464, loss: 0.26743295788764954 2023-01-22 11:06:14.142981: step: 454/464, loss: 0.41534423828125 2023-01-22 11:06:14.769250: step: 456/464, loss: 0.10010375827550888 2023-01-22 11:06:15.412910: step: 458/464, loss: 0.055969975888729095 2023-01-22 11:06:16.057677: step: 460/464, loss: 0.4095710515975952 2023-01-22 11:06:16.617659: step: 462/464, loss: 0.1918717473745346 2023-01-22 11:06:17.267202: step: 464/464, loss: 0.09351630508899689 2023-01-22 11:06:17.845420: step: 466/464, loss: 0.02616412192583084 2023-01-22 11:06:18.505421: step: 468/464, loss: 0.7575204968452454 2023-01-22 11:06:19.139632: step: 470/464, loss: 0.2488774061203003 2023-01-22 11:06:19.779946: step: 472/464, loss: 0.5412296652793884 2023-01-22 11:06:20.471465: step: 474/464, loss: 0.13925917446613312 2023-01-22 11:06:21.139676: step: 476/464, loss: 0.3951810598373413 2023-01-22 11:06:21.754291: step: 478/464, loss: 0.18069101870059967 2023-01-22 11:06:22.359122: step: 480/464, loss: 0.09641866385936737 2023-01-22 11:06:22.978036: step: 482/464, loss: 0.2705845534801483 2023-01-22 11:06:23.646357: step: 484/464, loss: 0.20070071518421173 
2023-01-22 11:06:24.323361: step: 486/464, loss: 0.49247515201568604 2023-01-22 11:06:24.944558: step: 488/464, loss: 0.5439991354942322 2023-01-22 11:06:25.538809: step: 490/464, loss: 0.16222120821475983 2023-01-22 11:06:26.067861: step: 492/464, loss: 0.21172896027565002 2023-01-22 11:06:26.695030: step: 494/464, loss: 0.286189466714859 2023-01-22 11:06:27.398303: step: 496/464, loss: 0.08380153030157089 2023-01-22 11:06:28.122060: step: 498/464, loss: 0.3246591091156006 2023-01-22 11:06:28.731583: step: 500/464, loss: 0.09528906643390656 2023-01-22 11:06:29.391463: step: 502/464, loss: 0.22733502089977264 2023-01-22 11:06:30.024145: step: 504/464, loss: 0.052736204117536545 2023-01-22 11:06:30.642342: step: 506/464, loss: 0.17266158759593964 2023-01-22 11:06:31.350703: step: 508/464, loss: 0.6875770688056946 2023-01-22 11:06:31.969367: step: 510/464, loss: 0.09864681214094162 2023-01-22 11:06:32.615378: step: 512/464, loss: 0.14266711473464966 2023-01-22 11:06:33.258399: step: 514/464, loss: 0.25594624876976013 2023-01-22 11:06:33.952937: step: 516/464, loss: 0.10775838047266006 2023-01-22 11:06:34.609568: step: 518/464, loss: 0.484315425157547 2023-01-22 11:06:35.255296: step: 520/464, loss: 0.10059746354818344 2023-01-22 11:06:35.801002: step: 522/464, loss: 0.2775045335292816 2023-01-22 11:06:36.433148: step: 524/464, loss: 0.2562503516674042 2023-01-22 11:06:37.131112: step: 526/464, loss: 0.29064902663230896 2023-01-22 11:06:37.714658: step: 528/464, loss: 0.19548296928405762 2023-01-22 11:06:38.297753: step: 530/464, loss: 0.4946255683898926 2023-01-22 11:06:38.850687: step: 532/464, loss: 0.11917847394943237 2023-01-22 11:06:39.403107: step: 534/464, loss: 0.0935017466545105 2023-01-22 11:06:40.057246: step: 536/464, loss: 0.10296916961669922 2023-01-22 11:06:40.704947: step: 538/464, loss: 0.5450205206871033 2023-01-22 11:06:41.434427: step: 540/464, loss: 0.04718095809221268 2023-01-22 11:06:42.154713: step: 542/464, loss: 0.5736770629882812 2023-01-22 11:06:42.831882: step: 544/464, loss: 0.1732597053050995 2023-01-22 11:06:43.425928: step: 546/464, loss: 0.1497419774532318 2023-01-22 11:06:44.051574: step: 548/464, loss: 0.41093266010284424 2023-01-22 11:06:44.811908: step: 550/464, loss: 0.2896675765514374 2023-01-22 11:06:45.482854: step: 552/464, loss: 0.10698677599430084 2023-01-22 11:06:46.138901: step: 554/464, loss: 0.26328244805336 2023-01-22 11:06:46.809129: step: 556/464, loss: 0.18400733172893524 2023-01-22 11:06:47.457040: step: 558/464, loss: 0.119113028049469 2023-01-22 11:06:48.041285: step: 560/464, loss: 0.1332729309797287 2023-01-22 11:06:48.720834: step: 562/464, loss: 0.16722966730594635 2023-01-22 11:06:49.395356: step: 564/464, loss: 4.722121715545654 2023-01-22 11:06:50.051738: step: 566/464, loss: 0.2186322808265686 2023-01-22 11:06:50.636385: step: 568/464, loss: 0.09318897873163223 2023-01-22 11:06:51.255954: step: 570/464, loss: 0.2538876533508301 2023-01-22 11:06:51.824508: step: 572/464, loss: 0.13304342329502106 2023-01-22 11:06:52.487254: step: 574/464, loss: 0.15800759196281433 2023-01-22 11:06:53.083646: step: 576/464, loss: 0.25743475556373596 2023-01-22 11:06:53.698236: step: 578/464, loss: 0.09190484136343002 2023-01-22 11:06:54.409390: step: 580/464, loss: 0.09873532503843307 2023-01-22 11:06:55.085179: step: 582/464, loss: 0.4712373912334442 2023-01-22 11:06:55.693477: step: 584/464, loss: 0.12569165229797363 2023-01-22 11:06:56.327844: step: 586/464, loss: 0.19897960126399994 2023-01-22 11:06:57.002135: step: 588/464, loss: 
0.3795648217201233 2023-01-22 11:06:57.650493: step: 590/464, loss: 0.08776821941137314 2023-01-22 11:06:58.293435: step: 592/464, loss: 0.12162689119577408 2023-01-22 11:06:58.892390: step: 594/464, loss: 0.24996206164360046 2023-01-22 11:06:59.534808: step: 596/464, loss: 0.40573614835739136 2023-01-22 11:07:00.067904: step: 598/464, loss: 0.15663489699363708 2023-01-22 11:07:00.677106: step: 600/464, loss: 0.2295760214328766 2023-01-22 11:07:01.384321: step: 602/464, loss: 0.6561931371688843 2023-01-22 11:07:02.034731: step: 604/464, loss: 0.13190947473049164 2023-01-22 11:07:02.614238: step: 606/464, loss: 0.13859710097312927 2023-01-22 11:07:03.205503: step: 608/464, loss: 0.24577781558036804 2023-01-22 11:07:03.842379: step: 610/464, loss: 0.05066034570336342 2023-01-22 11:07:04.408885: step: 612/464, loss: 0.11498422175645828 2023-01-22 11:07:05.094269: step: 614/464, loss: 2.149258613586426 2023-01-22 11:07:05.729374: step: 616/464, loss: 0.140337735414505 2023-01-22 11:07:06.368699: step: 618/464, loss: 0.18237602710723877 2023-01-22 11:07:07.047627: step: 620/464, loss: 0.1998087614774704 2023-01-22 11:07:07.765077: step: 622/464, loss: 0.19751664996147156 2023-01-22 11:07:08.414684: step: 624/464, loss: 0.10175623744726181 2023-01-22 11:07:09.079918: step: 626/464, loss: 0.49671512842178345 2023-01-22 11:07:09.671837: step: 628/464, loss: 0.031716883182525635 2023-01-22 11:07:10.301052: step: 630/464, loss: 0.11761437356472015 2023-01-22 11:07:10.899382: step: 632/464, loss: 0.34634527564048767 2023-01-22 11:07:11.558600: step: 634/464, loss: 0.29505980014801025 2023-01-22 11:07:12.189795: step: 636/464, loss: 0.15596410632133484 2023-01-22 11:07:12.796764: step: 638/464, loss: 0.10715252161026001 2023-01-22 11:07:13.430814: step: 640/464, loss: 0.9346346259117126 2023-01-22 11:07:14.048715: step: 642/464, loss: 0.11918459087610245 2023-01-22 11:07:14.662999: step: 644/464, loss: 0.6793946027755737 2023-01-22 11:07:15.315656: step: 646/464, loss: 0.11271430552005768 2023-01-22 11:07:15.908532: step: 648/464, loss: 0.08095047622919083 2023-01-22 11:07:16.539704: step: 650/464, loss: 0.21067102253437042 2023-01-22 11:07:17.305503: step: 652/464, loss: 0.5532771944999695 2023-01-22 11:07:17.904549: step: 654/464, loss: 0.12611809372901917 2023-01-22 11:07:18.615213: step: 656/464, loss: 0.13094867765903473 2023-01-22 11:07:19.299053: step: 658/464, loss: 0.7898499965667725 2023-01-22 11:07:19.930849: step: 660/464, loss: 0.12929470837116241 2023-01-22 11:07:20.502080: step: 662/464, loss: 0.06109068915247917 2023-01-22 11:07:21.147429: step: 664/464, loss: 0.0926525816321373 2023-01-22 11:07:21.738306: step: 666/464, loss: 0.5199193358421326 2023-01-22 11:07:22.346496: step: 668/464, loss: 0.4801885783672333 2023-01-22 11:07:22.965889: step: 670/464, loss: 0.19196748733520508 2023-01-22 11:07:23.542654: step: 672/464, loss: 0.37548545002937317 2023-01-22 11:07:24.115416: step: 674/464, loss: 0.1620676964521408 2023-01-22 11:07:24.795857: step: 676/464, loss: 0.3277484178543091 2023-01-22 11:07:25.429691: step: 678/464, loss: 0.08762902021408081 2023-01-22 11:07:26.032572: step: 680/464, loss: 0.13059160113334656 2023-01-22 11:07:26.668471: step: 682/464, loss: 0.31305909156799316 2023-01-22 11:07:27.339639: step: 684/464, loss: 0.23233528435230255 2023-01-22 11:07:27.993921: step: 686/464, loss: 0.19373933970928192 2023-01-22 11:07:28.612646: step: 688/464, loss: 0.09218301624059677 2023-01-22 11:07:29.299781: step: 690/464, loss: 0.1672954112291336 2023-01-22 11:07:29.977811: 
step: 692/464, loss: 0.6126482486724854 2023-01-22 11:07:30.616592: step: 694/464, loss: 0.15671324729919434 2023-01-22 11:07:31.229206: step: 696/464, loss: 0.3100976049900055 2023-01-22 11:07:31.898501: step: 698/464, loss: 0.5160097479820251 2023-01-22 11:07:32.480030: step: 700/464, loss: 0.5026370286941528 2023-01-22 11:07:33.112422: step: 702/464, loss: 0.4017609655857086 2023-01-22 11:07:33.697393: step: 704/464, loss: 0.11298071593046188 2023-01-22 11:07:34.389439: step: 706/464, loss: 0.2641523778438568 2023-01-22 11:07:35.071392: step: 708/464, loss: 0.1176559180021286 2023-01-22 11:07:35.735837: step: 710/464, loss: 0.15013401210308075 2023-01-22 11:07:36.491506: step: 712/464, loss: 0.29811954498291016 2023-01-22 11:07:37.036518: step: 714/464, loss: 0.13270290195941925 2023-01-22 11:07:37.633084: step: 716/464, loss: 0.22287288308143616 2023-01-22 11:07:38.276764: step: 718/464, loss: 0.3418169319629669 2023-01-22 11:07:38.981229: step: 720/464, loss: 0.08123631030321121 2023-01-22 11:07:39.667947: step: 722/464, loss: 0.14334438741207123 2023-01-22 11:07:40.311278: step: 724/464, loss: 0.24343787133693695 2023-01-22 11:07:40.926167: step: 726/464, loss: 0.18862269818782806 2023-01-22 11:07:41.549377: step: 728/464, loss: 0.24572555720806122 2023-01-22 11:07:42.200749: step: 730/464, loss: 0.20189018547534943 2023-01-22 11:07:42.793273: step: 732/464, loss: 0.3579574525356293 2023-01-22 11:07:43.432960: step: 734/464, loss: 0.39582088589668274 2023-01-22 11:07:44.041917: step: 736/464, loss: 0.09848456084728241 2023-01-22 11:07:44.716440: step: 738/464, loss: 0.387652724981308 2023-01-22 11:07:45.340758: step: 740/464, loss: 0.12421456724405289 2023-01-22 11:07:45.921470: step: 742/464, loss: 0.13614031672477722 2023-01-22 11:07:46.614116: step: 744/464, loss: 0.5611193776130676 2023-01-22 11:07:47.214923: step: 746/464, loss: 0.18564994633197784 2023-01-22 11:07:47.889164: step: 748/464, loss: 0.17745666205883026 2023-01-22 11:07:48.618415: step: 750/464, loss: 0.34961196780204773 2023-01-22 11:07:49.269088: step: 752/464, loss: 1.3152897357940674 2023-01-22 11:07:49.873771: step: 754/464, loss: 0.1217946857213974 2023-01-22 11:07:50.505614: step: 756/464, loss: 0.36618006229400635 2023-01-22 11:07:51.161655: step: 758/464, loss: 0.2611903250217438 2023-01-22 11:07:51.746618: step: 760/464, loss: 0.12786732614040375 2023-01-22 11:07:52.384678: step: 762/464, loss: 0.46200627088546753 2023-01-22 11:07:53.054321: step: 764/464, loss: 0.4778285622596741 2023-01-22 11:07:53.657041: step: 766/464, loss: 0.7928224205970764 2023-01-22 11:07:54.392192: step: 768/464, loss: 0.2523520886898041 2023-01-22 11:07:55.059605: step: 770/464, loss: 0.1595786213874817 2023-01-22 11:07:55.687901: step: 772/464, loss: 0.1493327021598816 2023-01-22 11:07:56.298131: step: 774/464, loss: 0.23997730016708374 2023-01-22 11:07:56.921443: step: 776/464, loss: 0.06494729220867157 2023-01-22 11:07:57.599491: step: 778/464, loss: 0.07723943889141083 2023-01-22 11:07:58.211658: step: 780/464, loss: 0.08414718508720398 2023-01-22 11:07:58.832583: step: 782/464, loss: 0.4716000556945801 2023-01-22 11:07:59.431716: step: 784/464, loss: 0.16538885235786438 2023-01-22 11:08:00.047378: step: 786/464, loss: 0.29278749227523804 2023-01-22 11:08:00.695582: step: 788/464, loss: 0.37954071164131165 2023-01-22 11:08:01.360156: step: 790/464, loss: 0.21551840007305145 2023-01-22 11:08:01.981370: step: 792/464, loss: 0.07903808355331421 2023-01-22 11:08:02.627168: step: 794/464, loss: 0.593177855014801 2023-01-22 
11:08:03.184136: step: 796/464, loss: 0.5082368850708008 2023-01-22 11:08:03.757162: step: 798/464, loss: 0.36979803442955017 2023-01-22 11:08:04.372502: step: 800/464, loss: 0.35664430260658264 2023-01-22 11:08:05.095488: step: 802/464, loss: 0.09309973567724228 2023-01-22 11:08:05.687770: step: 804/464, loss: 0.11440564692020416 2023-01-22 11:08:06.248594: step: 806/464, loss: 0.8553968667984009 2023-01-22 11:08:06.880054: step: 808/464, loss: 0.1172894537448883 2023-01-22 11:08:07.453064: step: 810/464, loss: 0.2246769815683365 2023-01-22 11:08:08.013850: step: 812/464, loss: 0.12205179780721664 2023-01-22 11:08:08.668627: step: 814/464, loss: 0.7095149159431458 2023-01-22 11:08:09.329318: step: 816/464, loss: 0.7112863659858704 2023-01-22 11:08:09.945719: step: 818/464, loss: 0.2133084535598755 2023-01-22 11:08:10.504106: step: 820/464, loss: 0.3527364730834961 2023-01-22 11:08:11.066922: step: 822/464, loss: 0.18876755237579346 2023-01-22 11:08:11.696640: step: 824/464, loss: 0.21099744737148285 2023-01-22 11:08:12.403804: step: 826/464, loss: 0.0990365520119667 2023-01-22 11:08:13.105980: step: 828/464, loss: 0.18760396540164948 2023-01-22 11:08:13.801850: step: 830/464, loss: 0.3649967610836029 2023-01-22 11:08:14.393878: step: 832/464, loss: 0.04117352142930031 2023-01-22 11:08:14.996937: step: 834/464, loss: 0.21748271584510803 2023-01-22 11:08:15.598234: step: 836/464, loss: 0.5736399292945862 2023-01-22 11:08:16.135754: step: 838/464, loss: 0.9643673896789551 2023-01-22 11:08:16.767397: step: 840/464, loss: 0.09535722434520721 2023-01-22 11:08:17.371987: step: 842/464, loss: 0.12945391237735748 2023-01-22 11:08:18.058939: step: 844/464, loss: 0.2260231077671051 2023-01-22 11:08:18.723321: step: 846/464, loss: 0.1597353219985962 2023-01-22 11:08:19.387429: step: 848/464, loss: 0.19716760516166687 2023-01-22 11:08:20.058797: step: 850/464, loss: 0.21844589710235596 2023-01-22 11:08:20.697523: step: 852/464, loss: 0.5882822871208191 2023-01-22 11:08:21.251876: step: 854/464, loss: 0.10058721154928207 2023-01-22 11:08:21.851058: step: 856/464, loss: 0.33563870191574097 2023-01-22 11:08:22.473734: step: 858/464, loss: 0.14041025936603546 2023-01-22 11:08:23.058619: step: 860/464, loss: 0.13803352415561676 2023-01-22 11:08:23.686320: step: 862/464, loss: 0.12181901931762695 2023-01-22 11:08:24.235557: step: 864/464, loss: 0.06367615610361099 2023-01-22 11:08:24.930663: step: 866/464, loss: 0.42367619276046753 2023-01-22 11:08:25.564014: step: 868/464, loss: 0.2047378569841385 2023-01-22 11:08:26.143106: step: 870/464, loss: 0.07182841747999191 2023-01-22 11:08:26.775437: step: 872/464, loss: 0.12919709086418152 2023-01-22 11:08:27.393774: step: 874/464, loss: 0.26941314339637756 2023-01-22 11:08:28.022548: step: 876/464, loss: 0.5692474246025085 2023-01-22 11:08:28.664825: step: 878/464, loss: 0.17177428305149078 2023-01-22 11:08:29.287557: step: 880/464, loss: 0.6481063961982727 2023-01-22 11:08:29.927960: step: 882/464, loss: 0.3271678388118744 2023-01-22 11:08:30.604369: step: 884/464, loss: 0.2979361116886139 2023-01-22 11:08:31.228540: step: 886/464, loss: 0.0645914226770401 2023-01-22 11:08:31.864641: step: 888/464, loss: 2.7499001026153564 2023-01-22 11:08:32.447931: step: 890/464, loss: 0.13479718565940857 2023-01-22 11:08:33.069436: step: 892/464, loss: 0.11270148307085037 2023-01-22 11:08:33.695563: step: 894/464, loss: 0.32310348749160767 2023-01-22 11:08:34.335473: step: 896/464, loss: 0.388225257396698 2023-01-22 11:08:34.960916: step: 898/464, loss: 0.042076922953128815 
2023-01-22 11:08:35.620550: step: 900/464, loss: 0.03660179674625397 2023-01-22 11:08:36.247405: step: 902/464, loss: 0.1979013830423355 2023-01-22 11:08:36.879782: step: 904/464, loss: 0.20062708854675293 2023-01-22 11:08:37.448320: step: 906/464, loss: 0.1717149317264557 2023-01-22 11:08:38.091363: step: 908/464, loss: 0.20906086266040802 2023-01-22 11:08:38.756247: step: 910/464, loss: 0.45263901352882385 2023-01-22 11:08:39.404182: step: 912/464, loss: 0.22234554588794708 2023-01-22 11:08:40.017389: step: 914/464, loss: 0.13453200459480286 2023-01-22 11:08:40.658055: step: 916/464, loss: 0.24855343997478485 2023-01-22 11:08:41.251080: step: 918/464, loss: 0.47845444083213806 2023-01-22 11:08:41.927138: step: 920/464, loss: 0.11308290809392929 2023-01-22 11:08:42.578530: step: 922/464, loss: 0.26682430505752563 2023-01-22 11:08:43.251214: step: 924/464, loss: 0.30981704592704773 2023-01-22 11:08:43.899709: step: 926/464, loss: 0.1841297149658203 2023-01-22 11:08:44.548903: step: 928/464, loss: 0.1580587923526764 2023-01-22 11:08:45.101801: step: 930/464, loss: 0.3135313391685486 ================================================== Loss: 0.298 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30680626417233564, 'r': 0.36677029682840884, 'f1': 0.3341191813804174}, 'combined': 0.24619308101714965, 'epoch': 11} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2906931347961049, 'r': 0.2952310441547218, 'f1': 0.2929445167056875}, 'combined': 0.19124875183894624, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3017241379310345, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} New best chinese model... New best korean model... 
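
For post-processing this log rather than reading it by eye: every training entry follows the fixed pattern `<timestamp>: step: N/464, loss: X` (the step counter advances by 2 and runs to roughly twice the /464 denominator, so the denominator appears to count batches rather than logged lines). The helper below is a minimal sketch and is not part of train.py; the function name and regex are hypothetical, and it assumes the epoch-level `Loss: 0.298` line is an aggregate, most likely the mean, of the per-step losses, which the log itself does not state.

```python
import re
from statistics import mean

# Each step entry in the log looks like:
#   2023-01-22 11:08:35.620550: step: 900/464, loss: 0.03660179674625397
# The regex captures the step index and the loss value.
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.]+)")

def parse_step_losses(log_text: str) -> list[float]:
    """Return the per-step loss values in the order they appear in the log."""
    return [float(loss) for _, loss in STEP_RE.findall(log_text)]

if __name__ == "__main__":
    # Two entries copied verbatim from the epoch-11 log above.
    sample = (
        "2023-01-22 11:08:35.620550: step: 900/464, loss: 0.03660179674625397 "
        "2023-01-22 11:08:36.247405: step: 902/464, loss: 0.1979013830423355"
    )
    losses = parse_step_losses(sample)
    # Over a full epoch this average should land near the printed "Loss:" value.
    print(len(losses), mean(losses))
```
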
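Each evaluation block reports, per language, a template precision/recall/F1, a slot precision/recall/F1, and a combined score. The printed numbers are consistent with the standard harmonic-mean F1 and with combined being the product of the two F1 values (for Dev Chinese above, 0.7368421052631579 * 0.3238567730802416 = 0.238631306480178). The snippet below re-derives these from the printed epoch-11 Dev Chinese dict; it is an inference from the logged values, not code taken from train.py.

```python
def f1(p: float, r: float) -> float:
    """Standard F1: harmonic mean of precision and recall."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Dev Chinese, epoch 11, copied from the evaluation block above.
dev_chinese = {
    "template": {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579},
    "slot": {"p": 0.29695411392405063, "r": 0.3561195445920304,
             "f1": 0.3238567730802416},
    "combined": 0.238631306480178,
}

# The slot F1 matches the harmonic mean of its precision and recall ...
assert abs(f1(dev_chinese["slot"]["p"], dev_chinese["slot"]["r"])
           - dev_chinese["slot"]["f1"]) < 1e-6
# ... and the combined score matches template F1 * slot F1.
assert abs(dev_chinese["template"]["f1"] * dev_chinese["slot"]["f1"]
           - dev_chinese["combined"]) < 1e-6
```

The same product relationship holds for the other printed rows as well (e.g. Sample Korean above: 0.5 * 0.3870967741935484 = 0.1935483870967742), which is why the combined score drops whenever either the template or the slot F1 drops.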
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28487491086284766, 'r': 0.3248763214963405, 'f1': 0.303563513171226}, 'combined': 0.2236783781261665, 'epoch': 9} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2834185544035629, 'r': 0.30291983108270715, 'f1': 0.2928448921331714}, 'combined': 0.19118371196258857, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47368421052631576, 'r': 0.3103448275862069, 'f1': 0.375}, 'combined': 0.25, 'epoch': 9} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:11:35.930354: step: 2/464, loss: 0.09609854221343994 2023-01-22 11:11:36.557338: step: 4/464, loss: 1.1962323188781738 2023-01-22 11:11:37.185674: step: 6/464, loss: 0.8360907435417175 2023-01-22 11:11:37.804935: step: 8/464, loss: 0.14039205014705658 2023-01-22 11:11:38.444000: step: 10/464, loss: 0.14549694955348969 2023-01-22 11:11:39.047907: step: 12/464, loss: 0.7983697056770325 2023-01-22 11:11:39.726202: step: 14/464, loss: 0.12540067732334137 2023-01-22 11:11:40.308495: step: 16/464, loss: 0.08263260126113892 2023-01-22 11:11:40.916344: step: 18/464, loss: 0.33770740032196045 2023-01-22 11:11:41.527258: step: 20/464, loss: 0.11727096885442734 2023-01-22 11:11:42.209043: step: 22/464, loss: 0.09696599841117859 2023-01-22 11:11:42.837021: step: 24/464, loss: 0.131135493516922 2023-01-22 11:11:43.470843: step: 26/464, loss: 0.12408984452486038 2023-01-22 11:11:44.131642: step: 28/464, loss: 0.0949990451335907 2023-01-22 11:11:44.833862: step: 30/464, loss: 0.1661735624074936 2023-01-22 11:11:45.400857: step: 32/464, loss: 0.14546817541122437 2023-01-22 11:11:46.024861: step: 34/464, loss: 0.049527350813150406 2023-01-22 11:11:46.648441: step: 36/464, loss: 0.07935893535614014 2023-01-22 
11:11:47.319008: step: 38/464, loss: 0.07667119055986404 2023-01-22 11:11:47.971618: step: 40/464, loss: 0.1613290011882782 2023-01-22 11:11:48.605352: step: 42/464, loss: 0.07237593084573746 2023-01-22 11:11:49.211151: step: 44/464, loss: 0.07318137586116791 2023-01-22 11:11:49.886603: step: 46/464, loss: 0.041637614369392395 2023-01-22 11:11:50.492387: step: 48/464, loss: 0.03524008020758629 2023-01-22 11:11:51.143625: step: 50/464, loss: 0.08365655690431595 2023-01-22 11:11:51.752895: step: 52/464, loss: 0.07698964327573776 2023-01-22 11:11:52.408093: step: 54/464, loss: 0.11151023954153061 2023-01-22 11:11:53.047924: step: 56/464, loss: 0.3668684661388397 2023-01-22 11:11:53.608814: step: 58/464, loss: 0.23254403471946716 2023-01-22 11:11:54.240703: step: 60/464, loss: 0.34759053587913513 2023-01-22 11:11:54.869040: step: 62/464, loss: 0.10199126601219177 2023-01-22 11:11:55.456019: step: 64/464, loss: 0.46143120527267456 2023-01-22 11:11:56.071016: step: 66/464, loss: 0.07514477521181107 2023-01-22 11:11:56.750757: step: 68/464, loss: 0.24961446225643158 2023-01-22 11:11:57.460592: step: 70/464, loss: 0.19061315059661865 2023-01-22 11:11:58.085607: step: 72/464, loss: 0.1399098038673401 2023-01-22 11:11:58.767196: step: 74/464, loss: 0.11505573987960815 2023-01-22 11:11:59.328074: step: 76/464, loss: 0.14768767356872559 2023-01-22 11:11:59.987569: step: 78/464, loss: 0.27581924200057983 2023-01-22 11:12:00.657706: step: 80/464, loss: 0.13880829513072968 2023-01-22 11:12:01.280215: step: 82/464, loss: 0.0872029960155487 2023-01-22 11:12:01.949246: step: 84/464, loss: 0.4102860391139984 2023-01-22 11:12:02.606889: step: 86/464, loss: 0.4079272150993347 2023-01-22 11:12:03.216960: step: 88/464, loss: 0.18566866219043732 2023-01-22 11:12:03.827589: step: 90/464, loss: 0.3931496739387512 2023-01-22 11:12:04.445217: step: 92/464, loss: 0.0867336243391037 2023-01-22 11:12:05.056122: step: 94/464, loss: 0.29617542028427124 2023-01-22 11:12:05.622198: step: 96/464, loss: 0.12288712710142136 2023-01-22 11:12:06.296522: step: 98/464, loss: 0.08559727668762207 2023-01-22 11:12:06.837744: step: 100/464, loss: 0.23719806969165802 2023-01-22 11:12:07.443799: step: 102/464, loss: 0.24966849386692047 2023-01-22 11:12:08.058341: step: 104/464, loss: 0.22053930163383484 2023-01-22 11:12:08.712124: step: 106/464, loss: 2.525239944458008 2023-01-22 11:12:09.300833: step: 108/464, loss: 0.05087382718920708 2023-01-22 11:12:10.008722: step: 110/464, loss: 0.20529168844223022 2023-01-22 11:12:10.652415: step: 112/464, loss: 0.14163416624069214 2023-01-22 11:12:11.325638: step: 114/464, loss: 0.44657936692237854 2023-01-22 11:12:11.974817: step: 116/464, loss: 0.6462829113006592 2023-01-22 11:12:12.580694: step: 118/464, loss: 0.15209028124809265 2023-01-22 11:12:13.237606: step: 120/464, loss: 0.12344679981470108 2023-01-22 11:12:13.831744: step: 122/464, loss: 0.5198908448219299 2023-01-22 11:12:14.471942: step: 124/464, loss: 0.11611258238554001 2023-01-22 11:12:15.083387: step: 126/464, loss: 0.71803218126297 2023-01-22 11:12:15.776525: step: 128/464, loss: 0.12478084117174149 2023-01-22 11:12:16.481538: step: 130/464, loss: 0.09767883270978928 2023-01-22 11:12:17.126605: step: 132/464, loss: 0.17500635981559753 2023-01-22 11:12:17.688596: step: 134/464, loss: 0.05641498789191246 2023-01-22 11:12:18.396902: step: 136/464, loss: 0.23416012525558472 2023-01-22 11:12:19.010470: step: 138/464, loss: 0.10653456300497055 2023-01-22 11:12:19.586008: step: 140/464, loss: 0.17489150166511536 2023-01-22 
11:12:20.194186: step: 142/464, loss: 0.10827875882387161 2023-01-22 11:12:20.817346: step: 144/464, loss: 0.22888274490833282 2023-01-22 11:12:21.491923: step: 146/464, loss: 0.15548251569271088 2023-01-22 11:12:22.108677: step: 148/464, loss: 0.09006007760763168 2023-01-22 11:12:22.701400: step: 150/464, loss: 0.05137726292014122 2023-01-22 11:12:23.300882: step: 152/464, loss: 0.14549240469932556 2023-01-22 11:12:23.947874: step: 154/464, loss: 0.21249419450759888 2023-01-22 11:12:24.543132: step: 156/464, loss: 0.04181840643286705 2023-01-22 11:12:25.196041: step: 158/464, loss: 0.09198150783777237 2023-01-22 11:12:25.825249: step: 160/464, loss: 0.17108485102653503 2023-01-22 11:12:26.453293: step: 162/464, loss: 0.20262686908245087 2023-01-22 11:12:27.077298: step: 164/464, loss: 0.15493832528591156 2023-01-22 11:12:27.705595: step: 166/464, loss: 0.08424071967601776 2023-01-22 11:12:28.333429: step: 168/464, loss: 0.12358138710260391 2023-01-22 11:12:28.926732: step: 170/464, loss: 0.22446371614933014 2023-01-22 11:12:29.514890: step: 172/464, loss: 0.2094225436449051 2023-01-22 11:12:30.172371: step: 174/464, loss: 0.6193203926086426 2023-01-22 11:12:30.733874: step: 176/464, loss: 0.05181468650698662 2023-01-22 11:12:31.404199: step: 178/464, loss: 0.13235141336917877 2023-01-22 11:12:32.093454: step: 180/464, loss: 0.10980284214019775 2023-01-22 11:12:32.731616: step: 182/464, loss: 0.17049528658390045 2023-01-22 11:12:33.304844: step: 184/464, loss: 0.27909931540489197 2023-01-22 11:12:33.859944: step: 186/464, loss: 0.10104744136333466 2023-01-22 11:12:34.448548: step: 188/464, loss: 0.1292169988155365 2023-01-22 11:12:35.017820: step: 190/464, loss: 0.6000388860702515 2023-01-22 11:12:35.705340: step: 192/464, loss: 0.18760737776756287 2023-01-22 11:12:36.306394: step: 194/464, loss: 0.4577399492263794 2023-01-22 11:12:36.899346: step: 196/464, loss: 0.14205476641654968 2023-01-22 11:12:37.475761: step: 198/464, loss: 0.1507430076599121 2023-01-22 11:12:38.097147: step: 200/464, loss: 0.0702497735619545 2023-01-22 11:12:38.690918: step: 202/464, loss: 0.12581519782543182 2023-01-22 11:12:39.379024: step: 204/464, loss: 0.17592482268810272 2023-01-22 11:12:40.015458: step: 206/464, loss: 0.11105167120695114 2023-01-22 11:12:40.562176: step: 208/464, loss: 0.36974987387657166 2023-01-22 11:12:41.173376: step: 210/464, loss: 0.12456315755844116 2023-01-22 11:12:41.881599: step: 212/464, loss: 0.15190669894218445 2023-01-22 11:12:42.531411: step: 214/464, loss: 0.16267246007919312 2023-01-22 11:12:43.235386: step: 216/464, loss: 0.2663927972316742 2023-01-22 11:12:43.756567: step: 218/464, loss: 0.0649973526597023 2023-01-22 11:12:44.418203: step: 220/464, loss: 0.05866961181163788 2023-01-22 11:12:45.128869: step: 222/464, loss: 0.20051930844783783 2023-01-22 11:12:45.795641: step: 224/464, loss: 0.1325078159570694 2023-01-22 11:12:46.479999: step: 226/464, loss: 0.06449079513549805 2023-01-22 11:12:47.134700: step: 228/464, loss: 0.19155147671699524 2023-01-22 11:12:47.769777: step: 230/464, loss: 0.18524989485740662 2023-01-22 11:12:48.373734: step: 232/464, loss: 0.18985101580619812 2023-01-22 11:12:48.985014: step: 234/464, loss: 0.32292234897613525 2023-01-22 11:12:49.680079: step: 236/464, loss: 0.10213154554367065 2023-01-22 11:12:50.340475: step: 238/464, loss: 0.08985261619091034 2023-01-22 11:12:51.036455: step: 240/464, loss: 0.28371939063072205 2023-01-22 11:12:51.637606: step: 242/464, loss: 0.03562863916158676 2023-01-22 11:12:52.301993: step: 244/464, loss: 
0.14082902669906616 2023-01-22 11:12:52.923985: step: 246/464, loss: 0.08222658187150955 2023-01-22 11:12:53.472037: step: 248/464, loss: 0.06636505573987961 2023-01-22 11:12:54.172655: step: 250/464, loss: 0.7063173055648804 2023-01-22 11:12:54.798235: step: 252/464, loss: 0.1520986109972 2023-01-22 11:12:55.465741: step: 254/464, loss: 0.06759010255336761 2023-01-22 11:12:56.049502: step: 256/464, loss: 0.11777284741401672 2023-01-22 11:12:56.781339: step: 258/464, loss: 0.15299879014492035 2023-01-22 11:12:57.406566: step: 260/464, loss: 0.5783911943435669 2023-01-22 11:12:57.967004: step: 262/464, loss: 0.09978681057691574 2023-01-22 11:12:58.598992: step: 264/464, loss: 0.18470978736877441 2023-01-22 11:12:59.217604: step: 266/464, loss: 0.8560441136360168 2023-01-22 11:12:59.812684: step: 268/464, loss: 0.10765422135591507 2023-01-22 11:13:00.469266: step: 270/464, loss: 0.624764621257782 2023-01-22 11:13:01.101242: step: 272/464, loss: 0.16080394387245178 2023-01-22 11:13:01.645815: step: 274/464, loss: 0.22208231687545776 2023-01-22 11:13:02.292928: step: 276/464, loss: 0.14986518025398254 2023-01-22 11:13:02.945606: step: 278/464, loss: 0.10917126387357712 2023-01-22 11:13:03.582746: step: 280/464, loss: 0.059265293180942535 2023-01-22 11:13:04.165530: step: 282/464, loss: 0.05906907096505165 2023-01-22 11:13:04.766210: step: 284/464, loss: 0.08650758862495422 2023-01-22 11:13:05.426592: step: 286/464, loss: 0.8987934589385986 2023-01-22 11:13:06.151062: step: 288/464, loss: 0.05748949572443962 2023-01-22 11:13:06.744682: step: 290/464, loss: 0.1825551688671112 2023-01-22 11:13:07.414458: step: 292/464, loss: 0.10999234020709991 2023-01-22 11:13:08.036774: step: 294/464, loss: 0.12873074412345886 2023-01-22 11:13:08.702361: step: 296/464, loss: 0.1095266342163086 2023-01-22 11:13:09.299104: step: 298/464, loss: 0.3119054138660431 2023-01-22 11:13:09.913751: step: 300/464, loss: 0.16263194382190704 2023-01-22 11:13:10.560421: step: 302/464, loss: 0.14298595488071442 2023-01-22 11:13:11.181095: step: 304/464, loss: 0.2573631703853607 2023-01-22 11:13:11.731101: step: 306/464, loss: 0.12914666533470154 2023-01-22 11:13:12.409600: step: 308/464, loss: 0.19286595284938812 2023-01-22 11:13:12.999806: step: 310/464, loss: 0.4885352551937103 2023-01-22 11:13:13.593198: step: 312/464, loss: 0.09529785811901093 2023-01-22 11:13:14.192876: step: 314/464, loss: 0.04528629034757614 2023-01-22 11:13:14.938762: step: 316/464, loss: 0.4325774610042572 2023-01-22 11:13:15.567070: step: 318/464, loss: 0.3529732823371887 2023-01-22 11:13:16.177456: step: 320/464, loss: 0.40400734543800354 2023-01-22 11:13:16.781659: step: 322/464, loss: 0.0664587914943695 2023-01-22 11:13:17.453786: step: 324/464, loss: 0.13620255887508392 2023-01-22 11:13:18.064153: step: 326/464, loss: 0.07878082245588303 2023-01-22 11:13:18.739560: step: 328/464, loss: 0.5997202396392822 2023-01-22 11:13:19.429137: step: 330/464, loss: 0.18104591965675354 2023-01-22 11:13:20.087852: step: 332/464, loss: 0.049419764429330826 2023-01-22 11:13:20.829814: step: 334/464, loss: 0.08512122929096222 2023-01-22 11:13:21.462143: step: 336/464, loss: 0.09855330735445023 2023-01-22 11:13:22.137654: step: 338/464, loss: 0.08050018548965454 2023-01-22 11:13:22.802851: step: 340/464, loss: 0.1313687264919281 2023-01-22 11:13:23.410240: step: 342/464, loss: 0.3599167466163635 2023-01-22 11:13:24.074079: step: 344/464, loss: 0.0932551920413971 2023-01-22 11:13:24.684214: step: 346/464, loss: 0.05233725160360336 2023-01-22 11:13:25.330846: step: 
348/464, loss: 0.1757393330335617 2023-01-22 11:13:25.952351: step: 350/464, loss: 0.14035636186599731 2023-01-22 11:13:26.671585: step: 352/464, loss: 0.4218212068080902 2023-01-22 11:13:27.330935: step: 354/464, loss: 0.524379551410675 2023-01-22 11:13:28.008330: step: 356/464, loss: 0.07185309380292892 2023-01-22 11:13:28.656596: step: 358/464, loss: 0.21396894752979279 2023-01-22 11:13:29.209724: step: 360/464, loss: 0.0998459979891777 2023-01-22 11:13:29.815020: step: 362/464, loss: 0.13245929777622223 2023-01-22 11:13:30.470896: step: 364/464, loss: 0.2340196967124939 2023-01-22 11:13:31.117178: step: 366/464, loss: 0.14111104607582092 2023-01-22 11:13:31.816233: step: 368/464, loss: 0.2503955364227295 2023-01-22 11:13:32.450823: step: 370/464, loss: 0.04617612808942795 2023-01-22 11:13:33.050623: step: 372/464, loss: 0.09113742411136627 2023-01-22 11:13:33.714931: step: 374/464, loss: 0.08529345691204071 2023-01-22 11:13:34.322439: step: 376/464, loss: 0.1966417133808136 2023-01-22 11:13:34.968260: step: 378/464, loss: 0.6417378187179565 2023-01-22 11:13:35.606220: step: 380/464, loss: 0.10361170023679733 2023-01-22 11:13:36.288000: step: 382/464, loss: 0.746138870716095 2023-01-22 11:13:36.824855: step: 384/464, loss: 0.4580155909061432 2023-01-22 11:13:37.432093: step: 386/464, loss: 0.15263885259628296 2023-01-22 11:13:38.084878: step: 388/464, loss: 0.09458209574222565 2023-01-22 11:13:38.695339: step: 390/464, loss: 0.8690895438194275 2023-01-22 11:13:39.319571: step: 392/464, loss: 0.49207213521003723 2023-01-22 11:13:39.960399: step: 394/464, loss: 0.08876999467611313 2023-01-22 11:13:40.585010: step: 396/464, loss: 0.031726229935884476 2023-01-22 11:13:41.091609: step: 398/464, loss: 0.0860455110669136 2023-01-22 11:13:41.710860: step: 400/464, loss: 0.17339417338371277 2023-01-22 11:13:42.321859: step: 402/464, loss: 0.07491616904735565 2023-01-22 11:13:42.970331: step: 404/464, loss: 0.1457730084657669 2023-01-22 11:13:43.649123: step: 406/464, loss: 0.14544224739074707 2023-01-22 11:13:44.274290: step: 408/464, loss: 0.0813344195485115 2023-01-22 11:13:44.904862: step: 410/464, loss: 0.09824948012828827 2023-01-22 11:13:45.584495: step: 412/464, loss: 0.10653730481863022 2023-01-22 11:13:46.212619: step: 414/464, loss: 0.1890765279531479 2023-01-22 11:13:46.856878: step: 416/464, loss: 1.9404715299606323 2023-01-22 11:13:47.497953: step: 418/464, loss: 5.300214767456055 2023-01-22 11:13:48.113075: step: 420/464, loss: 0.11093682050704956 2023-01-22 11:13:48.720026: step: 422/464, loss: 0.12824968993663788 2023-01-22 11:13:49.399499: step: 424/464, loss: 0.11972280591726303 2023-01-22 11:13:50.058086: step: 426/464, loss: 0.049842387437820435 2023-01-22 11:13:50.670687: step: 428/464, loss: 0.24512696266174316 2023-01-22 11:13:51.286896: step: 430/464, loss: 0.1847914457321167 2023-01-22 11:13:51.908822: step: 432/464, loss: 0.24950304627418518 2023-01-22 11:13:52.522218: step: 434/464, loss: 0.41385746002197266 2023-01-22 11:13:53.135028: step: 436/464, loss: 0.3146797716617584 2023-01-22 11:13:53.775155: step: 438/464, loss: 0.10622680932283401 2023-01-22 11:13:54.337022: step: 440/464, loss: 0.15090520679950714 2023-01-22 11:13:54.892715: step: 442/464, loss: 0.08917783200740814 2023-01-22 11:13:55.547313: step: 444/464, loss: 0.407958447933197 2023-01-22 11:13:56.220732: step: 446/464, loss: 0.10987649857997894 2023-01-22 11:13:56.871724: step: 448/464, loss: 0.1464168280363083 2023-01-22 11:13:57.463286: step: 450/464, loss: 0.094017393887043 2023-01-22 
11:13:58.190395: step: 452/464, loss: 0.1568855494260788 2023-01-22 11:13:58.820267: step: 454/464, loss: 0.4010990262031555 2023-01-22 11:13:59.454850: step: 456/464, loss: 0.48869457840919495 2023-01-22 11:14:00.064888: step: 458/464, loss: 0.09566915780305862 2023-01-22 11:14:00.683778: step: 460/464, loss: 0.13694754242897034 2023-01-22 11:14:01.354445: step: 462/464, loss: 0.4534783959388733 2023-01-22 11:14:01.969551: step: 464/464, loss: 0.10260273516178131 2023-01-22 11:14:02.556696: step: 466/464, loss: 0.2750387191772461 2023-01-22 11:14:03.193055: step: 468/464, loss: 0.34514862298965454 2023-01-22 11:14:03.823750: step: 470/464, loss: 0.20128001272678375 2023-01-22 11:14:04.411615: step: 472/464, loss: 0.19402840733528137 2023-01-22 11:14:04.997534: step: 474/464, loss: 0.04928231239318848 2023-01-22 11:14:05.568488: step: 476/464, loss: 0.7598804831504822 2023-01-22 11:14:06.202653: step: 478/464, loss: 0.3005625307559967 2023-01-22 11:14:06.816551: step: 480/464, loss: 0.09437231719493866 2023-01-22 11:14:07.448755: step: 482/464, loss: 0.2333991974592209 2023-01-22 11:14:08.064336: step: 484/464, loss: 0.13361912965774536 2023-01-22 11:14:08.687248: step: 486/464, loss: 0.052449826151132584 2023-01-22 11:14:09.370866: step: 488/464, loss: 0.04909124970436096 2023-01-22 11:14:09.953823: step: 490/464, loss: 0.141932412981987 2023-01-22 11:14:10.558742: step: 492/464, loss: 1.3987499475479126 2023-01-22 11:14:11.193768: step: 494/464, loss: 0.025841237977147102 2023-01-22 11:14:11.877712: step: 496/464, loss: 0.13612967729568481 2023-01-22 11:14:12.542989: step: 498/464, loss: 0.10867279767990112 2023-01-22 11:14:13.157364: step: 500/464, loss: 0.4297686517238617 2023-01-22 11:14:13.815969: step: 502/464, loss: 0.2767006456851959 2023-01-22 11:14:14.446327: step: 504/464, loss: 0.14466437697410583 2023-01-22 11:14:15.069532: step: 506/464, loss: 0.10168630629777908 2023-01-22 11:14:15.737964: step: 508/464, loss: 0.27599096298217773 2023-01-22 11:14:16.377318: step: 510/464, loss: 0.411590576171875 2023-01-22 11:14:17.065337: step: 512/464, loss: 0.3165765404701233 2023-01-22 11:14:17.706461: step: 514/464, loss: 0.08729775249958038 2023-01-22 11:14:18.329802: step: 516/464, loss: 0.10420161485671997 2023-01-22 11:14:18.961181: step: 518/464, loss: 0.5963342785835266 2023-01-22 11:14:19.614821: step: 520/464, loss: 0.0534040592610836 2023-01-22 11:14:20.214437: step: 522/464, loss: 0.4430117905139923 2023-01-22 11:14:20.857397: step: 524/464, loss: 0.6797560453414917 2023-01-22 11:14:21.515497: step: 526/464, loss: 0.223937526345253 2023-01-22 11:14:22.143207: step: 528/464, loss: 0.1542477160692215 2023-01-22 11:14:22.816083: step: 530/464, loss: 0.1835298091173172 2023-01-22 11:14:23.417084: step: 532/464, loss: 0.28976961970329285 2023-01-22 11:14:24.004928: step: 534/464, loss: 0.11457552760839462 2023-01-22 11:14:24.641523: step: 536/464, loss: 0.39779454469680786 2023-01-22 11:14:25.326153: step: 538/464, loss: 0.09472489356994629 2023-01-22 11:14:25.992902: step: 540/464, loss: 0.07289186120033264 2023-01-22 11:14:26.632892: step: 542/464, loss: 0.1270940750837326 2023-01-22 11:14:27.203330: step: 544/464, loss: 0.12064804881811142 2023-01-22 11:14:27.744910: step: 546/464, loss: 0.1936420202255249 2023-01-22 11:14:28.411032: step: 548/464, loss: 0.10025503486394882 2023-01-22 11:14:29.110541: step: 550/464, loss: 1.1451830863952637 2023-01-22 11:14:29.756934: step: 552/464, loss: 0.2011127769947052 2023-01-22 11:14:30.352892: step: 554/464, loss: 0.17952358722686768 
2023-01-22 11:14:30.934290: step: 556/464, loss: 0.054836973547935486 2023-01-22 11:14:31.512969: step: 558/464, loss: 0.03980335220694542 2023-01-22 11:14:32.136349: step: 560/464, loss: 0.20447109639644623 2023-01-22 11:14:32.783007: step: 562/464, loss: 0.29087498784065247 2023-01-22 11:14:33.369819: step: 564/464, loss: 0.16058194637298584 2023-01-22 11:14:33.970002: step: 566/464, loss: 0.05219912528991699 2023-01-22 11:14:34.558020: step: 568/464, loss: 0.21024443209171295 2023-01-22 11:14:35.177366: step: 570/464, loss: 0.3421464264392853 2023-01-22 11:14:35.838051: step: 572/464, loss: 0.20216087996959686 2023-01-22 11:14:36.479823: step: 574/464, loss: 0.041584938764572144 2023-01-22 11:14:37.106863: step: 576/464, loss: 0.08998734503984451 2023-01-22 11:14:37.689232: step: 578/464, loss: 0.6275544762611389 2023-01-22 11:14:38.246370: step: 580/464, loss: 0.1089167669415474 2023-01-22 11:14:38.891901: step: 582/464, loss: 0.2104983776807785 2023-01-22 11:14:39.484935: step: 584/464, loss: 0.19659323990345 2023-01-22 11:14:40.088612: step: 586/464, loss: 0.1263107806444168 2023-01-22 11:14:40.670933: step: 588/464, loss: 0.7749478220939636 2023-01-22 11:14:41.399307: step: 590/464, loss: 0.1915738433599472 2023-01-22 11:14:41.992674: step: 592/464, loss: 0.2550014853477478 2023-01-22 11:14:42.595465: step: 594/464, loss: 0.05610324442386627 2023-01-22 11:14:43.235305: step: 596/464, loss: 0.37603238224983215 2023-01-22 11:14:43.896424: step: 598/464, loss: 0.29247811436653137 2023-01-22 11:14:44.459519: step: 600/464, loss: 0.13564375042915344 2023-01-22 11:14:45.074426: step: 602/464, loss: 0.4842967689037323 2023-01-22 11:14:45.719708: step: 604/464, loss: 0.06512904167175293 2023-01-22 11:14:46.344179: step: 606/464, loss: 0.21476440131664276 2023-01-22 11:14:46.972771: step: 608/464, loss: 0.25330716371536255 2023-01-22 11:14:47.718460: step: 610/464, loss: 0.10401315242052078 2023-01-22 11:14:48.350132: step: 612/464, loss: 0.24575677514076233 2023-01-22 11:14:49.088435: step: 614/464, loss: 0.07836044579744339 2023-01-22 11:14:49.671962: step: 616/464, loss: 0.03666771203279495 2023-01-22 11:14:50.321034: step: 618/464, loss: 0.06859150528907776 2023-01-22 11:14:50.967595: step: 620/464, loss: 0.17963087558746338 2023-01-22 11:14:51.598593: step: 622/464, loss: 0.5812908411026001 2023-01-22 11:14:52.201305: step: 624/464, loss: 0.15075625479221344 2023-01-22 11:14:52.825198: step: 626/464, loss: 0.1244310662150383 2023-01-22 11:14:53.529290: step: 628/464, loss: 0.3986271023750305 2023-01-22 11:14:54.304589: step: 630/464, loss: 0.21589024364948273 2023-01-22 11:14:54.982198: step: 632/464, loss: 0.24251723289489746 2023-01-22 11:14:55.644549: step: 634/464, loss: 0.6837549209594727 2023-01-22 11:14:56.234427: step: 636/464, loss: 0.077407605946064 2023-01-22 11:14:56.855705: step: 638/464, loss: 0.17971009016036987 2023-01-22 11:14:57.578755: step: 640/464, loss: 0.13150545954704285 2023-01-22 11:14:58.168835: step: 642/464, loss: 0.05822813883423805 2023-01-22 11:14:58.755202: step: 644/464, loss: 0.21754500269889832 2023-01-22 11:14:59.409624: step: 646/464, loss: 0.1172441616654396 2023-01-22 11:15:00.046585: step: 648/464, loss: 0.16576620936393738 2023-01-22 11:15:00.616587: step: 650/464, loss: 0.5241413116455078 2023-01-22 11:15:01.248395: step: 652/464, loss: 0.15110956132411957 2023-01-22 11:15:01.833070: step: 654/464, loss: 0.051666341722011566 2023-01-22 11:15:02.414297: step: 656/464, loss: 0.23533783853054047 2023-01-22 11:15:02.971945: step: 658/464, loss: 
0.44304656982421875 2023-01-22 11:15:03.669522: step: 660/464, loss: 0.21447089314460754 2023-01-22 11:15:04.280137: step: 662/464, loss: 2.8307924270629883 2023-01-22 11:15:04.984120: step: 664/464, loss: 0.10394848883152008 2023-01-22 11:15:05.639367: step: 666/464, loss: 0.0736704096198082 2023-01-22 11:15:06.312453: step: 668/464, loss: 0.14852645993232727 2023-01-22 11:15:06.961864: step: 670/464, loss: 0.5525936484336853 2023-01-22 11:15:07.554144: step: 672/464, loss: 0.26049676537513733 2023-01-22 11:15:08.181772: step: 674/464, loss: 0.3359794020652771 2023-01-22 11:15:08.780569: step: 676/464, loss: 0.23344263434410095 2023-01-22 11:15:09.384854: step: 678/464, loss: 0.38956648111343384 2023-01-22 11:15:09.988094: step: 680/464, loss: 0.28944116830825806 2023-01-22 11:15:10.814312: step: 682/464, loss: 0.16485880315303802 2023-01-22 11:15:11.459353: step: 684/464, loss: 0.10292749106884003 2023-01-22 11:15:12.169465: step: 686/464, loss: 0.24333380162715912 2023-01-22 11:15:12.753161: step: 688/464, loss: 0.15236453711986542 2023-01-22 11:15:13.352595: step: 690/464, loss: 0.4543951749801636 2023-01-22 11:15:13.955838: step: 692/464, loss: 0.23155415058135986 2023-01-22 11:15:14.579784: step: 694/464, loss: 0.1280626654624939 2023-01-22 11:15:15.239223: step: 696/464, loss: 0.16600367426872253 2023-01-22 11:15:15.934861: step: 698/464, loss: 0.17227059602737427 2023-01-22 11:15:16.578033: step: 700/464, loss: 0.15010888874530792 2023-01-22 11:15:17.227326: step: 702/464, loss: 0.2915771007537842 2023-01-22 11:15:17.837164: step: 704/464, loss: 0.14665408432483673 2023-01-22 11:15:18.471426: step: 706/464, loss: 0.09335776418447495 2023-01-22 11:15:19.041597: step: 708/464, loss: 0.12245311588048935 2023-01-22 11:15:19.669441: step: 710/464, loss: 0.3094889521598816 2023-01-22 11:15:20.310181: step: 712/464, loss: 0.28910723328590393 2023-01-22 11:15:20.993829: step: 714/464, loss: 0.16503068804740906 2023-01-22 11:15:21.544527: step: 716/464, loss: 0.16619819402694702 2023-01-22 11:15:22.111520: step: 718/464, loss: 0.24636615812778473 2023-01-22 11:15:22.807743: step: 720/464, loss: 0.12881028652191162 2023-01-22 11:15:23.471264: step: 722/464, loss: 0.2925278842449188 2023-01-22 11:15:24.098709: step: 724/464, loss: 0.16039560735225677 2023-01-22 11:15:24.719138: step: 726/464, loss: 0.20397178828716278 2023-01-22 11:15:25.412383: step: 728/464, loss: 0.11749129742383957 2023-01-22 11:15:26.039174: step: 730/464, loss: 0.6463688611984253 2023-01-22 11:15:26.671345: step: 732/464, loss: 0.5270076394081116 2023-01-22 11:15:27.247664: step: 734/464, loss: 0.0638769268989563 2023-01-22 11:15:27.878248: step: 736/464, loss: 0.1781592071056366 2023-01-22 11:15:28.537724: step: 738/464, loss: 0.9981155395507812 2023-01-22 11:15:29.182703: step: 740/464, loss: 0.1648676097393036 2023-01-22 11:15:29.799352: step: 742/464, loss: 0.1325865089893341 2023-01-22 11:15:30.388826: step: 744/464, loss: 0.046779971569776535 2023-01-22 11:15:31.048069: step: 746/464, loss: 0.2365027666091919 2023-01-22 11:15:31.637534: step: 748/464, loss: 0.11213462054729462 2023-01-22 11:15:32.186392: step: 750/464, loss: 0.1705876886844635 2023-01-22 11:15:32.887971: step: 752/464, loss: 0.2322748452425003 2023-01-22 11:15:33.448047: step: 754/464, loss: 0.1390574723482132 2023-01-22 11:15:34.077811: step: 756/464, loss: 0.2047601044178009 2023-01-22 11:15:34.711378: step: 758/464, loss: 0.9834222197532654 2023-01-22 11:15:35.341815: step: 760/464, loss: 0.49498993158340454 2023-01-22 11:15:36.062553: step: 
762/464, loss: 0.11589276790618896 2023-01-22 11:15:36.755269: step: 764/464, loss: 0.07173576205968857 2023-01-22 11:15:37.404787: step: 766/464, loss: 0.21668976545333862 2023-01-22 11:15:38.092693: step: 768/464, loss: 0.08592022955417633 2023-01-22 11:15:38.723864: step: 770/464, loss: 0.16447365283966064 2023-01-22 11:15:39.324590: step: 772/464, loss: 0.21605470776557922 2023-01-22 11:15:39.943612: step: 774/464, loss: 1.5658270120620728 2023-01-22 11:15:40.542942: step: 776/464, loss: 0.0884871706366539 2023-01-22 11:15:41.198701: step: 778/464, loss: 0.1517365276813507 2023-01-22 11:15:41.824589: step: 780/464, loss: 0.09208640456199646 2023-01-22 11:15:42.392955: step: 782/464, loss: 0.11096998304128647 2023-01-22 11:15:43.060375: step: 784/464, loss: 0.2630148231983185 2023-01-22 11:15:43.616642: step: 786/464, loss: 0.1333618462085724 2023-01-22 11:15:44.256054: step: 788/464, loss: 0.32209762930870056 2023-01-22 11:15:44.837915: step: 790/464, loss: 0.12842051684856415 2023-01-22 11:15:45.452872: step: 792/464, loss: 0.15147587656974792 2023-01-22 11:15:46.047068: step: 794/464, loss: 0.4042651653289795 2023-01-22 11:15:46.694404: step: 796/464, loss: 0.08628109842538834 2023-01-22 11:15:47.314186: step: 798/464, loss: 0.16129985451698303 2023-01-22 11:15:47.958446: step: 800/464, loss: 0.3072906732559204 2023-01-22 11:15:48.580287: step: 802/464, loss: 0.12120974808931351 2023-01-22 11:15:49.201731: step: 804/464, loss: 0.07602791488170624 2023-01-22 11:15:49.864433: step: 806/464, loss: 0.10465062409639359 2023-01-22 11:15:50.528388: step: 808/464, loss: 0.432160884141922 2023-01-22 11:15:51.123188: step: 810/464, loss: 0.19127866625785828 2023-01-22 11:15:51.724263: step: 812/464, loss: 0.17180559039115906 2023-01-22 11:15:52.327692: step: 814/464, loss: 0.4046097695827484 2023-01-22 11:15:52.967721: step: 816/464, loss: 0.14955194294452667 2023-01-22 11:15:53.580994: step: 818/464, loss: 0.3648272156715393 2023-01-22 11:15:54.229894: step: 820/464, loss: 0.07315445691347122 2023-01-22 11:15:54.831825: step: 822/464, loss: 0.08252275735139847 2023-01-22 11:15:55.373835: step: 824/464, loss: 0.07464836537837982 2023-01-22 11:15:56.044247: step: 826/464, loss: 0.8104329109191895 2023-01-22 11:15:56.719947: step: 828/464, loss: 0.3973939120769501 2023-01-22 11:15:57.329815: step: 830/464, loss: 0.22226989269256592 2023-01-22 11:15:57.930417: step: 832/464, loss: 0.19058486819267273 2023-01-22 11:15:58.486599: step: 834/464, loss: 0.13172155618667603 2023-01-22 11:15:59.083436: step: 836/464, loss: 0.10252564400434494 2023-01-22 11:15:59.756638: step: 838/464, loss: 0.156595379114151 2023-01-22 11:16:00.375415: step: 840/464, loss: 0.1269509494304657 2023-01-22 11:16:01.004874: step: 842/464, loss: 0.5591135621070862 2023-01-22 11:16:01.653916: step: 844/464, loss: 0.2109595239162445 2023-01-22 11:16:02.271902: step: 846/464, loss: 0.7321330308914185 2023-01-22 11:16:02.919787: step: 848/464, loss: 0.27749067544937134 2023-01-22 11:16:03.520342: step: 850/464, loss: 0.34926146268844604 2023-01-22 11:16:04.144886: step: 852/464, loss: 0.26148805022239685 2023-01-22 11:16:04.706373: step: 854/464, loss: 0.13771076500415802 2023-01-22 11:16:05.413317: step: 856/464, loss: 0.14018401503562927 2023-01-22 11:16:05.998674: step: 858/464, loss: 0.3332751989364624 2023-01-22 11:16:06.672475: step: 860/464, loss: 0.20438069105148315 2023-01-22 11:16:07.314154: step: 862/464, loss: 0.19731405377388 2023-01-22 11:16:07.940406: step: 864/464, loss: 0.4525524377822876 2023-01-22 
11:16:08.566459: step: 866/464, loss: 0.7084553241729736 2023-01-22 11:16:09.126848: step: 868/464, loss: 0.29406675696372986 2023-01-22 11:16:09.796887: step: 870/464, loss: 0.23075011372566223 2023-01-22 11:16:10.388867: step: 872/464, loss: 0.1038394421339035 2023-01-22 11:16:11.024686: step: 874/464, loss: 0.25779569149017334 2023-01-22 11:16:11.701185: step: 876/464, loss: 0.3240090310573578 2023-01-22 11:16:12.327627: step: 878/464, loss: 2.591330051422119 2023-01-22 11:16:12.993299: step: 880/464, loss: 0.1222638189792633 2023-01-22 11:16:13.667987: step: 882/464, loss: 0.26508739590644836 2023-01-22 11:16:14.283234: step: 884/464, loss: 0.45191052556037903 2023-01-22 11:16:14.956222: step: 886/464, loss: 0.08805494010448456 2023-01-22 11:16:15.612368: step: 888/464, loss: 0.2745131254196167 2023-01-22 11:16:16.159691: step: 890/464, loss: 0.13159851729869843 2023-01-22 11:16:16.822960: step: 892/464, loss: 0.09864508360624313 2023-01-22 11:16:17.433105: step: 894/464, loss: 0.1688559204339981 2023-01-22 11:16:17.972888: step: 896/464, loss: 0.06535923480987549 2023-01-22 11:16:18.593590: step: 898/464, loss: 0.08384721726179123 2023-01-22 11:16:19.218398: step: 900/464, loss: 0.08997979760169983 2023-01-22 11:16:19.835954: step: 902/464, loss: 0.17216522991657257 2023-01-22 11:16:20.430228: step: 904/464, loss: 0.1290782392024994 2023-01-22 11:16:21.053429: step: 906/464, loss: 0.1777675300836563 2023-01-22 11:16:21.697113: step: 908/464, loss: 0.2289048284292221 2023-01-22 11:16:22.274552: step: 910/464, loss: 0.09597111493349075 2023-01-22 11:16:22.855793: step: 912/464, loss: 0.1423436552286148 2023-01-22 11:16:23.488945: step: 914/464, loss: 0.25019633769989014 2023-01-22 11:16:24.105458: step: 916/464, loss: 0.19405746459960938 2023-01-22 11:16:24.715735: step: 918/464, loss: 0.06312482059001923 2023-01-22 11:16:25.323828: step: 920/464, loss: 0.09683822840452194 2023-01-22 11:16:25.970819: step: 922/464, loss: 0.05893225595355034 2023-01-22 11:16:26.646685: step: 924/464, loss: 0.6508408784866333 2023-01-22 11:16:27.269834: step: 926/464, loss: 0.04898369312286377 2023-01-22 11:16:27.887920: step: 928/464, loss: 0.07690513879060745 2023-01-22 11:16:28.380956: step: 930/464, loss: 0.07759331911802292 ================================================== Loss: 0.255 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28515503875968995, 'r': 0.34900379506641366, 'f1': 0.31386518771331057}, 'combined': 0.23126908568349197, 'epoch': 12} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2905966233902622, 'r': 0.28072327622273263, 'f1': 0.28557463597062666}, 'combined': 0.1864373271103573, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2813176007866274, 'r': 0.3619228336495889, 'f1': 0.31656984785615494}, 'combined': 0.23326199315716678, 'epoch': 12} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31235126312800804, 'r': 0.2922735878121581, 'f1': 0.3019790674833398}, 'combined': 0.19714695597357934, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2918622848200313, 'r': 0.3538899430740038, 'f1': 0.31989708404802747}, 'combined': 0.2357136408774939, 'epoch': 12} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 
0.6528497409326425}, 'slot': {'p': 0.29741243040965004, 'r': 0.2739253146931855, 'f1': 0.28518610678860323}, 'combined': 0.18618367593452853, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20128205128205126, 'r': 0.37380952380952376, 'f1': 0.2616666666666666}, 'combined': 0.1744444444444444, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2616279069767442, 'r': 0.4891304347826087, 'f1': 0.34090909090909094}, 'combined': 0.17045454545454547, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3017241379310345, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28487491086284766, 'r': 0.3248763214963405, 'f1': 0.303563513171226}, 'combined': 0.2236783781261665, 'epoch': 9} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2834185544035629, 'r': 0.30291983108270715, 'f1': 0.2928448921331714}, 'combined': 0.19118371196258857, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47368421052631576, 'r': 0.3103448275862069, 'f1': 0.375}, 'combined': 0.25, 'epoch': 9} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:19:08.539720: step: 2/464, loss: 0.20185832679271698 2023-01-22 11:19:09.134791: step: 4/464, loss: 0.15542711317539215 2023-01-22 11:19:09.885175: step: 6/464, loss: 0.05096767470240593 2023-01-22 11:19:10.426136: step: 8/464, loss: 0.03697284683585167 2023-01-22 11:19:11.038887: step: 10/464, loss: 0.13277460634708405 2023-01-22 11:19:11.654365: step: 12/464, loss: 0.06230716407299042 2023-01-22 11:19:12.298075: step: 14/464, loss: 
0.17492106556892395 2023-01-22 11:19:12.897438: step: 16/464, loss: 0.07750896364450455 2023-01-22 11:19:13.460289: step: 18/464, loss: 0.3672374188899994 2023-01-22 11:19:14.147462: step: 20/464, loss: 0.022687960416078568 2023-01-22 11:19:14.793827: step: 22/464, loss: 0.04192107543349266 2023-01-22 11:19:15.453380: step: 24/464, loss: 0.037765905261039734 2023-01-22 11:19:16.063867: step: 26/464, loss: 0.15746667981147766 2023-01-22 11:19:16.721875: step: 28/464, loss: 0.20232006907463074 2023-01-22 11:19:17.350134: step: 30/464, loss: 0.1225430890917778 2023-01-22 11:19:17.938692: step: 32/464, loss: 0.10123708099126816 2023-01-22 11:19:18.558897: step: 34/464, loss: 0.12023011595010757 2023-01-22 11:19:19.183518: step: 36/464, loss: 0.03455466777086258 2023-01-22 11:19:19.764813: step: 38/464, loss: 0.24662727117538452 2023-01-22 11:19:20.371229: step: 40/464, loss: 0.161251500248909 2023-01-22 11:19:20.971299: step: 42/464, loss: 0.12451664358377457 2023-01-22 11:19:21.684555: step: 44/464, loss: 0.21603776514530182 2023-01-22 11:19:22.329235: step: 46/464, loss: 0.14945615828037262 2023-01-22 11:19:22.987356: step: 48/464, loss: 0.14207737147808075 2023-01-22 11:19:23.549084: step: 50/464, loss: 0.017215615138411522 2023-01-22 11:19:24.152714: step: 52/464, loss: 0.7450473308563232 2023-01-22 11:19:24.744485: step: 54/464, loss: 0.1268204152584076 2023-01-22 11:19:25.354803: step: 56/464, loss: 0.07096073031425476 2023-01-22 11:19:25.979278: step: 58/464, loss: 0.5722800493240356 2023-01-22 11:19:26.598856: step: 60/464, loss: 0.13189375400543213 2023-01-22 11:19:27.242167: step: 62/464, loss: 0.5198311805725098 2023-01-22 11:19:27.965868: step: 64/464, loss: 0.17269432544708252 2023-01-22 11:19:28.634687: step: 66/464, loss: 0.3504256308078766 2023-01-22 11:19:29.347595: step: 68/464, loss: 2.9256834983825684 2023-01-22 11:19:29.968099: step: 70/464, loss: 0.1361749768257141 2023-01-22 11:19:30.589405: step: 72/464, loss: 0.07835980504751205 2023-01-22 11:19:31.268542: step: 74/464, loss: 0.1003730520606041 2023-01-22 11:19:31.926773: step: 76/464, loss: 0.10211724787950516 2023-01-22 11:19:32.550394: step: 78/464, loss: 0.05625668913125992 2023-01-22 11:19:33.102295: step: 80/464, loss: 0.1728784143924713 2023-01-22 11:19:33.730740: step: 82/464, loss: 0.18018820881843567 2023-01-22 11:19:34.306881: step: 84/464, loss: 0.10599424690008163 2023-01-22 11:19:34.993883: step: 86/464, loss: 0.06094743683934212 2023-01-22 11:19:35.667208: step: 88/464, loss: 0.1300642192363739 2023-01-22 11:19:36.333355: step: 90/464, loss: 0.08097365498542786 2023-01-22 11:19:36.947472: step: 92/464, loss: 0.2460734248161316 2023-01-22 11:19:37.561983: step: 94/464, loss: 0.19132286310195923 2023-01-22 11:19:38.239355: step: 96/464, loss: 0.19940787553787231 2023-01-22 11:19:38.825875: step: 98/464, loss: 0.10040685534477234 2023-01-22 11:19:39.457206: step: 100/464, loss: 0.08837602287530899 2023-01-22 11:19:40.074415: step: 102/464, loss: 0.2815611660480499 2023-01-22 11:19:40.647876: step: 104/464, loss: 0.08271598815917969 2023-01-22 11:19:41.314525: step: 106/464, loss: 0.187362939119339 2023-01-22 11:19:41.956022: step: 108/464, loss: 0.07661551237106323 2023-01-22 11:19:42.577146: step: 110/464, loss: 0.17065709829330444 2023-01-22 11:19:43.148629: step: 112/464, loss: 0.1066417247056961 2023-01-22 11:19:43.721938: step: 114/464, loss: 0.03715033456683159 2023-01-22 11:19:44.278682: step: 116/464, loss: 0.04503128305077553 2023-01-22 11:19:44.889586: step: 118/464, loss: 0.13615788519382477 
2023-01-22 11:19:45.564007: step: 120/464, loss: 0.13754060864448547 2023-01-22 11:19:46.184182: step: 122/464, loss: 0.1050601676106453 2023-01-22 11:19:46.856407: step: 124/464, loss: 0.17007730901241302 2023-01-22 11:19:47.451811: step: 126/464, loss: 0.05719568207859993 2023-01-22 11:19:48.015504: step: 128/464, loss: 0.17565593123435974 2023-01-22 11:19:48.662200: step: 130/464, loss: 0.08824195712804794 2023-01-22 11:19:49.261987: step: 132/464, loss: 0.14226341247558594 2023-01-22 11:19:49.921600: step: 134/464, loss: 0.2548733949661255 2023-01-22 11:19:50.513668: step: 136/464, loss: 0.06347456574440002 2023-01-22 11:19:51.232337: step: 138/464, loss: 0.1744856983423233 2023-01-22 11:19:51.853971: step: 140/464, loss: 0.14623475074768066 2023-01-22 11:19:52.506620: step: 142/464, loss: 0.10009687393903732 2023-01-22 11:19:53.154738: step: 144/464, loss: 0.09313633292913437 2023-01-22 11:19:53.775256: step: 146/464, loss: 0.35434556007385254 2023-01-22 11:19:54.594515: step: 148/464, loss: 0.13106636703014374 2023-01-22 11:19:55.247766: step: 150/464, loss: 0.06860726326704025 2023-01-22 11:19:55.866119: step: 152/464, loss: 0.09948920458555222 2023-01-22 11:19:56.494073: step: 154/464, loss: 0.007669544313102961 2023-01-22 11:19:57.168192: step: 156/464, loss: 0.4408150911331177 2023-01-22 11:19:57.858967: step: 158/464, loss: 0.22104589641094208 2023-01-22 11:19:58.428293: step: 160/464, loss: 0.1109457015991211 2023-01-22 11:19:58.981873: step: 162/464, loss: 0.14397618174552917 2023-01-22 11:19:59.601044: step: 164/464, loss: 0.06954605132341385 2023-01-22 11:20:00.222805: step: 166/464, loss: 0.10357537120580673 2023-01-22 11:20:00.861396: step: 168/464, loss: 0.06206103786826134 2023-01-22 11:20:01.499117: step: 170/464, loss: 0.08428634703159332 2023-01-22 11:20:02.180071: step: 172/464, loss: 0.16943979263305664 2023-01-22 11:20:02.711591: step: 174/464, loss: 0.36546847224235535 2023-01-22 11:20:03.347277: step: 176/464, loss: 0.1322539895772934 2023-01-22 11:20:03.998547: step: 178/464, loss: 0.0765388011932373 2023-01-22 11:20:04.559093: step: 180/464, loss: 0.4844176471233368 2023-01-22 11:20:05.149708: step: 182/464, loss: 0.21347054839134216 2023-01-22 11:20:05.743310: step: 184/464, loss: 0.1827172338962555 2023-01-22 11:20:06.317341: step: 186/464, loss: 0.04949979484081268 2023-01-22 11:20:06.938866: step: 188/464, loss: 0.12278314679861069 2023-01-22 11:20:07.582478: step: 190/464, loss: 0.07154679298400879 2023-01-22 11:20:08.227061: step: 192/464, loss: 0.05961580574512482 2023-01-22 11:20:08.874590: step: 194/464, loss: 0.3202018737792969 2023-01-22 11:20:09.459112: step: 196/464, loss: 0.532412052154541 2023-01-22 11:20:10.043220: step: 198/464, loss: 0.06335130333900452 2023-01-22 11:20:10.683464: step: 200/464, loss: 0.6394345164299011 2023-01-22 11:20:11.367987: step: 202/464, loss: 0.09846265614032745 2023-01-22 11:20:11.994508: step: 204/464, loss: 0.06447742134332657 2023-01-22 11:20:12.594139: step: 206/464, loss: 0.08010944724082947 2023-01-22 11:20:13.277215: step: 208/464, loss: 0.12857092916965485 2023-01-22 11:20:13.918322: step: 210/464, loss: 0.2337302416563034 2023-01-22 11:20:14.546801: step: 212/464, loss: 0.25796255469322205 2023-01-22 11:20:15.205601: step: 214/464, loss: 0.15600861608982086 2023-01-22 11:20:15.796546: step: 216/464, loss: 0.10373272001743317 2023-01-22 11:20:16.415371: step: 218/464, loss: 0.07564297318458557 2023-01-22 11:20:17.011096: step: 220/464, loss: 0.21570098400115967 2023-01-22 11:20:17.640689: step: 222/464, loss: 
0.06716049462556839 2023-01-22 11:20:18.285145: step: 224/464, loss: 0.1479952186346054 2023-01-22 11:20:18.889994: step: 226/464, loss: 0.12563945353031158 2023-01-22 11:20:19.519100: step: 228/464, loss: 0.10678159445524216 2023-01-22 11:20:20.190586: step: 230/464, loss: 0.17874892055988312 2023-01-22 11:20:20.755026: step: 232/464, loss: 0.08733192831277847 2023-01-22 11:20:21.396987: step: 234/464, loss: 0.1556948870420456 2023-01-22 11:20:22.057757: step: 236/464, loss: 0.4824150502681732 2023-01-22 11:20:22.684189: step: 238/464, loss: 0.05019241198897362 2023-01-22 11:20:23.272254: step: 240/464, loss: 0.07417203485965729 2023-01-22 11:20:23.899478: step: 242/464, loss: 0.21365107595920563 2023-01-22 11:20:24.485353: step: 244/464, loss: 0.34110331535339355 2023-01-22 11:20:25.070237: step: 246/464, loss: 0.45811355113983154 2023-01-22 11:20:25.637656: step: 248/464, loss: 0.10789628326892853 2023-01-22 11:20:26.220955: step: 250/464, loss: 0.2191714644432068 2023-01-22 11:20:26.870208: step: 252/464, loss: 0.06005513295531273 2023-01-22 11:20:27.514679: step: 254/464, loss: 0.23280870914459229 2023-01-22 11:20:28.149468: step: 256/464, loss: 0.09422003477811813 2023-01-22 11:20:28.780395: step: 258/464, loss: 0.2404172122478485 2023-01-22 11:20:29.408835: step: 260/464, loss: 0.17267613112926483 2023-01-22 11:20:30.028658: step: 262/464, loss: 0.32540443539619446 2023-01-22 11:20:30.694987: step: 264/464, loss: 0.0512615405023098 2023-01-22 11:20:31.337224: step: 266/464, loss: 0.2330555021762848 2023-01-22 11:20:32.027810: step: 268/464, loss: 0.307839572429657 2023-01-22 11:20:32.652349: step: 270/464, loss: 0.16887786984443665 2023-01-22 11:20:33.275242: step: 272/464, loss: 0.27475062012672424 2023-01-22 11:20:33.906546: step: 274/464, loss: 0.2929746210575104 2023-01-22 11:20:34.516791: step: 276/464, loss: 0.09339070320129395 2023-01-22 11:20:35.102045: step: 278/464, loss: 0.0926584005355835 2023-01-22 11:20:35.721640: step: 280/464, loss: 0.04409787058830261 2023-01-22 11:20:36.433748: step: 282/464, loss: 0.0822482779622078 2023-01-22 11:20:37.097459: step: 284/464, loss: 0.806532621383667 2023-01-22 11:20:37.741981: step: 286/464, loss: 0.07540340721607208 2023-01-22 11:20:38.365703: step: 288/464, loss: 0.07068516314029694 2023-01-22 11:20:38.994180: step: 290/464, loss: 0.19356432557106018 2023-01-22 11:20:39.664801: step: 292/464, loss: 0.3262822926044464 2023-01-22 11:20:40.232576: step: 294/464, loss: 0.06418322026729584 2023-01-22 11:20:40.803940: step: 296/464, loss: 0.19877147674560547 2023-01-22 11:20:41.360587: step: 298/464, loss: 0.6032619476318359 2023-01-22 11:20:41.974964: step: 300/464, loss: 0.22746288776397705 2023-01-22 11:20:42.586492: step: 302/464, loss: 0.010866068303585052 2023-01-22 11:20:43.176600: step: 304/464, loss: 0.11572247743606567 2023-01-22 11:20:43.714489: step: 306/464, loss: 0.1199236810207367 2023-01-22 11:20:44.301066: step: 308/464, loss: 0.49689608812332153 2023-01-22 11:20:44.854040: step: 310/464, loss: 0.13504759967327118 2023-01-22 11:20:45.527486: step: 312/464, loss: 0.1400964856147766 2023-01-22 11:20:46.235883: step: 314/464, loss: 1.2025758028030396 2023-01-22 11:20:46.840039: step: 316/464, loss: 0.10083557665348053 2023-01-22 11:20:47.493740: step: 318/464, loss: 0.23876634240150452 2023-01-22 11:20:48.091682: step: 320/464, loss: 0.2314985990524292 2023-01-22 11:20:48.729857: step: 322/464, loss: 0.665070652961731 2023-01-22 11:20:49.373432: step: 324/464, loss: 0.11292921751737595 2023-01-22 11:20:50.039018: step: 
326/464, loss: 0.9820161461830139 2023-01-22 11:20:50.732067: step: 328/464, loss: 0.22751489281654358 2023-01-22 11:20:51.334194: step: 330/464, loss: 0.14982061088085175 2023-01-22 11:20:51.982439: step: 332/464, loss: 0.05553364381194115 2023-01-22 11:20:52.632688: step: 334/464, loss: 0.12155529856681824 2023-01-22 11:20:53.195470: step: 336/464, loss: 0.07996393740177155 2023-01-22 11:20:53.779833: step: 338/464, loss: 0.14088571071624756 2023-01-22 11:20:54.386303: step: 340/464, loss: 0.06349222362041473 2023-01-22 11:20:55.042305: step: 342/464, loss: 0.1997671276330948 2023-01-22 11:20:55.701598: step: 344/464, loss: 0.09512768685817719 2023-01-22 11:20:56.312854: step: 346/464, loss: 0.14336644113063812 2023-01-22 11:20:57.054630: step: 348/464, loss: 0.36467671394348145 2023-01-22 11:20:57.606182: step: 350/464, loss: 0.13217979669570923 2023-01-22 11:20:58.218268: step: 352/464, loss: 0.5009357929229736 2023-01-22 11:20:58.811851: step: 354/464, loss: 0.18532253801822662 2023-01-22 11:20:59.388600: step: 356/464, loss: 0.1418599635362625 2023-01-22 11:20:59.998700: step: 358/464, loss: 0.14950698614120483 2023-01-22 11:21:00.724013: step: 360/464, loss: 0.07432292401790619 2023-01-22 11:21:01.354053: step: 362/464, loss: 0.6410752534866333 2023-01-22 11:21:02.041119: step: 364/464, loss: 0.11664801836013794 2023-01-22 11:21:02.697878: step: 366/464, loss: 0.2435479313135147 2023-01-22 11:21:03.255817: step: 368/464, loss: 0.40186649560928345 2023-01-22 11:21:03.920156: step: 370/464, loss: 0.3850458860397339 2023-01-22 11:21:04.539673: step: 372/464, loss: 0.4946538507938385 2023-01-22 11:21:05.170821: step: 374/464, loss: 0.6173791885375977 2023-01-22 11:21:05.773177: step: 376/464, loss: 0.17963023483753204 2023-01-22 11:21:06.423593: step: 378/464, loss: 0.45287269353866577 2023-01-22 11:21:07.148590: step: 380/464, loss: 0.5002993941307068 2023-01-22 11:21:07.904796: step: 382/464, loss: 0.12893010675907135 2023-01-22 11:21:08.518764: step: 384/464, loss: 0.07852751761674881 2023-01-22 11:21:09.132764: step: 386/464, loss: 0.09611756354570389 2023-01-22 11:21:09.797093: step: 388/464, loss: 0.09752000123262405 2023-01-22 11:21:10.409690: step: 390/464, loss: 0.14214427769184113 2023-01-22 11:21:11.077833: step: 392/464, loss: 0.12182476371526718 2023-01-22 11:21:11.679426: step: 394/464, loss: 0.14517463743686676 2023-01-22 11:21:12.348850: step: 396/464, loss: 0.10885108262300491 2023-01-22 11:21:12.942612: step: 398/464, loss: 0.14360931515693665 2023-01-22 11:21:13.672626: step: 400/464, loss: 0.30467498302459717 2023-01-22 11:21:14.334390: step: 402/464, loss: 0.1928461492061615 2023-01-22 11:21:14.879994: step: 404/464, loss: 0.03510986641049385 2023-01-22 11:21:15.464031: step: 406/464, loss: 0.3122141659259796 2023-01-22 11:21:16.105461: step: 408/464, loss: 0.39408615231513977 2023-01-22 11:21:16.713114: step: 410/464, loss: 0.17328330874443054 2023-01-22 11:21:17.392784: step: 412/464, loss: 0.09749648720026016 2023-01-22 11:21:17.966098: step: 414/464, loss: 0.2930392324924469 2023-01-22 11:21:18.571739: step: 416/464, loss: 0.4744638502597809 2023-01-22 11:21:19.164799: step: 418/464, loss: 1.5964727401733398 2023-01-22 11:21:19.805196: step: 420/464, loss: 0.1724444031715393 2023-01-22 11:21:20.425904: step: 422/464, loss: 0.29615873098373413 2023-01-22 11:21:21.045983: step: 424/464, loss: 0.13187594711780548 2023-01-22 11:21:21.726001: step: 426/464, loss: 0.25384002923965454 2023-01-22 11:21:22.384657: step: 428/464, loss: 0.11173109710216522 2023-01-22 
11:21:22.972507: step: 430/464, loss: 0.08897748589515686 2023-01-22 11:21:23.645685: step: 432/464, loss: 0.2709905505180359 2023-01-22 11:21:24.289633: step: 434/464, loss: 0.3819783627986908 2023-01-22 11:21:24.939040: step: 436/464, loss: 0.08471409231424332 2023-01-22 11:21:25.585563: step: 438/464, loss: 0.17317360639572144 2023-01-22 11:21:26.187880: step: 440/464, loss: 0.14402073621749878 2023-01-22 11:21:26.832779: step: 442/464, loss: 0.10715532302856445 2023-01-22 11:21:27.468210: step: 444/464, loss: 0.06745106726884842 2023-01-22 11:21:28.054965: step: 446/464, loss: 0.23669381439685822 2023-01-22 11:21:28.715155: step: 448/464, loss: 0.1531083732843399 2023-01-22 11:21:29.357019: step: 450/464, loss: 0.09652193635702133 2023-01-22 11:21:29.927547: step: 452/464, loss: 0.4267866015434265 2023-01-22 11:21:30.621721: step: 454/464, loss: 0.11406000703573227 2023-01-22 11:21:31.290290: step: 456/464, loss: 0.09449287503957748 2023-01-22 11:21:31.979310: step: 458/464, loss: 0.06641307473182678 2023-01-22 11:21:32.666475: step: 460/464, loss: 0.16144509613513947 2023-01-22 11:21:33.321239: step: 462/464, loss: 0.20724427700042725 2023-01-22 11:21:33.979473: step: 464/464, loss: 0.36079099774360657 2023-01-22 11:21:34.609110: step: 466/464, loss: 0.49858927726745605 2023-01-22 11:21:35.210756: step: 468/464, loss: 0.194271981716156 2023-01-22 11:21:35.874012: step: 470/464, loss: 0.067782923579216 2023-01-22 11:21:36.477141: step: 472/464, loss: 0.0739690512418747 2023-01-22 11:21:37.053803: step: 474/464, loss: 0.004954623989760876 2023-01-22 11:21:37.658197: step: 476/464, loss: 0.05232124403119087 2023-01-22 11:21:38.249451: step: 478/464, loss: 0.14982157945632935 2023-01-22 11:21:38.893635: step: 480/464, loss: 0.054943837225437164 2023-01-22 11:21:39.553466: step: 482/464, loss: 0.5149544477462769 2023-01-22 11:21:40.166583: step: 484/464, loss: 0.28584542870521545 2023-01-22 11:21:40.756179: step: 486/464, loss: 0.11520925164222717 2023-01-22 11:21:41.443406: step: 488/464, loss: 0.3075667917728424 2023-01-22 11:21:42.094675: step: 490/464, loss: 0.34954291582107544 2023-01-22 11:21:42.700556: step: 492/464, loss: 0.16785860061645508 2023-01-22 11:21:43.378904: step: 494/464, loss: 0.13346517086029053 2023-01-22 11:21:44.021437: step: 496/464, loss: 0.21838581562042236 2023-01-22 11:21:44.641141: step: 498/464, loss: 0.17647969722747803 2023-01-22 11:21:45.265871: step: 500/464, loss: 0.3172367513179779 2023-01-22 11:21:45.927523: step: 502/464, loss: 0.10279625654220581 2023-01-22 11:21:46.549246: step: 504/464, loss: 0.05722634121775627 2023-01-22 11:21:47.158876: step: 506/464, loss: 0.08139292895793915 2023-01-22 11:21:47.798361: step: 508/464, loss: 0.28629621863365173 2023-01-22 11:21:48.394468: step: 510/464, loss: 0.07338398694992065 2023-01-22 11:21:49.004524: step: 512/464, loss: 0.11667048931121826 2023-01-22 11:21:49.612273: step: 514/464, loss: 0.10032473504543304 2023-01-22 11:21:50.195403: step: 516/464, loss: 0.10229482501745224 2023-01-22 11:21:50.829013: step: 518/464, loss: 0.3341778516769409 2023-01-22 11:21:51.437630: step: 520/464, loss: 0.348959743976593 2023-01-22 11:21:52.155952: step: 522/464, loss: 0.16965113580226898 2023-01-22 11:21:52.743584: step: 524/464, loss: 0.056599151343107224 2023-01-22 11:21:53.384963: step: 526/464, loss: 0.0732683464884758 2023-01-22 11:21:54.064417: step: 528/464, loss: 0.0883973240852356 2023-01-22 11:21:54.647594: step: 530/464, loss: 0.08199550956487656 2023-01-22 11:21:55.273371: step: 532/464, loss: 
0.08900762349367142 2023-01-22 11:21:55.921765: step: 534/464, loss: 0.05986578017473221 2023-01-22 11:21:56.557365: step: 536/464, loss: 0.11681246757507324 2023-01-22 11:21:57.224084: step: 538/464, loss: 0.4057319462299347 2023-01-22 11:21:57.906769: step: 540/464, loss: 0.0658615306019783 2023-01-22 11:21:58.550899: step: 542/464, loss: 0.25594618916511536 2023-01-22 11:21:59.178081: step: 544/464, loss: 0.15549439191818237 2023-01-22 11:21:59.793637: step: 546/464, loss: 0.11209609359502792 2023-01-22 11:22:00.455831: step: 548/464, loss: 0.21639572083950043 2023-01-22 11:22:01.121587: step: 550/464, loss: 0.11636323481798172 2023-01-22 11:22:01.834694: step: 552/464, loss: 0.2554556727409363 2023-01-22 11:22:02.465256: step: 554/464, loss: 0.47724249958992004 2023-01-22 11:22:03.113847: step: 556/464, loss: 0.05490335449576378 2023-01-22 11:22:03.783623: step: 558/464, loss: 0.05813895910978317 2023-01-22 11:22:04.419306: step: 560/464, loss: 0.07191114872694016 2023-01-22 11:22:05.101189: step: 562/464, loss: 0.0820101946592331 2023-01-22 11:22:05.703468: step: 564/464, loss: 0.14678408205509186 2023-01-22 11:22:06.260235: step: 566/464, loss: 0.14023204147815704 2023-01-22 11:22:06.901953: step: 568/464, loss: 0.4147571623325348 2023-01-22 11:22:07.539708: step: 570/464, loss: 0.12394271045923233 2023-01-22 11:22:08.133337: step: 572/464, loss: 0.4368882477283478 2023-01-22 11:22:08.778309: step: 574/464, loss: 0.6768796443939209 2023-01-22 11:22:09.363042: step: 576/464, loss: 0.05807606875896454 2023-01-22 11:22:09.994985: step: 578/464, loss: 0.10776543617248535 2023-01-22 11:22:10.618825: step: 580/464, loss: 0.09010083973407745 2023-01-22 11:22:11.249512: step: 582/464, loss: 0.09341294318437576 2023-01-22 11:22:11.921579: step: 584/464, loss: 0.1841656118631363 2023-01-22 11:22:12.505912: step: 586/464, loss: 0.1848258078098297 2023-01-22 11:22:13.128987: step: 588/464, loss: 0.2034948617219925 2023-01-22 11:22:13.823227: step: 590/464, loss: 0.09111060202121735 2023-01-22 11:22:14.444840: step: 592/464, loss: 0.0409679114818573 2023-01-22 11:22:15.080896: step: 594/464, loss: 0.26828378438949585 2023-01-22 11:22:15.699286: step: 596/464, loss: 0.07353124767541885 2023-01-22 11:22:16.323156: step: 598/464, loss: 0.14041757583618164 2023-01-22 11:22:16.992335: step: 600/464, loss: 0.34325841069221497 2023-01-22 11:22:17.660700: step: 602/464, loss: 0.6918802261352539 2023-01-22 11:22:18.250554: step: 604/464, loss: 0.12383803725242615 2023-01-22 11:22:18.872837: step: 606/464, loss: 0.06072302907705307 2023-01-22 11:22:19.465471: step: 608/464, loss: 0.061823777854442596 2023-01-22 11:22:20.056848: step: 610/464, loss: 0.06603369861841202 2023-01-22 11:22:20.653326: step: 612/464, loss: 0.14131344854831696 2023-01-22 11:22:21.309131: step: 614/464, loss: 0.12874765694141388 2023-01-22 11:22:21.922792: step: 616/464, loss: 0.34312903881073 2023-01-22 11:22:22.554897: step: 618/464, loss: 0.4952242970466614 2023-01-22 11:22:23.217012: step: 620/464, loss: 0.07113741338253021 2023-01-22 11:22:23.837539: step: 622/464, loss: 0.04245679825544357 2023-01-22 11:22:24.475947: step: 624/464, loss: 0.1600327342748642 2023-01-22 11:22:25.132795: step: 626/464, loss: 0.1523052304983139 2023-01-22 11:22:25.784335: step: 628/464, loss: 0.1359877735376358 2023-01-22 11:22:26.447839: step: 630/464, loss: 0.11738570034503937 2023-01-22 11:22:27.117346: step: 632/464, loss: 0.2088128924369812 2023-01-22 11:22:27.731457: step: 634/464, loss: 0.09273140132427216 2023-01-22 11:22:28.303285: step: 
636/464, loss: 0.09316191077232361 2023-01-22 11:22:28.953695: step: 638/464, loss: 0.2756721079349518 2023-01-22 11:22:29.703097: step: 640/464, loss: 0.21960747241973877 2023-01-22 11:22:30.363136: step: 642/464, loss: 0.13363170623779297 2023-01-22 11:22:31.034681: step: 644/464, loss: 0.2028121054172516 2023-01-22 11:22:31.630046: step: 646/464, loss: 0.20220768451690674 2023-01-22 11:22:32.268743: step: 648/464, loss: 0.12750768661499023 2023-01-22 11:22:32.894515: step: 650/464, loss: 0.22797568142414093 2023-01-22 11:22:33.522418: step: 652/464, loss: 0.09133946895599365 2023-01-22 11:22:34.201928: step: 654/464, loss: 0.12407989799976349 2023-01-22 11:22:34.796089: step: 656/464, loss: 0.1795833855867386 2023-01-22 11:22:35.366338: step: 658/464, loss: 0.11011943221092224 2023-01-22 11:22:35.931360: step: 660/464, loss: 0.29204973578453064 2023-01-22 11:22:36.641764: step: 662/464, loss: 0.5304825305938721 2023-01-22 11:22:37.267175: step: 664/464, loss: 0.10545740276575089 2023-01-22 11:22:37.942627: step: 666/464, loss: 0.5920228958129883 2023-01-22 11:22:38.572099: step: 668/464, loss: 0.3886897563934326 2023-01-22 11:22:39.201130: step: 670/464, loss: 0.055316805839538574 2023-01-22 11:22:39.894216: step: 672/464, loss: 0.15686535835266113 2023-01-22 11:22:40.485670: step: 674/464, loss: 0.22624193131923676 2023-01-22 11:22:41.101339: step: 676/464, loss: 0.11832347512245178 2023-01-22 11:22:41.759415: step: 678/464, loss: 0.08923111855983734 2023-01-22 11:22:42.396693: step: 680/464, loss: 0.14050790667533875 2023-01-22 11:22:43.152980: step: 682/464, loss: 0.11861838400363922 2023-01-22 11:22:43.792930: step: 684/464, loss: 0.13761171698570251 2023-01-22 11:22:44.436894: step: 686/464, loss: 0.35186994075775146 2023-01-22 11:22:45.047035: step: 688/464, loss: 0.4274250864982605 2023-01-22 11:22:45.706755: step: 690/464, loss: 0.08268038183450699 2023-01-22 11:22:46.394172: step: 692/464, loss: 0.10088789463043213 2023-01-22 11:22:47.031245: step: 694/464, loss: 0.40460291504859924 2023-01-22 11:22:47.712053: step: 696/464, loss: 0.10182829201221466 2023-01-22 11:22:48.298539: step: 698/464, loss: 0.05861677974462509 2023-01-22 11:22:48.932061: step: 700/464, loss: 0.15200048685073853 2023-01-22 11:22:49.513412: step: 702/464, loss: 0.07056646049022675 2023-01-22 11:22:50.198762: step: 704/464, loss: 0.19686491787433624 2023-01-22 11:22:50.845562: step: 706/464, loss: 0.07907170802354813 2023-01-22 11:22:51.428702: step: 708/464, loss: 0.3801847994327545 2023-01-22 11:22:52.019625: step: 710/464, loss: 0.11562389135360718 2023-01-22 11:22:52.637376: step: 712/464, loss: 0.3307975232601166 2023-01-22 11:22:53.208499: step: 714/464, loss: 0.22483167052268982 2023-01-22 11:22:53.815805: step: 716/464, loss: 0.21573488414287567 2023-01-22 11:22:54.517199: step: 718/464, loss: 0.1486646980047226 2023-01-22 11:22:55.147685: step: 720/464, loss: 0.17777541279792786 2023-01-22 11:22:55.733774: step: 722/464, loss: 0.08999773114919662 2023-01-22 11:22:56.311103: step: 724/464, loss: 0.8580670952796936 2023-01-22 11:22:56.965758: step: 726/464, loss: 0.3896852731704712 2023-01-22 11:22:57.609592: step: 728/464, loss: 0.39495813846588135 2023-01-22 11:22:58.244793: step: 730/464, loss: 0.07761916518211365 2023-01-22 11:22:58.830614: step: 732/464, loss: 0.04868488386273384 2023-01-22 11:22:59.512343: step: 734/464, loss: 0.1510753333568573 2023-01-22 11:23:00.134459: step: 736/464, loss: 0.7264018654823303 2023-01-22 11:23:00.736531: step: 738/464, loss: 0.09703339636325836 2023-01-22 
11:23:01.397339: step: 740/464, loss: 0.10365378856658936 2023-01-22 11:23:02.031928: step: 742/464, loss: 0.14898864924907684 2023-01-22 11:23:02.692232: step: 744/464, loss: 0.1205286905169487 2023-01-22 11:23:03.315739: step: 746/464, loss: 0.32317137718200684 2023-01-22 11:23:03.949380: step: 748/464, loss: 0.5914244651794434 2023-01-22 11:23:04.495746: step: 750/464, loss: 0.8164223432540894 2023-01-22 11:23:05.167972: step: 752/464, loss: 0.42005714774131775 2023-01-22 11:23:05.778790: step: 754/464, loss: 0.2197103053331375 2023-01-22 11:23:06.434601: step: 756/464, loss: 0.07207664847373962 2023-01-22 11:23:07.045843: step: 758/464, loss: 0.18790841102600098 2023-01-22 11:23:07.687221: step: 760/464, loss: 0.4112013578414917 2023-01-22 11:23:08.351225: step: 762/464, loss: 0.10273218899965286 2023-01-22 11:23:08.996355: step: 764/464, loss: 0.14175225794315338 2023-01-22 11:23:09.673121: step: 766/464, loss: 0.07376343011856079 2023-01-22 11:23:10.291977: step: 768/464, loss: 0.1520259529352188 2023-01-22 11:23:10.917258: step: 770/464, loss: 0.07247330248355865 2023-01-22 11:23:11.462130: step: 772/464, loss: 0.04153461754322052 2023-01-22 11:23:12.238454: step: 774/464, loss: 0.1823529452085495 2023-01-22 11:23:12.905926: step: 776/464, loss: 0.5420295596122742 2023-01-22 11:23:13.553655: step: 778/464, loss: 0.4019032120704651 2023-01-22 11:23:14.153335: step: 780/464, loss: 0.1072503924369812 2023-01-22 11:23:14.864651: step: 782/464, loss: 0.20655189454555511 2023-01-22 11:23:15.458951: step: 784/464, loss: 0.07755839824676514 2023-01-22 11:23:16.083749: step: 786/464, loss: 0.17498230934143066 2023-01-22 11:23:16.715461: step: 788/464, loss: 1.4299875497817993 2023-01-22 11:23:17.251762: step: 790/464, loss: 0.1002681702375412 2023-01-22 11:23:17.812986: step: 792/464, loss: 0.1689300835132599 2023-01-22 11:23:18.424479: step: 794/464, loss: 0.07717189937829971 2023-01-22 11:23:19.088198: step: 796/464, loss: 0.13709495961666107 2023-01-22 11:23:19.769905: step: 798/464, loss: 0.044095080345869064 2023-01-22 11:23:20.470619: step: 800/464, loss: 0.10243158042430878 2023-01-22 11:23:21.146052: step: 802/464, loss: 0.07887127250432968 2023-01-22 11:23:21.782885: step: 804/464, loss: 0.2742998003959656 2023-01-22 11:23:22.367266: step: 806/464, loss: 0.09627963602542877 2023-01-22 11:23:22.951824: step: 808/464, loss: 0.10505854338407516 2023-01-22 11:23:23.587522: step: 810/464, loss: 0.5737142562866211 2023-01-22 11:23:24.244785: step: 812/464, loss: 0.1623065322637558 2023-01-22 11:23:24.905787: step: 814/464, loss: 0.10733121633529663 2023-01-22 11:23:25.534688: step: 816/464, loss: 0.060589712113142014 2023-01-22 11:23:26.182098: step: 818/464, loss: 0.2681101858615875 2023-01-22 11:23:26.778022: step: 820/464, loss: 0.08098702877759933 2023-01-22 11:23:27.426270: step: 822/464, loss: 0.058230411261320114 2023-01-22 11:23:28.015023: step: 824/464, loss: 0.1930847018957138 2023-01-22 11:23:28.599818: step: 826/464, loss: 0.47333502769470215 2023-01-22 11:23:29.301766: step: 828/464, loss: 0.18079730868339539 2023-01-22 11:23:30.012548: step: 830/464, loss: 0.12895090878009796 2023-01-22 11:23:30.696863: step: 832/464, loss: 0.1963747888803482 2023-01-22 11:23:31.262170: step: 834/464, loss: 0.26649853587150574 2023-01-22 11:23:31.909562: step: 836/464, loss: 0.6310838460922241 2023-01-22 11:23:32.516500: step: 838/464, loss: 0.09940309077501297 2023-01-22 11:23:33.165422: step: 840/464, loss: 0.07393283396959305 2023-01-22 11:23:33.753547: step: 842/464, loss: 
0.6933224201202393 2023-01-22 11:23:34.378981: step: 844/464, loss: 0.10924071073532104 2023-01-22 11:23:34.966503: step: 846/464, loss: 0.08506400883197784 2023-01-22 11:23:35.607987: step: 848/464, loss: 0.7190383672714233 2023-01-22 11:23:36.216988: step: 850/464, loss: 0.2215147167444229 2023-01-22 11:23:36.878755: step: 852/464, loss: 0.2889682650566101 2023-01-22 11:23:37.501247: step: 854/464, loss: 0.18120506405830383 2023-01-22 11:23:38.052030: step: 856/464, loss: 0.17137137055397034 2023-01-22 11:23:38.639344: step: 858/464, loss: 0.7795878648757935 2023-01-22 11:23:39.289948: step: 860/464, loss: 0.13829930126667023 2023-01-22 11:23:39.930974: step: 862/464, loss: 0.06743170320987701 2023-01-22 11:23:40.621054: step: 864/464, loss: 0.16243469715118408 2023-01-22 11:23:41.224787: step: 866/464, loss: 0.21655528247356415 2023-01-22 11:23:41.863708: step: 868/464, loss: 0.12445773184299469 2023-01-22 11:23:42.443599: step: 870/464, loss: 0.34826043248176575 2023-01-22 11:23:43.126963: step: 872/464, loss: 0.27048900723457336 2023-01-22 11:23:43.730775: step: 874/464, loss: 0.11076904088258743 2023-01-22 11:23:44.399024: step: 876/464, loss: 0.203446164727211 2023-01-22 11:23:45.074266: step: 878/464, loss: 0.3083826005458832 2023-01-22 11:23:45.688177: step: 880/464, loss: 0.13766227662563324 2023-01-22 11:23:46.369240: step: 882/464, loss: 0.25832998752593994 2023-01-22 11:23:46.986037: step: 884/464, loss: 0.08957251161336899 2023-01-22 11:23:47.603889: step: 886/464, loss: 0.12176202982664108 2023-01-22 11:23:48.203162: step: 888/464, loss: 0.10469197481870651 2023-01-22 11:23:48.808974: step: 890/464, loss: 0.06898195296525955 2023-01-22 11:23:49.416799: step: 892/464, loss: 0.15037567913532257 2023-01-22 11:23:50.037484: step: 894/464, loss: 0.10617604851722717 2023-01-22 11:23:50.663161: step: 896/464, loss: 0.11643420159816742 2023-01-22 11:23:51.338093: step: 898/464, loss: 0.13417676091194153 2023-01-22 11:23:51.988104: step: 900/464, loss: 0.14892005920410156 2023-01-22 11:23:52.626531: step: 902/464, loss: 0.5806592702865601 2023-01-22 11:23:53.240886: step: 904/464, loss: 0.09055360406637192 2023-01-22 11:23:53.863417: step: 906/464, loss: 0.12215010076761246 2023-01-22 11:23:54.525788: step: 908/464, loss: 0.11172604560852051 2023-01-22 11:23:55.122629: step: 910/464, loss: 0.2146151065826416 2023-01-22 11:23:55.761154: step: 912/464, loss: 0.18249428272247314 2023-01-22 11:23:56.395817: step: 914/464, loss: 0.07410207390785217 2023-01-22 11:23:57.061193: step: 916/464, loss: 0.13241495192050934 2023-01-22 11:23:57.737271: step: 918/464, loss: 0.08030088245868683 2023-01-22 11:23:58.326059: step: 920/464, loss: 0.19699254631996155 2023-01-22 11:23:58.936583: step: 922/464, loss: 0.10928104817867279 2023-01-22 11:23:59.592404: step: 924/464, loss: 0.0733671635389328 2023-01-22 11:24:00.256874: step: 926/464, loss: 0.14551511406898499 2023-01-22 11:24:00.917697: step: 928/464, loss: 0.38674890995025635 2023-01-22 11:24:01.397632: step: 930/464, loss: 0.08716857433319092 ================================================== Loss: 0.208 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2581847240915209, 'r': 0.3640061669829222, 'f1': 0.3020964566929134}, 'combined': 0.22259738914214672, 'epoch': 13} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2688209022352343, 'r': 0.33793922420572614, 'f1': 0.29944329956064747}, 'combined': 
0.19549148054218435, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25651478494623653, 'r': 0.3772276249209361, 'f1': 0.30537474398361486}, 'combined': 0.22501296925108463, 'epoch': 13} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2876061685874242, 'r': 0.3486135376817263, 'f1': 0.31518484228758814}, 'combined': 0.2057683426333477, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2633713054187192, 'r': 0.3623229533748984, 'f1': 0.3050226780009129}, 'combined': 0.22475355221119894, 'epoch': 13} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2801271703270636, 'r': 0.33780041127675314, 'f1': 0.3062723728909228}, 'combined': 0.19994983929666463, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26842948717948717, 'r': 0.3988095238095238, 'f1': 0.32088122605363983}, 'combined': 0.2139208173690932, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22959183673469388, 'r': 0.4891304347826087, 'f1': 0.3125}, 'combined': 0.15625, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28487491086284766, 'r': 0.3248763214963405, 'f1': 0.303563513171226}, 'combined': 0.2236783781261665, 'epoch': 9} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2834185544035629, 'r': 0.30291983108270715, 'f1': 0.2928448921331714}, 'combined': 0.19118371196258857, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47368421052631576, 'r': 0.3103448275862069, 'f1': 0.375}, 'combined': 
0.25, 'epoch': 9} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:26:42.063151: step: 2/464, loss: 0.12613213062286377 2023-01-22 11:26:42.675242: step: 4/464, loss: 0.12780576944351196 2023-01-22 11:26:43.291738: step: 6/464, loss: 0.06970081478357315 2023-01-22 11:26:43.951120: step: 8/464, loss: 0.07534730434417725 2023-01-22 11:26:44.629471: step: 10/464, loss: 0.1213054358959198 2023-01-22 11:26:45.241034: step: 12/464, loss: 0.1861896812915802 2023-01-22 11:26:45.873677: step: 14/464, loss: 0.21565768122673035 2023-01-22 11:26:46.512511: step: 16/464, loss: 0.03582453355193138 2023-01-22 11:26:47.147168: step: 18/464, loss: 0.11763232201337814 2023-01-22 11:26:47.754304: step: 20/464, loss: 0.07920301705598831 2023-01-22 11:26:48.416289: step: 22/464, loss: 0.21957677602767944 2023-01-22 11:26:49.109544: step: 24/464, loss: 0.08056668192148209 2023-01-22 11:26:49.790044: step: 26/464, loss: 0.43024322390556335 2023-01-22 11:26:50.412654: step: 28/464, loss: 0.21023477613925934 2023-01-22 11:26:51.069468: step: 30/464, loss: 0.058270473033189774 2023-01-22 11:26:51.685938: step: 32/464, loss: 0.06991127133369446 2023-01-22 11:26:52.236698: step: 34/464, loss: 0.11741513013839722 2023-01-22 11:26:52.856134: step: 36/464, loss: 0.09309908002614975 2023-01-22 11:26:53.455506: step: 38/464, loss: 0.09769189357757568 2023-01-22 11:26:54.080793: step: 40/464, loss: 0.039672598242759705 2023-01-22 11:26:54.718766: step: 42/464, loss: 0.08527854084968567 2023-01-22 11:26:55.313617: step: 44/464, loss: 0.05091758444905281 2023-01-22 11:26:55.842550: step: 46/464, loss: 0.11744002252817154 2023-01-22 11:26:56.509582: step: 48/464, loss: 0.03684813156723976 2023-01-22 11:26:57.100268: step: 50/464, loss: 0.23239551484584808 2023-01-22 11:26:57.688034: step: 52/464, loss: 0.009587530046701431 2023-01-22 11:26:58.282482: step: 54/464, loss: 0.4318223297595978 2023-01-22 11:26:58.827681: step: 56/464, loss: 0.0942460224032402 2023-01-22 11:26:59.478024: step: 58/464, loss: 0.12825199961662292 2023-01-22 11:27:00.215576: step: 60/464, loss: 0.1324436217546463 2023-01-22 11:27:00.827893: step: 62/464, loss: 0.04708656668663025 2023-01-22 11:27:01.459075: step: 64/464, loss: 0.04969207942485809 2023-01-22 11:27:02.089915: step: 66/464, loss: 0.41940873861312866 2023-01-22 11:27:02.705927: step: 68/464, loss: 0.0641220286488533 2023-01-22 11:27:03.291508: step: 70/464, loss: 0.060402270406484604 2023-01-22 11:27:03.952030: step: 72/464, loss: 0.0645303726196289 2023-01-22 11:27:04.636997: step: 74/464, loss: 0.07540280371904373 2023-01-22 11:27:05.260054: step: 76/464, loss: 0.0162261463701725 2023-01-22 11:27:05.922186: step: 78/464, loss: 0.292266845703125 2023-01-22 11:27:06.555117: step: 80/464, loss: 0.6582212448120117 2023-01-22 11:27:07.146767: step: 82/464, loss: 0.494401216506958 2023-01-22 11:27:07.774145: step: 84/464, loss: 0.307862251996994 2023-01-22 11:27:08.375033: step: 86/464, loss: 0.04717501625418663 2023-01-22 11:27:09.007566: step: 88/464, loss: 0.21211999654769897 2023-01-22 11:27:09.641944: step: 90/464, loss: 0.05120338127017021 2023-01-22 11:27:10.301686: step: 92/464, loss: 0.22324422001838684 2023-01-22 11:27:11.002237: step: 94/464, loss: 0.08178785443305969 2023-01-22 11:27:11.671871: step: 96/464, loss: 0.16229651868343353 2023-01-22 
11:27:12.331205: step: 98/464, loss: 0.2983476519584656 2023-01-22 11:27:13.006419: step: 100/464, loss: 0.2334270179271698 2023-01-22 11:27:13.676031: step: 102/464, loss: 0.047153085470199585 2023-01-22 11:27:14.299040: step: 104/464, loss: 0.15230758488178253 2023-01-22 11:27:14.899483: step: 106/464, loss: 0.22207175195217133 2023-01-22 11:27:15.524901: step: 108/464, loss: 0.1279035210609436 2023-01-22 11:27:16.152214: step: 110/464, loss: 0.07191181927919388 2023-01-22 11:27:16.787369: step: 112/464, loss: 0.10683086514472961 2023-01-22 11:27:17.448529: step: 114/464, loss: 0.11870521306991577 2023-01-22 11:27:18.136045: step: 116/464, loss: 0.05834219232201576 2023-01-22 11:27:18.753027: step: 118/464, loss: 0.2276674211025238 2023-01-22 11:27:19.427990: step: 120/464, loss: 0.032239992171525955 2023-01-22 11:27:20.069027: step: 122/464, loss: 0.4272522032260895 2023-01-22 11:27:20.735777: step: 124/464, loss: 0.04740273952484131 2023-01-22 11:27:21.327410: step: 126/464, loss: 0.09091894328594208 2023-01-22 11:27:21.968041: step: 128/464, loss: 0.0550725944340229 2023-01-22 11:27:22.526885: step: 130/464, loss: 0.27808040380477905 2023-01-22 11:27:23.188808: step: 132/464, loss: 0.030161166563630104 2023-01-22 11:27:23.820436: step: 134/464, loss: 0.012089907191693783 2023-01-22 11:27:24.419214: step: 136/464, loss: 0.23043721914291382 2023-01-22 11:27:25.081647: step: 138/464, loss: 0.0220566987991333 2023-01-22 11:27:25.656245: step: 140/464, loss: 0.1365833580493927 2023-01-22 11:27:26.322665: step: 142/464, loss: 0.06428226828575134 2023-01-22 11:27:27.014425: step: 144/464, loss: 0.19025635719299316 2023-01-22 11:27:27.674809: step: 146/464, loss: 0.18001894652843475 2023-01-22 11:27:28.274296: step: 148/464, loss: 0.047540344297885895 2023-01-22 11:27:28.901882: step: 150/464, loss: 0.2429361641407013 2023-01-22 11:27:29.616872: step: 152/464, loss: 0.15517257153987885 2023-01-22 11:27:30.255469: step: 154/464, loss: 0.27889472246170044 2023-01-22 11:27:30.924916: step: 156/464, loss: 0.10768377035856247 2023-01-22 11:27:31.495581: step: 158/464, loss: 0.1395530253648758 2023-01-22 11:27:32.108340: step: 160/464, loss: 0.15417103469371796 2023-01-22 11:27:32.745118: step: 162/464, loss: 0.06897418200969696 2023-01-22 11:27:33.397012: step: 164/464, loss: 0.09392699599266052 2023-01-22 11:27:33.975057: step: 166/464, loss: 0.1080712378025055 2023-01-22 11:27:34.611529: step: 168/464, loss: 0.06684279441833496 2023-01-22 11:27:35.175788: step: 170/464, loss: 0.4811127781867981 2023-01-22 11:27:35.846104: step: 172/464, loss: 0.26632991433143616 2023-01-22 11:27:36.578929: step: 174/464, loss: 0.09028254449367523 2023-01-22 11:27:37.143052: step: 176/464, loss: 1.6176563501358032 2023-01-22 11:27:37.820672: step: 178/464, loss: 0.10107997804880142 2023-01-22 11:27:38.356275: step: 180/464, loss: 0.1728367805480957 2023-01-22 11:27:38.989030: step: 182/464, loss: 0.2647758722305298 2023-01-22 11:27:39.578670: step: 184/464, loss: 0.057612065225839615 2023-01-22 11:27:40.156467: step: 186/464, loss: 0.16593562066555023 2023-01-22 11:27:40.790327: step: 188/464, loss: 0.39534786343574524 2023-01-22 11:27:41.440987: step: 190/464, loss: 0.11273720115423203 2023-01-22 11:27:42.072413: step: 192/464, loss: 0.24984633922576904 2023-01-22 11:27:42.653974: step: 194/464, loss: 0.14231686294078827 2023-01-22 11:27:43.287664: step: 196/464, loss: 0.22011631727218628 2023-01-22 11:27:43.943523: step: 198/464, loss: 0.2356623113155365 2023-01-22 11:27:44.614956: step: 200/464, loss: 
0.4909124970436096 2023-01-22 11:27:45.206094: step: 202/464, loss: 0.06964804232120514 2023-01-22 11:27:45.842762: step: 204/464, loss: 0.13431423902511597 2023-01-22 11:27:46.476165: step: 206/464, loss: 0.11794348061084747 2023-01-22 11:27:47.022043: step: 208/464, loss: 0.06027591973543167 2023-01-22 11:27:47.776935: step: 210/464, loss: 0.16354875266551971 2023-01-22 11:27:48.395384: step: 212/464, loss: 0.02065500244498253 2023-01-22 11:27:49.070715: step: 214/464, loss: 0.1693529486656189 2023-01-22 11:27:49.724058: step: 216/464, loss: 0.09896227717399597 2023-01-22 11:27:50.372650: step: 218/464, loss: 0.17069461941719055 2023-01-22 11:27:51.039443: step: 220/464, loss: 13.397747993469238 2023-01-22 11:27:51.649287: step: 222/464, loss: 0.1541566550731659 2023-01-22 11:27:52.243176: step: 224/464, loss: 0.15598753094673157 2023-01-22 11:27:52.926598: step: 226/464, loss: 0.11893276870250702 2023-01-22 11:27:53.599681: step: 228/464, loss: 0.15374010801315308 2023-01-22 11:27:54.150635: step: 230/464, loss: 2.745584487915039 2023-01-22 11:27:54.719871: step: 232/464, loss: 0.035824257880449295 2023-01-22 11:27:55.349485: step: 234/464, loss: 0.06368924677371979 2023-01-22 11:27:55.927595: step: 236/464, loss: 0.0707118809223175 2023-01-22 11:27:56.538541: step: 238/464, loss: 0.24368637800216675 2023-01-22 11:27:57.165157: step: 240/464, loss: 0.16533474624156952 2023-01-22 11:27:57.819471: step: 242/464, loss: 0.07772989571094513 2023-01-22 11:27:58.375610: step: 244/464, loss: 0.23945599794387817 2023-01-22 11:27:59.008292: step: 246/464, loss: 0.07287517935037613 2023-01-22 11:27:59.696266: step: 248/464, loss: 0.17496463656425476 2023-01-22 11:28:00.310140: step: 250/464, loss: 0.0961579903960228 2023-01-22 11:28:00.930975: step: 252/464, loss: 0.6809481978416443 2023-01-22 11:28:01.538629: step: 254/464, loss: 0.10889847576618195 2023-01-22 11:28:02.160338: step: 256/464, loss: 0.08200860768556595 2023-01-22 11:28:02.760246: step: 258/464, loss: 0.15323743224143982 2023-01-22 11:28:03.372069: step: 260/464, loss: 0.09156341105699539 2023-01-22 11:28:03.947301: step: 262/464, loss: 0.18293572962284088 2023-01-22 11:28:04.593678: step: 264/464, loss: 0.30259743332862854 2023-01-22 11:28:05.219053: step: 266/464, loss: 0.16626927256584167 2023-01-22 11:28:05.868651: step: 268/464, loss: 0.1518091857433319 2023-01-22 11:28:06.464321: step: 270/464, loss: 0.08012847602367401 2023-01-22 11:28:07.051419: step: 272/464, loss: 0.18999381363391876 2023-01-22 11:28:07.676928: step: 274/464, loss: 0.10878537595272064 2023-01-22 11:28:08.295843: step: 276/464, loss: 0.14942845702171326 2023-01-22 11:28:09.070281: step: 278/464, loss: 0.14834989607334137 2023-01-22 11:28:09.639824: step: 280/464, loss: 0.032504718750715256 2023-01-22 11:28:10.233558: step: 282/464, loss: 0.08153710514307022 2023-01-22 11:28:10.835702: step: 284/464, loss: 0.11140517145395279 2023-01-22 11:28:11.474983: step: 286/464, loss: 0.3821107745170593 2023-01-22 11:28:12.182802: step: 288/464, loss: 0.05341333895921707 2023-01-22 11:28:12.830140: step: 290/464, loss: 0.27715128660202026 2023-01-22 11:28:13.472283: step: 292/464, loss: 0.14849257469177246 2023-01-22 11:28:14.102772: step: 294/464, loss: 0.05485979840159416 2023-01-22 11:28:14.643972: step: 296/464, loss: 0.037415195256471634 2023-01-22 11:28:15.268546: step: 298/464, loss: 0.05425393208861351 2023-01-22 11:28:15.912852: step: 300/464, loss: 0.12795880436897278 2023-01-22 11:28:16.537030: step: 302/464, loss: 0.18602930009365082 2023-01-22 
11:28:17.154421: step: 304/464, loss: 0.14597341418266296 2023-01-22 11:28:17.776502: step: 306/464, loss: 0.09839761257171631 2023-01-22 11:28:18.375913: step: 308/464, loss: 0.12964694201946259 2023-01-22 11:28:19.030194: step: 310/464, loss: 0.04418937861919403 2023-01-22 11:28:19.704329: step: 312/464, loss: 0.046734243631362915 2023-01-22 11:28:20.395875: step: 314/464, loss: 0.337647944688797 2023-01-22 11:28:21.037131: step: 316/464, loss: 0.1818884015083313 2023-01-22 11:28:21.590723: step: 318/464, loss: 0.13660390675067902 2023-01-22 11:28:22.239696: step: 320/464, loss: 0.05392802134156227 2023-01-22 11:28:22.878256: step: 322/464, loss: 0.15964898467063904 2023-01-22 11:28:23.553963: step: 324/464, loss: 0.23734918236732483 2023-01-22 11:28:24.232175: step: 326/464, loss: 0.11211232841014862 2023-01-22 11:28:24.933026: step: 328/464, loss: 0.08280149102210999 2023-01-22 11:28:25.628327: step: 330/464, loss: 0.2425793558359146 2023-01-22 11:28:26.296766: step: 332/464, loss: 1.0096615552902222 2023-01-22 11:28:26.964337: step: 334/464, loss: 0.2613997459411621 2023-01-22 11:28:27.621694: step: 336/464, loss: 0.06869491934776306 2023-01-22 11:28:28.282017: step: 338/464, loss: 0.1866166889667511 2023-01-22 11:28:28.909966: step: 340/464, loss: 0.05125715583562851 2023-01-22 11:28:29.547399: step: 342/464, loss: 0.02904468961060047 2023-01-22 11:28:30.157115: step: 344/464, loss: 0.12807013094425201 2023-01-22 11:28:30.727522: step: 346/464, loss: 0.09498754888772964 2023-01-22 11:28:31.408112: step: 348/464, loss: 0.22028857469558716 2023-01-22 11:28:32.040390: step: 350/464, loss: 0.10805842280387878 2023-01-22 11:28:32.750361: step: 352/464, loss: 0.14931246638298035 2023-01-22 11:28:33.436266: step: 354/464, loss: 0.09262239933013916 2023-01-22 11:28:34.027848: step: 356/464, loss: 0.10256149619817734 2023-01-22 11:28:34.593226: step: 358/464, loss: 0.27007755637168884 2023-01-22 11:28:35.233673: step: 360/464, loss: 0.33299100399017334 2023-01-22 11:28:35.855374: step: 362/464, loss: 0.10185973346233368 2023-01-22 11:28:36.447680: step: 364/464, loss: 0.3008521795272827 2023-01-22 11:28:37.053365: step: 366/464, loss: 0.13727213442325592 2023-01-22 11:28:37.686447: step: 368/464, loss: 0.1457100361585617 2023-01-22 11:28:38.306495: step: 370/464, loss: 0.2102964222431183 2023-01-22 11:28:38.966440: step: 372/464, loss: 0.11828085780143738 2023-01-22 11:28:39.550577: step: 374/464, loss: 0.024987978860735893 2023-01-22 11:28:40.341414: step: 376/464, loss: 0.2500342130661011 2023-01-22 11:28:41.018007: step: 378/464, loss: 0.23952820897102356 2023-01-22 11:28:41.641863: step: 380/464, loss: 0.15812312066555023 2023-01-22 11:28:42.416764: step: 382/464, loss: 0.09742007404565811 2023-01-22 11:28:43.015399: step: 384/464, loss: 0.18498073518276215 2023-01-22 11:28:43.687786: step: 386/464, loss: 0.10564729571342468 2023-01-22 11:28:44.295865: step: 388/464, loss: 0.1336047202348709 2023-01-22 11:28:44.865777: step: 390/464, loss: 0.057287439703941345 2023-01-22 11:28:45.498963: step: 392/464, loss: 0.0842222347855568 2023-01-22 11:28:46.064043: step: 394/464, loss: 0.1357746720314026 2023-01-22 11:28:46.654572: step: 396/464, loss: 0.07283379137516022 2023-01-22 11:28:47.399934: step: 398/464, loss: 0.505624532699585 2023-01-22 11:28:48.074117: step: 400/464, loss: 0.24738441407680511 2023-01-22 11:28:48.704089: step: 402/464, loss: 0.12691976130008698 2023-01-22 11:28:49.308377: step: 404/464, loss: 0.09851517528295517 2023-01-22 11:28:49.931597: step: 406/464, loss: 
0.06937228888273239 2023-01-22 11:28:50.644998: step: 408/464, loss: 0.192485049366951 2023-01-22 11:28:51.320406: step: 410/464, loss: 0.12418614327907562 2023-01-22 11:28:51.933058: step: 412/464, loss: 0.03279638662934303 2023-01-22 11:28:52.593493: step: 414/464, loss: 0.20969468355178833 2023-01-22 11:28:53.243933: step: 416/464, loss: 0.11003034561872482 2023-01-22 11:28:53.867663: step: 418/464, loss: 0.20709995925426483 2023-01-22 11:28:54.433289: step: 420/464, loss: 0.14961837232112885 2023-01-22 11:28:55.033644: step: 422/464, loss: 0.12297564744949341 2023-01-22 11:28:55.643787: step: 424/464, loss: 0.45564550161361694 2023-01-22 11:28:56.315450: step: 426/464, loss: 0.16366340219974518 2023-01-22 11:28:56.969555: step: 428/464, loss: 0.09504736959934235 2023-01-22 11:28:57.584711: step: 430/464, loss: 0.12506511807441711 2023-01-22 11:28:58.258749: step: 432/464, loss: 0.16169333457946777 2023-01-22 11:28:58.884764: step: 434/464, loss: 0.5574436187744141 2023-01-22 11:28:59.469446: step: 436/464, loss: 0.037777990102767944 2023-01-22 11:29:00.082371: step: 438/464, loss: 0.06685791909694672 2023-01-22 11:29:00.607198: step: 440/464, loss: 0.04349729046225548 2023-01-22 11:29:01.245745: step: 442/464, loss: 0.14673559367656708 2023-01-22 11:29:01.854999: step: 444/464, loss: 0.11699734628200531 2023-01-22 11:29:02.492764: step: 446/464, loss: 0.17990678548812866 2023-01-22 11:29:03.089351: step: 448/464, loss: 0.03047196939587593 2023-01-22 11:29:03.626705: step: 450/464, loss: 0.08635605871677399 2023-01-22 11:29:04.236323: step: 452/464, loss: 0.133879154920578 2023-01-22 11:29:04.854143: step: 454/464, loss: 0.03776126354932785 2023-01-22 11:29:05.481796: step: 456/464, loss: 0.2038840502500534 2023-01-22 11:29:06.167232: step: 458/464, loss: 0.6528012156486511 2023-01-22 11:29:06.853371: step: 460/464, loss: 0.033662084490060806 2023-01-22 11:29:07.449082: step: 462/464, loss: 0.354397714138031 2023-01-22 11:29:08.132098: step: 464/464, loss: 0.07987023144960403 2023-01-22 11:29:08.707209: step: 466/464, loss: 0.20296648144721985 2023-01-22 11:29:09.312900: step: 468/464, loss: 0.13156884908676147 2023-01-22 11:29:09.963670: step: 470/464, loss: 0.11640924960374832 2023-01-22 11:29:10.601142: step: 472/464, loss: 0.09786844253540039 2023-01-22 11:29:11.183127: step: 474/464, loss: 0.17049400508403778 2023-01-22 11:29:11.790295: step: 476/464, loss: 0.1515236645936966 2023-01-22 11:29:12.491019: step: 478/464, loss: 0.4211825430393219 2023-01-22 11:29:13.157523: step: 480/464, loss: 0.09188634157180786 2023-01-22 11:29:13.753696: step: 482/464, loss: 0.14980553090572357 2023-01-22 11:29:14.403845: step: 484/464, loss: 0.21239091455936432 2023-01-22 11:29:15.044981: step: 486/464, loss: 0.05061900615692139 2023-01-22 11:29:15.727987: step: 488/464, loss: 0.3776748776435852 2023-01-22 11:29:16.350061: step: 490/464, loss: 0.09961634129285812 2023-01-22 11:29:17.046904: step: 492/464, loss: 0.052889157086610794 2023-01-22 11:29:17.727023: step: 494/464, loss: 0.09435848146677017 2023-01-22 11:29:18.433659: step: 496/464, loss: 0.47626474499702454 2023-01-22 11:29:19.126686: step: 498/464, loss: 0.08534081280231476 2023-01-22 11:29:19.717381: step: 500/464, loss: 0.10756982862949371 2023-01-22 11:29:20.353937: step: 502/464, loss: 0.09308572858572006 2023-01-22 11:29:20.999111: step: 504/464, loss: 0.258376806974411 2023-01-22 11:29:21.622123: step: 506/464, loss: 1.1513007879257202 2023-01-22 11:29:22.261100: step: 508/464, loss: 0.5706973671913147 2023-01-22 11:29:22.881410: 
step: 510/464, loss: 0.062000103294849396 2023-01-22 11:29:23.535460: step: 512/464, loss: 0.06004420295357704 2023-01-22 11:29:24.197320: step: 514/464, loss: 0.04904761537909508 2023-01-22 11:29:24.845553: step: 516/464, loss: 0.09929678589105606 2023-01-22 11:29:25.447436: step: 518/464, loss: 0.1910267472267151 2023-01-22 11:29:26.119764: step: 520/464, loss: 0.06864751130342484 2023-01-22 11:29:26.681774: step: 522/464, loss: 0.052617765963077545 2023-01-22 11:29:27.290512: step: 524/464, loss: 0.22045567631721497 2023-01-22 11:29:27.945032: step: 526/464, loss: 0.3091566264629364 2023-01-22 11:29:28.612829: step: 528/464, loss: 0.16436395049095154 2023-01-22 11:29:29.251854: step: 530/464, loss: 0.08261793851852417 2023-01-22 11:29:29.867430: step: 532/464, loss: 0.7199795842170715 2023-01-22 11:29:30.545721: step: 534/464, loss: 0.201430082321167 2023-01-22 11:29:31.152721: step: 536/464, loss: 0.7143619060516357 2023-01-22 11:29:31.763932: step: 538/464, loss: 0.0859590396285057 2023-01-22 11:29:32.372393: step: 540/464, loss: 0.12776008248329163 2023-01-22 11:29:33.004364: step: 542/464, loss: 0.0532626137137413 2023-01-22 11:29:33.623643: step: 544/464, loss: 0.16390809416770935 2023-01-22 11:29:34.258414: step: 546/464, loss: 0.6926539540290833 2023-01-22 11:29:34.948540: step: 548/464, loss: 0.028972607105970383 2023-01-22 11:29:35.555748: step: 550/464, loss: 0.060550302267074585 2023-01-22 11:29:36.163675: step: 552/464, loss: 0.1250130832195282 2023-01-22 11:29:36.750433: step: 554/464, loss: 0.22803033888339996 2023-01-22 11:29:37.401755: step: 556/464, loss: 0.24090181291103363 2023-01-22 11:29:38.083130: step: 558/464, loss: 0.12897726893424988 2023-01-22 11:29:38.753034: step: 560/464, loss: 0.12019973248243332 2023-01-22 11:29:39.390704: step: 562/464, loss: 0.2126852422952652 2023-01-22 11:29:40.004623: step: 564/464, loss: 0.07297311723232269 2023-01-22 11:29:40.766370: step: 566/464, loss: 0.12867839634418488 2023-01-22 11:29:41.356243: step: 568/464, loss: 0.04996206983923912 2023-01-22 11:29:41.957717: step: 570/464, loss: 0.1706579178571701 2023-01-22 11:29:42.595245: step: 572/464, loss: 0.16440588235855103 2023-01-22 11:29:43.228256: step: 574/464, loss: 0.20301684737205505 2023-01-22 11:29:43.896300: step: 576/464, loss: 0.04953206703066826 2023-01-22 11:29:44.536245: step: 578/464, loss: 0.041960328817367554 2023-01-22 11:29:45.175482: step: 580/464, loss: 0.02059108205139637 2023-01-22 11:29:45.787942: step: 582/464, loss: 0.17526574432849884 2023-01-22 11:29:46.421526: step: 584/464, loss: 0.18093959987163544 2023-01-22 11:29:47.042946: step: 586/464, loss: 0.2419964224100113 2023-01-22 11:29:47.629981: step: 588/464, loss: 0.11984448879957199 2023-01-22 11:29:48.230072: step: 590/464, loss: 0.1029224768280983 2023-01-22 11:29:48.880977: step: 592/464, loss: 0.034720923751592636 2023-01-22 11:29:49.486982: step: 594/464, loss: 0.05716710537672043 2023-01-22 11:29:50.135231: step: 596/464, loss: 0.51500403881073 2023-01-22 11:29:50.792285: step: 598/464, loss: 0.2622247040271759 2023-01-22 11:29:51.410209: step: 600/464, loss: 0.5692510008811951 2023-01-22 11:29:51.999230: step: 602/464, loss: 0.017966220155358315 2023-01-22 11:29:52.630314: step: 604/464, loss: 0.6535645723342896 2023-01-22 11:29:53.210274: step: 606/464, loss: 0.12797455489635468 2023-01-22 11:29:53.953228: step: 608/464, loss: 0.13018736243247986 2023-01-22 11:29:54.536128: step: 610/464, loss: 0.13536271452903748 2023-01-22 11:29:55.182034: step: 612/464, loss: 0.065898597240448 
2023-01-22 11:29:55.761493: step: 614/464, loss: 0.049187514930963516 2023-01-22 11:29:56.401980: step: 616/464, loss: 0.12127663195133209 2023-01-22 11:29:57.037660: step: 618/464, loss: 0.3113647699356079 2023-01-22 11:29:57.673237: step: 620/464, loss: 0.0806652083992958 2023-01-22 11:29:58.284107: step: 622/464, loss: 0.037088535726070404 2023-01-22 11:29:58.915048: step: 624/464, loss: 0.1297650933265686 2023-01-22 11:29:59.467782: step: 626/464, loss: 0.2142733335494995 2023-01-22 11:30:00.025923: step: 628/464, loss: 0.1200641393661499 2023-01-22 11:30:00.716596: step: 630/464, loss: 0.20464667677879333 2023-01-22 11:30:01.373942: step: 632/464, loss: 0.04464574530720711 2023-01-22 11:30:01.987708: step: 634/464, loss: 0.03455955907702446 2023-01-22 11:30:02.585059: step: 636/464, loss: 2.4867355823516846 2023-01-22 11:30:03.245646: step: 638/464, loss: 0.09488419443368912 2023-01-22 11:30:03.890343: step: 640/464, loss: 0.09570202976465225 2023-01-22 11:30:04.557168: step: 642/464, loss: 0.6297911405563354 2023-01-22 11:30:05.198977: step: 644/464, loss: 0.17018218338489532 2023-01-22 11:30:05.862292: step: 646/464, loss: 0.037947215139865875 2023-01-22 11:30:06.460348: step: 648/464, loss: 0.0291412565857172 2023-01-22 11:30:07.022716: step: 650/464, loss: 0.3197057247161865 2023-01-22 11:30:07.623848: step: 652/464, loss: 0.3060678243637085 2023-01-22 11:30:08.227181: step: 654/464, loss: 0.3409462869167328 2023-01-22 11:30:08.831331: step: 656/464, loss: 0.11322534084320068 2023-01-22 11:30:09.511298: step: 658/464, loss: 0.06705331057310104 2023-01-22 11:30:10.133218: step: 660/464, loss: 0.5259943604469299 2023-01-22 11:30:10.809580: step: 662/464, loss: 0.08499801158905029 2023-01-22 11:30:11.525272: step: 664/464, loss: 0.3869590759277344 2023-01-22 11:30:12.194002: step: 666/464, loss: 0.47266441583633423 2023-01-22 11:30:12.789693: step: 668/464, loss: 0.15361620485782623 2023-01-22 11:30:13.413123: step: 670/464, loss: 0.039505161345005035 2023-01-22 11:30:14.030220: step: 672/464, loss: 0.16262929141521454 2023-01-22 11:30:14.615697: step: 674/464, loss: 0.07864541560411453 2023-01-22 11:30:15.205241: step: 676/464, loss: 0.11444167792797089 2023-01-22 11:30:15.863320: step: 678/464, loss: 0.15262271463871002 2023-01-22 11:30:16.592373: step: 680/464, loss: 0.1119389459490776 2023-01-22 11:30:17.204289: step: 682/464, loss: 0.21531479060649872 2023-01-22 11:30:17.871311: step: 684/464, loss: 0.3819708824157715 2023-01-22 11:30:18.431801: step: 686/464, loss: 0.06918393820524216 2023-01-22 11:30:19.015351: step: 688/464, loss: 0.03321441262960434 2023-01-22 11:30:19.671278: step: 690/464, loss: 0.2979850172996521 2023-01-22 11:30:20.337041: step: 692/464, loss: 0.04416452348232269 2023-01-22 11:30:20.980386: step: 694/464, loss: 0.1963769644498825 2023-01-22 11:30:21.534759: step: 696/464, loss: 0.04243450611829758 2023-01-22 11:30:22.138469: step: 698/464, loss: 0.10437124967575073 2023-01-22 11:30:22.724775: step: 700/464, loss: 0.15086588263511658 2023-01-22 11:30:23.409330: step: 702/464, loss: 0.08452273905277252 2023-01-22 11:30:24.026042: step: 704/464, loss: 0.1438084840774536 2023-01-22 11:30:24.702241: step: 706/464, loss: 0.20095717906951904 2023-01-22 11:30:25.306883: step: 708/464, loss: 0.12397371977567673 2023-01-22 11:30:25.999099: step: 710/464, loss: 0.12223966419696808 2023-01-22 11:30:26.638643: step: 712/464, loss: 0.16863887012004852 2023-01-22 11:30:27.229708: step: 714/464, loss: 0.1033097431063652 2023-01-22 11:30:27.924620: step: 716/464, loss: 
0.12905749678611755 2023-01-22 11:30:28.550256: step: 718/464, loss: 0.12600231170654297 2023-01-22 11:30:29.214844: step: 720/464, loss: 0.08024387806653976 2023-01-22 11:30:29.822152: step: 722/464, loss: 0.08329156041145325 2023-01-22 11:30:30.357309: step: 724/464, loss: 0.12820667028427124 2023-01-22 11:30:30.918111: step: 726/464, loss: 0.06663686782121658 2023-01-22 11:30:31.493696: step: 728/464, loss: 0.12871386110782623 2023-01-22 11:30:32.119202: step: 730/464, loss: 0.15328256785869598 2023-01-22 11:30:32.713787: step: 732/464, loss: 0.08829231560230255 2023-01-22 11:30:33.320667: step: 734/464, loss: 0.11055053025484085 2023-01-22 11:30:33.977278: step: 736/464, loss: 0.14831359684467316 2023-01-22 11:30:34.559810: step: 738/464, loss: 0.12051140516996384 2023-01-22 11:30:35.142619: step: 740/464, loss: 0.15838512778282166 2023-01-22 11:30:35.782998: step: 742/464, loss: 0.18228304386138916 2023-01-22 11:30:36.410709: step: 744/464, loss: 0.07749143987894058 2023-01-22 11:30:37.015021: step: 746/464, loss: 0.11602754145860672 2023-01-22 11:30:37.731424: step: 748/464, loss: 1.0120458602905273 2023-01-22 11:30:38.437436: step: 750/464, loss: 0.14229518175125122 2023-01-22 11:30:39.114537: step: 752/464, loss: 0.4490815997123718 2023-01-22 11:30:39.752617: step: 754/464, loss: 0.0855349525809288 2023-01-22 11:30:40.363282: step: 756/464, loss: 0.1280864179134369 2023-01-22 11:30:41.023350: step: 758/464, loss: 0.07882758975028992 2023-01-22 11:30:41.738255: step: 760/464, loss: 0.15018177032470703 2023-01-22 11:30:42.453785: step: 762/464, loss: 1.9219484329223633 2023-01-22 11:30:43.104840: step: 764/464, loss: 0.279685914516449 2023-01-22 11:30:43.711241: step: 766/464, loss: 0.1714082658290863 2023-01-22 11:30:44.326127: step: 768/464, loss: 1.316659688949585 2023-01-22 11:30:44.962663: step: 770/464, loss: 0.2550918459892273 2023-01-22 11:30:45.579867: step: 772/464, loss: 0.10818459093570709 2023-01-22 11:30:46.215629: step: 774/464, loss: 0.10291647166013718 2023-01-22 11:30:46.822024: step: 776/464, loss: 0.08849417418241501 2023-01-22 11:30:47.395340: step: 778/464, loss: 0.08703681081533432 2023-01-22 11:30:48.023982: step: 780/464, loss: 0.09158416092395782 2023-01-22 11:30:48.732518: step: 782/464, loss: 0.08443973958492279 2023-01-22 11:30:49.380108: step: 784/464, loss: 0.10147544741630554 2023-01-22 11:30:49.970804: step: 786/464, loss: 0.06591346114873886 2023-01-22 11:30:50.658941: step: 788/464, loss: 0.05924259498715401 2023-01-22 11:30:51.328458: step: 790/464, loss: 0.12014824897050858 2023-01-22 11:30:52.033123: step: 792/464, loss: 0.06217009946703911 2023-01-22 11:30:52.731706: step: 794/464, loss: 0.33827370405197144 2023-01-22 11:30:53.320262: step: 796/464, loss: 0.39951568841934204 2023-01-22 11:30:53.967028: step: 798/464, loss: 0.17067255079746246 2023-01-22 11:30:54.571844: step: 800/464, loss: 0.03321341052651405 2023-01-22 11:30:55.171225: step: 802/464, loss: 0.071148581802845 2023-01-22 11:30:55.777574: step: 804/464, loss: 0.09109427779912949 2023-01-22 11:30:56.408403: step: 806/464, loss: 0.10565490275621414 2023-01-22 11:30:57.072516: step: 808/464, loss: 0.21609793603420258 2023-01-22 11:30:57.659920: step: 810/464, loss: 0.1835024505853653 2023-01-22 11:30:58.254600: step: 812/464, loss: 1.816299319267273 2023-01-22 11:30:58.833597: step: 814/464, loss: 0.03708549588918686 2023-01-22 11:30:59.486579: step: 816/464, loss: 0.1517760008573532 2023-01-22 11:31:00.183142: step: 818/464, loss: 0.9188517928123474 2023-01-22 11:31:00.806033: step: 
820/464, loss: 0.2169983685016632 2023-01-22 11:31:01.418382: step: 822/464, loss: 0.19979752600193024 2023-01-22 11:31:02.051723: step: 824/464, loss: 0.14540570974349976 2023-01-22 11:31:02.614963: step: 826/464, loss: 0.39509427547454834 2023-01-22 11:31:03.285476: step: 828/464, loss: 0.07251793146133423 2023-01-22 11:31:03.983283: step: 830/464, loss: 0.27849599719047546 2023-01-22 11:31:04.592407: step: 832/464, loss: 0.11490640789270401 2023-01-22 11:31:05.255629: step: 834/464, loss: 0.04840140417218208 2023-01-22 11:31:05.859867: step: 836/464, loss: 0.04847031831741333 2023-01-22 11:31:06.440408: step: 838/464, loss: 0.16373252868652344 2023-01-22 11:31:07.043022: step: 840/464, loss: 0.36391955614089966 2023-01-22 11:31:07.646887: step: 842/464, loss: 0.11276250332593918 2023-01-22 11:31:08.284956: step: 844/464, loss: 0.054898716509342194 2023-01-22 11:31:08.909752: step: 846/464, loss: 1.3503093719482422 2023-01-22 11:31:09.552358: step: 848/464, loss: 0.17195822298526764 2023-01-22 11:31:10.170756: step: 850/464, loss: 0.12788565456867218 2023-01-22 11:31:10.794240: step: 852/464, loss: 0.07781606167554855 2023-01-22 11:31:11.439860: step: 854/464, loss: 0.09696003794670105 2023-01-22 11:31:12.078605: step: 856/464, loss: 0.08800847083330154 2023-01-22 11:31:12.704126: step: 858/464, loss: 0.1215500682592392 2023-01-22 11:31:13.350420: step: 860/464, loss: 0.19684864580631256 2023-01-22 11:31:13.984230: step: 862/464, loss: 0.12104436010122299 2023-01-22 11:31:14.664974: step: 864/464, loss: 0.09140199422836304 2023-01-22 11:31:15.339017: step: 866/464, loss: 0.166214719414711 2023-01-22 11:31:16.035444: step: 868/464, loss: 0.18114838004112244 2023-01-22 11:31:16.704329: step: 870/464, loss: 0.10851343721151352 2023-01-22 11:31:17.378087: step: 872/464, loss: 0.12601056694984436 2023-01-22 11:31:17.958568: step: 874/464, loss: 0.12040676921606064 2023-01-22 11:31:18.624526: step: 876/464, loss: 0.6361841559410095 2023-01-22 11:31:19.198853: step: 878/464, loss: 0.14714059233665466 2023-01-22 11:31:19.829085: step: 880/464, loss: 0.09067393839359283 2023-01-22 11:31:20.410478: step: 882/464, loss: 0.022877417504787445 2023-01-22 11:31:21.064413: step: 884/464, loss: 0.05267036333680153 2023-01-22 11:31:21.757239: step: 886/464, loss: 0.11166580766439438 2023-01-22 11:31:22.459761: step: 888/464, loss: 0.12089953571557999 2023-01-22 11:31:23.023125: step: 890/464, loss: 0.12612242996692657 2023-01-22 11:31:23.693327: step: 892/464, loss: 0.06378553807735443 2023-01-22 11:31:24.260135: step: 894/464, loss: 0.07771226018667221 2023-01-22 11:31:24.924539: step: 896/464, loss: 0.18652714788913727 2023-01-22 11:31:25.580725: step: 898/464, loss: 0.1391475349664688 2023-01-22 11:31:26.208631: step: 900/464, loss: 0.0657849982380867 2023-01-22 11:31:26.806082: step: 902/464, loss: 0.05659789219498634 2023-01-22 11:31:27.419941: step: 904/464, loss: 0.1160811260342598 2023-01-22 11:31:28.042962: step: 906/464, loss: 0.17878614366054535 2023-01-22 11:31:28.672578: step: 908/464, loss: 0.050274789333343506 2023-01-22 11:31:29.260403: step: 910/464, loss: 0.08825455605983734 2023-01-22 11:31:29.877610: step: 912/464, loss: 1.2888163328170776 2023-01-22 11:31:30.518414: step: 914/464, loss: 0.2394239753484726 2023-01-22 11:31:31.070651: step: 916/464, loss: 0.1337050199508667 2023-01-22 11:31:31.715947: step: 918/464, loss: 0.11263155937194824 2023-01-22 11:31:32.408221: step: 920/464, loss: 0.2214917689561844 2023-01-22 11:31:33.050023: step: 922/464, loss: 0.12152550369501114 2023-01-22 
11:31:33.655349: step: 924/464, loss: 0.27968689799308777 2023-01-22 11:31:34.283795: step: 926/464, loss: 1.3776767253875732 2023-01-22 11:31:34.999214: step: 928/464, loss: 0.6834884881973267 2023-01-22 11:31:35.537818: step: 930/464, loss: 0.16125810146331787 ================================================== Loss: 0.226 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.309141674962668, 'r': 0.3367121848739496, 'f1': 0.3223384585441806}, 'combined': 0.23751254840097516, 'epoch': 14} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29492554833368473, 'r': 0.31010553979203614, 'f1': 0.30232511406248685}, 'combined': 0.19737287239312612, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30423644894004115, 'r': 0.34753385628444927, 'f1': 0.3244470190644903}, 'combined': 0.23906622457383495, 'epoch': 14} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31388885588886273, 'r': 0.31475356348084305, 'f1': 0.31432061497536734}, 'combined': 0.20520413205645743, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26996527777777773, 'r': 0.3702380952380952, 'f1': 0.3122489959839357}, 'combined': 0.20816599732262378, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2556818181818182, 'r': 0.4891304347826087, 'f1': 0.3358208955223881}, 'combined': 0.16791044776119404, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14} New best russian model... 
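Note on the score blocks printed above and below: in the entries spot-checked, each 'f1' field matches the usual harmonic mean of its 'p' and 'r', and each 'combined' field matches template f1 × slot f1 (Dev Chinese, epoch 14: 0.7368421052631579 × 0.3223384585441806 ≈ 0.23751254840097516, the logged value). The short Python sketch below is not part of train.py; it only assumes those two conventions and rebuilds the epoch-14 Dev Chinese numbers from the raw p/r fields.

# Sketch only (assumed conventions, not code from this repo): recompute the
# logged Dev Chinese scores for epoch 14 from their p/r fields.
def f1(p, r):
    # Harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) else 0.0

template_f1 = f1(1.0, 0.5833333333333334)            # should match the logged 0.7368421052631579
slot_f1 = f1(0.309141674962668, 0.3367121848739496)  # should match the logged 0.3223384585441806
combined = template_f1 * slot_f1                     # should match the logged 0.23751254840097516
print(template_f1, slot_f1, combined)

Recomputing any other p/r pair printed in these blocks the same way appears to reproduce the corresponding logged f1 and combined values.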
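A related minimal sketch, also not from this repo: the per-step lines in this log all follow the pattern "step: N/464, loss: X", so they can be pulled out with a single regular expression. Whether the epoch-level figure (e.g. "Loss: 0.226" above) is the plain mean of these per-step losses is an assumption here, not something the log states; running the helper over a full epoch's entries is one way to check it.

import re

# Matches entries such as "2023-01-22 11:26:42.063151: step: 2/464, loss: 0.126..."
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.]+)")

def step_losses(log_text):
    # Return (step, loss) pairs in the order they appear in the text.
    return [(int(s), float(l)) for s, l in STEP_RE.findall(log_text)]

# Sample copied verbatim from the epoch-14 entries above.
sample = ("2023-01-22 11:26:42.063151: step: 2/464, loss: 0.12613213062286377 "
          "2023-01-22 11:26:42.675242: step: 4/464, loss: 0.12780576944351196")
pairs = step_losses(sample)
print(pairs, sum(l for _, l in pairs) / len(pairs))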
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:34:25.455442: step: 2/464, loss: 0.23376545310020447 2023-01-22 11:34:26.106941: step: 4/464, loss: 0.14027710258960724 2023-01-22 11:34:26.768756: step: 6/464, loss: 0.1160091981291771 2023-01-22 11:34:27.378858: step: 8/464, loss: 0.05574905499815941 2023-01-22 11:34:27.965583: step: 10/464, loss: 0.20945367217063904 2023-01-22 11:34:28.570684: step: 12/464, loss: 0.04651932418346405 2023-01-22 11:34:29.233127: step: 14/464, loss: 0.07678551226854324 2023-01-22 11:34:29.953642: step: 16/464, loss: 0.03442936763167381 2023-01-22 11:34:30.722401: step: 18/464, loss: 0.12439695000648499 2023-01-22 11:34:31.355507: step: 20/464, loss: 0.1926797479391098 2023-01-22 11:34:32.003276: step: 22/464, loss: 0.05314495787024498 2023-01-22 11:34:32.625279: step: 24/464, loss: 0.13110500574111938 2023-01-22 11:34:33.233648: step: 26/464, loss: 0.024646729230880737 2023-01-22 11:34:33.845124: step: 28/464, loss: 0.08958971500396729 2023-01-22 11:34:34.414946: step: 30/464, loss: 0.2605423629283905 2023-01-22 11:34:35.056422: step: 32/464, loss: 0.23618650436401367 2023-01-22 11:34:35.638095: step: 34/464, loss: 0.018489956855773926 2023-01-22 11:34:36.240115: step: 36/464, 
loss: 0.09872162342071533 2023-01-22 11:34:36.893624: step: 38/464, loss: 0.1872928887605667 2023-01-22 11:34:37.552640: step: 40/464, loss: 0.29993316531181335 2023-01-22 11:34:38.148329: step: 42/464, loss: 0.11506807804107666 2023-01-22 11:34:38.738832: step: 44/464, loss: 0.07895046472549438 2023-01-22 11:34:39.399336: step: 46/464, loss: 0.13309316337108612 2023-01-22 11:34:39.993399: step: 48/464, loss: 0.23003296554088593 2023-01-22 11:34:40.699515: step: 50/464, loss: 0.05665964633226395 2023-01-22 11:34:41.255120: step: 52/464, loss: 0.04237835109233856 2023-01-22 11:34:41.860464: step: 54/464, loss: 0.10793208330869675 2023-01-22 11:34:42.586654: step: 56/464, loss: 0.08486316353082657 2023-01-22 11:34:43.274438: step: 58/464, loss: 0.07201363146305084 2023-01-22 11:34:43.926622: step: 60/464, loss: 0.017657579854130745 2023-01-22 11:34:44.540914: step: 62/464, loss: 0.07392582297325134 2023-01-22 11:34:45.156392: step: 64/464, loss: 1.4638862609863281 2023-01-22 11:34:45.812870: step: 66/464, loss: 0.18318338692188263 2023-01-22 11:34:46.399103: step: 68/464, loss: 0.24032913148403168 2023-01-22 11:34:47.016679: step: 70/464, loss: 0.0510554239153862 2023-01-22 11:34:47.636824: step: 72/464, loss: 0.07235153019428253 2023-01-22 11:34:48.327741: step: 74/464, loss: 0.2249719351530075 2023-01-22 11:34:48.965714: step: 76/464, loss: 0.12286219000816345 2023-01-22 11:34:49.605441: step: 78/464, loss: 0.40949761867523193 2023-01-22 11:34:50.255839: step: 80/464, loss: 0.17456267774105072 2023-01-22 11:34:50.887649: step: 82/464, loss: 0.03343282639980316 2023-01-22 11:34:51.452213: step: 84/464, loss: 0.10452353209257126 2023-01-22 11:34:52.054935: step: 86/464, loss: 1.240683674812317 2023-01-22 11:34:52.712011: step: 88/464, loss: 0.14128847420215607 2023-01-22 11:34:53.288335: step: 90/464, loss: 0.13347876071929932 2023-01-22 11:34:53.856647: step: 92/464, loss: 0.1920931339263916 2023-01-22 11:34:54.444818: step: 94/464, loss: 0.02162131667137146 2023-01-22 11:34:55.059068: step: 96/464, loss: 0.019855773076415062 2023-01-22 11:34:55.600423: step: 98/464, loss: 0.017085513100028038 2023-01-22 11:34:56.212338: step: 100/464, loss: 0.12481603771448135 2023-01-22 11:34:56.804521: step: 102/464, loss: 0.007431838195770979 2023-01-22 11:34:57.490635: step: 104/464, loss: 0.08557116985321045 2023-01-22 11:34:58.099652: step: 106/464, loss: 0.044302843511104584 2023-01-22 11:34:58.760683: step: 108/464, loss: 0.2067716419696808 2023-01-22 11:34:59.441774: step: 110/464, loss: 0.13873068988323212 2023-01-22 11:35:00.107557: step: 112/464, loss: 0.1558220535516739 2023-01-22 11:35:00.761498: step: 114/464, loss: 0.052157383412122726 2023-01-22 11:35:01.359704: step: 116/464, loss: 0.0652746856212616 2023-01-22 11:35:02.047282: step: 118/464, loss: 0.02936823107302189 2023-01-22 11:35:02.624672: step: 120/464, loss: 0.055115893483161926 2023-01-22 11:35:03.251234: step: 122/464, loss: 0.19929994642734528 2023-01-22 11:35:03.810343: step: 124/464, loss: 0.06124817207455635 2023-01-22 11:35:04.527641: step: 126/464, loss: 0.6890258193016052 2023-01-22 11:35:05.130226: step: 128/464, loss: 0.08555949479341507 2023-01-22 11:35:05.760720: step: 130/464, loss: 0.046023499220609665 2023-01-22 11:35:06.458643: step: 132/464, loss: 0.1614185869693756 2023-01-22 11:35:07.078259: step: 134/464, loss: 0.4741933047771454 2023-01-22 11:35:07.721790: step: 136/464, loss: 0.09928226470947266 2023-01-22 11:35:08.373563: step: 138/464, loss: 0.06665418297052383 2023-01-22 11:35:08.907677: step: 140/464, 
loss: 0.022180533036589622 2023-01-22 11:35:09.578621: step: 142/464, loss: 0.04915543273091316 2023-01-22 11:35:10.254678: step: 144/464, loss: 0.29495981335639954 2023-01-22 11:35:10.854201: step: 146/464, loss: 0.05937875807285309 2023-01-22 11:35:11.467173: step: 148/464, loss: 0.0950523167848587 2023-01-22 11:35:12.081598: step: 150/464, loss: 0.15129932761192322 2023-01-22 11:35:12.691618: step: 152/464, loss: 0.17876113951206207 2023-01-22 11:35:13.285775: step: 154/464, loss: 0.419171005487442 2023-01-22 11:35:13.925289: step: 156/464, loss: 0.0766398161649704 2023-01-22 11:35:14.543965: step: 158/464, loss: 0.3040328919887543 2023-01-22 11:35:15.118349: step: 160/464, loss: 0.13785137236118317 2023-01-22 11:35:15.731611: step: 162/464, loss: 0.05590350180864334 2023-01-22 11:35:16.310124: step: 164/464, loss: 0.22143127024173737 2023-01-22 11:35:16.952958: step: 166/464, loss: 0.24892783164978027 2023-01-22 11:35:17.553731: step: 168/464, loss: 0.024986477568745613 2023-01-22 11:35:18.216754: step: 170/464, loss: 0.04494039714336395 2023-01-22 11:35:18.789161: step: 172/464, loss: 0.12915894389152527 2023-01-22 11:35:19.471869: step: 174/464, loss: 0.061370573937892914 2023-01-22 11:35:20.135644: step: 176/464, loss: 0.09692001342773438 2023-01-22 11:35:20.820263: step: 178/464, loss: 0.058332499116659164 2023-01-22 11:35:21.469174: step: 180/464, loss: 0.043565262109041214 2023-01-22 11:35:22.057589: step: 182/464, loss: 0.06048927828669548 2023-01-22 11:35:22.647675: step: 184/464, loss: 0.04548937454819679 2023-01-22 11:35:23.243345: step: 186/464, loss: 0.13006599247455597 2023-01-22 11:35:23.825273: step: 188/464, loss: 0.18524733185768127 2023-01-22 11:35:24.443462: step: 190/464, loss: 0.5778462290763855 2023-01-22 11:35:25.121754: step: 192/464, loss: 0.1461637318134308 2023-01-22 11:35:25.744416: step: 194/464, loss: 0.09309983998537064 2023-01-22 11:35:26.384609: step: 196/464, loss: 0.07581238448619843 2023-01-22 11:35:26.981722: step: 198/464, loss: 0.14314910769462585 2023-01-22 11:35:27.564159: step: 200/464, loss: 0.37534722685813904 2023-01-22 11:35:28.214574: step: 202/464, loss: 0.08187779039144516 2023-01-22 11:35:28.751586: step: 204/464, loss: 0.11266358941793442 2023-01-22 11:35:29.371380: step: 206/464, loss: 1.0839660167694092 2023-01-22 11:35:29.967095: step: 208/464, loss: 0.059994887560606 2023-01-22 11:35:30.548301: step: 210/464, loss: 0.01351864542812109 2023-01-22 11:35:31.271488: step: 212/464, loss: 0.07712341845035553 2023-01-22 11:35:31.879819: step: 214/464, loss: 0.20243489742279053 2023-01-22 11:35:32.506364: step: 216/464, loss: 0.1516103744506836 2023-01-22 11:35:33.201916: step: 218/464, loss: 0.06254269182682037 2023-01-22 11:35:33.846194: step: 220/464, loss: 0.05681382119655609 2023-01-22 11:35:34.500463: step: 222/464, loss: 0.04472225531935692 2023-01-22 11:35:35.119403: step: 224/464, loss: 0.0798860639333725 2023-01-22 11:35:35.783904: step: 226/464, loss: 0.09117922186851501 2023-01-22 11:35:36.398006: step: 228/464, loss: 0.06990282237529755 2023-01-22 11:35:37.050067: step: 230/464, loss: 0.38029158115386963 2023-01-22 11:35:37.701558: step: 232/464, loss: 0.3118628263473511 2023-01-22 11:35:38.345816: step: 234/464, loss: 0.19242215156555176 2023-01-22 11:35:39.048351: step: 236/464, loss: 0.06552839279174805 2023-01-22 11:35:39.716086: step: 238/464, loss: 0.15583792328834534 2023-01-22 11:35:40.363590: step: 240/464, loss: 0.060535915195941925 2023-01-22 11:35:40.931927: step: 242/464, loss: 0.17972353100776672 2023-01-22 
11:35:41.547007: step: 244/464, loss: 0.02065899781882763 2023-01-22 11:35:42.185627: step: 246/464, loss: 0.02574816718697548 2023-01-22 11:35:42.775956: step: 248/464, loss: 0.08912333101034164 2023-01-22 11:35:43.430963: step: 250/464, loss: 0.10333779454231262 2023-01-22 11:35:44.083545: step: 252/464, loss: 0.07574377954006195 2023-01-22 11:35:44.682915: step: 254/464, loss: 0.06383416801691055 2023-01-22 11:35:45.343119: step: 256/464, loss: 0.048079632222652435 2023-01-22 11:35:45.944298: step: 258/464, loss: 0.4219626784324646 2023-01-22 11:35:46.532508: step: 260/464, loss: 0.07055892050266266 2023-01-22 11:35:47.129317: step: 262/464, loss: 0.3615981936454773 2023-01-22 11:35:47.737364: step: 264/464, loss: 0.14180190861225128 2023-01-22 11:35:48.320995: step: 266/464, loss: 0.1585061401128769 2023-01-22 11:35:48.943539: step: 268/464, loss: 0.12235391139984131 2023-01-22 11:35:49.629146: step: 270/464, loss: 0.2497805804014206 2023-01-22 11:35:50.344895: step: 272/464, loss: 0.0508241206407547 2023-01-22 11:35:50.996473: step: 274/464, loss: 0.1656421422958374 2023-01-22 11:35:51.660969: step: 276/464, loss: 0.03251105174422264 2023-01-22 11:35:52.323899: step: 278/464, loss: 0.052973296493291855 2023-01-22 11:35:52.919395: step: 280/464, loss: 0.38845095038414 2023-01-22 11:35:53.596111: step: 282/464, loss: 0.5072574615478516 2023-01-22 11:35:54.348841: step: 284/464, loss: 0.05709134787321091 2023-01-22 11:35:55.010221: step: 286/464, loss: 0.055934157222509384 2023-01-22 11:35:55.640749: step: 288/464, loss: 0.15055230259895325 2023-01-22 11:35:56.226140: step: 290/464, loss: 0.07166709005832672 2023-01-22 11:35:56.862401: step: 292/464, loss: 0.048629552125930786 2023-01-22 11:35:57.451061: step: 294/464, loss: 0.14174535870552063 2023-01-22 11:35:58.088409: step: 296/464, loss: 0.14061374962329865 2023-01-22 11:35:58.696108: step: 298/464, loss: 0.1977543979883194 2023-01-22 11:35:59.330932: step: 300/464, loss: 0.2333637773990631 2023-01-22 11:35:59.932711: step: 302/464, loss: 0.15602213144302368 2023-01-22 11:36:00.535563: step: 304/464, loss: 0.11040691286325455 2023-01-22 11:36:01.173846: step: 306/464, loss: 0.06026787310838699 2023-01-22 11:36:01.808962: step: 308/464, loss: 0.04550894722342491 2023-01-22 11:36:02.413920: step: 310/464, loss: 0.15067565441131592 2023-01-22 11:36:03.034379: step: 312/464, loss: 0.18284235894680023 2023-01-22 11:36:03.705324: step: 314/464, loss: 0.09937175363302231 2023-01-22 11:36:04.394270: step: 316/464, loss: 0.07563548535108566 2023-01-22 11:36:05.003033: step: 318/464, loss: 0.066224105656147 2023-01-22 11:36:05.592391: step: 320/464, loss: 0.14571310579776764 2023-01-22 11:36:06.185662: step: 322/464, loss: 0.0441647469997406 2023-01-22 11:36:06.774737: step: 324/464, loss: 0.061574917286634445 2023-01-22 11:36:07.413551: step: 326/464, loss: 2.2150657176971436 2023-01-22 11:36:08.097444: step: 328/464, loss: 0.07661069184541702 2023-01-22 11:36:08.766202: step: 330/464, loss: 0.1501404345035553 2023-01-22 11:36:09.346550: step: 332/464, loss: 0.10598700493574142 2023-01-22 11:36:09.983623: step: 334/464, loss: 0.17445522546768188 2023-01-22 11:36:10.589437: step: 336/464, loss: 0.018463660031557083 2023-01-22 11:36:11.234410: step: 338/464, loss: 0.22861771285533905 2023-01-22 11:36:11.928685: step: 340/464, loss: 0.43219172954559326 2023-01-22 11:36:12.546633: step: 342/464, loss: 0.05386766791343689 2023-01-22 11:36:13.143076: step: 344/464, loss: 0.08726462721824646 2023-01-22 11:36:13.841168: step: 346/464, loss: 
0.18939970433712006 2023-01-22 11:36:14.483040: step: 348/464, loss: 0.062296636402606964 2023-01-22 11:36:15.068338: step: 350/464, loss: 0.11539874225854874 2023-01-22 11:36:15.694341: step: 352/464, loss: 0.23490335047245026 2023-01-22 11:36:16.372258: step: 354/464, loss: 0.09849587082862854 2023-01-22 11:36:17.051287: step: 356/464, loss: 0.14270582795143127 2023-01-22 11:36:17.639381: step: 358/464, loss: 0.11370982229709625 2023-01-22 11:36:18.471126: step: 360/464, loss: 0.17099055647850037 2023-01-22 11:36:19.029826: step: 362/464, loss: 0.04839561507105827 2023-01-22 11:36:19.653418: step: 364/464, loss: 0.22834891080856323 2023-01-22 11:36:20.225518: step: 366/464, loss: 0.1289820820093155 2023-01-22 11:36:20.848199: step: 368/464, loss: 0.07608374953269958 2023-01-22 11:36:21.466395: step: 370/464, loss: 0.10896164923906326 2023-01-22 11:36:22.057663: step: 372/464, loss: 0.08024145662784576 2023-01-22 11:36:22.720262: step: 374/464, loss: 0.23710879683494568 2023-01-22 11:36:23.268230: step: 376/464, loss: 0.17971497774124146 2023-01-22 11:36:23.839338: step: 378/464, loss: 0.020017314702272415 2023-01-22 11:36:24.386557: step: 380/464, loss: 0.06661124527454376 2023-01-22 11:36:25.010541: step: 382/464, loss: 0.10413400828838348 2023-01-22 11:36:25.629305: step: 384/464, loss: 0.495614618062973 2023-01-22 11:36:26.210300: step: 386/464, loss: 0.06679220497608185 2023-01-22 11:36:26.815349: step: 388/464, loss: 0.5594455599784851 2023-01-22 11:36:27.413186: step: 390/464, loss: 0.0512617863714695 2023-01-22 11:36:28.035264: step: 392/464, loss: 0.04496656358242035 2023-01-22 11:36:28.647772: step: 394/464, loss: 0.04869386553764343 2023-01-22 11:36:29.302447: step: 396/464, loss: 0.16523469984531403 2023-01-22 11:36:29.922512: step: 398/464, loss: 0.36372125148773193 2023-01-22 11:36:30.577160: step: 400/464, loss: 0.0999399796128273 2023-01-22 11:36:31.201975: step: 402/464, loss: 0.07038531452417374 2023-01-22 11:36:31.780183: step: 404/464, loss: 0.06395240873098373 2023-01-22 11:36:32.433940: step: 406/464, loss: 0.15672455728054047 2023-01-22 11:36:33.061222: step: 408/464, loss: 0.564751923084259 2023-01-22 11:36:33.734891: step: 410/464, loss: 0.11285138130187988 2023-01-22 11:36:34.415902: step: 412/464, loss: 0.0786018893122673 2023-01-22 11:36:35.039501: step: 414/464, loss: 0.05017438158392906 2023-01-22 11:36:35.647399: step: 416/464, loss: 0.2782638669013977 2023-01-22 11:36:36.225945: step: 418/464, loss: 0.5186176300048828 2023-01-22 11:36:36.719639: step: 420/464, loss: 0.10362860560417175 2023-01-22 11:36:37.327494: step: 422/464, loss: 0.07758516073226929 2023-01-22 11:36:37.981950: step: 424/464, loss: 0.12924860417842865 2023-01-22 11:36:38.703736: step: 426/464, loss: 0.33915987610816956 2023-01-22 11:36:39.339657: step: 428/464, loss: 0.05942815542221069 2023-01-22 11:36:39.987296: step: 430/464, loss: 0.35485219955444336 2023-01-22 11:36:40.649629: step: 432/464, loss: 0.11021588742733002 2023-01-22 11:36:41.187757: step: 434/464, loss: 0.4730512499809265 2023-01-22 11:36:41.789536: step: 436/464, loss: 0.12729057669639587 2023-01-22 11:36:42.383271: step: 438/464, loss: 0.09083368629217148 2023-01-22 11:36:43.025163: step: 440/464, loss: 0.08375909924507141 2023-01-22 11:36:43.608983: step: 442/464, loss: 0.14418822526931763 2023-01-22 11:36:44.226930: step: 444/464, loss: 1.2189446687698364 2023-01-22 11:36:44.861472: step: 446/464, loss: 0.046907439827919006 2023-01-22 11:36:45.516684: step: 448/464, loss: 0.1263635754585266 2023-01-22 
11:36:46.108008: step: 450/464, loss: 0.07947744429111481 2023-01-22 11:36:46.797185: step: 452/464, loss: 0.4060099124908447 2023-01-22 11:36:47.433127: step: 454/464, loss: 0.05249390751123428 2023-01-22 11:36:48.032962: step: 456/464, loss: 0.06890036165714264 2023-01-22 11:36:48.649908: step: 458/464, loss: 0.1694265455007553 2023-01-22 11:36:49.277857: step: 460/464, loss: 0.1672012060880661 2023-01-22 11:36:49.903700: step: 462/464, loss: 0.12467416375875473 2023-01-22 11:36:50.499138: step: 464/464, loss: 0.2216804176568985 2023-01-22 11:36:51.189041: step: 466/464, loss: 0.02688206359744072 2023-01-22 11:36:51.787001: step: 468/464, loss: 0.15295109152793884 2023-01-22 11:36:52.437459: step: 470/464, loss: 0.17021578550338745 2023-01-22 11:36:53.032371: step: 472/464, loss: 0.08870581537485123 2023-01-22 11:36:53.642965: step: 474/464, loss: 0.057112276554107666 2023-01-22 11:36:54.262322: step: 476/464, loss: 0.11644468456506729 2023-01-22 11:36:54.892653: step: 478/464, loss: 0.10197106748819351 2023-01-22 11:36:55.526225: step: 480/464, loss: 0.345088928937912 2023-01-22 11:36:56.129982: step: 482/464, loss: 0.15798737108707428 2023-01-22 11:36:56.724114: step: 484/464, loss: 0.45416873693466187 2023-01-22 11:36:57.403923: step: 486/464, loss: 0.2126789689064026 2023-01-22 11:36:58.029317: step: 488/464, loss: 0.09349019825458527 2023-01-22 11:36:58.758757: step: 490/464, loss: 0.1536969393491745 2023-01-22 11:36:59.327869: step: 492/464, loss: 0.1000894233584404 2023-01-22 11:36:59.894960: step: 494/464, loss: 0.08661019057035446 2023-01-22 11:37:00.517241: step: 496/464, loss: 0.034136559814214706 2023-01-22 11:37:01.188267: step: 498/464, loss: 0.9338065385818481 2023-01-22 11:37:01.785016: step: 500/464, loss: 0.047420501708984375 2023-01-22 11:37:02.396325: step: 502/464, loss: 0.09700897336006165 2023-01-22 11:37:03.040475: step: 504/464, loss: 0.3427742123603821 2023-01-22 11:37:03.718175: step: 506/464, loss: 0.2460949569940567 2023-01-22 11:37:04.336130: step: 508/464, loss: 0.057013846933841705 2023-01-22 11:37:04.914170: step: 510/464, loss: 0.14210188388824463 2023-01-22 11:37:05.518408: step: 512/464, loss: 0.036092836409807205 2023-01-22 11:37:06.133688: step: 514/464, loss: 0.23894937336444855 2023-01-22 11:37:06.787345: step: 516/464, loss: 0.017120173200964928 2023-01-22 11:37:07.436959: step: 518/464, loss: 0.08431785553693771 2023-01-22 11:37:08.006180: step: 520/464, loss: 0.5087003111839294 2023-01-22 11:37:08.631040: step: 522/464, loss: 0.060391779989004135 2023-01-22 11:37:09.262180: step: 524/464, loss: 0.11891240626573563 2023-01-22 11:37:09.949130: step: 526/464, loss: 0.3534661829471588 2023-01-22 11:37:10.561861: step: 528/464, loss: 0.21036121249198914 2023-01-22 11:37:11.151439: step: 530/464, loss: 0.16232508420944214 2023-01-22 11:37:11.769434: step: 532/464, loss: 0.1413215547800064 2023-01-22 11:37:12.454000: step: 534/464, loss: 0.04500097036361694 2023-01-22 11:37:13.106838: step: 536/464, loss: 0.0697244256734848 2023-01-22 11:37:13.815806: step: 538/464, loss: 0.12187471985816956 2023-01-22 11:37:14.565653: step: 540/464, loss: 0.3152616620063782 2023-01-22 11:37:15.187048: step: 542/464, loss: 0.18337717652320862 2023-01-22 11:37:15.887524: step: 544/464, loss: 0.22301732003688812 2023-01-22 11:37:16.488480: step: 546/464, loss: 0.07435160130262375 2023-01-22 11:37:17.166335: step: 548/464, loss: 0.04245968163013458 2023-01-22 11:37:17.893079: step: 550/464, loss: 0.08916405588388443 2023-01-22 11:37:18.524462: step: 552/464, loss: 
0.01657041907310486 2023-01-22 11:37:19.151761: step: 554/464, loss: 0.471982479095459 2023-01-22 11:37:19.873104: step: 556/464, loss: 0.1751091629266739 2023-01-22 11:37:20.512378: step: 558/464, loss: 0.10122612118721008 2023-01-22 11:37:21.118421: step: 560/464, loss: 0.11630212515592575 2023-01-22 11:37:21.759873: step: 562/464, loss: 0.014227952808141708 2023-01-22 11:37:22.352804: step: 564/464, loss: 0.20856709778308868 2023-01-22 11:37:22.983260: step: 566/464, loss: 0.11056730896234512 2023-01-22 11:37:23.615149: step: 568/464, loss: 0.0749744102358818 2023-01-22 11:37:24.277460: step: 570/464, loss: 0.04471934586763382 2023-01-22 11:37:24.902392: step: 572/464, loss: 0.15633288025856018 2023-01-22 11:37:25.553451: step: 574/464, loss: 0.11412084847688675 2023-01-22 11:37:26.194315: step: 576/464, loss: 0.028770413249731064 2023-01-22 11:37:26.836945: step: 578/464, loss: 0.17178964614868164 2023-01-22 11:37:27.431410: step: 580/464, loss: 0.07429475337266922 2023-01-22 11:37:28.034329: step: 582/464, loss: 0.14217743277549744 2023-01-22 11:37:28.621605: step: 584/464, loss: 0.10671277344226837 2023-01-22 11:37:29.261570: step: 586/464, loss: 0.31617486476898193 2023-01-22 11:37:29.836771: step: 588/464, loss: 0.2289099395275116 2023-01-22 11:37:30.512371: step: 590/464, loss: 0.2649511396884918 2023-01-22 11:37:31.115306: step: 592/464, loss: 0.2369571179151535 2023-01-22 11:37:31.739261: step: 594/464, loss: 0.26038309931755066 2023-01-22 11:37:32.476865: step: 596/464, loss: 0.41762369871139526 2023-01-22 11:37:33.125792: step: 598/464, loss: 0.18469442427158356 2023-01-22 11:37:33.854049: step: 600/464, loss: 0.032377939671278 2023-01-22 11:37:34.554488: step: 602/464, loss: 1.2633156776428223 2023-01-22 11:37:35.183977: step: 604/464, loss: 0.0722263976931572 2023-01-22 11:37:35.798783: step: 606/464, loss: 0.18451766669750214 2023-01-22 11:37:36.361273: step: 608/464, loss: 0.246455579996109 2023-01-22 11:37:36.978457: step: 610/464, loss: 0.15055091679096222 2023-01-22 11:37:37.629970: step: 612/464, loss: 0.082424096763134 2023-01-22 11:37:38.203061: step: 614/464, loss: 0.029938578605651855 2023-01-22 11:37:38.873235: step: 616/464, loss: 1.3979496955871582 2023-01-22 11:37:39.514447: step: 618/464, loss: 0.07138173282146454 2023-01-22 11:37:40.123724: step: 620/464, loss: 0.2069871574640274 2023-01-22 11:37:40.792207: step: 622/464, loss: 0.42995592951774597 2023-01-22 11:37:41.376870: step: 624/464, loss: 0.1266365945339203 2023-01-22 11:37:41.974038: step: 626/464, loss: 0.1062287762761116 2023-01-22 11:37:42.620067: step: 628/464, loss: 0.16082951426506042 2023-01-22 11:37:43.271905: step: 630/464, loss: 0.08530285209417343 2023-01-22 11:37:43.844546: step: 632/464, loss: 0.15649454295635223 2023-01-22 11:37:44.474298: step: 634/464, loss: 0.16694426536560059 2023-01-22 11:37:45.097938: step: 636/464, loss: 0.1181652843952179 2023-01-22 11:37:45.848742: step: 638/464, loss: 0.16633333265781403 2023-01-22 11:37:46.577257: step: 640/464, loss: 0.03431246429681778 2023-01-22 11:37:47.223434: step: 642/464, loss: 0.05832459032535553 2023-01-22 11:37:47.862482: step: 644/464, loss: 0.04486502707004547 2023-01-22 11:37:48.490430: step: 646/464, loss: 0.25312352180480957 2023-01-22 11:37:49.127046: step: 648/464, loss: 0.1561816781759262 2023-01-22 11:37:49.842063: step: 650/464, loss: 0.04630190134048462 2023-01-22 11:37:50.474987: step: 652/464, loss: 0.2901785969734192 2023-01-22 11:37:51.097735: step: 654/464, loss: 0.058462291955947876 2023-01-22 11:37:51.689978: step: 
656/464, loss: 0.21496911346912384 2023-01-22 11:37:52.314142: step: 658/464, loss: 0.09897728264331818 2023-01-22 11:37:52.912487: step: 660/464, loss: 0.1999654620885849 2023-01-22 11:37:53.609550: step: 662/464, loss: 0.060551948845386505 2023-01-22 11:37:54.229820: step: 664/464, loss: 0.06704603880643845 2023-01-22 11:37:54.877151: step: 666/464, loss: 0.16565890610218048 2023-01-22 11:37:55.503246: step: 668/464, loss: 0.11661874502897263 2023-01-22 11:37:56.212625: step: 670/464, loss: 0.4740934371948242 2023-01-22 11:37:56.825176: step: 672/464, loss: 0.25017955899238586 2023-01-22 11:37:57.544922: step: 674/464, loss: 0.25479915738105774 2023-01-22 11:37:58.154804: step: 676/464, loss: 0.052261002361774445 2023-01-22 11:37:58.782705: step: 678/464, loss: 0.2774779796600342 2023-01-22 11:37:59.380016: step: 680/464, loss: 0.03868470713496208 2023-01-22 11:37:59.950597: step: 682/464, loss: 0.20814983546733856 2023-01-22 11:38:00.543258: step: 684/464, loss: 0.07299919426441193 2023-01-22 11:38:01.195317: step: 686/464, loss: 0.12103313952684402 2023-01-22 11:38:01.815646: step: 688/464, loss: 0.1205439493060112 2023-01-22 11:38:02.464792: step: 690/464, loss: 0.06843750178813934 2023-01-22 11:38:03.157090: step: 692/464, loss: 0.0651388168334961 2023-01-22 11:38:03.795798: step: 694/464, loss: 0.06578768789768219 2023-01-22 11:38:04.426549: step: 696/464, loss: 0.09666206687688828 2023-01-22 11:38:04.962205: step: 698/464, loss: 0.06669321656227112 2023-01-22 11:38:05.545601: step: 700/464, loss: 0.05347609147429466 2023-01-22 11:38:06.142157: step: 702/464, loss: 0.18711988627910614 2023-01-22 11:38:06.716474: step: 704/464, loss: 0.09071377664804459 2023-01-22 11:38:07.310246: step: 706/464, loss: 0.37969571352005005 2023-01-22 11:38:07.948020: step: 708/464, loss: 0.05282412841916084 2023-01-22 11:38:08.555821: step: 710/464, loss: 0.1803780049085617 2023-01-22 11:38:09.173767: step: 712/464, loss: 0.21614904701709747 2023-01-22 11:38:09.792839: step: 714/464, loss: 0.12261205166578293 2023-01-22 11:38:10.531136: step: 716/464, loss: 0.09753377735614777 2023-01-22 11:38:11.162627: step: 718/464, loss: 0.1277727484703064 2023-01-22 11:38:11.776009: step: 720/464, loss: 0.19646309316158295 2023-01-22 11:38:12.443257: step: 722/464, loss: 0.13632582128047943 2023-01-22 11:38:13.023202: step: 724/464, loss: 0.05022649094462395 2023-01-22 11:38:13.691821: step: 726/464, loss: 0.4702479839324951 2023-01-22 11:38:14.299923: step: 728/464, loss: 0.12363642454147339 2023-01-22 11:38:14.963577: step: 730/464, loss: 0.6109817028045654 2023-01-22 11:38:15.547374: step: 732/464, loss: 0.32869744300842285 2023-01-22 11:38:16.188536: step: 734/464, loss: 0.07312636077404022 2023-01-22 11:38:16.749195: step: 736/464, loss: 0.08416087180376053 2023-01-22 11:38:17.393599: step: 738/464, loss: 0.27036938071250916 2023-01-22 11:38:17.973073: step: 740/464, loss: 0.09116753190755844 2023-01-22 11:38:18.623625: step: 742/464, loss: 0.13980446755886078 2023-01-22 11:38:19.207199: step: 744/464, loss: 0.07982996106147766 2023-01-22 11:38:19.815829: step: 746/464, loss: 0.01018874254077673 2023-01-22 11:38:20.421243: step: 748/464, loss: 0.4726656675338745 2023-01-22 11:38:21.021158: step: 750/464, loss: 0.45306769013404846 2023-01-22 11:38:21.669550: step: 752/464, loss: 0.14255671203136444 2023-01-22 11:38:22.389224: step: 754/464, loss: 0.17154064774513245 2023-01-22 11:38:23.007926: step: 756/464, loss: 0.1206292062997818 2023-01-22 11:38:23.744377: step: 758/464, loss: 0.4320429861545563 2023-01-22 
11:38:24.374390: step: 760/464, loss: 0.09046813100576401 2023-01-22 11:38:25.001149: step: 762/464, loss: 0.11342841386795044 2023-01-22 11:38:25.607182: step: 764/464, loss: 0.12874212861061096 2023-01-22 11:38:26.235386: step: 766/464, loss: 0.17379102110862732 2023-01-22 11:38:26.841332: step: 768/464, loss: 0.08714067935943604 2023-01-22 11:38:27.483469: step: 770/464, loss: 0.2055840790271759 2023-01-22 11:38:28.090771: step: 772/464, loss: 0.24172013998031616 2023-01-22 11:38:28.683152: step: 774/464, loss: 0.07567917555570602 2023-01-22 11:38:29.303534: step: 776/464, loss: 0.4996664226055145 2023-01-22 11:38:29.908626: step: 778/464, loss: 0.1334775984287262 2023-01-22 11:38:30.477490: step: 780/464, loss: 0.05684034898877144 2023-01-22 11:38:31.124072: step: 782/464, loss: 0.2430305927991867 2023-01-22 11:38:31.846724: step: 784/464, loss: 0.15896043181419373 2023-01-22 11:38:32.524087: step: 786/464, loss: 0.7327299118041992 2023-01-22 11:38:33.208620: step: 788/464, loss: 0.4628361463546753 2023-01-22 11:38:33.877736: step: 790/464, loss: 1.0212870836257935 2023-01-22 11:38:34.539361: step: 792/464, loss: 0.07481864094734192 2023-01-22 11:38:35.164515: step: 794/464, loss: 0.2146650105714798 2023-01-22 11:38:35.874157: step: 796/464, loss: 3.0680768489837646 2023-01-22 11:38:36.476607: step: 798/464, loss: 0.14101924002170563 2023-01-22 11:38:37.070777: step: 800/464, loss: 0.10189759731292725 2023-01-22 11:38:37.710398: step: 802/464, loss: 0.4617618918418884 2023-01-22 11:38:38.407673: step: 804/464, loss: 0.32869577407836914 2023-01-22 11:38:39.005651: step: 806/464, loss: 0.11678659170866013 2023-01-22 11:38:39.642236: step: 808/464, loss: 0.4081166088581085 2023-01-22 11:38:40.266056: step: 810/464, loss: 0.07252360880374908 2023-01-22 11:38:40.875580: step: 812/464, loss: 0.08252737671136856 2023-01-22 11:38:41.538085: step: 814/464, loss: 0.16961263120174408 2023-01-22 11:38:42.162517: step: 816/464, loss: 0.015630565583705902 2023-01-22 11:38:42.796277: step: 818/464, loss: 0.097167007625103 2023-01-22 11:38:43.394605: step: 820/464, loss: 0.06768547743558884 2023-01-22 11:38:44.051731: step: 822/464, loss: 0.15056432783603668 2023-01-22 11:38:44.706740: step: 824/464, loss: 0.09026777744293213 2023-01-22 11:38:45.306488: step: 826/464, loss: 0.13992241024971008 2023-01-22 11:38:45.895169: step: 828/464, loss: 0.31633102893829346 2023-01-22 11:38:46.465300: step: 830/464, loss: 0.25410526990890503 2023-01-22 11:38:47.037933: step: 832/464, loss: 0.008967715315520763 2023-01-22 11:38:47.713531: step: 834/464, loss: 0.10733868181705475 2023-01-22 11:38:48.340112: step: 836/464, loss: 0.10406633466482162 2023-01-22 11:38:48.951195: step: 838/464, loss: 0.09029827266931534 2023-01-22 11:38:49.554989: step: 840/464, loss: 0.24169792234897614 2023-01-22 11:38:50.132532: step: 842/464, loss: 0.13256599009037018 2023-01-22 11:38:50.761254: step: 844/464, loss: 0.11500195413827896 2023-01-22 11:38:51.328763: step: 846/464, loss: 0.03509023040533066 2023-01-22 11:38:51.962181: step: 848/464, loss: 0.11999034881591797 2023-01-22 11:38:52.614342: step: 850/464, loss: 0.04450235888361931 2023-01-22 11:38:53.231535: step: 852/464, loss: 0.10161370784044266 2023-01-22 11:38:53.871111: step: 854/464, loss: 0.12068932503461838 2023-01-22 11:38:54.457404: step: 856/464, loss: 0.07015305757522583 2023-01-22 11:38:55.065500: step: 858/464, loss: 0.10911504924297333 2023-01-22 11:38:55.667482: step: 860/464, loss: 0.05616682395339012 2023-01-22 11:38:56.295935: step: 862/464, loss: 
1.5696333646774292 2023-01-22 11:38:56.943099: step: 864/464, loss: 0.16139520704746246 2023-01-22 11:38:57.563472: step: 866/464, loss: 0.4261089563369751 2023-01-22 11:38:58.219677: step: 868/464, loss: 0.07795168459415436 2023-01-22 11:38:58.797512: step: 870/464, loss: 0.0567050464451313 2023-01-22 11:38:59.398756: step: 872/464, loss: 0.04633583500981331 2023-01-22 11:39:00.092879: step: 874/464, loss: 0.12251248210668564 2023-01-22 11:39:00.683658: step: 876/464, loss: 0.06280695647001266 2023-01-22 11:39:01.286993: step: 878/464, loss: 0.15962255001068115 2023-01-22 11:39:01.917329: step: 880/464, loss: 0.0784224271774292 2023-01-22 11:39:02.618380: step: 882/464, loss: 0.2128182202577591 2023-01-22 11:39:03.183304: step: 884/464, loss: 0.022529419511556625 2023-01-22 11:39:03.844227: step: 886/464, loss: 0.3284769058227539 2023-01-22 11:39:04.476565: step: 888/464, loss: 0.09700017422437668 2023-01-22 11:39:05.157256: step: 890/464, loss: 0.08831659704446793 2023-01-22 11:39:05.798454: step: 892/464, loss: 0.08953232318162918 2023-01-22 11:39:06.400871: step: 894/464, loss: 0.5294684767723083 2023-01-22 11:39:07.023005: step: 896/464, loss: 0.13867852091789246 2023-01-22 11:39:07.642719: step: 898/464, loss: 3.1478331089019775 2023-01-22 11:39:08.261025: step: 900/464, loss: 0.1969163417816162 2023-01-22 11:39:08.860110: step: 902/464, loss: 0.07681287080049515 2023-01-22 11:39:09.548745: step: 904/464, loss: 0.040397848933935165 2023-01-22 11:39:10.206194: step: 906/464, loss: 0.7580767273902893 2023-01-22 11:39:10.851990: step: 908/464, loss: 0.049065783619880676 2023-01-22 11:39:11.527393: step: 910/464, loss: 0.09030801057815552 2023-01-22 11:39:12.200252: step: 912/464, loss: 0.12545651197433472 2023-01-22 11:39:12.844826: step: 914/464, loss: 0.04713783785700798 2023-01-22 11:39:13.501492: step: 916/464, loss: 0.09222618490457535 2023-01-22 11:39:14.126405: step: 918/464, loss: 0.02281993441283703 2023-01-22 11:39:14.760800: step: 920/464, loss: 0.11207670718431473 2023-01-22 11:39:15.377197: step: 922/464, loss: 0.07248653471469879 2023-01-22 11:39:16.079302: step: 924/464, loss: 0.10601303726434708 2023-01-22 11:39:16.716468: step: 926/464, loss: 0.05023236200213432 2023-01-22 11:39:17.455908: step: 928/464, loss: 0.1637064516544342 2023-01-22 11:39:17.951642: step: 930/464, loss: 0.008865215815603733 ================================================== Loss: 0.189 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2994631280388979, 'r': 0.35060483870967746, 'f1': 0.32302229020979023}, 'combined': 0.2380164243651086, 'epoch': 15} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2960034243201015, 'r': 0.31501281854249336, 'f1': 0.3052124197433935}, 'combined': 0.1992578491588994, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294438586400818, 'r': 0.36427696078431376, 'f1': 0.32565556969182924}, 'combined': 0.2399567355624005, 'epoch': 15} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3082518567091595, 'r': 0.30995021404640005, 'f1': 0.3090987024693495}, 'combined': 0.20179500782973075, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30348185622563634, 'r': 0.35876507861209, 'f1': 0.32881599378881987}, 'combined': 0.24228546910755147, 
'epoch': 15} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3036013025000908, 'r': 0.30415836911018274, 'f1': 0.3038795805042156}, 'combined': 0.19838770540689724, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2275132275132275, 'r': 0.4095238095238095, 'f1': 0.2925170068027211}, 'combined': 0.1950113378684807, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20625, 'r': 0.358695652173913, 'f1': 0.26190476190476186}, 'combined': 0.13095238095238093, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34210526315789475, 'r': 0.22413793103448276, 'f1': 0.2708333333333333}, 'combined': 0.18055555555555552, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:41:57.105007: step: 2/464, loss: 0.06967566907405853 2023-01-22 11:41:57.711450: step: 4/464, loss: 0.0738082230091095 2023-01-22 11:41:58.344183: step: 6/464, loss: 0.03507356718182564 2023-01-22 11:41:58.962427: step: 8/464, loss: 0.3681235909461975 2023-01-22 11:41:59.543338: step: 10/464, loss: 0.01978295110166073 2023-01-22 
11:42:00.137278: step: 12/464, loss: 0.0787145122885704 2023-01-22 11:42:00.763892: step: 14/464, loss: 0.07112926989793777 2023-01-22 11:42:01.392454: step: 16/464, loss: 0.11526873707771301 2023-01-22 11:42:02.020164: step: 18/464, loss: 0.2685200870037079 2023-01-22 11:42:02.627671: step: 20/464, loss: 0.054469529539346695 2023-01-22 11:42:03.257301: step: 22/464, loss: 0.10368768870830536 2023-01-22 11:42:03.868792: step: 24/464, loss: 0.06370580196380615 2023-01-22 11:42:04.492397: step: 26/464, loss: 0.4279961884021759 2023-01-22 11:42:05.138151: step: 28/464, loss: 0.1075909212231636 2023-01-22 11:42:05.880025: step: 30/464, loss: 0.11014177650213242 2023-01-22 11:42:06.564988: step: 32/464, loss: 0.17367292940616608 2023-01-22 11:42:07.148858: step: 34/464, loss: 0.0846795067191124 2023-01-22 11:42:07.779997: step: 36/464, loss: 0.14901630580425262 2023-01-22 11:42:08.374888: step: 38/464, loss: 0.08096755295991898 2023-01-22 11:42:08.933589: step: 40/464, loss: 0.06089453399181366 2023-01-22 11:42:09.536266: step: 42/464, loss: 0.013688472099602222 2023-01-22 11:42:10.145101: step: 44/464, loss: 0.12926563620567322 2023-01-22 11:42:10.770388: step: 46/464, loss: 0.08856255561113358 2023-01-22 11:42:11.425431: step: 48/464, loss: 0.06617226451635361 2023-01-22 11:42:12.078392: step: 50/464, loss: 0.03761773556470871 2023-01-22 11:42:12.707287: step: 52/464, loss: 0.03826698660850525 2023-01-22 11:42:13.318864: step: 54/464, loss: 0.023077616468071938 2023-01-22 11:42:13.972009: step: 56/464, loss: 0.20579925179481506 2023-01-22 11:42:14.655995: step: 58/464, loss: 0.07355199754238129 2023-01-22 11:42:15.258742: step: 60/464, loss: 0.5119641423225403 2023-01-22 11:42:15.873242: step: 62/464, loss: 0.11700855940580368 2023-01-22 11:42:16.513268: step: 64/464, loss: 0.24788638949394226 2023-01-22 11:42:17.140041: step: 66/464, loss: 0.028873804956674576 2023-01-22 11:42:17.774501: step: 68/464, loss: 0.22852276265621185 2023-01-22 11:42:18.365980: step: 70/464, loss: 0.0821826308965683 2023-01-22 11:42:19.014301: step: 72/464, loss: 0.09222550690174103 2023-01-22 11:42:19.615377: step: 74/464, loss: 0.1988222748041153 2023-01-22 11:42:20.220796: step: 76/464, loss: 0.052708689123392105 2023-01-22 11:42:20.806191: step: 78/464, loss: 0.16055501997470856 2023-01-22 11:42:21.397104: step: 80/464, loss: 1.3652526140213013 2023-01-22 11:42:21.983041: step: 82/464, loss: 0.06484121829271317 2023-01-22 11:42:22.596207: step: 84/464, loss: 0.10143230855464935 2023-01-22 11:42:23.221119: step: 86/464, loss: 0.07512756437063217 2023-01-22 11:42:23.925075: step: 88/464, loss: 0.09119124710559845 2023-01-22 11:42:24.537124: step: 90/464, loss: 0.2271091341972351 2023-01-22 11:42:25.221686: step: 92/464, loss: 0.08392105996608734 2023-01-22 11:42:25.879214: step: 94/464, loss: 0.03651168569922447 2023-01-22 11:42:26.481820: step: 96/464, loss: 0.04457028955221176 2023-01-22 11:42:27.170312: step: 98/464, loss: 0.07616758346557617 2023-01-22 11:42:27.800926: step: 100/464, loss: 0.18623724579811096 2023-01-22 11:42:28.418183: step: 102/464, loss: 0.08702529966831207 2023-01-22 11:42:29.004176: step: 104/464, loss: 0.0574377216398716 2023-01-22 11:42:29.627570: step: 106/464, loss: 0.08055932074785233 2023-01-22 11:42:30.291405: step: 108/464, loss: 0.09230149537324905 2023-01-22 11:42:30.890411: step: 110/464, loss: 0.11659622937440872 2023-01-22 11:42:31.520872: step: 112/464, loss: 0.1416178196668625 2023-01-22 11:42:32.170441: step: 114/464, loss: 0.06667982786893845 2023-01-22 11:42:32.745718: 
step: 116/464, loss: 0.10814934968948364 2023-01-22 11:42:33.406724: step: 118/464, loss: 0.2515222728252411 2023-01-22 11:42:34.107152: step: 120/464, loss: 0.12438605725765228 2023-01-22 11:42:34.773889: step: 122/464, loss: 0.056299109011888504 2023-01-22 11:42:35.355911: step: 124/464, loss: 0.22707466781139374 2023-01-22 11:42:35.959771: step: 126/464, loss: 0.06861184537410736 2023-01-22 11:42:36.548785: step: 128/464, loss: 0.08313598483800888 2023-01-22 11:42:37.239643: step: 130/464, loss: 0.6121090054512024 2023-01-22 11:42:37.854760: step: 132/464, loss: 0.05384763330221176 2023-01-22 11:42:38.486339: step: 134/464, loss: 0.42914947867393494 2023-01-22 11:42:39.094675: step: 136/464, loss: 0.051652342081069946 2023-01-22 11:42:39.685510: step: 138/464, loss: 0.1508612036705017 2023-01-22 11:42:40.311548: step: 140/464, loss: 0.07505248486995697 2023-01-22 11:42:40.886393: step: 142/464, loss: 0.03908807784318924 2023-01-22 11:42:41.559547: step: 144/464, loss: 0.0981028825044632 2023-01-22 11:42:42.198160: step: 146/464, loss: 0.1842205822467804 2023-01-22 11:42:42.786902: step: 148/464, loss: 0.05427127704024315 2023-01-22 11:42:43.427860: step: 150/464, loss: 0.1553102284669876 2023-01-22 11:42:44.049564: step: 152/464, loss: 0.12031495571136475 2023-01-22 11:42:44.730479: step: 154/464, loss: 0.27708104252815247 2023-01-22 11:42:45.363459: step: 156/464, loss: 0.12309330701828003 2023-01-22 11:42:45.968225: step: 158/464, loss: 0.1471334844827652 2023-01-22 11:42:46.560867: step: 160/464, loss: 0.22516928613185883 2023-01-22 11:42:47.144510: step: 162/464, loss: 0.006947956047952175 2023-01-22 11:42:47.757468: step: 164/464, loss: 0.0785510316491127 2023-01-22 11:42:48.438416: step: 166/464, loss: 0.12449684739112854 2023-01-22 11:42:49.089321: step: 168/464, loss: 0.28927740454673767 2023-01-22 11:42:49.750127: step: 170/464, loss: 0.04315275698900223 2023-01-22 11:42:50.312532: step: 172/464, loss: 0.09736642986536026 2023-01-22 11:42:50.870556: step: 174/464, loss: 0.10606750100851059 2023-01-22 11:42:51.552456: step: 176/464, loss: 0.048355862498283386 2023-01-22 11:42:52.212820: step: 178/464, loss: 0.04929488152265549 2023-01-22 11:42:52.850422: step: 180/464, loss: 0.05741899088025093 2023-01-22 11:42:53.427676: step: 182/464, loss: 1.8915715217590332 2023-01-22 11:42:54.051926: step: 184/464, loss: 0.04535231366753578 2023-01-22 11:42:54.726019: step: 186/464, loss: 0.11236447840929031 2023-01-22 11:42:55.341591: step: 188/464, loss: 0.3592509925365448 2023-01-22 11:42:56.068389: step: 190/464, loss: 0.14119786024093628 2023-01-22 11:42:56.691054: step: 192/464, loss: 0.13931706547737122 2023-01-22 11:42:57.301128: step: 194/464, loss: 0.05476130172610283 2023-01-22 11:42:57.935842: step: 196/464, loss: 0.5920207500457764 2023-01-22 11:42:58.578638: step: 198/464, loss: 0.09765797853469849 2023-01-22 11:42:59.200295: step: 200/464, loss: 0.08117769658565521 2023-01-22 11:42:59.773778: step: 202/464, loss: 0.05527033284306526 2023-01-22 11:43:00.393277: step: 204/464, loss: 0.2432255893945694 2023-01-22 11:43:01.006142: step: 206/464, loss: 0.4053729772567749 2023-01-22 11:43:01.576345: step: 208/464, loss: 0.057459376752376556 2023-01-22 11:43:02.232654: step: 210/464, loss: 0.6187804341316223 2023-01-22 11:43:02.852954: step: 212/464, loss: 0.44048136472702026 2023-01-22 11:43:03.437302: step: 214/464, loss: 0.013785900548100471 2023-01-22 11:43:04.108740: step: 216/464, loss: 0.05621178448200226 2023-01-22 11:43:04.837922: step: 218/464, loss: 0.04226300120353699 
2023-01-22 11:43:05.473203: step: 220/464, loss: 0.08753485232591629 2023-01-22 11:43:06.078003: step: 222/464, loss: 0.10808547586202621 2023-01-22 11:43:06.703712: step: 224/464, loss: 0.22819502651691437 2023-01-22 11:43:07.289415: step: 226/464, loss: 0.07079092413187027 2023-01-22 11:43:07.937079: step: 228/464, loss: 0.023703573271632195 2023-01-22 11:43:08.575080: step: 230/464, loss: 0.08335469663143158 2023-01-22 11:43:09.227666: step: 232/464, loss: 0.08740612864494324 2023-01-22 11:43:09.837079: step: 234/464, loss: 0.07887952029705048 2023-01-22 11:43:10.461537: step: 236/464, loss: 0.10675524175167084 2023-01-22 11:43:11.049401: step: 238/464, loss: 0.016704462468624115 2023-01-22 11:43:11.699698: step: 240/464, loss: 0.11877373605966568 2023-01-22 11:43:12.369569: step: 242/464, loss: 0.03198765590786934 2023-01-22 11:43:13.106367: step: 244/464, loss: 0.08731076121330261 2023-01-22 11:43:13.728181: step: 246/464, loss: 0.19646023213863373 2023-01-22 11:43:14.362469: step: 248/464, loss: 0.03692404553294182 2023-01-22 11:43:15.094840: step: 250/464, loss: 0.41249361634254456 2023-01-22 11:43:15.681241: step: 252/464, loss: 0.12720321118831635 2023-01-22 11:43:16.301581: step: 254/464, loss: 0.06617468595504761 2023-01-22 11:43:16.982688: step: 256/464, loss: 0.041970960795879364 2023-01-22 11:43:17.633389: step: 258/464, loss: 0.12893933057785034 2023-01-22 11:43:18.227559: step: 260/464, loss: 0.046301230788230896 2023-01-22 11:43:18.791272: step: 262/464, loss: 0.06939200311899185 2023-01-22 11:43:19.438350: step: 264/464, loss: 0.14944562315940857 2023-01-22 11:43:20.016952: step: 266/464, loss: 0.04032471030950546 2023-01-22 11:43:20.644805: step: 268/464, loss: 0.1515982449054718 2023-01-22 11:43:21.332975: step: 270/464, loss: 0.0740865021944046 2023-01-22 11:43:22.032787: step: 272/464, loss: 1.8496662378311157 2023-01-22 11:43:22.638474: step: 274/464, loss: 0.16171090304851532 2023-01-22 11:43:23.281976: step: 276/464, loss: 0.04464978352189064 2023-01-22 11:43:23.924287: step: 278/464, loss: 0.3915879726409912 2023-01-22 11:43:24.544549: step: 280/464, loss: 0.06493178009986877 2023-01-22 11:43:25.198478: step: 282/464, loss: 0.034338947385549545 2023-01-22 11:43:25.882298: step: 284/464, loss: 0.07518131285905838 2023-01-22 11:43:26.526195: step: 286/464, loss: 0.32087764143943787 2023-01-22 11:43:27.137051: step: 288/464, loss: 0.06764665246009827 2023-01-22 11:43:27.773351: step: 290/464, loss: 0.23117481172084808 2023-01-22 11:43:28.477996: step: 292/464, loss: 0.05319612845778465 2023-01-22 11:43:29.119203: step: 294/464, loss: 0.04225257411599159 2023-01-22 11:43:29.731244: step: 296/464, loss: 0.2298288196325302 2023-01-22 11:43:30.408187: step: 298/464, loss: 0.21512852609157562 2023-01-22 11:43:31.046986: step: 300/464, loss: 0.1140446588397026 2023-01-22 11:43:31.684899: step: 302/464, loss: 0.059378352016210556 2023-01-22 11:43:32.336226: step: 304/464, loss: 0.1522151678800583 2023-01-22 11:43:32.967963: step: 306/464, loss: 0.06590325385332108 2023-01-22 11:43:33.592419: step: 308/464, loss: 0.1357267200946808 2023-01-22 11:43:34.272929: step: 310/464, loss: 0.07562603801488876 2023-01-22 11:43:34.837934: step: 312/464, loss: 0.07189598679542542 2023-01-22 11:43:35.388049: step: 314/464, loss: 0.4715685546398163 2023-01-22 11:43:36.006316: step: 316/464, loss: 0.06670360267162323 2023-01-22 11:43:36.587466: step: 318/464, loss: 0.0039108796045184135 2023-01-22 11:43:37.186858: step: 320/464, loss: 0.08325283229351044 2023-01-22 11:43:37.791884: step: 
322/464, loss: 0.23594802618026733 2023-01-22 11:43:38.438901: step: 324/464, loss: 0.08955429494380951 2023-01-22 11:43:39.102011: step: 326/464, loss: 0.3751218616962433 2023-01-22 11:43:39.751165: step: 328/464, loss: 0.04241259768605232 2023-01-22 11:43:40.393391: step: 330/464, loss: 0.12729467451572418 2023-01-22 11:43:41.039521: step: 332/464, loss: 0.15847192704677582 2023-01-22 11:43:41.650167: step: 334/464, loss: 0.13148055970668793 2023-01-22 11:43:42.305884: step: 336/464, loss: 0.1411283314228058 2023-01-22 11:43:42.904096: step: 338/464, loss: 0.07324307411909103 2023-01-22 11:43:43.557829: step: 340/464, loss: 0.05654343590140343 2023-01-22 11:43:44.154740: step: 342/464, loss: 0.07110802084207535 2023-01-22 11:43:44.775654: step: 344/464, loss: 0.19061794877052307 2023-01-22 11:43:45.325495: step: 346/464, loss: 0.03509838134050369 2023-01-22 11:43:45.933035: step: 348/464, loss: 0.5751135945320129 2023-01-22 11:43:46.526392: step: 350/464, loss: 0.059561777859926224 2023-01-22 11:43:47.124423: step: 352/464, loss: 0.03886803984642029 2023-01-22 11:43:47.742405: step: 354/464, loss: 0.0850643590092659 2023-01-22 11:43:48.320691: step: 356/464, loss: 0.05165430158376694 2023-01-22 11:43:48.953228: step: 358/464, loss: 0.11026255041360855 2023-01-22 11:43:49.564227: step: 360/464, loss: 0.0780283734202385 2023-01-22 11:43:50.186578: step: 362/464, loss: 0.04332159832119942 2023-01-22 11:43:50.882009: step: 364/464, loss: 0.03511100634932518 2023-01-22 11:43:51.620812: step: 366/464, loss: 0.06925657391548157 2023-01-22 11:43:52.171914: step: 368/464, loss: 0.025680840015411377 2023-01-22 11:43:52.777112: step: 370/464, loss: 0.06052357703447342 2023-01-22 11:43:53.376332: step: 372/464, loss: 0.03492802754044533 2023-01-22 11:43:54.065520: step: 374/464, loss: 0.09259124100208282 2023-01-22 11:43:54.671010: step: 376/464, loss: 0.037405405193567276 2023-01-22 11:43:55.311249: step: 378/464, loss: 0.45301663875579834 2023-01-22 11:43:55.929006: step: 380/464, loss: 0.04687231779098511 2023-01-22 11:43:56.613799: step: 382/464, loss: 0.05050540715456009 2023-01-22 11:43:57.244953: step: 384/464, loss: 0.11022012680768967 2023-01-22 11:43:57.907490: step: 386/464, loss: 0.10482141375541687 2023-01-22 11:43:58.545202: step: 388/464, loss: 0.08359164744615555 2023-01-22 11:43:59.175667: step: 390/464, loss: 0.07807466387748718 2023-01-22 11:43:59.797684: step: 392/464, loss: 0.20732919871807098 2023-01-22 11:44:00.428199: step: 394/464, loss: 0.08477477729320526 2023-01-22 11:44:01.024274: step: 396/464, loss: 0.014245497062802315 2023-01-22 11:44:01.667171: step: 398/464, loss: 0.16936513781547546 2023-01-22 11:44:02.283632: step: 400/464, loss: 0.11278378963470459 2023-01-22 11:44:02.955652: step: 402/464, loss: 0.17410509288311005 2023-01-22 11:44:03.762951: step: 404/464, loss: 0.052976060658693314 2023-01-22 11:44:04.328384: step: 406/464, loss: 0.008185646496713161 2023-01-22 11:44:04.943758: step: 408/464, loss: 0.14385266602039337 2023-01-22 11:44:05.549951: step: 410/464, loss: 0.09457972645759583 2023-01-22 11:44:06.162443: step: 412/464, loss: 0.03777166083455086 2023-01-22 11:44:06.817715: step: 414/464, loss: 0.07037343829870224 2023-01-22 11:44:07.445690: step: 416/464, loss: 0.04227151721715927 2023-01-22 11:44:08.064047: step: 418/464, loss: 0.12296376377344131 2023-01-22 11:44:08.726037: step: 420/464, loss: 0.06956274062395096 2023-01-22 11:44:09.344443: step: 422/464, loss: 0.18227338790893555 2023-01-22 11:44:10.006819: step: 424/464, loss: 0.12981176376342773 
2023-01-22 11:44:10.606230: step: 426/464, loss: 0.031735748052597046 2023-01-22 11:44:11.225886: step: 428/464, loss: 0.10249984264373779 2023-01-22 11:44:11.972751: step: 430/464, loss: 0.07617301493883133 2023-01-22 11:44:12.562017: step: 432/464, loss: 0.0622231587767601 2023-01-22 11:44:13.175598: step: 434/464, loss: 0.0698821097612381 2023-01-22 11:44:13.820245: step: 436/464, loss: 0.1468801498413086 2023-01-22 11:44:14.446849: step: 438/464, loss: 0.439418226480484 2023-01-22 11:44:15.102316: step: 440/464, loss: 0.05343214049935341 2023-01-22 11:44:15.696126: step: 442/464, loss: 0.03801265358924866 2023-01-22 11:44:16.356613: step: 444/464, loss: 0.0925765261054039 2023-01-22 11:44:16.984669: step: 446/464, loss: 0.13436639308929443 2023-01-22 11:44:17.580706: step: 448/464, loss: 0.20754016935825348 2023-01-22 11:44:18.173846: step: 450/464, loss: 0.20153497159481049 2023-01-22 11:44:18.760439: step: 452/464, loss: 0.4196487069129944 2023-01-22 11:44:19.428800: step: 454/464, loss: 0.09021150320768356 2023-01-22 11:44:20.049332: step: 456/464, loss: 0.1262298971414566 2023-01-22 11:44:20.643418: step: 458/464, loss: 0.03276536613702774 2023-01-22 11:44:21.214064: step: 460/464, loss: 0.05063563585281372 2023-01-22 11:44:21.813229: step: 462/464, loss: 0.12348288297653198 2023-01-22 11:44:22.441432: step: 464/464, loss: 1.272400975227356 2023-01-22 11:44:23.025852: step: 466/464, loss: 0.5960069298744202 2023-01-22 11:44:23.593520: step: 468/464, loss: 0.18682511150836945 2023-01-22 11:44:24.277597: step: 470/464, loss: 0.5299834609031677 2023-01-22 11:44:24.873101: step: 472/464, loss: 0.047101832926273346 2023-01-22 11:44:25.573394: step: 474/464, loss: 0.12800177931785583 2023-01-22 11:44:26.176791: step: 476/464, loss: 0.03759802505373955 2023-01-22 11:44:26.823169: step: 478/464, loss: 0.13510610163211823 2023-01-22 11:44:27.457023: step: 480/464, loss: 0.2975039482116699 2023-01-22 11:44:28.128262: step: 482/464, loss: 0.06511014699935913 2023-01-22 11:44:28.880746: step: 484/464, loss: 0.08919087052345276 2023-01-22 11:44:29.517949: step: 486/464, loss: 0.039460860192775726 2023-01-22 11:44:30.176537: step: 488/464, loss: 0.04157282039523125 2023-01-22 11:44:30.859353: step: 490/464, loss: 0.10913825780153275 2023-01-22 11:44:31.438118: step: 492/464, loss: 0.06604700535535812 2023-01-22 11:44:32.090434: step: 494/464, loss: 0.44415482878685 2023-01-22 11:44:32.688645: step: 496/464, loss: 5.065694808959961 2023-01-22 11:44:33.320076: step: 498/464, loss: 0.07200480252504349 2023-01-22 11:44:33.904463: step: 500/464, loss: 0.09879495203495026 2023-01-22 11:44:34.470260: step: 502/464, loss: 0.18941263854503632 2023-01-22 11:44:35.162662: step: 504/464, loss: 0.42598849534988403 2023-01-22 11:44:35.753661: step: 506/464, loss: 0.08068060129880905 2023-01-22 11:44:36.373261: step: 508/464, loss: 0.0772366151213646 2023-01-22 11:44:37.060566: step: 510/464, loss: 0.2177586853504181 2023-01-22 11:44:37.682298: step: 512/464, loss: 0.08070854097604752 2023-01-22 11:44:38.279977: step: 514/464, loss: 0.027162298560142517 2023-01-22 11:44:38.923023: step: 516/464, loss: 0.09526893496513367 2023-01-22 11:44:39.591791: step: 518/464, loss: 0.1426323652267456 2023-01-22 11:44:40.193758: step: 520/464, loss: 0.10545886307954788 2023-01-22 11:44:40.785055: step: 522/464, loss: 0.09707791358232498 2023-01-22 11:44:41.396745: step: 524/464, loss: 0.08150474727153778 2023-01-22 11:44:42.038564: step: 526/464, loss: 0.1750078946352005 2023-01-22 11:44:42.634055: step: 528/464, loss: 
0.08888711780309677 2023-01-22 11:44:43.208890: step: 530/464, loss: 0.09063917398452759 2023-01-22 11:44:43.854445: step: 532/464, loss: 0.068354532122612 2023-01-22 11:44:44.446559: step: 534/464, loss: 2.5237388610839844 2023-01-22 11:44:45.033941: step: 536/464, loss: 0.14953790605068207 2023-01-22 11:44:45.652647: step: 538/464, loss: 0.19570550322532654 2023-01-22 11:44:46.343871: step: 540/464, loss: 0.08763013780117035 2023-01-22 11:44:47.041416: step: 542/464, loss: 0.18320994079113007 2023-01-22 11:44:47.745257: step: 544/464, loss: 0.00924855750054121 2023-01-22 11:44:48.383087: step: 546/464, loss: 0.045794107019901276 2023-01-22 11:44:49.045553: step: 548/464, loss: 0.04685569554567337 2023-01-22 11:44:49.830701: step: 550/464, loss: 0.16176503896713257 2023-01-22 11:44:50.441397: step: 552/464, loss: 0.04111208766698837 2023-01-22 11:44:51.035129: step: 554/464, loss: 0.17326392233371735 2023-01-22 11:44:51.655759: step: 556/464, loss: 0.01820904016494751 2023-01-22 11:44:52.242918: step: 558/464, loss: 0.10440154373645782 2023-01-22 11:44:52.840997: step: 560/464, loss: 0.06176659092307091 2023-01-22 11:44:53.446064: step: 562/464, loss: 0.04807630181312561 2023-01-22 11:44:54.057941: step: 564/464, loss: 0.06456959992647171 2023-01-22 11:44:54.699614: step: 566/464, loss: 0.5453143119812012 2023-01-22 11:44:55.330791: step: 568/464, loss: 0.0833263024687767 2023-01-22 11:44:55.971196: step: 570/464, loss: 0.09743386507034302 2023-01-22 11:44:56.590166: step: 572/464, loss: 0.6703399419784546 2023-01-22 11:44:57.225272: step: 574/464, loss: 0.21478229761123657 2023-01-22 11:44:57.889802: step: 576/464, loss: 0.06619272381067276 2023-01-22 11:44:58.539076: step: 578/464, loss: 0.08586452901363373 2023-01-22 11:44:59.145683: step: 580/464, loss: 0.1202094778418541 2023-01-22 11:44:59.814122: step: 582/464, loss: 0.08488589525222778 2023-01-22 11:45:00.441019: step: 584/464, loss: 0.6994089484214783 2023-01-22 11:45:01.035245: step: 586/464, loss: 0.008067386224865913 2023-01-22 11:45:01.641867: step: 588/464, loss: 0.11339905858039856 2023-01-22 11:45:02.301384: step: 590/464, loss: 0.09993147850036621 2023-01-22 11:45:02.938605: step: 592/464, loss: 0.20108789205551147 2023-01-22 11:45:03.610411: step: 594/464, loss: 0.26813051104545593 2023-01-22 11:45:04.167747: step: 596/464, loss: 0.03282526135444641 2023-01-22 11:45:04.779765: step: 598/464, loss: 0.04220222681760788 2023-01-22 11:45:05.385913: step: 600/464, loss: 0.10550139844417572 2023-01-22 11:45:06.005710: step: 602/464, loss: 0.49047207832336426 2023-01-22 11:45:06.622303: step: 604/464, loss: 0.05437614768743515 2023-01-22 11:45:07.197072: step: 606/464, loss: 0.11706947535276413 2023-01-22 11:45:07.782367: step: 608/464, loss: 0.13433879613876343 2023-01-22 11:45:08.390054: step: 610/464, loss: 0.04218660295009613 2023-01-22 11:45:09.055684: step: 612/464, loss: 0.04034535586833954 2023-01-22 11:45:09.626297: step: 614/464, loss: 1.275100827217102 2023-01-22 11:45:10.262200: step: 616/464, loss: 0.18319328129291534 2023-01-22 11:45:10.942738: step: 618/464, loss: 0.22358042001724243 2023-01-22 11:45:11.555787: step: 620/464, loss: 0.28204214572906494 2023-01-22 11:45:12.231980: step: 622/464, loss: 0.05341412127017975 2023-01-22 11:45:12.815445: step: 624/464, loss: 0.21865655481815338 2023-01-22 11:45:13.457264: step: 626/464, loss: 0.23845311999320984 2023-01-22 11:45:14.052492: step: 628/464, loss: 0.08601119369268417 2023-01-22 11:45:14.727044: step: 630/464, loss: 0.07053272426128387 2023-01-22 
11:45:15.347978: step: 632/464, loss: 0.045950137078762054 2023-01-22 11:45:15.982192: step: 634/464, loss: 0.11749845743179321 2023-01-22 11:45:16.722660: step: 636/464, loss: 0.37584465742111206 2023-01-22 11:45:17.348887: step: 638/464, loss: 0.06773810088634491 2023-01-22 11:45:17.934630: step: 640/464, loss: 0.15824325382709503 2023-01-22 11:45:18.550979: step: 642/464, loss: 0.19471389055252075 2023-01-22 11:45:19.295338: step: 644/464, loss: 0.06105535477399826 2023-01-22 11:45:19.905598: step: 646/464, loss: 0.06644576787948608 2023-01-22 11:45:20.613790: step: 648/464, loss: 0.09399471431970596 2023-01-22 11:45:21.250188: step: 650/464, loss: 0.24062883853912354 2023-01-22 11:45:21.851820: step: 652/464, loss: 0.09345666319131851 2023-01-22 11:45:22.543581: step: 654/464, loss: 0.18545842170715332 2023-01-22 11:45:23.150153: step: 656/464, loss: 0.11836571246385574 2023-01-22 11:45:23.744044: step: 658/464, loss: 0.06930890679359436 2023-01-22 11:45:24.338075: step: 660/464, loss: 0.08569063246250153 2023-01-22 11:45:24.946956: step: 662/464, loss: 0.1048484519124031 2023-01-22 11:45:25.611812: step: 664/464, loss: 0.2178206741809845 2023-01-22 11:45:26.271502: step: 666/464, loss: 0.12674731016159058 2023-01-22 11:45:26.869105: step: 668/464, loss: 0.11909352242946625 2023-01-22 11:45:27.592565: step: 670/464, loss: 0.6282179355621338 2023-01-22 11:45:28.287389: step: 672/464, loss: 0.08999377489089966 2023-01-22 11:45:28.889153: step: 674/464, loss: 0.02203369326889515 2023-01-22 11:45:29.436142: step: 676/464, loss: 0.02420470118522644 2023-01-22 11:45:29.997887: step: 678/464, loss: 0.1601714789867401 2023-01-22 11:45:30.685227: step: 680/464, loss: 0.06573443859815598 2023-01-22 11:45:31.302183: step: 682/464, loss: 0.043082889169454575 2023-01-22 11:45:31.902320: step: 684/464, loss: 0.14057160913944244 2023-01-22 11:45:32.586814: step: 686/464, loss: 0.21037933230400085 2023-01-22 11:45:33.213564: step: 688/464, loss: 0.039161499589681625 2023-01-22 11:45:33.836493: step: 690/464, loss: 0.07449066638946533 2023-01-22 11:45:34.507351: step: 692/464, loss: 0.053820233792066574 2023-01-22 11:45:35.245605: step: 694/464, loss: 0.9339656829833984 2023-01-22 11:45:35.840305: step: 696/464, loss: 0.06315405666828156 2023-01-22 11:45:36.438241: step: 698/464, loss: 0.927216112613678 2023-01-22 11:45:37.072424: step: 700/464, loss: 0.06461441516876221 2023-01-22 11:45:37.707612: step: 702/464, loss: 0.10851768404245377 2023-01-22 11:45:38.422816: step: 704/464, loss: 0.5746562480926514 2023-01-22 11:45:39.112040: step: 706/464, loss: 0.19040845334529877 2023-01-22 11:45:39.722862: step: 708/464, loss: 0.12218072265386581 2023-01-22 11:45:40.359450: step: 710/464, loss: 0.09031082689762115 2023-01-22 11:45:40.966869: step: 712/464, loss: 0.0764712393283844 2023-01-22 11:45:41.644195: step: 714/464, loss: 0.1057339459657669 2023-01-22 11:45:42.255280: step: 716/464, loss: 0.1484196037054062 2023-01-22 11:45:42.937036: step: 718/464, loss: 0.15744993090629578 2023-01-22 11:45:43.511977: step: 720/464, loss: 0.055503856390714645 2023-01-22 11:45:44.135612: step: 722/464, loss: 0.34808388352394104 2023-01-22 11:45:44.771228: step: 724/464, loss: 0.7415017485618591 2023-01-22 11:45:45.431716: step: 726/464, loss: 0.11673478782176971 2023-01-22 11:45:46.050742: step: 728/464, loss: 0.10913416743278503 2023-01-22 11:45:46.690568: step: 730/464, loss: 0.05623776093125343 2023-01-22 11:45:47.263564: step: 732/464, loss: 0.18155232071876526 2023-01-22 11:45:47.857171: step: 734/464, loss: 
0.111542247235775 2023-01-22 11:45:48.508325: step: 736/464, loss: 0.1297747641801834 2023-01-22 11:45:49.082284: step: 738/464, loss: 0.05977364629507065 2023-01-22 11:45:49.709254: step: 740/464, loss: 0.07568145543336868 2023-01-22 11:45:50.414700: step: 742/464, loss: 0.16066378355026245 2023-01-22 11:45:51.012764: step: 744/464, loss: 0.023682449012994766 2023-01-22 11:45:51.610587: step: 746/464, loss: 0.05309910327196121 2023-01-22 11:45:52.189589: step: 748/464, loss: 0.10708057880401611 2023-01-22 11:45:52.852299: step: 750/464, loss: 0.1418672800064087 2023-01-22 11:45:53.398890: step: 752/464, loss: 0.08676539361476898 2023-01-22 11:45:54.021493: step: 754/464, loss: 0.15166112780570984 2023-01-22 11:45:54.686684: step: 756/464, loss: 0.0914529487490654 2023-01-22 11:45:55.333984: step: 758/464, loss: 0.11644382029771805 2023-01-22 11:45:55.959191: step: 760/464, loss: 0.13015790283679962 2023-01-22 11:45:56.588271: step: 762/464, loss: 0.1178271546959877 2023-01-22 11:45:57.256537: step: 764/464, loss: 0.1096925288438797 2023-01-22 11:45:57.936409: step: 766/464, loss: 0.08539048582315445 2023-01-22 11:45:58.504005: step: 768/464, loss: 0.08482424914836884 2023-01-22 11:45:59.103258: step: 770/464, loss: 0.026232510805130005 2023-01-22 11:45:59.672643: step: 772/464, loss: 0.05931536853313446 2023-01-22 11:46:00.331855: step: 774/464, loss: 0.06830425560474396 2023-01-22 11:46:00.942766: step: 776/464, loss: 0.026791663840413094 2023-01-22 11:46:01.635805: step: 778/464, loss: 0.1483849138021469 2023-01-22 11:46:02.291582: step: 780/464, loss: 0.07420195639133453 2023-01-22 11:46:02.915910: step: 782/464, loss: 0.06437802314758301 2023-01-22 11:46:03.546921: step: 784/464, loss: 0.03879198431968689 2023-01-22 11:46:04.199809: step: 786/464, loss: 0.07572466135025024 2023-01-22 11:46:04.772769: step: 788/464, loss: 0.08594454824924469 2023-01-22 11:46:05.384476: step: 790/464, loss: 0.08379257470369339 2023-01-22 11:46:05.956800: step: 792/464, loss: 0.08512155711650848 2023-01-22 11:46:06.628504: step: 794/464, loss: 0.19086991250514984 2023-01-22 11:46:07.288738: step: 796/464, loss: 0.07286135852336884 2023-01-22 11:46:07.977741: step: 798/464, loss: 0.02229865826666355 2023-01-22 11:46:08.636402: step: 800/464, loss: 0.09870410710573196 2023-01-22 11:46:09.291695: step: 802/464, loss: 0.19168686866760254 2023-01-22 11:46:09.901762: step: 804/464, loss: 0.3734838664531708 2023-01-22 11:46:10.488088: step: 806/464, loss: 0.02032576873898506 2023-01-22 11:46:11.118980: step: 808/464, loss: 0.0900048092007637 2023-01-22 11:46:11.740658: step: 810/464, loss: 0.06668906658887863 2023-01-22 11:46:12.354019: step: 812/464, loss: 0.018267882987856865 2023-01-22 11:46:13.012226: step: 814/464, loss: 0.02541377954185009 2023-01-22 11:46:13.697263: step: 816/464, loss: 0.09765075892210007 2023-01-22 11:46:14.425501: step: 818/464, loss: 0.07601515203714371 2023-01-22 11:46:15.053431: step: 820/464, loss: 0.43445128202438354 2023-01-22 11:46:15.719339: step: 822/464, loss: 0.09697567671537399 2023-01-22 11:46:16.377501: step: 824/464, loss: 0.12798605859279633 2023-01-22 11:46:16.993457: step: 826/464, loss: 0.17840342223644257 2023-01-22 11:46:17.654783: step: 828/464, loss: 0.026910794898867607 2023-01-22 11:46:18.264519: step: 830/464, loss: 0.01830548793077469 2023-01-22 11:46:18.893347: step: 832/464, loss: 0.1646566390991211 2023-01-22 11:46:19.526057: step: 834/464, loss: 0.03405522555112839 2023-01-22 11:46:20.181482: step: 836/464, loss: 0.22875934839248657 2023-01-22 
11:46:20.776014: step: 838/464, loss: 0.02723325602710247 2023-01-22 11:46:21.321296: step: 840/464, loss: 0.10927391052246094 2023-01-22 11:46:22.081332: step: 842/464, loss: 0.17469929158687592 2023-01-22 11:46:22.749669: step: 844/464, loss: 0.10927444696426392 2023-01-22 11:46:23.310776: step: 846/464, loss: 0.13192278146743774 2023-01-22 11:46:23.941344: step: 848/464, loss: 0.4885123074054718 2023-01-22 11:46:24.562147: step: 850/464, loss: 0.09656789898872375 2023-01-22 11:46:25.224272: step: 852/464, loss: 0.09319087117910385 2023-01-22 11:46:25.852447: step: 854/464, loss: 0.016286242753267288 2023-01-22 11:46:26.419637: step: 856/464, loss: 0.05448547378182411 2023-01-22 11:46:27.075396: step: 858/464, loss: 0.15139378607273102 2023-01-22 11:46:27.730207: step: 860/464, loss: 0.20680542290210724 2023-01-22 11:46:28.366466: step: 862/464, loss: 0.1301339566707611 2023-01-22 11:46:29.051995: step: 864/464, loss: 0.09975486993789673 2023-01-22 11:46:29.667051: step: 866/464, loss: 0.12980853021144867 2023-01-22 11:46:30.256068: step: 868/464, loss: 0.10604464262723923 2023-01-22 11:46:30.943780: step: 870/464, loss: 0.11554470658302307 2023-01-22 11:46:31.555328: step: 872/464, loss: 0.15384931862354279 2023-01-22 11:46:32.247251: step: 874/464, loss: 0.07003022730350494 2023-01-22 11:46:32.862422: step: 876/464, loss: 0.07454525679349899 2023-01-22 11:46:33.510169: step: 878/464, loss: 0.10567981004714966 2023-01-22 11:46:34.136346: step: 880/464, loss: 5.253168106079102 2023-01-22 11:46:34.744596: step: 882/464, loss: 0.06796644628047943 2023-01-22 11:46:35.403958: step: 884/464, loss: 0.03893854469060898 2023-01-22 11:46:36.022547: step: 886/464, loss: 0.04492393508553505 2023-01-22 11:46:36.655822: step: 888/464, loss: 0.19420526921749115 2023-01-22 11:46:37.359025: step: 890/464, loss: 0.25114211440086365 2023-01-22 11:46:37.960696: step: 892/464, loss: 0.015781380236148834 2023-01-22 11:46:38.614464: step: 894/464, loss: 0.00819613691419363 2023-01-22 11:46:39.281866: step: 896/464, loss: 0.1408635824918747 2023-01-22 11:46:39.948462: step: 898/464, loss: 2.3857741355895996 2023-01-22 11:46:40.607912: step: 900/464, loss: 0.09957841783761978 2023-01-22 11:46:41.222755: step: 902/464, loss: 0.051161251962184906 2023-01-22 11:46:41.819866: step: 904/464, loss: 0.1271309107542038 2023-01-22 11:46:42.522699: step: 906/464, loss: 0.13337206840515137 2023-01-22 11:46:43.227265: step: 908/464, loss: 0.07845316082239151 2023-01-22 11:46:43.871783: step: 910/464, loss: 0.17774929106235504 2023-01-22 11:46:44.511476: step: 912/464, loss: 0.057782527059316635 2023-01-22 11:46:45.158604: step: 914/464, loss: 0.0975969061255455 2023-01-22 11:46:45.791753: step: 916/464, loss: 0.298834890127182 2023-01-22 11:46:46.368751: step: 918/464, loss: 0.08659328520298004 2023-01-22 11:46:47.037987: step: 920/464, loss: 0.48213207721710205 2023-01-22 11:46:47.667934: step: 922/464, loss: 0.051230985671281815 2023-01-22 11:46:48.266955: step: 924/464, loss: 0.1356029510498047 2023-01-22 11:46:48.876972: step: 926/464, loss: 0.3158682584762573 2023-01-22 11:46:49.525504: step: 928/464, loss: 0.08663108944892883 2023-01-22 11:46:50.044733: step: 930/464, loss: 0.0403536893427372 ================================================== Loss: 0.181 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28913781824931195, 'r': 0.34235673356275265, 'f1': 0.31350477599925397}, 'combined': 0.23100351915734502, 'epoch': 16} Test Chinese: 
{'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2805193307091205, 'r': 0.31603462212, 'f1': 0.2972197912949094}, 'combined': 0.19403986374693566, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.287974691085231, 'r': 0.3562798834678759, 'f1': 0.31850635892717655}, 'combined': 0.23468889605160376, 'epoch': 16} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2991598521009755, 'r': 0.3224888313932534, 'f1': 0.3103866015175684}, 'combined': 0.20263581238970788, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3000426144205819, 'r': 0.3495752661370726, 'f1': 0.3229205350643949}, 'combined': 0.23794144688955413, 'epoch': 16} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2921443551955032, 'r': 0.31283903353041925, 'f1': 0.3021377427343987}, 'combined': 0.19725054707012557, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19270833333333331, 'r': 0.35238095238095235, 'f1': 0.24915824915824908}, 'combined': 0.1661054994388327, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24404761904761904, 'r': 0.44565217391304346, 'f1': 0.3153846153846154}, 'combined': 0.1576923076923077, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40625, 'r': 0.33620689655172414, 'f1': 0.36792452830188677}, 'combined': 0.2452830188679245, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 
0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:49:30.353087: step: 2/464, loss: 2.323784828186035 2023-01-22 11:49:30.997648: step: 4/464, loss: 0.07156746834516525 2023-01-22 11:49:31.630793: step: 6/464, loss: 0.06237180158495903 2023-01-22 11:49:32.249516: step: 8/464, loss: 0.38281306624412537 2023-01-22 11:49:32.843317: step: 10/464, loss: 0.059620022773742676 2023-01-22 11:49:33.444095: step: 12/464, loss: 0.06657879054546356 2023-01-22 11:49:34.086578: step: 14/464, loss: 0.1868683099746704 2023-01-22 11:49:34.712497: step: 16/464, loss: 0.05568621680140495 2023-01-22 11:49:35.332379: step: 18/464, loss: 0.26912921667099 2023-01-22 11:49:35.922819: step: 20/464, loss: 0.029237784445285797 2023-01-22 11:49:36.560183: step: 22/464, loss: 0.05056983605027199 2023-01-22 11:49:37.221935: step: 24/464, loss: 0.07277239114046097 2023-01-22 11:49:37.870249: step: 26/464, loss: 0.07805615663528442 2023-01-22 11:49:38.433807: step: 28/464, loss: 0.06455496698617935 2023-01-22 11:49:39.060360: step: 30/464, loss: 0.037866562604904175 2023-01-22 11:49:39.680285: step: 32/464, loss: 0.04250551760196686 2023-01-22 11:49:40.230681: step: 34/464, loss: 0.665719747543335 2023-01-22 11:49:40.792503: step: 36/464, loss: 0.06608240306377411 2023-01-22 11:49:41.418968: step: 38/464, loss: 0.015798650681972504 2023-01-22 11:49:41.990091: step: 40/464, loss: 0.10514256358146667 2023-01-22 11:49:42.584241: step: 42/464, loss: 0.0462794303894043 2023-01-22 11:49:43.212177: step: 44/464, loss: 0.059097494930028915 2023-01-22 11:49:43.848220: step: 46/464, loss: 0.14740002155303955 2023-01-22 11:49:44.567654: step: 48/464, loss: 0.021973850205540657 2023-01-22 11:49:45.270928: step: 50/464, loss: 0.3088114559650421 2023-01-22 11:49:45.967585: step: 52/464, loss: 0.18334423005580902 2023-01-22 11:49:46.534465: step: 54/464, loss: 0.06878239661455154 2023-01-22 11:49:47.254717: step: 56/464, loss: 0.019568689167499542 2023-01-22 11:49:47.898713: step: 58/464, loss: 0.11608067899942398 2023-01-22 11:49:48.478891: step: 60/464, loss: 0.07847467809915543 2023-01-22 11:49:49.204319: step: 62/464, loss: 0.09866181761026382 2023-01-22 11:49:49.842973: step: 64/464, loss: 0.57988041639328 2023-01-22 11:49:50.507814: step: 66/464, loss: 0.06298445910215378 2023-01-22 11:49:51.121074: step: 68/464, loss: 0.1550336629152298 2023-01-22 11:49:51.728120: step: 70/464, loss: 0.04487539455294609 2023-01-22 11:49:52.306760: step: 72/464, loss: 0.06373658031225204 2023-01-22 11:49:52.882823: step: 74/464, loss: 0.03194844722747803 2023-01-22 11:49:53.503794: step: 76/464, loss: 0.05486992374062538 2023-01-22 11:49:54.120505: step: 78/464, loss: 0.1321585327386856 2023-01-22 11:49:54.737758: step: 80/464, loss: 0.09452857077121735 2023-01-22 11:49:55.345711: step: 82/464, loss: 0.06801652908325195 2023-01-22 11:49:55.994554: step: 84/464, loss: 0.06772405654191971 2023-01-22 11:49:56.649795: step: 86/464, loss: 0.14668281376361847 2023-01-22 11:49:57.292985: step: 88/464, loss: 0.042957138270139694 2023-01-22 11:49:57.922208: step: 
90/464, loss: 0.028503797948360443 2023-01-22 11:49:58.497517: step: 92/464, loss: 0.10230975598096848 2023-01-22 11:49:59.132870: step: 94/464, loss: 0.05061762034893036 2023-01-22 11:49:59.801490: step: 96/464, loss: 0.02996540255844593 2023-01-22 11:50:00.460322: step: 98/464, loss: 0.0619596503674984 2023-01-22 11:50:01.149060: step: 100/464, loss: 0.023418670520186424 2023-01-22 11:50:01.799988: step: 102/464, loss: 0.18141427636146545 2023-01-22 11:50:02.467988: step: 104/464, loss: 0.08905918151140213 2023-01-22 11:50:03.066266: step: 106/464, loss: 0.09526338428258896 2023-01-22 11:50:03.579949: step: 108/464, loss: 0.005294324364513159 2023-01-22 11:50:04.215840: step: 110/464, loss: 0.11219874769449234 2023-01-22 11:50:04.871251: step: 112/464, loss: 0.13767443597316742 2023-01-22 11:50:05.513019: step: 114/464, loss: 0.02810472808778286 2023-01-22 11:50:06.127141: step: 116/464, loss: 0.09533141553401947 2023-01-22 11:50:06.779954: step: 118/464, loss: 0.24947750568389893 2023-01-22 11:50:07.399428: step: 120/464, loss: 0.07970800250768661 2023-01-22 11:50:08.010063: step: 122/464, loss: 0.04374972730875015 2023-01-22 11:50:08.629258: step: 124/464, loss: 0.15076035261154175 2023-01-22 11:50:09.273393: step: 126/464, loss: 0.03661858290433884 2023-01-22 11:50:09.826461: step: 128/464, loss: 0.057950735092163086 2023-01-22 11:50:10.430832: step: 130/464, loss: 0.03365790471434593 2023-01-22 11:50:11.112038: step: 132/464, loss: 0.04842458665370941 2023-01-22 11:50:11.764706: step: 134/464, loss: 0.09987257421016693 2023-01-22 11:50:12.454665: step: 136/464, loss: 0.022941607981920242 2023-01-22 11:50:13.064563: step: 138/464, loss: 0.06291542947292328 2023-01-22 11:50:13.662149: step: 140/464, loss: 0.00350829865783453 2023-01-22 11:50:14.238782: step: 142/464, loss: 0.09380892664194107 2023-01-22 11:50:14.903466: step: 144/464, loss: 0.06917431950569153 2023-01-22 11:50:15.506340: step: 146/464, loss: 0.04020599275827408 2023-01-22 11:50:16.119787: step: 148/464, loss: 0.08713655918836594 2023-01-22 11:50:16.674298: step: 150/464, loss: 0.046814925968647 2023-01-22 11:50:17.349970: step: 152/464, loss: 0.13716870546340942 2023-01-22 11:50:18.010188: step: 154/464, loss: 0.09229633212089539 2023-01-22 11:50:18.735771: step: 156/464, loss: 0.05761297792196274 2023-01-22 11:50:19.383332: step: 158/464, loss: 0.09417982399463654 2023-01-22 11:50:20.003760: step: 160/464, loss: 0.013948668725788593 2023-01-22 11:50:20.670876: step: 162/464, loss: 0.04147933050990105 2023-01-22 11:50:21.308823: step: 164/464, loss: 0.14888344705104828 2023-01-22 11:50:21.951409: step: 166/464, loss: 0.23217937350273132 2023-01-22 11:50:22.562753: step: 168/464, loss: 0.8458350896835327 2023-01-22 11:50:23.287747: step: 170/464, loss: 0.017771458253264427 2023-01-22 11:50:23.880004: step: 172/464, loss: 0.0396922342479229 2023-01-22 11:50:24.458360: step: 174/464, loss: 0.10030711442232132 2023-01-22 11:50:25.080711: step: 176/464, loss: 0.0012506459606811404 2023-01-22 11:50:25.690156: step: 178/464, loss: 0.12276698648929596 2023-01-22 11:50:26.281457: step: 180/464, loss: 0.0790601447224617 2023-01-22 11:50:26.973756: step: 182/464, loss: 0.15493811666965485 2023-01-22 11:50:27.651390: step: 184/464, loss: 0.12189780175685883 2023-01-22 11:50:28.410621: step: 186/464, loss: 0.0675785094499588 2023-01-22 11:50:29.028808: step: 188/464, loss: 0.09874018281698227 2023-01-22 11:50:29.582521: step: 190/464, loss: 0.04013747721910477 2023-01-22 11:50:30.148913: step: 192/464, loss: 0.123075470328331 
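Note on the evaluation blocks printed at the end of each epoch: every language entry reports a template score, a slot score, and a 'combined' value, and in every block in this log the 'combined' value equals the product of the template F1 and the slot F1, with each F1 being the usual harmonic mean of the printed precision and recall. The sketch below is not taken from train.py; it is a minimal illustration, under that assumption, with illustrative function names, that reproduces the epoch-16 Dev Chinese entry above.

# Minimal sketch (assumption, not the project's scorer): f1 = 2*p*r/(p+r)
# and 'combined' = template_f1 * slot_f1, which matches the logged values.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; 0.0 when both are zero."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """Product of the template F1 and the slot F1."""
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Epoch-16 Dev Chinese block from the log above:
template = {'p': 1.0, 'r': 0.5833333333333334}
slot = {'p': 0.28913781824931195, 'r': 0.34235673356275265}
print(combined_score(template, slot))  # ~0.23100, matching 'combined': 0.23100351915734502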
2023-01-22 11:50:30.873094: step: 194/464, loss: 0.021077025681734085 2023-01-22 11:50:31.483928: step: 196/464, loss: 0.16640305519104004 2023-01-22 11:50:32.118526: step: 198/464, loss: 0.087140291929245 2023-01-22 11:50:32.771678: step: 200/464, loss: 0.059325482696294785 2023-01-22 11:50:33.395207: step: 202/464, loss: 0.06079040840268135 2023-01-22 11:50:33.992547: step: 204/464, loss: 0.04386744648218155 2023-01-22 11:50:34.639033: step: 206/464, loss: 0.008767152205109596 2023-01-22 11:50:35.321867: step: 208/464, loss: 0.02713126689195633 2023-01-22 11:50:35.970983: step: 210/464, loss: 0.0859706699848175 2023-01-22 11:50:36.536157: step: 212/464, loss: 0.10441812127828598 2023-01-22 11:50:37.160788: step: 214/464, loss: 0.02353387139737606 2023-01-22 11:50:37.757131: step: 216/464, loss: 0.13387531042099 2023-01-22 11:50:38.400602: step: 218/464, loss: 0.02812942862510681 2023-01-22 11:50:39.119919: step: 220/464, loss: 0.10433623194694519 2023-01-22 11:50:39.739435: step: 222/464, loss: 0.055663034319877625 2023-01-22 11:50:40.380706: step: 224/464, loss: 0.2841798961162567 2023-01-22 11:50:40.962305: step: 226/464, loss: 0.06304500997066498 2023-01-22 11:50:41.571922: step: 228/464, loss: 0.007425735704600811 2023-01-22 11:50:42.247793: step: 230/464, loss: 0.07880717515945435 2023-01-22 11:50:42.830503: step: 232/464, loss: 0.3579563498497009 2023-01-22 11:50:43.454594: step: 234/464, loss: 0.10063056647777557 2023-01-22 11:50:44.031991: step: 236/464, loss: 0.14094175398349762 2023-01-22 11:50:44.654560: step: 238/464, loss: 0.1755877137184143 2023-01-22 11:50:45.168691: step: 240/464, loss: 0.041254084557294846 2023-01-22 11:50:45.806593: step: 242/464, loss: 0.05494088679552078 2023-01-22 11:50:46.393907: step: 244/464, loss: 0.35076582431793213 2023-01-22 11:50:47.019734: step: 246/464, loss: 0.46399596333503723 2023-01-22 11:50:47.670879: step: 248/464, loss: 0.11473860591650009 2023-01-22 11:50:48.268262: step: 250/464, loss: 0.0690879076719284 2023-01-22 11:50:48.856601: step: 252/464, loss: 0.030523525550961494 2023-01-22 11:50:49.489015: step: 254/464, loss: 0.031805213540792465 2023-01-22 11:50:50.161994: step: 256/464, loss: 0.413985013961792 2023-01-22 11:50:50.832518: step: 258/464, loss: 0.35511845350265503 2023-01-22 11:50:51.508981: step: 260/464, loss: 0.11695799231529236 2023-01-22 11:50:52.116286: step: 262/464, loss: 0.11076057702302933 2023-01-22 11:50:52.676699: step: 264/464, loss: 0.1434173882007599 2023-01-22 11:50:53.272597: step: 266/464, loss: 0.08204736560583115 2023-01-22 11:50:53.899052: step: 268/464, loss: 0.06001695618033409 2023-01-22 11:50:54.585929: step: 270/464, loss: 0.0372479073703289 2023-01-22 11:50:55.136503: step: 272/464, loss: 0.03706498444080353 2023-01-22 11:50:55.775761: step: 274/464, loss: 0.07792051136493683 2023-01-22 11:50:56.343704: step: 276/464, loss: 0.15670345723628998 2023-01-22 11:50:56.977815: step: 278/464, loss: 0.057117052376270294 2023-01-22 11:50:57.676636: step: 280/464, loss: 0.044370219111442566 2023-01-22 11:50:58.320419: step: 282/464, loss: 0.04691505432128906 2023-01-22 11:50:58.968152: step: 284/464, loss: 0.12970347702503204 2023-01-22 11:50:59.626293: step: 286/464, loss: 0.07708616554737091 2023-01-22 11:51:00.288549: step: 288/464, loss: 0.07818364351987839 2023-01-22 11:51:00.925275: step: 290/464, loss: 0.10086287558078766 2023-01-22 11:51:01.521745: step: 292/464, loss: 0.012405943125486374 2023-01-22 11:51:02.148116: step: 294/464, loss: 0.12408587336540222 2023-01-22 11:51:02.752382: step: 
296/464, loss: 0.0847407802939415 2023-01-22 11:51:03.357363: step: 298/464, loss: 0.2100462168455124 2023-01-22 11:51:03.996161: step: 300/464, loss: 0.01914643868803978 2023-01-22 11:51:04.664066: step: 302/464, loss: 0.16246408224105835 2023-01-22 11:51:05.272263: step: 304/464, loss: 0.0580965057015419 2023-01-22 11:51:05.878752: step: 306/464, loss: 0.022193651646375656 2023-01-22 11:51:06.478073: step: 308/464, loss: 0.6016255021095276 2023-01-22 11:51:07.065002: step: 310/464, loss: 0.05895552039146423 2023-01-22 11:51:07.696115: step: 312/464, loss: 0.0020470358431339264 2023-01-22 11:51:08.316511: step: 314/464, loss: 0.09098812937736511 2023-01-22 11:51:08.909121: step: 316/464, loss: 0.02651539258658886 2023-01-22 11:51:09.545674: step: 318/464, loss: 0.057089027017354965 2023-01-22 11:51:10.215592: step: 320/464, loss: 0.05555043742060661 2023-01-22 11:51:10.844755: step: 322/464, loss: 0.1641826331615448 2023-01-22 11:51:11.494543: step: 324/464, loss: 0.035017579793930054 2023-01-22 11:51:12.106384: step: 326/464, loss: 0.09218455851078033 2023-01-22 11:51:12.771231: step: 328/464, loss: 0.015327589586377144 2023-01-22 11:51:13.350855: step: 330/464, loss: 0.0740237608551979 2023-01-22 11:51:13.937903: step: 332/464, loss: 0.060544099658727646 2023-01-22 11:51:14.632010: step: 334/464, loss: 0.12118522822856903 2023-01-22 11:51:15.361786: step: 336/464, loss: 0.02989678643643856 2023-01-22 11:51:16.007329: step: 338/464, loss: 0.08799106627702713 2023-01-22 11:51:16.604130: step: 340/464, loss: 0.024672655388712883 2023-01-22 11:51:17.149665: step: 342/464, loss: 0.018849296495318413 2023-01-22 11:51:17.716876: step: 344/464, loss: 0.03175678104162216 2023-01-22 11:51:18.386507: step: 346/464, loss: 0.07565797120332718 2023-01-22 11:51:19.063761: step: 348/464, loss: 0.16002212464809418 2023-01-22 11:51:19.669153: step: 350/464, loss: 0.0876501128077507 2023-01-22 11:51:20.346662: step: 352/464, loss: 0.673304557800293 2023-01-22 11:51:20.979369: step: 354/464, loss: 0.046740952879190445 2023-01-22 11:51:21.612201: step: 356/464, loss: 0.16490834951400757 2023-01-22 11:51:22.185090: step: 358/464, loss: 0.13249750435352325 2023-01-22 11:51:22.875401: step: 360/464, loss: 0.14411969482898712 2023-01-22 11:51:23.474735: step: 362/464, loss: 0.050405144691467285 2023-01-22 11:51:24.053351: step: 364/464, loss: 2.248347520828247 2023-01-22 11:51:24.659634: step: 366/464, loss: 0.0665668323636055 2023-01-22 11:51:25.307667: step: 368/464, loss: 0.0881873369216919 2023-01-22 11:51:25.912607: step: 370/464, loss: 0.15915344655513763 2023-01-22 11:51:26.492409: step: 372/464, loss: 0.18714694678783417 2023-01-22 11:51:27.109180: step: 374/464, loss: 0.6852954626083374 2023-01-22 11:51:27.674019: step: 376/464, loss: 0.057403597980737686 2023-01-22 11:51:28.254776: step: 378/464, loss: 0.03670104593038559 2023-01-22 11:51:28.850448: step: 380/464, loss: 0.01751803793013096 2023-01-22 11:51:29.617913: step: 382/464, loss: 0.2213580459356308 2023-01-22 11:51:30.298779: step: 384/464, loss: 0.0936044305562973 2023-01-22 11:51:30.924937: step: 386/464, loss: 0.04487286135554314 2023-01-22 11:51:31.537230: step: 388/464, loss: 1.0210002660751343 2023-01-22 11:51:32.199714: step: 390/464, loss: 0.1955382376909256 2023-01-22 11:51:32.805484: step: 392/464, loss: 0.10517556965351105 2023-01-22 11:51:33.375968: step: 394/464, loss: 0.04596574604511261 2023-01-22 11:51:34.008503: step: 396/464, loss: 0.06611286848783493 2023-01-22 11:51:34.638967: step: 398/464, loss: 0.20487700402736664 
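Note on the "Current best result" blocks: the stored entry for each language stays frozen at an earlier epoch (epoch 11 for Chinese and Korean, epoch 14 for Russian in this part of the log), which is consistent with the best entry being replaced only when a later epoch achieves a higher dev 'combined' score. The sketch below is a guess at that bookkeeping, not code read from train.py; all names are illustrative.

# Minimal sketch of per-language best-result tracking (assumption inferred
# from the log, not from train.py): the dev 'combined' score decides whether
# a language's stored best entry is replaced.

best = {}  # language -> {'dev': ..., 'test': ..., 'sample': ..., 'epoch': ...}

def update_best(language, epoch, dev, test, sample):
    """Keep, per language, the epoch with the highest dev 'combined' score."""
    current = best.get(language)
    if current is None or dev['combined'] > current['dev']['combined']:
        best[language] = {'dev': dev, 'test': test, 'sample': sample, 'epoch': epoch}

# Under this rule, epoch 16's Dev Chinese result (combined ~0.2310) does not
# displace the stored epoch-11 entry (combined ~0.2386), matching the log above.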
2023-01-22 11:51:35.324333: step: 400/464, loss: 0.1009177640080452 2023-01-22 11:51:35.988662: step: 402/464, loss: 0.02000158652663231 2023-01-22 11:51:36.606163: step: 404/464, loss: 0.022291820496320724 2023-01-22 11:51:37.211582: step: 406/464, loss: 0.07473233342170715 2023-01-22 11:51:37.878978: step: 408/464, loss: 0.04916410893201828 2023-01-22 11:51:38.490716: step: 410/464, loss: 0.07321424782276154 2023-01-22 11:51:39.180365: step: 412/464, loss: 0.07492359727621078 2023-01-22 11:51:39.791435: step: 414/464, loss: 0.47756317257881165 2023-01-22 11:51:40.449819: step: 416/464, loss: 0.23084905743598938 2023-01-22 11:51:41.112767: step: 418/464, loss: 0.07805997133255005 2023-01-22 11:51:41.733651: step: 420/464, loss: 0.07706529647111893 2023-01-22 11:51:42.394523: step: 422/464, loss: 0.08947402983903885 2023-01-22 11:51:43.016543: step: 424/464, loss: 0.039802052080631256 2023-01-22 11:51:43.630095: step: 426/464, loss: 2.4658396244049072 2023-01-22 11:51:44.222722: step: 428/464, loss: 0.12952269613742828 2023-01-22 11:51:44.779543: step: 430/464, loss: 0.10222648084163666 2023-01-22 11:51:45.457003: step: 432/464, loss: 0.09944965690374374 2023-01-22 11:51:46.080187: step: 434/464, loss: 0.06927595287561417 2023-01-22 11:51:46.693859: step: 436/464, loss: 0.18523666262626648 2023-01-22 11:51:47.321635: step: 438/464, loss: 0.07039118558168411 2023-01-22 11:51:47.902228: step: 440/464, loss: 0.011012760922312737 2023-01-22 11:51:48.550470: step: 442/464, loss: 0.04270339012145996 2023-01-22 11:51:49.174815: step: 444/464, loss: 0.03231300413608551 2023-01-22 11:51:49.824767: step: 446/464, loss: 0.0860917940735817 2023-01-22 11:51:50.441296: step: 448/464, loss: 0.182876318693161 2023-01-22 11:51:51.070050: step: 450/464, loss: 0.05450039729475975 2023-01-22 11:51:51.672678: step: 452/464, loss: 0.0930841788649559 2023-01-22 11:51:52.327213: step: 454/464, loss: 0.18066838383674622 2023-01-22 11:51:52.977092: step: 456/464, loss: 0.09857988357543945 2023-01-22 11:51:53.601449: step: 458/464, loss: 0.05851111561059952 2023-01-22 11:51:54.260778: step: 460/464, loss: 0.02112884260714054 2023-01-22 11:51:54.859191: step: 462/464, loss: 0.3320540487766266 2023-01-22 11:51:55.423012: step: 464/464, loss: 0.05477989837527275 2023-01-22 11:51:55.988738: step: 466/464, loss: 0.04254557937383652 2023-01-22 11:51:56.602565: step: 468/464, loss: 0.08997486531734467 2023-01-22 11:51:57.244521: step: 470/464, loss: 0.08402448147535324 2023-01-22 11:51:57.849628: step: 472/464, loss: 0.1302717924118042 2023-01-22 11:51:58.416109: step: 474/464, loss: 0.05059501528739929 2023-01-22 11:51:59.076111: step: 476/464, loss: 0.0985957607626915 2023-01-22 11:51:59.699697: step: 478/464, loss: 0.051531895995140076 2023-01-22 11:52:00.358567: step: 480/464, loss: 0.229657843708992 2023-01-22 11:52:01.010460: step: 482/464, loss: 0.09313531965017319 2023-01-22 11:52:01.630843: step: 484/464, loss: 0.02037380449473858 2023-01-22 11:52:02.280111: step: 486/464, loss: 0.4801367223262787 2023-01-22 11:52:02.880639: step: 488/464, loss: 0.191447913646698 2023-01-22 11:52:03.514034: step: 490/464, loss: 0.10895294696092606 2023-01-22 11:52:04.104116: step: 492/464, loss: 0.192514106631279 2023-01-22 11:52:04.724135: step: 494/464, loss: 0.36968791484832764 2023-01-22 11:52:05.347492: step: 496/464, loss: 0.0772470086812973 2023-01-22 11:52:05.993502: step: 498/464, loss: 0.14960245788097382 2023-01-22 11:52:06.642237: step: 500/464, loss: 0.12601497769355774 2023-01-22 11:52:07.227963: step: 502/464, loss: 
0.3592793643474579 2023-01-22 11:52:07.868907: step: 504/464, loss: 0.05251913145184517 2023-01-22 11:52:08.479361: step: 506/464, loss: 0.10560287535190582 2023-01-22 11:52:09.105725: step: 508/464, loss: 0.0798950046300888 2023-01-22 11:52:09.721574: step: 510/464, loss: 2.3549673557281494 2023-01-22 11:52:10.307036: step: 512/464, loss: 0.1024058386683464 2023-01-22 11:52:10.915902: step: 514/464, loss: 0.1027681976556778 2023-01-22 11:52:11.568234: step: 516/464, loss: 0.03300444036722183 2023-01-22 11:52:12.221389: step: 518/464, loss: 0.08592050522565842 2023-01-22 11:52:12.873464: step: 520/464, loss: 0.0952911302447319 2023-01-22 11:52:13.488423: step: 522/464, loss: 0.0671185627579689 2023-01-22 11:52:14.086180: step: 524/464, loss: 0.34994953870773315 2023-01-22 11:52:14.719606: step: 526/464, loss: 0.07091367244720459 2023-01-22 11:52:15.268118: step: 528/464, loss: 0.1497611254453659 2023-01-22 11:52:15.862165: step: 530/464, loss: 0.8266758322715759 2023-01-22 11:52:16.534590: step: 532/464, loss: 0.14483654499053955 2023-01-22 11:52:17.173470: step: 534/464, loss: 0.032947149127721786 2023-01-22 11:52:17.779122: step: 536/464, loss: 0.06762748211622238 2023-01-22 11:52:18.437630: step: 538/464, loss: 0.21056100726127625 2023-01-22 11:52:19.061685: step: 540/464, loss: 0.005598200485110283 2023-01-22 11:52:19.688924: step: 542/464, loss: 0.09686747938394547 2023-01-22 11:52:20.292955: step: 544/464, loss: 0.14103296399116516 2023-01-22 11:52:20.916668: step: 546/464, loss: 0.10201167315244675 2023-01-22 11:52:21.560137: step: 548/464, loss: 0.05190252885222435 2023-01-22 11:52:22.242039: step: 550/464, loss: 0.16771650314331055 2023-01-22 11:52:22.950332: step: 552/464, loss: 0.06907981634140015 2023-01-22 11:52:23.559819: step: 554/464, loss: 0.0784645602107048 2023-01-22 11:52:24.183769: step: 556/464, loss: 0.06753809750080109 2023-01-22 11:52:24.810950: step: 558/464, loss: 0.05707962065935135 2023-01-22 11:52:25.395709: step: 560/464, loss: 0.00938950851559639 2023-01-22 11:52:26.048297: step: 562/464, loss: 0.2545424699783325 2023-01-22 11:52:26.708644: step: 564/464, loss: 0.18138128519058228 2023-01-22 11:52:27.440417: step: 566/464, loss: 0.1237185001373291 2023-01-22 11:52:28.119227: step: 568/464, loss: 0.058364395052194595 2023-01-22 11:52:28.765787: step: 570/464, loss: 0.027004418894648552 2023-01-22 11:52:29.389401: step: 572/464, loss: 0.07947518676519394 2023-01-22 11:52:30.064833: step: 574/464, loss: 0.05337420478463173 2023-01-22 11:52:30.687781: step: 576/464, loss: 0.11597231030464172 2023-01-22 11:52:31.425713: step: 578/464, loss: 0.22074364125728607 2023-01-22 11:52:32.081431: step: 580/464, loss: 0.06235186755657196 2023-01-22 11:52:32.721762: step: 582/464, loss: 0.3610675036907196 2023-01-22 11:52:33.279716: step: 584/464, loss: 0.04820900782942772 2023-01-22 11:52:33.931676: step: 586/464, loss: 0.1334627866744995 2023-01-22 11:52:34.561474: step: 588/464, loss: 0.1078186184167862 2023-01-22 11:52:35.233426: step: 590/464, loss: 0.05610848218202591 2023-01-22 11:52:35.870236: step: 592/464, loss: 0.025929966941475868 2023-01-22 11:52:36.498919: step: 594/464, loss: 0.025815889239311218 2023-01-22 11:52:37.091093: step: 596/464, loss: 0.2066698968410492 2023-01-22 11:52:37.699095: step: 598/464, loss: 0.026584582403302193 2023-01-22 11:52:38.280658: step: 600/464, loss: 0.08874484896659851 2023-01-22 11:52:38.939940: step: 602/464, loss: 0.1297636330127716 2023-01-22 11:52:39.528045: step: 604/464, loss: 0.034968677908182144 2023-01-22 
11:52:40.154317: step: 606/464, loss: 0.057455047965049744 2023-01-22 11:52:40.807248: step: 608/464, loss: 0.6402359008789062 2023-01-22 11:52:41.382246: step: 610/464, loss: 0.27028095722198486 2023-01-22 11:52:41.967301: step: 612/464, loss: 0.05439862608909607 2023-01-22 11:52:42.757748: step: 614/464, loss: 0.0543132908642292 2023-01-22 11:52:43.424455: step: 616/464, loss: 0.11633554846048355 2023-01-22 11:52:44.103013: step: 618/464, loss: 0.153121218085289 2023-01-22 11:52:44.780280: step: 620/464, loss: 0.03367575258016586 2023-01-22 11:52:45.445985: step: 622/464, loss: 0.024352246895432472 2023-01-22 11:52:46.082840: step: 624/464, loss: 0.0796428844332695 2023-01-22 11:52:46.691682: step: 626/464, loss: 0.2559564709663391 2023-01-22 11:52:47.281674: step: 628/464, loss: 0.08324375003576279 2023-01-22 11:52:47.866123: step: 630/464, loss: 0.027401749044656754 2023-01-22 11:52:48.503362: step: 632/464, loss: 0.09654907882213593 2023-01-22 11:52:49.079689: step: 634/464, loss: 0.48647624254226685 2023-01-22 11:52:49.732931: step: 636/464, loss: 0.2040783166885376 2023-01-22 11:52:50.390058: step: 638/464, loss: 0.1974896490573883 2023-01-22 11:52:50.966567: step: 640/464, loss: 0.31124648451805115 2023-01-22 11:52:51.620667: step: 642/464, loss: 0.1740168184041977 2023-01-22 11:52:52.310847: step: 644/464, loss: 0.11383002996444702 2023-01-22 11:52:53.008565: step: 646/464, loss: 0.08932628482580185 2023-01-22 11:52:53.704381: step: 648/464, loss: 0.15132799744606018 2023-01-22 11:52:54.302044: step: 650/464, loss: 0.060079481452703476 2023-01-22 11:52:54.972761: step: 652/464, loss: 0.1314394623041153 2023-01-22 11:52:55.581028: step: 654/464, loss: 0.09137765318155289 2023-01-22 11:52:56.215829: step: 656/464, loss: 0.45513778924942017 2023-01-22 11:52:56.827015: step: 658/464, loss: 0.09241456538438797 2023-01-22 11:52:57.384938: step: 660/464, loss: 0.020481666550040245 2023-01-22 11:52:57.983560: step: 662/464, loss: 0.129355788230896 2023-01-22 11:52:58.614192: step: 664/464, loss: 0.13476087152957916 2023-01-22 11:52:59.228794: step: 666/464, loss: 0.14710845053195953 2023-01-22 11:52:59.888427: step: 668/464, loss: 0.24165816605091095 2023-01-22 11:53:00.489414: step: 670/464, loss: 0.07523495703935623 2023-01-22 11:53:01.137702: step: 672/464, loss: 0.1060524731874466 2023-01-22 11:53:01.764217: step: 674/464, loss: 0.02545899897813797 2023-01-22 11:53:02.465969: step: 676/464, loss: 0.10189370065927505 2023-01-22 11:53:03.092148: step: 678/464, loss: 0.10449320822954178 2023-01-22 11:53:03.780168: step: 680/464, loss: 0.09408724308013916 2023-01-22 11:53:04.348226: step: 682/464, loss: 0.1366397887468338 2023-01-22 11:53:04.991697: step: 684/464, loss: 0.4168032109737396 2023-01-22 11:53:05.668430: step: 686/464, loss: 0.3207146227359772 2023-01-22 11:53:06.296470: step: 688/464, loss: 0.08765596151351929 2023-01-22 11:53:06.933833: step: 690/464, loss: 0.05220439285039902 2023-01-22 11:53:07.611986: step: 692/464, loss: 0.14175178110599518 2023-01-22 11:53:08.244243: step: 694/464, loss: 0.0828574076294899 2023-01-22 11:53:08.877576: step: 696/464, loss: 0.17829617857933044 2023-01-22 11:53:09.511318: step: 698/464, loss: 0.07231821864843369 2023-01-22 11:53:10.083925: step: 700/464, loss: 0.07823842018842697 2023-01-22 11:53:10.714721: step: 702/464, loss: 0.011867905966937542 2023-01-22 11:53:11.332087: step: 704/464, loss: 0.07791872322559357 2023-01-22 11:53:11.988900: step: 706/464, loss: 0.01650223508477211 2023-01-22 11:53:12.607834: step: 708/464, loss: 
0.07700739800930023 2023-01-22 11:53:13.204165: step: 710/464, loss: 0.05118641257286072 2023-01-22 11:53:13.821067: step: 712/464, loss: 0.06447432190179825 2023-01-22 11:53:14.526851: step: 714/464, loss: 0.2940301299095154 2023-01-22 11:53:15.183961: step: 716/464, loss: 0.10145014524459839 2023-01-22 11:53:15.808263: step: 718/464, loss: 0.0858948826789856 2023-01-22 11:53:16.445823: step: 720/464, loss: 0.0860401913523674 2023-01-22 11:53:17.066230: step: 722/464, loss: 0.06797818094491959 2023-01-22 11:53:17.619980: step: 724/464, loss: 0.15240587294101715 2023-01-22 11:53:18.252042: step: 726/464, loss: 0.026602206751704216 2023-01-22 11:53:18.950789: step: 728/464, loss: 0.20981769263744354 2023-01-22 11:53:19.630919: step: 730/464, loss: 0.0704990103840828 2023-01-22 11:53:20.270628: step: 732/464, loss: 0.0580148808658123 2023-01-22 11:53:20.892282: step: 734/464, loss: 0.07398834824562073 2023-01-22 11:53:21.511512: step: 736/464, loss: 0.21365252137184143 2023-01-22 11:53:22.101404: step: 738/464, loss: 0.08065182715654373 2023-01-22 11:53:22.724919: step: 740/464, loss: 0.09811010211706161 2023-01-22 11:53:23.365133: step: 742/464, loss: 0.03974481299519539 2023-01-22 11:53:23.955403: step: 744/464, loss: 0.10361728072166443 2023-01-22 11:53:24.581977: step: 746/464, loss: 0.08695392310619354 2023-01-22 11:53:25.195811: step: 748/464, loss: 0.04826812446117401 2023-01-22 11:53:25.767871: step: 750/464, loss: 0.048566415905952454 2023-01-22 11:53:26.435987: step: 752/464, loss: 0.05902061611413956 2023-01-22 11:53:27.068608: step: 754/464, loss: 0.200590580701828 2023-01-22 11:53:27.706662: step: 756/464, loss: 0.06276501715183258 2023-01-22 11:53:28.338458: step: 758/464, loss: 0.17710302770137787 2023-01-22 11:53:29.000183: step: 760/464, loss: 0.03277694061398506 2023-01-22 11:53:29.573704: step: 762/464, loss: 0.06173248216509819 2023-01-22 11:53:30.201537: step: 764/464, loss: 0.04483325779438019 2023-01-22 11:53:30.755614: step: 766/464, loss: 0.20668122172355652 2023-01-22 11:53:31.368504: step: 768/464, loss: 0.06592900305986404 2023-01-22 11:53:32.082968: step: 770/464, loss: 0.26677677035331726 2023-01-22 11:53:32.736472: step: 772/464, loss: 0.12577171623706818 2023-01-22 11:53:33.349012: step: 774/464, loss: 0.037337757647037506 2023-01-22 11:53:33.959490: step: 776/464, loss: 0.8632736802101135 2023-01-22 11:53:34.538394: step: 778/464, loss: 0.09053826332092285 2023-01-22 11:53:35.147953: step: 780/464, loss: 0.2430754154920578 2023-01-22 11:53:35.833572: step: 782/464, loss: 0.057098012417554855 2023-01-22 11:53:36.503724: step: 784/464, loss: 0.248166024684906 2023-01-22 11:53:37.066180: step: 786/464, loss: 0.05004265159368515 2023-01-22 11:53:37.766162: step: 788/464, loss: 0.11410597711801529 2023-01-22 11:53:38.470790: step: 790/464, loss: 0.0983242616057396 2023-01-22 11:53:39.189602: step: 792/464, loss: 0.05851510167121887 2023-01-22 11:53:39.807703: step: 794/464, loss: 0.05701727420091629 2023-01-22 11:53:40.374449: step: 796/464, loss: 0.07675885409116745 2023-01-22 11:53:40.910567: step: 798/464, loss: 1.612334966659546 2023-01-22 11:53:41.518890: step: 800/464, loss: 0.12681429088115692 2023-01-22 11:53:42.201038: step: 802/464, loss: 0.7585135102272034 2023-01-22 11:53:42.841657: step: 804/464, loss: 0.03295173868536949 2023-01-22 11:53:43.466243: step: 806/464, loss: 0.0535990409553051 2023-01-22 11:53:44.116704: step: 808/464, loss: 0.0772099569439888 2023-01-22 11:53:44.761589: step: 810/464, loss: 0.12030258774757385 2023-01-22 11:53:45.352936: 
step: 812/464, loss: 0.14565113186836243 2023-01-22 11:53:45.949879: step: 814/464, loss: 0.04593478515744209 2023-01-22 11:53:46.552595: step: 816/464, loss: 0.04296870529651642 2023-01-22 11:53:47.255780: step: 818/464, loss: 0.07276370376348495 2023-01-22 11:53:47.897185: step: 820/464, loss: 0.07223569601774216 2023-01-22 11:53:48.565629: step: 822/464, loss: 0.07028353214263916 2023-01-22 11:53:49.208599: step: 824/464, loss: 0.09849094599485397 2023-01-22 11:53:49.868642: step: 826/464, loss: 0.10646132379770279 2023-01-22 11:53:50.465292: step: 828/464, loss: 0.06366239488124847 2023-01-22 11:53:51.098841: step: 830/464, loss: 0.054628703743219376 2023-01-22 11:53:51.728488: step: 832/464, loss: 0.08870861679315567 2023-01-22 11:53:52.306154: step: 834/464, loss: 0.09653643518686295 2023-01-22 11:53:52.889501: step: 836/464, loss: 0.13117004930973053 2023-01-22 11:53:53.497830: step: 838/464, loss: 0.08204658329486847 2023-01-22 11:53:54.158656: step: 840/464, loss: 0.11241410672664642 2023-01-22 11:53:54.806804: step: 842/464, loss: 0.0784202367067337 2023-01-22 11:53:55.431262: step: 844/464, loss: 0.10148025304079056 2023-01-22 11:53:56.117049: step: 846/464, loss: 0.23058192431926727 2023-01-22 11:53:56.693164: step: 848/464, loss: 0.009667718783020973 2023-01-22 11:53:57.294472: step: 850/464, loss: 0.06157953292131424 2023-01-22 11:53:57.859410: step: 852/464, loss: 0.06967552751302719 2023-01-22 11:53:58.503514: step: 854/464, loss: 0.17770619690418243 2023-01-22 11:53:59.161269: step: 856/464, loss: 0.2894016206264496 2023-01-22 11:53:59.804313: step: 858/464, loss: 0.09124509990215302 2023-01-22 11:54:00.519310: step: 860/464, loss: 0.0562569834291935 2023-01-22 11:54:01.250831: step: 862/464, loss: 0.12094349414110184 2023-01-22 11:54:01.832105: step: 864/464, loss: 0.18446677923202515 2023-01-22 11:54:02.462385: step: 866/464, loss: 0.21191228926181793 2023-01-22 11:54:03.117276: step: 868/464, loss: 0.1075199767947197 2023-01-22 11:54:03.777615: step: 870/464, loss: 0.08860337734222412 2023-01-22 11:54:04.511881: step: 872/464, loss: 0.06619521975517273 2023-01-22 11:54:05.178316: step: 874/464, loss: 0.17170093953609467 2023-01-22 11:54:05.794846: step: 876/464, loss: 0.1552496701478958 2023-01-22 11:54:06.471046: step: 878/464, loss: 0.02386847883462906 2023-01-22 11:54:07.123195: step: 880/464, loss: 0.1008271872997284 2023-01-22 11:54:07.724925: step: 882/464, loss: 0.016682665795087814 2023-01-22 11:54:08.294963: step: 884/464, loss: 0.058642178773880005 2023-01-22 11:54:08.923515: step: 886/464, loss: 0.07499005645513535 2023-01-22 11:54:09.537505: step: 888/464, loss: 0.23680682480335236 2023-01-22 11:54:10.131659: step: 890/464, loss: 0.13080447912216187 2023-01-22 11:54:10.764694: step: 892/464, loss: 0.03092711791396141 2023-01-22 11:54:11.435111: step: 894/464, loss: 0.13128332793712616 2023-01-22 11:54:11.997129: step: 896/464, loss: 0.06329464912414551 2023-01-22 11:54:12.631516: step: 898/464, loss: 0.050127509981393814 2023-01-22 11:54:13.232984: step: 900/464, loss: 0.06793268769979477 2023-01-22 11:54:13.842255: step: 902/464, loss: 0.21452850103378296 2023-01-22 11:54:14.507459: step: 904/464, loss: 0.1359676569700241 2023-01-22 11:54:15.123842: step: 906/464, loss: 0.02065298706293106 2023-01-22 11:54:15.769800: step: 908/464, loss: 0.10467097908258438 2023-01-22 11:54:16.427641: step: 910/464, loss: 0.031337834894657135 2023-01-22 11:54:17.067109: step: 912/464, loss: 0.03133145719766617 2023-01-22 11:54:17.691120: step: 914/464, loss: 
0.066623255610466 2023-01-22 11:54:18.272951: step: 916/464, loss: 0.12824635207653046 2023-01-22 11:54:18.883525: step: 918/464, loss: 0.17738160490989685 2023-01-22 11:54:19.516593: step: 920/464, loss: 0.041672658175230026 2023-01-22 11:54:20.208167: step: 922/464, loss: 0.04812244698405266 2023-01-22 11:54:20.889676: step: 924/464, loss: 0.20663529634475708 2023-01-22 11:54:21.573444: step: 926/464, loss: 0.08598199486732483 2023-01-22 11:54:22.257740: step: 928/464, loss: 0.12492011487483978 2023-01-22 11:54:22.746198: step: 930/464, loss: 0.010233801789581776 ================================================== Loss: 0.141 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28244904761904766, 'r': 0.3349727794343544, 'f1': 0.3064768311838625}, 'combined': 0.22582503350389865, 'epoch': 17} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28906991671027155, 'r': 0.30526207917062653, 'f1': 0.2969454258301137}, 'combined': 0.19386074432432293, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28015972235907743, 'r': 0.350332556042945, 'f1': 0.3113410742573896}, 'combined': 0.22940921261070812, 'epoch': 17} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3027574522692771, 'r': 0.3088793860467809, 'f1': 0.30578778163684944}, 'combined': 0.1996334740219846, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29375072826846893, 'r': 0.34168727975061, 'f1': 0.31591087092731834}, 'combined': 0.23277643120960298, 'epoch': 17} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.302158624246058, 'r': 0.30077002762728017, 'f1': 0.3014627269078589}, 'combined': 0.19680986316264365, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20238095238095236, 'r': 0.32380952380952377, 'f1': 0.24908424908424906}, 'combined': 0.16605616605616602, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21808510638297873, 'r': 0.44565217391304346, 'f1': 0.29285714285714287}, 'combined': 0.14642857142857144, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 
0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:57:02.272438: step: 2/464, loss: 0.041110098361968994 2023-01-22 11:57:02.906764: step: 4/464, loss: 0.0779784619808197 2023-01-22 11:57:03.514127: step: 6/464, loss: 0.04192517325282097 2023-01-22 11:57:04.130847: step: 8/464, loss: 0.09400144219398499 2023-01-22 11:57:04.773101: step: 10/464, loss: 0.025001734495162964 2023-01-22 11:57:05.387411: step: 12/464, loss: 0.02416273020207882 2023-01-22 11:57:06.088399: step: 14/464, loss: 0.10990064591169357 2023-01-22 11:57:06.778555: step: 16/464, loss: 0.033001650124788284 2023-01-22 11:57:07.374300: step: 18/464, loss: 0.1382390856742859 2023-01-22 11:57:07.942502: step: 20/464, loss: 0.038926269859075546 2023-01-22 11:57:08.607046: step: 22/464, loss: 0.06506065279245377 2023-01-22 11:57:09.226993: step: 24/464, loss: 0.05326371639966965 2023-01-22 11:57:09.891114: step: 26/464, loss: 0.030164137482643127 2023-01-22 11:57:10.482877: step: 28/464, loss: 0.06463479995727539 2023-01-22 11:57:11.105012: step: 30/464, loss: 0.224723681807518 2023-01-22 11:57:11.728337: step: 32/464, loss: 0.029780397191643715 2023-01-22 11:57:12.417114: step: 34/464, loss: 0.11149509251117706 2023-01-22 11:57:13.072602: step: 36/464, loss: 0.15237043797969818 2023-01-22 11:57:13.691861: step: 38/464, loss: 0.0657920241355896 2023-01-22 11:57:14.343892: step: 40/464, loss: 0.026680337265133858 2023-01-22 11:57:14.978185: step: 42/464, loss: 0.07511678338050842 2023-01-22 11:57:15.575084: step: 44/464, loss: 0.027576958760619164 2023-01-22 11:57:16.202273: step: 46/464, loss: 0.1626603901386261 2023-01-22 11:57:16.826368: step: 48/464, loss: 0.06560619175434113 2023-01-22 11:57:17.324228: step: 50/464, loss: 0.06561832875013351 2023-01-22 11:57:18.055584: step: 52/464, loss: 0.09738533198833466 2023-01-22 11:57:18.660494: step: 54/464, loss: 0.017096685245633125 2023-01-22 11:57:19.337307: step: 56/464, loss: 0.024371450766921043 2023-01-22 11:57:19.944896: step: 58/464, loss: 0.2863137722015381 2023-01-22 11:57:20.556830: step: 60/464, loss: 0.04328886792063713 2023-01-22 11:57:21.160701: step: 62/464, loss: 0.15461523830890656 2023-01-22 
11:57:21.778435: step: 64/464, loss: 0.0762556940317154 2023-01-22 11:57:22.380535: step: 66/464, loss: 0.008332204073667526 2023-01-22 11:57:22.950794: step: 68/464, loss: 0.1283010095357895 2023-01-22 11:57:23.651480: step: 70/464, loss: 0.8763940334320068 2023-01-22 11:57:24.252303: step: 72/464, loss: 0.03446583077311516 2023-01-22 11:57:24.830688: step: 74/464, loss: 0.09413344413042068 2023-01-22 11:57:25.420927: step: 76/464, loss: 0.025667276233434677 2023-01-22 11:57:26.018295: step: 78/464, loss: 0.006051539443433285 2023-01-22 11:57:26.652835: step: 80/464, loss: 0.10737523436546326 2023-01-22 11:57:27.240228: step: 82/464, loss: 0.04778726026415825 2023-01-22 11:57:27.996112: step: 84/464, loss: 0.022513197734951973 2023-01-22 11:57:28.572061: step: 86/464, loss: 0.04210365563631058 2023-01-22 11:57:29.191951: step: 88/464, loss: 0.08981982618570328 2023-01-22 11:57:29.837816: step: 90/464, loss: 0.07886163890361786 2023-01-22 11:57:30.454816: step: 92/464, loss: 0.0617126002907753 2023-01-22 11:57:31.095908: step: 94/464, loss: 0.05298151820898056 2023-01-22 11:57:31.762882: step: 96/464, loss: 0.15078003704547882 2023-01-22 11:57:32.519998: step: 98/464, loss: 0.02470622956752777 2023-01-22 11:57:33.215088: step: 100/464, loss: 0.048409465700387955 2023-01-22 11:57:33.815383: step: 102/464, loss: 0.010371328331530094 2023-01-22 11:57:34.483820: step: 104/464, loss: 0.04539891332387924 2023-01-22 11:57:35.110397: step: 106/464, loss: 0.2269112467765808 2023-01-22 11:57:35.686334: step: 108/464, loss: 0.031764477491378784 2023-01-22 11:57:36.316079: step: 110/464, loss: 0.09004306048154831 2023-01-22 11:57:36.953149: step: 112/464, loss: 0.007801396306604147 2023-01-22 11:57:37.595838: step: 114/464, loss: 0.09724310785531998 2023-01-22 11:57:38.148187: step: 116/464, loss: 0.06961280852556229 2023-01-22 11:57:38.878725: step: 118/464, loss: 0.3340175151824951 2023-01-22 11:57:39.485651: step: 120/464, loss: 0.7357770800590515 2023-01-22 11:57:40.042862: step: 122/464, loss: 0.01734624058008194 2023-01-22 11:57:40.660800: step: 124/464, loss: 0.037545252591371536 2023-01-22 11:57:41.279236: step: 126/464, loss: 0.029403043910861015 2023-01-22 11:57:41.932081: step: 128/464, loss: 0.02145428955554962 2023-01-22 11:57:42.555615: step: 130/464, loss: 0.12339643388986588 2023-01-22 11:57:43.160712: step: 132/464, loss: 0.10763213783502579 2023-01-22 11:57:43.763178: step: 134/464, loss: 0.09028073400259018 2023-01-22 11:57:44.378803: step: 136/464, loss: 0.11030615121126175 2023-01-22 11:57:44.994966: step: 138/464, loss: 0.06513524800539017 2023-01-22 11:57:45.636891: step: 140/464, loss: 0.020626483485102654 2023-01-22 11:57:46.295346: step: 142/464, loss: 0.038871075958013535 2023-01-22 11:57:46.942368: step: 144/464, loss: 0.1584267020225525 2023-01-22 11:57:47.512246: step: 146/464, loss: 0.43131452798843384 2023-01-22 11:57:48.096138: step: 148/464, loss: 0.041273582726716995 2023-01-22 11:57:48.718791: step: 150/464, loss: 0.09523283690214157 2023-01-22 11:57:49.306188: step: 152/464, loss: 0.029719259589910507 2023-01-22 11:57:49.978355: step: 154/464, loss: 0.06470183283090591 2023-01-22 11:57:50.576893: step: 156/464, loss: 0.028908902779221535 2023-01-22 11:57:51.257889: step: 158/464, loss: 0.08944498002529144 2023-01-22 11:57:51.865643: step: 160/464, loss: 0.03279788792133331 2023-01-22 11:57:52.463685: step: 162/464, loss: 0.03688148781657219 2023-01-22 11:57:53.095538: step: 164/464, loss: 0.009075549431145191 2023-01-22 11:57:53.727198: step: 166/464, loss: 
0.060015976428985596 2023-01-22 11:57:54.368820: step: 168/464, loss: 0.596712589263916 2023-01-22 11:57:55.013373: step: 170/464, loss: 0.06025798246264458 2023-01-22 11:57:55.715522: step: 172/464, loss: 0.052680373191833496 2023-01-22 11:57:56.276142: step: 174/464, loss: 0.07611407339572906 2023-01-22 11:57:57.001217: step: 176/464, loss: 0.044987499713897705 2023-01-22 11:57:57.639627: step: 178/464, loss: 0.07838653028011322 2023-01-22 11:57:58.247720: step: 180/464, loss: 0.03733246773481369 2023-01-22 11:57:58.944132: step: 182/464, loss: 0.34007447957992554 2023-01-22 11:57:59.672087: step: 184/464, loss: 0.39250442385673523 2023-01-22 11:58:00.315997: step: 186/464, loss: 0.0765819102525711 2023-01-22 11:58:01.029136: step: 188/464, loss: 0.32012081146240234 2023-01-22 11:58:01.654672: step: 190/464, loss: 0.12850935757160187 2023-01-22 11:58:02.222147: step: 192/464, loss: 0.1261843889951706 2023-01-22 11:58:02.845859: step: 194/464, loss: 0.07346116006374359 2023-01-22 11:58:03.445190: step: 196/464, loss: 0.040497634559869766 2023-01-22 11:58:04.024312: step: 198/464, loss: 0.06537256389856339 2023-01-22 11:58:04.652201: step: 200/464, loss: 0.12721972167491913 2023-01-22 11:58:05.226871: step: 202/464, loss: 0.1444709450006485 2023-01-22 11:58:05.805647: step: 204/464, loss: 0.03681337833404541 2023-01-22 11:58:06.372534: step: 206/464, loss: 0.11811290681362152 2023-01-22 11:58:06.964520: step: 208/464, loss: 0.4837630093097687 2023-01-22 11:58:07.601282: step: 210/464, loss: 0.040075208991765976 2023-01-22 11:58:08.219732: step: 212/464, loss: 0.04343654587864876 2023-01-22 11:58:08.809174: step: 214/464, loss: 0.07648956775665283 2023-01-22 11:58:09.458508: step: 216/464, loss: 0.09798888117074966 2023-01-22 11:58:10.012438: step: 218/464, loss: 0.006863585207611322 2023-01-22 11:58:10.620347: step: 220/464, loss: 0.14883661270141602 2023-01-22 11:58:11.280055: step: 222/464, loss: 0.04018692672252655 2023-01-22 11:58:11.892312: step: 224/464, loss: 0.15864543616771698 2023-01-22 11:58:12.484869: step: 226/464, loss: 0.340562641620636 2023-01-22 11:58:13.119160: step: 228/464, loss: 4.534863471984863 2023-01-22 11:58:13.755169: step: 230/464, loss: 0.0738111138343811 2023-01-22 11:58:14.385940: step: 232/464, loss: 0.1230124831199646 2023-01-22 11:58:15.082800: step: 234/464, loss: 0.08037590235471725 2023-01-22 11:58:15.648180: step: 236/464, loss: 0.199858620762825 2023-01-22 11:58:16.278963: step: 238/464, loss: 0.6242475509643555 2023-01-22 11:58:16.859768: step: 240/464, loss: 0.044954366981983185 2023-01-22 11:58:17.467315: step: 242/464, loss: 0.08434027433395386 2023-01-22 11:58:18.074799: step: 244/464, loss: 0.05194975063204765 2023-01-22 11:58:18.693605: step: 246/464, loss: 0.07475651800632477 2023-01-22 11:58:19.262743: step: 248/464, loss: 0.053270064294338226 2023-01-22 11:58:19.882421: step: 250/464, loss: 0.011551953852176666 2023-01-22 11:58:20.501773: step: 252/464, loss: 0.007733418606221676 2023-01-22 11:58:21.159688: step: 254/464, loss: 0.15320716798305511 2023-01-22 11:58:21.798594: step: 256/464, loss: 0.09333204478025436 2023-01-22 11:58:22.417743: step: 258/464, loss: 0.055523402988910675 2023-01-22 11:58:23.083569: step: 260/464, loss: 0.10055474936962128 2023-01-22 11:58:23.691474: step: 262/464, loss: 0.02099481225013733 2023-01-22 11:58:24.323539: step: 264/464, loss: 0.12725037336349487 2023-01-22 11:58:24.969445: step: 266/464, loss: 0.07128822058439255 2023-01-22 11:58:25.576086: step: 268/464, loss: 0.07808691263198853 2023-01-22 
11:58:26.216177: step: 270/464, loss: 0.04100404679775238 2023-01-22 11:58:26.851465: step: 272/464, loss: 0.034963686019182205 2023-01-22 11:58:27.443626: step: 274/464, loss: 0.05234309285879135 2023-01-22 11:58:28.117488: step: 276/464, loss: 0.12192157655954361 2023-01-22 11:58:28.770720: step: 278/464, loss: 0.029132816940546036 2023-01-22 11:58:29.374918: step: 280/464, loss: 0.03790270909667015 2023-01-22 11:58:29.962444: step: 282/464, loss: 0.03763355314731598 2023-01-22 11:58:30.572962: step: 284/464, loss: 0.04848802089691162 2023-01-22 11:58:31.155754: step: 286/464, loss: 0.04248863086104393 2023-01-22 11:58:31.758817: step: 288/464, loss: 0.06175648421049118 2023-01-22 11:58:32.423093: step: 290/464, loss: 0.19913525879383087 2023-01-22 11:58:33.074593: step: 292/464, loss: 0.07799387723207474 2023-01-22 11:58:33.716909: step: 294/464, loss: 0.05060545355081558 2023-01-22 11:58:34.357253: step: 296/464, loss: 0.09269505739212036 2023-01-22 11:58:35.022035: step: 298/464, loss: 0.08034278452396393 2023-01-22 11:58:35.657477: step: 300/464, loss: 0.13465744256973267 2023-01-22 11:58:36.228025: step: 302/464, loss: 0.04607897996902466 2023-01-22 11:58:36.948486: step: 304/464, loss: 0.04409221559762955 2023-01-22 11:58:37.561173: step: 306/464, loss: 0.09037043154239655 2023-01-22 11:58:38.237145: step: 308/464, loss: 0.0722469910979271 2023-01-22 11:58:38.855407: step: 310/464, loss: 2.683577537536621 2023-01-22 11:58:39.506063: step: 312/464, loss: 0.06599780917167664 2023-01-22 11:58:40.137305: step: 314/464, loss: 0.0621807798743248 2023-01-22 11:58:40.755191: step: 316/464, loss: 0.12119466811418533 2023-01-22 11:58:41.305740: step: 318/464, loss: 0.060603171586990356 2023-01-22 11:58:41.927449: step: 320/464, loss: 0.019483543932437897 2023-01-22 11:58:42.522553: step: 322/464, loss: 0.08985820412635803 2023-01-22 11:58:43.146049: step: 324/464, loss: 0.07970519363880157 2023-01-22 11:58:43.815679: step: 326/464, loss: 0.06589441001415253 2023-01-22 11:58:44.481505: step: 328/464, loss: 0.13149969279766083 2023-01-22 11:58:45.083632: step: 330/464, loss: 0.04497695714235306 2023-01-22 11:58:45.705507: step: 332/464, loss: 0.07083417475223541 2023-01-22 11:58:46.434787: step: 334/464, loss: 0.15319781005382538 2023-01-22 11:58:47.047446: step: 336/464, loss: 0.16858309507369995 2023-01-22 11:58:47.642521: step: 338/464, loss: 0.5628475546836853 2023-01-22 11:58:48.262561: step: 340/464, loss: 0.041089314967393875 2023-01-22 11:58:48.817178: step: 342/464, loss: 0.03335518017411232 2023-01-22 11:58:49.418723: step: 344/464, loss: 0.13614562153816223 2023-01-22 11:58:50.026424: step: 346/464, loss: 0.017061160877346992 2023-01-22 11:58:50.676095: step: 348/464, loss: 0.0899868905544281 2023-01-22 11:58:51.303195: step: 350/464, loss: 0.05003766342997551 2023-01-22 11:58:51.867737: step: 352/464, loss: 0.03434675186872482 2023-01-22 11:58:52.560102: step: 354/464, loss: 0.31193801760673523 2023-01-22 11:58:53.163599: step: 356/464, loss: 0.19960804283618927 2023-01-22 11:58:53.791279: step: 358/464, loss: 0.13193681836128235 2023-01-22 11:58:54.419626: step: 360/464, loss: 0.16517403721809387 2023-01-22 11:58:55.110574: step: 362/464, loss: 4.577445983886719 2023-01-22 11:58:55.696384: step: 364/464, loss: 0.022282257676124573 2023-01-22 11:58:56.311427: step: 366/464, loss: 0.0883570984005928 2023-01-22 11:58:56.900625: step: 368/464, loss: 0.07743432372808456 2023-01-22 11:58:57.576802: step: 370/464, loss: 0.06543838977813721 2023-01-22 11:58:58.183443: step: 372/464, loss: 
0.7924804091453552 2023-01-22 11:58:58.785128: step: 374/464, loss: 0.07172054052352905 2023-01-22 11:58:59.411717: step: 376/464, loss: 0.15854498744010925 2023-01-22 11:59:00.055659: step: 378/464, loss: 0.0758543461561203 2023-01-22 11:59:00.774489: step: 380/464, loss: 0.4814733564853668 2023-01-22 11:59:01.491977: step: 382/464, loss: 0.07087898254394531 2023-01-22 11:59:02.064059: step: 384/464, loss: 0.017085997387766838 2023-01-22 11:59:02.689865: step: 386/464, loss: 0.15765145421028137 2023-01-22 11:59:03.281490: step: 388/464, loss: 0.03160521388053894 2023-01-22 11:59:03.831689: step: 390/464, loss: 0.02035319060087204 2023-01-22 11:59:04.476284: step: 392/464, loss: 0.027835458517074585 2023-01-22 11:59:05.095802: step: 394/464, loss: 0.10642287135124207 2023-01-22 11:59:05.681264: step: 396/464, loss: 0.004822934977710247 2023-01-22 11:59:06.287339: step: 398/464, loss: 0.022031346336007118 2023-01-22 11:59:06.897223: step: 400/464, loss: 0.05492803454399109 2023-01-22 11:59:07.459016: step: 402/464, loss: 0.04970407485961914 2023-01-22 11:59:08.027147: step: 404/464, loss: 0.010082113556563854 2023-01-22 11:59:08.726809: step: 406/464, loss: 0.11934235692024231 2023-01-22 11:59:09.279957: step: 408/464, loss: 0.02112478017807007 2023-01-22 11:59:09.898194: step: 410/464, loss: 0.09695271402597427 2023-01-22 11:59:10.487310: step: 412/464, loss: 0.8858206272125244 2023-01-22 11:59:11.155576: step: 414/464, loss: 0.08377958089113235 2023-01-22 11:59:11.764581: step: 416/464, loss: 0.5520583391189575 2023-01-22 11:59:12.546398: step: 418/464, loss: 0.08656132221221924 2023-01-22 11:59:13.194309: step: 420/464, loss: 0.10898464173078537 2023-01-22 11:59:13.846124: step: 422/464, loss: 0.3741585612297058 2023-01-22 11:59:14.451574: step: 424/464, loss: 0.6237977147102356 2023-01-22 11:59:15.055418: step: 426/464, loss: 0.05429627373814583 2023-01-22 11:59:15.635282: step: 428/464, loss: 0.10554182529449463 2023-01-22 11:59:16.260490: step: 430/464, loss: 0.1171455830335617 2023-01-22 11:59:16.849559: step: 432/464, loss: 0.15304937958717346 2023-01-22 11:59:17.487026: step: 434/464, loss: 0.08756015449762344 2023-01-22 11:59:18.092782: step: 436/464, loss: 0.06931018829345703 2023-01-22 11:59:18.733477: step: 438/464, loss: 0.06887312233448029 2023-01-22 11:59:19.339611: step: 440/464, loss: 0.05952431634068489 2023-01-22 11:59:19.975656: step: 442/464, loss: 0.03734879940748215 2023-01-22 11:59:20.636302: step: 444/464, loss: 0.04014189913868904 2023-01-22 11:59:21.229583: step: 446/464, loss: 0.035013578832149506 2023-01-22 11:59:21.798641: step: 448/464, loss: 0.01310723926872015 2023-01-22 11:59:22.408897: step: 450/464, loss: 0.12435585260391235 2023-01-22 11:59:23.063724: step: 452/464, loss: 0.10649074614048004 2023-01-22 11:59:23.639307: step: 454/464, loss: 0.055591508746147156 2023-01-22 11:59:24.255188: step: 456/464, loss: 0.07723914831876755 2023-01-22 11:59:24.932685: step: 458/464, loss: 0.24242645502090454 2023-01-22 11:59:25.550563: step: 460/464, loss: 0.48419389128685 2023-01-22 11:59:26.197929: step: 462/464, loss: 0.11971094459295273 2023-01-22 11:59:26.905187: step: 464/464, loss: 0.02786792442202568 2023-01-22 11:59:27.536047: step: 466/464, loss: 0.04215722158551216 2023-01-22 11:59:28.144044: step: 468/464, loss: 0.29968586564064026 2023-01-22 11:59:28.719074: step: 470/464, loss: 0.15486252307891846 2023-01-22 11:59:29.336029: step: 472/464, loss: 0.02210089936852455 2023-01-22 11:59:29.906564: step: 474/464, loss: 0.018674930557608604 2023-01-22 
11:59:30.475683: step: 476/464, loss: 0.0230863057076931 2023-01-22 11:59:31.149946: step: 478/464, loss: 0.08007515966892242 2023-01-22 11:59:31.745569: step: 480/464, loss: 0.17813025414943695 2023-01-22 11:59:32.363808: step: 482/464, loss: 0.022379839792847633 2023-01-22 11:59:33.062966: step: 484/464, loss: 0.3104344308376312 2023-01-22 11:59:33.733952: step: 486/464, loss: 0.0335686057806015 2023-01-22 11:59:34.347914: step: 488/464, loss: 0.03525681793689728 2023-01-22 11:59:34.951489: step: 490/464, loss: 0.0288990531116724 2023-01-22 11:59:35.619043: step: 492/464, loss: 0.0916205570101738 2023-01-22 11:59:36.312264: step: 494/464, loss: 0.04630056768655777 2023-01-22 11:59:36.920058: step: 496/464, loss: 0.03312882408499718 2023-01-22 11:59:37.533555: step: 498/464, loss: 0.012081924825906754 2023-01-22 11:59:38.122949: step: 500/464, loss: 0.009474774822592735 2023-01-22 11:59:38.745855: step: 502/464, loss: 0.08945140242576599 2023-01-22 11:59:39.454899: step: 504/464, loss: 0.018516186624765396 2023-01-22 11:59:40.081823: step: 506/464, loss: 0.03508473560214043 2023-01-22 11:59:40.657858: step: 508/464, loss: 0.11172937601804733 2023-01-22 11:59:41.287283: step: 510/464, loss: 0.02396565116941929 2023-01-22 11:59:41.925330: step: 512/464, loss: 0.08729846775531769 2023-01-22 11:59:42.564402: step: 514/464, loss: 0.03557702153921127 2023-01-22 11:59:43.184213: step: 516/464, loss: 0.39258062839508057 2023-01-22 11:59:43.892049: step: 518/464, loss: 0.020482761785387993 2023-01-22 11:59:44.519963: step: 520/464, loss: 0.01152450405061245 2023-01-22 11:59:45.168493: step: 522/464, loss: 0.02186405472457409 2023-01-22 11:59:45.834718: step: 524/464, loss: 0.059222932904958725 2023-01-22 11:59:46.453114: step: 526/464, loss: 0.04049962759017944 2023-01-22 11:59:47.077562: step: 528/464, loss: 0.055559832602739334 2023-01-22 11:59:47.688508: step: 530/464, loss: 0.03861182928085327 2023-01-22 11:59:48.397076: step: 532/464, loss: 0.006717274431139231 2023-01-22 11:59:49.044977: step: 534/464, loss: 0.05977385863661766 2023-01-22 11:59:49.694981: step: 536/464, loss: 0.2655171751976013 2023-01-22 11:59:50.334926: step: 538/464, loss: 0.3412909209728241 2023-01-22 11:59:50.942254: step: 540/464, loss: 0.06314253807067871 2023-01-22 11:59:51.541285: step: 542/464, loss: 0.09367623925209045 2023-01-22 11:59:52.173350: step: 544/464, loss: 0.11986998468637466 2023-01-22 11:59:52.790744: step: 546/464, loss: 0.05728675052523613 2023-01-22 11:59:53.387470: step: 548/464, loss: 0.09846282005310059 2023-01-22 11:59:54.040279: step: 550/464, loss: 0.2600788176059723 2023-01-22 11:59:54.693374: step: 552/464, loss: 0.025453370064496994 2023-01-22 11:59:55.347423: step: 554/464, loss: 0.18188495934009552 2023-01-22 11:59:55.981640: step: 556/464, loss: 0.10237760096788406 2023-01-22 11:59:56.607342: step: 558/464, loss: 0.034575216472148895 2023-01-22 11:59:57.229125: step: 560/464, loss: 0.04972601681947708 2023-01-22 11:59:57.910487: step: 562/464, loss: 0.17534971237182617 2023-01-22 11:59:58.534000: step: 564/464, loss: 0.077382892370224 2023-01-22 11:59:59.151552: step: 566/464, loss: 0.11572468280792236 2023-01-22 11:59:59.777478: step: 568/464, loss: 0.1357371062040329 2023-01-22 12:00:00.468829: step: 570/464, loss: 0.07837279886007309 2023-01-22 12:00:01.111977: step: 572/464, loss: 0.45323073863983154 2023-01-22 12:00:01.720683: step: 574/464, loss: 0.052668049931526184 2023-01-22 12:00:02.373529: step: 576/464, loss: 0.0244721919298172 2023-01-22 12:00:03.015716: step: 578/464, 
loss: 0.13950631022453308 2023-01-22 12:00:03.565494: step: 580/464, loss: 0.19411024451255798 2023-01-22 12:00:04.208952: step: 582/464, loss: 0.7193666696548462 2023-01-22 12:00:04.848264: step: 584/464, loss: 0.07989802211523056 2023-01-22 12:00:05.447754: step: 586/464, loss: 0.09557466953992844 2023-01-22 12:00:06.055259: step: 588/464, loss: 0.011171412654221058 2023-01-22 12:00:06.747716: step: 590/464, loss: 0.06220633536577225 2023-01-22 12:00:07.392356: step: 592/464, loss: 0.04791721701622009 2023-01-22 12:00:08.055308: step: 594/464, loss: 0.04407791048288345 2023-01-22 12:00:08.657124: step: 596/464, loss: 0.053509440273046494 2023-01-22 12:00:09.399097: step: 598/464, loss: 0.07637985050678253 2023-01-22 12:00:10.156302: step: 600/464, loss: 0.01582491211593151 2023-01-22 12:00:10.786399: step: 602/464, loss: 0.016740994527935982 2023-01-22 12:00:11.403746: step: 604/464, loss: 0.02371152862906456 2023-01-22 12:00:12.005223: step: 606/464, loss: 0.09300397336483002 2023-01-22 12:00:12.620504: step: 608/464, loss: 0.030090780928730965 2023-01-22 12:00:13.295146: step: 610/464, loss: 0.05113796889781952 2023-01-22 12:00:13.872746: step: 612/464, loss: 0.024184707552194595 2023-01-22 12:00:14.484878: step: 614/464, loss: 0.026209469884634018 2023-01-22 12:00:15.140011: step: 616/464, loss: 0.0543823279440403 2023-01-22 12:00:15.740834: step: 618/464, loss: 0.06335633248090744 2023-01-22 12:00:16.327816: step: 620/464, loss: 0.10870085656642914 2023-01-22 12:00:16.958425: step: 622/464, loss: 0.03715025633573532 2023-01-22 12:00:17.463995: step: 624/464, loss: 0.03070726804435253 2023-01-22 12:00:18.055375: step: 626/464, loss: 0.042747415602207184 2023-01-22 12:00:18.740089: step: 628/464, loss: 0.227604478597641 2023-01-22 12:00:19.321808: step: 630/464, loss: 0.10268836468458176 2023-01-22 12:00:19.916304: step: 632/464, loss: 0.06253713369369507 2023-01-22 12:00:20.522764: step: 634/464, loss: 0.03581200912594795 2023-01-22 12:00:21.200839: step: 636/464, loss: 0.38426434993743896 2023-01-22 12:00:21.805611: step: 638/464, loss: 0.09403909742832184 2023-01-22 12:00:22.502135: step: 640/464, loss: 0.033941175788640976 2023-01-22 12:00:23.042424: step: 642/464, loss: 0.052015673369169235 2023-01-22 12:00:23.683600: step: 644/464, loss: 0.05198928713798523 2023-01-22 12:00:24.338515: step: 646/464, loss: 0.1153169572353363 2023-01-22 12:00:24.948453: step: 648/464, loss: 0.05121876299381256 2023-01-22 12:00:25.570679: step: 650/464, loss: 0.06071249395608902 2023-01-22 12:00:26.258356: step: 652/464, loss: 0.14332036674022675 2023-01-22 12:00:26.852454: step: 654/464, loss: 0.12269081175327301 2023-01-22 12:00:27.478620: step: 656/464, loss: 0.038532841950654984 2023-01-22 12:00:28.087327: step: 658/464, loss: 0.07894743978977203 2023-01-22 12:00:28.755513: step: 660/464, loss: 0.04618338122963905 2023-01-22 12:00:29.428041: step: 662/464, loss: 0.13720129430294037 2023-01-22 12:00:30.136223: step: 664/464, loss: 0.21078215539455414 2023-01-22 12:00:30.769144: step: 666/464, loss: 0.05811762437224388 2023-01-22 12:00:31.371114: step: 668/464, loss: 0.0910278782248497 2023-01-22 12:00:31.941018: step: 670/464, loss: 0.068028025329113 2023-01-22 12:00:32.618812: step: 672/464, loss: 0.20305296778678894 2023-01-22 12:00:33.283911: step: 674/464, loss: 0.1361401528120041 2023-01-22 12:00:33.984076: step: 676/464, loss: 0.0884474515914917 2023-01-22 12:00:34.653225: step: 678/464, loss: 0.055569324642419815 2023-01-22 12:00:35.296131: step: 680/464, loss: 0.11080397665500641 
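Every record in the stream above and below follows the same pattern (a timestamp, then "step: k/464, loss: v"), so the per-step losses can be recovered mechanically even from a flattened capture like this one. The sketch below is a hypothetical post-processing helper, not part of train.py, and the "train.log" filename is an assumption; it extracts every loss value and prints a quick summary, which makes isolated spikes such as the 4.534863471984863 at step 228/464 above easy to spot against the roughly 0.1 baseline.

```python
# Hypothetical log-analysis helper; not part of train.py.
# Extracts every "step: k/464, loss: v" record from a flattened training log
# and prints a quick summary of the loss values.
import re
from statistics import mean

STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.]+)")

def step_losses(log_text: str) -> list[float]:
    """Return all per-step loss values in the order they appear in the log."""
    return [float(loss) for _step, loss in STEP_RE.findall(log_text)]

if __name__ == "__main__":
    with open("train.log", encoding="utf-8") as f:  # assumed capture of this output
        losses = step_losses(f.read())
    print(f"{len(losses)} steps | mean {mean(losses):.3f} | max {max(losses):.3f}")
```

Grouping losses per epoch would additionally require splitting the text on the "Epoch: N" headers, since the step counter restarts at 2 in every epoch.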
2023-01-22 12:00:35.954571: step: 682/464, loss: 0.024835048243403435 2023-01-22 12:00:36.622692: step: 684/464, loss: 0.11517369002103806 2023-01-22 12:00:37.253188: step: 686/464, loss: 0.03534563630819321 2023-01-22 12:00:37.836381: step: 688/464, loss: 0.045791640877723694 2023-01-22 12:00:38.467620: step: 690/464, loss: 0.06101490184664726 2023-01-22 12:00:39.210229: step: 692/464, loss: 0.06657504290342331 2023-01-22 12:00:39.901271: step: 694/464, loss: 0.02768394909799099 2023-01-22 12:00:40.547775: step: 696/464, loss: 0.12973853945732117 2023-01-22 12:00:41.171530: step: 698/464, loss: 0.16631053388118744 2023-01-22 12:00:41.869101: step: 700/464, loss: 0.049925461411476135 2023-01-22 12:00:42.456450: step: 702/464, loss: 0.07540343701839447 2023-01-22 12:00:42.987027: step: 704/464, loss: 0.06347178667783737 2023-01-22 12:00:43.543278: step: 706/464, loss: 0.014731958508491516 2023-01-22 12:00:44.199787: step: 708/464, loss: 0.02005917578935623 2023-01-22 12:00:44.825148: step: 710/464, loss: 0.04048619046807289 2023-01-22 12:00:45.432360: step: 712/464, loss: 0.00946044921875 2023-01-22 12:00:46.031265: step: 714/464, loss: 0.02131369151175022 2023-01-22 12:00:46.566941: step: 716/464, loss: 0.12847092747688293 2023-01-22 12:00:47.229618: step: 718/464, loss: 0.024426549673080444 2023-01-22 12:00:47.900219: step: 720/464, loss: 0.12111278623342514 2023-01-22 12:00:48.500440: step: 722/464, loss: 0.03929593041539192 2023-01-22 12:00:49.135386: step: 724/464, loss: 0.0471901036798954 2023-01-22 12:00:49.805813: step: 726/464, loss: 0.06377588212490082 2023-01-22 12:00:50.461229: step: 728/464, loss: 0.600334644317627 2023-01-22 12:00:51.079745: step: 730/464, loss: 0.01961999200284481 2023-01-22 12:00:51.619538: step: 732/464, loss: 0.06293871998786926 2023-01-22 12:00:52.237062: step: 734/464, loss: 0.008754570037126541 2023-01-22 12:00:52.821127: step: 736/464, loss: 0.0206870436668396 2023-01-22 12:00:53.453970: step: 738/464, loss: 0.04069453477859497 2023-01-22 12:00:54.023106: step: 740/464, loss: 0.014518450945615768 2023-01-22 12:00:54.652658: step: 742/464, loss: 0.05866739898920059 2023-01-22 12:00:55.280551: step: 744/464, loss: 0.09685290604829788 2023-01-22 12:00:55.926072: step: 746/464, loss: 0.0906219631433487 2023-01-22 12:00:56.587669: step: 748/464, loss: 0.013325815089046955 2023-01-22 12:00:57.109025: step: 750/464, loss: 0.03870666027069092 2023-01-22 12:00:57.738510: step: 752/464, loss: 0.14578811824321747 2023-01-22 12:00:58.365930: step: 754/464, loss: 0.1603614091873169 2023-01-22 12:00:59.083318: step: 756/464, loss: 0.07946944236755371 2023-01-22 12:00:59.716457: step: 758/464, loss: 0.09339815378189087 2023-01-22 12:01:00.352119: step: 760/464, loss: 0.10323522984981537 2023-01-22 12:01:01.015326: step: 762/464, loss: 0.057176802307367325 2023-01-22 12:01:01.694923: step: 764/464, loss: 0.38984283804893494 2023-01-22 12:01:02.324155: step: 766/464, loss: 0.02038021758198738 2023-01-22 12:01:03.047919: step: 768/464, loss: 0.15187768638134003 2023-01-22 12:01:03.691888: step: 770/464, loss: 0.11661543697118759 2023-01-22 12:01:04.308508: step: 772/464, loss: 0.11710263788700104 2023-01-22 12:01:04.968791: step: 774/464, loss: 0.12077804654836655 2023-01-22 12:01:05.682747: step: 776/464, loss: 0.09523236751556396 2023-01-22 12:01:06.369558: step: 778/464, loss: 0.12539014220237732 2023-01-22 12:01:07.016467: step: 780/464, loss: 0.07297204434871674 2023-01-22 12:01:07.713536: step: 782/464, loss: 0.22416375577449799 2023-01-22 12:01:08.315572: step: 
784/464, loss: 0.09013670682907104 2023-01-22 12:01:08.914031: step: 786/464, loss: 0.1530447006225586 2023-01-22 12:01:09.530240: step: 788/464, loss: 0.056844066828489304 2023-01-22 12:01:10.130275: step: 790/464, loss: 0.024438276886940002 2023-01-22 12:01:10.751799: step: 792/464, loss: 0.10997240245342255 2023-01-22 12:01:11.327177: step: 794/464, loss: 0.0637752115726471 2023-01-22 12:01:11.970976: step: 796/464, loss: 0.0915752574801445 2023-01-22 12:01:12.549664: step: 798/464, loss: 0.030072197318077087 2023-01-22 12:01:13.130102: step: 800/464, loss: 0.06692662090063095 2023-01-22 12:01:13.757312: step: 802/464, loss: 0.06783221662044525 2023-01-22 12:01:14.412054: step: 804/464, loss: 0.216878280043602 2023-01-22 12:01:15.005315: step: 806/464, loss: 0.11674143373966217 2023-01-22 12:01:15.651209: step: 808/464, loss: 0.09718753397464752 2023-01-22 12:01:16.323904: step: 810/464, loss: 0.1712542623281479 2023-01-22 12:01:16.981569: step: 812/464, loss: 0.3075892925262451 2023-01-22 12:01:17.623092: step: 814/464, loss: 0.24416068196296692 2023-01-22 12:01:18.241056: step: 816/464, loss: 0.046887144446372986 2023-01-22 12:01:18.867042: step: 818/464, loss: 0.20484276115894318 2023-01-22 12:01:19.486564: step: 820/464, loss: 0.08469177037477493 2023-01-22 12:01:20.084695: step: 822/464, loss: 0.05184699594974518 2023-01-22 12:01:20.718270: step: 824/464, loss: 0.02204933390021324 2023-01-22 12:01:21.317583: step: 826/464, loss: 0.04278869926929474 2023-01-22 12:01:21.960279: step: 828/464, loss: 0.09152209758758545 2023-01-22 12:01:22.557505: step: 830/464, loss: 0.8785819411277771 2023-01-22 12:01:23.202885: step: 832/464, loss: 0.46504613757133484 2023-01-22 12:01:23.818814: step: 834/464, loss: 0.16306844353675842 2023-01-22 12:01:24.465619: step: 836/464, loss: 0.027663005515933037 2023-01-22 12:01:25.124675: step: 838/464, loss: 0.11722411960363388 2023-01-22 12:01:25.773944: step: 840/464, loss: 0.11919905245304108 2023-01-22 12:01:26.418069: step: 842/464, loss: 0.05290542542934418 2023-01-22 12:01:27.009074: step: 844/464, loss: 0.034706782549619675 2023-01-22 12:01:27.596651: step: 846/464, loss: 0.06160600855946541 2023-01-22 12:01:28.233938: step: 848/464, loss: 0.029707245528697968 2023-01-22 12:01:28.870743: step: 850/464, loss: 0.3252856135368347 2023-01-22 12:01:29.488768: step: 852/464, loss: 0.006985540967434645 2023-01-22 12:01:30.109431: step: 854/464, loss: 0.07038073986768723 2023-01-22 12:01:30.720439: step: 856/464, loss: 0.14189094305038452 2023-01-22 12:01:31.450610: step: 858/464, loss: 0.21152852475643158 2023-01-22 12:01:32.093697: step: 860/464, loss: 0.03939679637551308 2023-01-22 12:01:32.692353: step: 862/464, loss: 0.16607047617435455 2023-01-22 12:01:33.276742: step: 864/464, loss: 0.05294930934906006 2023-01-22 12:01:33.882717: step: 866/464, loss: 0.04461954906582832 2023-01-22 12:01:34.522240: step: 868/464, loss: 0.024309026077389717 2023-01-22 12:01:35.082532: step: 870/464, loss: 0.04591096192598343 2023-01-22 12:01:35.738654: step: 872/464, loss: 0.07206946611404419 2023-01-22 12:01:36.430577: step: 874/464, loss: 0.045928601175546646 2023-01-22 12:01:37.093997: step: 876/464, loss: 0.05889524519443512 2023-01-22 12:01:37.678042: step: 878/464, loss: 0.11609186232089996 2023-01-22 12:01:38.349449: step: 880/464, loss: 0.13958750665187836 2023-01-22 12:01:39.006225: step: 882/464, loss: 0.0900338888168335 2023-01-22 12:01:39.602717: step: 884/464, loss: 0.058699481189250946 2023-01-22 12:01:40.262561: step: 886/464, loss: 0.07599426060914993 
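Each epoch in this log closes with Dev/Test/Sample blocks (the next one begins a few records below) reporting precision 'p', recall 'r', and 'f1' for the template and slot sub-tasks, plus a 'combined' score. The logged numbers are consistent with the usual F1 = 2pr/(p+r) and with combined = template F1 * slot F1; the check below re-derives the epoch-18 Dev Chinese entry. This is an inference from the printed values, not code taken from train.py.

```python
# Sanity check of the reported metrics (inferred from the logged values,
# not taken from train.py): 'f1' is the harmonic mean of p and r, and
# 'combined' matches template_f1 * slot_f1.
def f1(p: float, r: float) -> float:
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Epoch 18, "Dev Chinese" entry from the block below:
template_f1 = f1(1.0, 0.5833333333333334)             # 0.7368421052631579
slot_f1 = f1(0.2876029059949339, 0.3525454976712093)  # 0.316780012400217
combined = template_f1 * slot_f1                      # 0.23341685124226516
print(template_f1, slot_f1, combined)
```

The "Current best result" block printed after each epoch appears to restate, for each language, the epoch whose Dev 'combined' score has been highest so far (epoch 11 for Chinese and Korean, epoch 14 for Russian at this point in training).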
2023-01-22 12:01:40.887443: step: 888/464, loss: 0.12085162103176117 2023-01-22 12:01:41.577334: step: 890/464, loss: 0.5733956098556519 2023-01-22 12:01:42.222520: step: 892/464, loss: 0.03937987610697746 2023-01-22 12:01:42.834098: step: 894/464, loss: 0.15277822315692902 2023-01-22 12:01:43.423451: step: 896/464, loss: 0.10777375102043152 2023-01-22 12:01:44.070976: step: 898/464, loss: 0.35059159994125366 2023-01-22 12:01:44.733999: step: 900/464, loss: 0.07969984412193298 2023-01-22 12:01:45.399537: step: 902/464, loss: 0.06196504086256027 2023-01-22 12:01:46.009138: step: 904/464, loss: 0.11778075248003006 2023-01-22 12:01:46.626659: step: 906/464, loss: 0.08533184230327606 2023-01-22 12:01:47.317956: step: 908/464, loss: 0.08933708816766739 2023-01-22 12:01:47.912847: step: 910/464, loss: 0.04876921698451042 2023-01-22 12:01:48.515052: step: 912/464, loss: 0.01687920093536377 2023-01-22 12:01:49.173374: step: 914/464, loss: 0.1410721093416214 2023-01-22 12:01:49.771740: step: 916/464, loss: 0.10500001907348633 2023-01-22 12:01:50.460174: step: 918/464, loss: 0.863258421421051 2023-01-22 12:01:51.156916: step: 920/464, loss: 0.03630939871072769 2023-01-22 12:01:51.781995: step: 922/464, loss: 0.07011484354734421 2023-01-22 12:01:52.414823: step: 924/464, loss: 0.10621833056211472 2023-01-22 12:01:53.045278: step: 926/464, loss: 0.0867888554930687 2023-01-22 12:01:53.652810: step: 928/464, loss: 0.12446019053459167 2023-01-22 12:01:54.127688: step: 930/464, loss: 0.00032717103022150695
==================================================
Loss: 0.134
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2876029059949339, 'r': 0.3525454976712093, 'f1': 0.316780012400217}, 'combined': 0.23341685124226516, 'epoch': 18}
Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29547728076309493, 'r': 0.31770584683885067, 'f1': 0.3061886587571594}, 'combined': 0.1998951865461248, 'epoch': 18}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2867288256448413, 'r': 0.3656200585072739, 'f1': 0.3214041214901307}, 'combined': 0.23682408951904366, 'epoch': 18}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3074939624168777, 'r': 0.3153928898918067, 'f1': 0.31139334237506283}, 'combined': 0.2032930628977094, 'epoch': 18}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29732859052711996, 'r': 0.35487605966140123, 'f1': 0.32356346616186576}, 'combined': 0.23841518559295372, 'epoch': 18}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3092526456600215, 'r': 0.3052805933304432, 'f1': 0.30725378276101856}, 'combined': 0.20059055247610538, 'epoch': 18}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2251243781094527, 'r': 0.4309523809523809, 'f1': 0.29575163398692805}, 'combined': 0.19716775599128536, 'epoch': 18}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2391304347826087, 'r': 0.4782608695652174, 'f1': 0.3188405797101449}, 'combined': 0.15942028985507245, 'epoch': 18}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2980769230769231, 'r': 0.2672413793103448, 'f1': 0.2818181818181818}, 'combined': 0.18787878787878787, 'epoch': 18}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14}
******************************
Epoch: 19
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 12:04:34.414779: step: 2/464, loss: 0.043649137020111084 2023-01-22 12:04:35.060659: step: 4/464, loss: 0.0485350638628006 2023-01-22 12:04:35.668648: step: 6/464, loss: 0.06481219083070755 2023-01-22 12:04:36.316614: step: 8/464, loss: 0.25353118777275085 2023-01-22 12:04:36.897419: step: 10/464, loss: 0.04911917448043823 2023-01-22 12:04:37.554688: step: 12/464, loss: 0.08624802529811859 2023-01-22 12:04:38.190835: step: 14/464, loss: 0.08918741345405579 2023-01-22 12:04:38.775422: step: 16/464, loss: 0.07811764627695084 2023-01-22 12:04:39.381323: step: 18/464, loss: 0.037579625844955444 2023-01-22 12:04:40.000738: step: 20/464, loss: 0.05042491853237152 2023-01-22 12:04:40.601546: step: 22/464, loss: 0.03239549696445465 2023-01-22 12:04:41.169892: step: 24/464, loss: 0.0234699584543705 2023-01-22 12:04:41.753127: step: 26/464, loss: 0.11668428778648376 2023-01-22 12:04:42.381112: step: 28/464, loss: 0.015777695924043655 2023-01-22 12:04:42.998257: step: 30/464, loss: 0.05128711462020874 2023-01-22 12:04:43.603159: step: 32/464, loss: 0.08597104996442795 2023-01-22 12:04:44.237143: step: 34/464, loss: 0.06288472563028336 2023-01-22 12:04:44.836599:
step: 36/464, loss: 0.08793296664953232 2023-01-22 12:04:45.452716: step: 38/464, loss: 0.011416365392506123 2023-01-22 12:04:46.067120: step: 40/464, loss: 0.09050152450799942 2023-01-22 12:04:46.759477: step: 42/464, loss: 0.46950775384902954 2023-01-22 12:04:47.324894: step: 44/464, loss: 0.23902305960655212 2023-01-22 12:04:47.952015: step: 46/464, loss: 0.04410150274634361 2023-01-22 12:04:48.558123: step: 48/464, loss: 0.019048461690545082 2023-01-22 12:04:49.264247: step: 50/464, loss: 0.34819623827934265 2023-01-22 12:04:49.916181: step: 52/464, loss: 0.05642472952604294 2023-01-22 12:04:50.504641: step: 54/464, loss: 0.030337944626808167 2023-01-22 12:04:51.148805: step: 56/464, loss: 0.08515579253435135 2023-01-22 12:04:51.761090: step: 58/464, loss: 0.013418847694993019 2023-01-22 12:04:52.343324: step: 60/464, loss: 0.021707965061068535 2023-01-22 12:04:52.978968: step: 62/464, loss: 0.009400303475558758 2023-01-22 12:04:53.585604: step: 64/464, loss: 0.20763559639453888 2023-01-22 12:04:54.164456: step: 66/464, loss: 0.054090019315481186 2023-01-22 12:04:54.852209: step: 68/464, loss: 0.12675267457962036 2023-01-22 12:04:55.428222: step: 70/464, loss: 0.01828434132039547 2023-01-22 12:04:56.087307: step: 72/464, loss: 0.03414449095726013 2023-01-22 12:04:56.629937: step: 74/464, loss: 0.005128489341586828 2023-01-22 12:04:57.205898: step: 76/464, loss: 0.017243361100554466 2023-01-22 12:04:57.910148: step: 78/464, loss: 0.01814139261841774 2023-01-22 12:04:58.539627: step: 80/464, loss: 0.10465224832296371 2023-01-22 12:04:59.179231: step: 82/464, loss: 0.1301683783531189 2023-01-22 12:04:59.834082: step: 84/464, loss: 0.030013512820005417 2023-01-22 12:05:00.558151: step: 86/464, loss: 0.105363629758358 2023-01-22 12:05:01.155929: step: 88/464, loss: 0.045319244265556335 2023-01-22 12:05:01.779347: step: 90/464, loss: 0.015606243163347244 2023-01-22 12:05:02.391624: step: 92/464, loss: 0.001734345336444676 2023-01-22 12:05:03.011127: step: 94/464, loss: 0.018572824075818062 2023-01-22 12:05:03.635386: step: 96/464, loss: 0.044814515858888626 2023-01-22 12:05:04.179154: step: 98/464, loss: 0.04040629044175148 2023-01-22 12:05:04.858045: step: 100/464, loss: 0.14970074594020844 2023-01-22 12:05:05.472304: step: 102/464, loss: 1.2993954420089722 2023-01-22 12:05:06.106816: step: 104/464, loss: 0.05898214876651764 2023-01-22 12:05:06.726102: step: 106/464, loss: 0.6732302904129028 2023-01-22 12:05:07.382751: step: 108/464, loss: 0.10700202733278275 2023-01-22 12:05:07.988274: step: 110/464, loss: 0.06434134393930435 2023-01-22 12:05:08.641906: step: 112/464, loss: 0.06865505129098892 2023-01-22 12:05:09.254010: step: 114/464, loss: 0.028769873082637787 2023-01-22 12:05:09.862670: step: 116/464, loss: 0.04342222586274147 2023-01-22 12:05:10.489864: step: 118/464, loss: 0.22497865557670593 2023-01-22 12:05:11.092716: step: 120/464, loss: 0.19769158959388733 2023-01-22 12:05:11.698976: step: 122/464, loss: 0.007902226410806179 2023-01-22 12:05:12.352793: step: 124/464, loss: 0.05975205451250076 2023-01-22 12:05:12.999679: step: 126/464, loss: 0.11951949447393417 2023-01-22 12:05:13.583551: step: 128/464, loss: 0.19905780255794525 2023-01-22 12:05:14.190362: step: 130/464, loss: 0.08745703846216202 2023-01-22 12:05:14.812427: step: 132/464, loss: 0.07580374926328659 2023-01-22 12:05:15.420983: step: 134/464, loss: 0.05008558928966522 2023-01-22 12:05:16.046254: step: 136/464, loss: 0.09049684554338455 2023-01-22 12:05:16.743677: step: 138/464, loss: 0.08347878605127335 2023-01-22 
12:05:17.375795: step: 140/464, loss: 0.06866847723722458 2023-01-22 12:05:18.031638: step: 142/464, loss: 0.12992535531520844 2023-01-22 12:05:18.641217: step: 144/464, loss: 0.07780981063842773 2023-01-22 12:05:19.299020: step: 146/464, loss: 0.17778530716896057 2023-01-22 12:05:19.909126: step: 148/464, loss: 0.030234916135668755 2023-01-22 12:05:20.549556: step: 150/464, loss: 0.040719643235206604 2023-01-22 12:05:21.126255: step: 152/464, loss: 0.09193213284015656 2023-01-22 12:05:21.672405: step: 154/464, loss: 0.003865104168653488 2023-01-22 12:05:22.283491: step: 156/464, loss: 0.033786624670028687 2023-01-22 12:05:22.924072: step: 158/464, loss: 0.014323681592941284 2023-01-22 12:05:23.538937: step: 160/464, loss: 0.011782080866396427 2023-01-22 12:05:24.138276: step: 162/464, loss: 0.07542740553617477 2023-01-22 12:05:24.676362: step: 164/464, loss: 0.05516229569911957 2023-01-22 12:05:25.289483: step: 166/464, loss: 1.0248652696609497 2023-01-22 12:05:25.930915: step: 168/464, loss: 0.4814464747905731 2023-01-22 12:05:26.661690: step: 170/464, loss: 0.037918876856565475 2023-01-22 12:05:27.283063: step: 172/464, loss: 0.11610672622919083 2023-01-22 12:05:27.939847: step: 174/464, loss: 0.047969166189432144 2023-01-22 12:05:28.649943: step: 176/464, loss: 0.03514929860830307 2023-01-22 12:05:29.298764: step: 178/464, loss: 0.04943988844752312 2023-01-22 12:05:30.003408: step: 180/464, loss: 0.05877591669559479 2023-01-22 12:05:30.682745: step: 182/464, loss: 0.07240848243236542 2023-01-22 12:05:31.394097: step: 184/464, loss: 0.07649239897727966 2023-01-22 12:05:32.020548: step: 186/464, loss: 0.023809637874364853 2023-01-22 12:05:32.676404: step: 188/464, loss: 0.11529197543859482 2023-01-22 12:05:33.310348: step: 190/464, loss: 0.060493309050798416 2023-01-22 12:05:33.907482: step: 192/464, loss: 0.020831234753131866 2023-01-22 12:05:34.537613: step: 194/464, loss: 0.023075975477695465 2023-01-22 12:05:35.264282: step: 196/464, loss: 0.0883576050400734 2023-01-22 12:05:35.875837: step: 198/464, loss: 0.045864857733249664 2023-01-22 12:05:36.519929: step: 200/464, loss: 0.27580738067626953 2023-01-22 12:05:37.183586: step: 202/464, loss: 0.20697720348834991 2023-01-22 12:05:37.768707: step: 204/464, loss: 0.014832507818937302 2023-01-22 12:05:38.414168: step: 206/464, loss: 0.15060590207576752 2023-01-22 12:05:39.066676: step: 208/464, loss: 0.10265764594078064 2023-01-22 12:05:39.722923: step: 210/464, loss: 0.06849902123212814 2023-01-22 12:05:40.312618: step: 212/464, loss: 0.03741922602057457 2023-01-22 12:05:40.935025: step: 214/464, loss: 0.022133296355605125 2023-01-22 12:05:41.637726: step: 216/464, loss: 0.09701382368803024 2023-01-22 12:05:42.281076: step: 218/464, loss: 0.11456061154603958 2023-01-22 12:05:42.877324: step: 220/464, loss: 0.03466453775763512 2023-01-22 12:05:43.494357: step: 222/464, loss: 0.03732229024171829 2023-01-22 12:05:44.147129: step: 224/464, loss: 0.0477830171585083 2023-01-22 12:05:44.733477: step: 226/464, loss: 0.03878113254904747 2023-01-22 12:05:45.341370: step: 228/464, loss: 0.10058791190385818 2023-01-22 12:05:45.960935: step: 230/464, loss: 0.012252474203705788 2023-01-22 12:05:46.570122: step: 232/464, loss: 0.047803886234760284 2023-01-22 12:05:47.230014: step: 234/464, loss: 0.047188617289066315 2023-01-22 12:05:47.801557: step: 236/464, loss: 0.04074384272098541 2023-01-22 12:05:48.326460: step: 238/464, loss: 0.23794332146644592 2023-01-22 12:05:48.986010: step: 240/464, loss: 0.057008638978004456 2023-01-22 12:05:49.664326: 
step: 242/464, loss: 0.047704242169857025 2023-01-22 12:05:50.259880: step: 244/464, loss: 0.04410808905959129 2023-01-22 12:05:50.893500: step: 246/464, loss: 0.06980064511299133 2023-01-22 12:05:51.515252: step: 248/464, loss: 0.06814610213041306 2023-01-22 12:05:52.128230: step: 250/464, loss: 0.09101682156324387 2023-01-22 12:05:52.733930: step: 252/464, loss: 0.026719750836491585 2023-01-22 12:05:53.331517: step: 254/464, loss: 0.029958590865135193 2023-01-22 12:05:54.008541: step: 256/464, loss: 0.08468754589557648 2023-01-22 12:05:54.603364: step: 258/464, loss: 0.06755435466766357 2023-01-22 12:05:55.202293: step: 260/464, loss: 0.11044995486736298 2023-01-22 12:05:55.795421: step: 262/464, loss: 0.037777338176965714 2023-01-22 12:05:56.425357: step: 264/464, loss: 0.048214830458164215 2023-01-22 12:05:57.047896: step: 266/464, loss: 0.04914911836385727 2023-01-22 12:05:57.672700: step: 268/464, loss: 0.13612990081310272 2023-01-22 12:05:58.272114: step: 270/464, loss: 0.05357581004500389 2023-01-22 12:05:58.892816: step: 272/464, loss: 0.38628140091896057 2023-01-22 12:05:59.620343: step: 274/464, loss: 0.2231987863779068 2023-01-22 12:06:00.242065: step: 276/464, loss: 0.07788091152906418 2023-01-22 12:06:00.949764: step: 278/464, loss: 0.00692960899323225 2023-01-22 12:06:01.578543: step: 280/464, loss: 0.02506919391453266 2023-01-22 12:06:02.195304: step: 282/464, loss: 0.046163853257894516 2023-01-22 12:06:02.885221: step: 284/464, loss: 0.09043905138969421 2023-01-22 12:06:03.520732: step: 286/464, loss: 0.990919828414917 2023-01-22 12:06:04.110706: step: 288/464, loss: 0.11400745064020157 2023-01-22 12:06:04.682281: step: 290/464, loss: 0.02036544308066368 2023-01-22 12:06:05.313025: step: 292/464, loss: 0.03672667592763901 2023-01-22 12:06:05.897332: step: 294/464, loss: 0.06968734413385391 2023-01-22 12:06:06.534411: step: 296/464, loss: 0.05790454521775246 2023-01-22 12:06:07.171950: step: 298/464, loss: 0.09719129651784897 2023-01-22 12:06:07.800647: step: 300/464, loss: 0.21586154401302338 2023-01-22 12:06:08.433502: step: 302/464, loss: 0.05399652197957039 2023-01-22 12:06:09.079705: step: 304/464, loss: 0.06565868854522705 2023-01-22 12:06:09.653776: step: 306/464, loss: 0.024197373539209366 2023-01-22 12:06:10.252220: step: 308/464, loss: 0.04468630626797676 2023-01-22 12:06:10.977880: step: 310/464, loss: 0.034479573369026184 2023-01-22 12:06:11.661394: step: 312/464, loss: 0.03586513549089432 2023-01-22 12:06:12.302009: step: 314/464, loss: 0.0927836000919342 2023-01-22 12:06:12.884816: step: 316/464, loss: 0.03038622997701168 2023-01-22 12:06:13.534821: step: 318/464, loss: 0.05650036409497261 2023-01-22 12:06:14.164354: step: 320/464, loss: 0.17589622735977173 2023-01-22 12:06:14.837697: step: 322/464, loss: 0.3455429673194885 2023-01-22 12:06:15.427656: step: 324/464, loss: 0.03918929398059845 2023-01-22 12:06:16.056444: step: 326/464, loss: 0.13918551802635193 2023-01-22 12:06:16.686635: step: 328/464, loss: 0.058665983378887177 2023-01-22 12:06:17.367023: step: 330/464, loss: 0.03296054154634476 2023-01-22 12:06:17.980709: step: 332/464, loss: 0.40283647179603577 2023-01-22 12:06:18.587444: step: 334/464, loss: 0.13812364637851715 2023-01-22 12:06:19.250318: step: 336/464, loss: 0.9251861572265625 2023-01-22 12:06:19.874468: step: 338/464, loss: 0.11263386160135269 2023-01-22 12:06:20.456481: step: 340/464, loss: 0.06823277473449707 2023-01-22 12:06:21.087136: step: 342/464, loss: 0.051416944712400436 2023-01-22 12:06:21.718156: step: 344/464, loss: 
0.02385799027979374 2023-01-22 12:06:22.369884: step: 346/464, loss: 0.039055630564689636 2023-01-22 12:06:22.968948: step: 348/464, loss: 0.10177624225616455 2023-01-22 12:06:23.593579: step: 350/464, loss: 0.06820239126682281 2023-01-22 12:06:24.292511: step: 352/464, loss: 0.10517510771751404 2023-01-22 12:06:24.925194: step: 354/464, loss: 0.1108148992061615 2023-01-22 12:06:25.529773: step: 356/464, loss: 0.026672907173633575 2023-01-22 12:06:26.108582: step: 358/464, loss: 0.2044680267572403 2023-01-22 12:06:26.751960: step: 360/464, loss: 0.15214747190475464 2023-01-22 12:06:27.403919: step: 362/464, loss: 0.004370806738734245 2023-01-22 12:06:28.005031: step: 364/464, loss: 0.04464809224009514 2023-01-22 12:06:28.616437: step: 366/464, loss: 0.05915183201432228 2023-01-22 12:06:29.255884: step: 368/464, loss: 0.005321057513356209 2023-01-22 12:06:29.915395: step: 370/464, loss: 0.04195632040500641 2023-01-22 12:06:30.641629: step: 372/464, loss: 0.05455410107970238 2023-01-22 12:06:31.218547: step: 374/464, loss: 0.025054074823856354 2023-01-22 12:06:31.865287: step: 376/464, loss: 0.022881191223859787 2023-01-22 12:06:32.457650: step: 378/464, loss: 0.11177432537078857 2023-01-22 12:06:33.045544: step: 380/464, loss: 0.07313412427902222 2023-01-22 12:06:33.673471: step: 382/464, loss: 0.08014141023159027 2023-01-22 12:06:34.243716: step: 384/464, loss: 0.0690186396241188 2023-01-22 12:06:34.853318: step: 386/464, loss: 0.0153644485399127 2023-01-22 12:06:35.486886: step: 388/464, loss: 0.08579351752996445 2023-01-22 12:06:36.052073: step: 390/464, loss: 0.022128120064735413 2023-01-22 12:06:36.676349: step: 392/464, loss: 0.04024612158536911 2023-01-22 12:06:37.304144: step: 394/464, loss: 0.097946397960186 2023-01-22 12:06:37.853678: step: 396/464, loss: 0.02984456904232502 2023-01-22 12:06:38.470450: step: 398/464, loss: 0.014964022673666477 2023-01-22 12:06:39.113409: step: 400/464, loss: 0.025280293077230453 2023-01-22 12:06:39.782104: step: 402/464, loss: 0.21250039339065552 2023-01-22 12:06:40.389619: step: 404/464, loss: 0.082395538687706 2023-01-22 12:06:41.024157: step: 406/464, loss: 0.08781449496746063 2023-01-22 12:06:41.657612: step: 408/464, loss: 0.04008530080318451 2023-01-22 12:06:42.290059: step: 410/464, loss: 0.08903039246797562 2023-01-22 12:06:42.908292: step: 412/464, loss: 0.027576781809329987 2023-01-22 12:06:43.514427: step: 414/464, loss: 0.21906928718090057 2023-01-22 12:06:44.095289: step: 416/464, loss: 0.46569788455963135 2023-01-22 12:06:44.774694: step: 418/464, loss: 0.22811473906040192 2023-01-22 12:06:45.332164: step: 420/464, loss: 0.0242232084274292 2023-01-22 12:06:46.009532: step: 422/464, loss: 0.023428920656442642 2023-01-22 12:06:46.615212: step: 424/464, loss: 0.04994883015751839 2023-01-22 12:06:47.305306: step: 426/464, loss: 0.06240205466747284 2023-01-22 12:06:47.984622: step: 428/464, loss: 0.08307365328073502 2023-01-22 12:06:48.601856: step: 430/464, loss: 0.005301331635564566 2023-01-22 12:06:49.246326: step: 432/464, loss: 0.09943713247776031 2023-01-22 12:06:49.891986: step: 434/464, loss: 0.4063957631587982 2023-01-22 12:06:50.532593: step: 436/464, loss: 0.012808769941329956 2023-01-22 12:06:51.180217: step: 438/464, loss: 0.03013821877539158 2023-01-22 12:06:51.815351: step: 440/464, loss: 0.06016019359230995 2023-01-22 12:06:52.454067: step: 442/464, loss: 0.01702016219496727 2023-01-22 12:06:53.089842: step: 444/464, loss: 0.17447879910469055 2023-01-22 12:06:53.758355: step: 446/464, loss: 0.04016990214586258 2023-01-22 
12:06:54.397852: step: 448/464, loss: 0.039848506450653076 2023-01-22 12:06:55.001018: step: 450/464, loss: 0.12156244367361069 2023-01-22 12:06:55.643355: step: 452/464, loss: 0.4423430263996124 2023-01-22 12:06:56.253460: step: 454/464, loss: 0.3502335548400879 2023-01-22 12:06:56.911701: step: 456/464, loss: 0.07991401851177216 2023-01-22 12:06:57.504872: step: 458/464, loss: 0.028739823028445244 2023-01-22 12:06:58.189694: step: 460/464, loss: 0.03141813725233078 2023-01-22 12:06:58.890402: step: 462/464, loss: 0.015840908512473106 2023-01-22 12:06:59.511368: step: 464/464, loss: 0.07145281136035919 2023-01-22 12:07:00.095485: step: 466/464, loss: 0.22814258933067322 2023-01-22 12:07:00.660985: step: 468/464, loss: 0.11121980100870132 2023-01-22 12:07:01.286413: step: 470/464, loss: 0.08399312198162079 2023-01-22 12:07:01.976049: step: 472/464, loss: 0.046664491295814514 2023-01-22 12:07:02.602818: step: 474/464, loss: 0.10848533362150192 2023-01-22 12:07:03.189633: step: 476/464, loss: 0.023174069821834564 2023-01-22 12:07:03.765964: step: 478/464, loss: 0.07110322266817093 2023-01-22 12:07:04.324678: step: 480/464, loss: 0.2516523003578186 2023-01-22 12:07:04.944403: step: 482/464, loss: 0.032961003482341766 2023-01-22 12:07:05.566177: step: 484/464, loss: 0.12668445706367493 2023-01-22 12:07:06.199772: step: 486/464, loss: 0.09966351091861725 2023-01-22 12:07:06.753252: step: 488/464, loss: 0.023068716749548912 2023-01-22 12:07:07.342716: step: 490/464, loss: 0.033713988959789276 2023-01-22 12:07:07.977383: step: 492/464, loss: 0.020636672154068947 2023-01-22 12:07:08.721443: step: 494/464, loss: 0.10108671337366104 2023-01-22 12:07:09.309712: step: 496/464, loss: 0.28205952048301697 2023-01-22 12:07:09.974450: step: 498/464, loss: 0.023579929023981094 2023-01-22 12:07:10.600938: step: 500/464, loss: 0.03312808275222778 2023-01-22 12:07:11.243803: step: 502/464, loss: 0.049804236739873886 2023-01-22 12:07:11.865188: step: 504/464, loss: 0.08485108613967896 2023-01-22 12:07:12.484924: step: 506/464, loss: 0.10577449202537537 2023-01-22 12:07:13.105657: step: 508/464, loss: 0.04027533903717995 2023-01-22 12:07:13.708505: step: 510/464, loss: 0.06960257887840271 2023-01-22 12:07:14.379402: step: 512/464, loss: 0.02281743660569191 2023-01-22 12:07:14.985042: step: 514/464, loss: 0.03415166586637497 2023-01-22 12:07:15.657825: step: 516/464, loss: 0.1048787534236908 2023-01-22 12:07:16.263165: step: 518/464, loss: 0.030197374522686005 2023-01-22 12:07:16.922175: step: 520/464, loss: 0.22120463848114014 2023-01-22 12:07:17.515118: step: 522/464, loss: 0.15047624707221985 2023-01-22 12:07:18.181469: step: 524/464, loss: 0.08096174150705338 2023-01-22 12:07:18.767264: step: 526/464, loss: 0.05747506394982338 2023-01-22 12:07:19.392596: step: 528/464, loss: 0.0479048490524292 2023-01-22 12:07:20.070484: step: 530/464, loss: 0.06097549945116043 2023-01-22 12:07:20.745679: step: 532/464, loss: 0.040148764848709106 2023-01-22 12:07:21.360195: step: 534/464, loss: 0.18284642696380615 2023-01-22 12:07:21.932411: step: 536/464, loss: 0.015024237334728241 2023-01-22 12:07:22.653239: step: 538/464, loss: 0.019144777208566666 2023-01-22 12:07:23.309766: step: 540/464, loss: 0.0017776531167328358 2023-01-22 12:07:23.909414: step: 542/464, loss: 0.06847799569368362 2023-01-22 12:07:24.531756: step: 544/464, loss: 0.1029786616563797 2023-01-22 12:07:25.157288: step: 546/464, loss: 0.01616619899868965 2023-01-22 12:07:25.813180: step: 548/464, loss: 0.022176573053002357 2023-01-22 12:07:26.491610: step: 
550/464, loss: 0.06482881307601929 2023-01-22 12:07:27.116713: step: 552/464, loss: 0.01709182932972908 2023-01-22 12:07:27.728295: step: 554/464, loss: 0.014702457003295422 2023-01-22 12:07:28.293446: step: 556/464, loss: 0.011646861210465431 2023-01-22 12:07:28.907581: step: 558/464, loss: 0.12523825466632843 2023-01-22 12:07:29.653046: step: 560/464, loss: 0.04933774843811989 2023-01-22 12:07:30.246469: step: 562/464, loss: 0.05424141138792038 2023-01-22 12:07:30.860375: step: 564/464, loss: 0.11501363664865494 2023-01-22 12:07:31.471872: step: 566/464, loss: 0.07519600540399551 2023-01-22 12:07:32.057277: step: 568/464, loss: 0.08401346951723099 2023-01-22 12:07:32.716012: step: 570/464, loss: 0.056519828736782074 2023-01-22 12:07:33.323773: step: 572/464, loss: 0.08458318561315536 2023-01-22 12:07:33.896284: step: 574/464, loss: 0.031199859455227852 2023-01-22 12:07:34.651994: step: 576/464, loss: 0.01655229926109314 2023-01-22 12:07:35.312421: step: 578/464, loss: 0.10784925520420074 2023-01-22 12:07:35.972416: step: 580/464, loss: 0.06575610488653183 2023-01-22 12:07:36.585921: step: 582/464, loss: 0.03883712738752365 2023-01-22 12:07:37.220949: step: 584/464, loss: 0.04284549504518509 2023-01-22 12:07:37.834430: step: 586/464, loss: 0.11854876577854156 2023-01-22 12:07:38.437291: step: 588/464, loss: 0.04092574864625931 2023-01-22 12:07:39.032654: step: 590/464, loss: 0.3116703927516937 2023-01-22 12:07:39.647146: step: 592/464, loss: 0.030424591153860092 2023-01-22 12:07:40.229088: step: 594/464, loss: 0.23410645127296448 2023-01-22 12:07:40.848885: step: 596/464, loss: 0.048708029091358185 2023-01-22 12:07:41.443373: step: 598/464, loss: 0.047511957585811615 2023-01-22 12:07:42.106535: step: 600/464, loss: 0.03580872714519501 2023-01-22 12:07:42.767058: step: 602/464, loss: 0.10451705753803253 2023-01-22 12:07:43.388948: step: 604/464, loss: 0.1291225254535675 2023-01-22 12:07:44.146831: step: 606/464, loss: 0.08524785935878754 2023-01-22 12:07:44.741108: step: 608/464, loss: 0.0811164602637291 2023-01-22 12:07:45.341482: step: 610/464, loss: 0.06724970042705536 2023-01-22 12:07:45.940953: step: 612/464, loss: 0.11718723177909851 2023-01-22 12:07:46.527369: step: 614/464, loss: 0.07521151006221771 2023-01-22 12:07:47.165842: step: 616/464, loss: 0.06996964663267136 2023-01-22 12:07:47.829516: step: 618/464, loss: 0.06816425919532776 2023-01-22 12:07:48.428704: step: 620/464, loss: 0.004956142511218786 2023-01-22 12:07:49.148562: step: 622/464, loss: 0.07144085317850113 2023-01-22 12:07:49.720339: step: 624/464, loss: 0.029275184497237206 2023-01-22 12:07:50.339141: step: 626/464, loss: 0.05224230885505676 2023-01-22 12:07:50.926179: step: 628/464, loss: 0.05378033220767975 2023-01-22 12:07:51.633029: step: 630/464, loss: 0.11603333055973053 2023-01-22 12:07:52.224917: step: 632/464, loss: 0.07940942049026489 2023-01-22 12:07:52.832683: step: 634/464, loss: 0.013182312250137329 2023-01-22 12:07:53.501812: step: 636/464, loss: 0.012838025577366352 2023-01-22 12:07:54.146111: step: 638/464, loss: 0.06080120429396629 2023-01-22 12:07:54.785435: step: 640/464, loss: 0.05779057368636131 2023-01-22 12:07:55.390046: step: 642/464, loss: 0.021169716492295265 2023-01-22 12:07:55.971342: step: 644/464, loss: 0.029275264590978622 2023-01-22 12:07:56.548754: step: 646/464, loss: 0.07615438848733902 2023-01-22 12:07:57.240422: step: 648/464, loss: 0.03917456418275833 2023-01-22 12:07:57.888371: step: 650/464, loss: 0.03054221160709858 2023-01-22 12:07:58.564440: step: 652/464, loss: 
0.08743109554052353 2023-01-22 12:07:59.160383: step: 654/464, loss: 0.1987110674381256 2023-01-22 12:07:59.846676: step: 656/464, loss: 0.013709792867302895 2023-01-22 12:08:00.479334: step: 658/464, loss: 0.0877533107995987 2023-01-22 12:08:01.065568: step: 660/464, loss: 0.041052624583244324 2023-01-22 12:08:01.779670: step: 662/464, loss: 0.08393299579620361 2023-01-22 12:08:02.351574: step: 664/464, loss: 0.05065491423010826 2023-01-22 12:08:02.986830: step: 666/464, loss: 0.03020860254764557 2023-01-22 12:08:03.635206: step: 668/464, loss: 0.03692087158560753 2023-01-22 12:08:04.282077: step: 670/464, loss: 0.07487764954566956 2023-01-22 12:08:04.867956: step: 672/464, loss: 0.15530121326446533 2023-01-22 12:08:05.413032: step: 674/464, loss: 0.06111391261219978 2023-01-22 12:08:06.096605: step: 676/464, loss: 0.01869584433734417 2023-01-22 12:08:06.667905: step: 678/464, loss: 0.3590307831764221 2023-01-22 12:08:07.320524: step: 680/464, loss: 0.045789625495672226 2023-01-22 12:08:07.935751: step: 682/464, loss: 0.0480433851480484 2023-01-22 12:08:08.473404: step: 684/464, loss: 0.03853193297982216 2023-01-22 12:08:09.075318: step: 686/464, loss: 0.1281861513853073 2023-01-22 12:08:09.673142: step: 688/464, loss: 0.010447696782648563 2023-01-22 12:08:10.268126: step: 690/464, loss: 0.18426543474197388 2023-01-22 12:08:10.909316: step: 692/464, loss: 0.011111344210803509 2023-01-22 12:08:11.470608: step: 694/464, loss: 0.09752097725868225 2023-01-22 12:08:12.165505: step: 696/464, loss: 0.018900105729699135 2023-01-22 12:08:12.801251: step: 698/464, loss: 0.07611563056707382 2023-01-22 12:08:13.434091: step: 700/464, loss: 0.03231906145811081 2023-01-22 12:08:14.036568: step: 702/464, loss: 0.6264570355415344 2023-01-22 12:08:14.676129: step: 704/464, loss: 0.03651156276464462 2023-01-22 12:08:15.311794: step: 706/464, loss: 0.05947602912783623 2023-01-22 12:08:16.039679: step: 708/464, loss: 1.0475647449493408 2023-01-22 12:08:16.710005: step: 710/464, loss: 0.09719257056713104 2023-01-22 12:08:17.459995: step: 712/464, loss: 0.05212196335196495 2023-01-22 12:08:18.081684: step: 714/464, loss: 0.03310827910900116 2023-01-22 12:08:18.745921: step: 716/464, loss: 0.059840813279151917 2023-01-22 12:08:19.372686: step: 718/464, loss: 0.07850253582000732 2023-01-22 12:08:19.991468: step: 720/464, loss: 0.1370672732591629 2023-01-22 12:08:20.595164: step: 722/464, loss: 0.03510038182139397 2023-01-22 12:08:21.175422: step: 724/464, loss: 0.06712029129266739 2023-01-22 12:08:21.778532: step: 726/464, loss: 0.054644204676151276 2023-01-22 12:08:22.454994: step: 728/464, loss: 0.12576636672019958 2023-01-22 12:08:23.058428: step: 730/464, loss: 0.2506440281867981 2023-01-22 12:08:23.664486: step: 732/464, loss: 0.07583318650722504 2023-01-22 12:08:24.358395: step: 734/464, loss: 0.0640006735920906 2023-01-22 12:08:24.997083: step: 736/464, loss: 0.36113810539245605 2023-01-22 12:08:25.601058: step: 738/464, loss: 0.02094738371670246 2023-01-22 12:08:26.215948: step: 740/464, loss: 0.07040239125490189 2023-01-22 12:08:26.875468: step: 742/464, loss: 0.07829161733388901 2023-01-22 12:08:27.508518: step: 744/464, loss: 0.03566007688641548 2023-01-22 12:08:28.091211: step: 746/464, loss: 0.07003956288099289 2023-01-22 12:08:28.681156: step: 748/464, loss: 0.4165613055229187 2023-01-22 12:08:29.300428: step: 750/464, loss: 1.438755989074707 2023-01-22 12:08:29.925120: step: 752/464, loss: 0.07919133454561234 2023-01-22 12:08:30.610605: step: 754/464, loss: 0.04016372188925743 2023-01-22 
12:08:31.150055: step: 756/464, loss: 0.03662552312016487 2023-01-22 12:08:31.772406: step: 758/464, loss: 0.011545374058187008 2023-01-22 12:08:32.548574: step: 760/464, loss: 0.08247997611761093 2023-01-22 12:08:33.183125: step: 762/464, loss: 0.062238164246082306 2023-01-22 12:08:33.756011: step: 764/464, loss: 0.06991934031248093 2023-01-22 12:08:34.339353: step: 766/464, loss: 0.7585777640342712 2023-01-22 12:08:35.028358: step: 768/464, loss: 0.6400312781333923 2023-01-22 12:08:35.611820: step: 770/464, loss: 0.040842048823833466 2023-01-22 12:08:36.260942: step: 772/464, loss: 0.06262914836406708 2023-01-22 12:08:36.900937: step: 774/464, loss: 0.057515811175107956 2023-01-22 12:08:37.463123: step: 776/464, loss: 0.05399390310049057 2023-01-22 12:08:38.068243: step: 778/464, loss: 0.1591375321149826 2023-01-22 12:08:38.681513: step: 780/464, loss: 0.6502782106399536 2023-01-22 12:08:39.235924: step: 782/464, loss: 0.0023214572574943304 2023-01-22 12:08:39.850048: step: 784/464, loss: 0.1388404220342636 2023-01-22 12:08:40.498225: step: 786/464, loss: 0.22411896288394928 2023-01-22 12:08:41.080468: step: 788/464, loss: 0.019362160935997963 2023-01-22 12:08:41.653032: step: 790/464, loss: 0.021875949576497078 2023-01-22 12:08:42.262362: step: 792/464, loss: 0.06016132980585098 2023-01-22 12:08:42.926474: step: 794/464, loss: 0.04290073364973068 2023-01-22 12:08:43.548489: step: 796/464, loss: 0.08380314707756042 2023-01-22 12:08:44.188277: step: 798/464, loss: 0.025916090235114098 2023-01-22 12:08:44.924414: step: 800/464, loss: 0.10179685056209564 2023-01-22 12:08:45.534279: step: 802/464, loss: 0.03139950707554817 2023-01-22 12:08:46.127976: step: 804/464, loss: 0.05587141215801239 2023-01-22 12:08:46.684977: step: 806/464, loss: 0.3088460862636566 2023-01-22 12:08:47.363297: step: 808/464, loss: 0.04401913657784462 2023-01-22 12:08:48.013266: step: 810/464, loss: 0.12718059122562408 2023-01-22 12:08:48.651955: step: 812/464, loss: 0.2657465636730194 2023-01-22 12:08:49.287732: step: 814/464, loss: 0.06844300776720047 2023-01-22 12:08:49.959385: step: 816/464, loss: 0.022106116637587547 2023-01-22 12:08:50.639243: step: 818/464, loss: 0.15089477598667145 2023-01-22 12:08:51.329243: step: 820/464, loss: 0.08072828501462936 2023-01-22 12:08:51.990473: step: 822/464, loss: 0.11256927251815796 2023-01-22 12:08:52.651778: step: 824/464, loss: 0.19882658123970032 2023-01-22 12:08:53.299949: step: 826/464, loss: 0.05668610706925392 2023-01-22 12:08:53.941158: step: 828/464, loss: 0.033308397978544235 2023-01-22 12:08:54.635578: step: 830/464, loss: 0.0646291971206665 2023-01-22 12:08:55.287060: step: 832/464, loss: 0.09487643837928772 2023-01-22 12:08:55.871889: step: 834/464, loss: 0.09053507447242737 2023-01-22 12:08:56.528429: step: 836/464, loss: 0.049667082726955414 2023-01-22 12:08:57.113150: step: 838/464, loss: 0.1634913980960846 2023-01-22 12:08:57.717810: step: 840/464, loss: 0.011972474865615368 2023-01-22 12:08:58.367385: step: 842/464, loss: 0.06723528355360031 2023-01-22 12:08:59.030644: step: 844/464, loss: 0.04506109282374382 2023-01-22 12:08:59.702147: step: 846/464, loss: 0.5153870582580566 2023-01-22 12:09:00.310387: step: 848/464, loss: 0.019879765808582306 2023-01-22 12:09:00.887433: step: 850/464, loss: 0.028760865330696106 2023-01-22 12:09:01.527548: step: 852/464, loss: 0.03077036142349243 2023-01-22 12:09:02.178026: step: 854/464, loss: 0.06875479966402054 2023-01-22 12:09:02.838119: step: 856/464, loss: 0.047324683517217636 2023-01-22 12:09:03.415678: step: 
858/464, loss: 0.029233718290925026 2023-01-22 12:09:04.016941: step: 860/464, loss: 0.16239085793495178 2023-01-22 12:09:04.691582: step: 862/464, loss: 0.12425319850444794 2023-01-22 12:09:05.307065: step: 864/464, loss: 0.07654102146625519 2023-01-22 12:09:06.022234: step: 866/464, loss: 0.4811508059501648 2023-01-22 12:09:06.712038: step: 868/464, loss: 0.05206020548939705 2023-01-22 12:09:07.357577: step: 870/464, loss: 0.29282277822494507 2023-01-22 12:09:08.011299: step: 872/464, loss: 0.0878470316529274 2023-01-22 12:09:08.695175: step: 874/464, loss: 0.31125855445861816 2023-01-22 12:09:09.351931: step: 876/464, loss: 0.004280728287994862 2023-01-22 12:09:10.009203: step: 878/464, loss: 0.0990959033370018 2023-01-22 12:09:10.598687: step: 880/464, loss: 0.05305367335677147 2023-01-22 12:09:11.241776: step: 882/464, loss: 0.03634254261851311 2023-01-22 12:09:11.886853: step: 884/464, loss: 0.10698814690113068 2023-01-22 12:09:12.550750: step: 886/464, loss: 0.07746051251888275 2023-01-22 12:09:13.205834: step: 888/464, loss: 0.019592829048633575 2023-01-22 12:09:13.832185: step: 890/464, loss: 0.027923477813601494 2023-01-22 12:09:14.482369: step: 892/464, loss: 0.32788199186325073 2023-01-22 12:09:15.160380: step: 894/464, loss: 0.050454281270504 2023-01-22 12:09:15.854271: step: 896/464, loss: 0.051692795008420944 2023-01-22 12:09:16.440033: step: 898/464, loss: 0.31875765323638916 2023-01-22 12:09:17.122259: step: 900/464, loss: 0.09781712293624878 2023-01-22 12:09:17.708754: step: 902/464, loss: 0.3061683773994446 2023-01-22 12:09:18.271197: step: 904/464, loss: 0.010698407888412476 2023-01-22 12:09:18.967508: step: 906/464, loss: 0.02591118961572647 2023-01-22 12:09:19.641104: step: 908/464, loss: 0.12273906171321869 2023-01-22 12:09:20.264881: step: 910/464, loss: 0.07957099378108978 2023-01-22 12:09:20.879002: step: 912/464, loss: 0.1690153181552887 2023-01-22 12:09:21.501450: step: 914/464, loss: 0.03161702677607536 2023-01-22 12:09:22.141149: step: 916/464, loss: 0.041874177753925323 2023-01-22 12:09:22.756894: step: 918/464, loss: 0.037186700850725174 2023-01-22 12:09:23.402855: step: 920/464, loss: 0.16274917125701904 2023-01-22 12:09:24.015906: step: 922/464, loss: 0.06382595747709274 2023-01-22 12:09:24.666373: step: 924/464, loss: 0.18154233694076538 2023-01-22 12:09:25.323114: step: 926/464, loss: 0.018826236948370934 2023-01-22 12:09:25.950529: step: 928/464, loss: 0.010248782113194466 2023-01-22 12:09:26.435706: step: 930/464, loss: 0.010287413373589516 ================================================== Loss: 0.105 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2909069031903191, 'r': 0.3345153383934219, 'f1': 0.31119079140923805}, 'combined': 0.22929847788049118, 'epoch': 19} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28881861108107676, 'r': 0.32833557439703676, 'f1': 0.3073119385632772}, 'combined': 0.20062851947654367, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.285703689370485, 'r': 0.3502174256799494, 'f1': 0.3146881216254617}, 'combined': 0.23187545803981388, 'epoch': 19} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30115824022673027, 'r': 0.3290893534158026, 'f1': 0.3145048757084765}, 'combined': 0.2053244266283318, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30114112227805695, 'r': 0.3411408918406072, 'f1': 0.3198954626334519}, 'combined': 0.23571244615096457, 'epoch': 19}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2974484091547685, 'r': 0.31848011485258043, 'f1': 0.3076051841015167}, 'combined': 0.200819964750213, 'epoch': 19}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22395833333333331, 'r': 0.4095238095238095, 'f1': 0.2895622895622895}, 'combined': 0.19304152637485966, 'epoch': 19}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2558139534883721, 'r': 0.4782608695652174, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 19}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4305555555555556, 'r': 0.2672413793103448, 'f1': 0.3297872340425532}, 'combined': 0.2198581560283688, 'epoch': 19}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14}
******************************
Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 12:12:05.834053: step: 2/464, loss: 0.11350901424884796
2023-01-22 12:12:06.505433: step: 4/464, loss: 0.018851248547434807
2023-01-22 12:12:07.132010: step:
6/464, loss: 0.09123280644416809 2023-01-22 12:12:07.743504: step: 8/464, loss: 0.9773920178413391 2023-01-22 12:12:08.374122: step: 10/464, loss: 0.09162721037864685 2023-01-22 12:12:09.173166: step: 12/464, loss: 0.09528015553951263 2023-01-22 12:12:09.826749: step: 14/464, loss: 0.06696914881467819 2023-01-22 12:12:10.405800: step: 16/464, loss: 0.14077621698379517 2023-01-22 12:12:11.065163: step: 18/464, loss: 0.11180231720209122 2023-01-22 12:12:11.634660: step: 20/464, loss: 0.03215533122420311 2023-01-22 12:12:12.294809: step: 22/464, loss: 0.0469103641808033 2023-01-22 12:12:13.019501: step: 24/464, loss: 0.03041096217930317 2023-01-22 12:12:13.718211: step: 26/464, loss: 0.050311531871557236 2023-01-22 12:12:14.423248: step: 28/464, loss: 0.03684114292263985 2023-01-22 12:12:15.022394: step: 30/464, loss: 0.11024195700883865 2023-01-22 12:12:15.686536: step: 32/464, loss: 0.042063917964696884 2023-01-22 12:12:16.369838: step: 34/464, loss: 0.11315114051103592 2023-01-22 12:12:17.042628: step: 36/464, loss: 0.21695709228515625 2023-01-22 12:12:17.669969: step: 38/464, loss: 0.062165722250938416 2023-01-22 12:12:18.324668: step: 40/464, loss: 0.01616864651441574 2023-01-22 12:12:18.955715: step: 42/464, loss: 0.025449542328715324 2023-01-22 12:12:19.545102: step: 44/464, loss: 0.008808738552033901 2023-01-22 12:12:20.167645: step: 46/464, loss: 0.035203129053115845 2023-01-22 12:12:20.747164: step: 48/464, loss: 10.598348617553711 2023-01-22 12:12:21.375344: step: 50/464, loss: 0.39950209856033325 2023-01-22 12:12:21.966342: step: 52/464, loss: 0.015635067597031593 2023-01-22 12:12:22.586179: step: 54/464, loss: 0.08349144458770752 2023-01-22 12:12:23.256676: step: 56/464, loss: 0.08045043796300888 2023-01-22 12:12:23.866422: step: 58/464, loss: 0.07628641277551651 2023-01-22 12:12:24.504518: step: 60/464, loss: 0.07498513907194138 2023-01-22 12:12:25.092053: step: 62/464, loss: 0.1008698120713234 2023-01-22 12:12:25.723361: step: 64/464, loss: 0.8809760212898254 2023-01-22 12:12:26.342101: step: 66/464, loss: 0.015431375242769718 2023-01-22 12:12:27.041573: step: 68/464, loss: 0.03336857631802559 2023-01-22 12:12:27.652799: step: 70/464, loss: 0.009299016557633877 2023-01-22 12:12:28.303563: step: 72/464, loss: 0.034576259553432465 2023-01-22 12:12:28.868868: step: 74/464, loss: 0.036344997584819794 2023-01-22 12:12:29.462556: step: 76/464, loss: 0.04858871176838875 2023-01-22 12:12:30.086586: step: 78/464, loss: 0.05398684740066528 2023-01-22 12:12:30.673998: step: 80/464, loss: 0.023192742839455605 2023-01-22 12:12:31.314935: step: 82/464, loss: 0.09319634735584259 2023-01-22 12:12:31.974199: step: 84/464, loss: 0.07040213793516159 2023-01-22 12:12:32.590860: step: 86/464, loss: 0.22368858754634857 2023-01-22 12:12:33.209843: step: 88/464, loss: 0.05666378140449524 2023-01-22 12:12:33.849807: step: 90/464, loss: 0.015081451274454594 2023-01-22 12:12:34.518290: step: 92/464, loss: 0.016412660479545593 2023-01-22 12:12:35.135050: step: 94/464, loss: 0.14707431197166443 2023-01-22 12:12:35.746475: step: 96/464, loss: 0.17913639545440674 2023-01-22 12:12:36.322355: step: 98/464, loss: 0.0637957751750946 2023-01-22 12:12:37.039267: step: 100/464, loss: 0.06730735301971436 2023-01-22 12:12:37.658667: step: 102/464, loss: 0.026260200887918472 2023-01-22 12:12:38.331116: step: 104/464, loss: 0.020162414759397507 2023-01-22 12:12:38.908148: step: 106/464, loss: 0.012230110354721546 2023-01-22 12:12:39.592596: step: 108/464, loss: 0.1642729640007019 2023-01-22 12:12:40.192190: step: 
110/464, loss: 0.041497450321912766 2023-01-22 12:12:40.876055: step: 112/464, loss: 0.06851537525653839 2023-01-22 12:12:41.495743: step: 114/464, loss: 0.12393549829721451 2023-01-22 12:12:42.142944: step: 116/464, loss: 0.010810545645654202 2023-01-22 12:12:42.756456: step: 118/464, loss: 0.19591349363327026 2023-01-22 12:12:43.380299: step: 120/464, loss: 0.053106024861335754 2023-01-22 12:12:44.069095: step: 122/464, loss: 0.022138893604278564 2023-01-22 12:12:44.773581: step: 124/464, loss: 0.004090593662112951 2023-01-22 12:12:45.368077: step: 126/464, loss: 0.005117037799209356 2023-01-22 12:12:46.019111: step: 128/464, loss: 0.11544764041900635 2023-01-22 12:12:46.686023: step: 130/464, loss: 0.031940482556819916 2023-01-22 12:12:47.274174: step: 132/464, loss: 0.046688053756952286 2023-01-22 12:12:47.880110: step: 134/464, loss: 0.11258510500192642 2023-01-22 12:12:48.459968: step: 136/464, loss: 0.027288896963000298 2023-01-22 12:12:49.065265: step: 138/464, loss: 0.06842369586229324 2023-01-22 12:12:49.706015: step: 140/464, loss: 0.03138872981071472 2023-01-22 12:12:50.310746: step: 142/464, loss: 0.050660863518714905 2023-01-22 12:12:50.934543: step: 144/464, loss: 0.02836783416569233 2023-01-22 12:12:51.576085: step: 146/464, loss: 0.06767866015434265 2023-01-22 12:12:52.173970: step: 148/464, loss: 0.3799901008605957 2023-01-22 12:12:52.745254: step: 150/464, loss: 0.04470186308026314 2023-01-22 12:12:53.389937: step: 152/464, loss: 0.024545220658183098 2023-01-22 12:12:53.945593: step: 154/464, loss: 0.02867044135928154 2023-01-22 12:12:54.562181: step: 156/464, loss: 0.04654316604137421 2023-01-22 12:12:55.214948: step: 158/464, loss: 0.0218499843031168 2023-01-22 12:12:55.908178: step: 160/464, loss: 0.058538228273391724 2023-01-22 12:12:56.538808: step: 162/464, loss: 0.03959566727280617 2023-01-22 12:12:57.197879: step: 164/464, loss: 0.8627987504005432 2023-01-22 12:12:57.854919: step: 166/464, loss: 0.4328685998916626 2023-01-22 12:12:58.452206: step: 168/464, loss: 0.013413364067673683 2023-01-22 12:12:59.024054: step: 170/464, loss: 0.03929009661078453 2023-01-22 12:12:59.631036: step: 172/464, loss: 0.0497039295732975 2023-01-22 12:13:00.284430: step: 174/464, loss: 0.042862989008426666 2023-01-22 12:13:00.885800: step: 176/464, loss: 0.018839845433831215 2023-01-22 12:13:01.509055: step: 178/464, loss: 0.04663556441664696 2023-01-22 12:13:02.126247: step: 180/464, loss: 0.02066127397119999 2023-01-22 12:13:02.765971: step: 182/464, loss: 0.10703632235527039 2023-01-22 12:13:03.368500: step: 184/464, loss: 0.1171896904706955 2023-01-22 12:13:03.928473: step: 186/464, loss: 0.14891201257705688 2023-01-22 12:13:04.562776: step: 188/464, loss: 0.13603384792804718 2023-01-22 12:13:05.187316: step: 190/464, loss: 0.11203299462795258 2023-01-22 12:13:05.802461: step: 192/464, loss: 0.01096986886113882 2023-01-22 12:13:06.410164: step: 194/464, loss: 0.011121785268187523 2023-01-22 12:13:07.015317: step: 196/464, loss: 0.052049603313207626 2023-01-22 12:13:07.609971: step: 198/464, loss: 0.12041202187538147 2023-01-22 12:13:08.186248: step: 200/464, loss: 0.03418387845158577 2023-01-22 12:13:08.793317: step: 202/464, loss: 0.027816904708743095 2023-01-22 12:13:09.484423: step: 204/464, loss: 0.012873095460236073 2023-01-22 12:13:10.151710: step: 206/464, loss: 0.07279540598392487 2023-01-22 12:13:10.813241: step: 208/464, loss: 0.025248397141695023 2023-01-22 12:13:11.450053: step: 210/464, loss: 0.2562524378299713 2023-01-22 12:13:12.041801: step: 212/464, loss: 
0.05504145473241806 2023-01-22 12:13:12.632327: step: 214/464, loss: 0.031437475234270096 2023-01-22 12:13:13.243994: step: 216/464, loss: 0.021800341084599495 2023-01-22 12:13:13.921639: step: 218/464, loss: 0.041105590760707855 2023-01-22 12:13:14.600163: step: 220/464, loss: 0.025019172579050064 2023-01-22 12:13:15.265709: step: 222/464, loss: 0.051166728138923645 2023-01-22 12:13:15.863070: step: 224/464, loss: 0.0837060809135437 2023-01-22 12:13:16.467572: step: 226/464, loss: 0.032398246228694916 2023-01-22 12:13:17.090430: step: 228/464, loss: 0.06584363430738449 2023-01-22 12:13:17.732458: step: 230/464, loss: 0.1002630889415741 2023-01-22 12:13:18.412311: step: 232/464, loss: 0.14658352732658386 2023-01-22 12:13:19.027004: step: 234/464, loss: 0.022812798619270325 2023-01-22 12:13:19.634665: step: 236/464, loss: 0.0083860382437706 2023-01-22 12:13:20.265006: step: 238/464, loss: 0.059458956122398376 2023-01-22 12:13:20.909235: step: 240/464, loss: 0.2803577184677124 2023-01-22 12:13:21.611982: step: 242/464, loss: 0.054438553750514984 2023-01-22 12:13:22.266373: step: 244/464, loss: 0.07082608342170715 2023-01-22 12:13:22.932778: step: 246/464, loss: 0.39088043570518494 2023-01-22 12:13:23.579269: step: 248/464, loss: 0.0758625715970993 2023-01-22 12:13:24.195210: step: 250/464, loss: 0.03512102738022804 2023-01-22 12:13:24.856497: step: 252/464, loss: 0.0235645342618227 2023-01-22 12:13:25.535541: step: 254/464, loss: 0.23970156908035278 2023-01-22 12:13:26.213239: step: 256/464, loss: 0.0481828935444355 2023-01-22 12:13:26.838040: step: 258/464, loss: 0.025949949398636818 2023-01-22 12:13:27.480634: step: 260/464, loss: 0.05415412038564682 2023-01-22 12:13:28.069833: step: 262/464, loss: 0.01755969226360321 2023-01-22 12:13:28.719032: step: 264/464, loss: 0.011041303165256977 2023-01-22 12:13:29.413463: step: 266/464, loss: 0.05071423202753067 2023-01-22 12:13:29.993374: step: 268/464, loss: 0.10481975227594376 2023-01-22 12:13:30.703048: step: 270/464, loss: 0.05235401168465614 2023-01-22 12:13:31.419087: step: 272/464, loss: 0.04715341702103615 2023-01-22 12:13:32.035830: step: 274/464, loss: 0.016234122216701508 2023-01-22 12:13:32.619120: step: 276/464, loss: 0.03238828852772713 2023-01-22 12:13:33.301950: step: 278/464, loss: 0.005319828633219004 2023-01-22 12:13:33.934612: step: 280/464, loss: 0.005704961251467466 2023-01-22 12:13:34.537289: step: 282/464, loss: 0.022617634385824203 2023-01-22 12:13:35.251798: step: 284/464, loss: 0.14760327339172363 2023-01-22 12:13:35.876212: step: 286/464, loss: 0.0545155368745327 2023-01-22 12:13:36.492669: step: 288/464, loss: 0.03985011577606201 2023-01-22 12:13:37.093537: step: 290/464, loss: 0.06032747030258179 2023-01-22 12:13:37.800761: step: 292/464, loss: 0.18031242489814758 2023-01-22 12:13:38.442429: step: 294/464, loss: 0.0449274517595768 2023-01-22 12:13:39.063681: step: 296/464, loss: 0.06015952676534653 2023-01-22 12:13:39.672795: step: 298/464, loss: 0.07454682886600494 2023-01-22 12:13:40.411094: step: 300/464, loss: 0.012659909203648567 2023-01-22 12:13:41.023685: step: 302/464, loss: 0.013314616866409779 2023-01-22 12:13:41.637555: step: 304/464, loss: 0.13898083567619324 2023-01-22 12:13:42.274286: step: 306/464, loss: 0.13636218011379242 2023-01-22 12:13:43.003160: step: 308/464, loss: 0.04414936900138855 2023-01-22 12:13:43.685581: step: 310/464, loss: 0.04620746150612831 2023-01-22 12:13:44.278464: step: 312/464, loss: 0.05739176645874977 2023-01-22 12:13:44.956108: step: 314/464, loss: 0.01256087888032198 
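
Every entry in the stream above follows the same fixed pattern, "<timestamp>: step: <n>/464, loss: <value>". The short Python sketch below is not part of train.py (the log file name train.log is hypothetical); it parses those entries and reports their mean and their largest value, on the assumption that the "Loss:" figure printed at each "==========" epoch boundary is simply the mean of the logged step losses.

import re

# Pattern of one entry in this log: "<timestamp>: step: <n>/464, loss: <value>"
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.]+)")

def summarize_step_losses(log_text: str) -> tuple[float, int, float]:
    """Return (mean loss, step number with the largest loss, that loss)."""
    entries = [(int(step), float(loss)) for step, loss in STEP_RE.findall(log_text)]
    if not entries:
        raise ValueError("no step entries found in the log")
    mean = sum(loss for _, loss in entries) / len(entries)
    worst_step, worst_loss = max(entries, key=lambda entry: entry[1])
    return mean, worst_step, worst_loss

if __name__ == "__main__":
    with open("train.log", encoding="utf-8") as handle:  # hypothetical file name
        mean, worst_step, worst_loss = summarize_step_losses(handle.read())
        print(f"mean loss: {mean:.3f}, worst step: {worst_step} ({worst_loss:.3f})")

Run over a single epoch's entries, the "worst step" output makes the sporadic loss spikes in this stream (for example 10.598 at step 48 of this epoch) easy to spot.
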
2023-01-22 12:13:45.594767: step: 316/464, loss: 0.2560037672519684 2023-01-22 12:13:46.232535: step: 318/464, loss: 0.11795883625745773 2023-01-22 12:13:46.884384: step: 320/464, loss: 0.25905025005340576 2023-01-22 12:13:47.533845: step: 322/464, loss: 0.043898455798625946 2023-01-22 12:13:48.186249: step: 324/464, loss: 0.01372869685292244 2023-01-22 12:13:48.777797: step: 326/464, loss: 0.02278818003833294 2023-01-22 12:13:49.405798: step: 328/464, loss: 0.019648827612400055 2023-01-22 12:13:50.084191: step: 330/464, loss: 0.9100474119186401 2023-01-22 12:13:50.729214: step: 332/464, loss: 0.025974465534090996 2023-01-22 12:13:51.303488: step: 334/464, loss: 0.04663110896945 2023-01-22 12:13:51.826636: step: 336/464, loss: 0.06748991459608078 2023-01-22 12:13:52.508369: step: 338/464, loss: 0.050867773592472076 2023-01-22 12:13:53.153814: step: 340/464, loss: 3.019404411315918 2023-01-22 12:13:53.746562: step: 342/464, loss: 0.005875057075172663 2023-01-22 12:13:54.298789: step: 344/464, loss: 0.09006526321172714 2023-01-22 12:13:54.871972: step: 346/464, loss: 0.27136194705963135 2023-01-22 12:13:55.491768: step: 348/464, loss: 0.03859527409076691 2023-01-22 12:13:56.070608: step: 350/464, loss: 0.029409943148493767 2023-01-22 12:13:56.733685: step: 352/464, loss: 0.07346473634243011 2023-01-22 12:13:57.374131: step: 354/464, loss: 0.014956225641071796 2023-01-22 12:13:57.968888: step: 356/464, loss: 0.02973783016204834 2023-01-22 12:13:58.614204: step: 358/464, loss: 0.10100691020488739 2023-01-22 12:13:59.241418: step: 360/464, loss: 0.03443049639463425 2023-01-22 12:13:59.894034: step: 362/464, loss: 0.06473658233880997 2023-01-22 12:14:00.493418: step: 364/464, loss: 0.018873097375035286 2023-01-22 12:14:01.112409: step: 366/464, loss: 0.0961344912648201 2023-01-22 12:14:01.753792: step: 368/464, loss: 0.2247253954410553 2023-01-22 12:14:02.317769: step: 370/464, loss: 0.06430181860923767 2023-01-22 12:14:02.958482: step: 372/464, loss: 0.02368774078786373 2023-01-22 12:14:03.624276: step: 374/464, loss: 0.010254189372062683 2023-01-22 12:14:04.237643: step: 376/464, loss: 0.033824507147073746 2023-01-22 12:14:04.843589: step: 378/464, loss: 0.0350734181702137 2023-01-22 12:14:05.457306: step: 380/464, loss: 0.09410750865936279 2023-01-22 12:14:06.029566: step: 382/464, loss: 0.036839719861745834 2023-01-22 12:14:06.629241: step: 384/464, loss: 0.0061513688415288925 2023-01-22 12:14:07.227180: step: 386/464, loss: 0.02870376594364643 2023-01-22 12:14:07.854935: step: 388/464, loss: 0.10995577275753021 2023-01-22 12:14:08.474926: step: 390/464, loss: 0.0530233196914196 2023-01-22 12:14:09.119943: step: 392/464, loss: 0.05366725102066994 2023-01-22 12:14:09.721172: step: 394/464, loss: 0.16559071838855743 2023-01-22 12:14:10.324152: step: 396/464, loss: 0.003926682285964489 2023-01-22 12:14:10.928108: step: 398/464, loss: 0.032966699451208115 2023-01-22 12:14:11.555808: step: 400/464, loss: 0.02813999354839325 2023-01-22 12:14:12.195581: step: 402/464, loss: 0.013925126753747463 2023-01-22 12:14:12.814050: step: 404/464, loss: 0.0418202169239521 2023-01-22 12:14:13.406616: step: 406/464, loss: 0.1888405680656433 2023-01-22 12:14:14.002813: step: 408/464, loss: 0.029980331659317017 2023-01-22 12:14:14.613111: step: 410/464, loss: 0.12852568924427032 2023-01-22 12:14:15.180227: step: 412/464, loss: 0.06435182690620422 2023-01-22 12:14:15.820580: step: 414/464, loss: 0.024199888110160828 2023-01-22 12:14:16.488507: step: 416/464, loss: 0.008535795845091343 2023-01-22 12:14:17.134644: 
step: 418/464, loss: 0.04710658639669418 2023-01-22 12:14:17.798891: step: 420/464, loss: 0.09452218562364578 2023-01-22 12:14:18.431395: step: 422/464, loss: 0.01563073880970478 2023-01-22 12:14:19.086700: step: 424/464, loss: 0.08478792756795883 2023-01-22 12:14:19.735892: step: 426/464, loss: 0.016601169481873512 2023-01-22 12:14:20.350049: step: 428/464, loss: 0.13459746539592743 2023-01-22 12:14:21.009101: step: 430/464, loss: 0.28737953305244446 2023-01-22 12:14:21.646628: step: 432/464, loss: 0.17553038895130157 2023-01-22 12:14:22.326100: step: 434/464, loss: 0.10274675488471985 2023-01-22 12:14:22.945398: step: 436/464, loss: 0.07716778665781021 2023-01-22 12:14:23.610073: step: 438/464, loss: 0.09986363351345062 2023-01-22 12:14:24.219124: step: 440/464, loss: 1.048673391342163 2023-01-22 12:14:24.835175: step: 442/464, loss: 0.01017417199909687 2023-01-22 12:14:25.467335: step: 444/464, loss: 0.04866155609488487 2023-01-22 12:14:26.074182: step: 446/464, loss: 0.055671293288469315 2023-01-22 12:14:26.648151: step: 448/464, loss: 0.08362412452697754 2023-01-22 12:14:27.281853: step: 450/464, loss: 0.04041793569922447 2023-01-22 12:14:27.909299: step: 452/464, loss: 0.02534993179142475 2023-01-22 12:14:28.522295: step: 454/464, loss: 0.1549074947834015 2023-01-22 12:14:29.103987: step: 456/464, loss: 0.036239758133888245 2023-01-22 12:14:29.717756: step: 458/464, loss: 0.07680630683898926 2023-01-22 12:14:30.348955: step: 460/464, loss: 0.029601193964481354 2023-01-22 12:14:30.981299: step: 462/464, loss: 0.13906963169574738 2023-01-22 12:14:31.623775: step: 464/464, loss: 0.07985132932662964 2023-01-22 12:14:32.228441: step: 466/464, loss: 0.09230957180261612 2023-01-22 12:14:32.941150: step: 468/464, loss: 0.16694991290569305 2023-01-22 12:14:33.613190: step: 470/464, loss: 1.2943658828735352 2023-01-22 12:14:34.235383: step: 472/464, loss: 0.007379346992820501 2023-01-22 12:14:34.878082: step: 474/464, loss: 0.020899252966046333 2023-01-22 12:14:35.506260: step: 476/464, loss: 0.004411220550537109 2023-01-22 12:14:36.144103: step: 478/464, loss: 0.044038306921720505 2023-01-22 12:14:36.743026: step: 480/464, loss: 0.021422799676656723 2023-01-22 12:14:37.336598: step: 482/464, loss: 0.02925972454249859 2023-01-22 12:14:37.951826: step: 484/464, loss: 0.09991101175546646 2023-01-22 12:14:38.587968: step: 486/464, loss: 0.06824138760566711 2023-01-22 12:14:39.237777: step: 488/464, loss: 0.04947546496987343 2023-01-22 12:14:39.863541: step: 490/464, loss: 0.020775338634848595 2023-01-22 12:14:40.468115: step: 492/464, loss: 0.7242358922958374 2023-01-22 12:14:41.151765: step: 494/464, loss: 0.0369894877076149 2023-01-22 12:14:41.731632: step: 496/464, loss: 0.012789350003004074 2023-01-22 12:14:42.366836: step: 498/464, loss: 0.07767671346664429 2023-01-22 12:14:42.997670: step: 500/464, loss: 0.006513867061585188 2023-01-22 12:14:43.612910: step: 502/464, loss: 0.0035043805837631226 2023-01-22 12:14:44.240854: step: 504/464, loss: 0.09234146773815155 2023-01-22 12:14:44.839182: step: 506/464, loss: 0.12091848999261856 2023-01-22 12:14:45.459218: step: 508/464, loss: 0.04351840168237686 2023-01-22 12:14:46.124038: step: 510/464, loss: 0.08592573553323746 2023-01-22 12:14:46.764543: step: 512/464, loss: 0.02391248755156994 2023-01-22 12:14:47.405381: step: 514/464, loss: 0.03436162322759628 2023-01-22 12:14:48.013707: step: 516/464, loss: 0.07426901161670685 2023-01-22 12:14:48.772681: step: 518/464, loss: 0.05543963238596916 2023-01-22 12:14:49.499111: step: 520/464, loss: 
0.04919443279504776 2023-01-22 12:14:50.119790: step: 522/464, loss: 0.5651112198829651 2023-01-22 12:14:50.672672: step: 524/464, loss: 0.4395751655101776 2023-01-22 12:14:51.276534: step: 526/464, loss: 0.09639833122491837 2023-01-22 12:14:51.957482: step: 528/464, loss: 0.08385375142097473 2023-01-22 12:14:52.529800: step: 530/464, loss: 0.07988838851451874 2023-01-22 12:14:53.212730: step: 532/464, loss: 0.16797766089439392 2023-01-22 12:14:53.844092: step: 534/464, loss: 0.5436436533927917 2023-01-22 12:14:54.464777: step: 536/464, loss: 0.0218205489218235 2023-01-22 12:14:55.110767: step: 538/464, loss: 0.03554167598485947 2023-01-22 12:14:55.737376: step: 540/464, loss: 0.01219553779810667 2023-01-22 12:14:56.329335: step: 542/464, loss: 0.060262829065322876 2023-01-22 12:14:57.045992: step: 544/464, loss: 0.018596313893795013 2023-01-22 12:14:57.719132: step: 546/464, loss: 1.0576659440994263 2023-01-22 12:14:58.344235: step: 548/464, loss: 0.014665772207081318 2023-01-22 12:14:58.980238: step: 550/464, loss: 0.17855624854564667 2023-01-22 12:14:59.568154: step: 552/464, loss: 0.09625093638896942 2023-01-22 12:15:00.165497: step: 554/464, loss: 0.03178940713405609 2023-01-22 12:15:00.779637: step: 556/464, loss: 0.3213636875152588 2023-01-22 12:15:01.388505: step: 558/464, loss: 0.04676403850317001 2023-01-22 12:15:02.033216: step: 560/464, loss: 0.03744920343160629 2023-01-22 12:15:02.666421: step: 562/464, loss: 0.20983579754829407 2023-01-22 12:15:03.308168: step: 564/464, loss: 0.026830270886421204 2023-01-22 12:15:04.043867: step: 566/464, loss: 0.052127353847026825 2023-01-22 12:15:04.611520: step: 568/464, loss: 0.12017125636339188 2023-01-22 12:15:05.262987: step: 570/464, loss: 0.050923123955726624 2023-01-22 12:15:05.884906: step: 572/464, loss: 0.07927900552749634 2023-01-22 12:15:06.514041: step: 574/464, loss: 0.014502828009426594 2023-01-22 12:15:07.090901: step: 576/464, loss: 0.040527619421482086 2023-01-22 12:15:07.716611: step: 578/464, loss: 0.019206058233976364 2023-01-22 12:15:08.362222: step: 580/464, loss: 0.03054519183933735 2023-01-22 12:15:08.909429: step: 582/464, loss: 0.17930087447166443 2023-01-22 12:15:09.577388: step: 584/464, loss: 0.24165508151054382 2023-01-22 12:15:10.146187: step: 586/464, loss: 0.04640674591064453 2023-01-22 12:15:10.714744: step: 588/464, loss: 0.07006865739822388 2023-01-22 12:15:11.295022: step: 590/464, loss: 0.023412950336933136 2023-01-22 12:15:11.965962: step: 592/464, loss: 1.1467721462249756 2023-01-22 12:15:12.665616: step: 594/464, loss: 0.02414711005985737 2023-01-22 12:15:13.263591: step: 596/464, loss: 0.1264013797044754 2023-01-22 12:15:13.892412: step: 598/464, loss: 7.126690864562988 2023-01-22 12:15:14.494146: step: 600/464, loss: 0.04586399346590042 2023-01-22 12:15:15.138585: step: 602/464, loss: 0.01952878199517727 2023-01-22 12:15:15.728480: step: 604/464, loss: 0.01158772874623537 2023-01-22 12:15:16.371724: step: 606/464, loss: 0.030504167079925537 2023-01-22 12:15:17.014747: step: 608/464, loss: 0.0007740100263617933 2023-01-22 12:15:17.649220: step: 610/464, loss: 0.04654207453131676 2023-01-22 12:15:18.227842: step: 612/464, loss: 0.045558538287878036 2023-01-22 12:15:18.795685: step: 614/464, loss: 0.03136839345097542 2023-01-22 12:15:19.404450: step: 616/464, loss: 0.11822475492954254 2023-01-22 12:15:19.993138: step: 618/464, loss: 0.14972276985645294 2023-01-22 12:15:20.617861: step: 620/464, loss: 0.1923346370458603 2023-01-22 12:15:21.275258: step: 622/464, loss: 0.03167210519313812 2023-01-22 
12:15:22.020342: step: 624/464, loss: 0.02385994978249073 2023-01-22 12:15:22.628948: step: 626/464, loss: 0.017547806724905968 2023-01-22 12:15:23.325092: step: 628/464, loss: 0.030291052535176277 2023-01-22 12:15:23.952604: step: 630/464, loss: 0.05227775126695633 2023-01-22 12:15:24.583596: step: 632/464, loss: 0.09638462960720062 2023-01-22 12:15:25.169077: step: 634/464, loss: 0.04361134022474289 2023-01-22 12:15:25.834128: step: 636/464, loss: 0.12159812450408936 2023-01-22 12:15:26.407115: step: 638/464, loss: 0.2759445905685425 2023-01-22 12:15:26.995217: step: 640/464, loss: 0.02072410099208355 2023-01-22 12:15:27.719739: step: 642/464, loss: 0.5923712849617004 2023-01-22 12:15:28.333282: step: 644/464, loss: 0.10416293144226074 2023-01-22 12:15:28.938026: step: 646/464, loss: 0.029567325487732887 2023-01-22 12:15:29.498461: step: 648/464, loss: 0.044003792107105255 2023-01-22 12:15:30.096957: step: 650/464, loss: 0.4801039695739746 2023-01-22 12:15:30.728128: step: 652/464, loss: 0.1835222840309143 2023-01-22 12:15:31.337478: step: 654/464, loss: 0.1707906872034073 2023-01-22 12:15:31.961233: step: 656/464, loss: 0.05018517002463341 2023-01-22 12:15:32.552263: step: 658/464, loss: 0.14961649477481842 2023-01-22 12:15:33.167143: step: 660/464, loss: 0.15067699551582336 2023-01-22 12:15:33.831879: step: 662/464, loss: 5.681148052215576 2023-01-22 12:15:34.491341: step: 664/464, loss: 0.02603510022163391 2023-01-22 12:15:35.162419: step: 666/464, loss: 0.059188805520534515 2023-01-22 12:15:35.844080: step: 668/464, loss: 0.06851480901241302 2023-01-22 12:15:36.485180: step: 670/464, loss: 0.03866920247673988 2023-01-22 12:15:37.146375: step: 672/464, loss: 0.11503680050373077 2023-01-22 12:15:37.746011: step: 674/464, loss: 0.07259032875299454 2023-01-22 12:15:38.349965: step: 676/464, loss: 0.01903834007680416 2023-01-22 12:15:38.952437: step: 678/464, loss: 0.03478504717350006 2023-01-22 12:15:39.604461: step: 680/464, loss: 0.02486582100391388 2023-01-22 12:15:40.203073: step: 682/464, loss: 0.07330082356929779 2023-01-22 12:15:40.808232: step: 684/464, loss: 0.2603822946548462 2023-01-22 12:15:41.434534: step: 686/464, loss: 0.01605270616710186 2023-01-22 12:15:42.118891: step: 688/464, loss: 0.12658101320266724 2023-01-22 12:15:42.784937: step: 690/464, loss: 0.07447104901075363 2023-01-22 12:15:43.374831: step: 692/464, loss: 0.04469962790608406 2023-01-22 12:15:43.981950: step: 694/464, loss: 0.014046311378479004 2023-01-22 12:15:44.585718: step: 696/464, loss: 0.06072556599974632 2023-01-22 12:15:45.187551: step: 698/464, loss: 0.357933908700943 2023-01-22 12:15:45.829800: step: 700/464, loss: 0.4937451183795929 2023-01-22 12:15:46.476430: step: 702/464, loss: 0.04278302937746048 2023-01-22 12:15:47.113063: step: 704/464, loss: 0.0051592267118394375 2023-01-22 12:15:47.735554: step: 706/464, loss: 0.02061188779771328 2023-01-22 12:15:48.410981: step: 708/464, loss: 0.03858703374862671 2023-01-22 12:15:49.092340: step: 710/464, loss: 0.08712794631719589 2023-01-22 12:15:49.697657: step: 712/464, loss: 0.07264778763055801 2023-01-22 12:15:50.339201: step: 714/464, loss: 0.013997122645378113 2023-01-22 12:15:50.975882: step: 716/464, loss: 0.02950339764356613 2023-01-22 12:15:51.598383: step: 718/464, loss: 0.04933254048228264 2023-01-22 12:15:52.227056: step: 720/464, loss: 0.008906682021915913 2023-01-22 12:15:52.850075: step: 722/464, loss: 0.03700536489486694 2023-01-22 12:15:53.435575: step: 724/464, loss: 0.06359779834747314 2023-01-22 12:15:54.141489: step: 726/464, 
loss: 0.06773487478494644 2023-01-22 12:15:54.752790: step: 728/464, loss: 0.06296397745609283 2023-01-22 12:15:55.391016: step: 730/464, loss: 0.11643794178962708 2023-01-22 12:15:55.971470: step: 732/464, loss: 0.14957650005817413 2023-01-22 12:15:56.605409: step: 734/464, loss: 0.026945363730192184 2023-01-22 12:15:57.246229: step: 736/464, loss: 0.011237741447985172 2023-01-22 12:15:57.931492: step: 738/464, loss: 0.0552695207297802 2023-01-22 12:15:58.566193: step: 740/464, loss: 0.0017769387923181057 2023-01-22 12:15:59.197575: step: 742/464, loss: 0.0779266208410263 2023-01-22 12:15:59.817622: step: 744/464, loss: 0.9308744668960571 2023-01-22 12:16:00.438568: step: 746/464, loss: 0.0353737510740757 2023-01-22 12:16:01.125863: step: 748/464, loss: 0.09376434981822968 2023-01-22 12:16:01.737020: step: 750/464, loss: 0.10377569496631622 2023-01-22 12:16:02.349689: step: 752/464, loss: 0.3020147383213043 2023-01-22 12:16:03.011430: step: 754/464, loss: 0.05934539809823036 2023-01-22 12:16:03.568647: step: 756/464, loss: 0.05946807563304901 2023-01-22 12:16:04.203554: step: 758/464, loss: 0.04041888937354088 2023-01-22 12:16:04.783170: step: 760/464, loss: 0.13945092260837555 2023-01-22 12:16:05.421125: step: 762/464, loss: 0.1342869997024536 2023-01-22 12:16:06.056585: step: 764/464, loss: 0.022831939160823822 2023-01-22 12:16:06.651665: step: 766/464, loss: 0.05797601491212845 2023-01-22 12:16:07.306679: step: 768/464, loss: 0.04214341565966606 2023-01-22 12:16:07.926463: step: 770/464, loss: 0.06823202967643738 2023-01-22 12:16:08.538833: step: 772/464, loss: 0.04907310754060745 2023-01-22 12:16:09.249571: step: 774/464, loss: 0.04450121521949768 2023-01-22 12:16:09.897580: step: 776/464, loss: 0.23292294144630432 2023-01-22 12:16:10.505093: step: 778/464, loss: 0.06011654809117317 2023-01-22 12:16:11.151448: step: 780/464, loss: 0.3196452260017395 2023-01-22 12:16:11.725460: step: 782/464, loss: 0.03805053234100342 2023-01-22 12:16:12.416558: step: 784/464, loss: 0.0314154289662838 2023-01-22 12:16:13.069426: step: 786/464, loss: 0.13404737412929535 2023-01-22 12:16:13.675630: step: 788/464, loss: 0.08828222751617432 2023-01-22 12:16:14.269820: step: 790/464, loss: 0.001246851752512157 2023-01-22 12:16:14.910963: step: 792/464, loss: 0.07216297090053558 2023-01-22 12:16:15.644747: step: 794/464, loss: 0.05633767321705818 2023-01-22 12:16:16.232241: step: 796/464, loss: 0.14546740055084229 2023-01-22 12:16:16.835149: step: 798/464, loss: 0.056861281394958496 2023-01-22 12:16:17.512079: step: 800/464, loss: 0.2274370640516281 2023-01-22 12:16:18.206523: step: 802/464, loss: 0.027379153296351433 2023-01-22 12:16:18.796027: step: 804/464, loss: 0.030357034876942635 2023-01-22 12:16:19.390478: step: 806/464, loss: 0.13116410374641418 2023-01-22 12:16:19.988564: step: 808/464, loss: 0.09121730178594589 2023-01-22 12:16:20.639470: step: 810/464, loss: 0.27981850504875183 2023-01-22 12:16:21.254684: step: 812/464, loss: 0.0588790699839592 2023-01-22 12:16:21.881269: step: 814/464, loss: 0.017142634838819504 2023-01-22 12:16:22.519504: step: 816/464, loss: 0.04943757504224777 2023-01-22 12:16:23.231144: step: 818/464, loss: 0.01358798611909151 2023-01-22 12:16:23.876408: step: 820/464, loss: 0.0991472601890564 2023-01-22 12:16:24.506973: step: 822/464, loss: 0.05006462708115578 2023-01-22 12:16:25.096065: step: 824/464, loss: 0.005734651815146208 2023-01-22 12:16:25.671564: step: 826/464, loss: 0.027040397748351097 2023-01-22 12:16:26.318174: step: 828/464, loss: 0.1423128992319107 
2023-01-22 12:16:26.862526: step: 830/464, loss: 0.013830373995006084 2023-01-22 12:16:27.519263: step: 832/464, loss: 0.06289157271385193 2023-01-22 12:16:28.118312: step: 834/464, loss: 0.12293533980846405 2023-01-22 12:16:28.719988: step: 836/464, loss: 0.016394013538956642 2023-01-22 12:16:29.381098: step: 838/464, loss: 0.29480549693107605 2023-01-22 12:16:29.964749: step: 840/464, loss: 0.09442153573036194 2023-01-22 12:16:30.597741: step: 842/464, loss: 0.2897118330001831 2023-01-22 12:16:31.204623: step: 844/464, loss: 0.07965207099914551 2023-01-22 12:16:31.799107: step: 846/464, loss: 0.05758389085531235 2023-01-22 12:16:32.397156: step: 848/464, loss: 0.14077042043209076 2023-01-22 12:16:32.970100: step: 850/464, loss: 0.052814725786447525 2023-01-22 12:16:33.589724: step: 852/464, loss: 0.06765639781951904 2023-01-22 12:16:34.216591: step: 854/464, loss: 0.0761323794722557 2023-01-22 12:16:34.819445: step: 856/464, loss: 0.06979372352361679 2023-01-22 12:16:35.411199: step: 858/464, loss: 0.013624468818306923 2023-01-22 12:16:36.074692: step: 860/464, loss: 0.034657832235097885 2023-01-22 12:16:36.716476: step: 862/464, loss: 0.030351106077432632 2023-01-22 12:16:37.332715: step: 864/464, loss: 0.040423404425382614 2023-01-22 12:16:37.972489: step: 866/464, loss: 0.0385703444480896 2023-01-22 12:16:38.603366: step: 868/464, loss: 0.04118971526622772 2023-01-22 12:16:39.273945: step: 870/464, loss: 0.14877402782440186 2023-01-22 12:16:39.887651: step: 872/464, loss: 0.02471744269132614 2023-01-22 12:16:40.492919: step: 874/464, loss: 0.08140549808740616 2023-01-22 12:16:41.076723: step: 876/464, loss: 0.032378822565078735 2023-01-22 12:16:41.707829: step: 878/464, loss: 0.018628351390361786 2023-01-22 12:16:42.340842: step: 880/464, loss: 1.039273738861084 2023-01-22 12:16:42.969990: step: 882/464, loss: 0.013100219890475273 2023-01-22 12:16:43.562798: step: 884/464, loss: 0.04357944428920746 2023-01-22 12:16:44.166130: step: 886/464, loss: 0.02474367432296276 2023-01-22 12:16:44.829879: step: 888/464, loss: 0.010314030572772026 2023-01-22 12:16:45.426688: step: 890/464, loss: 0.05558737367391586 2023-01-22 12:16:45.973593: step: 892/464, loss: 0.07188452780246735 2023-01-22 12:16:46.567544: step: 894/464, loss: 0.012282581068575382 2023-01-22 12:16:47.203035: step: 896/464, loss: 0.03327278420329094 2023-01-22 12:16:47.937499: step: 898/464, loss: 0.11073649674654007 2023-01-22 12:16:48.498939: step: 900/464, loss: 0.048409298062324524 2023-01-22 12:16:49.189334: step: 902/464, loss: 1.977931261062622 2023-01-22 12:16:49.869765: step: 904/464, loss: 0.010337852872908115 2023-01-22 12:16:50.491192: step: 906/464, loss: 0.0512164831161499 2023-01-22 12:16:51.151682: step: 908/464, loss: 0.051889412105083466 2023-01-22 12:16:51.856206: step: 910/464, loss: 0.04711935296654701 2023-01-22 12:16:52.496988: step: 912/464, loss: 0.023373626172542572 2023-01-22 12:16:53.184555: step: 914/464, loss: 0.0980856716632843 2023-01-22 12:16:53.822169: step: 916/464, loss: 0.07699963450431824 2023-01-22 12:16:54.383058: step: 918/464, loss: 0.11549467593431473 2023-01-22 12:16:55.037314: step: 920/464, loss: 0.014528129249811172 2023-01-22 12:16:55.680985: step: 922/464, loss: 0.08139970153570175 2023-01-22 12:16:56.301278: step: 924/464, loss: 0.019468706101179123 2023-01-22 12:16:56.997414: step: 926/464, loss: 0.36008772253990173 2023-01-22 12:16:57.625248: step: 928/464, loss: 0.02395019680261612 2023-01-22 12:16:58.105662: step: 930/464, loss: 1.4262751340866089 
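
The evaluation block that follows, like every other one in this log, reports a template score, a slot score, and a 'combined' value per language. The numbers are consistent with the standard F1 formula and with 'combined' being the product of the two F1 values; the sketch below checks this against the epoch-20 Dev Chinese figures printed just after it. The relationship is inferred from the logged values, not taken from the project's scoring code.

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    # In every block of this log, 'combined' equals template_f1 * slot_f1.
    return template_f1 * slot_f1

# Epoch-20 Dev Chinese figures from the block below.
template_f1_logged = 0.7368421052631579
slot_p, slot_r = 0.2879232283464567, 0.3469283681214421

assert abs(f1(slot_p, slot_r) - 0.31468373493975904) < 1e-9
assert abs(combined(template_f1_logged, f1(slot_p, slot_r)) - 0.2318722257450856) < 1e-9
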
==================================================
Loss: 0.163
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2879232283464567, 'r': 0.3469283681214421, 'f1': 0.31468373493975904}, 'combined': 0.2318722257450856, 'epoch': 20}
Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28560933462165283, 'r': 0.32962985592113697, 'f1': 0.30604475549747806}, 'combined': 0.19980123934032246, 'epoch': 20}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2901736111111111, 'r': 0.36340528146742573, 'f1': 0.3226867452962651}, 'combined': 0.2377691807446164, 'epoch': 20}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29270283215815596, 'r': 0.32606234611735757, 'f1': 0.3084833326745087}, 'combined': 0.20139326381859118, 'epoch': 20}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30456807815581854, 'r': 0.3606270982338345, 'f1': 0.3302354140212524}, 'combined': 0.24333135769987016, 'epoch': 20}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2965543382750373, 'r': 0.32079064323966383, 'f1': 0.3081967450269024}, 'combined': 0.2012061651470969, 'epoch': 20}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24743589743589742, 'r': 0.4595238095238095, 'f1': 0.32166666666666666}, 'combined': 0.21444444444444444, 'epoch': 20}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22560975609756098, 'r': 0.40217391304347827, 'f1': 0.2890625}, 'combined': 0.14453125, 'epoch': 20}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'epoch': 20}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1':
0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:19:38.636488: step: 2/464, loss: 0.2879459857940674 2023-01-22 12:19:39.226848: step: 4/464, loss: 0.02850549854338169 2023-01-22 12:19:39.766391: step: 6/464, loss: 0.0287797749042511 2023-01-22 12:19:40.365310: step: 8/464, loss: 0.06807540357112885 2023-01-22 12:19:41.014700: step: 10/464, loss: 0.10075829178094864 2023-01-22 12:19:41.695455: step: 12/464, loss: 0.060137659311294556 2023-01-22 12:19:42.313562: step: 14/464, loss: 0.03860960528254509 2023-01-22 12:19:42.980243: step: 16/464, loss: 0.1313168853521347 2023-01-22 12:19:43.564260: step: 18/464, loss: 0.03235574811697006 2023-01-22 12:19:44.191468: step: 20/464, loss: 0.028379498049616814 2023-01-22 12:19:44.806519: step: 22/464, loss: 0.03577785566449165 2023-01-22 12:19:45.418006: step: 24/464, loss: 0.08534808456897736 2023-01-22 12:19:46.027904: step: 26/464, loss: 0.05950196087360382 2023-01-22 12:19:46.664765: step: 28/464, loss: 0.08497898280620575 2023-01-22 12:19:47.266635: step: 30/464, loss: 5.2432098388671875 2023-01-22 12:19:47.920245: step: 32/464, loss: 0.3288305401802063 2023-01-22 12:19:48.525043: step: 34/464, loss: 0.027298834174871445 2023-01-22 12:19:49.160107: step: 36/464, loss: 0.020640632137656212 2023-01-22 12:19:49.769758: step: 38/464, loss: 0.6343681812286377 2023-01-22 12:19:50.376279: step: 40/464, loss: 0.02846040204167366 2023-01-22 12:19:50.996013: step: 42/464, loss: 0.031150346621870995 2023-01-22 12:19:51.641952: step: 44/464, loss: 0.057186901569366455 2023-01-22 12:19:52.237845: step: 46/464, loss: 0.02878502942621708 2023-01-22 12:19:52.827327: step: 48/464, loss: 0.01139222364872694 2023-01-22 12:19:53.492606: step: 50/464, loss: 0.18849028646945953 2023-01-22 12:19:54.093027: step: 52/464, loss: 0.10385727137327194 2023-01-22 12:19:54.701482: step: 54/464, loss: 0.1948239952325821 2023-01-22 12:19:55.332069: step: 56/464, loss: 0.040434375405311584 2023-01-22 12:19:55.966621: step: 58/464, loss: 0.044722944498062134 2023-01-22 12:19:56.626443: step: 60/464, loss: 0.1531590223312378 2023-01-22 12:19:57.311286: step: 62/464, loss: 0.036119494587183 2023-01-22 12:19:57.909689: step: 64/464, loss: 0.17749238014221191 2023-01-22 12:19:58.488534: step: 66/464, loss: 0.09734697639942169 2023-01-22 12:19:59.060716: step: 68/464, loss: 0.035482268780469894 2023-01-22 12:19:59.715026: step: 70/464, loss: 0.017056122422218323 2023-01-22 12:20:00.371115: step: 72/464, loss: 0.009295911528170109 2023-01-22 12:20:01.019890: step: 74/464, loss: 0.23246900737285614 2023-01-22 12:20:01.642771: step: 76/464, loss: 0.1933257281780243 2023-01-22 12:20:02.268390: step: 78/464, loss: 0.0870656743645668 2023-01-22 12:20:02.882203: step: 80/464, loss: 
0.022031113505363464 2023-01-22 12:20:03.547735: step: 82/464, loss: 0.027440907433629036 2023-01-22 12:20:04.215211: step: 84/464, loss: 0.128365620970726 2023-01-22 12:20:04.783832: step: 86/464, loss: 0.005240590311586857 2023-01-22 12:20:05.383733: step: 88/464, loss: 0.149997740983963 2023-01-22 12:20:05.998589: step: 90/464, loss: 0.1929507851600647 2023-01-22 12:20:06.625212: step: 92/464, loss: 0.15306395292282104 2023-01-22 12:20:07.285532: step: 94/464, loss: 0.02803787775337696 2023-01-22 12:20:07.873271: step: 96/464, loss: 0.04839000850915909 2023-01-22 12:20:08.422661: step: 98/464, loss: 0.01491641066968441 2023-01-22 12:20:09.035459: step: 100/464, loss: 0.08155541121959686 2023-01-22 12:20:09.619100: step: 102/464, loss: 0.041741661727428436 2023-01-22 12:20:10.227776: step: 104/464, loss: 0.02218620479106903 2023-01-22 12:20:10.808128: step: 106/464, loss: 0.026464005932211876 2023-01-22 12:20:11.464177: step: 108/464, loss: 0.04624956101179123 2023-01-22 12:20:12.082777: step: 110/464, loss: 0.08009933680295944 2023-01-22 12:20:12.715757: step: 112/464, loss: 0.0521833561360836 2023-01-22 12:20:13.363719: step: 114/464, loss: 0.030787384137511253 2023-01-22 12:20:14.037144: step: 116/464, loss: 0.07171545177698135 2023-01-22 12:20:14.669924: step: 118/464, loss: 0.012919297441840172 2023-01-22 12:20:15.227568: step: 120/464, loss: 0.2969965636730194 2023-01-22 12:20:15.843685: step: 122/464, loss: 0.027525799348950386 2023-01-22 12:20:16.495363: step: 124/464, loss: 0.011926285922527313 2023-01-22 12:20:17.035874: step: 126/464, loss: 0.04155060276389122 2023-01-22 12:20:17.621529: step: 128/464, loss: 0.029117681086063385 2023-01-22 12:20:18.195299: step: 130/464, loss: 0.021135665476322174 2023-01-22 12:20:18.795035: step: 132/464, loss: 0.021641455590724945 2023-01-22 12:20:19.417605: step: 134/464, loss: 0.015647945925593376 2023-01-22 12:20:20.029824: step: 136/464, loss: 0.08576782792806625 2023-01-22 12:20:20.573849: step: 138/464, loss: 0.9435380697250366 2023-01-22 12:20:21.218084: step: 140/464, loss: 0.0296559389680624 2023-01-22 12:20:21.818138: step: 142/464, loss: 0.015051405876874924 2023-01-22 12:20:22.473938: step: 144/464, loss: 3.710465908050537 2023-01-22 12:20:23.039141: step: 146/464, loss: 1.3897058963775635 2023-01-22 12:20:23.732974: step: 148/464, loss: 0.05914725735783577 2023-01-22 12:20:24.367244: step: 150/464, loss: 0.038997046649456024 2023-01-22 12:20:25.056741: step: 152/464, loss: 0.033092889934778214 2023-01-22 12:20:25.687677: step: 154/464, loss: 0.06606288254261017 2023-01-22 12:20:26.455884: step: 156/464, loss: 0.005674705840647221 2023-01-22 12:20:27.104174: step: 158/464, loss: 0.1593201756477356 2023-01-22 12:20:27.749402: step: 160/464, loss: 0.051348887383937836 2023-01-22 12:20:28.351522: step: 162/464, loss: 0.0321359857916832 2023-01-22 12:20:28.981246: step: 164/464, loss: 0.16339421272277832 2023-01-22 12:20:29.636266: step: 166/464, loss: 0.08392519503831863 2023-01-22 12:20:30.265900: step: 168/464, loss: 0.04907825589179993 2023-01-22 12:20:30.884934: step: 170/464, loss: 0.0903925821185112 2023-01-22 12:20:31.477495: step: 172/464, loss: 0.01728115975856781 2023-01-22 12:20:32.160009: step: 174/464, loss: 0.39139094948768616 2023-01-22 12:20:32.891427: step: 176/464, loss: 0.05329263582825661 2023-01-22 12:20:33.556998: step: 178/464, loss: 0.06931658834218979 2023-01-22 12:20:34.168468: step: 180/464, loss: 0.061838164925575256 2023-01-22 12:20:34.728198: step: 182/464, loss: 0.06392183154821396 2023-01-22 
12:20:35.463556: step: 184/464, loss: 0.11093329638242722 2023-01-22 12:20:36.093448: step: 186/464, loss: 0.027669671922922134 2023-01-22 12:20:36.788784: step: 188/464, loss: 0.05224090442061424 2023-01-22 12:20:37.451516: step: 190/464, loss: 0.1401863545179367 2023-01-22 12:20:38.007998: step: 192/464, loss: 0.02816525287926197 2023-01-22 12:20:38.658585: step: 194/464, loss: 0.023517200723290443 2023-01-22 12:20:39.210267: step: 196/464, loss: 0.0481773279607296 2023-01-22 12:20:39.844269: step: 198/464, loss: 0.02006007730960846 2023-01-22 12:20:40.437764: step: 200/464, loss: 0.011920628137886524 2023-01-22 12:20:41.099584: step: 202/464, loss: 0.0183256845921278 2023-01-22 12:20:41.746842: step: 204/464, loss: 0.04755396768450737 2023-01-22 12:20:42.418153: step: 206/464, loss: 0.0879100114107132 2023-01-22 12:20:43.069237: step: 208/464, loss: 0.03002486377954483 2023-01-22 12:20:43.749379: step: 210/464, loss: 0.04555777460336685 2023-01-22 12:20:44.416563: step: 212/464, loss: 0.02567063271999359 2023-01-22 12:20:45.052362: step: 214/464, loss: 0.007622311823070049 2023-01-22 12:20:45.634099: step: 216/464, loss: 0.3607770502567291 2023-01-22 12:20:46.309293: step: 218/464, loss: 0.017416903749108315 2023-01-22 12:20:46.932106: step: 220/464, loss: 0.08756670355796814 2023-01-22 12:20:47.570288: step: 222/464, loss: 0.07937604188919067 2023-01-22 12:20:48.222361: step: 224/464, loss: 0.1075153797864914 2023-01-22 12:20:48.765655: step: 226/464, loss: 0.0323040634393692 2023-01-22 12:20:49.387165: step: 228/464, loss: 0.03348545730113983 2023-01-22 12:20:50.015274: step: 230/464, loss: 2.6350879669189453 2023-01-22 12:20:50.600693: step: 232/464, loss: 0.011316961608827114 2023-01-22 12:20:51.263066: step: 234/464, loss: 0.11862074583768845 2023-01-22 12:20:51.859676: step: 236/464, loss: 0.05245814472436905 2023-01-22 12:20:52.523179: step: 238/464, loss: 0.3137938380241394 2023-01-22 12:20:53.150185: step: 240/464, loss: 0.08349335193634033 2023-01-22 12:20:53.814072: step: 242/464, loss: 0.11334215849637985 2023-01-22 12:20:54.432432: step: 244/464, loss: 0.0941920280456543 2023-01-22 12:20:55.011753: step: 246/464, loss: 0.010574623011052608 2023-01-22 12:20:55.625579: step: 248/464, loss: 0.01173438224941492 2023-01-22 12:20:56.257447: step: 250/464, loss: 0.04012186452746391 2023-01-22 12:20:56.840720: step: 252/464, loss: 0.04171859472990036 2023-01-22 12:20:57.488321: step: 254/464, loss: 0.03524204343557358 2023-01-22 12:20:58.188956: step: 256/464, loss: 0.028740596026182175 2023-01-22 12:20:58.863114: step: 258/464, loss: 0.03225775808095932 2023-01-22 12:20:59.419519: step: 260/464, loss: 0.043071161955595016 2023-01-22 12:20:59.940240: step: 262/464, loss: 0.02790139988064766 2023-01-22 12:21:00.560013: step: 264/464, loss: 0.054161399602890015 2023-01-22 12:21:01.098872: step: 266/464, loss: 0.10258924216032028 2023-01-22 12:21:01.695679: step: 268/464, loss: 0.05986592546105385 2023-01-22 12:21:02.480923: step: 270/464, loss: 0.016713662073016167 2023-01-22 12:21:03.075397: step: 272/464, loss: 0.07509545236825943 2023-01-22 12:21:03.727581: step: 274/464, loss: 0.6472654342651367 2023-01-22 12:21:04.299414: step: 276/464, loss: 0.11911797523498535 2023-01-22 12:21:04.920821: step: 278/464, loss: 0.04465444013476372 2023-01-22 12:21:05.504075: step: 280/464, loss: 0.026976360008120537 2023-01-22 12:21:06.127677: step: 282/464, loss: 0.17191733419895172 2023-01-22 12:21:06.774864: step: 284/464, loss: 0.06832541525363922 2023-01-22 12:21:07.392915: step: 286/464, 
loss: 0.016170917078852654 2023-01-22 12:21:08.011750: step: 288/464, loss: 0.030662380158901215 2023-01-22 12:21:08.696648: step: 290/464, loss: 0.10576559603214264 2023-01-22 12:21:09.364174: step: 292/464, loss: 0.05849209055304527 2023-01-22 12:21:09.994981: step: 294/464, loss: 0.11040965467691422 2023-01-22 12:21:10.628751: step: 296/464, loss: 0.2339029610157013 2023-01-22 12:21:11.210839: step: 298/464, loss: 0.025430981069803238 2023-01-22 12:21:11.774974: step: 300/464, loss: 0.018815884366631508 2023-01-22 12:21:12.424377: step: 302/464, loss: 0.0253947451710701 2023-01-22 12:21:12.961997: step: 304/464, loss: 0.10287298262119293 2023-01-22 12:21:13.555956: step: 306/464, loss: 0.10712384432554245 2023-01-22 12:21:14.139755: step: 308/464, loss: 0.06882070004940033 2023-01-22 12:21:14.824949: step: 310/464, loss: 0.1400669515132904 2023-01-22 12:21:15.447144: step: 312/464, loss: 0.020639974623918533 2023-01-22 12:21:16.070596: step: 314/464, loss: 0.01266369316726923 2023-01-22 12:21:16.698018: step: 316/464, loss: 0.10999668389558792 2023-01-22 12:21:17.314133: step: 318/464, loss: 0.05323231965303421 2023-01-22 12:21:17.980362: step: 320/464, loss: 0.015401119366288185 2023-01-22 12:21:18.567976: step: 322/464, loss: 0.036088548600673676 2023-01-22 12:21:19.188693: step: 324/464, loss: 0.25261619687080383 2023-01-22 12:21:19.837676: step: 326/464, loss: 0.03083650954067707 2023-01-22 12:21:20.513691: step: 328/464, loss: 0.07151514291763306 2023-01-22 12:21:21.148182: step: 330/464, loss: 0.14306670427322388 2023-01-22 12:21:21.768417: step: 332/464, loss: 0.06048280745744705 2023-01-22 12:21:22.401723: step: 334/464, loss: 0.03349380940198898 2023-01-22 12:21:23.081465: step: 336/464, loss: 0.033371634781360626 2023-01-22 12:21:23.751400: step: 338/464, loss: 0.05449317395687103 2023-01-22 12:21:24.365592: step: 340/464, loss: 0.034356266260147095 2023-01-22 12:21:24.958685: step: 342/464, loss: 0.05067477002739906 2023-01-22 12:21:25.577028: step: 344/464, loss: 0.011599806137382984 2023-01-22 12:21:26.171060: step: 346/464, loss: 0.06270205974578857 2023-01-22 12:21:26.854512: step: 348/464, loss: 0.029605992138385773 2023-01-22 12:21:27.511868: step: 350/464, loss: 0.017472002655267715 2023-01-22 12:21:28.115742: step: 352/464, loss: 0.0753653347492218 2023-01-22 12:21:28.704326: step: 354/464, loss: 0.015513327904045582 2023-01-22 12:21:29.402610: step: 356/464, loss: 0.04514767974615097 2023-01-22 12:21:30.087630: step: 358/464, loss: 0.06842514127492905 2023-01-22 12:21:30.751244: step: 360/464, loss: 0.02094561979174614 2023-01-22 12:21:31.418802: step: 362/464, loss: 0.06777993589639664 2023-01-22 12:21:31.993746: step: 364/464, loss: 0.022707749158143997 2023-01-22 12:21:32.621630: step: 366/464, loss: 0.04803130403161049 2023-01-22 12:21:33.312401: step: 368/464, loss: 0.01649634726345539 2023-01-22 12:21:34.006351: step: 370/464, loss: 0.027657456696033478 2023-01-22 12:21:34.623955: step: 372/464, loss: 0.023393074050545692 2023-01-22 12:21:35.186979: step: 374/464, loss: 0.03944697603583336 2023-01-22 12:21:35.841161: step: 376/464, loss: 0.15143027901649475 2023-01-22 12:21:36.529430: step: 378/464, loss: 0.02022443525493145 2023-01-22 12:21:37.169932: step: 380/464, loss: 0.05097239837050438 2023-01-22 12:21:37.767459: step: 382/464, loss: 0.035139452666044235 2023-01-22 12:21:38.381691: step: 384/464, loss: 0.10621704906225204 2023-01-22 12:21:38.973493: step: 386/464, loss: 0.0067662461660802364 2023-01-22 12:21:39.590859: step: 388/464, loss: 
0.06667128950357437 2023-01-22 12:21:40.241700: step: 390/464, loss: 0.04963650554418564 2023-01-22 12:21:40.921803: step: 392/464, loss: 0.016870509833097458 2023-01-22 12:21:41.553424: step: 394/464, loss: 0.044276162981987 2023-01-22 12:21:42.172159: step: 396/464, loss: 0.00846901349723339 2023-01-22 12:21:42.785377: step: 398/464, loss: 0.07062599807977676 2023-01-22 12:21:43.391899: step: 400/464, loss: 0.046838875859975815 2023-01-22 12:21:44.103336: step: 402/464, loss: 0.16247150301933289 2023-01-22 12:21:44.738082: step: 404/464, loss: 0.08528237789869308 2023-01-22 12:21:45.335638: step: 406/464, loss: 0.03201433643698692 2023-01-22 12:21:45.910315: step: 408/464, loss: 0.005379590671509504 2023-01-22 12:21:46.569610: step: 410/464, loss: 0.03673448786139488 2023-01-22 12:21:47.229742: step: 412/464, loss: 1.2216758728027344 2023-01-22 12:21:47.911049: step: 414/464, loss: 0.026421865448355675 2023-01-22 12:21:48.532162: step: 416/464, loss: 0.11724504083395004 2023-01-22 12:21:49.204630: step: 418/464, loss: 0.0895252674818039 2023-01-22 12:21:49.783089: step: 420/464, loss: 0.036729466170072556 2023-01-22 12:21:50.433553: step: 422/464, loss: 0.05561475083231926 2023-01-22 12:21:51.059326: step: 424/464, loss: 0.2665712535381317 2023-01-22 12:21:51.659986: step: 426/464, loss: 0.048663366585969925 2023-01-22 12:21:52.299584: step: 428/464, loss: 0.02604263462126255 2023-01-22 12:21:52.954083: step: 430/464, loss: 0.09479238837957382 2023-01-22 12:21:53.611192: step: 432/464, loss: 0.056636255234479904 2023-01-22 12:21:54.232940: step: 434/464, loss: 0.04270360246300697 2023-01-22 12:21:54.874165: step: 436/464, loss: 0.06862106919288635 2023-01-22 12:21:55.502645: step: 438/464, loss: 0.10623826831579208 2023-01-22 12:21:56.092342: step: 440/464, loss: 0.0833999365568161 2023-01-22 12:21:56.657105: step: 442/464, loss: 0.009899923577904701 2023-01-22 12:21:57.257055: step: 444/464, loss: 0.028848154470324516 2023-01-22 12:21:57.869094: step: 446/464, loss: 0.08141449093818665 2023-01-22 12:21:58.486811: step: 448/464, loss: 0.4562784433364868 2023-01-22 12:21:59.123653: step: 450/464, loss: 0.019771141931414604 2023-01-22 12:21:59.744467: step: 452/464, loss: 0.04862720146775246 2023-01-22 12:22:00.367347: step: 454/464, loss: 0.5798062086105347 2023-01-22 12:22:00.934403: step: 456/464, loss: 0.02002645842730999 2023-01-22 12:22:01.522670: step: 458/464, loss: 0.03038819320499897 2023-01-22 12:22:02.111364: step: 460/464, loss: 1.1626721620559692 2023-01-22 12:22:02.735154: step: 462/464, loss: 0.00896801520138979 2023-01-22 12:22:03.288906: step: 464/464, loss: 0.02811810001730919 2023-01-22 12:22:03.929027: step: 466/464, loss: 0.012318997643887997 2023-01-22 12:22:04.664709: step: 468/464, loss: 0.07171928137540817 2023-01-22 12:22:05.444708: step: 470/464, loss: 0.099583700299263 2023-01-22 12:22:06.019826: step: 472/464, loss: 0.03668338432908058 2023-01-22 12:22:06.639598: step: 474/464, loss: 0.05059617757797241 2023-01-22 12:22:07.272136: step: 476/464, loss: 0.07265280187129974 2023-01-22 12:22:07.849048: step: 478/464, loss: 0.011166172102093697 2023-01-22 12:22:08.525827: step: 480/464, loss: 0.03359964117407799 2023-01-22 12:22:09.105628: step: 482/464, loss: 0.019395098090171814 2023-01-22 12:22:09.771415: step: 484/464, loss: 0.05742871016263962 2023-01-22 12:22:10.353385: step: 486/464, loss: 0.020450137555599213 2023-01-22 12:22:11.008871: step: 488/464, loss: 0.03934083878993988 2023-01-22 12:22:11.698127: step: 490/464, loss: 0.08692191541194916 2023-01-22 
12:22:12.380585: step: 492/464, loss: 0.02365756221115589 2023-01-22 12:22:13.000643: step: 494/464, loss: 0.03775375708937645 2023-01-22 12:22:13.640453: step: 496/464, loss: 0.05765479430556297 2023-01-22 12:22:14.208446: step: 498/464, loss: 0.0200423002243042 2023-01-22 12:22:14.880427: step: 500/464, loss: 0.03446970880031586 2023-01-22 12:22:15.486443: step: 502/464, loss: 0.03138982877135277 2023-01-22 12:22:16.107383: step: 504/464, loss: 0.02364160306751728 2023-01-22 12:22:16.742685: step: 506/464, loss: 0.03466923162341118 2023-01-22 12:22:17.447849: step: 508/464, loss: 0.10752367973327637 2023-01-22 12:22:18.079164: step: 510/464, loss: 0.05907217785716057 2023-01-22 12:22:18.735424: step: 512/464, loss: 0.5026339292526245 2023-01-22 12:22:19.371238: step: 514/464, loss: 0.019515832886099815 2023-01-22 12:22:20.043060: step: 516/464, loss: 0.025658082216978073 2023-01-22 12:22:20.701170: step: 518/464, loss: 0.08302264660596848 2023-01-22 12:22:21.272288: step: 520/464, loss: 0.3803815543651581 2023-01-22 12:22:21.923498: step: 522/464, loss: 0.0429992713034153 2023-01-22 12:22:22.516356: step: 524/464, loss: 0.027729887515306473 2023-01-22 12:22:23.094052: step: 526/464, loss: 0.007166002411395311 2023-01-22 12:22:23.766633: step: 528/464, loss: 0.9323362112045288 2023-01-22 12:22:24.416525: step: 530/464, loss: 0.04219109192490578 2023-01-22 12:22:25.065503: step: 532/464, loss: 0.0168119128793478 2023-01-22 12:22:25.665659: step: 534/464, loss: 0.06935977190732956 2023-01-22 12:22:26.280608: step: 536/464, loss: 0.015928490087389946 2023-01-22 12:22:26.913190: step: 538/464, loss: 0.0640493631362915 2023-01-22 12:22:27.609721: step: 540/464, loss: 0.005617052782326937 2023-01-22 12:22:28.245254: step: 542/464, loss: 0.05419943481683731 2023-01-22 12:22:28.891029: step: 544/464, loss: 0.023738780990242958 2023-01-22 12:22:29.503231: step: 546/464, loss: 0.14997144043445587 2023-01-22 12:22:30.084942: step: 548/464, loss: 0.18707990646362305 2023-01-22 12:22:30.826517: step: 550/464, loss: 0.825693666934967 2023-01-22 12:22:31.463260: step: 552/464, loss: 0.035482730716466904 2023-01-22 12:22:32.073597: step: 554/464, loss: 0.030714012682437897 2023-01-22 12:22:32.812365: step: 556/464, loss: 0.04303360357880592 2023-01-22 12:22:33.477043: step: 558/464, loss: 0.11482765525579453 2023-01-22 12:22:34.061232: step: 560/464, loss: 0.014046892523765564 2023-01-22 12:22:34.743409: step: 562/464, loss: 0.069298654794693 2023-01-22 12:22:35.351059: step: 564/464, loss: 0.13554075360298157 2023-01-22 12:22:35.971953: step: 566/464, loss: 0.058562301099300385 2023-01-22 12:22:36.626311: step: 568/464, loss: 0.027104122564196587 2023-01-22 12:22:37.296290: step: 570/464, loss: 0.15894848108291626 2023-01-22 12:22:37.940882: step: 572/464, loss: 0.03762518987059593 2023-01-22 12:22:38.521179: step: 574/464, loss: 0.3076055943965912 2023-01-22 12:22:39.185403: step: 576/464, loss: 0.01175174955278635 2023-01-22 12:22:39.827058: step: 578/464, loss: 0.031038088724017143 2023-01-22 12:22:40.466731: step: 580/464, loss: 0.23649537563323975 2023-01-22 12:22:41.158184: step: 582/464, loss: 0.08286372572183609 2023-01-22 12:22:41.789060: step: 584/464, loss: 0.020930882543325424 2023-01-22 12:22:42.415685: step: 586/464, loss: 0.04851653426885605 2023-01-22 12:22:42.996094: step: 588/464, loss: 0.04376620426774025 2023-01-22 12:22:43.647401: step: 590/464, loss: 0.07862431555986404 2023-01-22 12:22:44.304906: step: 592/464, loss: 0.06696991622447968 2023-01-22 12:22:44.882400: step: 594/464, 
loss: 0.014856110326945782 2023-01-22 12:22:45.503664: step: 596/464, loss: 0.03530821204185486 2023-01-22 12:22:46.167935: step: 598/464, loss: 0.08003076165914536 2023-01-22 12:22:46.761324: step: 600/464, loss: 0.038060080260038376 2023-01-22 12:22:47.430914: step: 602/464, loss: 0.07375941425561905 2023-01-22 12:22:48.018978: step: 604/464, loss: 0.007829632610082626 2023-01-22 12:22:48.630057: step: 606/464, loss: 0.03282972425222397 2023-01-22 12:22:49.303246: step: 608/464, loss: 0.06060962378978729 2023-01-22 12:22:49.900060: step: 610/464, loss: 0.17766328155994415 2023-01-22 12:22:50.485557: step: 612/464, loss: 0.14387981593608856 2023-01-22 12:22:51.048047: step: 614/464, loss: 0.04774000868201256 2023-01-22 12:22:51.685488: step: 616/464, loss: 0.017273657023906708 2023-01-22 12:22:52.350805: step: 618/464, loss: 0.043972667306661606 2023-01-22 12:22:53.013699: step: 620/464, loss: 0.02949305810034275 2023-01-22 12:22:53.594904: step: 622/464, loss: 0.0540006048977375 2023-01-22 12:22:54.202536: step: 624/464, loss: 0.002033422002568841 2023-01-22 12:22:54.826150: step: 626/464, loss: 0.2388991117477417 2023-01-22 12:22:55.421468: step: 628/464, loss: 0.08435311913490295 2023-01-22 12:22:56.072616: step: 630/464, loss: 0.0835963562130928 2023-01-22 12:22:56.721205: step: 632/464, loss: 0.01947946660220623 2023-01-22 12:22:57.334957: step: 634/464, loss: 0.1267274171113968 2023-01-22 12:22:58.036837: step: 636/464, loss: 0.6429212093353271 2023-01-22 12:22:58.641783: step: 638/464, loss: 0.15381783246994019 2023-01-22 12:22:59.199652: step: 640/464, loss: 0.007321540731936693 2023-01-22 12:22:59.863565: step: 642/464, loss: 0.06959162652492523 2023-01-22 12:23:00.536903: step: 644/464, loss: 0.059891972690820694 2023-01-22 12:23:01.103927: step: 646/464, loss: 0.13082417845726013 2023-01-22 12:23:01.691689: step: 648/464, loss: 0.037121932953596115 2023-01-22 12:23:02.328431: step: 650/464, loss: 0.006254613399505615 2023-01-22 12:23:02.917490: step: 652/464, loss: 0.04932815954089165 2023-01-22 12:23:03.504322: step: 654/464, loss: 0.03491489961743355 2023-01-22 12:23:04.244109: step: 656/464, loss: 0.06369685381650925 2023-01-22 12:23:04.854031: step: 658/464, loss: 0.29147234559059143 2023-01-22 12:23:05.470464: step: 660/464, loss: 0.017888322472572327 2023-01-22 12:23:06.083857: step: 662/464, loss: 0.0431828498840332 2023-01-22 12:23:06.799859: step: 664/464, loss: 0.04324536398053169 2023-01-22 12:23:07.399227: step: 666/464, loss: 0.007824858650565147 2023-01-22 12:23:08.064085: step: 668/464, loss: 0.12181359529495239 2023-01-22 12:23:08.649068: step: 670/464, loss: 0.048685409128665924 2023-01-22 12:23:09.332344: step: 672/464, loss: 0.030775291845202446 2023-01-22 12:23:09.975316: step: 674/464, loss: 0.04483339563012123 2023-01-22 12:23:10.553958: step: 676/464, loss: 0.15573513507843018 2023-01-22 12:23:11.195986: step: 678/464, loss: 0.059290602803230286 2023-01-22 12:23:11.806517: step: 680/464, loss: 0.4794975817203522 2023-01-22 12:23:12.489585: step: 682/464, loss: 0.06171262636780739 2023-01-22 12:23:13.089260: step: 684/464, loss: 0.07990762591362 2023-01-22 12:23:13.736865: step: 686/464, loss: 0.14318765699863434 2023-01-22 12:23:14.299477: step: 688/464, loss: 0.0039002220146358013 2023-01-22 12:23:14.981435: step: 690/464, loss: 0.011800544336438179 2023-01-22 12:23:15.584913: step: 692/464, loss: 0.0400354377925396 2023-01-22 12:23:16.278678: step: 694/464, loss: 0.03960973024368286 2023-01-22 12:23:16.892973: step: 696/464, loss: 0.052699748426675797 
2023-01-22 12:23:17.535176: step: 698/464, loss: 0.055506426841020584 2023-01-22 12:23:18.161368: step: 700/464, loss: 0.08599307388067245 2023-01-22 12:23:18.784755: step: 702/464, loss: 0.07293307036161423 2023-01-22 12:23:19.393228: step: 704/464, loss: 0.02525770291686058 2023-01-22 12:23:20.003054: step: 706/464, loss: 0.14079737663269043 2023-01-22 12:23:20.664261: step: 708/464, loss: 0.294656902551651 2023-01-22 12:23:21.282174: step: 710/464, loss: 0.14622029662132263 2023-01-22 12:23:21.882050: step: 712/464, loss: 0.012117592617869377 2023-01-22 12:23:22.501979: step: 714/464, loss: 0.006660753861069679 2023-01-22 12:23:23.133278: step: 716/464, loss: 0.0036901962012052536 2023-01-22 12:23:23.750169: step: 718/464, loss: 0.05405588448047638 2023-01-22 12:23:24.348962: step: 720/464, loss: 0.03300733119249344 2023-01-22 12:23:24.914713: step: 722/464, loss: 0.1454676240682602 2023-01-22 12:23:25.537692: step: 724/464, loss: 0.12987717986106873 2023-01-22 12:23:26.144814: step: 726/464, loss: 0.018779395148158073 2023-01-22 12:23:26.739413: step: 728/464, loss: 0.02126624621450901 2023-01-22 12:23:27.380323: step: 730/464, loss: 0.06283842772245407 2023-01-22 12:23:27.984859: step: 732/464, loss: 0.07268655300140381 2023-01-22 12:23:28.643662: step: 734/464, loss: 0.020557576790452003 2023-01-22 12:23:29.328083: step: 736/464, loss: 0.4944629669189453 2023-01-22 12:23:29.949550: step: 738/464, loss: 0.08161593973636627 2023-01-22 12:23:30.590337: step: 740/464, loss: 0.008167529478669167 2023-01-22 12:23:31.244473: step: 742/464, loss: 0.012403919361531734 2023-01-22 12:23:31.954280: step: 744/464, loss: 0.36549267172813416 2023-01-22 12:23:32.563937: step: 746/464, loss: 0.0025342279113829136 2023-01-22 12:23:33.205940: step: 748/464, loss: 0.02981056272983551 2023-01-22 12:23:33.808100: step: 750/464, loss: 0.04026034474372864 2023-01-22 12:23:34.460429: step: 752/464, loss: 0.031751926988363266 2023-01-22 12:23:35.057503: step: 754/464, loss: 0.019904276356101036 2023-01-22 12:23:35.704026: step: 756/464, loss: 0.052797820419073105 2023-01-22 12:23:36.300540: step: 758/464, loss: 0.6545272469520569 2023-01-22 12:23:36.861258: step: 760/464, loss: 0.05094560608267784 2023-01-22 12:23:37.491526: step: 762/464, loss: 0.06578005850315094 2023-01-22 12:23:38.186597: step: 764/464, loss: 0.008793084882199764 2023-01-22 12:23:38.864361: step: 766/464, loss: 0.07369567453861237 2023-01-22 12:23:39.428083: step: 768/464, loss: 0.06409741938114166 2023-01-22 12:23:39.990899: step: 770/464, loss: 0.07845474779605865 2023-01-22 12:23:40.659343: step: 772/464, loss: 0.13869287073612213 2023-01-22 12:23:41.293955: step: 774/464, loss: 0.10635069012641907 2023-01-22 12:23:41.918065: step: 776/464, loss: 15.494712829589844 2023-01-22 12:23:42.574714: step: 778/464, loss: 0.03596947342157364 2023-01-22 12:23:43.272775: step: 780/464, loss: 0.0767352506518364 2023-01-22 12:23:43.987810: step: 782/464, loss: 0.017218533903360367 2023-01-22 12:23:44.628450: step: 784/464, loss: 0.15298594534397125 2023-01-22 12:23:45.271532: step: 786/464, loss: 0.11351388692855835 2023-01-22 12:23:45.892222: step: 788/464, loss: 0.18765272200107574 2023-01-22 12:23:46.566930: step: 790/464, loss: 0.044379230588674545 2023-01-22 12:23:47.157455: step: 792/464, loss: 0.06699885427951813 2023-01-22 12:23:47.800489: step: 794/464, loss: 0.028787074610590935 2023-01-22 12:23:48.407927: step: 796/464, loss: 0.33658385276794434 2023-01-22 12:23:49.049492: step: 798/464, loss: 0.09242162108421326 2023-01-22 
12:23:49.657253: step: 800/464, loss: 0.35072797536849976 2023-01-22 12:23:50.329543: step: 802/464, loss: 0.04858367517590523 2023-01-22 12:23:51.042165: step: 804/464, loss: 0.051442377269268036 2023-01-22 12:23:51.652402: step: 806/464, loss: 0.035532910376787186 2023-01-22 12:23:52.285233: step: 808/464, loss: 0.0076722376979887486 2023-01-22 12:23:52.851794: step: 810/464, loss: 0.030712995678186417 2023-01-22 12:23:53.443692: step: 812/464, loss: 0.10349004715681076 2023-01-22 12:23:54.039011: step: 814/464, loss: 0.03708258643746376 2023-01-22 12:23:54.655601: step: 816/464, loss: 1.1700975894927979 2023-01-22 12:23:55.286870: step: 818/464, loss: 0.05340784788131714 2023-01-22 12:23:55.896850: step: 820/464, loss: 0.03580804914236069 2023-01-22 12:23:56.548307: step: 822/464, loss: 0.019508054479956627 2023-01-22 12:23:57.173008: step: 824/464, loss: 0.08856187015771866 2023-01-22 12:23:57.818999: step: 826/464, loss: 0.023758994415402412 2023-01-22 12:23:58.552579: step: 828/464, loss: 0.024007648229599 2023-01-22 12:23:59.101772: step: 830/464, loss: 0.017617687582969666 2023-01-22 12:23:59.735859: step: 832/464, loss: 0.040325313806533813 2023-01-22 12:24:00.387421: step: 834/464, loss: 0.10785327106714249 2023-01-22 12:24:00.957992: step: 836/464, loss: 0.02498418465256691 2023-01-22 12:24:01.669992: step: 838/464, loss: 0.06586012244224548 2023-01-22 12:24:02.280443: step: 840/464, loss: 0.1145709902048111 2023-01-22 12:24:02.922575: step: 842/464, loss: 0.07150621712207794 2023-01-22 12:24:03.549462: step: 844/464, loss: 0.0404898002743721 2023-01-22 12:24:04.248810: step: 846/464, loss: 0.03282487764954567 2023-01-22 12:24:04.866853: step: 848/464, loss: 0.039687447249889374 2023-01-22 12:24:05.505245: step: 850/464, loss: 1.8295704126358032 2023-01-22 12:24:06.114663: step: 852/464, loss: 0.10599999874830246 2023-01-22 12:24:06.744516: step: 854/464, loss: 0.23428945243358612 2023-01-22 12:24:07.420281: step: 856/464, loss: 0.08374352753162384 2023-01-22 12:24:08.101982: step: 858/464, loss: 2.8789405822753906 2023-01-22 12:24:08.747003: step: 860/464, loss: 0.019031409174203873 2023-01-22 12:24:09.425355: step: 862/464, loss: 0.2627542018890381 2023-01-22 12:24:10.048924: step: 864/464, loss: 0.06664364784955978 2023-01-22 12:24:10.633733: step: 866/464, loss: 0.0017015093471854925 2023-01-22 12:24:11.269770: step: 868/464, loss: 0.08811833709478378 2023-01-22 12:24:11.927459: step: 870/464, loss: 0.049786198884248734 2023-01-22 12:24:12.581028: step: 872/464, loss: 0.0974259227514267 2023-01-22 12:24:13.250737: step: 874/464, loss: 0.16216818988323212 2023-01-22 12:24:13.841877: step: 876/464, loss: 0.14738522469997406 2023-01-22 12:24:14.474921: step: 878/464, loss: 0.13370949029922485 2023-01-22 12:24:15.084618: step: 880/464, loss: 0.03261880576610565 2023-01-22 12:24:15.765422: step: 882/464, loss: 1.8354557752609253 2023-01-22 12:24:16.394858: step: 884/464, loss: 0.03743087500333786 2023-01-22 12:24:17.064121: step: 886/464, loss: 0.015178278088569641 2023-01-22 12:24:17.766514: step: 888/464, loss: 0.07287105172872543 2023-01-22 12:24:18.411462: step: 890/464, loss: 0.006585855036973953 2023-01-22 12:24:19.005397: step: 892/464, loss: 0.1828705072402954 2023-01-22 12:24:19.594814: step: 894/464, loss: 0.031428128480911255 2023-01-22 12:24:20.195627: step: 896/464, loss: 0.043859273195266724 2023-01-22 12:24:20.770143: step: 898/464, loss: 0.14311519265174866 2023-01-22 12:24:21.354384: step: 900/464, loss: 0.015120675787329674 2023-01-22 12:24:22.040019: step: 
902/464, loss: 0.010816311463713646 2023-01-22 12:24:22.668608: step: 904/464, loss: 1.2248504161834717 2023-01-22 12:24:23.253983: step: 906/464, loss: 0.06228072941303253 2023-01-22 12:24:23.938945: step: 908/464, loss: 0.030728058889508247 2023-01-22 12:24:24.513951: step: 910/464, loss: 0.10771479457616806 2023-01-22 12:24:25.163870: step: 912/464, loss: 0.15603163838386536 2023-01-22 12:24:25.803308: step: 914/464, loss: 0.061756011098623276 2023-01-22 12:24:26.472992: step: 916/464, loss: 0.04426893591880798 2023-01-22 12:24:27.078015: step: 918/464, loss: 0.026621172204613686 2023-01-22 12:24:27.612205: step: 920/464, loss: 0.009261952713131905 2023-01-22 12:24:28.276336: step: 922/464, loss: 0.0934833437204361 2023-01-22 12:24:28.937961: step: 924/464, loss: 0.06076713278889656 2023-01-22 12:24:29.588304: step: 926/464, loss: 0.01151891890913248 2023-01-22 12:24:30.205142: step: 928/464, loss: 0.16116264462471008 2023-01-22 12:24:30.685213: step: 930/464, loss: 0.005820272024720907 ================================================== Loss: 0.167 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2971315887874569, 'r': 0.31686518576575096, 'f1': 0.30668127254095645}, 'combined': 0.22597567450386263, 'epoch': 21} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2898321827752456, 'r': 0.29462279736657193, 'f1': 0.2922078564045509}, 'combined': 0.19076782335219383, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2843783930510315, 'r': 0.3313251106894371, 'f1': 0.3060619339760445}, 'combined': 0.22551931977182224, 'epoch': 21} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30761372117290176, 'r': 0.30196424970967123, 'f1': 0.3047628062408081}, 'combined': 0.19896431910021667, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31484587018974197, 'r': 0.3261755511246001, 'f1': 0.3204105878954211}, 'combined': 0.23609201213346817, 'epoch': 21} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3079344024575886, 'r': 0.2892717113995529, 'f1': 0.29831145238078893}, 'combined': 0.19475255440403838, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21264367816091953, 'r': 0.35238095238095235, 'f1': 0.26523297491039427}, 'combined': 0.17682198327359616, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2569444444444444, 'r': 0.40217391304347827, 'f1': 0.31355932203389825}, 'combined': 0.15677966101694912, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.27586206896551724, 'f1': 0.3404255319148936}, 'combined': 0.22695035460992907, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 
0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215276761799725, 'r': 0.3520331483033096, 'f1': 0.33608961803594956}, 'combined': 0.24764498171069965, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050538704813727, 'r': 0.3061743622003126, 'f1': 0.3056130893090196}, 'combined': 0.19951942618101798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 14} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:27:08.608260: step: 2/464, loss: 0.006758301518857479 2023-01-22 12:27:09.208664: step: 4/464, loss: 0.14314305782318115 2023-01-22 12:27:09.942511: step: 6/464, loss: 0.0054590520448982716 2023-01-22 12:27:10.512724: step: 8/464, loss: 0.03108260966837406 2023-01-22 12:27:11.103780: step: 10/464, loss: 0.07363323867321014 2023-01-22 12:27:11.745336: step: 12/464, loss: 0.06686335802078247 2023-01-22 12:27:12.342948: step: 14/464, loss: 0.01742183230817318 2023-01-22 12:27:12.922521: step: 16/464, loss: 0.05019579827785492 2023-01-22 12:27:13.533927: step: 18/464, loss: 0.017191683873534203 2023-01-22 12:27:14.149072: step: 20/464, loss: 0.11063700169324875 2023-01-22 12:27:14.808758: step: 22/464, loss: 0.10509807616472244 2023-01-22 12:27:15.483068: step: 24/464, loss: 0.018130838871002197 2023-01-22 12:27:16.146766: step: 26/464, loss: 0.02149237133562565 2023-01-22 12:27:16.737858: step: 28/464, loss: 0.03470902144908905 2023-01-22 12:27:17.283309: step: 30/464, loss: 0.00973481498658657 2023-01-22 12:27:17.945625: step: 32/464, loss: 0.04356635734438896 2023-01-22 12:27:18.494196: step: 34/464, loss: 0.4145301878452301 2023-01-22 12:27:19.123021: step: 36/464, loss: 0.02120104245841503 2023-01-22 12:27:19.804148: step: 38/464, loss: 0.20581495761871338 2023-01-22 12:27:20.402982: step: 40/464, loss: 0.082242451608181 2023-01-22 12:27:21.028196: step: 42/464, loss: 0.05397970974445343 2023-01-22 12:27:21.636882: step: 44/464, loss: 0.011220986023545265 2023-01-22 12:27:22.304431: step: 46/464, loss: 0.160739928483963 2023-01-22 12:27:22.893776: step: 48/464, loss: 0.0300454068928957 2023-01-22 12:27:23.457087: step: 50/464, loss: 
2.2988154888153076 2023-01-22 12:27:24.127868: step: 52/464, loss: 0.025502270087599754 2023-01-22 12:27:24.722204: step: 54/464, loss: 0.10452800989151001 2023-01-22 12:27:25.410759: step: 56/464, loss: 0.03143616393208504 2023-01-22 12:27:26.029314: step: 58/464, loss: 0.029270924627780914 2023-01-22 12:27:26.699501: step: 60/464, loss: 0.03443043678998947 2023-01-22 12:27:27.311534: step: 62/464, loss: 0.029024790972471237 2023-01-22 12:27:28.012632: step: 64/464, loss: 0.03446149453520775 2023-01-22 12:27:28.603151: step: 66/464, loss: 0.00391471479088068 2023-01-22 12:27:29.226684: step: 68/464, loss: 0.0015981389442458749 2023-01-22 12:27:29.775731: step: 70/464, loss: 0.0024913130328059196 2023-01-22 12:27:30.354665: step: 72/464, loss: 0.013183190487325191 2023-01-22 12:27:30.941223: step: 74/464, loss: 0.02803228795528412 2023-01-22 12:27:31.504942: step: 76/464, loss: 0.3102782070636749 2023-01-22 12:27:32.143905: step: 78/464, loss: 0.039724547415971756 2023-01-22 12:27:32.736436: step: 80/464, loss: 0.0979723408818245 2023-01-22 12:27:33.352214: step: 82/464, loss: 0.08590143918991089 2023-01-22 12:27:33.957266: step: 84/464, loss: 0.04453185573220253 2023-01-22 12:27:34.588157: step: 86/464, loss: 0.09062372148036957 2023-01-22 12:27:35.239768: step: 88/464, loss: 0.045178622007369995 2023-01-22 12:27:35.861997: step: 90/464, loss: 0.05401252210140228 2023-01-22 12:27:36.464641: step: 92/464, loss: 0.005012328736484051 2023-01-22 12:27:37.112549: step: 94/464, loss: 0.00415660860016942 2023-01-22 12:27:37.705951: step: 96/464, loss: 0.036089878529310226 2023-01-22 12:27:38.288360: step: 98/464, loss: 0.04593805968761444 2023-01-22 12:27:38.874251: step: 100/464, loss: 0.06143837049603462 2023-01-22 12:27:39.482470: step: 102/464, loss: 0.018398325890302658 2023-01-22 12:27:40.062333: step: 104/464, loss: 0.025438066571950912 2023-01-22 12:27:40.712213: step: 106/464, loss: 0.05824285000562668 2023-01-22 12:27:41.280751: step: 108/464, loss: 0.02007497102022171 2023-01-22 12:27:41.973410: step: 110/464, loss: 0.06423313170671463 2023-01-22 12:27:42.619871: step: 112/464, loss: 0.0621689036488533 2023-01-22 12:27:43.244272: step: 114/464, loss: 0.11528107523918152 2023-01-22 12:27:43.869581: step: 116/464, loss: 0.053891878575086594 2023-01-22 12:27:44.436279: step: 118/464, loss: 0.02513236738741398 2023-01-22 12:27:45.084359: step: 120/464, loss: 0.059944998472929 2023-01-22 12:27:45.753275: step: 122/464, loss: 0.023398755118250847 2023-01-22 12:27:46.387950: step: 124/464, loss: 0.47563937306404114 2023-01-22 12:27:47.015429: step: 126/464, loss: 0.01028918381780386 2023-01-22 12:27:47.604187: step: 128/464, loss: 0.010359102860093117 2023-01-22 12:27:48.262753: step: 130/464, loss: 0.4950485825538635 2023-01-22 12:27:48.898117: step: 132/464, loss: 0.010646861046552658 2023-01-22 12:27:49.599091: step: 134/464, loss: 0.019035184755921364 2023-01-22 12:27:50.156951: step: 136/464, loss: 0.12675100564956665 2023-01-22 12:27:50.789645: step: 138/464, loss: 0.008613305166363716 2023-01-22 12:27:51.440498: step: 140/464, loss: 0.009645706973969936 2023-01-22 12:27:52.078649: step: 142/464, loss: 0.03028269298374653 2023-01-22 12:27:52.693283: step: 144/464, loss: 0.004214303568005562 2023-01-22 12:27:53.372379: step: 146/464, loss: 0.0990675687789917 2023-01-22 12:27:54.023396: step: 148/464, loss: 0.04364365339279175 2023-01-22 12:27:54.637526: step: 150/464, loss: 0.06068604812026024 2023-01-22 12:27:55.250635: step: 152/464, loss: 0.05011973902583122 2023-01-22 
12:27:55.893730: step: 154/464, loss: 0.0195098128169775 2023-01-22 12:27:56.492346: step: 156/464, loss: 0.011885374784469604 2023-01-22 12:27:57.135461: step: 158/464, loss: 0.021790891885757446 2023-01-22 12:27:57.725678: step: 160/464, loss: 0.0470498651266098 2023-01-22 12:27:58.366788: step: 162/464, loss: 0.0032399813644587994 2023-01-22 12:27:59.006483: step: 164/464, loss: 0.0178668275475502 2023-01-22 12:27:59.600062: step: 166/464, loss: 0.0072959973476827145 2023-01-22 12:28:00.183496: step: 168/464, loss: 0.022963957861065865 2023-01-22 12:28:00.740236: step: 170/464, loss: 0.00620347261428833 2023-01-22 12:28:01.361990: step: 172/464, loss: 0.04798974096775055 2023-01-22 12:28:01.947928: step: 174/464, loss: 0.014539512805640697 2023-01-22 12:28:02.502115: step: 176/464, loss: 0.06378328055143356 2023-01-22 12:28:03.134414: step: 178/464, loss: 0.04277843236923218 2023-01-22 12:28:03.750874: step: 180/464, loss: 1.583171010017395 2023-01-22 12:28:04.370223: step: 182/464, loss: 0.10841219872236252 2023-01-22 12:28:05.004984: step: 184/464, loss: 0.007886328734457493 2023-01-22 12:28:05.579199: step: 186/464, loss: 0.13257752358913422 2023-01-22 12:28:06.187829: step: 188/464, loss: 0.4060317575931549 2023-01-22 12:28:06.804733: step: 190/464, loss: 0.06079034507274628 2023-01-22 12:28:07.427105: step: 192/464, loss: 0.014938176609575748 2023-01-22 12:28:08.096310: step: 194/464, loss: 0.01524947490543127 2023-01-22 12:28:08.739841: step: 196/464, loss: 0.032105594873428345 2023-01-22 12:28:09.482082: step: 198/464, loss: 0.016917673870921135 2023-01-22 12:28:10.141441: step: 200/464, loss: 0.07070586830377579 2023-01-22 12:28:10.786602: step: 202/464, loss: 0.020239602774381638 2023-01-22 12:28:11.450760: step: 204/464, loss: 0.008704866282641888 2023-01-22 12:28:12.112196: step: 206/464, loss: 0.030908847227692604 2023-01-22 12:28:12.773139: step: 208/464, loss: 0.07697136700153351 2023-01-22 12:28:13.394926: step: 210/464, loss: 0.09847211837768555 2023-01-22 12:28:13.974392: step: 212/464, loss: 0.11305604875087738 2023-01-22 12:28:14.615094: step: 214/464, loss: 0.16584600508213043 2023-01-22 12:28:15.247447: step: 216/464, loss: 0.31934815645217896 2023-01-22 12:28:15.910805: step: 218/464, loss: 0.05138273909687996 2023-01-22 12:28:16.514918: step: 220/464, loss: 0.05057439208030701 2023-01-22 12:28:17.158973: step: 222/464, loss: 0.09238734841346741 2023-01-22 12:28:17.847324: step: 224/464, loss: 0.00903787650167942 2023-01-22 12:28:18.456487: step: 226/464, loss: 0.019578061997890472 2023-01-22 12:28:19.192698: step: 228/464, loss: 0.044871579855680466 2023-01-22 12:28:19.785549: step: 230/464, loss: 0.08930542320013046 2023-01-22 12:28:20.474360: step: 232/464, loss: 0.005069571081548929 2023-01-22 12:28:21.090475: step: 234/464, loss: 1.0194897651672363 2023-01-22 12:28:21.757413: step: 236/464, loss: 0.0036579384468495846 2023-01-22 12:28:22.357710: step: 238/464, loss: 0.12989561259746552 2023-01-22 12:28:22.985077: step: 240/464, loss: 0.1594226062297821 2023-01-22 12:28:23.574344: step: 242/464, loss: 0.06729419529438019 2023-01-22 12:28:24.210405: step: 244/464, loss: 0.041194040328264236 2023-01-22 12:28:24.833493: step: 246/464, loss: 0.02332022227346897 2023-01-22 12:28:25.476293: step: 248/464, loss: 0.03594561293721199 2023-01-22 12:28:26.183232: step: 250/464, loss: 0.09034476429224014 2023-01-22 12:28:26.809416: step: 252/464, loss: 0.03551686927676201 2023-01-22 12:28:27.493915: step: 254/464, loss: 0.024127595126628876 2023-01-22 12:28:28.116918: 
step: 256/464, loss: 0.07626382261514664 2023-01-22 12:28:28.877851: step: 258/464, loss: 0.07468894869089127 2023-01-22 12:28:29.534060: step: 260/464, loss: 0.06592415273189545 2023-01-22 12:28:30.173487: step: 262/464, loss: 0.3178122043609619 2023-01-22 12:28:30.826828: step: 264/464, loss: 0.12749071419239044 2023-01-22 12:28:31.464219: step: 266/464, loss: 0.01742948777973652 2023-01-22 12:28:32.060045: step: 268/464, loss: 0.01200178824365139 2023-01-22 12:28:32.634476: step: 270/464, loss: 0.15292149782180786 2023-01-22 12:28:33.261985: step: 272/464, loss: 0.12947878241539001 2023-01-22 12:28:33.960560: step: 274/464, loss: 0.4505672752857208 2023-01-22 12:28:34.567806: step: 276/464, loss: 0.05702408775687218 2023-01-22 12:28:35.234248: step: 278/464, loss: 0.024187006056308746 2023-01-22 12:28:35.846450: step: 280/464, loss: 0.03368527069687843 2023-01-22 12:28:36.442952: step: 282/464, loss: 0.03611384332180023 2023-01-22 12:28:37.072953: step: 284/464, loss: 0.021873539313673973 2023-01-22 12:28:37.777511: step: 286/464, loss: 0.10742607712745667 2023-01-22 12:28:38.469573: step: 288/464, loss: 0.11804650723934174 2023-01-22 12:28:39.109333: step: 290/464, loss: 0.018299585208296776 2023-01-22 12:28:39.742153: step: 292/464, loss: 0.06340126693248749 2023-01-22 12:28:40.420882: step: 294/464, loss: 0.016614476218819618 2023-01-22 12:28:41.034289: step: 296/464, loss: 0.24299855530261993 2023-01-22 12:28:41.605868: step: 298/464, loss: 0.07846701145172119 2023-01-22 12:28:42.242860: step: 300/464, loss: 0.023147309198975563 2023-01-22 12:28:42.860783: step: 302/464, loss: 0.06763315200805664 2023-01-22 12:28:43.477742: step: 304/464, loss: 0.0443316288292408 2023-01-22 12:28:44.104885: step: 306/464, loss: 0.011263721622526646 2023-01-22 12:28:44.729856: step: 308/464, loss: 0.041338883340358734 2023-01-22 12:28:45.368746: step: 310/464, loss: 0.022091420367360115 2023-01-22 12:28:46.099794: step: 312/464, loss: 0.0018858188996091485 2023-01-22 12:28:46.718139: step: 314/464, loss: 0.13889434933662415 2023-01-22 12:28:47.387493: step: 316/464, loss: 0.018320569768548012 2023-01-22 12:28:48.116241: step: 318/464, loss: 0.13306625187397003 2023-01-22 12:28:48.773283: step: 320/464, loss: 0.25399693846702576 2023-01-22 12:28:49.420594: step: 322/464, loss: 0.11977692693471909 2023-01-22 12:28:50.057021: step: 324/464, loss: 0.06183283403515816 2023-01-22 12:28:50.637426: step: 326/464, loss: 0.04578084126114845 2023-01-22 12:28:51.304440: step: 328/464, loss: 0.0503641702234745 2023-01-22 12:28:51.911427: step: 330/464, loss: 0.029564879834651947 2023-01-22 12:28:52.527535: step: 332/464, loss: 0.1057472974061966 2023-01-22 12:28:53.152772: step: 334/464, loss: 0.043896257877349854 2023-01-22 12:28:53.810916: step: 336/464, loss: 0.06871680915355682 2023-01-22 12:28:54.425719: step: 338/464, loss: 0.02590806782245636 2023-01-22 12:28:55.010666: step: 340/464, loss: 0.012619096785783768 2023-01-22 12:28:55.684425: step: 342/464, loss: 0.24375081062316895 2023-01-22 12:28:56.335430: step: 344/464, loss: 0.06244340538978577 2023-01-22 12:28:56.963395: step: 346/464, loss: 0.03981610760092735 2023-01-22 12:28:57.594932: step: 348/464, loss: 0.02591633051633835 2023-01-22 12:28:58.277047: step: 350/464, loss: 0.002215398009866476 2023-01-22 12:28:58.897999: step: 352/464, loss: 0.003011910943314433 2023-01-22 12:28:59.536008: step: 354/464, loss: 0.012786061502993107 2023-01-22 12:29:00.086497: step: 356/464, loss: 0.008177302777767181 2023-01-22 12:29:00.700905: step: 358/464, loss: 
0.02352547086775303 2023-01-22 12:29:01.272180: step: 360/464, loss: 0.054179847240448 2023-01-22 12:29:01.845293: step: 362/464, loss: 0.19881106913089752 2023-01-22 12:29:02.421213: step: 364/464, loss: 0.07807371765375137 2023-01-22 12:29:03.036676: step: 366/464, loss: 0.07180866599082947 2023-01-22 12:29:03.635306: step: 368/464, loss: 0.028741007670760155 2023-01-22 12:29:04.304528: step: 370/464, loss: 0.1762828379869461 2023-01-22 12:29:04.944647: step: 372/464, loss: 0.014499752782285213 2023-01-22 12:29:05.595897: step: 374/464, loss: 0.046783238649368286 2023-01-22 12:29:06.238835: step: 376/464, loss: 0.07356592267751694 2023-01-22 12:29:06.864625: step: 378/464, loss: 0.25877201557159424 2023-01-22 12:29:07.478506: step: 380/464, loss: 0.013184488750994205 2023-01-22 12:29:08.075516: step: 382/464, loss: 0.0728127658367157 2023-01-22 12:29:08.701547: step: 384/464, loss: 0.07052990049123764 2023-01-22 12:29:09.325523: step: 386/464, loss: 0.04295302927494049 2023-01-22 12:29:09.945623: step: 388/464, loss: 0.04797111079096794 2023-01-22 12:29:10.637031: step: 390/464, loss: 0.005648870021104813 2023-01-22 12:29:11.194334: step: 392/464, loss: 0.03515315055847168 2023-01-22 12:29:11.742078: step: 394/464, loss: 0.005830351263284683 2023-01-22 12:29:12.394903: step: 396/464, loss: 0.044321294873952866 2023-01-22 12:29:13.054243: step: 398/464, loss: 0.0036008793395012617 2023-01-22 12:29:13.705163: step: 400/464, loss: 7.645439147949219 2023-01-22 12:29:14.299133: step: 402/464, loss: 0.03247757628560066 2023-01-22 12:29:14.904401: step: 404/464, loss: 0.06370621919631958 2023-01-22 12:29:15.471262: step: 406/464, loss: 0.04437775909900665 2023-01-22 12:29:16.029973: step: 408/464, loss: 0.05218346416950226 2023-01-22 12:29:16.677075: step: 410/464, loss: 0.028424391523003578 2023-01-22 12:29:17.254824: step: 412/464, loss: 0.08368080109357834 2023-01-22 12:29:17.833869: step: 414/464, loss: 0.06493217498064041 2023-01-22 12:29:18.530155: step: 416/464, loss: 0.10637064278125763 2023-01-22 12:29:19.208470: step: 418/464, loss: 0.12815283238887787 2023-01-22 12:29:19.791586: step: 420/464, loss: 0.017795566469430923 2023-01-22 12:29:20.481394: step: 422/464, loss: 0.016728708520531654 2023-01-22 12:29:21.182663: step: 424/464, loss: 0.10644500702619553 2023-01-22 12:29:21.792444: step: 426/464, loss: 0.06623958051204681 2023-01-22 12:29:22.455377: step: 428/464, loss: 0.009427705779671669 2023-01-22 12:29:23.083355: step: 430/464, loss: 0.04186146333813667 2023-01-22 12:29:23.724986: step: 432/464, loss: 0.0737660676240921 2023-01-22 12:29:24.468378: step: 434/464, loss: 0.013183049857616425 2023-01-22 12:29:25.044431: step: 436/464, loss: 0.019024258479475975 2023-01-22 12:29:25.682334: step: 438/464, loss: 0.0077477432787418365 2023-01-22 12:29:26.307764: step: 440/464, loss: 0.003322381991893053 2023-01-22 12:29:26.965069: step: 442/464, loss: 0.009420504793524742 2023-01-22 12:29:27.609578: step: 444/464, loss: 0.1055421233177185 2023-01-22 12:29:28.226319: step: 446/464, loss: 0.072256900370121 2023-01-22 12:29:28.799108: step: 448/464, loss: 0.03360395506024361 2023-01-22 12:29:29.406325: step: 450/464, loss: 0.03499939665198326 2023-01-22 12:29:30.046238: step: 452/464, loss: 0.0326056033372879 2023-01-22 12:29:30.633774: step: 454/464, loss: 0.11220617592334747 2023-01-22 12:29:31.338857: step: 456/464, loss: 0.0994848981499672 2023-01-22 12:29:31.955899: step: 458/464, loss: 0.07699523866176605 2023-01-22 12:29:32.562747: step: 460/464, loss: 0.011592340655624866 
2023-01-22 12:29:33.209258: step: 462/464, loss: 0.02367353066802025 2023-01-22 12:29:33.833816: step: 464/464, loss: 0.021390561014413834 2023-01-22 12:29:34.449907: step: 466/464, loss: 0.017145728692412376 2023-01-22 12:29:35.138967: step: 468/464, loss: 2.969501495361328 2023-01-22 12:29:35.755403: step: 470/464, loss: 0.05871887877583504 2023-01-22 12:29:36.419632: step: 472/464, loss: 0.06936714798212051 2023-01-22 12:29:37.007519: step: 474/464, loss: 0.18440918624401093 2023-01-22 12:29:37.562657: step: 476/464, loss: 0.6779115796089172 2023-01-22 12:29:38.215405: step: 478/464, loss: 0.6086742281913757 2023-01-22 12:29:38.772640: step: 480/464, loss: 0.024632520973682404 2023-01-22 12:29:39.419875: step: 482/464, loss: 0.02487977221608162 2023-01-22 12:29:40.050555: step: 484/464, loss: 0.007665781769901514 2023-01-22 12:29:40.709103: step: 486/464, loss: 0.05555078759789467 2023-01-22 12:29:41.338921: step: 488/464, loss: 0.0898955911397934 2023-01-22 12:29:41.987844: step: 490/464, loss: 0.03891049697995186 2023-01-22 12:29:42.690810: step: 492/464, loss: 0.01622828096151352 2023-01-22 12:29:43.270441: step: 494/464, loss: 0.09543530642986298 2023-01-22 12:29:43.965717: step: 496/464, loss: 0.320547491312027 2023-01-22 12:29:44.544271: step: 498/464, loss: 0.034561898559331894 2023-01-22 12:29:45.163372: step: 500/464, loss: 0.030993588268756866 2023-01-22 12:29:45.764863: step: 502/464, loss: 0.009038617834448814 2023-01-22 12:29:46.485378: step: 504/464, loss: 0.07900907099246979 2023-01-22 12:29:47.036445: step: 506/464, loss: 0.007727830670773983 2023-01-22 12:29:47.637973: step: 508/464, loss: 0.06347976624965668 2023-01-22 12:29:48.236444: step: 510/464, loss: 0.0526079498231411 2023-01-22 12:29:48.939548: step: 512/464, loss: 0.11497075110673904 2023-01-22 12:29:49.626027: step: 514/464, loss: 0.03942112997174263 2023-01-22 12:29:50.275006: step: 516/464, loss: 0.038449421525001526 2023-01-22 12:29:50.899582: step: 518/464, loss: 0.01590256206691265 2023-01-22 12:29:51.506370: step: 520/464, loss: 0.043512072414159775 2023-01-22 12:29:52.100180: step: 522/464, loss: 0.021960392594337463 2023-01-22 12:29:52.713864: step: 524/464, loss: 0.031567446887493134 2023-01-22 12:29:53.263070: step: 526/464, loss: 0.007618204224854708 2023-01-22 12:29:53.953861: step: 528/464, loss: 0.024137090891599655 2023-01-22 12:29:54.639351: step: 530/464, loss: 0.061135660856962204 2023-01-22 12:29:55.259597: step: 532/464, loss: 0.07971035689115524 2023-01-22 12:29:55.837127: step: 534/464, loss: 0.048400457948446274 2023-01-22 12:29:56.487489: step: 536/464, loss: 0.05122085288167 2023-01-22 12:29:57.107460: step: 538/464, loss: 0.029268791899085045 2023-01-22 12:29:57.721929: step: 540/464, loss: 0.09635338187217712 2023-01-22 12:29:58.356261: step: 542/464, loss: 0.07422041893005371 2023-01-22 12:29:59.012524: step: 544/464, loss: 0.05977031961083412 2023-01-22 12:29:59.726210: step: 546/464, loss: 0.009733343496918678 2023-01-22 12:30:00.453168: step: 548/464, loss: 0.06108175963163376 2023-01-22 12:30:01.063201: step: 550/464, loss: 0.03537317365407944 2023-01-22 12:30:01.687795: step: 552/464, loss: 0.21923477947711945 2023-01-22 12:30:02.401654: step: 554/464, loss: 0.025603273883461952 2023-01-22 12:30:03.021225: step: 556/464, loss: 0.03309585526585579 2023-01-22 12:30:03.667100: step: 558/464, loss: 0.0017926269210875034 2023-01-22 12:30:04.288764: step: 560/464, loss: 0.04637129232287407 2023-01-22 12:30:04.900408: step: 562/464, loss: 0.0066716535948216915 2023-01-22 
12:30:05.502496: step: 564/464, loss: 0.04727320373058319 2023-01-22 12:30:06.169762: step: 566/464, loss: 0.23864291608333588 2023-01-22 12:30:06.779472: step: 568/464, loss: 0.007027804851531982 2023-01-22 12:30:07.415012: step: 570/464, loss: 0.00685263192281127 2023-01-22 12:30:08.011707: step: 572/464, loss: 0.03749839961528778 2023-01-22 12:30:08.615996: step: 574/464, loss: 0.12423070520162582 2023-01-22 12:30:09.270652: step: 576/464, loss: 0.07700704783201218 2023-01-22 12:30:09.868060: step: 578/464, loss: 0.0055520497262477875 2023-01-22 12:30:10.480508: step: 580/464, loss: 0.019946932792663574 2023-01-22 12:30:11.085639: step: 582/464, loss: 0.06127611920237541 2023-01-22 12:30:11.767074: step: 584/464, loss: 0.12984861433506012 2023-01-22 12:30:12.425443: step: 586/464, loss: 0.042202942073345184 2023-01-22 12:30:13.035723: step: 588/464, loss: 0.01746554672718048 2023-01-22 12:30:13.634909: step: 590/464, loss: 0.08601154386997223 2023-01-22 12:30:14.238019: step: 592/464, loss: 0.01006026566028595 2023-01-22 12:30:14.916973: step: 594/464, loss: 0.059398457407951355 2023-01-22 12:30:15.508758: step: 596/464, loss: 0.04708977788686752 2023-01-22 12:30:16.134710: step: 598/464, loss: 0.0155387157574296 2023-01-22 12:30:16.764465: step: 600/464, loss: 0.038387056440114975 2023-01-22 12:30:17.415348: step: 602/464, loss: 0.07821520417928696 2023-01-22 12:30:18.097485: step: 604/464, loss: 0.019257348030805588 2023-01-22 12:30:18.744289: step: 606/464, loss: 0.05501368269324303 2023-01-22 12:30:19.375961: step: 608/464, loss: 0.07247531414031982 2023-01-22 12:30:20.027982: step: 610/464, loss: 1.6925289630889893 2023-01-22 12:30:20.605627: step: 612/464, loss: 0.017896315082907677 2023-01-22 12:30:21.271154: step: 614/464, loss: 0.47850364446640015 2023-01-22 12:30:21.900644: step: 616/464, loss: 0.034765344113111496 2023-01-22 12:30:22.477649: step: 618/464, loss: 0.011117692105472088 2023-01-22 12:30:23.103484: step: 620/464, loss: 0.025692759081721306 2023-01-22 12:30:23.699559: step: 622/464, loss: 0.057302094995975494 2023-01-22 12:30:24.300337: step: 624/464, loss: 0.0591789186000824 2023-01-22 12:30:24.892755: step: 626/464, loss: 0.039957478642463684 2023-01-22 12:30:25.532030: step: 628/464, loss: 0.023328104987740517 2023-01-22 12:30:26.127943: step: 630/464, loss: 0.01869240775704384 2023-01-22 12:30:26.767178: step: 632/464, loss: 0.03534715250134468 2023-01-22 12:30:27.307685: step: 634/464, loss: 0.02479608729481697 2023-01-22 12:30:27.936869: step: 636/464, loss: 0.06726644933223724 2023-01-22 12:30:28.526321: step: 638/464, loss: 0.03567817807197571 2023-01-22 12:30:29.175687: step: 640/464, loss: 0.025053132325410843 2023-01-22 12:30:29.834321: step: 642/464, loss: 0.035145752131938934 2023-01-22 12:30:30.467485: step: 644/464, loss: 0.03355040028691292 2023-01-22 12:30:31.111484: step: 646/464, loss: 0.2510853111743927 2023-01-22 12:30:31.849908: step: 648/464, loss: 1.1779260635375977 2023-01-22 12:30:32.457112: step: 650/464, loss: 0.014893234707415104 2023-01-22 12:30:33.117904: step: 652/464, loss: 0.14382338523864746 2023-01-22 12:30:33.745267: step: 654/464, loss: 0.08110351115465164 2023-01-22 12:30:34.414848: step: 656/464, loss: 0.4584517478942871 2023-01-22 12:30:35.082795: step: 658/464, loss: 0.03606909513473511 2023-01-22 12:30:35.705247: step: 660/464, loss: 0.03051850013434887 2023-01-22 12:30:36.251990: step: 662/464, loss: 0.06344317644834518 2023-01-22 12:30:36.854887: step: 664/464, loss: 0.3073681592941284 2023-01-22 12:30:37.450106: step: 
666/464, loss: 0.0030211834236979485 2023-01-22 12:30:38.098912: step: 668/464, loss: 0.2713046371936798 2023-01-22 12:30:38.698627: step: 670/464, loss: 0.10655353963375092 2023-01-22 12:30:39.357520: step: 672/464, loss: 0.05543030798435211 2023-01-22 12:30:39.966656: step: 674/464, loss: 0.0740400180220604 2023-01-22 12:30:40.590125: step: 676/464, loss: 0.05888616293668747 2023-01-22 12:30:41.272418: step: 678/464, loss: 0.03706745803356171 2023-01-22 12:30:41.870143: step: 680/464, loss: 0.3182055950164795 2023-01-22 12:30:42.427882: step: 682/464, loss: 0.0079153161495924 2023-01-22 12:30:43.008724: step: 684/464, loss: 0.09043563157320023 2023-01-22 12:30:43.617401: step: 686/464, loss: 0.014447236433625221 2023-01-22 12:30:44.256024: step: 688/464, loss: 0.06912492215633392 2023-01-22 12:30:44.879111: step: 690/464, loss: 0.023338302969932556 2023-01-22 12:30:45.502148: step: 692/464, loss: 0.0038261814042925835 2023-01-22 12:30:46.143570: step: 694/464, loss: 0.15607619285583496 2023-01-22 12:30:46.774470: step: 696/464, loss: 0.040195267647504807 2023-01-22 12:30:47.322946: step: 698/464, loss: 0.0795249193906784 2023-01-22 12:30:47.894202: step: 700/464, loss: 0.10193470120429993 2023-01-22 12:30:48.537980: step: 702/464, loss: 0.033415645360946655 2023-01-22 12:30:49.156009: step: 704/464, loss: 0.04193181172013283 2023-01-22 12:30:49.749162: step: 706/464, loss: 0.0171140655875206 2023-01-22 12:30:50.384686: step: 708/464, loss: 0.033263515681028366 2023-01-22 12:30:51.007947: step: 710/464, loss: 0.0340133011341095 2023-01-22 12:30:51.698123: step: 712/464, loss: 0.08204270154237747 2023-01-22 12:30:52.371777: step: 714/464, loss: 0.06933029741048813 2023-01-22 12:30:53.010626: step: 716/464, loss: 0.01937519945204258 2023-01-22 12:30:53.591718: step: 718/464, loss: 0.017431603744626045 2023-01-22 12:30:54.242203: step: 720/464, loss: 0.009755883365869522 2023-01-22 12:30:54.786537: step: 722/464, loss: 0.06420817226171494 2023-01-22 12:30:55.347762: step: 724/464, loss: 0.009759259410202503 2023-01-22 12:30:55.986600: step: 726/464, loss: 0.00809104647487402 2023-01-22 12:30:56.617205: step: 728/464, loss: 0.07396422326564789 2023-01-22 12:30:57.218647: step: 730/464, loss: 0.29828405380249023 2023-01-22 12:30:57.784463: step: 732/464, loss: 0.0108730997890234 2023-01-22 12:30:58.359553: step: 734/464, loss: 0.02986094169318676 2023-01-22 12:30:59.061354: step: 736/464, loss: 0.05073142424225807 2023-01-22 12:30:59.773473: step: 738/464, loss: 0.02255566418170929 2023-01-22 12:31:00.442768: step: 740/464, loss: 1.2356326580047607 2023-01-22 12:31:01.136431: step: 742/464, loss: 0.0011746954405680299 2023-01-22 12:31:01.848565: step: 744/464, loss: 0.031542785465717316 2023-01-22 12:31:02.470456: step: 746/464, loss: 0.03540130704641342 2023-01-22 12:31:03.120017: step: 748/464, loss: 0.030641360208392143 2023-01-22 12:31:03.803482: step: 750/464, loss: 0.5967709422111511 2023-01-22 12:31:04.415618: step: 752/464, loss: 0.003998616710305214 2023-01-22 12:31:05.126155: step: 754/464, loss: 0.04136351868510246 2023-01-22 12:31:05.781832: step: 756/464, loss: 0.011975946836173534 2023-01-22 12:31:06.480495: step: 758/464, loss: 0.019011514261364937 2023-01-22 12:31:07.101814: step: 760/464, loss: 0.024285368621349335 2023-01-22 12:31:07.731085: step: 762/464, loss: 0.0003498998412396759 2023-01-22 12:31:08.386557: step: 764/464, loss: 0.1730339080095291 2023-01-22 12:31:08.944299: step: 766/464, loss: 0.036918770521879196 2023-01-22 12:31:09.562712: step: 768/464, loss: 
0.2820606231689453 2023-01-22 12:31:10.151278: step: 770/464, loss: 0.11820968985557556 2023-01-22 12:31:10.844245: step: 772/464, loss: 0.007654739078134298 2023-01-22 12:31:11.477601: step: 774/464, loss: 0.038378458470106125 2023-01-22 12:31:12.204191: step: 776/464, loss: 0.0019242237322032452 2023-01-22 12:31:12.802435: step: 778/464, loss: 0.024111930280923843 2023-01-22 12:31:13.395371: step: 780/464, loss: 0.35401463508605957 2023-01-22 12:31:14.054534: step: 782/464, loss: 0.1398405134677887 2023-01-22 12:31:14.677363: step: 784/464, loss: 0.05183522775769234 2023-01-22 12:31:15.302971: step: 786/464, loss: 0.014426725916564465 2023-01-22 12:31:15.929175: step: 788/464, loss: 0.008511271327733994 2023-01-22 12:31:16.472624: step: 790/464, loss: 0.01709834113717079 2023-01-22 12:31:17.115199: step: 792/464, loss: 0.11454438418149948 2023-01-22 12:31:17.732186: step: 794/464, loss: 0.07856132090091705 2023-01-22 12:31:18.401203: step: 796/464, loss: 0.08450557291507721 2023-01-22 12:31:19.093149: step: 798/464, loss: 0.07357024401426315 2023-01-22 12:31:19.693979: step: 800/464, loss: 0.03462034836411476 2023-01-22 12:31:20.354311: step: 802/464, loss: 0.035732369869947433 2023-01-22 12:31:20.997181: step: 804/464, loss: 0.25203943252563477 2023-01-22 12:31:21.603796: step: 806/464, loss: 0.07085590809583664 2023-01-22 12:31:22.194088: step: 808/464, loss: 0.05034415423870087 2023-01-22 12:31:22.799593: step: 810/464, loss: 0.012539473362267017 2023-01-22 12:31:23.379736: step: 812/464, loss: 0.01923408731818199 2023-01-22 12:31:23.936043: step: 814/464, loss: 0.11244556307792664 2023-01-22 12:31:24.484094: step: 816/464, loss: 0.05045890435576439 2023-01-22 12:31:25.113229: step: 818/464, loss: 0.09219861775636673 2023-01-22 12:31:25.766097: step: 820/464, loss: 0.03292452171444893 2023-01-22 12:31:26.412954: step: 822/464, loss: 0.035826146602630615 2023-01-22 12:31:26.997062: step: 824/464, loss: 0.010722989216446877 2023-01-22 12:31:27.584064: step: 826/464, loss: 0.005713389255106449 2023-01-22 12:31:28.165252: step: 828/464, loss: 0.0340614952147007 2023-01-22 12:31:28.838631: step: 830/464, loss: 0.08456560969352722 2023-01-22 12:31:29.481839: step: 832/464, loss: 0.16549208760261536 2023-01-22 12:31:30.112908: step: 834/464, loss: 0.539658784866333 2023-01-22 12:31:30.762376: step: 836/464, loss: 0.027302242815494537 2023-01-22 12:31:31.409733: step: 838/464, loss: 0.018886419013142586 2023-01-22 12:31:32.052819: step: 840/464, loss: 0.007382436189800501 2023-01-22 12:31:32.717035: step: 842/464, loss: 13.666826248168945 2023-01-22 12:31:33.349068: step: 844/464, loss: 0.17506583034992218 2023-01-22 12:31:33.988754: step: 846/464, loss: 0.14494232833385468 2023-01-22 12:31:34.659886: step: 848/464, loss: 0.05238157510757446 2023-01-22 12:31:35.210892: step: 850/464, loss: 0.04806152358651161 2023-01-22 12:31:35.810506: step: 852/464, loss: 0.026084087789058685 2023-01-22 12:31:36.453518: step: 854/464, loss: 0.08451046794652939 2023-01-22 12:31:37.076462: step: 856/464, loss: 0.010034758597612381 2023-01-22 12:31:37.662072: step: 858/464, loss: 0.03995664045214653 2023-01-22 12:31:38.251316: step: 860/464, loss: 0.09627938270568848 2023-01-22 12:31:38.853470: step: 862/464, loss: 0.0830642431974411 2023-01-22 12:31:39.492407: step: 864/464, loss: 0.02704242244362831 2023-01-22 12:31:40.125060: step: 866/464, loss: 0.02706335112452507 2023-01-22 12:31:40.748136: step: 868/464, loss: 0.019787253811955452 2023-01-22 12:31:41.360923: step: 870/464, loss: 0.07990527153015137 
2023-01-22 12:31:41.962411: step: 872/464, loss: 0.01874559558928013 2023-01-22 12:31:42.554413: step: 874/464, loss: 0.13096484541893005 2023-01-22 12:31:43.183892: step: 876/464, loss: 0.08135256171226501 2023-01-22 12:31:43.791838: step: 878/464, loss: 0.0434761568903923 2023-01-22 12:31:44.413049: step: 880/464, loss: 0.037891894578933716 2023-01-22 12:31:45.017126: step: 882/464, loss: 0.02107781358063221 2023-01-22 12:31:45.617356: step: 884/464, loss: 0.020204002037644386 2023-01-22 12:31:46.206394: step: 886/464, loss: 0.030770068988204002 2023-01-22 12:31:46.863939: step: 888/464, loss: 0.0335783027112484 2023-01-22 12:31:47.483137: step: 890/464, loss: 0.07361844927072525 2023-01-22 12:31:48.105282: step: 892/464, loss: 0.02100438065826893 2023-01-22 12:31:48.735062: step: 894/464, loss: 0.06870966404676437 2023-01-22 12:31:49.297400: step: 896/464, loss: 0.3108283579349518 2023-01-22 12:31:49.985045: step: 898/464, loss: 0.43348509073257446 2023-01-22 12:31:50.718071: step: 900/464, loss: 0.008296527899801731 2023-01-22 12:31:51.327174: step: 902/464, loss: 0.004848845303058624 2023-01-22 12:31:51.908751: step: 904/464, loss: 0.449689120054245 2023-01-22 12:31:52.529132: step: 906/464, loss: 0.03436870872974396 2023-01-22 12:31:53.133595: step: 908/464, loss: 0.05464969947934151 2023-01-22 12:31:53.779098: step: 910/464, loss: 0.013983510434627533 2023-01-22 12:31:54.393244: step: 912/464, loss: 0.027595987543463707 2023-01-22 12:31:55.026732: step: 914/464, loss: 0.14348651468753815 2023-01-22 12:31:55.685352: step: 916/464, loss: 0.0415404848754406 2023-01-22 12:31:56.341968: step: 918/464, loss: 0.009719896130263805 2023-01-22 12:31:57.014658: step: 920/464, loss: 0.03497597947716713 2023-01-22 12:31:57.646800: step: 922/464, loss: 0.04873776435852051 2023-01-22 12:31:58.308070: step: 924/464, loss: 0.07699619978666306 2023-01-22 12:31:58.904591: step: 926/464, loss: 0.07241154462099075 2023-01-22 12:31:59.661752: step: 928/464, loss: 0.6287901997566223 2023-01-22 12:32:00.216774: step: 930/464, loss: 0.029939774423837662 ================================================== Loss: 0.142 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2921486801242236, 'r': 0.3570090132827325, 'f1': 0.32133859948761745}, 'combined': 0.23677581014877075, 'epoch': 22} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28651875376633046, 'r': 0.3320355071194757, 'f1': 0.30760243917746416}, 'combined': 0.20081817272725638, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28357082732082733, 'r': 0.3728929475015813, 'f1': 0.3221550546448087}, 'combined': 0.2373774086856485, 'epoch': 22} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2998528625303657, 'r': 0.32766290763189637, 'f1': 0.31314164669691547}, 'combined': 0.20443444292130233, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 
0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22562893081761004, 'r': 0.3416666666666666, 'f1': 0.27178030303030304}, 'combined': 0.18118686868686867, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23717948717948717, 'r': 0.40217391304347827, 'f1': 0.2983870967741935}, 'combined': 0.14919354838709675, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:34:44.866754: step: 2/464, loss: 0.10644122958183289 2023-01-22 12:34:45.528092: step: 4/464, loss: 0.07475408166646957 2023-01-22 12:34:46.147512: step: 6/464, loss: 0.011993331834673882 2023-01-22 12:34:46.744286: step: 8/464, loss: 0.019554661586880684 2023-01-22 12:34:47.308553: step: 10/464, loss: 0.04917929321527481 2023-01-22 12:34:47.875373: step: 12/464, loss: 0.052631910890340805 2023-01-22 12:34:48.518136: step: 14/464, loss: 0.11645985394716263 2023-01-22 12:34:49.148131: step: 16/464, loss: 0.02561560459434986 2023-01-22 12:34:49.783480: step: 18/464, loss: 0.028937913477420807 
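The per-language evaluation blocks above report precision (p), recall (r) and f1 at the template and slot levels, plus a single combined score. The printed numbers are consistent with f1 being the usual harmonic mean of p and r and combined being the product of the template and slot f1 values; the following is a minimal sketch reproducing the epoch-22 Sample Russian figures under that assumption (this is illustrative code, not the repository's own metric implementation):

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    # 'combined' appears to be the product of the two F1 scores.
    return template_f1 * slot_f1

template_f1 = f1(1.0, 0.5)                    # 0.6666666666666666
slot_f1 = f1(0.4875, 0.33620689655172414)     # ~0.39795918, matching the slot f1 above
print(combined(template_f1, slot_f1))         # ~0.2653061, matching the logged 'combined'

The same arithmetic reproduces the Dev/Test blocks as well, e.g. Dev Chinese at epoch 22: 0.7368421052631579 * 0.32133859948761745 ≈ 0.23677581.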
2023-01-22 12:34:50.399761: step: 20/464, loss: 0.04139014706015587 2023-01-22 12:34:51.038231: step: 22/464, loss: 0.015588788315653801 2023-01-22 12:34:51.646361: step: 24/464, loss: 0.003131264355033636 2023-01-22 12:34:52.255121: step: 26/464, loss: 0.07951285690069199 2023-01-22 12:34:52.918502: step: 28/464, loss: 0.007127698510885239 2023-01-22 12:34:53.582185: step: 30/464, loss: 0.10955634713172913 2023-01-22 12:34:54.155551: step: 32/464, loss: 0.04282316938042641 2023-01-22 12:34:54.748869: step: 34/464, loss: 0.10839519649744034 2023-01-22 12:34:55.318978: step: 36/464, loss: 0.025897495448589325 2023-01-22 12:34:55.868138: step: 38/464, loss: 0.03440878540277481 2023-01-22 12:34:56.469814: step: 40/464, loss: 0.024650786072015762 2023-01-22 12:34:57.040259: step: 42/464, loss: 0.0286126546561718 2023-01-22 12:34:57.665350: step: 44/464, loss: 0.10195796191692352 2023-01-22 12:34:58.331216: step: 46/464, loss: 0.010873687453567982 2023-01-22 12:34:58.890470: step: 48/464, loss: 0.02549760416150093 2023-01-22 12:34:59.533800: step: 50/464, loss: 0.05368296802043915 2023-01-22 12:35:00.121413: step: 52/464, loss: 0.11347991228103638 2023-01-22 12:35:00.752192: step: 54/464, loss: 0.10324886441230774 2023-01-22 12:35:01.380667: step: 56/464, loss: 0.060572996735572815 2023-01-22 12:35:02.007236: step: 58/464, loss: 0.03887447714805603 2023-01-22 12:35:02.631906: step: 60/464, loss: 0.022154150530695915 2023-01-22 12:35:03.257498: step: 62/464, loss: 0.009307364001870155 2023-01-22 12:35:03.884711: step: 64/464, loss: 0.016641004011034966 2023-01-22 12:35:04.536309: step: 66/464, loss: 0.23638705909252167 2023-01-22 12:35:05.107887: step: 68/464, loss: 0.01452475693076849 2023-01-22 12:35:05.697945: step: 70/464, loss: 0.02883811853826046 2023-01-22 12:35:06.259523: step: 72/464, loss: 0.007868830114603043 2023-01-22 12:35:06.809977: step: 74/464, loss: 0.04357173666357994 2023-01-22 12:35:07.473526: step: 76/464, loss: 0.09538904577493668 2023-01-22 12:35:08.092519: step: 78/464, loss: 0.0018587729427963495 2023-01-22 12:35:08.731588: step: 80/464, loss: 0.055834852159023285 2023-01-22 12:35:09.401407: step: 82/464, loss: 0.17760033905506134 2023-01-22 12:35:09.962633: step: 84/464, loss: 0.0023912456817924976 2023-01-22 12:35:10.565907: step: 86/464, loss: 0.038486480712890625 2023-01-22 12:35:11.179663: step: 88/464, loss: 0.07755500823259354 2023-01-22 12:35:11.834137: step: 90/464, loss: 0.011968021281063557 2023-01-22 12:35:12.411461: step: 92/464, loss: 0.005228791851550341 2023-01-22 12:35:13.084443: step: 94/464, loss: 0.021261105313897133 2023-01-22 12:35:13.672933: step: 96/464, loss: 0.021069560199975967 2023-01-22 12:35:14.311762: step: 98/464, loss: 0.011009802110493183 2023-01-22 12:35:14.923983: step: 100/464, loss: 0.044996075332164764 2023-01-22 12:35:15.568933: step: 102/464, loss: 0.8489930033683777 2023-01-22 12:35:16.262001: step: 104/464, loss: 0.004066376946866512 2023-01-22 12:35:16.903557: step: 106/464, loss: 0.05179396644234657 2023-01-22 12:35:17.533583: step: 108/464, loss: 0.062286872416734695 2023-01-22 12:35:18.229957: step: 110/464, loss: 0.017060136422514915 2023-01-22 12:35:18.916168: step: 112/464, loss: 0.03774134814739227 2023-01-22 12:35:19.555713: step: 114/464, loss: 0.001183267217129469 2023-01-22 12:35:20.176424: step: 116/464, loss: 0.0009860256686806679 2023-01-22 12:35:20.815704: step: 118/464, loss: 0.009680584073066711 2023-01-22 12:35:21.413271: step: 120/464, loss: 0.01292145811021328 2023-01-22 12:35:22.090903: step: 122/464, 
loss: 0.1006164699792862 2023-01-22 12:35:22.636220: step: 124/464, loss: 0.00143814692273736 2023-01-22 12:35:23.239224: step: 126/464, loss: 0.006221970543265343 2023-01-22 12:35:23.869015: step: 128/464, loss: 0.020568178966641426 2023-01-22 12:35:24.499230: step: 130/464, loss: 0.09491739422082901 2023-01-22 12:35:25.080794: step: 132/464, loss: 0.025057677179574966 2023-01-22 12:35:25.665831: step: 134/464, loss: 0.0070098889991641045 2023-01-22 12:35:26.219292: step: 136/464, loss: 0.011948754079639912 2023-01-22 12:35:26.827656: step: 138/464, loss: 0.07747643440961838 2023-01-22 12:35:27.407418: step: 140/464, loss: 0.2736450135707855 2023-01-22 12:35:28.110611: step: 142/464, loss: 0.014300171285867691 2023-01-22 12:35:28.711413: step: 144/464, loss: 0.0038129989989101887 2023-01-22 12:35:29.404424: step: 146/464, loss: 0.01623530127108097 2023-01-22 12:35:30.036032: step: 148/464, loss: 0.08698973804712296 2023-01-22 12:35:30.710750: step: 150/464, loss: 0.14513812959194183 2023-01-22 12:35:31.300361: step: 152/464, loss: 0.0026721323374658823 2023-01-22 12:35:31.925695: step: 154/464, loss: 0.054639026522636414 2023-01-22 12:35:32.544219: step: 156/464, loss: 0.0822264775633812 2023-01-22 12:35:33.186027: step: 158/464, loss: 0.029045280069112778 2023-01-22 12:35:33.795801: step: 160/464, loss: 0.0024003051221370697 2023-01-22 12:35:34.447320: step: 162/464, loss: 0.004694780800491571 2023-01-22 12:35:35.107046: step: 164/464, loss: 0.009777367115020752 2023-01-22 12:35:35.717792: step: 166/464, loss: 0.013787111267447472 2023-01-22 12:35:36.362479: step: 168/464, loss: 0.024741878733038902 2023-01-22 12:35:36.950851: step: 170/464, loss: 0.015733882784843445 2023-01-22 12:35:37.583177: step: 172/464, loss: 0.020991239696741104 2023-01-22 12:35:38.186453: step: 174/464, loss: 0.1043391227722168 2023-01-22 12:35:38.780252: step: 176/464, loss: 0.026850463822484016 2023-01-22 12:35:39.513272: step: 178/464, loss: 0.03842521458864212 2023-01-22 12:35:40.131541: step: 180/464, loss: 0.546832263469696 2023-01-22 12:35:40.694937: step: 182/464, loss: 0.019418247044086456 2023-01-22 12:35:41.287322: step: 184/464, loss: 0.02360132336616516 2023-01-22 12:35:41.923158: step: 186/464, loss: 0.009908870793879032 2023-01-22 12:35:42.551873: step: 188/464, loss: 0.0006705268751829863 2023-01-22 12:35:43.177832: step: 190/464, loss: 0.0026790674310177565 2023-01-22 12:35:43.842148: step: 192/464, loss: 0.10986961424350739 2023-01-22 12:35:44.435630: step: 194/464, loss: 0.0328039713203907 2023-01-22 12:35:45.047048: step: 196/464, loss: 0.3907298147678375 2023-01-22 12:35:45.645286: step: 198/464, loss: 0.07607251405715942 2023-01-22 12:35:46.211938: step: 200/464, loss: 0.029756220057606697 2023-01-22 12:35:46.831582: step: 202/464, loss: 0.0925208330154419 2023-01-22 12:35:47.493457: step: 204/464, loss: 0.02148488536477089 2023-01-22 12:35:48.116197: step: 206/464, loss: 0.013174588792026043 2023-01-22 12:35:48.736692: step: 208/464, loss: 0.03021945431828499 2023-01-22 12:35:49.352962: step: 210/464, loss: 0.03675977513194084 2023-01-22 12:35:50.031641: step: 212/464, loss: 0.02435053512454033 2023-01-22 12:35:50.695103: step: 214/464, loss: 0.12001971900463104 2023-01-22 12:35:51.337336: step: 216/464, loss: 0.053732819855213165 2023-01-22 12:35:51.925778: step: 218/464, loss: 0.015495366416871548 2023-01-22 12:35:52.553802: step: 220/464, loss: 0.09464208781719208 2023-01-22 12:35:53.214072: step: 222/464, loss: 0.03359712287783623 2023-01-22 12:35:53.850562: step: 224/464, loss: 
0.0051836310885846615 2023-01-22 12:35:54.476040: step: 226/464, loss: 0.01719290390610695 2023-01-22 12:35:55.100022: step: 228/464, loss: 0.036921728402376175 2023-01-22 12:35:55.758077: step: 230/464, loss: 0.038592804223299026 2023-01-22 12:35:56.339746: step: 232/464, loss: 0.04210580512881279 2023-01-22 12:35:56.920769: step: 234/464, loss: 0.01965983398258686 2023-01-22 12:35:57.535089: step: 236/464, loss: 0.012154348194599152 2023-01-22 12:35:58.155985: step: 238/464, loss: 0.024885592982172966 2023-01-22 12:35:58.803344: step: 240/464, loss: 0.013276264071464539 2023-01-22 12:35:59.428796: step: 242/464, loss: 0.11231344938278198 2023-01-22 12:36:00.037667: step: 244/464, loss: 0.0007568482542410493 2023-01-22 12:36:00.657378: step: 246/464, loss: 0.05851550027728081 2023-01-22 12:36:01.280209: step: 248/464, loss: 0.009488094598054886 2023-01-22 12:36:01.878380: step: 250/464, loss: 0.05608345568180084 2023-01-22 12:36:02.448496: step: 252/464, loss: 0.06864190101623535 2023-01-22 12:36:03.077535: step: 254/464, loss: 0.03570174798369408 2023-01-22 12:36:03.729339: step: 256/464, loss: 0.07568265497684479 2023-01-22 12:36:04.362364: step: 258/464, loss: 0.04004029929637909 2023-01-22 12:36:05.050791: step: 260/464, loss: 0.04522702470421791 2023-01-22 12:36:05.643543: step: 262/464, loss: 0.01839270070195198 2023-01-22 12:36:06.203441: step: 264/464, loss: 0.06256843358278275 2023-01-22 12:36:06.825468: step: 266/464, loss: 0.0010576738277450204 2023-01-22 12:36:07.452408: step: 268/464, loss: 0.20907624065876007 2023-01-22 12:36:08.081430: step: 270/464, loss: 1.4093396663665771 2023-01-22 12:36:08.683144: step: 272/464, loss: 0.12446978688240051 2023-01-22 12:36:09.318495: step: 274/464, loss: 0.004322616849094629 2023-01-22 12:36:09.911477: step: 276/464, loss: 0.020683592185378075 2023-01-22 12:36:10.544226: step: 278/464, loss: 0.11281999945640564 2023-01-22 12:36:11.177180: step: 280/464, loss: 0.06595201045274734 2023-01-22 12:36:11.801225: step: 282/464, loss: 0.4145338535308838 2023-01-22 12:36:12.480021: step: 284/464, loss: 0.05684361606836319 2023-01-22 12:36:13.103157: step: 286/464, loss: 1.880172848701477 2023-01-22 12:36:13.773407: step: 288/464, loss: 0.08005440980195999 2023-01-22 12:36:14.430832: step: 290/464, loss: 0.006799811031669378 2023-01-22 12:36:15.066526: step: 292/464, loss: 0.03461438789963722 2023-01-22 12:36:15.639481: step: 294/464, loss: 0.010340527631342411 2023-01-22 12:36:16.280470: step: 296/464, loss: 0.06825224310159683 2023-01-22 12:36:16.851770: step: 298/464, loss: 0.007516786921769381 2023-01-22 12:36:17.496774: step: 300/464, loss: 0.060877662152051926 2023-01-22 12:36:18.109352: step: 302/464, loss: 0.031738415360450745 2023-01-22 12:36:18.803817: step: 304/464, loss: 0.060117222368717194 2023-01-22 12:36:19.396607: step: 306/464, loss: 0.022865887731313705 2023-01-22 12:36:20.049914: step: 308/464, loss: 0.09713611006736755 2023-01-22 12:36:20.673690: step: 310/464, loss: 0.06979311257600784 2023-01-22 12:36:21.279435: step: 312/464, loss: 0.09803225100040436 2023-01-22 12:36:21.974459: step: 314/464, loss: 0.06322037428617477 2023-01-22 12:36:22.588748: step: 316/464, loss: 0.3646462559700012 2023-01-22 12:36:23.181600: step: 318/464, loss: 0.005568439140915871 2023-01-22 12:36:23.815359: step: 320/464, loss: 0.08541621267795563 2023-01-22 12:36:24.425951: step: 322/464, loss: 0.016559764742851257 2023-01-22 12:36:25.074926: step: 324/464, loss: 0.10714079439640045 2023-01-22 12:36:25.644430: step: 326/464, loss: 
0.005957483313977718 2023-01-22 12:36:26.292070: step: 328/464, loss: 0.01278592087328434 2023-01-22 12:36:26.925864: step: 330/464, loss: 0.019597185775637627 2023-01-22 12:36:27.569929: step: 332/464, loss: 0.00973434280604124 2023-01-22 12:36:28.211781: step: 334/464, loss: 0.025493955239653587 2023-01-22 12:36:28.772272: step: 336/464, loss: 0.02513478696346283 2023-01-22 12:36:29.364021: step: 338/464, loss: 0.0401698537170887 2023-01-22 12:36:29.964520: step: 340/464, loss: 0.02699063904583454 2023-01-22 12:36:30.657920: step: 342/464, loss: 0.060390595346689224 2023-01-22 12:36:31.287853: step: 344/464, loss: 0.010543850250542164 2023-01-22 12:36:31.932769: step: 346/464, loss: 0.001764788175933063 2023-01-22 12:36:32.554383: step: 348/464, loss: 0.009307467378675938 2023-01-22 12:36:33.163866: step: 350/464, loss: 0.0627625361084938 2023-01-22 12:36:33.770124: step: 352/464, loss: 0.05518518015742302 2023-01-22 12:36:34.369552: step: 354/464, loss: 0.06610012799501419 2023-01-22 12:36:35.003757: step: 356/464, loss: 0.07874742150306702 2023-01-22 12:36:35.592282: step: 358/464, loss: 0.09541633725166321 2023-01-22 12:36:36.228466: step: 360/464, loss: 5.9227447509765625 2023-01-22 12:36:36.833813: step: 362/464, loss: 0.02257785201072693 2023-01-22 12:36:37.446701: step: 364/464, loss: 0.023625222966074944 2023-01-22 12:36:37.960588: step: 366/464, loss: 0.023329224437475204 2023-01-22 12:36:38.558530: step: 368/464, loss: 0.12411260604858398 2023-01-22 12:36:39.163685: step: 370/464, loss: 0.1140187606215477 2023-01-22 12:36:39.808937: step: 372/464, loss: 1.646920919418335 2023-01-22 12:36:40.572096: step: 374/464, loss: 0.29714834690093994 2023-01-22 12:36:41.259040: step: 376/464, loss: 0.5195530652999878 2023-01-22 12:36:41.912042: step: 378/464, loss: 0.03506457060575485 2023-01-22 12:36:42.578369: step: 380/464, loss: 0.035885609686374664 2023-01-22 12:36:43.170315: step: 382/464, loss: 0.01231367141008377 2023-01-22 12:36:43.759157: step: 384/464, loss: 0.014884741976857185 2023-01-22 12:36:44.394731: step: 386/464, loss: 0.004058377351611853 2023-01-22 12:36:45.003958: step: 388/464, loss: 0.026437105610966682 2023-01-22 12:36:45.728890: step: 390/464, loss: 0.058611735701560974 2023-01-22 12:36:46.367822: step: 392/464, loss: 0.4463246166706085 2023-01-22 12:36:46.993133: step: 394/464, loss: 0.0023168607149273157 2023-01-22 12:36:47.647176: step: 396/464, loss: 0.07154099643230438 2023-01-22 12:36:48.341412: step: 398/464, loss: 0.06256363540887833 2023-01-22 12:36:49.066453: step: 400/464, loss: 0.043433479964733124 2023-01-22 12:36:49.725911: step: 402/464, loss: 0.08834725618362427 2023-01-22 12:36:50.323607: step: 404/464, loss: 0.9852097630500793 2023-01-22 12:36:51.010750: step: 406/464, loss: 0.056845128536224365 2023-01-22 12:36:51.596182: step: 408/464, loss: 0.18970215320587158 2023-01-22 12:36:52.142356: step: 410/464, loss: 0.009288941510021687 2023-01-22 12:36:52.744679: step: 412/464, loss: 0.01798395812511444 2023-01-22 12:36:53.437884: step: 414/464, loss: 0.03440636768937111 2023-01-22 12:36:54.093375: step: 416/464, loss: 0.031902141869068146 2023-01-22 12:36:54.703004: step: 418/464, loss: 0.2523932456970215 2023-01-22 12:36:55.350510: step: 420/464, loss: 0.05770578980445862 2023-01-22 12:36:56.032271: step: 422/464, loss: 0.009200010448694229 2023-01-22 12:36:56.805623: step: 424/464, loss: 0.04757911339402199 2023-01-22 12:36:57.386885: step: 426/464, loss: 0.09651003032922745 2023-01-22 12:36:58.116214: step: 428/464, loss: 0.02554561011493206 
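Each training record above has the form "<timestamp>: step: <k>/464, loss: <value>", and the step counter keeps running past the nominal /464 total within an epoch (up to 930 here), so that figure is best read as a label rather than a hard bound. The following is a minimal, illustrative Python sketch for pulling these records out of the raw log text and averaging them; the helper names are hypothetical rather than part of train.py, and whether the per-epoch "Loss:" summary printed at the end of each epoch is exactly this plain mean is not visible from the log.

import re
from statistics import mean

# Matches one record of the form shown in this log, e.g.
#   2023-01-22 12:36:58.817249: step: 430/464, loss: 0.07525847852230072
STEP_RE = re.compile(
    r"(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): "
    r"step: (?P<step>\d+)/(?P<total>\d+), loss: (?P<loss>[0-9.]+)"
)

def iter_step_losses(text):
    # Yield (step, loss) pairs; works even though records are wrapped across lines.
    for m in STEP_RE.finditer(text):
        yield int(m.group("step")), float(m.group("loss"))

def epoch_mean_loss(text):
    # One plausible way to reproduce a summary like "Loss: 0.090" for an epoch.
    losses = [loss for _, loss in iter_step_losses(text)]
    return mean(losses) if losses else float("nan")

# The two records that immediately follow in this log:
sample = (
    "2023-01-22 12:36:58.817249: step: 430/464, loss: 0.07525847852230072 "
    "2023-01-22 12:36:59.478999: step: 432/464, loss: 0.05352248623967171"
)
print(round(epoch_mean_loss(sample), 5))  # 0.06439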
2023-01-22 12:36:58.817249: step: 430/464, loss: 0.07525847852230072 2023-01-22 12:36:59.478999: step: 432/464, loss: 0.05352248623967171 2023-01-22 12:37:00.120990: step: 434/464, loss: 0.004007582552731037 2023-01-22 12:37:00.819160: step: 436/464, loss: 0.039869554340839386 2023-01-22 12:37:01.434474: step: 438/464, loss: 0.031047623604536057 2023-01-22 12:37:02.072598: step: 440/464, loss: 0.009951122105121613 2023-01-22 12:37:02.713928: step: 442/464, loss: 0.027510803192853928 2023-01-22 12:37:03.314005: step: 444/464, loss: 0.0036701064091175795 2023-01-22 12:37:03.895273: step: 446/464, loss: 0.05184917896986008 2023-01-22 12:37:04.507745: step: 448/464, loss: 0.10439484566450119 2023-01-22 12:37:05.153274: step: 450/464, loss: 0.0792376846075058 2023-01-22 12:37:05.745518: step: 452/464, loss: 0.04133224859833717 2023-01-22 12:37:06.332590: step: 454/464, loss: 0.02767222933471203 2023-01-22 12:37:06.932887: step: 456/464, loss: 0.029987365007400513 2023-01-22 12:37:07.555389: step: 458/464, loss: 0.03149545192718506 2023-01-22 12:37:08.179391: step: 460/464, loss: 0.006614563055336475 2023-01-22 12:37:08.778097: step: 462/464, loss: 0.025004135444760323 2023-01-22 12:37:09.375243: step: 464/464, loss: 0.036268845200538635 2023-01-22 12:37:10.053425: step: 466/464, loss: 0.024740692228078842 2023-01-22 12:37:10.668318: step: 468/464, loss: 0.027130400761961937 2023-01-22 12:37:11.256507: step: 470/464, loss: 0.008292856626212597 2023-01-22 12:37:11.977830: step: 472/464, loss: 0.023867568001151085 2023-01-22 12:37:12.560362: step: 474/464, loss: 0.09256128966808319 2023-01-22 12:37:13.158972: step: 476/464, loss: 0.03897581249475479 2023-01-22 12:37:13.833440: step: 478/464, loss: 0.09847109764814377 2023-01-22 12:37:14.466638: step: 480/464, loss: 0.004689089488238096 2023-01-22 12:37:15.085968: step: 482/464, loss: 0.015606822445988655 2023-01-22 12:37:15.652704: step: 484/464, loss: 0.14244210720062256 2023-01-22 12:37:16.261531: step: 486/464, loss: 0.012135368771851063 2023-01-22 12:37:16.896339: step: 488/464, loss: 0.08063202351331711 2023-01-22 12:37:17.569861: step: 490/464, loss: 0.023309670388698578 2023-01-22 12:37:18.097919: step: 492/464, loss: 0.03579302877187729 2023-01-22 12:37:18.717406: step: 494/464, loss: 0.013103240169584751 2023-01-22 12:37:19.388190: step: 496/464, loss: 0.021631481125950813 2023-01-22 12:37:19.998559: step: 498/464, loss: 0.05697460100054741 2023-01-22 12:37:20.707306: step: 500/464, loss: 0.05060938373208046 2023-01-22 12:37:21.334004: step: 502/464, loss: 0.017666872590780258 2023-01-22 12:37:21.990828: step: 504/464, loss: 0.16124327480793 2023-01-22 12:37:22.548876: step: 506/464, loss: 0.037429243326187134 2023-01-22 12:37:23.168174: step: 508/464, loss: 0.03328167274594307 2023-01-22 12:37:23.880986: step: 510/464, loss: 0.03959827497601509 2023-01-22 12:37:24.501150: step: 512/464, loss: 0.026566093787550926 2023-01-22 12:37:25.158114: step: 514/464, loss: 0.11199571192264557 2023-01-22 12:37:25.783742: step: 516/464, loss: 0.07202999293804169 2023-01-22 12:37:26.587030: step: 518/464, loss: 0.09872186183929443 2023-01-22 12:37:27.198580: step: 520/464, loss: 0.056988392025232315 2023-01-22 12:37:27.752094: step: 522/464, loss: 0.007081233896315098 2023-01-22 12:37:28.333136: step: 524/464, loss: 0.24260111153125763 2023-01-22 12:37:28.981649: step: 526/464, loss: 0.006394832860678434 2023-01-22 12:37:29.594934: step: 528/464, loss: 0.062192559242248535 2023-01-22 12:37:30.210217: step: 530/464, loss: 0.1292833387851715 2023-01-22 
12:37:30.824649: step: 532/464, loss: 0.0303835216909647 2023-01-22 12:37:31.442702: step: 534/464, loss: 0.06893176585435867 2023-01-22 12:37:32.071637: step: 536/464, loss: 0.05994332581758499 2023-01-22 12:37:32.710169: step: 538/464, loss: 0.022876126691699028 2023-01-22 12:37:33.299081: step: 540/464, loss: 0.037819162011146545 2023-01-22 12:37:33.872065: step: 542/464, loss: 0.022822659462690353 2023-01-22 12:37:34.477448: step: 544/464, loss: 0.022124748677015305 2023-01-22 12:37:35.072735: step: 546/464, loss: 0.0019097479525953531 2023-01-22 12:37:35.671829: step: 548/464, loss: 0.059465598315000534 2023-01-22 12:37:36.244019: step: 550/464, loss: 0.11557956784963608 2023-01-22 12:37:36.861263: step: 552/464, loss: 0.04174748808145523 2023-01-22 12:37:37.478850: step: 554/464, loss: 0.02896018885076046 2023-01-22 12:37:38.086197: step: 556/464, loss: 0.04794749245047569 2023-01-22 12:37:38.687995: step: 558/464, loss: 0.029625695198774338 2023-01-22 12:37:39.297023: step: 560/464, loss: 0.02156044915318489 2023-01-22 12:37:39.859141: step: 562/464, loss: 0.015180575661361217 2023-01-22 12:37:40.535280: step: 564/464, loss: 0.035031288862228394 2023-01-22 12:37:41.178863: step: 566/464, loss: 0.0358675979077816 2023-01-22 12:37:41.777443: step: 568/464, loss: 0.04682903736829758 2023-01-22 12:37:42.417267: step: 570/464, loss: 0.04473865032196045 2023-01-22 12:37:43.052994: step: 572/464, loss: 0.034532204270362854 2023-01-22 12:37:43.715991: step: 574/464, loss: 0.018957484513521194 2023-01-22 12:37:44.407954: step: 576/464, loss: 0.11535761505365372 2023-01-22 12:37:44.988948: step: 578/464, loss: 0.020611165091395378 2023-01-22 12:37:45.571320: step: 580/464, loss: 0.06758003681898117 2023-01-22 12:37:46.195917: step: 582/464, loss: 0.0010374293196946383 2023-01-22 12:37:46.850506: step: 584/464, loss: 0.037798311561346054 2023-01-22 12:37:47.457886: step: 586/464, loss: 0.03922104090452194 2023-01-22 12:37:48.120065: step: 588/464, loss: 0.011756215244531631 2023-01-22 12:37:48.741776: step: 590/464, loss: 0.013177670538425446 2023-01-22 12:37:49.396037: step: 592/464, loss: 0.052139610052108765 2023-01-22 12:37:50.031178: step: 594/464, loss: 0.047864366322755814 2023-01-22 12:37:50.610367: step: 596/464, loss: 0.011701811105012894 2023-01-22 12:37:51.171583: step: 598/464, loss: 0.021516425535082817 2023-01-22 12:37:51.925158: step: 600/464, loss: 0.0323818065226078 2023-01-22 12:37:52.622048: step: 602/464, loss: 0.9882540106773376 2023-01-22 12:37:53.216672: step: 604/464, loss: 0.2516738176345825 2023-01-22 12:37:53.860399: step: 606/464, loss: 0.036247603595256805 2023-01-22 12:37:54.562427: step: 608/464, loss: 0.08317970484495163 2023-01-22 12:37:55.163612: step: 610/464, loss: 0.05322250723838806 2023-01-22 12:37:55.743486: step: 612/464, loss: 0.007082348223775625 2023-01-22 12:37:56.304485: step: 614/464, loss: 0.013145468197762966 2023-01-22 12:37:56.913833: step: 616/464, loss: 0.03757704794406891 2023-01-22 12:37:57.551618: step: 618/464, loss: 0.01598481647670269 2023-01-22 12:37:58.163758: step: 620/464, loss: 0.009986592456698418 2023-01-22 12:37:58.807450: step: 622/464, loss: 0.04212084785103798 2023-01-22 12:37:59.461123: step: 624/464, loss: 0.009833576157689095 2023-01-22 12:38:00.083319: step: 626/464, loss: 0.060698915272951126 2023-01-22 12:38:00.643222: step: 628/464, loss: 0.01126753631979227 2023-01-22 12:38:01.313109: step: 630/464, loss: 0.04587607830762863 2023-01-22 12:38:02.028914: step: 632/464, loss: 0.03733550384640694 2023-01-22 
12:38:02.679560: step: 634/464, loss: 0.06836825609207153 2023-01-22 12:38:03.265632: step: 636/464, loss: 0.014130858704447746 2023-01-22 12:38:03.839324: step: 638/464, loss: 0.020342597737908363 2023-01-22 12:38:04.506081: step: 640/464, loss: 0.052543386816978455 2023-01-22 12:38:05.119091: step: 642/464, loss: 0.059955451637506485 2023-01-22 12:38:05.786603: step: 644/464, loss: 0.025845695286989212 2023-01-22 12:38:06.505974: step: 646/464, loss: 0.04973992705345154 2023-01-22 12:38:07.114003: step: 648/464, loss: 0.04457143321633339 2023-01-22 12:38:07.687845: step: 650/464, loss: 0.14433138072490692 2023-01-22 12:38:08.385458: step: 652/464, loss: 0.03164048120379448 2023-01-22 12:38:09.019687: step: 654/464, loss: 0.14378276467323303 2023-01-22 12:38:09.618053: step: 656/464, loss: 0.004824694711714983 2023-01-22 12:38:10.206348: step: 658/464, loss: 0.08035098761320114 2023-01-22 12:38:10.812775: step: 660/464, loss: 0.007931775413453579 2023-01-22 12:38:11.418583: step: 662/464, loss: 0.006059381645172834 2023-01-22 12:38:12.002855: step: 664/464, loss: 0.020819762721657753 2023-01-22 12:38:12.628809: step: 666/464, loss: 0.034322306513786316 2023-01-22 12:38:13.273567: step: 668/464, loss: 0.04108717292547226 2023-01-22 12:38:13.871604: step: 670/464, loss: 0.017097413539886475 2023-01-22 12:38:14.443999: step: 672/464, loss: 0.036327969282865524 2023-01-22 12:38:15.033297: step: 674/464, loss: 0.014165619388222694 2023-01-22 12:38:15.584951: step: 676/464, loss: 0.020011477172374725 2023-01-22 12:38:16.201257: step: 678/464, loss: 0.0024309654254466295 2023-01-22 12:38:16.835790: step: 680/464, loss: 0.019209301099181175 2023-01-22 12:38:17.475004: step: 682/464, loss: 0.025400152429938316 2023-01-22 12:38:18.129143: step: 684/464, loss: 0.028395840898156166 2023-01-22 12:38:18.769782: step: 686/464, loss: 0.1376052349805832 2023-01-22 12:38:19.392747: step: 688/464, loss: 0.01598648726940155 2023-01-22 12:38:19.995962: step: 690/464, loss: 0.049407944083213806 2023-01-22 12:38:20.623488: step: 692/464, loss: 0.0032649594359099865 2023-01-22 12:38:21.257867: step: 694/464, loss: 0.34030041098594666 2023-01-22 12:38:21.943191: step: 696/464, loss: 0.07703862339258194 2023-01-22 12:38:22.559050: step: 698/464, loss: 0.038921430706977844 2023-01-22 12:38:23.198962: step: 700/464, loss: 0.06986559927463531 2023-01-22 12:38:23.833130: step: 702/464, loss: 0.04618273675441742 2023-01-22 12:38:24.430130: step: 704/464, loss: 0.052316583693027496 2023-01-22 12:38:25.102252: step: 706/464, loss: 0.0034637299831956625 2023-01-22 12:38:25.711161: step: 708/464, loss: 0.14313380420207977 2023-01-22 12:38:26.299754: step: 710/464, loss: 0.0035805453080683947 2023-01-22 12:38:26.867880: step: 712/464, loss: 0.008872403763234615 2023-01-22 12:38:27.523956: step: 714/464, loss: 0.0516185462474823 2023-01-22 12:38:28.283872: step: 716/464, loss: 0.014751007780432701 2023-01-22 12:38:28.906205: step: 718/464, loss: 0.05501936003565788 2023-01-22 12:38:29.485065: step: 720/464, loss: 0.13606928288936615 2023-01-22 12:38:30.182752: step: 722/464, loss: 0.10207492858171463 2023-01-22 12:38:30.780632: step: 724/464, loss: 0.027091432362794876 2023-01-22 12:38:31.397528: step: 726/464, loss: 0.04187340661883354 2023-01-22 12:38:31.995860: step: 728/464, loss: 0.020310498774051666 2023-01-22 12:38:32.666477: step: 730/464, loss: 0.0079060522839427 2023-01-22 12:38:33.274842: step: 732/464, loss: 0.006408375222235918 2023-01-22 12:38:33.857319: step: 734/464, loss: 0.05852990970015526 2023-01-22 
12:38:34.462502: step: 736/464, loss: 2.211858034133911 2023-01-22 12:38:35.103567: step: 738/464, loss: 0.07533301413059235 2023-01-22 12:38:35.800597: step: 740/464, loss: 0.0711047425866127 2023-01-22 12:38:36.446795: step: 742/464, loss: 0.02632574737071991 2023-01-22 12:38:37.108979: step: 744/464, loss: 0.011705012992024422 2023-01-22 12:38:37.747832: step: 746/464, loss: 0.032878875732421875 2023-01-22 12:38:38.353831: step: 748/464, loss: 0.0006193576846271753 2023-01-22 12:38:39.023144: step: 750/464, loss: 0.10979987680912018 2023-01-22 12:38:39.689261: step: 752/464, loss: 0.04243628680706024 2023-01-22 12:38:40.324547: step: 754/464, loss: 0.05799168720841408 2023-01-22 12:38:40.866674: step: 756/464, loss: 0.019762758165597916 2023-01-22 12:38:41.486442: step: 758/464, loss: 0.013637124560773373 2023-01-22 12:38:42.113857: step: 760/464, loss: 0.014164343476295471 2023-01-22 12:38:42.815195: step: 762/464, loss: 0.02152133919298649 2023-01-22 12:38:43.461015: step: 764/464, loss: 0.029147664085030556 2023-01-22 12:38:44.050392: step: 766/464, loss: 0.0378149151802063 2023-01-22 12:38:44.706794: step: 768/464, loss: 0.07730662822723389 2023-01-22 12:38:45.311582: step: 770/464, loss: 0.005782376509159803 2023-01-22 12:38:45.914648: step: 772/464, loss: 0.027559412643313408 2023-01-22 12:38:46.514431: step: 774/464, loss: 0.02795860543847084 2023-01-22 12:38:47.184447: step: 776/464, loss: 0.02722206711769104 2023-01-22 12:38:47.852774: step: 778/464, loss: 0.033019471913576126 2023-01-22 12:38:48.491006: step: 780/464, loss: 0.02790030650794506 2023-01-22 12:38:49.130501: step: 782/464, loss: 0.2831510901451111 2023-01-22 12:38:49.731082: step: 784/464, loss: 0.03222643956542015 2023-01-22 12:38:50.334898: step: 786/464, loss: 0.21533460915088654 2023-01-22 12:38:50.933557: step: 788/464, loss: 0.06409087032079697 2023-01-22 12:38:51.645624: step: 790/464, loss: 0.004153969697654247 2023-01-22 12:38:52.259866: step: 792/464, loss: 0.12987728416919708 2023-01-22 12:38:52.895493: step: 794/464, loss: 0.10713863372802734 2023-01-22 12:38:53.538098: step: 796/464, loss: 0.2646576166152954 2023-01-22 12:38:54.181745: step: 798/464, loss: 0.011718453839421272 2023-01-22 12:38:54.788090: step: 800/464, loss: 0.020255332812666893 2023-01-22 12:38:55.444106: step: 802/464, loss: 0.025662105530500412 2023-01-22 12:38:56.123116: step: 804/464, loss: 0.011995234526693821 2023-01-22 12:38:56.796621: step: 806/464, loss: 0.054274559020996094 2023-01-22 12:38:57.303753: step: 808/464, loss: 0.044902458786964417 2023-01-22 12:38:57.929290: step: 810/464, loss: 0.04050503671169281 2023-01-22 12:38:58.496680: step: 812/464, loss: 0.006103217601776123 2023-01-22 12:38:59.199993: step: 814/464, loss: 0.06136700510978699 2023-01-22 12:38:59.846526: step: 816/464, loss: 0.014977425336837769 2023-01-22 12:39:00.478134: step: 818/464, loss: 0.028046803548932076 2023-01-22 12:39:01.187982: step: 820/464, loss: 0.3037695586681366 2023-01-22 12:39:01.827310: step: 822/464, loss: 0.7152191996574402 2023-01-22 12:39:02.492168: step: 824/464, loss: 0.1377854347229004 2023-01-22 12:39:03.173702: step: 826/464, loss: 0.07740423083305359 2023-01-22 12:39:03.806791: step: 828/464, loss: 0.011711751110851765 2023-01-22 12:39:04.447577: step: 830/464, loss: 0.0030323874671012163 2023-01-22 12:39:05.047884: step: 832/464, loss: 0.04836704954504967 2023-01-22 12:39:05.611197: step: 834/464, loss: 0.0130617655813694 2023-01-22 12:39:06.223556: step: 836/464, loss: 0.015250151976943016 2023-01-22 12:39:06.850907: 
step: 838/464, loss: 0.008788347244262695 2023-01-22 12:39:07.464347: step: 840/464, loss: 0.056761596351861954 2023-01-22 12:39:08.086694: step: 842/464, loss: 0.0005920501425862312 2023-01-22 12:39:08.664495: step: 844/464, loss: 0.4124147295951843 2023-01-22 12:39:09.225086: step: 846/464, loss: 0.009998713620007038 2023-01-22 12:39:09.872283: step: 848/464, loss: 0.057369478046894073 2023-01-22 12:39:10.487199: step: 850/464, loss: 0.2438540756702423 2023-01-22 12:39:11.210144: step: 852/464, loss: 0.02027585729956627 2023-01-22 12:39:11.775808: step: 854/464, loss: 0.017415670678019524 2023-01-22 12:39:12.531047: step: 856/464, loss: 0.0011836671037599444 2023-01-22 12:39:13.151518: step: 858/464, loss: 0.010898541659116745 2023-01-22 12:39:13.705972: step: 860/464, loss: 0.053165942430496216 2023-01-22 12:39:14.302189: step: 862/464, loss: 0.1877359002828598 2023-01-22 12:39:14.956080: step: 864/464, loss: 0.06324882805347443 2023-01-22 12:39:15.649159: step: 866/464, loss: 0.03348288685083389 2023-01-22 12:39:16.351717: step: 868/464, loss: 0.12745660543441772 2023-01-22 12:39:16.882884: step: 870/464, loss: 0.050736475735902786 2023-01-22 12:39:17.563945: step: 872/464, loss: 0.011317925527691841 2023-01-22 12:39:18.235095: step: 874/464, loss: 0.04550086334347725 2023-01-22 12:39:18.950191: step: 876/464, loss: 0.12162181735038757 2023-01-22 12:39:19.545024: step: 878/464, loss: 0.05083238705992699 2023-01-22 12:39:20.165913: step: 880/464, loss: 0.025550205260515213 2023-01-22 12:39:20.841585: step: 882/464, loss: 0.020611297339200974 2023-01-22 12:39:21.557900: step: 884/464, loss: 0.15503253042697906 2023-01-22 12:39:22.189757: step: 886/464, loss: 0.060559242963790894 2023-01-22 12:39:22.867660: step: 888/464, loss: 0.04286627843976021 2023-01-22 12:39:23.505518: step: 890/464, loss: 0.30708542466163635 2023-01-22 12:39:24.105310: step: 892/464, loss: 0.08181405812501907 2023-01-22 12:39:24.720248: step: 894/464, loss: 0.02677612379193306 2023-01-22 12:39:25.309587: step: 896/464, loss: 0.04505239799618721 2023-01-22 12:39:25.912976: step: 898/464, loss: 0.08125241100788116 2023-01-22 12:39:26.586352: step: 900/464, loss: 0.03448351100087166 2023-01-22 12:39:27.231765: step: 902/464, loss: 0.028867723420262337 2023-01-22 12:39:27.929498: step: 904/464, loss: 0.1862575262784958 2023-01-22 12:39:28.557566: step: 906/464, loss: 0.06476181745529175 2023-01-22 12:39:29.174660: step: 908/464, loss: 0.015432149171829224 2023-01-22 12:39:29.816847: step: 910/464, loss: 0.00814574584364891 2023-01-22 12:39:30.392103: step: 912/464, loss: 0.007910625077784061 2023-01-22 12:39:31.037236: step: 914/464, loss: 0.035133056342601776 2023-01-22 12:39:31.668665: step: 916/464, loss: 0.009897212497889996 2023-01-22 12:39:32.278652: step: 918/464, loss: 0.0011734727304428816 2023-01-22 12:39:32.876285: step: 920/464, loss: 0.024619368836283684 2023-01-22 12:39:33.472201: step: 922/464, loss: 0.023495979607105255 2023-01-22 12:39:34.066121: step: 924/464, loss: 0.03111870028078556 2023-01-22 12:39:34.729001: step: 926/464, loss: 0.06664323806762695 2023-01-22 12:39:36.035786: step: 928/464, loss: 0.05593162775039673 2023-01-22 12:39:36.499079: step: 930/464, loss: 0.020524058490991592 ================================================== Loss: 0.090 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30139749262536875, 'r': 0.3231301391524352, 'f1': 0.3118856837606838}, 'combined': 0.22981050382366175, 'epoch': 23} Test 
Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.28545016089836045, 'r': 0.30091532113068664, 'f1': 0.2929787972385496}, 'combined': 0.19127113187594427, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29742379753914994, 'r': 0.3363654332700823, 'f1': 0.3156982784208965}, 'combined': 0.23261978409960793, 'epoch': 23} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30101620081007097, 'r': 0.30433623243665264, 'f1': 0.3026671123318812}, 'combined': 0.19759614587469965, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3111888674388675, 'r': 0.3277226213065872, 'f1': 0.31924181410087143}, 'combined': 0.23523081039011579, 'epoch': 23} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30653112652131626, 'r': 0.30174597578590545, 'f1': 0.30411972941309673}, 'combined': 0.19854448655984552, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20676100628930816, 'r': 0.31309523809523804, 'f1': 0.24905303030303022}, 'combined': 0.16603535353535348, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27439024390243905, 'r': 0.4891304347826087, 'f1': 0.35156250000000006}, 'combined': 0.17578125000000003, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 
'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:42:14.002222: step: 2/464, loss: 0.002368804533034563 2023-01-22 12:42:14.593870: step: 4/464, loss: 0.050723202526569366 2023-01-22 12:42:15.190824: step: 6/464, loss: 0.05543776601552963 2023-01-22 12:42:15.708740: step: 8/464, loss: 0.02150196023285389 2023-01-22 12:42:16.367438: step: 10/464, loss: 0.07164845615625381 2023-01-22 12:42:17.002551: step: 12/464, loss: 0.007598825264722109 2023-01-22 12:42:17.634702: step: 14/464, loss: 0.002054923679679632 2023-01-22 12:42:18.254619: step: 16/464, loss: 0.06091110408306122 2023-01-22 12:42:18.917062: step: 18/464, loss: 0.255815327167511 2023-01-22 12:42:19.555813: step: 20/464, loss: 0.00489705428481102 2023-01-22 12:42:20.191586: step: 22/464, loss: 0.006008407566696405 2023-01-22 12:42:20.801477: step: 24/464, loss: 0.022913167253136635 2023-01-22 12:42:21.418388: step: 26/464, loss: 0.5145890712738037 2023-01-22 12:42:22.097048: step: 28/464, loss: 0.04759300500154495 2023-01-22 12:42:22.820514: step: 30/464, loss: 0.02373594418168068 2023-01-22 12:42:23.424744: step: 32/464, loss: 0.0328323096036911 2023-01-22 12:42:24.035430: step: 34/464, loss: 0.07039014995098114 2023-01-22 12:42:24.687660: step: 36/464, loss: 0.009671245701611042 2023-01-22 12:42:25.262682: step: 38/464, loss: 0.013924448750913143 2023-01-22 12:42:25.910626: step: 40/464, loss: 0.16166502237319946 2023-01-22 12:42:26.555826: step: 42/464, loss: 0.08743336796760559 2023-01-22 12:42:27.113349: step: 44/464, loss: 0.017455067485570908 2023-01-22 12:42:27.789294: step: 46/464, loss: 0.05810796469449997 2023-01-22 12:42:28.388635: step: 48/464, loss: 0.3676496744155884 2023-01-22 12:42:28.969134: step: 50/464, loss: 0.02534743957221508 2023-01-22 12:42:29.588430: step: 52/464, loss: 0.024781718850135803 2023-01-22 12:42:30.228495: step: 54/464, loss: 0.04081379622220993 2023-01-22 12:42:30.845423: step: 56/464, loss: 0.013650135137140751 2023-01-22 12:42:31.425911: step: 58/464, loss: 0.003687690244987607 2023-01-22 12:42:32.069491: step: 60/464, loss: 0.05775059387087822 2023-01-22 12:42:32.724634: step: 62/464, loss: 0.009221754968166351 2023-01-22 12:42:33.322732: step: 64/464, loss: 0.011974228546023369 2023-01-22 12:42:33.929119: step: 66/464, loss: 0.004298684652894735 2023-01-22 12:42:34.634389: step: 68/464, loss: 0.02135203778743744 2023-01-22 12:42:35.264540: step: 70/464, loss: 0.0393330343067646 2023-01-22 12:42:35.913338: step: 72/464, loss: 0.02525671012699604 2023-01-22 12:42:36.471344: step: 74/464, loss: 0.009668112732470036 2023-01-22 12:42:37.013442: step: 76/464, loss: 0.01860128343105316 2023-01-22 12:42:37.682387: step: 78/464, loss: 0.33107542991638184 2023-01-22 12:42:38.299772: step: 80/464, loss: 0.030080964788794518 2023-01-22 12:42:38.978656: step: 82/464, loss: 0.07465073466300964 2023-01-22 12:42:39.725358: step: 84/464, loss: 0.018534686416387558 2023-01-22 12:42:40.312857: step: 86/464, loss: 0.030880525708198547 2023-01-22 12:42:40.937961: step: 88/464, loss: 0.022276412695646286 2023-01-22 
12:42:41.559860: step: 90/464, loss: 0.0007963493699207902 2023-01-22 12:42:42.156794: step: 92/464, loss: 0.06814160197973251 2023-01-22 12:42:42.758360: step: 94/464, loss: 0.3641512989997864 2023-01-22 12:42:43.319195: step: 96/464, loss: 0.05214928463101387 2023-01-22 12:42:43.873535: step: 98/464, loss: 0.016802603378891945 2023-01-22 12:42:44.495527: step: 100/464, loss: 0.14683029055595398 2023-01-22 12:42:45.186940: step: 102/464, loss: 0.02423812262713909 2023-01-22 12:42:45.844129: step: 104/464, loss: 0.014936227351427078 2023-01-22 12:42:46.447974: step: 106/464, loss: 0.04058907553553581 2023-01-22 12:42:47.061540: step: 108/464, loss: 0.10261441022157669 2023-01-22 12:42:47.652610: step: 110/464, loss: 0.035076435655355453 2023-01-22 12:42:48.267395: step: 112/464, loss: 0.37915679812431335 2023-01-22 12:42:48.966429: step: 114/464, loss: 0.34553590416908264 2023-01-22 12:42:49.557316: step: 116/464, loss: 0.019644953310489655 2023-01-22 12:42:50.178894: step: 118/464, loss: 0.047253821045160294 2023-01-22 12:42:50.837547: step: 120/464, loss: 0.018846411257982254 2023-01-22 12:42:51.428239: step: 122/464, loss: 0.04492904245853424 2023-01-22 12:42:52.046368: step: 124/464, loss: 0.007162538822740316 2023-01-22 12:42:52.683731: step: 126/464, loss: 0.02271394059062004 2023-01-22 12:42:53.325121: step: 128/464, loss: 0.014112776145339012 2023-01-22 12:42:53.960519: step: 130/464, loss: 0.010637849569320679 2023-01-22 12:42:54.635507: step: 132/464, loss: 0.04476896673440933 2023-01-22 12:42:55.194923: step: 134/464, loss: 0.0009231481235474348 2023-01-22 12:42:55.774696: step: 136/464, loss: 0.022022517397999763 2023-01-22 12:42:56.417005: step: 138/464, loss: 0.767135739326477 2023-01-22 12:42:57.028408: step: 140/464, loss: 0.005619126372039318 2023-01-22 12:42:57.644978: step: 142/464, loss: 0.02571299485862255 2023-01-22 12:42:58.275158: step: 144/464, loss: 0.09120732545852661 2023-01-22 12:42:58.922130: step: 146/464, loss: 0.016840722411870956 2023-01-22 12:42:59.548946: step: 148/464, loss: 0.012031922116875648 2023-01-22 12:43:00.277577: step: 150/464, loss: 0.03083711303770542 2023-01-22 12:43:00.965691: step: 152/464, loss: 0.026089321821928024 2023-01-22 12:43:01.606331: step: 154/464, loss: 0.10427644103765488 2023-01-22 12:43:02.218412: step: 156/464, loss: 0.04840332269668579 2023-01-22 12:43:02.860250: step: 158/464, loss: 0.06633774936199188 2023-01-22 12:43:03.452328: step: 160/464, loss: 0.02363380417227745 2023-01-22 12:43:04.059047: step: 162/464, loss: 0.012433268129825592 2023-01-22 12:43:04.667858: step: 164/464, loss: 0.0023826195392757654 2023-01-22 12:43:05.388080: step: 166/464, loss: 0.8820675015449524 2023-01-22 12:43:05.995376: step: 168/464, loss: 0.010968861170113087 2023-01-22 12:43:06.675552: step: 170/464, loss: 0.049012307077646255 2023-01-22 12:43:07.256228: step: 172/464, loss: 0.0016267724568024278 2023-01-22 12:43:07.883218: step: 174/464, loss: 0.08859127759933472 2023-01-22 12:43:08.566645: step: 176/464, loss: 0.08113919198513031 2023-01-22 12:43:09.166026: step: 178/464, loss: 0.2307984083890915 2023-01-22 12:43:09.792180: step: 180/464, loss: 0.017306754365563393 2023-01-22 12:43:10.348751: step: 182/464, loss: 0.03797895461320877 2023-01-22 12:43:10.983444: step: 184/464, loss: 0.034086208790540695 2023-01-22 12:43:11.617805: step: 186/464, loss: 0.01586483046412468 2023-01-22 12:43:12.357515: step: 188/464, loss: 0.5600696802139282 2023-01-22 12:43:12.979820: step: 190/464, loss: 0.048893045634031296 2023-01-22 12:43:13.534063: 
step: 192/464, loss: 0.0026721959002316 2023-01-22 12:43:14.174206: step: 194/464, loss: 0.05101538822054863 2023-01-22 12:43:14.822682: step: 196/464, loss: 0.026031123474240303 2023-01-22 12:43:15.564206: step: 198/464, loss: 0.05834761634469032 2023-01-22 12:43:16.171562: step: 200/464, loss: 0.008085883222520351 2023-01-22 12:43:16.822972: step: 202/464, loss: 0.0739692896604538 2023-01-22 12:43:17.441221: step: 204/464, loss: 0.00796657893806696 2023-01-22 12:43:18.087396: step: 206/464, loss: 0.009787347167730331 2023-01-22 12:43:18.736341: step: 208/464, loss: 0.05727081000804901 2023-01-22 12:43:19.399878: step: 210/464, loss: 0.03274077922105789 2023-01-22 12:43:19.975529: step: 212/464, loss: 0.05570128932595253 2023-01-22 12:43:20.655150: step: 214/464, loss: 0.8114643096923828 2023-01-22 12:43:21.194197: step: 216/464, loss: 0.0107719199731946 2023-01-22 12:43:21.804061: step: 218/464, loss: 0.006827626843005419 2023-01-22 12:43:22.464106: step: 220/464, loss: 0.006962248589843512 2023-01-22 12:43:23.030060: step: 222/464, loss: 0.00787719339132309 2023-01-22 12:43:23.655111: step: 224/464, loss: 0.049737539142370224 2023-01-22 12:43:24.329827: step: 226/464, loss: 0.06598670035600662 2023-01-22 12:43:24.918925: step: 228/464, loss: 0.021805718541145325 2023-01-22 12:43:25.535458: step: 230/464, loss: 0.04670698195695877 2023-01-22 12:43:26.110779: step: 232/464, loss: 0.00791192427277565 2023-01-22 12:43:26.743350: step: 234/464, loss: 0.003434679936617613 2023-01-22 12:43:27.433797: step: 236/464, loss: 0.07541213184595108 2023-01-22 12:43:28.162772: step: 238/464, loss: 0.037969619035720825 2023-01-22 12:43:28.892381: step: 240/464, loss: 0.06740114837884903 2023-01-22 12:43:29.494778: step: 242/464, loss: 0.030239589512348175 2023-01-22 12:43:30.130559: step: 244/464, loss: 0.025601763278245926 2023-01-22 12:43:30.715339: step: 246/464, loss: 0.017570916563272476 2023-01-22 12:43:31.318439: step: 248/464, loss: 0.10075247287750244 2023-01-22 12:43:31.932686: step: 250/464, loss: 0.030370311811566353 2023-01-22 12:43:32.553395: step: 252/464, loss: 0.6089963912963867 2023-01-22 12:43:33.137750: step: 254/464, loss: 0.02403067983686924 2023-01-22 12:43:33.758486: step: 256/464, loss: 0.09271278232336044 2023-01-22 12:43:34.402185: step: 258/464, loss: 0.07433987408876419 2023-01-22 12:43:35.101515: step: 260/464, loss: 0.08124585449695587 2023-01-22 12:43:35.791437: step: 262/464, loss: 0.011426866054534912 2023-01-22 12:43:36.474274: step: 264/464, loss: 0.289348304271698 2023-01-22 12:43:37.092175: step: 266/464, loss: 0.025414174422621727 2023-01-22 12:43:37.737293: step: 268/464, loss: 0.5135270953178406 2023-01-22 12:43:38.370115: step: 270/464, loss: 0.008043395355343819 2023-01-22 12:43:39.026885: step: 272/464, loss: 0.005079520400613546 2023-01-22 12:43:39.620546: step: 274/464, loss: 0.22397971153259277 2023-01-22 12:43:40.242281: step: 276/464, loss: 0.019696544855833054 2023-01-22 12:43:40.873636: step: 278/464, loss: 0.004111408721655607 2023-01-22 12:43:41.577021: step: 280/464, loss: 0.08541527390480042 2023-01-22 12:43:42.233887: step: 282/464, loss: 0.06891372799873352 2023-01-22 12:43:42.840679: step: 284/464, loss: 0.024619489908218384 2023-01-22 12:43:43.466849: step: 286/464, loss: 0.20153610408306122 2023-01-22 12:43:44.201660: step: 288/464, loss: 0.057165857404470444 2023-01-22 12:43:44.916504: step: 290/464, loss: 0.007611685432493687 2023-01-22 12:43:45.536518: step: 292/464, loss: 0.05858425796031952 2023-01-22 12:43:46.144362: step: 294/464, loss: 
0.03117351420223713 2023-01-22 12:43:46.728388: step: 296/464, loss: 0.01491332147270441 2023-01-22 12:43:47.344960: step: 298/464, loss: 0.00705760857090354 2023-01-22 12:43:47.967565: step: 300/464, loss: 0.001687489217147231 2023-01-22 12:43:48.583126: step: 302/464, loss: 0.014096073806285858 2023-01-22 12:43:49.216376: step: 304/464, loss: 0.027246735990047455 2023-01-22 12:43:49.866625: step: 306/464, loss: 0.009920596145093441 2023-01-22 12:43:50.542484: step: 308/464, loss: 0.2069355994462967 2023-01-22 12:43:51.154325: step: 310/464, loss: 0.09881345927715302 2023-01-22 12:43:51.796870: step: 312/464, loss: 0.037157244980335236 2023-01-22 12:43:52.418718: step: 314/464, loss: 0.01551311369985342 2023-01-22 12:43:53.049742: step: 316/464, loss: 0.014572424814105034 2023-01-22 12:43:53.688294: step: 318/464, loss: 0.022611740976572037 2023-01-22 12:43:54.333621: step: 320/464, loss: 0.07246682792901993 2023-01-22 12:43:54.968668: step: 322/464, loss: 0.05923476070165634 2023-01-22 12:43:55.594785: step: 324/464, loss: 0.020231883972883224 2023-01-22 12:43:56.205757: step: 326/464, loss: 0.4922608733177185 2023-01-22 12:43:56.831037: step: 328/464, loss: 0.09566665440797806 2023-01-22 12:43:57.452094: step: 330/464, loss: 0.15764565765857697 2023-01-22 12:43:58.045042: step: 332/464, loss: 0.003560718148946762 2023-01-22 12:43:58.657114: step: 334/464, loss: 0.036379773169755936 2023-01-22 12:43:59.233734: step: 336/464, loss: 0.0053924997337162495 2023-01-22 12:43:59.850219: step: 338/464, loss: 0.06640272587537766 2023-01-22 12:44:00.406673: step: 340/464, loss: 0.012376081198453903 2023-01-22 12:44:01.026860: step: 342/464, loss: 0.1460789442062378 2023-01-22 12:44:01.680084: step: 344/464, loss: 0.045234907418489456 2023-01-22 12:44:02.274317: step: 346/464, loss: 0.03875354677438736 2023-01-22 12:44:02.867673: step: 348/464, loss: 0.014639015309512615 2023-01-22 12:44:03.521500: step: 350/464, loss: 0.03967462480068207 2023-01-22 12:44:04.143987: step: 352/464, loss: 0.00846176128834486 2023-01-22 12:44:04.815484: step: 354/464, loss: 0.08720821887254715 2023-01-22 12:44:05.488634: step: 356/464, loss: 0.05173289775848389 2023-01-22 12:44:06.110449: step: 358/464, loss: 0.12688124179840088 2023-01-22 12:44:06.747398: step: 360/464, loss: 0.07991411536931992 2023-01-22 12:44:07.328019: step: 362/464, loss: 0.004395974334329367 2023-01-22 12:44:07.962713: step: 364/464, loss: 0.0535275898873806 2023-01-22 12:44:08.576542: step: 366/464, loss: 0.032049063593149185 2023-01-22 12:44:09.197634: step: 368/464, loss: 0.04657631739974022 2023-01-22 12:44:09.843383: step: 370/464, loss: 0.22765032947063446 2023-01-22 12:44:10.481076: step: 372/464, loss: 0.1500069946050644 2023-01-22 12:44:11.037613: step: 374/464, loss: 0.2392122894525528 2023-01-22 12:44:11.634039: step: 376/464, loss: 0.0341184064745903 2023-01-22 12:44:12.246425: step: 378/464, loss: 0.14299477636814117 2023-01-22 12:44:12.955349: step: 380/464, loss: 0.030992744490504265 2023-01-22 12:44:13.577823: step: 382/464, loss: 0.07625354826450348 2023-01-22 12:44:14.179507: step: 384/464, loss: 0.004008305259048939 2023-01-22 12:44:14.868963: step: 386/464, loss: 0.054362375289201736 2023-01-22 12:44:15.554165: step: 388/464, loss: 0.029171712696552277 2023-01-22 12:44:16.209126: step: 390/464, loss: 0.15573173761367798 2023-01-22 12:44:16.868814: step: 392/464, loss: 0.002424610545858741 2023-01-22 12:44:17.492013: step: 394/464, loss: 0.09689239412546158 2023-01-22 12:44:18.062695: step: 396/464, loss: 0.012249691411852837 
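The per-epoch evaluation blocks in this log (for example the epoch 23 block above and the epoch 24 block below) report precision, recall and f1 for 'template' and 'slot', plus a single 'combined' number. The logged values are consistent with each 'f1' being the usual harmonic mean 2*p*r/(p+r) and with 'combined' being the product of the template f1 and the slot f1; since the scoring code of train.py is not shown here, the sketch below only re-derives one logged entry under those two assumptions, with hypothetical helper names.

def f1(p, r):
    # Standard F1: harmonic mean of precision and recall.
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

def combined_score(template, slot):
    # Assumption: 'combined' = template f1 * slot f1.
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

# Precision/recall copied from the epoch 23 "Dev Chinese" entry above:
dev_chinese_template = {"p": 1.0, "r": 0.5833333333333334}
dev_chinese_slot = {"p": 0.30139749262536875, "r": 0.3231301391524352}

print(combined_score(dev_chinese_template, dev_chinese_slot))
# ~0.2298105, matching the logged 'combined': 0.22981050382366175

Spot-checking other entries gives the same pattern (for the epoch 23 Test Chinese values, 0.6528497409326425 * 0.2929787972385496 is roughly 0.191271, matching the logged 0.19127113187594427), so 'combined' can be read as a template-weighted slot score; this remains an inference from the log rather than a statement about train.py.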
2023-01-22 12:44:18.703762: step: 398/464, loss: 0.07477396726608276 2023-01-22 12:44:19.389372: step: 400/464, loss: 0.0323805995285511 2023-01-22 12:44:19.986689: step: 402/464, loss: 0.03978384658694267 2023-01-22 12:44:20.563964: step: 404/464, loss: 0.10091626644134521 2023-01-22 12:44:21.201297: step: 406/464, loss: 0.022926170378923416 2023-01-22 12:44:21.782751: step: 408/464, loss: 0.005678442306816578 2023-01-22 12:44:22.441744: step: 410/464, loss: 0.03556925430893898 2023-01-22 12:44:23.017869: step: 412/464, loss: 0.07874489575624466 2023-01-22 12:44:23.680614: step: 414/464, loss: 0.008459270000457764 2023-01-22 12:44:24.281424: step: 416/464, loss: 0.04491732642054558 2023-01-22 12:44:24.990572: step: 418/464, loss: 0.009785176254808903 2023-01-22 12:44:25.670753: step: 420/464, loss: 0.022729527205228806 2023-01-22 12:44:26.311839: step: 422/464, loss: 0.03167550638318062 2023-01-22 12:44:26.930199: step: 424/464, loss: 0.012540756724774837 2023-01-22 12:44:27.566319: step: 426/464, loss: 0.13927604258060455 2023-01-22 12:44:28.154034: step: 428/464, loss: 0.020264718681573868 2023-01-22 12:44:28.735208: step: 430/464, loss: 0.0176665261387825 2023-01-22 12:44:29.399254: step: 432/464, loss: 0.050978146493434906 2023-01-22 12:44:30.067256: step: 434/464, loss: 0.03432874381542206 2023-01-22 12:44:30.666142: step: 436/464, loss: 0.2599300444126129 2023-01-22 12:44:31.239032: step: 438/464, loss: 0.03393875062465668 2023-01-22 12:44:31.833023: step: 440/464, loss: 0.010586414486169815 2023-01-22 12:44:32.444736: step: 442/464, loss: 0.09786012023687363 2023-01-22 12:44:32.999842: step: 444/464, loss: 0.020368900150060654 2023-01-22 12:44:33.632518: step: 446/464, loss: 0.04849978908896446 2023-01-22 12:44:34.247124: step: 448/464, loss: 0.06933494657278061 2023-01-22 12:44:34.854204: step: 450/464, loss: 0.0121515654027462 2023-01-22 12:44:35.472491: step: 452/464, loss: 0.2773245573043823 2023-01-22 12:44:36.153317: step: 454/464, loss: 0.021812712773680687 2023-01-22 12:44:36.746935: step: 456/464, loss: 0.04217422008514404 2023-01-22 12:44:37.382715: step: 458/464, loss: 0.09407848864793777 2023-01-22 12:44:37.998824: step: 460/464, loss: 0.0038906384725123644 2023-01-22 12:44:38.709334: step: 462/464, loss: 0.023373616859316826 2023-01-22 12:44:39.367997: step: 464/464, loss: 0.0807759091258049 2023-01-22 12:44:39.978406: step: 466/464, loss: 0.0077609531581401825 2023-01-22 12:44:40.670684: step: 468/464, loss: 0.14945146441459656 2023-01-22 12:44:41.409691: step: 470/464, loss: 0.03414865955710411 2023-01-22 12:44:42.054783: step: 472/464, loss: 0.04370049387216568 2023-01-22 12:44:42.657961: step: 474/464, loss: 0.02193155698478222 2023-01-22 12:44:43.277549: step: 476/464, loss: 0.039664387702941895 2023-01-22 12:44:43.950651: step: 478/464, loss: 0.014600432477891445 2023-01-22 12:44:44.567349: step: 480/464, loss: 0.02662612497806549 2023-01-22 12:44:45.186670: step: 482/464, loss: 0.00013828226656187326 2023-01-22 12:44:45.808357: step: 484/464, loss: 0.43025022745132446 2023-01-22 12:44:46.376624: step: 486/464, loss: 0.019535545259714127 2023-01-22 12:44:46.917048: step: 488/464, loss: 0.010172214359045029 2023-01-22 12:44:47.489048: step: 490/464, loss: 0.017740504816174507 2023-01-22 12:44:48.135052: step: 492/464, loss: 0.007563222665339708 2023-01-22 12:44:48.756636: step: 494/464, loss: 0.02568977326154709 2023-01-22 12:44:49.384650: step: 496/464, loss: 0.020611144602298737 2023-01-22 12:44:50.001021: step: 498/464, loss: 0.1862446367740631 2023-01-22 
12:44:50.614935: step: 500/464, loss: 0.039029985666275024 2023-01-22 12:44:51.221813: step: 502/464, loss: 0.09535839408636093 2023-01-22 12:44:51.816118: step: 504/464, loss: 0.05537525564432144 2023-01-22 12:44:52.425896: step: 506/464, loss: 0.07750478386878967 2023-01-22 12:44:53.088055: step: 508/464, loss: 0.04838641732931137 2023-01-22 12:44:53.744913: step: 510/464, loss: 0.04079439863562584 2023-01-22 12:44:54.381553: step: 512/464, loss: 0.0039293644949793816 2023-01-22 12:44:54.997780: step: 514/464, loss: 0.0030393495690077543 2023-01-22 12:44:55.650482: step: 516/464, loss: 0.06650812178850174 2023-01-22 12:44:56.280859: step: 518/464, loss: 0.01758035458624363 2023-01-22 12:44:56.914882: step: 520/464, loss: 0.025199897587299347 2023-01-22 12:44:57.511828: step: 522/464, loss: 0.03755476325750351 2023-01-22 12:44:58.179075: step: 524/464, loss: 0.02975156530737877 2023-01-22 12:44:58.815552: step: 526/464, loss: 0.009417744353413582 2023-01-22 12:44:59.348324: step: 528/464, loss: 0.00026950312894769013 2023-01-22 12:44:59.957219: step: 530/464, loss: 0.044083017855882645 2023-01-22 12:45:00.553034: step: 532/464, loss: 0.06772328168153763 2023-01-22 12:45:01.190934: step: 534/464, loss: 0.024661093950271606 2023-01-22 12:45:01.815241: step: 536/464, loss: 0.12543563544750214 2023-01-22 12:45:02.435585: step: 538/464, loss: 0.016915543004870415 2023-01-22 12:45:03.076980: step: 540/464, loss: 0.005815993528813124 2023-01-22 12:45:03.678148: step: 542/464, loss: 0.023189205676317215 2023-01-22 12:45:04.312883: step: 544/464, loss: 0.02398007921874523 2023-01-22 12:45:04.993396: step: 546/464, loss: 0.04083281382918358 2023-01-22 12:45:05.640475: step: 548/464, loss: 0.0018307537538930774 2023-01-22 12:45:06.283254: step: 550/464, loss: 0.003622284159064293 2023-01-22 12:45:06.904553: step: 552/464, loss: 0.0152712631970644 2023-01-22 12:45:07.558279: step: 554/464, loss: 0.01601882465183735 2023-01-22 12:45:08.217962: step: 556/464, loss: 0.10541737824678421 2023-01-22 12:45:08.806636: step: 558/464, loss: 0.45145365595817566 2023-01-22 12:45:09.454428: step: 560/464, loss: 0.007181126624345779 2023-01-22 12:45:10.085301: step: 562/464, loss: 0.008781393989920616 2023-01-22 12:45:10.769395: step: 564/464, loss: 0.10936623066663742 2023-01-22 12:45:11.286602: step: 566/464, loss: 0.13772058486938477 2023-01-22 12:45:11.980341: step: 568/464, loss: 0.00794376153498888 2023-01-22 12:45:12.668306: step: 570/464, loss: 0.020956581458449364 2023-01-22 12:45:13.302265: step: 572/464, loss: 0.02092120237648487 2023-01-22 12:45:13.955146: step: 574/464, loss: 0.07100170105695724 2023-01-22 12:45:14.575005: step: 576/464, loss: 0.25848352909088135 2023-01-22 12:45:15.164145: step: 578/464, loss: 0.07166639715433121 2023-01-22 12:45:15.728251: step: 580/464, loss: 0.025764163583517075 2023-01-22 12:45:16.359071: step: 582/464, loss: 0.46929800510406494 2023-01-22 12:45:16.980194: step: 584/464, loss: 0.02144567295908928 2023-01-22 12:45:17.567375: step: 586/464, loss: 0.003281695768237114 2023-01-22 12:45:18.171907: step: 588/464, loss: 0.03542201966047287 2023-01-22 12:45:18.801551: step: 590/464, loss: 0.018880341202020645 2023-01-22 12:45:19.462066: step: 592/464, loss: 0.04871486499905586 2023-01-22 12:45:20.030736: step: 594/464, loss: 0.0028785879258066416 2023-01-22 12:45:20.598533: step: 596/464, loss: 0.007425788324326277 2023-01-22 12:45:21.183879: step: 598/464, loss: 0.008637347258627415 2023-01-22 12:45:21.803802: step: 600/464, loss: 0.022225894033908844 2023-01-22 
12:45:22.421689: step: 602/464, loss: 0.04577256366610527 2023-01-22 12:45:23.113797: step: 604/464, loss: 0.03168138116598129 2023-01-22 12:45:23.725563: step: 606/464, loss: 0.04955857992172241 2023-01-22 12:45:24.332187: step: 608/464, loss: 0.04444937780499458 2023-01-22 12:45:25.039763: step: 610/464, loss: 0.061654992401599884 2023-01-22 12:45:25.651684: step: 612/464, loss: 0.12426318973302841 2023-01-22 12:45:26.266772: step: 614/464, loss: 0.02370787225663662 2023-01-22 12:45:26.883607: step: 616/464, loss: 0.029241489246487617 2023-01-22 12:45:27.499186: step: 618/464, loss: 0.016876980662345886 2023-01-22 12:45:28.134745: step: 620/464, loss: 0.12833718955516815 2023-01-22 12:45:28.846384: step: 622/464, loss: 0.009948083199560642 2023-01-22 12:45:29.455921: step: 624/464, loss: 0.0030475077219307423 2023-01-22 12:45:30.019631: step: 626/464, loss: 0.028402313590049744 2023-01-22 12:45:30.671672: step: 628/464, loss: 0.003545090788975358 2023-01-22 12:45:31.275561: step: 630/464, loss: 0.016871029511094093 2023-01-22 12:45:31.915887: step: 632/464, loss: 0.013180889189243317 2023-01-22 12:45:32.483735: step: 634/464, loss: 0.012245554476976395 2023-01-22 12:45:33.120166: step: 636/464, loss: 0.2753337025642395 2023-01-22 12:45:33.768678: step: 638/464, loss: 0.0011746642412617803 2023-01-22 12:45:34.345266: step: 640/464, loss: 0.014507484622299671 2023-01-22 12:45:34.881588: step: 642/464, loss: 0.030890408903360367 2023-01-22 12:45:35.441966: step: 644/464, loss: 0.011430202051997185 2023-01-22 12:45:36.071886: step: 646/464, loss: 0.01833469420671463 2023-01-22 12:45:36.656043: step: 648/464, loss: 0.017186565324664116 2023-01-22 12:45:37.225271: step: 650/464, loss: 0.008223423734307289 2023-01-22 12:45:37.845256: step: 652/464, loss: 0.13581836223602295 2023-01-22 12:45:38.460569: step: 654/464, loss: 0.07392071187496185 2023-01-22 12:45:39.129184: step: 656/464, loss: 0.02771533839404583 2023-01-22 12:45:39.813608: step: 658/464, loss: 0.02668609656393528 2023-01-22 12:45:40.423170: step: 660/464, loss: 0.004450938664376736 2023-01-22 12:45:41.068380: step: 662/464, loss: 0.18493559956550598 2023-01-22 12:45:41.642176: step: 664/464, loss: 0.004563276655972004 2023-01-22 12:45:42.255115: step: 666/464, loss: 0.03617537021636963 2023-01-22 12:45:42.873050: step: 668/464, loss: 0.017863882705569267 2023-01-22 12:45:43.551224: step: 670/464, loss: 0.015972094610333443 2023-01-22 12:45:44.115572: step: 672/464, loss: 0.017575904726982117 2023-01-22 12:45:44.703469: step: 674/464, loss: 0.05038394406437874 2023-01-22 12:45:45.340880: step: 676/464, loss: 0.0042044371366500854 2023-01-22 12:45:45.966115: step: 678/464, loss: 0.01900422014296055 2023-01-22 12:45:46.572988: step: 680/464, loss: 0.004950101021677256 2023-01-22 12:45:47.213872: step: 682/464, loss: 0.04155075177550316 2023-01-22 12:45:47.898743: step: 684/464, loss: 0.02320196107029915 2023-01-22 12:45:48.563668: step: 686/464, loss: 0.016851622611284256 2023-01-22 12:45:49.181022: step: 688/464, loss: 0.025884533300995827 2023-01-22 12:45:49.814268: step: 690/464, loss: 0.0692359209060669 2023-01-22 12:45:50.419934: step: 692/464, loss: 0.01677657850086689 2023-01-22 12:45:51.039441: step: 694/464, loss: 0.02548445202410221 2023-01-22 12:45:51.669625: step: 696/464, loss: 0.03491387888789177 2023-01-22 12:45:52.322987: step: 698/464, loss: 0.04632949456572533 2023-01-22 12:45:53.011747: step: 700/464, loss: 0.12078473716974258 2023-01-22 12:45:53.555344: step: 702/464, loss: 0.016643565148115158 2023-01-22 
12:45:54.182999: step: 704/464, loss: 0.06165686622262001 2023-01-22 12:45:54.720677: step: 706/464, loss: 0.01805802620947361 2023-01-22 12:45:55.352235: step: 708/464, loss: 0.06611974537372589 2023-01-22 12:45:55.955133: step: 710/464, loss: 0.047510262578725815 2023-01-22 12:45:56.619245: step: 712/464, loss: 0.07157109677791595 2023-01-22 12:45:57.251963: step: 714/464, loss: 0.06719717383384705 2023-01-22 12:45:57.920496: step: 716/464, loss: 0.2461855709552765 2023-01-22 12:45:58.525432: step: 718/464, loss: 0.018671412020921707 2023-01-22 12:45:59.154037: step: 720/464, loss: 0.02307756617665291 2023-01-22 12:45:59.813915: step: 722/464, loss: 0.016722489148378372 2023-01-22 12:46:00.421779: step: 724/464, loss: 1.2357501983642578 2023-01-22 12:46:01.064398: step: 726/464, loss: 0.013528386130928993 2023-01-22 12:46:01.661393: step: 728/464, loss: 0.12979376316070557 2023-01-22 12:46:02.243813: step: 730/464, loss: 0.05797789990901947 2023-01-22 12:46:02.874097: step: 732/464, loss: 0.057885877788066864 2023-01-22 12:46:03.447315: step: 734/464, loss: 0.004529331810772419 2023-01-22 12:46:04.090943: step: 736/464, loss: 0.018771404400467873 2023-01-22 12:46:04.704232: step: 738/464, loss: 0.015658782795071602 2023-01-22 12:46:05.375637: step: 740/464, loss: 0.01374346949160099 2023-01-22 12:46:05.980167: step: 742/464, loss: 0.005287653300911188 2023-01-22 12:46:06.551671: step: 744/464, loss: 0.050519898533821106 2023-01-22 12:46:07.118586: step: 746/464, loss: 0.010387287475168705 2023-01-22 12:46:07.702863: step: 748/464, loss: 0.021677561104297638 2023-01-22 12:46:08.297591: step: 750/464, loss: 0.009915663860738277 2023-01-22 12:46:08.951474: step: 752/464, loss: 0.015420086681842804 2023-01-22 12:46:09.601683: step: 754/464, loss: 0.11248577386140823 2023-01-22 12:46:10.219156: step: 756/464, loss: 0.039718352258205414 2023-01-22 12:46:10.824156: step: 758/464, loss: 0.006666975561529398 2023-01-22 12:46:11.396073: step: 760/464, loss: 0.04726141691207886 2023-01-22 12:46:12.063996: step: 762/464, loss: 0.0873069241642952 2023-01-22 12:46:12.672403: step: 764/464, loss: 0.061604391783475876 2023-01-22 12:46:13.411416: step: 766/464, loss: 0.01452117133885622 2023-01-22 12:46:14.091103: step: 768/464, loss: 0.02040213905274868 2023-01-22 12:46:14.719132: step: 770/464, loss: 0.08517874032258987 2023-01-22 12:46:15.343553: step: 772/464, loss: 0.011451417580246925 2023-01-22 12:46:16.033972: step: 774/464, loss: 0.00817059725522995 2023-01-22 12:46:16.660497: step: 776/464, loss: 0.020757826045155525 2023-01-22 12:46:17.282018: step: 778/464, loss: 0.01110632810741663 2023-01-22 12:46:17.871757: step: 780/464, loss: 0.126845121383667 2023-01-22 12:46:18.513208: step: 782/464, loss: 0.07332803308963776 2023-01-22 12:46:19.106141: step: 784/464, loss: 0.022527750581502914 2023-01-22 12:46:19.728040: step: 786/464, loss: 0.05135509744286537 2023-01-22 12:46:20.318576: step: 788/464, loss: 0.1043723076581955 2023-01-22 12:46:20.935613: step: 790/464, loss: 0.09155040234327316 2023-01-22 12:46:21.587831: step: 792/464, loss: 0.021373983472585678 2023-01-22 12:46:22.175344: step: 794/464, loss: 0.00931843277066946 2023-01-22 12:46:22.779685: step: 796/464, loss: 0.003143107518553734 2023-01-22 12:46:23.383139: step: 798/464, loss: 0.07849517464637756 2023-01-22 12:46:23.967622: step: 800/464, loss: 0.018494488671422005 2023-01-22 12:46:24.560095: step: 802/464, loss: 0.010890285484492779 2023-01-22 12:46:25.390834: step: 804/464, loss: 0.0023902691900730133 2023-01-22 
12:46:26.014502: step: 806/464, loss: 0.097561776638031 2023-01-22 12:46:26.599109: step: 808/464, loss: 0.047068167477846146 2023-01-22 12:46:27.241694: step: 810/464, loss: 0.04132939130067825 2023-01-22 12:46:27.888651: step: 812/464, loss: 0.01832829788327217 2023-01-22 12:46:28.453094: step: 814/464, loss: 0.018750792369246483 2023-01-22 12:46:29.069724: step: 816/464, loss: 0.031217556446790695 2023-01-22 12:46:29.664488: step: 818/464, loss: 0.03930818289518356 2023-01-22 12:46:30.265708: step: 820/464, loss: 0.06962235271930695 2023-01-22 12:46:30.894940: step: 822/464, loss: 0.25037118792533875 2023-01-22 12:46:31.535142: step: 824/464, loss: 0.018033863976597786 2023-01-22 12:46:32.156322: step: 826/464, loss: 0.03868522867560387 2023-01-22 12:46:32.770850: step: 828/464, loss: 0.04825150594115257 2023-01-22 12:46:33.377536: step: 830/464, loss: 0.012502568773925304 2023-01-22 12:46:34.016782: step: 832/464, loss: 0.01429159939289093 2023-01-22 12:46:34.611830: step: 834/464, loss: 0.017824998125433922 2023-01-22 12:46:35.291044: step: 836/464, loss: 0.013970798812806606 2023-01-22 12:46:35.923092: step: 838/464, loss: 0.09116805344820023 2023-01-22 12:46:36.576168: step: 840/464, loss: 0.03165208920836449 2023-01-22 12:46:37.160596: step: 842/464, loss: 0.6362419128417969 2023-01-22 12:46:37.804051: step: 844/464, loss: 0.22480525076389313 2023-01-22 12:46:38.431807: step: 846/464, loss: 0.060532331466674805 2023-01-22 12:46:39.025873: step: 848/464, loss: 0.011441945098340511 2023-01-22 12:46:39.674492: step: 850/464, loss: 0.1043558344244957 2023-01-22 12:46:40.281892: step: 852/464, loss: 0.04051947966217995 2023-01-22 12:46:40.848530: step: 854/464, loss: 0.05182785913348198 2023-01-22 12:46:41.480392: step: 856/464, loss: 0.04946213960647583 2023-01-22 12:46:42.145416: step: 858/464, loss: 0.04631351679563522 2023-01-22 12:46:42.831466: step: 860/464, loss: 0.002893391763791442 2023-01-22 12:46:43.429772: step: 862/464, loss: 0.02671853080391884 2023-01-22 12:46:44.012292: step: 864/464, loss: 0.0488939993083477 2023-01-22 12:46:44.632253: step: 866/464, loss: 0.010563675314188004 2023-01-22 12:46:45.268873: step: 868/464, loss: 0.003323337761685252 2023-01-22 12:46:45.877099: step: 870/464, loss: 0.012685799039900303 2023-01-22 12:46:46.423767: step: 872/464, loss: 0.00802539847791195 2023-01-22 12:46:47.035687: step: 874/464, loss: 0.0006253144238144159 2023-01-22 12:46:47.681161: step: 876/464, loss: 0.03252573311328888 2023-01-22 12:46:48.320042: step: 878/464, loss: 0.01457914337515831 2023-01-22 12:46:49.011658: step: 880/464, loss: 0.04448043555021286 2023-01-22 12:46:49.617694: step: 882/464, loss: 0.6970482468605042 2023-01-22 12:46:50.203103: step: 884/464, loss: 0.04112203046679497 2023-01-22 12:46:50.785160: step: 886/464, loss: 0.1385151445865631 2023-01-22 12:46:51.368299: step: 888/464, loss: 0.009903647005558014 2023-01-22 12:46:51.977343: step: 890/464, loss: 0.08298555761575699 2023-01-22 12:46:52.562376: step: 892/464, loss: 0.008226878941059113 2023-01-22 12:46:53.131801: step: 894/464, loss: 0.039297979325056076 2023-01-22 12:46:53.741414: step: 896/464, loss: 0.02497115731239319 2023-01-22 12:46:54.375883: step: 898/464, loss: 0.03017505444586277 2023-01-22 12:46:55.068186: step: 900/464, loss: 0.0020218086428940296 2023-01-22 12:46:55.707081: step: 902/464, loss: 0.5994249582290649 2023-01-22 12:46:56.291986: step: 904/464, loss: 0.07182029634714127 2023-01-22 12:46:56.944215: step: 906/464, loss: 0.032601069658994675 2023-01-22 12:46:57.576638: step: 
908/464, loss: 0.03782958909869194 2023-01-22 12:46:58.186672: step: 910/464, loss: 0.0879812017083168 2023-01-22 12:46:58.860294: step: 912/464, loss: 0.014246356673538685 2023-01-22 12:46:59.431487: step: 914/464, loss: 0.0419892780482769 2023-01-22 12:47:00.065714: step: 916/464, loss: 0.06602410227060318 2023-01-22 12:47:00.673278: step: 918/464, loss: 0.5060414671897888 2023-01-22 12:47:01.270874: step: 920/464, loss: 0.04006993770599365 2023-01-22 12:47:01.987532: step: 922/464, loss: 0.31256306171417236 2023-01-22 12:47:02.574126: step: 924/464, loss: 0.12733837962150574 2023-01-22 12:47:03.169208: step: 926/464, loss: 0.2875961661338806 2023-01-22 12:47:03.828931: step: 928/464, loss: 0.05005858838558197 2023-01-22 12:47:04.383703: step: 930/464, loss: 0.001698375097475946 ================================================== Loss: 0.071 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28013997395833334, 'r': 0.3402079380139153, 'f1': 0.30726578120536996}, 'combined': 0.22640636509869363, 'epoch': 24} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2859621894050847, 'r': 0.3235127799330252, 'f1': 0.3035807129229336}, 'combined': 0.1981925897838841, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27632017875125886, 'r': 0.34710428526249215, 'f1': 0.30769379029997207}, 'combined': 0.22672174022103203, 'epoch': 24} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3025527413902752, 'r': 0.3267458374389461, 'f1': 0.3141842431582619}, 'combined': 0.20511510175098963, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2991888933121019, 'r': 0.3565287001897533, 'f1': 0.3253517316017316}, 'combined': 0.2397328548644338, 'epoch': 24} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29261164320841265, 'r': 0.3090021943890492, 'f1': 0.30058364420694467}, 'combined': 0.1962359542490934, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22424242424242422, 'r': 0.35238095238095235, 'f1': 0.274074074074074}, 'combined': 0.18271604938271602, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26282051282051283, 'r': 0.44565217391304346, 'f1': 0.3306451612903226}, 'combined': 0.1653225806451613, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 
'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:49:43.299379: step: 2/464, loss: 0.01094895415008068 2023-01-22 12:49:43.906517: step: 4/464, loss: 0.14575912058353424 2023-01-22 12:49:44.549254: step: 6/464, loss: 0.024992192164063454 2023-01-22 12:49:45.232798: step: 8/464, loss: 0.04499730095267296 2023-01-22 12:49:45.805377: step: 10/464, loss: 0.02886922098696232 2023-01-22 12:49:46.391895: step: 12/464, loss: 0.00381839950568974 2023-01-22 12:49:46.964889: step: 14/464, loss: 0.024683594703674316 2023-01-22 12:49:47.613032: step: 16/464, loss: 0.09384645521640778 2023-01-22 12:49:48.227883: step: 18/464, loss: 0.01277201622724533 2023-01-22 12:49:48.854473: step: 20/464, loss: 0.037589460611343384 2023-01-22 12:49:49.479494: step: 22/464, loss: 0.20418094098567963 2023-01-22 12:49:50.091592: step: 24/464, loss: 0.06199120730161667 2023-01-22 12:49:50.699355: step: 26/464, loss: 0.0007472278084605932 2023-01-22 12:49:51.461229: step: 28/464, loss: 0.009938925504684448 2023-01-22 12:49:52.156343: step: 30/464, loss: 0.6464297771453857 2023-01-22 12:49:52.779582: step: 32/464, loss: 0.028073349967598915 2023-01-22 12:49:53.405105: step: 34/464, loss: 0.0018921191804111004 2023-01-22 12:49:54.062645: step: 36/464, loss: 0.00033368379808962345 2023-01-22 12:49:54.623765: step: 38/464, loss: 0.002079681260511279 2023-01-22 12:49:55.205398: step: 40/464, loss: 0.00684864679351449 2023-01-22 12:49:55.854443: step: 42/464, loss: 0.015362454578280449 2023-01-22 12:49:56.445215: step: 44/464, loss: 0.09487225860357285 2023-01-22 12:49:57.092451: step: 46/464, loss: 0.06227598711848259 2023-01-22 12:49:57.701297: step: 48/464, loss: 0.024862142279744148 2023-01-22 12:49:58.368933: step: 50/464, loss: 0.0038648508489131927 2023-01-22 12:49:58.986604: step: 52/464, loss: 0.005437001120299101 2023-01-22 12:49:59.575964: step: 54/464, loss: 0.030410602688789368 2023-01-22 12:50:00.153443: step: 56/464, loss: 
0.009388812817633152 2023-01-22 12:50:00.773875: step: 58/464, loss: 0.0414985828101635 2023-01-22 12:50:01.436191: step: 60/464, loss: 0.08260368555784225 2023-01-22 12:50:02.021155: step: 62/464, loss: 0.013569515198469162 2023-01-22 12:50:02.614748: step: 64/464, loss: 0.004947878886014223 2023-01-22 12:50:03.218081: step: 66/464, loss: 0.052869606763124466 2023-01-22 12:50:03.856311: step: 68/464, loss: 0.060434550046920776 2023-01-22 12:50:04.486352: step: 70/464, loss: 0.02222449891269207 2023-01-22 12:50:05.125845: step: 72/464, loss: 0.008808063343167305 2023-01-22 12:50:05.797740: step: 74/464, loss: 0.01708069071173668 2023-01-22 12:50:06.445144: step: 76/464, loss: 0.005162985995411873 2023-01-22 12:50:07.098238: step: 78/464, loss: 0.003248975146561861 2023-01-22 12:50:07.667548: step: 80/464, loss: 0.014609781093895435 2023-01-22 12:50:08.255438: step: 82/464, loss: 0.04347887635231018 2023-01-22 12:50:08.852040: step: 84/464, loss: 0.0234987810254097 2023-01-22 12:50:09.506712: step: 86/464, loss: 0.1509189009666443 2023-01-22 12:50:10.149968: step: 88/464, loss: 0.019962340593338013 2023-01-22 12:50:10.824497: step: 90/464, loss: 0.03884506598114967 2023-01-22 12:50:11.423504: step: 92/464, loss: 0.0009039240540005267 2023-01-22 12:50:12.055695: step: 94/464, loss: 0.0034994767047464848 2023-01-22 12:50:12.668814: step: 96/464, loss: 0.08864002674818039 2023-01-22 12:50:13.320685: step: 98/464, loss: 0.006740335375070572 2023-01-22 12:50:13.896629: step: 100/464, loss: 0.05475465953350067 2023-01-22 12:50:14.501201: step: 102/464, loss: 0.03010905347764492 2023-01-22 12:50:15.165025: step: 104/464, loss: 0.016187317669391632 2023-01-22 12:50:15.763804: step: 106/464, loss: 0.0055516562424600124 2023-01-22 12:50:16.360507: step: 108/464, loss: 0.022660691291093826 2023-01-22 12:50:17.015878: step: 110/464, loss: 0.031852543354034424 2023-01-22 12:50:17.608320: step: 112/464, loss: 0.07544074952602386 2023-01-22 12:50:18.233298: step: 114/464, loss: 0.008051667362451553 2023-01-22 12:50:18.804892: step: 116/464, loss: 0.012353715486824512 2023-01-22 12:50:19.457428: step: 118/464, loss: 0.016197267919778824 2023-01-22 12:50:20.128716: step: 120/464, loss: 0.01565691828727722 2023-01-22 12:50:20.730575: step: 122/464, loss: 0.04089139774441719 2023-01-22 12:50:21.421383: step: 124/464, loss: 0.030935386195778847 2023-01-22 12:50:22.020341: step: 126/464, loss: 0.031809840351343155 2023-01-22 12:50:22.617117: step: 128/464, loss: 0.23842322826385498 2023-01-22 12:50:23.238890: step: 130/464, loss: 0.020492425188422203 2023-01-22 12:50:23.865263: step: 132/464, loss: 0.0230704378336668 2023-01-22 12:50:24.461215: step: 134/464, loss: 0.0029736270662397146 2023-01-22 12:50:25.115222: step: 136/464, loss: 0.05544079467654228 2023-01-22 12:50:25.718557: step: 138/464, loss: 0.02358139678835869 2023-01-22 12:50:26.341907: step: 140/464, loss: 0.017108041793107986 2023-01-22 12:50:26.957331: step: 142/464, loss: 0.0015132430708035827 2023-01-22 12:50:27.599914: step: 144/464, loss: 0.012823511846363544 2023-01-22 12:50:28.196916: step: 146/464, loss: 0.025987349450588226 2023-01-22 12:50:28.846300: step: 148/464, loss: 0.04257926717400551 2023-01-22 12:50:29.430221: step: 150/464, loss: 0.04711727797985077 2023-01-22 12:50:30.013036: step: 152/464, loss: 0.07601583749055862 2023-01-22 12:50:30.620581: step: 154/464, loss: 0.001698075095191598 2023-01-22 12:50:31.277640: step: 156/464, loss: 0.002336803823709488 2023-01-22 12:50:31.861950: step: 158/464, loss: 0.001236848533153534 
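Note on the evaluation blocks above: each language entry reports precision (p), recall (r) and f1 separately for templates and for slots, plus a single 'combined' figure. The logged values are consistent with f1 being the usual harmonic mean of p and r, and with 'combined' being the product template_f1 * slot_f1. The short sketch below re-derives the epoch-24 Dev Chinese numbers under that assumption; the helper function is illustrative only and is not taken from train.py.

# Re-deriving the epoch-24 Dev Chinese figures from the summary above.
# Assumption (not confirmed against train.py): f1 is the harmonic mean of
# p and r, and 'combined' is template_f1 * slot_f1.
def f1(p: float, r: float) -> float:
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)              # -> 0.7368421052631579
slot_f1 = f1(0.28013997395833334, 0.3402079380139153)  # -> ~0.3072657812
combined = template_f1 * slot_f1                       # -> ~0.2264063651
print(template_f1, slot_f1, combined)

The same relation reproduces the Test/Sample rows as well (e.g. Test Chinese: 0.6528497409326425 * 0.3035807129229336 ~= 0.1981925897838841), so 'combined' can be read as the template score gating the slot score.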
2023-01-22 12:50:32.508217: step: 160/464, loss: 0.007346364203840494 2023-01-22 12:50:33.147022: step: 162/464, loss: 0.00528703723102808 2023-01-22 12:50:33.772403: step: 164/464, loss: 0.05930967628955841 2023-01-22 12:50:34.329096: step: 166/464, loss: 0.011992568150162697 2023-01-22 12:50:34.976840: step: 168/464, loss: 0.042673785239458084 2023-01-22 12:50:35.697147: step: 170/464, loss: 0.2894831895828247 2023-01-22 12:50:36.344370: step: 172/464, loss: 0.021240130066871643 2023-01-22 12:50:36.876472: step: 174/464, loss: 0.007897155359387398 2023-01-22 12:50:37.424822: step: 176/464, loss: 0.013241810724139214 2023-01-22 12:50:38.016474: step: 178/464, loss: 0.0017146612517535686 2023-01-22 12:50:38.623375: step: 180/464, loss: 0.052760347723960876 2023-01-22 12:50:39.247603: step: 182/464, loss: 0.02053925022482872 2023-01-22 12:50:39.859993: step: 184/464, loss: 0.019012922421097755 2023-01-22 12:50:40.535260: step: 186/464, loss: 0.08293021470308304 2023-01-22 12:50:41.127329: step: 188/464, loss: 0.05099023878574371 2023-01-22 12:50:41.711922: step: 190/464, loss: 0.011305413208901882 2023-01-22 12:50:42.327011: step: 192/464, loss: 0.017405983060598373 2023-01-22 12:50:42.947599: step: 194/464, loss: 0.046850383281707764 2023-01-22 12:50:43.562780: step: 196/464, loss: 0.0115335863083601 2023-01-22 12:50:44.185422: step: 198/464, loss: 0.028845172375440598 2023-01-22 12:50:44.785869: step: 200/464, loss: 0.015684882178902626 2023-01-22 12:50:45.479069: step: 202/464, loss: 0.005955643951892853 2023-01-22 12:50:46.124606: step: 204/464, loss: 0.03082350827753544 2023-01-22 12:50:46.680880: step: 206/464, loss: 0.001468464732170105 2023-01-22 12:50:47.263911: step: 208/464, loss: 0.13009512424468994 2023-01-22 12:50:47.873688: step: 210/464, loss: 0.020539836958050728 2023-01-22 12:50:48.479437: step: 212/464, loss: 0.13391415774822235 2023-01-22 12:50:49.093935: step: 214/464, loss: 0.013976972550153732 2023-01-22 12:50:49.713756: step: 216/464, loss: 0.1380707025527954 2023-01-22 12:50:50.328737: step: 218/464, loss: 0.02879462204873562 2023-01-22 12:50:50.940869: step: 220/464, loss: 0.01533366460353136 2023-01-22 12:50:51.551804: step: 222/464, loss: 1.0113894939422607 2023-01-22 12:50:52.142221: step: 224/464, loss: 0.0025485767982900143 2023-01-22 12:50:52.847097: step: 226/464, loss: 0.05745875835418701 2023-01-22 12:50:53.412287: step: 228/464, loss: 0.0076041747815907 2023-01-22 12:50:53.980796: step: 230/464, loss: 0.08491874486207962 2023-01-22 12:50:54.590632: step: 232/464, loss: 0.029457198455929756 2023-01-22 12:50:55.227752: step: 234/464, loss: 0.05527511239051819 2023-01-22 12:50:55.866286: step: 236/464, loss: 0.005635358393192291 2023-01-22 12:50:56.545944: step: 238/464, loss: 0.010738439857959747 2023-01-22 12:50:57.170740: step: 240/464, loss: 0.0373210646212101 2023-01-22 12:50:57.808184: step: 242/464, loss: 0.24425114691257477 2023-01-22 12:50:58.407137: step: 244/464, loss: 0.02821933850646019 2023-01-22 12:50:59.007663: step: 246/464, loss: 0.009423431940376759 2023-01-22 12:50:59.700251: step: 248/464, loss: 0.036287467926740646 2023-01-22 12:51:00.294920: step: 250/464, loss: 0.05840136110782623 2023-01-22 12:51:00.927041: step: 252/464, loss: 0.0367143414914608 2023-01-22 12:51:01.524995: step: 254/464, loss: 0.09120728075504303 2023-01-22 12:51:02.103034: step: 256/464, loss: 0.06416139751672745 2023-01-22 12:51:02.751633: step: 258/464, loss: 0.2775331139564514 2023-01-22 12:51:03.365882: step: 260/464, loss: 0.01628378964960575 2023-01-22 
12:51:03.921347: step: 262/464, loss: 0.03143422678112984 2023-01-22 12:51:04.480266: step: 264/464, loss: 0.04143984988331795 2023-01-22 12:51:05.194546: step: 266/464, loss: 0.0008044016431085765 2023-01-22 12:51:05.820943: step: 268/464, loss: 0.44265875220298767 2023-01-22 12:51:06.407762: step: 270/464, loss: 0.052031636238098145 2023-01-22 12:51:07.018753: step: 272/464, loss: 0.3623453676700592 2023-01-22 12:51:07.644336: step: 274/464, loss: 0.018070541322231293 2023-01-22 12:51:08.322655: step: 276/464, loss: 0.03422160446643829 2023-01-22 12:51:08.934536: step: 278/464, loss: 0.09279845654964447 2023-01-22 12:51:09.570571: step: 280/464, loss: 0.009126405231654644 2023-01-22 12:51:10.170009: step: 282/464, loss: 0.045235250145196915 2023-01-22 12:51:10.820164: step: 284/464, loss: 0.23937299847602844 2023-01-22 12:51:11.516534: step: 286/464, loss: 0.018642853945493698 2023-01-22 12:51:12.189885: step: 288/464, loss: 0.0056993127800524235 2023-01-22 12:51:12.755803: step: 290/464, loss: 0.007555719930678606 2023-01-22 12:51:13.359225: step: 292/464, loss: 0.03261446952819824 2023-01-22 12:51:13.979440: step: 294/464, loss: 0.0309117641299963 2023-01-22 12:51:14.589621: step: 296/464, loss: 0.013928813859820366 2023-01-22 12:51:15.174413: step: 298/464, loss: 0.0032964213751256466 2023-01-22 12:51:15.802809: step: 300/464, loss: 0.03079323098063469 2023-01-22 12:51:16.451787: step: 302/464, loss: 0.06833625584840775 2023-01-22 12:51:17.026522: step: 304/464, loss: 0.024271182715892792 2023-01-22 12:51:17.673818: step: 306/464, loss: 0.1986471265554428 2023-01-22 12:51:18.321705: step: 308/464, loss: 0.0008915074868127704 2023-01-22 12:51:19.042885: step: 310/464, loss: 0.016452452167868614 2023-01-22 12:51:19.635557: step: 312/464, loss: 0.052120424807071686 2023-01-22 12:51:20.390356: step: 314/464, loss: 0.050676923245191574 2023-01-22 12:51:20.998562: step: 316/464, loss: 0.0032034774776548147 2023-01-22 12:51:21.607375: step: 318/464, loss: 0.03368563950061798 2023-01-22 12:51:22.170559: step: 320/464, loss: 0.00951187964528799 2023-01-22 12:51:22.777935: step: 322/464, loss: 0.05167800188064575 2023-01-22 12:51:23.478009: step: 324/464, loss: 0.03427198529243469 2023-01-22 12:51:24.152616: step: 326/464, loss: 0.046183791011571884 2023-01-22 12:51:24.720117: step: 328/464, loss: 0.0038602089043706656 2023-01-22 12:51:25.373884: step: 330/464, loss: 0.4236118793487549 2023-01-22 12:51:26.063068: step: 332/464, loss: 0.015052303671836853 2023-01-22 12:51:26.714446: step: 334/464, loss: 0.023639438673853874 2023-01-22 12:51:27.352443: step: 336/464, loss: 0.019682051613926888 2023-01-22 12:51:27.971718: step: 338/464, loss: 0.13966712355613708 2023-01-22 12:51:28.667463: step: 340/464, loss: 0.0024587088264524937 2023-01-22 12:51:29.310187: step: 342/464, loss: 0.02569568157196045 2023-01-22 12:51:29.907306: step: 344/464, loss: 0.04826827347278595 2023-01-22 12:51:30.552740: step: 346/464, loss: 0.03584090992808342 2023-01-22 12:51:31.196764: step: 348/464, loss: 0.15560881793498993 2023-01-22 12:51:31.829855: step: 350/464, loss: 0.01604614406824112 2023-01-22 12:51:32.493342: step: 352/464, loss: 0.06136579066514969 2023-01-22 12:51:33.191140: step: 354/464, loss: 0.04448382183909416 2023-01-22 12:51:33.785193: step: 356/464, loss: 0.0005365906399674714 2023-01-22 12:51:34.409476: step: 358/464, loss: 0.09390643984079361 2023-01-22 12:51:35.045239: step: 360/464, loss: 0.034720465540885925 2023-01-22 12:51:35.617249: step: 362/464, loss: 0.9380174279212952 2023-01-22 
12:51:36.175948: step: 364/464, loss: 0.026559194549918175 2023-01-22 12:51:36.758521: step: 366/464, loss: 0.01126338355243206 2023-01-22 12:51:37.447097: step: 368/464, loss: 0.04304554685950279 2023-01-22 12:51:38.002838: step: 370/464, loss: 0.014592224732041359 2023-01-22 12:51:38.597398: step: 372/464, loss: 0.043677981942892075 2023-01-22 12:51:39.185568: step: 374/464, loss: 0.005666018463671207 2023-01-22 12:51:39.782991: step: 376/464, loss: 0.03042283095419407 2023-01-22 12:51:40.383243: step: 378/464, loss: 0.01923169009387493 2023-01-22 12:51:40.993254: step: 380/464, loss: 0.019020728766918182 2023-01-22 12:51:41.597689: step: 382/464, loss: 0.0346912145614624 2023-01-22 12:51:42.220965: step: 384/464, loss: 0.04588151350617409 2023-01-22 12:51:42.896127: step: 386/464, loss: 0.0751832127571106 2023-01-22 12:51:43.481101: step: 388/464, loss: 0.004094639793038368 2023-01-22 12:51:44.167423: step: 390/464, loss: 0.022207781672477722 2023-01-22 12:51:44.792311: step: 392/464, loss: 0.06836681813001633 2023-01-22 12:51:45.398806: step: 394/464, loss: 0.0121278902515769 2023-01-22 12:51:45.995379: step: 396/464, loss: 0.6992283463478088 2023-01-22 12:51:46.588446: step: 398/464, loss: 0.0797678679227829 2023-01-22 12:51:47.223135: step: 400/464, loss: 0.003167911432683468 2023-01-22 12:51:47.841400: step: 402/464, loss: 0.011715085245668888 2023-01-22 12:51:48.485963: step: 404/464, loss: 0.052617043256759644 2023-01-22 12:51:49.114867: step: 406/464, loss: 0.019754866138100624 2023-01-22 12:51:49.692608: step: 408/464, loss: 0.0014015489723533392 2023-01-22 12:51:50.333346: step: 410/464, loss: 0.0034204889088869095 2023-01-22 12:51:51.008975: step: 412/464, loss: 0.3143950402736664 2023-01-22 12:51:51.651742: step: 414/464, loss: 0.02351572923362255 2023-01-22 12:51:52.284820: step: 416/464, loss: 0.05147172138094902 2023-01-22 12:51:52.899586: step: 418/464, loss: 1.0159331560134888 2023-01-22 12:51:53.559103: step: 420/464, loss: 0.23005777597427368 2023-01-22 12:51:54.123725: step: 422/464, loss: 0.01528138481080532 2023-01-22 12:51:54.750674: step: 424/464, loss: 0.032140932977199554 2023-01-22 12:51:55.363827: step: 426/464, loss: 0.0063538933172822 2023-01-22 12:51:55.965748: step: 428/464, loss: 0.02749541401863098 2023-01-22 12:51:56.648465: step: 430/464, loss: 0.058274418115615845 2023-01-22 12:51:57.217265: step: 432/464, loss: 0.024172352626919746 2023-01-22 12:51:57.845231: step: 434/464, loss: 0.08550658077001572 2023-01-22 12:51:58.443943: step: 436/464, loss: 0.04020044207572937 2023-01-22 12:51:59.130515: step: 438/464, loss: 1.4247195720672607 2023-01-22 12:51:59.773462: step: 440/464, loss: 0.0749029591679573 2023-01-22 12:52:00.432742: step: 442/464, loss: 0.1032380685210228 2023-01-22 12:52:01.053619: step: 444/464, loss: 0.01930875889956951 2023-01-22 12:52:01.676347: step: 446/464, loss: 0.002203272422775626 2023-01-22 12:52:02.327765: step: 448/464, loss: 0.3476521670818329 2023-01-22 12:52:02.941924: step: 450/464, loss: 0.018893271684646606 2023-01-22 12:52:03.507259: step: 452/464, loss: 0.03659308701753616 2023-01-22 12:52:04.127451: step: 454/464, loss: 0.03388116881251335 2023-01-22 12:52:04.756167: step: 456/464, loss: 0.06619930267333984 2023-01-22 12:52:05.362087: step: 458/464, loss: 0.003419067244976759 2023-01-22 12:52:05.989359: step: 460/464, loss: 0.06586448848247528 2023-01-22 12:52:06.565006: step: 462/464, loss: 0.006851766724139452 2023-01-22 12:52:07.139520: step: 464/464, loss: 0.42624276876449585 2023-01-22 12:52:07.801793: step: 
466/464, loss: 0.21237380802631378 2023-01-22 12:52:08.428567: step: 468/464, loss: 0.11286122351884842 2023-01-22 12:52:09.024775: step: 470/464, loss: 0.04325117543339729 2023-01-22 12:52:09.673303: step: 472/464, loss: 0.06070108339190483 2023-01-22 12:52:10.349329: step: 474/464, loss: 0.023442333564162254 2023-01-22 12:52:10.956528: step: 476/464, loss: 0.04634851962327957 2023-01-22 12:52:11.558550: step: 478/464, loss: 0.00919394101947546 2023-01-22 12:52:12.180888: step: 480/464, loss: 0.019036393612623215 2023-01-22 12:52:12.877504: step: 482/464, loss: 0.022216234356164932 2023-01-22 12:52:13.563301: step: 484/464, loss: 0.030017098411917686 2023-01-22 12:52:14.240995: step: 486/464, loss: 0.0024047254119068384 2023-01-22 12:52:14.830277: step: 488/464, loss: 0.09450496733188629 2023-01-22 12:52:15.458183: step: 490/464, loss: 0.04067765921354294 2023-01-22 12:52:16.090130: step: 492/464, loss: 0.07102949917316437 2023-01-22 12:52:16.768628: step: 494/464, loss: 0.06277811527252197 2023-01-22 12:52:17.491852: step: 496/464, loss: 0.023850714787840843 2023-01-22 12:52:18.099523: step: 498/464, loss: 0.005263129249215126 2023-01-22 12:52:18.718152: step: 500/464, loss: 0.2657757103443146 2023-01-22 12:52:19.328041: step: 502/464, loss: 0.13945844769477844 2023-01-22 12:52:20.023411: step: 504/464, loss: 0.04101903364062309 2023-01-22 12:52:20.626772: step: 506/464, loss: 0.07458826899528503 2023-01-22 12:52:21.262310: step: 508/464, loss: 0.06257973611354828 2023-01-22 12:52:21.899774: step: 510/464, loss: 0.03478274866938591 2023-01-22 12:52:22.509410: step: 512/464, loss: 0.011548043228685856 2023-01-22 12:52:23.142184: step: 514/464, loss: 0.02769150212407112 2023-01-22 12:52:23.823473: step: 516/464, loss: 0.017998792231082916 2023-01-22 12:52:24.462217: step: 518/464, loss: 0.1678367555141449 2023-01-22 12:52:25.123529: step: 520/464, loss: 1.509641170501709 2023-01-22 12:52:25.720445: step: 522/464, loss: 0.07102823257446289 2023-01-22 12:52:26.308334: step: 524/464, loss: 0.055757131427526474 2023-01-22 12:52:27.032353: step: 526/464, loss: 0.01679256744682789 2023-01-22 12:52:27.660137: step: 528/464, loss: 0.04048018902540207 2023-01-22 12:52:28.439090: step: 530/464, loss: 0.04043349251151085 2023-01-22 12:52:29.030888: step: 532/464, loss: 0.005710093304514885 2023-01-22 12:52:29.651737: step: 534/464, loss: 0.0880838930606842 2023-01-22 12:52:30.237479: step: 536/464, loss: 0.07711517810821533 2023-01-22 12:52:30.855617: step: 538/464, loss: 0.0020625698380172253 2023-01-22 12:52:31.422723: step: 540/464, loss: 0.0283072330057621 2023-01-22 12:52:32.030917: step: 542/464, loss: 0.03311380743980408 2023-01-22 12:52:32.618876: step: 544/464, loss: 0.018642157316207886 2023-01-22 12:52:33.212840: step: 546/464, loss: 0.0008091746713034809 2023-01-22 12:52:33.800210: step: 548/464, loss: 0.006606127135455608 2023-01-22 12:52:34.423299: step: 550/464, loss: 0.13055063784122467 2023-01-22 12:52:35.106359: step: 552/464, loss: 0.06652757525444031 2023-01-22 12:52:35.705500: step: 554/464, loss: 0.016496941447257996 2023-01-22 12:52:36.303314: step: 556/464, loss: 0.021097218617796898 2023-01-22 12:52:36.965650: step: 558/464, loss: 0.044590700417757034 2023-01-22 12:52:37.670371: step: 560/464, loss: 0.10488364100456238 2023-01-22 12:52:38.267895: step: 562/464, loss: 0.09305278956890106 2023-01-22 12:52:38.930712: step: 564/464, loss: 0.0067495619878172874 2023-01-22 12:52:39.548110: step: 566/464, loss: 0.010041550733149052 2023-01-22 12:52:40.156856: step: 568/464, loss: 
0.01691114529967308 2023-01-22 12:52:40.782152: step: 570/464, loss: 0.06539332866668701 2023-01-22 12:52:41.428712: step: 572/464, loss: 0.08897537738084793 2023-01-22 12:52:41.991657: step: 574/464, loss: 0.014962859451770782 2023-01-22 12:52:42.649167: step: 576/464, loss: 0.009278004057705402 2023-01-22 12:52:43.269305: step: 578/464, loss: 0.04308057576417923 2023-01-22 12:52:43.855072: step: 580/464, loss: 0.024415653198957443 2023-01-22 12:52:44.445883: step: 582/464, loss: 0.07050704210996628 2023-01-22 12:52:45.056522: step: 584/464, loss: 0.04006562381982803 2023-01-22 12:52:45.708251: step: 586/464, loss: 0.019665470346808434 2023-01-22 12:52:46.314293: step: 588/464, loss: 0.016614586114883423 2023-01-22 12:52:46.903622: step: 590/464, loss: 0.0010337578132748604 2023-01-22 12:52:47.544768: step: 592/464, loss: 0.17083393037319183 2023-01-22 12:52:48.207436: step: 594/464, loss: 0.07128389924764633 2023-01-22 12:52:48.814607: step: 596/464, loss: 0.04955251142382622 2023-01-22 12:52:49.433540: step: 598/464, loss: 0.03497675061225891 2023-01-22 12:52:50.026526: step: 600/464, loss: 0.004128795117139816 2023-01-22 12:52:50.561488: step: 602/464, loss: 0.014348876662552357 2023-01-22 12:52:51.155179: step: 604/464, loss: 0.04881225898861885 2023-01-22 12:52:51.812116: step: 606/464, loss: 0.027765098959207535 2023-01-22 12:52:52.379591: step: 608/464, loss: 0.03792242333292961 2023-01-22 12:52:52.965026: step: 610/464, loss: 0.00026430690195411444 2023-01-22 12:52:53.562155: step: 612/464, loss: 0.05211463198065758 2023-01-22 12:52:54.153173: step: 614/464, loss: 0.0476502850651741 2023-01-22 12:52:54.846040: step: 616/464, loss: 0.01162576675415039 2023-01-22 12:52:55.509452: step: 618/464, loss: 0.07381214946508408 2023-01-22 12:52:56.168067: step: 620/464, loss: 0.027710363268852234 2023-01-22 12:52:56.752046: step: 622/464, loss: 0.1238107681274414 2023-01-22 12:52:57.393224: step: 624/464, loss: 0.3625965714454651 2023-01-22 12:52:58.020005: step: 626/464, loss: 0.10400807857513428 2023-01-22 12:52:58.556651: step: 628/464, loss: 0.005050726234912872 2023-01-22 12:52:59.144427: step: 630/464, loss: 0.03973061591386795 2023-01-22 12:52:59.782943: step: 632/464, loss: 0.06549963355064392 2023-01-22 12:53:00.351073: step: 634/464, loss: 0.15217608213424683 2023-01-22 12:53:00.956502: step: 636/464, loss: 0.030870968475937843 2023-01-22 12:53:01.563063: step: 638/464, loss: 0.010599116794764996 2023-01-22 12:53:02.193194: step: 640/464, loss: 0.18119975924491882 2023-01-22 12:53:02.845605: step: 642/464, loss: 0.0009294974734075367 2023-01-22 12:53:03.506891: step: 644/464, loss: 0.02494949661195278 2023-01-22 12:53:04.055988: step: 646/464, loss: 0.03424317017197609 2023-01-22 12:53:04.667577: step: 648/464, loss: 0.06232265755534172 2023-01-22 12:53:05.296769: step: 650/464, loss: 0.08077694475650787 2023-01-22 12:53:05.929580: step: 652/464, loss: 0.04820266366004944 2023-01-22 12:53:06.478794: step: 654/464, loss: 0.03566118702292442 2023-01-22 12:53:07.103436: step: 656/464, loss: 0.08889000862836838 2023-01-22 12:53:07.731409: step: 658/464, loss: 0.07314766198396683 2023-01-22 12:53:08.314796: step: 660/464, loss: 0.03364182636141777 2023-01-22 12:53:08.978329: step: 662/464, loss: 0.03165220841765404 2023-01-22 12:53:09.610057: step: 664/464, loss: 0.05097051337361336 2023-01-22 12:53:10.199571: step: 666/464, loss: 0.04210424795746803 2023-01-22 12:53:10.884180: step: 668/464, loss: 0.06841326504945755 2023-01-22 12:53:11.495449: step: 670/464, loss: 0.04322848841547966 
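The 'Loss:' line that closes each epoch (e.g. 'Loss: 0.071' for epoch 24 above and 'Loss: 0.068' for epoch 25 further below) is presumably the per-step training loss averaged over that epoch and rounded to three decimals. A minimal way to recompute it from the raw entries, assuming nothing about train.py itself, is to scrape the 'step: N/464, loss: X' pairs from one epoch's chunk of the log:

import re
from statistics import mean

# Illustrative log scraper (not part of train.py): average the per-step
# losses within a single epoch's slice of the log to reproduce the
# "Loss:" summary figure.
STEP_RE = re.compile(r"step:\s*\d+/\d+,\s*loss:\s*([0-9]*\.?[0-9]+)")

def epoch_mean_loss(epoch_log_text: str) -> float:
    losses = [float(x) for x in STEP_RE.findall(epoch_log_text)]
    return mean(losses)

# Usage sketch (epoch_24_text is a hypothetical string holding only the
# epoch-24 portion of this log):
# print(round(epoch_mean_loss(epoch_24_text), 3))  # expected ~0.071 if the
#                                                  # summary is a plain mean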
2023-01-22 12:53:12.153921: step: 672/464, loss: 0.06704898923635483 2023-01-22 12:53:12.791948: step: 674/464, loss: 0.09451629221439362 2023-01-22 12:53:13.376346: step: 676/464, loss: 0.02983608841896057 2023-01-22 12:53:14.031250: step: 678/464, loss: 0.02176210656762123 2023-01-22 12:53:14.667206: step: 680/464, loss: 0.024272693321108818 2023-01-22 12:53:15.304308: step: 682/464, loss: 0.05879077687859535 2023-01-22 12:53:15.936521: step: 684/464, loss: 0.03125687316060066 2023-01-22 12:53:16.564523: step: 686/464, loss: 0.023755548521876335 2023-01-22 12:53:17.121153: step: 688/464, loss: 0.011906933039426804 2023-01-22 12:53:17.745165: step: 690/464, loss: 0.03976349160075188 2023-01-22 12:53:18.357460: step: 692/464, loss: 0.2375657856464386 2023-01-22 12:53:18.934051: step: 694/464, loss: 0.011740381829440594 2023-01-22 12:53:19.562445: step: 696/464, loss: 0.02481149137020111 2023-01-22 12:53:20.196848: step: 698/464, loss: 0.23874157667160034 2023-01-22 12:53:20.796028: step: 700/464, loss: 0.040182825177907944 2023-01-22 12:53:21.472184: step: 702/464, loss: 0.031606245785951614 2023-01-22 12:53:22.119164: step: 704/464, loss: 0.01072809100151062 2023-01-22 12:53:22.731288: step: 706/464, loss: 0.012844547629356384 2023-01-22 12:53:23.390344: step: 708/464, loss: 0.023855801671743393 2023-01-22 12:53:24.021031: step: 710/464, loss: 0.025959152728319168 2023-01-22 12:53:24.651980: step: 712/464, loss: 0.05024197697639465 2023-01-22 12:53:25.338300: step: 714/464, loss: 0.02967996522784233 2023-01-22 12:53:25.887441: step: 716/464, loss: 0.022867849096655846 2023-01-22 12:53:26.522765: step: 718/464, loss: 0.02212836965918541 2023-01-22 12:53:27.093009: step: 720/464, loss: 0.02384709008038044 2023-01-22 12:53:27.707979: step: 722/464, loss: 0.01609223149716854 2023-01-22 12:53:28.338925: step: 724/464, loss: 0.03944723308086395 2023-01-22 12:53:28.943371: step: 726/464, loss: 0.32151544094085693 2023-01-22 12:53:29.589484: step: 728/464, loss: 0.03509880602359772 2023-01-22 12:53:30.293839: step: 730/464, loss: 0.0048608374781906605 2023-01-22 12:53:30.914302: step: 732/464, loss: 0.050102487206459045 2023-01-22 12:53:31.441572: step: 734/464, loss: 0.09356710314750671 2023-01-22 12:53:32.042565: step: 736/464, loss: 0.010913309641182423 2023-01-22 12:53:32.651337: step: 738/464, loss: 0.0152819212526083 2023-01-22 12:53:33.289095: step: 740/464, loss: 0.03910623863339424 2023-01-22 12:53:33.949510: step: 742/464, loss: 0.04414622113108635 2023-01-22 12:53:34.519821: step: 744/464, loss: 0.011421327479183674 2023-01-22 12:53:35.108220: step: 746/464, loss: 0.01073770597577095 2023-01-22 12:53:35.756387: step: 748/464, loss: 0.023593632504343987 2023-01-22 12:53:36.336210: step: 750/464, loss: 0.044311657547950745 2023-01-22 12:53:37.000826: step: 752/464, loss: 0.06402845680713654 2023-01-22 12:53:37.572306: step: 754/464, loss: 0.002189035527408123 2023-01-22 12:53:38.205810: step: 756/464, loss: 0.07352970540523529 2023-01-22 12:53:38.841856: step: 758/464, loss: 0.04176979884505272 2023-01-22 12:53:39.500323: step: 760/464, loss: 0.02098209597170353 2023-01-22 12:53:40.081126: step: 762/464, loss: 0.07031551003456116 2023-01-22 12:53:40.669768: step: 764/464, loss: 0.005239306483417749 2023-01-22 12:53:41.254976: step: 766/464, loss: 0.006717341020703316 2023-01-22 12:53:41.937110: step: 768/464, loss: 0.20943881571292877 2023-01-22 12:53:42.533020: step: 770/464, loss: 0.03284173086285591 2023-01-22 12:53:43.173661: step: 772/464, loss: 0.016307709738612175 2023-01-22 
12:53:43.779660: step: 774/464, loss: 0.06885068863630295 2023-01-22 12:53:44.363127: step: 776/464, loss: 0.000454139692010358 2023-01-22 12:53:44.979622: step: 778/464, loss: 0.02808069810271263 2023-01-22 12:53:45.676585: step: 780/464, loss: 0.1787603348493576 2023-01-22 12:53:46.313355: step: 782/464, loss: 0.02926480397582054 2023-01-22 12:53:46.874631: step: 784/464, loss: 0.04911215603351593 2023-01-22 12:53:47.496166: step: 786/464, loss: 0.046737734228372574 2023-01-22 12:53:48.156952: step: 788/464, loss: 0.002738171024248004 2023-01-22 12:53:48.811128: step: 790/464, loss: 0.29980382323265076 2023-01-22 12:53:49.473096: step: 792/464, loss: 0.05199276655912399 2023-01-22 12:53:50.037370: step: 794/464, loss: 0.005134768784046173 2023-01-22 12:53:50.704793: step: 796/464, loss: 0.08432340621948242 2023-01-22 12:53:51.283547: step: 798/464, loss: 0.058853939175605774 2023-01-22 12:53:51.912340: step: 800/464, loss: 0.05134215205907822 2023-01-22 12:53:52.562046: step: 802/464, loss: 0.7040004134178162 2023-01-22 12:53:53.218267: step: 804/464, loss: 0.01902609132230282 2023-01-22 12:53:53.847642: step: 806/464, loss: 0.016261931508779526 2023-01-22 12:53:54.445031: step: 808/464, loss: 0.020014706999063492 2023-01-22 12:53:55.101713: step: 810/464, loss: 0.048233408480882645 2023-01-22 12:53:55.776189: step: 812/464, loss: 0.03912653028964996 2023-01-22 12:53:56.415924: step: 814/464, loss: 0.01740916632115841 2023-01-22 12:53:57.032781: step: 816/464, loss: 0.16831934452056885 2023-01-22 12:53:57.646140: step: 818/464, loss: 0.03522450849413872 2023-01-22 12:53:58.293845: step: 820/464, loss: 0.0683288425207138 2023-01-22 12:53:58.934231: step: 822/464, loss: 0.07296296209096909 2023-01-22 12:53:59.635898: step: 824/464, loss: 0.22791068255901337 2023-01-22 12:54:00.257048: step: 826/464, loss: 0.032518237829208374 2023-01-22 12:54:00.879938: step: 828/464, loss: 0.11278972774744034 2023-01-22 12:54:01.502423: step: 830/464, loss: 0.038358110934495926 2023-01-22 12:54:02.161254: step: 832/464, loss: 0.05259322375059128 2023-01-22 12:54:02.819521: step: 834/464, loss: 0.016430115327239037 2023-01-22 12:54:03.415714: step: 836/464, loss: 0.014489209279417992 2023-01-22 12:54:04.012689: step: 838/464, loss: 0.006181823089718819 2023-01-22 12:54:04.744301: step: 840/464, loss: 0.02740761823952198 2023-01-22 12:54:05.393668: step: 842/464, loss: 0.0018187963869422674 2023-01-22 12:54:06.042704: step: 844/464, loss: 0.038892749696969986 2023-01-22 12:54:06.622497: step: 846/464, loss: 0.016636164858937263 2023-01-22 12:54:07.269507: step: 848/464, loss: 0.0516558475792408 2023-01-22 12:54:07.883753: step: 850/464, loss: 0.02258567325770855 2023-01-22 12:54:08.583659: step: 852/464, loss: 0.055364180356264114 2023-01-22 12:54:09.203510: step: 854/464, loss: 0.06610779464244843 2023-01-22 12:54:09.804707: step: 856/464, loss: 0.1411096751689911 2023-01-22 12:54:10.411599: step: 858/464, loss: 0.03036370873451233 2023-01-22 12:54:11.003164: step: 860/464, loss: 0.037055253982543945 2023-01-22 12:54:11.627291: step: 862/464, loss: 0.029586972668766975 2023-01-22 12:54:12.239723: step: 864/464, loss: 0.028649387881159782 2023-01-22 12:54:12.848440: step: 866/464, loss: 0.021050531417131424 2023-01-22 12:54:13.484033: step: 868/464, loss: 0.01597771607339382 2023-01-22 12:54:14.175415: step: 870/464, loss: 0.014619365334510803 2023-01-22 12:54:14.790746: step: 872/464, loss: 0.069264717400074 2023-01-22 12:54:15.430540: step: 874/464, loss: 0.022397087886929512 2023-01-22 12:54:16.073633: 
step: 876/464, loss: 0.00017047250003088266 2023-01-22 12:54:16.760303: step: 878/464, loss: 0.0019862446933984756 2023-01-22 12:54:17.381207: step: 880/464, loss: 0.010659612715244293 2023-01-22 12:54:17.981968: step: 882/464, loss: 0.04737301543354988 2023-01-22 12:54:18.642214: step: 884/464, loss: 0.07188910245895386 2023-01-22 12:54:19.220003: step: 886/464, loss: 0.06555438041687012 2023-01-22 12:54:19.874492: step: 888/464, loss: 0.017469795420765877 2023-01-22 12:54:20.451233: step: 890/464, loss: 0.05681777000427246 2023-01-22 12:54:21.055610: step: 892/464, loss: 0.011965092271566391 2023-01-22 12:54:21.699696: step: 894/464, loss: 0.10202238708734512 2023-01-22 12:54:22.392383: step: 896/464, loss: 0.04688293859362602 2023-01-22 12:54:22.995619: step: 898/464, loss: 0.033308885991573334 2023-01-22 12:54:23.619572: step: 900/464, loss: 0.015246191993355751 2023-01-22 12:54:24.178709: step: 902/464, loss: 0.21349254250526428 2023-01-22 12:54:24.792615: step: 904/464, loss: 0.09325052052736282 2023-01-22 12:54:25.410290: step: 906/464, loss: 0.0010594649938866496 2023-01-22 12:54:26.067213: step: 908/464, loss: 0.016955086961388588 2023-01-22 12:54:26.699516: step: 910/464, loss: 0.05161542445421219 2023-01-22 12:54:27.298114: step: 912/464, loss: 0.12337085604667664 2023-01-22 12:54:27.916944: step: 914/464, loss: 0.016918625682592392 2023-01-22 12:54:28.456622: step: 916/464, loss: 0.028833843767642975 2023-01-22 12:54:29.046180: step: 918/464, loss: 0.0705912858247757 2023-01-22 12:54:29.716589: step: 920/464, loss: 0.1489098221063614 2023-01-22 12:54:30.386171: step: 922/464, loss: 0.02580682933330536 2023-01-22 12:54:31.020321: step: 924/464, loss: 0.25764748454093933 2023-01-22 12:54:31.664528: step: 926/464, loss: 0.01660194620490074 2023-01-22 12:54:32.318218: step: 928/464, loss: 0.0014980026753619313 2023-01-22 12:54:32.830824: step: 930/464, loss: 0.002765296958386898 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29052817841880346, 'r': 0.34400300442757753, 'f1': 0.3150123081378512}, 'combined': 0.23211433231210088, 'epoch': 25} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29244856853466383, 'r': 0.31901041099790395, 'f1': 0.3051525651493772}, 'combined': 0.19921877310270222, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2893559726522188, 'r': 0.3546944180898166, 'f1': 0.31871092639954535}, 'combined': 0.23483962997861235, 'epoch': 25} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3112831612530933, 'r': 0.31614696064767284, 'f1': 0.3136962090147452}, 'combined': 0.20479648878682846, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3048786704730832, 'r': 0.3546311669829222, 'f1': 0.32787828947368425}, 'combined': 0.2415945290858726, 'epoch': 25} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3030828152811698, 'r': 0.3069756220829463, 'f1': 0.3050167986056622}, 'combined': 0.19913013794981058, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2394179894179894, 'r': 0.4309523809523809, 'f1': 0.30782312925170063}, 'combined': 0.20521541950113376, 
'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27439024390243905, 'r': 0.4891304347826087, 'f1': 0.35156250000000006}, 'combined': 0.17578125000000003, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:57:11.415471: step: 2/464, loss: 0.04391373693943024 2023-01-22 12:57:12.118205: step: 4/464, loss: 0.007874233648180962 2023-01-22 12:57:12.771856: step: 6/464, loss: 0.02224157564342022 2023-01-22 12:57:13.415095: step: 8/464, loss: 0.019984586164355278 2023-01-22 12:57:14.059793: step: 10/464, loss: 0.003371479921042919 2023-01-22 12:57:14.692065: step: 12/464, loss: 0.01139430794864893 2023-01-22 12:57:15.328874: step: 14/464, loss: 0.03944086283445358 2023-01-22 12:57:15.872390: step: 16/464, loss: 0.08040128648281097 2023-01-22 12:57:16.551132: step: 18/464, loss: 0.21492762863636017 2023-01-22 12:57:17.176510: step: 20/464, loss: 0.01825053244829178 2023-01-22 12:57:17.852640: step: 22/464, loss: 0.026273366063833237 2023-01-22 12:57:18.430532: step: 24/464, loss: 
0.0006889184005558491 2023-01-22 12:57:19.115236: step: 26/464, loss: 0.01594449020922184 2023-01-22 12:57:19.744502: step: 28/464, loss: 0.040616974234580994 2023-01-22 12:57:20.410930: step: 30/464, loss: 0.002110840752720833 2023-01-22 12:57:20.950061: step: 32/464, loss: 0.007855681702494621 2023-01-22 12:57:21.569158: step: 34/464, loss: 0.018313631415367126 2023-01-22 12:57:22.150587: step: 36/464, loss: 0.013031150214374065 2023-01-22 12:57:22.793073: step: 38/464, loss: 0.014306176453828812 2023-01-22 12:57:23.434649: step: 40/464, loss: 0.009338781237602234 2023-01-22 12:57:24.103125: step: 42/464, loss: 0.022448761388659477 2023-01-22 12:57:24.726943: step: 44/464, loss: 0.012604963965713978 2023-01-22 12:57:25.354333: step: 46/464, loss: 0.03372041508555412 2023-01-22 12:57:26.016863: step: 48/464, loss: 0.017403187230229378 2023-01-22 12:57:26.577419: step: 50/464, loss: 0.014642245136201382 2023-01-22 12:57:27.205887: step: 52/464, loss: 0.007306413725018501 2023-01-22 12:57:27.760919: step: 54/464, loss: 0.012175238691270351 2023-01-22 12:57:28.390437: step: 56/464, loss: 0.013652827590703964 2023-01-22 12:57:28.946267: step: 58/464, loss: 0.12427759170532227 2023-01-22 12:57:29.607197: step: 60/464, loss: 0.012083382345736027 2023-01-22 12:57:30.207292: step: 62/464, loss: 0.009524141438305378 2023-01-22 12:57:30.825949: step: 64/464, loss: 0.7988739609718323 2023-01-22 12:57:31.440621: step: 66/464, loss: 0.021220678463578224 2023-01-22 12:57:32.036310: step: 68/464, loss: 0.004771019332110882 2023-01-22 12:57:32.680538: step: 70/464, loss: 0.03755776584148407 2023-01-22 12:57:33.419408: step: 72/464, loss: 0.11564905196428299 2023-01-22 12:57:34.051895: step: 74/464, loss: 0.004108037333935499 2023-01-22 12:57:34.718740: step: 76/464, loss: 0.01695541851222515 2023-01-22 12:57:35.421355: step: 78/464, loss: 0.06649809330701828 2023-01-22 12:57:36.197010: step: 80/464, loss: 0.019438304007053375 2023-01-22 12:57:36.851117: step: 82/464, loss: 0.026070335879921913 2023-01-22 12:57:37.560147: step: 84/464, loss: 0.008641884662210941 2023-01-22 12:57:38.203365: step: 86/464, loss: 0.0016679865075275302 2023-01-22 12:57:38.899031: step: 88/464, loss: 0.1379016488790512 2023-01-22 12:57:39.451591: step: 90/464, loss: 0.09687651693820953 2023-01-22 12:57:40.076159: step: 92/464, loss: 0.12728893756866455 2023-01-22 12:57:40.722883: step: 94/464, loss: 0.0065714772790670395 2023-01-22 12:57:41.297517: step: 96/464, loss: 0.03216441348195076 2023-01-22 12:57:41.925320: step: 98/464, loss: 0.1337531954050064 2023-01-22 12:57:42.559853: step: 100/464, loss: 0.027989579364657402 2023-01-22 12:57:43.189766: step: 102/464, loss: 0.05962573364377022 2023-01-22 12:57:43.851324: step: 104/464, loss: 0.033715952187776566 2023-01-22 12:57:44.453007: step: 106/464, loss: 0.005919927265495062 2023-01-22 12:57:45.072656: step: 108/464, loss: 0.010916773229837418 2023-01-22 12:57:45.668275: step: 110/464, loss: 0.05076739192008972 2023-01-22 12:57:46.262219: step: 112/464, loss: 0.02558835968375206 2023-01-22 12:57:46.913849: step: 114/464, loss: 0.02433537319302559 2023-01-22 12:57:47.582456: step: 116/464, loss: 0.040127020329236984 2023-01-22 12:57:48.173973: step: 118/464, loss: 0.004513516556471586 2023-01-22 12:57:48.813939: step: 120/464, loss: 0.006538499612361193 2023-01-22 12:57:49.384704: step: 122/464, loss: 0.01373555138707161 2023-01-22 12:57:49.992921: step: 124/464, loss: 0.007735065184533596 2023-01-22 12:57:50.578703: step: 126/464, loss: 0.003255724674090743 2023-01-22 
12:57:51.220264: step: 128/464, loss: 0.028008991852402687 2023-01-22 12:57:51.886617: step: 130/464, loss: 0.03875065594911575 2023-01-22 12:57:52.455761: step: 132/464, loss: 0.04180491715669632 2023-01-22 12:57:53.030347: step: 134/464, loss: 0.002450503408908844 2023-01-22 12:57:53.641599: step: 136/464, loss: 0.039049725979566574 2023-01-22 12:57:54.333022: step: 138/464, loss: 0.009984776377677917 2023-01-22 12:57:55.009140: step: 140/464, loss: 0.017390163615345955 2023-01-22 12:57:55.654711: step: 142/464, loss: 0.04249592870473862 2023-01-22 12:57:56.219378: step: 144/464, loss: 0.00951914582401514 2023-01-22 12:57:56.808670: step: 146/464, loss: 0.02458665519952774 2023-01-22 12:57:57.485162: step: 148/464, loss: 0.02037750743329525 2023-01-22 12:57:58.073420: step: 150/464, loss: 0.0328526496887207 2023-01-22 12:57:58.721284: step: 152/464, loss: 0.052238550037145615 2023-01-22 12:57:59.368085: step: 154/464, loss: 0.23137396574020386 2023-01-22 12:57:59.962928: step: 156/464, loss: 0.005670635029673576 2023-01-22 12:58:00.593376: step: 158/464, loss: 0.07691482454538345 2023-01-22 12:58:01.242732: step: 160/464, loss: 0.1446666568517685 2023-01-22 12:58:01.847584: step: 162/464, loss: 0.033114343881607056 2023-01-22 12:58:02.461077: step: 164/464, loss: 0.0020079005043953657 2023-01-22 12:58:03.071875: step: 166/464, loss: 0.0011540675768628716 2023-01-22 12:58:03.676223: step: 168/464, loss: 0.02620735578238964 2023-01-22 12:58:04.275933: step: 170/464, loss: 0.014304089359939098 2023-01-22 12:58:04.921634: step: 172/464, loss: 0.027400551363825798 2023-01-22 12:58:05.611665: step: 174/464, loss: 0.002438499126583338 2023-01-22 12:58:06.244817: step: 176/464, loss: 0.00013663464051205665 2023-01-22 12:58:06.824531: step: 178/464, loss: 0.009929274208843708 2023-01-22 12:58:07.468952: step: 180/464, loss: 0.550064742565155 2023-01-22 12:58:08.169698: step: 182/464, loss: 0.015332071110606194 2023-01-22 12:58:08.766865: step: 184/464, loss: 0.009523403830826283 2023-01-22 12:58:09.370075: step: 186/464, loss: 0.02809010073542595 2023-01-22 12:58:10.024989: step: 188/464, loss: 0.19071964919567108 2023-01-22 12:58:10.611509: step: 190/464, loss: 0.6839189529418945 2023-01-22 12:58:11.268883: step: 192/464, loss: 0.045062173157930374 2023-01-22 12:58:11.992291: step: 194/464, loss: 0.022391490638256073 2023-01-22 12:58:12.638275: step: 196/464, loss: 0.0592818409204483 2023-01-22 12:58:13.265499: step: 198/464, loss: 0.4567221999168396 2023-01-22 12:58:13.845423: step: 200/464, loss: 0.00868324562907219 2023-01-22 12:58:14.430717: step: 202/464, loss: 0.014107579365372658 2023-01-22 12:58:15.050856: step: 204/464, loss: 0.044665493071079254 2023-01-22 12:58:15.698095: step: 206/464, loss: 0.0018825248116627336 2023-01-22 12:58:16.360685: step: 208/464, loss: 0.02518109232187271 2023-01-22 12:58:17.005771: step: 210/464, loss: 0.016200797632336617 2023-01-22 12:58:17.609224: step: 212/464, loss: 0.008192908018827438 2023-01-22 12:58:18.204901: step: 214/464, loss: 0.019114751368761063 2023-01-22 12:58:18.902723: step: 216/464, loss: 0.014841261319816113 2023-01-22 12:58:19.486413: step: 218/464, loss: 0.019637972116470337 2023-01-22 12:58:20.144864: step: 220/464, loss: 0.004951437469571829 2023-01-22 12:58:20.823330: step: 222/464, loss: 0.01096371840685606 2023-01-22 12:58:21.412666: step: 224/464, loss: 0.00991129782050848 2023-01-22 12:58:22.068775: step: 226/464, loss: 0.020300405099987984 2023-01-22 12:58:22.671069: step: 228/464, loss: 0.005053536035120487 2023-01-22 
12:58:23.370170: step: 230/464, loss: 0.08775275945663452 2023-01-22 12:58:24.050698: step: 232/464, loss: 0.010210863314568996 2023-01-22 12:58:24.626142: step: 234/464, loss: 0.06775163114070892 2023-01-22 12:58:25.310807: step: 236/464, loss: 0.14004534482955933 2023-01-22 12:58:25.889657: step: 238/464, loss: 0.042682547122240067 2023-01-22 12:58:26.477559: step: 240/464, loss: 0.004201119765639305 2023-01-22 12:58:27.047679: step: 242/464, loss: 0.004126168787479401 2023-01-22 12:58:27.685602: step: 244/464, loss: 0.005134413484483957 2023-01-22 12:58:28.352937: step: 246/464, loss: 0.08357443660497665 2023-01-22 12:58:28.942259: step: 248/464, loss: 0.06599913537502289 2023-01-22 12:58:29.567818: step: 250/464, loss: 0.019962815567851067 2023-01-22 12:58:30.197326: step: 252/464, loss: 0.05692172423005104 2023-01-22 12:58:30.904562: step: 254/464, loss: 0.05064583569765091 2023-01-22 12:58:31.479847: step: 256/464, loss: 0.01198429986834526 2023-01-22 12:58:32.165793: step: 258/464, loss: 0.06167212128639221 2023-01-22 12:58:32.805121: step: 260/464, loss: 0.27730756998062134 2023-01-22 12:58:33.465951: step: 262/464, loss: 0.030600082129240036 2023-01-22 12:58:34.041070: step: 264/464, loss: 0.00288955494761467 2023-01-22 12:58:34.717692: step: 266/464, loss: 0.02289651893079281 2023-01-22 12:58:35.343324: step: 268/464, loss: 0.011433032341301441 2023-01-22 12:58:35.973514: step: 270/464, loss: 0.22712191939353943 2023-01-22 12:58:36.598734: step: 272/464, loss: 0.015967974439263344 2023-01-22 12:58:37.183026: step: 274/464, loss: 0.009424269199371338 2023-01-22 12:58:37.816445: step: 276/464, loss: 0.008342879824340343 2023-01-22 12:58:38.424481: step: 278/464, loss: 0.16277647018432617 2023-01-22 12:58:39.001452: step: 280/464, loss: 0.0248686745762825 2023-01-22 12:58:39.614137: step: 282/464, loss: 0.016462594270706177 2023-01-22 12:58:40.226632: step: 284/464, loss: 0.03282509371638298 2023-01-22 12:58:40.943471: step: 286/464, loss: 0.033736176788806915 2023-01-22 12:58:41.645107: step: 288/464, loss: 0.012655275873839855 2023-01-22 12:58:42.284153: step: 290/464, loss: 0.06633565574884415 2023-01-22 12:58:42.965244: step: 292/464, loss: 0.032862331718206406 2023-01-22 12:58:43.635850: step: 294/464, loss: 0.028874710202217102 2023-01-22 12:58:44.258864: step: 296/464, loss: 0.5041033625602722 2023-01-22 12:58:44.853216: step: 298/464, loss: 0.0024245341774076223 2023-01-22 12:58:45.478641: step: 300/464, loss: 0.013810448348522186 2023-01-22 12:58:46.062687: step: 302/464, loss: 0.10798203200101852 2023-01-22 12:58:46.675899: step: 304/464, loss: 0.051899854093790054 2023-01-22 12:58:47.297747: step: 306/464, loss: 0.060129180550575256 2023-01-22 12:58:47.904225: step: 308/464, loss: 0.009245732799172401 2023-01-22 12:58:48.491302: step: 310/464, loss: 0.05325021967291832 2023-01-22 12:58:49.090403: step: 312/464, loss: 0.005145874805748463 2023-01-22 12:58:49.694331: step: 314/464, loss: 0.03422572836279869 2023-01-22 12:58:50.328184: step: 316/464, loss: 0.011272044852375984 2023-01-22 12:58:50.945372: step: 318/464, loss: 0.05873195827007294 2023-01-22 12:58:51.553869: step: 320/464, loss: 0.020659292116761208 2023-01-22 12:58:52.206313: step: 322/464, loss: 0.025461694225668907 2023-01-22 12:58:52.921371: step: 324/464, loss: 0.15957403182983398 2023-01-22 12:58:53.580788: step: 326/464, loss: 0.049867674708366394 2023-01-22 12:58:54.213382: step: 328/464, loss: 0.049304038286209106 2023-01-22 12:58:54.839541: step: 330/464, loss: 0.0015535791171714664 2023-01-22 
12:58:55.420085: step: 332/464, loss: 0.02825341187417507 2023-01-22 12:58:56.002538: step: 334/464, loss: 0.10240758955478668 2023-01-22 12:58:56.575683: step: 336/464, loss: 0.06264287978410721 2023-01-22 12:58:57.196322: step: 338/464, loss: 0.035395149141550064 2023-01-22 12:58:57.802741: step: 340/464, loss: 0.02415728010237217 2023-01-22 12:58:58.475414: step: 342/464, loss: 0.04024200513958931 2023-01-22 12:58:59.098315: step: 344/464, loss: 0.005539848934859037 2023-01-22 12:58:59.635455: step: 346/464, loss: 0.006253221072256565 2023-01-22 12:59:00.262038: step: 348/464, loss: 0.05701465159654617 2023-01-22 12:59:00.886444: step: 350/464, loss: 0.041590526700019836 2023-01-22 12:59:01.464410: step: 352/464, loss: 0.02236504666507244 2023-01-22 12:59:02.124360: step: 354/464, loss: 0.035344481468200684 2023-01-22 12:59:02.711789: step: 356/464, loss: 0.0006936495774425566 2023-01-22 12:59:03.325659: step: 358/464, loss: 0.07155963033437729 2023-01-22 12:59:03.903445: step: 360/464, loss: 0.03924102708697319 2023-01-22 12:59:04.554526: step: 362/464, loss: 0.03008159250020981 2023-01-22 12:59:05.210254: step: 364/464, loss: 0.018457040190696716 2023-01-22 12:59:05.799155: step: 366/464, loss: 0.11442568153142929 2023-01-22 12:59:06.440450: step: 368/464, loss: 0.0151005107909441 2023-01-22 12:59:07.089962: step: 370/464, loss: 0.02877574786543846 2023-01-22 12:59:07.760245: step: 372/464, loss: 0.011581357568502426 2023-01-22 12:59:08.371061: step: 374/464, loss: 0.019876640290021896 2023-01-22 12:59:09.076716: step: 376/464, loss: 0.009046114049851894 2023-01-22 12:59:09.744574: step: 378/464, loss: 0.03848227113485336 2023-01-22 12:59:10.377056: step: 380/464, loss: 0.005663391202688217 2023-01-22 12:59:11.092122: step: 382/464, loss: 0.03750398010015488 2023-01-22 12:59:11.726701: step: 384/464, loss: 0.007229901850223541 2023-01-22 12:59:12.396935: step: 386/464, loss: 0.01637895777821541 2023-01-22 12:59:13.019434: step: 388/464, loss: 0.11286875605583191 2023-01-22 12:59:13.627901: step: 390/464, loss: 0.01437292154878378 2023-01-22 12:59:14.297078: step: 392/464, loss: 0.024987051263451576 2023-01-22 12:59:14.963436: step: 394/464, loss: 0.000298120838124305 2023-01-22 12:59:15.566454: step: 396/464, loss: 0.060187749564647675 2023-01-22 12:59:16.139496: step: 398/464, loss: 0.008325144648551941 2023-01-22 12:59:16.714040: step: 400/464, loss: 0.002636404475197196 2023-01-22 12:59:17.328938: step: 402/464, loss: 0.010944833047688007 2023-01-22 12:59:18.003637: step: 404/464, loss: 0.06787846237421036 2023-01-22 12:59:18.580064: step: 406/464, loss: 0.035629112273454666 2023-01-22 12:59:19.153254: step: 408/464, loss: 0.02141980081796646 2023-01-22 12:59:19.695938: step: 410/464, loss: 0.0010935988975688815 2023-01-22 12:59:20.359802: step: 412/464, loss: 0.02366950921714306 2023-01-22 12:59:20.967740: step: 414/464, loss: 0.0024041745346039534 2023-01-22 12:59:21.621168: step: 416/464, loss: 0.0168649572879076 2023-01-22 12:59:22.202876: step: 418/464, loss: 0.022741887718439102 2023-01-22 12:59:22.882854: step: 420/464, loss: 0.0457460917532444 2023-01-22 12:59:23.507821: step: 422/464, loss: 0.01615896075963974 2023-01-22 12:59:24.114159: step: 424/464, loss: 0.0027021560817956924 2023-01-22 12:59:24.709145: step: 426/464, loss: 0.02107520028948784 2023-01-22 12:59:25.318788: step: 428/464, loss: 0.0010393020929768682 2023-01-22 12:59:25.935440: step: 430/464, loss: 0.03457380831241608 2023-01-22 12:59:26.523101: step: 432/464, loss: 0.006112591363489628 2023-01-22 
12:59:27.133227: step: 434/464, loss: 0.017656736075878143 2023-01-22 12:59:27.741597: step: 436/464, loss: 0.0700252577662468 2023-01-22 12:59:28.383387: step: 438/464, loss: 0.019659023731946945 2023-01-22 12:59:29.048175: step: 440/464, loss: 0.018227603286504745 2023-01-22 12:59:29.685701: step: 442/464, loss: 0.002650787588208914 2023-01-22 12:59:30.341880: step: 444/464, loss: 0.03067925199866295 2023-01-22 12:59:30.951736: step: 446/464, loss: 0.045789267867803574 2023-01-22 12:59:31.532678: step: 448/464, loss: 0.04718998074531555 2023-01-22 12:59:32.208028: step: 450/464, loss: 0.03693093731999397 2023-01-22 12:59:32.817580: step: 452/464, loss: 0.05785346403717995 2023-01-22 12:59:33.378902: step: 454/464, loss: 0.012178266420960426 2023-01-22 12:59:33.961632: step: 456/464, loss: 0.0007852213457226753 2023-01-22 12:59:34.540802: step: 458/464, loss: 0.03168824315071106 2023-01-22 12:59:35.112808: step: 460/464, loss: 0.0010413274867460132 2023-01-22 12:59:35.797448: step: 462/464, loss: 0.060342393815517426 2023-01-22 12:59:36.430299: step: 464/464, loss: 0.00010225496225757524 2023-01-22 12:59:37.050021: step: 466/464, loss: 0.11511260271072388 2023-01-22 12:59:37.653703: step: 468/464, loss: 0.23531371355056763 2023-01-22 12:59:38.285219: step: 470/464, loss: 0.031879253685474396 2023-01-22 12:59:38.904375: step: 472/464, loss: 0.020632173866033554 2023-01-22 12:59:39.565371: step: 474/464, loss: 0.017486222088336945 2023-01-22 12:59:40.203468: step: 476/464, loss: 0.02898748219013214 2023-01-22 12:59:40.808585: step: 478/464, loss: 0.0013614544877782464 2023-01-22 12:59:41.497170: step: 480/464, loss: 0.03617943823337555 2023-01-22 12:59:42.129907: step: 482/464, loss: 0.0786338523030281 2023-01-22 12:59:42.731933: step: 484/464, loss: 0.018358413130044937 2023-01-22 12:59:43.331523: step: 486/464, loss: 0.020440151914954185 2023-01-22 12:59:43.939489: step: 488/464, loss: 0.0033799484372138977 2023-01-22 12:59:44.542627: step: 490/464, loss: 0.007676076143980026 2023-01-22 12:59:45.123784: step: 492/464, loss: 0.015695925801992416 2023-01-22 12:59:45.770934: step: 494/464, loss: 0.015165206044912338 2023-01-22 12:59:46.418504: step: 496/464, loss: 0.014235205017030239 2023-01-22 12:59:47.072895: step: 498/464, loss: 0.7016212940216064 2023-01-22 12:59:47.702813: step: 500/464, loss: 0.025006012991070747 2023-01-22 12:59:48.358128: step: 502/464, loss: 0.005251850001513958 2023-01-22 12:59:48.961339: step: 504/464, loss: 0.2685335874557495 2023-01-22 12:59:49.641816: step: 506/464, loss: 0.0096308384090662 2023-01-22 12:59:50.247120: step: 508/464, loss: 0.01977044716477394 2023-01-22 12:59:50.862838: step: 510/464, loss: 0.008932722732424736 2023-01-22 12:59:51.497955: step: 512/464, loss: 0.002780807903036475 2023-01-22 12:59:52.098147: step: 514/464, loss: 0.08242969214916229 2023-01-22 12:59:52.720900: step: 516/464, loss: 0.02369045838713646 2023-01-22 12:59:53.346873: step: 518/464, loss: 0.007691401522606611 2023-01-22 12:59:53.989905: step: 520/464, loss: 0.07566172629594803 2023-01-22 12:59:54.639495: step: 522/464, loss: 0.05063261091709137 2023-01-22 12:59:55.318724: step: 524/464, loss: 0.013660422526299953 2023-01-22 12:59:55.951463: step: 526/464, loss: 0.24020728468894958 2023-01-22 12:59:56.503784: step: 528/464, loss: 0.01348627358675003 2023-01-22 12:59:57.077959: step: 530/464, loss: 0.007197367958724499 2023-01-22 12:59:57.659286: step: 532/464, loss: 0.010228368453681469 2023-01-22 12:59:58.332452: step: 534/464, loss: 0.023895204067230225 2023-01-22 
12:59:58.906920: step: 536/464, loss: 0.021335279569029808 2023-01-22 12:59:59.563800: step: 538/464, loss: 0.08497224748134613 2023-01-22 13:00:00.137817: step: 540/464, loss: 0.022408263757824898 2023-01-22 13:00:00.749965: step: 542/464, loss: 0.02243008278310299 2023-01-22 13:00:01.297897: step: 544/464, loss: 0.02225842885673046 2023-01-22 13:00:01.912787: step: 546/464, loss: 0.035303566604852676 2023-01-22 13:00:02.451311: step: 548/464, loss: 0.000824810762424022 2023-01-22 13:00:03.039118: step: 550/464, loss: 0.2665346562862396 2023-01-22 13:00:03.701335: step: 552/464, loss: 0.012323501519858837 2023-01-22 13:00:04.447871: step: 554/464, loss: 0.1625223010778427 2023-01-22 13:00:05.081482: step: 556/464, loss: 0.06891467422246933 2023-01-22 13:00:05.719477: step: 558/464, loss: 0.01002768985927105 2023-01-22 13:00:06.291455: step: 560/464, loss: 0.033113449811935425 2023-01-22 13:00:06.866648: step: 562/464, loss: 0.0017599809216335416 2023-01-22 13:00:07.454537: step: 564/464, loss: 0.007383772637695074 2023-01-22 13:00:08.055750: step: 566/464, loss: 0.05605030059814453 2023-01-22 13:00:08.696765: step: 568/464, loss: 0.013501882553100586 2023-01-22 13:00:09.277168: step: 570/464, loss: 0.1326901763677597 2023-01-22 13:00:09.932316: step: 572/464, loss: 0.013902074657380581 2023-01-22 13:00:10.498500: step: 574/464, loss: 0.0038822093047201633 2023-01-22 13:00:11.159129: step: 576/464, loss: 0.06847728043794632 2023-01-22 13:00:11.796828: step: 578/464, loss: 0.2082699090242386 2023-01-22 13:00:12.436032: step: 580/464, loss: 0.03914272040128708 2023-01-22 13:00:13.175019: step: 582/464, loss: 0.02024165354669094 2023-01-22 13:00:13.784130: step: 584/464, loss: 0.14907458424568176 2023-01-22 13:00:14.445310: step: 586/464, loss: 0.00691397488117218 2023-01-22 13:00:15.094822: step: 588/464, loss: 0.04703788459300995 2023-01-22 13:00:15.742110: step: 590/464, loss: 0.004772043786942959 2023-01-22 13:00:16.374419: step: 592/464, loss: 0.023602420464158058 2023-01-22 13:00:16.959683: step: 594/464, loss: 0.007593000307679176 2023-01-22 13:00:17.564168: step: 596/464, loss: 0.03346647694706917 2023-01-22 13:00:18.199028: step: 598/464, loss: 0.03485536202788353 2023-01-22 13:00:18.841850: step: 600/464, loss: 0.02620747685432434 2023-01-22 13:00:19.448320: step: 602/464, loss: 0.005401256028562784 2023-01-22 13:00:20.158034: step: 604/464, loss: 0.18123404681682587 2023-01-22 13:00:20.755531: step: 606/464, loss: 0.01579710841178894 2023-01-22 13:00:21.368473: step: 608/464, loss: 0.028616640716791153 2023-01-22 13:00:21.956442: step: 610/464, loss: 0.0857577919960022 2023-01-22 13:00:22.544744: step: 612/464, loss: 0.0643952488899231 2023-01-22 13:00:23.167984: step: 614/464, loss: 0.027972189709544182 2023-01-22 13:00:23.937457: step: 616/464, loss: 0.036930255591869354 2023-01-22 13:00:24.597670: step: 618/464, loss: 0.010757374577224255 2023-01-22 13:00:25.187294: step: 620/464, loss: 0.006771758198738098 2023-01-22 13:00:25.857237: step: 622/464, loss: 0.022771861404180527 2023-01-22 13:00:26.469093: step: 624/464, loss: 0.004120378289371729 2023-01-22 13:00:27.138829: step: 626/464, loss: 0.03058917075395584 2023-01-22 13:00:27.778456: step: 628/464, loss: 0.05850667878985405 2023-01-22 13:00:28.374759: step: 630/464, loss: 0.013523498550057411 2023-01-22 13:00:28.995606: step: 632/464, loss: 0.2245532125234604 2023-01-22 13:00:29.794444: step: 634/464, loss: 0.0036853866185992956 2023-01-22 13:00:30.450134: step: 636/464, loss: 0.02361578680574894 2023-01-22 
13:00:31.091601: step: 638/464, loss: 0.015455491840839386 2023-01-22 13:00:31.679155: step: 640/464, loss: 0.024254411458969116 2023-01-22 13:00:32.396205: step: 642/464, loss: 0.04180416092276573 2023-01-22 13:00:33.090187: step: 644/464, loss: 0.03187788277864456 2023-01-22 13:00:33.713525: step: 646/464, loss: 0.0003483604232314974 2023-01-22 13:00:34.340203: step: 648/464, loss: 0.00013609840243589133 2023-01-22 13:00:34.986565: step: 650/464, loss: 0.04429443180561066 2023-01-22 13:00:35.580193: step: 652/464, loss: 0.04185933619737625 2023-01-22 13:00:36.150938: step: 654/464, loss: 0.006211341358721256 2023-01-22 13:00:36.748036: step: 656/464, loss: 0.034331731498241425 2023-01-22 13:00:37.354288: step: 658/464, loss: 0.08382061123847961 2023-01-22 13:00:37.942455: step: 660/464, loss: 0.005517153535038233 2023-01-22 13:00:38.585739: step: 662/464, loss: 0.011968887411057949 2023-01-22 13:00:39.184622: step: 664/464, loss: 0.002495914464816451 2023-01-22 13:00:39.768912: step: 666/464, loss: 0.015255164355039597 2023-01-22 13:00:40.346294: step: 668/464, loss: 0.026585770770907402 2023-01-22 13:00:40.962372: step: 670/464, loss: 0.05654933676123619 2023-01-22 13:00:41.537197: step: 672/464, loss: 0.0011084601283073425 2023-01-22 13:00:42.150361: step: 674/464, loss: 0.05362668260931969 2023-01-22 13:00:42.764476: step: 676/464, loss: 0.03868886083364487 2023-01-22 13:00:43.392639: step: 678/464, loss: 0.016717160120606422 2023-01-22 13:00:43.992823: step: 680/464, loss: 0.03761445730924606 2023-01-22 13:00:44.625445: step: 682/464, loss: 0.037883222103118896 2023-01-22 13:00:45.211347: step: 684/464, loss: 0.16490760445594788 2023-01-22 13:00:45.780580: step: 686/464, loss: 0.019592376425862312 2023-01-22 13:00:46.337531: step: 688/464, loss: 0.01852923259139061 2023-01-22 13:00:46.934649: step: 690/464, loss: 0.0750657320022583 2023-01-22 13:00:47.569507: step: 692/464, loss: 0.03591347485780716 2023-01-22 13:00:48.214124: step: 694/464, loss: 0.030060309916734695 2023-01-22 13:00:48.893996: step: 696/464, loss: 0.03932882100343704 2023-01-22 13:00:49.539069: step: 698/464, loss: 0.10937557369470596 2023-01-22 13:00:50.167304: step: 700/464, loss: 0.03864862024784088 2023-01-22 13:00:50.810774: step: 702/464, loss: 0.02068173885345459 2023-01-22 13:00:51.424647: step: 704/464, loss: 0.0007597276126034558 2023-01-22 13:00:52.044073: step: 706/464, loss: 0.0005308131221681833 2023-01-22 13:00:52.655878: step: 708/464, loss: 0.005687220022082329 2023-01-22 13:00:53.294272: step: 710/464, loss: 0.027716923505067825 2023-01-22 13:00:53.904934: step: 712/464, loss: 0.017588965594768524 2023-01-22 13:00:54.493114: step: 714/464, loss: 0.042746786028146744 2023-01-22 13:00:55.119792: step: 716/464, loss: 0.11261896789073944 2023-01-22 13:00:55.725673: step: 718/464, loss: 0.01578381098806858 2023-01-22 13:00:56.306432: step: 720/464, loss: 0.04451392590999603 2023-01-22 13:00:56.971293: step: 722/464, loss: 0.6705551147460938 2023-01-22 13:00:57.590925: step: 724/464, loss: 0.8189102411270142 2023-01-22 13:00:58.210804: step: 726/464, loss: 0.011479933746159077 2023-01-22 13:00:58.851873: step: 728/464, loss: 0.015497986227273941 2023-01-22 13:00:59.478743: step: 730/464, loss: 0.09625409543514252 2023-01-22 13:01:00.124972: step: 732/464, loss: 0.03274373710155487 2023-01-22 13:01:00.675489: step: 734/464, loss: 0.040542371571063995 2023-01-22 13:01:01.287148: step: 736/464, loss: 0.010151658207178116 2023-01-22 13:01:01.961981: step: 738/464, loss: 0.009617464616894722 2023-01-22 
13:01:02.601781: step: 740/464, loss: 0.00679465476423502 2023-01-22 13:01:03.222171: step: 742/464, loss: 0.013655357994139194 2023-01-22 13:01:03.857388: step: 744/464, loss: 0.007171467877924442 2023-01-22 13:01:04.465754: step: 746/464, loss: 0.0001739412546157837 2023-01-22 13:01:05.097486: step: 748/464, loss: 0.02507726289331913 2023-01-22 13:01:05.651511: step: 750/464, loss: 0.01518337707966566 2023-01-22 13:01:06.247191: step: 752/464, loss: 0.04084146022796631 2023-01-22 13:01:06.886419: step: 754/464, loss: 0.0035083615221083164 2023-01-22 13:01:07.555853: step: 756/464, loss: 0.012889510951936245 2023-01-22 13:01:08.154118: step: 758/464, loss: 0.9736987948417664 2023-01-22 13:01:08.751460: step: 760/464, loss: 0.07425379753112793 2023-01-22 13:01:09.403885: step: 762/464, loss: 0.05876192823052406 2023-01-22 13:01:09.956420: step: 764/464, loss: 0.005951893515884876 2023-01-22 13:01:10.564362: step: 766/464, loss: 0.02159908041357994 2023-01-22 13:01:11.349910: step: 768/464, loss: 0.038079917430877686 2023-01-22 13:01:11.981642: step: 770/464, loss: 0.09124045819044113 2023-01-22 13:01:12.584496: step: 772/464, loss: 0.023096373304724693 2023-01-22 13:01:13.182517: step: 774/464, loss: 0.0024730029981583357 2023-01-22 13:01:13.857766: step: 776/464, loss: 0.050720226019620895 2023-01-22 13:01:14.455643: step: 778/464, loss: 0.024673130363225937 2023-01-22 13:01:15.102614: step: 780/464, loss: 0.03335406631231308 2023-01-22 13:01:15.734838: step: 782/464, loss: 0.004928721114993095 2023-01-22 13:01:16.338141: step: 784/464, loss: 0.024341443553566933 2023-01-22 13:01:16.879882: step: 786/464, loss: 0.01013362966477871 2023-01-22 13:01:17.494150: step: 788/464, loss: 0.006453828886151314 2023-01-22 13:01:18.097371: step: 790/464, loss: 0.16598333418369293 2023-01-22 13:01:18.720623: step: 792/464, loss: 0.03524326533079147 2023-01-22 13:01:19.394393: step: 794/464, loss: 0.1181488037109375 2023-01-22 13:01:20.018389: step: 796/464, loss: 0.09138718247413635 2023-01-22 13:01:20.665967: step: 798/464, loss: 0.5004169940948486 2023-01-22 13:01:21.296364: step: 800/464, loss: 0.07259329408407211 2023-01-22 13:01:21.885339: step: 802/464, loss: 0.015081651508808136 2023-01-22 13:01:22.484329: step: 804/464, loss: 0.038550931960344315 2023-01-22 13:01:23.237522: step: 806/464, loss: 0.12904329597949982 2023-01-22 13:01:23.804240: step: 808/464, loss: 0.40018248558044434 2023-01-22 13:01:24.419682: step: 810/464, loss: 0.04147998243570328 2023-01-22 13:01:25.015858: step: 812/464, loss: 0.02219959907233715 2023-01-22 13:01:25.606351: step: 814/464, loss: 0.042695704847574234 2023-01-22 13:01:26.288907: step: 816/464, loss: 0.015478466637432575 2023-01-22 13:01:26.909461: step: 818/464, loss: 0.04031830653548241 2023-01-22 13:01:27.465949: step: 820/464, loss: 0.002473577158525586 2023-01-22 13:01:28.097315: step: 822/464, loss: 0.013560102321207523 2023-01-22 13:01:28.779199: step: 824/464, loss: 0.021678712218999863 2023-01-22 13:01:29.383626: step: 826/464, loss: 0.003429161384701729 2023-01-22 13:01:30.014976: step: 828/464, loss: 0.031064260751008987 2023-01-22 13:01:30.604139: step: 830/464, loss: 0.016433386132121086 2023-01-22 13:01:31.286703: step: 832/464, loss: 0.025123601779341698 2023-01-22 13:01:31.852062: step: 834/464, loss: 0.02761431410908699 2023-01-22 13:01:32.573888: step: 836/464, loss: 0.006232484709471464 2023-01-22 13:01:33.215142: step: 838/464, loss: 0.00811823084950447 2023-01-22 13:01:33.835438: step: 840/464, loss: 0.12833282351493835 2023-01-22 
13:01:34.460231: step: 842/464, loss: 0.009222570806741714 2023-01-22 13:01:35.030843: step: 844/464, loss: 0.07020155340433121 2023-01-22 13:01:35.608485: step: 846/464, loss: 0.002051841700449586 2023-01-22 13:01:36.252717: step: 848/464, loss: 0.03661072626709938 2023-01-22 13:01:36.897527: step: 850/464, loss: 0.026866046711802483 2023-01-22 13:01:37.482886: step: 852/464, loss: 0.007825160399079323 2023-01-22 13:01:38.060966: step: 854/464, loss: 0.04166368395090103 2023-01-22 13:01:38.660334: step: 856/464, loss: 0.0006112029659561813 2023-01-22 13:01:39.247102: step: 858/464, loss: 0.05629737675189972 2023-01-22 13:01:39.936169: step: 860/464, loss: 0.014596642926335335 2023-01-22 13:01:40.577125: step: 862/464, loss: 0.09020011126995087 2023-01-22 13:01:41.248891: step: 864/464, loss: 0.0339503139257431 2023-01-22 13:01:41.866456: step: 866/464, loss: 0.022090664133429527 2023-01-22 13:01:42.489881: step: 868/464, loss: 0.04064103960990906 2023-01-22 13:01:43.089032: step: 870/464, loss: 0.00135325628798455 2023-01-22 13:01:43.860283: step: 872/464, loss: 0.011642823927104473 2023-01-22 13:01:44.455190: step: 874/464, loss: 0.05684065818786621 2023-01-22 13:01:45.115119: step: 876/464, loss: 0.008805211633443832 2023-01-22 13:01:45.728089: step: 878/464, loss: 0.04149453341960907 2023-01-22 13:01:46.325798: step: 880/464, loss: 0.021980678662657738 2023-01-22 13:01:46.932590: step: 882/464, loss: 0.4108825922012329 2023-01-22 13:01:47.572269: step: 884/464, loss: 0.051128219813108444 2023-01-22 13:01:48.189013: step: 886/464, loss: 0.0010531533043831587 2023-01-22 13:01:48.805815: step: 888/464, loss: 0.0466160923242569 2023-01-22 13:01:49.404304: step: 890/464, loss: 0.07535409182310104 2023-01-22 13:01:50.205257: step: 892/464, loss: 0.029860571026802063 2023-01-22 13:01:50.911088: step: 894/464, loss: 0.15788963437080383 2023-01-22 13:01:51.578148: step: 896/464, loss: 0.01945146918296814 2023-01-22 13:01:52.188452: step: 898/464, loss: 0.02043139562010765 2023-01-22 13:01:52.811066: step: 900/464, loss: 0.06940814107656479 2023-01-22 13:01:53.448272: step: 902/464, loss: 0.010013815015554428 2023-01-22 13:01:54.081976: step: 904/464, loss: 0.5424590110778809 2023-01-22 13:01:54.723043: step: 906/464, loss: 0.01136374194175005 2023-01-22 13:01:55.306699: step: 908/464, loss: 0.03829554468393326 2023-01-22 13:01:55.890150: step: 910/464, loss: 0.0687684416770935 2023-01-22 13:01:56.491504: step: 912/464, loss: 0.002499011345207691 2023-01-22 13:01:57.135465: step: 914/464, loss: 0.13825161755084991 2023-01-22 13:01:57.659291: step: 916/464, loss: 0.024322301149368286 2023-01-22 13:01:58.310224: step: 918/464, loss: 0.27383631467819214 2023-01-22 13:01:58.902616: step: 920/464, loss: 0.0559036023914814 2023-01-22 13:01:59.457955: step: 922/464, loss: 0.09064502269029617 2023-01-22 13:02:00.084058: step: 924/464, loss: 0.05530287325382233 2023-01-22 13:02:00.674961: step: 926/464, loss: 0.08000970631837845 2023-01-22 13:02:01.274244: step: 928/464, loss: 0.014721273444592953 2023-01-22 13:02:01.739191: step: 930/464, loss: 0.032917868345975876 ================================================== Loss: 0.055 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982237576772753, 'r': 0.3378360215053764, 'f1': 0.3167964116251484}, 'combined': 0.23342893488168825, 'epoch': 26} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29923125313284993, 'r': 
0.31214573329560835, 'f1': 0.3055520930866674}, 'combined': 0.1994796048130575, 'epoch': 26}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2915986597664544, 'r': 0.34748379190385836, 'f1': 0.3170977633477634}, 'combined': 0.2336509835194046, 'epoch': 26}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3144876599742, 'r': 0.31390955765807094, 'f1': 0.31419834289970666}, 'combined': 0.20512430676353907, 'epoch': 26}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3142208466609456, 'r': 0.3458218046742854, 'f1': 0.3292648438362212}, 'combined': 0.24261620072142612, 'epoch': 26}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31078700628129236, 'r': 0.30735920841789577, 'f1': 0.3090636032889747}, 'combined': 0.20177209333891613, 'epoch': 26}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22798742138364778, 'r': 0.3452380952380952, 'f1': 0.2746212121212121}, 'combined': 0.18308080808080807, 'epoch': 26}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32432432432432434, 'r': 0.5217391304347826, 'f1': 0.4}, 'combined': 0.2, 'epoch': 26}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 26}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r':
0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:04:39.326129: step: 2/464, loss: 0.03520013764500618 2023-01-22 13:04:39.915577: step: 4/464, loss: 0.01785721816122532 2023-01-22 13:04:40.512845: step: 6/464, loss: 0.16269555687904358 2023-01-22 13:04:41.111891: step: 8/464, loss: 0.07691093534231186 2023-01-22 13:04:41.749758: step: 10/464, loss: 0.007392262574285269 2023-01-22 13:04:42.393277: step: 12/464, loss: 0.18206527829170227 2023-01-22 13:04:43.057511: step: 14/464, loss: 0.07249999046325684 2023-01-22 13:04:43.697706: step: 16/464, loss: 0.021117065101861954 2023-01-22 13:04:44.328373: step: 18/464, loss: 0.030323537066578865 2023-01-22 13:04:44.935708: step: 20/464, loss: 0.02338639833033085 2023-01-22 13:04:45.521253: step: 22/464, loss: 0.1333690583705902 2023-01-22 13:04:46.128131: step: 24/464, loss: 0.0010341749293729663 2023-01-22 13:04:46.830632: step: 26/464, loss: 0.007895023562014103 2023-01-22 13:04:47.522587: step: 28/464, loss: 2.761321783065796 2023-01-22 13:04:48.214895: step: 30/464, loss: 0.035957738757133484 2023-01-22 13:04:48.799111: step: 32/464, loss: 0.028074631467461586 2023-01-22 13:04:49.370602: step: 34/464, loss: 0.011213401332497597 2023-01-22 13:04:49.987824: step: 36/464, loss: 0.005153039935976267 2023-01-22 13:04:50.658832: step: 38/464, loss: 0.002318080049008131 2023-01-22 13:04:51.276312: step: 40/464, loss: 0.006432653870433569 2023-01-22 13:04:51.915516: step: 42/464, loss: 0.007253899239003658 2023-01-22 13:04:52.504809: step: 44/464, loss: 0.12813512980937958 2023-01-22 13:04:53.099335: step: 46/464, loss: 0.08304651826620102 2023-01-22 13:04:53.801568: step: 48/464, loss: 0.007072602864354849 2023-01-22 13:04:54.415379: step: 50/464, loss: 0.003953339997678995 2023-01-22 13:04:55.045518: step: 52/464, loss: 0.03858869522809982 2023-01-22 13:04:55.682797: step: 54/464, loss: 1.0059211254119873 2023-01-22 13:04:56.298770: step: 56/464, loss: 0.07535743713378906 2023-01-22 13:04:57.016680: step: 58/464, loss: 0.005572025198489428 2023-01-22 13:04:57.670463: step: 60/464, loss: 0.011123955249786377 2023-01-22 13:04:58.270906: step: 62/464, loss: 0.002408726839348674 2023-01-22 13:04:58.840836: step: 64/464, loss: 0.006805627141147852 2023-01-22 13:04:59.440752: step: 66/464, loss: 0.018881412222981453 2023-01-22 13:04:59.981456: step: 68/464, loss: 0.02203463576734066 2023-01-22 13:05:00.538981: step: 70/464, loss: 0.026272814720869064 2023-01-22 13:05:01.191835: step: 72/464, loss: 0.012682809494435787 2023-01-22 13:05:01.755791: step: 74/464, loss: 0.05023544281721115 2023-01-22 13:05:02.423331: step: 76/464, loss: 0.006426983047276735 2023-01-22 13:05:03.098749: step: 78/464, loss: 0.0036636111326515675 2023-01-22 13:05:03.691308: step: 80/464, loss: 0.026904908940196037 2023-01-22 13:05:04.284311: step: 82/464, loss: 0.024857092648744583 2023-01-22 13:05:04.896673: step: 84/464, loss: 0.07651542872190475 2023-01-22 13:05:05.592976: step: 86/464, loss: 0.159203439950943 2023-01-22 13:05:06.273938: step: 88/464, loss: 0.002764118369668722 2023-01-22 13:05:06.900138: step: 90/464, loss: 0.010748973116278648 2023-01-22 13:05:07.563233: step: 92/464, loss: 0.7456044554710388 2023-01-22 13:05:08.237510: step: 94/464, loss: 
0.0021835712250322104 2023-01-22 13:05:08.946435: step: 96/464, loss: 0.000798621098510921 2023-01-22 13:05:09.589056: step: 98/464, loss: 0.016292361542582512 2023-01-22 13:05:10.272591: step: 100/464, loss: 0.043895792216062546 2023-01-22 13:05:10.838810: step: 102/464, loss: 0.059375472366809845 2023-01-22 13:05:11.496298: step: 104/464, loss: 0.025569267570972443 2023-01-22 13:05:12.190341: step: 106/464, loss: 0.016790715977549553 2023-01-22 13:05:12.831185: step: 108/464, loss: 0.09495478123426437 2023-01-22 13:05:13.456210: step: 110/464, loss: 0.04769524931907654 2023-01-22 13:05:14.064565: step: 112/464, loss: 0.006700258236378431 2023-01-22 13:05:14.603038: step: 114/464, loss: 0.0009436507825739682 2023-01-22 13:05:15.267057: step: 116/464, loss: 0.08529170602560043 2023-01-22 13:05:15.916759: step: 118/464, loss: 0.06692947447299957 2023-01-22 13:05:16.539921: step: 120/464, loss: 0.01741936057806015 2023-01-22 13:05:17.201232: step: 122/464, loss: 0.06238299608230591 2023-01-22 13:05:17.831697: step: 124/464, loss: 0.013601443730294704 2023-01-22 13:05:18.399631: step: 126/464, loss: 0.005797897465527058 2023-01-22 13:05:18.997461: step: 128/464, loss: 0.02559986710548401 2023-01-22 13:05:19.642269: step: 130/464, loss: 0.0006582848145626485 2023-01-22 13:05:20.224277: step: 132/464, loss: 0.010648852214217186 2023-01-22 13:05:20.825099: step: 134/464, loss: 0.0007578640943393111 2023-01-22 13:05:21.450411: step: 136/464, loss: 0.020853610709309578 2023-01-22 13:05:22.139602: step: 138/464, loss: 1.2599151134490967 2023-01-22 13:05:22.746353: step: 140/464, loss: 0.0707487240433693 2023-01-22 13:05:23.381637: step: 142/464, loss: 0.20164254307746887 2023-01-22 13:05:23.999801: step: 144/464, loss: 0.01755233108997345 2023-01-22 13:05:24.593277: step: 146/464, loss: 0.0019922242499887943 2023-01-22 13:05:25.214217: step: 148/464, loss: 0.011276560835540295 2023-01-22 13:05:25.839700: step: 150/464, loss: 0.001740350155159831 2023-01-22 13:05:26.467508: step: 152/464, loss: 0.024404587224125862 2023-01-22 13:05:27.123586: step: 154/464, loss: 0.029476886615157127 2023-01-22 13:05:27.735236: step: 156/464, loss: 0.002116193762049079 2023-01-22 13:05:28.433611: step: 158/464, loss: 0.020922478288412094 2023-01-22 13:05:29.138268: step: 160/464, loss: 0.22654925286769867 2023-01-22 13:05:29.748671: step: 162/464, loss: 0.0761280506849289 2023-01-22 13:05:30.364602: step: 164/464, loss: 0.049508724361658096 2023-01-22 13:05:30.945069: step: 166/464, loss: 0.00400913693010807 2023-01-22 13:05:31.583860: step: 168/464, loss: 0.0035668325144797564 2023-01-22 13:05:32.269008: step: 170/464, loss: 0.15200269222259521 2023-01-22 13:05:32.890771: step: 172/464, loss: 0.0035742297768592834 2023-01-22 13:05:33.490318: step: 174/464, loss: 0.017629073932766914 2023-01-22 13:05:34.038741: step: 176/464, loss: 0.008834774605929852 2023-01-22 13:05:34.644561: step: 178/464, loss: 0.002919742139056325 2023-01-22 13:05:35.224119: step: 180/464, loss: 0.005628170445561409 2023-01-22 13:05:35.874117: step: 182/464, loss: 0.0009138965397141874 2023-01-22 13:05:36.523845: step: 184/464, loss: 0.010701551102101803 2023-01-22 13:05:37.142105: step: 186/464, loss: 0.01624779775738716 2023-01-22 13:05:37.724240: step: 188/464, loss: 0.02109084278345108 2023-01-22 13:05:38.298492: step: 190/464, loss: 0.006756091956049204 2023-01-22 13:05:39.003466: step: 192/464, loss: 0.01783033274114132 2023-01-22 13:05:39.639565: step: 194/464, loss: 0.005625900812447071 2023-01-22 13:05:40.249156: step: 196/464, loss: 
0.0433078333735466 2023-01-22 13:05:40.898479: step: 198/464, loss: 0.10533846914768219 2023-01-22 13:05:41.581059: step: 200/464, loss: 0.03561583533883095 2023-01-22 13:05:42.283357: step: 202/464, loss: 0.029630184173583984 2023-01-22 13:05:42.894239: step: 204/464, loss: 0.007734477519989014 2023-01-22 13:05:43.486010: step: 206/464, loss: 0.010086983442306519 2023-01-22 13:05:44.034456: step: 208/464, loss: 0.011256583966314793 2023-01-22 13:05:44.616006: step: 210/464, loss: 0.010693291202187538 2023-01-22 13:05:45.214194: step: 212/464, loss: 0.011730333790183067 2023-01-22 13:05:45.892193: step: 214/464, loss: 0.07052070647478104 2023-01-22 13:05:46.564434: step: 216/464, loss: 0.026574324816465378 2023-01-22 13:05:47.149795: step: 218/464, loss: 0.03399469703435898 2023-01-22 13:05:47.773674: step: 220/464, loss: 0.030823074281215668 2023-01-22 13:05:48.433900: step: 222/464, loss: 0.051768504083156586 2023-01-22 13:05:49.060265: step: 224/464, loss: 0.09838014841079712 2023-01-22 13:05:49.779477: step: 226/464, loss: 0.04117704927921295 2023-01-22 13:05:50.404269: step: 228/464, loss: 0.012569655664265156 2023-01-22 13:05:50.987529: step: 230/464, loss: 0.011671887710690498 2023-01-22 13:05:51.607439: step: 232/464, loss: 0.0007638675742782652 2023-01-22 13:05:52.274346: step: 234/464, loss: 0.051818255335092545 2023-01-22 13:05:52.943063: step: 236/464, loss: 0.0054818205535411835 2023-01-22 13:05:53.524962: step: 238/464, loss: 0.0012856718385592103 2023-01-22 13:05:54.113462: step: 240/464, loss: 0.0016444082139059901 2023-01-22 13:05:54.736841: step: 242/464, loss: 0.002486496465280652 2023-01-22 13:05:55.382452: step: 244/464, loss: 0.0032496124040335417 2023-01-22 13:05:56.082697: step: 246/464, loss: 0.009185628965497017 2023-01-22 13:05:56.738562: step: 248/464, loss: 0.0025085736997425556 2023-01-22 13:05:57.423322: step: 250/464, loss: 0.04779759421944618 2023-01-22 13:05:58.160010: step: 252/464, loss: 0.1278035044670105 2023-01-22 13:05:58.742164: step: 254/464, loss: 0.029156673699617386 2023-01-22 13:05:59.387441: step: 256/464, loss: 0.004071261268109083 2023-01-22 13:05:59.978559: step: 258/464, loss: 0.0031680958345532417 2023-01-22 13:06:00.571735: step: 260/464, loss: 0.037706535309553146 2023-01-22 13:06:01.204290: step: 262/464, loss: 0.05441688373684883 2023-01-22 13:06:01.809098: step: 264/464, loss: 0.00813133455812931 2023-01-22 13:06:02.477055: step: 266/464, loss: 0.0021063615567982197 2023-01-22 13:06:03.092018: step: 268/464, loss: 0.008211891166865826 2023-01-22 13:06:03.861809: step: 270/464, loss: 0.0014769110130146146 2023-01-22 13:06:04.424870: step: 272/464, loss: 0.10388434678316116 2023-01-22 13:06:05.111923: step: 274/464, loss: 0.0014674770645797253 2023-01-22 13:06:05.750818: step: 276/464, loss: 0.005743144545704126 2023-01-22 13:06:06.307746: step: 278/464, loss: 0.03033411130309105 2023-01-22 13:06:06.950856: step: 280/464, loss: 0.04292159900069237 2023-01-22 13:06:07.643610: step: 282/464, loss: 0.005484298337250948 2023-01-22 13:06:08.371000: step: 284/464, loss: 0.07534424960613251 2023-01-22 13:06:09.028071: step: 286/464, loss: 0.020643752068281174 2023-01-22 13:06:09.636437: step: 288/464, loss: 0.007683439180254936 2023-01-22 13:06:10.295161: step: 290/464, loss: 0.03788389265537262 2023-01-22 13:06:10.947625: step: 292/464, loss: 0.02440880984067917 2023-01-22 13:06:11.586875: step: 294/464, loss: 0.04807315021753311 2023-01-22 13:06:12.256247: step: 296/464, loss: 0.08031445741653442 2023-01-22 13:06:12.959219: step: 298/464, 
loss: 0.004971335642039776 2023-01-22 13:06:13.595001: step: 300/464, loss: 0.06626083701848984 2023-01-22 13:06:14.191001: step: 302/464, loss: 0.008229141123592854 2023-01-22 13:06:14.818058: step: 304/464, loss: 0.012430696748197079 2023-01-22 13:06:15.489480: step: 306/464, loss: 0.015409497544169426 2023-01-22 13:06:16.035467: step: 308/464, loss: 0.03891903534531593 2023-01-22 13:06:16.670957: step: 310/464, loss: 0.003821104532107711 2023-01-22 13:06:17.295451: step: 312/464, loss: 0.01957610994577408 2023-01-22 13:06:17.900148: step: 314/464, loss: 0.004578686784952879 2023-01-22 13:06:18.542393: step: 316/464, loss: 0.021410878747701645 2023-01-22 13:06:19.094584: step: 318/464, loss: 0.002462534699589014 2023-01-22 13:06:19.678388: step: 320/464, loss: 0.028250494971871376 2023-01-22 13:06:20.384230: step: 322/464, loss: 0.0746573656797409 2023-01-22 13:06:21.008016: step: 324/464, loss: 0.011263798922300339 2023-01-22 13:06:21.556558: step: 326/464, loss: 0.02622237429022789 2023-01-22 13:06:22.162341: step: 328/464, loss: 0.006810697726905346 2023-01-22 13:06:22.776017: step: 330/464, loss: 0.02033045142889023 2023-01-22 13:06:23.380376: step: 332/464, loss: 0.028290104120969772 2023-01-22 13:06:24.015916: step: 334/464, loss: 0.006970271468162537 2023-01-22 13:06:24.636526: step: 336/464, loss: 0.10415491461753845 2023-01-22 13:06:25.309009: step: 338/464, loss: 0.03805846348404884 2023-01-22 13:06:25.938906: step: 340/464, loss: 0.008582512848079205 2023-01-22 13:06:26.617616: step: 342/464, loss: 0.02193283848464489 2023-01-22 13:06:27.246875: step: 344/464, loss: 0.00040405866457149386 2023-01-22 13:06:27.838007: step: 346/464, loss: 0.005250776186585426 2023-01-22 13:06:28.460252: step: 348/464, loss: 0.316387414932251 2023-01-22 13:06:29.150696: step: 350/464, loss: 0.021355492994189262 2023-01-22 13:06:29.780691: step: 352/464, loss: 0.0944470763206482 2023-01-22 13:06:30.386159: step: 354/464, loss: 0.005789034068584442 2023-01-22 13:06:30.938217: step: 356/464, loss: 0.03233187273144722 2023-01-22 13:06:31.588469: step: 358/464, loss: 0.013468354009091854 2023-01-22 13:06:32.186340: step: 360/464, loss: 0.007178623229265213 2023-01-22 13:06:32.813056: step: 362/464, loss: 0.02026214264333248 2023-01-22 13:06:33.453690: step: 364/464, loss: 0.014081430621445179 2023-01-22 13:06:34.061851: step: 366/464, loss: 0.00034141322248615324 2023-01-22 13:06:34.692645: step: 368/464, loss: 0.009255238808691502 2023-01-22 13:06:35.301134: step: 370/464, loss: 0.0002265293151140213 2023-01-22 13:06:35.882266: step: 372/464, loss: 0.02084532380104065 2023-01-22 13:06:36.627329: step: 374/464, loss: 0.01657322235405445 2023-01-22 13:06:37.276407: step: 376/464, loss: 0.01808655634522438 2023-01-22 13:06:37.921529: step: 378/464, loss: 0.026209594681859016 2023-01-22 13:06:38.598031: step: 380/464, loss: 0.03522813320159912 2023-01-22 13:06:39.257101: step: 382/464, loss: 0.4924827516078949 2023-01-22 13:06:39.852298: step: 384/464, loss: 0.04399174824357033 2023-01-22 13:06:40.485600: step: 386/464, loss: 0.11709188669919968 2023-01-22 13:06:41.122339: step: 388/464, loss: 0.011554055847227573 2023-01-22 13:06:41.795249: step: 390/464, loss: 0.059646569192409515 2023-01-22 13:06:42.425799: step: 392/464, loss: 0.02467474900186062 2023-01-22 13:06:43.083322: step: 394/464, loss: 0.010192793793976307 2023-01-22 13:06:43.719456: step: 396/464, loss: 0.01305388007313013 2023-01-22 13:06:44.415187: step: 398/464, loss: 0.11766955256462097 2023-01-22 13:06:44.961986: step: 400/464, loss: 
0.02509024366736412 2023-01-22 13:06:45.585093: step: 402/464, loss: 0.0038161729462444782 2023-01-22 13:06:46.228584: step: 404/464, loss: 0.003525168402120471 2023-01-22 13:06:46.842024: step: 406/464, loss: 0.022189294919371605 2023-01-22 13:06:47.505153: step: 408/464, loss: 0.015227475203573704 2023-01-22 13:06:48.117858: step: 410/464, loss: 0.029578283429145813 2023-01-22 13:06:48.728103: step: 412/464, loss: 0.04139862209558487 2023-01-22 13:06:49.352227: step: 414/464, loss: 0.0006235550390556455 2023-01-22 13:06:49.990383: step: 416/464, loss: 0.00983441062271595 2023-01-22 13:06:50.689132: step: 418/464, loss: 0.007791872136294842 2023-01-22 13:06:51.320815: step: 420/464, loss: 0.0033019613474607468 2023-01-22 13:06:52.012221: step: 422/464, loss: 0.04036351293325424 2023-01-22 13:06:52.661516: step: 424/464, loss: 0.051174964755773544 2023-01-22 13:06:53.242611: step: 426/464, loss: 0.06642129272222519 2023-01-22 13:06:53.787382: step: 428/464, loss: 0.04952915012836456 2023-01-22 13:06:54.552796: step: 430/464, loss: 0.00039676425512880087 2023-01-22 13:06:55.114141: step: 432/464, loss: 0.002911365358158946 2023-01-22 13:06:55.739737: step: 434/464, loss: 0.009221483021974564 2023-01-22 13:06:56.333626: step: 436/464, loss: 0.03754451125860214 2023-01-22 13:06:56.958391: step: 438/464, loss: 0.05024517700076103 2023-01-22 13:06:57.595678: step: 440/464, loss: 0.0031745005398988724 2023-01-22 13:06:58.212084: step: 442/464, loss: 0.007519016973674297 2023-01-22 13:06:58.800805: step: 444/464, loss: 0.06694741547107697 2023-01-22 13:06:59.397043: step: 446/464, loss: 0.007199062965810299 2023-01-22 13:07:00.042419: step: 448/464, loss: 0.03117830865085125 2023-01-22 13:07:00.636226: step: 450/464, loss: 0.009340462274849415 2023-01-22 13:07:01.320006: step: 452/464, loss: 0.006021823268383741 2023-01-22 13:07:01.950344: step: 454/464, loss: 0.0030698971822857857 2023-01-22 13:07:02.500935: step: 456/464, loss: 0.008624221198260784 2023-01-22 13:07:03.159710: step: 458/464, loss: 0.059307970106601715 2023-01-22 13:07:03.807946: step: 460/464, loss: 0.011224465444684029 2023-01-22 13:07:04.564693: step: 462/464, loss: 0.03303281217813492 2023-01-22 13:07:05.209391: step: 464/464, loss: 0.008707696571946144 2023-01-22 13:07:05.846073: step: 466/464, loss: 0.02195972390472889 2023-01-22 13:07:06.460596: step: 468/464, loss: 0.021515917032957077 2023-01-22 13:07:07.169088: step: 470/464, loss: 0.01887495443224907 2023-01-22 13:07:07.850651: step: 472/464, loss: 0.007549591362476349 2023-01-22 13:07:08.508376: step: 474/464, loss: 0.012140336446464062 2023-01-22 13:07:09.167495: step: 476/464, loss: 0.0239426139742136 2023-01-22 13:07:09.869530: step: 478/464, loss: 0.0699852779507637 2023-01-22 13:07:10.486519: step: 480/464, loss: 0.032511156052351 2023-01-22 13:07:11.187849: step: 482/464, loss: 0.034606803208589554 2023-01-22 13:07:11.786268: step: 484/464, loss: 0.8022106885910034 2023-01-22 13:07:12.381908: step: 486/464, loss: 0.004343557637184858 2023-01-22 13:07:13.162602: step: 488/464, loss: 0.018259840086102486 2023-01-22 13:07:13.792287: step: 490/464, loss: 0.004218821879476309 2023-01-22 13:07:14.404055: step: 492/464, loss: 0.020487593486905098 2023-01-22 13:07:15.058952: step: 494/464, loss: 0.004174598027020693 2023-01-22 13:07:15.730369: step: 496/464, loss: 0.015948574990034103 2023-01-22 13:07:16.481738: step: 498/464, loss: 0.025023166090250015 2023-01-22 13:07:17.069997: step: 500/464, loss: 0.007463703863322735 2023-01-22 13:07:17.682949: step: 502/464, loss: 
0.023295581340789795 2023-01-22 13:07:18.383991: step: 504/464, loss: 0.035140346735715866 2023-01-22 13:07:19.052106: step: 506/464, loss: 0.04719272628426552 2023-01-22 13:07:19.699051: step: 508/464, loss: 0.024057535454630852 2023-01-22 13:07:20.338350: step: 510/464, loss: 0.02392573654651642 2023-01-22 13:07:20.983855: step: 512/464, loss: 0.004249283578246832 2023-01-22 13:07:21.592256: step: 514/464, loss: 0.015805572271347046 2023-01-22 13:07:22.193486: step: 516/464, loss: 0.05258805304765701 2023-01-22 13:07:22.823148: step: 518/464, loss: 0.01823163963854313 2023-01-22 13:07:23.458207: step: 520/464, loss: 0.03767416626214981 2023-01-22 13:07:24.057765: step: 522/464, loss: 0.041998717933893204 2023-01-22 13:07:24.697746: step: 524/464, loss: 0.045012932270765305 2023-01-22 13:07:25.298413: step: 526/464, loss: 0.01743290200829506 2023-01-22 13:07:25.913410: step: 528/464, loss: 0.028209581971168518 2023-01-22 13:07:26.585073: step: 530/464, loss: 0.029669439420104027 2023-01-22 13:07:27.191251: step: 532/464, loss: 0.025862492620944977 2023-01-22 13:07:27.819853: step: 534/464, loss: 0.025721795856952667 2023-01-22 13:07:28.435687: step: 536/464, loss: 0.010643397457897663 2023-01-22 13:07:29.037533: step: 538/464, loss: 0.0009918089490383863 2023-01-22 13:07:29.708592: step: 540/464, loss: 0.0919719785451889 2023-01-22 13:07:30.340563: step: 542/464, loss: 0.0036847067531198263 2023-01-22 13:07:30.892323: step: 544/464, loss: 0.03340010717511177 2023-01-22 13:07:31.696275: step: 546/464, loss: 1.4899811744689941 2023-01-22 13:07:32.290658: step: 548/464, loss: 0.013500018045306206 2023-01-22 13:07:32.896210: step: 550/464, loss: 0.027026327326893806 2023-01-22 13:07:33.483866: step: 552/464, loss: 0.03297542780637741 2023-01-22 13:07:34.105300: step: 554/464, loss: 0.04438992589712143 2023-01-22 13:07:34.742077: step: 556/464, loss: 0.02105231210589409 2023-01-22 13:07:35.377857: step: 558/464, loss: 0.00616777129471302 2023-01-22 13:07:36.009864: step: 560/464, loss: 0.17797285318374634 2023-01-22 13:07:36.594558: step: 562/464, loss: 0.45834028720855713 2023-01-22 13:07:37.161407: step: 564/464, loss: 0.0040410347282886505 2023-01-22 13:07:37.820595: step: 566/464, loss: 0.01322061289101839 2023-01-22 13:07:38.448409: step: 568/464, loss: 0.018731502816081047 2023-01-22 13:07:39.050123: step: 570/464, loss: 0.044756487011909485 2023-01-22 13:07:39.748904: step: 572/464, loss: 0.0435539074242115 2023-01-22 13:07:40.411114: step: 574/464, loss: 0.951160192489624 2023-01-22 13:07:40.981200: step: 576/464, loss: 0.0037094554863870144 2023-01-22 13:07:41.592032: step: 578/464, loss: 0.022865185514092445 2023-01-22 13:07:42.201177: step: 580/464, loss: 0.07532264292240143 2023-01-22 13:07:42.859395: step: 582/464, loss: 0.0011326372623443604 2023-01-22 13:07:43.442253: step: 584/464, loss: 0.11389389634132385 2023-01-22 13:07:44.105054: step: 586/464, loss: 0.23151782155036926 2023-01-22 13:07:44.714022: step: 588/464, loss: 0.025923380628228188 2023-01-22 13:07:45.363997: step: 590/464, loss: 0.009010151959955692 2023-01-22 13:07:45.958911: step: 592/464, loss: 0.002571831690147519 2023-01-22 13:07:46.633331: step: 594/464, loss: 0.04727496579289436 2023-01-22 13:07:47.225557: step: 596/464, loss: 0.015008511021733284 2023-01-22 13:07:47.800541: step: 598/464, loss: 0.006229538936167955 2023-01-22 13:07:48.414559: step: 600/464, loss: 1.0692181587219238 2023-01-22 13:07:49.014232: step: 602/464, loss: 0.0024318471550941467 2023-01-22 13:07:49.639573: step: 604/464, loss: 
0.04745035246014595 2023-01-22 13:07:50.232981: step: 606/464, loss: 0.013820142485201359 2023-01-22 13:07:50.825510: step: 608/464, loss: 0.008597586303949356 2023-01-22 13:07:51.384329: step: 610/464, loss: 0.0037027972284704447 2023-01-22 13:07:51.990805: step: 612/464, loss: 0.02084978111088276 2023-01-22 13:07:52.577354: step: 614/464, loss: 0.003373411949723959 2023-01-22 13:07:53.216223: step: 616/464, loss: 0.034973613917827606 2023-01-22 13:07:53.819564: step: 618/464, loss: 1.1798455715179443 2023-01-22 13:07:54.410831: step: 620/464, loss: 0.001084931311197579 2023-01-22 13:07:55.048980: step: 622/464, loss: 0.009436777792870998 2023-01-22 13:07:55.676009: step: 624/464, loss: 0.0111102145165205 2023-01-22 13:07:56.216993: step: 626/464, loss: 0.042641185224056244 2023-01-22 13:07:56.748207: step: 628/464, loss: 1.43836510181427 2023-01-22 13:07:57.368753: step: 630/464, loss: 0.003855292685329914 2023-01-22 13:07:57.907063: step: 632/464, loss: 0.013502503745257854 2023-01-22 13:07:58.570432: step: 634/464, loss: 0.23189158737659454 2023-01-22 13:07:59.200623: step: 636/464, loss: 0.17636005580425262 2023-01-22 13:07:59.772252: step: 638/464, loss: 0.047434959560632706 2023-01-22 13:08:00.433867: step: 640/464, loss: 0.07551106810569763 2023-01-22 13:08:01.035789: step: 642/464, loss: 0.0022081949282437563 2023-01-22 13:08:01.622496: step: 644/464, loss: 0.011727227829396725 2023-01-22 13:08:02.243904: step: 646/464, loss: 0.01832597889006138 2023-01-22 13:08:02.858259: step: 648/464, loss: 0.011980734765529633 2023-01-22 13:08:03.473156: step: 650/464, loss: 0.35407641530036926 2023-01-22 13:08:04.065234: step: 652/464, loss: 0.00555523531511426 2023-01-22 13:08:04.757561: step: 654/464, loss: 0.009562639519572258 2023-01-22 13:08:05.348806: step: 656/464, loss: 0.5297197699546814 2023-01-22 13:08:06.004773: step: 658/464, loss: 0.04865730553865433 2023-01-22 13:08:06.565163: step: 660/464, loss: 0.04232815280556679 2023-01-22 13:08:07.164985: step: 662/464, loss: 0.01379953883588314 2023-01-22 13:08:07.727446: step: 664/464, loss: 0.002938771154731512 2023-01-22 13:08:08.389552: step: 666/464, loss: 0.00024933667737059295 2023-01-22 13:08:09.001986: step: 668/464, loss: 0.04576539248228073 2023-01-22 13:08:09.646607: step: 670/464, loss: 0.030819378793239594 2023-01-22 13:08:10.284643: step: 672/464, loss: 0.2565461993217468 2023-01-22 13:08:10.864490: step: 674/464, loss: 0.014622553251683712 2023-01-22 13:08:11.487460: step: 676/464, loss: 0.030066970735788345 2023-01-22 13:08:12.215533: step: 678/464, loss: 0.028679603710770607 2023-01-22 13:08:12.837956: step: 680/464, loss: 0.009739887900650501 2023-01-22 13:08:13.457542: step: 682/464, loss: 0.03857453912496567 2023-01-22 13:08:14.126222: step: 684/464, loss: 0.04997088387608528 2023-01-22 13:08:14.695388: step: 686/464, loss: 0.1312706023454666 2023-01-22 13:08:15.271229: step: 688/464, loss: 0.007372731808573008 2023-01-22 13:08:15.890513: step: 690/464, loss: 0.0016976733459159732 2023-01-22 13:08:16.503988: step: 692/464, loss: 0.04558177664875984 2023-01-22 13:08:17.110494: step: 694/464, loss: 0.011670437641441822 2023-01-22 13:08:17.698983: step: 696/464, loss: 0.04827771708369255 2023-01-22 13:08:18.326539: step: 698/464, loss: 0.0075889453291893005 2023-01-22 13:08:18.988092: step: 700/464, loss: 0.06361132860183716 2023-01-22 13:08:19.546577: step: 702/464, loss: 0.003011963563039899 2023-01-22 13:08:20.210751: step: 704/464, loss: 0.02163800224661827 2023-01-22 13:08:20.821180: step: 706/464, loss: 
0.13394685089588165 2023-01-22 13:08:21.430684: step: 708/464, loss: 0.06432411074638367 2023-01-22 13:08:22.080356: step: 710/464, loss: 0.013955528847873211 2023-01-22 13:08:22.665347: step: 712/464, loss: 0.008491216227412224 2023-01-22 13:08:23.216128: step: 714/464, loss: 0.11980558931827545 2023-01-22 13:08:23.822598: step: 716/464, loss: 0.016868501901626587 2023-01-22 13:08:24.418252: step: 718/464, loss: 0.1915367841720581 2023-01-22 13:08:25.072350: step: 720/464, loss: 0.004546544048935175 2023-01-22 13:08:25.640514: step: 722/464, loss: 0.0005246453802101314 2023-01-22 13:08:26.326660: step: 724/464, loss: 0.018939411267638206 2023-01-22 13:08:26.956008: step: 726/464, loss: 0.10874909162521362 2023-01-22 13:08:27.548855: step: 728/464, loss: 0.004177314229309559 2023-01-22 13:08:28.172978: step: 730/464, loss: 0.04618740826845169 2023-01-22 13:08:28.796912: step: 732/464, loss: 0.008233455941081047 2023-01-22 13:08:29.385703: step: 734/464, loss: 0.004605564288794994 2023-01-22 13:08:30.121512: step: 736/464, loss: 0.18734845519065857 2023-01-22 13:08:30.768384: step: 738/464, loss: 0.0029743590857833624 2023-01-22 13:08:31.405151: step: 740/464, loss: 0.0211980901658535 2023-01-22 13:08:32.046896: step: 742/464, loss: 0.02819528616964817 2023-01-22 13:08:32.607688: step: 744/464, loss: 0.05711721256375313 2023-01-22 13:08:33.196064: step: 746/464, loss: 0.04029686003923416 2023-01-22 13:08:33.774834: step: 748/464, loss: 0.049593035131692886 2023-01-22 13:08:34.403962: step: 750/464, loss: 0.025946997106075287 2023-01-22 13:08:35.091560: step: 752/464, loss: 0.01631246879696846 2023-01-22 13:08:35.762731: step: 754/464, loss: 0.08335703611373901 2023-01-22 13:08:36.390865: step: 756/464, loss: 0.017137154936790466 2023-01-22 13:08:36.966795: step: 758/464, loss: 0.03726530447602272 2023-01-22 13:08:37.516699: step: 760/464, loss: 0.011103518307209015 2023-01-22 13:08:38.128947: step: 762/464, loss: 0.025130685418844223 2023-01-22 13:08:38.823125: step: 764/464, loss: 0.021800890564918518 2023-01-22 13:08:39.433576: step: 766/464, loss: 0.015432463027536869 2023-01-22 13:08:40.002785: step: 768/464, loss: 0.4171266555786133 2023-01-22 13:08:40.557996: step: 770/464, loss: 0.0417230986058712 2023-01-22 13:08:41.190338: step: 772/464, loss: 0.0015053371898829937 2023-01-22 13:08:41.816083: step: 774/464, loss: 0.021203722804784775 2023-01-22 13:08:42.431866: step: 776/464, loss: 0.024750908836722374 2023-01-22 13:08:43.026137: step: 778/464, loss: 0.06008352339267731 2023-01-22 13:08:43.580799: step: 780/464, loss: 0.0718572586774826 2023-01-22 13:08:44.216040: step: 782/464, loss: 0.004066292196512222 2023-01-22 13:08:44.852192: step: 784/464, loss: 0.006483875680714846 2023-01-22 13:08:45.453037: step: 786/464, loss: 0.020831121131777763 2023-01-22 13:08:46.091056: step: 788/464, loss: 0.04707172140479088 2023-01-22 13:08:46.680995: step: 790/464, loss: 0.011199343018233776 2023-01-22 13:08:47.294054: step: 792/464, loss: 0.145524799823761 2023-01-22 13:08:47.975594: step: 794/464, loss: 0.0018645271193236113 2023-01-22 13:08:48.598159: step: 796/464, loss: 0.41246137022972107 2023-01-22 13:08:49.219693: step: 798/464, loss: 0.007940721698105335 2023-01-22 13:08:49.904607: step: 800/464, loss: 0.015825096517801285 2023-01-22 13:08:50.526136: step: 802/464, loss: 0.03498406708240509 2023-01-22 13:08:51.161818: step: 804/464, loss: 0.010817916132509708 2023-01-22 13:08:51.799597: step: 806/464, loss: 0.021481124684214592 2023-01-22 13:08:52.379849: step: 808/464, loss: 
0.00514302309602499 2023-01-22 13:08:52.986395: step: 810/464, loss: 0.001514037256129086 2023-01-22 13:08:53.594385: step: 812/464, loss: 0.0049459426663815975 2023-01-22 13:08:54.217175: step: 814/464, loss: 0.023048903793096542 2023-01-22 13:08:54.911779: step: 816/464, loss: 0.058704547584056854 2023-01-22 13:08:55.512316: step: 818/464, loss: 0.006693670060485601 2023-01-22 13:08:56.141657: step: 820/464, loss: 0.03921428322792053 2023-01-22 13:08:56.814300: step: 822/464, loss: 0.004509300924837589 2023-01-22 13:08:57.443761: step: 824/464, loss: 0.1113775372505188 2023-01-22 13:08:58.053557: step: 826/464, loss: 0.0070705353282392025 2023-01-22 13:08:58.714319: step: 828/464, loss: 0.008664405904710293 2023-01-22 13:08:59.311892: step: 830/464, loss: 0.0015526512870565057 2023-01-22 13:08:59.945206: step: 832/464, loss: 0.0028195951599627733 2023-01-22 13:09:00.530973: step: 834/464, loss: 0.0036235605366528034 2023-01-22 13:09:01.173875: step: 836/464, loss: 0.015041164122521877 2023-01-22 13:09:01.735265: step: 838/464, loss: 0.06809394806623459 2023-01-22 13:09:02.321390: step: 840/464, loss: 0.048990003764629364 2023-01-22 13:09:02.988474: step: 842/464, loss: 0.013416965492069721 2023-01-22 13:09:03.618224: step: 844/464, loss: 0.11701352894306183 2023-01-22 13:09:04.257091: step: 846/464, loss: 0.15656238794326782 2023-01-22 13:09:04.812236: step: 848/464, loss: 0.7169525623321533 2023-01-22 13:09:05.453899: step: 850/464, loss: 0.01056838221848011 2023-01-22 13:09:06.081919: step: 852/464, loss: 0.012577138841152191 2023-01-22 13:09:06.700255: step: 854/464, loss: 0.060538213700056076 2023-01-22 13:09:07.323499: step: 856/464, loss: 0.02752896212041378 2023-01-22 13:09:07.921186: step: 858/464, loss: 0.02793855592608452 2023-01-22 13:09:08.553921: step: 860/464, loss: 0.40132588148117065 2023-01-22 13:09:09.187074: step: 862/464, loss: 0.06512241810560226 2023-01-22 13:09:09.823243: step: 864/464, loss: 0.003781597362831235 2023-01-22 13:09:10.458768: step: 866/464, loss: 0.06454236805438995 2023-01-22 13:09:11.066324: step: 868/464, loss: 0.014481059275567532 2023-01-22 13:09:11.674378: step: 870/464, loss: 0.021092643961310387 2023-01-22 13:09:12.305723: step: 872/464, loss: 0.010052897967398167 2023-01-22 13:09:12.871374: step: 874/464, loss: 0.08746406435966492 2023-01-22 13:09:13.509112: step: 876/464, loss: 0.011426975019276142 2023-01-22 13:09:14.138444: step: 878/464, loss: 0.009791023097932339 2023-01-22 13:09:14.755813: step: 880/464, loss: 0.11707156896591187 2023-01-22 13:09:15.501187: step: 882/464, loss: 0.07980296015739441 2023-01-22 13:09:16.157553: step: 884/464, loss: 0.03473206236958504 2023-01-22 13:09:16.775786: step: 886/464, loss: 0.012438970617949963 2023-01-22 13:09:17.424393: step: 888/464, loss: 0.01267226878553629 2023-01-22 13:09:18.029031: step: 890/464, loss: 0.002416931791231036 2023-01-22 13:09:18.698356: step: 892/464, loss: 0.001159779028967023 2023-01-22 13:09:19.328307: step: 894/464, loss: 0.0028834762051701546 2023-01-22 13:09:20.013312: step: 896/464, loss: 0.045279476791620255 2023-01-22 13:09:20.632155: step: 898/464, loss: 0.015352551825344563 2023-01-22 13:09:21.261646: step: 900/464, loss: 0.1051325798034668 2023-01-22 13:09:21.907135: step: 902/464, loss: 0.03135009855031967 2023-01-22 13:09:22.535045: step: 904/464, loss: 0.12836480140686035 2023-01-22 13:09:23.124109: step: 906/464, loss: 0.003560137003660202 2023-01-22 13:09:23.841918: step: 908/464, loss: 0.18377456068992615 2023-01-22 13:09:24.434924: step: 910/464, loss: 
0.019912661984562874 2023-01-22 13:09:25.066288: step: 912/464, loss: 0.021315833553671837 2023-01-22 13:09:25.722888: step: 914/464, loss: 0.154340922832489 2023-01-22 13:09:26.336076: step: 916/464, loss: 0.006456819362938404 2023-01-22 13:09:26.944159: step: 918/464, loss: 0.02382693812251091 2023-01-22 13:09:27.556583: step: 920/464, loss: 0.033757977187633514 2023-01-22 13:09:28.172686: step: 922/464, loss: 0.0026924435514956713 2023-01-22 13:09:28.762682: step: 924/464, loss: 0.0012073888210579753 2023-01-22 13:09:29.344608: step: 926/464, loss: 0.019931938499212265 2023-01-22 13:09:29.979653: step: 928/464, loss: 0.05094848573207855 2023-01-22 13:09:30.465788: step: 930/464, loss: 0.0012850743951275945
==================================================
Loss: 0.068
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28598333333333337, 'r': 0.33916429475015814, 'f1': 0.3103117766203704}, 'combined': 0.2286507827729045, 'epoch': 27}
Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2985925999272198, 'r': 0.3133988445517101, 'f1': 0.3058166144415881}, 'combined': 0.1996522975110886, 'epoch': 27}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2800590701219512, 'r': 0.34861242884250476, 'f1': 0.3105980557903635}, 'combined': 0.22886172531921517, 'epoch': 27}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3212256375381405, 'r': 0.3168010419797639, 'f1': 0.31899799788808403}, 'combined': 0.2082577602792673, 'epoch': 27}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29498570261437906, 'r': 0.3425640417457306, 'f1': 0.31699956101843724}, 'combined': 0.23357862390832215, 'epoch': 27}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3087757842520618, 'r': 0.30735808092675393, 'f1': 0.30806530154554534}, 'combined': 0.20112035230434566, 'epoch': 27}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24228395061728392, 'r': 0.37380952380952376, 'f1': 0.2940074906367041}, 'combined': 0.19600499375780273, 'epoch': 27}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26282051282051283, 'r': 0.44565217391304346, 'f1': 0.3306451612903226}, 'combined': 0.1653225806451613, 'epoch': 27}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 27}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22}
******************************
Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 13:12:08.836318: step: 2/464, loss: 0.01975341886281967 2023-01-22 13:12:09.411679: step: 4/464, loss: 0.01888517662882805 2023-01-22 13:12:10.087984: step: 6/464, loss: 0.011520719155669212 2023-01-22 13:12:10.745096: step: 8/464, loss: 0.6255261301994324 2023-01-22 13:12:11.336583: step: 10/464, loss: 0.03990140184760094 2023-01-22 13:12:11.983959: step: 12/464, loss: 0.025017209351062775 2023-01-22 13:12:12.674083: step: 14/464, loss: 0.026359407231211662 2023-01-22 13:12:13.326388: step: 16/464, loss: 0.005490477662533522 2023-01-22 13:12:13.954578: step: 18/464, loss: 0.053964029997587204 2023-01-22 13:12:14.590025: step: 20/464, loss: 0.008139424957334995 2023-01-22 13:12:15.197710: step: 22/464, loss: 0.016445273533463478 2023-01-22 13:12:15.793387: step: 24/464, loss: 0.041273459792137146 2023-01-22 13:12:16.428704: step: 26/464, loss: 0.0012901576701551676 2023-01-22 13:12:17.090431: step: 28/464, loss: 0.014636056497693062 2023-01-22 13:12:17.734481: step: 30/464, loss: 0.04501689225435257 2023-01-22 13:12:18.397214: step: 32/464, loss: 0.05421580374240875 2023-01-22 13:12:18.969725: step: 34/464, loss: 0.0032574422657489777 2023-01-22 13:12:19.578403: step: 36/464, loss: 0.050775010138750076 2023-01-22 13:12:20.166012: step: 38/464, loss: 0.0015177004970610142 2023-01-22 13:12:20.804999: step: 40/464, loss: 0.0023237792775034904 2023-01-22 13:12:21.383779: step: 42/464, loss: 0.06346136331558228 2023-01-22 13:12:21.936377: step: 44/464, loss: 0.0019482603529468179 2023-01-22 13:12:22.555967: step: 46/464, loss: 0.021754002198576927 2023-01-22 13:12:23.110895: step: 48/464, loss: 0.10162035375833511 2023-01-22 13:12:23.760115: step: 50/464, loss: 0.13380345702171326 2023-01-22 13:12:24.359376: step: 52/464, loss: 0.005131959915161133 2023-01-22 13:12:24.935853: step: 54/464, loss: 6.972807022975758e-05 2023-01-22 13:12:25.544361: step: 56/464, loss: 0.063968226313591 2023-01-22 13:12:26.139459: step: 58/464, loss: 0.05545021966099739
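Note on reading the evaluation blocks above: for each language the log prints a 'template' block, a 'slot' block and a 'combined' score. The printed numbers are consistent with 'f1' being the usual harmonic mean 2*p*r/(p+r) and with 'combined' being the product of the template F1 and the slot F1 (e.g. 0.7368421 * 0.3103118 gives the 0.2286508 on the epoch-27 Dev Chinese line). A minimal standalone check in Python, with the values copied verbatim from that line (this snippet is not part of train.py):

def f1(p, r):
    # Harmonic mean of precision and recall; guard against p + r == 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Values copied verbatim from the "Dev Chinese" block of epoch 27 above.
template = {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}
slot = {'p': 0.28598333333333337, 'r': 0.33916429475015814, 'f1': 0.3103117766203704}
combined = 0.2286507827729045

assert abs(f1(template['p'], template['r']) - template['f1']) < 1e-9
assert abs(f1(slot['p'], slot['r']) - slot['f1']) < 1e-9
assert abs(template['f1'] * slot['f1'] - combined) < 1e-9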
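The single 'Loss:' figure printed at each epoch boundary (0.055 for epoch 26, 0.068 for epoch 27) is presumably an aggregate of the per-step losses above, most likely their mean; the aggregation code in train.py is not shown in this log, so treat that as an assumption. If the headline numbers need to be recomputed or re-plotted, the step lines can be parsed straight from the log text, for example (the file name below is illustrative):

import re
from statistics import mean

# Matches "step: <n>/<total>, loss: <value>", including losses in scientific notation.
STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9.]+(?:[eE][+-]?\d+)?)")

def epoch_mean_losses(log_text):
    # Split at the "Epoch: N command:" headers, then average the step losses in each
    # chunk; chunks without step lines (e.g. the parameter listing) are skipped.
    chunks = re.split(r"Epoch: \d+ command:", log_text)
    return [mean(float(x) for x in STEP_RE.findall(chunk))
            for chunk in chunks if STEP_RE.search(chunk)]

# Usage:
# with open("train.log", encoding="utf-8") as fh:
#     print(epoch_mean_losses(fh.read()))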
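The command line repeated at the start of every epoch pins down the run's hyperparameters: the slot model on top of xlm-roberta-large, batch size 16, at most 40 epochs, hidden sizes of 450 (event) and 350 (role), a weight of 0.1 on the p1 data, and two learning rates (2e-5 via --xlmr_learning_rate and 9e-4 via --learning_rate; the flag names suggest the pretrained encoder and the task-specific layers are optimized with separate rates, which is an inference, not something this log states). A sketch of an argument parser that accepts exactly these flags, purely for reference; train.py's real parser is not visible here:

import argparse

def build_parser():
    # Flags and defaults mirror the command line logged above; the actual
    # definitions inside train.py may differ.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default="slot")
    parser.add_argument("--xlmr_model_name", default="xlm-roberta-large")
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--xlmr_learning_rate", type=float, default=2e-5)  # assumed: encoder LR
    parser.add_argument("--learning_rate", type=float, default=9e-4)       # assumed: task-layer LR
    parser.add_argument("--max_epoch", type=int, default=40)
    parser.add_argument("--event_hidden_num", type=int, default=450)
    parser.add_argument("--role_hidden_num", type=int, default=350)
    parser.add_argument("--p1_data_weight", type=float, default=0.1)
    return parser

args = build_parser().parse_args([])  # [] keeps the sketch runnable without a real command line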
2023-01-22 13:12:26.699229: step: 60/464, loss: 0.00356881832703948 2023-01-22 13:12:27.309186: step: 62/464, loss: 0.03784513846039772 2023-01-22 13:12:27.939509: step: 64/464, loss: 0.03425294905900955 2023-01-22 13:12:28.533504: step: 66/464, loss: 0.10109174996614456 2023-01-22 13:12:29.186346: step: 68/464, loss: 0.013180552050471306 2023-01-22 13:12:29.859953: step: 70/464, loss: 0.0019827696960419416 2023-01-22 13:12:30.492456: step: 72/464, loss: 0.01931658200919628 2023-01-22 13:12:31.267369: step: 74/464, loss: 0.05979328230023384 2023-01-22 13:12:31.914944: step: 76/464, loss: 0.059981685131788254 2023-01-22 13:12:32.562982: step: 78/464, loss: 0.027963552623987198 2023-01-22 13:12:33.209946: step: 80/464, loss: 0.01616048812866211 2023-01-22 13:12:33.758011: step: 82/464, loss: 0.014934813603758812 2023-01-22 13:12:34.331196: step: 84/464, loss: 0.014981115236878395 2023-01-22 13:12:35.014412: step: 86/464, loss: 0.07403749227523804 2023-01-22 13:12:35.766893: step: 88/464, loss: 0.07628615945577621 2023-01-22 13:12:36.465184: step: 90/464, loss: 0.05683693662285805 2023-01-22 13:12:37.073780: step: 92/464, loss: 0.014785525389015675 2023-01-22 13:12:37.804724: step: 94/464, loss: 0.0014446486020460725 2023-01-22 13:12:38.371035: step: 96/464, loss: 0.01637359708547592 2023-01-22 13:12:39.004684: step: 98/464, loss: 0.5492805242538452 2023-01-22 13:12:39.591153: step: 100/464, loss: 0.04689208045601845 2023-01-22 13:12:40.265896: step: 102/464, loss: 0.017917821183800697 2023-01-22 13:12:40.863257: step: 104/464, loss: 0.18207520246505737 2023-01-22 13:12:41.525757: step: 106/464, loss: 0.028904229402542114 2023-01-22 13:12:42.108828: step: 108/464, loss: 0.08580604940652847 2023-01-22 13:12:42.693656: step: 110/464, loss: 0.06547223031520844 2023-01-22 13:12:43.301738: step: 112/464, loss: 0.012034471146762371 2023-01-22 13:12:43.874552: step: 114/464, loss: 0.011492324993014336 2023-01-22 13:12:44.438044: step: 116/464, loss: 0.012730980291962624 2023-01-22 13:12:45.084354: step: 118/464, loss: 0.01702752150595188 2023-01-22 13:12:45.697184: step: 120/464, loss: 0.003987180069088936 2023-01-22 13:12:46.300746: step: 122/464, loss: 0.01272681262344122 2023-01-22 13:12:46.906832: step: 124/464, loss: 0.003189630573615432 2023-01-22 13:12:47.529135: step: 126/464, loss: 0.012792550027370453 2023-01-22 13:12:48.107746: step: 128/464, loss: 0.033105701208114624 2023-01-22 13:12:48.783879: step: 130/464, loss: 0.3838423490524292 2023-01-22 13:12:49.426493: step: 132/464, loss: 0.012216437608003616 2023-01-22 13:12:50.146353: step: 134/464, loss: 0.1442188322544098 2023-01-22 13:12:50.794430: step: 136/464, loss: 0.027723701670765877 2023-01-22 13:12:51.378159: step: 138/464, loss: 0.027388494461774826 2023-01-22 13:12:52.035327: step: 140/464, loss: 0.028884828090667725 2023-01-22 13:12:52.669692: step: 142/464, loss: 0.002762697171419859 2023-01-22 13:12:53.329499: step: 144/464, loss: 0.017410848289728165 2023-01-22 13:12:53.979154: step: 146/464, loss: 0.24691028892993927 2023-01-22 13:12:54.586519: step: 148/464, loss: 0.016644123941659927 2023-01-22 13:12:55.133358: step: 150/464, loss: 0.0072594149969518185 2023-01-22 13:12:55.854168: step: 152/464, loss: 0.02167939767241478 2023-01-22 13:12:56.402163: step: 154/464, loss: 0.00871216133236885 2023-01-22 13:12:56.969910: step: 156/464, loss: 0.0033676326274871826 2023-01-22 13:12:57.643507: step: 158/464, loss: 0.20985104143619537 2023-01-22 13:12:58.272098: step: 160/464, loss: 0.005667650140821934 2023-01-22 13:12:58.890051: 
step: 162/464, loss: 0.0029226827900856733 2023-01-22 13:12:59.466966: step: 164/464, loss: 0.07242726534605026 2023-01-22 13:13:00.131349: step: 166/464, loss: 0.34875330328941345 2023-01-22 13:13:00.789440: step: 168/464, loss: 0.008606811985373497 2023-01-22 13:13:01.444223: step: 170/464, loss: 0.008142728358507156 2023-01-22 13:13:02.061682: step: 172/464, loss: 0.007337086368352175 2023-01-22 13:13:02.678458: step: 174/464, loss: 0.005187767092138529 2023-01-22 13:13:03.331556: step: 176/464, loss: 0.02520335279405117 2023-01-22 13:13:03.942025: step: 178/464, loss: 0.007265692111104727 2023-01-22 13:13:04.584484: step: 180/464, loss: 0.010373773984611034 2023-01-22 13:13:05.222808: step: 182/464, loss: 0.046069592237472534 2023-01-22 13:13:05.844881: step: 184/464, loss: 0.026207102462649345 2023-01-22 13:13:06.465618: step: 186/464, loss: 0.018650302663445473 2023-01-22 13:13:07.119408: step: 188/464, loss: 0.0171427670866251 2023-01-22 13:13:07.705901: step: 190/464, loss: 0.03326038271188736 2023-01-22 13:13:08.307905: step: 192/464, loss: 0.01100210566073656 2023-01-22 13:13:09.027345: step: 194/464, loss: 0.032505106180906296 2023-01-22 13:13:09.625156: step: 196/464, loss: 0.01049841195344925 2023-01-22 13:13:10.270609: step: 198/464, loss: 0.04443327710032463 2023-01-22 13:13:10.853282: step: 200/464, loss: 0.0108529943972826 2023-01-22 13:13:11.489581: step: 202/464, loss: 0.008697934448719025 2023-01-22 13:13:12.105980: step: 204/464, loss: 0.02991078607738018 2023-01-22 13:13:12.743562: step: 206/464, loss: 0.017947711050510406 2023-01-22 13:13:13.342615: step: 208/464, loss: 0.028416339308023453 2023-01-22 13:13:13.976870: step: 210/464, loss: 0.027255281805992126 2023-01-22 13:13:14.572672: step: 212/464, loss: 0.04304584860801697 2023-01-22 13:13:15.266225: step: 214/464, loss: 0.02375870756804943 2023-01-22 13:13:15.872560: step: 216/464, loss: 0.04932486638426781 2023-01-22 13:13:16.472479: step: 218/464, loss: 0.018953483551740646 2023-01-22 13:13:17.072542: step: 220/464, loss: 0.025900574401021004 2023-01-22 13:13:17.728888: step: 222/464, loss: 0.004785214085131884 2023-01-22 13:13:18.315530: step: 224/464, loss: 0.007400230038911104 2023-01-22 13:13:18.978246: step: 226/464, loss: 0.04882337525486946 2023-01-22 13:13:19.588927: step: 228/464, loss: 0.000596017693169415 2023-01-22 13:13:20.242936: step: 230/464, loss: 0.042570240795612335 2023-01-22 13:13:20.865288: step: 232/464, loss: 0.056120481342077255 2023-01-22 13:13:21.452986: step: 234/464, loss: 0.003333766246214509 2023-01-22 13:13:22.063675: step: 236/464, loss: 0.02945108525454998 2023-01-22 13:13:22.665239: step: 238/464, loss: 0.02998744510114193 2023-01-22 13:13:23.314104: step: 240/464, loss: 0.24635662138462067 2023-01-22 13:13:23.892704: step: 242/464, loss: 0.032885901629924774 2023-01-22 13:13:24.526903: step: 244/464, loss: 0.015478289686143398 2023-01-22 13:13:25.090671: step: 246/464, loss: 0.009795871563255787 2023-01-22 13:13:25.716845: step: 248/464, loss: 0.03254992142319679 2023-01-22 13:13:26.345816: step: 250/464, loss: 0.0371343195438385 2023-01-22 13:13:26.964389: step: 252/464, loss: 0.02613062411546707 2023-01-22 13:13:27.591540: step: 254/464, loss: 0.028173061087727547 2023-01-22 13:13:28.176887: step: 256/464, loss: 0.010123823769390583 2023-01-22 13:13:28.733857: step: 258/464, loss: 0.007183664478361607 2023-01-22 13:13:29.310688: step: 260/464, loss: 0.014079362154006958 2023-01-22 13:13:29.941961: step: 262/464, loss: 0.009176979772746563 2023-01-22 13:13:30.532167: step: 
264/464, loss: 0.06820368766784668 2023-01-22 13:13:31.128639: step: 266/464, loss: 0.009957423433661461 2023-01-22 13:13:31.750599: step: 268/464, loss: 0.01703375019133091 2023-01-22 13:13:32.377563: step: 270/464, loss: 0.05943138897418976 2023-01-22 13:13:33.006315: step: 272/464, loss: 0.02275952324271202 2023-01-22 13:13:33.635286: step: 274/464, loss: 0.03507295250892639 2023-01-22 13:13:34.249574: step: 276/464, loss: 0.4227091073989868 2023-01-22 13:13:34.870326: step: 278/464, loss: 0.023873407393693924 2023-01-22 13:13:35.493683: step: 280/464, loss: 0.020889200270175934 2023-01-22 13:13:36.132991: step: 282/464, loss: 0.006823091302067041 2023-01-22 13:13:36.684760: step: 284/464, loss: 0.02021654322743416 2023-01-22 13:13:37.346267: step: 286/464, loss: 0.019388005137443542 2023-01-22 13:13:37.952173: step: 288/464, loss: 0.04393423721194267 2023-01-22 13:13:38.530269: step: 290/464, loss: 0.1124618798494339 2023-01-22 13:13:39.186411: step: 292/464, loss: 0.06299597769975662 2023-01-22 13:13:39.890879: step: 294/464, loss: 0.005311224143952131 2023-01-22 13:13:40.445943: step: 296/464, loss: 0.013554797507822514 2023-01-22 13:13:41.009143: step: 298/464, loss: 0.051145315170288086 2023-01-22 13:13:41.555921: step: 300/464, loss: 0.003098517656326294 2023-01-22 13:13:42.176603: step: 302/464, loss: 0.009821097366511822 2023-01-22 13:13:42.787417: step: 304/464, loss: 0.02515912801027298 2023-01-22 13:13:43.347638: step: 306/464, loss: 0.004545064643025398 2023-01-22 13:13:43.959722: step: 308/464, loss: 0.020346971228718758 2023-01-22 13:13:44.589926: step: 310/464, loss: 0.03274424001574516 2023-01-22 13:13:45.213903: step: 312/464, loss: 0.012141672894358635 2023-01-22 13:13:45.829444: step: 314/464, loss: 0.0037430974189192057 2023-01-22 13:13:46.406186: step: 316/464, loss: 0.010012580081820488 2023-01-22 13:13:47.017596: step: 318/464, loss: 0.23996806144714355 2023-01-22 13:13:47.597122: step: 320/464, loss: 0.0048306286334991455 2023-01-22 13:13:48.188972: step: 322/464, loss: 0.028304725885391235 2023-01-22 13:13:48.813179: step: 324/464, loss: 0.007011666893959045 2023-01-22 13:13:49.475795: step: 326/464, loss: 0.026433467864990234 2023-01-22 13:13:50.103701: step: 328/464, loss: 0.014295603148639202 2023-01-22 13:13:50.735247: step: 330/464, loss: 0.0036247014068067074 2023-01-22 13:13:51.345261: step: 332/464, loss: 0.03816038370132446 2023-01-22 13:13:51.951591: step: 334/464, loss: 0.32058051228523254 2023-01-22 13:13:52.573460: step: 336/464, loss: 0.006642151158303022 2023-01-22 13:13:53.210557: step: 338/464, loss: 0.09794043749570847 2023-01-22 13:13:53.793233: step: 340/464, loss: 0.04090559482574463 2023-01-22 13:13:54.355616: step: 342/464, loss: 0.010088179260492325 2023-01-22 13:13:54.954969: step: 344/464, loss: 0.0022901766933500767 2023-01-22 13:13:55.586937: step: 346/464, loss: 0.030262116342782974 2023-01-22 13:13:56.230361: step: 348/464, loss: 0.024959390982985497 2023-01-22 13:13:56.905284: step: 350/464, loss: 0.038163553923368454 2023-01-22 13:13:57.501211: step: 352/464, loss: 0.03988020494580269 2023-01-22 13:13:58.074575: step: 354/464, loss: 0.00906051229685545 2023-01-22 13:13:58.723391: step: 356/464, loss: 0.17054124176502228 2023-01-22 13:13:59.338648: step: 358/464, loss: 0.018445579335093498 2023-01-22 13:13:59.965150: step: 360/464, loss: 0.01864020898938179 2023-01-22 13:14:00.621028: step: 362/464, loss: 0.003797942539677024 2023-01-22 13:14:01.217320: step: 364/464, loss: 0.04716122895479202 2023-01-22 13:14:01.813916: step: 
366/464, loss: 0.020112771540880203 2023-01-22 13:14:02.459986: step: 368/464, loss: 0.15953640639781952 2023-01-22 13:14:03.125958: step: 370/464, loss: 0.018856249749660492 2023-01-22 13:14:03.799192: step: 372/464, loss: 0.01189302746206522 2023-01-22 13:14:04.383689: step: 374/464, loss: 0.014004247263073921 2023-01-22 13:14:05.042917: step: 376/464, loss: 0.05325167998671532 2023-01-22 13:14:05.653982: step: 378/464, loss: 0.06514342874288559 2023-01-22 13:14:06.287853: step: 380/464, loss: 0.03061126358807087 2023-01-22 13:14:06.947860: step: 382/464, loss: 0.11431025713682175 2023-01-22 13:14:07.504338: step: 384/464, loss: 0.020242024213075638 2023-01-22 13:14:08.077041: step: 386/464, loss: 0.005004480481147766 2023-01-22 13:14:08.723424: step: 388/464, loss: 0.004809098783880472 2023-01-22 13:14:09.389456: step: 390/464, loss: 0.06833072751760483 2023-01-22 13:14:09.979524: step: 392/464, loss: 0.05740839242935181 2023-01-22 13:14:10.563617: step: 394/464, loss: 0.013233033008873463 2023-01-22 13:14:11.231249: step: 396/464, loss: 0.04136299714446068 2023-01-22 13:14:11.816549: step: 398/464, loss: 0.06713368743658066 2023-01-22 13:14:12.480735: step: 400/464, loss: 0.053051579743623734 2023-01-22 13:14:13.085609: step: 402/464, loss: 0.034219738095998764 2023-01-22 13:14:13.709656: step: 404/464, loss: 0.017612462863326073 2023-01-22 13:14:14.380590: step: 406/464, loss: 0.00702511565759778 2023-01-22 13:14:15.051216: step: 408/464, loss: 0.003682539099827409 2023-01-22 13:14:15.664560: step: 410/464, loss: 0.013064548373222351 2023-01-22 13:14:16.282804: step: 412/464, loss: 0.0051510087214410305 2023-01-22 13:14:16.969987: step: 414/464, loss: 0.01193144265562296 2023-01-22 13:14:17.646918: step: 416/464, loss: 8.737286567687988 2023-01-22 13:14:18.235951: step: 418/464, loss: 0.04521564766764641 2023-01-22 13:14:18.886188: step: 420/464, loss: 0.08151457458734512 2023-01-22 13:14:19.446190: step: 422/464, loss: 0.09461862593889236 2023-01-22 13:14:20.057870: step: 424/464, loss: 0.0011021374957635999 2023-01-22 13:14:20.715543: step: 426/464, loss: 0.1207464188337326 2023-01-22 13:14:21.368350: step: 428/464, loss: 0.008433245122432709 2023-01-22 13:14:22.019950: step: 430/464, loss: 0.06080570071935654 2023-01-22 13:14:22.758562: step: 432/464, loss: 0.06965363770723343 2023-01-22 13:14:23.465052: step: 434/464, loss: 0.03309911489486694 2023-01-22 13:14:24.112462: step: 436/464, loss: 0.012495990842580795 2023-01-22 13:14:24.680242: step: 438/464, loss: 0.019360091537237167 2023-01-22 13:14:25.254601: step: 440/464, loss: 0.27905771136283875 2023-01-22 13:14:25.836260: step: 442/464, loss: 0.0017130867345258594 2023-01-22 13:14:26.495429: step: 444/464, loss: 0.10627894848585129 2023-01-22 13:14:27.142864: step: 446/464, loss: 0.006129761692136526 2023-01-22 13:14:27.808742: step: 448/464, loss: 0.018852759152650833 2023-01-22 13:14:28.494347: step: 450/464, loss: 0.015564347617328167 2023-01-22 13:14:29.110180: step: 452/464, loss: 0.06356043368577957 2023-01-22 13:14:29.652273: step: 454/464, loss: 0.02592889964580536 2023-01-22 13:14:30.287717: step: 456/464, loss: 0.007412649691104889 2023-01-22 13:14:30.886219: step: 458/464, loss: 0.027132531628012657 2023-01-22 13:14:31.502379: step: 460/464, loss: 0.0023493319749832153 2023-01-22 13:14:32.081255: step: 462/464, loss: 0.0009935208363458514 2023-01-22 13:14:32.740745: step: 464/464, loss: 0.0038652773946523666 2023-01-22 13:14:33.370220: step: 466/464, loss: 0.002001130022108555 2023-01-22 13:14:33.974622: step: 
468/464, loss: 0.009971032850444317 2023-01-22 13:14:34.529479: step: 470/464, loss: 0.027622388675808907 2023-01-22 13:14:35.150711: step: 472/464, loss: 0.014356517232954502 2023-01-22 13:14:35.790425: step: 474/464, loss: 0.06259731203317642 2023-01-22 13:14:36.401472: step: 476/464, loss: 0.007335342466831207 2023-01-22 13:14:37.049220: step: 478/464, loss: 0.03115548938512802 2023-01-22 13:14:37.704201: step: 480/464, loss: 0.023620828986167908 2023-01-22 13:14:38.368982: step: 482/464, loss: 0.00033822975819930434 2023-01-22 13:14:39.026623: step: 484/464, loss: 0.01832190714776516 2023-01-22 13:14:39.578827: step: 486/464, loss: 0.000453435379313305 2023-01-22 13:14:40.166955: step: 488/464, loss: 0.05982121825218201 2023-01-22 13:14:40.830697: step: 490/464, loss: 0.23867210745811462 2023-01-22 13:14:41.504641: step: 492/464, loss: 0.037391725927591324 2023-01-22 13:14:42.162151: step: 494/464, loss: 0.06826174259185791 2023-01-22 13:14:42.851889: step: 496/464, loss: 0.025316348299384117 2023-01-22 13:14:43.450326: step: 498/464, loss: 0.06300205737352371 2023-01-22 13:14:44.137416: step: 500/464, loss: 0.015299019403755665 2023-01-22 13:14:44.742512: step: 502/464, loss: 0.009448371827602386 2023-01-22 13:14:45.345767: step: 504/464, loss: 0.07433760166168213 2023-01-22 13:14:45.985430: step: 506/464, loss: 0.013551932759582996 2023-01-22 13:14:46.597999: step: 508/464, loss: 0.0019346019253134727 2023-01-22 13:14:47.256431: step: 510/464, loss: 0.013648522086441517 2023-01-22 13:14:47.901546: step: 512/464, loss: 0.03566650673747063 2023-01-22 13:14:48.576828: step: 514/464, loss: 0.014076773077249527 2023-01-22 13:14:49.207301: step: 516/464, loss: 0.0006180580821819603 2023-01-22 13:14:49.841488: step: 518/464, loss: 0.03681022673845291 2023-01-22 13:14:50.466272: step: 520/464, loss: 0.0250330101698637 2023-01-22 13:14:51.079571: step: 522/464, loss: 0.038896046578884125 2023-01-22 13:14:51.681551: step: 524/464, loss: 0.006692049093544483 2023-01-22 13:14:52.292867: step: 526/464, loss: 0.07458885759115219 2023-01-22 13:14:52.900732: step: 528/464, loss: 0.041242264211177826 2023-01-22 13:14:53.536861: step: 530/464, loss: 0.20899175107479095 2023-01-22 13:14:54.228620: step: 532/464, loss: 0.04120601713657379 2023-01-22 13:14:54.937451: step: 534/464, loss: 0.0675627663731575 2023-01-22 13:14:55.521184: step: 536/464, loss: 0.00530249485746026 2023-01-22 13:14:56.156498: step: 538/464, loss: 0.05314037576317787 2023-01-22 13:14:56.711393: step: 540/464, loss: 0.007039431016892195 2023-01-22 13:14:57.389413: step: 542/464, loss: 0.002162415534257889 2023-01-22 13:14:57.991075: step: 544/464, loss: 0.03981251269578934 2023-01-22 13:14:58.599343: step: 546/464, loss: 0.013181759044528008 2023-01-22 13:14:59.177524: step: 548/464, loss: 0.017265036702156067 2023-01-22 13:14:59.817652: step: 550/464, loss: 0.0014363001100718975 2023-01-22 13:15:00.482577: step: 552/464, loss: 0.05713506042957306 2023-01-22 13:15:01.122083: step: 554/464, loss: 0.1750723123550415 2023-01-22 13:15:01.708421: step: 556/464, loss: 0.0009091845713555813 2023-01-22 13:15:02.342276: step: 558/464, loss: 0.01886255480349064 2023-01-22 13:15:02.901356: step: 560/464, loss: 0.0021657454781234264 2023-01-22 13:15:03.528914: step: 562/464, loss: 0.029084540903568268 2023-01-22 13:15:04.128822: step: 564/464, loss: 0.03066820092499256 2023-01-22 13:15:04.792297: step: 566/464, loss: 0.021754052489995956 2023-01-22 13:15:05.343475: step: 568/464, loss: 0.0076250056736171246 2023-01-22 13:15:05.984199: step: 
570/464, loss: 0.09450612962245941 2023-01-22 13:15:06.656296: step: 572/464, loss: 0.031405095010995865 2023-01-22 13:15:07.185340: step: 574/464, loss: 0.02798873744904995 2023-01-22 13:15:07.752815: step: 576/464, loss: 0.11750727891921997 2023-01-22 13:15:08.328340: step: 578/464, loss: 0.022053493186831474 2023-01-22 13:15:08.948721: step: 580/464, loss: 0.0028090430423617363 2023-01-22 13:15:09.630124: step: 582/464, loss: 0.001743658329360187 2023-01-22 13:15:10.185039: step: 584/464, loss: 0.12413953244686127 2023-01-22 13:15:10.787516: step: 586/464, loss: 0.07030326128005981 2023-01-22 13:15:11.477699: step: 588/464, loss: 0.12284321337938309 2023-01-22 13:15:12.095500: step: 590/464, loss: 0.03481021523475647 2023-01-22 13:15:12.748199: step: 592/464, loss: 1.2914907932281494 2023-01-22 13:15:13.374764: step: 594/464, loss: 0.00812312588095665 2023-01-22 13:15:13.906078: step: 596/464, loss: 0.0014134242665022612 2023-01-22 13:15:14.567472: step: 598/464, loss: 0.020025134086608887 2023-01-22 13:15:15.162935: step: 600/464, loss: 0.06840483099222183 2023-01-22 13:15:15.836183: step: 602/464, loss: 0.10379452258348465 2023-01-22 13:15:16.502777: step: 604/464, loss: 0.01580253802239895 2023-01-22 13:15:17.125939: step: 606/464, loss: 0.051684606820344925 2023-01-22 13:15:17.770979: step: 608/464, loss: 0.21527676284313202 2023-01-22 13:15:18.477085: step: 610/464, loss: 0.013423663564026356 2023-01-22 13:15:19.013990: step: 612/464, loss: 0.005301930010318756 2023-01-22 13:15:19.775453: step: 614/464, loss: 0.27966079115867615 2023-01-22 13:15:20.426580: step: 616/464, loss: 0.07844717800617218 2023-01-22 13:15:21.076917: step: 618/464, loss: 0.015785658732056618 2023-01-22 13:15:21.742775: step: 620/464, loss: 0.05178140103816986 2023-01-22 13:15:22.401694: step: 622/464, loss: 0.003366190241649747 2023-01-22 13:15:23.070358: step: 624/464, loss: 0.2321268618106842 2023-01-22 13:15:23.691258: step: 626/464, loss: 0.04104392230510712 2023-01-22 13:15:24.319378: step: 628/464, loss: 0.019443267956376076 2023-01-22 13:15:24.888417: step: 630/464, loss: 0.026274191215634346 2023-01-22 13:15:25.560364: step: 632/464, loss: 0.026117945089936256 2023-01-22 13:15:26.211397: step: 634/464, loss: 0.003952042665332556 2023-01-22 13:15:26.783560: step: 636/464, loss: 0.0019362128805369139 2023-01-22 13:15:27.372071: step: 638/464, loss: 0.05736561492085457 2023-01-22 13:15:27.992374: step: 640/464, loss: 0.42328062653541565 2023-01-22 13:15:28.579604: step: 642/464, loss: 0.03586931154131889 2023-01-22 13:15:29.297650: step: 644/464, loss: 0.0722898617386818 2023-01-22 13:15:29.973770: step: 646/464, loss: 0.00392839265987277 2023-01-22 13:15:30.647626: step: 648/464, loss: 0.015032708644866943 2023-01-22 13:15:31.317670: step: 650/464, loss: 0.008987652137875557 2023-01-22 13:15:31.924229: step: 652/464, loss: 0.02345702238380909 2023-01-22 13:15:32.505831: step: 654/464, loss: 0.000292833661660552 2023-01-22 13:15:33.132618: step: 656/464, loss: 0.00433304812759161 2023-01-22 13:15:33.718870: step: 658/464, loss: 0.02030690759420395 2023-01-22 13:15:34.369671: step: 660/464, loss: 0.005063401069492102 2023-01-22 13:15:35.048766: step: 662/464, loss: 0.01505844946950674 2023-01-22 13:15:35.654399: step: 664/464, loss: 0.002417447743937373 2023-01-22 13:15:36.317705: step: 666/464, loss: 0.1500273495912552 2023-01-22 13:15:36.922394: step: 668/464, loss: 0.03942999243736267 2023-01-22 13:15:37.585880: step: 670/464, loss: 0.03954707458615303 2023-01-22 13:15:38.226237: step: 672/464, loss: 
0.0012041820446029305 2023-01-22 13:15:38.829419: step: 674/464, loss: 0.016088049858808517 2023-01-22 13:15:39.483366: step: 676/464, loss: 0.019116047769784927 2023-01-22 13:15:40.157414: step: 678/464, loss: 0.03320910409092903 2023-01-22 13:15:40.746897: step: 680/464, loss: 0.004189823288470507 2023-01-22 13:15:41.414920: step: 682/464, loss: 0.18053016066551208 2023-01-22 13:15:42.000182: step: 684/464, loss: 0.009326234459877014 2023-01-22 13:15:42.590085: step: 686/464, loss: 0.012497087940573692 2023-01-22 13:15:43.198466: step: 688/464, loss: 0.10844147205352783 2023-01-22 13:15:43.761884: step: 690/464, loss: 0.008908872492611408 2023-01-22 13:15:44.371391: step: 692/464, loss: 0.020197900012135506 2023-01-22 13:15:44.979861: step: 694/464, loss: 0.041542936116456985 2023-01-22 13:15:45.593036: step: 696/464, loss: 0.010704525746405125 2023-01-22 13:15:46.231247: step: 698/464, loss: 0.0005475578946061432 2023-01-22 13:15:46.883131: step: 700/464, loss: 0.05417340248823166 2023-01-22 13:15:47.533245: step: 702/464, loss: 0.014135652221739292 2023-01-22 13:15:48.234955: step: 704/464, loss: 0.018218714743852615 2023-01-22 13:15:48.880765: step: 706/464, loss: 0.030065085738897324 2023-01-22 13:15:49.503114: step: 708/464, loss: 0.0010269619524478912 2023-01-22 13:15:50.135616: step: 710/464, loss: 0.0035107091534882784 2023-01-22 13:15:50.843096: step: 712/464, loss: 0.010718021541833878 2023-01-22 13:15:51.492633: step: 714/464, loss: 0.01341515127569437 2023-01-22 13:15:52.129534: step: 716/464, loss: 0.029262878000736237 2023-01-22 13:15:52.759872: step: 718/464, loss: 0.02531573548913002 2023-01-22 13:15:53.355582: step: 720/464, loss: 0.01895340532064438 2023-01-22 13:15:53.965452: step: 722/464, loss: 0.023700516670942307 2023-01-22 13:15:54.578381: step: 724/464, loss: 0.00893034040927887 2023-01-22 13:15:55.144559: step: 726/464, loss: 0.09975457936525345 2023-01-22 13:15:55.778068: step: 728/464, loss: 0.07586190849542618 2023-01-22 13:15:56.420022: step: 730/464, loss: 0.07693785429000854 2023-01-22 13:15:56.991186: step: 732/464, loss: 0.02519100159406662 2023-01-22 13:15:57.631555: step: 734/464, loss: 0.016743384301662445 2023-01-22 13:15:58.252853: step: 736/464, loss: 0.02917795442044735 2023-01-22 13:15:58.881685: step: 738/464, loss: 0.01388038881123066 2023-01-22 13:15:59.520929: step: 740/464, loss: 0.0042625125497579575 2023-01-22 13:16:00.153179: step: 742/464, loss: 0.0025034870486706495 2023-01-22 13:16:00.792895: step: 744/464, loss: 0.11787613481283188 2023-01-22 13:16:01.383534: step: 746/464, loss: 0.014147958718240261 2023-01-22 13:16:02.018762: step: 748/464, loss: 0.003653917694464326 2023-01-22 13:16:02.612156: step: 750/464, loss: 0.03221333026885986 2023-01-22 13:16:03.252214: step: 752/464, loss: 0.021750828251242638 2023-01-22 13:16:03.857192: step: 754/464, loss: 0.0009350177133455873 2023-01-22 13:16:04.477221: step: 756/464, loss: 0.06062895804643631 2023-01-22 13:16:05.072982: step: 758/464, loss: 0.030687013640999794 2023-01-22 13:16:05.745766: step: 760/464, loss: 0.016679823398590088 2023-01-22 13:16:06.371888: step: 762/464, loss: 0.0034643004182726145 2023-01-22 13:16:07.118905: step: 764/464, loss: 0.0504496768116951 2023-01-22 13:16:07.714007: step: 766/464, loss: 0.018822191283106804 2023-01-22 13:16:08.308107: step: 768/464, loss: 0.045075658708810806 2023-01-22 13:16:08.834227: step: 770/464, loss: 0.07747234404087067 2023-01-22 13:16:09.406453: step: 772/464, loss: 0.002193465130403638 2023-01-22 13:16:10.030405: step: 774/464, 
loss: 0.0380885973572731 2023-01-22 13:16:10.631103: step: 776/464, loss: 0.0033210322726517916 2023-01-22 13:16:11.240518: step: 778/464, loss: 0.027617856860160828 2023-01-22 13:16:11.830090: step: 780/464, loss: 0.04869770258665085 2023-01-22 13:16:12.447599: step: 782/464, loss: 0.037156715989112854 2023-01-22 13:16:13.056428: step: 784/464, loss: 0.046691734343767166 2023-01-22 13:16:13.620645: step: 786/464, loss: 0.013460393995046616 2023-01-22 13:16:14.222203: step: 788/464, loss: 0.040278829634189606 2023-01-22 13:16:14.888553: step: 790/464, loss: 0.09691322594881058 2023-01-22 13:16:15.486205: step: 792/464, loss: 0.03089674934744835 2023-01-22 13:16:16.056623: step: 794/464, loss: 0.002035632263869047 2023-01-22 13:16:16.669713: step: 796/464, loss: 0.013593480922281742 2023-01-22 13:16:17.292367: step: 798/464, loss: 0.0064804041758179665 2023-01-22 13:16:17.924661: step: 800/464, loss: 0.016399724408984184 2023-01-22 13:16:18.517141: step: 802/464, loss: 0.009268310852348804 2023-01-22 13:16:19.158641: step: 804/464, loss: 0.014182067476212978 2023-01-22 13:16:19.756044: step: 806/464, loss: 0.0030377162620425224 2023-01-22 13:16:20.325095: step: 808/464, loss: 0.015894349664449692 2023-01-22 13:16:20.953276: step: 810/464, loss: 0.003473340068012476 2023-01-22 13:16:21.571703: step: 812/464, loss: 0.02591157890856266 2023-01-22 13:16:22.197613: step: 814/464, loss: 0.058492448180913925 2023-01-22 13:16:22.976692: step: 816/464, loss: 0.24168823659420013 2023-01-22 13:16:23.606302: step: 818/464, loss: 0.00014129285409580916 2023-01-22 13:16:24.280975: step: 820/464, loss: 0.015311875380575657 2023-01-22 13:16:24.865584: step: 822/464, loss: 0.14531171321868896 2023-01-22 13:16:25.478348: step: 824/464, loss: 0.01077133696526289 2023-01-22 13:16:26.042352: step: 826/464, loss: 0.003518162528052926 2023-01-22 13:16:26.691088: step: 828/464, loss: 0.004680001176893711 2023-01-22 13:16:27.301962: step: 830/464, loss: 0.7380512952804565 2023-01-22 13:16:28.050952: step: 832/464, loss: 0.009875763207674026 2023-01-22 13:16:28.709130: step: 834/464, loss: 0.00943849328905344 2023-01-22 13:16:29.338997: step: 836/464, loss: 0.03605659678578377 2023-01-22 13:16:29.944872: step: 838/464, loss: 0.049411047250032425 2023-01-22 13:16:30.582599: step: 840/464, loss: 0.012199982069432735 2023-01-22 13:16:31.172707: step: 842/464, loss: 0.03667812421917915 2023-01-22 13:16:31.807052: step: 844/464, loss: 0.026908107101917267 2023-01-22 13:16:32.345747: step: 846/464, loss: 0.02269853465259075 2023-01-22 13:16:32.954120: step: 848/464, loss: 0.0023813126608729362 2023-01-22 13:16:33.547051: step: 850/464, loss: 0.0199296772480011 2023-01-22 13:16:34.178073: step: 852/464, loss: 0.02812843583524227 2023-01-22 13:16:34.855129: step: 854/464, loss: 0.033090393990278244 2023-01-22 13:16:35.456764: step: 856/464, loss: 0.0023359793704003096 2023-01-22 13:16:36.038752: step: 858/464, loss: 0.08649339526891708 2023-01-22 13:16:36.709231: step: 860/464, loss: 0.0031652937177568674 2023-01-22 13:16:37.338423: step: 862/464, loss: 0.03506851941347122 2023-01-22 13:16:37.994066: step: 864/464, loss: 0.023619432002305984 2023-01-22 13:16:38.618176: step: 866/464, loss: 0.0031147352419793606 2023-01-22 13:16:39.172996: step: 868/464, loss: 0.004032780881971121 2023-01-22 13:16:39.787664: step: 870/464, loss: 0.03890826925635338 2023-01-22 13:16:40.430306: step: 872/464, loss: 0.030549127608537674 2023-01-22 13:16:41.115502: step: 874/464, loss: 0.08162281662225723 2023-01-22 13:16:41.691534: step: 
876/464, loss: 0.0021298760548233986 2023-01-22 13:16:42.385278: step: 878/464, loss: 0.007051995489746332 2023-01-22 13:16:42.948904: step: 880/464, loss: 0.02699892781674862 2023-01-22 13:16:43.505375: step: 882/464, loss: 0.007444900926202536 2023-01-22 13:16:44.136958: step: 884/464, loss: 0.018984658643603325 2023-01-22 13:16:44.894259: step: 886/464, loss: 0.27770155668258667 2023-01-22 13:16:45.520822: step: 888/464, loss: 0.004165395628660917 2023-01-22 13:16:46.208822: step: 890/464, loss: 0.4924483895301819 2023-01-22 13:16:46.808693: step: 892/464, loss: 0.051497023552656174 2023-01-22 13:16:47.392675: step: 894/464, loss: 0.004653936717659235 2023-01-22 13:16:48.058140: step: 896/464, loss: 0.009835487231612206 2023-01-22 13:16:48.615862: step: 898/464, loss: 0.002643953077495098 2023-01-22 13:16:49.247293: step: 900/464, loss: 0.004552288446575403 2023-01-22 13:16:49.905152: step: 902/464, loss: 0.15693362057209015 2023-01-22 13:16:50.454917: step: 904/464, loss: 0.009014283306896687 2023-01-22 13:16:51.047284: step: 906/464, loss: 0.030812203884124756 2023-01-22 13:16:51.637686: step: 908/464, loss: 0.012034276500344276 2023-01-22 13:16:52.254290: step: 910/464, loss: 0.0003978584718424827 2023-01-22 13:16:52.880906: step: 912/464, loss: 0.030327772721648216 2023-01-22 13:16:53.542459: step: 914/464, loss: 0.1456509679555893 2023-01-22 13:16:54.141399: step: 916/464, loss: 0.007402684073895216 2023-01-22 13:16:54.787720: step: 918/464, loss: 0.029965035617351532 2023-01-22 13:16:55.449242: step: 920/464, loss: 0.018859058618545532 2023-01-22 13:16:56.041231: step: 922/464, loss: 0.008814035914838314 2023-01-22 13:16:56.706461: step: 924/464, loss: 0.07245197892189026 2023-01-22 13:16:57.345771: step: 926/464, loss: 0.06136466562747955 2023-01-22 13:16:57.962233: step: 928/464, loss: 0.0006068818620406091 2023-01-22 13:16:58.465067: step: 930/464, loss: 0.0022186944261193275
==================================================
Loss: 0.066
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29464908749329044, 'r': 0.347205091714105, 'f1': 0.31877540650406505}, 'combined': 0.23488714163457425, 'epoch': 28}
Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2840114870758206, 'r': 0.30696191027386666, 'f1': 0.2950410593894447}, 'combined': 0.19261747918689134, 'epoch': 28}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923500510464523, 'r': 0.3622477862112587, 'f1': 0.3235670903954802}, 'combined': 0.23841785608088015, 'epoch': 28}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3028762699840867, 'r': 0.31401142696879575, 'f1': 0.30834335066971996}, 'combined': 0.20130187660302962, 'epoch': 28}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2997795712789319, 'r': 0.3578014237845316, 'f1': 0.32623070992119063}, 'combined': 0.24038052309982466, 'epoch': 28}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29787315933941727, 'r': 0.3044378570659058, 'f1': 0.3011197332831711}, 'combined': 0.19658593986362466, 'epoch': 28}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2285353535353535, 'r': 0.4309523809523809, 'f1': 0.29867986798679863}, 'combined': 0.19911991199119908, 'epoch': 28}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26785714285714285, 'r': 0.4891304347826087, 'f1': 0.34615384615384615}, 'combined': 0.17307692307692307, 'epoch': 28}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 28}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22}
******************************
Epoch: 29
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 13:19:36.633887: step: 2/464, loss: 0.20718348026275635 2023-01-22 13:19:37.221488: step: 4/464, loss: 0.00092584069352597 2023-01-22 13:19:37.868731: step: 6/464, loss: 0.00072711386019364 2023-01-22 13:19:38.467558: step: 8/464, loss: 0.2181786745786667 2023-01-22 13:19:39.136470: step: 10/464, loss: 0.02145509421825409 2023-01-22 13:19:39.773928: step: 12/464, loss: 0.031037023290991783 2023-01-22 13:19:40.398084: step: 14/464, loss: 0.021666940301656723 2023-01-22 13:19:41.016452: step: 16/464, loss: 0.005939903669059277 2023-01-22 13:19:41.644599: step: 18/464, loss: 0.019667789340019226 2023-01-22 13:19:42.266154: step: 20/464, loss: 0.802329421043396 2023-01-22 13:19:42.899505: step: 22/464, loss: 0.001183252315968275 2023-01-22 13:19:43.656260: step:
24/464, loss: 0.002743531484156847 2023-01-22 13:19:44.241004: step: 26/464, loss: 0.039093371480703354 2023-01-22 13:19:44.959948: step: 28/464, loss: 0.021364763379096985 2023-01-22 13:19:45.505477: step: 30/464, loss: 0.0051937587559223175 2023-01-22 13:19:46.090462: step: 32/464, loss: 0.04549663886427879 2023-01-22 13:19:46.693451: step: 34/464, loss: 0.03777649998664856 2023-01-22 13:19:47.278460: step: 36/464, loss: 0.009439066983759403 2023-01-22 13:19:47.895252: step: 38/464, loss: 0.021647615358233452 2023-01-22 13:19:48.485599: step: 40/464, loss: 0.050232235342264175 2023-01-22 13:19:49.053398: step: 42/464, loss: 0.0005928269238211215 2023-01-22 13:19:49.696906: step: 44/464, loss: 0.021608717739582062 2023-01-22 13:19:50.493681: step: 46/464, loss: 0.00045615588896907866 2023-01-22 13:19:51.134341: step: 48/464, loss: 0.006590969394892454 2023-01-22 13:19:51.785883: step: 50/464, loss: 0.016590625047683716 2023-01-22 13:19:52.420762: step: 52/464, loss: 0.007587079890072346 2023-01-22 13:19:53.042687: step: 54/464, loss: 0.021061744540929794 2023-01-22 13:19:53.680191: step: 56/464, loss: 0.07708916813135147 2023-01-22 13:19:54.267218: step: 58/464, loss: 0.01860615983605385 2023-01-22 13:19:54.913253: step: 60/464, loss: 0.034726161509752274 2023-01-22 13:19:55.566647: step: 62/464, loss: 0.1536983698606491 2023-01-22 13:19:56.224295: step: 64/464, loss: 0.0010793593246489763 2023-01-22 13:19:56.797882: step: 66/464, loss: 0.038470372557640076 2023-01-22 13:19:57.381031: step: 68/464, loss: 0.033768318593502045 2023-01-22 13:19:58.003113: step: 70/464, loss: 0.1774447113275528 2023-01-22 13:19:58.631007: step: 72/464, loss: 0.01382715068757534 2023-01-22 13:19:59.294922: step: 74/464, loss: 0.0067392922937870026 2023-01-22 13:19:59.904788: step: 76/464, loss: 0.24856817722320557 2023-01-22 13:20:00.632914: step: 78/464, loss: 0.10166781395673752 2023-01-22 13:20:01.227770: step: 80/464, loss: 0.023978451266884804 2023-01-22 13:20:01.922467: step: 82/464, loss: 0.03615910932421684 2023-01-22 13:20:02.587953: step: 84/464, loss: 0.009203365072607994 2023-01-22 13:20:03.193979: step: 86/464, loss: 0.024670295417308807 2023-01-22 13:20:03.783158: step: 88/464, loss: 0.11750826239585876 2023-01-22 13:20:04.434339: step: 90/464, loss: 0.0012939282460138202 2023-01-22 13:20:05.130069: step: 92/464, loss: 0.024970868602395058 2023-01-22 13:20:05.720579: step: 94/464, loss: 0.022319048643112183 2023-01-22 13:20:06.369216: step: 96/464, loss: 0.0035962723195552826 2023-01-22 13:20:06.968541: step: 98/464, loss: 0.00042239887989126146 2023-01-22 13:20:07.591575: step: 100/464, loss: 0.012578553520143032 2023-01-22 13:20:08.200423: step: 102/464, loss: 0.0005949849728494883 2023-01-22 13:20:08.814274: step: 104/464, loss: 0.004973983392119408 2023-01-22 13:20:09.395969: step: 106/464, loss: 0.0001440097257727757 2023-01-22 13:20:10.063973: step: 108/464, loss: 0.0024634215515106916 2023-01-22 13:20:10.646904: step: 110/464, loss: 0.024518417194485664 2023-01-22 13:20:11.311628: step: 112/464, loss: 0.032566480338573456 2023-01-22 13:20:11.961395: step: 114/464, loss: 0.02598331868648529 2023-01-22 13:20:12.539258: step: 116/464, loss: 0.023710399866104126 2023-01-22 13:20:13.135995: step: 118/464, loss: 0.011170746758580208 2023-01-22 13:20:13.745011: step: 120/464, loss: 0.004629192873835564 2023-01-22 13:20:14.400281: step: 122/464, loss: 0.016308680176734924 2023-01-22 13:20:14.962763: step: 124/464, loss: 0.017663175240159035 2023-01-22 13:20:15.574481: step: 126/464, loss: 
0.07340771704912186 2023-01-22 13:20:16.182903: step: 128/464, loss: 0.010101350955665112 2023-01-22 13:20:16.734669: step: 130/464, loss: 0.20134709775447845 2023-01-22 13:20:17.327706: step: 132/464, loss: 0.014041811227798462 2023-01-22 13:20:17.966358: step: 134/464, loss: 0.0008428994915448129 2023-01-22 13:20:18.538480: step: 136/464, loss: 0.0007383274496532977 2023-01-22 13:20:19.186472: step: 138/464, loss: 0.013044522143900394 2023-01-22 13:20:19.785597: step: 140/464, loss: 0.019865253940224648 2023-01-22 13:20:20.332599: step: 142/464, loss: 0.006404084153473377 2023-01-22 13:20:20.898322: step: 144/464, loss: 0.027074342593550682 2023-01-22 13:20:21.482292: step: 146/464, loss: 0.011449289508163929 2023-01-22 13:20:22.085760: step: 148/464, loss: 0.10458236932754517 2023-01-22 13:20:22.747275: step: 150/464, loss: 0.0007995071937330067 2023-01-22 13:20:23.393165: step: 152/464, loss: 0.010486981831490993 2023-01-22 13:20:24.004894: step: 154/464, loss: 0.0464809276163578 2023-01-22 13:20:24.602551: step: 156/464, loss: 0.011342362500727177 2023-01-22 13:20:25.241131: step: 158/464, loss: 0.042877405881881714 2023-01-22 13:20:25.854544: step: 160/464, loss: 0.0033706913236528635 2023-01-22 13:20:26.501230: step: 162/464, loss: 0.015807805582880974 2023-01-22 13:20:27.101241: step: 164/464, loss: 0.013546344824135303 2023-01-22 13:20:27.704890: step: 166/464, loss: 0.05058148130774498 2023-01-22 13:20:28.234547: step: 168/464, loss: 3.490782910375856e-05 2023-01-22 13:20:28.898735: step: 170/464, loss: 0.01310047972947359 2023-01-22 13:20:29.481805: step: 172/464, loss: 0.018710266798734665 2023-01-22 13:20:30.114400: step: 174/464, loss: 0.000846777344122529 2023-01-22 13:20:30.833064: step: 176/464, loss: 1.2617532014846802 2023-01-22 13:20:31.446275: step: 178/464, loss: 0.008382102474570274 2023-01-22 13:20:32.059870: step: 180/464, loss: 9.83451354841236e-06 2023-01-22 13:20:32.654749: step: 182/464, loss: 0.044426459819078445 2023-01-22 13:20:33.223980: step: 184/464, loss: 0.04976968094706535 2023-01-22 13:20:33.850978: step: 186/464, loss: 0.0009115879656746984 2023-01-22 13:20:34.452744: step: 188/464, loss: 0.02329590730369091 2023-01-22 13:20:35.040608: step: 190/464, loss: 0.0021927249617874622 2023-01-22 13:20:35.681740: step: 192/464, loss: 0.00851105060428381 2023-01-22 13:20:36.278237: step: 194/464, loss: 0.011024788953363895 2023-01-22 13:20:36.861661: step: 196/464, loss: 0.020354608073830605 2023-01-22 13:20:37.492968: step: 198/464, loss: 0.21923001110553741 2023-01-22 13:20:38.082776: step: 200/464, loss: 0.4580487310886383 2023-01-22 13:20:38.787166: step: 202/464, loss: 0.05740246921777725 2023-01-22 13:20:39.373619: step: 204/464, loss: 0.021749237552285194 2023-01-22 13:20:39.987935: step: 206/464, loss: 0.0008607152849435806 2023-01-22 13:20:40.594665: step: 208/464, loss: 0.004498638678342104 2023-01-22 13:20:41.181730: step: 210/464, loss: 0.001176126766949892 2023-01-22 13:20:41.801335: step: 212/464, loss: 0.014383724890649319 2023-01-22 13:20:42.456894: step: 214/464, loss: 0.004588214214891195 2023-01-22 13:20:43.052246: step: 216/464, loss: 0.004308333154767752 2023-01-22 13:20:43.702976: step: 218/464, loss: 0.0035217590630054474 2023-01-22 13:20:44.276513: step: 220/464, loss: 0.2619556486606598 2023-01-22 13:20:44.932234: step: 222/464, loss: 0.01845749095082283 2023-01-22 13:20:45.570059: step: 224/464, loss: 0.343423455953598 2023-01-22 13:20:46.129226: step: 226/464, loss: 0.00013274258526507765 2023-01-22 13:20:46.842486: step: 228/464, 
loss: 0.04242993891239166 2023-01-22 13:20:47.461880: step: 230/464, loss: 0.004917399492114782 2023-01-22 13:20:48.006644: step: 232/464, loss: 0.05042678490281105 2023-01-22 13:20:48.619932: step: 234/464, loss: 0.015125767327845097 2023-01-22 13:20:49.329925: step: 236/464, loss: 0.023845607414841652 2023-01-22 13:20:49.930412: step: 238/464, loss: 0.002912424271926284 2023-01-22 13:20:50.564894: step: 240/464, loss: 0.030322978273034096 2023-01-22 13:20:51.230127: step: 242/464, loss: 0.04087292402982712 2023-01-22 13:20:51.835716: step: 244/464, loss: 0.024132249876856804 2023-01-22 13:20:52.483304: step: 246/464, loss: 0.00927796121686697 2023-01-22 13:20:53.106169: step: 248/464, loss: 0.04161551967263222 2023-01-22 13:20:53.718299: step: 250/464, loss: 0.010051172226667404 2023-01-22 13:20:54.318874: step: 252/464, loss: 0.01845390349626541 2023-01-22 13:20:54.901449: step: 254/464, loss: 0.00814163126051426 2023-01-22 13:20:55.498299: step: 256/464, loss: 0.011661508120596409 2023-01-22 13:20:56.131798: step: 258/464, loss: 0.0111940186470747 2023-01-22 13:20:56.715312: step: 260/464, loss: 0.17088641226291656 2023-01-22 13:20:57.350672: step: 262/464, loss: 0.013552245683968067 2023-01-22 13:20:57.949135: step: 264/464, loss: 0.0031578170601278543 2023-01-22 13:20:58.676374: step: 266/464, loss: 0.8463234305381775 2023-01-22 13:20:59.300119: step: 268/464, loss: 0.05910234898328781 2023-01-22 13:20:59.906452: step: 270/464, loss: 0.01681624911725521 2023-01-22 13:21:00.502058: step: 272/464, loss: 0.0013331277295947075 2023-01-22 13:21:01.093128: step: 274/464, loss: 0.0015316865174099803 2023-01-22 13:21:01.626836: step: 276/464, loss: 0.0023572868667542934 2023-01-22 13:21:02.291212: step: 278/464, loss: 0.015970777720212936 2023-01-22 13:21:02.945167: step: 280/464, loss: 0.010668879374861717 2023-01-22 13:21:03.657707: step: 282/464, loss: 0.041640013456344604 2023-01-22 13:21:04.297715: step: 284/464, loss: 0.013012656942009926 2023-01-22 13:21:04.861784: step: 286/464, loss: 0.00033443470601923764 2023-01-22 13:21:05.483464: step: 288/464, loss: 0.007970185950398445 2023-01-22 13:21:06.041663: step: 290/464, loss: 0.03468017280101776 2023-01-22 13:21:06.715436: step: 292/464, loss: 0.0018570302054286003 2023-01-22 13:21:07.296172: step: 294/464, loss: 0.003946130629628897 2023-01-22 13:21:07.959102: step: 296/464, loss: 0.033612918108701706 2023-01-22 13:21:08.579374: step: 298/464, loss: 0.0026805405505001545 2023-01-22 13:21:09.175959: step: 300/464, loss: 0.053611185401678085 2023-01-22 13:21:09.865813: step: 302/464, loss: 0.06214655190706253 2023-01-22 13:21:10.425744: step: 304/464, loss: 0.05859425291419029 2023-01-22 13:21:11.003644: step: 306/464, loss: 0.05776157230138779 2023-01-22 13:21:11.602769: step: 308/464, loss: 0.012165514752268791 2023-01-22 13:21:12.270105: step: 310/464, loss: 0.0843651294708252 2023-01-22 13:21:12.953364: step: 312/464, loss: 0.05880401283502579 2023-01-22 13:21:13.602045: step: 314/464, loss: 0.07659254223108292 2023-01-22 13:21:14.159903: step: 316/464, loss: 2.4264578819274902 2023-01-22 13:21:14.774446: step: 318/464, loss: 0.00882178358733654 2023-01-22 13:21:15.372314: step: 320/464, loss: 0.0015311307506635785 2023-01-22 13:21:15.969366: step: 322/464, loss: 0.031146302819252014 2023-01-22 13:21:16.582926: step: 324/464, loss: 0.6656548380851746 2023-01-22 13:21:17.290107: step: 326/464, loss: 0.018226604908704758 2023-01-22 13:21:17.894588: step: 328/464, loss: 0.012589401565492153 2023-01-22 13:21:18.537276: step: 330/464, 
loss: 0.021131210029125214 2023-01-22 13:21:19.138832: step: 332/464, loss: 0.0200370941311121 2023-01-22 13:21:19.754974: step: 334/464, loss: 0.029083983972668648 2023-01-22 13:21:20.375480: step: 336/464, loss: 0.009164446033537388 2023-01-22 13:21:20.980163: step: 338/464, loss: 0.006219303701072931 2023-01-22 13:21:21.554387: step: 340/464, loss: 0.003315514186397195 2023-01-22 13:21:22.107888: step: 342/464, loss: 0.011149341240525246 2023-01-22 13:21:22.699885: step: 344/464, loss: 0.02427523024380207 2023-01-22 13:21:23.316198: step: 346/464, loss: 0.031062254682183266 2023-01-22 13:21:23.915168: step: 348/464, loss: 0.00999541487544775 2023-01-22 13:21:24.616886: step: 350/464, loss: 0.06719443202018738 2023-01-22 13:21:25.229795: step: 352/464, loss: 0.006315178237855434 2023-01-22 13:21:25.921216: step: 354/464, loss: 0.022876959294080734 2023-01-22 13:21:26.485442: step: 356/464, loss: 0.026826461777091026 2023-01-22 13:21:27.114910: step: 358/464, loss: 0.005475881043821573 2023-01-22 13:21:27.721082: step: 360/464, loss: 0.01010300312191248 2023-01-22 13:21:28.308921: step: 362/464, loss: 0.014562626369297504 2023-01-22 13:21:28.926615: step: 364/464, loss: 0.01299215853214264 2023-01-22 13:21:29.542024: step: 366/464, loss: 0.0004880430060438812 2023-01-22 13:21:30.171501: step: 368/464, loss: 0.005313422996550798 2023-01-22 13:21:30.846216: step: 370/464, loss: 0.004638213198632002 2023-01-22 13:21:31.433255: step: 372/464, loss: 0.04647836461663246 2023-01-22 13:21:32.025988: step: 374/464, loss: 0.00019831047393381596 2023-01-22 13:21:32.563560: step: 376/464, loss: 0.02199379913508892 2023-01-22 13:21:33.190347: step: 378/464, loss: 0.006296336650848389 2023-01-22 13:21:33.889840: step: 380/464, loss: 0.011403193697333336 2023-01-22 13:21:34.507439: step: 382/464, loss: 0.012966877780854702 2023-01-22 13:21:35.128966: step: 384/464, loss: 0.007062949705868959 2023-01-22 13:21:35.705462: step: 386/464, loss: 0.09825027734041214 2023-01-22 13:21:36.306049: step: 388/464, loss: 0.02858874760568142 2023-01-22 13:21:36.916869: step: 390/464, loss: 0.018788516521453857 2023-01-22 13:21:37.530272: step: 392/464, loss: 0.005923361051827669 2023-01-22 13:21:38.189930: step: 394/464, loss: 0.1265401393175125 2023-01-22 13:21:38.882803: step: 396/464, loss: 0.012780094519257545 2023-01-22 13:21:39.451129: step: 398/464, loss: 0.00039305436075665057 2023-01-22 13:21:40.095229: step: 400/464, loss: 0.02331719361245632 2023-01-22 13:21:40.729469: step: 402/464, loss: 0.10353614389896393 2023-01-22 13:21:41.342668: step: 404/464, loss: 0.006237336900085211 2023-01-22 13:21:41.979566: step: 406/464, loss: 0.028251267969608307 2023-01-22 13:21:42.581712: step: 408/464, loss: 0.019966894760727882 2023-01-22 13:21:43.210996: step: 410/464, loss: 0.02114271931350231 2023-01-22 13:21:43.808860: step: 412/464, loss: 0.018811846151947975 2023-01-22 13:21:44.441700: step: 414/464, loss: 0.01861034333705902 2023-01-22 13:21:45.096798: step: 416/464, loss: 0.12198765575885773 2023-01-22 13:21:45.777182: step: 418/464, loss: 0.012938577681779861 2023-01-22 13:21:46.479294: step: 420/464, loss: 0.025013018399477005 2023-01-22 13:21:47.062806: step: 422/464, loss: 0.016791654750704765 2023-01-22 13:21:47.657136: step: 424/464, loss: 0.01789095811545849 2023-01-22 13:21:48.277032: step: 426/464, loss: 0.0005331274005584419 2023-01-22 13:21:48.899148: step: 428/464, loss: 0.007982496172189713 2023-01-22 13:21:49.486671: step: 430/464, loss: 0.0011905856663361192 2023-01-22 13:21:50.075197: step: 
432/464, loss: 0.016070405021309853 2023-01-22 13:21:50.841226: step: 434/464, loss: 0.013353471644222736 2023-01-22 13:21:51.423892: step: 436/464, loss: 0.023610210046172142 2023-01-22 13:21:52.098576: step: 438/464, loss: 0.0031759832054376602 2023-01-22 13:21:52.698253: step: 440/464, loss: 0.015572638250887394 2023-01-22 13:21:53.353865: step: 442/464, loss: 0.015378996729850769 2023-01-22 13:21:53.974098: step: 444/464, loss: 0.008406156674027443 2023-01-22 13:21:54.661836: step: 446/464, loss: 0.0370088592171669 2023-01-22 13:21:55.317915: step: 448/464, loss: 0.5323182344436646 2023-01-22 13:21:55.906341: step: 450/464, loss: 0.0011735439766198397 2023-01-22 13:21:56.594385: step: 452/464, loss: 0.001455902587622404 2023-01-22 13:21:57.179943: step: 454/464, loss: 0.009804188273847103 2023-01-22 13:21:57.840131: step: 456/464, loss: 0.0053962101228535175 2023-01-22 13:21:58.504512: step: 458/464, loss: 0.05759469047188759 2023-01-22 13:21:59.167965: step: 460/464, loss: 0.003220080863684416 2023-01-22 13:21:59.791980: step: 462/464, loss: 0.0031053987331688404 2023-01-22 13:22:00.449890: step: 464/464, loss: 0.2687501013278961 2023-01-22 13:22:01.069825: step: 466/464, loss: 0.07093895971775055 2023-01-22 13:22:01.740691: step: 468/464, loss: 0.00012292155588511378 2023-01-22 13:22:02.323640: step: 470/464, loss: 0.03172307088971138 2023-01-22 13:22:02.975457: step: 472/464, loss: 0.09837689250707626 2023-01-22 13:22:03.523851: step: 474/464, loss: 0.017058294266462326 2023-01-22 13:22:04.191307: step: 476/464, loss: 0.011396778747439384 2023-01-22 13:22:04.752587: step: 478/464, loss: 0.003416843479499221 2023-01-22 13:22:05.409143: step: 480/464, loss: 0.0012493301182985306 2023-01-22 13:22:06.136842: step: 482/464, loss: 0.8681263327598572 2023-01-22 13:22:06.838593: step: 484/464, loss: 0.006136356852948666 2023-01-22 13:22:07.486802: step: 486/464, loss: 8.88936483534053e-06 2023-01-22 13:22:08.094736: step: 488/464, loss: 0.008741669356822968 2023-01-22 13:22:08.666826: step: 490/464, loss: 0.01587357372045517 2023-01-22 13:22:09.248658: step: 492/464, loss: 0.005742009729146957 2023-01-22 13:22:09.934799: step: 494/464, loss: 0.008913460187613964 2023-01-22 13:22:10.472550: step: 496/464, loss: 0.018264610320329666 2023-01-22 13:22:11.127491: step: 498/464, loss: 0.0014765068190172315 2023-01-22 13:22:11.734141: step: 500/464, loss: 0.05725078657269478 2023-01-22 13:22:12.327912: step: 502/464, loss: 0.01852232776582241 2023-01-22 13:22:12.939552: step: 504/464, loss: 0.03316880390048027 2023-01-22 13:22:13.589795: step: 506/464, loss: 0.04357683286070824 2023-01-22 13:22:14.216883: step: 508/464, loss: 9.290242451243103e-05 2023-01-22 13:22:14.814299: step: 510/464, loss: 0.029402637854218483 2023-01-22 13:22:15.419997: step: 512/464, loss: 0.036833539605140686 2023-01-22 13:22:15.980335: step: 514/464, loss: 0.007631601300090551 2023-01-22 13:22:16.533809: step: 516/464, loss: 0.04601089283823967 2023-01-22 13:22:17.173298: step: 518/464, loss: 0.04939999058842659 2023-01-22 13:22:17.826632: step: 520/464, loss: 0.0015252236044034362 2023-01-22 13:22:18.448935: step: 522/464, loss: 0.0031517392490059137 2023-01-22 13:22:19.049690: step: 524/464, loss: 0.025582026690244675 2023-01-22 13:22:19.665899: step: 526/464, loss: 0.011233438737690449 2023-01-22 13:22:20.306799: step: 528/464, loss: 0.0003041566233150661 2023-01-22 13:22:20.900179: step: 530/464, loss: 0.04653728008270264 2023-01-22 13:22:21.491954: step: 532/464, loss: 0.015551424585282803 2023-01-22 
13:22:22.132701: step: 534/464, loss: 0.045204851776361465 2023-01-22 13:22:22.740588: step: 536/464, loss: 0.03499005362391472 2023-01-22 13:22:23.395735: step: 538/464, loss: 0.03213276341557503 2023-01-22 13:22:23.954690: step: 540/464, loss: 0.045557327568531036 2023-01-22 13:22:24.602940: step: 542/464, loss: 0.017820246517658234 2023-01-22 13:22:25.234784: step: 544/464, loss: 0.030133241787552834 2023-01-22 13:22:25.875374: step: 546/464, loss: 0.07815069705247879 2023-01-22 13:22:26.531724: step: 548/464, loss: 0.013082635588943958 2023-01-22 13:22:27.121130: step: 550/464, loss: 0.041477177292108536 2023-01-22 13:22:27.693733: step: 552/464, loss: 0.017172643914818764 2023-01-22 13:22:28.394400: step: 554/464, loss: 0.04386192187666893 2023-01-22 13:22:29.043720: step: 556/464, loss: 0.0157090462744236 2023-01-22 13:22:29.657213: step: 558/464, loss: 0.0031350385397672653 2023-01-22 13:22:30.269547: step: 560/464, loss: 0.4393223822116852 2023-01-22 13:22:30.944477: step: 562/464, loss: 0.013955345377326012 2023-01-22 13:22:31.511291: step: 564/464, loss: 0.038877204060554504 2023-01-22 13:22:32.157480: step: 566/464, loss: 0.02907596156001091 2023-01-22 13:22:32.776125: step: 568/464, loss: 0.03757504001259804 2023-01-22 13:22:33.400504: step: 570/464, loss: 0.010801984928548336 2023-01-22 13:22:33.982659: step: 572/464, loss: 0.02427094802260399 2023-01-22 13:22:34.589834: step: 574/464, loss: 0.09442934393882751 2023-01-22 13:22:35.221597: step: 576/464, loss: 0.00747566157951951 2023-01-22 13:22:35.906248: step: 578/464, loss: 0.0013725977623835206 2023-01-22 13:22:36.463703: step: 580/464, loss: 0.0008823683601804078 2023-01-22 13:22:37.046352: step: 582/464, loss: 0.002390040084719658 2023-01-22 13:22:37.656419: step: 584/464, loss: 0.12122202664613724 2023-01-22 13:22:38.291718: step: 586/464, loss: 0.046643223613500595 2023-01-22 13:22:38.983696: step: 588/464, loss: 0.06922060251235962 2023-01-22 13:22:39.578911: step: 590/464, loss: 0.0049684857949614525 2023-01-22 13:22:40.127986: step: 592/464, loss: 0.04633360728621483 2023-01-22 13:22:40.748543: step: 594/464, loss: 0.004706260748207569 2023-01-22 13:22:41.403794: step: 596/464, loss: 0.030159030109643936 2023-01-22 13:22:42.087499: step: 598/464, loss: 0.0060491920448839664 2023-01-22 13:22:42.706608: step: 600/464, loss: 0.031114358454942703 2023-01-22 13:22:43.357991: step: 602/464, loss: 0.016858017072081566 2023-01-22 13:22:44.013047: step: 604/464, loss: 0.005272059701383114 2023-01-22 13:22:44.680807: step: 606/464, loss: 0.0002044775610556826 2023-01-22 13:22:45.261541: step: 608/464, loss: 6.030996799468994 2023-01-22 13:22:45.886959: step: 610/464, loss: 0.02099800668656826 2023-01-22 13:22:46.500023: step: 612/464, loss: 0.006913003511726856 2023-01-22 13:22:47.070307: step: 614/464, loss: 0.004233603831380606 2023-01-22 13:22:47.740519: step: 616/464, loss: 0.025346634909510612 2023-01-22 13:22:48.329046: step: 618/464, loss: 0.06679163873195648 2023-01-22 13:22:48.955508: step: 620/464, loss: 0.004545097704976797 2023-01-22 13:22:49.555857: step: 622/464, loss: 0.0001765040506143123 2023-01-22 13:22:50.269664: step: 624/464, loss: 0.003950684797018766 2023-01-22 13:22:50.971563: step: 626/464, loss: 0.01283080130815506 2023-01-22 13:22:51.635038: step: 628/464, loss: 0.04789276793599129 2023-01-22 13:22:52.285996: step: 630/464, loss: 0.20595882833003998 2023-01-22 13:22:52.878635: step: 632/464, loss: 0.03244363144040108 2023-01-22 13:22:53.473234: step: 634/464, loss: 0.7603281140327454 2023-01-22 
13:22:54.102475: step: 636/464, loss: 0.006186482030898333 2023-01-22 13:22:54.762748: step: 638/464, loss: 0.019982969388365746 2023-01-22 13:22:55.354918: step: 640/464, loss: 0.006591130048036575 2023-01-22 13:22:55.965505: step: 642/464, loss: 0.012419759295880795 2023-01-22 13:22:56.554199: step: 644/464, loss: 0.006895511411130428 2023-01-22 13:22:57.137193: step: 646/464, loss: 0.07272930443286896 2023-01-22 13:22:57.809693: step: 648/464, loss: 0.015621660277247429 2023-01-22 13:22:58.380416: step: 650/464, loss: 0.009616071358323097 2023-01-22 13:22:58.959941: step: 652/464, loss: 0.0020380548667162657 2023-01-22 13:22:59.580621: step: 654/464, loss: 0.0068487850949168205 2023-01-22 13:23:00.250900: step: 656/464, loss: 1.3829022645950317 2023-01-22 13:23:00.875420: step: 658/464, loss: 0.11870887130498886 2023-01-22 13:23:01.517936: step: 660/464, loss: 0.021173374727368355 2023-01-22 13:23:02.130656: step: 662/464, loss: 0.00863352045416832 2023-01-22 13:23:02.754362: step: 664/464, loss: 0.005785847082734108 2023-01-22 13:23:03.349465: step: 666/464, loss: 0.004254731349647045 2023-01-22 13:23:03.968235: step: 668/464, loss: 0.01884510926902294 2023-01-22 13:23:04.578389: step: 670/464, loss: 0.02798466570675373 2023-01-22 13:23:05.276422: step: 672/464, loss: 0.03392200171947479 2023-01-22 13:23:05.893105: step: 674/464, loss: 0.03376045823097229 2023-01-22 13:23:06.536298: step: 676/464, loss: 0.020718907937407494 2023-01-22 13:23:07.147895: step: 678/464, loss: 0.014963822439312935 2023-01-22 13:23:07.729097: step: 680/464, loss: 0.020511649549007416 2023-01-22 13:23:08.400212: step: 682/464, loss: 0.22455890476703644 2023-01-22 13:23:09.061167: step: 684/464, loss: 0.2074844390153885 2023-01-22 13:23:09.717492: step: 686/464, loss: 0.011452744714915752 2023-01-22 13:23:10.372067: step: 688/464, loss: 0.004009000025689602 2023-01-22 13:23:10.992626: step: 690/464, loss: 0.000250319397309795 2023-01-22 13:23:11.592830: step: 692/464, loss: 0.04512270167469978 2023-01-22 13:23:12.262197: step: 694/464, loss: 0.009123125113546848 2023-01-22 13:23:12.940246: step: 696/464, loss: 0.014779305085539818 2023-01-22 13:23:13.530836: step: 698/464, loss: 0.009732699953019619 2023-01-22 13:23:14.263493: step: 700/464, loss: 0.02503376267850399 2023-01-22 13:23:14.885150: step: 702/464, loss: 0.0029226504266262054 2023-01-22 13:23:15.542570: step: 704/464, loss: 0.007843797095119953 2023-01-22 13:23:16.108380: step: 706/464, loss: 0.021176619455218315 2023-01-22 13:23:16.801590: step: 708/464, loss: 0.04709320142865181 2023-01-22 13:23:17.417361: step: 710/464, loss: 0.00755814416334033 2023-01-22 13:23:18.072526: step: 712/464, loss: 0.13147243857383728 2023-01-22 13:23:18.640237: step: 714/464, loss: 0.023370176553726196 2023-01-22 13:23:19.297024: step: 716/464, loss: 0.03681657835841179 2023-01-22 13:23:19.951634: step: 718/464, loss: 0.012402649037539959 2023-01-22 13:23:20.622613: step: 720/464, loss: 0.01318159606307745 2023-01-22 13:23:21.278702: step: 722/464, loss: 0.021943572908639908 2023-01-22 13:23:21.927263: step: 724/464, loss: 0.001379357068799436 2023-01-22 13:23:22.554949: step: 726/464, loss: 0.033673420548439026 2023-01-22 13:23:23.225611: step: 728/464, loss: 0.004524344112724066 2023-01-22 13:23:23.785367: step: 730/464, loss: 0.014040261507034302 2023-01-22 13:23:24.354344: step: 732/464, loss: 0.004589345771819353 2023-01-22 13:23:25.115557: step: 734/464, loss: 0.02422994375228882 2023-01-22 13:23:25.721443: step: 736/464, loss: 0.012395837344229221 2023-01-22 
13:23:26.319481: step: 738/464, loss: 0.07498691976070404 2023-01-22 13:23:26.957162: step: 740/464, loss: 0.01698930375277996 2023-01-22 13:23:27.561404: step: 742/464, loss: 0.2359449863433838 2023-01-22 13:23:28.166679: step: 744/464, loss: 0.16284212470054626 2023-01-22 13:23:28.841167: step: 746/464, loss: 0.0486256368458271 2023-01-22 13:23:29.475411: step: 748/464, loss: 0.013262578286230564 2023-01-22 13:23:30.148635: step: 750/464, loss: 0.054008420556783676 2023-01-22 13:23:30.826938: step: 752/464, loss: 0.010377590544521809 2023-01-22 13:23:31.394942: step: 754/464, loss: 0.011460366658866405 2023-01-22 13:23:32.094066: step: 756/464, loss: 0.004750390071421862 2023-01-22 13:23:32.704771: step: 758/464, loss: 0.01390005275607109 2023-01-22 13:23:33.277889: step: 760/464, loss: 0.05716053768992424 2023-01-22 13:23:33.921521: step: 762/464, loss: 0.014838357456028461 2023-01-22 13:23:34.564847: step: 764/464, loss: 0.034498848021030426 2023-01-22 13:23:35.209568: step: 766/464, loss: 0.06667480617761612 2023-01-22 13:23:35.842011: step: 768/464, loss: 0.0033385204151272774 2023-01-22 13:23:36.501238: step: 770/464, loss: 0.10323575884103775 2023-01-22 13:23:37.195708: step: 772/464, loss: 0.06185237690806389 2023-01-22 13:23:37.832987: step: 774/464, loss: 0.0025028877425938845 2023-01-22 13:23:38.431514: step: 776/464, loss: 0.01816844753921032 2023-01-22 13:23:39.035096: step: 778/464, loss: 0.07951855659484863 2023-01-22 13:23:39.646008: step: 780/464, loss: 0.06930948048830032 2023-01-22 13:23:40.305455: step: 782/464, loss: 0.029431408271193504 2023-01-22 13:23:41.002476: step: 784/464, loss: 0.03639020770788193 2023-01-22 13:23:41.680106: step: 786/464, loss: 0.028380228206515312 2023-01-22 13:23:42.339504: step: 788/464, loss: 0.07341472059488297 2023-01-22 13:23:42.973972: step: 790/464, loss: 0.14059926569461823 2023-01-22 13:23:43.641094: step: 792/464, loss: 0.02317161299288273 2023-01-22 13:23:44.265813: step: 794/464, loss: 0.0962318554520607 2023-01-22 13:23:44.851879: step: 796/464, loss: 0.023787975311279297 2023-01-22 13:23:45.501268: step: 798/464, loss: 0.016284290701150894 2023-01-22 13:23:46.140680: step: 800/464, loss: 0.004960604943335056 2023-01-22 13:23:46.788256: step: 802/464, loss: 0.3363596498966217 2023-01-22 13:23:47.407026: step: 804/464, loss: 0.018801333382725716 2023-01-22 13:23:48.098337: step: 806/464, loss: 0.02355259843170643 2023-01-22 13:23:48.623783: step: 808/464, loss: 0.017150631174445152 2023-01-22 13:23:49.245751: step: 810/464, loss: 0.003461694810539484 2023-01-22 13:23:49.911658: step: 812/464, loss: 0.7432679533958435 2023-01-22 13:23:50.558719: step: 814/464, loss: 0.016445057466626167 2023-01-22 13:23:51.178075: step: 816/464, loss: 0.2394285649061203 2023-01-22 13:23:51.790930: step: 818/464, loss: 0.17523111402988434 2023-01-22 13:23:52.361034: step: 820/464, loss: 0.0672837644815445 2023-01-22 13:23:52.981475: step: 822/464, loss: 0.002954719355329871 2023-01-22 13:23:53.557855: step: 824/464, loss: 0.06920049339532852 2023-01-22 13:23:54.163994: step: 826/464, loss: 0.07915281504392624 2023-01-22 13:23:54.825890: step: 828/464, loss: 0.010470702312886715 2023-01-22 13:23:55.416965: step: 830/464, loss: 0.009583774022758007 2023-01-22 13:23:56.034194: step: 832/464, loss: 0.009306436404585838 2023-01-22 13:23:56.589426: step: 834/464, loss: 0.006306438706815243 2023-01-22 13:23:57.184304: step: 836/464, loss: 0.0045156157575547695 2023-01-22 13:23:57.761236: step: 838/464, loss: 0.0021650197450071573 2023-01-22 
13:23:58.378920: step: 840/464, loss: 0.05883180722594261 2023-01-22 13:23:59.045054: step: 842/464, loss: 0.08518681675195694 2023-01-22 13:23:59.661537: step: 844/464, loss: 0.2572651207447052 2023-01-22 13:24:00.268400: step: 846/464, loss: 0.008243846707046032 2023-01-22 13:24:00.854304: step: 848/464, loss: 0.004368755035102367 2023-01-22 13:24:01.527652: step: 850/464, loss: 0.09649864584207535 2023-01-22 13:24:02.172089: step: 852/464, loss: 0.402221143245697 2023-01-22 13:24:02.789569: step: 854/464, loss: 0.04158594086766243 2023-01-22 13:24:03.406675: step: 856/464, loss: 0.01596343144774437 2023-01-22 13:24:04.059469: step: 858/464, loss: 0.0030320510268211365 2023-01-22 13:24:04.683953: step: 860/464, loss: 0.009376936592161655 2023-01-22 13:24:05.312950: step: 862/464, loss: 0.030942099168896675 2023-01-22 13:24:05.931820: step: 864/464, loss: 0.06399189680814743 2023-01-22 13:24:06.616716: step: 866/464, loss: 0.012965050525963306 2023-01-22 13:24:07.207779: step: 868/464, loss: 0.006405264604836702 2023-01-22 13:24:07.827356: step: 870/464, loss: 0.010999604128301144 2023-01-22 13:24:08.425537: step: 872/464, loss: 0.016261886805295944 2023-01-22 13:24:09.060971: step: 874/464, loss: 0.024095727130770683 2023-01-22 13:24:09.763530: step: 876/464, loss: 0.2649255394935608 2023-01-22 13:24:10.384047: step: 878/464, loss: 0.00711466558277607 2023-01-22 13:24:10.975907: step: 880/464, loss: 0.1561778485774994 2023-01-22 13:24:11.563921: step: 882/464, loss: 0.006854875944554806 2023-01-22 13:24:12.242682: step: 884/464, loss: 0.005609060171991587 2023-01-22 13:24:12.900303: step: 886/464, loss: 0.026153935119509697 2023-01-22 13:24:13.567876: step: 888/464, loss: 0.0005563063896261156 2023-01-22 13:24:14.211302: step: 890/464, loss: 0.02071342244744301 2023-01-22 13:24:14.828453: step: 892/464, loss: 0.007495345547795296 2023-01-22 13:24:15.514452: step: 894/464, loss: 0.022217558696866035 2023-01-22 13:24:16.211845: step: 896/464, loss: 0.016546782106161118 2023-01-22 13:24:16.897094: step: 898/464, loss: 0.008249156177043915 2023-01-22 13:24:17.509703: step: 900/464, loss: 0.02281145751476288 2023-01-22 13:24:18.142004: step: 902/464, loss: 0.053221337497234344 2023-01-22 13:24:18.750592: step: 904/464, loss: 0.00181183114182204 2023-01-22 13:24:19.364987: step: 906/464, loss: 0.06797477602958679 2023-01-22 13:24:19.971130: step: 908/464, loss: 0.01370300818234682 2023-01-22 13:24:20.594471: step: 910/464, loss: 0.06573604792356491 2023-01-22 13:24:21.158071: step: 912/464, loss: 0.006355880293995142 2023-01-22 13:24:21.773722: step: 914/464, loss: 0.04955657199025154 2023-01-22 13:24:22.416672: step: 916/464, loss: 0.03783131390810013 2023-01-22 13:24:23.055633: step: 918/464, loss: 0.05432085320353508 2023-01-22 13:24:23.681764: step: 920/464, loss: 0.18720349669456482 2023-01-22 13:24:24.371763: step: 922/464, loss: 0.0176202692091465 2023-01-22 13:24:24.935424: step: 924/464, loss: 0.008661163039505482 2023-01-22 13:24:25.481686: step: 926/464, loss: 0.02440672367811203 2023-01-22 13:24:26.061928: step: 928/464, loss: 0.007154208607971668 2023-01-22 13:24:26.621498: step: 930/464, loss: 0.006525322794914246 ================================================== Loss: 0.071 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3145722123368921, 'r': 0.3354641049968375, 'f1': 0.3246824303642486}, 'combined': 0.2392396855315516, 'epoch': 29} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 
'f1': 0.6528497409326425}, 'slot': {'p': 0.2992663378626119, 'r': 0.30641135603013064, 'f1': 0.3027967030098115}, 'combined': 0.19768074911521372, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30943149717514123, 'r': 0.34642235926628717, 'f1': 0.32688376603998803}, 'combined': 0.24086172234525433, 'epoch': 29} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32009604558344085, 'r': 0.31185827970445523, 'f1': 0.31592347143244626}, 'combined': 0.20625055647921361, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32803899082568805, 'r': 0.3392433586337761, 'f1': 0.33354710820895517}, 'combined': 0.24577155341712484, 'epoch': 29} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3129195726792338, 'r': 0.2979775912473512, 'f1': 0.30526584841802956}, 'combined': 0.1992927300552939, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2312925170068027, 'r': 0.32380952380952377, 'f1': 0.26984126984126977}, 'combined': 0.17989417989417983, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2642857142857143, 'r': 0.40217391304347827, 'f1': 0.31896551724137934}, 'combined': 0.15948275862068967, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 
0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:27:04.545448: step: 2/464, loss: 0.015284399501979351 2023-01-22 13:27:05.223559: step: 4/464, loss: 0.0022311429493129253 2023-01-22 13:27:05.852490: step: 6/464, loss: 0.03886798024177551 2023-01-22 13:27:06.443398: step: 8/464, loss: 0.002904294990003109 2023-01-22 13:27:07.048801: step: 10/464, loss: 0.00875371228903532 2023-01-22 13:27:07.670914: step: 12/464, loss: 0.011434866115450859 2023-01-22 13:27:08.261409: step: 14/464, loss: 0.03433942422270775 2023-01-22 13:27:08.849811: step: 16/464, loss: 0.00443413807079196 2023-01-22 13:27:09.434904: step: 18/464, loss: 0.0009977881563827395 2023-01-22 13:27:10.178760: step: 20/464, loss: 0.014764295890927315 2023-01-22 13:27:10.771321: step: 22/464, loss: 0.004287984687834978 2023-01-22 13:27:11.387379: step: 24/464, loss: 0.049406565725803375 2023-01-22 13:27:11.966357: step: 26/464, loss: 0.040634434670209885 2023-01-22 13:27:12.593825: step: 28/464, loss: 0.003474750090390444 2023-01-22 13:27:13.211547: step: 30/464, loss: 0.014196655713021755 2023-01-22 13:27:13.801042: step: 32/464, loss: 0.0022962952498346567 2023-01-22 13:27:14.406896: step: 34/464, loss: 0.01741405576467514 2023-01-22 13:27:15.096292: step: 36/464, loss: 0.0005106744938530028 2023-01-22 13:27:15.724748: step: 38/464, loss: 0.020914586260914803 2023-01-22 13:27:16.330378: step: 40/464, loss: 0.004347099456936121 2023-01-22 13:27:16.905471: step: 42/464, loss: 0.004731880035251379 2023-01-22 13:27:17.481825: step: 44/464, loss: 0.00010120868682861328 2023-01-22 13:27:18.138248: step: 46/464, loss: 0.021717039868235588 2023-01-22 13:27:18.734277: step: 48/464, loss: 0.010191944427788258 2023-01-22 13:27:19.372804: step: 50/464, loss: 0.02108851820230484 2023-01-22 13:27:19.975937: step: 52/464, loss: 0.008559471927583218 2023-01-22 13:27:20.559970: step: 54/464, loss: 0.03864532709121704 2023-01-22 13:27:21.126030: step: 56/464, loss: 0.005444041453301907 2023-01-22 13:27:21.776916: step: 58/464, loss: 0.007448922842741013 2023-01-22 13:27:22.331692: step: 60/464, loss: 0.002818542066961527 2023-01-22 13:27:22.922429: step: 62/464, loss: 0.00016934421728365123 2023-01-22 13:27:23.554915: step: 64/464, loss: 0.0505225844681263 2023-01-22 13:27:24.147669: step: 66/464, loss: 0.009668254293501377 2023-01-22 13:27:24.798995: step: 68/464, loss: 0.000820213055703789 2023-01-22 13:27:25.452778: step: 70/464, loss: 0.020397549495100975 2023-01-22 13:27:26.131415: step: 72/464, loss: 0.0032460566144436598 2023-01-22 13:27:26.754605: step: 74/464, loss: 5.213002077653073e-05 2023-01-22 13:27:27.437566: step: 76/464, loss: 0.09260469675064087 2023-01-22 13:27:28.031136: step: 78/464, loss: 0.021970590576529503 2023-01-22 13:27:28.603191: step: 80/464, loss: 0.008407541550695896 2023-01-22 13:27:29.165406: step: 82/464, loss: 0.04296931251883507 2023-01-22 13:27:29.788989: step: 84/464, loss: 0.007767929695546627 2023-01-22 13:27:30.374383: step: 86/464, loss: 0.0032091503962874413 2023-01-22 13:27:30.977239: step: 88/464, loss: 0.006214894820004702 2023-01-22 13:27:31.553584: step: 
90/464, loss: 0.03251410275697708 2023-01-22 13:27:32.125347: step: 92/464, loss: 0.013027353212237358 2023-01-22 13:27:32.705137: step: 94/464, loss: 0.008885751478374004 2023-01-22 13:27:33.330587: step: 96/464, loss: 0.029318206012248993 2023-01-22 13:27:33.921818: step: 98/464, loss: 0.0012817407259717584 2023-01-22 13:27:34.552221: step: 100/464, loss: 0.4874168932437897 2023-01-22 13:27:35.280422: step: 102/464, loss: 0.3966943919658661 2023-01-22 13:27:35.855895: step: 104/464, loss: 0.42609456181526184 2023-01-22 13:27:36.446105: step: 106/464, loss: 0.02490398660302162 2023-01-22 13:27:37.088603: step: 108/464, loss: 0.2202860713005066 2023-01-22 13:27:37.691511: step: 110/464, loss: 0.004081774968653917 2023-01-22 13:27:38.320123: step: 112/464, loss: 0.0020068904850631952 2023-01-22 13:27:38.952126: step: 114/464, loss: 0.008217177353799343 2023-01-22 13:27:39.554134: step: 116/464, loss: 0.0032490852754563093 2023-01-22 13:27:40.244585: step: 118/464, loss: 0.045100998133420944 2023-01-22 13:27:40.823573: step: 120/464, loss: 0.027695056051015854 2023-01-22 13:27:41.420624: step: 122/464, loss: 0.0003648280689958483 2023-01-22 13:27:42.151602: step: 124/464, loss: 0.10975858569145203 2023-01-22 13:27:42.735641: step: 126/464, loss: 0.00019776269618887454 2023-01-22 13:27:43.328407: step: 128/464, loss: 0.007610243279486895 2023-01-22 13:27:43.937160: step: 130/464, loss: 0.037492718547582626 2023-01-22 13:27:44.521934: step: 132/464, loss: 0.026891810819506645 2023-01-22 13:27:45.113521: step: 134/464, loss: 0.07546871155500412 2023-01-22 13:27:45.746744: step: 136/464, loss: 0.0031861874740570784 2023-01-22 13:27:46.373463: step: 138/464, loss: 0.0006391544593498111 2023-01-22 13:27:47.020910: step: 140/464, loss: 0.012248599901795387 2023-01-22 13:27:47.634752: step: 142/464, loss: 0.007616680581122637 2023-01-22 13:27:48.209681: step: 144/464, loss: 0.01309220027178526 2023-01-22 13:27:48.880442: step: 146/464, loss: 0.018922271206974983 2023-01-22 13:27:49.470053: step: 148/464, loss: 0.010868867859244347 2023-01-22 13:27:50.039390: step: 150/464, loss: 0.014443072490394115 2023-01-22 13:27:50.662276: step: 152/464, loss: 0.04655880481004715 2023-01-22 13:27:51.259460: step: 154/464, loss: 0.0075754448771476746 2023-01-22 13:27:51.992346: step: 156/464, loss: 0.012672694399952888 2023-01-22 13:27:52.638376: step: 158/464, loss: 0.008323279209434986 2023-01-22 13:27:53.259282: step: 160/464, loss: 0.01113010011613369 2023-01-22 13:27:53.851333: step: 162/464, loss: 0.004104962106794119 2023-01-22 13:27:54.451836: step: 164/464, loss: 0.004299768712371588 2023-01-22 13:27:55.058983: step: 166/464, loss: 0.0036474387161433697 2023-01-22 13:27:55.742605: step: 168/464, loss: 0.024346238002181053 2023-01-22 13:27:56.404339: step: 170/464, loss: 0.009751499630510807 2023-01-22 13:27:57.124978: step: 172/464, loss: 0.017550071701407433 2023-01-22 13:27:57.713041: step: 174/464, loss: 0.01081873383373022 2023-01-22 13:27:58.318052: step: 176/464, loss: 0.004264844581484795 2023-01-22 13:27:58.948711: step: 178/464, loss: 0.028513511642813683 2023-01-22 13:27:59.609790: step: 180/464, loss: 0.029072461649775505 2023-01-22 13:28:00.242858: step: 182/464, loss: 0.0013808414805680513 2023-01-22 13:28:00.845721: step: 184/464, loss: 0.0019550700671970844 2023-01-22 13:28:01.450646: step: 186/464, loss: 0.04768887162208557 2023-01-22 13:28:02.084767: step: 188/464, loss: 0.01719737984240055 2023-01-22 13:28:02.709630: step: 190/464, loss: 0.018912989646196365 2023-01-22 13:28:03.330360: 
step: 192/464, loss: 0.006397397257387638 2023-01-22 13:28:03.938402: step: 194/464, loss: 0.0028882757760584354 2023-01-22 13:28:04.629301: step: 196/464, loss: 0.0076980902813375 2023-01-22 13:28:05.210520: step: 198/464, loss: 0.0008096446981653571 2023-01-22 13:28:05.804654: step: 200/464, loss: 0.043125562369823456 2023-01-22 13:28:06.410638: step: 202/464, loss: 0.010093793272972107 2023-01-22 13:28:06.969876: step: 204/464, loss: 0.0005989423370920122 2023-01-22 13:28:07.600515: step: 206/464, loss: 0.0317736454308033 2023-01-22 13:28:08.196083: step: 208/464, loss: 0.04524993523955345 2023-01-22 13:28:08.815988: step: 210/464, loss: 0.0008322806097567081 2023-01-22 13:28:09.452973: step: 212/464, loss: 0.00863353069871664 2023-01-22 13:28:10.135883: step: 214/464, loss: 0.012559205293655396 2023-01-22 13:28:10.744295: step: 216/464, loss: 0.006640761159360409 2023-01-22 13:28:11.328401: step: 218/464, loss: 0.014305575750768185 2023-01-22 13:28:11.980821: step: 220/464, loss: 0.005848568864166737 2023-01-22 13:28:12.591534: step: 222/464, loss: 0.020689699798822403 2023-01-22 13:28:13.197529: step: 224/464, loss: 0.017257021740078926 2023-01-22 13:28:13.879825: step: 226/464, loss: 0.010369797237217426 2023-01-22 13:28:14.520413: step: 228/464, loss: 0.41616204380989075 2023-01-22 13:28:15.140328: step: 230/464, loss: 0.05756490305066109 2023-01-22 13:28:15.794210: step: 232/464, loss: 0.003744078567251563 2023-01-22 13:28:16.373386: step: 234/464, loss: 0.16744425892829895 2023-01-22 13:28:16.984658: step: 236/464, loss: 0.00409188587218523 2023-01-22 13:28:17.588204: step: 238/464, loss: 0.05485359579324722 2023-01-22 13:28:18.226880: step: 240/464, loss: 0.03245936334133148 2023-01-22 13:28:18.790173: step: 242/464, loss: 0.0014707011869177222 2023-01-22 13:28:19.404190: step: 244/464, loss: 0.028101148083806038 2023-01-22 13:28:20.033832: step: 246/464, loss: 0.025969622656702995 2023-01-22 13:28:20.627883: step: 248/464, loss: 0.03521401435136795 2023-01-22 13:28:21.224302: step: 250/464, loss: 0.001902177231386304 2023-01-22 13:28:21.867842: step: 252/464, loss: 0.0075007411651313305 2023-01-22 13:28:22.517442: step: 254/464, loss: 0.025483937934041023 2023-01-22 13:28:23.133914: step: 256/464, loss: 0.013025188818573952 2023-01-22 13:28:23.716679: step: 258/464, loss: 0.0023038771469146013 2023-01-22 13:28:24.305893: step: 260/464, loss: 0.011823596432805061 2023-01-22 13:28:24.909075: step: 262/464, loss: 0.0017983571160584688 2023-01-22 13:28:25.600414: step: 264/464, loss: 0.009744478389620781 2023-01-22 13:28:26.147522: step: 266/464, loss: 0.0004119996738154441 2023-01-22 13:28:26.708607: step: 268/464, loss: 0.004429759457707405 2023-01-22 13:28:27.308338: step: 270/464, loss: 0.001029555220156908 2023-01-22 13:28:27.934813: step: 272/464, loss: 0.009982489980757236 2023-01-22 13:28:28.611807: step: 274/464, loss: 0.062253210693597794 2023-01-22 13:28:29.214229: step: 276/464, loss: 0.008633971214294434 2023-01-22 13:28:29.842127: step: 278/464, loss: 0.0017187268240377307 2023-01-22 13:28:30.535338: step: 280/464, loss: 0.02328815683722496 2023-01-22 13:28:31.154159: step: 282/464, loss: 0.00636210897937417 2023-01-22 13:28:31.856790: step: 284/464, loss: 0.007372157648205757 2023-01-22 13:28:32.450417: step: 286/464, loss: 0.015651492401957512 2023-01-22 13:28:33.057330: step: 288/464, loss: 0.030818969011306763 2023-01-22 13:28:33.600586: step: 290/464, loss: 0.0004586923460010439 2023-01-22 13:28:34.225120: step: 292/464, loss: 0.023854084312915802 2023-01-22 
13:28:34.746505: step: 294/464, loss: 0.015337026678025723 2023-01-22 13:28:35.381091: step: 296/464, loss: 0.029326308518648148 2023-01-22 13:28:35.993724: step: 298/464, loss: 0.03470027819275856 2023-01-22 13:28:36.604413: step: 300/464, loss: 0.048934027552604675 2023-01-22 13:28:37.283756: step: 302/464, loss: 0.0003046983329113573 2023-01-22 13:28:37.877685: step: 304/464, loss: 0.038852933794260025 2023-01-22 13:28:38.491053: step: 306/464, loss: 0.04453202709555626 2023-01-22 13:28:39.080268: step: 308/464, loss: 0.002494563115760684 2023-01-22 13:28:39.759987: step: 310/464, loss: 0.006276068277657032 2023-01-22 13:28:40.409286: step: 312/464, loss: 0.03738127648830414 2023-01-22 13:28:40.996561: step: 314/464, loss: 0.0026037623174488544 2023-01-22 13:28:41.624805: step: 316/464, loss: 0.00884893536567688 2023-01-22 13:28:42.290939: step: 318/464, loss: 0.010780254378914833 2023-01-22 13:28:42.993502: step: 320/464, loss: 0.00928980391472578 2023-01-22 13:28:43.625485: step: 322/464, loss: 0.017196692526340485 2023-01-22 13:28:44.258114: step: 324/464, loss: 0.00023449427681043744 2023-01-22 13:28:44.912577: step: 326/464, loss: 0.01587003655731678 2023-01-22 13:28:45.523526: step: 328/464, loss: 0.027382345870137215 2023-01-22 13:28:46.183058: step: 330/464, loss: 0.32194334268569946 2023-01-22 13:28:46.795755: step: 332/464, loss: 0.011561702936887741 2023-01-22 13:28:47.454211: step: 334/464, loss: 0.22262977063655853 2023-01-22 13:28:48.102947: step: 336/464, loss: 0.02064697816967964 2023-01-22 13:28:48.694391: step: 338/464, loss: 0.021129708737134933 2023-01-22 13:28:49.294065: step: 340/464, loss: 0.012493270449340343 2023-01-22 13:28:49.884843: step: 342/464, loss: 0.038638342171907425 2023-01-22 13:28:50.507900: step: 344/464, loss: 0.15898793935775757 2023-01-22 13:28:51.166891: step: 346/464, loss: 0.030613379552960396 2023-01-22 13:28:51.768799: step: 348/464, loss: 0.042543865740299225 2023-01-22 13:28:52.399509: step: 350/464, loss: 0.006682658568024635 2023-01-22 13:28:53.051485: step: 352/464, loss: 0.03325079381465912 2023-01-22 13:28:53.787956: step: 354/464, loss: 0.047726865857839584 2023-01-22 13:28:54.336048: step: 356/464, loss: 0.012037553824484348 2023-01-22 13:28:54.969993: step: 358/464, loss: 0.03466491773724556 2023-01-22 13:28:55.610599: step: 360/464, loss: 0.016195526346564293 2023-01-22 13:28:56.282423: step: 362/464, loss: 0.03511173278093338 2023-01-22 13:28:56.877778: step: 364/464, loss: 0.0003981849877163768 2023-01-22 13:28:57.463206: step: 366/464, loss: 0.07734028995037079 2023-01-22 13:28:58.124785: step: 368/464, loss: 0.07165095210075378 2023-01-22 13:28:58.747085: step: 370/464, loss: 0.020368773490190506 2023-01-22 13:28:59.362681: step: 372/464, loss: 0.04683218523859978 2023-01-22 13:28:59.969401: step: 374/464, loss: 0.00047499319771304727 2023-01-22 13:29:00.545801: step: 376/464, loss: 0.0017459866357967257 2023-01-22 13:29:01.177710: step: 378/464, loss: 0.0060515995137393475 2023-01-22 13:29:01.825675: step: 380/464, loss: 0.0008434664341621101 2023-01-22 13:29:02.428165: step: 382/464, loss: 0.01529900822788477 2023-01-22 13:29:03.066566: step: 384/464, loss: 0.01310059241950512 2023-01-22 13:29:03.717827: step: 386/464, loss: 0.04771070182323456 2023-01-22 13:29:04.294141: step: 388/464, loss: 0.0028760803397744894 2023-01-22 13:29:04.917216: step: 390/464, loss: 0.013585273176431656 2023-01-22 13:29:05.482458: step: 392/464, loss: 0.0009268809226341546 2023-01-22 13:29:06.113676: step: 394/464, loss: 0.028596773743629456 
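The per-step entries in this log follow a fixed pattern: an ISO timestamp, a "step: N/464" counter, and the training loss for that batch. The counter advances by 2 per entry and runs past the /464 denominator (up to 930 per epoch), so N appears to count something other than the plain batch index; the loss values themselves are what feed the per-epoch summary. At the end of each epoch the log prints a single "Loss:" figure (0.071 for epoch 29 above, 0.034 for epoch 30 below), which is consistent with the arithmetic mean of that epoch's step losses. A minimal parsing sketch under that assumption; the regex, the function name epoch_mean_loss, and the three-decimal rounding are illustrative and not taken from train.py:

import re
from statistics import mean

# Hypothetical helper, not part of train.py: recomputes the per-epoch
# "Loss: ..." summary line, assuming it is the plain arithmetic mean of
# the per-step losses logged during that epoch.
STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9eE.+-]+)")

def epoch_mean_loss(epoch_log: str) -> float:
    # Pull every loss value attached to a "step: N/464" entry.
    losses = [float(m.group(1)) for m in STEP_RE.finditer(epoch_log)]
    return mean(losses)

# Usage (epoch_log holds one epoch's worth of the text above):
# print(f"Loss: {epoch_mean_loss(epoch_log):.3f}")   # e.g. "Loss: 0.034"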
2023-01-22 13:29:06.702786: step: 396/464, loss: 0.02000209130346775 2023-01-22 13:29:07.318961: step: 398/464, loss: 0.008988582529127598 2023-01-22 13:29:08.020884: step: 400/464, loss: 0.015802456066012383 2023-01-22 13:29:08.625839: step: 402/464, loss: 0.04091927409172058 2023-01-22 13:29:09.218432: step: 404/464, loss: 0.005606489256024361 2023-01-22 13:29:09.813715: step: 406/464, loss: 0.00022503657964989543 2023-01-22 13:29:10.478578: step: 408/464, loss: 0.01612733118236065 2023-01-22 13:29:11.029238: step: 410/464, loss: 0.03441927209496498 2023-01-22 13:29:11.688970: step: 412/464, loss: 0.007009692490100861 2023-01-22 13:29:12.326735: step: 414/464, loss: 0.006589479278773069 2023-01-22 13:29:13.005924: step: 416/464, loss: 0.028384795412421227 2023-01-22 13:29:13.718167: step: 418/464, loss: 0.0031787222251296043 2023-01-22 13:29:14.227951: step: 420/464, loss: 0.05162772163748741 2023-01-22 13:29:14.886463: step: 422/464, loss: 0.003943289164453745 2023-01-22 13:29:15.638838: step: 424/464, loss: 0.0566033273935318 2023-01-22 13:29:16.278158: step: 426/464, loss: 0.0016180764650925994 2023-01-22 13:29:16.896180: step: 428/464, loss: 0.023380430415272713 2023-01-22 13:29:17.538551: step: 430/464, loss: 0.0003730784519575536 2023-01-22 13:29:18.124039: step: 432/464, loss: 0.014182335697114468 2023-01-22 13:29:18.703254: step: 434/464, loss: 0.015464498661458492 2023-01-22 13:29:19.272543: step: 436/464, loss: 0.025532225146889687 2023-01-22 13:29:19.916414: step: 438/464, loss: 0.02354058437049389 2023-01-22 13:29:20.554836: step: 440/464, loss: 0.00013290946662891656 2023-01-22 13:29:21.167822: step: 442/464, loss: 0.0007013229769654572 2023-01-22 13:29:21.775133: step: 444/464, loss: 2.71716507995734e-05 2023-01-22 13:29:22.409245: step: 446/464, loss: 0.06851676851511002 2023-01-22 13:29:23.057756: step: 448/464, loss: 0.00036148345679976046 2023-01-22 13:29:23.681067: step: 450/464, loss: 0.004605370108038187 2023-01-22 13:29:24.373769: step: 452/464, loss: 0.013162552379071712 2023-01-22 13:29:24.981083: step: 454/464, loss: 0.007672559469938278 2023-01-22 13:29:25.629497: step: 456/464, loss: 0.08680587261915207 2023-01-22 13:29:26.286158: step: 458/464, loss: 0.007985075935721397 2023-01-22 13:29:26.886930: step: 460/464, loss: 0.0062067508697509766 2023-01-22 13:29:27.434939: step: 462/464, loss: 0.04797374829649925 2023-01-22 13:29:28.047871: step: 464/464, loss: 0.018111487850546837 2023-01-22 13:29:28.741425: step: 466/464, loss: 0.006130396854132414 2023-01-22 13:29:29.407298: step: 468/464, loss: 0.0072482856921851635 2023-01-22 13:29:30.106070: step: 470/464, loss: 0.00020309189858380705 2023-01-22 13:29:30.780428: step: 472/464, loss: 0.023160506039857864 2023-01-22 13:29:31.396413: step: 474/464, loss: 0.06689438223838806 2023-01-22 13:29:32.059492: step: 476/464, loss: 0.03167044743895531 2023-01-22 13:29:32.699898: step: 478/464, loss: 0.009050305932760239 2023-01-22 13:29:33.341464: step: 480/464, loss: 0.04196888580918312 2023-01-22 13:29:33.971657: step: 482/464, loss: 0.13746626675128937 2023-01-22 13:29:34.606252: step: 484/464, loss: 0.047761742025613785 2023-01-22 13:29:35.221249: step: 486/464, loss: 0.008654715493321419 2023-01-22 13:29:35.838575: step: 488/464, loss: 0.005094234831631184 2023-01-22 13:29:36.421042: step: 490/464, loss: 0.06825714558362961 2023-01-22 13:29:37.052200: step: 492/464, loss: 0.01563999429345131 2023-01-22 13:29:37.608940: step: 494/464, loss: 0.04438317194581032 2023-01-22 13:29:38.276245: step: 496/464, loss: 
0.031057126820087433 2023-01-22 13:29:38.863260: step: 498/464, loss: 0.0031860892195254564 2023-01-22 13:29:39.450088: step: 500/464, loss: 0.03213706985116005 2023-01-22 13:29:40.033340: step: 502/464, loss: 0.0020905076526105404 2023-01-22 13:29:40.626696: step: 504/464, loss: 0.0006013476522639394 2023-01-22 13:29:41.231543: step: 506/464, loss: 0.0025591696612536907 2023-01-22 13:29:41.817008: step: 508/464, loss: 0.03588062524795532 2023-01-22 13:29:42.474047: step: 510/464, loss: 0.3627071678638458 2023-01-22 13:29:43.052607: step: 512/464, loss: 0.0039060432463884354 2023-01-22 13:29:43.684473: step: 514/464, loss: 0.02504384145140648 2023-01-22 13:29:44.343984: step: 516/464, loss: 0.014837171882390976 2023-01-22 13:29:44.986346: step: 518/464, loss: 0.016154740005731583 2023-01-22 13:29:45.569224: step: 520/464, loss: 0.5221549868583679 2023-01-22 13:29:46.210117: step: 522/464, loss: 0.03457175940275192 2023-01-22 13:29:46.831274: step: 524/464, loss: 0.006636395119130611 2023-01-22 13:29:47.403501: step: 526/464, loss: 0.003287211060523987 2023-01-22 13:29:48.083040: step: 528/464, loss: 0.01604713499546051 2023-01-22 13:29:48.702436: step: 530/464, loss: 0.04508698731660843 2023-01-22 13:29:49.278083: step: 532/464, loss: 0.036552201956510544 2023-01-22 13:29:49.911446: step: 534/464, loss: 0.03101583756506443 2023-01-22 13:29:50.555234: step: 536/464, loss: 0.035786811262369156 2023-01-22 13:29:51.149335: step: 538/464, loss: 0.0007765466580167413 2023-01-22 13:29:51.771649: step: 540/464, loss: 0.010097292251884937 2023-01-22 13:29:52.412740: step: 542/464, loss: 0.004103609826415777 2023-01-22 13:29:53.037843: step: 544/464, loss: 0.02460542879998684 2023-01-22 13:29:53.732491: step: 546/464, loss: 0.029425041750073433 2023-01-22 13:29:54.321050: step: 548/464, loss: 0.002205929020419717 2023-01-22 13:29:54.947900: step: 550/464, loss: 0.004628791008144617 2023-01-22 13:29:55.595350: step: 552/464, loss: 0.005380065180361271 2023-01-22 13:29:56.271329: step: 554/464, loss: 0.041205957531929016 2023-01-22 13:29:56.894607: step: 556/464, loss: 0.005021595396101475 2023-01-22 13:29:57.575331: step: 558/464, loss: 0.018770398572087288 2023-01-22 13:29:58.274544: step: 560/464, loss: 0.040609098970890045 2023-01-22 13:29:58.888106: step: 562/464, loss: 0.029900405555963516 2023-01-22 13:29:59.470373: step: 564/464, loss: 0.023872636258602142 2023-01-22 13:30:00.061975: step: 566/464, loss: 0.04181007668375969 2023-01-22 13:30:00.668181: step: 568/464, loss: 0.038093894720077515 2023-01-22 13:30:01.301222: step: 570/464, loss: 0.03319160267710686 2023-01-22 13:30:01.918336: step: 572/464, loss: 0.0010064254747703671 2023-01-22 13:30:02.543785: step: 574/464, loss: 0.012854035943746567 2023-01-22 13:30:03.126781: step: 576/464, loss: 0.034058213233947754 2023-01-22 13:30:03.698152: step: 578/464, loss: 0.018953103572130203 2023-01-22 13:30:04.255456: step: 580/464, loss: 0.00026215752586722374 2023-01-22 13:30:04.830620: step: 582/464, loss: 0.005657382775098085 2023-01-22 13:30:05.463905: step: 584/464, loss: 0.03003401681780815 2023-01-22 13:30:06.143090: step: 586/464, loss: 0.005930361337959766 2023-01-22 13:30:06.733421: step: 588/464, loss: 0.037126656621694565 2023-01-22 13:30:07.320296: step: 590/464, loss: 0.01343232486397028 2023-01-22 13:30:07.932379: step: 592/464, loss: 0.002364499494433403 2023-01-22 13:30:08.566285: step: 594/464, loss: 0.2112520933151245 2023-01-22 13:30:09.154689: step: 596/464, loss: 0.016033155843615532 2023-01-22 13:30:09.754288: step: 598/464, 
loss: 0.029524585232138634 2023-01-22 13:30:10.349689: step: 600/464, loss: 0.008080963045358658 2023-01-22 13:30:10.998256: step: 602/464, loss: 0.030004560947418213 2023-01-22 13:30:11.587316: step: 604/464, loss: 0.029935169965028763 2023-01-22 13:30:12.322147: step: 606/464, loss: 0.0037073304411023855 2023-01-22 13:30:12.933968: step: 608/464, loss: 0.031628526747226715 2023-01-22 13:30:13.573039: step: 610/464, loss: 0.4366910755634308 2023-01-22 13:30:14.223436: step: 612/464, loss: 0.0004109439323656261 2023-01-22 13:30:14.854167: step: 614/464, loss: 0.031441304832696915 2023-01-22 13:30:15.497627: step: 616/464, loss: 0.007856231182813644 2023-01-22 13:30:16.234883: step: 618/464, loss: 0.032301947474479675 2023-01-22 13:30:16.822945: step: 620/464, loss: 0.0006108984816819429 2023-01-22 13:30:17.377370: step: 622/464, loss: 0.028849845752120018 2023-01-22 13:30:18.006176: step: 624/464, loss: 0.036408454179763794 2023-01-22 13:30:18.595895: step: 626/464, loss: 0.003924418706446886 2023-01-22 13:30:19.246260: step: 628/464, loss: 0.0049882857128977776 2023-01-22 13:30:19.811757: step: 630/464, loss: 0.01432995218783617 2023-01-22 13:30:20.417718: step: 632/464, loss: 0.01632368192076683 2023-01-22 13:30:21.024515: step: 634/464, loss: 0.06306029856204987 2023-01-22 13:30:21.696573: step: 636/464, loss: 0.02596416138112545 2023-01-22 13:30:22.328348: step: 638/464, loss: 0.0005985710886307061 2023-01-22 13:30:22.963204: step: 640/464, loss: 0.03676480054855347 2023-01-22 13:30:23.631571: step: 642/464, loss: 0.008700869046151638 2023-01-22 13:30:24.189400: step: 644/464, loss: 0.026712315157055855 2023-01-22 13:30:24.751593: step: 646/464, loss: 0.00017385899263899773 2023-01-22 13:30:25.400084: step: 648/464, loss: 0.0314791165292263 2023-01-22 13:30:25.984430: step: 650/464, loss: 0.002847842639312148 2023-01-22 13:30:26.621186: step: 652/464, loss: 0.003370642429217696 2023-01-22 13:30:27.254220: step: 654/464, loss: 0.022391922771930695 2023-01-22 13:30:27.927344: step: 656/464, loss: 0.001920976908877492 2023-01-22 13:30:28.522243: step: 658/464, loss: 4.001665365649387e-05 2023-01-22 13:30:29.083954: step: 660/464, loss: 0.09575840830802917 2023-01-22 13:30:29.669833: step: 662/464, loss: 0.0033561275340616703 2023-01-22 13:30:30.199632: step: 664/464, loss: 0.007227852009236813 2023-01-22 13:30:30.834407: step: 666/464, loss: 0.016562720760703087 2023-01-22 13:30:31.428861: step: 668/464, loss: 0.04740128293633461 2023-01-22 13:30:32.113346: step: 670/464, loss: 0.01998371258378029 2023-01-22 13:30:32.701177: step: 672/464, loss: 0.010545728728175163 2023-01-22 13:30:33.325597: step: 674/464, loss: 0.029419176280498505 2023-01-22 13:30:33.997017: step: 676/464, loss: 0.00707714119926095 2023-01-22 13:30:34.588732: step: 678/464, loss: 0.0011200368171557784 2023-01-22 13:30:35.226508: step: 680/464, loss: 0.4056033790111542 2023-01-22 13:30:35.867394: step: 682/464, loss: 0.2395782321691513 2023-01-22 13:30:36.527985: step: 684/464, loss: 0.02981843799352646 2023-01-22 13:30:37.175655: step: 686/464, loss: 0.0024933991953730583 2023-01-22 13:30:37.754234: step: 688/464, loss: 0.0015063255559653044 2023-01-22 13:30:38.396392: step: 690/464, loss: 0.1298820823431015 2023-01-22 13:30:39.008285: step: 692/464, loss: 0.011537283658981323 2023-01-22 13:30:39.588725: step: 694/464, loss: 0.025633297860622406 2023-01-22 13:30:40.222058: step: 696/464, loss: 0.18669280409812927 2023-01-22 13:30:40.888302: step: 698/464, loss: 0.0602891631424427 2023-01-22 13:30:41.453991: step: 
700/464, loss: 0.018188297748565674 2023-01-22 13:30:42.086665: step: 702/464, loss: 0.07520942389965057 2023-01-22 13:30:42.676158: step: 704/464, loss: 0.017319330945611 2023-01-22 13:30:43.281483: step: 706/464, loss: 0.034891169518232346 2023-01-22 13:30:43.872001: step: 708/464, loss: 0.0018907490884885192 2023-01-22 13:30:44.538797: step: 710/464, loss: 0.005784806329756975 2023-01-22 13:30:45.089511: step: 712/464, loss: 0.0198514387011528 2023-01-22 13:30:45.674704: step: 714/464, loss: 0.018424250185489655 2023-01-22 13:30:46.341412: step: 716/464, loss: 0.02576695941388607 2023-01-22 13:30:46.982681: step: 718/464, loss: 0.020191747695207596 2023-01-22 13:30:47.574388: step: 720/464, loss: 0.006152929272502661 2023-01-22 13:30:48.135782: step: 722/464, loss: 0.022174587473273277 2023-01-22 13:30:48.683100: step: 724/464, loss: 0.002321321051567793 2023-01-22 13:30:49.329444: step: 726/464, loss: 0.0443158783018589 2023-01-22 13:30:50.077410: step: 728/464, loss: 0.031350310891866684 2023-01-22 13:30:50.683936: step: 730/464, loss: 0.13132141530513763 2023-01-22 13:30:51.363178: step: 732/464, loss: 0.12540392577648163 2023-01-22 13:30:51.965032: step: 734/464, loss: 3.791404378716834e-05 2023-01-22 13:30:52.568981: step: 736/464, loss: 0.0021358360536396503 2023-01-22 13:30:53.202333: step: 738/464, loss: 0.044767990708351135 2023-01-22 13:30:53.793840: step: 740/464, loss: 0.03425537794828415 2023-01-22 13:30:54.415375: step: 742/464, loss: 0.004247912671416998 2023-01-22 13:30:55.014022: step: 744/464, loss: 0.09336188435554504 2023-01-22 13:30:55.650742: step: 746/464, loss: 0.01353756058961153 2023-01-22 13:30:56.355081: step: 748/464, loss: 0.0011425215052440763 2023-01-22 13:30:57.016218: step: 750/464, loss: 0.03342825174331665 2023-01-22 13:30:57.649890: step: 752/464, loss: 0.03215102478861809 2023-01-22 13:30:58.288548: step: 754/464, loss: 0.0020427191630005836 2023-01-22 13:30:58.933784: step: 756/464, loss: 0.040538668632507324 2023-01-22 13:30:59.607604: step: 758/464, loss: 0.03943773731589317 2023-01-22 13:31:00.261549: step: 760/464, loss: 0.05264481529593468 2023-01-22 13:31:00.866973: step: 762/464, loss: 0.011125179007649422 2023-01-22 13:31:01.567628: step: 764/464, loss: 0.008942645974457264 2023-01-22 13:31:02.194321: step: 766/464, loss: 0.02644633874297142 2023-01-22 13:31:02.871953: step: 768/464, loss: 0.018786994740366936 2023-01-22 13:31:03.482038: step: 770/464, loss: 0.010282132774591446 2023-01-22 13:31:04.118830: step: 772/464, loss: 0.010151483118534088 2023-01-22 13:31:04.752080: step: 774/464, loss: 0.032033149152994156 2023-01-22 13:31:05.343599: step: 776/464, loss: 0.009923784993588924 2023-01-22 13:31:06.026917: step: 778/464, loss: 0.0010054961312562227 2023-01-22 13:31:06.630590: step: 780/464, loss: 0.013663525693118572 2023-01-22 13:31:07.312131: step: 782/464, loss: 0.01674613729119301 2023-01-22 13:31:07.929498: step: 784/464, loss: 0.003713731886819005 2023-01-22 13:31:08.604969: step: 786/464, loss: 0.006112845614552498 2023-01-22 13:31:09.229141: step: 788/464, loss: 0.04470454528927803 2023-01-22 13:31:09.888229: step: 790/464, loss: 0.0272509828209877 2023-01-22 13:31:10.591481: step: 792/464, loss: 0.02032126858830452 2023-01-22 13:31:11.243839: step: 794/464, loss: 0.11515185981988907 2023-01-22 13:31:11.834506: step: 796/464, loss: 0.0023762150667607784 2023-01-22 13:31:12.480935: step: 798/464, loss: 0.02724236063659191 2023-01-22 13:31:13.080863: step: 800/464, loss: 0.008190082386136055 2023-01-22 13:31:13.644327: step: 
802/464, loss: 0.000902436557225883 2023-01-22 13:31:14.296378: step: 804/464, loss: 0.2506117522716522 2023-01-22 13:31:14.910918: step: 806/464, loss: 0.019741175696253777 2023-01-22 13:31:15.553635: step: 808/464, loss: 0.0019921723287552595 2023-01-22 13:31:16.192688: step: 810/464, loss: 0.04300304129719734 2023-01-22 13:31:16.806194: step: 812/464, loss: 0.01057429052889347 2023-01-22 13:31:17.401917: step: 814/464, loss: 0.015183776617050171 2023-01-22 13:31:18.027123: step: 816/464, loss: 0.05513448268175125 2023-01-22 13:31:18.626848: step: 818/464, loss: 0.014224525541067123 2023-01-22 13:31:19.242773: step: 820/464, loss: 0.029979640617966652 2023-01-22 13:31:19.844651: step: 822/464, loss: 0.017401661723852158 2023-01-22 13:31:20.456231: step: 824/464, loss: 0.03680138662457466 2023-01-22 13:31:21.029333: step: 826/464, loss: 0.04820271208882332 2023-01-22 13:31:21.662125: step: 828/464, loss: 0.02106683887541294 2023-01-22 13:31:22.259725: step: 830/464, loss: 0.0020527937449514866 2023-01-22 13:31:22.872631: step: 832/464, loss: 0.023780956864356995 2023-01-22 13:31:23.452688: step: 834/464, loss: 0.004647578578442335 2023-01-22 13:31:24.105341: step: 836/464, loss: 0.00931186880916357 2023-01-22 13:31:24.701136: step: 838/464, loss: 0.04925067722797394 2023-01-22 13:31:25.299754: step: 840/464, loss: 0.05159972980618477 2023-01-22 13:31:25.943935: step: 842/464, loss: 0.018002033233642578 2023-01-22 13:31:26.534822: step: 844/464, loss: 0.01309546921402216 2023-01-22 13:31:27.173611: step: 846/464, loss: 0.20000101625919342 2023-01-22 13:31:27.767624: step: 848/464, loss: 0.027313530445098877 2023-01-22 13:31:28.360311: step: 850/464, loss: 0.039358749985694885 2023-01-22 13:31:29.159092: step: 852/464, loss: 0.02418423257768154 2023-01-22 13:31:29.782818: step: 854/464, loss: 0.2089696079492569 2023-01-22 13:31:30.598564: step: 856/464, loss: 0.005666371434926987 2023-01-22 13:31:31.269763: step: 858/464, loss: 0.3731476366519928 2023-01-22 13:31:31.866302: step: 860/464, loss: 0.007269079331308603 2023-01-22 13:31:32.526384: step: 862/464, loss: 0.0024322133976966143 2023-01-22 13:31:33.173487: step: 864/464, loss: 0.07621672749519348 2023-01-22 13:31:33.751962: step: 866/464, loss: 0.0018724174005910754 2023-01-22 13:31:34.357326: step: 868/464, loss: 0.16458921134471893 2023-01-22 13:31:35.000959: step: 870/464, loss: 0.008762835524976254 2023-01-22 13:31:35.636125: step: 872/464, loss: 0.035141076892614365 2023-01-22 13:31:36.264612: step: 874/464, loss: 0.008137132972478867 2023-01-22 13:31:36.829921: step: 876/464, loss: 0.045945920050144196 2023-01-22 13:31:37.458310: step: 878/464, loss: 0.009523745626211166 2023-01-22 13:31:38.111979: step: 880/464, loss: 0.0029311825055629015 2023-01-22 13:31:38.754445: step: 882/464, loss: 0.002755506196990609 2023-01-22 13:31:39.406961: step: 884/464, loss: 0.030550362542271614 2023-01-22 13:31:40.042527: step: 886/464, loss: 0.0654936134815216 2023-01-22 13:31:40.631204: step: 888/464, loss: 0.02397008426487446 2023-01-22 13:31:41.296724: step: 890/464, loss: 0.0029233875684440136 2023-01-22 13:31:41.908515: step: 892/464, loss: 0.011992864310741425 2023-01-22 13:31:42.550565: step: 894/464, loss: 0.036842696368694305 2023-01-22 13:31:43.130590: step: 896/464, loss: 0.004817049019038677 2023-01-22 13:31:43.782773: step: 898/464, loss: 0.003490469418466091 2023-01-22 13:31:44.379153: step: 900/464, loss: 0.07046039402484894 2023-01-22 13:31:45.112310: step: 902/464, loss: 0.0031182370148599148 2023-01-22 13:31:45.693397: step: 
904/464, loss: 0.011086697690188885 2023-01-22 13:31:46.356523: step: 906/464, loss: 0.004499399568885565 2023-01-22 13:31:46.972780: step: 908/464, loss: 0.0021340511739253998 2023-01-22 13:31:47.657587: step: 910/464, loss: 0.03975437954068184 2023-01-22 13:31:48.394217: step: 912/464, loss: 0.0034305299632251263 2023-01-22 13:31:49.006990: step: 914/464, loss: 0.0036547647323459387 2023-01-22 13:31:49.579190: step: 916/464, loss: 0.017508070915937424 2023-01-22 13:31:50.250764: step: 918/464, loss: 0.050328031182289124 2023-01-22 13:31:50.916178: step: 920/464, loss: 0.07117703557014465 2023-01-22 13:31:51.542631: step: 922/464, loss: 0.005616291426122189 2023-01-22 13:31:52.155131: step: 924/464, loss: 0.0036431937478482723 2023-01-22 13:31:52.931215: step: 926/464, loss: 0.012959247455000877 2023-01-22 13:31:53.567732: step: 928/464, loss: 0.062261439859867096 2023-01-22 13:31:54.052356: step: 930/464, loss: 0.007322330493479967 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2896633333333334, 'r': 0.34352862112586974, 'f1': 0.31430483217592603}, 'combined': 0.23159303423489286, 'epoch': 30} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30168216618764226, 'r': 0.3299388980068705, 'f1': 0.3151784736223526}, 'combined': 0.20576418485189857, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.286516818411735, 'r': 0.3582819418089817, 'f1': 0.31840570545250146}, 'combined': 0.23461473033342212, 'epoch': 30} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3170267368463243, 'r': 0.3315959802675708, 'f1': 0.3241477327323603}, 'combined': 0.21161976333822485, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30367943548387094, 'r': 0.35726992409867175, 'f1': 0.3283020924149957}, 'combined': 0.24190680493736524, 'epoch': 30} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3117668284970213, 'r': 0.32036334031219654, 'f1': 0.31600663124176775}, 'combined': 0.20630484733918517, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20698924731182794, 'r': 0.36666666666666664, 'f1': 0.2646048109965636}, 'combined': 0.17640320733104237, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.19186046511627908, 'r': 0.358695652173913, 'f1': 0.25000000000000006}, 'combined': 0.12500000000000003, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42105263157894735, 'r': 0.27586206896551724, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} 
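Across every summary block in this log, the reported 'f1' is the harmonic mean of the reported 'p' and 'r', and the 'combined' score matches the product of the template F1 and the slot F1; for example, in the epoch 30 Test Chinese block, 0.6528497409326425 x 0.3151784736223526 is approximately 0.20576, which is the 'combined' value printed there. A small sketch of that relationship; the helper names f1 and combined_score are illustrative and do not come from train.py:

# Illustrative only; names are not taken from train.py. In every summary
# block of this log, 'f1' equals the harmonic mean of 'p' and 'r', and
# 'combined' equals template_f1 * slot_f1.
def f1(p: float, r: float) -> float:
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template_f1: float, slot_f1: float) -> float:
    return template_f1 * slot_f1

# Check against epoch 30, Test Chinese:
# f1(0.9692307692307692, 0.4921875)      -> 0.65284974... (the 'template' f1)
# combined_score(0.6528497409326425,
#                0.3151784736223526)     -> 0.20576418... (the 'combined' value)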
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:34:32.203640: step: 2/464, loss: 0.012801757082343102 2023-01-22 13:34:32.776733: step: 4/464, loss: 0.0031697582453489304 2023-01-22 13:34:33.406505: step: 6/464, loss: 0.20902244746685028 2023-01-22 13:34:34.033788: step: 8/464, loss: 0.008833828382194042 2023-01-22 13:34:34.654138: step: 10/464, loss: 0.0024788305163383484 2023-01-22 13:34:35.271680: step: 12/464, loss: 0.011560603976249695 2023-01-22 13:34:35.898731: step: 14/464, loss: 0.0013002302730455995 2023-01-22 13:34:36.495634: step: 16/464, loss: 0.000876986188814044 2023-01-22 13:34:37.105738: step: 18/464, loss: 0.008125225082039833 2023-01-22 13:34:37.690568: step: 20/464, loss: 0.004483926109969616 2023-01-22 13:34:38.318888: step: 22/464, loss: 0.3768042325973511 2023-01-22 13:34:38.960175: step: 24/464, loss: 0.0006819473928771913 2023-01-22 13:34:39.517741: step: 26/464, loss: 0.0023310992401093245 2023-01-22 13:34:40.121129: step: 28/464, loss: 0.021799221634864807 2023-01-22 13:34:40.719137: step: 30/464, loss: 0.0017524950671941042 2023-01-22 13:34:41.299630: step: 32/464, loss: 0.08368723094463348 2023-01-22 13:34:41.970401: step: 34/464, loss: 0.025992590934038162 2023-01-22 13:34:42.569894: step: 36/464, loss: 0.001605651923455298 2023-01-22 13:34:43.180153: step: 38/464, loss: 0.03241652250289917 2023-01-22 13:34:43.850277: step: 40/464, loss: 0.025575609877705574 2023-01-22 13:34:44.510057: step: 42/464, loss: 0.04545750468969345 2023-01-22 13:34:45.099890: step: 44/464, loss: 0.08779991418123245 2023-01-22 13:34:45.727647: step: 46/464, loss: 0.0172318946570158 2023-01-22 13:34:46.389963: step: 48/464, loss: 0.0016695542726665735 2023-01-22 13:34:47.000916: step: 50/464, loss: 0.002183513715863228 2023-01-22 13:34:47.653515: step: 
52/464, loss: 0.01340510230511427 2023-01-22 13:34:48.225170: step: 54/464, loss: 0.003130425699055195 2023-01-22 13:34:48.919722: step: 56/464, loss: 0.004296897444874048 2023-01-22 13:34:49.542203: step: 58/464, loss: 0.007053975015878677 2023-01-22 13:34:50.216792: step: 60/464, loss: 0.022943077608942986 2023-01-22 13:34:50.799327: step: 62/464, loss: 0.03398842364549637 2023-01-22 13:34:51.435021: step: 64/464, loss: 0.0008252724655903876 2023-01-22 13:34:52.032151: step: 66/464, loss: 0.00969845149666071 2023-01-22 13:34:52.608511: step: 68/464, loss: 0.004528787452727556 2023-01-22 13:34:53.229507: step: 70/464, loss: 0.004442054778337479 2023-01-22 13:34:53.864962: step: 72/464, loss: 0.024034662172198296 2023-01-22 13:34:54.494059: step: 74/464, loss: 0.008690690621733665 2023-01-22 13:34:55.119448: step: 76/464, loss: 0.07943525910377502 2023-01-22 13:34:55.714745: step: 78/464, loss: 0.16995173692703247 2023-01-22 13:34:56.332557: step: 80/464, loss: 0.00436097476631403 2023-01-22 13:34:56.897832: step: 82/464, loss: 0.0009877000702545047 2023-01-22 13:34:57.552920: step: 84/464, loss: 0.03591643646359444 2023-01-22 13:34:58.141496: step: 86/464, loss: 0.006157433148473501 2023-01-22 13:34:58.833177: step: 88/464, loss: 0.054600995033979416 2023-01-22 13:34:59.426316: step: 90/464, loss: 0.007441329304128885 2023-01-22 13:35:00.026187: step: 92/464, loss: 0.02023962140083313 2023-01-22 13:35:00.635968: step: 94/464, loss: 0.028564514592289925 2023-01-22 13:35:01.259078: step: 96/464, loss: 0.01373264566063881 2023-01-22 13:35:01.941356: step: 98/464, loss: 0.0034415097907185555 2023-01-22 13:35:02.531292: step: 100/464, loss: 0.0004931734292767942 2023-01-22 13:35:03.091423: step: 102/464, loss: 0.654776930809021 2023-01-22 13:35:03.726963: step: 104/464, loss: 0.0618969202041626 2023-01-22 13:35:04.317670: step: 106/464, loss: 0.01492062583565712 2023-01-22 13:35:04.906893: step: 108/464, loss: 0.01146434061229229 2023-01-22 13:35:05.512912: step: 110/464, loss: 0.3723164498806 2023-01-22 13:35:06.132490: step: 112/464, loss: 0.025970859453082085 2023-01-22 13:35:06.810987: step: 114/464, loss: 0.0003878195711877197 2023-01-22 13:35:07.421467: step: 116/464, loss: 0.01065858080983162 2023-01-22 13:35:08.107695: step: 118/464, loss: 0.0033834916539490223 2023-01-22 13:35:08.684722: step: 120/464, loss: 0.025389622896909714 2023-01-22 13:35:09.303230: step: 122/464, loss: 0.0264760572463274 2023-01-22 13:35:09.936640: step: 124/464, loss: 8.322868234245107e-05 2023-01-22 13:35:10.518388: step: 126/464, loss: 0.0043555330485105515 2023-01-22 13:35:11.151978: step: 128/464, loss: 0.024566804990172386 2023-01-22 13:35:11.761548: step: 130/464, loss: 0.0017508940072730184 2023-01-22 13:35:12.411914: step: 132/464, loss: 0.023766344413161278 2023-01-22 13:35:13.068402: step: 134/464, loss: 0.008885723538696766 2023-01-22 13:35:13.658058: step: 136/464, loss: 0.012811705470085144 2023-01-22 13:35:14.247311: step: 138/464, loss: 0.011576555669307709 2023-01-22 13:35:14.910706: step: 140/464, loss: 0.0056231520138680935 2023-01-22 13:35:15.565157: step: 142/464, loss: 0.02179417386651039 2023-01-22 13:35:16.156317: step: 144/464, loss: 0.0001759190927259624 2023-01-22 13:35:16.785750: step: 146/464, loss: 1.1809196472167969 2023-01-22 13:35:17.371123: step: 148/464, loss: 0.015272276476025581 2023-01-22 13:35:18.036106: step: 150/464, loss: 0.005801384802907705 2023-01-22 13:35:18.675861: step: 152/464, loss: 0.003080572932958603 2023-01-22 13:35:19.260522: step: 154/464, loss: 
0.003637011395767331 2023-01-22 13:35:19.911732: step: 156/464, loss: 0.03135967627167702 2023-01-22 13:35:20.632073: step: 158/464, loss: 0.18661335110664368 2023-01-22 13:35:21.244568: step: 160/464, loss: 0.03001299314200878 2023-01-22 13:35:21.871915: step: 162/464, loss: 0.017466213554143906 2023-01-22 13:35:22.454807: step: 164/464, loss: 0.03128621727228165 2023-01-22 13:35:23.104954: step: 166/464, loss: 0.006187606602907181 2023-01-22 13:35:23.650483: step: 168/464, loss: 0.00043134059524163604 2023-01-22 13:35:24.249716: step: 170/464, loss: 0.006160136312246323 2023-01-22 13:35:24.913846: step: 172/464, loss: 0.019473716616630554 2023-01-22 13:35:25.597221: step: 174/464, loss: 0.010255226865410805 2023-01-22 13:35:26.267666: step: 176/464, loss: 0.06173697113990784 2023-01-22 13:35:26.872218: step: 178/464, loss: 0.03466442972421646 2023-01-22 13:35:27.469259: step: 180/464, loss: 0.014254294335842133 2023-01-22 13:35:28.053214: step: 182/464, loss: 0.010866356082260609 2023-01-22 13:35:28.695572: step: 184/464, loss: 0.026608051732182503 2023-01-22 13:35:29.253476: step: 186/464, loss: 0.09039905667304993 2023-01-22 13:35:29.841737: step: 188/464, loss: 0.005921590141952038 2023-01-22 13:35:30.506830: step: 190/464, loss: 0.06350155919790268 2023-01-22 13:35:31.062143: step: 192/464, loss: 0.0011387598933652043 2023-01-22 13:35:31.593982: step: 194/464, loss: 0.8153964877128601 2023-01-22 13:35:32.197674: step: 196/464, loss: 0.01737302727997303 2023-01-22 13:35:32.811138: step: 198/464, loss: 0.00207641557790339 2023-01-22 13:35:33.417885: step: 200/464, loss: 0.003981293644756079 2023-01-22 13:35:34.064457: step: 202/464, loss: 0.11159694194793701 2023-01-22 13:35:34.622937: step: 204/464, loss: 0.02585158683359623 2023-01-22 13:35:35.193911: step: 206/464, loss: 0.2752548158168793 2023-01-22 13:35:35.853001: step: 208/464, loss: 0.00791968870908022 2023-01-22 13:35:36.444444: step: 210/464, loss: 0.00585110392421484 2023-01-22 13:35:37.064750: step: 212/464, loss: 0.011281449347734451 2023-01-22 13:35:37.635789: step: 214/464, loss: 0.1474539339542389 2023-01-22 13:35:38.213550: step: 216/464, loss: 0.016068890690803528 2023-01-22 13:35:38.882233: step: 218/464, loss: 0.45983371138572693 2023-01-22 13:35:39.464517: step: 220/464, loss: 0.01438911259174347 2023-01-22 13:35:40.134261: step: 222/464, loss: 0.6527339220046997 2023-01-22 13:35:40.721698: step: 224/464, loss: 0.0012176393065601587 2023-01-22 13:35:41.352076: step: 226/464, loss: 0.0028944960795342922 2023-01-22 13:35:41.984975: step: 228/464, loss: 0.09391969442367554 2023-01-22 13:35:42.626895: step: 230/464, loss: 0.006250654812902212 2023-01-22 13:35:43.214603: step: 232/464, loss: 0.02643810398876667 2023-01-22 13:35:43.901241: step: 234/464, loss: 0.008487485349178314 2023-01-22 13:35:44.456145: step: 236/464, loss: 0.0020647584460675716 2023-01-22 13:35:45.032081: step: 238/464, loss: 0.0007894524023868144 2023-01-22 13:35:45.612519: step: 240/464, loss: 0.017385359853506088 2023-01-22 13:35:46.218987: step: 242/464, loss: 0.001665014191530645 2023-01-22 13:35:46.791346: step: 244/464, loss: 4.0113471186487004e-05 2023-01-22 13:35:47.359480: step: 246/464, loss: 0.012017929926514626 2023-01-22 13:35:47.923192: step: 248/464, loss: 0.04166106879711151 2023-01-22 13:35:48.699215: step: 250/464, loss: 0.00880045723170042 2023-01-22 13:35:49.277407: step: 252/464, loss: 0.09647093713283539 2023-01-22 13:35:49.838827: step: 254/464, loss: 0.031999941915273666 2023-01-22 13:35:50.413407: step: 256/464, loss: 
0.017220599576830864 2023-01-22 13:35:51.044919: step: 258/464, loss: 0.005170927383005619 2023-01-22 13:35:51.699158: step: 260/464, loss: 0.051612719893455505 2023-01-22 13:35:52.240507: step: 262/464, loss: 0.019278405234217644 2023-01-22 13:35:53.024560: step: 264/464, loss: 0.007655612658709288 2023-01-22 13:35:53.652482: step: 266/464, loss: 0.002190679544582963 2023-01-22 13:35:54.217858: step: 268/464, loss: 0.0022087539546191692 2023-01-22 13:35:54.795444: step: 270/464, loss: 0.0024045798927545547 2023-01-22 13:35:55.350754: step: 272/464, loss: 0.11145664751529694 2023-01-22 13:35:56.057580: step: 274/464, loss: 0.08271219581365585 2023-01-22 13:35:56.663403: step: 276/464, loss: 0.012441127561032772 2023-01-22 13:35:57.278751: step: 278/464, loss: 0.1865144520998001 2023-01-22 13:35:57.902476: step: 280/464, loss: 0.0013879581820219755 2023-01-22 13:35:58.502465: step: 282/464, loss: 0.028617314994335175 2023-01-22 13:35:59.121549: step: 284/464, loss: 0.004597559105604887 2023-01-22 13:35:59.782530: step: 286/464, loss: 0.03204414248466492 2023-01-22 13:36:00.450190: step: 288/464, loss: 0.16663306951522827 2023-01-22 13:36:01.075943: step: 290/464, loss: 0.006491425447165966 2023-01-22 13:36:01.731430: step: 292/464, loss: 0.05674513056874275 2023-01-22 13:36:02.360011: step: 294/464, loss: 0.019246671348810196 2023-01-22 13:36:03.049871: step: 296/464, loss: 0.0095362588763237 2023-01-22 13:36:03.662883: step: 298/464, loss: 0.49789726734161377 2023-01-22 13:36:04.296817: step: 300/464, loss: 0.0010461852652952075 2023-01-22 13:36:04.954564: step: 302/464, loss: 0.0013769067591056228 2023-01-22 13:36:05.570065: step: 304/464, loss: 0.010894766077399254 2023-01-22 13:36:06.175009: step: 306/464, loss: 0.007526957895606756 2023-01-22 13:36:06.759696: step: 308/464, loss: 0.11928742378950119 2023-01-22 13:36:07.396094: step: 310/464, loss: 0.03182828798890114 2023-01-22 13:36:08.009211: step: 312/464, loss: 0.0009552632691338658 2023-01-22 13:36:08.572987: step: 314/464, loss: 0.007491611409932375 2023-01-22 13:36:09.219739: step: 316/464, loss: 0.0003158711770083755 2023-01-22 13:36:09.827318: step: 318/464, loss: 0.14994694292545319 2023-01-22 13:36:10.454832: step: 320/464, loss: 0.022585401311516762 2023-01-22 13:36:11.070466: step: 322/464, loss: 0.021226288750767708 2023-01-22 13:36:11.833564: step: 324/464, loss: 0.007166236639022827 2023-01-22 13:36:12.422468: step: 326/464, loss: 0.00879592727869749 2023-01-22 13:36:13.062185: step: 328/464, loss: 0.03222249075770378 2023-01-22 13:36:13.582073: step: 330/464, loss: 0.003964867442846298 2023-01-22 13:36:14.223818: step: 332/464, loss: 0.0036193111445754766 2023-01-22 13:36:14.824459: step: 334/464, loss: 0.01711282692849636 2023-01-22 13:36:15.453465: step: 336/464, loss: 0.017109058797359467 2023-01-22 13:36:16.048201: step: 338/464, loss: 0.010447176173329353 2023-01-22 13:36:16.680013: step: 340/464, loss: 0.0787728950381279 2023-01-22 13:36:17.284229: step: 342/464, loss: 0.02096523903310299 2023-01-22 13:36:17.936507: step: 344/464, loss: 0.04040298983454704 2023-01-22 13:36:18.552248: step: 346/464, loss: 0.03919469192624092 2023-01-22 13:36:19.225561: step: 348/464, loss: 0.6271852850914001 2023-01-22 13:36:19.825280: step: 350/464, loss: 0.00023240085283759981 2023-01-22 13:36:20.454168: step: 352/464, loss: 0.03590834513306618 2023-01-22 13:36:21.153197: step: 354/464, loss: 0.011127419769763947 2023-01-22 13:36:21.758999: step: 356/464, loss: 0.004278120584785938 2023-01-22 13:36:22.357693: step: 358/464, 
loss: 0.015366523526608944 2023-01-22 13:36:22.915901: step: 360/464, loss: 0.012401978485286236 2023-01-22 13:36:23.493636: step: 362/464, loss: 0.08308908343315125 2023-01-22 13:36:24.094246: step: 364/464, loss: 0.06377576291561127 2023-01-22 13:36:24.695680: step: 366/464, loss: 0.002781669842079282 2023-01-22 13:36:25.313042: step: 368/464, loss: 0.0010306322947144508 2023-01-22 13:36:25.936898: step: 370/464, loss: 0.012695304118096828 2023-01-22 13:36:26.641404: step: 372/464, loss: 0.043762415647506714 2023-01-22 13:36:27.298604: step: 374/464, loss: 0.0017013137694448233 2023-01-22 13:36:27.925426: step: 376/464, loss: 0.03337812051177025 2023-01-22 13:36:28.524233: step: 378/464, loss: 0.0051479581743478775 2023-01-22 13:36:29.206284: step: 380/464, loss: 0.032452017068862915 2023-01-22 13:36:29.846009: step: 382/464, loss: 0.010491715744137764 2023-01-22 13:36:30.508641: step: 384/464, loss: 0.03126560151576996 2023-01-22 13:36:31.161934: step: 386/464, loss: 0.0004801799077540636 2023-01-22 13:36:31.792274: step: 388/464, loss: 0.0002512967330403626 2023-01-22 13:36:32.555959: step: 390/464, loss: 0.00766033073887229 2023-01-22 13:36:33.282766: step: 392/464, loss: 0.04300985857844353 2023-01-22 13:36:33.870137: step: 394/464, loss: 0.009559862315654755 2023-01-22 13:36:34.478649: step: 396/464, loss: 0.0011807261034846306 2023-01-22 13:36:35.067750: step: 398/464, loss: 0.009414239786565304 2023-01-22 13:36:35.651969: step: 400/464, loss: 0.0029089900199323893 2023-01-22 13:36:36.258874: step: 402/464, loss: 0.0033333939500153065 2023-01-22 13:36:36.904807: step: 404/464, loss: 0.04904211685061455 2023-01-22 13:36:37.559214: step: 406/464, loss: 0.009782656095921993 2023-01-22 13:36:38.249144: step: 408/464, loss: 0.07015188038349152 2023-01-22 13:36:38.791956: step: 410/464, loss: 0.004249213729053736 2023-01-22 13:36:39.361594: step: 412/464, loss: 0.006609804462641478 2023-01-22 13:36:39.959836: step: 414/464, loss: 0.0011780187487602234 2023-01-22 13:36:40.602380: step: 416/464, loss: 0.18723560869693756 2023-01-22 13:36:41.225090: step: 418/464, loss: 0.00481009716168046 2023-01-22 13:36:41.834101: step: 420/464, loss: 0.01568804867565632 2023-01-22 13:36:42.451943: step: 422/464, loss: 0.06890048086643219 2023-01-22 13:36:43.072683: step: 424/464, loss: 0.01726832427084446 2023-01-22 13:36:43.649520: step: 426/464, loss: 0.0030659439507871866 2023-01-22 13:36:44.282259: step: 428/464, loss: 0.009536556899547577 2023-01-22 13:36:45.010778: step: 430/464, loss: 0.020365754142403603 2023-01-22 13:36:45.638646: step: 432/464, loss: 0.06949149072170258 2023-01-22 13:36:46.293055: step: 434/464, loss: 0.01435832865536213 2023-01-22 13:36:46.953575: step: 436/464, loss: 0.016788696870207787 2023-01-22 13:36:47.556921: step: 438/464, loss: 0.17906318604946136 2023-01-22 13:36:48.142419: step: 440/464, loss: 0.0181912649422884 2023-01-22 13:36:48.750400: step: 442/464, loss: 0.10065846145153046 2023-01-22 13:36:49.425890: step: 444/464, loss: 0.0006792846834287047 2023-01-22 13:36:50.008744: step: 446/464, loss: 0.027612632140517235 2023-01-22 13:36:50.618335: step: 448/464, loss: 0.019827254116535187 2023-01-22 13:36:51.276438: step: 450/464, loss: 0.017565179616212845 2023-01-22 13:36:51.887656: step: 452/464, loss: 0.003037205198779702 2023-01-22 13:36:52.489013: step: 454/464, loss: 0.0018163080094382167 2023-01-22 13:36:53.135583: step: 456/464, loss: 0.01641642488539219 2023-01-22 13:36:53.738636: step: 458/464, loss: 0.0009892077650874853 2023-01-22 13:36:54.397351: step: 
460/464, loss: 0.0039025216829031706 2023-01-22 13:36:55.090037: step: 462/464, loss: 0.035910461097955704 2023-01-22 13:36:55.666432: step: 464/464, loss: 0.41302695870399475 2023-01-22 13:36:56.226211: step: 466/464, loss: 0.027785949409008026 2023-01-22 13:36:56.894947: step: 468/464, loss: 0.06098407134413719 2023-01-22 13:36:57.557258: step: 470/464, loss: 0.06805586069822311 2023-01-22 13:36:58.248419: step: 472/464, loss: 0.0003168827679473907 2023-01-22 13:36:58.883082: step: 474/464, loss: 0.017504960298538208 2023-01-22 13:36:59.514913: step: 476/464, loss: 0.004010757897049189 2023-01-22 13:37:00.189487: step: 478/464, loss: 0.0031999624334275723 2023-01-22 13:37:00.824969: step: 480/464, loss: 0.006119747180491686 2023-01-22 13:37:01.462286: step: 482/464, loss: 0.02626779116690159 2023-01-22 13:37:02.104094: step: 484/464, loss: 0.01521299872547388 2023-01-22 13:37:02.754345: step: 486/464, loss: 0.008856347762048244 2023-01-22 13:37:03.370434: step: 488/464, loss: 0.007745738606899977 2023-01-22 13:37:04.034549: step: 490/464, loss: 0.016153812408447266 2023-01-22 13:37:04.639335: step: 492/464, loss: 0.030608683824539185 2023-01-22 13:37:05.284596: step: 494/464, loss: 0.006272049155086279 2023-01-22 13:37:05.948138: step: 496/464, loss: 0.0008258870802819729 2023-01-22 13:37:06.668342: step: 498/464, loss: 0.02096143178641796 2023-01-22 13:37:07.287945: step: 500/464, loss: 0.031219899654388428 2023-01-22 13:37:07.935159: step: 502/464, loss: 0.14262311160564423 2023-01-22 13:37:08.524160: step: 504/464, loss: 0.046533871442079544 2023-01-22 13:37:09.139506: step: 506/464, loss: 0.07897719740867615 2023-01-22 13:37:09.765849: step: 508/464, loss: 0.03483173996210098 2023-01-22 13:37:10.401093: step: 510/464, loss: 0.22256095707416534 2023-01-22 13:37:10.998260: step: 512/464, loss: 0.021794581785798073 2023-01-22 13:37:11.644419: step: 514/464, loss: 0.001026372192427516 2023-01-22 13:37:12.378272: step: 516/464, loss: 0.004475842230021954 2023-01-22 13:37:12.977684: step: 518/464, loss: 0.025761837139725685 2023-01-22 13:37:13.602802: step: 520/464, loss: 0.020630907267332077 2023-01-22 13:37:14.181086: step: 522/464, loss: 0.004385307896882296 2023-01-22 13:37:14.864865: step: 524/464, loss: 0.02225436642765999 2023-01-22 13:37:15.502915: step: 526/464, loss: 0.020955931395292282 2023-01-22 13:37:16.117901: step: 528/464, loss: 0.0035734125413000584 2023-01-22 13:37:16.790739: step: 530/464, loss: 0.19535847008228302 2023-01-22 13:37:17.419213: step: 532/464, loss: 0.00577906658872962 2023-01-22 13:37:18.099388: step: 534/464, loss: 0.1850377768278122 2023-01-22 13:37:18.815340: step: 536/464, loss: 0.0009395240340381861 2023-01-22 13:37:19.430751: step: 538/464, loss: 0.002650128910318017 2023-01-22 13:37:20.056645: step: 540/464, loss: 0.00598529726266861 2023-01-22 13:37:20.710314: step: 542/464, loss: 0.005069917067885399 2023-01-22 13:37:21.415534: step: 544/464, loss: 0.05108673498034477 2023-01-22 13:37:22.044877: step: 546/464, loss: 0.01029582042247057 2023-01-22 13:37:22.686377: step: 548/464, loss: 0.028246359899640083 2023-01-22 13:37:23.253280: step: 550/464, loss: 0.007826417684555054 2023-01-22 13:37:23.895344: step: 552/464, loss: 0.0016339016146957874 2023-01-22 13:37:24.494670: step: 554/464, loss: 0.013940151780843735 2023-01-22 13:37:25.079888: step: 556/464, loss: 0.0003476462443359196 2023-01-22 13:37:25.664087: step: 558/464, loss: 0.002292930381372571 2023-01-22 13:37:26.246711: step: 560/464, loss: 0.0012554709101095796 2023-01-22 13:37:26.899769: 
step: 562/464, loss: 1.4624099731445312 2023-01-22 13:37:27.505329: step: 564/464, loss: 0.0303787998855114 2023-01-22 13:37:28.106186: step: 566/464, loss: 0.011416810564696789 2023-01-22 13:37:28.717864: step: 568/464, loss: 0.0025077073369175196 2023-01-22 13:37:29.443445: step: 570/464, loss: 0.139719158411026 2023-01-22 13:37:29.989251: step: 572/464, loss: 0.010094721801578999 2023-01-22 13:37:30.565022: step: 574/464, loss: 0.0012248513521626592 2023-01-22 13:37:31.232470: step: 576/464, loss: 0.005344181787222624 2023-01-22 13:37:31.782747: step: 578/464, loss: 0.02629689872264862 2023-01-22 13:37:32.437791: step: 580/464, loss: 0.009889775887131691 2023-01-22 13:37:33.029199: step: 582/464, loss: 1.107112257159315e-05 2023-01-22 13:37:33.623779: step: 584/464, loss: 0.0021250001154839993 2023-01-22 13:37:34.231527: step: 586/464, loss: 0.0021111273672431707 2023-01-22 13:37:34.823974: step: 588/464, loss: 0.0018420673441141844 2023-01-22 13:37:35.433918: step: 590/464, loss: 3.5897333873435855e-05 2023-01-22 13:37:36.043229: step: 592/464, loss: 0.02338157594203949 2023-01-22 13:37:36.683817: step: 594/464, loss: 0.026911022141575813 2023-01-22 13:37:37.318152: step: 596/464, loss: 0.01676015369594097 2023-01-22 13:37:37.939971: step: 598/464, loss: 0.0024795825593173504 2023-01-22 13:37:38.526341: step: 600/464, loss: 0.12006665021181107 2023-01-22 13:37:39.144987: step: 602/464, loss: 0.8700026273727417 2023-01-22 13:37:39.774638: step: 604/464, loss: 0.001991113880649209 2023-01-22 13:37:40.410663: step: 606/464, loss: 0.002249934710562229 2023-01-22 13:37:40.986241: step: 608/464, loss: 0.06895353645086288 2023-01-22 13:37:41.523629: step: 610/464, loss: 0.011820238083600998 2023-01-22 13:37:42.184353: step: 612/464, loss: 0.002337306272238493 2023-01-22 13:37:42.779749: step: 614/464, loss: 0.024844679981470108 2023-01-22 13:37:43.385382: step: 616/464, loss: 0.022155219689011574 2023-01-22 13:37:43.964703: step: 618/464, loss: 0.0002563460438977927 2023-01-22 13:37:44.715992: step: 620/464, loss: 0.03686891868710518 2023-01-22 13:37:45.324415: step: 622/464, loss: 0.0467064343392849 2023-01-22 13:37:45.931685: step: 624/464, loss: 0.014262551441788673 2023-01-22 13:37:46.513137: step: 626/464, loss: 0.00019088482076767832 2023-01-22 13:37:47.119661: step: 628/464, loss: 0.025904875248670578 2023-01-22 13:37:47.756136: step: 630/464, loss: 0.008668944239616394 2023-01-22 13:37:48.374825: step: 632/464, loss: 0.014816109091043472 2023-01-22 13:37:49.034804: step: 634/464, loss: 0.11665499955415726 2023-01-22 13:37:49.654934: step: 636/464, loss: 0.01366348098963499 2023-01-22 13:37:50.210148: step: 638/464, loss: 0.007681042887270451 2023-01-22 13:37:50.864326: step: 640/464, loss: 0.005328443832695484 2023-01-22 13:37:51.465162: step: 642/464, loss: 0.03169764205813408 2023-01-22 13:37:52.061965: step: 644/464, loss: 0.01761089451611042 2023-01-22 13:37:52.733914: step: 646/464, loss: 0.007290259003639221 2023-01-22 13:37:53.414532: step: 648/464, loss: 0.051352642476558685 2023-01-22 13:37:54.040433: step: 650/464, loss: 0.018428362905979156 2023-01-22 13:37:54.690298: step: 652/464, loss: 0.11865729838609695 2023-01-22 13:37:55.312359: step: 654/464, loss: 0.16155581176280975 2023-01-22 13:37:55.939539: step: 656/464, loss: 0.8317537903785706 2023-01-22 13:37:56.533410: step: 658/464, loss: 0.0015119991730898619 2023-01-22 13:37:57.186854: step: 660/464, loss: 0.006626639515161514 2023-01-22 13:37:57.808300: step: 662/464, loss: 0.012667542323470116 2023-01-22 
13:37:58.430489: step: 664/464, loss: 0.008510514162480831 2023-01-22 13:37:59.142788: step: 666/464, loss: 0.0040636323392391205 2023-01-22 13:37:59.772781: step: 668/464, loss: 0.11379527300596237 2023-01-22 13:38:00.407498: step: 670/464, loss: 0.011296688579022884 2023-01-22 13:38:01.037929: step: 672/464, loss: 0.0011617145501077175 2023-01-22 13:38:01.643181: step: 674/464, loss: 0.005766835995018482 2023-01-22 13:38:02.392194: step: 676/464, loss: 0.038305796682834625 2023-01-22 13:38:02.973273: step: 678/464, loss: 0.0003724046691786498 2023-01-22 13:38:03.583328: step: 680/464, loss: 0.060129255056381226 2023-01-22 13:38:04.223285: step: 682/464, loss: 0.019781537353992462 2023-01-22 13:38:04.985242: step: 684/464, loss: 0.011479828506708145 2023-01-22 13:38:05.569648: step: 686/464, loss: 0.011218197643756866 2023-01-22 13:38:06.198596: step: 688/464, loss: 0.0019634906202554703 2023-01-22 13:38:06.799601: step: 690/464, loss: 0.017490766942501068 2023-01-22 13:38:07.467505: step: 692/464, loss: 0.01794765330851078 2023-01-22 13:38:08.108513: step: 694/464, loss: 0.01079262513667345 2023-01-22 13:38:08.760588: step: 696/464, loss: 0.007698724512010813 2023-01-22 13:38:09.353651: step: 698/464, loss: 0.001549478736706078 2023-01-22 13:38:09.978025: step: 700/464, loss: 0.0767119973897934 2023-01-22 13:38:10.529844: step: 702/464, loss: 0.037706900388002396 2023-01-22 13:38:11.142307: step: 704/464, loss: 0.08163590729236603 2023-01-22 13:38:11.694149: step: 706/464, loss: 0.015356204472482204 2023-01-22 13:38:12.363446: step: 708/464, loss: 0.1496073603630066 2023-01-22 13:38:13.007006: step: 710/464, loss: 0.024661073461174965 2023-01-22 13:38:13.655649: step: 712/464, loss: 0.048554107546806335 2023-01-22 13:38:14.312330: step: 714/464, loss: 0.010555480606853962 2023-01-22 13:38:14.933480: step: 716/464, loss: 0.1697586625814438 2023-01-22 13:38:15.612854: step: 718/464, loss: 0.008124444633722305 2023-01-22 13:38:16.193583: step: 720/464, loss: 0.014189078472554684 2023-01-22 13:38:16.807832: step: 722/464, loss: 0.013432067818939686 2023-01-22 13:38:17.457495: step: 724/464, loss: 0.002215616637840867 2023-01-22 13:38:18.062980: step: 726/464, loss: 0.00029035957413725555 2023-01-22 13:38:18.708855: step: 728/464, loss: 1.7936588525772095 2023-01-22 13:38:19.329358: step: 730/464, loss: 0.021537071093916893 2023-01-22 13:38:19.919052: step: 732/464, loss: 0.03023180551826954 2023-01-22 13:38:20.488862: step: 734/464, loss: 0.014803903177380562 2023-01-22 13:38:21.033208: step: 736/464, loss: 0.0011444580741226673 2023-01-22 13:38:21.625613: step: 738/464, loss: 0.0055016083642840385 2023-01-22 13:38:22.261259: step: 740/464, loss: 0.02439718320965767 2023-01-22 13:38:22.873969: step: 742/464, loss: 0.0008676930447109044 2023-01-22 13:38:23.442496: step: 744/464, loss: 0.010000188834965229 2023-01-22 13:38:24.153534: step: 746/464, loss: 0.2724458873271942 2023-01-22 13:38:24.768983: step: 748/464, loss: 0.003781168255954981 2023-01-22 13:38:25.356029: step: 750/464, loss: 0.006330311298370361 2023-01-22 13:38:25.944601: step: 752/464, loss: 0.008123427629470825 2023-01-22 13:38:26.558023: step: 754/464, loss: 0.01807366870343685 2023-01-22 13:38:27.147971: step: 756/464, loss: 0.008394982665777206 2023-01-22 13:38:27.744110: step: 758/464, loss: 0.0008706397493369877 2023-01-22 13:38:28.358631: step: 760/464, loss: 0.004383837804198265 2023-01-22 13:38:28.985973: step: 762/464, loss: 0.036036934703588486 2023-01-22 13:38:29.599619: step: 764/464, loss: 0.08744674921035767 
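Every training record in this log follows the same pattern: an ISO-style timestamp, "step: N/464", and a loss value. If the run is saved to a file, the whole loss trace can be recovered offline with a short script. A minimal sketch, assuming the log text has been saved as train.log; the filename, the helper name parse_records, and the whitespace normalization are illustrative choices, not part of train.py:

    # Minimal sketch: recover (timestamp, step, loss) records from a saved copy of this log.
    # Assumes the log text lives in "train.log"; the filename is an illustrative choice.
    import re

    RECORD = re.compile(
        r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/(\d+), loss: ([0-9.eE+-]+)"
    )

    def parse_records(path="train.log"):
        with open(path, encoding="utf-8") as f:
            # Collapse whitespace so records wrapped across physical lines still match.
            text = " ".join(f.read().split())
        return [
            {"time": m.group(1), "step": int(m.group(2)),
             "steps_per_epoch": int(m.group(3)), "loss": float(m.group(4))}
            for m in RECORD.finditer(text)
        ]

    records = parse_records()
    print(len(records), "step records parsed")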
2023-01-22 13:38:30.227896: step: 766/464, loss: 0.0012109780218452215 2023-01-22 13:38:30.893984: step: 768/464, loss: 0.003694704966619611 2023-01-22 13:38:31.494598: step: 770/464, loss: 0.03521769866347313 2023-01-22 13:38:32.084745: step: 772/464, loss: 0.013438494876027107 2023-01-22 13:38:32.753014: step: 774/464, loss: 0.018687814474105835 2023-01-22 13:38:33.375679: step: 776/464, loss: 0.01550370454788208 2023-01-22 13:38:34.034156: step: 778/464, loss: 0.04559968411922455 2023-01-22 13:38:34.581681: step: 780/464, loss: 0.06916767358779907 2023-01-22 13:38:35.244450: step: 782/464, loss: 0.0004504562239162624 2023-01-22 13:38:35.882599: step: 784/464, loss: 0.02127774991095066 2023-01-22 13:38:36.476352: step: 786/464, loss: 0.07551337778568268 2023-01-22 13:38:37.151118: step: 788/464, loss: 0.04706054925918579 2023-01-22 13:38:37.799423: step: 790/464, loss: 0.041895508766174316 2023-01-22 13:38:38.432016: step: 792/464, loss: 0.021193142980337143 2023-01-22 13:38:38.981708: step: 794/464, loss: 0.014277678914368153 2023-01-22 13:38:39.631886: step: 796/464, loss: 0.017894389107823372 2023-01-22 13:38:40.330081: step: 798/464, loss: 0.1131686195731163 2023-01-22 13:38:40.924613: step: 800/464, loss: 0.005173950921744108 2023-01-22 13:38:41.612659: step: 802/464, loss: 0.007500513922423124 2023-01-22 13:38:42.200488: step: 804/464, loss: 0.025352556258440018 2023-01-22 13:38:42.879225: step: 806/464, loss: 0.0008665485074743629 2023-01-22 13:38:43.473008: step: 808/464, loss: 0.003123636357486248 2023-01-22 13:38:44.200273: step: 810/464, loss: 0.020570002496242523 2023-01-22 13:38:44.944053: step: 812/464, loss: 0.003088674508035183 2023-01-22 13:38:45.552830: step: 814/464, loss: 0.015568751841783524 2023-01-22 13:38:46.171208: step: 816/464, loss: 0.002063547261059284 2023-01-22 13:38:46.797387: step: 818/464, loss: 0.002489405684173107 2023-01-22 13:38:47.438390: step: 820/464, loss: 0.0054278913885355 2023-01-22 13:38:48.090490: step: 822/464, loss: 0.032391082495450974 2023-01-22 13:38:48.723394: step: 824/464, loss: 0.01377673726528883 2023-01-22 13:38:49.351444: step: 826/464, loss: 0.03902078792452812 2023-01-22 13:38:49.967061: step: 828/464, loss: 0.47893691062927246 2023-01-22 13:38:50.671790: step: 830/464, loss: 0.057942770421504974 2023-01-22 13:38:51.247463: step: 832/464, loss: 0.0043868571519851685 2023-01-22 13:38:51.840904: step: 834/464, loss: 0.031184788793325424 2023-01-22 13:38:52.490623: step: 836/464, loss: 0.013812736608088017 2023-01-22 13:38:53.131181: step: 838/464, loss: 0.036795474588871 2023-01-22 13:38:53.731231: step: 840/464, loss: 0.011091233231127262 2023-01-22 13:38:54.345440: step: 842/464, loss: 0.01760336197912693 2023-01-22 13:38:54.982121: step: 844/464, loss: 0.031159533187747 2023-01-22 13:38:55.721579: step: 846/464, loss: 0.0019408116349950433 2023-01-22 13:38:56.329287: step: 848/464, loss: 0.013441966846585274 2023-01-22 13:38:56.972399: step: 850/464, loss: 0.023758893832564354 2023-01-22 13:38:57.618653: step: 852/464, loss: 0.011388260871171951 2023-01-22 13:38:58.184383: step: 854/464, loss: 0.0026496564969420433 2023-01-22 13:38:58.830146: step: 856/464, loss: 0.0152037488296628 2023-01-22 13:38:59.454936: step: 858/464, loss: 0.05477975308895111 2023-01-22 13:39:00.078357: step: 860/464, loss: 0.007257247343659401 2023-01-22 13:39:00.686938: step: 862/464, loss: 0.6482580900192261 2023-01-22 13:39:01.342114: step: 864/464, loss: 0.13274411857128143 2023-01-22 13:39:01.975689: step: 866/464, loss: 0.07746298611164093 
2023-01-22 13:39:02.571557: step: 868/464, loss: 0.003369443118572235 2023-01-22 13:39:03.209505: step: 870/464, loss: 0.040803004056215286 2023-01-22 13:39:03.809900: step: 872/464, loss: 0.02492385357618332 2023-01-22 13:39:04.416183: step: 874/464, loss: 0.005217418074607849 2023-01-22 13:39:05.041948: step: 876/464, loss: 0.009234433062374592 2023-01-22 13:39:05.664223: step: 878/464, loss: 0.027107814326882362 2023-01-22 13:39:06.261485: step: 880/464, loss: 0.04328801855444908 2023-01-22 13:39:06.936628: step: 882/464, loss: 0.00160908920224756 2023-01-22 13:39:07.526420: step: 884/464, loss: 0.019726203754544258 2023-01-22 13:39:08.170714: step: 886/464, loss: 0.010629426687955856 2023-01-22 13:39:08.799054: step: 888/464, loss: 0.0021526608616113663 2023-01-22 13:39:09.396220: step: 890/464, loss: 0.0002275644801557064 2023-01-22 13:39:09.992646: step: 892/464, loss: 0.0007767993956804276 2023-01-22 13:39:10.585957: step: 894/464, loss: 0.07486575841903687 2023-01-22 13:39:11.216856: step: 896/464, loss: 0.029513342306017876 2023-01-22 13:39:11.830680: step: 898/464, loss: 0.19507844746112823 2023-01-22 13:39:12.439845: step: 900/464, loss: 0.07666248828172684 2023-01-22 13:39:13.066176: step: 902/464, loss: 0.0041303797625005245 2023-01-22 13:39:13.711255: step: 904/464, loss: 0.041221149265766144 2023-01-22 13:39:14.300430: step: 906/464, loss: 0.0340176559984684 2023-01-22 13:39:14.997076: step: 908/464, loss: 0.030654940754175186 2023-01-22 13:39:15.551531: step: 910/464, loss: 0.004920803476125002 2023-01-22 13:39:16.135404: step: 912/464, loss: 0.2938683032989502 2023-01-22 13:39:16.735269: step: 914/464, loss: 0.012634389102458954 2023-01-22 13:39:17.381874: step: 916/464, loss: 0.025104986503720284 2023-01-22 13:39:18.012787: step: 918/464, loss: 0.0012669715797528625 2023-01-22 13:39:18.677147: step: 920/464, loss: 0.08574622869491577 2023-01-22 13:39:19.350770: step: 922/464, loss: 0.0421142652630806 2023-01-22 13:39:19.988077: step: 924/464, loss: 0.028157049790024757 2023-01-22 13:39:20.598507: step: 926/464, loss: 0.007767365779727697 2023-01-22 13:39:21.206948: step: 928/464, loss: 0.04685765877366066 2023-01-22 13:39:21.753920: step: 930/464, loss: 0.06659197062253952 ================================================== Loss: 0.054 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30630858230814295, 'r': 0.3307202719797597, 'f1': 0.3180466849148419}, 'combined': 0.23435018888462034, 'epoch': 31} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31179374448145886, 'r': 0.32380781536973524, 'f1': 0.3176872356012704}, 'combined': 0.20740202945989672, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3039802271483306, 'r': 0.3512788583175206, 'f1': 0.3259224618544601}, 'combined': 0.24015339294539165, 'epoch': 31} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32346826269159185, 'r': 0.3243585423136788, 'f1': 0.32391279076675206}, 'combined': 0.2114663815368433, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32125333037300174, 'r': 0.3431985294117647, 'f1': 0.3318635321100918}, 'combined': 0.24453102366006763, 'epoch': 31} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 
0.32006547269951113, 'r': 0.3133055958656371, 'f1': 0.3166494606939015}, 'combined': 0.20672451838047454, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19791666666666666, 'r': 0.31666666666666665, 'f1': 0.2435897435897436}, 'combined': 0.1623931623931624, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21153846153846154, 'r': 0.358695652173913, 'f1': 0.2661290322580645}, 'combined': 0.13306451612903225, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40789473684210525, 'r': 0.2672413793103448, 'f1': 0.3229166666666667}, 'combined': 0.2152777777777778, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:41:59.482326: step: 2/464, loss: 0.1374712586402893 2023-01-22 13:42:00.156918: step: 4/464, loss: 0.010180720128118992 2023-01-22 13:42:00.795081: step: 6/464, loss: 0.02252374216914177 2023-01-22 13:42:01.437555: step: 8/464, loss: 0.016324935480952263 2023-01-22 13:42:02.100171: step: 10/464, loss: 0.013695857487618923 2023-01-22 13:42:02.783056: step: 12/464, loss: 0.11942312866449356 2023-01-22 13:42:03.408769: step: 14/464, loss: 
0.05008179694414139 2023-01-22 13:42:03.999877: step: 16/464, loss: 0.005809451453387737 2023-01-22 13:42:04.533844: step: 18/464, loss: 1.2273625135421753 2023-01-22 13:42:05.129278: step: 20/464, loss: 0.007264153566211462 2023-01-22 13:42:05.726102: step: 22/464, loss: 0.01582128368318081 2023-01-22 13:42:06.339592: step: 24/464, loss: 0.005514255724847317 2023-01-22 13:42:06.958524: step: 26/464, loss: 0.0349930077791214 2023-01-22 13:42:07.544576: step: 28/464, loss: 0.01226122211664915 2023-01-22 13:42:08.131623: step: 30/464, loss: 0.0031147468835115433 2023-01-22 13:42:08.730077: step: 32/464, loss: 0.010408380068838596 2023-01-22 13:42:09.337558: step: 34/464, loss: 0.0029626174364238977 2023-01-22 13:42:09.996295: step: 36/464, loss: 0.003125266870483756 2023-01-22 13:42:10.582896: step: 38/464, loss: 0.015703922137618065 2023-01-22 13:42:11.276613: step: 40/464, loss: 0.16384877264499664 2023-01-22 13:42:11.978156: step: 42/464, loss: 0.4196951687335968 2023-01-22 13:42:12.643605: step: 44/464, loss: 0.015279560349881649 2023-01-22 13:42:13.249921: step: 46/464, loss: 0.0070780557580292225 2023-01-22 13:42:13.891147: step: 48/464, loss: 0.08592184633016586 2023-01-22 13:42:14.548850: step: 50/464, loss: 0.02106037363409996 2023-01-22 13:42:15.150733: step: 52/464, loss: 0.009690009988844395 2023-01-22 13:42:15.717831: step: 54/464, loss: 0.002293247263878584 2023-01-22 13:42:16.292403: step: 56/464, loss: 0.038529325276613235 2023-01-22 13:42:16.842825: step: 58/464, loss: 0.005708710290491581 2023-01-22 13:42:17.431732: step: 60/464, loss: 0.0013180101523175836 2023-01-22 13:42:18.019258: step: 62/464, loss: 0.024688029661774635 2023-01-22 13:42:18.614446: step: 64/464, loss: 0.0011120281415060163 2023-01-22 13:42:19.161481: step: 66/464, loss: 0.0012752178590744734 2023-01-22 13:42:19.796450: step: 68/464, loss: 0.022770212963223457 2023-01-22 13:42:20.448305: step: 70/464, loss: 0.0012789067113772035 2023-01-22 13:42:21.057026: step: 72/464, loss: 0.14106199145317078 2023-01-22 13:42:21.800142: step: 74/464, loss: 0.0728851780295372 2023-01-22 13:42:22.418109: step: 76/464, loss: 0.0076300278306007385 2023-01-22 13:42:23.008418: step: 78/464, loss: 0.33532583713531494 2023-01-22 13:42:23.627507: step: 80/464, loss: 0.0028517949394881725 2023-01-22 13:42:24.250458: step: 82/464, loss: 0.001625005155801773 2023-01-22 13:42:24.854463: step: 84/464, loss: 0.06250736117362976 2023-01-22 13:42:25.495035: step: 86/464, loss: 0.003237940836697817 2023-01-22 13:42:26.135613: step: 88/464, loss: 0.03381800279021263 2023-01-22 13:42:26.769790: step: 90/464, loss: 0.15457159280776978 2023-01-22 13:42:27.414248: step: 92/464, loss: 0.02301689051091671 2023-01-22 13:42:27.945463: step: 94/464, loss: 0.0011168025666847825 2023-01-22 13:42:28.494667: step: 96/464, loss: 0.02981138974428177 2023-01-22 13:42:29.149028: step: 98/464, loss: 0.0009769659955054522 2023-01-22 13:42:29.750554: step: 100/464, loss: 0.014786754734814167 2023-01-22 13:42:30.387112: step: 102/464, loss: 0.006124967243522406 2023-01-22 13:42:31.001438: step: 104/464, loss: 0.010965868830680847 2023-01-22 13:42:31.515722: step: 106/464, loss: 0.0019391337409615517 2023-01-22 13:42:32.120587: step: 108/464, loss: 0.3080310821533203 2023-01-22 13:42:32.748754: step: 110/464, loss: 0.09916416555643082 2023-01-22 13:42:33.360768: step: 112/464, loss: 0.019109727814793587 2023-01-22 13:42:33.936295: step: 114/464, loss: 0.001565584447234869 2023-01-22 13:42:34.530599: step: 116/464, loss: 0.0002652221010066569 2023-01-22 
13:42:35.131944: step: 118/464, loss: 0.1852545142173767 2023-01-22 13:42:35.714935: step: 120/464, loss: 0.038834791630506516 2023-01-22 13:42:36.340430: step: 122/464, loss: 0.09213980287313461 2023-01-22 13:42:36.990166: step: 124/464, loss: 0.00192394875921309 2023-01-22 13:42:37.594592: step: 126/464, loss: 0.002106940373778343 2023-01-22 13:42:38.164245: step: 128/464, loss: 0.0022854506969451904 2023-01-22 13:42:38.808705: step: 130/464, loss: 0.0008612187230028212 2023-01-22 13:42:39.433808: step: 132/464, loss: 0.028198976069688797 2023-01-22 13:42:40.021764: step: 134/464, loss: 0.00705569377169013 2023-01-22 13:42:40.666072: step: 136/464, loss: 0.21060891449451447 2023-01-22 13:42:41.256842: step: 138/464, loss: 0.00014653653488494456 2023-01-22 13:42:41.857087: step: 140/464, loss: 0.006557180546224117 2023-01-22 13:42:42.467263: step: 142/464, loss: 0.004567587282508612 2023-01-22 13:42:43.128880: step: 144/464, loss: 0.05902276560664177 2023-01-22 13:42:43.741939: step: 146/464, loss: 0.06563450396060944 2023-01-22 13:42:44.360595: step: 148/464, loss: 0.0009326763683930039 2023-01-22 13:42:44.974718: step: 150/464, loss: 0.002292963908985257 2023-01-22 13:42:45.620508: step: 152/464, loss: 0.04217243194580078 2023-01-22 13:42:46.277378: step: 154/464, loss: 0.008173865266144276 2023-01-22 13:42:46.914242: step: 156/464, loss: 0.03438876196742058 2023-01-22 13:42:47.542699: step: 158/464, loss: 0.003344327211380005 2023-01-22 13:42:48.133392: step: 160/464, loss: 0.011284800246357918 2023-01-22 13:42:48.675580: step: 162/464, loss: 0.03331875428557396 2023-01-22 13:42:49.312818: step: 164/464, loss: 0.00024161580950021744 2023-01-22 13:42:49.977572: step: 166/464, loss: 0.010135755874216557 2023-01-22 13:42:50.552337: step: 168/464, loss: 8.51072181831114e-05 2023-01-22 13:42:51.182568: step: 170/464, loss: 0.03148839250206947 2023-01-22 13:42:51.851238: step: 172/464, loss: 0.00888749212026596 2023-01-22 13:42:52.400856: step: 174/464, loss: 0.04342638701200485 2023-01-22 13:42:52.999792: step: 176/464, loss: 0.07448451966047287 2023-01-22 13:42:53.658200: step: 178/464, loss: 0.036748576909303665 2023-01-22 13:42:54.240814: step: 180/464, loss: 0.0023196316324174404 2023-01-22 13:42:54.865713: step: 182/464, loss: 0.04282921180129051 2023-01-22 13:42:55.446003: step: 184/464, loss: 0.05800343677401543 2023-01-22 13:42:56.042772: step: 186/464, loss: 0.016823645681142807 2023-01-22 13:42:56.812302: step: 188/464, loss: 0.00039676681626588106 2023-01-22 13:42:57.455434: step: 190/464, loss: 0.024343879893422127 2023-01-22 13:42:58.083603: step: 192/464, loss: 0.008399303071200848 2023-01-22 13:42:58.638132: step: 194/464, loss: 0.007016150280833244 2023-01-22 13:42:59.279831: step: 196/464, loss: 0.01540520042181015 2023-01-22 13:42:59.789142: step: 198/464, loss: 0.01566270925104618 2023-01-22 13:43:00.395724: step: 200/464, loss: 0.037253670394420624 2023-01-22 13:43:01.043344: step: 202/464, loss: 0.003179313614964485 2023-01-22 13:43:01.714777: step: 204/464, loss: 0.03366708382964134 2023-01-22 13:43:02.351723: step: 206/464, loss: 0.028714319691061974 2023-01-22 13:43:02.984207: step: 208/464, loss: 0.2324700802564621 2023-01-22 13:43:03.637040: step: 210/464, loss: 0.07700219005346298 2023-01-22 13:43:04.215136: step: 212/464, loss: 0.0030666450038552284 2023-01-22 13:43:04.796193: step: 214/464, loss: 0.0012264112010598183 2023-01-22 13:43:05.469085: step: 216/464, loss: 0.0726943090558052 2023-01-22 13:43:06.046152: step: 218/464, loss: 0.021450810134410858 
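In the evaluation blocks printed at the end of each epoch (such as the epoch-31 block above), each f1 is the usual harmonic mean of the listed p and r, and the 'combined' figure matches the product of the template F1 and the slot F1 for the cases checked here, e.g. Dev Chinese at epoch 31: 0.7368 x 0.3180 is about 0.2344. That relationship is inferred from the logged numbers rather than read out of train.py; a quick arithmetic check:

    # Reproduce the epoch-31 Dev Chinese figures from the logged precision/recall values.
    def f1(p, r):
        return 2 * p * r / (p + r) if (p + r) else 0.0

    template_f1 = f1(1.0, 0.5833333333333334)               # ~0.7368, as logged
    slot_f1 = f1(0.30630858230814295, 0.3307202719797597)   # ~0.3180, as logged
    combined = template_f1 * slot_f1                        # ~0.2344, matching the logged 'combined'
    print(template_f1, slot_f1, combined)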
2023-01-22 13:43:06.675231: step: 220/464, loss: 0.004526334349066019 2023-01-22 13:43:07.344996: step: 222/464, loss: 0.038611650466918945 2023-01-22 13:43:07.984001: step: 224/464, loss: 0.0003367810568306595 2023-01-22 13:43:08.609797: step: 226/464, loss: 0.008392960764467716 2023-01-22 13:43:09.226374: step: 228/464, loss: 0.0759117379784584 2023-01-22 13:43:09.851868: step: 230/464, loss: 0.0077818832360208035 2023-01-22 13:43:10.577602: step: 232/464, loss: 0.007422207854688168 2023-01-22 13:43:11.217534: step: 234/464, loss: 0.004921207204461098 2023-01-22 13:43:11.828515: step: 236/464, loss: 0.01048721931874752 2023-01-22 13:43:12.522631: step: 238/464, loss: 0.011839354410767555 2023-01-22 13:43:13.155284: step: 240/464, loss: 0.003599931485950947 2023-01-22 13:43:13.760699: step: 242/464, loss: 0.016076581552624702 2023-01-22 13:43:14.393420: step: 244/464, loss: 0.008714699186384678 2023-01-22 13:43:14.998003: step: 246/464, loss: 0.0007286720210686326 2023-01-22 13:43:15.602717: step: 248/464, loss: 0.011836939491331577 2023-01-22 13:43:16.211308: step: 250/464, loss: 0.11396396905183792 2023-01-22 13:43:16.847337: step: 252/464, loss: 0.0001828370150178671 2023-01-22 13:43:17.394760: step: 254/464, loss: 0.0002706579689402133 2023-01-22 13:43:18.104398: step: 256/464, loss: 0.006874525919556618 2023-01-22 13:43:18.768403: step: 258/464, loss: 0.012534061446785927 2023-01-22 13:43:19.381639: step: 260/464, loss: 0.03968286141753197 2023-01-22 13:43:19.956644: step: 262/464, loss: 0.030112750828266144 2023-01-22 13:43:20.546849: step: 264/464, loss: 0.002630100352689624 2023-01-22 13:43:21.135714: step: 266/464, loss: 0.0007398867746815085 2023-01-22 13:43:21.756873: step: 268/464, loss: 0.033645376563072205 2023-01-22 13:43:22.411123: step: 270/464, loss: 0.02185887098312378 2023-01-22 13:43:23.037011: step: 272/464, loss: 0.007533758878707886 2023-01-22 13:43:23.658403: step: 274/464, loss: 0.034230563789606094 2023-01-22 13:43:24.276079: step: 276/464, loss: 0.0003835291718132794 2023-01-22 13:43:24.899456: step: 278/464, loss: 0.006568636279553175 2023-01-22 13:43:25.514152: step: 280/464, loss: 0.01467649545520544 2023-01-22 13:43:26.122392: step: 282/464, loss: 0.00968607421964407 2023-01-22 13:43:26.710032: step: 284/464, loss: 0.008147986605763435 2023-01-22 13:43:27.310638: step: 286/464, loss: 0.0042988816276192665 2023-01-22 13:43:27.980410: step: 288/464, loss: 0.04639403894543648 2023-01-22 13:43:28.564762: step: 290/464, loss: 0.007763709872961044 2023-01-22 13:43:29.138490: step: 292/464, loss: 0.002152147702872753 2023-01-22 13:43:29.738634: step: 294/464, loss: 0.02833113819360733 2023-01-22 13:43:30.338937: step: 296/464, loss: 0.02986162155866623 2023-01-22 13:43:30.973143: step: 298/464, loss: 0.0022444010246545076 2023-01-22 13:43:31.632687: step: 300/464, loss: 0.0035056171473115683 2023-01-22 13:43:32.293950: step: 302/464, loss: 1.122079849243164 2023-01-22 13:43:32.878444: step: 304/464, loss: 0.007843993604183197 2023-01-22 13:43:33.529622: step: 306/464, loss: 0.0015960789751261473 2023-01-22 13:43:34.142084: step: 308/464, loss: 0.003823076607659459 2023-01-22 13:43:34.825831: step: 310/464, loss: 0.07746380567550659 2023-01-22 13:43:35.452152: step: 312/464, loss: 0.002466361504048109 2023-01-22 13:43:36.049451: step: 314/464, loss: 0.0016166861169040203 2023-01-22 13:43:36.625945: step: 316/464, loss: 0.0008088828180916607 2023-01-22 13:43:37.239799: step: 318/464, loss: 0.10763225704431534 2023-01-22 13:43:37.869483: step: 320/464, loss: 
0.008803927339613438 2023-01-22 13:43:38.495072: step: 322/464, loss: 0.07278378307819366 2023-01-22 13:43:39.091308: step: 324/464, loss: 0.009004500694572926 2023-01-22 13:43:39.745114: step: 326/464, loss: 0.006256289314478636 2023-01-22 13:43:40.439597: step: 328/464, loss: 0.00622887909412384 2023-01-22 13:43:41.046863: step: 330/464, loss: 0.029455173760652542 2023-01-22 13:43:41.631811: step: 332/464, loss: 0.0009756234940141439 2023-01-22 13:43:42.291495: step: 334/464, loss: 0.002182058524340391 2023-01-22 13:43:42.869033: step: 336/464, loss: 0.0010947698028758168 2023-01-22 13:43:43.440840: step: 338/464, loss: 0.04082098603248596 2023-01-22 13:43:44.010956: step: 340/464, loss: 0.016495386138558388 2023-01-22 13:43:44.641759: step: 342/464, loss: 0.005853863898664713 2023-01-22 13:43:45.323662: step: 344/464, loss: 0.03994187340140343 2023-01-22 13:43:46.014766: step: 346/464, loss: 0.013703061267733574 2023-01-22 13:43:46.657096: step: 348/464, loss: 0.0014292305568233132 2023-01-22 13:43:47.241911: step: 350/464, loss: 0.029019614681601524 2023-01-22 13:43:47.883846: step: 352/464, loss: 0.048232026398181915 2023-01-22 13:43:48.587240: step: 354/464, loss: 0.0015602021012455225 2023-01-22 13:43:49.212112: step: 356/464, loss: 0.011963332071900368 2023-01-22 13:43:49.858604: step: 358/464, loss: 0.004896857775747776 2023-01-22 13:43:50.536849: step: 360/464, loss: 0.007370649836957455 2023-01-22 13:43:51.139844: step: 362/464, loss: 0.00025744843878783286 2023-01-22 13:43:51.730919: step: 364/464, loss: 0.14153197407722473 2023-01-22 13:43:52.358201: step: 366/464, loss: 0.019035013392567635 2023-01-22 13:43:52.953976: step: 368/464, loss: 0.0006584279472008348 2023-01-22 13:43:53.517628: step: 370/464, loss: 0.07147854566574097 2023-01-22 13:43:54.153866: step: 372/464, loss: 0.018193015828728676 2023-01-22 13:43:54.743155: step: 374/464, loss: 0.0103158513084054 2023-01-22 13:43:55.383141: step: 376/464, loss: 0.0066604227758944035 2023-01-22 13:43:56.018724: step: 378/464, loss: 0.006821786984801292 2023-01-22 13:43:56.668061: step: 380/464, loss: 0.0039542485028505325 2023-01-22 13:43:57.296805: step: 382/464, loss: 4.337979044066742e-05 2023-01-22 13:43:57.899555: step: 384/464, loss: 0.07129360735416412 2023-01-22 13:43:58.531776: step: 386/464, loss: 0.002703416394069791 2023-01-22 13:43:59.132682: step: 388/464, loss: 0.018774086609482765 2023-01-22 13:43:59.704022: step: 390/464, loss: 0.023931795731186867 2023-01-22 13:44:00.361715: step: 392/464, loss: 0.009366103447973728 2023-01-22 13:44:00.980594: step: 394/464, loss: 0.1786050796508789 2023-01-22 13:44:01.681048: step: 396/464, loss: 0.013859412632882595 2023-01-22 13:44:02.321595: step: 398/464, loss: 0.00012523426266852766 2023-01-22 13:44:02.965677: step: 400/464, loss: 0.004233901854604483 2023-01-22 13:44:03.626148: step: 402/464, loss: 0.012055453844368458 2023-01-22 13:44:04.262223: step: 404/464, loss: 0.00043928564991801977 2023-01-22 13:44:04.942431: step: 406/464, loss: 0.06493253260850906 2023-01-22 13:44:05.566058: step: 408/464, loss: 0.015318336896598339 2023-01-22 13:44:06.171989: step: 410/464, loss: 0.003939393442124128 2023-01-22 13:44:06.755046: step: 412/464, loss: 0.042990926653146744 2023-01-22 13:44:07.460693: step: 414/464, loss: 0.6873338222503662 2023-01-22 13:44:08.082483: step: 416/464, loss: 0.00958840548992157 2023-01-22 13:44:08.646047: step: 418/464, loss: 0.006285862997174263 2023-01-22 13:44:09.268438: step: 420/464, loss: 0.013670720160007477 2023-01-22 13:44:09.806598: step: 
422/464, loss: 0.000234775579883717 2023-01-22 13:44:10.453192: step: 424/464, loss: 0.0011437935754656792 2023-01-22 13:44:11.140026: step: 426/464, loss: 0.01316257193684578 2023-01-22 13:44:11.689861: step: 428/464, loss: 0.0035191448405385017 2023-01-22 13:44:12.334144: step: 430/464, loss: 0.020264482125639915 2023-01-22 13:44:12.960135: step: 432/464, loss: 0.005611537955701351 2023-01-22 13:44:13.570307: step: 434/464, loss: 0.004806075245141983 2023-01-22 13:44:14.198434: step: 436/464, loss: 0.0727372094988823 2023-01-22 13:44:14.754765: step: 438/464, loss: 0.00040400371653959155 2023-01-22 13:44:15.440633: step: 440/464, loss: 0.025443876162171364 2023-01-22 13:44:16.031058: step: 442/464, loss: 0.007278925273567438 2023-01-22 13:44:16.611170: step: 444/464, loss: 0.0007324048201553524 2023-01-22 13:44:17.264356: step: 446/464, loss: 0.9470614790916443 2023-01-22 13:44:17.821912: step: 448/464, loss: 0.005414521787315607 2023-01-22 13:44:18.488423: step: 450/464, loss: 0.039313700050115585 2023-01-22 13:44:19.163115: step: 452/464, loss: 0.02977609820663929 2023-01-22 13:44:19.793866: step: 454/464, loss: 0.0796954482793808 2023-01-22 13:44:20.488801: step: 456/464, loss: 0.0037060920149087906 2023-01-22 13:44:21.189160: step: 458/464, loss: 0.030984675511717796 2023-01-22 13:44:21.845982: step: 460/464, loss: 0.031195633113384247 2023-01-22 13:44:22.497434: step: 462/464, loss: 0.050389666110277176 2023-01-22 13:44:23.113893: step: 464/464, loss: 0.002835672115907073 2023-01-22 13:44:23.759817: step: 466/464, loss: 0.009972220286726952 2023-01-22 13:44:24.380698: step: 468/464, loss: 0.006782420910894871 2023-01-22 13:44:25.020793: step: 470/464, loss: 0.001250862143933773 2023-01-22 13:44:25.574684: step: 472/464, loss: 0.0008862206595949829 2023-01-22 13:44:26.241542: step: 474/464, loss: 0.0006641732179559767 2023-01-22 13:44:26.841691: step: 476/464, loss: 0.0071775889955461025 2023-01-22 13:44:27.401785: step: 478/464, loss: 0.03260122612118721 2023-01-22 13:44:28.012319: step: 480/464, loss: 0.005589763168245554 2023-01-22 13:44:28.641137: step: 482/464, loss: 0.04313836246728897 2023-01-22 13:44:29.207511: step: 484/464, loss: 0.012813220731914043 2023-01-22 13:44:29.802239: step: 486/464, loss: 0.06320730596780777 2023-01-22 13:44:30.447725: step: 488/464, loss: 0.016038797795772552 2023-01-22 13:44:31.056146: step: 490/464, loss: 0.4349803626537323 2023-01-22 13:44:31.679006: step: 492/464, loss: 0.02631193771958351 2023-01-22 13:44:32.275723: step: 494/464, loss: 0.00018490907677914947 2023-01-22 13:44:32.836584: step: 496/464, loss: 0.007844946347177029 2023-01-22 13:44:33.455655: step: 498/464, loss: 0.02207314968109131 2023-01-22 13:44:34.101592: step: 500/464, loss: 0.0034127626568078995 2023-01-22 13:44:34.766818: step: 502/464, loss: 0.0290288794785738 2023-01-22 13:44:35.363040: step: 504/464, loss: 0.027108201757073402 2023-01-22 13:44:35.986828: step: 506/464, loss: 0.004797337576746941 2023-01-22 13:44:36.650136: step: 508/464, loss: 0.0073133353143930435 2023-01-22 13:44:37.334275: step: 510/464, loss: 0.07168885320425034 2023-01-22 13:44:37.936359: step: 512/464, loss: 0.036796074360609055 2023-01-22 13:44:38.584235: step: 514/464, loss: 0.07598067820072174 2023-01-22 13:44:39.188699: step: 516/464, loss: 0.14883510768413544 2023-01-22 13:44:39.773030: step: 518/464, loss: 5.250910180620849e-05 2023-01-22 13:44:40.326117: step: 520/464, loss: 0.017701705917716026 2023-01-22 13:44:40.928981: step: 522/464, loss: 0.08047399669885635 2023-01-22 
13:44:41.522716: step: 524/464, loss: 0.006779791321605444 2023-01-22 13:44:42.119008: step: 526/464, loss: 0.0016784468898549676 2023-01-22 13:44:42.754167: step: 528/464, loss: 0.06029986962676048 2023-01-22 13:44:43.377085: step: 530/464, loss: 7.868605462135747e-05 2023-01-22 13:44:43.971787: step: 532/464, loss: 0.0018813211936503649 2023-01-22 13:44:44.582648: step: 534/464, loss: 0.002208078047260642 2023-01-22 13:44:45.161921: step: 536/464, loss: 0.0004298434068914503 2023-01-22 13:44:45.844225: step: 538/464, loss: 0.005829576402902603 2023-01-22 13:44:46.535473: step: 540/464, loss: 0.019377706572413445 2023-01-22 13:44:47.184333: step: 542/464, loss: 0.0014036053325980902 2023-01-22 13:44:47.773712: step: 544/464, loss: 0.00861271470785141 2023-01-22 13:44:48.530259: step: 546/464, loss: 0.009145848453044891 2023-01-22 13:44:49.171078: step: 548/464, loss: 0.016987405717372894 2023-01-22 13:44:49.751207: step: 550/464, loss: 0.00026997787063010037 2023-01-22 13:44:50.371599: step: 552/464, loss: 0.0016153783071786165 2023-01-22 13:44:51.029252: step: 554/464, loss: 0.3739587068557739 2023-01-22 13:44:51.693190: step: 556/464, loss: 0.015637751668691635 2023-01-22 13:44:52.258750: step: 558/464, loss: 0.010172784328460693 2023-01-22 13:44:52.879957: step: 560/464, loss: 0.06389915943145752 2023-01-22 13:44:53.507712: step: 562/464, loss: 0.0174139142036438 2023-01-22 13:44:54.054586: step: 564/464, loss: 0.003111387137323618 2023-01-22 13:44:54.655374: step: 566/464, loss: 0.03332321718335152 2023-01-22 13:44:55.454523: step: 568/464, loss: 0.009706872515380383 2023-01-22 13:44:56.127509: step: 570/464, loss: 0.012194119393825531 2023-01-22 13:44:56.718830: step: 572/464, loss: 0.02395642362535 2023-01-22 13:44:57.330438: step: 574/464, loss: 0.046492986381053925 2023-01-22 13:44:57.884419: step: 576/464, loss: 0.01595861092209816 2023-01-22 13:44:58.611618: step: 578/464, loss: 0.0010339318541809916 2023-01-22 13:44:59.249032: step: 580/464, loss: 0.17204780876636505 2023-01-22 13:44:59.958851: step: 582/464, loss: 0.012429947964847088 2023-01-22 13:45:00.667461: step: 584/464, loss: 0.21090374886989594 2023-01-22 13:45:01.288930: step: 586/464, loss: 0.03177638351917267 2023-01-22 13:45:01.857392: step: 588/464, loss: 0.0053174374625086784 2023-01-22 13:45:02.457465: step: 590/464, loss: 0.014543687924742699 2023-01-22 13:45:03.133349: step: 592/464, loss: 0.020352229475975037 2023-01-22 13:45:03.800007: step: 594/464, loss: 0.03623180836439133 2023-01-22 13:45:04.408963: step: 596/464, loss: 0.0016851243562996387 2023-01-22 13:45:05.006940: step: 598/464, loss: 0.059681087732315063 2023-01-22 13:45:05.622544: step: 600/464, loss: 0.0010504459496587515 2023-01-22 13:45:06.174706: step: 602/464, loss: 0.015765614807605743 2023-01-22 13:45:06.831834: step: 604/464, loss: 0.11763416975736618 2023-01-22 13:45:07.480384: step: 606/464, loss: 0.047599419951438904 2023-01-22 13:45:08.152079: step: 608/464, loss: 0.007170728407800198 2023-01-22 13:45:08.764019: step: 610/464, loss: 0.006606565788388252 2023-01-22 13:45:09.418329: step: 612/464, loss: 0.02955743670463562 2023-01-22 13:45:10.058481: step: 614/464, loss: 0.01352652721107006 2023-01-22 13:45:10.667037: step: 616/464, loss: 0.019116820767521858 2023-01-22 13:45:11.284261: step: 618/464, loss: 0.03399045392870903 2023-01-22 13:45:11.934918: step: 620/464, loss: 0.00732051208615303 2023-01-22 13:45:12.587975: step: 622/464, loss: 0.013931176625192165 2023-01-22 13:45:13.224621: step: 624/464, loss: 0.02485789731144905 
2023-01-22 13:45:13.924367: step: 626/464, loss: 0.04422234743833542 2023-01-22 13:45:14.555104: step: 628/464, loss: 0.0006379493279382586 2023-01-22 13:45:15.167308: step: 630/464, loss: 0.04369499161839485 2023-01-22 13:45:15.809599: step: 632/464, loss: 0.12479616701602936 2023-01-22 13:45:16.380414: step: 634/464, loss: 0.22262367606163025 2023-01-22 13:45:17.000285: step: 636/464, loss: 0.05760623514652252 2023-01-22 13:45:17.733997: step: 638/464, loss: 0.10373300313949585 2023-01-22 13:45:18.392181: step: 640/464, loss: 0.10711164027452469 2023-01-22 13:45:19.067953: step: 642/464, loss: 0.005474729463458061 2023-01-22 13:45:19.662644: step: 644/464, loss: 0.03453825041651726 2023-01-22 13:45:20.283755: step: 646/464, loss: 0.007322102319449186 2023-01-22 13:45:20.909860: step: 648/464, loss: 0.0455363430082798 2023-01-22 13:45:21.538832: step: 650/464, loss: 0.021714529022574425 2023-01-22 13:45:22.117750: step: 652/464, loss: 0.007238437887281179 2023-01-22 13:45:22.750737: step: 654/464, loss: 0.006591625977307558 2023-01-22 13:45:23.413854: step: 656/464, loss: 0.047648973762989044 2023-01-22 13:45:24.126256: step: 658/464, loss: 0.018461909145116806 2023-01-22 13:45:24.797996: step: 660/464, loss: 0.002493728417903185 2023-01-22 13:45:25.405610: step: 662/464, loss: 0.015594934113323689 2023-01-22 13:45:26.044817: step: 664/464, loss: 0.016749706119298935 2023-01-22 13:45:26.668355: step: 666/464, loss: 0.02515016496181488 2023-01-22 13:45:27.233899: step: 668/464, loss: 0.0007680103299207985 2023-01-22 13:45:27.787929: step: 670/464, loss: 0.005315244663506746 2023-01-22 13:45:28.426462: step: 672/464, loss: 0.01851995289325714 2023-01-22 13:45:29.011453: step: 674/464, loss: 0.33771756291389465 2023-01-22 13:45:29.656757: step: 676/464, loss: 0.031938109546899796 2023-01-22 13:45:30.260304: step: 678/464, loss: 0.008982275612652302 2023-01-22 13:45:30.954674: step: 680/464, loss: 0.006587052717804909 2023-01-22 13:45:31.593651: step: 682/464, loss: 0.004672519396990538 2023-01-22 13:45:32.253269: step: 684/464, loss: 0.0054403976537287235 2023-01-22 13:45:32.853801: step: 686/464, loss: 0.015013152733445168 2023-01-22 13:45:33.465100: step: 688/464, loss: 0.022577261552214622 2023-01-22 13:45:34.103800: step: 690/464, loss: 0.008266960270702839 2023-01-22 13:45:34.751652: step: 692/464, loss: 0.012359803542494774 2023-01-22 13:45:35.390324: step: 694/464, loss: 0.010465661995112896 2023-01-22 13:45:35.951918: step: 696/464, loss: 0.018571270629763603 2023-01-22 13:45:36.553899: step: 698/464, loss: 0.19208259880542755 2023-01-22 13:45:37.138511: step: 700/464, loss: 0.05983246862888336 2023-01-22 13:45:37.710488: step: 702/464, loss: 0.010622991248965263 2023-01-22 13:45:38.266215: step: 704/464, loss: 0.05283036082983017 2023-01-22 13:45:38.937450: step: 706/464, loss: 0.03840656951069832 2023-01-22 13:45:39.558388: step: 708/464, loss: 0.005075534805655479 2023-01-22 13:45:40.173601: step: 710/464, loss: 0.017260050401091576 2023-01-22 13:45:40.819229: step: 712/464, loss: 0.003130019875243306 2023-01-22 13:45:41.412269: step: 714/464, loss: 0.004431078210473061 2023-01-22 13:45:42.078623: step: 716/464, loss: 0.05127481743693352 2023-01-22 13:45:42.689146: step: 718/464, loss: 0.0026954528875648975 2023-01-22 13:45:43.295015: step: 720/464, loss: 0.03995548561215401 2023-01-22 13:45:43.983613: step: 722/464, loss: 0.11368819326162338 2023-01-22 13:45:44.550086: step: 724/464, loss: 0.0023431519512087107 2023-01-22 13:45:45.188616: step: 726/464, loss: 0.001431989367119968 
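Within a single epoch the raw step losses above swing across several orders of magnitude (values near 1e-4 sit next to occasional spikes above 1.0), so a smoothed curve is easier to read than the raw trace when judging whether training is still improving. A small sketch of exponential smoothing, fed with six consecutive losses copied from the epoch-32 trace above (steps 716 to 726); the smoothing factor alpha is an arbitrary choice:

    # Exponential moving average over per-step losses, for offline inspection only.
    def ema(losses, alpha=0.05):
        smoothed, current = [], None
        for x in losses:
            current = x if current is None else alpha * x + (1 - alpha) * current
            smoothed.append(current)
        return smoothed

    # Six consecutive losses (steps 716-726 of epoch 32), copied verbatim from the log above.
    sample = [0.05127481743693352, 0.0026954528875648975, 0.03995548561215401,
              0.11368819326162338, 0.0023431519512087107, 0.001431989367119968]
    print(ema(sample))

The same helper can be applied to the full list produced by the parsing sketch earlier, e.g. ema([r["loss"] for r in records]).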
2023-01-22 13:45:45.878345: step: 728/464, loss: 0.0038282026071101427 2023-01-22 13:45:46.490324: step: 730/464, loss: 0.008284663781523705 2023-01-22 13:45:47.093429: step: 732/464, loss: 0.006050837226212025 2023-01-22 13:45:47.662770: step: 734/464, loss: 0.014839560724794865 2023-01-22 13:45:48.362079: step: 736/464, loss: 0.003803978208452463 2023-01-22 13:45:48.927404: step: 738/464, loss: 0.044560253620147705 2023-01-22 13:45:49.617466: step: 740/464, loss: 0.013758180662989616 2023-01-22 13:45:50.285806: step: 742/464, loss: 0.053472235798835754 2023-01-22 13:45:50.883153: step: 744/464, loss: 0.053535766899585724 2023-01-22 13:45:51.509673: step: 746/464, loss: 0.03132905066013336 2023-01-22 13:45:52.170963: step: 748/464, loss: 0.011415134184062481 2023-01-22 13:45:52.734429: step: 750/464, loss: 0.009031401947140694 2023-01-22 13:45:53.399079: step: 752/464, loss: 0.006600252818316221 2023-01-22 13:45:54.054583: step: 754/464, loss: 8.853591862134635e-05 2023-01-22 13:45:54.710684: step: 756/464, loss: 1.2533529996871948 2023-01-22 13:45:55.304730: step: 758/464, loss: 0.011251486837863922 2023-01-22 13:45:55.939218: step: 760/464, loss: 0.021243376657366753 2023-01-22 13:45:56.606236: step: 762/464, loss: 0.022263169288635254 2023-01-22 13:45:57.216258: step: 764/464, loss: 0.01969255320727825 2023-01-22 13:45:57.840222: step: 766/464, loss: 0.021357337012887 2023-01-22 13:45:58.481780: step: 768/464, loss: 0.006406448315829039 2023-01-22 13:45:59.166240: step: 770/464, loss: 0.013859029859304428 2023-01-22 13:45:59.738648: step: 772/464, loss: 0.008305447176098824 2023-01-22 13:46:00.444265: step: 774/464, loss: 0.0027322552632540464 2023-01-22 13:46:01.101749: step: 776/464, loss: 0.019399166107177734 2023-01-22 13:46:01.718305: step: 778/464, loss: 0.003915107809007168 2023-01-22 13:46:02.295361: step: 780/464, loss: 0.006939701735973358 2023-01-22 13:46:02.945762: step: 782/464, loss: 0.01559397205710411 2023-01-22 13:46:03.534521: step: 784/464, loss: 0.013243515975773335 2023-01-22 13:46:04.112647: step: 786/464, loss: 0.021561570465564728 2023-01-22 13:46:04.702585: step: 788/464, loss: 0.0011126205790787935 2023-01-22 13:46:05.351287: step: 790/464, loss: 0.0189791489392519 2023-01-22 13:46:06.003527: step: 792/464, loss: 0.02173781767487526 2023-01-22 13:46:06.679611: step: 794/464, loss: 0.020091254264116287 2023-01-22 13:46:07.302130: step: 796/464, loss: 0.02431781403720379 2023-01-22 13:46:07.958596: step: 798/464, loss: 0.021222827956080437 2023-01-22 13:46:08.545359: step: 800/464, loss: 0.00465004239231348 2023-01-22 13:46:09.243756: step: 802/464, loss: 0.03723061829805374 2023-01-22 13:46:09.944324: step: 804/464, loss: 0.025808952748775482 2023-01-22 13:46:10.566564: step: 806/464, loss: 0.00010458481847308576 2023-01-22 13:46:11.157656: step: 808/464, loss: 0.05023520067334175 2023-01-22 13:46:11.796650: step: 810/464, loss: 0.0019826339557766914 2023-01-22 13:46:12.420726: step: 812/464, loss: 0.000529944256413728 2023-01-22 13:46:13.068544: step: 814/464, loss: 0.004346379078924656 2023-01-22 13:46:13.637652: step: 816/464, loss: 0.0038693081587553024 2023-01-22 13:46:14.218623: step: 818/464, loss: 0.002243026392534375 2023-01-22 13:46:14.872500: step: 820/464, loss: 0.14987914264202118 2023-01-22 13:46:15.492746: step: 822/464, loss: 0.00967397540807724 2023-01-22 13:46:16.085919: step: 824/464, loss: 0.02487901970744133 2023-01-22 13:46:16.659926: step: 826/464, loss: 0.26290661096572876 2023-01-22 13:46:17.259327: step: 828/464, loss: 
0.004128835629671812 2023-01-22 13:46:17.934781: step: 830/464, loss: 0.0026098049711436033 2023-01-22 13:46:18.607499: step: 832/464, loss: 0.004145448096096516 2023-01-22 13:46:19.243296: step: 834/464, loss: 0.291154146194458 2023-01-22 13:46:19.834699: step: 836/464, loss: 0.009006387554109097 2023-01-22 13:46:20.446629: step: 838/464, loss: 0.012768622487783432 2023-01-22 13:46:21.070001: step: 840/464, loss: 0.05619003251194954 2023-01-22 13:46:21.733387: step: 842/464, loss: 0.01126299798488617 2023-01-22 13:46:22.376232: step: 844/464, loss: 0.004039745312184095 2023-01-22 13:46:23.014277: step: 846/464, loss: 0.10724084079265594 2023-01-22 13:46:23.654122: step: 848/464, loss: 0.005021022167056799 2023-01-22 13:46:24.298525: step: 850/464, loss: 0.005245794542133808 2023-01-22 13:46:24.842175: step: 852/464, loss: 0.011281571350991726 2023-01-22 13:46:25.458258: step: 854/464, loss: 0.0015350535977631807 2023-01-22 13:46:26.029832: step: 856/464, loss: 0.014480022713541985 2023-01-22 13:46:26.697459: step: 858/464, loss: 0.0038870610296726227 2023-01-22 13:46:27.374804: step: 860/464, loss: 0.04260418564081192 2023-01-22 13:46:28.041223: step: 862/464, loss: 0.0012214086018502712 2023-01-22 13:46:28.729618: step: 864/464, loss: 0.008705553598701954 2023-01-22 13:46:29.351113: step: 866/464, loss: 2.5658152103424072 2023-01-22 13:46:29.968690: step: 868/464, loss: 0.008928397670388222 2023-01-22 13:46:30.657822: step: 870/464, loss: 0.012816757895052433 2023-01-22 13:46:31.300623: step: 872/464, loss: 0.007330951280891895 2023-01-22 13:46:31.872262: step: 874/464, loss: 0.9620086550712585 2023-01-22 13:46:32.513282: step: 876/464, loss: 0.10326791554689407 2023-01-22 13:46:33.121699: step: 878/464, loss: 0.023309925571084023 2023-01-22 13:46:33.712291: step: 880/464, loss: 0.005438762251287699 2023-01-22 13:46:34.339058: step: 882/464, loss: 0.06091240420937538 2023-01-22 13:46:34.988517: step: 884/464, loss: 0.0758993998169899 2023-01-22 13:46:35.612197: step: 886/464, loss: 0.000790759630035609 2023-01-22 13:46:36.276492: step: 888/464, loss: 0.07822221517562866 2023-01-22 13:46:36.879447: step: 890/464, loss: 0.0005388292483985424 2023-01-22 13:46:37.444821: step: 892/464, loss: 0.001909834798425436 2023-01-22 13:46:38.081274: step: 894/464, loss: 0.005446003284305334 2023-01-22 13:46:38.740676: step: 896/464, loss: 0.006591171491891146 2023-01-22 13:46:39.443865: step: 898/464, loss: 0.026384249329566956 2023-01-22 13:46:40.075441: step: 900/464, loss: 0.043026503175497055 2023-01-22 13:46:40.680615: step: 902/464, loss: 0.034636832773685455 2023-01-22 13:46:41.298778: step: 904/464, loss: 0.013785764575004578 2023-01-22 13:46:41.932770: step: 906/464, loss: 0.0037164506502449512 2023-01-22 13:46:42.517567: step: 908/464, loss: 0.003406970528885722 2023-01-22 13:46:43.125065: step: 910/464, loss: 0.009995612315833569 2023-01-22 13:46:43.747716: step: 912/464, loss: 0.010833344422280788 2023-01-22 13:46:44.364697: step: 914/464, loss: 0.010535611771047115 2023-01-22 13:46:44.998930: step: 916/464, loss: 0.000829737342428416 2023-01-22 13:46:45.620259: step: 918/464, loss: 0.011057580821216106 2023-01-22 13:46:46.278523: step: 920/464, loss: 0.008482350967824459 2023-01-22 13:46:46.950217: step: 922/464, loss: 0.04329945519566536 2023-01-22 13:46:47.561845: step: 924/464, loss: 0.0016789406072348356 2023-01-22 13:46:48.205096: step: 926/464, loss: 0.047825418412685394 2023-01-22 13:46:48.842493: step: 928/464, loss: 0.014367038384079933 2023-01-22 13:46:49.325275: step: 930/464, 
loss: 0.0022941920906305313 ================================================== Loss: 0.049 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2898931100217865, 'r': 0.3366500632511069, 'f1': 0.3115269242025168}, 'combined': 0.22954615467553868, 'epoch': 32} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29704230882033217, 'r': 0.32213679221011227, 'f1': 0.309081027944328}, 'combined': 0.20178346902064936, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2834326147251639, 'r': 0.3555008697027198, 'f1': 0.3154022867564534}, 'combined': 0.23240168497843933, 'epoch': 32} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3094470992347633, 'r': 0.3273489975376009, 'f1': 0.31814641527750764}, 'combined': 0.20770180479256975, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3015934405940594, 'r': 0.34680384250474383, 'f1': 0.32262246248896737}, 'combined': 0.23772181446555488, 'epoch': 32} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30368770390171734, 'r': 0.3181888614801281, 'f1': 0.3107692108985287}, 'combined': 0.2028855988249462, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20555555555555555, 'r': 0.35238095238095235, 'f1': 0.25964912280701746}, 'combined': 0.1730994152046783, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2556818181818182, 'r': 0.4891304347826087, 'f1': 0.3358208955223881}, 'combined': 0.16791044776119404, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4305555555555556, 'r': 0.2672413793103448, 'f1': 0.3297872340425532}, 'combined': 0.2198581560283688, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 
'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:49:27.830659: step: 2/464, loss: 0.014374053105711937 2023-01-22 13:49:28.506959: step: 4/464, loss: 0.2830972969532013 2023-01-22 13:49:29.207617: step: 6/464, loss: 0.23990294337272644 2023-01-22 13:49:29.801125: step: 8/464, loss: 0.00041447050170972943 2023-01-22 13:49:30.391277: step: 10/464, loss: 0.0003809631452895701 2023-01-22 13:49:31.069171: step: 12/464, loss: 0.025611156597733498 2023-01-22 13:49:31.692653: step: 14/464, loss: 0.01804334670305252 2023-01-22 13:49:32.227503: step: 16/464, loss: 0.001929254038259387 2023-01-22 13:49:32.847776: step: 18/464, loss: 0.036881223320961 2023-01-22 13:49:33.439737: step: 20/464, loss: 0.012756387703120708 2023-01-22 13:49:34.058657: step: 22/464, loss: 0.008184123784303665 2023-01-22 13:49:34.691976: step: 24/464, loss: 0.0449734702706337 2023-01-22 13:49:35.385412: step: 26/464, loss: 0.00029686116613447666 2023-01-22 13:49:35.994695: step: 28/464, loss: 0.02302325703203678 2023-01-22 13:49:36.657818: step: 30/464, loss: 0.05118812620639801 2023-01-22 13:49:37.230669: step: 32/464, loss: 0.001830504508689046 2023-01-22 13:49:37.815158: step: 34/464, loss: 0.009466273710131645 2023-01-22 13:49:38.413731: step: 36/464, loss: 0.03142885863780975 2023-01-22 13:49:39.109650: step: 38/464, loss: 0.027976183220744133 2023-01-22 13:49:39.744705: step: 40/464, loss: 0.01014450192451477 2023-01-22 13:49:40.372097: step: 42/464, loss: 0.011748143471777439 2023-01-22 13:49:40.978273: step: 44/464, loss: 0.008829900063574314 2023-01-22 13:49:41.608652: step: 46/464, loss: 0.009688672609627247 2023-01-22 13:49:42.209775: step: 48/464, loss: 0.0021476442925632 2023-01-22 13:49:42.867649: step: 50/464, loss: 0.009073460474610329 2023-01-22 13:49:43.495411: step: 52/464, loss: 0.0006571787525899708 2023-01-22 13:49:44.145658: step: 54/464, loss: 0.005301504395902157 2023-01-22 13:49:44.756302: step: 56/464, loss: 0.006080952472984791 2023-01-22 13:49:45.394702: step: 58/464, loss: 0.8253498077392578 2023-01-22 13:49:45.997063: step: 60/464, loss: 0.06126685440540314 2023-01-22 13:49:46.542995: step: 62/464, loss: 0.01091950386762619 2023-01-22 13:49:47.142071: step: 64/464, loss: 0.018319545313715935 2023-01-22 13:49:47.699406: step: 66/464, loss: 0.00018545903731137514 2023-01-22 13:49:48.301659: step: 68/464, loss: 0.0008794991299510002 2023-01-22 13:49:48.945699: step: 70/464, loss: 0.007405332755297422 2023-01-22 13:49:49.603496: step: 72/464, loss: 0.0023415430914610624 2023-01-22 13:49:50.171585: step: 74/464, loss: 0.04160754755139351 2023-01-22 13:49:50.879083: step: 76/464, loss: 0.00813635066151619 2023-01-22 13:49:51.516365: step: 78/464, loss: 0.0026107614394277334 
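Each epoch block above closes with a single summary figure ("Loss: 0.054" for epoch 31, "Loss: 0.049" for epoch 32). The log does not show how that number is reduced from the per-step values, so the sketch below simply assumes it is the plain mean of the epoch's step losses; epochs are separated by watching for the step counter to reset:

    # Assumed reconstruction of the per-epoch "Loss:" figure as a plain mean of step losses.
    def epoch_mean_losses(records):
        epochs, current, last_step = [], [], None
        for r in records:
            if last_step is not None and r["step"] < last_step:
                epochs.append(current)   # step counter reset, so a new epoch started
                current = []
            current.append(r["loss"])
            last_step = r["step"]
        if current:
            epochs.append(current)
        return [sum(e) / len(e) for e in epochs]

    # Usage, with `records` from the parsing sketch above:
    #   print([round(m, 3) for m in epoch_mean_losses(records)])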
2023-01-22 13:49:52.145685: step: 80/464, loss: 0.004070333205163479 2023-01-22 13:49:52.689677: step: 82/464, loss: 0.0002775926550384611 2023-01-22 13:49:53.286912: step: 84/464, loss: 0.00025254907086491585 2023-01-22 13:49:53.927477: step: 86/464, loss: 0.04424852132797241 2023-01-22 13:49:54.533745: step: 88/464, loss: 0.004443845711648464 2023-01-22 13:49:55.138959: step: 90/464, loss: 0.00035951321478933096 2023-01-22 13:49:55.740027: step: 92/464, loss: 0.017380233854055405 2023-01-22 13:49:56.342721: step: 94/464, loss: 0.00895154569298029 2023-01-22 13:49:56.939736: step: 96/464, loss: 0.009547477588057518 2023-01-22 13:49:57.594914: step: 98/464, loss: 0.0018229386769235134 2023-01-22 13:49:58.246444: step: 100/464, loss: 0.020579863339662552 2023-01-22 13:49:58.943460: step: 102/464, loss: 0.32163500785827637 2023-01-22 13:49:59.514612: step: 104/464, loss: 0.012846940197050571 2023-01-22 13:50:00.269869: step: 106/464, loss: 0.0021412346977740526 2023-01-22 13:50:00.870484: step: 108/464, loss: 0.0016689874464645982 2023-01-22 13:50:01.443550: step: 110/464, loss: 0.04337398707866669 2023-01-22 13:50:02.091290: step: 112/464, loss: 0.011758524924516678 2023-01-22 13:50:02.663810: step: 114/464, loss: 0.001188629656098783 2023-01-22 13:50:03.232353: step: 116/464, loss: 0.0006290088058449328 2023-01-22 13:50:03.869889: step: 118/464, loss: 0.3245905935764313 2023-01-22 13:50:04.472311: step: 120/464, loss: 0.030581016093492508 2023-01-22 13:50:05.101039: step: 122/464, loss: 0.013289973139762878 2023-01-22 13:50:05.693352: step: 124/464, loss: 0.009468546137213707 2023-01-22 13:50:06.275413: step: 126/464, loss: 0.028483226895332336 2023-01-22 13:50:06.854938: step: 128/464, loss: 0.012841271236538887 2023-01-22 13:50:07.427904: step: 130/464, loss: 0.03689542040228844 2023-01-22 13:50:08.134917: step: 132/464, loss: 0.004030969459563494 2023-01-22 13:50:08.741509: step: 134/464, loss: 0.006892730947583914 2023-01-22 13:50:09.401878: step: 136/464, loss: 0.011415884830057621 2023-01-22 13:50:09.963625: step: 138/464, loss: 3.393308725208044e-05 2023-01-22 13:50:10.571041: step: 140/464, loss: 0.01180098857730627 2023-01-22 13:50:11.161716: step: 142/464, loss: 0.042764972895383835 2023-01-22 13:50:11.800765: step: 144/464, loss: 0.005582286510616541 2023-01-22 13:50:12.472973: step: 146/464, loss: 0.33375313878059387 2023-01-22 13:50:13.035598: step: 148/464, loss: 0.0011688163504004478 2023-01-22 13:50:13.668629: step: 150/464, loss: 0.003209100104868412 2023-01-22 13:50:14.285938: step: 152/464, loss: 0.02336471527814865 2023-01-22 13:50:14.913422: step: 154/464, loss: 0.05035460367798805 2023-01-22 13:50:15.537480: step: 156/464, loss: 0.0631193295121193 2023-01-22 13:50:16.153650: step: 158/464, loss: 0.0029422007501125336 2023-01-22 13:50:16.859860: step: 160/464, loss: 0.1768340766429901 2023-01-22 13:50:17.501008: step: 162/464, loss: 3.6499710083007812 2023-01-22 13:50:18.113299: step: 164/464, loss: 0.010985293425619602 2023-01-22 13:50:18.653221: step: 166/464, loss: 0.002972731599584222 2023-01-22 13:50:19.245746: step: 168/464, loss: 0.010138231329619884 2023-01-22 13:50:19.864913: step: 170/464, loss: 0.010469421744346619 2023-01-22 13:50:20.507454: step: 172/464, loss: 0.09447196871042252 2023-01-22 13:50:21.132661: step: 174/464, loss: 0.011761488392949104 2023-01-22 13:50:21.770839: step: 176/464, loss: 0.014840611256659031 2023-01-22 13:50:22.356872: step: 178/464, loss: 0.03037756122648716 2023-01-22 13:50:22.939935: step: 180/464, loss: 0.014970389194786549 
2023-01-22 13:50:23.512103: step: 182/464, loss: 0.004324909765273333 2023-01-22 13:50:24.160015: step: 184/464, loss: 0.17347773909568787 2023-01-22 13:50:24.735621: step: 186/464, loss: 0.06512332707643509 2023-01-22 13:50:25.360745: step: 188/464, loss: 0.010195410810410976 2023-01-22 13:50:25.993977: step: 190/464, loss: 0.0038245213218033314 2023-01-22 13:50:26.595029: step: 192/464, loss: 0.006037340499460697 2023-01-22 13:50:27.176948: step: 194/464, loss: 0.0025265971198678017 2023-01-22 13:50:27.799427: step: 196/464, loss: 0.0001403355272486806 2023-01-22 13:50:28.495284: step: 198/464, loss: 0.03302770480513573 2023-01-22 13:50:29.155138: step: 200/464, loss: 0.044800423085689545 2023-01-22 13:50:29.817139: step: 202/464, loss: 0.0025492501445114613 2023-01-22 13:50:30.412576: step: 204/464, loss: 0.00408302852883935 2023-01-22 13:50:31.021166: step: 206/464, loss: 0.0031438700389117002 2023-01-22 13:50:31.653875: step: 208/464, loss: 0.014370287768542767 2023-01-22 13:50:32.275200: step: 210/464, loss: 0.0064779119566082954 2023-01-22 13:50:32.860245: step: 212/464, loss: 0.05422116443514824 2023-01-22 13:50:33.463260: step: 214/464, loss: 0.0004520063812378794 2023-01-22 13:50:34.117589: step: 216/464, loss: 0.10309507697820663 2023-01-22 13:50:34.800486: step: 218/464, loss: 0.12332595884799957 2023-01-22 13:50:35.432937: step: 220/464, loss: 0.006602240726351738 2023-01-22 13:50:36.000964: step: 222/464, loss: 0.014455534517765045 2023-01-22 13:50:36.537556: step: 224/464, loss: 0.004008309915661812 2023-01-22 13:50:37.190939: step: 226/464, loss: 0.0005987048498354852 2023-01-22 13:50:37.862007: step: 228/464, loss: 0.01726825349032879 2023-01-22 13:50:38.479088: step: 230/464, loss: 0.00909911748021841 2023-01-22 13:50:39.089228: step: 232/464, loss: 0.0018394223880022764 2023-01-22 13:50:39.643243: step: 234/464, loss: 0.5204110145568848 2023-01-22 13:50:40.262432: step: 236/464, loss: 9.062099707080051e-05 2023-01-22 13:50:40.790852: step: 238/464, loss: 0.0006134660798124969 2023-01-22 13:50:41.453327: step: 240/464, loss: 0.010994810611009598 2023-01-22 13:50:42.121376: step: 242/464, loss: 1.8539471057010815e-05 2023-01-22 13:50:42.709685: step: 244/464, loss: 0.07794538885354996 2023-01-22 13:50:43.293106: step: 246/464, loss: 0.010720056481659412 2023-01-22 13:50:43.870366: step: 248/464, loss: 0.022959182038903236 2023-01-22 13:50:44.455336: step: 250/464, loss: 0.00048418642836622894 2023-01-22 13:50:45.087160: step: 252/464, loss: 0.015802789479494095 2023-01-22 13:50:45.687876: step: 254/464, loss: 0.001293016248382628 2023-01-22 13:50:46.353339: step: 256/464, loss: 0.006310163531452417 2023-01-22 13:50:46.951480: step: 258/464, loss: 0.011721929535269737 2023-01-22 13:50:47.606282: step: 260/464, loss: 0.00028431994724087417 2023-01-22 13:50:48.202920: step: 262/464, loss: 0.001610343810170889 2023-01-22 13:50:48.817797: step: 264/464, loss: 0.004501787945628166 2023-01-22 13:50:49.556060: step: 266/464, loss: 0.012815630063414574 2023-01-22 13:50:50.145967: step: 268/464, loss: 0.021414536982774734 2023-01-22 13:50:50.731011: step: 270/464, loss: 0.015873683616518974 2023-01-22 13:50:51.314485: step: 272/464, loss: 0.0038522426038980484 2023-01-22 13:50:51.919110: step: 274/464, loss: 0.03324449434876442 2023-01-22 13:50:52.528302: step: 276/464, loss: 0.003817223245278001 2023-01-22 13:50:53.118547: step: 278/464, loss: 0.005478051956743002 2023-01-22 13:50:53.752266: step: 280/464, loss: 0.016270257532596588 2023-01-22 13:50:54.377057: step: 282/464, loss: 
0.018138011917471886 2023-01-22 13:50:54.991567: step: 284/464, loss: 0.0062078433111310005 2023-01-22 13:50:55.617594: step: 286/464, loss: 0.007623214274644852 2023-01-22 13:50:56.273202: step: 288/464, loss: 0.0191037654876709 2023-01-22 13:50:56.924624: step: 290/464, loss: 0.03720256686210632 2023-01-22 13:50:57.535790: step: 292/464, loss: 0.07807682454586029 2023-01-22 13:50:58.265704: step: 294/464, loss: 0.002669744659215212 2023-01-22 13:50:58.893182: step: 296/464, loss: 0.018069475889205933 2023-01-22 13:50:59.546994: step: 298/464, loss: 0.05704755336046219 2023-01-22 13:51:00.217958: step: 300/464, loss: 0.018284089863300323 2023-01-22 13:51:00.867614: step: 302/464, loss: 0.052654922008514404 2023-01-22 13:51:01.436354: step: 304/464, loss: 0.9298496842384338 2023-01-22 13:51:02.028846: step: 306/464, loss: 0.0066015636548399925 2023-01-22 13:51:02.669666: step: 308/464, loss: 0.006413571536540985 2023-01-22 13:51:03.319883: step: 310/464, loss: 0.007779460400342941 2023-01-22 13:51:04.003608: step: 312/464, loss: 0.030138764530420303 2023-01-22 13:51:04.636111: step: 314/464, loss: 0.0013628305168822408 2023-01-22 13:51:05.239833: step: 316/464, loss: 0.01542107854038477 2023-01-22 13:51:05.801444: step: 318/464, loss: 0.003668449819087982 2023-01-22 13:51:06.414879: step: 320/464, loss: 0.028360677883028984 2023-01-22 13:51:07.088185: step: 322/464, loss: 0.8447021842002869 2023-01-22 13:51:07.682813: step: 324/464, loss: 0.0032662516459822655 2023-01-22 13:51:08.371541: step: 326/464, loss: 0.23302333056926727 2023-01-22 13:51:09.029091: step: 328/464, loss: 0.012652361765503883 2023-01-22 13:51:09.577679: step: 330/464, loss: 0.004331211093813181 2023-01-22 13:51:10.180175: step: 332/464, loss: 0.004175769165158272 2023-01-22 13:51:10.811960: step: 334/464, loss: 0.002427124185487628 2023-01-22 13:51:11.515070: step: 336/464, loss: 0.0012440033024176955 2023-01-22 13:51:12.155066: step: 338/464, loss: 0.010690954513847828 2023-01-22 13:51:12.741465: step: 340/464, loss: 0.0003702449903357774 2023-01-22 13:51:13.325708: step: 342/464, loss: 9.117002628045157e-05 2023-01-22 13:51:13.928302: step: 344/464, loss: 0.0034490206744521856 2023-01-22 13:51:14.507915: step: 346/464, loss: 0.13205718994140625 2023-01-22 13:51:15.149277: step: 348/464, loss: 0.0798322930932045 2023-01-22 13:51:15.761357: step: 350/464, loss: 0.020838763564825058 2023-01-22 13:51:16.361598: step: 352/464, loss: 0.005004078149795532 2023-01-22 13:51:16.939986: step: 354/464, loss: 0.009846973232924938 2023-01-22 13:51:17.530978: step: 356/464, loss: 0.00175945064984262 2023-01-22 13:51:18.139792: step: 358/464, loss: 0.026817964389920235 2023-01-22 13:51:18.745368: step: 360/464, loss: 0.013861851766705513 2023-01-22 13:51:19.362462: step: 362/464, loss: 0.001218835124745965 2023-01-22 13:51:19.900024: step: 364/464, loss: 0.016749968752264977 2023-01-22 13:51:20.598000: step: 366/464, loss: 0.015844695270061493 2023-01-22 13:51:21.228321: step: 368/464, loss: 0.003026836784556508 2023-01-22 13:51:21.904558: step: 370/464, loss: 0.0302230603992939 2023-01-22 13:51:22.572237: step: 372/464, loss: 0.009233558550477028 2023-01-22 13:51:23.190123: step: 374/464, loss: 0.019739385694265366 2023-01-22 13:51:23.820441: step: 376/464, loss: 0.014525365084409714 2023-01-22 13:51:24.444084: step: 378/464, loss: 0.0338415801525116 2023-01-22 13:51:25.008077: step: 380/464, loss: 0.07181494683027267 2023-01-22 13:51:25.616249: step: 382/464, loss: 0.022824645042419434 2023-01-22 13:51:26.208940: step: 384/464, 
loss: 0.0061698416247963905 2023-01-22 13:51:26.915260: step: 386/464, loss: 0.040327515453100204 2023-01-22 13:51:27.550880: step: 388/464, loss: 0.0004780854796990752 2023-01-22 13:51:28.169803: step: 390/464, loss: 0.036215294152498245 2023-01-22 13:51:28.806454: step: 392/464, loss: 0.015350079163908958 2023-01-22 13:51:29.406368: step: 394/464, loss: 0.11014533787965775 2023-01-22 13:51:29.990638: step: 396/464, loss: 0.00023063892149366438 2023-01-22 13:51:30.602837: step: 398/464, loss: 0.018782315775752068 2023-01-22 13:51:31.331705: step: 400/464, loss: 0.006294018588960171 2023-01-22 13:51:32.007192: step: 402/464, loss: 0.014814279973506927 2023-01-22 13:51:32.642441: step: 404/464, loss: 0.053902145475149155 2023-01-22 13:51:33.213014: step: 406/464, loss: 0.028109122067689896 2023-01-22 13:51:33.865526: step: 408/464, loss: 0.013292652554810047 2023-01-22 13:51:34.503392: step: 410/464, loss: 0.006991738453507423 2023-01-22 13:51:35.128793: step: 412/464, loss: 0.0037923045456409454 2023-01-22 13:51:35.791978: step: 414/464, loss: 0.00360084674321115 2023-01-22 13:51:36.428025: step: 416/464, loss: 0.0037328642792999744 2023-01-22 13:51:37.081005: step: 418/464, loss: 0.001704493653960526 2023-01-22 13:51:37.701646: step: 420/464, loss: 0.004305397160351276 2023-01-22 13:51:38.317611: step: 422/464, loss: 0.0022913780994713306 2023-01-22 13:51:38.918571: step: 424/464, loss: 0.005939268507063389 2023-01-22 13:51:39.545976: step: 426/464, loss: 0.03526054322719574 2023-01-22 13:51:40.167263: step: 428/464, loss: 0.0073980726301670074 2023-01-22 13:51:40.749164: step: 430/464, loss: 0.0022466492373496294 2023-01-22 13:51:41.336045: step: 432/464, loss: 0.0006893372628837824 2023-01-22 13:51:41.980254: step: 434/464, loss: 0.04242715984582901 2023-01-22 13:51:42.576348: step: 436/464, loss: 0.005191306583583355 2023-01-22 13:51:43.252917: step: 438/464, loss: 0.24681374430656433 2023-01-22 13:51:43.856081: step: 440/464, loss: 0.0015327599830925465 2023-01-22 13:51:44.476305: step: 442/464, loss: 0.0064181857742369175 2023-01-22 13:51:45.105114: step: 444/464, loss: 0.00031510370899923146 2023-01-22 13:51:45.769002: step: 446/464, loss: 0.00802667811512947 2023-01-22 13:51:46.383699: step: 448/464, loss: 1.0939445495605469 2023-01-22 13:51:47.067934: step: 450/464, loss: 0.010794229805469513 2023-01-22 13:51:47.679920: step: 452/464, loss: 0.003955810330808163 2023-01-22 13:51:48.370511: step: 454/464, loss: 0.009406117722392082 2023-01-22 13:51:48.981316: step: 456/464, loss: 0.023797964677214622 2023-01-22 13:51:49.616681: step: 458/464, loss: 0.09177647531032562 2023-01-22 13:51:50.344838: step: 460/464, loss: 0.045457497239112854 2023-01-22 13:51:50.914019: step: 462/464, loss: 0.00666583888232708 2023-01-22 13:51:51.528287: step: 464/464, loss: 0.1608838140964508 2023-01-22 13:51:52.135409: step: 466/464, loss: 0.931765079498291 2023-01-22 13:51:52.687904: step: 468/464, loss: 0.0022043404169380665 2023-01-22 13:51:53.299667: step: 470/464, loss: 0.001080443849787116 2023-01-22 13:51:53.918256: step: 472/464, loss: 0.05975859612226486 2023-01-22 13:51:54.514445: step: 474/464, loss: 0.004577296786010265 2023-01-22 13:51:55.118420: step: 476/464, loss: 0.004997294396162033 2023-01-22 13:51:55.762675: step: 478/464, loss: 0.10205055773258209 2023-01-22 13:51:56.355894: step: 480/464, loss: 0.24596334993839264 2023-01-22 13:51:57.027093: step: 482/464, loss: 0.025164317339658737 2023-01-22 13:51:57.732014: step: 484/464, loss: 0.028020521625876427 2023-01-22 13:51:58.361272: 
step: 486/464, loss: 0.010484244674444199 2023-01-22 13:51:58.993724: step: 488/464, loss: 0.0005600190488621593 2023-01-22 13:51:59.640121: step: 490/464, loss: 0.0018563539488241076 2023-01-22 13:52:00.283459: step: 492/464, loss: 0.0025135499890893698 2023-01-22 13:52:00.922057: step: 494/464, loss: 0.004712886642664671 2023-01-22 13:52:01.540865: step: 496/464, loss: 0.020760148763656616 2023-01-22 13:52:02.154499: step: 498/464, loss: 0.02509947493672371 2023-01-22 13:52:02.774469: step: 500/464, loss: 0.014062878675758839 2023-01-22 13:52:03.499819: step: 502/464, loss: 0.004606351256370544 2023-01-22 13:52:04.181157: step: 504/464, loss: 0.0003082886105403304 2023-01-22 13:52:04.749337: step: 506/464, loss: 0.005637813825160265 2023-01-22 13:52:05.314869: step: 508/464, loss: 0.0023179217241704464 2023-01-22 13:52:05.935307: step: 510/464, loss: 0.009163595736026764 2023-01-22 13:52:06.558145: step: 512/464, loss: 0.1648324877023697 2023-01-22 13:52:07.161271: step: 514/464, loss: 0.26366400718688965 2023-01-22 13:52:07.832906: step: 516/464, loss: 0.009502682834863663 2023-01-22 13:52:08.426305: step: 518/464, loss: 0.01026153564453125 2023-01-22 13:52:09.044147: step: 520/464, loss: 0.017595071345567703 2023-01-22 13:52:09.719568: step: 522/464, loss: 0.008763373829424381 2023-01-22 13:52:10.297451: step: 524/464, loss: 0.004001053050160408 2023-01-22 13:52:10.874164: step: 526/464, loss: 0.007184656802564859 2023-01-22 13:52:11.556820: step: 528/464, loss: 0.05734050273895264 2023-01-22 13:52:12.140337: step: 530/464, loss: 0.009585640393197536 2023-01-22 13:52:12.731410: step: 532/464, loss: 0.027792764827609062 2023-01-22 13:52:13.378751: step: 534/464, loss: 0.19172044098377228 2023-01-22 13:52:13.948592: step: 536/464, loss: 0.00415234686806798 2023-01-22 13:52:14.579461: step: 538/464, loss: 0.0013831807300448418 2023-01-22 13:52:15.270750: step: 540/464, loss: 0.011251897551119328 2023-01-22 13:52:15.878076: step: 542/464, loss: 0.020211560651659966 2023-01-22 13:52:16.670939: step: 544/464, loss: 0.010223829187452793 2023-01-22 13:52:17.258840: step: 546/464, loss: 0.028799928724765778 2023-01-22 13:52:17.889117: step: 548/464, loss: 0.004515249282121658 2023-01-22 13:52:18.474340: step: 550/464, loss: 0.0034398529678583145 2023-01-22 13:52:19.115330: step: 552/464, loss: 0.00010570652375463396 2023-01-22 13:52:19.711990: step: 554/464, loss: 0.01961778849363327 2023-01-22 13:52:20.340518: step: 556/464, loss: 0.7934921383857727 2023-01-22 13:52:20.989936: step: 558/464, loss: 0.007351873442530632 2023-01-22 13:52:21.652136: step: 560/464, loss: 0.02307305485010147 2023-01-22 13:52:22.335146: step: 562/464, loss: 3.743881461559795e-05 2023-01-22 13:52:22.900967: step: 564/464, loss: 0.0005810451111756265 2023-01-22 13:52:23.520251: step: 566/464, loss: 0.008573451079428196 2023-01-22 13:52:24.148332: step: 568/464, loss: 0.0031232843175530434 2023-01-22 13:52:24.754887: step: 570/464, loss: 0.06860752403736115 2023-01-22 13:52:25.385020: step: 572/464, loss: 0.005422333255410194 2023-01-22 13:52:25.946377: step: 574/464, loss: 0.0013447273522615433 2023-01-22 13:52:26.571484: step: 576/464, loss: 0.01279196422547102 2023-01-22 13:52:27.308986: step: 578/464, loss: 0.1749306619167328 2023-01-22 13:52:27.908915: step: 580/464, loss: 0.018388798460364342 2023-01-22 13:52:28.537435: step: 582/464, loss: 0.0017691449029371142 2023-01-22 13:52:29.106940: step: 584/464, loss: 0.6445823907852173 2023-01-22 13:52:29.705794: step: 586/464, loss: 0.0028402141761034727 2023-01-22 
13:52:30.405870: step: 588/464, loss: 0.0070165046490728855 2023-01-22 13:52:31.052895: step: 590/464, loss: 0.016421951353549957 2023-01-22 13:52:31.673068: step: 592/464, loss: 0.06449508666992188 2023-01-22 13:52:32.302181: step: 594/464, loss: 0.022456709295511246 2023-01-22 13:52:32.895393: step: 596/464, loss: 0.024202289059758186 2023-01-22 13:52:33.583526: step: 598/464, loss: 0.0032440153881907463 2023-01-22 13:52:34.195290: step: 600/464, loss: 0.0017527195159345865 2023-01-22 13:52:34.799508: step: 602/464, loss: 0.023841669782996178 2023-01-22 13:52:35.406725: step: 604/464, loss: 0.04659492149949074 2023-01-22 13:52:35.929487: step: 606/464, loss: 6.184981612022966e-05 2023-01-22 13:52:36.540035: step: 608/464, loss: 0.003048345912247896 2023-01-22 13:52:37.198045: step: 610/464, loss: 0.018488649278879166 2023-01-22 13:52:37.727609: step: 612/464, loss: 0.023030301555991173 2023-01-22 13:52:38.305257: step: 614/464, loss: 0.00235570571385324 2023-01-22 13:52:38.963628: step: 616/464, loss: 0.0018681371584534645 2023-01-22 13:52:39.548328: step: 618/464, loss: 0.018395736813545227 2023-01-22 13:52:40.164761: step: 620/464, loss: 0.0008612312958575785 2023-01-22 13:52:40.778272: step: 622/464, loss: 0.030212650075554848 2023-01-22 13:52:41.375897: step: 624/464, loss: 0.00812254473567009 2023-01-22 13:52:42.063236: step: 626/464, loss: 0.03114047646522522 2023-01-22 13:52:42.655517: step: 628/464, loss: 0.000972464622464031 2023-01-22 13:52:43.292705: step: 630/464, loss: 0.0186289194971323 2023-01-22 13:52:43.911809: step: 632/464, loss: 0.017052393406629562 2023-01-22 13:52:44.686115: step: 634/464, loss: 0.009422325529158115 2023-01-22 13:52:45.377316: step: 636/464, loss: 0.002550107426941395 2023-01-22 13:52:45.993873: step: 638/464, loss: 0.001601535128429532 2023-01-22 13:52:46.654066: step: 640/464, loss: 0.03888073191046715 2023-01-22 13:52:47.275343: step: 642/464, loss: 0.0026565822772681713 2023-01-22 13:52:47.867259: step: 644/464, loss: 0.0016079742927104235 2023-01-22 13:52:48.574916: step: 646/464, loss: 0.09242173284292221 2023-01-22 13:52:49.231818: step: 648/464, loss: 0.0007898823241703212 2023-01-22 13:52:49.946815: step: 650/464, loss: 0.0019716075621545315 2023-01-22 13:52:50.570674: step: 652/464, loss: 0.0017185850301757455 2023-01-22 13:52:51.184737: step: 654/464, loss: 0.13222934305667877 2023-01-22 13:52:51.842768: step: 656/464, loss: 0.01925063319504261 2023-01-22 13:52:52.478250: step: 658/464, loss: 0.012142672203481197 2023-01-22 13:52:53.084408: step: 660/464, loss: 0.0601552352309227 2023-01-22 13:52:53.667865: step: 662/464, loss: 0.012354286387562752 2023-01-22 13:52:54.355832: step: 664/464, loss: 0.03817038610577583 2023-01-22 13:52:54.932424: step: 666/464, loss: 0.0015115304850041866 2023-01-22 13:52:55.530493: step: 668/464, loss: 0.042541444301605225 2023-01-22 13:52:56.133222: step: 670/464, loss: 8.507548773195595e-05 2023-01-22 13:52:56.748158: step: 672/464, loss: 0.19663390517234802 2023-01-22 13:52:57.382890: step: 674/464, loss: 0.004419144243001938 2023-01-22 13:52:58.099717: step: 676/464, loss: 0.008868024684488773 2023-01-22 13:52:58.661081: step: 678/464, loss: 0.0003636557958088815 2023-01-22 13:52:59.295122: step: 680/464, loss: 0.03782545030117035 2023-01-22 13:52:59.884206: step: 682/464, loss: 0.008933094330132008 2023-01-22 13:53:00.533220: step: 684/464, loss: 0.07945708185434341 2023-01-22 13:53:01.195846: step: 686/464, loss: 0.006608300376683474 2023-01-22 13:53:01.873517: step: 688/464, loss: 0.05401575192809105 
2023-01-22 13:53:02.499169: step: 690/464, loss: 0.0007566389977000654 2023-01-22 13:53:03.064822: step: 692/464, loss: 0.006353262811899185 2023-01-22 13:53:03.676626: step: 694/464, loss: 0.008637607097625732 2023-01-22 13:53:04.332772: step: 696/464, loss: 0.026068687438964844 2023-01-22 13:53:04.973750: step: 698/464, loss: 0.0015986430225893855 2023-01-22 13:53:05.625083: step: 700/464, loss: 0.001187260844744742 2023-01-22 13:53:06.185073: step: 702/464, loss: 0.001432645134627819 2023-01-22 13:53:06.896753: step: 704/464, loss: 0.02928977645933628 2023-01-22 13:53:07.537384: step: 706/464, loss: 0.009631271474063396 2023-01-22 13:53:08.205735: step: 708/464, loss: 0.041873540729284286 2023-01-22 13:53:08.832268: step: 710/464, loss: 0.0003186961112078279 2023-01-22 13:53:09.420482: step: 712/464, loss: 0.000907586480025202 2023-01-22 13:53:10.177975: step: 714/464, loss: 0.0038270133081823587 2023-01-22 13:53:10.821947: step: 716/464, loss: 0.14580540359020233 2023-01-22 13:53:11.445713: step: 718/464, loss: 0.004005967639386654 2023-01-22 13:53:12.029859: step: 720/464, loss: 0.004942305386066437 2023-01-22 13:53:12.604335: step: 722/464, loss: 0.0007397461449727416 2023-01-22 13:53:13.216157: step: 724/464, loss: 0.034463461488485336 2023-01-22 13:53:13.877929: step: 726/464, loss: 0.019920919090509415 2023-01-22 13:53:14.506920: step: 728/464, loss: 0.023921029642224312 2023-01-22 13:53:15.117222: step: 730/464, loss: 0.02836955524981022 2023-01-22 13:53:15.708710: step: 732/464, loss: 0.0049094269052147865 2023-01-22 13:53:16.280931: step: 734/464, loss: 0.023686110973358154 2023-01-22 13:53:16.847696: step: 736/464, loss: 0.22229552268981934 2023-01-22 13:53:17.449400: step: 738/464, loss: 0.038047630339860916 2023-01-22 13:53:18.061487: step: 740/464, loss: 8.502315904479474e-05 2023-01-22 13:53:18.733514: step: 742/464, loss: 0.004070245660841465 2023-01-22 13:53:19.356743: step: 744/464, loss: 0.0008259325986728072 2023-01-22 13:53:20.047046: step: 746/464, loss: 0.03511256352066994 2023-01-22 13:53:20.704122: step: 748/464, loss: 0.02191769890487194 2023-01-22 13:53:21.337620: step: 750/464, loss: 0.01094724703580141 2023-01-22 13:53:22.079838: step: 752/464, loss: 0.04792044311761856 2023-01-22 13:53:22.718717: step: 754/464, loss: 0.1264556348323822 2023-01-22 13:53:23.277953: step: 756/464, loss: 0.0019065093947574496 2023-01-22 13:53:23.865554: step: 758/464, loss: 0.0019241455011069775 2023-01-22 13:53:24.461303: step: 760/464, loss: 0.001199294812977314 2023-01-22 13:53:25.152747: step: 762/464, loss: 0.034956250339746475 2023-01-22 13:53:25.801120: step: 764/464, loss: 0.19993415474891663 2023-01-22 13:53:26.381899: step: 766/464, loss: 0.06722612679004669 2023-01-22 13:53:27.004758: step: 768/464, loss: 0.009393088519573212 2023-01-22 13:53:27.633285: step: 770/464, loss: 0.008216023445129395 2023-01-22 13:53:28.268863: step: 772/464, loss: 0.10442720353603363 2023-01-22 13:53:28.937952: step: 774/464, loss: 0.028010781854391098 2023-01-22 13:53:29.504434: step: 776/464, loss: 0.0024207436945289373 2023-01-22 13:53:30.077003: step: 778/464, loss: 0.05850352719426155 2023-01-22 13:53:30.652300: step: 780/464, loss: 0.01725189760327339 2023-01-22 13:53:31.276033: step: 782/464, loss: 0.028750715777277946 2023-01-22 13:53:31.867896: step: 784/464, loss: 0.0005509581533260643 2023-01-22 13:53:32.429153: step: 786/464, loss: 0.0018569778185337782 2023-01-22 13:53:33.074185: step: 788/464, loss: 0.11752443015575409 2023-01-22 13:53:33.728593: step: 790/464, loss: 
0.010551149025559425 2023-01-22 13:53:34.328436: step: 792/464, loss: 0.0026742308400571346 2023-01-22 13:53:34.926001: step: 794/464, loss: 0.005357430782169104 2023-01-22 13:53:35.532637: step: 796/464, loss: 0.3049858808517456 2023-01-22 13:53:36.157536: step: 798/464, loss: 0.020999440923333168 2023-01-22 13:53:36.877908: step: 800/464, loss: 0.01579812727868557 2023-01-22 13:53:37.528256: step: 802/464, loss: 0.010090960189700127 2023-01-22 13:53:38.151955: step: 804/464, loss: 0.00029950885800644755 2023-01-22 13:53:38.791572: step: 806/464, loss: 0.7088912129402161 2023-01-22 13:53:39.368021: step: 808/464, loss: 0.00036309740971773863 2023-01-22 13:53:39.989746: step: 810/464, loss: 0.009281838312745094 2023-01-22 13:53:40.586974: step: 812/464, loss: 0.009021191857755184 2023-01-22 13:53:41.265200: step: 814/464, loss: 0.02642044611275196 2023-01-22 13:53:41.860635: step: 816/464, loss: 0.04272956773638725 2023-01-22 13:53:42.439256: step: 818/464, loss: 0.0760403648018837 2023-01-22 13:53:43.024874: step: 820/464, loss: 0.0499514602124691 2023-01-22 13:53:43.729998: step: 822/464, loss: 6.40268611907959 2023-01-22 13:53:44.326673: step: 824/464, loss: 0.003643059404566884 2023-01-22 13:53:44.954930: step: 826/464, loss: 0.0012509127845987678 2023-01-22 13:53:45.541443: step: 828/464, loss: 0.0003478997678030282 2023-01-22 13:53:46.156320: step: 830/464, loss: 0.03138204663991928 2023-01-22 13:53:46.727125: step: 832/464, loss: 0.012598642148077488 2023-01-22 13:53:47.368299: step: 834/464, loss: 0.4908383786678314 2023-01-22 13:53:47.960234: step: 836/464, loss: 0.0039227623492479324 2023-01-22 13:53:48.611093: step: 838/464, loss: 0.003043395932763815 2023-01-22 13:53:49.259852: step: 840/464, loss: 0.0032116002403199673 2023-01-22 13:53:49.914840: step: 842/464, loss: 0.014169511385262012 2023-01-22 13:53:50.577980: step: 844/464, loss: 0.3224925994873047 2023-01-22 13:53:51.255008: step: 846/464, loss: 0.019328588619828224 2023-01-22 13:53:51.947451: step: 848/464, loss: 0.005719814915210009 2023-01-22 13:53:52.509975: step: 850/464, loss: 0.00022499442275147885 2023-01-22 13:53:53.122045: step: 852/464, loss: 0.02572174370288849 2023-01-22 13:53:53.761412: step: 854/464, loss: 0.00021003466099500656 2023-01-22 13:53:54.423499: step: 856/464, loss: 0.023753268644213676 2023-01-22 13:53:55.122808: step: 858/464, loss: 0.005463962908834219 2023-01-22 13:53:55.776801: step: 860/464, loss: 0.007517958525568247 2023-01-22 13:53:56.368379: step: 862/464, loss: 0.00097902852576226 2023-01-22 13:53:56.971194: step: 864/464, loss: 0.011939832009375095 2023-01-22 13:53:57.602984: step: 866/464, loss: 0.002941383281722665 2023-01-22 13:53:58.274408: step: 868/464, loss: 0.04505275562405586 2023-01-22 13:53:58.921958: step: 870/464, loss: 0.007924825884401798 2023-01-22 13:53:59.490274: step: 872/464, loss: 0.0026531440671533346 2023-01-22 13:54:00.121872: step: 874/464, loss: 0.005883718375116587 2023-01-22 13:54:00.762331: step: 876/464, loss: 0.010706374421715736 2023-01-22 13:54:01.352775: step: 878/464, loss: 0.030511975288391113 2023-01-22 13:54:01.917332: step: 880/464, loss: 0.0035022543743252754 2023-01-22 13:54:02.500593: step: 882/464, loss: 0.015873609110713005 2023-01-22 13:54:03.082552: step: 884/464, loss: 0.02552478015422821 2023-01-22 13:54:03.719275: step: 886/464, loss: 0.0021093592513352633 2023-01-22 13:54:04.318604: step: 888/464, loss: 0.003897372866049409 2023-01-22 13:54:04.903499: step: 890/464, loss: 0.03527354449033737 2023-01-22 13:54:05.540942: step: 892/464, 
loss: 0.020696407184004784 2023-01-22 13:54:06.057617: step: 894/464, loss: 0.002208180958405137 2023-01-22 13:54:06.656509: step: 896/464, loss: 0.0005641351453959942 2023-01-22 13:54:07.334250: step: 898/464, loss: 0.0031764640007168055 2023-01-22 13:54:07.930218: step: 900/464, loss: 0.01316324807703495 2023-01-22 13:54:08.473142: step: 902/464, loss: 0.0003927868092432618 2023-01-22 13:54:09.085790: step: 904/464, loss: 0.002863897942006588 2023-01-22 13:54:09.711224: step: 906/464, loss: 0.06512665748596191 2023-01-22 13:54:10.390398: step: 908/464, loss: 0.00653707142919302 2023-01-22 13:54:10.997197: step: 910/464, loss: 0.029721427708864212 2023-01-22 13:54:11.627506: step: 912/464, loss: 0.07226139307022095 2023-01-22 13:54:12.321846: step: 914/464, loss: 0.06094507500529289 2023-01-22 13:54:12.959895: step: 916/464, loss: 0.006911165080964565 2023-01-22 13:54:13.540850: step: 918/464, loss: 0.002758361166343093 2023-01-22 13:54:14.222218: step: 920/464, loss: 0.03506612032651901 2023-01-22 13:54:14.801389: step: 922/464, loss: 0.00020291132386773825 2023-01-22 13:54:15.387155: step: 924/464, loss: 0.023218167945742607 2023-01-22 13:54:16.029031: step: 926/464, loss: 0.003805319545790553 2023-01-22 13:54:16.707078: step: 928/464, loss: 0.011131572537124157 2023-01-22 13:54:17.180982: step: 930/464, loss: 0.0022440047468990088 ================================================== Loss: 0.065 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2891484942886812, 'r': 0.3522454142947502, 'f1': 0.31759338465925296}, 'combined': 0.23401617816997586, 'epoch': 33} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29824013967456664, 'r': 0.320697156619759, 'f1': 0.3090612420875377}, 'combined': 0.2017705518291697, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2864430372001958, 'r': 0.37014745414294753, 'f1': 0.3229597820088301}, 'combined': 0.2379703656907169, 'epoch': 33} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3173865934143823, 'r': 0.32350699604220784, 'f1': 0.3204175704319821}, 'combined': 0.20918452784678623, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30067765210608427, 'r': 0.36571987666034156, 'f1': 0.3300246147260274}, 'combined': 0.2431760319033886, 'epoch': 33} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3131671775541258, 'r': 0.3140298970515201, 'f1': 0.31359794396066526}, 'combined': 0.20473233647172964, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20512820512820512, 'r': 0.38095238095238093, 'f1': 0.26666666666666666}, 'combined': 0.17777777777777776, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.4891304347826087, 'f1': 0.3308823529411765}, 'combined': 0.16544117647058826, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:56:54.957277: step: 2/464, loss: 0.029898690059781075 2023-01-22 13:56:55.532761: step: 4/464, loss: 0.00015233525482472032 2023-01-22 13:56:56.115494: step: 6/464, loss: 0.008884308859705925 2023-01-22 13:56:56.694040: step: 8/464, loss: 0.000584449153393507 2023-01-22 13:56:57.363759: step: 10/464, loss: 0.004273217637091875 2023-01-22 13:56:58.015939: step: 12/464, loss: 0.0005258715245872736 2023-01-22 13:56:58.622889: step: 14/464, loss: 0.001208961708471179 2023-01-22 13:56:59.246980: step: 16/464, loss: 0.004396416246891022 2023-01-22 13:56:59.847679: step: 18/464, loss: 0.026321645826101303 2023-01-22 13:57:00.471394: step: 20/464, loss: 0.0018481980077922344 2023-01-22 13:57:01.094212: step: 22/464, loss: 0.0054297191090881824 2023-01-22 13:57:01.724851: step: 24/464, loss: 0.01054367981851101 2023-01-22 13:57:02.380501: step: 26/464, loss: 0.005763222463428974 2023-01-22 13:57:02.917444: step: 28/464, loss: 0.871414840221405 2023-01-22 13:57:03.556802: step: 30/464, loss: 0.00016669723845552653 2023-01-22 13:57:04.163101: step: 32/464, loss: 0.03802872449159622 2023-01-22 13:57:04.757985: step: 34/464, loss: 0.02912573330104351 2023-01-22 13:57:05.391357: step: 36/464, loss: 0.00691071804612875 2023-01-22 13:57:05.994619: step: 38/464, loss: 0.006158178672194481 2023-01-22 13:57:06.618109: step: 40/464, loss: 
0.002746821381151676 2023-01-22 13:57:07.192776: step: 42/464, loss: 0.006122718099504709 2023-01-22 13:57:07.833966: step: 44/464, loss: 0.0004740080039482564 2023-01-22 13:57:08.423344: step: 46/464, loss: 0.005896700546145439 2023-01-22 13:57:09.042275: step: 48/464, loss: 0.0034731540363281965 2023-01-22 13:57:09.693322: step: 50/464, loss: 0.0029471227899193764 2023-01-22 13:57:10.290822: step: 52/464, loss: 0.009528739377856255 2023-01-22 13:57:10.980676: step: 54/464, loss: 0.004815476015210152 2023-01-22 13:57:11.611646: step: 56/464, loss: 0.004503963980823755 2023-01-22 13:57:12.228717: step: 58/464, loss: 0.0020536768715828657 2023-01-22 13:57:12.848137: step: 60/464, loss: 0.03384426608681679 2023-01-22 13:57:13.480883: step: 62/464, loss: 0.002092523267492652 2023-01-22 13:57:14.144815: step: 64/464, loss: 0.04285869002342224 2023-01-22 13:57:14.755734: step: 66/464, loss: 0.014329276978969574 2023-01-22 13:57:15.408636: step: 68/464, loss: 0.02071165293455124 2023-01-22 13:57:15.998061: step: 70/464, loss: 0.00439227931201458 2023-01-22 13:57:16.608069: step: 72/464, loss: 0.006787712220102549 2023-01-22 13:57:17.244675: step: 74/464, loss: 0.009815738536417484 2023-01-22 13:57:17.835017: step: 76/464, loss: 0.010096452198922634 2023-01-22 13:57:18.438309: step: 78/464, loss: 0.010066819377243519 2023-01-22 13:57:19.071146: step: 80/464, loss: 0.0009568737586960196 2023-01-22 13:57:19.780168: step: 82/464, loss: 0.02412015199661255 2023-01-22 13:57:20.300941: step: 84/464, loss: 0.00012044959294144064 2023-01-22 13:57:20.894082: step: 86/464, loss: 0.0015337973600253463 2023-01-22 13:57:21.563759: step: 88/464, loss: 2.098287723129033e-06 2023-01-22 13:57:22.122673: step: 90/464, loss: 0.00764232175424695 2023-01-22 13:57:22.737187: step: 92/464, loss: 0.015353173948824406 2023-01-22 13:57:23.355177: step: 94/464, loss: 0.020151887089014053 2023-01-22 13:57:24.074842: step: 96/464, loss: 0.014923064969480038 2023-01-22 13:57:24.731852: step: 98/464, loss: 0.0006783062126487494 2023-01-22 13:57:25.363293: step: 100/464, loss: 0.0035995738580822945 2023-01-22 13:57:25.953296: step: 102/464, loss: 0.0024014213122427464 2023-01-22 13:57:26.630435: step: 104/464, loss: 0.002425145823508501 2023-01-22 13:57:27.243415: step: 106/464, loss: 0.004979377146810293 2023-01-22 13:57:27.861073: step: 108/464, loss: 0.04373719543218613 2023-01-22 13:57:28.573532: step: 110/464, loss: 0.03467360511422157 2023-01-22 13:57:29.169462: step: 112/464, loss: 0.0004658075049519539 2023-01-22 13:57:29.754591: step: 114/464, loss: 3.7881338357692584e-05 2023-01-22 13:57:30.437465: step: 116/464, loss: 0.5974915623664856 2023-01-22 13:57:31.028023: step: 118/464, loss: 8.322542998939753e-05 2023-01-22 13:57:31.572068: step: 120/464, loss: 0.02062365785241127 2023-01-22 13:57:32.232178: step: 122/464, loss: 0.008859731256961823 2023-01-22 13:57:32.895223: step: 124/464, loss: 0.0002515219384804368 2023-01-22 13:57:33.527533: step: 126/464, loss: 0.028871677815914154 2023-01-22 13:57:34.110408: step: 128/464, loss: 0.0009980584727600217 2023-01-22 13:57:34.769118: step: 130/464, loss: 0.007541639730334282 2023-01-22 13:57:35.379141: step: 132/464, loss: 0.009181492030620575 2023-01-22 13:57:36.028592: step: 134/464, loss: 0.002932182513177395 2023-01-22 13:57:36.674873: step: 136/464, loss: 0.002279211301356554 2023-01-22 13:57:37.275147: step: 138/464, loss: 0.01624910905957222 2023-01-22 13:57:37.911395: step: 140/464, loss: 0.0007104272372089326 2023-01-22 13:57:38.546430: step: 142/464, loss: 
0.0013539748033508658 2023-01-22 13:57:39.196320: step: 144/464, loss: 0.4728025794029236 2023-01-22 13:57:39.850799: step: 146/464, loss: 0.0043846312910318375 2023-01-22 13:57:40.538150: step: 148/464, loss: 0.00045613080146722496 2023-01-22 13:57:41.214194: step: 150/464, loss: 0.014981592074036598 2023-01-22 13:57:41.815939: step: 152/464, loss: 0.0012546187499538064 2023-01-22 13:57:42.447162: step: 154/464, loss: 0.015420163981616497 2023-01-22 13:57:43.064888: step: 156/464, loss: 0.006943312007933855 2023-01-22 13:57:43.702099: step: 158/464, loss: 0.039076171815395355 2023-01-22 13:57:44.255015: step: 160/464, loss: 9.373047214467078e-05 2023-01-22 13:57:44.844545: step: 162/464, loss: 0.0021961224265396595 2023-01-22 13:57:45.478477: step: 164/464, loss: 0.13457642495632172 2023-01-22 13:57:46.106361: step: 166/464, loss: 0.06285510957241058 2023-01-22 13:57:46.752085: step: 168/464, loss: 0.05896016210317612 2023-01-22 13:57:47.353959: step: 170/464, loss: 0.20885112881660461 2023-01-22 13:57:47.970365: step: 172/464, loss: 0.0013530661817640066 2023-01-22 13:57:48.628158: step: 174/464, loss: 0.005364841315895319 2023-01-22 13:57:49.251243: step: 176/464, loss: 0.004437359981238842 2023-01-22 13:57:49.862509: step: 178/464, loss: 0.012415021657943726 2023-01-22 13:57:50.493821: step: 180/464, loss: 0.030497750267386436 2023-01-22 13:57:51.101850: step: 182/464, loss: 0.017394132912158966 2023-01-22 13:57:51.671796: step: 184/464, loss: 0.15832246840000153 2023-01-22 13:57:52.366690: step: 186/464, loss: 0.0012430723290890455 2023-01-22 13:57:53.069454: step: 188/464, loss: 0.008626168593764305 2023-01-22 13:57:53.724964: step: 190/464, loss: 0.3963608145713806 2023-01-22 13:57:54.314366: step: 192/464, loss: 0.015922117978334427 2023-01-22 13:57:54.980702: step: 194/464, loss: 0.1013675332069397 2023-01-22 13:57:55.676957: step: 196/464, loss: 0.007717953063547611 2023-01-22 13:57:56.358427: step: 198/464, loss: 0.058719441294670105 2023-01-22 13:57:56.947420: step: 200/464, loss: 0.0013162650866433978 2023-01-22 13:57:57.564747: step: 202/464, loss: 0.005683231167495251 2023-01-22 13:57:58.176024: step: 204/464, loss: 0.0007428164826706052 2023-01-22 13:57:58.838181: step: 206/464, loss: 0.06769642233848572 2023-01-22 13:57:59.617484: step: 208/464, loss: 0.0025914330035448074 2023-01-22 13:58:00.209100: step: 210/464, loss: 0.002708293031901121 2023-01-22 13:58:00.836527: step: 212/464, loss: 0.031754009425640106 2023-01-22 13:58:01.546951: step: 214/464, loss: 0.08462861180305481 2023-01-22 13:58:02.116881: step: 216/464, loss: 0.0007086574914865196 2023-01-22 13:58:02.766352: step: 218/464, loss: 0.02312740683555603 2023-01-22 13:58:03.416912: step: 220/464, loss: 0.003332710824906826 2023-01-22 13:58:04.100833: step: 222/464, loss: 0.05921299010515213 2023-01-22 13:58:04.707638: step: 224/464, loss: 0.008811285719275475 2023-01-22 13:58:05.348232: step: 226/464, loss: 0.1366608589887619 2023-01-22 13:58:05.972338: step: 228/464, loss: 0.08018012344837189 2023-01-22 13:58:06.627732: step: 230/464, loss: 0.003253837348893285 2023-01-22 13:58:07.193105: step: 232/464, loss: 0.0008934770012274384 2023-01-22 13:58:07.802337: step: 234/464, loss: 0.04704802855849266 2023-01-22 13:58:08.437768: step: 236/464, loss: 0.0060298205353319645 2023-01-22 13:58:09.059773: step: 238/464, loss: 0.0531141459941864 2023-01-22 13:58:09.719568: step: 240/464, loss: 8.777321636443958e-05 2023-01-22 13:58:10.337635: step: 242/464, loss: 0.0010361942695453763 2023-01-22 13:58:10.953116: step: 
244/464, loss: 0.7005062699317932 2023-01-22 13:58:11.629735: step: 246/464, loss: 0.007535006385296583 2023-01-22 13:58:12.316945: step: 248/464, loss: 0.01733449101448059 2023-01-22 13:58:12.897780: step: 250/464, loss: 0.04061458632349968 2023-01-22 13:58:13.513445: step: 252/464, loss: 0.03368212282657623 2023-01-22 13:58:14.134182: step: 254/464, loss: 0.0033325874246656895 2023-01-22 13:58:14.747175: step: 256/464, loss: 0.010388418100774288 2023-01-22 13:58:15.365452: step: 258/464, loss: 0.03565354645252228 2023-01-22 13:58:15.992595: step: 260/464, loss: 0.03277277201414108 2023-01-22 13:58:16.569101: step: 262/464, loss: 0.004038808401674032 2023-01-22 13:58:17.168616: step: 264/464, loss: 0.0038243993185460567 2023-01-22 13:58:17.793578: step: 266/464, loss: 0.03558727353811264 2023-01-22 13:58:18.358617: step: 268/464, loss: 0.0010459619807079434 2023-01-22 13:58:18.926012: step: 270/464, loss: 0.0026864504907280207 2023-01-22 13:58:19.550997: step: 272/464, loss: 0.00539025804027915 2023-01-22 13:58:20.115658: step: 274/464, loss: 0.11854858696460724 2023-01-22 13:58:20.732989: step: 276/464, loss: 0.02091136947274208 2023-01-22 13:58:21.393178: step: 278/464, loss: 0.43733465671539307 2023-01-22 13:58:22.020671: step: 280/464, loss: 0.07855101674795151 2023-01-22 13:58:22.578264: step: 282/464, loss: 0.000969623273704201 2023-01-22 13:58:23.212009: step: 284/464, loss: 0.0034854025579988956 2023-01-22 13:58:23.870261: step: 286/464, loss: 0.002548051765188575 2023-01-22 13:58:24.468959: step: 288/464, loss: 0.00924122054129839 2023-01-22 13:58:25.091425: step: 290/464, loss: 0.019163407385349274 2023-01-22 13:58:25.724780: step: 292/464, loss: 0.03953166678547859 2023-01-22 13:58:26.325125: step: 294/464, loss: 0.01325959898531437 2023-01-22 13:58:26.949979: step: 296/464, loss: 0.003425328526645899 2023-01-22 13:58:27.611372: step: 298/464, loss: 0.007728288881480694 2023-01-22 13:58:28.270462: step: 300/464, loss: 0.05159435048699379 2023-01-22 13:58:28.940332: step: 302/464, loss: 0.010436614975333214 2023-01-22 13:58:29.564876: step: 304/464, loss: 0.00015244621317833662 2023-01-22 13:58:30.224511: step: 306/464, loss: 0.01785079762339592 2023-01-22 13:58:30.859141: step: 308/464, loss: 0.006395932752639055 2023-01-22 13:58:31.512805: step: 310/464, loss: 0.0019831436220556498 2023-01-22 13:58:32.123144: step: 312/464, loss: 0.002111723180860281 2023-01-22 13:58:32.651869: step: 314/464, loss: 0.015801504254341125 2023-01-22 13:58:33.268444: step: 316/464, loss: 0.0014175876276567578 2023-01-22 13:58:33.916424: step: 318/464, loss: 0.004971159156411886 2023-01-22 13:58:34.574402: step: 320/464, loss: 0.022550541907548904 2023-01-22 13:58:35.213612: step: 322/464, loss: 0.07797756046056747 2023-01-22 13:58:35.797119: step: 324/464, loss: 0.0010559335350990295 2023-01-22 13:58:36.365413: step: 326/464, loss: 0.026913126930594444 2023-01-22 13:58:36.982371: step: 328/464, loss: 0.010977164842188358 2023-01-22 13:58:37.571745: step: 330/464, loss: 0.0040641240775585175 2023-01-22 13:58:38.107241: step: 332/464, loss: 0.013235529884696007 2023-01-22 13:58:38.739037: step: 334/464, loss: 0.03829799219965935 2023-01-22 13:58:39.415111: step: 336/464, loss: 0.001352404011413455 2023-01-22 13:58:40.040785: step: 338/464, loss: 0.002952033653855324 2023-01-22 13:58:40.660665: step: 340/464, loss: 0.0017866799607872963 2023-01-22 13:58:41.279545: step: 342/464, loss: 0.0002513266808819026 2023-01-22 13:58:41.926595: step: 344/464, loss: 0.035913605242967606 2023-01-22 
13:58:42.562185: step: 346/464, loss: 0.04931747168302536 2023-01-22 13:58:43.109143: step: 348/464, loss: 0.0003991990233771503 2023-01-22 13:58:43.739771: step: 350/464, loss: 0.02780473604798317 2023-01-22 13:58:44.336983: step: 352/464, loss: 0.0005611433298327029 2023-01-22 13:58:44.965700: step: 354/464, loss: 0.0004125482519157231 2023-01-22 13:58:45.546716: step: 356/464, loss: 0.03075682371854782 2023-01-22 13:58:46.138440: step: 358/464, loss: 0.0006990233669057488 2023-01-22 13:58:46.760380: step: 360/464, loss: 1.3560085790231824e-05 2023-01-22 13:58:47.428637: step: 362/464, loss: 0.0019139517098665237 2023-01-22 13:58:48.031591: step: 364/464, loss: 0.013786913827061653 2023-01-22 13:58:48.601540: step: 366/464, loss: 0.0009525257046334445 2023-01-22 13:58:49.173384: step: 368/464, loss: 0.001372171682305634 2023-01-22 13:58:49.768358: step: 370/464, loss: 0.005218212027102709 2023-01-22 13:58:50.417004: step: 372/464, loss: 0.02613021992146969 2023-01-22 13:58:51.028607: step: 374/464, loss: 0.005402700509876013 2023-01-22 13:58:51.620264: step: 376/464, loss: 0.020430030301213264 2023-01-22 13:58:52.223676: step: 378/464, loss: 0.007723488844931126 2023-01-22 13:58:52.867762: step: 380/464, loss: 0.0007314560352824628 2023-01-22 13:58:53.458341: step: 382/464, loss: 0.008527128025889397 2023-01-22 13:58:54.058467: step: 384/464, loss: 0.008409584872424603 2023-01-22 13:58:54.700258: step: 386/464, loss: 0.17246656119823456 2023-01-22 13:58:55.309106: step: 388/464, loss: 0.0072148642502725124 2023-01-22 13:58:55.951304: step: 390/464, loss: 0.005247201304882765 2023-01-22 13:58:56.507194: step: 392/464, loss: 9.893503010971472e-05 2023-01-22 13:58:57.058216: step: 394/464, loss: 0.0018716860795393586 2023-01-22 13:58:57.716984: step: 396/464, loss: 0.028478436172008514 2023-01-22 13:58:58.305047: step: 398/464, loss: 0.004302097950130701 2023-01-22 13:58:58.951379: step: 400/464, loss: 0.02306191623210907 2023-01-22 13:58:59.580614: step: 402/464, loss: 0.018625525757670403 2023-01-22 13:59:00.177597: step: 404/464, loss: 0.008604537695646286 2023-01-22 13:59:00.769456: step: 406/464, loss: 0.0012301687384024262 2023-01-22 13:59:01.398839: step: 408/464, loss: 0.0857953205704689 2023-01-22 13:59:01.989525: step: 410/464, loss: 0.000577143975533545 2023-01-22 13:59:03.240344: step: 412/464, loss: 0.034847501665353775 2023-01-22 13:59:03.846736: step: 414/464, loss: 0.11727949231863022 2023-01-22 13:59:04.451691: step: 416/464, loss: 0.0004280091670807451 2023-01-22 13:59:05.058673: step: 418/464, loss: 0.03354734182357788 2023-01-22 13:59:05.699360: step: 420/464, loss: 0.006127386353909969 2023-01-22 13:59:06.349559: step: 422/464, loss: 0.04086967185139656 2023-01-22 13:59:06.975137: step: 424/464, loss: 0.00029856053879484534 2023-01-22 13:59:07.655032: step: 426/464, loss: 0.06409810483455658 2023-01-22 13:59:08.287111: step: 428/464, loss: 0.00031498592579737306 2023-01-22 13:59:08.843917: step: 430/464, loss: 0.01463546697050333 2023-01-22 13:59:09.504443: step: 432/464, loss: 0.018200265243649483 2023-01-22 13:59:10.033080: step: 434/464, loss: 0.0031720134429633617 2023-01-22 13:59:10.648128: step: 436/464, loss: 0.0008702730992808938 2023-01-22 13:59:11.325796: step: 438/464, loss: 0.012718631885945797 2023-01-22 13:59:11.969401: step: 440/464, loss: 0.01520999614149332 2023-01-22 13:59:12.568534: step: 442/464, loss: 0.006882749032229185 2023-01-22 13:59:13.147154: step: 444/464, loss: 0.020486094057559967 2023-01-22 13:59:13.810562: step: 446/464, loss: 
0.011144982650876045 2023-01-22 13:59:14.416398: step: 448/464, loss: 0.08577293157577515 2023-01-22 13:59:15.005218: step: 450/464, loss: 0.0003010949585586786 2023-01-22 13:59:15.609532: step: 452/464, loss: 0.000813738617580384 2023-01-22 13:59:16.199572: step: 454/464, loss: 0.010779373347759247 2023-01-22 13:59:16.830333: step: 456/464, loss: 0.03919750824570656 2023-01-22 13:59:17.431914: step: 458/464, loss: 0.0032593016512691975 2023-01-22 13:59:18.040890: step: 460/464, loss: 0.015266706235706806 2023-01-22 13:59:18.710101: step: 462/464, loss: 0.008129923604428768 2023-01-22 13:59:19.338274: step: 464/464, loss: 0.002459451789036393 2023-01-22 13:59:19.898246: step: 466/464, loss: 0.008117524906992912 2023-01-22 13:59:20.535823: step: 468/464, loss: 0.014871403574943542 2023-01-22 13:59:21.136126: step: 470/464, loss: 0.03958430886268616 2023-01-22 13:59:21.697519: step: 472/464, loss: 0.018035847693681717 2023-01-22 13:59:22.341352: step: 474/464, loss: 0.02293417975306511 2023-01-22 13:59:22.918834: step: 476/464, loss: 7.562783139292151e-05 2023-01-22 13:59:23.546685: step: 478/464, loss: 0.012890690006315708 2023-01-22 13:59:24.172497: step: 480/464, loss: 0.017653323709964752 2023-01-22 13:59:24.797794: step: 482/464, loss: 0.0276399627327919 2023-01-22 13:59:25.382846: step: 484/464, loss: 0.0012498322175815701 2023-01-22 13:59:26.019471: step: 486/464, loss: 0.024069275707006454 2023-01-22 13:59:26.614408: step: 488/464, loss: 0.0013034387957304716 2023-01-22 13:59:27.255740: step: 490/464, loss: 0.021182270720601082 2023-01-22 13:59:27.831926: step: 492/464, loss: 0.016577089205384254 2023-01-22 13:59:28.471453: step: 494/464, loss: 0.0003162265056744218 2023-01-22 13:59:29.078720: step: 496/464, loss: 0.0013988955179229379 2023-01-22 13:59:29.712065: step: 498/464, loss: 0.0226691085845232 2023-01-22 13:59:30.326374: step: 500/464, loss: 0.013925151899456978 2023-01-22 13:59:30.895212: step: 502/464, loss: 0.005288612563163042 2023-01-22 13:59:31.493028: step: 504/464, loss: 0.013529365882277489 2023-01-22 13:59:32.143895: step: 506/464, loss: 0.007406800985336304 2023-01-22 13:59:32.751102: step: 508/464, loss: 0.006143561564385891 2023-01-22 13:59:33.475464: step: 510/464, loss: 0.017350686714053154 2023-01-22 13:59:34.077290: step: 512/464, loss: 0.08247264474630356 2023-01-22 13:59:34.735765: step: 514/464, loss: 0.028843006119132042 2023-01-22 13:59:35.341061: step: 516/464, loss: 0.00849137268960476 2023-01-22 13:59:35.950167: step: 518/464, loss: 0.0173712857067585 2023-01-22 13:59:36.625579: step: 520/464, loss: 0.005316558293998241 2023-01-22 13:59:37.252068: step: 522/464, loss: 0.00480164960026741 2023-01-22 13:59:37.861902: step: 524/464, loss: 0.007387985475361347 2023-01-22 13:59:38.474621: step: 526/464, loss: 0.6308395862579346 2023-01-22 13:59:39.043832: step: 528/464, loss: 0.007524359505623579 2023-01-22 13:59:39.642076: step: 530/464, loss: 0.0019702170975506306 2023-01-22 13:59:40.256625: step: 532/464, loss: 0.0017888193251565099 2023-01-22 13:59:40.872058: step: 534/464, loss: 0.0034075435250997543 2023-01-22 13:59:41.450387: step: 536/464, loss: 0.0363345593214035 2023-01-22 13:59:42.082337: step: 538/464, loss: 0.0026970033068209887 2023-01-22 13:59:42.714028: step: 540/464, loss: 0.0017541086999699473 2023-01-22 13:59:43.341274: step: 542/464, loss: 0.0018105398630723357 2023-01-22 13:59:43.939463: step: 544/464, loss: 0.0020671342499554157 2023-01-22 13:59:44.539982: step: 546/464, loss: 0.014645855873823166 2023-01-22 13:59:45.142940: step: 
548/464, loss: 0.02025480754673481 2023-01-22 13:59:45.775195: step: 550/464, loss: 0.06476173549890518 2023-01-22 13:59:46.382103: step: 552/464, loss: 0.0009601297206245363 2023-01-22 13:59:46.985217: step: 554/464, loss: 0.005175075493752956 2023-01-22 13:59:47.615950: step: 556/464, loss: 0.0003164306690450758 2023-01-22 13:59:48.231864: step: 558/464, loss: 0.19691286981105804 2023-01-22 13:59:48.803063: step: 560/464, loss: 0.002686547813937068 2023-01-22 13:59:49.391304: step: 562/464, loss: 0.007883097976446152 2023-01-22 13:59:50.020694: step: 564/464, loss: 3.246891719754785e-05 2023-01-22 13:59:50.673116: step: 566/464, loss: 0.001068502082489431 2023-01-22 13:59:51.287245: step: 568/464, loss: 0.022433584555983543 2023-01-22 13:59:51.895078: step: 570/464, loss: 0.013481689617037773 2023-01-22 13:59:52.549622: step: 572/464, loss: 0.006244510877877474 2023-01-22 13:59:53.182296: step: 574/464, loss: 0.0014088694006204605 2023-01-22 13:59:53.797524: step: 576/464, loss: 0.0019497391767799854 2023-01-22 13:59:54.492963: step: 578/464, loss: 0.06185242161154747 2023-01-22 13:59:55.150573: step: 580/464, loss: 0.006675357930362225 2023-01-22 13:59:55.881861: step: 582/464, loss: 0.19827669858932495 2023-01-22 13:59:56.591450: step: 584/464, loss: 0.0012004076270386577 2023-01-22 13:59:57.196478: step: 586/464, loss: 0.02289130911231041 2023-01-22 13:59:57.793062: step: 588/464, loss: 0.0054329452104866505 2023-01-22 13:59:58.436098: step: 590/464, loss: 0.0020336357410997152 2023-01-22 13:59:58.964307: step: 592/464, loss: 0.0027439245022833347 2023-01-22 13:59:59.554459: step: 594/464, loss: 0.011579768732190132 2023-01-22 14:00:00.102289: step: 596/464, loss: 0.010270710103213787 2023-01-22 14:00:00.761876: step: 598/464, loss: 0.0049523417837917805 2023-01-22 14:00:01.341711: step: 600/464, loss: 0.003858374198898673 2023-01-22 14:00:01.987093: step: 602/464, loss: 0.06019090116024017 2023-01-22 14:00:02.516519: step: 604/464, loss: 0.010263212956488132 2023-01-22 14:00:03.103722: step: 606/464, loss: 0.0025204953271895647 2023-01-22 14:00:03.707425: step: 608/464, loss: 0.06136419251561165 2023-01-22 14:00:04.306938: step: 610/464, loss: 0.0016090819844976068 2023-01-22 14:00:05.021330: step: 612/464, loss: 0.016572527587413788 2023-01-22 14:00:05.686454: step: 614/464, loss: 0.5664610266685486 2023-01-22 14:00:06.325634: step: 616/464, loss: 0.04934962838888168 2023-01-22 14:00:06.960037: step: 618/464, loss: 0.054494671523571014 2023-01-22 14:00:07.592617: step: 620/464, loss: 0.00043719136738218367 2023-01-22 14:00:08.193951: step: 622/464, loss: 0.007431011646986008 2023-01-22 14:00:08.794395: step: 624/464, loss: 0.03331771120429039 2023-01-22 14:00:09.416429: step: 626/464, loss: 0.01933049038052559 2023-01-22 14:00:10.002086: step: 628/464, loss: 0.006974204443395138 2023-01-22 14:00:10.689389: step: 630/464, loss: 0.004200476221740246 2023-01-22 14:00:11.351643: step: 632/464, loss: 0.03736580163240433 2023-01-22 14:00:11.983841: step: 634/464, loss: 0.040448278188705444 2023-01-22 14:00:12.594985: step: 636/464, loss: 0.009122759103775024 2023-01-22 14:00:13.233436: step: 638/464, loss: 0.046271827071905136 2023-01-22 14:00:13.954361: step: 640/464, loss: 0.09109504520893097 2023-01-22 14:00:14.619775: step: 642/464, loss: 0.0010769476648420095 2023-01-22 14:00:15.208140: step: 644/464, loss: 0.019725728780031204 2023-01-22 14:00:15.780794: step: 646/464, loss: 0.015822215005755424 2023-01-22 14:00:16.390225: step: 648/464, loss: 0.05484939366579056 2023-01-22 
14:00:17.078091: step: 650/464, loss: 0.141653910279274 2023-01-22 14:00:17.779924: step: 652/464, loss: 0.00035650088102556765 2023-01-22 14:00:18.389993: step: 654/464, loss: 0.011836215853691101 2023-01-22 14:00:18.965399: step: 656/464, loss: 0.0006070904200896621 2023-01-22 14:00:19.574297: step: 658/464, loss: 0.015699708834290504 2023-01-22 14:00:20.201590: step: 660/464, loss: 0.03450365364551544 2023-01-22 14:00:20.853379: step: 662/464, loss: 0.0009372765780426562 2023-01-22 14:00:21.484631: step: 664/464, loss: 0.0006784518482163548 2023-01-22 14:00:22.179915: step: 666/464, loss: 0.005149946082383394 2023-01-22 14:00:22.796946: step: 668/464, loss: 0.015397715382277966 2023-01-22 14:00:23.371685: step: 670/464, loss: 0.015877025201916695 2023-01-22 14:00:24.049924: step: 672/464, loss: 0.03446386754512787 2023-01-22 14:00:24.667469: step: 674/464, loss: 0.036043643951416016 2023-01-22 14:00:25.242294: step: 676/464, loss: 0.0029709930531680584 2023-01-22 14:00:25.915102: step: 678/464, loss: 0.15701141953468323 2023-01-22 14:00:26.516958: step: 680/464, loss: 0.04647189378738403 2023-01-22 14:00:27.167877: step: 682/464, loss: 0.008912712335586548 2023-01-22 14:00:27.754237: step: 684/464, loss: 0.00948739517480135 2023-01-22 14:00:28.382694: step: 686/464, loss: 0.0025628781877458096 2023-01-22 14:00:29.001868: step: 688/464, loss: 0.58965003490448 2023-01-22 14:00:29.555141: step: 690/464, loss: 0.4068011939525604 2023-01-22 14:00:30.209766: step: 692/464, loss: 0.011587106622755527 2023-01-22 14:00:30.845336: step: 694/464, loss: 0.00972069427371025 2023-01-22 14:00:31.455816: step: 696/464, loss: 0.0010438722092658281 2023-01-22 14:00:32.006738: step: 698/464, loss: 5.963250259810593e-06 2023-01-22 14:00:32.593653: step: 700/464, loss: 0.018201308324933052 2023-01-22 14:00:33.197680: step: 702/464, loss: 0.017866387963294983 2023-01-22 14:00:33.809922: step: 704/464, loss: 0.03244994580745697 2023-01-22 14:00:34.512471: step: 706/464, loss: 0.007438444998115301 2023-01-22 14:00:35.049206: step: 708/464, loss: 0.024666374549269676 2023-01-22 14:00:35.605660: step: 710/464, loss: 0.016314072534441948 2023-01-22 14:00:36.230471: step: 712/464, loss: 0.005182948894798756 2023-01-22 14:00:36.840796: step: 714/464, loss: 0.0046609812416136265 2023-01-22 14:00:37.510641: step: 716/464, loss: 0.008606769144535065 2023-01-22 14:00:38.152654: step: 718/464, loss: 0.003123520640656352 2023-01-22 14:00:38.806536: step: 720/464, loss: 0.005103525705635548 2023-01-22 14:00:39.452179: step: 722/464, loss: 0.010820210911333561 2023-01-22 14:00:40.088331: step: 724/464, loss: 0.014116911217570305 2023-01-22 14:00:40.647580: step: 726/464, loss: 0.0008885234128683805 2023-01-22 14:00:41.257853: step: 728/464, loss: 0.015137244015932083 2023-01-22 14:00:41.971873: step: 730/464, loss: 0.03720799833536148 2023-01-22 14:00:42.548088: step: 732/464, loss: 0.0055466219782829285 2023-01-22 14:00:43.165255: step: 734/464, loss: 0.026318326592445374 2023-01-22 14:00:43.712157: step: 736/464, loss: 0.0023751596454530954 2023-01-22 14:00:44.366307: step: 738/464, loss: 0.04240196943283081 2023-01-22 14:00:45.010317: step: 740/464, loss: 0.02381092496216297 2023-01-22 14:00:45.773784: step: 742/464, loss: 0.03649526461958885 2023-01-22 14:00:46.362551: step: 744/464, loss: 0.0008556586690247059 2023-01-22 14:00:47.012951: step: 746/464, loss: 0.045671314001083374 2023-01-22 14:00:47.648387: step: 748/464, loss: 0.005988705437630415 2023-01-22 14:00:48.237914: step: 750/464, loss: 0.06765050441026688 
2023-01-22 14:00:48.833079: step: 752/464, loss: 0.2693063020706177 2023-01-22 14:00:49.406901: step: 754/464, loss: 0.0002042855485342443 2023-01-22 14:00:49.996410: step: 756/464, loss: 0.01689450442790985 2023-01-22 14:00:50.604525: step: 758/464, loss: 0.005171219818294048 2023-01-22 14:00:51.241329: step: 760/464, loss: 0.007994354702532291 2023-01-22 14:00:51.807407: step: 762/464, loss: 0.020884480327367783 2023-01-22 14:00:52.425066: step: 764/464, loss: 0.027513671666383743 2023-01-22 14:00:53.052379: step: 766/464, loss: 0.0021339538507163525 2023-01-22 14:00:53.689668: step: 768/464, loss: 0.03193753585219383 2023-01-22 14:00:54.282144: step: 770/464, loss: 0.049453891813755035 2023-01-22 14:00:54.911967: step: 772/464, loss: 0.0009148464305326343 2023-01-22 14:00:55.506606: step: 774/464, loss: 0.011001508682966232 2023-01-22 14:00:56.111367: step: 776/464, loss: 0.008015145547688007 2023-01-22 14:00:56.737570: step: 778/464, loss: 0.030116822570562363 2023-01-22 14:00:57.372121: step: 780/464, loss: 0.016844095662236214 2023-01-22 14:00:57.982375: step: 782/464, loss: 0.0012863740557804704 2023-01-22 14:00:58.587643: step: 784/464, loss: 0.029196161776781082 2023-01-22 14:00:59.191237: step: 786/464, loss: 0.002277803374454379 2023-01-22 14:00:59.813033: step: 788/464, loss: 0.010096283629536629 2023-01-22 14:01:00.410214: step: 790/464, loss: 0.0548112578690052 2023-01-22 14:01:01.009279: step: 792/464, loss: 0.013359840027987957 2023-01-22 14:01:01.682899: step: 794/464, loss: 0.006645455956459045 2023-01-22 14:01:02.311875: step: 796/464, loss: 0.0192551352083683 2023-01-22 14:01:02.938787: step: 798/464, loss: 0.009141849353909492 2023-01-22 14:01:03.552054: step: 800/464, loss: 0.006403472740203142 2023-01-22 14:01:04.187994: step: 802/464, loss: 0.02189779095351696 2023-01-22 14:01:04.818391: step: 804/464, loss: 0.0024047603365033865 2023-01-22 14:01:05.607779: step: 806/464, loss: 0.06709612160921097 2023-01-22 14:01:06.233566: step: 808/464, loss: 0.02736440673470497 2023-01-22 14:01:06.863212: step: 810/464, loss: 0.006112470757216215 2023-01-22 14:01:07.459925: step: 812/464, loss: 0.0044360077008605 2023-01-22 14:01:08.086130: step: 814/464, loss: 0.009985252283513546 2023-01-22 14:01:08.726538: step: 816/464, loss: 0.01186640840023756 2023-01-22 14:01:09.404680: step: 818/464, loss: 0.09793198108673096 2023-01-22 14:01:09.972097: step: 820/464, loss: 0.003989395219832659 2023-01-22 14:01:10.588930: step: 822/464, loss: 0.01700315810739994 2023-01-22 14:01:11.178543: step: 824/464, loss: 0.01287770178169012 2023-01-22 14:01:11.969661: step: 826/464, loss: 0.07658130675554276 2023-01-22 14:01:12.531409: step: 828/464, loss: 0.0006500289891846478 2023-01-22 14:01:13.213256: step: 830/464, loss: 0.037975821644067764 2023-01-22 14:01:13.839222: step: 832/464, loss: 0.003844544989988208 2023-01-22 14:01:14.370931: step: 834/464, loss: 0.0028473336715251207 2023-01-22 14:01:14.951353: step: 836/464, loss: 0.28509947657585144 2023-01-22 14:01:15.604051: step: 838/464, loss: 0.013126951642334461 2023-01-22 14:01:16.171120: step: 840/464, loss: 0.020548366010189056 2023-01-22 14:01:16.742305: step: 842/464, loss: 0.03744273632764816 2023-01-22 14:01:17.344348: step: 844/464, loss: 0.042428817600011826 2023-01-22 14:01:18.066606: step: 846/464, loss: 0.007418156135827303 2023-01-22 14:01:18.645987: step: 848/464, loss: 0.0024940096773207188 2023-01-22 14:01:19.269433: step: 850/464, loss: 0.01644994504749775 2023-01-22 14:01:19.943725: step: 852/464, loss: 0.1557295024394989 
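Every step entry in this log follows the same "timestamp: step: N/464, loss: value" pattern, and each epoch closes with a single aggregate "Loss:" line in the summary further below. The following is a minimal parsing sketch for pulling those step losses out of the log text; the regex and function names are my own, and treating the epoch-level "Loss" as a plain mean of the step losses is an assumption, not something the log itself confirms.

import re

# Step entries in this log look like:
#   2023-01-22 13:59:45.775195: step: 550/464, loss: 0.06476173549890518
# The pattern below is inferred from those entries.
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9][0-9.eE+-]*)")

def step_losses(log_text: str) -> list[float]:
    """Collect the per-step loss values from one epoch's worth of log text."""
    return [float(loss) for _, loss in STEP_RE.findall(log_text)]

def epoch_loss(losses: list[float]) -> float:
    """Aggregate the step losses; assuming (unconfirmed) that the epoch-level
    'Loss: ...' line is a simple mean of these values."""
    return sum(losses) / len(losses) if losses else float("nan")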
2023-01-22 14:01:20.523824: step: 854/464, loss: 0.004354654811322689 2023-01-22 14:01:21.246541: step: 856/464, loss: 0.0145355723798275 2023-01-22 14:01:21.911672: step: 858/464, loss: 0.08611620962619781 2023-01-22 14:01:22.555733: step: 860/464, loss: 0.023508962243795395 2023-01-22 14:01:23.153161: step: 862/464, loss: 0.0006393216899596155 2023-01-22 14:01:23.788513: step: 864/464, loss: 0.008929580450057983 2023-01-22 14:01:24.414993: step: 866/464, loss: 0.018466824665665627 2023-01-22 14:01:25.031984: step: 868/464, loss: 0.00402337359264493 2023-01-22 14:01:25.706022: step: 870/464, loss: 0.00023938572849147022 2023-01-22 14:01:26.337681: step: 872/464, loss: 0.0013220008695498109 2023-01-22 14:01:26.950917: step: 874/464, loss: 0.09605110436677933 2023-01-22 14:01:27.502439: step: 876/464, loss: 0.03537971153855324 2023-01-22 14:01:28.151632: step: 878/464, loss: 0.01987377181649208 2023-01-22 14:01:28.773303: step: 880/464, loss: 0.025345321744680405 2023-01-22 14:01:29.467767: step: 882/464, loss: 0.011560996063053608 2023-01-22 14:01:30.177468: step: 884/464, loss: 0.008712761104106903 2023-01-22 14:01:30.783015: step: 886/464, loss: 0.0005412409082055092 2023-01-22 14:01:31.372116: step: 888/464, loss: 0.0016988108400255442 2023-01-22 14:01:31.998864: step: 890/464, loss: 0.01637883111834526 2023-01-22 14:01:32.559322: step: 892/464, loss: 0.05341380089521408 2023-01-22 14:01:33.204415: step: 894/464, loss: 0.0030567431822419167 2023-01-22 14:01:33.742487: step: 896/464, loss: 0.005006662104278803 2023-01-22 14:01:34.349943: step: 898/464, loss: 0.014662880450487137 2023-01-22 14:01:34.960471: step: 900/464, loss: 0.0011214803671464324 2023-01-22 14:01:35.560596: step: 902/464, loss: 0.005864960141479969 2023-01-22 14:01:36.157173: step: 904/464, loss: 0.0007619211683049798 2023-01-22 14:01:36.806976: step: 906/464, loss: 0.05297547206282616 2023-01-22 14:01:37.406879: step: 908/464, loss: 0.058883581310510635 2023-01-22 14:01:38.079958: step: 910/464, loss: 0.0002728099934756756 2023-01-22 14:01:38.632332: step: 912/464, loss: 0.0058806040324270725 2023-01-22 14:01:39.189725: step: 914/464, loss: 0.015054954215884209 2023-01-22 14:01:39.863062: step: 916/464, loss: 0.045919161289930344 2023-01-22 14:01:40.493289: step: 918/464, loss: 0.0026584642473608255 2023-01-22 14:01:41.125697: step: 920/464, loss: 0.023800550028681755 2023-01-22 14:01:41.789543: step: 922/464, loss: 0.0045110126957297325 2023-01-22 14:01:42.430079: step: 924/464, loss: 0.003882175777107477 2023-01-22 14:01:43.063445: step: 926/464, loss: 0.022211167961359024 2023-01-22 14:01:43.684888: step: 928/464, loss: 0.028050176799297333 2023-01-22 14:01:44.213446: step: 930/464, loss: 0.0009635902242735028 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29705471246006393, 'r': 0.3528581593927894, 'f1': 0.32256071118820473}, 'combined': 0.2376763135070982, 'epoch': 34} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2893179785618371, 'r': 0.32571518982260084, 'f1': 0.30643960407499987}, 'combined': 0.2000590161318652, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2937453007518797, 'r': 0.3706653225806452, 'f1': 0.32775272651006715}, 'combined': 0.2415020090074179, 'epoch': 34} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 
0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30014503994956515, 'r': 0.32800776884194205, 'f1': 0.3134584563021809}, 'combined': 0.20464127199002483, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.298121719160105, 'r': 0.3592168722327641, 'f1': 0.3258301061388411}, 'combined': 0.24008534136546186, 'epoch': 34} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3019906215685765, 'r': 0.31975477577849276, 'f1': 0.3106189250419644}, 'combined': 0.20278748474242236, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.193075117370892, 'r': 0.3916666666666666, 'f1': 0.25864779874213834}, 'combined': 0.17243186582809222, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2393617021276596, 'r': 0.4891304347826087, 'f1': 0.32142857142857145}, 'combined': 0.16071428571428573, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3017241379310345, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 
--p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:04:22.373390: step: 2/464, loss: 0.0009644743404351175 2023-01-22 14:04:22.932362: step: 4/464, loss: 0.0003162659995723516 2023-01-22 14:04:23.581989: step: 6/464, loss: 0.0058449688367545605 2023-01-22 14:04:24.166563: step: 8/464, loss: 0.015397859737277031 2023-01-22 14:04:24.752812: step: 10/464, loss: 0.0003571268171072006 2023-01-22 14:04:25.410765: step: 12/464, loss: 0.0007609457243233919 2023-01-22 14:04:25.989653: step: 14/464, loss: 0.003951466176658869 2023-01-22 14:04:26.648372: step: 16/464, loss: 0.01241021417081356 2023-01-22 14:04:27.192331: step: 18/464, loss: 0.01995595172047615 2023-01-22 14:04:27.826585: step: 20/464, loss: 0.013361471705138683 2023-01-22 14:04:28.495385: step: 22/464, loss: 0.00025080880732275546 2023-01-22 14:04:29.107443: step: 24/464, loss: 0.010684192180633545 2023-01-22 14:04:29.750102: step: 26/464, loss: 0.026312733069062233 2023-01-22 14:04:30.367500: step: 28/464, loss: 0.012011365965008736 2023-01-22 14:04:30.972754: step: 30/464, loss: 0.0087062306702137 2023-01-22 14:04:31.513141: step: 32/464, loss: 0.011229324154555798 2023-01-22 14:04:32.092892: step: 34/464, loss: 0.001367847784422338 2023-01-22 14:04:32.757344: step: 36/464, loss: 0.00034738570684567094 2023-01-22 14:04:33.358065: step: 38/464, loss: 0.008835665881633759 2023-01-22 14:04:34.019294: step: 40/464, loss: 0.0559537373483181 2023-01-22 14:04:34.674487: step: 42/464, loss: 0.0006257090135477483 2023-01-22 14:04:35.305507: step: 44/464, loss: 0.054278358817100525 2023-01-22 14:04:35.878751: step: 46/464, loss: 0.00020819462952204049 2023-01-22 14:04:36.453116: step: 48/464, loss: 0.0766400471329689 2023-01-22 14:04:37.053616: step: 50/464, loss: 0.0037050002720206976 2023-01-22 14:04:37.651514: step: 52/464, loss: 0.023854294791817665 2023-01-22 14:04:38.211185: step: 54/464, loss: 0.00019046322267968208 2023-01-22 14:04:38.820869: step: 56/464, loss: 0.013129734434187412 2023-01-22 14:04:39.378693: step: 58/464, loss: 0.01394770760089159 2023-01-22 14:04:40.052767: step: 60/464, loss: 0.0300760380923748 2023-01-22 14:04:40.642357: step: 62/464, loss: 0.006102480459958315 2023-01-22 14:04:41.278795: step: 64/464, loss: 0.0016553985187783837 2023-01-22 14:04:41.925816: step: 66/464, loss: 0.00021854121587239206 2023-01-22 14:04:42.540104: step: 68/464, loss: 0.048340536653995514 2023-01-22 14:04:43.197435: step: 70/464, loss: 0.022228294983506203 2023-01-22 14:04:43.775048: step: 72/464, loss: 0.0033827880397439003 2023-01-22 14:04:44.420853: step: 74/464, loss: 0.3537949025630951 2023-01-22 14:04:45.059833: step: 76/464, loss: 0.0006294180057011545 2023-01-22 14:04:45.757098: step: 78/464, loss: 0.306569904088974 2023-01-22 14:04:46.352979: step: 80/464, loss: 0.006658884231001139 2023-01-22 14:04:46.932672: step: 82/464, loss: 0.008759608492255211 2023-01-22 14:04:47.497969: step: 84/464, loss: 0.0007474158192053437 2023-01-22 14:04:48.063496: step: 86/464, loss: 0.009229986928403378 2023-01-22 14:04:48.692612: step: 88/464, loss: 0.012151729315519333 2023-01-22 14:04:49.280398: step: 90/464, loss: 0.02027531899511814 2023-01-22 14:04:49.920567: step: 92/464, loss: 0.039069026708602905 2023-01-22 14:04:50.545495: step: 94/464, loss: 0.005869823507964611 2023-01-22 14:04:51.112848: step: 96/464, loss: 0.3314107656478882 2023-01-22 14:04:51.703367: step: 98/464, loss: 0.00550828455016017 2023-01-22 14:04:52.302549: step: 100/464, loss: 0.0024665382225066423 2023-01-22 14:04:52.888022: step: 102/464, loss: 
0.017504358664155006 2023-01-22 14:04:53.473045: step: 104/464, loss: 4.007333245681366e-06 2023-01-22 14:04:54.112112: step: 106/464, loss: 0.00032739387825131416 2023-01-22 14:04:54.720777: step: 108/464, loss: 0.008687363937497139 2023-01-22 14:04:55.359509: step: 110/464, loss: 0.011980629526078701 2023-01-22 14:04:55.933179: step: 112/464, loss: 0.008489969186484814 2023-01-22 14:04:56.623558: step: 114/464, loss: 0.015836533159017563 2023-01-22 14:04:57.286201: step: 116/464, loss: 0.012812396511435509 2023-01-22 14:04:57.896929: step: 118/464, loss: 0.03620085120201111 2023-01-22 14:04:58.541503: step: 120/464, loss: 0.023637806996703148 2023-01-22 14:04:59.247509: step: 122/464, loss: 0.006331432610750198 2023-01-22 14:04:59.890965: step: 124/464, loss: 0.0021719210781157017 2023-01-22 14:05:00.550913: step: 126/464, loss: 0.014731621369719505 2023-01-22 14:05:01.175480: step: 128/464, loss: 0.0040918574668467045 2023-01-22 14:05:01.797190: step: 130/464, loss: 0.014182882383465767 2023-01-22 14:05:02.470928: step: 132/464, loss: 0.16358043253421783 2023-01-22 14:05:03.037926: step: 134/464, loss: 0.0007946311379782856 2023-01-22 14:05:03.658179: step: 136/464, loss: 0.01686181128025055 2023-01-22 14:05:04.284013: step: 138/464, loss: 0.5621294379234314 2023-01-22 14:05:05.062769: step: 140/464, loss: 0.1334395855665207 2023-01-22 14:05:05.765516: step: 142/464, loss: 1.1283353567123413 2023-01-22 14:05:06.307667: step: 144/464, loss: 0.03229658678174019 2023-01-22 14:05:06.922644: step: 146/464, loss: 0.017992986366152763 2023-01-22 14:05:07.548871: step: 148/464, loss: 0.008097312413156033 2023-01-22 14:05:08.140046: step: 150/464, loss: 0.09446200728416443 2023-01-22 14:05:08.748790: step: 152/464, loss: 0.012921723537147045 2023-01-22 14:05:09.359004: step: 154/464, loss: 0.036877911537885666 2023-01-22 14:05:09.934630: step: 156/464, loss: 0.0023639483842998743 2023-01-22 14:05:10.584287: step: 158/464, loss: 0.1434338539838791 2023-01-22 14:05:11.207468: step: 160/464, loss: 0.07745251059532166 2023-01-22 14:05:11.786022: step: 162/464, loss: 0.002024970017373562 2023-01-22 14:05:12.413144: step: 164/464, loss: 0.004907531198114157 2023-01-22 14:05:12.990990: step: 166/464, loss: 0.027228038758039474 2023-01-22 14:05:13.585817: step: 168/464, loss: 0.017929313704371452 2023-01-22 14:05:14.264510: step: 170/464, loss: 0.0005961810238659382 2023-01-22 14:05:14.866086: step: 172/464, loss: 0.002728164428845048 2023-01-22 14:05:15.501163: step: 174/464, loss: 0.0008025284623727202 2023-01-22 14:05:16.131485: step: 176/464, loss: 0.0021660258062183857 2023-01-22 14:05:16.708470: step: 178/464, loss: 0.01789056695997715 2023-01-22 14:05:17.322963: step: 180/464, loss: 0.00203095655888319 2023-01-22 14:05:18.037169: step: 182/464, loss: 0.008937697857618332 2023-01-22 14:05:18.654784: step: 184/464, loss: 0.008091083727777004 2023-01-22 14:05:19.340811: step: 186/464, loss: 0.0040649184957146645 2023-01-22 14:05:19.973076: step: 188/464, loss: 0.08236940205097198 2023-01-22 14:05:20.611960: step: 190/464, loss: 0.08165348321199417 2023-01-22 14:05:21.251194: step: 192/464, loss: 0.00024181792105082422 2023-01-22 14:05:21.891772: step: 194/464, loss: 0.03893275558948517 2023-01-22 14:05:22.522862: step: 196/464, loss: 0.012685553170740604 2023-01-22 14:05:23.140559: step: 198/464, loss: 0.02778731659054756 2023-01-22 14:05:23.768325: step: 200/464, loss: 0.006302070338279009 2023-01-22 14:05:24.391847: step: 202/464, loss: 0.017433539032936096 2023-01-22 14:05:24.917286: step: 
204/464, loss: 0.0018429952906444669 2023-01-22 14:05:25.512291: step: 206/464, loss: 0.0035465233959257603 2023-01-22 14:05:26.190479: step: 208/464, loss: 0.012276760302484035 2023-01-22 14:05:26.870060: step: 210/464, loss: 0.00873927865177393 2023-01-22 14:05:27.484734: step: 212/464, loss: 0.006358860060572624 2023-01-22 14:05:28.066460: step: 214/464, loss: 0.011782050132751465 2023-01-22 14:05:28.707626: step: 216/464, loss: 0.0018012769287452102 2023-01-22 14:05:29.351034: step: 218/464, loss: 0.0972636342048645 2023-01-22 14:05:30.020490: step: 220/464, loss: 0.017716892063617706 2023-01-22 14:05:30.555862: step: 222/464, loss: 0.0028683850541710854 2023-01-22 14:05:31.063357: step: 224/464, loss: 0.0020713862031698227 2023-01-22 14:05:31.674131: step: 226/464, loss: 0.014332927763462067 2023-01-22 14:05:32.320998: step: 228/464, loss: 0.00759549718350172 2023-01-22 14:05:32.958177: step: 230/464, loss: 0.014684975147247314 2023-01-22 14:05:33.578974: step: 232/464, loss: 0.005183230619877577 2023-01-22 14:05:34.232436: step: 234/464, loss: 0.03961469233036041 2023-01-22 14:05:34.832946: step: 236/464, loss: 0.0497465543448925 2023-01-22 14:05:35.377879: step: 238/464, loss: 0.0266302190721035 2023-01-22 14:05:35.969877: step: 240/464, loss: 0.0003741618129424751 2023-01-22 14:05:36.537126: step: 242/464, loss: 0.026745961979031563 2023-01-22 14:05:37.142904: step: 244/464, loss: 0.0012106267968192697 2023-01-22 14:05:37.782900: step: 246/464, loss: 0.026130206882953644 2023-01-22 14:05:38.320659: step: 248/464, loss: 0.006671784445643425 2023-01-22 14:05:38.927062: step: 250/464, loss: 0.005533790681511164 2023-01-22 14:05:39.474321: step: 252/464, loss: 0.007999777793884277 2023-01-22 14:05:40.105196: step: 254/464, loss: 4.036992686451413e-05 2023-01-22 14:05:40.738658: step: 256/464, loss: 0.0013090185821056366 2023-01-22 14:05:41.378030: step: 258/464, loss: 0.004869567696005106 2023-01-22 14:05:42.067359: step: 260/464, loss: 0.07180750370025635 2023-01-22 14:05:42.687037: step: 262/464, loss: 0.0008018343360163271 2023-01-22 14:05:43.207237: step: 264/464, loss: 0.0023172374349087477 2023-01-22 14:05:43.898083: step: 266/464, loss: 0.10949315875768661 2023-01-22 14:05:44.485377: step: 268/464, loss: 0.007839790545403957 2023-01-22 14:05:45.080229: step: 270/464, loss: 0.0038458656053990126 2023-01-22 14:05:45.698553: step: 272/464, loss: 0.001103203627280891 2023-01-22 14:05:46.326500: step: 274/464, loss: 0.014842512086033821 2023-01-22 14:05:47.022044: step: 276/464, loss: 0.0005287323729135096 2023-01-22 14:05:47.620936: step: 278/464, loss: 0.0026160588022321463 2023-01-22 14:05:48.296642: step: 280/464, loss: 0.7641627192497253 2023-01-22 14:05:49.058029: step: 282/464, loss: 0.007436547894030809 2023-01-22 14:05:49.694079: step: 284/464, loss: 0.018948597833514214 2023-01-22 14:05:50.324785: step: 286/464, loss: 0.004016129299998283 2023-01-22 14:05:50.952372: step: 288/464, loss: 0.21743503212928772 2023-01-22 14:05:51.591190: step: 290/464, loss: 0.012820246629416943 2023-01-22 14:05:52.185063: step: 292/464, loss: 0.00928540900349617 2023-01-22 14:05:52.795912: step: 294/464, loss: 0.006636911071836948 2023-01-22 14:05:53.352258: step: 296/464, loss: 0.00810930784791708 2023-01-22 14:05:53.978955: step: 298/464, loss: 0.006895044352859259 2023-01-22 14:05:54.615766: step: 300/464, loss: 0.0021101750899106264 2023-01-22 14:05:55.204674: step: 302/464, loss: 0.004637501202523708 2023-01-22 14:05:55.794496: step: 304/464, loss: 0.0017597886035218835 2023-01-22 
14:05:56.456646: step: 306/464, loss: 0.060324400663375854 2023-01-22 14:05:57.141500: step: 308/464, loss: 0.02072199061512947 2023-01-22 14:05:57.713031: step: 310/464, loss: 0.00800775270909071 2023-01-22 14:05:58.335051: step: 312/464, loss: 0.00040108468965627253 2023-01-22 14:05:58.912121: step: 314/464, loss: 0.021276621147990227 2023-01-22 14:05:59.504913: step: 316/464, loss: 0.006905107758939266 2023-01-22 14:06:00.113208: step: 318/464, loss: 0.032484881579875946 2023-01-22 14:06:00.672282: step: 320/464, loss: 0.008478713221848011 2023-01-22 14:06:01.273310: step: 322/464, loss: 0.00045442843111231923 2023-01-22 14:06:01.855038: step: 324/464, loss: 0.11216661334037781 2023-01-22 14:06:02.509339: step: 326/464, loss: 0.5668357014656067 2023-01-22 14:06:03.111524: step: 328/464, loss: 0.006192249245941639 2023-01-22 14:06:03.703237: step: 330/464, loss: 0.03319069743156433 2023-01-22 14:06:04.325393: step: 332/464, loss: 0.0035295835696160793 2023-01-22 14:06:04.965999: step: 334/464, loss: 0.00975918211042881 2023-01-22 14:06:05.589658: step: 336/464, loss: 0.0027129671070724726 2023-01-22 14:06:06.283693: step: 338/464, loss: 0.005572467111051083 2023-01-22 14:06:06.900577: step: 340/464, loss: 0.001601192052476108 2023-01-22 14:06:07.576335: step: 342/464, loss: 0.013441353105008602 2023-01-22 14:06:08.259260: step: 344/464, loss: 0.013300052843987942 2023-01-22 14:06:08.783433: step: 346/464, loss: 0.006340089254081249 2023-01-22 14:06:09.328571: step: 348/464, loss: 0.015834132209420204 2023-01-22 14:06:09.932880: step: 350/464, loss: 0.008800752460956573 2023-01-22 14:06:10.519070: step: 352/464, loss: 0.0007862219936214387 2023-01-22 14:06:11.231991: step: 354/464, loss: 0.0015308655565604568 2023-01-22 14:06:11.876435: step: 356/464, loss: 0.02814129739999771 2023-01-22 14:06:12.522793: step: 358/464, loss: 0.0009849341586232185 2023-01-22 14:06:13.117494: step: 360/464, loss: 0.008692414499819279 2023-01-22 14:06:13.751987: step: 362/464, loss: 0.0036799232475459576 2023-01-22 14:06:14.419037: step: 364/464, loss: 0.11109455674886703 2023-01-22 14:06:15.055168: step: 366/464, loss: 0.003390450030565262 2023-01-22 14:06:15.707114: step: 368/464, loss: 0.01457061618566513 2023-01-22 14:06:16.264899: step: 370/464, loss: 0.002210398204624653 2023-01-22 14:06:16.856178: step: 372/464, loss: 0.007588067092001438 2023-01-22 14:06:17.532753: step: 374/464, loss: 0.00048443939886055887 2023-01-22 14:06:18.163829: step: 376/464, loss: 0.0052320328541100025 2023-01-22 14:06:18.776108: step: 378/464, loss: 0.002138762501999736 2023-01-22 14:06:19.446880: step: 380/464, loss: 0.010138359852135181 2023-01-22 14:06:20.029797: step: 382/464, loss: 0.014441246166825294 2023-01-22 14:06:20.644385: step: 384/464, loss: 0.008367008529603481 2023-01-22 14:06:21.342009: step: 386/464, loss: 0.0007067288970574737 2023-01-22 14:06:22.028637: step: 388/464, loss: 0.0012309409212321043 2023-01-22 14:06:22.618074: step: 390/464, loss: 0.004554093815386295 2023-01-22 14:06:23.222927: step: 392/464, loss: 0.07715360075235367 2023-01-22 14:06:23.814547: step: 394/464, loss: 0.013104358687996864 2023-01-22 14:06:24.422775: step: 396/464, loss: 0.003941704053431749 2023-01-22 14:06:25.009369: step: 398/464, loss: 0.002259533852338791 2023-01-22 14:06:25.586105: step: 400/464, loss: 0.0028181481175124645 2023-01-22 14:06:26.201287: step: 402/464, loss: 0.0019231357146054506 2023-01-22 14:06:26.918091: step: 404/464, loss: 0.013126783072948456 2023-01-22 14:06:27.537491: step: 406/464, loss: 
0.0007588164880871773 2023-01-22 14:06:28.148461: step: 408/464, loss: 0.0044506611302495 2023-01-22 14:06:28.825400: step: 410/464, loss: 0.011158186011016369 2023-01-22 14:06:29.471730: step: 412/464, loss: 0.017263837158679962 2023-01-22 14:06:30.143989: step: 414/464, loss: 0.026343664154410362 2023-01-22 14:06:30.774672: step: 416/464, loss: 0.022229742258787155 2023-01-22 14:06:31.408464: step: 418/464, loss: 0.012956931255757809 2023-01-22 14:06:32.084971: step: 420/464, loss: 0.027147667482495308 2023-01-22 14:06:32.643421: step: 422/464, loss: 0.0002858864900190383 2023-01-22 14:06:33.182575: step: 424/464, loss: 0.009432642720639706 2023-01-22 14:06:33.743787: step: 426/464, loss: 0.0012652672594413161 2023-01-22 14:06:34.360236: step: 428/464, loss: 0.0009018271812237799 2023-01-22 14:06:35.049861: step: 430/464, loss: 0.06326231360435486 2023-01-22 14:06:35.652443: step: 432/464, loss: 0.0007856090669520199 2023-01-22 14:06:36.345256: step: 434/464, loss: 0.019646979868412018 2023-01-22 14:06:36.988744: step: 436/464, loss: 0.01722937636077404 2023-01-22 14:06:37.540600: step: 438/464, loss: 0.01092112623155117 2023-01-22 14:06:38.129031: step: 440/464, loss: 2.2829706722404808e-05 2023-01-22 14:06:38.768291: step: 442/464, loss: 0.000356467702658847 2023-01-22 14:06:39.488731: step: 444/464, loss: 0.005447516683489084 2023-01-22 14:06:40.120566: step: 446/464, loss: 0.0006174911395646632 2023-01-22 14:06:40.712020: step: 448/464, loss: 0.13536754250526428 2023-01-22 14:06:41.325307: step: 450/464, loss: 0.015333844348788261 2023-01-22 14:06:41.972839: step: 452/464, loss: 0.011071569286286831 2023-01-22 14:06:42.550056: step: 454/464, loss: 0.0035984970163553953 2023-01-22 14:06:43.139370: step: 456/464, loss: 0.006359519436955452 2023-01-22 14:06:43.778522: step: 458/464, loss: 0.07651659846305847 2023-01-22 14:06:44.420882: step: 460/464, loss: 0.25249356031417847 2023-01-22 14:06:45.039887: step: 462/464, loss: 0.006903337314724922 2023-01-22 14:06:45.646899: step: 464/464, loss: 0.020166227594017982 2023-01-22 14:06:46.317933: step: 466/464, loss: 0.01649313047528267 2023-01-22 14:06:46.987071: step: 468/464, loss: 0.06049361452460289 2023-01-22 14:06:47.563216: step: 470/464, loss: 0.00118854152970016 2023-01-22 14:06:48.195854: step: 472/464, loss: 0.005978343542665243 2023-01-22 14:06:48.809570: step: 474/464, loss: 0.0140788983553648 2023-01-22 14:06:49.380981: step: 476/464, loss: 0.037724100053310394 2023-01-22 14:06:49.978108: step: 478/464, loss: 0.00423229718580842 2023-01-22 14:06:50.614581: step: 480/464, loss: 0.013618550263345242 2023-01-22 14:06:51.412020: step: 482/464, loss: 0.004492191597819328 2023-01-22 14:06:52.047363: step: 484/464, loss: 0.06540249288082123 2023-01-22 14:06:52.693874: step: 486/464, loss: 7.475945312762633e-05 2023-01-22 14:06:53.365633: step: 488/464, loss: 0.0009398137335665524 2023-01-22 14:06:53.950934: step: 490/464, loss: 0.0005184581386856735 2023-01-22 14:06:54.555743: step: 492/464, loss: 0.011175619438290596 2023-01-22 14:06:55.216060: step: 494/464, loss: 0.2823795676231384 2023-01-22 14:06:55.818843: step: 496/464, loss: 0.0016089630080386996 2023-01-22 14:06:56.396820: step: 498/464, loss: 0.0025382675230503082 2023-01-22 14:06:57.013262: step: 500/464, loss: 4.747584342956543 2023-01-22 14:06:57.701163: step: 502/464, loss: 0.02053333818912506 2023-01-22 14:06:58.409126: step: 504/464, loss: 0.0034306731540709734 2023-01-22 14:06:59.064666: step: 506/464, loss: 0.0025277994573116302 2023-01-22 14:06:59.736156: step: 
508/464, loss: 0.0006845752359367907 2023-01-22 14:07:00.341193: step: 510/464, loss: 0.0007348746294155717 2023-01-22 14:07:00.897079: step: 512/464, loss: 0.0009130208636634052 2023-01-22 14:07:01.512698: step: 514/464, loss: 0.012286031618714333 2023-01-22 14:07:02.091708: step: 516/464, loss: 0.06218738853931427 2023-01-22 14:07:02.677768: step: 518/464, loss: 0.005067503545433283 2023-01-22 14:07:03.293068: step: 520/464, loss: 0.05404721945524216 2023-01-22 14:07:03.906017: step: 522/464, loss: 0.012997281737625599 2023-01-22 14:07:04.581621: step: 524/464, loss: 0.0011278531746938825 2023-01-22 14:07:05.199987: step: 526/464, loss: 0.015108383260667324 2023-01-22 14:07:05.838922: step: 528/464, loss: 0.01597750186920166 2023-01-22 14:07:06.479878: step: 530/464, loss: 0.003203788073733449 2023-01-22 14:07:07.099895: step: 532/464, loss: 0.26965925097465515 2023-01-22 14:07:07.696794: step: 534/464, loss: 0.004773963242769241 2023-01-22 14:07:08.300843: step: 536/464, loss: 0.00966687873005867 2023-01-22 14:07:08.910215: step: 538/464, loss: 0.005131350364536047 2023-01-22 14:07:09.561086: step: 540/464, loss: 0.15959526598453522 2023-01-22 14:07:10.167216: step: 542/464, loss: 0.014910740777850151 2023-01-22 14:07:10.836063: step: 544/464, loss: 0.00035401046625338495 2023-01-22 14:07:11.486126: step: 546/464, loss: 0.024254245683550835 2023-01-22 14:07:12.074453: step: 548/464, loss: 0.00013935545575805008 2023-01-22 14:07:12.659512: step: 550/464, loss: 0.0026739283930510283 2023-01-22 14:07:13.276855: step: 552/464, loss: 0.011179208755493164 2023-01-22 14:07:13.913074: step: 554/464, loss: 0.005806444212794304 2023-01-22 14:07:14.546971: step: 556/464, loss: 0.20071232318878174 2023-01-22 14:07:15.197060: step: 558/464, loss: 0.00031759965349920094 2023-01-22 14:07:15.821445: step: 560/464, loss: 0.015619128942489624 2023-01-22 14:07:16.519548: step: 562/464, loss: 0.006494753994047642 2023-01-22 14:07:17.109876: step: 564/464, loss: 0.07191771268844604 2023-01-22 14:07:17.764027: step: 566/464, loss: 0.039309412240982056 2023-01-22 14:07:18.440263: step: 568/464, loss: 0.009192845784127712 2023-01-22 14:07:19.035190: step: 570/464, loss: 0.14159759879112244 2023-01-22 14:07:19.645026: step: 572/464, loss: 0.0028477036394178867 2023-01-22 14:07:20.229566: step: 574/464, loss: 0.0030880693811923265 2023-01-22 14:07:20.865111: step: 576/464, loss: 0.0008346071117557585 2023-01-22 14:07:21.420232: step: 578/464, loss: 0.0027719526551663876 2023-01-22 14:07:22.061223: step: 580/464, loss: 0.002395773306488991 2023-01-22 14:07:22.690496: step: 582/464, loss: 0.005813604686409235 2023-01-22 14:07:23.272230: step: 584/464, loss: 0.005093331448733807 2023-01-22 14:07:23.829657: step: 586/464, loss: 0.002429689047858119 2023-01-22 14:07:24.463229: step: 588/464, loss: 0.01826808974146843 2023-01-22 14:07:25.065470: step: 590/464, loss: 0.01065827440470457 2023-01-22 14:07:25.706436: step: 592/464, loss: 0.014795523136854172 2023-01-22 14:07:26.284825: step: 594/464, loss: 0.01895059272646904 2023-01-22 14:07:26.888655: step: 596/464, loss: 0.0066263931803405285 2023-01-22 14:07:27.462930: step: 598/464, loss: 0.027680065482854843 2023-01-22 14:07:28.067848: step: 600/464, loss: 0.041019223630428314 2023-01-22 14:07:28.662539: step: 602/464, loss: 0.0005921527626924217 2023-01-22 14:07:29.272484: step: 604/464, loss: 0.00405742134898901 2023-01-22 14:07:29.878999: step: 606/464, loss: 0.0395742766559124 2023-01-22 14:07:30.520229: step: 608/464, loss: 0.0009591281996108592 2023-01-22 
14:07:31.119779: step: 610/464, loss: 0.1311628818511963 2023-01-22 14:07:31.741674: step: 612/464, loss: 0.020746229216456413 2023-01-22 14:07:32.333997: step: 614/464, loss: 0.022783661261200905 2023-01-22 14:07:33.002205: step: 616/464, loss: 9.927034261636436e-05 2023-01-22 14:07:33.578901: step: 618/464, loss: 0.008369174785912037 2023-01-22 14:07:34.284388: step: 620/464, loss: 0.09095170348882675 2023-01-22 14:07:34.876956: step: 622/464, loss: 0.005317453760653734 2023-01-22 14:07:35.507117: step: 624/464, loss: 0.002785000717267394 2023-01-22 14:07:36.200527: step: 626/464, loss: 0.013620770536363125 2023-01-22 14:07:36.822833: step: 628/464, loss: 0.03168050944805145 2023-01-22 14:07:37.463942: step: 630/464, loss: 0.013114920817315578 2023-01-22 14:07:38.122978: step: 632/464, loss: 0.0720565915107727 2023-01-22 14:07:38.836370: step: 634/464, loss: 0.016401885077357292 2023-01-22 14:07:39.472369: step: 636/464, loss: 0.0019064360531046987 2023-01-22 14:07:40.100187: step: 638/464, loss: 0.05868987366557121 2023-01-22 14:07:40.724614: step: 640/464, loss: 0.04122765362262726 2023-01-22 14:07:41.275689: step: 642/464, loss: 0.0013818825827911496 2023-01-22 14:07:41.927027: step: 644/464, loss: 0.029495844617486 2023-01-22 14:07:42.571757: step: 646/464, loss: 0.006111129652708769 2023-01-22 14:07:43.288334: step: 648/464, loss: 0.0003144819347653538 2023-01-22 14:07:43.851758: step: 650/464, loss: 0.004753305576741695 2023-01-22 14:07:44.429098: step: 652/464, loss: 0.007937440648674965 2023-01-22 14:07:45.056373: step: 654/464, loss: 0.010105142369866371 2023-01-22 14:07:45.682827: step: 656/464, loss: 0.01663767173886299 2023-01-22 14:07:46.312232: step: 658/464, loss: 0.04917242377996445 2023-01-22 14:07:46.916951: step: 660/464, loss: 0.0013400838943198323 2023-01-22 14:07:47.540364: step: 662/464, loss: 0.012249905616044998 2023-01-22 14:07:48.176271: step: 664/464, loss: 0.013516448438167572 2023-01-22 14:07:48.836242: step: 666/464, loss: 0.023560237139463425 2023-01-22 14:07:49.455730: step: 668/464, loss: 0.03818031772971153 2023-01-22 14:07:50.008239: step: 670/464, loss: 0.01443792600184679 2023-01-22 14:07:50.613978: step: 672/464, loss: 0.01864960603415966 2023-01-22 14:07:51.211758: step: 674/464, loss: 0.022748306393623352 2023-01-22 14:07:51.835896: step: 676/464, loss: 0.008343067020177841 2023-01-22 14:07:52.427486: step: 678/464, loss: 9.029127977555618e-05 2023-01-22 14:07:53.035278: step: 680/464, loss: 0.0007436299347318709 2023-01-22 14:07:53.616764: step: 682/464, loss: 0.0011212803656235337 2023-01-22 14:07:54.299150: step: 684/464, loss: 0.0007027724641375244 2023-01-22 14:07:54.869695: step: 686/464, loss: 0.00898168608546257 2023-01-22 14:07:55.468516: step: 688/464, loss: 0.017184296622872353 2023-01-22 14:07:56.060173: step: 690/464, loss: 0.004175418987870216 2023-01-22 14:07:56.732120: step: 692/464, loss: 0.03173663839697838 2023-01-22 14:07:57.349167: step: 694/464, loss: 0.03442293033003807 2023-01-22 14:07:58.000748: step: 696/464, loss: 0.0225666593760252 2023-01-22 14:07:58.571126: step: 698/464, loss: 0.009640194475650787 2023-01-22 14:07:59.211536: step: 700/464, loss: 0.011529440991580486 2023-01-22 14:07:59.825892: step: 702/464, loss: 0.0011612694943323731 2023-01-22 14:08:00.454038: step: 704/464, loss: 0.01782440021634102 2023-01-22 14:08:01.089584: step: 706/464, loss: 0.010416793636977673 2023-01-22 14:08:01.696265: step: 708/464, loss: 0.00046717614168301225 2023-01-22 14:08:02.298524: step: 710/464, loss: 0.31452393531799316 
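In the per-language dicts of the epoch summaries (for example the epoch 34 block above), each entry reports precision 'p', recall 'r', 'f1', and a 'combined' score. The logged numbers are consistent with the usual F1 = 2pr/(p + r) and with 'combined' being the product of the template F1 and the slot F1. The short check below only restates that observation, copying the epoch 34 Dev Chinese values from the summary above; it is not taken from train.py itself, and the helper names are mine.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall."""
    return 2 * p * r / (p + r) if (p + r) else 0.0

def combined_score(template_f1: float, slot_f1: float) -> float:
    """The logged 'combined' values match template_f1 * slot_f1; this is an
    observation about the numbers above, not a claim about the training code."""
    return template_f1 * slot_f1

# Epoch 34, Dev Chinese, values copied verbatim from the summary above:
assert abs(f1(0.29705471246006393, 0.3528581593927894) - 0.32256071118820473) < 1e-9
assert abs(combined_score(0.7368421052631579, 0.32256071118820473) - 0.2376763135070982) < 1e-9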
2023-01-22 14:08:02.985645: step: 712/464, loss: 0.03398028016090393 2023-01-22 14:08:03.581520: step: 714/464, loss: 0.02827555313706398 2023-01-22 14:08:04.274416: step: 716/464, loss: 0.0073892888613045216 2023-01-22 14:08:04.987423: step: 718/464, loss: 0.015934636816382408 2023-01-22 14:08:05.649536: step: 720/464, loss: 0.00607600063085556 2023-01-22 14:08:06.281830: step: 722/464, loss: 0.005109555087983608 2023-01-22 14:08:06.944619: step: 724/464, loss: 0.00027257108013145626 2023-01-22 14:08:07.636860: step: 726/464, loss: 0.00013226554438006133 2023-01-22 14:08:08.261978: step: 728/464, loss: 0.042293351143598557 2023-01-22 14:08:08.844352: step: 730/464, loss: 0.006536096800118685 2023-01-22 14:08:09.462355: step: 732/464, loss: 0.023406287655234337 2023-01-22 14:08:10.030790: step: 734/464, loss: 0.2510569095611572 2023-01-22 14:08:10.651992: step: 736/464, loss: 0.17887428402900696 2023-01-22 14:08:11.282866: step: 738/464, loss: 0.0026564225554466248 2023-01-22 14:08:11.900812: step: 740/464, loss: 0.0019115894101560116 2023-01-22 14:08:12.465836: step: 742/464, loss: 0.008141586557030678 2023-01-22 14:08:13.077530: step: 744/464, loss: 0.022172318771481514 2023-01-22 14:08:13.699526: step: 746/464, loss: 0.002620603423565626 2023-01-22 14:08:14.277230: step: 748/464, loss: 0.029885120689868927 2023-01-22 14:08:14.943882: step: 750/464, loss: 0.00040005400660447776 2023-01-22 14:08:15.530555: step: 752/464, loss: 0.00014553121582139283 2023-01-22 14:08:16.161249: step: 754/464, loss: 0.014732057228684425 2023-01-22 14:08:16.832155: step: 756/464, loss: 0.002545611932873726 2023-01-22 14:08:17.426980: step: 758/464, loss: 0.013696306385099888 2023-01-22 14:08:18.066801: step: 760/464, loss: 0.02129439264535904 2023-01-22 14:08:18.651537: step: 762/464, loss: 0.0017211624654009938 2023-01-22 14:08:19.222361: step: 764/464, loss: 0.012150133959949017 2023-01-22 14:08:19.925875: step: 766/464, loss: 0.010667501017451286 2023-01-22 14:08:20.534557: step: 768/464, loss: 0.026343297213315964 2023-01-22 14:08:21.150925: step: 770/464, loss: 0.05519673600792885 2023-01-22 14:08:21.740897: step: 772/464, loss: 4.755964255309664e-05 2023-01-22 14:08:22.452234: step: 774/464, loss: 0.011045753955841064 2023-01-22 14:08:23.040758: step: 776/464, loss: 0.0020963868591934443 2023-01-22 14:08:23.693669: step: 778/464, loss: 7.541560649871826 2023-01-22 14:08:24.347168: step: 780/464, loss: 0.006616545375436544 2023-01-22 14:08:24.943770: step: 782/464, loss: 0.0030220819171518087 2023-01-22 14:08:25.542990: step: 784/464, loss: 0.0029843649826943874 2023-01-22 14:08:26.136364: step: 786/464, loss: 0.0005856929928995669 2023-01-22 14:08:26.780318: step: 788/464, loss: 2.3059473037719727 2023-01-22 14:08:27.402875: step: 790/464, loss: 0.0026003012899309397 2023-01-22 14:08:27.997847: step: 792/464, loss: 0.050763748586177826 2023-01-22 14:08:28.645432: step: 794/464, loss: 0.06708303093910217 2023-01-22 14:08:29.272800: step: 796/464, loss: 0.03334089741110802 2023-01-22 14:08:29.894187: step: 798/464, loss: 0.013586047105491161 2023-01-22 14:08:30.536585: step: 800/464, loss: 0.013940623961389065 2023-01-22 14:08:31.267937: step: 802/464, loss: 0.08756569772958755 2023-01-22 14:08:31.850296: step: 804/464, loss: 0.007653203327208757 2023-01-22 14:08:32.486273: step: 806/464, loss: 0.005163044203072786 2023-01-22 14:08:33.138894: step: 808/464, loss: 0.014750438742339611 2023-01-22 14:08:33.832257: step: 810/464, loss: 0.002899068407714367 2023-01-22 14:08:34.429972: step: 812/464, loss: 
0.00044651940697804093 2023-01-22 14:08:35.031114: step: 814/464, loss: 0.0063481805846095085 2023-01-22 14:08:35.720911: step: 816/464, loss: 0.027266254648566246 2023-01-22 14:08:36.391579: step: 818/464, loss: 0.0041908444836735725 2023-01-22 14:08:37.044502: step: 820/464, loss: 0.00024215153825934976 2023-01-22 14:08:37.665006: step: 822/464, loss: 0.0004711594374384731 2023-01-22 14:08:38.264861: step: 824/464, loss: 0.042581092566251755 2023-01-22 14:08:38.957222: step: 826/464, loss: 0.012439103797078133 2023-01-22 14:08:39.651145: step: 828/464, loss: 0.029706567525863647 2023-01-22 14:08:40.330603: step: 830/464, loss: 0.019360028207302094 2023-01-22 14:08:40.938596: step: 832/464, loss: 0.014573116786777973 2023-01-22 14:08:41.529443: step: 834/464, loss: 0.010647988878190517 2023-01-22 14:08:42.161084: step: 836/464, loss: 0.0393616147339344 2023-01-22 14:08:42.765287: step: 838/464, loss: 0.003897633170709014 2023-01-22 14:08:43.342306: step: 840/464, loss: 0.0009832321666181087 2023-01-22 14:08:43.974523: step: 842/464, loss: 0.008502716198563576 2023-01-22 14:08:44.619900: step: 844/464, loss: 0.0011305073276162148 2023-01-22 14:08:45.255981: step: 846/464, loss: 0.02720501646399498 2023-01-22 14:08:45.802216: step: 848/464, loss: 0.02629738114774227 2023-01-22 14:08:46.355116: step: 850/464, loss: 0.012617183849215508 2023-01-22 14:08:46.976968: step: 852/464, loss: 0.03479094058275223 2023-01-22 14:08:47.561603: step: 854/464, loss: 0.030419880524277687 2023-01-22 14:08:48.148520: step: 856/464, loss: 0.0031641533132642508 2023-01-22 14:08:48.787377: step: 858/464, loss: 0.007085779681801796 2023-01-22 14:08:49.409803: step: 860/464, loss: 0.0025283698923885822 2023-01-22 14:08:50.066107: step: 862/464, loss: 0.07772478461265564 2023-01-22 14:08:50.723000: step: 864/464, loss: 0.03700843080878258 2023-01-22 14:08:51.322070: step: 866/464, loss: 0.0037723893765360117 2023-01-22 14:08:51.925580: step: 868/464, loss: 0.0020687098149210215 2023-01-22 14:08:52.529328: step: 870/464, loss: 0.00994616188108921 2023-01-22 14:08:53.111220: step: 872/464, loss: 0.00024091260274872184 2023-01-22 14:08:53.771376: step: 874/464, loss: 0.00023713010887149721 2023-01-22 14:08:54.415666: step: 876/464, loss: 0.0046511306427419186 2023-01-22 14:08:54.975331: step: 878/464, loss: 9.003627383208368e-06 2023-01-22 14:08:55.562587: step: 880/464, loss: 0.32005923986434937 2023-01-22 14:08:56.203446: step: 882/464, loss: 0.07506310939788818 2023-01-22 14:08:56.779582: step: 884/464, loss: 0.017441291362047195 2023-01-22 14:08:57.390953: step: 886/464, loss: 0.03993603214621544 2023-01-22 14:08:57.981185: step: 888/464, loss: 0.0002836991334334016 2023-01-22 14:08:58.671673: step: 890/464, loss: 0.0018918365240097046 2023-01-22 14:08:59.351696: step: 892/464, loss: 0.03895119950175285 2023-01-22 14:09:00.070506: step: 894/464, loss: 0.10691957920789719 2023-01-22 14:09:00.651669: step: 896/464, loss: 0.0010565068805590272 2023-01-22 14:09:01.283924: step: 898/464, loss: 0.0009570553665980697 2023-01-22 14:09:01.928456: step: 900/464, loss: 0.026494259014725685 2023-01-22 14:09:02.595833: step: 902/464, loss: 0.00028691813349723816 2023-01-22 14:09:03.206325: step: 904/464, loss: 0.084366574883461 2023-01-22 14:09:03.857108: step: 906/464, loss: 0.007411717902868986 2023-01-22 14:09:04.509165: step: 908/464, loss: 0.010169975459575653 2023-01-22 14:09:05.125151: step: 910/464, loss: 0.007501612417399883 2023-01-22 14:09:05.748032: step: 912/464, loss: 0.03972383588552475 2023-01-22 
14:09:06.352808: step: 914/464, loss: 0.0008084288565441966 2023-01-22 14:09:07.036442: step: 916/464, loss: 0.07327800244092941 2023-01-22 14:09:07.707449: step: 918/464, loss: 0.030206453055143356 2023-01-22 14:09:08.312214: step: 920/464, loss: 0.0031599088106304407 2023-01-22 14:09:08.901737: step: 922/464, loss: 0.005930094514042139 2023-01-22 14:09:09.530760: step: 924/464, loss: 0.0011120139388367534 2023-01-22 14:09:10.191905: step: 926/464, loss: 0.036906830966472626 2023-01-22 14:09:10.854650: step: 928/464, loss: 0.002705650869756937 2023-01-22 14:09:11.335199: step: 930/464, loss: 0.000839819957036525 ================================================== Loss: 0.062 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30269053601340035, 'r': 0.342896110056926, 'f1': 0.3215413701067616}, 'combined': 0.23692522007866643, 'epoch': 35} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2972319233153357, 'r': 0.32998475233079966, 'f1': 0.31275317257462215}, 'combined': 0.2041808276912041, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2873310810810811, 'r': 0.34294354838709673, 'f1': 0.31268382352941176}, 'combined': 0.2303986068111455, 'epoch': 35} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3122661100492581, 'r': 0.33293077909663543, 'f1': 0.32226751570919876}, 'combined': 0.2103922641417567, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3062874455337691, 'r': 0.3556886464263125, 'f1': 0.3291447175885279}, 'combined': 0.24252768664417845, 'epoch': 35} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30242058923650983, 'r': 0.3238235169674026, 'f1': 0.3127563128810777}, 'combined': 0.20418287783946004, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2205128205128205, 'r': 0.4095238095238095, 'f1': 0.2866666666666667}, 'combined': 0.19111111111111112, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21808510638297873, 'r': 0.44565217391304346, 'f1': 0.29285714285714287}, 'combined': 0.14642857142857144, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3017241379310345, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:11:49.418009: step: 2/464, loss: 0.0036940753925591707 2023-01-22 14:11:49.998249: step: 4/464, loss: 0.04975832626223564 2023-01-22 14:11:50.589060: step: 6/464, loss: 0.0008109904592856765 2023-01-22 14:11:51.212920: step: 8/464, loss: 0.0008208010112866759 2023-01-22 14:11:51.800294: step: 10/464, loss: 0.0042820945382118225 2023-01-22 14:11:52.453322: step: 12/464, loss: 0.0039556315168738365 2023-01-22 14:11:53.090029: step: 14/464, loss: 0.02546033449470997 2023-01-22 14:11:53.729507: step: 16/464, loss: 0.00010900833876803517 2023-01-22 14:11:54.284920: step: 18/464, loss: 0.002483731135725975 2023-01-22 14:11:54.868277: step: 20/464, loss: 0.00024638380273245275 2023-01-22 14:11:55.606874: step: 22/464, loss: 0.0012883706949651241 2023-01-22 14:11:56.194354: step: 24/464, loss: 0.0003679947112686932 2023-01-22 14:11:56.805089: step: 26/464, loss: 0.08963681757450104 2023-01-22 14:11:57.403114: step: 28/464, loss: 0.04453660547733307 2023-01-22 14:11:57.974834: step: 30/464, loss: 0.0038836635649204254 2023-01-22 14:11:58.566038: step: 32/464, loss: 0.005359238479286432 2023-01-22 14:11:59.178627: step: 34/464, loss: 0.05803626403212547 2023-01-22 14:11:59.831415: step: 36/464, loss: 0.003457083133980632 2023-01-22 14:12:00.470109: step: 38/464, loss: 0.20208704471588135 2023-01-22 14:12:01.076173: step: 40/464, loss: 0.04237399250268936 2023-01-22 14:12:01.699883: step: 42/464, loss: 0.0036644453648477793 2023-01-22 14:12:02.365052: step: 44/464, loss: 0.002522712107747793 2023-01-22 14:12:02.948339: step: 46/464, loss: 0.000686106679495424 2023-01-22 14:12:03.580222: step: 48/464, loss: 0.021788040176033974 2023-01-22 14:12:04.232131: step: 50/464, loss: 0.0007058007759042084 2023-01-22 14:12:04.847094: step: 52/464, loss: 5.509668335434981e-05 2023-01-22 14:12:05.500152: step: 54/464, loss: 0.02686609886586666 2023-01-22 14:12:06.119398: step: 56/464, loss: 0.0005029297899454832 2023-01-22 14:12:06.830286: step: 58/464, loss: 0.054092515259981155 2023-01-22 14:12:07.406027: step: 60/464, loss: 0.0016269775805994868 
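The metric blocks in each epoch summary, and in the "Current best result" section above (epoch 11 for Chinese and Korean, epoch 22 for Russian), are printed as Python dict literals, so they can be recovered programmatically and compared across epochs. Below is a sketch of that extraction; the regex and function names are mine, it only targets the "Dev/Test/Sample <language>" labels seen in the per-epoch summaries, and it makes no assumption about how train.py decides which epoch counts as the best.

import ast
import re

# Matches entries such as:
#   Dev Chinese: {'template': {...}, 'slot': {...}, 'combined': 0.23..., 'epoch': 35}
RESULT_RE = re.compile(
    r"((?:Dev|Test|Sample) (?:Chinese|Korean|Russian)): (\{.*?'epoch': \d+\})",
    re.DOTALL,
)

def parse_summary(summary_text: str) -> dict[str, dict]:
    """Pull the per-language metric dicts out of one epoch summary; the dicts
    are valid Python literals, so ast.literal_eval can load them directly."""
    return {label: ast.literal_eval(body) for label, body in RESULT_RE.findall(summary_text)}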
2023-01-22 14:12:08.039169: step: 62/464, loss: 0.03045477904379368 2023-01-22 14:12:08.674744: step: 64/464, loss: 0.0004281103319954127 2023-01-22 14:12:09.337255: step: 66/464, loss: 0.018816521391272545 2023-01-22 14:12:09.920396: step: 68/464, loss: 0.0003489804803393781 2023-01-22 14:12:10.535084: step: 70/464, loss: 0.001230527414008975 2023-01-22 14:12:11.138472: step: 72/464, loss: 0.0004906057729385793 2023-01-22 14:12:11.787509: step: 74/464, loss: 0.0012830095365643501 2023-01-22 14:12:12.382793: step: 76/464, loss: 0.03462563455104828 2023-01-22 14:12:13.025492: step: 78/464, loss: 0.0034266517031937838 2023-01-22 14:12:13.632780: step: 80/464, loss: 0.004816330503672361 2023-01-22 14:12:14.263350: step: 82/464, loss: 0.004266914911568165 2023-01-22 14:12:14.879154: step: 84/464, loss: 0.017530955374240875 2023-01-22 14:12:15.449863: step: 86/464, loss: 0.025057358667254448 2023-01-22 14:12:16.047394: step: 88/464, loss: 0.05076591297984123 2023-01-22 14:12:16.671619: step: 90/464, loss: 0.00010926132381428033 2023-01-22 14:12:17.290250: step: 92/464, loss: 0.0014297146117314696 2023-01-22 14:12:17.832729: step: 94/464, loss: 0.001774539239704609 2023-01-22 14:12:18.481363: step: 96/464, loss: 0.013956896960735321 2023-01-22 14:12:19.081756: step: 98/464, loss: 0.02583293244242668 2023-01-22 14:12:19.689956: step: 100/464, loss: 0.0030617262236773968 2023-01-22 14:12:20.239966: step: 102/464, loss: 0.003205106593668461 2023-01-22 14:12:20.847962: step: 104/464, loss: 0.0025927547831088305 2023-01-22 14:12:21.461691: step: 106/464, loss: 0.01715610921382904 2023-01-22 14:12:22.107670: step: 108/464, loss: 0.0008542435825802386 2023-01-22 14:12:22.710321: step: 110/464, loss: 2.9173745588195743e-06 2023-01-22 14:12:23.373495: step: 112/464, loss: 0.03331207111477852 2023-01-22 14:12:23.991314: step: 114/464, loss: 0.04286523163318634 2023-01-22 14:12:24.621177: step: 116/464, loss: 0.04366404190659523 2023-01-22 14:12:25.288186: step: 118/464, loss: 0.04296250268816948 2023-01-22 14:12:25.887316: step: 120/464, loss: 0.5016422867774963 2023-01-22 14:12:26.508334: step: 122/464, loss: 0.006844162475317717 2023-01-22 14:12:27.088383: step: 124/464, loss: 0.008258841931819916 2023-01-22 14:12:27.795004: step: 126/464, loss: 0.005316090304404497 2023-01-22 14:12:28.341827: step: 128/464, loss: 0.00038320745807141066 2023-01-22 14:12:28.936506: step: 130/464, loss: 0.009655006229877472 2023-01-22 14:12:29.630864: step: 132/464, loss: 0.0001869757688837126 2023-01-22 14:12:30.224630: step: 134/464, loss: 0.008668635971844196 2023-01-22 14:12:30.819160: step: 136/464, loss: 0.001098167267628014 2023-01-22 14:12:31.444542: step: 138/464, loss: 0.003017690032720566 2023-01-22 14:12:32.061692: step: 140/464, loss: 0.0026568910107016563 2023-01-22 14:12:32.715180: step: 142/464, loss: 0.002854824997484684 2023-01-22 14:12:33.345835: step: 144/464, loss: 3.517479126458056e-05 2023-01-22 14:12:33.947316: step: 146/464, loss: 0.00023735500872135162 2023-01-22 14:12:34.577510: step: 148/464, loss: 0.0016523152589797974 2023-01-22 14:12:35.197290: step: 150/464, loss: 0.02731013298034668 2023-01-22 14:12:35.849042: step: 152/464, loss: 0.02933180145919323 2023-01-22 14:12:36.443716: step: 154/464, loss: 0.00342610664665699 2023-01-22 14:12:37.023673: step: 156/464, loss: 0.5142154693603516 2023-01-22 14:12:37.627948: step: 158/464, loss: 0.020955195650458336 2023-01-22 14:12:38.184974: step: 160/464, loss: 0.0027112129610031843 2023-01-22 14:12:38.844257: step: 162/464, loss: 
0.034296292811632156 2023-01-22 14:12:39.490619: step: 164/464, loss: 0.0006291309255175292 2023-01-22 14:12:40.089295: step: 166/464, loss: 0.003323235781863332 2023-01-22 14:12:40.696912: step: 168/464, loss: 0.0009546473156660795 2023-01-22 14:12:41.302477: step: 170/464, loss: 0.0013110835570842028 2023-01-22 14:12:41.915195: step: 172/464, loss: 0.0026694394182413816 2023-01-22 14:12:42.534664: step: 174/464, loss: 0.010749605484306812 2023-01-22 14:12:43.184247: step: 176/464, loss: 0.2029525488615036 2023-01-22 14:12:43.883753: step: 178/464, loss: 0.01791108027100563 2023-01-22 14:12:44.609794: step: 180/464, loss: 0.0067817047238349915 2023-01-22 14:12:45.171382: step: 182/464, loss: 0.014205916784703732 2023-01-22 14:12:45.843179: step: 184/464, loss: 0.017666643485426903 2023-01-22 14:12:46.414874: step: 186/464, loss: 2.091298119921703e-05 2023-01-22 14:12:47.041446: step: 188/464, loss: 0.011905474588274956 2023-01-22 14:12:47.619677: step: 190/464, loss: 0.07707890123128891 2023-01-22 14:12:48.227670: step: 192/464, loss: 0.004543210845440626 2023-01-22 14:12:48.832483: step: 194/464, loss: 0.012926164083182812 2023-01-22 14:12:49.420001: step: 196/464, loss: 0.01449363213032484 2023-01-22 14:12:49.989493: step: 198/464, loss: 0.00012760139361489564 2023-01-22 14:12:50.588678: step: 200/464, loss: 0.005846938583999872 2023-01-22 14:12:51.229571: step: 202/464, loss: 0.0015401836717501283 2023-01-22 14:12:51.856649: step: 204/464, loss: 0.0009976581204682589 2023-01-22 14:12:52.402285: step: 206/464, loss: 0.004876827355474234 2023-01-22 14:12:53.017945: step: 208/464, loss: 0.010364729911088943 2023-01-22 14:12:53.737807: step: 210/464, loss: 0.3535803258419037 2023-01-22 14:12:54.387479: step: 212/464, loss: 0.09206904470920563 2023-01-22 14:12:55.086325: step: 214/464, loss: 0.011944558471441269 2023-01-22 14:12:55.681926: step: 216/464, loss: 0.008668388240039349 2023-01-22 14:12:56.318404: step: 218/464, loss: 6.387718167388812e-05 2023-01-22 14:12:56.979915: step: 220/464, loss: 0.005396423861384392 2023-01-22 14:12:57.769299: step: 222/464, loss: 0.0007495448808185756 2023-01-22 14:12:58.452003: step: 224/464, loss: 0.01975761540234089 2023-01-22 14:12:58.999226: step: 226/464, loss: 0.00019024180073756725 2023-01-22 14:12:59.662122: step: 228/464, loss: 0.0291658453643322 2023-01-22 14:13:00.364634: step: 230/464, loss: 0.0006478412542492151 2023-01-22 14:13:01.047219: step: 232/464, loss: 0.03055237978696823 2023-01-22 14:13:01.682093: step: 234/464, loss: 0.0791534036397934 2023-01-22 14:13:02.343176: step: 236/464, loss: 0.013012934476137161 2023-01-22 14:13:02.954740: step: 238/464, loss: 0.0001017287650029175 2023-01-22 14:13:03.616751: step: 240/464, loss: 0.0075666275806725025 2023-01-22 14:13:04.214465: step: 242/464, loss: 0.008738645352423191 2023-01-22 14:13:04.903970: step: 244/464, loss: 7.892473513493314e-05 2023-01-22 14:13:05.546113: step: 246/464, loss: 0.0020362145733088255 2023-01-22 14:13:06.244829: step: 248/464, loss: 0.0003480454906821251 2023-01-22 14:13:06.827380: step: 250/464, loss: 0.01788203977048397 2023-01-22 14:13:07.524464: step: 252/464, loss: 0.0037269634194672108 2023-01-22 14:13:08.061651: step: 254/464, loss: 0.00046149862464517355 2023-01-22 14:13:08.688206: step: 256/464, loss: 0.018515929579734802 2023-01-22 14:13:09.280412: step: 258/464, loss: 0.024124393239617348 2023-01-22 14:13:09.883176: step: 260/464, loss: 0.006267345976084471 2023-01-22 14:13:10.497041: step: 262/464, loss: 0.1672067642211914 2023-01-22 14:13:11.144870: 
step: 264/464, loss: 0.04586471989750862 2023-01-22 14:13:11.706091: step: 266/464, loss: 0.0006315509090200067 2023-01-22 14:13:12.308120: step: 268/464, loss: 0.019157512113451958 2023-01-22 14:13:13.025634: step: 270/464, loss: 0.025100653991103172 2023-01-22 14:13:13.660252: step: 272/464, loss: 0.011103455908596516 2023-01-22 14:13:14.298291: step: 274/464, loss: 0.007256446406245232 2023-01-22 14:13:14.980524: step: 276/464, loss: 0.00605237390846014 2023-01-22 14:13:15.571189: step: 278/464, loss: 0.004968108143657446 2023-01-22 14:13:16.247591: step: 280/464, loss: 0.19366006553173065 2023-01-22 14:13:16.895656: step: 282/464, loss: 0.025342747569084167 2023-01-22 14:13:17.535412: step: 284/464, loss: 0.09072583168745041 2023-01-22 14:13:18.140099: step: 286/464, loss: 0.009893305599689484 2023-01-22 14:13:18.732171: step: 288/464, loss: 9.708423749543726e-05 2023-01-22 14:13:19.339850: step: 290/464, loss: 0.004454748705029488 2023-01-22 14:13:19.939561: step: 292/464, loss: 0.0009248661808669567 2023-01-22 14:13:20.577825: step: 294/464, loss: 0.01104151550680399 2023-01-22 14:13:21.137195: step: 296/464, loss: 0.020130300894379616 2023-01-22 14:13:21.751715: step: 298/464, loss: 0.08098264038562775 2023-01-22 14:13:22.362630: step: 300/464, loss: 0.00018765582353807986 2023-01-22 14:13:23.009260: step: 302/464, loss: 0.00038301621680147946 2023-01-22 14:13:23.628843: step: 304/464, loss: 0.03141546621918678 2023-01-22 14:13:24.265227: step: 306/464, loss: 0.016768259927630424 2023-01-22 14:13:24.949371: step: 308/464, loss: 0.01261827815324068 2023-01-22 14:13:25.589548: step: 310/464, loss: 0.016717633232474327 2023-01-22 14:13:26.240117: step: 312/464, loss: 0.035504184663295746 2023-01-22 14:13:26.827409: step: 314/464, loss: 0.006222328171133995 2023-01-22 14:13:27.453967: step: 316/464, loss: 0.018137024715542793 2023-01-22 14:13:28.016598: step: 318/464, loss: 0.020232651382684708 2023-01-22 14:13:28.574711: step: 320/464, loss: 0.0003459895960986614 2023-01-22 14:13:29.161150: step: 322/464, loss: 0.0039661722257733345 2023-01-22 14:13:29.782530: step: 324/464, loss: 0.01878402568399906 2023-01-22 14:13:30.447308: step: 326/464, loss: 0.3380680978298187 2023-01-22 14:13:31.044192: step: 328/464, loss: 0.015422080643475056 2023-01-22 14:13:31.636423: step: 330/464, loss: 0.0011237740982323885 2023-01-22 14:13:32.322886: step: 332/464, loss: 0.006381748244166374 2023-01-22 14:13:32.902624: step: 334/464, loss: 0.0002370486909057945 2023-01-22 14:13:33.522237: step: 336/464, loss: 0.009715653955936432 2023-01-22 14:13:34.153570: step: 338/464, loss: 0.017769871279597282 2023-01-22 14:13:34.741313: step: 340/464, loss: 0.0009704561671242118 2023-01-22 14:13:35.319922: step: 342/464, loss: 6.408966146409512e-05 2023-01-22 14:13:35.954390: step: 344/464, loss: 0.00027266753022558987 2023-01-22 14:13:36.645465: step: 346/464, loss: 0.0036946088075637817 2023-01-22 14:13:37.268738: step: 348/464, loss: 0.02732905186712742 2023-01-22 14:13:37.914661: step: 350/464, loss: 0.004943967796862125 2023-01-22 14:13:38.584319: step: 352/464, loss: 0.341863214969635 2023-01-22 14:13:39.208107: step: 354/464, loss: 0.007155687548220158 2023-01-22 14:13:39.860810: step: 356/464, loss: 0.025890706107020378 2023-01-22 14:13:40.606152: step: 358/464, loss: 0.002095653209835291 2023-01-22 14:13:41.225310: step: 360/464, loss: 0.02985740266740322 2023-01-22 14:13:41.831972: step: 362/464, loss: 3.325389843666926e-05 2023-01-22 14:13:42.484251: step: 364/464, loss: 0.006313004996627569 2023-01-22 
14:13:43.126715: step: 366/464, loss: 1.2024905681610107 2023-01-22 14:13:43.740041: step: 368/464, loss: 0.0041957120411098 2023-01-22 14:13:44.391160: step: 370/464, loss: 0.00036565324990078807 2023-01-22 14:13:44.965944: step: 372/464, loss: 8.443810656899586e-05 2023-01-22 14:13:45.606448: step: 374/464, loss: 0.016036560758948326 2023-01-22 14:13:46.288353: step: 376/464, loss: 0.003646174678578973 2023-01-22 14:13:46.924914: step: 378/464, loss: 0.006669667083770037 2023-01-22 14:13:47.497625: step: 380/464, loss: 0.005503546912223101 2023-01-22 14:13:48.100851: step: 382/464, loss: 0.006535328924655914 2023-01-22 14:13:48.783416: step: 384/464, loss: 0.027504896745085716 2023-01-22 14:13:49.418190: step: 386/464, loss: 0.001106930780224502 2023-01-22 14:13:50.085292: step: 388/464, loss: 0.02494746819138527 2023-01-22 14:13:50.713570: step: 390/464, loss: 0.00857547391206026 2023-01-22 14:13:51.330993: step: 392/464, loss: 0.0018851346103474498 2023-01-22 14:13:51.903633: step: 394/464, loss: 2.1371150069171563e-05 2023-01-22 14:13:52.477360: step: 396/464, loss: 0.2606925964355469 2023-01-22 14:13:53.136680: step: 398/464, loss: 0.03222309798002243 2023-01-22 14:13:53.715436: step: 400/464, loss: 0.00016328068159054965 2023-01-22 14:13:54.332953: step: 402/464, loss: 0.01708224229514599 2023-01-22 14:13:54.940323: step: 404/464, loss: 0.010649572126567364 2023-01-22 14:13:55.564316: step: 406/464, loss: 0.011210165917873383 2023-01-22 14:13:56.222244: step: 408/464, loss: 0.02789386548101902 2023-01-22 14:13:56.766469: step: 410/464, loss: 0.001550469663925469 2023-01-22 14:13:57.403933: step: 412/464, loss: 0.002343697240576148 2023-01-22 14:13:58.012388: step: 414/464, loss: 0.0039685931988060474 2023-01-22 14:13:58.606986: step: 416/464, loss: 0.002458785893395543 2023-01-22 14:13:59.260960: step: 418/464, loss: 0.004095163196325302 2023-01-22 14:13:59.863553: step: 420/464, loss: 0.00020929214952047914 2023-01-22 14:14:00.525617: step: 422/464, loss: 0.11573031544685364 2023-01-22 14:14:01.152167: step: 424/464, loss: 0.015518685802817345 2023-01-22 14:14:01.774625: step: 426/464, loss: 0.010964400134980679 2023-01-22 14:14:02.316373: step: 428/464, loss: 5.291239722282626e-05 2023-01-22 14:14:02.942041: step: 430/464, loss: 0.033461663872003555 2023-01-22 14:14:03.591420: step: 432/464, loss: 0.05376812815666199 2023-01-22 14:14:04.238816: step: 434/464, loss: 0.0001575053174747154 2023-01-22 14:14:04.832981: step: 436/464, loss: 0.015117555856704712 2023-01-22 14:14:05.453987: step: 438/464, loss: 0.01067406591027975 2023-01-22 14:14:06.064372: step: 440/464, loss: 0.006663429085165262 2023-01-22 14:14:06.728806: step: 442/464, loss: 0.00010586978169158101 2023-01-22 14:14:07.353722: step: 444/464, loss: 0.016213309019804 2023-01-22 14:14:07.992150: step: 446/464, loss: 0.02754809707403183 2023-01-22 14:14:08.536058: step: 448/464, loss: 0.0036638882011175156 2023-01-22 14:14:09.171405: step: 450/464, loss: 0.013981817290186882 2023-01-22 14:14:09.848890: step: 452/464, loss: 0.0026171025820076466 2023-01-22 14:14:10.449228: step: 454/464, loss: 0.051369551569223404 2023-01-22 14:14:11.027491: step: 456/464, loss: 0.0019740727730095387 2023-01-22 14:14:11.686760: step: 458/464, loss: 0.4339074492454529 2023-01-22 14:14:12.338851: step: 460/464, loss: 0.001750551862642169 2023-01-22 14:14:12.980143: step: 462/464, loss: 0.0016651484183967113 2023-01-22 14:14:13.635878: step: 464/464, loss: 0.0006342732231132686 2023-01-22 14:14:14.298058: step: 466/464, loss: 
3.723198413848877 2023-01-22 14:14:14.890923: step: 468/464, loss: 0.018522344529628754 2023-01-22 14:14:15.474309: step: 470/464, loss: 0.03673872724175453 2023-01-22 14:14:16.111233: step: 472/464, loss: 0.0006409911438822746 2023-01-22 14:14:16.729661: step: 474/464, loss: 0.00557336863130331 2023-01-22 14:14:17.407803: step: 476/464, loss: 0.010654748417437077 2023-01-22 14:14:18.026048: step: 478/464, loss: 0.02702816016972065 2023-01-22 14:14:18.653775: step: 480/464, loss: 0.0016854925779625773 2023-01-22 14:14:19.234109: step: 482/464, loss: 0.014287048950791359 2023-01-22 14:14:19.860311: step: 484/464, loss: 0.003183132503181696 2023-01-22 14:14:20.548768: step: 486/464, loss: 0.019391268491744995 2023-01-22 14:14:21.134218: step: 488/464, loss: 0.0006723207188770175 2023-01-22 14:14:21.763139: step: 490/464, loss: 0.0011718154419213533 2023-01-22 14:14:22.351390: step: 492/464, loss: 0.014938319101929665 2023-01-22 14:14:23.015474: step: 494/464, loss: 0.09425222128629684 2023-01-22 14:14:23.643135: step: 496/464, loss: 0.03835434466600418 2023-01-22 14:14:24.324438: step: 498/464, loss: 0.09424697607755661 2023-01-22 14:14:25.017750: step: 500/464, loss: 0.0001944851828739047 2023-01-22 14:14:25.587425: step: 502/464, loss: 0.12420111894607544 2023-01-22 14:14:26.167173: step: 504/464, loss: 0.0020259881857782602 2023-01-22 14:14:26.750031: step: 506/464, loss: 0.08971814811229706 2023-01-22 14:14:27.352407: step: 508/464, loss: 0.015696369111537933 2023-01-22 14:14:27.949527: step: 510/464, loss: 0.02257942035794258 2023-01-22 14:14:28.538676: step: 512/464, loss: 0.004465331323444843 2023-01-22 14:14:29.159818: step: 514/464, loss: 0.0001645921729505062 2023-01-22 14:14:29.751539: step: 516/464, loss: 0.00840042158961296 2023-01-22 14:14:30.459408: step: 518/464, loss: 0.0004073931195307523 2023-01-22 14:14:31.122775: step: 520/464, loss: 0.0217901561409235 2023-01-22 14:14:31.772648: step: 522/464, loss: 0.12779176235198975 2023-01-22 14:14:32.444676: step: 524/464, loss: 0.001443426706828177 2023-01-22 14:14:33.139009: step: 526/464, loss: 0.020604323595762253 2023-01-22 14:14:33.776474: step: 528/464, loss: 0.006679283920675516 2023-01-22 14:14:34.425026: step: 530/464, loss: 1.2766868167091161e-05 2023-01-22 14:14:35.164348: step: 532/464, loss: 0.001376515720039606 2023-01-22 14:14:35.795006: step: 534/464, loss: 0.050487276166677475 2023-01-22 14:14:36.380987: step: 536/464, loss: 0.0036363143008202314 2023-01-22 14:14:36.995250: step: 538/464, loss: 0.010122239589691162 2023-01-22 14:14:37.674799: step: 540/464, loss: 0.0013998758513480425 2023-01-22 14:14:38.391838: step: 542/464, loss: 0.0008087092428468168 2023-01-22 14:14:38.970874: step: 544/464, loss: 0.004818467888981104 2023-01-22 14:14:39.594354: step: 546/464, loss: 0.015545015223324299 2023-01-22 14:14:40.263907: step: 548/464, loss: 0.014555670320987701 2023-01-22 14:14:40.885107: step: 550/464, loss: 0.003583703190088272 2023-01-22 14:14:41.541373: step: 552/464, loss: 0.005766857415437698 2023-01-22 14:14:42.166053: step: 554/464, loss: 0.02280147559940815 2023-01-22 14:14:42.775758: step: 556/464, loss: 0.0035246696788817644 2023-01-22 14:14:43.442665: step: 558/464, loss: 0.02302168682217598 2023-01-22 14:14:44.044159: step: 560/464, loss: 0.004515378270298243 2023-01-22 14:14:44.621719: step: 562/464, loss: 0.008165445178747177 2023-01-22 14:14:45.229160: step: 564/464, loss: 0.00960410013794899 2023-01-22 14:14:45.818076: step: 566/464, loss: 0.00022214172349777073 2023-01-22 14:14:46.430397: step: 
568/464, loss: 0.012886843644082546 2023-01-22 14:14:47.108005: step: 570/464, loss: 0.0118499044328928 2023-01-22 14:14:47.711350: step: 572/464, loss: 0.014691988937556744 2023-01-22 14:14:48.336457: step: 574/464, loss: 0.009075475856661797 2023-01-22 14:14:49.025803: step: 576/464, loss: 0.062365129590034485 2023-01-22 14:14:49.629751: step: 578/464, loss: 0.010842734016478062 2023-01-22 14:14:50.352839: step: 580/464, loss: 0.007636451628059149 2023-01-22 14:14:51.019526: step: 582/464, loss: 0.033298321068286896 2023-01-22 14:14:51.614714: step: 584/464, loss: 0.02356777898967266 2023-01-22 14:14:52.230987: step: 586/464, loss: 0.0002865640271920711 2023-01-22 14:14:52.840138: step: 588/464, loss: 0.13140997290611267 2023-01-22 14:14:53.438384: step: 590/464, loss: 0.003056851914152503 2023-01-22 14:14:54.073781: step: 592/464, loss: 0.0005417139618657529 2023-01-22 14:14:54.731069: step: 594/464, loss: 0.002889784285798669 2023-01-22 14:14:55.371796: step: 596/464, loss: 0.012304414063692093 2023-01-22 14:14:56.000187: step: 598/464, loss: 0.0003184415982104838 2023-01-22 14:14:56.636564: step: 600/464, loss: 0.005693783052265644 2023-01-22 14:14:57.292737: step: 602/464, loss: 0.0038007793482393026 2023-01-22 14:14:57.895738: step: 604/464, loss: 0.04014163836836815 2023-01-22 14:14:58.475908: step: 606/464, loss: 0.0021637456957250834 2023-01-22 14:14:59.107582: step: 608/464, loss: 0.049259163439273834 2023-01-22 14:14:59.740187: step: 610/464, loss: 0.0007301790756173432 2023-01-22 14:15:00.329632: step: 612/464, loss: 0.0011699952883645892 2023-01-22 14:15:00.918609: step: 614/464, loss: 0.011053789407014847 2023-01-22 14:15:01.547430: step: 616/464, loss: 0.010029182769358158 2023-01-22 14:15:02.122882: step: 618/464, loss: 0.0029339087195694447 2023-01-22 14:15:02.748455: step: 620/464, loss: 0.03637511283159256 2023-01-22 14:15:03.355008: step: 622/464, loss: 0.00411232328042388 2023-01-22 14:15:03.970245: step: 624/464, loss: 0.001893992186523974 2023-01-22 14:15:04.578111: step: 626/464, loss: 0.005734010133892298 2023-01-22 14:15:05.197353: step: 628/464, loss: 0.0007987542194314301 2023-01-22 14:15:05.856535: step: 630/464, loss: 0.0362277552485466 2023-01-22 14:15:06.439132: step: 632/464, loss: 0.0009814549703150988 2023-01-22 14:15:07.040884: step: 634/464, loss: 0.01007351465523243 2023-01-22 14:15:07.597166: step: 636/464, loss: 0.00042207157821394503 2023-01-22 14:15:08.181853: step: 638/464, loss: 0.028407089412212372 2023-01-22 14:15:08.847270: step: 640/464, loss: 0.005203355569392443 2023-01-22 14:15:09.599885: step: 642/464, loss: 0.0018461854197084904 2023-01-22 14:15:10.260248: step: 644/464, loss: 0.004764284007251263 2023-01-22 14:15:10.943968: step: 646/464, loss: 0.039781831204891205 2023-01-22 14:15:11.520138: step: 648/464, loss: 0.2181847244501114 2023-01-22 14:15:12.155126: step: 650/464, loss: 0.044329460710287094 2023-01-22 14:15:12.740038: step: 652/464, loss: 0.018158644437789917 2023-01-22 14:15:13.353826: step: 654/464, loss: 0.3767031729221344 2023-01-22 14:15:14.004756: step: 656/464, loss: 0.0006728554726578295 2023-01-22 14:15:14.615421: step: 658/464, loss: 0.00984268169850111 2023-01-22 14:15:15.201081: step: 660/464, loss: 0.004165737424045801 2023-01-22 14:15:15.870354: step: 662/464, loss: 0.005901847034692764 2023-01-22 14:15:16.477833: step: 664/464, loss: 0.005652338732033968 2023-01-22 14:15:17.087161: step: 666/464, loss: 0.01282537542283535 2023-01-22 14:15:17.762190: step: 668/464, loss: 0.024484090507030487 2023-01-22 
14:15:18.361899: step: 670/464, loss: 0.06346960365772247 2023-01-22 14:15:18.966314: step: 672/464, loss: 0.0004375589778646827 2023-01-22 14:15:19.606444: step: 674/464, loss: 0.010980060324072838 2023-01-22 14:15:20.220250: step: 676/464, loss: 0.0042953877709805965 2023-01-22 14:15:20.831209: step: 678/464, loss: 0.40047112107276917 2023-01-22 14:15:21.448953: step: 680/464, loss: 0.013633492402732372 2023-01-22 14:15:22.077129: step: 682/464, loss: 0.0179133340716362 2023-01-22 14:15:22.723207: step: 684/464, loss: 0.0058382549323141575 2023-01-22 14:15:23.353816: step: 686/464, loss: 0.012440420687198639 2023-01-22 14:15:23.962869: step: 688/464, loss: 0.002699640579521656 2023-01-22 14:15:24.725605: step: 690/464, loss: 0.07426819205284119 2023-01-22 14:15:25.410011: step: 692/464, loss: 2.1392199993133545 2023-01-22 14:15:26.019726: step: 694/464, loss: 0.0050012702122330666 2023-01-22 14:15:26.739471: step: 696/464, loss: 0.020631911233067513 2023-01-22 14:15:27.357861: step: 698/464, loss: 0.0006646870751865208 2023-01-22 14:15:28.036299: step: 700/464, loss: 0.0987074002623558 2023-01-22 14:15:28.656881: step: 702/464, loss: 0.008953984826803207 2023-01-22 14:15:29.259636: step: 704/464, loss: 0.007717052940279245 2023-01-22 14:15:30.009400: step: 706/464, loss: 0.007053047884255648 2023-01-22 14:15:30.640743: step: 708/464, loss: 0.004249035846441984 2023-01-22 14:15:31.268460: step: 710/464, loss: 0.002700645476579666 2023-01-22 14:15:31.879328: step: 712/464, loss: 0.11764708906412125 2023-01-22 14:15:32.499181: step: 714/464, loss: 0.002235703868791461 2023-01-22 14:15:33.139013: step: 716/464, loss: 0.0013340015430003405 2023-01-22 14:15:33.772571: step: 718/464, loss: 0.031899593770504 2023-01-22 14:15:34.393762: step: 720/464, loss: 0.021519597619771957 2023-01-22 14:15:35.040306: step: 722/464, loss: 0.05550776794552803 2023-01-22 14:15:35.674194: step: 724/464, loss: 0.014864136464893818 2023-01-22 14:15:36.295195: step: 726/464, loss: 0.015005495399236679 2023-01-22 14:15:36.923080: step: 728/464, loss: 0.043122515082359314 2023-01-22 14:15:37.556283: step: 730/464, loss: 0.0057805743999779224 2023-01-22 14:15:38.203344: step: 732/464, loss: 0.013059835880994797 2023-01-22 14:15:38.758434: step: 734/464, loss: 0.22827355563640594 2023-01-22 14:15:39.358112: step: 736/464, loss: 0.001594355795532465 2023-01-22 14:15:39.936955: step: 738/464, loss: 0.002286111004650593 2023-01-22 14:15:40.631518: step: 740/464, loss: 0.040832217782735825 2023-01-22 14:15:41.248828: step: 742/464, loss: 0.014757783152163029 2023-01-22 14:15:41.928121: step: 744/464, loss: 0.00119683553930372 2023-01-22 14:15:42.484589: step: 746/464, loss: 0.009748230688273907 2023-01-22 14:15:43.109924: step: 748/464, loss: 0.009451358579099178 2023-01-22 14:15:43.740434: step: 750/464, loss: 0.002357813995331526 2023-01-22 14:15:44.314431: step: 752/464, loss: 0.00039199861930683255 2023-01-22 14:15:45.002394: step: 754/464, loss: 0.0008302823989652097 2023-01-22 14:15:45.663292: step: 756/464, loss: 0.04160595312714577 2023-01-22 14:15:46.264317: step: 758/464, loss: 0.0005140166613273323 2023-01-22 14:15:46.966343: step: 760/464, loss: 0.09402786940336227 2023-01-22 14:15:47.608361: step: 762/464, loss: 0.015791242942214012 2023-01-22 14:15:48.210531: step: 764/464, loss: 0.008117442019283772 2023-01-22 14:15:48.846022: step: 766/464, loss: 0.020854402333498 2023-01-22 14:15:49.483546: step: 768/464, loss: 0.003169047413393855 2023-01-22 14:15:50.094229: step: 770/464, loss: 0.009248015470802784 
2023-01-22 14:15:50.718244: step: 772/464, loss: 0.041055675595998764 2023-01-22 14:15:51.323959: step: 774/464, loss: 0.0022171782329678535 2023-01-22 14:15:51.942021: step: 776/464, loss: 0.022929474711418152 2023-01-22 14:15:52.558704: step: 778/464, loss: 0.011255311779677868 2023-01-22 14:15:53.162796: step: 780/464, loss: 0.01118182111531496 2023-01-22 14:15:53.698047: step: 782/464, loss: 4.8654284910298884e-05 2023-01-22 14:15:54.302578: step: 784/464, loss: 0.004450024571269751 2023-01-22 14:15:54.876699: step: 786/464, loss: 0.01964416168630123 2023-01-22 14:15:55.473112: step: 788/464, loss: 0.006778170820325613 2023-01-22 14:15:56.023077: step: 790/464, loss: 0.004299049731343985 2023-01-22 14:15:56.673578: step: 792/464, loss: 0.0021851633209735155 2023-01-22 14:15:57.318064: step: 794/464, loss: 0.020739721134305 2023-01-22 14:15:57.918422: step: 796/464, loss: 0.008872399106621742 2023-01-22 14:15:58.554702: step: 798/464, loss: 0.00020837679039686918 2023-01-22 14:15:59.125282: step: 800/464, loss: 1.2180484533309937 2023-01-22 14:15:59.805774: step: 802/464, loss: 0.0006499432493001223 2023-01-22 14:16:00.355713: step: 804/464, loss: 0.014890742488205433 2023-01-22 14:16:00.981482: step: 806/464, loss: 0.00425117090344429 2023-01-22 14:16:01.596909: step: 808/464, loss: 0.1405792236328125 2023-01-22 14:16:02.229949: step: 810/464, loss: 0.004117715172469616 2023-01-22 14:16:02.825064: step: 812/464, loss: 0.001980512635782361 2023-01-22 14:16:03.456352: step: 814/464, loss: 0.013748962432146072 2023-01-22 14:16:04.046130: step: 816/464, loss: 0.0009049120708368719 2023-01-22 14:16:04.749702: step: 818/464, loss: 0.05674424394965172 2023-01-22 14:16:05.395882: step: 820/464, loss: 0.08134204149246216 2023-01-22 14:16:05.947364: step: 822/464, loss: 0.13224709033966064 2023-01-22 14:16:06.542349: step: 824/464, loss: 0.030569393187761307 2023-01-22 14:16:07.824832: step: 826/464, loss: 0.00033594819251447916 2023-01-22 14:16:08.439192: step: 828/464, loss: 0.024964628741145134 2023-01-22 14:16:09.061312: step: 830/464, loss: 0.0020226610358804464 2023-01-22 14:16:09.687719: step: 832/464, loss: 0.00052282476099208 2023-01-22 14:16:10.321805: step: 834/464, loss: 0.0018711028387770057 2023-01-22 14:16:10.988619: step: 836/464, loss: 0.004274678882211447 2023-01-22 14:16:11.638847: step: 838/464, loss: 0.0010742597514763474 2023-01-22 14:16:12.250889: step: 840/464, loss: 0.008641785010695457 2023-01-22 14:16:12.839529: step: 842/464, loss: 0.0010377444559708238 2023-01-22 14:16:13.513438: step: 844/464, loss: 0.0007217152742668986 2023-01-22 14:16:14.184567: step: 846/464, loss: 0.00023196318943519145 2023-01-22 14:16:14.795561: step: 848/464, loss: 0.012066647410392761 2023-01-22 14:16:15.395982: step: 850/464, loss: 0.0038928319700062275 2023-01-22 14:16:16.073890: step: 852/464, loss: 0.005979029927402735 2023-01-22 14:16:16.687108: step: 854/464, loss: 0.02144569158554077 2023-01-22 14:16:17.359159: step: 856/464, loss: 0.02421189285814762 2023-01-22 14:16:17.978056: step: 858/464, loss: 0.07788847386837006 2023-01-22 14:16:18.613407: step: 860/464, loss: 0.059540193527936935 2023-01-22 14:16:19.209594: step: 862/464, loss: 0.0012361510889604688 2023-01-22 14:16:19.859719: step: 864/464, loss: 0.020145397633314133 2023-01-22 14:16:20.571520: step: 866/464, loss: 0.007799180690199137 2023-01-22 14:16:21.186460: step: 868/464, loss: 0.0013386164791882038 2023-01-22 14:16:21.803540: step: 870/464, loss: 0.014053912833333015 2023-01-22 14:16:22.352899: step: 872/464, loss: 
0.01760275289416313 2023-01-22 14:16:23.012247: step: 874/464, loss: 0.0513724610209465 2023-01-22 14:16:23.581034: step: 876/464, loss: 0.0005567611078731716 2023-01-22 14:16:24.169252: step: 878/464, loss: 0.01748538762331009 2023-01-22 14:16:24.769659: step: 880/464, loss: 0.047362834215164185 2023-01-22 14:16:25.328875: step: 882/464, loss: 0.00245377654209733 2023-01-22 14:16:25.906690: step: 884/464, loss: 0.005447067320346832 2023-01-22 14:16:26.486863: step: 886/464, loss: 0.0004946636036038399 2023-01-22 14:16:27.139801: step: 888/464, loss: 0.0263107530772686 2023-01-22 14:16:27.755774: step: 890/464, loss: 0.007242008112370968 2023-01-22 14:16:28.399720: step: 892/464, loss: 0.02358938939869404 2023-01-22 14:16:29.066434: step: 894/464, loss: 0.019537916406989098 2023-01-22 14:16:29.687032: step: 896/464, loss: 0.0031970730051398277 2023-01-22 14:16:30.305714: step: 898/464, loss: 0.014965091831982136 2023-01-22 14:16:31.037441: step: 900/464, loss: 0.008327051997184753 2023-01-22 14:16:31.696800: step: 902/464, loss: 0.000980615266598761 2023-01-22 14:16:32.334894: step: 904/464, loss: 0.0018039607675746083 2023-01-22 14:16:32.993018: step: 906/464, loss: 0.020369213074445724 2023-01-22 14:16:33.624713: step: 908/464, loss: 0.0035111133474856615 2023-01-22 14:16:34.338495: step: 910/464, loss: 0.14214608073234558 2023-01-22 14:16:34.958863: step: 912/464, loss: 0.0030803342815488577 2023-01-22 14:16:35.551909: step: 914/464, loss: 0.011297043412923813 2023-01-22 14:16:36.179672: step: 916/464, loss: 0.005871086847037077 2023-01-22 14:16:36.840635: step: 918/464, loss: 0.0007762617897242308 2023-01-22 14:16:37.512396: step: 920/464, loss: 0.0025786582846194506 2023-01-22 14:16:38.088664: step: 922/464, loss: 0.038762062788009644 2023-01-22 14:16:38.651384: step: 924/464, loss: 0.004394220653921366 2023-01-22 14:16:39.284349: step: 926/464, loss: 0.02181841805577278 2023-01-22 14:16:39.905929: step: 928/464, loss: 0.005161995533853769 2023-01-22 14:16:40.411200: step: 930/464, loss: 0.0001706589391687885 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29040674603174604, 'r': 0.347165559772296, 'f1': 0.31625972342264475}, 'combined': 0.2330334804166856, 'epoch': 36} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.296721556467049, 'r': 0.3269937005551395, 'f1': 0.3111229962431061}, 'combined': 0.2031165674954993, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2869081158878986, 'r': 0.3582268316019683, 'f1': 0.3186253843953372}, 'combined': 0.2347765990281432, 'epoch': 36} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3100106286373475, 'r': 0.3259670580525051, 'f1': 0.3177886730834458}, 'combined': 0.20746825289385581, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3039593612938597, 'r': 0.35067797280202406, 'f1': 0.32565161527165937}, 'combined': 0.23995382177911742, 'epoch': 36} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3104216185119001, 'r': 0.3175544682019713, 'f1': 0.3139475342151248}, 'combined': 0.20496056637878612, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.21038251366120217, 'r': 0.36666666666666664, 'f1': 0.2673611111111111}, 'combined': 0.17824074074074073, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.205, 'r': 0.44565217391304346, 'f1': 0.2808219178082192}, 'combined': 0.1404109589041096, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.27586206896551724, 'f1': 0.32653061224489793}, 'combined': 0.21768707482993194, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:19:19.098229: step: 2/464, loss: 0.09267619997262955 2023-01-22 14:19:19.721200: step: 4/464, loss: 0.026597000658512115 2023-01-22 14:19:20.416130: step: 6/464, loss: 0.005423111375421286 2023-01-22 14:19:21.031851: step: 8/464, loss: 0.0020108595490455627 2023-01-22 14:19:21.571721: step: 10/464, loss: 0.10110600292682648 2023-01-22 14:19:22.132240: step: 12/464, loss: 0.013702924363315105 2023-01-22 14:19:22.753692: step: 14/464, loss: 0.03764618560671806 2023-01-22 14:19:23.373479: step: 16/464, loss: 0.01584353670477867 2023-01-22 14:19:23.996324: step: 18/464, loss: 0.0006389468908309937 2023-01-22 14:19:24.646004: step: 20/464, loss: 0.00021083604951854795 
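
In the epoch summaries above, each f1 is the usual harmonic mean of the reported p and r, and the 'combined' figure appears to be the product of the template f1 and the slot f1 (epoch 36 Dev Chinese: 0.7368 × 0.3163 ≈ 0.2330). A minimal sketch of that arithmetic under those assumptions; the helper names are illustrative, not taken from train.py:

```python
def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall."""
    return 2 * p * r / (p + r) if (p + r) else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    """'combined' in the epoch summaries appears to be template_f1 * slot_f1."""
    return template_f1 * slot_f1

# Reproduces the epoch 36 "Dev Chinese" numbers reported above.
template = f1(1.0, 0.5833333333333334)             # ≈ 0.7368421052631579
slot = f1(0.29040674603174604, 0.347165559772296)  # ≈ 0.31625972342264475
print(combined(template, slot))                    # ≈ 0.2330334804166856
```
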
2023-01-22 14:19:25.237472: step: 22/464, loss: 0.003850563894957304 2023-01-22 14:19:25.879952: step: 24/464, loss: 0.015138974413275719 2023-01-22 14:19:26.480389: step: 26/464, loss: 0.003434586338698864 2023-01-22 14:19:27.176935: step: 28/464, loss: 0.2523220479488373 2023-01-22 14:19:27.823948: step: 30/464, loss: 0.0033589720260351896 2023-01-22 14:19:28.491440: step: 32/464, loss: 0.022264551371335983 2023-01-22 14:19:29.080571: step: 34/464, loss: 0.009132283739745617 2023-01-22 14:19:29.749966: step: 36/464, loss: 0.0029106466099619865 2023-01-22 14:19:30.393674: step: 38/464, loss: 0.0030777426436543465 2023-01-22 14:19:30.969971: step: 40/464, loss: 0.0011019902303814888 2023-01-22 14:19:31.667599: step: 42/464, loss: 0.004676553886383772 2023-01-22 14:19:32.236296: step: 44/464, loss: 0.0007982408860698342 2023-01-22 14:19:32.897696: step: 46/464, loss: 0.011835400015115738 2023-01-22 14:19:33.509323: step: 48/464, loss: 0.06304922699928284 2023-01-22 14:19:34.091211: step: 50/464, loss: 5.2777814865112305 2023-01-22 14:19:34.764773: step: 52/464, loss: 0.003288473468273878 2023-01-22 14:19:35.464041: step: 54/464, loss: 0.0034744839649647474 2023-01-22 14:19:36.088106: step: 56/464, loss: 0.014087451621890068 2023-01-22 14:19:36.680325: step: 58/464, loss: 0.06869802623987198 2023-01-22 14:19:37.321735: step: 60/464, loss: 0.09902717173099518 2023-01-22 14:19:38.012653: step: 62/464, loss: 0.2621070444583893 2023-01-22 14:19:38.615329: step: 64/464, loss: 0.006215594243258238 2023-01-22 14:19:39.242469: step: 66/464, loss: 0.0017401105724275112 2023-01-22 14:19:39.878084: step: 68/464, loss: 0.0027333374600857496 2023-01-22 14:19:40.424650: step: 70/464, loss: 0.006508005317300558 2023-01-22 14:19:40.997282: step: 72/464, loss: 0.0001003606230369769 2023-01-22 14:19:41.622882: step: 74/464, loss: 0.015184964053332806 2023-01-22 14:19:42.239121: step: 76/464, loss: 0.006664635613560677 2023-01-22 14:19:42.880136: step: 78/464, loss: 0.013055585324764252 2023-01-22 14:19:43.504161: step: 80/464, loss: 0.010733279399573803 2023-01-22 14:19:44.059134: step: 82/464, loss: 0.00017453398322686553 2023-01-22 14:19:44.656398: step: 84/464, loss: 0.012425980530679226 2023-01-22 14:19:45.246787: step: 86/464, loss: 0.008501471020281315 2023-01-22 14:19:45.907784: step: 88/464, loss: 0.0169044379144907 2023-01-22 14:19:46.569464: step: 90/464, loss: 0.002584279514849186 2023-01-22 14:19:47.128451: step: 92/464, loss: 0.006378215737640858 2023-01-22 14:19:47.823119: step: 94/464, loss: 0.0023710145615041256 2023-01-22 14:19:48.493616: step: 96/464, loss: 0.020976275205612183 2023-01-22 14:19:49.148709: step: 98/464, loss: 0.01542168203741312 2023-01-22 14:19:49.741672: step: 100/464, loss: 0.02872079610824585 2023-01-22 14:19:50.412959: step: 102/464, loss: 0.0529462993144989 2023-01-22 14:19:51.004255: step: 104/464, loss: 0.0027728017885237932 2023-01-22 14:19:51.608156: step: 106/464, loss: 0.04583406448364258 2023-01-22 14:19:52.272404: step: 108/464, loss: 0.04017186909914017 2023-01-22 14:19:52.931058: step: 110/464, loss: 0.004438281990587711 2023-01-22 14:19:53.541853: step: 112/464, loss: 0.5971619486808777 2023-01-22 14:19:54.128539: step: 114/464, loss: 0.018442340195178986 2023-01-22 14:19:54.765896: step: 116/464, loss: 0.002727809129282832 2023-01-22 14:19:55.410876: step: 118/464, loss: 0.013315348885953426 2023-01-22 14:19:56.010546: step: 120/464, loss: 7.021363671810832e-06 2023-01-22 14:19:56.578821: step: 122/464, loss: 0.007461446337401867 2023-01-22 14:19:57.216896: 
step: 124/464, loss: 0.004144869279116392 2023-01-22 14:19:57.794976: step: 126/464, loss: 0.0007806203211657703 2023-01-22 14:19:58.448059: step: 128/464, loss: 0.008138487115502357 2023-01-22 14:19:59.022713: step: 130/464, loss: 0.026130348443984985 2023-01-22 14:19:59.723566: step: 132/464, loss: 0.013566428795456886 2023-01-22 14:20:00.328473: step: 134/464, loss: 0.01589006558060646 2023-01-22 14:20:00.971784: step: 136/464, loss: 0.02959158644080162 2023-01-22 14:20:01.597685: step: 138/464, loss: 0.0009720510570332408 2023-01-22 14:20:02.168885: step: 140/464, loss: 0.031041495501995087 2023-01-22 14:20:02.806652: step: 142/464, loss: 0.12288791686296463 2023-01-22 14:20:03.510989: step: 144/464, loss: 0.020230216905474663 2023-01-22 14:20:04.121866: step: 146/464, loss: 0.013478816486895084 2023-01-22 14:20:04.709742: step: 148/464, loss: 0.004989316686987877 2023-01-22 14:20:05.297284: step: 150/464, loss: 0.007645792793482542 2023-01-22 14:20:05.860563: step: 152/464, loss: 0.0022924228105694056 2023-01-22 14:20:06.490548: step: 154/464, loss: 0.0008439055527560413 2023-01-22 14:20:07.204279: step: 156/464, loss: 0.0036724477540701628 2023-01-22 14:20:07.915835: step: 158/464, loss: 0.03467431664466858 2023-01-22 14:20:08.523057: step: 160/464, loss: 0.002105517778545618 2023-01-22 14:20:09.069129: step: 162/464, loss: 0.0003491532406769693 2023-01-22 14:20:09.706372: step: 164/464, loss: 0.0011833166936412454 2023-01-22 14:20:10.244325: step: 166/464, loss: 0.004322324879467487 2023-01-22 14:20:10.858225: step: 168/464, loss: 0.0022278232499957085 2023-01-22 14:20:11.522007: step: 170/464, loss: 0.02636023610830307 2023-01-22 14:20:12.136929: step: 172/464, loss: 0.0285699050873518 2023-01-22 14:20:12.759287: step: 174/464, loss: 0.0008239694871008396 2023-01-22 14:20:13.364664: step: 176/464, loss: 0.00035955157363787293 2023-01-22 14:20:13.997240: step: 178/464, loss: 0.05594809353351593 2023-01-22 14:20:14.628206: step: 180/464, loss: 0.01072358526289463 2023-01-22 14:20:15.223765: step: 182/464, loss: 0.0035648152697831392 2023-01-22 14:20:15.831579: step: 184/464, loss: 0.015191210433840752 2023-01-22 14:20:16.459614: step: 186/464, loss: 0.0001803378399927169 2023-01-22 14:20:17.070091: step: 188/464, loss: 0.0023557611275464296 2023-01-22 14:20:17.691752: step: 190/464, loss: 0.009699953719973564 2023-01-22 14:20:18.230559: step: 192/464, loss: 0.0130624333396554 2023-01-22 14:20:18.801910: step: 194/464, loss: 0.010105582885444164 2023-01-22 14:20:19.406504: step: 196/464, loss: 0.00039616189314983785 2023-01-22 14:20:20.015686: step: 198/464, loss: 0.008077583275735378 2023-01-22 14:20:20.652342: step: 200/464, loss: 0.014863918535411358 2023-01-22 14:20:21.254981: step: 202/464, loss: 0.025830352678894997 2023-01-22 14:20:21.871506: step: 204/464, loss: 0.008622733876109123 2023-01-22 14:20:22.511173: step: 206/464, loss: 0.006310919299721718 2023-01-22 14:20:23.138556: step: 208/464, loss: 0.004576168488711119 2023-01-22 14:20:23.800758: step: 210/464, loss: 0.004799055866897106 2023-01-22 14:20:24.436954: step: 212/464, loss: 0.010103247128427029 2023-01-22 14:20:25.134222: step: 214/464, loss: 0.028001364320516586 2023-01-22 14:20:25.799189: step: 216/464, loss: 0.02767540141940117 2023-01-22 14:20:26.373894: step: 218/464, loss: 0.03786350414156914 2023-01-22 14:20:26.965642: step: 220/464, loss: 0.0211105365306139 2023-01-22 14:20:27.602844: step: 222/464, loss: 0.024852849543094635 2023-01-22 14:20:28.225810: step: 224/464, loss: 0.0018844869919121265 2023-01-22 
14:20:28.890992: step: 226/464, loss: 0.0008984083542600274 2023-01-22 14:20:29.483912: step: 228/464, loss: 0.003061442170292139 2023-01-22 14:20:30.073768: step: 230/464, loss: 0.005059152841567993 2023-01-22 14:20:30.733743: step: 232/464, loss: 0.010660984553396702 2023-01-22 14:20:31.391285: step: 234/464, loss: 0.04760711267590523 2023-01-22 14:20:32.106338: step: 236/464, loss: 0.0017878114013001323 2023-01-22 14:20:32.787122: step: 238/464, loss: 0.000544917129445821 2023-01-22 14:20:33.365701: step: 240/464, loss: 0.002339319558814168 2023-01-22 14:20:34.076210: step: 242/464, loss: 0.0008954803925007582 2023-01-22 14:20:34.647619: step: 244/464, loss: 0.01935073919594288 2023-01-22 14:20:35.207407: step: 246/464, loss: 0.0002384855761192739 2023-01-22 14:20:35.790749: step: 248/464, loss: 0.009137220680713654 2023-01-22 14:20:36.395773: step: 250/464, loss: 0.00015304001863114536 2023-01-22 14:20:37.050839: step: 252/464, loss: 0.12504711747169495 2023-01-22 14:20:37.648998: step: 254/464, loss: 0.002635303186252713 2023-01-22 14:20:38.216410: step: 256/464, loss: 0.020652201026678085 2023-01-22 14:20:38.832665: step: 258/464, loss: 0.0019404953345656395 2023-01-22 14:20:39.459539: step: 260/464, loss: 0.006000785622745752 2023-01-22 14:20:40.137273: step: 262/464, loss: 0.028538094833493233 2023-01-22 14:20:40.744814: step: 264/464, loss: 0.01821240969002247 2023-01-22 14:20:41.349253: step: 266/464, loss: 0.008518668822944164 2023-01-22 14:20:41.973813: step: 268/464, loss: 0.4000067114830017 2023-01-22 14:20:42.598439: step: 270/464, loss: 0.0006602701032534242 2023-01-22 14:20:43.378642: step: 272/464, loss: 0.03606516122817993 2023-01-22 14:20:43.987764: step: 274/464, loss: 0.00956253707408905 2023-01-22 14:20:44.557594: step: 276/464, loss: 0.0006510717212222517 2023-01-22 14:20:45.164418: step: 278/464, loss: 0.0020940338727086782 2023-01-22 14:20:45.749028: step: 280/464, loss: 0.005512750707566738 2023-01-22 14:20:46.293156: step: 282/464, loss: 0.00011423335672589019 2023-01-22 14:20:46.869098: step: 284/464, loss: 0.00028186841518618166 2023-01-22 14:20:47.429567: step: 286/464, loss: 0.0005944963777437806 2023-01-22 14:20:47.989631: step: 288/464, loss: 0.003952810075134039 2023-01-22 14:20:48.658421: step: 290/464, loss: 0.009363141842186451 2023-01-22 14:20:49.330275: step: 292/464, loss: 0.014664217829704285 2023-01-22 14:20:49.874749: step: 294/464, loss: 0.0002833757607731968 2023-01-22 14:20:50.486871: step: 296/464, loss: 0.021436164155602455 2023-01-22 14:20:51.148858: step: 298/464, loss: 0.004286719486117363 2023-01-22 14:20:51.837359: step: 300/464, loss: 0.19252799451351166 2023-01-22 14:20:52.435781: step: 302/464, loss: 0.005125043913722038 2023-01-22 14:20:53.041800: step: 304/464, loss: 0.00039617405855096877 2023-01-22 14:20:53.656582: step: 306/464, loss: 0.0016010634135454893 2023-01-22 14:20:54.260849: step: 308/464, loss: 0.012710998766124249 2023-01-22 14:20:54.899424: step: 310/464, loss: 0.010261507704854012 2023-01-22 14:20:55.513772: step: 312/464, loss: 1.8873350200010464e-05 2023-01-22 14:20:56.066320: step: 314/464, loss: 0.0005399395013228059 2023-01-22 14:20:56.641736: step: 316/464, loss: 0.06412962824106216 2023-01-22 14:20:57.281434: step: 318/464, loss: 0.0034419228322803974 2023-01-22 14:20:57.881194: step: 320/464, loss: 0.013939480297267437 2023-01-22 14:20:58.452682: step: 322/464, loss: 0.008041913621127605 2023-01-22 14:20:59.059518: step: 324/464, loss: 0.2070617526769638 2023-01-22 14:20:59.766295: step: 326/464, loss: 
0.016397660598158836 2023-01-22 14:21:00.408050: step: 328/464, loss: 0.0075540849938988686 2023-01-22 14:21:01.027522: step: 330/464, loss: 0.027589106932282448 2023-01-22 14:21:01.630755: step: 332/464, loss: 0.0036028767935931683 2023-01-22 14:21:02.277668: step: 334/464, loss: 0.023024383932352066 2023-01-22 14:21:02.909987: step: 336/464, loss: 0.13009671866893768 2023-01-22 14:21:03.571319: step: 338/464, loss: 0.03645065799355507 2023-01-22 14:21:04.179452: step: 340/464, loss: 0.024611737579107285 2023-01-22 14:21:04.832511: step: 342/464, loss: 0.0030728767160326242 2023-01-22 14:21:05.377627: step: 344/464, loss: 0.0013950022403150797 2023-01-22 14:21:06.049499: step: 346/464, loss: 0.020612915977835655 2023-01-22 14:21:06.655164: step: 348/464, loss: 0.0010369179071858525 2023-01-22 14:21:07.299160: step: 350/464, loss: 0.08027210831642151 2023-01-22 14:21:07.901482: step: 352/464, loss: 0.5803027749061584 2023-01-22 14:21:08.540309: step: 354/464, loss: 0.00014347555406857282 2023-01-22 14:21:09.128071: step: 356/464, loss: 0.00094506551977247 2023-01-22 14:21:09.677764: step: 358/464, loss: 0.0030374499037861824 2023-01-22 14:21:10.357572: step: 360/464, loss: 0.004331836476922035 2023-01-22 14:21:11.045586: step: 362/464, loss: 0.0029369716066867113 2023-01-22 14:21:11.674150: step: 364/464, loss: 0.003449542447924614 2023-01-22 14:21:12.290645: step: 366/464, loss: 0.13225261867046356 2023-01-22 14:21:12.881003: step: 368/464, loss: 0.0019141642842441797 2023-01-22 14:21:13.532486: step: 370/464, loss: 0.04574725404381752 2023-01-22 14:21:14.129730: step: 372/464, loss: 0.0033610507380217314 2023-01-22 14:21:14.733076: step: 374/464, loss: 0.21356722712516785 2023-01-22 14:21:15.323938: step: 376/464, loss: 0.0360436886548996 2023-01-22 14:21:16.054807: step: 378/464, loss: 0.038373976945877075 2023-01-22 14:21:16.766517: step: 380/464, loss: 0.005195892881602049 2023-01-22 14:21:17.348486: step: 382/464, loss: 0.004966085311025381 2023-01-22 14:21:17.884181: step: 384/464, loss: 0.0003348653845023364 2023-01-22 14:21:18.524602: step: 386/464, loss: 0.00865214318037033 2023-01-22 14:21:19.178696: step: 388/464, loss: 0.0016444892389699817 2023-01-22 14:21:19.816749: step: 390/464, loss: 0.0025462752673774958 2023-01-22 14:21:20.493832: step: 392/464, loss: 0.005985606927424669 2023-01-22 14:21:21.137216: step: 394/464, loss: 0.01848895289003849 2023-01-22 14:21:21.759803: step: 396/464, loss: 0.014712951146066189 2023-01-22 14:21:22.364168: step: 398/464, loss: 0.01997094601392746 2023-01-22 14:21:23.016847: step: 400/464, loss: 0.0033737528137862682 2023-01-22 14:21:23.722049: step: 402/464, loss: 0.08923365920782089 2023-01-22 14:21:24.388358: step: 404/464, loss: 0.007990365847945213 2023-01-22 14:21:25.007372: step: 406/464, loss: 0.0056911977007985115 2023-01-22 14:21:25.607623: step: 408/464, loss: 0.006825309246778488 2023-01-22 14:21:26.217329: step: 410/464, loss: 0.00802522711455822 2023-01-22 14:21:26.823271: step: 412/464, loss: 0.00013568451686296612 2023-01-22 14:21:27.380488: step: 414/464, loss: 0.04462343081831932 2023-01-22 14:21:27.992458: step: 416/464, loss: 0.0414571575820446 2023-01-22 14:21:28.616814: step: 418/464, loss: 0.045396171510219574 2023-01-22 14:21:29.248713: step: 420/464, loss: 0.0010843529598787427 2023-01-22 14:21:29.874792: step: 422/464, loss: 0.0035425713285803795 2023-01-22 14:21:30.467414: step: 424/464, loss: 0.01614678092300892 2023-01-22 14:21:31.102672: step: 426/464, loss: 0.0030302645172923803 2023-01-22 14:21:31.672767: 
step: 428/464, loss: 0.0004536340420600027 2023-01-22 14:21:32.299171: step: 430/464, loss: 9.008437336888164e-05 2023-01-22 14:21:32.902539: step: 432/464, loss: 0.008234814740717411 2023-01-22 14:21:33.559741: step: 434/464, loss: 0.008559430949389935 2023-01-22 14:21:34.147047: step: 436/464, loss: 0.0018182012718170881 2023-01-22 14:21:34.842083: step: 438/464, loss: 6.693278464808827e-06 2023-01-22 14:21:35.511729: step: 440/464, loss: 0.0007661496638320386 2023-01-22 14:21:36.111568: step: 442/464, loss: 0.00024162699992302805 2023-01-22 14:21:36.671587: step: 444/464, loss: 0.001283713267184794 2023-01-22 14:21:37.329229: step: 446/464, loss: 0.01634528674185276 2023-01-22 14:21:37.910650: step: 448/464, loss: 0.019098889082670212 2023-01-22 14:21:38.556625: step: 450/464, loss: 0.0008116748067550361 2023-01-22 14:21:39.136989: step: 452/464, loss: 0.29386022686958313 2023-01-22 14:21:39.833179: step: 454/464, loss: 0.042279280722141266 2023-01-22 14:21:40.467711: step: 456/464, loss: 0.0006630786811001599 2023-01-22 14:21:41.069865: step: 458/464, loss: 0.0030012684874236584 2023-01-22 14:21:41.668427: step: 460/464, loss: 0.06653464585542679 2023-01-22 14:21:42.337263: step: 462/464, loss: 0.018292022868990898 2023-01-22 14:21:43.019298: step: 464/464, loss: 0.014761765487492085 2023-01-22 14:21:43.632914: step: 466/464, loss: 0.009232250042259693 2023-01-22 14:21:44.264264: step: 468/464, loss: 0.0020965656731277704 2023-01-22 14:21:44.991623: step: 470/464, loss: 0.0011324052466079593 2023-01-22 14:21:45.617962: step: 472/464, loss: 0.08012159913778305 2023-01-22 14:21:46.292100: step: 474/464, loss: 0.008564132265746593 2023-01-22 14:21:46.925295: step: 476/464, loss: 0.0025925911031663418 2023-01-22 14:21:47.607298: step: 478/464, loss: 0.009631761349737644 2023-01-22 14:21:48.238226: step: 480/464, loss: 0.013792794197797775 2023-01-22 14:21:48.821486: step: 482/464, loss: 0.8465481996536255 2023-01-22 14:21:49.391155: step: 484/464, loss: 0.025237884372472763 2023-01-22 14:21:50.031435: step: 486/464, loss: 0.06432559341192245 2023-01-22 14:21:50.715870: step: 488/464, loss: 0.004095649812370539 2023-01-22 14:21:51.325703: step: 490/464, loss: 0.00688563659787178 2023-01-22 14:21:51.928886: step: 492/464, loss: 0.0009884837782010436 2023-01-22 14:21:52.532597: step: 494/464, loss: 0.0033845400903373957 2023-01-22 14:21:53.187424: step: 496/464, loss: 0.06000358238816261 2023-01-22 14:21:53.818214: step: 498/464, loss: 0.059953149408102036 2023-01-22 14:21:54.458265: step: 500/464, loss: 0.0382724367082119 2023-01-22 14:21:55.110324: step: 502/464, loss: 0.010647490620613098 2023-01-22 14:21:55.854645: step: 504/464, loss: 0.014845588244497776 2023-01-22 14:21:56.424261: step: 506/464, loss: 0.15880149602890015 2023-01-22 14:21:57.086505: step: 508/464, loss: 0.04840749502182007 2023-01-22 14:21:57.714578: step: 510/464, loss: 0.01429628673940897 2023-01-22 14:21:58.322147: step: 512/464, loss: 0.00044565758435055614 2023-01-22 14:21:58.939499: step: 514/464, loss: 0.007999812252819538 2023-01-22 14:21:59.502342: step: 516/464, loss: 0.06677156686782837 2023-01-22 14:22:00.134657: step: 518/464, loss: 0.00020486098947003484 2023-01-22 14:22:00.780292: step: 520/464, loss: 0.0037851138040423393 2023-01-22 14:22:01.425119: step: 522/464, loss: 0.002987146843224764 2023-01-22 14:22:01.994940: step: 524/464, loss: 0.00014971356722526252 2023-01-22 14:22:02.647382: step: 526/464, loss: 0.00678643211722374 2023-01-22 14:22:03.298753: step: 528/464, loss: 0.04216880723834038 
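
The command line repeated at each epoch header passes two learning rates: --xlmr_learning_rate 2e-5 and --learning_rate 9e-4, presumably a small rate for the pretrained XLM-R encoder and a larger one for the event/role heads. The log does not show how train.py wires this up; a common way to get such a split, sketched here with an assumed AdamW optimizer and an assumed `xlmr` module prefix, is to use separate optimizer parameter groups:

```python
import torch

def build_optimizer(model, xlmr_lr: float = 2e-5, head_lr: float = 9e-4):
    """Two parameter groups: a small LR for the pretrained encoder, a larger one for the rest.

    Assumes the encoder is exposed as a submodule named "xlmr"; every other
    parameter (event/role heads, etc.) gets the larger learning rate.
    """
    encoder_params, head_params = [], []
    for name, param in model.named_parameters():
        (encoder_params if name.startswith("xlmr.") else head_params).append(param)
    return torch.optim.AdamW([
        {"params": encoder_params, "lr": xlmr_lr},
        {"params": head_params, "lr": head_lr},
    ])
```
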
2023-01-22 14:22:03.985023: step: 530/464, loss: 0.00484335795044899 2023-01-22 14:22:04.645276: step: 532/464, loss: 0.12765325605869293 2023-01-22 14:22:05.231553: step: 534/464, loss: 0.001677449676208198 2023-01-22 14:22:05.862614: step: 536/464, loss: 0.0025106756947934628 2023-01-22 14:22:06.470561: step: 538/464, loss: 0.009580564685165882 2023-01-22 14:22:07.096200: step: 540/464, loss: 0.10899461805820465 2023-01-22 14:22:07.722382: step: 542/464, loss: 0.00041793863056227565 2023-01-22 14:22:08.448938: step: 544/464, loss: 0.008387402631342411 2023-01-22 14:22:09.059222: step: 546/464, loss: 0.010032331570982933 2023-01-22 14:22:09.680883: step: 548/464, loss: 0.016491416841745377 2023-01-22 14:22:10.306352: step: 550/464, loss: 0.001153131597675383 2023-01-22 14:22:10.930714: step: 552/464, loss: 0.1899278461933136 2023-01-22 14:22:11.564896: step: 554/464, loss: 0.0023474283516407013 2023-01-22 14:22:12.169941: step: 556/464, loss: 0.00033688393887132406 2023-01-22 14:22:12.733151: step: 558/464, loss: 0.12733258306980133 2023-01-22 14:22:13.375541: step: 560/464, loss: 0.0013179974630475044 2023-01-22 14:22:13.964189: step: 562/464, loss: 0.002366940723732114 2023-01-22 14:22:14.592765: step: 564/464, loss: 0.0010238544782623649 2023-01-22 14:22:15.238075: step: 566/464, loss: 0.11703494191169739 2023-01-22 14:22:15.799573: step: 568/464, loss: 0.003410003613680601 2023-01-22 14:22:16.341703: step: 570/464, loss: 0.0008537861285731196 2023-01-22 14:22:16.920329: step: 572/464, loss: 0.0039034802466630936 2023-01-22 14:22:17.573529: step: 574/464, loss: 0.010530706495046616 2023-01-22 14:22:18.176830: step: 576/464, loss: 0.009586167521774769 2023-01-22 14:22:18.828287: step: 578/464, loss: 0.007921237498521805 2023-01-22 14:22:19.425079: step: 580/464, loss: 0.013114217668771744 2023-01-22 14:22:20.044791: step: 582/464, loss: 0.0015510583762079477 2023-01-22 14:22:20.660947: step: 584/464, loss: 0.0006341558764688671 2023-01-22 14:22:21.260410: step: 586/464, loss: 0.007430794648826122 2023-01-22 14:22:21.843871: step: 588/464, loss: 0.034198254346847534 2023-01-22 14:22:22.474610: step: 590/464, loss: 0.036947060376405716 2023-01-22 14:22:23.105800: step: 592/464, loss: 5.735832382924855e-05 2023-01-22 14:22:23.760149: step: 594/464, loss: 0.013463602401316166 2023-01-22 14:22:24.462518: step: 596/464, loss: 0.06106730177998543 2023-01-22 14:22:25.037919: step: 598/464, loss: 0.0011482078116387129 2023-01-22 14:22:25.632967: step: 600/464, loss: 0.002552524907514453 2023-01-22 14:22:26.271860: step: 602/464, loss: 0.016755448654294014 2023-01-22 14:22:26.896660: step: 604/464, loss: 0.03485510125756264 2023-01-22 14:22:27.597066: step: 606/464, loss: 1.016755223274231 2023-01-22 14:22:28.257365: step: 608/464, loss: 0.0009947152575477958 2023-01-22 14:22:28.843668: step: 610/464, loss: 0.008130676113069057 2023-01-22 14:22:29.496810: step: 612/464, loss: 0.009686904959380627 2023-01-22 14:22:30.096167: step: 614/464, loss: 0.02364383079111576 2023-01-22 14:22:30.733402: step: 616/464, loss: 0.0007474590674974024 2023-01-22 14:22:31.340574: step: 618/464, loss: 0.0047137551009655 2023-01-22 14:22:31.957347: step: 620/464, loss: 0.000658941688016057 2023-01-22 14:22:32.550021: step: 622/464, loss: 0.0004643475986085832 2023-01-22 14:22:33.138771: step: 624/464, loss: 0.2423620969057083 2023-01-22 14:22:33.728920: step: 626/464, loss: 0.001640212256461382 2023-01-22 14:22:34.327547: step: 628/464, loss: 0.025120731443166733 2023-01-22 14:22:34.964080: step: 630/464, loss: 
0.012573404237627983 2023-01-22 14:22:35.580745: step: 632/464, loss: 0.007707800250500441 2023-01-22 14:22:36.181366: step: 634/464, loss: 0.039470117539167404 2023-01-22 14:22:36.821353: step: 636/464, loss: 0.03454526141285896 2023-01-22 14:22:37.405519: step: 638/464, loss: 0.0012105669593438506 2023-01-22 14:22:38.048621: step: 640/464, loss: 0.02095707319676876 2023-01-22 14:22:38.635672: step: 642/464, loss: 0.009889095090329647 2023-01-22 14:22:39.205761: step: 644/464, loss: 0.014576292596757412 2023-01-22 14:22:39.813190: step: 646/464, loss: 0.00138474116101861 2023-01-22 14:22:40.484534: step: 648/464, loss: 0.00338058196939528 2023-01-22 14:22:41.108494: step: 650/464, loss: 0.0009252754971385002 2023-01-22 14:22:41.759092: step: 652/464, loss: 0.00416414812207222 2023-01-22 14:22:42.318767: step: 654/464, loss: 0.00028824794571846724 2023-01-22 14:22:42.944976: step: 656/464, loss: 0.0022176536731421947 2023-01-22 14:22:43.612645: step: 658/464, loss: 0.03203499689698219 2023-01-22 14:22:44.241070: step: 660/464, loss: 0.0019834109116345644 2023-01-22 14:22:44.897690: step: 662/464, loss: 0.001044319011271 2023-01-22 14:22:45.627468: step: 664/464, loss: 0.030004270374774933 2023-01-22 14:22:46.335789: step: 666/464, loss: 0.06255772709846497 2023-01-22 14:22:46.936709: step: 668/464, loss: 7.002799975452945e-06 2023-01-22 14:22:47.581886: step: 670/464, loss: 0.01589263416826725 2023-01-22 14:22:48.230116: step: 672/464, loss: 0.03454243019223213 2023-01-22 14:22:48.917784: step: 674/464, loss: 0.08794011175632477 2023-01-22 14:22:49.569964: step: 676/464, loss: 0.005668723955750465 2023-01-22 14:22:50.180880: step: 678/464, loss: 0.017416013404726982 2023-01-22 14:22:50.792309: step: 680/464, loss: 0.15912096202373505 2023-01-22 14:22:51.404774: step: 682/464, loss: 0.0011662282049655914 2023-01-22 14:22:52.073137: step: 684/464, loss: 0.007506620604544878 2023-01-22 14:22:52.709483: step: 686/464, loss: 1.0766023397445679 2023-01-22 14:22:53.343427: step: 688/464, loss: 7.692611688980833e-05 2023-01-22 14:22:54.023855: step: 690/464, loss: 0.00015255837934091687 2023-01-22 14:22:54.640099: step: 692/464, loss: 0.003920267801731825 2023-01-22 14:22:55.193276: step: 694/464, loss: 0.008264812640845776 2023-01-22 14:22:55.821426: step: 696/464, loss: 0.02437479980289936 2023-01-22 14:22:56.381334: step: 698/464, loss: 0.00026347560924477875 2023-01-22 14:22:57.004385: step: 700/464, loss: 0.0008082684362307191 2023-01-22 14:22:57.604423: step: 702/464, loss: 0.05713580548763275 2023-01-22 14:22:58.180201: step: 704/464, loss: 0.0007922378135845065 2023-01-22 14:22:58.798099: step: 706/464, loss: 0.10754930973052979 2023-01-22 14:22:59.404371: step: 708/464, loss: 2.2035123038222082e-05 2023-01-22 14:22:59.983222: step: 710/464, loss: 0.012474643997848034 2023-01-22 14:23:00.543349: step: 712/464, loss: 6.510557432193309e-05 2023-01-22 14:23:01.179849: step: 714/464, loss: 0.009142348542809486 2023-01-22 14:23:01.789841: step: 716/464, loss: 0.01799674890935421 2023-01-22 14:23:02.538120: step: 718/464, loss: 0.4685097634792328 2023-01-22 14:23:03.148496: step: 720/464, loss: 0.00978299044072628 2023-01-22 14:23:03.712066: step: 722/464, loss: 0.012385339476168156 2023-01-22 14:23:04.266017: step: 724/464, loss: 0.004721686244010925 2023-01-22 14:23:04.862569: step: 726/464, loss: 0.005557945929467678 2023-01-22 14:23:05.441540: step: 728/464, loss: 0.003821711055934429 2023-01-22 14:23:06.051227: step: 730/464, loss: 0.0028337466064840555 2023-01-22 14:23:06.667323: step: 
732/464, loss: 0.016650592908263206 2023-01-22 14:23:07.297766: step: 734/464, loss: 0.0019164554541930556 2023-01-22 14:23:07.922880: step: 736/464, loss: 0.00016860793402884156 2023-01-22 14:23:08.534571: step: 738/464, loss: 0.005961798131465912 2023-01-22 14:23:09.086142: step: 740/464, loss: 0.0008520600385963917 2023-01-22 14:23:09.709736: step: 742/464, loss: 0.041630685329437256 2023-01-22 14:23:10.302775: step: 744/464, loss: 0.017452936619520187 2023-01-22 14:23:10.874980: step: 746/464, loss: 0.0014399095671251416 2023-01-22 14:23:11.436149: step: 748/464, loss: 0.003561159363016486 2023-01-22 14:23:12.061614: step: 750/464, loss: 0.009633861482143402 2023-01-22 14:23:12.632950: step: 752/464, loss: 0.0030405675061047077 2023-01-22 14:23:13.217593: step: 754/464, loss: 0.009415525943040848 2023-01-22 14:23:13.822903: step: 756/464, loss: 0.09051462262868881 2023-01-22 14:23:14.470490: step: 758/464, loss: 0.02376624383032322 2023-01-22 14:23:15.073197: step: 760/464, loss: 0.03737789765000343 2023-01-22 14:23:15.700261: step: 762/464, loss: 0.0007035199669189751 2023-01-22 14:23:16.282304: step: 764/464, loss: 0.010482733137905598 2023-01-22 14:23:16.901552: step: 766/464, loss: 0.0009111549006775022 2023-01-22 14:23:17.559631: step: 768/464, loss: 0.06286580115556717 2023-01-22 14:23:18.178707: step: 770/464, loss: 0.0038697372656315565 2023-01-22 14:23:18.832427: step: 772/464, loss: 0.016095953062176704 2023-01-22 14:23:19.404036: step: 774/464, loss: 0.0009386722231283784 2023-01-22 14:23:20.000637: step: 776/464, loss: 0.002113762078806758 2023-01-22 14:23:20.548108: step: 778/464, loss: 0.004145435523241758 2023-01-22 14:23:21.163912: step: 780/464, loss: 0.004087845329195261 2023-01-22 14:23:21.803478: step: 782/464, loss: 0.46168965101242065 2023-01-22 14:23:22.357484: step: 784/464, loss: 0.0006339970277622342 2023-01-22 14:23:23.092754: step: 786/464, loss: 0.0003590055275708437 2023-01-22 14:23:23.746140: step: 788/464, loss: 0.04442901164293289 2023-01-22 14:23:24.354883: step: 790/464, loss: 0.011943993158638477 2023-01-22 14:23:24.934141: step: 792/464, loss: 0.005789447575807571 2023-01-22 14:23:25.539104: step: 794/464, loss: 0.00178483163472265 2023-01-22 14:23:26.187235: step: 796/464, loss: 0.025112107396125793 2023-01-22 14:23:26.831084: step: 798/464, loss: 0.0019318273989483714 2023-01-22 14:23:27.463018: step: 800/464, loss: 0.004134173039346933 2023-01-22 14:23:28.074905: step: 802/464, loss: 0.01001482829451561 2023-01-22 14:23:28.718658: step: 804/464, loss: 0.006891184486448765 2023-01-22 14:23:29.319797: step: 806/464, loss: 0.016399085521697998 2023-01-22 14:23:29.936227: step: 808/464, loss: 0.08795420080423355 2023-01-22 14:23:30.578714: step: 810/464, loss: 0.006551109254360199 2023-01-22 14:23:31.153226: step: 812/464, loss: 0.037414442747831345 2023-01-22 14:23:31.705417: step: 814/464, loss: 0.00023505538410972804 2023-01-22 14:23:32.345241: step: 816/464, loss: 0.005884307436645031 2023-01-22 14:23:32.964781: step: 818/464, loss: 0.5972034931182861 2023-01-22 14:23:33.613835: step: 820/464, loss: 0.033281926065683365 2023-01-22 14:23:34.260319: step: 822/464, loss: 0.002844218397513032 2023-01-22 14:23:34.886645: step: 824/464, loss: 0.0022140166256576777 2023-01-22 14:23:35.529264: step: 826/464, loss: 0.06122094765305519 2023-01-22 14:23:36.146087: step: 828/464, loss: 0.02330681122839451 2023-01-22 14:23:36.736705: step: 830/464, loss: 0.00125783565454185 2023-01-22 14:23:37.436097: step: 832/464, loss: 0.0038115172646939754 2023-01-22 
14:23:38.093203: step: 834/464, loss: 0.03195692598819733 2023-01-22 14:23:38.730167: step: 836/464, loss: 0.009772353805601597 2023-01-22 14:23:39.335421: step: 838/464, loss: 0.008238757960498333 2023-01-22 14:23:39.950495: step: 840/464, loss: 0.008015172556042671 2023-01-22 14:23:40.508300: step: 842/464, loss: 0.014947726391255856 2023-01-22 14:23:41.096163: step: 844/464, loss: 0.00028287232271395624 2023-01-22 14:23:41.679922: step: 846/464, loss: 0.0006196981994435191 2023-01-22 14:23:42.178055: step: 848/464, loss: 0.005727910902351141 2023-01-22 14:23:42.815019: step: 850/464, loss: 1.8394459402770735e-05 2023-01-22 14:23:43.406001: step: 852/464, loss: 0.08610616624355316 2023-01-22 14:23:44.033447: step: 854/464, loss: 0.010558906942605972 2023-01-22 14:23:44.613938: step: 856/464, loss: 0.009178843349218369 2023-01-22 14:23:45.217639: step: 858/464, loss: 0.0027270710561424494 2023-01-22 14:23:45.774004: step: 860/464, loss: 0.00411981763318181 2023-01-22 14:23:46.334491: step: 862/464, loss: 0.00033645035000517964 2023-01-22 14:23:46.940253: step: 864/464, loss: 0.0021524657495319843 2023-01-22 14:23:47.541337: step: 866/464, loss: 0.0221529770642519 2023-01-22 14:23:48.158490: step: 868/464, loss: 0.0135272815823555 2023-01-22 14:23:48.762243: step: 870/464, loss: 0.017445018514990807 2023-01-22 14:23:49.380344: step: 872/464, loss: 0.0009814107324928045 2023-01-22 14:23:50.105895: step: 874/464, loss: 0.12281250953674316 2023-01-22 14:23:50.820558: step: 876/464, loss: 0.0002310747659066692 2023-01-22 14:23:51.449739: step: 878/464, loss: 0.008918222039937973 2023-01-22 14:23:52.070266: step: 880/464, loss: 0.03904656320810318 2023-01-22 14:23:52.672713: step: 882/464, loss: 0.008514349348843098 2023-01-22 14:23:53.268079: step: 884/464, loss: 0.027735181152820587 2023-01-22 14:23:53.895365: step: 886/464, loss: 0.019990824162960052 2023-01-22 14:23:54.486735: step: 888/464, loss: 0.002079431666061282 2023-01-22 14:23:55.068240: step: 890/464, loss: 0.0047247945331037045 2023-01-22 14:23:55.666631: step: 892/464, loss: 0.10039626806974411 2023-01-22 14:23:56.282030: step: 894/464, loss: 0.032289791852235794 2023-01-22 14:23:56.940761: step: 896/464, loss: 0.001553431968204677 2023-01-22 14:23:57.599309: step: 898/464, loss: 0.06520793586969376 2023-01-22 14:23:58.229246: step: 900/464, loss: 0.006170877255499363 2023-01-22 14:23:58.868051: step: 902/464, loss: 0.015883471816778183 2023-01-22 14:23:59.483175: step: 904/464, loss: 0.0009544222266413271 2023-01-22 14:24:00.229643: step: 906/464, loss: 0.00172118388582021 2023-01-22 14:24:00.891149: step: 908/464, loss: 0.0004212943895254284 2023-01-22 14:24:01.486841: step: 910/464, loss: 9.579287143424153e-05 2023-01-22 14:24:02.145230: step: 912/464, loss: 0.0012574447318911552 2023-01-22 14:24:02.846005: step: 914/464, loss: 0.010224048979580402 2023-01-22 14:24:03.498149: step: 916/464, loss: 0.03736108914017677 2023-01-22 14:24:04.122496: step: 918/464, loss: 0.00016129278810694814 2023-01-22 14:24:04.793183: step: 920/464, loss: 0.05936411768198013 2023-01-22 14:24:05.399636: step: 922/464, loss: 0.00186917616520077 2023-01-22 14:24:06.081597: step: 924/464, loss: 0.008587202057242393 2023-01-22 14:24:06.663782: step: 926/464, loss: 0.0009364963043481112 2023-01-22 14:24:07.289697: step: 928/464, loss: 0.01637764275074005 2023-01-22 14:24:07.779256: step: 930/464, loss: 8.549598715035245e-05 ================================================== Loss: 0.045 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2958712748344371, 'r': 0.33910104364326377, 'f1': 0.31601458885941647}, 'combined': 0.2328528549490437, 'epoch': 37} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2975710985119941, 'r': 0.32159518719920094, 'f1': 0.30911706706096037}, 'combined': 0.20180699714860625, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29832377373417723, 'r': 0.35776209677419357, 'f1': 0.3253505176876618}, 'combined': 0.23973196040143502, 'epoch': 37} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30953777063127025, 'r': 0.3177807415663546, 'f1': 0.31360509974241974}, 'combined': 0.20473700812199422, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30482137921503594, 'r': 0.34878043959519295, 'f1': 0.32532264011799417}, 'combined': 0.23971141903431148, 'epoch': 37} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30497544222897244, 'r': 0.31198315000077786, 'f1': 0.3084394976836405}, 'combined': 0.20136464615615907, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22557471264367815, 'r': 0.37380952380952376, 'f1': 0.28136200716845877}, 'combined': 0.18757467144563916, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25625, 'r': 0.44565217391304346, 'f1': 0.3253968253968254}, 'combined': 0.1626984126984127, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4125, 'r': 0.28448275862068967, 'f1': 0.33673469387755106}, 'combined': 0.22448979591836737, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 
0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:26:46.106979: step: 2/464, loss: 0.07666371762752533 2023-01-22 14:26:46.656923: step: 4/464, loss: 0.0041556404903531075 2023-01-22 14:26:47.277648: step: 6/464, loss: 0.016601957380771637 2023-01-22 14:26:47.900719: step: 8/464, loss: 0.008504652418196201 2023-01-22 14:26:48.555601: step: 10/464, loss: 0.1495015174150467 2023-01-22 14:26:49.158861: step: 12/464, loss: 0.000984443468041718 2023-01-22 14:26:49.761444: step: 14/464, loss: 0.01359118428081274 2023-01-22 14:26:50.377222: step: 16/464, loss: 0.0004940666840411723 2023-01-22 14:26:50.988612: step: 18/464, loss: 0.004424249287694693 2023-01-22 14:26:51.677166: step: 20/464, loss: 0.016282692551612854 2023-01-22 14:26:52.309241: step: 22/464, loss: 0.03299436718225479 2023-01-22 14:26:52.922179: step: 24/464, loss: 2.0109622710151598e-05 2023-01-22 14:26:53.582567: step: 26/464, loss: 0.0018645920790731907 2023-01-22 14:26:54.195549: step: 28/464, loss: 0.02996472455561161 2023-01-22 14:26:54.807195: step: 30/464, loss: 0.00014173431554809213 2023-01-22 14:26:55.413862: step: 32/464, loss: 0.004367295186966658 2023-01-22 14:26:56.051640: step: 34/464, loss: 0.05591445416212082 2023-01-22 14:26:56.722974: step: 36/464, loss: 0.0004241672868374735 2023-01-22 14:26:57.407157: step: 38/464, loss: 0.015961840748786926 2023-01-22 14:26:58.033319: step: 40/464, loss: 0.004817094653844833 2023-01-22 14:26:58.622657: step: 42/464, loss: 0.0018298542127013206 2023-01-22 14:26:59.237201: step: 44/464, loss: 0.0121889878064394 2023-01-22 14:26:59.823969: step: 46/464, loss: 0.0021618735045194626 2023-01-22 14:27:00.376017: step: 48/464, loss: 0.0003386117168702185 2023-01-22 14:27:00.929872: step: 50/464, loss: 0.00016865790530573577 2023-01-22 14:27:01.496100: step: 52/464, loss: 0.008086579851806164 2023-01-22 14:27:02.110457: step: 54/464, loss: 0.001665792427957058 2023-01-22 14:27:02.737929: step: 56/464, loss: 0.02543218433856964 2023-01-22 14:27:03.318737: step: 58/464, loss: 0.8957485556602478 2023-01-22 14:27:03.891030: step: 60/464, loss: 0.0036911722272634506 2023-01-22 14:27:04.479371: step: 62/464, loss: 0.0007915475289337337 2023-01-22 14:27:05.110139: step: 64/464, loss: 0.005884534679353237 2023-01-22 14:27:05.730191: step: 66/464, loss: 0.06674639135599136 2023-01-22 14:27:06.312608: step: 68/464, loss: 0.004447010345757008 2023-01-22 14:27:06.878795: step: 70/464, loss: 0.0007373158005066216 2023-01-22 14:27:07.510732: step: 72/464, loss: 0.07409743219614029 2023-01-22 14:27:08.168048: step: 74/464, loss: 0.00021967102657072246 2023-01-22 14:27:08.745570: step: 76/464, loss: 0.006402883678674698 2023-01-22 14:27:09.414068: step: 78/464, loss: 0.1285560429096222 2023-01-22 14:27:10.008290: step: 80/464, loss: 0.0064278459176421165 2023-01-22 14:27:10.589683: step: 82/464, loss: 0.0027200295589864254 
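Note on the evaluation blocks above: each reported 'f1' is the harmonic mean of its 'p' and 'r', and each 'combined' score equals the product of the template f1 and the slot f1 (e.g. Dev Chinese, epoch 37: 0.7368421052631579 * 0.31601458885941647 ≈ 0.2328528549490437); the epoch-level "Loss" line presumably averages the per-step losses printed above. A minimal sketch that reproduces these reported numbers follows; the function names are illustrative and not taken from train.py:

def f1(p, r):
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1, slot_f1):
    # The 'combined' field matches the product of the two f1 scores.
    return template_f1 * slot_f1

# Dev Chinese, epoch 37, values taken from the log above:
template_f1 = f1(p=1.0, r=0.5833333333333334)               # ~0.7368421052631579
slot_f1 = f1(p=0.2958712748344371, r=0.33910104364326377)   # ~0.31601458885941647
print(combined(template_f1, slot_f1))                       # ~0.2328528549490437

The same relation holds for the Test and Sample rows (e.g. Sample Russian, epoch 37: 0.6666666666666666 * 0.33673469387755106 ≈ 0.22448979591836737), so 'combined' can be recomputed directly from the two f1 values when comparing epochs.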
2023-01-22 14:27:11.219366: step: 84/464, loss: 0.002880350686609745 2023-01-22 14:27:11.830076: step: 86/464, loss: 0.006422638893127441 2023-01-22 14:27:12.406578: step: 88/464, loss: 5.416858402895741e-05 2023-01-22 14:27:13.113730: step: 90/464, loss: 0.05255880579352379 2023-01-22 14:27:13.672897: step: 92/464, loss: 0.0015383812133222818 2023-01-22 14:27:14.253500: step: 94/464, loss: 0.028287207707762718 2023-01-22 14:27:14.901961: step: 96/464, loss: 0.0023062839172780514 2023-01-22 14:27:15.520720: step: 98/464, loss: 0.01701662316918373 2023-01-22 14:27:16.117671: step: 100/464, loss: 0.0005828766734339297 2023-01-22 14:27:16.781909: step: 102/464, loss: 0.051405563950538635 2023-01-22 14:27:17.461873: step: 104/464, loss: 0.00017306103836745024 2023-01-22 14:27:18.117660: step: 106/464, loss: 0.015117556788027287 2023-01-22 14:27:18.689325: step: 108/464, loss: 0.0004980931989848614 2023-01-22 14:27:19.253205: step: 110/464, loss: 0.0075327204540371895 2023-01-22 14:27:19.845662: step: 112/464, loss: 0.006094371899962425 2023-01-22 14:27:20.482659: step: 114/464, loss: 0.04257412627339363 2023-01-22 14:27:21.142346: step: 116/464, loss: 0.01640826277434826 2023-01-22 14:27:21.799060: step: 118/464, loss: 0.01762639544904232 2023-01-22 14:27:22.308318: step: 120/464, loss: 0.005846071057021618 2023-01-22 14:27:22.979950: step: 122/464, loss: 0.5272281169891357 2023-01-22 14:27:23.643792: step: 124/464, loss: 0.00011272434494458139 2023-01-22 14:27:24.329032: step: 126/464, loss: 0.15199708938598633 2023-01-22 14:27:25.030935: step: 128/464, loss: 0.04380028694868088 2023-01-22 14:27:25.654556: step: 130/464, loss: 0.010487067513167858 2023-01-22 14:27:26.302661: step: 132/464, loss: 0.0066137295216321945 2023-01-22 14:27:26.898530: step: 134/464, loss: 0.0038525154814124107 2023-01-22 14:27:27.521821: step: 136/464, loss: 0.0010183482663705945 2023-01-22 14:27:28.169880: step: 138/464, loss: 0.003813040442764759 2023-01-22 14:27:28.870834: step: 140/464, loss: 0.0023907809518277645 2023-01-22 14:27:29.469423: step: 142/464, loss: 5.111067002872005e-05 2023-01-22 14:27:30.095279: step: 144/464, loss: 0.004993663635104895 2023-01-22 14:27:30.709630: step: 146/464, loss: 0.13828477263450623 2023-01-22 14:27:31.353825: step: 148/464, loss: 0.0017009323928505182 2023-01-22 14:27:32.028458: step: 150/464, loss: 0.005286176223307848 2023-01-22 14:27:32.645857: step: 152/464, loss: 0.001723647816106677 2023-01-22 14:27:33.253117: step: 154/464, loss: 0.001361824688501656 2023-01-22 14:27:33.846776: step: 156/464, loss: 0.0003422506561037153 2023-01-22 14:27:34.428562: step: 158/464, loss: 0.0026143400464206934 2023-01-22 14:27:35.055101: step: 160/464, loss: 0.0035480279475450516 2023-01-22 14:27:35.776031: step: 162/464, loss: 0.06149844825267792 2023-01-22 14:27:36.379807: step: 164/464, loss: 0.012968159280717373 2023-01-22 14:27:37.032109: step: 166/464, loss: 0.008539444766938686 2023-01-22 14:27:37.689969: step: 168/464, loss: 0.006774135399609804 2023-01-22 14:27:38.317766: step: 170/464, loss: 0.0024416123051196337 2023-01-22 14:27:38.931558: step: 172/464, loss: 0.0045210495591163635 2023-01-22 14:27:39.514508: step: 174/464, loss: 0.0001654277293710038 2023-01-22 14:27:40.111460: step: 176/464, loss: 0.1539025604724884 2023-01-22 14:27:40.735826: step: 178/464, loss: 0.016555478796362877 2023-01-22 14:27:41.357240: step: 180/464, loss: 0.0009038833668455482 2023-01-22 14:27:41.997292: step: 182/464, loss: 0.04463731870055199 2023-01-22 14:27:42.581273: step: 184/464, loss: 
0.0007485342212021351 2023-01-22 14:27:43.190221: step: 186/464, loss: 0.09173037111759186 2023-01-22 14:27:43.733522: step: 188/464, loss: 0.22339533269405365 2023-01-22 14:27:44.356041: step: 190/464, loss: 0.4433741271495819 2023-01-22 14:27:44.975336: step: 192/464, loss: 0.00010236509115202352 2023-01-22 14:27:45.573954: step: 194/464, loss: 8.915683429222554e-05 2023-01-22 14:27:46.202086: step: 196/464, loss: 0.0865844190120697 2023-01-22 14:27:46.836574: step: 198/464, loss: 0.09011106193065643 2023-01-22 14:27:47.436445: step: 200/464, loss: 0.0009091346873901784 2023-01-22 14:27:48.058548: step: 202/464, loss: 0.0038832188583910465 2023-01-22 14:27:48.673466: step: 204/464, loss: 0.0029701266903430223 2023-01-22 14:27:49.248087: step: 206/464, loss: 0.0030103796161711216 2023-01-22 14:27:49.869181: step: 208/464, loss: 0.00433703837916255 2023-01-22 14:27:50.528259: step: 210/464, loss: 0.0011400451185181737 2023-01-22 14:27:51.106711: step: 212/464, loss: 0.005956655368208885 2023-01-22 14:27:51.757543: step: 214/464, loss: 0.014067273586988449 2023-01-22 14:27:52.342484: step: 216/464, loss: 0.004998539574444294 2023-01-22 14:27:52.949183: step: 218/464, loss: 0.05108063295483589 2023-01-22 14:27:53.608163: step: 220/464, loss: 0.15355080366134644 2023-01-22 14:27:54.364170: step: 222/464, loss: 0.00015312989125959575 2023-01-22 14:27:54.975063: step: 224/464, loss: 0.0011173977982252836 2023-01-22 14:27:55.627032: step: 226/464, loss: 0.06421932578086853 2023-01-22 14:27:56.291282: step: 228/464, loss: 2.7660456908051856e-05 2023-01-22 14:27:56.915641: step: 230/464, loss: 0.0002391198358964175 2023-01-22 14:27:57.585187: step: 232/464, loss: 0.0005655947607010603 2023-01-22 14:27:58.213249: step: 234/464, loss: 0.01645440049469471 2023-01-22 14:27:58.774512: step: 236/464, loss: 0.005959731992334127 2023-01-22 14:27:59.418609: step: 238/464, loss: 0.008238804526627064 2023-01-22 14:27:59.965897: step: 240/464, loss: 0.0012029623612761497 2023-01-22 14:28:00.654492: step: 242/464, loss: 0.000807002536021173 2023-01-22 14:28:01.387654: step: 244/464, loss: 2.4607074010418728e-05 2023-01-22 14:28:01.991710: step: 246/464, loss: 0.003851450514048338 2023-01-22 14:28:02.592839: step: 248/464, loss: 0.01687595620751381 2023-01-22 14:28:03.185097: step: 250/464, loss: 0.0029494829941540956 2023-01-22 14:28:03.796912: step: 252/464, loss: 1.2497486750362441e-05 2023-01-22 14:28:04.370126: step: 254/464, loss: 0.000681983889080584 2023-01-22 14:28:04.981415: step: 256/464, loss: 0.007440278772264719 2023-01-22 14:28:05.620475: step: 258/464, loss: 0.0018620517803356051 2023-01-22 14:28:06.287208: step: 260/464, loss: 0.008712049573659897 2023-01-22 14:28:06.887015: step: 262/464, loss: 0.0009259435464628041 2023-01-22 14:28:07.605390: step: 264/464, loss: 0.010966053232550621 2023-01-22 14:28:08.213961: step: 266/464, loss: 2.162556666007731e-05 2023-01-22 14:28:08.939000: step: 268/464, loss: 0.07548662275075912 2023-01-22 14:28:09.514920: step: 270/464, loss: 0.0036649368703365326 2023-01-22 14:28:10.145194: step: 272/464, loss: 0.09332875162363052 2023-01-22 14:28:10.838191: step: 274/464, loss: 0.0022640973329544067 2023-01-22 14:28:11.439545: step: 276/464, loss: 0.0028570217546075583 2023-01-22 14:28:12.144087: step: 278/464, loss: 0.029439497739076614 2023-01-22 14:28:12.825929: step: 280/464, loss: 0.00011209556396352127 2023-01-22 14:28:13.515921: step: 282/464, loss: 0.0016224890714511275 2023-01-22 14:28:14.089236: step: 284/464, loss: 0.10801452398300171 2023-01-22 
14:28:14.665996: step: 286/464, loss: 0.002695675939321518 2023-01-22 14:28:15.366183: step: 288/464, loss: 0.0012047748314216733 2023-01-22 14:28:15.940425: step: 290/464, loss: 0.0006728891166858375 2023-01-22 14:28:16.564489: step: 292/464, loss: 0.00027445441810414195 2023-01-22 14:28:17.212731: step: 294/464, loss: 0.0254234429448843 2023-01-22 14:28:17.811264: step: 296/464, loss: 0.0009080729796551168 2023-01-22 14:28:18.396286: step: 298/464, loss: 8.996039105113596e-05 2023-01-22 14:28:18.970747: step: 300/464, loss: 0.014653047546744347 2023-01-22 14:28:19.605904: step: 302/464, loss: 0.12983016669750214 2023-01-22 14:28:20.211013: step: 304/464, loss: 0.03050478920340538 2023-01-22 14:28:20.857684: step: 306/464, loss: 0.009392405860126019 2023-01-22 14:28:21.498072: step: 308/464, loss: 0.0025869968812912703 2023-01-22 14:28:22.010245: step: 310/464, loss: 0.000151861910126172 2023-01-22 14:28:22.654635: step: 312/464, loss: 0.0013560017105191946 2023-01-22 14:28:23.317040: step: 314/464, loss: 0.0028233586344867945 2023-01-22 14:28:23.920373: step: 316/464, loss: 0.020580098032951355 2023-01-22 14:28:24.585394: step: 318/464, loss: 1.2516134977340698 2023-01-22 14:28:25.250689: step: 320/464, loss: 0.16352632641792297 2023-01-22 14:28:25.826622: step: 322/464, loss: 0.001475161057896912 2023-01-22 14:28:26.463373: step: 324/464, loss: 0.0037892444524914026 2023-01-22 14:28:27.103484: step: 326/464, loss: 0.014337840490043163 2023-01-22 14:28:27.705227: step: 328/464, loss: 0.00012592771963682026 2023-01-22 14:28:28.302385: step: 330/464, loss: 0.010473833419382572 2023-01-22 14:28:28.866350: step: 332/464, loss: 0.05411313846707344 2023-01-22 14:28:29.401055: step: 334/464, loss: 0.0004032576980534941 2023-01-22 14:28:29.990517: step: 336/464, loss: 0.37042418122291565 2023-01-22 14:28:30.563143: step: 338/464, loss: 8.61345324665308e-05 2023-01-22 14:28:31.098169: step: 340/464, loss: 0.0011708943638950586 2023-01-22 14:28:31.795821: step: 342/464, loss: 4.38054895401001 2023-01-22 14:28:32.386599: step: 344/464, loss: 0.004698981065303087 2023-01-22 14:28:33.004875: step: 346/464, loss: 0.00021359566017054021 2023-01-22 14:28:33.673567: step: 348/464, loss: 0.011838965117931366 2023-01-22 14:28:34.249052: step: 350/464, loss: 0.02740328386425972 2023-01-22 14:28:34.867073: step: 352/464, loss: 0.006860440131276846 2023-01-22 14:28:35.530119: step: 354/464, loss: 0.24267810583114624 2023-01-22 14:28:36.152879: step: 356/464, loss: 1.5543577319476753e-05 2023-01-22 14:28:36.762152: step: 358/464, loss: 0.0018179682083427906 2023-01-22 14:28:37.423225: step: 360/464, loss: 0.0008576444233767688 2023-01-22 14:28:38.143611: step: 362/464, loss: 0.10859065502882004 2023-01-22 14:28:38.770041: step: 364/464, loss: 0.018232515081763268 2023-01-22 14:28:39.466890: step: 366/464, loss: 0.018040932714939117 2023-01-22 14:28:40.144022: step: 368/464, loss: 0.020858148112893105 2023-01-22 14:28:40.742322: step: 370/464, loss: 0.007685009855777025 2023-01-22 14:28:41.390381: step: 372/464, loss: 0.02899814024567604 2023-01-22 14:28:42.064465: step: 374/464, loss: 0.021727699786424637 2023-01-22 14:28:42.624933: step: 376/464, loss: 0.06514622271060944 2023-01-22 14:28:43.243427: step: 378/464, loss: 5.6364660849794745e-06 2023-01-22 14:28:43.829734: step: 380/464, loss: 0.0019106330582872033 2023-01-22 14:28:44.445542: step: 382/464, loss: 0.0014959658728912473 2023-01-22 14:28:44.988983: step: 384/464, loss: 0.001561407814733684 2023-01-22 14:28:45.593862: step: 386/464, loss: 
0.00809608306735754 2023-01-22 14:28:46.129231: step: 388/464, loss: 0.00039438524981960654 2023-01-22 14:28:46.820761: step: 390/464, loss: 0.013873212039470673 2023-01-22 14:28:47.430933: step: 392/464, loss: 0.002111123176291585 2023-01-22 14:28:48.053202: step: 394/464, loss: 0.15582974255084991 2023-01-22 14:28:48.631589: step: 396/464, loss: 0.03531589359045029 2023-01-22 14:28:49.283821: step: 398/464, loss: 0.09494128823280334 2023-01-22 14:28:49.935360: step: 400/464, loss: 0.028393391519784927 2023-01-22 14:28:50.502448: step: 402/464, loss: 0.002526383614167571 2023-01-22 14:28:51.145979: step: 404/464, loss: 0.006093881092965603 2023-01-22 14:28:51.780239: step: 406/464, loss: 0.02048211172223091 2023-01-22 14:28:52.357490: step: 408/464, loss: 0.0005945987650193274 2023-01-22 14:28:52.978633: step: 410/464, loss: 1.6490535870161693e-07 2023-01-22 14:28:53.643458: step: 412/464, loss: 0.01898992620408535 2023-01-22 14:28:54.234616: step: 414/464, loss: 0.030549850314855576 2023-01-22 14:28:54.852773: step: 416/464, loss: 0.0011503227287903428 2023-01-22 14:28:55.507709: step: 418/464, loss: 0.06716148555278778 2023-01-22 14:28:56.180113: step: 420/464, loss: 0.002034626202657819 2023-01-22 14:28:56.824403: step: 422/464, loss: 0.006199575029313564 2023-01-22 14:28:57.433832: step: 424/464, loss: 0.006802136544138193 2023-01-22 14:28:58.158525: step: 426/464, loss: 0.04740991070866585 2023-01-22 14:28:58.762189: step: 428/464, loss: 0.02002396434545517 2023-01-22 14:28:59.316017: step: 430/464, loss: 0.009074503555893898 2023-01-22 14:28:59.942328: step: 432/464, loss: 0.026804577559232712 2023-01-22 14:29:00.530106: step: 434/464, loss: 0.002207412151619792 2023-01-22 14:29:01.088636: step: 436/464, loss: 0.000302294734865427 2023-01-22 14:29:01.724601: step: 438/464, loss: 0.0211816243827343 2023-01-22 14:29:02.394121: step: 440/464, loss: 0.006016803439706564 2023-01-22 14:29:03.073916: step: 442/464, loss: 0.0037967958487570286 2023-01-22 14:29:03.734939: step: 444/464, loss: 0.003919376991689205 2023-01-22 14:29:04.307108: step: 446/464, loss: 0.0012541390024125576 2023-01-22 14:29:04.935680: step: 448/464, loss: 0.007256507407873869 2023-01-22 14:29:05.565864: step: 450/464, loss: 0.01981954276561737 2023-01-22 14:29:06.272296: step: 452/464, loss: 0.005772142205387354 2023-01-22 14:29:06.884564: step: 454/464, loss: 0.0013192944461479783 2023-01-22 14:29:07.523062: step: 456/464, loss: 0.03334563598036766 2023-01-22 14:29:08.102409: step: 458/464, loss: 0.0009300485835410655 2023-01-22 14:29:08.643071: step: 460/464, loss: 8.524296572431922e-05 2023-01-22 14:29:09.328713: step: 462/464, loss: 0.0048855082131922245 2023-01-22 14:29:09.965877: step: 464/464, loss: 0.00821410957723856 2023-01-22 14:29:10.612674: step: 466/464, loss: 0.018185364082455635 2023-01-22 14:29:11.210182: step: 468/464, loss: 0.008915431797504425 2023-01-22 14:29:11.831660: step: 470/464, loss: 0.3980819284915924 2023-01-22 14:29:12.497209: step: 472/464, loss: 0.015454445965588093 2023-01-22 14:29:13.244305: step: 474/464, loss: 0.0009482467430643737 2023-01-22 14:29:13.809110: step: 476/464, loss: 0.0003818488912656903 2023-01-22 14:29:14.387632: step: 478/464, loss: 0.01932300068438053 2023-01-22 14:29:14.907961: step: 480/464, loss: 0.02119056135416031 2023-01-22 14:29:15.501065: step: 482/464, loss: 0.010176424868404865 2023-01-22 14:29:16.155591: step: 484/464, loss: 0.002057582139968872 2023-01-22 14:29:16.776509: step: 486/464, loss: 0.038074057549238205 2023-01-22 14:29:17.484480: step: 
488/464, loss: 0.0008602479356341064 2023-01-22 14:29:18.090918: step: 490/464, loss: 0.0010032361606135964 2023-01-22 14:29:18.675541: step: 492/464, loss: 0.02331576868891716 2023-01-22 14:29:19.275550: step: 494/464, loss: 0.0032299442682415247 2023-01-22 14:29:19.845061: step: 496/464, loss: 0.007703948765993118 2023-01-22 14:29:20.523059: step: 498/464, loss: 0.07672279328107834 2023-01-22 14:29:21.205954: step: 500/464, loss: 0.013739488087594509 2023-01-22 14:29:21.965838: step: 502/464, loss: 0.004015688318759203 2023-01-22 14:29:22.645721: step: 504/464, loss: 0.0002704980142880231 2023-01-22 14:29:23.193614: step: 506/464, loss: 0.04848206043243408 2023-01-22 14:29:23.826355: step: 508/464, loss: 0.022236688062548637 2023-01-22 14:29:24.480772: step: 510/464, loss: 0.042567480355501175 2023-01-22 14:29:25.129446: step: 512/464, loss: 4.5881301957706455e-06 2023-01-22 14:29:25.736408: step: 514/464, loss: 0.00047643258585594594 2023-01-22 14:29:26.367759: step: 516/464, loss: 0.0048703039065003395 2023-01-22 14:29:27.018524: step: 518/464, loss: 0.03520174324512482 2023-01-22 14:29:27.609501: step: 520/464, loss: 0.000222586008021608 2023-01-22 14:29:28.270217: step: 522/464, loss: 0.006125647574663162 2023-01-22 14:29:28.929390: step: 524/464, loss: 0.009394499473273754 2023-01-22 14:29:29.513849: step: 526/464, loss: 0.0008229393279179931 2023-01-22 14:29:30.167198: step: 528/464, loss: 0.007002570666372776 2023-01-22 14:29:30.724815: step: 530/464, loss: 0.005204002372920513 2023-01-22 14:29:31.341097: step: 532/464, loss: 0.004415723029524088 2023-01-22 14:29:31.966169: step: 534/464, loss: 0.007514380384236574 2023-01-22 14:29:32.568381: step: 536/464, loss: 0.0004743316094391048 2023-01-22 14:29:33.156949: step: 538/464, loss: 0.002290072152391076 2023-01-22 14:29:33.821329: step: 540/464, loss: 0.005570805165916681 2023-01-22 14:29:34.427010: step: 542/464, loss: 5.998162305331789e-05 2023-01-22 14:29:35.028995: step: 544/464, loss: 0.0 2023-01-22 14:29:35.730694: step: 546/464, loss: 0.00013383693294599652 2023-01-22 14:29:36.356960: step: 548/464, loss: 0.023291975259780884 2023-01-22 14:29:36.985745: step: 550/464, loss: 0.00010557601490290835 2023-01-22 14:29:37.514528: step: 552/464, loss: 0.001020438619889319 2023-01-22 14:29:38.179470: step: 554/464, loss: 0.002947790315374732 2023-01-22 14:29:38.800542: step: 556/464, loss: 0.0017539083492010832 2023-01-22 14:29:39.425597: step: 558/464, loss: 0.0005117803229950368 2023-01-22 14:29:40.072592: step: 560/464, loss: 0.00025536149041727185 2023-01-22 14:29:40.681240: step: 562/464, loss: 0.24654802680015564 2023-01-22 14:29:41.438535: step: 564/464, loss: 0.0009103059419430792 2023-01-22 14:29:42.133124: step: 566/464, loss: 0.004263360984623432 2023-01-22 14:29:42.790755: step: 568/464, loss: 0.0005020697717554867 2023-01-22 14:29:43.443472: step: 570/464, loss: 0.03391928970813751 2023-01-22 14:29:44.035908: step: 572/464, loss: 0.0711340457201004 2023-01-22 14:29:44.664493: step: 574/464, loss: 0.00024413218488916755 2023-01-22 14:29:45.268670: step: 576/464, loss: 0.0878123864531517 2023-01-22 14:29:45.831841: step: 578/464, loss: 0.0047101471573114395 2023-01-22 14:29:46.546173: step: 580/464, loss: 0.001482927706092596 2023-01-22 14:29:47.227610: step: 582/464, loss: 0.0059550609439611435 2023-01-22 14:29:47.821503: step: 584/464, loss: 0.006339132785797119 2023-01-22 14:29:48.488794: step: 586/464, loss: 0.007386078126728535 2023-01-22 14:29:49.087306: step: 588/464, loss: 0.008458703756332397 2023-01-22 
14:29:49.742548: step: 590/464, loss: 0.005758496467024088 2023-01-22 14:29:50.362535: step: 592/464, loss: 0.0013132354943081737 2023-01-22 14:29:50.993327: step: 594/464, loss: 0.0005673606647178531 2023-01-22 14:29:51.690835: step: 596/464, loss: 0.018045693635940552 2023-01-22 14:29:52.305164: step: 598/464, loss: 0.4945283830165863 2023-01-22 14:29:52.966032: step: 600/464, loss: 0.0024164745118469 2023-01-22 14:29:53.623833: step: 602/464, loss: 0.004685352556407452 2023-01-22 14:29:54.237634: step: 604/464, loss: 0.004564021248370409 2023-01-22 14:29:54.919609: step: 606/464, loss: 0.050456490367650986 2023-01-22 14:29:55.593649: step: 608/464, loss: 0.04056352376937866 2023-01-22 14:29:56.242134: step: 610/464, loss: 0.0005739732296206057 2023-01-22 14:29:56.860354: step: 612/464, loss: 0.007927405647933483 2023-01-22 14:29:57.484281: step: 614/464, loss: 0.0051388186402618885 2023-01-22 14:29:58.116663: step: 616/464, loss: 0.0002850943128578365 2023-01-22 14:29:58.683031: step: 618/464, loss: 0.005885418504476547 2023-01-22 14:29:59.280504: step: 620/464, loss: 0.005376122426241636 2023-01-22 14:29:59.850138: step: 622/464, loss: 0.00022895917936693877 2023-01-22 14:30:00.435457: step: 624/464, loss: 1.3114871978759766 2023-01-22 14:30:01.108905: step: 626/464, loss: 0.0682322308421135 2023-01-22 14:30:01.728735: step: 628/464, loss: 2.1218525944277644e-05 2023-01-22 14:30:02.303472: step: 630/464, loss: 0.0011831369483843446 2023-01-22 14:30:02.857790: step: 632/464, loss: 0.0001217541066580452 2023-01-22 14:30:03.459760: step: 634/464, loss: 0.000267831957899034 2023-01-22 14:30:04.096991: step: 636/464, loss: 0.01607227884232998 2023-01-22 14:30:04.691929: step: 638/464, loss: 0.01205810159444809 2023-01-22 14:30:05.261561: step: 640/464, loss: 0.009893765673041344 2023-01-22 14:30:05.867342: step: 642/464, loss: 0.002671779366210103 2023-01-22 14:30:06.476679: step: 644/464, loss: 0.00041898165363818407 2023-01-22 14:30:07.070329: step: 646/464, loss: 0.00014477742661256343 2023-01-22 14:30:07.602665: step: 648/464, loss: 0.008365098387002945 2023-01-22 14:30:08.245973: step: 650/464, loss: 0.026274938136339188 2023-01-22 14:30:08.833263: step: 652/464, loss: 0.00337810511700809 2023-01-22 14:30:09.429613: step: 654/464, loss: 0.006393271032720804 2023-01-22 14:30:09.996896: step: 656/464, loss: 0.00383376725949347 2023-01-22 14:30:10.655299: step: 658/464, loss: 0.06319655478000641 2023-01-22 14:30:11.231975: step: 660/464, loss: 0.0021081226877868176 2023-01-22 14:30:11.882686: step: 662/464, loss: 0.006011603865772486 2023-01-22 14:30:12.500866: step: 664/464, loss: 0.023292817175388336 2023-01-22 14:30:13.316348: step: 666/464, loss: 0.0008032124023884535 2023-01-22 14:30:13.971846: step: 668/464, loss: 0.0015976447612047195 2023-01-22 14:30:14.615610: step: 670/464, loss: 0.006079916842281818 2023-01-22 14:30:15.244867: step: 672/464, loss: 0.0005906568258069456 2023-01-22 14:30:15.858768: step: 674/464, loss: 0.037261757999658585 2023-01-22 14:30:16.533413: step: 676/464, loss: 0.00014715935685671866 2023-01-22 14:30:17.235169: step: 678/464, loss: 0.04640545696020126 2023-01-22 14:30:17.821945: step: 680/464, loss: 0.00031977047910913825 2023-01-22 14:30:18.368776: step: 682/464, loss: 0.004776813089847565 2023-01-22 14:30:18.955000: step: 684/464, loss: 0.00037920771865174174 2023-01-22 14:30:19.573052: step: 686/464, loss: 0.016807299107313156 2023-01-22 14:30:20.183394: step: 688/464, loss: 0.0012932555982843041 2023-01-22 14:30:20.722003: step: 690/464, loss: 
0.014235904440283775 2023-01-22 14:30:21.380342: step: 692/464, loss: 0.0753798708319664 2023-01-22 14:30:22.070343: step: 694/464, loss: 0.012994196265935898 2023-01-22 14:30:22.677064: step: 696/464, loss: 0.00023477750073652714 2023-01-22 14:30:23.287258: step: 698/464, loss: 0.012356506660580635 2023-01-22 14:30:23.884521: step: 700/464, loss: 0.00019970822904724628 2023-01-22 14:30:24.437624: step: 702/464, loss: 0.03570036590099335 2023-01-22 14:30:24.989721: step: 704/464, loss: 0.1842038929462433 2023-01-22 14:30:25.638655: step: 706/464, loss: 0.0002647065557539463 2023-01-22 14:30:26.250245: step: 708/464, loss: 1.2724791765213013 2023-01-22 14:30:26.948141: step: 710/464, loss: 0.0004814733983948827 2023-01-22 14:30:27.532746: step: 712/464, loss: 1.2289744972804328e-06 2023-01-22 14:30:28.145396: step: 714/464, loss: 0.007565478794276714 2023-01-22 14:30:28.686279: step: 716/464, loss: 0.007476923055946827 2023-01-22 14:30:29.269910: step: 718/464, loss: 0.0003839374694507569 2023-01-22 14:30:29.991425: step: 720/464, loss: 0.0027928303461521864 2023-01-22 14:30:30.668739: step: 722/464, loss: 0.004604507237672806 2023-01-22 14:30:31.259504: step: 724/464, loss: 0.003077725414186716 2023-01-22 14:30:31.861709: step: 726/464, loss: 0.0004951234441250563 2023-01-22 14:30:32.460583: step: 728/464, loss: 0.026010407134890556 2023-01-22 14:30:33.054021: step: 730/464, loss: 0.0009832883952185512 2023-01-22 14:30:33.716851: step: 732/464, loss: 0.0680810809135437 2023-01-22 14:30:34.362127: step: 734/464, loss: 0.08909051865339279 2023-01-22 14:30:34.969564: step: 736/464, loss: 0.0001485932880314067 2023-01-22 14:30:35.578608: step: 738/464, loss: 0.16136470437049866 2023-01-22 14:30:36.186296: step: 740/464, loss: 0.0013271862408146262 2023-01-22 14:30:36.819488: step: 742/464, loss: 0.0001475284807384014 2023-01-22 14:30:37.451850: step: 744/464, loss: 0.004155992995947599 2023-01-22 14:30:38.081136: step: 746/464, loss: 0.0003829763736575842 2023-01-22 14:30:38.652006: step: 748/464, loss: 0.006544989533722401 2023-01-22 14:30:39.258392: step: 750/464, loss: 0.0001559268857818097 2023-01-22 14:30:39.833794: step: 752/464, loss: 0.00019917615281883627 2023-01-22 14:30:40.456429: step: 754/464, loss: 0.0016130805015563965 2023-01-22 14:30:41.053095: step: 756/464, loss: 0.0039898729883134365 2023-01-22 14:30:41.791397: step: 758/464, loss: 0.04093284532427788 2023-01-22 14:30:42.432256: step: 760/464, loss: 0.001979441847652197 2023-01-22 14:30:43.037601: step: 762/464, loss: 0.3502618968486786 2023-01-22 14:30:43.662007: step: 764/464, loss: 0.0003438458370510489 2023-01-22 14:30:44.336033: step: 766/464, loss: 0.0072568198665976524 2023-01-22 14:30:44.893200: step: 768/464, loss: 0.017397085204720497 2023-01-22 14:30:45.481933: step: 770/464, loss: 0.03964915871620178 2023-01-22 14:30:46.073951: step: 772/464, loss: 0.0007863616337999701 2023-01-22 14:30:46.685692: step: 774/464, loss: 0.0190030075609684 2023-01-22 14:30:47.333852: step: 776/464, loss: 0.004721463192254305 2023-01-22 14:30:47.959059: step: 778/464, loss: 7.103903772076592e-05 2023-01-22 14:30:48.581011: step: 780/464, loss: 0.0064157661981880665 2023-01-22 14:30:49.232998: step: 782/464, loss: 0.0024287928827106953 2023-01-22 14:30:49.850630: step: 784/464, loss: 0.011441824026405811 2023-01-22 14:30:50.473123: step: 786/464, loss: 0.013380954042077065 2023-01-22 14:30:51.054906: step: 788/464, loss: 0.0010275207459926605 2023-01-22 14:30:51.710222: step: 790/464, loss: 0.0008638154831714928 2023-01-22 
14:30:52.281765: step: 792/464, loss: 0.0020451450254768133 2023-01-22 14:30:52.903982: step: 794/464, loss: 0.00572402635589242 2023-01-22 14:30:53.552150: step: 796/464, loss: 0.006697699893265963 2023-01-22 14:30:54.115853: step: 798/464, loss: 0.003393400926142931 2023-01-22 14:30:54.722898: step: 800/464, loss: 0.004453219939023256 2023-01-22 14:30:55.343422: step: 802/464, loss: 0.02158048003911972 2023-01-22 14:30:55.896629: step: 804/464, loss: 0.0020164845045655966 2023-01-22 14:30:56.546012: step: 806/464, loss: 0.02241668850183487 2023-01-22 14:30:57.202833: step: 808/464, loss: 0.06384516507387161 2023-01-22 14:30:57.858243: step: 810/464, loss: 0.011012006551027298 2023-01-22 14:30:58.464567: step: 812/464, loss: 0.0005875803180970252 2023-01-22 14:30:59.133246: step: 814/464, loss: 0.00029726020875386894 2023-01-22 14:30:59.778309: step: 816/464, loss: 0.02253718674182892 2023-01-22 14:31:00.374660: step: 818/464, loss: 0.002219903515651822 2023-01-22 14:31:00.989487: step: 820/464, loss: 0.000503991381265223 2023-01-22 14:31:01.571744: step: 822/464, loss: 0.0007541986415162683 2023-01-22 14:31:02.198046: step: 824/464, loss: 0.003817042801529169 2023-01-22 14:31:02.896072: step: 826/464, loss: 2.5243652999051847e-05 2023-01-22 14:31:03.528943: step: 828/464, loss: 0.028099194169044495 2023-01-22 14:31:04.106600: step: 830/464, loss: 0.009699639864265919 2023-01-22 14:31:04.741476: step: 832/464, loss: 0.025268562138080597 2023-01-22 14:31:05.381928: step: 834/464, loss: 0.013387763872742653 2023-01-22 14:31:05.972317: step: 836/464, loss: 0.00692420918494463 2023-01-22 14:31:06.567210: step: 838/464, loss: 0.00038922022213228047 2023-01-22 14:31:07.169442: step: 840/464, loss: 0.0006864046445116401 2023-01-22 14:31:07.785756: step: 842/464, loss: 0.013146799989044666 2023-01-22 14:31:08.366731: step: 844/464, loss: 4.239610916556558e-06 2023-01-22 14:31:08.955589: step: 846/464, loss: 0.0018868102924898267 2023-01-22 14:31:09.557925: step: 848/464, loss: 0.0007481158245354891 2023-01-22 14:31:10.156208: step: 850/464, loss: 0.0018886085599660873 2023-01-22 14:31:10.783912: step: 852/464, loss: 0.005018434952944517 2023-01-22 14:31:11.486397: step: 854/464, loss: 0.05035892874002457 2023-01-22 14:31:12.179053: step: 856/464, loss: 0.03840731829404831 2023-01-22 14:31:12.750331: step: 858/464, loss: 0.003952166996896267 2023-01-22 14:31:13.391348: step: 860/464, loss: 0.007025779690593481 2023-01-22 14:31:13.993164: step: 862/464, loss: 0.007006064988672733 2023-01-22 14:31:14.580324: step: 864/464, loss: 4.2151474190177396e-05 2023-01-22 14:31:15.260230: step: 866/464, loss: 0.012056934647262096 2023-01-22 14:31:15.868692: step: 868/464, loss: 0.2325555384159088 2023-01-22 14:31:16.453277: step: 870/464, loss: 3.44830584526062 2023-01-22 14:31:17.048189: step: 872/464, loss: 0.02701178938150406 2023-01-22 14:31:17.617360: step: 874/464, loss: 0.002139961114153266 2023-01-22 14:31:18.233253: step: 876/464, loss: 0.0028142149094492197 2023-01-22 14:31:18.864506: step: 878/464, loss: 0.005788634996861219 2023-01-22 14:31:19.470415: step: 880/464, loss: 4.943248495692387e-05 2023-01-22 14:31:20.059301: step: 882/464, loss: 0.0016170486342161894 2023-01-22 14:31:20.627833: step: 884/464, loss: 0.0027413603384047747 2023-01-22 14:31:21.194395: step: 886/464, loss: 0.003258903743699193 2023-01-22 14:31:21.832520: step: 888/464, loss: 2.534052327973768e-06 2023-01-22 14:31:22.487231: step: 890/464, loss: 0.000867818424012512 2023-01-22 14:31:23.110286: step: 892/464, loss: 
0.0012389046605676413 2023-01-22 14:31:23.758205: step: 894/464, loss: 8.449164306512102e-05 2023-01-22 14:31:24.434024: step: 896/464, loss: 0.012688858434557915 2023-01-22 14:31:25.032230: step: 898/464, loss: 7.011165871517733e-05 2023-01-22 14:31:25.629087: step: 900/464, loss: 0.0015451086219400167 2023-01-22 14:31:26.251635: step: 902/464, loss: 0.0895097628235817 2023-01-22 14:31:26.900454: step: 904/464, loss: 0.02035101316869259 2023-01-22 14:31:27.501116: step: 906/464, loss: 0.019604161381721497 2023-01-22 14:31:28.076972: step: 908/464, loss: 0.0012368483003228903 2023-01-22 14:31:28.790699: step: 910/464, loss: 0.015060057863593102 2023-01-22 14:31:29.462213: step: 912/464, loss: 0.02042451687157154 2023-01-22 14:31:30.170235: step: 914/464, loss: 0.002836141036823392 2023-01-22 14:31:30.785724: step: 916/464, loss: 0.0005727356183342636 2023-01-22 14:31:31.410382: step: 918/464, loss: 0.014801361598074436 2023-01-22 14:31:32.004578: step: 920/464, loss: 0.00039233363349922 2023-01-22 14:31:32.640363: step: 922/464, loss: 0.008480165153741837 2023-01-22 14:31:33.252667: step: 924/464, loss: 0.003106620628386736 2023-01-22 14:31:33.837343: step: 926/464, loss: 0.01438205223530531 2023-01-22 14:31:34.476387: step: 928/464, loss: 0.019052069634199142 2023-01-22 14:31:34.965910: step: 930/464, loss: 0.0014227000065147877 ================================================== Loss: 0.050 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30073588709677423, 'r': 0.35380692599620495, 'f1': 0.32511987794245867}, 'combined': 0.23956201532602217, 'epoch': 38} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2848412773285725, 'r': 0.3248602997631653, 'f1': 0.3035374229447337}, 'combined': 0.1981643279328313, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29180652006172836, 'r': 0.3588057400379506, 'f1': 0.3218563829787234}, 'combined': 0.23715733482642773, 'epoch': 38} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3049375037092924, 'r': 0.33240430091840145, 'f1': 0.3180790496035363}, 'combined': 0.2076578251297698, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.303808225632741, 'r': 0.3568449557242252, 'f1': 0.32819771669575337}, 'combined': 0.24182989651266038, 'epoch': 38} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29588591907533457, 'r': 0.31709832871492655, 'f1': 0.30612509462452536}, 'combined': 0.19985368871860204, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20442708333333331, 'r': 0.37380952380952376, 'f1': 0.26430976430976433}, 'combined': 0.17620650953984288, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24444444444444444, 'r': 0.4782608695652174, 'f1': 0.32352941176470584}, 'combined': 0.16176470588235292, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:34:14.008328: step: 2/464, loss: 0.005334419198334217 2023-01-22 14:34:14.599885: step: 4/464, loss: 7.04170306562446e-05 2023-01-22 14:34:15.240679: step: 6/464, loss: 0.06576389819383621 2023-01-22 14:34:15.878219: step: 8/464, loss: 0.0003311616601422429 2023-01-22 14:34:16.479307: step: 10/464, loss: 0.0016958436463028193 2023-01-22 14:34:17.049265: step: 12/464, loss: 0.005034781061112881 2023-01-22 14:34:17.708944: step: 14/464, loss: 0.8912667036056519 2023-01-22 14:34:18.378254: step: 16/464, loss: 0.004827701486647129 2023-01-22 14:34:18.998075: step: 18/464, loss: 0.0013554502511397004 2023-01-22 14:34:19.616438: step: 20/464, loss: 0.0004365647619124502 2023-01-22 14:34:20.223442: step: 22/464, loss: 0.001043907948769629 2023-01-22 14:34:20.859789: step: 24/464, loss: 0.0014231398236006498 2023-01-22 14:34:21.475536: step: 26/464, loss: 0.0029816306196153164 2023-01-22 14:34:22.106860: step: 28/464, loss: 0.003940364811569452 2023-01-22 14:34:22.728974: step: 30/464, loss: 0.0007442350615747273 2023-01-22 14:34:23.312841: step: 32/464, loss: 0.0001322044263361022 2023-01-22 14:34:24.007462: step: 34/464, loss: 0.0019910086411982775 2023-01-22 14:34:24.672358: step: 36/464, loss: 0.010590208694338799 2023-01-22 14:34:25.302132: step: 38/464, loss: 0.0035319929011166096 2023-01-22 14:34:25.954118: step: 40/464, loss: 
0.007291679736226797 2023-01-22 14:34:26.562580: step: 42/464, loss: 0.0005593971582129598 2023-01-22 14:34:27.155151: step: 44/464, loss: 0.0059903026558458805 2023-01-22 14:34:27.757292: step: 46/464, loss: 0.00047973901382647455 2023-01-22 14:34:28.324102: step: 48/464, loss: 0.002392848953604698 2023-01-22 14:34:28.916736: step: 50/464, loss: 0.0029812839347869158 2023-01-22 14:34:29.537840: step: 52/464, loss: 0.016058560460805893 2023-01-22 14:34:30.215201: step: 54/464, loss: 0.00029119051760062575 2023-01-22 14:34:30.883189: step: 56/464, loss: 0.004253122489899397 2023-01-22 14:34:31.685286: step: 58/464, loss: 0.016664346680045128 2023-01-22 14:34:32.327629: step: 60/464, loss: 0.006586653180420399 2023-01-22 14:34:32.970924: step: 62/464, loss: 0.0007175652426667511 2023-01-22 14:34:33.593511: step: 64/464, loss: 0.018905365839600563 2023-01-22 14:34:34.208720: step: 66/464, loss: 1.7158316040877253e-05 2023-01-22 14:34:34.822585: step: 68/464, loss: 0.0022365667391568422 2023-01-22 14:34:35.465717: step: 70/464, loss: 0.0004463378863874823 2023-01-22 14:34:36.080173: step: 72/464, loss: 0.0002532684011384845 2023-01-22 14:34:36.763923: step: 74/464, loss: 0.012067864648997784 2023-01-22 14:34:37.385053: step: 76/464, loss: 0.02911846898496151 2023-01-22 14:34:38.022791: step: 78/464, loss: 0.008488481864333153 2023-01-22 14:34:38.622442: step: 80/464, loss: 0.06633666157722473 2023-01-22 14:34:39.230810: step: 82/464, loss: 0.0010267571778967977 2023-01-22 14:34:39.832736: step: 84/464, loss: 0.12181129306554794 2023-01-22 14:34:40.463634: step: 86/464, loss: 0.0045325239188969135 2023-01-22 14:34:41.131071: step: 88/464, loss: 0.0010455892188474536 2023-01-22 14:34:41.742765: step: 90/464, loss: 0.0067656678147614 2023-01-22 14:34:42.412314: step: 92/464, loss: 0.20134253799915314 2023-01-22 14:34:42.970962: step: 94/464, loss: 0.06584080308675766 2023-01-22 14:34:43.578066: step: 96/464, loss: 0.00019669999892357737 2023-01-22 14:34:44.140422: step: 98/464, loss: 0.022210508584976196 2023-01-22 14:34:44.771722: step: 100/464, loss: 0.05593627318739891 2023-01-22 14:34:45.412520: step: 102/464, loss: 0.04347601160407066 2023-01-22 14:34:46.094298: step: 104/464, loss: 0.0016960520297288895 2023-01-22 14:34:46.713365: step: 106/464, loss: 0.005098738707602024 2023-01-22 14:34:47.335137: step: 108/464, loss: 0.013563141226768494 2023-01-22 14:34:47.920546: step: 110/464, loss: 0.00041637677350081503 2023-01-22 14:34:48.510769: step: 112/464, loss: 0.00041668335325084627 2023-01-22 14:34:49.160243: step: 114/464, loss: 0.00100265396758914 2023-01-22 14:34:49.758429: step: 116/464, loss: 0.0170939639210701 2023-01-22 14:34:50.364741: step: 118/464, loss: 8.722050552023575e-06 2023-01-22 14:34:50.976804: step: 120/464, loss: 0.0005133725935593247 2023-01-22 14:34:51.592725: step: 122/464, loss: 0.00265505351126194 2023-01-22 14:34:52.199334: step: 124/464, loss: 0.006810938473790884 2023-01-22 14:34:52.897946: step: 126/464, loss: 0.011815540492534637 2023-01-22 14:34:53.555098: step: 128/464, loss: 0.00028624487458728254 2023-01-22 14:34:54.104282: step: 130/464, loss: 0.0007571761962026358 2023-01-22 14:34:54.736143: step: 132/464, loss: 5.493920616572723e-05 2023-01-22 14:34:55.399150: step: 134/464, loss: 0.08115773648023605 2023-01-22 14:34:55.969778: step: 136/464, loss: 0.003416879801079631 2023-01-22 14:34:56.582668: step: 138/464, loss: 0.1467238962650299 2023-01-22 14:34:57.261874: step: 140/464, loss: 0.13560757040977478 2023-01-22 14:34:57.884672: step: 142/464, loss: 
9.385305929754395e-06 2023-01-22 14:34:58.526476: step: 144/464, loss: 0.006654942408204079 2023-01-22 14:34:59.190203: step: 146/464, loss: 0.06987083703279495 2023-01-22 14:34:59.895823: step: 148/464, loss: 0.015678076073527336 2023-01-22 14:35:00.486067: step: 150/464, loss: 0.007049160078167915 2023-01-22 14:35:01.099570: step: 152/464, loss: 0.002859183819964528 2023-01-22 14:35:01.679264: step: 154/464, loss: 0.0075204456225037575 2023-01-22 14:35:02.293358: step: 156/464, loss: 0.004340674262493849 2023-01-22 14:35:02.857745: step: 158/464, loss: 0.01086458284407854 2023-01-22 14:35:03.422279: step: 160/464, loss: 0.00022155193437356502 2023-01-22 14:35:04.006403: step: 162/464, loss: 0.01219822559505701 2023-01-22 14:35:04.639109: step: 164/464, loss: 0.007934968918561935 2023-01-22 14:35:05.284106: step: 166/464, loss: 0.010010930709540844 2023-01-22 14:35:05.851938: step: 168/464, loss: 0.00616831099614501 2023-01-22 14:35:06.474640: step: 170/464, loss: 0.010001836344599724 2023-01-22 14:35:07.086658: step: 172/464, loss: 0.0013399292947724462 2023-01-22 14:35:07.708946: step: 174/464, loss: 0.03098447248339653 2023-01-22 14:35:08.335471: step: 176/464, loss: 0.03986944630742073 2023-01-22 14:35:08.995750: step: 178/464, loss: 0.1782996505498886 2023-01-22 14:35:09.626785: step: 180/464, loss: 0.0041822222992777824 2023-01-22 14:35:10.260119: step: 182/464, loss: 0.1593576818704605 2023-01-22 14:35:10.879971: step: 184/464, loss: 0.014084400609135628 2023-01-22 14:35:11.451435: step: 186/464, loss: 0.008832301013171673 2023-01-22 14:35:12.105065: step: 188/464, loss: 0.01603720150887966 2023-01-22 14:35:12.754734: step: 190/464, loss: 0.0220944806933403 2023-01-22 14:35:13.417697: step: 192/464, loss: 0.003413004567846656 2023-01-22 14:35:14.062962: step: 194/464, loss: 0.0013775130501016974 2023-01-22 14:35:14.658625: step: 196/464, loss: 0.00013820311869494617 2023-01-22 14:35:15.256693: step: 198/464, loss: 0.4320983290672302 2023-01-22 14:35:15.854020: step: 200/464, loss: 0.0289418064057827 2023-01-22 14:35:16.574911: step: 202/464, loss: 0.0008436589851044118 2023-01-22 14:35:17.139547: step: 204/464, loss: 0.00023445415718015283 2023-01-22 14:35:17.782143: step: 206/464, loss: 0.013457918539643288 2023-01-22 14:35:18.418042: step: 208/464, loss: 0.00038137033698149025 2023-01-22 14:35:19.025093: step: 210/464, loss: 0.00128361361566931 2023-01-22 14:35:19.624953: step: 212/464, loss: 0.000436326110502705 2023-01-22 14:35:20.152672: step: 214/464, loss: 0.00017113862850237638 2023-01-22 14:35:20.754699: step: 216/464, loss: 0.013265226036310196 2023-01-22 14:35:21.411129: step: 218/464, loss: 0.0004172701155766845 2023-01-22 14:35:22.063117: step: 220/464, loss: 0.004786666948348284 2023-01-22 14:35:22.631546: step: 222/464, loss: 0.007202350068837404 2023-01-22 14:35:23.303961: step: 224/464, loss: 0.00547541119158268 2023-01-22 14:35:23.980341: step: 226/464, loss: 0.01085236668586731 2023-01-22 14:35:24.580059: step: 228/464, loss: 0.0012195601593703032 2023-01-22 14:35:25.222732: step: 230/464, loss: 0.0015526131028309464 2023-01-22 14:35:25.829878: step: 232/464, loss: 0.0009176727035082877 2023-01-22 14:35:26.460442: step: 234/464, loss: 0.00871170312166214 2023-01-22 14:35:27.099401: step: 236/464, loss: 0.0004042471118737012 2023-01-22 14:35:27.701389: step: 238/464, loss: 0.0050428323447704315 2023-01-22 14:35:28.294835: step: 240/464, loss: 0.0018469096394255757 2023-01-22 14:35:28.910682: step: 242/464, loss: 0.0012668923009186983 2023-01-22 14:35:29.518014: 
step: 244/464, loss: 0.009330748580396175 2023-01-22 14:35:30.112817: step: 246/464, loss: 0.020866425707936287 2023-01-22 14:35:30.781287: step: 248/464, loss: 0.010160157456994057 2023-01-22 14:35:31.388779: step: 250/464, loss: 0.0020632955711334944 2023-01-22 14:35:31.985974: step: 252/464, loss: 0.04051090404391289 2023-01-22 14:35:32.574940: step: 254/464, loss: 0.02441595308482647 2023-01-22 14:35:33.249642: step: 256/464, loss: 0.00020131834025960416 2023-01-22 14:35:33.860310: step: 258/464, loss: 0.0008216965361498296 2023-01-22 14:35:34.469488: step: 260/464, loss: 0.0014969498151913285 2023-01-22 14:35:35.128644: step: 262/464, loss: 0.015299106016755104 2023-01-22 14:35:35.796348: step: 264/464, loss: 0.008774522691965103 2023-01-22 14:35:36.434579: step: 266/464, loss: 0.006688565015792847 2023-01-22 14:35:37.009352: step: 268/464, loss: 0.008134461008012295 2023-01-22 14:35:37.638807: step: 270/464, loss: 0.0023492120672017336 2023-01-22 14:35:38.276465: step: 272/464, loss: 0.0002234416751889512 2023-01-22 14:35:38.846346: step: 274/464, loss: 0.11775083839893341 2023-01-22 14:35:39.495902: step: 276/464, loss: 0.005091332830488682 2023-01-22 14:35:40.166290: step: 278/464, loss: 0.16419044137001038 2023-01-22 14:35:40.800021: step: 280/464, loss: 0.3668004274368286 2023-01-22 14:35:41.479680: step: 282/464, loss: 0.007253877818584442 2023-01-22 14:35:42.149917: step: 284/464, loss: 0.04187341406941414 2023-01-22 14:35:42.767231: step: 286/464, loss: 0.11548540741205215 2023-01-22 14:35:43.355377: step: 288/464, loss: 0.005352118983864784 2023-01-22 14:35:43.934287: step: 290/464, loss: 0.006731715518981218 2023-01-22 14:35:44.565695: step: 292/464, loss: 0.028945349156856537 2023-01-22 14:35:45.189533: step: 294/464, loss: 0.0008053245837800205 2023-01-22 14:35:45.780747: step: 296/464, loss: 0.00040874775731936097 2023-01-22 14:35:46.433177: step: 298/464, loss: 0.002919417340308428 2023-01-22 14:35:46.953410: step: 300/464, loss: 0.00022127020929474384 2023-01-22 14:35:47.530782: step: 302/464, loss: 0.0016775665571913123 2023-01-22 14:35:48.116487: step: 304/464, loss: 0.007574569899588823 2023-01-22 14:35:48.800704: step: 306/464, loss: 0.002090072724968195 2023-01-22 14:35:49.405066: step: 308/464, loss: 0.03746446967124939 2023-01-22 14:35:50.013526: step: 310/464, loss: 0.020062992349267006 2023-01-22 14:35:50.618172: step: 312/464, loss: 4.8538749979343265e-05 2023-01-22 14:35:51.314779: step: 314/464, loss: 0.0010255994275212288 2023-01-22 14:35:51.986568: step: 316/464, loss: 0.006342700682580471 2023-01-22 14:35:52.620152: step: 318/464, loss: 0.023778468370437622 2023-01-22 14:35:53.275662: step: 320/464, loss: 0.010570963844656944 2023-01-22 14:35:53.960179: step: 322/464, loss: 0.00012406407040543854 2023-01-22 14:35:54.566282: step: 324/464, loss: 0.0011799990897998214 2023-01-22 14:35:55.212826: step: 326/464, loss: 0.23131757974624634 2023-01-22 14:35:55.821830: step: 328/464, loss: 0.0009250523871742189 2023-01-22 14:35:56.475893: step: 330/464, loss: 0.0004726681509055197 2023-01-22 14:35:57.119812: step: 332/464, loss: 0.0024111729580909014 2023-01-22 14:35:57.737154: step: 334/464, loss: 0.00034986893297173083 2023-01-22 14:35:58.312549: step: 336/464, loss: 0.00017758288595359772 2023-01-22 14:35:58.912531: step: 338/464, loss: 0.00551891652867198 2023-01-22 14:35:59.577525: step: 340/464, loss: 0.006714930757880211 2023-01-22 14:36:00.189498: step: 342/464, loss: 0.010364735499024391 2023-01-22 14:36:00.816152: step: 344/464, loss: 
0.03158561512827873 2023-01-22 14:36:01.533253: step: 346/464, loss: 0.0007301444420590997 2023-01-22 14:36:02.239946: step: 348/464, loss: 0.0017152574146166444 2023-01-22 14:36:02.838527: step: 350/464, loss: 0.010084620676934719 2023-01-22 14:36:03.490323: step: 352/464, loss: 0.007742516230791807 2023-01-22 14:36:04.120746: step: 354/464, loss: 0.0004419960896484554 2023-01-22 14:36:04.699753: step: 356/464, loss: 1.7414729882148094e-05 2023-01-22 14:36:05.309143: step: 358/464, loss: 0.004977700766175985 2023-01-22 14:36:05.915584: step: 360/464, loss: 0.009078857488930225 2023-01-22 14:36:06.586185: step: 362/464, loss: 0.012264758348464966 2023-01-22 14:36:07.177410: step: 364/464, loss: 0.0050095547921955585 2023-01-22 14:36:07.772026: step: 366/464, loss: 0.011527528055012226 2023-01-22 14:36:08.364763: step: 368/464, loss: 0.001211122376844287 2023-01-22 14:36:08.977186: step: 370/464, loss: 0.005661012604832649 2023-01-22 14:36:09.663863: step: 372/464, loss: 0.02688767947256565 2023-01-22 14:36:10.261763: step: 374/464, loss: 0.00032966237631626427 2023-01-22 14:36:10.888863: step: 376/464, loss: 0.040558621287345886 2023-01-22 14:36:11.454286: step: 378/464, loss: 0.0012790873879566789 2023-01-22 14:36:12.126853: step: 380/464, loss: 1.3759578905592207e-05 2023-01-22 14:36:12.710008: step: 382/464, loss: 0.030656851828098297 2023-01-22 14:36:13.307658: step: 384/464, loss: 2.6676945708459243e-05 2023-01-22 14:36:13.949575: step: 386/464, loss: 0.016037778928875923 2023-01-22 14:36:14.547653: step: 388/464, loss: 8.989993511931971e-05 2023-01-22 14:36:15.161647: step: 390/464, loss: 0.014434353448450565 2023-01-22 14:36:15.822430: step: 392/464, loss: 8.683041960466653e-05 2023-01-22 14:36:16.394915: step: 394/464, loss: 0.007126760669052601 2023-01-22 14:36:17.031345: step: 396/464, loss: 0.0069049145095050335 2023-01-22 14:36:17.668847: step: 398/464, loss: 0.006755793001502752 2023-01-22 14:36:18.318569: step: 400/464, loss: 0.13241246342658997 2023-01-22 14:36:18.902931: step: 402/464, loss: 0.018767550587654114 2023-01-22 14:36:19.513499: step: 404/464, loss: 0.0015962908510118723 2023-01-22 14:36:20.073760: step: 406/464, loss: 0.014316637068986893 2023-01-22 14:36:20.667131: step: 408/464, loss: 0.009027567692101002 2023-01-22 14:36:21.306154: step: 410/464, loss: 0.00208667921833694 2023-01-22 14:36:21.906965: step: 412/464, loss: 6.704343104502186e-05 2023-01-22 14:36:22.605525: step: 414/464, loss: 0.0064193690195679665 2023-01-22 14:36:23.207489: step: 416/464, loss: 0.0024382879491895437 2023-01-22 14:36:23.960005: step: 418/464, loss: 0.05062605068087578 2023-01-22 14:36:24.556278: step: 420/464, loss: 0.0002532451180741191 2023-01-22 14:36:25.119090: step: 422/464, loss: 0.002652210183441639 2023-01-22 14:36:25.821484: step: 424/464, loss: 0.0003879011783283204 2023-01-22 14:36:26.549280: step: 426/464, loss: 0.0086573651060462 2023-01-22 14:36:27.215309: step: 428/464, loss: 0.002407163381576538 2023-01-22 14:36:27.775446: step: 430/464, loss: 0.0006860074354335666 2023-01-22 14:36:28.372409: step: 432/464, loss: 0.0076747131533920765 2023-01-22 14:36:29.063766: step: 434/464, loss: 0.003394588129594922 2023-01-22 14:36:29.684522: step: 436/464, loss: 0.030170533806085587 2023-01-22 14:36:30.292950: step: 438/464, loss: 0.00032836163882166147 2023-01-22 14:36:30.865253: step: 440/464, loss: 0.00248022866435349 2023-01-22 14:36:31.536239: step: 442/464, loss: 0.0016721858410164714 2023-01-22 14:36:32.170979: step: 444/464, loss: 0.003967962693423033 2023-01-22 
14:36:32.858429: step: 446/464, loss: 0.05698193982243538 2023-01-22 14:36:33.408861: step: 448/464, loss: 5.1779232308035716e-05 2023-01-22 14:36:34.095406: step: 450/464, loss: 0.0011327448301017284 2023-01-22 14:36:34.755482: step: 452/464, loss: 0.0063192518427968025 2023-01-22 14:36:35.331195: step: 454/464, loss: 0.011665189638733864 2023-01-22 14:36:35.876713: step: 456/464, loss: 0.0004848650423809886 2023-01-22 14:36:36.516489: step: 458/464, loss: 2.0286659491830505e-05 2023-01-22 14:36:37.103564: step: 460/464, loss: 0.028747156262397766 2023-01-22 14:36:37.704387: step: 462/464, loss: 0.0013898280449211597 2023-01-22 14:36:38.292837: step: 464/464, loss: 0.02706788294017315 2023-01-22 14:36:38.915169: step: 466/464, loss: 0.010710745118558407 2023-01-22 14:36:39.485810: step: 468/464, loss: 0.00042844729614444077 2023-01-22 14:36:40.048274: step: 470/464, loss: 0.013478504493832588 2023-01-22 14:36:40.686066: step: 472/464, loss: 0.07601866871118546 2023-01-22 14:36:41.288931: step: 474/464, loss: 9.289790614275262e-05 2023-01-22 14:36:41.868300: step: 476/464, loss: 0.005359324160963297 2023-01-22 14:36:42.446402: step: 478/464, loss: 0.0006429508794099092 2023-01-22 14:36:43.028007: step: 480/464, loss: 0.010076269507408142 2023-01-22 14:36:43.661799: step: 482/464, loss: 0.03780921921133995 2023-01-22 14:36:44.230977: step: 484/464, loss: 0.006643231958150864 2023-01-22 14:36:44.836488: step: 486/464, loss: 0.0005986772594042122 2023-01-22 14:36:45.407192: step: 488/464, loss: 1.6072845028247684e-05 2023-01-22 14:36:46.034071: step: 490/464, loss: 0.001547905383631587 2023-01-22 14:36:46.658313: step: 492/464, loss: 0.01453624852001667 2023-01-22 14:36:47.241057: step: 494/464, loss: 0.00017860019579529762 2023-01-22 14:36:47.852686: step: 496/464, loss: 0.0028860324528068304 2023-01-22 14:36:48.480108: step: 498/464, loss: 0.022191910073161125 2023-01-22 14:36:49.055093: step: 500/464, loss: 0.0008213834371417761 2023-01-22 14:36:49.622733: step: 502/464, loss: 0.0013431920669972897 2023-01-22 14:36:50.206837: step: 504/464, loss: 0.20636188983917236 2023-01-22 14:36:50.815918: step: 506/464, loss: 0.0002305272064404562 2023-01-22 14:36:51.449293: step: 508/464, loss: 0.008078988641500473 2023-01-22 14:36:52.075877: step: 510/464, loss: 0.0010731914080679417 2023-01-22 14:36:52.691806: step: 512/464, loss: 0.03191307187080383 2023-01-22 14:36:53.310493: step: 514/464, loss: 0.001679626409895718 2023-01-22 14:36:53.855074: step: 516/464, loss: 0.011458156630396843 2023-01-22 14:36:54.422843: step: 518/464, loss: 0.0052367281168699265 2023-01-22 14:36:55.064422: step: 520/464, loss: 0.0033925592433661222 2023-01-22 14:36:55.717591: step: 522/464, loss: 0.03281421214342117 2023-01-22 14:36:56.387511: step: 524/464, loss: 0.005167921539396048 2023-01-22 14:36:56.985529: step: 526/464, loss: 0.00027469813358038664 2023-01-22 14:36:57.659465: step: 528/464, loss: 0.020571140572428703 2023-01-22 14:36:58.197823: step: 530/464, loss: 0.16448889672756195 2023-01-22 14:36:58.804424: step: 532/464, loss: 0.0003321019175928086 2023-01-22 14:36:59.368858: step: 534/464, loss: 0.0021385548170655966 2023-01-22 14:36:59.974758: step: 536/464, loss: 0.00895167887210846 2023-01-22 14:37:00.605757: step: 538/464, loss: 0.003001745790243149 2023-01-22 14:37:01.261467: step: 540/464, loss: 0.14123773574829102 2023-01-22 14:37:01.882153: step: 542/464, loss: 0.04274594783782959 2023-01-22 14:37:02.455758: step: 544/464, loss: 0.00013322725135367364 2023-01-22 14:37:03.013803: step: 546/464, 
loss: 1.2787042578565888e-05 2023-01-22 14:37:03.660245: step: 548/464, loss: 0.002002798020839691 2023-01-22 14:37:04.324066: step: 550/464, loss: 0.00823196116834879 2023-01-22 14:37:04.846817: step: 552/464, loss: 0.0031336620450019836 2023-01-22 14:37:05.469993: step: 554/464, loss: 0.001206969260238111 2023-01-22 14:37:06.108905: step: 556/464, loss: 0.00016807409701868892 2023-01-22 14:37:06.693931: step: 558/464, loss: 0.0024363440461456776 2023-01-22 14:37:07.250100: step: 560/464, loss: 0.023397495970129967 2023-01-22 14:37:07.864224: step: 562/464, loss: 0.014201251789927483 2023-01-22 14:37:08.484192: step: 564/464, loss: 0.008838113397359848 2023-01-22 14:37:09.080665: step: 566/464, loss: 0.00206809607334435 2023-01-22 14:37:09.644275: step: 568/464, loss: 3.33831922034733e-05 2023-01-22 14:37:10.260900: step: 570/464, loss: 0.00028463671333156526 2023-01-22 14:37:10.820288: step: 572/464, loss: 0.004011175595223904 2023-01-22 14:37:11.438257: step: 574/464, loss: 0.03371892869472504 2023-01-22 14:37:12.022936: step: 576/464, loss: 0.02161114476621151 2023-01-22 14:37:12.710202: step: 578/464, loss: 0.05861933156847954 2023-01-22 14:37:13.297792: step: 580/464, loss: 0.00013423307973425835 2023-01-22 14:37:13.996246: step: 582/464, loss: 0.08435353636741638 2023-01-22 14:37:14.591012: step: 584/464, loss: 0.015690000727772713 2023-01-22 14:37:15.220666: step: 586/464, loss: 0.010642273351550102 2023-01-22 14:37:15.737409: step: 588/464, loss: 0.0003432645171415061 2023-01-22 14:37:16.365138: step: 590/464, loss: 0.003194859717041254 2023-01-22 14:37:16.933641: step: 592/464, loss: 0.02521173097193241 2023-01-22 14:37:17.620367: step: 594/464, loss: 0.0001475270837545395 2023-01-22 14:37:18.220007: step: 596/464, loss: 0.001698898384347558 2023-01-22 14:37:18.813698: step: 598/464, loss: 0.00022349257778842002 2023-01-22 14:37:19.429337: step: 600/464, loss: 0.000832016346976161 2023-01-22 14:37:20.086179: step: 602/464, loss: 0.004213997162878513 2023-01-22 14:37:20.740366: step: 604/464, loss: 0.004446571692824364 2023-01-22 14:37:21.379758: step: 606/464, loss: 0.009489525109529495 2023-01-22 14:37:22.068421: step: 608/464, loss: 0.005802988074719906 2023-01-22 14:37:22.689900: step: 610/464, loss: 0.005094396416097879 2023-01-22 14:37:23.339261: step: 612/464, loss: 0.008050832897424698 2023-01-22 14:37:23.951764: step: 614/464, loss: 0.013616934418678284 2023-01-22 14:37:24.663460: step: 616/464, loss: 0.0034955181181430817 2023-01-22 14:37:25.338626: step: 618/464, loss: 0.0064336154609918594 2023-01-22 14:37:25.982024: step: 620/464, loss: 0.0051127406768500805 2023-01-22 14:37:26.646069: step: 622/464, loss: 0.019992150366306305 2023-01-22 14:37:27.189157: step: 624/464, loss: 0.000994180329144001 2023-01-22 14:37:27.751393: step: 626/464, loss: 0.03615922853350639 2023-01-22 14:37:28.350357: step: 628/464, loss: 0.039472438395023346 2023-01-22 14:37:28.925253: step: 630/464, loss: 0.0010863380739465356 2023-01-22 14:37:29.549949: step: 632/464, loss: 0.012934243306517601 2023-01-22 14:37:30.170733: step: 634/464, loss: 0.0006051979144103825 2023-01-22 14:37:30.770662: step: 636/464, loss: 0.01047486998140812 2023-01-22 14:37:31.379590: step: 638/464, loss: 0.0045492686331272125 2023-01-22 14:37:32.043801: step: 640/464, loss: 0.00013454600411932915 2023-01-22 14:37:32.713506: step: 642/464, loss: 0.00737126637250185 2023-01-22 14:37:33.366813: step: 644/464, loss: 0.035146042704582214 2023-01-22 14:37:33.921532: step: 646/464, loss: 0.019110510125756264 2023-01-22 
14:37:34.486410: step: 648/464, loss: 0.006026388145983219 2023-01-22 14:37:35.143932: step: 650/464, loss: 0.007406257558614016 2023-01-22 14:37:35.811793: step: 652/464, loss: 0.07835182547569275 2023-01-22 14:37:36.428622: step: 654/464, loss: 4.0928618545876816e-05 2023-01-22 14:37:37.029543: step: 656/464, loss: 0.0014599744463339448 2023-01-22 14:37:37.642496: step: 658/464, loss: 0.011592582799494267 2023-01-22 14:37:38.217306: step: 660/464, loss: 0.00022379629081115127 2023-01-22 14:37:38.861020: step: 662/464, loss: 0.07811188697814941 2023-01-22 14:37:39.488976: step: 664/464, loss: 0.008354385383427143 2023-01-22 14:37:40.112444: step: 666/464, loss: 0.0011065627913922071 2023-01-22 14:37:40.717510: step: 668/464, loss: 0.026546290144324303 2023-01-22 14:37:41.341219: step: 670/464, loss: 0.000566209782846272 2023-01-22 14:37:41.965719: step: 672/464, loss: 0.001381652895361185 2023-01-22 14:37:42.595095: step: 674/464, loss: 0.030852001160383224 2023-01-22 14:37:43.208273: step: 676/464, loss: 0.012157046236097813 2023-01-22 14:37:43.771832: step: 678/464, loss: 0.002967291511595249 2023-01-22 14:37:44.381361: step: 680/464, loss: 0.01703762449324131 2023-01-22 14:37:45.008026: step: 682/464, loss: 0.005857993848621845 2023-01-22 14:37:45.587230: step: 684/464, loss: 0.011864163912832737 2023-01-22 14:37:46.246922: step: 686/464, loss: 0.018091727048158646 2023-01-22 14:37:46.819006: step: 688/464, loss: 0.00010335772094549611 2023-01-22 14:37:47.405783: step: 690/464, loss: 0.03895269334316254 2023-01-22 14:37:47.953186: step: 692/464, loss: 0.0002217577857663855 2023-01-22 14:37:48.494115: step: 694/464, loss: 4.977637217962183e-05 2023-01-22 14:37:49.127901: step: 696/464, loss: 0.0008721100748516619 2023-01-22 14:37:49.728708: step: 698/464, loss: 0.0021709641441702843 2023-01-22 14:37:50.385797: step: 700/464, loss: 0.038126856088638306 2023-01-22 14:37:51.218520: step: 702/464, loss: 0.003595249028876424 2023-01-22 14:37:51.866070: step: 704/464, loss: 0.003833782859146595 2023-01-22 14:37:52.478158: step: 706/464, loss: 0.0010085896356031299 2023-01-22 14:37:53.063663: step: 708/464, loss: 0.001090651610866189 2023-01-22 14:37:53.697694: step: 710/464, loss: 0.005513668060302734 2023-01-22 14:37:54.273481: step: 712/464, loss: 0.011009197682142258 2023-01-22 14:37:54.853953: step: 714/464, loss: 0.0013777940766885877 2023-01-22 14:37:55.523711: step: 716/464, loss: 0.008790001273155212 2023-01-22 14:37:56.097845: step: 718/464, loss: 0.001057420508004725 2023-01-22 14:37:56.693679: step: 720/464, loss: 0.07679551094770432 2023-01-22 14:37:57.312885: step: 722/464, loss: 0.009837577119469643 2023-01-22 14:37:57.933960: step: 724/464, loss: 0.018549852073192596 2023-01-22 14:37:58.517271: step: 726/464, loss: 0.00033019756665453315 2023-01-22 14:37:59.149976: step: 728/464, loss: 0.006539863999933004 2023-01-22 14:37:59.762905: step: 730/464, loss: 1.3240577573014889e-05 2023-01-22 14:38:00.407929: step: 732/464, loss: 0.024357983842492104 2023-01-22 14:38:01.045455: step: 734/464, loss: 0.0012778789969161153 2023-01-22 14:38:01.689849: step: 736/464, loss: 0.0013503863010555506 2023-01-22 14:38:02.289382: step: 738/464, loss: 0.0009660544455982745 2023-01-22 14:38:02.863649: step: 740/464, loss: 0.008409286849200726 2023-01-22 14:38:03.453907: step: 742/464, loss: 0.0019777226261794567 2023-01-22 14:38:04.073292: step: 744/464, loss: 0.01464751921594143 2023-01-22 14:38:04.717067: step: 746/464, loss: 0.0016079582273960114 2023-01-22 14:38:05.369477: step: 748/464, loss: 
0.003580467775464058 2023-01-22 14:38:05.955667: step: 750/464, loss: 0.004533705301582813 2023-01-22 14:38:06.651703: step: 752/464, loss: 0.03962564468383789 2023-01-22 14:38:07.278581: step: 754/464, loss: 0.019578954204916954 2023-01-22 14:38:07.883637: step: 756/464, loss: 7.120955706341192e-05 2023-01-22 14:38:08.532857: step: 758/464, loss: 0.01011847797781229 2023-01-22 14:38:09.108577: step: 760/464, loss: 0.08529553562402725 2023-01-22 14:38:09.722573: step: 762/464, loss: 0.002987058600410819 2023-01-22 14:38:10.328982: step: 764/464, loss: 0.04267577826976776 2023-01-22 14:38:10.950848: step: 766/464, loss: 0.12578915059566498 2023-01-22 14:38:11.585240: step: 768/464, loss: 0.015906495973467827 2023-01-22 14:38:12.234633: step: 770/464, loss: 0.010736898519098759 2023-01-22 14:38:12.861161: step: 772/464, loss: 0.00668590422719717 2023-01-22 14:38:13.481623: step: 774/464, loss: 0.0006817388930357993 2023-01-22 14:38:14.215153: step: 776/464, loss: 0.0001948197023011744 2023-01-22 14:38:14.872646: step: 778/464, loss: 2.019037310674321e-05 2023-01-22 14:38:15.497806: step: 780/464, loss: 0.020074861124157906 2023-01-22 14:38:16.115536: step: 782/464, loss: 0.05165455490350723 2023-01-22 14:38:16.706235: step: 784/464, loss: 0.004251818172633648 2023-01-22 14:38:17.458300: step: 786/464, loss: 5.3936899348627776e-05 2023-01-22 14:38:18.063216: step: 788/464, loss: 0.00522686867043376 2023-01-22 14:38:18.680028: step: 790/464, loss: 5.8365999393572565e-06 2023-01-22 14:38:19.406332: step: 792/464, loss: 0.004626940470188856 2023-01-22 14:38:20.070836: step: 794/464, loss: 0.02032882533967495 2023-01-22 14:38:20.646096: step: 796/464, loss: 0.0004746417107526213 2023-01-22 14:38:21.297370: step: 798/464, loss: 0.0975630059838295 2023-01-22 14:38:21.901327: step: 800/464, loss: 0.008815807290375233 2023-01-22 14:38:22.566018: step: 802/464, loss: 0.01239369623363018 2023-01-22 14:38:23.217595: step: 804/464, loss: 0.0012308191508054733 2023-01-22 14:38:23.923544: step: 806/464, loss: 0.008979709818959236 2023-01-22 14:38:24.516385: step: 808/464, loss: 0.5861056447029114 2023-01-22 14:38:25.160576: step: 810/464, loss: 0.0027114953845739365 2023-01-22 14:38:25.762301: step: 812/464, loss: 0.01977531611919403 2023-01-22 14:38:26.409317: step: 814/464, loss: 0.04423713684082031 2023-01-22 14:38:27.038196: step: 816/464, loss: 0.2734917104244232 2023-01-22 14:38:27.629780: step: 818/464, loss: 0.0002222139446530491 2023-01-22 14:38:28.256800: step: 820/464, loss: 0.0006343009881675243 2023-01-22 14:38:28.881443: step: 822/464, loss: 0.003511299379169941 2023-01-22 14:38:29.484567: step: 824/464, loss: 0.04748666286468506 2023-01-22 14:38:30.104494: step: 826/464, loss: 0.002951279981061816 2023-01-22 14:38:30.785130: step: 828/464, loss: 7.458165782736614e-05 2023-01-22 14:38:31.365660: step: 830/464, loss: 0.003993411548435688 2023-01-22 14:38:31.999212: step: 832/464, loss: 2.084258794784546 2023-01-22 14:38:32.649317: step: 834/464, loss: 0.0006001291912980378 2023-01-22 14:38:33.317526: step: 836/464, loss: 1.1620572877291124e-05 2023-01-22 14:38:33.887843: step: 838/464, loss: 0.0013079376658424735 2023-01-22 14:38:34.463695: step: 840/464, loss: 0.02507406286895275 2023-01-22 14:38:35.059551: step: 842/464, loss: 0.00011102350254077464 2023-01-22 14:38:35.687858: step: 844/464, loss: 0.020627956837415695 2023-01-22 14:38:36.302488: step: 846/464, loss: 0.006506875157356262 2023-01-22 14:38:36.915915: step: 848/464, loss: 0.0049045030027627945 2023-01-22 14:38:37.634070: step: 
850/464, loss: 0.02644026279449463 2023-01-22 14:38:38.204155: step: 852/464, loss: 0.010783646255731583 2023-01-22 14:38:38.855349: step: 854/464, loss: 0.07383334636688232 2023-01-22 14:38:39.489204: step: 856/464, loss: 0.003980218432843685 2023-01-22 14:38:40.025991: step: 858/464, loss: 6.878763815620914e-05 2023-01-22 14:38:40.605669: step: 860/464, loss: 0.0005876432987861335 2023-01-22 14:38:41.175473: step: 862/464, loss: 0.0001143648914876394 2023-01-22 14:38:41.813163: step: 864/464, loss: 0.0001652220234973356 2023-01-22 14:38:42.561082: step: 866/464, loss: 0.00033615445136092603 2023-01-22 14:38:43.188209: step: 868/464, loss: 0.1940748542547226 2023-01-22 14:38:43.743888: step: 870/464, loss: 0.0013214467326179147 2023-01-22 14:38:44.347772: step: 872/464, loss: 0.721562385559082 2023-01-22 14:38:44.997768: step: 874/464, loss: 0.003268428845331073 2023-01-22 14:38:45.612771: step: 876/464, loss: 0.40066006779670715 2023-01-22 14:38:46.271510: step: 878/464, loss: 0.0004343487962614745 2023-01-22 14:38:46.869539: step: 880/464, loss: 0.020197506994009018 2023-01-22 14:38:47.504272: step: 882/464, loss: 0.037329163402318954 2023-01-22 14:38:48.137423: step: 884/464, loss: 0.027084853500127792 2023-01-22 14:38:48.797984: step: 886/464, loss: 0.020723722875118256 2023-01-22 14:38:49.388638: step: 888/464, loss: 0.001170316361822188 2023-01-22 14:38:49.960424: step: 890/464, loss: 0.004882392939180136 2023-01-22 14:38:50.647689: step: 892/464, loss: 0.02754181995987892 2023-01-22 14:38:51.229379: step: 894/464, loss: 0.0029659119900316 2023-01-22 14:38:51.837424: step: 896/464, loss: 0.012844149954617023 2023-01-22 14:38:52.491262: step: 898/464, loss: 0.0032909393776208162 2023-01-22 14:38:53.069020: step: 900/464, loss: 0.004768472630530596 2023-01-22 14:38:53.718397: step: 902/464, loss: 0.011873023584485054 2023-01-22 14:38:54.358853: step: 904/464, loss: 0.01567987911403179 2023-01-22 14:38:54.954700: step: 906/464, loss: 0.017684731632471085 2023-01-22 14:38:55.581611: step: 908/464, loss: 0.020612113177776337 2023-01-22 14:38:56.215062: step: 910/464, loss: 0.0011249807430431247 2023-01-22 14:38:56.888363: step: 912/464, loss: 0.010123012587428093 2023-01-22 14:38:57.481124: step: 914/464, loss: 0.0662151426076889 2023-01-22 14:38:58.204996: step: 916/464, loss: 0.14610925316810608 2023-01-22 14:38:58.777994: step: 918/464, loss: 0.06939881294965744 2023-01-22 14:38:59.534223: step: 920/464, loss: 0.012881439179182053 2023-01-22 14:39:00.151136: step: 922/464, loss: 0.03754853457212448 2023-01-22 14:39:00.813116: step: 924/464, loss: 0.02277289517223835 2023-01-22 14:39:01.413646: step: 926/464, loss: 0.6945991516113281 2023-01-22 14:39:02.036153: step: 928/464, loss: 0.010687149129807949 2023-01-22 14:39:02.559452: step: 930/464, loss: 0.049016449600458145
==================================================
Loss: 0.030
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30557432432432435, 'r': 0.3647177419354839, 'f1': 0.33253676470588245}, 'combined': 0.2450270897832818, 'epoch': 39}
Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29418570044488296, 'r': 0.3206840447864257, 'f1': 0.30686388806300013}, 'combined': 0.20033600982351304, 'epoch': 39}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29376422610015174, 'r': 0.36734463946869067, 'f1': 0.3264597386172007}, 'combined': 0.24054928108635837, 'epoch': 39}
Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31177992844693336, 'r': 0.3286871120667579, 'f1': 0.3200103605625348}, 'combined': 0.20891868098901234, 'epoch': 39}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3069273317435082, 'r': 0.3663326217583808, 'f1': 0.3340091551326413}, 'combined': 0.2461120090451041, 'epoch': 39}
Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30449985081495556, 'r': 0.3137271190214694, 'f1': 0.30904462470771615}, 'combined': 0.20175970317705821, 'epoch': 39}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2513020833333333, 'r': 0.4595238095238095, 'f1': 0.3249158249158249}, 'combined': 0.21661054994388323, 'epoch': 39}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2393617021276596, 'r': 0.4891304347826087, 'f1': 0.32142857142857145}, 'combined': 0.16071428571428573, 'epoch': 39}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4473684210526316, 'r': 0.29310344827586204, 'f1': 0.35416666666666663}, 'combined': 0.23611111111111108, 'epoch': 39}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29695411392405063, 'r': 0.3561195445920304, 'f1': 0.3238567730802416}, 'combined': 0.238631306480178, 'epoch': 11}
Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2807321256264801, 'r': 0.29901834665352606, 'f1': 0.2895868483805805}, 'combined': 0.18905669894276242, 'epoch': 11}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29166666666666663, 'r': 0.41666666666666663, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 11}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982358358739837, 'r': 0.3712385357368754, 'f1': 0.3307569033530572}, 'combined': 0.2437156129969895, 'epoch': 11}
Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30011669394414364, 'r': 0.3031509619895348, 'f1': 0.30162619720586537}, 'combined': 0.19691658470434734, 'epoch': 11}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.5217391304347826, 'f1': 0.3870967741935484}, 'combined': 0.1935483870967742, 'epoch': 11}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951249161636486, 'r': 0.3578459609650312, 'f1': 0.3234731070815977}, 'combined': 0.23834860521801934, 'epoch': 22}
Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.29521273154881944, 'r': 0.3185261336729686, 'f1': 0.30642664272956077}, 'combined': 0.20005055432085314, 'epoch': 22}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 22}
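Note on reading the score dictionaries above (a minimal sketch inferred from the logged numbers, not taken from train.py): each 'f1' is the usual harmonic mean of the listed 'p' and 'r', and every 'combined' value matches the product of the template F1 and the slot F1 (e.g. Dev Chinese, epoch 39: 0.7368421052631579 * 0.33253676470588245 = 0.2450270897832818). The Python snippet below reproduces these values from a logged dictionary; the helper names are illustrative assumptions.

# Sketch only: relationships inferred from the logged values above.
def f1(p, r):
    # Standard F1: harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(metrics):
    # The logged 'combined' value equals template F1 * slot F1.
    return metrics['template']['f1'] * metrics['slot']['f1']

# Dev Chinese entry for epoch 39, copied from the log above.
dev_chinese = {
    'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
    'slot': {'p': 0.30557432432432435, 'r': 0.3647177419354839, 'f1': 0.33253676470588245},
}

# Recomputed F1 scores agree with the logged ones to floating-point precision.
assert abs(f1(dev_chinese['template']['p'], dev_chinese['template']['r']) - dev_chinese['template']['f1']) < 1e-9
assert abs(f1(dev_chinese['slot']['p'], dev_chinese['slot']['r']) - dev_chinese['slot']['f1']) < 1e-9

print(combined(dev_chinese))  # ~0.2450270897832818, as logged for epoch 39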