Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight:
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> 
name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> 
name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> 
name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) 
>>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 
350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> 
name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1128]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 614103147, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:06:48.410269: step: 2/469, loss: 15.25331974029541 2023-01-22 10:06:49.049613: step: 4/469, loss: 21.970951080322266 2023-01-22 10:06:49.618010: step: 6/469, loss: 13.212173461914062 2023-01-22 10:06:50.191004: step: 8/469, loss: 18.27519989013672 2023-01-22 10:06:50.810139: step: 10/469, loss: 6.163782119750977 2023-01-22 10:06:51.469967: step: 12/469, loss: 10.581079483032227 2023-01-22 10:06:52.076881: step: 14/469, loss: 15.638595581054688 2023-01-22 10:06:52.734229: step: 16/469, loss: 11.160401344299316 2023-01-22 10:06:53.356244: step: 18/469, loss: 9.45910358428955 2023-01-22 10:06:53.968936: step: 20/469, loss: 13.210053443908691 2023-01-22 10:06:54.638690: step: 22/469, loss: 27.54082679748535 2023-01-22 10:06:55.287306: step: 24/469, loss: 9.979040145874023 2023-01-22 10:06:55.981498: step: 26/469, loss: 39.79236602783203 2023-01-22 10:06:56.607196: step: 28/469, loss: 19.85337257385254 2023-01-22 10:06:57.230164: step: 30/469, loss: 8.446556091308594 2023-01-22 10:06:57.786408: step: 32/469, loss: 11.976491928100586 2023-01-22 10:06:58.560508: step: 34/469, loss: 16.93046760559082 2023-01-22 10:06:59.227342: step: 36/469, loss: 11.251506805419922 2023-01-22 10:06:59.840408: step: 38/469, loss: 26.54198455810547 2023-01-22 10:07:00.579989: step: 40/469, loss: 13.888751029968262 2023-01-22 10:07:01.228662: step: 42/469, loss: 14.683391571044922 2023-01-22 10:07:01.868078: step: 44/469, loss: 12.402320861816406 2023-01-22 10:07:02.454045: step: 46/469, loss: 12.877483367919922 
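Note on the parameter dump above: each name2classifier.<role>-ffn entry is a two-layer feed-forward head, Linear(1024 -> 350) followed by Linear(350 -> 2), with the hidden width matching --role_hidden_num 350; the irrealis_classifier follows the same pattern with a wider input (1128 -> 350 -> 7). A minimal sketch consistent with those shapes is given below. The activation between the two linear layers and the names RoleFFN / count_params are illustrative assumptions, since only the weight shapes are visible in the log.

    import torch
    import torch.nn as nn

    class RoleFFN(nn.Module):
        # Matches the dumped shapes:
        #   layers.0.weight: [350, 1024], layers.1.weight: [2, 350]
        def __init__(self, in_dim=1024, hidden=350, n_classes=2):
            super().__init__()
            self.layers = nn.ModuleList([
                nn.Linear(in_dim, hidden),
                nn.Linear(hidden, n_classes),
            ])

        def forward(self, x):
            # The non-linearity is an assumption; it has no parameters,
            # so it would not appear in the shape dump either way.
            return self.layers[1](torch.relu(self.layers[0](x)))

    def count_params(model):
        # Reproduces the trainable / non-trainable split printed above
        # (n_trainable_params / n_nontrainable_params).
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        frozen = sum(p.numel() for p in model.parameters() if not p.requires_grad)
        return trainable, frozen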
2023-01-22 10:07:03.060243: step: 48/469, loss: 12.395111083984375 2023-01-22 10:07:03.630176: step: 50/469, loss: 15.595003128051758 2023-01-22 10:07:04.247454: step: 52/469, loss: 12.594765663146973 2023-01-22 10:07:04.908822: step: 54/469, loss: 17.614059448242188 2023-01-22 10:07:05.553839: step: 56/469, loss: 20.128860473632812 2023-01-22 10:07:06.158090: step: 58/469, loss: 11.931539535522461 2023-01-22 10:07:06.784327: step: 60/469, loss: 11.201589584350586 2023-01-22 10:07:07.421783: step: 62/469, loss: 6.375723838806152 2023-01-22 10:07:08.112281: step: 64/469, loss: 9.771102905273438 2023-01-22 10:07:08.722270: step: 66/469, loss: 14.409685134887695 2023-01-22 10:07:09.329973: step: 68/469, loss: 23.893356323242188 2023-01-22 10:07:09.963850: step: 70/469, loss: 5.700743675231934 2023-01-22 10:07:10.682642: step: 72/469, loss: 13.422945976257324 2023-01-22 10:07:11.352440: step: 74/469, loss: 10.815437316894531 2023-01-22 10:07:11.989499: step: 76/469, loss: 11.343101501464844 2023-01-22 10:07:12.605781: step: 78/469, loss: 18.26156997680664 2023-01-22 10:07:13.312365: step: 80/469, loss: 12.07800006866455 2023-01-22 10:07:13.948877: step: 82/469, loss: 30.563390731811523 2023-01-22 10:07:14.543090: step: 84/469, loss: 15.843639373779297 2023-01-22 10:07:15.258456: step: 86/469, loss: 14.654402732849121 2023-01-22 10:07:15.843710: step: 88/469, loss: 7.263551235198975 2023-01-22 10:07:16.436289: step: 90/469, loss: 6.958093643188477 2023-01-22 10:07:17.045814: step: 92/469, loss: 4.206276893615723 2023-01-22 10:07:17.691190: step: 94/469, loss: 19.03128433227539 2023-01-22 10:07:18.320337: step: 96/469, loss: 29.198678970336914 2023-01-22 10:07:18.967089: step: 98/469, loss: 8.968064308166504 2023-01-22 10:07:19.578327: step: 100/469, loss: 14.914346694946289 2023-01-22 10:07:20.211281: step: 102/469, loss: 22.362733840942383 2023-01-22 10:07:20.821677: step: 104/469, loss: 11.355212211608887 2023-01-22 10:07:21.521321: step: 106/469, loss: 5.288875579833984 2023-01-22 10:07:22.218104: step: 108/469, loss: 11.654914855957031 2023-01-22 10:07:22.884502: step: 110/469, loss: 15.550905227661133 2023-01-22 10:07:23.564125: step: 112/469, loss: 5.483501434326172 2023-01-22 10:07:24.152689: step: 114/469, loss: 7.941393852233887 2023-01-22 10:07:24.745373: step: 116/469, loss: 6.272886276245117 2023-01-22 10:07:25.351173: step: 118/469, loss: 3.443401336669922 2023-01-22 10:07:25.935683: step: 120/469, loss: 19.11764907836914 2023-01-22 10:07:26.752694: step: 122/469, loss: 10.003095626831055 2023-01-22 10:07:27.362650: step: 124/469, loss: 9.644991874694824 2023-01-22 10:07:27.933145: step: 126/469, loss: 4.290789604187012 2023-01-22 10:07:28.630202: step: 128/469, loss: 14.747127532958984 2023-01-22 10:07:29.321144: step: 130/469, loss: 8.645899772644043 2023-01-22 10:07:29.947316: step: 132/469, loss: 12.569320678710938 2023-01-22 10:07:30.571448: step: 134/469, loss: 5.450272560119629 2023-01-22 10:07:31.225396: step: 136/469, loss: 4.271622657775879 2023-01-22 10:07:31.893044: step: 138/469, loss: 13.993158340454102 2023-01-22 10:07:32.573915: step: 140/469, loss: 12.455425262451172 2023-01-22 10:07:33.320634: step: 142/469, loss: 9.445347785949707 2023-01-22 10:07:33.985268: step: 144/469, loss: 12.140447616577148 2023-01-22 10:07:34.557128: step: 146/469, loss: 4.812276363372803 2023-01-22 10:07:35.191970: step: 148/469, loss: 4.675212383270264 2023-01-22 10:07:35.833061: step: 150/469, loss: 10.567374229431152 2023-01-22 10:07:36.390551: step: 152/469, loss: 11.976028442382812 
2023-01-22 10:07:37.095792: step: 154/469, loss: 14.886834144592285 2023-01-22 10:07:37.730733: step: 156/469, loss: 6.296490669250488 2023-01-22 10:07:38.383529: step: 158/469, loss: 5.976956367492676 2023-01-22 10:07:39.038830: step: 160/469, loss: 11.025665283203125 2023-01-22 10:07:39.660964: step: 162/469, loss: 4.316882610321045 2023-01-22 10:07:40.300207: step: 164/469, loss: 6.996575355529785 2023-01-22 10:07:40.990402: step: 166/469, loss: 8.291420936584473 2023-01-22 10:07:41.583520: step: 168/469, loss: 11.635712623596191 2023-01-22 10:07:42.219531: step: 170/469, loss: 13.164066314697266 2023-01-22 10:07:42.941674: step: 172/469, loss: 22.71116065979004 2023-01-22 10:07:43.582677: step: 174/469, loss: 4.483279228210449 2023-01-22 10:07:44.219424: step: 176/469, loss: 3.100710153579712 2023-01-22 10:07:44.873150: step: 178/469, loss: 5.378321647644043 2023-01-22 10:07:45.482215: step: 180/469, loss: 4.432587623596191 2023-01-22 10:07:46.155646: step: 182/469, loss: 19.970138549804688 2023-01-22 10:07:46.809324: step: 184/469, loss: 3.3743908405303955 2023-01-22 10:07:47.433826: step: 186/469, loss: 3.5790181159973145 2023-01-22 10:07:48.020607: step: 188/469, loss: 4.171597957611084 2023-01-22 10:07:48.622016: step: 190/469, loss: 2.8348236083984375 2023-01-22 10:07:49.282134: step: 192/469, loss: 4.021544933319092 2023-01-22 10:07:49.892803: step: 194/469, loss: 8.497549057006836 2023-01-22 10:07:50.531068: step: 196/469, loss: 5.995969772338867 2023-01-22 10:07:51.153303: step: 198/469, loss: 16.996166229248047 2023-01-22 10:07:51.730891: step: 200/469, loss: 3.164473533630371 2023-01-22 10:07:52.360382: step: 202/469, loss: 7.8727288246154785 2023-01-22 10:07:52.977063: step: 204/469, loss: 9.354909896850586 2023-01-22 10:07:53.670526: step: 206/469, loss: 4.296377658843994 2023-01-22 10:07:54.283008: step: 208/469, loss: 10.702449798583984 2023-01-22 10:07:54.862510: step: 210/469, loss: 8.718423843383789 2023-01-22 10:07:55.479416: step: 212/469, loss: 5.151759147644043 2023-01-22 10:07:56.114738: step: 214/469, loss: 14.128950119018555 2023-01-22 10:07:56.721222: step: 216/469, loss: 12.070377349853516 2023-01-22 10:07:57.340600: step: 218/469, loss: 2.4820210933685303 2023-01-22 10:07:57.891564: step: 220/469, loss: 5.188449859619141 2023-01-22 10:07:58.471812: step: 222/469, loss: 10.08375358581543 2023-01-22 10:07:59.099625: step: 224/469, loss: 16.227426528930664 2023-01-22 10:07:59.774276: step: 226/469, loss: 3.6772079467773438 2023-01-22 10:08:00.421240: step: 228/469, loss: 10.027013778686523 2023-01-22 10:08:01.109399: step: 230/469, loss: 10.387798309326172 2023-01-22 10:08:01.720127: step: 232/469, loss: 4.331166744232178 2023-01-22 10:08:02.365353: step: 234/469, loss: 8.990900993347168 2023-01-22 10:08:02.974232: step: 236/469, loss: 6.255729675292969 2023-01-22 10:08:03.591752: step: 238/469, loss: 10.792963027954102 2023-01-22 10:08:04.240513: step: 240/469, loss: 14.049783706665039 2023-01-22 10:08:04.841365: step: 242/469, loss: 5.845194339752197 2023-01-22 10:08:05.522598: step: 244/469, loss: 12.815567016601562 2023-01-22 10:08:06.186682: step: 246/469, loss: 2.849949598312378 2023-01-22 10:08:06.906325: step: 248/469, loss: 5.038797855377197 2023-01-22 10:08:07.532973: step: 250/469, loss: 4.091221809387207 2023-01-22 10:08:08.130542: step: 252/469, loss: 1.6356446743011475 2023-01-22 10:08:08.744314: step: 254/469, loss: 5.083172798156738 2023-01-22 10:08:09.326321: step: 256/469, loss: 3.7159621715545654 2023-01-22 10:08:09.932398: step: 258/469, 
loss: 6.490711212158203 2023-01-22 10:08:10.560485: step: 260/469, loss: 6.164132118225098 2023-01-22 10:08:11.175130: step: 262/469, loss: 10.140697479248047 2023-01-22 10:08:11.864541: step: 264/469, loss: 7.924156188964844 2023-01-22 10:08:12.520190: step: 266/469, loss: 13.918113708496094 2023-01-22 10:08:13.183545: step: 268/469, loss: 8.852334976196289 2023-01-22 10:08:13.778197: step: 270/469, loss: 12.88856315612793 2023-01-22 10:08:14.400172: step: 272/469, loss: 3.2401225566864014 2023-01-22 10:08:15.032264: step: 274/469, loss: 2.070889711380005 2023-01-22 10:08:15.693388: step: 276/469, loss: 5.971617221832275 2023-01-22 10:08:16.287837: step: 278/469, loss: 11.369691848754883 2023-01-22 10:08:16.939069: step: 280/469, loss: 10.891914367675781 2023-01-22 10:08:17.659973: step: 282/469, loss: 9.760658264160156 2023-01-22 10:08:18.330384: step: 284/469, loss: 10.473724365234375 2023-01-22 10:08:18.994714: step: 286/469, loss: 6.3389763832092285 2023-01-22 10:08:19.626684: step: 288/469, loss: 2.5295441150665283 2023-01-22 10:08:20.255282: step: 290/469, loss: 7.045894622802734 2023-01-22 10:08:20.903634: step: 292/469, loss: 3.7562596797943115 2023-01-22 10:08:21.526906: step: 294/469, loss: 1.6220287084579468 2023-01-22 10:08:22.172123: step: 296/469, loss: 10.069462776184082 2023-01-22 10:08:22.811542: step: 298/469, loss: 1.358391523361206 2023-01-22 10:08:23.539317: step: 300/469, loss: 5.900286674499512 2023-01-22 10:08:24.181071: step: 302/469, loss: 9.4619722366333 2023-01-22 10:08:24.800952: step: 304/469, loss: 3.0025548934936523 2023-01-22 10:08:25.442072: step: 306/469, loss: 4.691314697265625 2023-01-22 10:08:26.058830: step: 308/469, loss: 5.232211589813232 2023-01-22 10:08:26.655595: step: 310/469, loss: 1.8659954071044922 2023-01-22 10:08:27.320689: step: 312/469, loss: 2.8411831855773926 2023-01-22 10:08:27.903673: step: 314/469, loss: 8.379655838012695 2023-01-22 10:08:28.559142: step: 316/469, loss: 16.53976058959961 2023-01-22 10:08:29.204679: step: 318/469, loss: 4.165069103240967 2023-01-22 10:08:29.798004: step: 320/469, loss: 4.469231128692627 2023-01-22 10:08:30.358520: step: 322/469, loss: 7.795541763305664 2023-01-22 10:08:30.974534: step: 324/469, loss: 4.559579849243164 2023-01-22 10:08:31.559177: step: 326/469, loss: 1.3640480041503906 2023-01-22 10:08:32.138432: step: 328/469, loss: 5.399559020996094 2023-01-22 10:08:32.765000: step: 330/469, loss: 4.02651309967041 2023-01-22 10:08:33.368173: step: 332/469, loss: 3.0965323448181152 2023-01-22 10:08:33.995289: step: 334/469, loss: 3.435300350189209 2023-01-22 10:08:34.714507: step: 336/469, loss: 3.2126917839050293 2023-01-22 10:08:35.340538: step: 338/469, loss: 3.2306101322174072 2023-01-22 10:08:35.881461: step: 340/469, loss: 1.1037557125091553 2023-01-22 10:08:36.474032: step: 342/469, loss: 2.682487726211548 2023-01-22 10:08:37.075284: step: 344/469, loss: 3.663137912750244 2023-01-22 10:08:37.670929: step: 346/469, loss: 5.510931968688965 2023-01-22 10:08:38.274924: step: 348/469, loss: 4.036832332611084 2023-01-22 10:08:38.979046: step: 350/469, loss: 5.561859607696533 2023-01-22 10:08:39.678128: step: 352/469, loss: 5.070573329925537 2023-01-22 10:08:40.380173: step: 354/469, loss: 7.536715507507324 2023-01-22 10:08:41.014930: step: 356/469, loss: 6.385239124298096 2023-01-22 10:08:41.654195: step: 358/469, loss: 8.187983512878418 2023-01-22 10:08:42.267420: step: 360/469, loss: 3.9256420135498047 2023-01-22 10:08:42.900742: step: 362/469, loss: 2.4221854209899902 2023-01-22 10:08:43.634520: 
step: 364/469, loss: 5.579275131225586 2023-01-22 10:08:44.233131: step: 366/469, loss: 11.50503158569336 2023-01-22 10:08:44.769858: step: 368/469, loss: 7.054537296295166 2023-01-22 10:08:45.545104: step: 370/469, loss: 13.798737525939941 2023-01-22 10:08:46.158709: step: 372/469, loss: 1.4237483739852905 2023-01-22 10:08:46.767987: step: 374/469, loss: 1.0680210590362549 2023-01-22 10:08:47.382526: step: 376/469, loss: 8.365805625915527 2023-01-22 10:08:47.988559: step: 378/469, loss: 2.9956741333007812 2023-01-22 10:08:48.663830: step: 380/469, loss: 2.4886374473571777 2023-01-22 10:08:49.331903: step: 382/469, loss: 2.274200439453125 2023-01-22 10:08:49.897570: step: 384/469, loss: 2.3594021797180176 2023-01-22 10:08:50.507164: step: 386/469, loss: 3.8289542198181152 2023-01-22 10:08:51.203520: step: 388/469, loss: 3.451693534851074 2023-01-22 10:08:51.858607: step: 390/469, loss: 4.932161331176758 2023-01-22 10:08:52.480929: step: 392/469, loss: 8.29738712310791 2023-01-22 10:08:53.109818: step: 394/469, loss: 2.3075027465820312 2023-01-22 10:08:53.771943: step: 396/469, loss: 6.424660682678223 2023-01-22 10:08:54.435638: step: 398/469, loss: 7.8025360107421875 2023-01-22 10:08:55.085074: step: 400/469, loss: 9.999170303344727 2023-01-22 10:08:55.804459: step: 402/469, loss: 6.752676963806152 2023-01-22 10:08:56.405598: step: 404/469, loss: 4.343136787414551 2023-01-22 10:08:57.035428: step: 406/469, loss: 2.4729204177856445 2023-01-22 10:08:57.681595: step: 408/469, loss: 1.6550145149230957 2023-01-22 10:08:58.287506: step: 410/469, loss: 3.60208797454834 2023-01-22 10:08:58.948107: step: 412/469, loss: 5.994638919830322 2023-01-22 10:08:59.568114: step: 414/469, loss: 2.4971179962158203 2023-01-22 10:09:00.193275: step: 416/469, loss: 1.627087950706482 2023-01-22 10:09:00.783021: step: 418/469, loss: 1.1389089822769165 2023-01-22 10:09:01.440831: step: 420/469, loss: 2.5901660919189453 2023-01-22 10:09:02.059366: step: 422/469, loss: 5.187749862670898 2023-01-22 10:09:02.696302: step: 424/469, loss: 3.3282017707824707 2023-01-22 10:09:03.306350: step: 426/469, loss: 1.9786550998687744 2023-01-22 10:09:03.961382: step: 428/469, loss: 1.9001668691635132 2023-01-22 10:09:04.563840: step: 430/469, loss: 1.549994707107544 2023-01-22 10:09:05.232650: step: 432/469, loss: 1.5623385906219482 2023-01-22 10:09:05.844299: step: 434/469, loss: 1.4152212142944336 2023-01-22 10:09:06.430390: step: 436/469, loss: 5.6168107986450195 2023-01-22 10:09:07.024323: step: 438/469, loss: 4.321221828460693 2023-01-22 10:09:07.615196: step: 440/469, loss: 1.9432930946350098 2023-01-22 10:09:08.235869: step: 442/469, loss: 6.5523271560668945 2023-01-22 10:09:08.848582: step: 444/469, loss: 6.476694107055664 2023-01-22 10:09:09.466696: step: 446/469, loss: 5.213431358337402 2023-01-22 10:09:10.097208: step: 448/469, loss: 2.914058208465576 2023-01-22 10:09:10.714373: step: 450/469, loss: 2.643801212310791 2023-01-22 10:09:11.287968: step: 452/469, loss: 2.708839178085327 2023-01-22 10:09:11.927000: step: 454/469, loss: 2.0420830249786377 2023-01-22 10:09:12.584699: step: 456/469, loss: 3.3017520904541016 2023-01-22 10:09:13.195550: step: 458/469, loss: 2.964846611022949 2023-01-22 10:09:13.847565: step: 460/469, loss: 5.405810356140137 2023-01-22 10:09:14.426809: step: 462/469, loss: 1.0382235050201416 2023-01-22 10:09:15.065180: step: 464/469, loss: 5.912646293640137 2023-01-22 10:09:15.686666: step: 466/469, loss: 2.2638630867004395 2023-01-22 10:09:16.283632: step: 468/469, loss: 1.6986610889434814 
2023-01-22 10:09:16.915168: step: 470/469, loss: 3.486217737197876 2023-01-22 10:09:17.524519: step: 472/469, loss: 1.7115916013717651 2023-01-22 10:09:18.233702: step: 474/469, loss: 1.375154733657837 2023-01-22 10:09:18.819625: step: 476/469, loss: 2.235123634338379 2023-01-22 10:09:19.483969: step: 478/469, loss: 0.8839229941368103 2023-01-22 10:09:20.138481: step: 480/469, loss: 4.83689546585083 2023-01-22 10:09:20.753772: step: 482/469, loss: 3.807434558868408 2023-01-22 10:09:21.364678: step: 484/469, loss: 2.270559310913086 2023-01-22 10:09:21.942097: step: 486/469, loss: 4.294483184814453 2023-01-22 10:09:22.608900: step: 488/469, loss: 2.208724021911621 2023-01-22 10:09:23.261230: step: 490/469, loss: 5.8313093185424805 2023-01-22 10:09:23.889798: step: 492/469, loss: 1.0127171277999878 2023-01-22 10:09:24.532104: step: 494/469, loss: 1.4656672477722168 2023-01-22 10:09:25.109486: step: 496/469, loss: 2.5641977787017822 2023-01-22 10:09:25.761596: step: 498/469, loss: 2.8379359245300293 2023-01-22 10:09:26.367497: step: 500/469, loss: 2.2115871906280518 2023-01-22 10:09:26.966958: step: 502/469, loss: 1.2317479848861694 2023-01-22 10:09:27.586037: step: 504/469, loss: 1.4589911699295044 2023-01-22 10:09:28.188057: step: 506/469, loss: 1.1911771297454834 2023-01-22 10:09:28.812350: step: 508/469, loss: 3.527251720428467 2023-01-22 10:09:29.435126: step: 510/469, loss: 0.5120771527290344 2023-01-22 10:09:30.030835: step: 512/469, loss: 0.37717804312705994 2023-01-22 10:09:30.672380: step: 514/469, loss: 8.541683197021484 2023-01-22 10:09:31.278167: step: 516/469, loss: 5.253645896911621 2023-01-22 10:09:31.870448: step: 518/469, loss: 1.1242091655731201 2023-01-22 10:09:32.484479: step: 520/469, loss: 2.308777093887329 2023-01-22 10:09:33.065230: step: 522/469, loss: 1.7971981763839722 2023-01-22 10:09:33.689323: step: 524/469, loss: 1.091801643371582 2023-01-22 10:09:34.290882: step: 526/469, loss: 0.8410761952400208 2023-01-22 10:09:34.896794: step: 528/469, loss: 7.412558555603027 2023-01-22 10:09:35.558059: step: 530/469, loss: 2.230325222015381 2023-01-22 10:09:36.169601: step: 532/469, loss: 1.1105403900146484 2023-01-22 10:09:36.749488: step: 534/469, loss: 1.8819019794464111 2023-01-22 10:09:37.385500: step: 536/469, loss: 3.5490570068359375 2023-01-22 10:09:38.025052: step: 538/469, loss: 3.6693639755249023 2023-01-22 10:09:38.640972: step: 540/469, loss: 0.673589289188385 2023-01-22 10:09:39.286721: step: 542/469, loss: 1.112943172454834 2023-01-22 10:09:39.892521: step: 544/469, loss: 1.9021234512329102 2023-01-22 10:09:40.590533: step: 546/469, loss: 3.3115804195404053 2023-01-22 10:09:41.145758: step: 548/469, loss: 2.3288450241088867 2023-01-22 10:09:41.755715: step: 550/469, loss: 1.0352078676223755 2023-01-22 10:09:42.371139: step: 552/469, loss: 1.2727808952331543 2023-01-22 10:09:42.997060: step: 554/469, loss: 0.9366053342819214 2023-01-22 10:09:43.654684: step: 556/469, loss: 0.9389679431915283 2023-01-22 10:09:44.274864: step: 558/469, loss: 4.485273361206055 2023-01-22 10:09:44.935771: step: 560/469, loss: 3.1431937217712402 2023-01-22 10:09:45.528496: step: 562/469, loss: 2.523747682571411 2023-01-22 10:09:46.189699: step: 564/469, loss: 29.507835388183594 2023-01-22 10:09:46.824513: step: 566/469, loss: 1.6524688005447388 2023-01-22 10:09:47.456163: step: 568/469, loss: 1.1085619926452637 2023-01-22 10:09:48.083397: step: 570/469, loss: 1.5616161823272705 2023-01-22 10:09:48.794982: step: 572/469, loss: 3.4873030185699463 2023-01-22 10:09:49.437736: step: 
574/469, loss: 7.625667572021484 2023-01-22 10:09:50.116605: step: 576/469, loss: 2.3042359352111816 2023-01-22 10:09:50.847421: step: 578/469, loss: 2.9243178367614746 2023-01-22 10:09:51.465328: step: 580/469, loss: 9.828018188476562 2023-01-22 10:09:52.070625: step: 582/469, loss: 2.924833059310913 2023-01-22 10:09:52.743505: step: 584/469, loss: 3.211275100708008 2023-01-22 10:09:53.372143: step: 586/469, loss: 9.504095077514648 2023-01-22 10:09:54.005347: step: 588/469, loss: 3.026228666305542 2023-01-22 10:09:54.634014: step: 590/469, loss: 3.233726739883423 2023-01-22 10:09:55.270528: step: 592/469, loss: 2.781447649002075 2023-01-22 10:09:55.979437: step: 594/469, loss: 1.2948658466339111 2023-01-22 10:09:56.636548: step: 596/469, loss: 2.5743391513824463 2023-01-22 10:09:57.315790: step: 598/469, loss: 2.1096224784851074 2023-01-22 10:09:57.932177: step: 600/469, loss: 2.6158783435821533 2023-01-22 10:09:58.579393: step: 602/469, loss: 1.2256364822387695 2023-01-22 10:09:59.188333: step: 604/469, loss: 2.1764283180236816 2023-01-22 10:09:59.881897: step: 606/469, loss: 3.365410327911377 2023-01-22 10:10:00.492323: step: 608/469, loss: 2.545825242996216 2023-01-22 10:10:01.068858: step: 610/469, loss: 2.083864450454712 2023-01-22 10:10:01.702384: step: 612/469, loss: 2.2250077724456787 2023-01-22 10:10:02.294548: step: 614/469, loss: 2.8714287281036377 2023-01-22 10:10:02.936359: step: 616/469, loss: 1.2265431880950928 2023-01-22 10:10:03.597765: step: 618/469, loss: 2.6019041538238525 2023-01-22 10:10:04.188473: step: 620/469, loss: 1.0531139373779297 2023-01-22 10:10:04.762005: step: 622/469, loss: 1.9407305717468262 2023-01-22 10:10:05.423096: step: 624/469, loss: 2.221311330795288 2023-01-22 10:10:06.024099: step: 626/469, loss: 2.031471014022827 2023-01-22 10:10:06.659205: step: 628/469, loss: 1.2906171083450317 2023-01-22 10:10:07.332843: step: 630/469, loss: 3.350187063217163 2023-01-22 10:10:07.958388: step: 632/469, loss: 1.5802769660949707 2023-01-22 10:10:08.579541: step: 634/469, loss: 2.8914926052093506 2023-01-22 10:10:09.119342: step: 636/469, loss: 1.4508517980575562 2023-01-22 10:10:09.799498: step: 638/469, loss: 0.6638691425323486 2023-01-22 10:10:10.417681: step: 640/469, loss: 1.0972434282302856 2023-01-22 10:10:11.024939: step: 642/469, loss: 2.6330790519714355 2023-01-22 10:10:11.629329: step: 644/469, loss: 1.5233993530273438 2023-01-22 10:10:12.208725: step: 646/469, loss: 2.0933659076690674 2023-01-22 10:10:12.827819: step: 648/469, loss: 1.0653287172317505 2023-01-22 10:10:13.513501: step: 650/469, loss: 5.881309509277344 2023-01-22 10:10:14.209815: step: 652/469, loss: 3.5361099243164062 2023-01-22 10:10:14.953913: step: 654/469, loss: 2.0320074558258057 2023-01-22 10:10:15.582068: step: 656/469, loss: 1.1001843214035034 2023-01-22 10:10:16.259673: step: 658/469, loss: 2.1982741355895996 2023-01-22 10:10:16.858538: step: 660/469, loss: 0.8307514190673828 2023-01-22 10:10:17.523107: step: 662/469, loss: 1.8594615459442139 2023-01-22 10:10:18.148697: step: 664/469, loss: 0.6427603363990784 2023-01-22 10:10:18.880001: step: 666/469, loss: 1.2357300519943237 2023-01-22 10:10:19.486839: step: 668/469, loss: 1.6569201946258545 2023-01-22 10:10:20.066643: step: 670/469, loss: 4.301616668701172 2023-01-22 10:10:20.730520: step: 672/469, loss: 0.4775109887123108 2023-01-22 10:10:21.381915: step: 674/469, loss: 7.053918838500977 2023-01-22 10:10:21.994226: step: 676/469, loss: 0.4933178722858429 2023-01-22 10:10:22.820481: step: 678/469, loss: 11.713030815124512 
2023-01-22 10:10:23.453630: step: 680/469, loss: 1.852529764175415 2023-01-22 10:10:24.115095: step: 682/469, loss: 1.1127151250839233 2023-01-22 10:10:24.749363: step: 684/469, loss: 1.204505443572998 2023-01-22 10:10:25.342888: step: 686/469, loss: 1.263776183128357 2023-01-22 10:10:25.967700: step: 688/469, loss: 6.019697189331055 2023-01-22 10:10:26.610621: step: 690/469, loss: 2.318279981613159 2023-01-22 10:10:27.265728: step: 692/469, loss: 0.42347732186317444 2023-01-22 10:10:27.907006: step: 694/469, loss: 3.384830951690674 2023-01-22 10:10:28.591916: step: 696/469, loss: 0.8585728406906128 2023-01-22 10:10:29.216950: step: 698/469, loss: 2.9020438194274902 2023-01-22 10:10:29.809043: step: 700/469, loss: 1.2200758457183838 2023-01-22 10:10:30.438085: step: 702/469, loss: 1.8751510381698608 2023-01-22 10:10:31.062764: step: 704/469, loss: 4.669819355010986 2023-01-22 10:10:31.761354: step: 706/469, loss: 2.9634554386138916 2023-01-22 10:10:32.382127: step: 708/469, loss: 1.1599161624908447 2023-01-22 10:10:32.958380: step: 710/469, loss: 0.8766475915908813 2023-01-22 10:10:33.569621: step: 712/469, loss: 1.331620454788208 2023-01-22 10:10:34.174083: step: 714/469, loss: 2.1192688941955566 2023-01-22 10:10:34.841019: step: 716/469, loss: 1.7069741487503052 2023-01-22 10:10:35.461087: step: 718/469, loss: 2.662444591522217 2023-01-22 10:10:36.024130: step: 720/469, loss: 2.0508873462677 2023-01-22 10:10:36.620171: step: 722/469, loss: 0.8278382420539856 2023-01-22 10:10:37.278710: step: 724/469, loss: 0.6639025211334229 2023-01-22 10:10:37.989484: step: 726/469, loss: 4.585159778594971 2023-01-22 10:10:38.614475: step: 728/469, loss: 0.5474594235420227 2023-01-22 10:10:39.292453: step: 730/469, loss: 2.0028669834136963 2023-01-22 10:10:39.955278: step: 732/469, loss: 2.336195945739746 2023-01-22 10:10:40.608089: step: 734/469, loss: 2.4578120708465576 2023-01-22 10:10:41.268853: step: 736/469, loss: 1.6725635528564453 2023-01-22 10:10:41.876671: step: 738/469, loss: 2.13787579536438 2023-01-22 10:10:42.539143: step: 740/469, loss: 1.5839296579360962 2023-01-22 10:10:43.180950: step: 742/469, loss: 6.753298759460449 2023-01-22 10:10:43.778455: step: 744/469, loss: 1.4909178018569946 2023-01-22 10:10:44.444931: step: 746/469, loss: 0.5993936061859131 2023-01-22 10:10:45.069121: step: 748/469, loss: 8.058149337768555 2023-01-22 10:10:45.775697: step: 750/469, loss: 0.4440227150917053 2023-01-22 10:10:46.423587: step: 752/469, loss: 1.278122067451477 2023-01-22 10:10:47.079018: step: 754/469, loss: 1.295558214187622 2023-01-22 10:10:47.685412: step: 756/469, loss: 6.894983768463135 2023-01-22 10:10:48.317664: step: 758/469, loss: 5.87663459777832 2023-01-22 10:10:48.957096: step: 760/469, loss: 0.998386800289154 2023-01-22 10:10:49.517052: step: 762/469, loss: 0.8195573687553406 2023-01-22 10:10:50.127801: step: 764/469, loss: 3.0501463413238525 2023-01-22 10:10:50.803362: step: 766/469, loss: 4.492542743682861 2023-01-22 10:10:51.486172: step: 768/469, loss: 1.9226833581924438 2023-01-22 10:10:52.134155: step: 770/469, loss: 0.6186398863792419 2023-01-22 10:10:52.707318: step: 772/469, loss: 1.1675491333007812 2023-01-22 10:10:53.266679: step: 774/469, loss: 2.159048080444336 2023-01-22 10:10:53.849794: step: 776/469, loss: 1.388982892036438 2023-01-22 10:10:54.487052: step: 778/469, loss: 1.492708683013916 2023-01-22 10:10:55.188981: step: 780/469, loss: 2.3841464519500732 2023-01-22 10:10:55.863398: step: 782/469, loss: 7.847356796264648 2023-01-22 10:10:56.510835: step: 784/469, 
loss: 3.5350546836853027 2023-01-22 10:10:57.167362: step: 786/469, loss: 5.889021873474121 2023-01-22 10:10:57.738219: step: 788/469, loss: 1.210557460784912 2023-01-22 10:10:58.404171: step: 790/469, loss: 4.059767723083496 2023-01-22 10:10:59.022474: step: 792/469, loss: 1.8310210704803467 2023-01-22 10:10:59.631474: step: 794/469, loss: 0.7778642773628235 2023-01-22 10:11:00.220346: step: 796/469, loss: 4.306910514831543 2023-01-22 10:11:00.934397: step: 798/469, loss: 3.923731565475464 2023-01-22 10:11:01.559717: step: 800/469, loss: 2.197035312652588 2023-01-22 10:11:02.212423: step: 802/469, loss: 0.8405678272247314 2023-01-22 10:11:02.807890: step: 804/469, loss: 0.9253308773040771 2023-01-22 10:11:03.384490: step: 806/469, loss: 1.0027258396148682 2023-01-22 10:11:04.018014: step: 808/469, loss: 2.245345115661621 2023-01-22 10:11:04.700314: step: 810/469, loss: 1.8663848638534546 2023-01-22 10:11:05.272612: step: 812/469, loss: 4.9495134353637695 2023-01-22 10:11:05.839279: step: 814/469, loss: 6.545098304748535 2023-01-22 10:11:06.448965: step: 816/469, loss: 0.43992239236831665 2023-01-22 10:11:07.066800: step: 818/469, loss: 3.352154493331909 2023-01-22 10:11:07.657005: step: 820/469, loss: 1.6969307661056519 2023-01-22 10:11:08.273554: step: 822/469, loss: 2.3811986446380615 2023-01-22 10:11:08.874888: step: 824/469, loss: 8.892226219177246 2023-01-22 10:11:09.467129: step: 826/469, loss: 7.079122543334961 2023-01-22 10:11:10.093723: step: 828/469, loss: 1.0855681896209717 2023-01-22 10:11:10.762452: step: 830/469, loss: 0.5330224633216858 2023-01-22 10:11:11.348930: step: 832/469, loss: 1.5775420665740967 2023-01-22 10:11:11.963122: step: 834/469, loss: 0.6331006288528442 2023-01-22 10:11:12.599177: step: 836/469, loss: 3.898449182510376 2023-01-22 10:11:13.377517: step: 838/469, loss: 1.4449671506881714 2023-01-22 10:11:14.211229: step: 840/469, loss: 2.1984500885009766 2023-01-22 10:11:14.853030: step: 842/469, loss: 1.5962657928466797 2023-01-22 10:11:15.410681: step: 844/469, loss: 2.144331455230713 2023-01-22 10:11:16.069384: step: 846/469, loss: 15.531341552734375 2023-01-22 10:11:16.704631: step: 848/469, loss: 1.8532617092132568 2023-01-22 10:11:17.304026: step: 850/469, loss: 0.6547366380691528 2023-01-22 10:11:17.919697: step: 852/469, loss: 2.006427764892578 2023-01-22 10:11:18.507770: step: 854/469, loss: 1.9097877740859985 2023-01-22 10:11:19.067554: step: 856/469, loss: 1.9105238914489746 2023-01-22 10:11:19.749236: step: 858/469, loss: 1.0565221309661865 2023-01-22 10:11:20.358245: step: 860/469, loss: 0.807134747505188 2023-01-22 10:11:20.989581: step: 862/469, loss: 0.49223843216896057 2023-01-22 10:11:21.622670: step: 864/469, loss: 2.2668728828430176 2023-01-22 10:11:22.241090: step: 866/469, loss: 1.8541752099990845 2023-01-22 10:11:22.794273: step: 868/469, loss: 1.8676748275756836 2023-01-22 10:11:23.492279: step: 870/469, loss: 1.1457188129425049 2023-01-22 10:11:24.215630: step: 872/469, loss: 2.078320026397705 2023-01-22 10:11:24.831817: step: 874/469, loss: 0.7892571687698364 2023-01-22 10:11:25.384337: step: 876/469, loss: 0.4894615709781647 2023-01-22 10:11:26.064460: step: 878/469, loss: 10.354825973510742 2023-01-22 10:11:26.694316: step: 880/469, loss: 1.5977815389633179 2023-01-22 10:11:27.284482: step: 882/469, loss: 0.6383422613143921 2023-01-22 10:11:27.980193: step: 884/469, loss: 1.309027075767517 2023-01-22 10:11:28.678179: step: 886/469, loss: 4.253027439117432 2023-01-22 10:11:29.288993: step: 888/469, loss: 1.439487099647522 2023-01-22 
10:11:30.002848: step: 890/469, loss: 2.6710166931152344 2023-01-22 10:11:30.624748: step: 892/469, loss: 0.7880006432533264 2023-01-22 10:11:31.258590: step: 894/469, loss: 1.1811531782150269 2023-01-22 10:11:31.968349: step: 896/469, loss: 2.1026933193206787 2023-01-22 10:11:32.585559: step: 898/469, loss: 2.2342774868011475 2023-01-22 10:11:33.253294: step: 900/469, loss: 3.9589414596557617 2023-01-22 10:11:33.862043: step: 902/469, loss: 1.32570219039917 2023-01-22 10:11:34.494207: step: 904/469, loss: 1.8051782846450806 2023-01-22 10:11:35.113025: step: 906/469, loss: 0.7008894085884094 2023-01-22 10:11:35.741349: step: 908/469, loss: 2.1311097145080566 2023-01-22 10:11:36.390972: step: 910/469, loss: 1.001122236251831 2023-01-22 10:11:37.000067: step: 912/469, loss: 1.2099871635437012 2023-01-22 10:11:37.679443: step: 914/469, loss: 0.3105977177619934 2023-01-22 10:11:38.340197: step: 916/469, loss: 2.5670032501220703 2023-01-22 10:11:38.953249: step: 918/469, loss: 3.0132174491882324 2023-01-22 10:11:39.537911: step: 920/469, loss: 0.9042558670043945 2023-01-22 10:11:40.164318: step: 922/469, loss: 5.169037818908691 2023-01-22 10:11:40.736931: step: 924/469, loss: 5.225088119506836 2023-01-22 10:11:41.317971: step: 926/469, loss: 3.599080801010132 2023-01-22 10:11:41.944365: step: 928/469, loss: 1.2377464771270752 2023-01-22 10:11:42.573119: step: 930/469, loss: 3.1070525646209717 2023-01-22 10:11:43.232402: step: 932/469, loss: 1.3288240432739258 2023-01-22 10:11:43.904643: step: 934/469, loss: 3.260507106781006 2023-01-22 10:11:44.509973: step: 936/469, loss: 3.220916748046875 2023-01-22 10:11:45.174539: step: 938/469, loss: 1.1544103622436523 ================================================== Loss: 5.462 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26948992347327666, 'r': 0.19205614584012343, 'f1': 0.22427748598107128}, 'combined': 0.16525709493342094, 'epoch': 0} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.1917712355212355, 'r': 0.09702191040500065, 'f1': 0.12885355413351782}, 'combined': 0.07028375680010063, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2632367541650523, 'r': 0.17714987614888209, 'f1': 0.21177917397233584}, 'combined': 0.1560478124006685, 'epoch': 0} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.21396758791107062, 'r': 0.0957685375244628, 'f1': 0.1323149693505487}, 'combined': 0.07217180146393565, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25949469807349845, 'r': 0.17414105447276362, 'f1': 0.20841768736672386}, 'combined': 0.15357092753337548, 'epoch': 0} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.20345511140235914, 'r': 0.10107842818075272, 'f1': 0.13505850878719333}, 'combined': 0.07366827752028726, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2413793103448276, 'r': 0.2, 'f1': 0.21875000000000003}, 'combined': 0.14583333333333334, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.5, 'r': 0.1896551724137931, 'f1': 0.27499999999999997}, 'combined': 0.1833333333333333, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26948992347327666, 'r': 0.19205614584012343, 'f1': 0.22427748598107128}, 'combined': 0.16525709493342094, 'epoch': 0} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.1917712355212355, 'r': 0.09702191040500065, 'f1': 0.12885355413351782}, 'combined': 0.07028375680010063, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2413793103448276, 'r': 0.2, 'f1': 0.21875000000000003}, 'combined': 0.14583333333333334, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2632367541650523, 'r': 0.17714987614888209, 'f1': 0.21177917397233584}, 'combined': 0.1560478124006685, 'epoch': 0} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.21396758791107062, 'r': 0.0957685375244628, 'f1': 0.1323149693505487}, 'combined': 0.07217180146393565, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25949469807349845, 'r': 0.17414105447276362, 'f1': 0.20841768736672386}, 'combined': 0.15357092753337548, 'epoch': 0} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.20345511140235914, 'r': 0.10107842818075272, 'f1': 0.13505850878719333}, 'combined': 0.07366827752028726, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1896551724137931, 'f1': 0.27499999999999997}, 'combined': 0.1833333333333333, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:14:59.493022: step: 2/469, loss: 0.9774237275123596 2023-01-22 10:15:00.184219: step: 4/469, loss: 6.636249542236328 2023-01-22 10:15:00.788663: step: 6/469, loss: 1.990834355354309 2023-01-22 10:15:01.540505: step: 8/469, loss: 1.2279059886932373 2023-01-22 10:15:02.147297: step: 10/469, loss: 2.2220447063446045 2023-01-22 10:15:02.795583: step: 12/469, loss: 1.0082879066467285 2023-01-22 10:15:03.402734: step: 14/469, loss: 2.0571486949920654 2023-01-22 10:15:04.074632: step: 16/469, loss: 3.718430995941162 2023-01-22 10:15:04.693594: step: 18/469, loss: 1.8861489295959473 2023-01-22 10:15:05.381587: step: 20/469, loss: 4.982763767242432 2023-01-22 10:15:06.049472: step: 22/469, loss: 1.4882051944732666 2023-01-22 10:15:06.663609: step: 24/469, loss: 0.5637761950492859 2023-01-22 10:15:07.325591: step: 26/469, loss: 0.5937745571136475 2023-01-22 10:15:07.954032: step: 28/469, loss: 5.643711566925049 2023-01-22 10:15:08.575547: step: 30/469, loss: 2.5158143043518066 2023-01-22 10:15:09.222075: step: 32/469, loss: 3.539027690887451 
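Note on the epoch summaries above: the printed scores are self-consistent, with each f1 being the harmonic mean of its p and r, and 'combined' being the product of the template and slot F1 scores. A quick check against the Dev Chinese result for epoch 0 (the helper name f1 is illustrative):

    def f1(p, r):
        # Harmonic mean of precision and recall; defined as 0 when p + r == 0.
        return 2 * p * r / (p + r) if (p + r) else 0.0

    template_f1 = f1(1.0, 0.5833333333333334)
    # -> 0.7368421052631579, as printed
    slot_f1 = f1(0.26948992347327666, 0.19205614584012343)
    # -> 0.22427748598107128, as printed
    combined = template_f1 * slot_f1
    # -> 0.16525709493342094, matching the 'combined' field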
2023-01-22 10:15:09.878632: step: 34/469, loss: 1.1145998239517212 2023-01-22 10:15:10.484742: step: 36/469, loss: 3.8723175525665283 2023-01-22 10:15:11.091993: step: 38/469, loss: 2.6833505630493164 2023-01-22 10:15:11.740807: step: 40/469, loss: 2.282891273498535 2023-01-22 10:15:12.363075: step: 42/469, loss: 1.2406158447265625 2023-01-22 10:15:12.982571: step: 44/469, loss: 0.4674290418624878 2023-01-22 10:15:13.585067: step: 46/469, loss: 4.22768497467041 2023-01-22 10:15:14.187890: step: 48/469, loss: 1.3391175270080566 2023-01-22 10:15:14.772301: step: 50/469, loss: 0.47300630807876587 2023-01-22 10:15:15.409814: step: 52/469, loss: 1.3602882623672485 2023-01-22 10:15:16.024451: step: 54/469, loss: 4.702951908111572 2023-01-22 10:15:16.691169: step: 56/469, loss: 0.9324207901954651 2023-01-22 10:15:17.261133: step: 58/469, loss: 3.174039363861084 2023-01-22 10:15:17.862253: step: 60/469, loss: 1.0151244401931763 2023-01-22 10:15:18.455322: step: 62/469, loss: 7.272899627685547 2023-01-22 10:15:19.123207: step: 64/469, loss: 1.0152450799942017 2023-01-22 10:15:19.686996: step: 66/469, loss: 2.161332368850708 2023-01-22 10:15:20.303899: step: 68/469, loss: 1.2895456552505493 2023-01-22 10:15:20.916631: step: 70/469, loss: 0.6681989431381226 2023-01-22 10:15:21.530856: step: 72/469, loss: 1.1446471214294434 2023-01-22 10:15:22.129529: step: 74/469, loss: 2.418056011199951 2023-01-22 10:15:22.763163: step: 76/469, loss: 2.500084400177002 2023-01-22 10:15:23.419709: step: 78/469, loss: 4.361448287963867 2023-01-22 10:15:24.005395: step: 80/469, loss: 10.124239921569824 2023-01-22 10:15:24.669915: step: 82/469, loss: 6.290243148803711 2023-01-22 10:15:25.258920: step: 84/469, loss: 0.88554447889328 2023-01-22 10:15:25.861485: step: 86/469, loss: 4.046614646911621 2023-01-22 10:15:26.497548: step: 88/469, loss: 3.138495922088623 2023-01-22 10:15:27.196770: step: 90/469, loss: 2.158499240875244 2023-01-22 10:15:27.892389: step: 92/469, loss: 2.5786585807800293 2023-01-22 10:15:28.565511: step: 94/469, loss: 14.496940612792969 2023-01-22 10:15:29.231773: step: 96/469, loss: 0.9019399285316467 2023-01-22 10:15:29.927605: step: 98/469, loss: 5.931264400482178 2023-01-22 10:15:30.523537: step: 100/469, loss: 0.28076714277267456 2023-01-22 10:15:31.144944: step: 102/469, loss: 2.052495002746582 2023-01-22 10:15:31.742255: step: 104/469, loss: 1.6943092346191406 2023-01-22 10:15:32.336056: step: 106/469, loss: 1.6038990020751953 2023-01-22 10:15:32.928680: step: 108/469, loss: 2.9601356983184814 2023-01-22 10:15:33.489721: step: 110/469, loss: 0.3992002010345459 2023-01-22 10:15:34.116973: step: 112/469, loss: 1.721425175666809 2023-01-22 10:15:34.801905: step: 114/469, loss: 1.7144372463226318 2023-01-22 10:15:35.523112: step: 116/469, loss: 1.106978416442871 2023-01-22 10:15:36.106732: step: 118/469, loss: 1.5618661642074585 2023-01-22 10:15:36.796299: step: 120/469, loss: 4.940499782562256 2023-01-22 10:15:37.398013: step: 122/469, loss: 1.1992238759994507 2023-01-22 10:15:38.101048: step: 124/469, loss: 2.4845125675201416 2023-01-22 10:15:38.754265: step: 126/469, loss: 0.6097687482833862 2023-01-22 10:15:39.324123: step: 128/469, loss: 0.7245121002197266 2023-01-22 10:15:39.970323: step: 130/469, loss: 0.5907683968544006 2023-01-22 10:15:40.632617: step: 132/469, loss: 0.4911862909793854 2023-01-22 10:15:41.296235: step: 134/469, loss: 1.2377562522888184 2023-01-22 10:15:41.927796: step: 136/469, loss: 0.9672662019729614 2023-01-22 10:15:42.559891: step: 138/469, loss: 1.845298409461975 
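Note on the "New best chinese/korean/russian model..." and "Current best result" lines printed at the end of epoch 0 above: the log keeps a per-language best result and re-prints it after each epoch's evaluation. A minimal sketch of that bookkeeping follows; selecting on the dev 'combined' score is an assumption (the criterion is not stated in the log), and all names here are illustrative.

    # Per-language best-so-far bookkeeping, updated after each epoch's eval.
    best = {"chinese": None, "korean": None, "russian": None}

    def maybe_update_best(lang, dev, test, sample, epoch):
        # 'dev', 'test', 'sample' are the per-language score dicts printed in the log.
        prev = best[lang]
        if prev is None or dev["combined"] > prev["dev"]["combined"]:
            print(f"New best {lang} model...")
            best[lang] = {"dev": dev, "test": test, "sample": sample, "epoch": epoch}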
2023-01-22 10:15:43.172262: step: 140/469, loss: 1.1161091327667236 2023-01-22 10:15:43.755818: step: 142/469, loss: 0.8173484802246094 2023-01-22 10:15:44.417278: step: 144/469, loss: 0.5894840955734253 2023-01-22 10:15:45.049816: step: 146/469, loss: 4.70170783996582 2023-01-22 10:15:45.667261: step: 148/469, loss: 0.27044326066970825 2023-01-22 10:15:46.247972: step: 150/469, loss: 1.4389760494232178 2023-01-22 10:15:46.836402: step: 152/469, loss: 0.5464712977409363 2023-01-22 10:15:47.438082: step: 154/469, loss: 1.8928881883621216 2023-01-22 10:15:48.049722: step: 156/469, loss: 4.485570430755615 2023-01-22 10:15:48.795775: step: 158/469, loss: 5.045022964477539 2023-01-22 10:15:49.420216: step: 160/469, loss: 0.9949291944503784 2023-01-22 10:15:50.018340: step: 162/469, loss: 4.282439708709717 2023-01-22 10:15:50.648243: step: 164/469, loss: 1.9260460138320923 2023-01-22 10:15:51.322117: step: 166/469, loss: 5.081108093261719 2023-01-22 10:15:51.979428: step: 168/469, loss: 1.3129239082336426 2023-01-22 10:15:52.549171: step: 170/469, loss: 0.8752832412719727 2023-01-22 10:15:53.193565: step: 172/469, loss: 1.3963277339935303 2023-01-22 10:15:53.844515: step: 174/469, loss: 2.5459213256835938 2023-01-22 10:15:54.432252: step: 176/469, loss: 7.6412248611450195 2023-01-22 10:15:55.024674: step: 178/469, loss: 1.6908068656921387 2023-01-22 10:15:55.623767: step: 180/469, loss: 2.211958169937134 2023-01-22 10:15:56.258444: step: 182/469, loss: 1.1137548685073853 2023-01-22 10:15:56.822237: step: 184/469, loss: 0.7184733748435974 2023-01-22 10:15:57.493873: step: 186/469, loss: 1.2796401977539062 2023-01-22 10:15:58.072523: step: 188/469, loss: 2.113682746887207 2023-01-22 10:15:58.697374: step: 190/469, loss: 2.014291763305664 2023-01-22 10:15:59.328018: step: 192/469, loss: 0.6369805932044983 2023-01-22 10:15:59.938430: step: 194/469, loss: 19.249549865722656 2023-01-22 10:16:00.611057: step: 196/469, loss: 1.919909954071045 2023-01-22 10:16:01.256145: step: 198/469, loss: 1.2311121225357056 2023-01-22 10:16:01.893494: step: 200/469, loss: 3.7839255332946777 2023-01-22 10:16:02.564429: step: 202/469, loss: 1.2812457084655762 2023-01-22 10:16:03.205772: step: 204/469, loss: 0.6068203449249268 2023-01-22 10:16:03.837925: step: 206/469, loss: 1.3794814348220825 2023-01-22 10:16:04.462053: step: 208/469, loss: 1.9105955362319946 2023-01-22 10:16:05.059508: step: 210/469, loss: 0.36456459760665894 2023-01-22 10:16:05.729284: step: 212/469, loss: 0.7991377115249634 2023-01-22 10:16:06.330031: step: 214/469, loss: 1.475459098815918 2023-01-22 10:16:07.018462: step: 216/469, loss: 1.1212122440338135 2023-01-22 10:16:07.656425: step: 218/469, loss: 0.34821829199790955 2023-01-22 10:16:08.280252: step: 220/469, loss: 9.339518547058105 2023-01-22 10:16:08.925398: step: 222/469, loss: 0.6210970878601074 2023-01-22 10:16:09.522541: step: 224/469, loss: 0.5798214077949524 2023-01-22 10:16:10.136542: step: 226/469, loss: 2.763742208480835 2023-01-22 10:16:10.755135: step: 228/469, loss: 0.3588736951351166 2023-01-22 10:16:11.400628: step: 230/469, loss: 1.1018152236938477 2023-01-22 10:16:12.010669: step: 232/469, loss: 0.42224645614624023 2023-01-22 10:16:12.673414: step: 234/469, loss: 1.294297456741333 2023-01-22 10:16:13.344421: step: 236/469, loss: 9.18282413482666 2023-01-22 10:16:13.922298: step: 238/469, loss: 4.582605838775635 2023-01-22 10:16:14.599671: step: 240/469, loss: 1.3106979131698608 2023-01-22 10:16:15.271790: step: 242/469, loss: 1.0300555229187012 2023-01-22 10:16:15.917683: 
step: 244/469, loss: 0.8543984889984131 2023-01-22 10:16:16.537609: step: 246/469, loss: 0.404795378446579 2023-01-22 10:16:17.172344: step: 248/469, loss: 0.4309930205345154 2023-01-22 10:16:17.801439: step: 250/469, loss: 0.9510715007781982 2023-01-22 10:16:18.505948: step: 252/469, loss: 1.3495937585830688 2023-01-22 10:16:19.150003: step: 254/469, loss: 1.5015547275543213 2023-01-22 10:16:19.722511: step: 256/469, loss: 1.1266480684280396 2023-01-22 10:16:20.369655: step: 258/469, loss: 10.146392822265625 2023-01-22 10:16:20.939084: step: 260/469, loss: 2.3589329719543457 2023-01-22 10:16:21.587560: step: 262/469, loss: 1.3057126998901367 2023-01-22 10:16:22.244912: step: 264/469, loss: 1.6830081939697266 2023-01-22 10:16:22.820858: step: 266/469, loss: 0.7595487833023071 2023-01-22 10:16:23.418523: step: 268/469, loss: 1.0855481624603271 2023-01-22 10:16:24.064004: step: 270/469, loss: 0.8032743334770203 2023-01-22 10:16:24.743864: step: 272/469, loss: 0.6891888380050659 2023-01-22 10:16:25.302436: step: 274/469, loss: 0.5035603642463684 2023-01-22 10:16:25.961540: step: 276/469, loss: 3.7007110118865967 2023-01-22 10:16:26.612002: step: 278/469, loss: 0.6537836790084839 2023-01-22 10:16:27.308040: step: 280/469, loss: 1.039383053779602 2023-01-22 10:16:27.910655: step: 282/469, loss: 0.5192900896072388 2023-01-22 10:16:28.554739: step: 284/469, loss: 0.5746899843215942 2023-01-22 10:16:29.128901: step: 286/469, loss: 0.7903205156326294 2023-01-22 10:16:29.725955: step: 288/469, loss: 0.4842279255390167 2023-01-22 10:16:30.336280: step: 290/469, loss: 0.2623206079006195 2023-01-22 10:16:30.963938: step: 292/469, loss: 1.2897734642028809 2023-01-22 10:16:31.622511: step: 294/469, loss: 3.3858587741851807 2023-01-22 10:16:32.204385: step: 296/469, loss: 2.190295457839966 2023-01-22 10:16:32.837194: step: 298/469, loss: 0.24754571914672852 2023-01-22 10:16:33.493986: step: 300/469, loss: 0.7817400693893433 2023-01-22 10:16:34.111192: step: 302/469, loss: 0.5100713968276978 2023-01-22 10:16:34.684136: step: 304/469, loss: 0.37411972880363464 2023-01-22 10:16:35.271148: step: 306/469, loss: 2.3437163829803467 2023-01-22 10:16:35.995965: step: 308/469, loss: 1.5898593664169312 2023-01-22 10:16:36.674679: step: 310/469, loss: 1.0790491104125977 2023-01-22 10:16:37.354393: step: 312/469, loss: 0.24286043643951416 2023-01-22 10:16:37.940804: step: 314/469, loss: 5.71418571472168 2023-01-22 10:16:38.536780: step: 316/469, loss: 1.5091514587402344 2023-01-22 10:16:39.241954: step: 318/469, loss: 0.9952667951583862 2023-01-22 10:16:39.913659: step: 320/469, loss: 0.8960474729537964 2023-01-22 10:16:40.493391: step: 322/469, loss: 4.323493957519531 2023-01-22 10:16:41.122210: step: 324/469, loss: 0.9344325661659241 2023-01-22 10:16:41.809608: step: 326/469, loss: 0.7204203009605408 2023-01-22 10:16:42.418782: step: 328/469, loss: 0.5804534554481506 2023-01-22 10:16:43.106203: step: 330/469, loss: 0.6996358036994934 2023-01-22 10:16:43.846561: step: 332/469, loss: 3.4042019844055176 2023-01-22 10:16:44.503958: step: 334/469, loss: 0.8509220480918884 2023-01-22 10:16:45.181615: step: 336/469, loss: 1.7719075679779053 2023-01-22 10:16:45.761656: step: 338/469, loss: 0.34857359528541565 2023-01-22 10:16:46.378046: step: 340/469, loss: 1.335108757019043 2023-01-22 10:16:47.049395: step: 342/469, loss: 0.5170474648475647 2023-01-22 10:16:47.750336: step: 344/469, loss: 2.5913052558898926 2023-01-22 10:16:48.393243: step: 346/469, loss: 3.233607769012451 2023-01-22 10:16:48.984121: step: 348/469, loss: 
3.2166647911071777 2023-01-22 10:16:49.607635: step: 350/469, loss: 0.8338192105293274 2023-01-22 10:16:50.231857: step: 352/469, loss: 3.7104640007019043 2023-01-22 10:16:50.861161: step: 354/469, loss: 1.0243717432022095 2023-01-22 10:16:51.481091: step: 356/469, loss: 1.4346163272857666 2023-01-22 10:16:52.093472: step: 358/469, loss: 0.8183726668357849 2023-01-22 10:16:52.720441: step: 360/469, loss: 5.192439079284668 2023-01-22 10:16:53.406477: step: 362/469, loss: 6.544826507568359 2023-01-22 10:16:54.021055: step: 364/469, loss: 0.6790279746055603 2023-01-22 10:16:54.734895: step: 366/469, loss: 0.9295821785926819 2023-01-22 10:16:55.289586: step: 368/469, loss: 1.3576148748397827 2023-01-22 10:16:55.946351: step: 370/469, loss: 0.5722967386245728 2023-01-22 10:16:56.615276: step: 372/469, loss: 0.6305257678031921 2023-01-22 10:16:57.268294: step: 374/469, loss: 1.633368968963623 2023-01-22 10:16:57.925795: step: 376/469, loss: 3.1764259338378906 2023-01-22 10:16:58.585706: step: 378/469, loss: 0.44796910881996155 2023-01-22 10:16:59.233959: step: 380/469, loss: 0.6575629711151123 2023-01-22 10:16:59.867226: step: 382/469, loss: 1.8687303066253662 2023-01-22 10:17:00.472843: step: 384/469, loss: 1.4110556840896606 2023-01-22 10:17:01.154226: step: 386/469, loss: 1.8152185678482056 2023-01-22 10:17:01.726615: step: 388/469, loss: 0.3786051571369171 2023-01-22 10:17:02.378897: step: 390/469, loss: 4.476983070373535 2023-01-22 10:17:03.062198: step: 392/469, loss: 2.22505259513855 2023-01-22 10:17:03.715050: step: 394/469, loss: 1.1761585474014282 2023-01-22 10:17:04.375639: step: 396/469, loss: 2.04634952545166 2023-01-22 10:17:04.964972: step: 398/469, loss: 5.857431411743164 2023-01-22 10:17:05.516975: step: 400/469, loss: 1.104049801826477 2023-01-22 10:17:06.162495: step: 402/469, loss: 0.5394413471221924 2023-01-22 10:17:06.788035: step: 404/469, loss: 0.927696943283081 2023-01-22 10:17:07.450252: step: 406/469, loss: 5.998225688934326 2023-01-22 10:17:08.066281: step: 408/469, loss: 0.5810955762863159 2023-01-22 10:17:08.724030: step: 410/469, loss: 1.7282037734985352 2023-01-22 10:17:09.329791: step: 412/469, loss: 2.7687971591949463 2023-01-22 10:17:09.987303: step: 414/469, loss: 2.0616419315338135 2023-01-22 10:17:10.569602: step: 416/469, loss: 0.9342669248580933 2023-01-22 10:17:11.168319: step: 418/469, loss: 1.6862801313400269 2023-01-22 10:17:11.783070: step: 420/469, loss: 2.388059139251709 2023-01-22 10:17:12.363430: step: 422/469, loss: 0.32551687955856323 2023-01-22 10:17:12.973608: step: 424/469, loss: 1.7422549724578857 2023-01-22 10:17:13.591630: step: 426/469, loss: 0.2748425006866455 2023-01-22 10:17:14.231793: step: 428/469, loss: 0.4891715347766876 2023-01-22 10:17:14.869871: step: 430/469, loss: 2.017852306365967 2023-01-22 10:17:15.487363: step: 432/469, loss: 1.773969292640686 2023-01-22 10:17:16.103815: step: 434/469, loss: 0.3198941648006439 2023-01-22 10:17:16.749655: step: 436/469, loss: 1.1070201396942139 2023-01-22 10:17:17.319962: step: 438/469, loss: 2.6657259464263916 2023-01-22 10:17:17.978136: step: 440/469, loss: 0.3954508602619171 2023-01-22 10:17:18.536425: step: 442/469, loss: 0.5992934107780457 2023-01-22 10:17:19.131348: step: 444/469, loss: 1.867953896522522 2023-01-22 10:17:19.724040: step: 446/469, loss: 2.983337163925171 2023-01-22 10:17:20.297484: step: 448/469, loss: 2.466913938522339 2023-01-22 10:17:20.956524: step: 450/469, loss: 0.406933456659317 2023-01-22 10:17:21.649513: step: 452/469, loss: 0.6800675392150879 2023-01-22 
10:17:22.324846: step: 454/469, loss: 0.30527159571647644 2023-01-22 10:17:23.025113: step: 456/469, loss: 1.2411680221557617 2023-01-22 10:17:23.664864: step: 458/469, loss: 3.157832145690918 2023-01-22 10:17:24.272295: step: 460/469, loss: 0.5001078844070435 2023-01-22 10:17:24.838180: step: 462/469, loss: 0.9012775421142578 2023-01-22 10:17:25.514317: step: 464/469, loss: 2.03584361076355 2023-01-22 10:17:26.109042: step: 466/469, loss: 1.7823234796524048 2023-01-22 10:17:26.735256: step: 468/469, loss: 1.4428259134292603 2023-01-22 10:17:27.402795: step: 470/469, loss: 1.5176260471343994 2023-01-22 10:17:28.003525: step: 472/469, loss: 1.4027972221374512 2023-01-22 10:17:28.619276: step: 474/469, loss: 0.5751864314079285 2023-01-22 10:17:29.273605: step: 476/469, loss: 0.9515081644058228 2023-01-22 10:17:29.912112: step: 478/469, loss: 0.7199758291244507 2023-01-22 10:17:30.493492: step: 480/469, loss: 1.1840629577636719 2023-01-22 10:17:31.141226: step: 482/469, loss: 1.6275490522384644 2023-01-22 10:17:31.837182: step: 484/469, loss: 0.6351293325424194 2023-01-22 10:17:32.526669: step: 486/469, loss: 0.4245985150337219 2023-01-22 10:17:33.143086: step: 488/469, loss: 0.4057943820953369 2023-01-22 10:17:33.880076: step: 490/469, loss: 4.20799446105957 2023-01-22 10:17:34.497559: step: 492/469, loss: 1.136854887008667 2023-01-22 10:17:35.180800: step: 494/469, loss: 6.3963236808776855 2023-01-22 10:17:35.802622: step: 496/469, loss: 0.7923392057418823 2023-01-22 10:17:36.467322: step: 498/469, loss: 3.3991355895996094 2023-01-22 10:17:37.088949: step: 500/469, loss: 2.36592698097229 2023-01-22 10:17:37.699591: step: 502/469, loss: 2.509711742401123 2023-01-22 10:17:38.356099: step: 504/469, loss: 0.8525946140289307 2023-01-22 10:17:38.908614: step: 506/469, loss: 0.6230922937393188 2023-01-22 10:17:39.507226: step: 508/469, loss: 4.56134557723999 2023-01-22 10:17:40.088177: step: 510/469, loss: 1.9812548160552979 2023-01-22 10:17:40.692527: step: 512/469, loss: 0.4840005040168762 2023-01-22 10:17:41.274195: step: 514/469, loss: 2.0905354022979736 2023-01-22 10:17:41.887990: step: 516/469, loss: 1.3597198724746704 2023-01-22 10:17:42.582929: step: 518/469, loss: 4.797149181365967 2023-01-22 10:17:43.180476: step: 520/469, loss: 0.32428330183029175 2023-01-22 10:17:43.779632: step: 522/469, loss: 5.728045463562012 2023-01-22 10:17:44.373191: step: 524/469, loss: 2.3465561866760254 2023-01-22 10:17:44.923834: step: 526/469, loss: 0.33189481496810913 2023-01-22 10:17:45.568583: step: 528/469, loss: 0.8946348428726196 2023-01-22 10:17:46.183282: step: 530/469, loss: 0.8239257335662842 2023-01-22 10:17:46.855409: step: 532/469, loss: 2.6375632286071777 2023-01-22 10:17:47.451660: step: 534/469, loss: 2.830780506134033 2023-01-22 10:17:48.046663: step: 536/469, loss: 1.2361962795257568 2023-01-22 10:17:48.712349: step: 538/469, loss: 0.996695876121521 2023-01-22 10:17:49.332721: step: 540/469, loss: 3.408931255340576 2023-01-22 10:17:49.925525: step: 542/469, loss: 2.9397573471069336 2023-01-22 10:17:50.557663: step: 544/469, loss: 1.2733079195022583 2023-01-22 10:17:51.174114: step: 546/469, loss: 0.9938843250274658 2023-01-22 10:17:51.774979: step: 548/469, loss: 0.4276813268661499 2023-01-22 10:17:52.356505: step: 550/469, loss: 0.6970816254615784 2023-01-22 10:17:53.032993: step: 552/469, loss: 2.603245496749878 2023-01-22 10:17:53.637169: step: 554/469, loss: 1.8236650228500366 2023-01-22 10:17:54.282161: step: 556/469, loss: 6.190192222595215 2023-01-22 10:17:54.916799: step: 558/469, 
loss: 0.42915213108062744 2023-01-22 10:17:55.554185: step: 560/469, loss: 0.8009711503982544 2023-01-22 10:17:56.252456: step: 562/469, loss: 1.7196707725524902 2023-01-22 10:17:56.839749: step: 564/469, loss: 0.5973418951034546 2023-01-22 10:17:57.495350: step: 566/469, loss: 3.4538116455078125 2023-01-22 10:17:58.136148: step: 568/469, loss: 0.6614938974380493 2023-01-22 10:17:58.705778: step: 570/469, loss: 0.4046335220336914 2023-01-22 10:17:59.260150: step: 572/469, loss: 0.9673312902450562 2023-01-22 10:17:59.912231: step: 574/469, loss: 2.780062198638916 2023-01-22 10:18:00.543837: step: 576/469, loss: 1.4446372985839844 2023-01-22 10:18:01.159104: step: 578/469, loss: 2.5449981689453125 2023-01-22 10:18:01.831826: step: 580/469, loss: 0.23068566620349884 2023-01-22 10:18:02.456926: step: 582/469, loss: 2.6683239936828613 2023-01-22 10:18:03.081615: step: 584/469, loss: 1.0787433385849 2023-01-22 10:18:03.789316: step: 586/469, loss: 2.1862850189208984 2023-01-22 10:18:04.391195: step: 588/469, loss: 1.7944048643112183 2023-01-22 10:18:05.034893: step: 590/469, loss: 2.3639705181121826 2023-01-22 10:18:05.643802: step: 592/469, loss: 2.702691078186035 2023-01-22 10:18:06.277087: step: 594/469, loss: 3.1447505950927734 2023-01-22 10:18:06.943065: step: 596/469, loss: 3.9168894290924072 2023-01-22 10:18:07.598036: step: 598/469, loss: 1.368726134300232 2023-01-22 10:18:08.210754: step: 600/469, loss: 6.212615966796875 2023-01-22 10:18:08.849672: step: 602/469, loss: 1.748282551765442 2023-01-22 10:18:09.536531: step: 604/469, loss: 0.47934800386428833 2023-01-22 10:18:10.138175: step: 606/469, loss: 0.2283998727798462 2023-01-22 10:18:10.792595: step: 608/469, loss: 12.818977355957031 2023-01-22 10:18:11.460591: step: 610/469, loss: 1.1269757747650146 2023-01-22 10:18:12.046559: step: 612/469, loss: 1.3322343826293945 2023-01-22 10:18:12.697310: step: 614/469, loss: 0.34730276465415955 2023-01-22 10:18:13.333887: step: 616/469, loss: 1.4259893894195557 2023-01-22 10:18:13.995841: step: 618/469, loss: 0.6219801902770996 2023-01-22 10:18:14.649097: step: 620/469, loss: 1.68389093875885 2023-01-22 10:18:15.251579: step: 622/469, loss: 0.4739355444908142 2023-01-22 10:18:15.899650: step: 624/469, loss: 3.3404035568237305 2023-01-22 10:18:16.538828: step: 626/469, loss: 0.9970574378967285 2023-01-22 10:18:17.124193: step: 628/469, loss: 2.1485400199890137 2023-01-22 10:18:17.719631: step: 630/469, loss: 0.563214123249054 2023-01-22 10:18:18.298813: step: 632/469, loss: 0.49547895789146423 2023-01-22 10:18:18.960097: step: 634/469, loss: 0.8724790215492249 2023-01-22 10:18:19.664141: step: 636/469, loss: 0.574462890625 2023-01-22 10:18:20.291369: step: 638/469, loss: 0.618561327457428 2023-01-22 10:18:20.927463: step: 640/469, loss: 1.007251262664795 2023-01-22 10:18:21.522987: step: 642/469, loss: 0.5910242795944214 2023-01-22 10:18:22.110676: step: 644/469, loss: 3.602355480194092 2023-01-22 10:18:22.721861: step: 646/469, loss: 2.5281431674957275 2023-01-22 10:18:23.356115: step: 648/469, loss: 2.624279499053955 2023-01-22 10:18:23.992841: step: 650/469, loss: 1.8061567544937134 2023-01-22 10:18:24.586266: step: 652/469, loss: 0.9593743085861206 2023-01-22 10:18:25.294708: step: 654/469, loss: 1.447396159172058 2023-01-22 10:18:25.959203: step: 656/469, loss: 1.6114580631256104 2023-01-22 10:18:26.570640: step: 658/469, loss: 2.4865214824676514 2023-01-22 10:18:27.189074: step: 660/469, loss: 1.9151530265808105 2023-01-22 10:18:27.766150: step: 662/469, loss: 2.6298186779022217 
2023-01-22 10:18:28.440785: step: 664/469, loss: 0.6157950162887573 2023-01-22 10:18:29.042751: step: 666/469, loss: 0.31964510679244995 2023-01-22 10:18:29.676861: step: 668/469, loss: 1.302176594734192 2023-01-22 10:18:30.338914: step: 670/469, loss: 0.8198046088218689 2023-01-22 10:18:31.021629: step: 672/469, loss: 1.230098843574524 2023-01-22 10:18:31.683250: step: 674/469, loss: 1.8937286138534546 2023-01-22 10:18:32.399395: step: 676/469, loss: 1.7790850400924683 2023-01-22 10:18:33.049117: step: 678/469, loss: 0.42032474279403687 2023-01-22 10:18:33.663217: step: 680/469, loss: 3.8505940437316895 2023-01-22 10:18:34.296350: step: 682/469, loss: 0.7740234136581421 2023-01-22 10:18:34.922329: step: 684/469, loss: 0.3754080533981323 2023-01-22 10:18:35.553784: step: 686/469, loss: 1.0969812870025635 2023-01-22 10:18:36.159470: step: 688/469, loss: 2.9668822288513184 2023-01-22 10:18:36.804089: step: 690/469, loss: 2.951083183288574 2023-01-22 10:18:37.381439: step: 692/469, loss: 3.410388469696045 2023-01-22 10:18:38.009370: step: 694/469, loss: 1.3995883464813232 2023-01-22 10:18:38.617645: step: 696/469, loss: 0.7069652080535889 2023-01-22 10:18:39.249753: step: 698/469, loss: 0.6556860208511353 2023-01-22 10:18:39.869999: step: 700/469, loss: 0.6808304190635681 2023-01-22 10:18:40.500854: step: 702/469, loss: 1.951819658279419 2023-01-22 10:18:41.150344: step: 704/469, loss: 0.8062426447868347 2023-01-22 10:18:41.896731: step: 706/469, loss: 0.6777491569519043 2023-01-22 10:18:42.510172: step: 708/469, loss: 1.3558197021484375 2023-01-22 10:18:43.187819: step: 710/469, loss: 0.651256799697876 2023-01-22 10:18:43.812964: step: 712/469, loss: 1.024844765663147 2023-01-22 10:18:44.424890: step: 714/469, loss: 5.307661056518555 2023-01-22 10:18:45.068036: step: 716/469, loss: 1.0373034477233887 2023-01-22 10:18:45.662700: step: 718/469, loss: 1.2897006273269653 2023-01-22 10:18:46.294028: step: 720/469, loss: 1.1747374534606934 2023-01-22 10:18:46.928369: step: 722/469, loss: 2.1981310844421387 2023-01-22 10:18:47.552420: step: 724/469, loss: 0.5594189167022705 2023-01-22 10:18:48.193332: step: 726/469, loss: 0.9148130416870117 2023-01-22 10:18:48.748467: step: 728/469, loss: 2.197479724884033 2023-01-22 10:18:49.348721: step: 730/469, loss: 1.027839183807373 2023-01-22 10:18:49.954300: step: 732/469, loss: 9.317862510681152 2023-01-22 10:18:50.521512: step: 734/469, loss: 2.0460798740386963 2023-01-22 10:18:51.184264: step: 736/469, loss: 2.078953742980957 2023-01-22 10:18:51.872844: step: 738/469, loss: 5.71103572845459 2023-01-22 10:18:52.487431: step: 740/469, loss: 1.327780842781067 2023-01-22 10:18:53.099468: step: 742/469, loss: 1.3744038343429565 2023-01-22 10:18:53.684863: step: 744/469, loss: 5.301520347595215 2023-01-22 10:18:54.332497: step: 746/469, loss: 1.3241816759109497 2023-01-22 10:18:54.941129: step: 748/469, loss: 4.036094665527344 2023-01-22 10:18:55.560154: step: 750/469, loss: 1.3682143688201904 2023-01-22 10:18:56.127707: step: 752/469, loss: 1.1460245847702026 2023-01-22 10:18:56.848680: step: 754/469, loss: 1.4363397359848022 2023-01-22 10:18:57.483080: step: 756/469, loss: 2.2570321559906006 2023-01-22 10:18:58.194525: step: 758/469, loss: 4.692358493804932 2023-01-22 10:18:58.816598: step: 760/469, loss: 0.7012701034545898 2023-01-22 10:18:59.465696: step: 762/469, loss: 2.7670552730560303 2023-01-22 10:19:00.081383: step: 764/469, loss: 1.3826501369476318 2023-01-22 10:19:00.755402: step: 766/469, loss: 0.5983228087425232 2023-01-22 10:19:01.369578: step: 
768/469, loss: 0.7576615810394287 2023-01-22 10:19:02.001849: step: 770/469, loss: 1.623666763305664 2023-01-22 10:19:02.640519: step: 772/469, loss: 1.1034537553787231 2023-01-22 10:19:03.283602: step: 774/469, loss: 1.1922709941864014 2023-01-22 10:19:03.953668: step: 776/469, loss: 0.9126417636871338 2023-01-22 10:19:04.606359: step: 778/469, loss: 0.5356414318084717 2023-01-22 10:19:05.222106: step: 780/469, loss: 0.4846305549144745 2023-01-22 10:19:05.889005: step: 782/469, loss: 3.597834587097168 2023-01-22 10:19:06.548024: step: 784/469, loss: 1.1163527965545654 2023-01-22 10:19:07.148709: step: 786/469, loss: 1.3866658210754395 2023-01-22 10:19:07.756924: step: 788/469, loss: 0.658324658870697 2023-01-22 10:19:08.315130: step: 790/469, loss: 1.4993462562561035 2023-01-22 10:19:08.937847: step: 792/469, loss: 1.1692321300506592 2023-01-22 10:19:09.547888: step: 794/469, loss: 4.7747344970703125 2023-01-22 10:19:10.205086: step: 796/469, loss: 0.9007105827331543 2023-01-22 10:19:10.858271: step: 798/469, loss: 0.8143647909164429 2023-01-22 10:19:11.456134: step: 800/469, loss: 6.239959239959717 2023-01-22 10:19:12.092491: step: 802/469, loss: 0.3260728716850281 2023-01-22 10:19:12.685939: step: 804/469, loss: 2.2098755836486816 2023-01-22 10:19:13.323693: step: 806/469, loss: 1.023186445236206 2023-01-22 10:19:13.946082: step: 808/469, loss: 1.4482007026672363 2023-01-22 10:19:14.554890: step: 810/469, loss: 0.33770811557769775 2023-01-22 10:19:15.134612: step: 812/469, loss: 1.0102179050445557 2023-01-22 10:19:15.754208: step: 814/469, loss: 3.1881463527679443 2023-01-22 10:19:16.409173: step: 816/469, loss: 0.9331550598144531 2023-01-22 10:19:17.128379: step: 818/469, loss: 0.8774794340133667 2023-01-22 10:19:17.904259: step: 820/469, loss: 10.5547456741333 2023-01-22 10:19:18.577398: step: 822/469, loss: 1.2317283153533936 2023-01-22 10:19:19.125131: step: 824/469, loss: 0.9386170506477356 2023-01-22 10:19:19.749869: step: 826/469, loss: 1.76652991771698 2023-01-22 10:19:20.507133: step: 828/469, loss: 1.5632579326629639 2023-01-22 10:19:21.102290: step: 830/469, loss: 11.621553421020508 2023-01-22 10:19:21.903121: step: 832/469, loss: 1.3102751970291138 2023-01-22 10:19:22.575169: step: 834/469, loss: 2.7988474369049072 2023-01-22 10:19:23.196842: step: 836/469, loss: 2.1238455772399902 2023-01-22 10:19:23.834838: step: 838/469, loss: 1.6453871726989746 2023-01-22 10:19:24.463902: step: 840/469, loss: 1.2638801336288452 2023-01-22 10:19:25.059561: step: 842/469, loss: 0.5004633069038391 2023-01-22 10:19:25.688720: step: 844/469, loss: 0.9848936200141907 2023-01-22 10:19:26.330860: step: 846/469, loss: 0.4151782989501953 2023-01-22 10:19:27.063704: step: 848/469, loss: 0.4190213978290558 2023-01-22 10:19:27.720971: step: 850/469, loss: 1.2330005168914795 2023-01-22 10:19:28.380970: step: 852/469, loss: 0.44728994369506836 2023-01-22 10:19:29.080639: step: 854/469, loss: 6.191889762878418 2023-01-22 10:19:29.761597: step: 856/469, loss: 1.6545212268829346 2023-01-22 10:19:30.388483: step: 858/469, loss: 1.0753962993621826 2023-01-22 10:19:30.949323: step: 860/469, loss: 0.5099169611930847 2023-01-22 10:19:31.630555: step: 862/469, loss: 0.35161808133125305 2023-01-22 10:19:32.415268: step: 864/469, loss: 0.5195026993751526 2023-01-22 10:19:33.101144: step: 866/469, loss: 1.169761300086975 2023-01-22 10:19:33.652389: step: 868/469, loss: 1.8479701280593872 2023-01-22 10:19:34.323723: step: 870/469, loss: 1.3604129552841187 2023-01-22 10:19:34.963569: step: 872/469, loss: 
2.2602038383483887 2023-01-22 10:19:35.568897: step: 874/469, loss: 1.2506535053253174 2023-01-22 10:19:36.180071: step: 876/469, loss: 1.0251960754394531 2023-01-22 10:19:36.767643: step: 878/469, loss: 0.8896273374557495 2023-01-22 10:19:37.412265: step: 880/469, loss: 1.996160626411438 2023-01-22 10:19:38.035000: step: 882/469, loss: 3.311429977416992 2023-01-22 10:19:38.669627: step: 884/469, loss: 0.540127158164978 2023-01-22 10:19:39.222594: step: 886/469, loss: 0.43265557289123535 2023-01-22 10:19:39.853840: step: 888/469, loss: 1.1783123016357422 2023-01-22 10:19:40.503086: step: 890/469, loss: 0.6725778579711914 2023-01-22 10:19:41.128919: step: 892/469, loss: 0.6339295506477356 2023-01-22 10:19:41.818720: step: 894/469, loss: 0.7197149991989136 2023-01-22 10:19:42.495975: step: 896/469, loss: 1.1898581981658936 2023-01-22 10:19:43.079331: step: 898/469, loss: 0.3726484179496765 2023-01-22 10:19:43.646746: step: 900/469, loss: 1.3890084028244019 2023-01-22 10:19:44.231773: step: 902/469, loss: 2.965548276901245 2023-01-22 10:19:44.900607: step: 904/469, loss: 7.361764907836914 2023-01-22 10:19:45.582317: step: 906/469, loss: 1.0197678804397583 2023-01-22 10:19:46.224560: step: 908/469, loss: 0.5380075573921204 2023-01-22 10:19:46.890713: step: 910/469, loss: 0.2265072911977768 2023-01-22 10:19:47.579140: step: 912/469, loss: 6.4279985427856445 2023-01-22 10:19:48.271709: step: 914/469, loss: 0.30310600996017456 2023-01-22 10:19:48.898095: step: 916/469, loss: 3.4315881729125977 2023-01-22 10:19:49.554952: step: 918/469, loss: 0.9987183809280396 2023-01-22 10:19:50.192936: step: 920/469, loss: 1.304203748703003 2023-01-22 10:19:50.738275: step: 922/469, loss: 0.5848813056945801 2023-01-22 10:19:51.384785: step: 924/469, loss: 0.40204232931137085 2023-01-22 10:19:52.027548: step: 926/469, loss: 0.4296698570251465 2023-01-22 10:19:52.719226: step: 928/469, loss: 0.6505110859870911 2023-01-22 10:19:53.348041: step: 930/469, loss: 0.6078650951385498 2023-01-22 10:19:54.042987: step: 932/469, loss: 1.256596565246582 2023-01-22 10:19:54.644352: step: 934/469, loss: 1.8910768032073975 2023-01-22 10:19:55.274234: step: 936/469, loss: 0.4733887314796448 2023-01-22 10:19:55.913920: step: 938/469, loss: 0.3660109341144562 ================================================== Loss: 1.945 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2810668174058052, 'r': 0.2283667891422167, 'f1': 0.25199093974313574}, 'combined': 0.18567753454757369, 'epoch': 1} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2533866932645599, 'r': 0.1388642536738073, 'f1': 0.17940736319795672}, 'combined': 0.09785856174434002, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26802519787457796, 'r': 0.22589244896626362, 'f1': 0.24516179456153583}, 'combined': 0.18064553283481588, 'epoch': 1} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.24415862762194238, 'r': 0.13467745304443213, 'f1': 0.1735983650190149}, 'combined': 0.09469001728309903, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2675974296837756, 'r': 0.21995697818704282, 'f1': 0.24144965588931105}, 'combined': 0.17791027276054497, 'epoch': 1} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 
0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2590920765378194, 'r': 0.14199099162502637, 'f1': 0.18344699036188397}, 'combined': 0.1000619947428458, 'epoch': 1}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28809523809523807, 'r': 0.28809523809523807, 'f1': 0.28809523809523807}, 'combined': 0.19206349206349205, 'epoch': 1}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.1956521739130435, 'f1': 0.21951219512195125}, 'combined': 0.10975609756097562, 'epoch': 1}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.2413793103448276, 'f1': 0.32558139534883723}, 'combined': 0.21705426356589147, 'epoch': 1}
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2810668174058052, 'r': 0.2283667891422167, 'f1': 0.25199093974313574}, 'combined': 0.18567753454757369, 'epoch': 1}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2533866932645599, 'r': 0.1388642536738073, 'f1': 0.17940736319795672}, 'combined': 0.09785856174434002, 'epoch': 1}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28809523809523807, 'r': 0.28809523809523807, 'f1': 0.28809523809523807}, 'combined': 0.19206349206349205, 'epoch': 1}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26802519787457796, 'r': 0.22589244896626362, 'f1': 0.24516179456153583}, 'combined': 0.18064553283481588, 'epoch': 1}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.24415862762194238, 'r': 0.13467745304443213, 'f1': 0.1735983650190149}, 'combined': 0.09469001728309903, 'epoch': 1}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.1956521739130435, 'f1': 0.21951219512195125}, 'combined': 0.10975609756097562, 'epoch': 1}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2675974296837756, 'r': 0.21995697818704282, 'f1': 0.24144965588931105}, 'combined': 0.17791027276054497, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2590920765378194, 'r': 0.14199099162502637, 'f1': 0.18344699036188397}, 'combined': 0.1000619947428458, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.2413793103448276, 'f1': 0.32558139534883723}, 'combined': 0.21705426356589147, 'epoch': 1}
******************************
Epoch: 2
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 10:23:12.101539: step: 2/469, loss: 0.5353066325187683
2023-01-22 10:23:12.749123: step: 4/469, loss: 1.8436578512191772
2023-01-22 10:23:13.400184: step: 6/469, loss: 0.38073140382766724
2023-01-22 10:23:14.073989: step: 8/469, loss: 0.6198972463607788
2023-01-22 10:23:14.791709: step: 10/469, loss: 1.1369569301605225 2023-01-22
10:23:15.447788: step: 12/469, loss: 1.104857325553894 2023-01-22 10:23:16.081179: step: 14/469, loss: 0.5385766625404358 2023-01-22 10:23:16.702697: step: 16/469, loss: 0.813564121723175 2023-01-22 10:23:17.385655: step: 18/469, loss: 0.5973976850509644 2023-01-22 10:23:18.047780: step: 20/469, loss: 3.3589892387390137 2023-01-22 10:23:18.668830: step: 22/469, loss: 0.8600585460662842 2023-01-22 10:23:19.258121: step: 24/469, loss: 0.6128389835357666 2023-01-22 10:23:19.931833: step: 26/469, loss: 1.1072347164154053 2023-01-22 10:23:20.528645: step: 28/469, loss: 1.4190499782562256 2023-01-22 10:23:21.087684: step: 30/469, loss: 0.7740609645843506 2023-01-22 10:23:21.670799: step: 32/469, loss: 1.2469971179962158 2023-01-22 10:23:22.309155: step: 34/469, loss: 1.4400287866592407 2023-01-22 10:23:22.918490: step: 36/469, loss: 1.1377840042114258 2023-01-22 10:23:23.633732: step: 38/469, loss: 1.1450287103652954 2023-01-22 10:23:24.295384: step: 40/469, loss: 1.0870027542114258 2023-01-22 10:23:24.904275: step: 42/469, loss: 0.2694031298160553 2023-01-22 10:23:25.520850: step: 44/469, loss: 0.5426982641220093 2023-01-22 10:23:26.122441: step: 46/469, loss: 0.41402673721313477 2023-01-22 10:23:26.734778: step: 48/469, loss: 0.6934465765953064 2023-01-22 10:23:27.380224: step: 50/469, loss: 1.388898491859436 2023-01-22 10:23:28.032810: step: 52/469, loss: 2.3539657592773438 2023-01-22 10:23:28.625810: step: 54/469, loss: 0.24942156672477722 2023-01-22 10:23:29.259240: step: 56/469, loss: 1.6694965362548828 2023-01-22 10:23:29.889057: step: 58/469, loss: 3.645967483520508 2023-01-22 10:23:30.548839: step: 60/469, loss: 0.7160361409187317 2023-01-22 10:23:31.253875: step: 62/469, loss: 2.0979177951812744 2023-01-22 10:23:31.906901: step: 64/469, loss: 0.8871322870254517 2023-01-22 10:23:32.557168: step: 66/469, loss: 3.3475236892700195 2023-01-22 10:23:33.178934: step: 68/469, loss: 1.5695114135742188 2023-01-22 10:23:33.813086: step: 70/469, loss: 0.3799401819705963 2023-01-22 10:23:34.502868: step: 72/469, loss: 1.5135334730148315 2023-01-22 10:23:35.091807: step: 74/469, loss: 2.1811304092407227 2023-01-22 10:23:35.748277: step: 76/469, loss: 1.3809643983840942 2023-01-22 10:23:36.401125: step: 78/469, loss: 0.36087802052497864 2023-01-22 10:23:36.996266: step: 80/469, loss: 0.8317145109176636 2023-01-22 10:23:37.654173: step: 82/469, loss: 0.38330256938934326 2023-01-22 10:23:38.232730: step: 84/469, loss: 3.9333324432373047 2023-01-22 10:23:38.856526: step: 86/469, loss: 0.3897731602191925 2023-01-22 10:23:39.465303: step: 88/469, loss: 1.5685337781906128 2023-01-22 10:23:40.125625: step: 90/469, loss: 0.732214629650116 2023-01-22 10:23:40.759374: step: 92/469, loss: 1.2500853538513184 2023-01-22 10:23:41.342162: step: 94/469, loss: 0.6063386797904968 2023-01-22 10:23:42.042641: step: 96/469, loss: 3.0048389434814453 2023-01-22 10:23:42.674930: step: 98/469, loss: 0.2374560683965683 2023-01-22 10:23:43.354131: step: 100/469, loss: 0.34000909328460693 2023-01-22 10:23:43.974606: step: 102/469, loss: 0.369872510433197 2023-01-22 10:23:44.591178: step: 104/469, loss: 2.179455518722534 2023-01-22 10:23:45.176120: step: 106/469, loss: 1.6280885934829712 2023-01-22 10:23:45.924949: step: 108/469, loss: 1.45135498046875 2023-01-22 10:23:46.567545: step: 110/469, loss: 0.21561674773693085 2023-01-22 10:23:47.177609: step: 112/469, loss: 4.606247901916504 2023-01-22 10:23:47.739040: step: 114/469, loss: 2.7122511863708496 2023-01-22 10:23:48.291394: step: 116/469, loss: 0.2609219253063202 
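Note on the per-epoch evaluation summaries (e.g., the epoch-1 block above): each 'template' and 'slot' entry lists precision p, recall r, and their harmonic mean f1 = 2pr/(p+r), and the 'combined' value equals the template f1 multiplied by the slot f1; the epoch-1 Dev Chinese numbers reproduce exactly under this reading. A short check in Python (the helper name is illustrative only, not taken from train.py):

def f1(p, r):
    # Harmonic mean of precision and recall; 0 when both are 0.
    return 2 * p * r / (p + r) if (p + r) else 0.0

# Epoch-1 Dev Chinese values from the summary above:
template_f1 = f1(1.0, 0.5833333333333334)              # 0.7368421052631579
slot_f1 = f1(0.2810668174058052, 0.2283667891422167)   # 0.25199093974313574
combined = template_f1 * slot_f1                        # 0.18567753454757369, as logged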
2023-01-22 10:23:48.899693: step: 118/469, loss: 0.5242354273796082 2023-01-22 10:23:49.540488: step: 120/469, loss: 0.3231472373008728 2023-01-22 10:23:50.172831: step: 122/469, loss: 0.9146525263786316 2023-01-22 10:23:50.735540: step: 124/469, loss: 0.4059615433216095 2023-01-22 10:23:51.303762: step: 126/469, loss: 0.6007198691368103 2023-01-22 10:23:51.953919: step: 128/469, loss: 0.4161914885044098 2023-01-22 10:23:52.563020: step: 130/469, loss: 0.6699537634849548 2023-01-22 10:23:53.278527: step: 132/469, loss: 1.0586177110671997 2023-01-22 10:23:53.902173: step: 134/469, loss: 0.6724517941474915 2023-01-22 10:23:54.501281: step: 136/469, loss: 1.4755992889404297 2023-01-22 10:23:55.151425: step: 138/469, loss: 2.309426784515381 2023-01-22 10:23:55.803622: step: 140/469, loss: 2.3876473903656006 2023-01-22 10:23:56.446888: step: 142/469, loss: 0.5938212275505066 2023-01-22 10:23:57.080891: step: 144/469, loss: 1.8941092491149902 2023-01-22 10:23:57.654456: step: 146/469, loss: 0.20273509621620178 2023-01-22 10:23:58.314434: step: 148/469, loss: 1.0266315937042236 2023-01-22 10:23:58.896252: step: 150/469, loss: 2.066869020462036 2023-01-22 10:23:59.470490: step: 152/469, loss: 2.767587423324585 2023-01-22 10:24:00.041673: step: 154/469, loss: 0.2977367341518402 2023-01-22 10:24:00.677494: step: 156/469, loss: 1.2398371696472168 2023-01-22 10:24:01.224859: step: 158/469, loss: 1.8017405271530151 2023-01-22 10:24:01.910297: step: 160/469, loss: 0.6679936647415161 2023-01-22 10:24:02.545746: step: 162/469, loss: 2.998906373977661 2023-01-22 10:24:03.112161: step: 164/469, loss: 2.467369556427002 2023-01-22 10:24:03.744340: step: 166/469, loss: 4.161844253540039 2023-01-22 10:24:04.359443: step: 168/469, loss: 6.108869552612305 2023-01-22 10:24:04.976450: step: 170/469, loss: 4.873496055603027 2023-01-22 10:24:05.644852: step: 172/469, loss: 1.1807408332824707 2023-01-22 10:24:06.293029: step: 174/469, loss: 0.3957067131996155 2023-01-22 10:24:06.928121: step: 176/469, loss: 0.6589661836624146 2023-01-22 10:24:07.566125: step: 178/469, loss: 0.9177929162979126 2023-01-22 10:24:08.186710: step: 180/469, loss: 1.5773760080337524 2023-01-22 10:24:08.866114: step: 182/469, loss: 4.654255390167236 2023-01-22 10:24:09.511840: step: 184/469, loss: 4.3159260749816895 2023-01-22 10:24:10.139004: step: 186/469, loss: 0.6257660388946533 2023-01-22 10:24:10.749663: step: 188/469, loss: 2.640253782272339 2023-01-22 10:24:11.371422: step: 190/469, loss: 1.3661240339279175 2023-01-22 10:24:11.993469: step: 192/469, loss: 2.0238704681396484 2023-01-22 10:24:12.658659: step: 194/469, loss: 1.169008493423462 2023-01-22 10:24:13.234099: step: 196/469, loss: 0.5621331930160522 2023-01-22 10:24:13.815089: step: 198/469, loss: 2.576171398162842 2023-01-22 10:24:14.415474: step: 200/469, loss: 3.413377046585083 2023-01-22 10:24:15.108715: step: 202/469, loss: 0.6116236448287964 2023-01-22 10:24:15.750231: step: 204/469, loss: 2.3727691173553467 2023-01-22 10:24:16.379068: step: 206/469, loss: 1.110275149345398 2023-01-22 10:24:17.024456: step: 208/469, loss: 1.7331864833831787 2023-01-22 10:24:17.662078: step: 210/469, loss: 1.9998188018798828 2023-01-22 10:24:18.382176: step: 212/469, loss: 4.6289448738098145 2023-01-22 10:24:19.007049: step: 214/469, loss: 5.65755558013916 2023-01-22 10:24:19.613532: step: 216/469, loss: 0.7672577500343323 2023-01-22 10:24:20.198584: step: 218/469, loss: 0.4674344062805176 2023-01-22 10:24:20.797545: step: 220/469, loss: 0.982247531414032 2023-01-22 10:24:21.458032: step: 
222/469, loss: 3.89165997505188 2023-01-22 10:24:22.090953: step: 224/469, loss: 0.6990604996681213 2023-01-22 10:24:22.724089: step: 226/469, loss: 0.43852055072784424 2023-01-22 10:24:23.326955: step: 228/469, loss: 0.9134107828140259 2023-01-22 10:24:23.989418: step: 230/469, loss: 0.48390674591064453 2023-01-22 10:24:24.610003: step: 232/469, loss: 0.8309329748153687 2023-01-22 10:24:25.227880: step: 234/469, loss: 1.504846215248108 2023-01-22 10:24:25.904170: step: 236/469, loss: 1.4761226177215576 2023-01-22 10:24:26.497142: step: 238/469, loss: 0.20416758954524994 2023-01-22 10:24:27.145809: step: 240/469, loss: 0.2646782100200653 2023-01-22 10:24:27.821641: step: 242/469, loss: 0.3520800471305847 2023-01-22 10:24:28.534811: step: 244/469, loss: 4.36689567565918 2023-01-22 10:24:29.241604: step: 246/469, loss: 1.7954626083374023 2023-01-22 10:24:29.862811: step: 248/469, loss: 1.6993610858917236 2023-01-22 10:24:30.467266: step: 250/469, loss: 1.2133893966674805 2023-01-22 10:24:31.199000: step: 252/469, loss: 0.7194636464118958 2023-01-22 10:24:31.836478: step: 254/469, loss: 1.1498796939849854 2023-01-22 10:24:32.479040: step: 256/469, loss: 1.4167494773864746 2023-01-22 10:24:33.078446: step: 258/469, loss: 1.2200649976730347 2023-01-22 10:24:33.721267: step: 260/469, loss: 0.3104654848575592 2023-01-22 10:24:34.326235: step: 262/469, loss: 3.2491283416748047 2023-01-22 10:24:34.964137: step: 264/469, loss: 5.202022075653076 2023-01-22 10:24:35.657361: step: 266/469, loss: 1.3647511005401611 2023-01-22 10:24:36.281613: step: 268/469, loss: 1.081467628479004 2023-01-22 10:24:36.931785: step: 270/469, loss: 1.4648642539978027 2023-01-22 10:24:37.569064: step: 272/469, loss: 1.9662424325942993 2023-01-22 10:24:38.192222: step: 274/469, loss: 0.8211962580680847 2023-01-22 10:24:38.870558: step: 276/469, loss: 0.24146252870559692 2023-01-22 10:24:39.539062: step: 278/469, loss: 0.8601873517036438 2023-01-22 10:24:40.171696: step: 280/469, loss: 1.9408857822418213 2023-01-22 10:24:40.787244: step: 282/469, loss: 1.6274000406265259 2023-01-22 10:24:41.434144: step: 284/469, loss: 0.9171558618545532 2023-01-22 10:24:42.057338: step: 286/469, loss: 5.5370869636535645 2023-01-22 10:24:42.717643: step: 288/469, loss: 0.7206916809082031 2023-01-22 10:24:43.358670: step: 290/469, loss: 9.945982933044434 2023-01-22 10:24:44.009915: step: 292/469, loss: 1.080707311630249 2023-01-22 10:24:44.603149: step: 294/469, loss: 0.6142443418502808 2023-01-22 10:24:45.276302: step: 296/469, loss: 2.342301368713379 2023-01-22 10:24:45.932519: step: 298/469, loss: 0.36795201897621155 2023-01-22 10:24:46.681438: step: 300/469, loss: 9.360123634338379 2023-01-22 10:24:47.287184: step: 302/469, loss: 0.8583207130432129 2023-01-22 10:24:47.996422: step: 304/469, loss: 1.77339768409729 2023-01-22 10:24:48.604280: step: 306/469, loss: 0.34043794870376587 2023-01-22 10:24:49.279563: step: 308/469, loss: 2.13614821434021 2023-01-22 10:24:49.896242: step: 310/469, loss: 2.374389171600342 2023-01-22 10:24:50.579488: step: 312/469, loss: 0.537878155708313 2023-01-22 10:24:51.227807: step: 314/469, loss: 0.28871050477027893 2023-01-22 10:24:51.806657: step: 316/469, loss: 0.7118523120880127 2023-01-22 10:24:52.431217: step: 318/469, loss: 0.5514461398124695 2023-01-22 10:24:53.008954: step: 320/469, loss: 1.193490743637085 2023-01-22 10:24:53.589142: step: 322/469, loss: 0.5438361167907715 2023-01-22 10:24:54.200102: step: 324/469, loss: 0.8662155866622925 2023-01-22 10:24:54.865243: step: 326/469, loss: 
4.128833770751953 2023-01-22 10:24:55.433629: step: 328/469, loss: 0.26334819197654724 2023-01-22 10:24:56.044714: step: 330/469, loss: 0.6601830720901489 2023-01-22 10:24:56.684885: step: 332/469, loss: 1.41522216796875 2023-01-22 10:24:57.347455: step: 334/469, loss: 1.5807245969772339 2023-01-22 10:24:57.980358: step: 336/469, loss: 0.9234598875045776 2023-01-22 10:24:58.597080: step: 338/469, loss: 0.9473558664321899 2023-01-22 10:24:59.219027: step: 340/469, loss: 0.8189126253128052 2023-01-22 10:24:59.774116: step: 342/469, loss: 1.153841495513916 2023-01-22 10:25:00.423631: step: 344/469, loss: 0.6549341082572937 2023-01-22 10:25:01.084819: step: 346/469, loss: 0.5125113129615784 2023-01-22 10:25:01.740262: step: 348/469, loss: 3.0550527572631836 2023-01-22 10:25:02.386842: step: 350/469, loss: 4.007517337799072 2023-01-22 10:25:02.957585: step: 352/469, loss: 1.2425651550292969 2023-01-22 10:25:03.629689: step: 354/469, loss: 0.4758748412132263 2023-01-22 10:25:04.261602: step: 356/469, loss: 1.3866406679153442 2023-01-22 10:25:04.941648: step: 358/469, loss: 2.6619386672973633 2023-01-22 10:25:05.549800: step: 360/469, loss: 1.7839328050613403 2023-01-22 10:25:06.209387: step: 362/469, loss: 0.40629005432128906 2023-01-22 10:25:06.787723: step: 364/469, loss: 1.3062337636947632 2023-01-22 10:25:07.406337: step: 366/469, loss: 1.236457347869873 2023-01-22 10:25:08.055222: step: 368/469, loss: 1.409936547279358 2023-01-22 10:25:08.682991: step: 370/469, loss: 4.042206764221191 2023-01-22 10:25:09.263600: step: 372/469, loss: 0.6997310519218445 2023-01-22 10:25:09.830689: step: 374/469, loss: 0.41233372688293457 2023-01-22 10:25:10.626065: step: 376/469, loss: 1.12722909450531 2023-01-22 10:25:11.261363: step: 378/469, loss: 0.9107418060302734 2023-01-22 10:25:11.946131: step: 380/469, loss: 0.7936978340148926 2023-01-22 10:25:12.603529: step: 382/469, loss: 1.2343323230743408 2023-01-22 10:25:13.353901: step: 384/469, loss: 1.285002589225769 2023-01-22 10:25:13.945293: step: 386/469, loss: 0.4721754789352417 2023-01-22 10:25:14.511159: step: 388/469, loss: 1.0002391338348389 2023-01-22 10:25:15.142680: step: 390/469, loss: 0.5865353941917419 2023-01-22 10:25:15.744246: step: 392/469, loss: 0.43929287791252136 2023-01-22 10:25:16.303637: step: 394/469, loss: 1.8687264919281006 2023-01-22 10:25:16.937852: step: 396/469, loss: 0.8266392946243286 2023-01-22 10:25:17.550014: step: 398/469, loss: 0.7542542219161987 2023-01-22 10:25:18.128278: step: 400/469, loss: 1.4228665828704834 2023-01-22 10:25:18.756929: step: 402/469, loss: 0.6893333792686462 2023-01-22 10:25:19.404398: step: 404/469, loss: 0.8588920831680298 2023-01-22 10:25:20.039908: step: 406/469, loss: 0.2756017744541168 2023-01-22 10:25:20.634786: step: 408/469, loss: 0.8203291893005371 2023-01-22 10:25:21.265669: step: 410/469, loss: 3.027172803878784 2023-01-22 10:25:21.894849: step: 412/469, loss: 0.7559589743614197 2023-01-22 10:25:22.510509: step: 414/469, loss: 4.531430244445801 2023-01-22 10:25:23.211659: step: 416/469, loss: 1.2973449230194092 2023-01-22 10:25:23.868918: step: 418/469, loss: 0.7788103222846985 2023-01-22 10:25:24.456000: step: 420/469, loss: 0.9620156288146973 2023-01-22 10:25:25.027619: step: 422/469, loss: 1.9674732685089111 2023-01-22 10:25:25.668481: step: 424/469, loss: 0.7176800966262817 2023-01-22 10:25:26.273884: step: 426/469, loss: 1.4385719299316406 2023-01-22 10:25:26.995025: step: 428/469, loss: 0.31072425842285156 2023-01-22 10:25:27.617910: step: 430/469, loss: 0.5030708909034729 
2023-01-22 10:25:28.287179: step: 432/469, loss: 0.8212928175926208 2023-01-22 10:25:28.969788: step: 434/469, loss: 5.32388162612915 2023-01-22 10:25:29.621957: step: 436/469, loss: 3.257072925567627 2023-01-22 10:25:30.228668: step: 438/469, loss: 0.6465895771980286 2023-01-22 10:25:30.835554: step: 440/469, loss: 2.1670315265655518 2023-01-22 10:25:31.518338: step: 442/469, loss: 0.7426234483718872 2023-01-22 10:25:32.211772: step: 444/469, loss: 2.1683270931243896 2023-01-22 10:25:32.856677: step: 446/469, loss: 2.1971077919006348 2023-01-22 10:25:33.476842: step: 448/469, loss: 1.287776231765747 2023-01-22 10:25:34.100999: step: 450/469, loss: 0.3704913258552551 2023-01-22 10:25:34.735998: step: 452/469, loss: 0.6952789425849915 2023-01-22 10:25:35.354164: step: 454/469, loss: 2.212874174118042 2023-01-22 10:25:35.955709: step: 456/469, loss: 0.8737174272537231 2023-01-22 10:25:36.544034: step: 458/469, loss: 0.9015182852745056 2023-01-22 10:25:37.147394: step: 460/469, loss: 1.221562147140503 2023-01-22 10:25:37.812797: step: 462/469, loss: 1.1128971576690674 2023-01-22 10:25:38.466864: step: 464/469, loss: 0.8728907108306885 2023-01-22 10:25:39.074701: step: 466/469, loss: 0.25806495547294617 2023-01-22 10:25:39.716141: step: 468/469, loss: 2.4693655967712402 2023-01-22 10:25:40.325946: step: 470/469, loss: 1.1539511680603027 2023-01-22 10:25:40.980067: step: 472/469, loss: 0.7793067693710327 2023-01-22 10:25:41.617457: step: 474/469, loss: 1.5572283267974854 2023-01-22 10:25:42.345732: step: 476/469, loss: 0.9491767883300781 2023-01-22 10:25:43.080851: step: 478/469, loss: 0.8359588980674744 2023-01-22 10:25:43.712971: step: 480/469, loss: 0.9200326800346375 2023-01-22 10:25:44.366252: step: 482/469, loss: 1.0874435901641846 2023-01-22 10:25:45.043435: step: 484/469, loss: 1.3438928127288818 2023-01-22 10:25:45.709762: step: 486/469, loss: 2.581448793411255 2023-01-22 10:25:46.358534: step: 488/469, loss: 1.4607781171798706 2023-01-22 10:25:46.997079: step: 490/469, loss: 2.7576074600219727 2023-01-22 10:25:47.603565: step: 492/469, loss: 1.3457109928131104 2023-01-22 10:25:48.322853: step: 494/469, loss: 1.15946364402771 2023-01-22 10:25:48.979351: step: 496/469, loss: 0.9746388792991638 2023-01-22 10:25:49.646722: step: 498/469, loss: 3.0564427375793457 2023-01-22 10:25:50.298260: step: 500/469, loss: 0.3690948486328125 2023-01-22 10:25:50.958693: step: 502/469, loss: 1.1224571466445923 2023-01-22 10:25:51.583188: step: 504/469, loss: 1.7088745832443237 2023-01-22 10:25:52.257110: step: 506/469, loss: 3.922182559967041 2023-01-22 10:25:52.879173: step: 508/469, loss: 0.7903258204460144 2023-01-22 10:25:53.534576: step: 510/469, loss: 5.102704048156738 2023-01-22 10:25:54.211918: step: 512/469, loss: 1.3014575242996216 2023-01-22 10:25:54.835392: step: 514/469, loss: 1.4630281925201416 2023-01-22 10:25:55.415626: step: 516/469, loss: 1.5601834058761597 2023-01-22 10:25:56.033842: step: 518/469, loss: 0.6343270540237427 2023-01-22 10:25:56.678680: step: 520/469, loss: 1.5897058248519897 2023-01-22 10:25:57.275456: step: 522/469, loss: 0.8664147853851318 2023-01-22 10:25:57.903226: step: 524/469, loss: 0.6561774611473083 2023-01-22 10:25:58.548827: step: 526/469, loss: 0.2877194881439209 2023-01-22 10:25:59.186179: step: 528/469, loss: 0.352175235748291 2023-01-22 10:25:59.813552: step: 530/469, loss: 1.875475287437439 2023-01-22 10:26:00.405568: step: 532/469, loss: 1.1144897937774658 2023-01-22 10:26:01.022247: step: 534/469, loss: 5.207115173339844 2023-01-22 10:26:01.578395: 
step: 536/469, loss: 0.47226953506469727 2023-01-22 10:26:02.228259: step: 538/469, loss: 0.3436928391456604 2023-01-22 10:26:02.920395: step: 540/469, loss: 1.0703237056732178 2023-01-22 10:26:03.588390: step: 542/469, loss: 5.599519729614258 2023-01-22 10:26:04.168608: step: 544/469, loss: 0.4232501983642578 2023-01-22 10:26:04.806871: step: 546/469, loss: 1.7377922534942627 2023-01-22 10:26:05.406497: step: 548/469, loss: 1.4081114530563354 2023-01-22 10:26:05.999138: step: 550/469, loss: 1.319005012512207 2023-01-22 10:26:06.580979: step: 552/469, loss: 0.7447803616523743 2023-01-22 10:26:07.142451: step: 554/469, loss: 0.9361552000045776 2023-01-22 10:26:07.773919: step: 556/469, loss: 0.48655951023101807 2023-01-22 10:26:08.460413: step: 558/469, loss: 1.8592716455459595 2023-01-22 10:26:09.121829: step: 560/469, loss: 0.7301490306854248 2023-01-22 10:26:09.789895: step: 562/469, loss: 0.5893291234970093 2023-01-22 10:26:10.484269: step: 564/469, loss: 1.2110570669174194 2023-01-22 10:26:11.114169: step: 566/469, loss: 0.45537492632865906 2023-01-22 10:26:11.750418: step: 568/469, loss: 0.3384642004966736 2023-01-22 10:26:12.397691: step: 570/469, loss: 1.717210054397583 2023-01-22 10:26:12.962488: step: 572/469, loss: 1.2995985746383667 2023-01-22 10:26:13.646582: step: 574/469, loss: 2.483738422393799 2023-01-22 10:26:14.311076: step: 576/469, loss: 3.003157615661621 2023-01-22 10:26:14.938559: step: 578/469, loss: 2.044503927230835 2023-01-22 10:26:15.593040: step: 580/469, loss: 1.1646147966384888 2023-01-22 10:26:16.203029: step: 582/469, loss: 1.753267765045166 2023-01-22 10:26:16.889226: step: 584/469, loss: 2.5488531589508057 2023-01-22 10:26:17.529609: step: 586/469, loss: 0.42046964168548584 2023-01-22 10:26:18.150168: step: 588/469, loss: 2.019542694091797 2023-01-22 10:26:18.703799: step: 590/469, loss: 1.6281038522720337 2023-01-22 10:26:19.339380: step: 592/469, loss: 0.3488382399082184 2023-01-22 10:26:20.084540: step: 594/469, loss: 5.730656623840332 2023-01-22 10:26:20.738271: step: 596/469, loss: 0.6978225111961365 2023-01-22 10:26:21.368710: step: 598/469, loss: 2.5393149852752686 2023-01-22 10:26:22.064731: step: 600/469, loss: 3.9996895790100098 2023-01-22 10:26:22.637455: step: 602/469, loss: 0.1451789289712906 2023-01-22 10:26:23.256921: step: 604/469, loss: 2.5012001991271973 2023-01-22 10:26:23.943679: step: 606/469, loss: 0.3928256928920746 2023-01-22 10:26:24.641605: step: 608/469, loss: 4.79415225982666 2023-01-22 10:26:25.281009: step: 610/469, loss: 0.42651355266571045 2023-01-22 10:26:25.863125: step: 612/469, loss: 0.732073187828064 2023-01-22 10:26:26.487036: step: 614/469, loss: 1.5955448150634766 2023-01-22 10:26:27.098073: step: 616/469, loss: 0.35005825757980347 2023-01-22 10:26:27.705606: step: 618/469, loss: 0.4389929175376892 2023-01-22 10:26:28.339330: step: 620/469, loss: 0.5638032555580139 2023-01-22 10:26:28.979390: step: 622/469, loss: 0.6681195497512817 2023-01-22 10:26:29.672964: step: 624/469, loss: 0.3109690845012665 2023-01-22 10:26:30.344579: step: 626/469, loss: 1.2713466882705688 2023-01-22 10:26:31.051539: step: 628/469, loss: 0.49100279808044434 2023-01-22 10:26:31.701507: step: 630/469, loss: 4.195455074310303 2023-01-22 10:26:32.342680: step: 632/469, loss: 0.32286709547042847 2023-01-22 10:26:33.024577: step: 634/469, loss: 0.5429559350013733 2023-01-22 10:26:33.654687: step: 636/469, loss: 4.631080627441406 2023-01-22 10:26:34.235820: step: 638/469, loss: 3.8390443325042725 2023-01-22 10:26:34.861566: step: 640/469, loss: 
1.423614263534546 2023-01-22 10:26:35.470767: step: 642/469, loss: 2.320253610610962 2023-01-22 10:26:36.140390: step: 644/469, loss: 0.704879641532898 2023-01-22 10:26:36.960021: step: 646/469, loss: 0.2985306680202484 2023-01-22 10:26:37.537233: step: 648/469, loss: 1.1311973333358765 2023-01-22 10:26:38.139754: step: 650/469, loss: 1.9515068531036377 2023-01-22 10:26:38.769176: step: 652/469, loss: 2.115736246109009 2023-01-22 10:26:39.412924: step: 654/469, loss: 0.6140111088752747 2023-01-22 10:26:40.034964: step: 656/469, loss: 1.2745435237884521 2023-01-22 10:26:40.675443: step: 658/469, loss: 1.8633383512496948 2023-01-22 10:26:41.286989: step: 660/469, loss: 1.288448452949524 2023-01-22 10:26:41.893631: step: 662/469, loss: 1.4117538928985596 2023-01-22 10:26:42.575509: step: 664/469, loss: 10.073023796081543 2023-01-22 10:26:43.219117: step: 666/469, loss: 0.965945839881897 2023-01-22 10:26:43.857706: step: 668/469, loss: 0.662321150302887 2023-01-22 10:26:44.596971: step: 670/469, loss: 0.33532407879829407 2023-01-22 10:26:45.263336: step: 672/469, loss: 4.552870273590088 2023-01-22 10:26:45.968911: step: 674/469, loss: 0.46886712312698364 2023-01-22 10:26:46.621763: step: 676/469, loss: 5.918253421783447 2023-01-22 10:26:47.216641: step: 678/469, loss: 0.5071016550064087 2023-01-22 10:26:47.802798: step: 680/469, loss: 0.7586919665336609 2023-01-22 10:26:48.460332: step: 682/469, loss: 0.2960484027862549 2023-01-22 10:26:49.169408: step: 684/469, loss: 0.4234350025653839 2023-01-22 10:26:49.840818: step: 686/469, loss: 1.6380354166030884 2023-01-22 10:26:50.495427: step: 688/469, loss: 1.306402325630188 2023-01-22 10:26:51.120246: step: 690/469, loss: 1.4292117357254028 2023-01-22 10:26:51.716673: step: 692/469, loss: 0.2863052189350128 2023-01-22 10:26:52.344525: step: 694/469, loss: 1.137115478515625 2023-01-22 10:26:52.933665: step: 696/469, loss: 5.062310695648193 2023-01-22 10:26:53.612922: step: 698/469, loss: 1.6446831226348877 2023-01-22 10:26:54.210759: step: 700/469, loss: 0.8182534575462341 2023-01-22 10:26:54.876278: step: 702/469, loss: 6.349822521209717 2023-01-22 10:26:55.469199: step: 704/469, loss: 0.6283247470855713 2023-01-22 10:26:56.145913: step: 706/469, loss: 0.47316333651542664 2023-01-22 10:26:56.792844: step: 708/469, loss: 0.5101945400238037 2023-01-22 10:26:57.444422: step: 710/469, loss: 0.4977058172225952 2023-01-22 10:26:58.083916: step: 712/469, loss: 1.8934651613235474 2023-01-22 10:26:58.743009: step: 714/469, loss: 1.3311917781829834 2023-01-22 10:26:59.411225: step: 716/469, loss: 0.7333809733390808 2023-01-22 10:27:00.039365: step: 718/469, loss: 0.4255601167678833 2023-01-22 10:27:00.676819: step: 720/469, loss: 0.8671449422836304 2023-01-22 10:27:01.248743: step: 722/469, loss: 0.8723408579826355 2023-01-22 10:27:01.811283: step: 724/469, loss: 0.4028339385986328 2023-01-22 10:27:02.458635: step: 726/469, loss: 0.6326789259910583 2023-01-22 10:27:03.189554: step: 728/469, loss: 0.5795637369155884 2023-01-22 10:27:03.837569: step: 730/469, loss: 1.3285558223724365 2023-01-22 10:27:04.499052: step: 732/469, loss: 0.4316285252571106 2023-01-22 10:27:05.165239: step: 734/469, loss: 2.0834743976593018 2023-01-22 10:27:05.775004: step: 736/469, loss: 0.5148096680641174 2023-01-22 10:27:06.327350: step: 738/469, loss: 0.5455061197280884 2023-01-22 10:27:06.949050: step: 740/469, loss: 0.8825864791870117 2023-01-22 10:27:07.583665: step: 742/469, loss: 0.5048531293869019 2023-01-22 10:27:08.186352: step: 744/469, loss: 0.9531071782112122 
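The 'New best chinese model... / New best korean model... / New best russian model...' lines in each epoch summary indicate that a separate best checkpoint is tracked per target language. The selection score itself is not printed, so the sketch below only illustrates the bookkeeping pattern; treating a single per-language score (such as a combined F1) as the criterion is an assumption, not something stated in this log:

# Hypothetical per-language best-model bookkeeping (assumed criterion: one
# per-language score; the actual rule used by train.py is not shown here).
best = {"chinese": None, "korean": None, "russian": None}

def maybe_update_best(language, score, epoch):
    if best[language] is None or score > best[language][0]:
        best[language] = (score, epoch)
        print(f"New best {language} model...")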
2023-01-22 10:27:08.789482: step: 746/469, loss: 2.5482559204101562 2023-01-22 10:27:09.480021: step: 748/469, loss: 1.278427243232727 2023-01-22 10:27:10.104778: step: 750/469, loss: 3.294517993927002 2023-01-22 10:27:10.681465: step: 752/469, loss: 1.537923812866211 2023-01-22 10:27:11.354473: step: 754/469, loss: 1.2817145586013794 2023-01-22 10:27:11.965225: step: 756/469, loss: 1.3262953758239746 2023-01-22 10:27:12.604327: step: 758/469, loss: 2.2460379600524902 2023-01-22 10:27:13.260382: step: 760/469, loss: 0.8554172515869141 2023-01-22 10:27:13.990825: step: 762/469, loss: 1.2948087453842163 2023-01-22 10:27:14.642078: step: 764/469, loss: 0.9453698396682739 2023-01-22 10:27:15.287370: step: 766/469, loss: 2.4158775806427 2023-01-22 10:27:15.926545: step: 768/469, loss: 2.356306552886963 2023-01-22 10:27:16.533142: step: 770/469, loss: 0.622253954410553 2023-01-22 10:27:17.175080: step: 772/469, loss: 3.284148693084717 2023-01-22 10:27:17.838026: step: 774/469, loss: 1.1687579154968262 2023-01-22 10:27:18.475874: step: 776/469, loss: 2.5336499214172363 2023-01-22 10:27:19.056705: step: 778/469, loss: 1.128499150276184 2023-01-22 10:27:19.675439: step: 780/469, loss: 2.3241851329803467 2023-01-22 10:27:20.259756: step: 782/469, loss: 0.908094048500061 2023-01-22 10:27:20.857035: step: 784/469, loss: 1.9209789037704468 2023-01-22 10:27:21.531406: step: 786/469, loss: 0.5153329372406006 2023-01-22 10:27:22.136269: step: 788/469, loss: 0.4622742533683777 2023-01-22 10:27:22.712281: step: 790/469, loss: 0.35491305589675903 2023-01-22 10:27:23.323737: step: 792/469, loss: 0.2799530625343323 2023-01-22 10:27:23.926446: step: 794/469, loss: 0.6011663675308228 2023-01-22 10:27:24.531084: step: 796/469, loss: 0.7505756616592407 2023-01-22 10:27:25.169367: step: 798/469, loss: 0.774082601070404 2023-01-22 10:27:25.747168: step: 800/469, loss: 2.89441180229187 2023-01-22 10:27:26.394368: step: 802/469, loss: 1.2030647993087769 2023-01-22 10:27:26.951216: step: 804/469, loss: 1.4437882900238037 2023-01-22 10:27:27.621325: step: 806/469, loss: 1.3417713642120361 2023-01-22 10:27:28.161532: step: 808/469, loss: 0.897671639919281 2023-01-22 10:27:28.815604: step: 810/469, loss: 1.5458115339279175 2023-01-22 10:27:29.379447: step: 812/469, loss: 2.537576675415039 2023-01-22 10:27:30.052581: step: 814/469, loss: 0.6162286996841431 2023-01-22 10:27:30.631355: step: 816/469, loss: 2.908498764038086 2023-01-22 10:27:31.176588: step: 818/469, loss: 2.7944982051849365 2023-01-22 10:27:31.885170: step: 820/469, loss: 2.1001415252685547 2023-01-22 10:27:32.560377: step: 822/469, loss: 0.516606867313385 2023-01-22 10:27:33.154770: step: 824/469, loss: 1.3633286952972412 2023-01-22 10:27:33.694044: step: 826/469, loss: 0.6928209662437439 2023-01-22 10:27:34.311351: step: 828/469, loss: 6.307271957397461 2023-01-22 10:27:34.882732: step: 830/469, loss: 1.373197317123413 2023-01-22 10:27:35.495942: step: 832/469, loss: 2.2309093475341797 2023-01-22 10:27:36.095867: step: 834/469, loss: 0.9808170795440674 2023-01-22 10:27:36.746112: step: 836/469, loss: 0.8308428525924683 2023-01-22 10:27:37.389422: step: 838/469, loss: 2.0854196548461914 2023-01-22 10:27:38.104283: step: 840/469, loss: 0.5977488160133362 2023-01-22 10:27:38.673122: step: 842/469, loss: 2.2959518432617188 2023-01-22 10:27:39.316985: step: 844/469, loss: 0.8557641506195068 2023-01-22 10:27:39.921961: step: 846/469, loss: 1.5363421440124512 2023-01-22 10:27:40.544780: step: 848/469, loss: 0.9423149228096008 2023-01-22 10:27:41.206378: step: 
850/469, loss: 0.31465744972229004 2023-01-22 10:27:41.852800: step: 852/469, loss: 1.1953325271606445 2023-01-22 10:27:42.435116: step: 854/469, loss: 0.9645344018936157 2023-01-22 10:27:43.079837: step: 856/469, loss: 2.452528476715088 2023-01-22 10:27:43.661832: step: 858/469, loss: 0.8919779062271118 2023-01-22 10:27:44.262492: step: 860/469, loss: 0.4718051552772522 2023-01-22 10:27:44.924421: step: 862/469, loss: 0.6578753590583801 2023-01-22 10:27:45.514452: step: 864/469, loss: 0.32035863399505615 2023-01-22 10:27:46.221299: step: 866/469, loss: 0.6884527206420898 2023-01-22 10:27:46.859088: step: 868/469, loss: 1.0650737285614014 2023-01-22 10:27:47.508854: step: 870/469, loss: 1.591570496559143 2023-01-22 10:27:48.159471: step: 872/469, loss: 1.2843754291534424 2023-01-22 10:27:48.858589: step: 874/469, loss: 0.6252328157424927 2023-01-22 10:27:49.621206: step: 876/469, loss: 0.641391932964325 2023-01-22 10:27:50.251229: step: 878/469, loss: 1.5632202625274658 2023-01-22 10:27:50.886211: step: 880/469, loss: 0.32845205068588257 2023-01-22 10:27:51.465439: step: 882/469, loss: 0.8776138424873352 2023-01-22 10:27:52.097233: step: 884/469, loss: 0.8035073280334473 2023-01-22 10:27:52.723037: step: 886/469, loss: 3.431936264038086 2023-01-22 10:27:53.337433: step: 888/469, loss: 1.466493844985962 2023-01-22 10:27:53.931106: step: 890/469, loss: 1.4160956144332886 2023-01-22 10:27:54.625089: step: 892/469, loss: 0.23779946565628052 2023-01-22 10:27:55.220373: step: 894/469, loss: 0.6482412815093994 2023-01-22 10:27:55.848648: step: 896/469, loss: 1.543224573135376 2023-01-22 10:27:56.506597: step: 898/469, loss: 0.6387840509414673 2023-01-22 10:27:57.048725: step: 900/469, loss: 1.2316296100616455 2023-01-22 10:27:57.626272: step: 902/469, loss: 0.20950624346733093 2023-01-22 10:27:58.226892: step: 904/469, loss: 0.31963419914245605 2023-01-22 10:27:58.886114: step: 906/469, loss: 0.23390412330627441 2023-01-22 10:27:59.503818: step: 908/469, loss: 1.7978248596191406 2023-01-22 10:28:00.113568: step: 910/469, loss: 0.3089022636413574 2023-01-22 10:28:00.742892: step: 912/469, loss: 0.5211467146873474 2023-01-22 10:28:01.393178: step: 914/469, loss: 2.172574281692505 2023-01-22 10:28:02.008269: step: 916/469, loss: 1.0389721393585205 2023-01-22 10:28:02.656629: step: 918/469, loss: 0.6100372076034546 2023-01-22 10:28:03.286787: step: 920/469, loss: 0.3743990361690521 2023-01-22 10:28:03.961760: step: 922/469, loss: 0.5694857835769653 2023-01-22 10:28:04.594390: step: 924/469, loss: 1.7047919034957886 2023-01-22 10:28:05.209842: step: 926/469, loss: 1.1307367086410522 2023-01-22 10:28:05.824010: step: 928/469, loss: 1.5924110412597656 2023-01-22 10:28:06.438683: step: 930/469, loss: 0.39301446080207825 2023-01-22 10:28:07.081891: step: 932/469, loss: 2.1423330307006836 2023-01-22 10:28:07.644951: step: 934/469, loss: 1.2712610960006714 2023-01-22 10:28:08.244032: step: 936/469, loss: 1.3836865425109863 2023-01-22 10:28:08.846164: step: 938/469, loss: 1.4844310283660889 ================================================== Loss: 1.479 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2939502446101825, 'r': 0.2889302214574469, 'f1': 0.2914186157092335}, 'combined': 0.21472950631206678, 'epoch': 2} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.24836124396069234, 'r': 0.22836509623101173, 'f1': 0.23794380379456226}, 'combined': 
0.1297875293424885, 'epoch': 2}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2857443909240715, 'r': 0.2922509045694014, 'f1': 0.28896102571871396}, 'combined': 0.2129186505295787, 'epoch': 2}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2391582356549897, 'r': 0.22274756074728227, 'f1': 0.23066137744839368}, 'combined': 0.12581529679003292, 'epoch': 2}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2909337570867437, 'r': 0.29535020880722557, 'f1': 0.29312534847722765}, 'combined': 0.2159870988779572, 'epoch': 2}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.24920961162582314, 'r': 0.2296011700889331, 'f1': 0.2390038846735275}, 'combined': 0.13036575527646954, 'epoch': 2}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2708333333333333, 'r': 0.3095238095238095, 'f1': 0.28888888888888886}, 'combined': 0.19259259259259257, 'epoch': 2}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26, 'r': 0.2826086956521739, 'f1': 0.27083333333333337}, 'combined': 0.13541666666666669, 'epoch': 2}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 2}
New best chinese model...
New best korean model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2939502446101825, 'r': 0.2889302214574469, 'f1': 0.2914186157092335}, 'combined': 0.21472950631206678, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.24836124396069234, 'r': 0.22836509623101173, 'f1': 0.23794380379456226}, 'combined': 0.1297875293424885, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2708333333333333, 'r': 0.3095238095238095, 'f1': 0.28888888888888886}, 'combined': 0.19259259259259257, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2857443909240715, 'r': 0.2922509045694014, 'f1': 0.28896102571871396}, 'combined': 0.2129186505295787, 'epoch': 2}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2391582356549897, 'r': 0.22274756074728227, 'f1': 0.23066137744839368}, 'combined': 0.12581529679003292, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26, 'r': 0.2826086956521739, 'f1': 0.27083333333333337}, 'combined': 0.13541666666666669, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2675974296837756, 'r': 0.21995697818704282, 'f1': 0.24144965588931105}, 'combined': 0.17791027276054497, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2590920765378194, 'r': 0.14199099162502637, 'f1': 0.18344699036188397}, 'combined': 0.1000619947428458, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.2413793103448276, 'f1': 0.32558139534883723}, 'combined': 0.21705426356589147, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:31:14.579479: step: 2/469, loss: 0.7462694644927979 2023-01-22 10:31:15.153976: step: 4/469, loss: 2.308076858520508 2023-01-22 10:31:15.799497: step: 6/469, loss: 0.6802846193313599 2023-01-22 10:31:16.411096: step: 8/469, loss: 1.4681999683380127 2023-01-22 10:31:17.107793: step: 10/469, loss: 1.167622685432434 2023-01-22 10:31:17.784314: step: 12/469, loss: 0.6306251883506775 2023-01-22 10:31:18.436280: step: 14/469, loss: 0.3387022614479065 2023-01-22 10:31:19.091101: step: 16/469, loss: 1.5801018476486206 2023-01-22 10:31:19.713778: step: 18/469, loss: 2.136303663253784 2023-01-22 10:31:20.342392: step: 20/469, loss: 1.0299530029296875 2023-01-22 10:31:20.960299: step: 22/469, loss: 1.8741533756256104 2023-01-22 10:31:21.573061: step: 24/469, loss: 1.4600305557250977 2023-01-22 10:31:22.122423: step: 26/469, loss: 0.3246730864048004 2023-01-22 10:31:22.718438: step: 28/469, loss: 0.8257705569267273 2023-01-22 10:31:23.324366: step: 30/469, loss: 1.4913718700408936 2023-01-22 10:31:24.008101: step: 32/469, loss: 6.998908042907715 2023-01-22 10:31:24.618388: step: 34/469, loss: 0.7767741084098816 2023-01-22 10:31:25.307221: step: 36/469, loss: 1.3027576208114624 2023-01-22 10:31:25.945578: step: 38/469, loss: 2.3133039474487305 2023-01-22 10:31:26.661516: step: 40/469, loss: 1.537103533744812 2023-01-22 10:31:27.348075: step: 42/469, loss: 0.4171549081802368 2023-01-22 10:31:27.916065: step: 44/469, loss: 0.6975013017654419 2023-01-22 10:31:28.611380: step: 46/469, loss: 0.4346381425857544 2023-01-22 10:31:29.187783: step: 48/469, loss: 1.0038912296295166 2023-01-22 10:31:29.778789: step: 50/469, loss: 1.3999648094177246 2023-01-22 10:31:30.452736: step: 52/469, loss: 1.855452060699463 2023-01-22 10:31:31.058582: step: 54/469, loss: 0.5486173033714294 2023-01-22 10:31:31.672473: step: 56/469, loss: 0.7375549077987671 2023-01-22 10:31:32.270403: step: 58/469, loss: 0.20711149275302887 2023-01-22 10:31:32.891881: step: 60/469, loss: 0.5829421877861023 2023-01-22 10:31:33.585277: step: 62/469, loss: 4.038946628570557 2023-01-22 10:31:34.221697: step: 64/469, loss: 1.4320002794265747 2023-01-22 10:31:34.861001: step: 66/469, loss: 0.5214782953262329 2023-01-22 10:31:35.457372: step: 68/469, loss: 1.410658597946167 2023-01-22 10:31:36.059979: step: 70/469, loss: 0.8122127056121826 2023-01-22 10:31:36.738975: step: 72/469, loss: 0.4889269471168518 2023-01-22 10:31:37.370200: step: 74/469, loss: 2.0898125171661377 2023-01-22 10:31:37.952990: step: 76/469, loss: 0.7922834157943726 2023-01-22 10:31:38.554417: step: 78/469, loss: 0.6663730144500732 2023-01-22 10:31:39.207754: step: 80/469, loss: 3.074713706970215 2023-01-22 10:31:39.800369: step: 82/469, loss: 4.164635181427002 2023-01-22 10:31:40.402570: step: 84/469, loss: 2.532348155975342 2023-01-22 10:31:40.991877: step: 86/469, loss: 0.5383509397506714 2023-01-22 10:31:41.603360: step: 88/469, loss: 1.1348263025283813 2023-01-22 10:31:42.251846: step: 90/469, loss: 0.40761780738830566 2023-01-22 10:31:42.895862: step: 92/469, loss: 0.3754013776779175 2023-01-22 10:31:43.557664: step: 94/469, loss: 0.4742070138454437 2023-01-22 
10:31:44.177717: step: 96/469, loss: 0.9675819873809814 2023-01-22 10:31:44.786370: step: 98/469, loss: 1.2843284606933594 2023-01-22 10:31:45.460431: step: 100/469, loss: 0.9341386556625366 2023-01-22 10:31:46.101559: step: 102/469, loss: 2.3884425163269043 2023-01-22 10:31:46.753613: step: 104/469, loss: 1.4162704944610596 2023-01-22 10:31:47.437696: step: 106/469, loss: 0.5961185693740845 2023-01-22 10:31:48.074854: step: 108/469, loss: 0.6799505949020386 2023-01-22 10:31:48.728710: step: 110/469, loss: 0.3097529113292694 2023-01-22 10:31:49.302529: step: 112/469, loss: 0.43643665313720703 2023-01-22 10:31:49.936913: step: 114/469, loss: 0.7347835302352905 2023-01-22 10:31:50.568830: step: 116/469, loss: 0.29180267453193665 2023-01-22 10:31:51.281280: step: 118/469, loss: 0.3017939329147339 2023-01-22 10:31:51.876975: step: 120/469, loss: 1.890954852104187 2023-01-22 10:31:52.537760: step: 122/469, loss: 0.3954887092113495 2023-01-22 10:31:53.164617: step: 124/469, loss: 1.7034146785736084 2023-01-22 10:31:53.770692: step: 126/469, loss: 0.5341311693191528 2023-01-22 10:31:54.408708: step: 128/469, loss: 0.38662242889404297 2023-01-22 10:31:55.133105: step: 130/469, loss: 0.4594056308269501 2023-01-22 10:31:55.767649: step: 132/469, loss: 1.335113763809204 2023-01-22 10:31:56.369315: step: 134/469, loss: 0.5873169898986816 2023-01-22 10:31:56.978667: step: 136/469, loss: 0.814865231513977 2023-01-22 10:31:57.592235: step: 138/469, loss: 0.9610941410064697 2023-01-22 10:31:58.218642: step: 140/469, loss: 0.9838212132453918 2023-01-22 10:31:58.930787: step: 142/469, loss: 0.7003613710403442 2023-01-22 10:31:59.655046: step: 144/469, loss: 0.9541289210319519 2023-01-22 10:32:00.312428: step: 146/469, loss: 0.9891658425331116 2023-01-22 10:32:00.890287: step: 148/469, loss: 1.1233322620391846 2023-01-22 10:32:01.531134: step: 150/469, loss: 0.5784217715263367 2023-01-22 10:32:02.152329: step: 152/469, loss: 0.5650678873062134 2023-01-22 10:32:02.786385: step: 154/469, loss: 1.263162612915039 2023-01-22 10:32:03.397662: step: 156/469, loss: 4.338045120239258 2023-01-22 10:32:04.055659: step: 158/469, loss: 0.3969321846961975 2023-01-22 10:32:04.714837: step: 160/469, loss: 0.9037598967552185 2023-01-22 10:32:05.365752: step: 162/469, loss: 0.9089833498001099 2023-01-22 10:32:05.984805: step: 164/469, loss: 2.0887653827667236 2023-01-22 10:32:06.663685: step: 166/469, loss: 0.9849932193756104 2023-01-22 10:32:07.244078: step: 168/469, loss: 0.6771668791770935 2023-01-22 10:32:07.806298: step: 170/469, loss: 3.571570873260498 2023-01-22 10:32:08.421156: step: 172/469, loss: 5.442474842071533 2023-01-22 10:32:08.962595: step: 174/469, loss: 1.5340092182159424 2023-01-22 10:32:09.614286: step: 176/469, loss: 0.504738986492157 2023-01-22 10:32:10.204707: step: 178/469, loss: 0.7917119264602661 2023-01-22 10:32:10.780240: step: 180/469, loss: 1.0336380004882812 2023-01-22 10:32:11.388072: step: 182/469, loss: 0.8102993965148926 2023-01-22 10:32:11.962462: step: 184/469, loss: 2.229205846786499 2023-01-22 10:32:12.580683: step: 186/469, loss: 2.003970146179199 2023-01-22 10:32:13.225259: step: 188/469, loss: 0.8723458647727966 2023-01-22 10:32:13.814102: step: 190/469, loss: 0.7709511518478394 2023-01-22 10:32:14.465708: step: 192/469, loss: 1.1595580577850342 2023-01-22 10:32:15.118543: step: 194/469, loss: 1.3885128498077393 2023-01-22 10:32:15.740223: step: 196/469, loss: 0.6130228042602539 2023-01-22 10:32:16.396559: step: 198/469, loss: 0.4494071900844574 2023-01-22 10:32:17.055582: step: 
200/469, loss: 1.9434261322021484 2023-01-22 10:32:17.748353: step: 202/469, loss: 0.2860829830169678 2023-01-22 10:32:18.374383: step: 204/469, loss: 1.2471989393234253 2023-01-22 10:32:19.007146: step: 206/469, loss: 2.7279419898986816 2023-01-22 10:32:19.619756: step: 208/469, loss: 0.5134357213973999 2023-01-22 10:32:20.281993: step: 210/469, loss: 1.6876628398895264 2023-01-22 10:32:20.922278: step: 212/469, loss: 0.38942527770996094 2023-01-22 10:32:21.591108: step: 214/469, loss: 3.4711406230926514 2023-01-22 10:32:22.162236: step: 216/469, loss: 0.9194731116294861 2023-01-22 10:32:22.805995: step: 218/469, loss: 0.2916356325149536 2023-01-22 10:32:23.384884: step: 220/469, loss: 0.3098664879798889 2023-01-22 10:32:23.979375: step: 222/469, loss: 0.2948499023914337 2023-01-22 10:32:24.605865: step: 224/469, loss: 1.8921873569488525 2023-01-22 10:32:25.255147: step: 226/469, loss: 0.43950873613357544 2023-01-22 10:32:25.835952: step: 228/469, loss: 1.2830665111541748 2023-01-22 10:32:26.427082: step: 230/469, loss: 0.6315370798110962 2023-01-22 10:32:27.053194: step: 232/469, loss: 2.4393703937530518 2023-01-22 10:32:27.687478: step: 234/469, loss: 1.4722431898117065 2023-01-22 10:32:28.276637: step: 236/469, loss: 0.4021998345851898 2023-01-22 10:32:28.875847: step: 238/469, loss: 0.47072941064834595 2023-01-22 10:32:29.522035: step: 240/469, loss: 3.191890239715576 2023-01-22 10:32:30.131801: step: 242/469, loss: 1.1580078601837158 2023-01-22 10:32:30.762692: step: 244/469, loss: 2.136058807373047 2023-01-22 10:32:31.345787: step: 246/469, loss: 0.6803306937217712 2023-01-22 10:32:32.045503: step: 248/469, loss: 3.1184935569763184 2023-01-22 10:32:32.725214: step: 250/469, loss: 0.7907870411872864 2023-01-22 10:32:33.295701: step: 252/469, loss: 1.046438455581665 2023-01-22 10:32:33.960921: step: 254/469, loss: 1.5306437015533447 2023-01-22 10:32:34.564501: step: 256/469, loss: 2.008882999420166 2023-01-22 10:32:35.284713: step: 258/469, loss: 1.6360721588134766 2023-01-22 10:32:35.896812: step: 260/469, loss: 3.3590636253356934 2023-01-22 10:32:36.539562: step: 262/469, loss: 0.5646939277648926 2023-01-22 10:32:37.148137: step: 264/469, loss: 9.744297981262207 2023-01-22 10:32:37.809777: step: 266/469, loss: 0.5964086651802063 2023-01-22 10:32:38.399623: step: 268/469, loss: 0.3714085519313812 2023-01-22 10:32:39.036119: step: 270/469, loss: 1.0284358263015747 2023-01-22 10:32:39.661687: step: 272/469, loss: 1.037506103515625 2023-01-22 10:32:40.258893: step: 274/469, loss: 1.135972023010254 2023-01-22 10:32:40.935657: step: 276/469, loss: 0.8764917850494385 2023-01-22 10:32:41.526324: step: 278/469, loss: 1.267731785774231 2023-01-22 10:32:42.167769: step: 280/469, loss: 0.672271728515625 2023-01-22 10:32:42.781546: step: 282/469, loss: 0.7760641574859619 2023-01-22 10:32:43.484825: step: 284/469, loss: 0.41035887598991394 2023-01-22 10:32:44.071340: step: 286/469, loss: 0.3258203864097595 2023-01-22 10:32:44.797674: step: 288/469, loss: 0.7342487573623657 2023-01-22 10:32:45.448956: step: 290/469, loss: 0.48638105392456055 2023-01-22 10:32:46.162691: step: 292/469, loss: 0.19374558329582214 2023-01-22 10:32:46.777402: step: 294/469, loss: 0.8818792700767517 2023-01-22 10:32:47.451169: step: 296/469, loss: 1.3577371835708618 2023-01-22 10:32:48.069191: step: 298/469, loss: 1.1259976625442505 2023-01-22 10:32:48.705409: step: 300/469, loss: 1.1442805528640747 2023-01-22 10:32:49.366466: step: 302/469, loss: 0.4293137192726135 2023-01-22 10:32:50.059528: step: 304/469, loss: 
2.314356565475464 2023-01-22 10:32:50.730420: step: 306/469, loss: 2.0014007091522217 2023-01-22 10:32:51.386142: step: 308/469, loss: 0.2514479160308838 2023-01-22 10:32:52.005722: step: 310/469, loss: 0.37805867195129395 2023-01-22 10:32:52.603663: step: 312/469, loss: 0.29183441400527954 2023-01-22 10:32:53.337387: step: 314/469, loss: 0.41868263483047485 2023-01-22 10:32:53.996839: step: 316/469, loss: 0.7934480905532837 2023-01-22 10:32:54.662046: step: 318/469, loss: 0.8494001030921936 2023-01-22 10:32:55.284474: step: 320/469, loss: 1.078753113746643 2023-01-22 10:32:55.890283: step: 322/469, loss: 1.1665825843811035 2023-01-22 10:32:56.504715: step: 324/469, loss: 0.700657308101654 2023-01-22 10:32:57.084555: step: 326/469, loss: 1.3261842727661133 2023-01-22 10:32:57.732824: step: 328/469, loss: 0.9993078708648682 2023-01-22 10:32:58.355078: step: 330/469, loss: 0.39668697118759155 2023-01-22 10:32:58.960297: step: 332/469, loss: 1.7187021970748901 2023-01-22 10:32:59.612185: step: 334/469, loss: 0.6725361347198486 2023-01-22 10:33:00.268708: step: 336/469, loss: 0.47204115986824036 2023-01-22 10:33:00.902457: step: 338/469, loss: 0.36646369099617004 2023-01-22 10:33:01.529323: step: 340/469, loss: 0.686259388923645 2023-01-22 10:33:02.203651: step: 342/469, loss: 1.2658360004425049 2023-01-22 10:33:02.842539: step: 344/469, loss: 1.2248035669326782 2023-01-22 10:33:03.491669: step: 346/469, loss: 0.48144322633743286 2023-01-22 10:33:04.110176: step: 348/469, loss: 0.5733658075332642 2023-01-22 10:33:04.733776: step: 350/469, loss: 0.4137745797634125 2023-01-22 10:33:05.289305: step: 352/469, loss: 0.9133309125900269 2023-01-22 10:33:06.030643: step: 354/469, loss: 1.0647354125976562 2023-01-22 10:33:06.655245: step: 356/469, loss: 1.0498676300048828 2023-01-22 10:33:07.276944: step: 358/469, loss: 1.1451226472854614 2023-01-22 10:33:07.902620: step: 360/469, loss: 0.6535454988479614 2023-01-22 10:33:08.546621: step: 362/469, loss: 3.7403507232666016 2023-01-22 10:33:09.181794: step: 364/469, loss: 5.339025974273682 2023-01-22 10:33:09.794450: step: 366/469, loss: 0.40764179825782776 2023-01-22 10:33:10.446145: step: 368/469, loss: 0.7537856101989746 2023-01-22 10:33:11.034073: step: 370/469, loss: 0.23601745069026947 2023-01-22 10:33:11.650470: step: 372/469, loss: 0.7223626971244812 2023-01-22 10:33:12.298127: step: 374/469, loss: 0.3159669041633606 2023-01-22 10:33:12.912274: step: 376/469, loss: 0.7688659429550171 2023-01-22 10:33:13.543252: step: 378/469, loss: 4.336686134338379 2023-01-22 10:33:14.183289: step: 380/469, loss: 1.7180509567260742 2023-01-22 10:33:14.844480: step: 382/469, loss: 0.6608020067214966 2023-01-22 10:33:15.464970: step: 384/469, loss: 0.7804028987884521 2023-01-22 10:33:16.088387: step: 386/469, loss: 2.748377799987793 2023-01-22 10:33:16.695025: step: 388/469, loss: 0.3519863784313202 2023-01-22 10:33:17.303596: step: 390/469, loss: 1.5813013315200806 2023-01-22 10:33:17.894403: step: 392/469, loss: 0.32403886318206787 2023-01-22 10:33:18.532312: step: 394/469, loss: 1.7431018352508545 2023-01-22 10:33:19.120495: step: 396/469, loss: 0.5060060024261475 2023-01-22 10:33:19.794295: step: 398/469, loss: 0.3230733871459961 2023-01-22 10:33:20.438664: step: 400/469, loss: 3.857712507247925 2023-01-22 10:33:21.097995: step: 402/469, loss: 0.6641594767570496 2023-01-22 10:33:21.689283: step: 404/469, loss: 2.561873435974121 2023-01-22 10:33:22.326058: step: 406/469, loss: 0.4389699101448059 2023-01-22 10:33:22.940291: step: 408/469, loss: 
0.5542924404144287 2023-01-22 10:33:23.578648: step: 410/469, loss: 0.47851628065109253 2023-01-22 10:33:24.247239: step: 412/469, loss: 1.3034582138061523 2023-01-22 10:33:24.864819: step: 414/469, loss: 0.903071403503418 2023-01-22 10:33:25.501200: step: 416/469, loss: 0.7119318842887878 2023-01-22 10:33:26.168591: step: 418/469, loss: 0.4629986584186554 2023-01-22 10:33:26.803780: step: 420/469, loss: 4.017146110534668 2023-01-22 10:33:27.462349: step: 422/469, loss: 2.6478612422943115 2023-01-22 10:33:28.105876: step: 424/469, loss: 3.849916934967041 2023-01-22 10:33:28.733047: step: 426/469, loss: 1.000108003616333 2023-01-22 10:33:29.500121: step: 428/469, loss: 0.8655878305435181 2023-01-22 10:33:30.156321: step: 430/469, loss: 0.47275301814079285 2023-01-22 10:33:30.793293: step: 432/469, loss: 0.4634517431259155 2023-01-22 10:33:31.487498: step: 434/469, loss: 0.28048980236053467 2023-01-22 10:33:32.164912: step: 436/469, loss: 1.0606825351715088 2023-01-22 10:33:32.784050: step: 438/469, loss: 1.068650722503662 2023-01-22 10:33:33.357477: step: 440/469, loss: 0.8255683183670044 2023-01-22 10:33:34.014169: step: 442/469, loss: 6.129979610443115 2023-01-22 10:33:34.639461: step: 444/469, loss: 0.5486658811569214 2023-01-22 10:33:35.253867: step: 446/469, loss: 0.8216400742530823 2023-01-22 10:33:35.920648: step: 448/469, loss: 0.4615059196949005 2023-01-22 10:33:36.582189: step: 450/469, loss: 0.6068246960639954 2023-01-22 10:33:37.157373: step: 452/469, loss: 0.5918332934379578 2023-01-22 10:33:37.714435: step: 454/469, loss: 0.3285779356956482 2023-01-22 10:33:38.334232: step: 456/469, loss: 0.5419658422470093 2023-01-22 10:33:38.966088: step: 458/469, loss: 0.2379702776670456 2023-01-22 10:33:39.553387: step: 460/469, loss: 1.733943223953247 2023-01-22 10:33:40.093183: step: 462/469, loss: 0.5931634902954102 2023-01-22 10:33:40.665708: step: 464/469, loss: 2.92630672454834 2023-01-22 10:33:41.408188: step: 466/469, loss: 0.519819438457489 2023-01-22 10:33:42.024819: step: 468/469, loss: 0.8577695488929749 2023-01-22 10:33:42.602534: step: 470/469, loss: 1.3270702362060547 2023-01-22 10:33:43.265148: step: 472/469, loss: 1.971328616142273 2023-01-22 10:33:43.959339: step: 474/469, loss: 0.82817542552948 2023-01-22 10:33:44.626390: step: 476/469, loss: 0.955100953578949 2023-01-22 10:33:45.297788: step: 478/469, loss: 0.8063927888870239 2023-01-22 10:33:45.965929: step: 480/469, loss: 0.644195556640625 2023-01-22 10:33:46.546996: step: 482/469, loss: 0.45841577649116516 2023-01-22 10:33:47.138889: step: 484/469, loss: 1.9108983278274536 2023-01-22 10:33:47.853604: step: 486/469, loss: 0.9897705912590027 2023-01-22 10:33:48.470200: step: 488/469, loss: 2.820888042449951 2023-01-22 10:33:49.243491: step: 490/469, loss: 0.4491350054740906 2023-01-22 10:33:49.886137: step: 492/469, loss: 0.8418124318122864 2023-01-22 10:33:50.517546: step: 494/469, loss: 1.5335557460784912 2023-01-22 10:33:51.129660: step: 496/469, loss: 2.7892379760742188 2023-01-22 10:33:51.745631: step: 498/469, loss: 1.3039991855621338 2023-01-22 10:33:52.421048: step: 500/469, loss: 0.7867889404296875 2023-01-22 10:33:53.043472: step: 502/469, loss: 0.29364848136901855 2023-01-22 10:33:53.682704: step: 504/469, loss: 0.8529661297798157 2023-01-22 10:33:54.242035: step: 506/469, loss: 1.6458814144134521 2023-01-22 10:33:54.894489: step: 508/469, loss: 0.8420606255531311 2023-01-22 10:33:55.521258: step: 510/469, loss: 1.815138578414917 2023-01-22 10:33:56.163310: step: 512/469, loss: 1.5718107223510742 2023-01-22 
10:33:56.837933: step: 514/469, loss: 1.2635329961776733 2023-01-22 10:33:57.606999: step: 516/469, loss: 0.8168675899505615 2023-01-22 10:33:58.238751: step: 518/469, loss: 0.31952908635139465 2023-01-22 10:33:58.857331: step: 520/469, loss: 0.42109614610671997 2023-01-22 10:33:59.490794: step: 522/469, loss: 0.6543498635292053 2023-01-22 10:34:00.051563: step: 524/469, loss: 0.34267324209213257 2023-01-22 10:34:00.723479: step: 526/469, loss: 3.5468616485595703 2023-01-22 10:34:01.383688: step: 528/469, loss: 1.022538423538208 2023-01-22 10:34:02.023588: step: 530/469, loss: 0.319031298160553 2023-01-22 10:34:02.672791: step: 532/469, loss: 0.2147497683763504 2023-01-22 10:34:03.283135: step: 534/469, loss: 0.993924081325531 2023-01-22 10:34:03.938398: step: 536/469, loss: 1.1825424432754517 2023-01-22 10:34:04.614391: step: 538/469, loss: 2.5301554203033447 2023-01-22 10:34:05.282061: step: 540/469, loss: 0.9243333339691162 2023-01-22 10:34:05.938390: step: 542/469, loss: 0.12446283549070358 2023-01-22 10:34:06.567735: step: 544/469, loss: 1.8147050142288208 2023-01-22 10:34:07.182230: step: 546/469, loss: 2.3415894508361816 2023-01-22 10:34:07.842983: step: 548/469, loss: 1.674572229385376 2023-01-22 10:34:08.528798: step: 550/469, loss: 1.5341517925262451 2023-01-22 10:34:09.124748: step: 552/469, loss: 3.357795238494873 2023-01-22 10:34:09.806702: step: 554/469, loss: 1.271854043006897 2023-01-22 10:34:10.426868: step: 556/469, loss: 0.6844816207885742 2023-01-22 10:34:10.993638: step: 558/469, loss: 0.4120926558971405 2023-01-22 10:34:11.667344: step: 560/469, loss: 1.2114992141723633 2023-01-22 10:34:12.322220: step: 562/469, loss: 0.7472277879714966 2023-01-22 10:34:12.980626: step: 564/469, loss: 7.392098426818848 2023-01-22 10:34:13.582984: step: 566/469, loss: 0.5819260478019714 2023-01-22 10:34:14.214573: step: 568/469, loss: 0.6787862777709961 2023-01-22 10:34:14.835017: step: 570/469, loss: 2.225268602371216 2023-01-22 10:34:15.437711: step: 572/469, loss: 1.2054699659347534 2023-01-22 10:34:16.084985: step: 574/469, loss: 0.3201145827770233 2023-01-22 10:34:16.709770: step: 576/469, loss: 8.458700180053711 2023-01-22 10:34:17.313300: step: 578/469, loss: 0.7514448165893555 2023-01-22 10:34:17.948930: step: 580/469, loss: 1.1390188932418823 2023-01-22 10:34:18.671159: step: 582/469, loss: 0.8211749792098999 2023-01-22 10:34:19.357466: step: 584/469, loss: 1.5722768306732178 2023-01-22 10:34:19.991825: step: 586/469, loss: 0.17679743468761444 2023-01-22 10:34:20.602260: step: 588/469, loss: 0.49373096227645874 2023-01-22 10:34:21.168778: step: 590/469, loss: 0.975458562374115 2023-01-22 10:34:21.806860: step: 592/469, loss: 0.325543612241745 2023-01-22 10:34:22.509415: step: 594/469, loss: 0.9262832403182983 2023-01-22 10:34:23.137916: step: 596/469, loss: 0.39504551887512207 2023-01-22 10:34:23.827745: step: 598/469, loss: 0.7976389527320862 2023-01-22 10:34:24.420372: step: 600/469, loss: 1.9807462692260742 2023-01-22 10:34:25.055436: step: 602/469, loss: 0.970691442489624 2023-01-22 10:34:25.792973: step: 604/469, loss: 4.698398113250732 2023-01-22 10:34:26.422298: step: 606/469, loss: 0.6179900169372559 2023-01-22 10:34:27.067088: step: 608/469, loss: 3.7843470573425293 2023-01-22 10:34:27.680233: step: 610/469, loss: 2.340299606323242 2023-01-22 10:34:28.335456: step: 612/469, loss: 1.9795724153518677 2023-01-22 10:34:28.951710: step: 614/469, loss: 0.4936739206314087 2023-01-22 10:34:29.580004: step: 616/469, loss: 0.5711659789085388 2023-01-22 10:34:30.277090: step: 
618/469, loss: 1.575615406036377 2023-01-22 10:34:30.903740: step: 620/469, loss: 0.4934542179107666 2023-01-22 10:34:31.524569: step: 622/469, loss: 0.42838650941848755 2023-01-22 10:34:32.143297: step: 624/469, loss: 1.5150914192199707 2023-01-22 10:34:32.846026: step: 626/469, loss: 0.5379862785339355 2023-01-22 10:34:33.560411: step: 628/469, loss: 5.567632675170898 2023-01-22 10:34:34.236973: step: 630/469, loss: 1.0272163152694702 2023-01-22 10:34:34.875574: step: 632/469, loss: 0.8592121601104736 2023-01-22 10:34:35.553128: step: 634/469, loss: 0.1969228833913803 2023-01-22 10:34:36.193339: step: 636/469, loss: 0.2630600333213806 2023-01-22 10:34:36.832903: step: 638/469, loss: 0.8592279553413391 2023-01-22 10:34:37.462393: step: 640/469, loss: 0.3999764025211334 2023-01-22 10:34:38.052173: step: 642/469, loss: 0.8150302767753601 2023-01-22 10:34:38.631339: step: 644/469, loss: 1.3769086599349976 2023-01-22 10:34:39.307327: step: 646/469, loss: 0.3551313877105713 2023-01-22 10:34:39.933170: step: 648/469, loss: 0.5282874703407288 2023-01-22 10:34:40.522408: step: 650/469, loss: 0.40043526887893677 2023-01-22 10:34:41.170854: step: 652/469, loss: 0.24054086208343506 2023-01-22 10:34:41.772031: step: 654/469, loss: 0.5337856411933899 2023-01-22 10:34:42.400480: step: 656/469, loss: 1.8747979402542114 2023-01-22 10:34:43.057384: step: 658/469, loss: 8.904938697814941 2023-01-22 10:34:43.711564: step: 660/469, loss: 0.4225645661354065 2023-01-22 10:34:44.333612: step: 662/469, loss: 10.951725006103516 2023-01-22 10:34:44.938370: step: 664/469, loss: 0.9561553001403809 2023-01-22 10:34:45.554857: step: 666/469, loss: 0.28639301657676697 2023-01-22 10:34:46.229275: step: 668/469, loss: 1.4371557235717773 2023-01-22 10:34:46.891043: step: 670/469, loss: 1.474899172782898 2023-01-22 10:34:47.504593: step: 672/469, loss: 0.6126270294189453 2023-01-22 10:34:48.072293: step: 674/469, loss: 0.9130380153656006 2023-01-22 10:34:48.713310: step: 676/469, loss: 0.5676673054695129 2023-01-22 10:34:49.379589: step: 678/469, loss: 0.39143142104148865 2023-01-22 10:34:49.995174: step: 680/469, loss: 1.296806812286377 2023-01-22 10:34:50.597535: step: 682/469, loss: 1.231890320777893 2023-01-22 10:34:51.223295: step: 684/469, loss: 0.690446674823761 2023-01-22 10:34:51.714036: step: 686/469, loss: 0.5328521728515625 2023-01-22 10:34:52.315605: step: 688/469, loss: 0.33294424414634705 2023-01-22 10:34:52.963520: step: 690/469, loss: 0.783265233039856 2023-01-22 10:34:53.649056: step: 692/469, loss: 1.0659931898117065 2023-01-22 10:34:54.339256: step: 694/469, loss: 0.7789087891578674 2023-01-22 10:34:54.948466: step: 696/469, loss: 0.8979896903038025 2023-01-22 10:34:55.498427: step: 698/469, loss: 0.54157555103302 2023-01-22 10:34:56.217243: step: 700/469, loss: 2.14607834815979 2023-01-22 10:34:56.856594: step: 702/469, loss: 0.7440817356109619 2023-01-22 10:34:57.479812: step: 704/469, loss: 0.34577518701553345 2023-01-22 10:34:58.077550: step: 706/469, loss: 0.6384104490280151 2023-01-22 10:34:58.705083: step: 708/469, loss: 0.20221437513828278 2023-01-22 10:34:59.405552: step: 710/469, loss: 0.9820117354393005 2023-01-22 10:35:00.024438: step: 712/469, loss: 0.4067484736442566 2023-01-22 10:35:00.621509: step: 714/469, loss: 0.869258463382721 2023-01-22 10:35:01.221365: step: 716/469, loss: 1.5712285041809082 2023-01-22 10:35:01.853836: step: 718/469, loss: 0.40880221128463745 2023-01-22 10:35:02.442995: step: 720/469, loss: 0.9945792555809021 2023-01-22 10:35:03.096309: step: 722/469, loss: 
0.3624591827392578 2023-01-22 10:35:03.731711: step: 724/469, loss: 4.292018890380859 2023-01-22 10:35:04.297465: step: 726/469, loss: 3.2685019969940186 2023-01-22 10:35:04.952765: step: 728/469, loss: 3.2190017700195312 2023-01-22 10:35:05.557029: step: 730/469, loss: 0.9183965921401978 2023-01-22 10:35:06.128316: step: 732/469, loss: 1.8828498125076294 2023-01-22 10:35:06.775559: step: 734/469, loss: 1.029219388961792 2023-01-22 10:35:07.437978: step: 736/469, loss: 1.1005010604858398 2023-01-22 10:35:08.062914: step: 738/469, loss: 1.6785683631896973 2023-01-22 10:35:08.682258: step: 740/469, loss: 0.6691035032272339 2023-01-22 10:35:09.301060: step: 742/469, loss: 0.4262792468070984 2023-01-22 10:35:09.912120: step: 744/469, loss: 1.005275845527649 2023-01-22 10:35:10.494431: step: 746/469, loss: 0.6209067106246948 2023-01-22 10:35:11.116955: step: 748/469, loss: 1.2406554222106934 2023-01-22 10:35:11.719893: step: 750/469, loss: 0.734446108341217 2023-01-22 10:35:12.368674: step: 752/469, loss: 0.2791799008846283 2023-01-22 10:35:13.051114: step: 754/469, loss: 0.5821304321289062 2023-01-22 10:35:13.667749: step: 756/469, loss: 2.020531415939331 2023-01-22 10:35:14.361678: step: 758/469, loss: 3.246051788330078 2023-01-22 10:35:15.010316: step: 760/469, loss: 1.128767490386963 2023-01-22 10:35:15.642402: step: 762/469, loss: 4.814456939697266 2023-01-22 10:35:16.233640: step: 764/469, loss: 2.0687928199768066 2023-01-22 10:35:17.037217: step: 766/469, loss: 0.6035587787628174 2023-01-22 10:35:17.801080: step: 768/469, loss: 1.5669211149215698 2023-01-22 10:35:18.490972: step: 770/469, loss: 0.523723840713501 2023-01-22 10:35:19.110959: step: 772/469, loss: 2.5504186153411865 2023-01-22 10:35:19.729290: step: 774/469, loss: 0.7082122564315796 2023-01-22 10:35:20.394667: step: 776/469, loss: 0.48421937227249146 2023-01-22 10:35:21.001463: step: 778/469, loss: 0.39652830362319946 2023-01-22 10:35:21.630073: step: 780/469, loss: 0.2678658068180084 2023-01-22 10:35:22.306288: step: 782/469, loss: 0.4777577221393585 2023-01-22 10:35:22.951029: step: 784/469, loss: 0.6571331024169922 2023-01-22 10:35:23.567183: step: 786/469, loss: 0.8613175749778748 2023-01-22 10:35:24.208693: step: 788/469, loss: 0.9557031989097595 2023-01-22 10:35:24.868210: step: 790/469, loss: 1.1301566362380981 2023-01-22 10:35:25.500448: step: 792/469, loss: 0.18291328847408295 2023-01-22 10:35:26.084196: step: 794/469, loss: 2.295872688293457 2023-01-22 10:35:26.736895: step: 796/469, loss: 1.6139475107192993 2023-01-22 10:35:27.372725: step: 798/469, loss: 1.0522385835647583 2023-01-22 10:35:27.983100: step: 800/469, loss: 2.877741813659668 2023-01-22 10:35:28.622983: step: 802/469, loss: 0.4607471525669098 2023-01-22 10:35:29.267415: step: 804/469, loss: 0.3273234963417053 2023-01-22 10:35:29.882358: step: 806/469, loss: 0.9092198014259338 2023-01-22 10:35:30.497492: step: 808/469, loss: 0.4638720154762268 2023-01-22 10:35:31.088637: step: 810/469, loss: 4.013354301452637 2023-01-22 10:35:31.676142: step: 812/469, loss: 1.1741557121276855 2023-01-22 10:35:32.257372: step: 814/469, loss: 0.30634838342666626 2023-01-22 10:35:32.936966: step: 816/469, loss: 0.3868986964225769 2023-01-22 10:35:33.677466: step: 818/469, loss: 1.373685359954834 2023-01-22 10:35:34.349139: step: 820/469, loss: 1.350730538368225 2023-01-22 10:35:34.980023: step: 822/469, loss: 0.906842827796936 2023-01-22 10:35:35.571976: step: 824/469, loss: 1.3784503936767578 2023-01-22 10:35:36.237685: step: 826/469, loss: 1.1350330114364624 2023-01-22 
10:35:36.817394: step: 828/469, loss: 0.5679834485054016 2023-01-22 10:35:37.426268: step: 830/469, loss: 1.3632066249847412 2023-01-22 10:35:38.062701: step: 832/469, loss: 0.5329816341400146 2023-01-22 10:35:38.694771: step: 834/469, loss: 0.2021186500787735 2023-01-22 10:35:39.271873: step: 836/469, loss: 3.89064621925354 2023-01-22 10:35:39.880966: step: 838/469, loss: 0.15132099390029907 2023-01-22 10:35:40.518067: step: 840/469, loss: 3.3353309631347656 2023-01-22 10:35:41.181954: step: 842/469, loss: 1.9553123712539673 2023-01-22 10:35:41.780951: step: 844/469, loss: 1.1097683906555176 2023-01-22 10:35:42.405897: step: 846/469, loss: 2.360145330429077 2023-01-22 10:35:43.024165: step: 848/469, loss: 1.4597703218460083 2023-01-22 10:35:43.650126: step: 850/469, loss: 1.0837464332580566 2023-01-22 10:35:44.271990: step: 852/469, loss: 0.16399157047271729 2023-01-22 10:35:44.885897: step: 854/469, loss: 0.23961219191551208 2023-01-22 10:35:45.513468: step: 856/469, loss: 1.4256689548492432 2023-01-22 10:35:46.167932: step: 858/469, loss: 0.370321124792099 2023-01-22 10:35:46.803439: step: 860/469, loss: 0.20350167155265808 2023-01-22 10:35:47.420444: step: 862/469, loss: 1.621083378791809 2023-01-22 10:35:48.009785: step: 864/469, loss: 5.380659103393555 2023-01-22 10:35:48.658149: step: 866/469, loss: 1.059912919998169 2023-01-22 10:35:49.303053: step: 868/469, loss: 5.188115119934082 2023-01-22 10:35:49.865682: step: 870/469, loss: 0.8952152729034424 2023-01-22 10:35:50.393505: step: 872/469, loss: 1.1000407934188843 2023-01-22 10:35:51.040140: step: 874/469, loss: 0.8673734664916992 2023-01-22 10:35:51.710254: step: 876/469, loss: 0.7367225289344788 2023-01-22 10:35:52.332793: step: 878/469, loss: 0.8778529763221741 2023-01-22 10:35:52.921005: step: 880/469, loss: 0.6977656483650208 2023-01-22 10:35:53.492920: step: 882/469, loss: 0.7239607572555542 2023-01-22 10:35:54.080185: step: 884/469, loss: 0.5380071997642517 2023-01-22 10:35:54.729248: step: 886/469, loss: 0.4684225022792816 2023-01-22 10:35:55.328597: step: 888/469, loss: 1.7237749099731445 2023-01-22 10:35:55.908119: step: 890/469, loss: 0.6721970438957214 2023-01-22 10:35:56.566391: step: 892/469, loss: 1.1410884857177734 2023-01-22 10:35:57.167153: step: 894/469, loss: 1.1650965213775635 2023-01-22 10:35:57.783897: step: 896/469, loss: 0.6795168519020081 2023-01-22 10:35:58.450209: step: 898/469, loss: 0.4723908603191376 2023-01-22 10:35:59.098816: step: 900/469, loss: 1.431899905204773 2023-01-22 10:35:59.706248: step: 902/469, loss: 1.9521191120147705 2023-01-22 10:36:00.317295: step: 904/469, loss: 0.8847671747207642 2023-01-22 10:36:00.973052: step: 906/469, loss: 1.087672233581543 2023-01-22 10:36:01.572454: step: 908/469, loss: 0.36223340034484863 2023-01-22 10:36:02.249676: step: 910/469, loss: 0.23744481801986694 2023-01-22 10:36:02.884582: step: 912/469, loss: 0.7776368856430054 2023-01-22 10:36:03.487643: step: 914/469, loss: 0.767355740070343 2023-01-22 10:36:04.103379: step: 916/469, loss: 1.4067308902740479 2023-01-22 10:36:04.743672: step: 918/469, loss: 0.6138659119606018 2023-01-22 10:36:05.341157: step: 920/469, loss: 1.3209500312805176 2023-01-22 10:36:05.955362: step: 922/469, loss: 1.3732243776321411 2023-01-22 10:36:06.493943: step: 924/469, loss: 0.24684622883796692 2023-01-22 10:36:07.098307: step: 926/469, loss: 0.3700372874736786 2023-01-22 10:36:07.717954: step: 928/469, loss: 0.6151580810546875 2023-01-22 10:36:08.272098: step: 930/469, loss: 0.5320045948028564 2023-01-22 10:36:08.910092: step: 
932/469, loss: 0.9485318660736084 2023-01-22 10:36:09.549902: step: 934/469, loss: 1.2447638511657715 2023-01-22 10:36:10.184450: step: 936/469, loss: 1.3201947212219238 2023-01-22 10:36:10.784825: step: 938/469, loss: 0.31547755002975464
==================================================
Loss: 1.265
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32596855210591186, 'r': 0.21258818615602948, 'f1': 0.2573435937678252}, 'combined': 0.18962159540787116, 'epoch': 3}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2583219989469989, 'r': 0.19203114807566862, 'f1': 0.22029765022824216}, 'combined': 0.12016235466995026, 'epoch': 3}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3049295176630435, 'r': 0.212124881852552, 'f1': 0.25019857859531774}, 'combined': 0.1843568473860236, 'epoch': 3}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2591856225382413, 'r': 0.1943300421040147, 'f1': 0.22212045522327356}, 'combined': 0.12115661193996739, 'epoch': 3}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3122514865837142, 'r': 0.21131575084493323, 'f1': 0.2520541875918144}, 'combined': 0.18572413822554745, 'epoch': 3}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2697981589306216, 'r': 0.1980983742285112, 'f1': 0.22845468117979964}, 'combined': 0.1246116442798907, 'epoch': 3}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34476190476190477, 'r': 0.24625850340136055, 'f1': 0.28730158730158734}, 'combined': 0.19153439153439156, 'epoch': 3}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.06896551724137931, 'f1': 0.1142857142857143}, 'combined': 0.0761904761904762, 'epoch': 3}
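Note: in the evaluation blocks of this log, each 'combined' figure is consistent with the product of the template F1 and the slot F1, with each F1 being the usual harmonic mean of precision and recall (for Dev Chinese above, 0.7368421 * 0.2573436 ~= 0.1896216). The sketch below only illustrates that relationship; the function names are invented here and this is not the project's evaluation code.

    def f1(p, r):
        # Harmonic mean of precision and recall; taken as 0.0 when p + r == 0.
        return 2.0 * p * r / (p + r) if (p + r) > 0 else 0.0

    def combined_score(template, slot):
        # Reproduces the logged 'combined' value from the logged p/r pairs,
        # e.g. f1(1.0, 0.5833333) * f1(0.3259686, 0.2125882) ~= 0.1896216.
        return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])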
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2939502446101825, 'r': 0.2889302214574469, 'f1': 0.2914186157092335}, 'combined': 0.21472950631206678, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.24836124396069234, 'r': 0.22836509623101173, 'f1': 0.23794380379456226}, 'combined': 0.1297875293424885, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2708333333333333, 'r': 0.3095238095238095, 'f1': 0.28888888888888886}, 'combined': 0.19259259259259257, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2857443909240715, 'r': 0.2922509045694014, 'f1': 0.28896102571871396}, 'combined': 0.2129186505295787, 'epoch': 2}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2391582356549897, 'r': 0.22274756074728227, 'f1': 0.23066137744839368}, 'combined': 0.12581529679003292, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26, 'r': 0.2826086956521739, 'f1': 0.27083333333333337}, 'combined': 0.13541666666666669, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2675974296837756, 'r': 0.21995697818704282, 'f1': 0.24144965588931105}, 'combined': 0.17791027276054497, 'epoch': 1}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2590920765378194, 'r': 0.14199099162502637, 'f1': 0.18344699036188397}, 'combined': 0.1000619947428458, 'epoch': 1}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.2413793103448276, 'f1': 0.32558139534883723}, 'combined': 0.21705426356589147, 'epoch': 1}
******************************
Epoch: 4
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 10:38:58.209176: step: 2/469, loss: 0.9050880074501038 2023-01-22 10:38:58.867350: step: 4/469, loss: 1.7855778932571411 2023-01-22 10:38:59.530349: step: 6/469, loss: 1.377054214477539 2023-01-22 10:39:00.158376: step: 8/469, loss: 0.9699809551239014 2023-01-22 10:39:00.819049: step: 10/469, loss: 5.43241024017334 2023-01-22 10:39:01.438914: step: 12/469, loss: 0.7427436113357544 2023-01-22 10:39:02.077225: step: 14/469, loss: 0.2859767973423004 2023-01-22 10:39:02.768378: step: 16/469, loss: 0.3392329216003418 2023-01-22 10:39:03.384419: step: 18/469, loss: 1.175756812095642 2023-01-22 10:39:04.052207: step: 20/469, loss: 0.47212445735931396 2023-01-22 10:39:04.647527: step: 22/469, loss: 0.6327062845230103 2023-01-22 10:39:05.272473: step: 24/469, loss: 0.5672405958175659 2023-01-22 10:39:06.023717: step: 26/469, loss: 0.6528533697128296 2023-01-22 10:39:06.677781: step: 28/469, loss: 1.1529587507247925 2023-01-22 10:39:07.300490: step: 30/469, loss: 2.4324116706848145 2023-01-22 10:39:08.015074: step: 32/469, loss: 1.1140735149383545 2023-01-22 10:39:08.706986: step: 34/469, loss: 0.59650719165802 2023-01-22 10:39:09.409667: step: 36/469, loss: 0.557941734790802 2023-01-22 10:39:10.084780: step: 38/469, loss: 0.6107344627380371 2023-01-22 10:39:10.684457: step: 40/469, loss: 0.46826887130737305 2023-01-22 10:39:11.327808: step: 42/469, loss: 1.0535849332809448 2023-01-22 10:39:11.893375: step: 44/469, loss: 0.12947534024715424 2023-01-22 10:39:12.509464: step: 46/469, loss: 0.728455126285553 2023-01-22 10:39:13.123152: step: 48/469, loss: 0.2109626829624176 2023-01-22 10:39:13.780417: step: 50/469, loss: 0.6350926756858826 2023-01-22 10:39:14.364811: step: 52/469, loss: 0.8698463439941406 2023-01-22 10:39:15.037066: step: 54/469, loss: 0.5368098020553589 2023-01-22 10:39:15.630497: step: 56/469, loss: 0.48263952136039734 2023-01-22 10:39:16.296707: step: 58/469, loss: 0.44925135374069214 2023-01-22 10:39:16.929019: step: 60/469, loss: 0.693838357925415 2023-01-22 10:39:17.518306: step: 62/469, loss: 0.42136090993881226 2023-01-22 10:39:18.140057: step: 64/469, loss: 0.6921431422233582 2023-01-22 10:39:18.770929: step: 66/469, loss: 0.23543782532215118 2023-01-22 10:39:19.364537: step: 68/469, loss: 0.6824563145637512 2023-01-22 10:39:19.957520: step: 70/469, loss: 0.33878639340400696 2023-01-22 10:39:20.540476: step: 72/469, loss: 0.4124892055988312 2023-01-22 10:39:21.160640: step: 74/469, loss: 4.621402740478516 2023-01-22 10:39:21.881776: step:
76/469, loss: 2.4642035961151123 2023-01-22 10:39:22.542597: step: 78/469, loss: 0.4639299511909485 2023-01-22 10:39:23.171139: step: 80/469, loss: 0.8192798495292664 2023-01-22 10:39:23.849624: step: 82/469, loss: 0.8129348754882812 2023-01-22 10:39:24.407798: step: 84/469, loss: 0.5626464486122131 2023-01-22 10:39:25.073392: step: 86/469, loss: 0.6481913328170776 2023-01-22 10:39:25.708410: step: 88/469, loss: 0.5031366348266602 2023-01-22 10:39:26.410547: step: 90/469, loss: 7.856209754943848 2023-01-22 10:39:27.017068: step: 92/469, loss: 0.9202807545661926 2023-01-22 10:39:27.688688: step: 94/469, loss: 0.7684484720230103 2023-01-22 10:39:28.297907: step: 96/469, loss: 0.7751802802085876 2023-01-22 10:39:28.941768: step: 98/469, loss: 0.9721737504005432 2023-01-22 10:39:29.619917: step: 100/469, loss: 1.2791332006454468 2023-01-22 10:39:30.198911: step: 102/469, loss: 1.091760277748108 2023-01-22 10:39:30.760938: step: 104/469, loss: 2.574777603149414 2023-01-22 10:39:31.361997: step: 106/469, loss: 0.768923282623291 2023-01-22 10:39:32.002838: step: 108/469, loss: 1.2980910539627075 2023-01-22 10:39:32.662320: step: 110/469, loss: 0.33849582076072693 2023-01-22 10:39:33.334162: step: 112/469, loss: 2.3791964054107666 2023-01-22 10:39:34.045599: step: 114/469, loss: 3.5567097663879395 2023-01-22 10:39:34.637685: step: 116/469, loss: 0.5802083015441895 2023-01-22 10:39:35.212417: step: 118/469, loss: 1.870171308517456 2023-01-22 10:39:35.831071: step: 120/469, loss: 0.9516633749008179 2023-01-22 10:39:36.457377: step: 122/469, loss: 0.45377635955810547 2023-01-22 10:39:37.009647: step: 124/469, loss: 1.0239235162734985 2023-01-22 10:39:37.718337: step: 126/469, loss: 0.9929549694061279 2023-01-22 10:39:38.346645: step: 128/469, loss: 1.1157602071762085 2023-01-22 10:39:38.975514: step: 130/469, loss: 1.861595630645752 2023-01-22 10:39:39.579677: step: 132/469, loss: 0.6413525342941284 2023-01-22 10:39:40.203964: step: 134/469, loss: 0.5361921787261963 2023-01-22 10:39:40.823705: step: 136/469, loss: 0.8921282887458801 2023-01-22 10:39:41.450495: step: 138/469, loss: 0.2659511864185333 2023-01-22 10:39:42.078229: step: 140/469, loss: 1.3578380346298218 2023-01-22 10:39:42.753916: step: 142/469, loss: 0.2122288942337036 2023-01-22 10:39:43.421117: step: 144/469, loss: 0.5673994421958923 2023-01-22 10:39:44.012922: step: 146/469, loss: 0.2596486508846283 2023-01-22 10:39:44.646563: step: 148/469, loss: 0.8392143249511719 2023-01-22 10:39:45.291580: step: 150/469, loss: 1.1436114311218262 2023-01-22 10:39:45.862130: step: 152/469, loss: 0.6285895705223083 2023-01-22 10:39:46.453066: step: 154/469, loss: 0.6549550890922546 2023-01-22 10:39:47.112889: step: 156/469, loss: 1.0163320302963257 2023-01-22 10:39:47.668166: step: 158/469, loss: 0.9874000549316406 2023-01-22 10:39:48.290487: step: 160/469, loss: 0.7254908680915833 2023-01-22 10:39:49.032873: step: 162/469, loss: 1.7849186658859253 2023-01-22 10:39:49.691902: step: 164/469, loss: 0.49793195724487305 2023-01-22 10:39:50.272706: step: 166/469, loss: 0.19076751172542572 2023-01-22 10:39:50.976020: step: 168/469, loss: 0.9988154768943787 2023-01-22 10:39:51.562488: step: 170/469, loss: 1.3795875310897827 2023-01-22 10:39:52.164054: step: 172/469, loss: 1.5315974950790405 2023-01-22 10:39:52.796399: step: 174/469, loss: 0.7056239247322083 2023-01-22 10:39:53.440528: step: 176/469, loss: 0.3275761604309082 2023-01-22 10:39:54.027918: step: 178/469, loss: 2.3691744804382324 2023-01-22 10:39:54.676817: step: 180/469, loss: 
0.6250331997871399 2023-01-22 10:39:55.320577: step: 182/469, loss: 1.0679845809936523 2023-01-22 10:39:55.959019: step: 184/469, loss: 1.6039279699325562 2023-01-22 10:39:56.611369: step: 186/469, loss: 1.0112781524658203 2023-01-22 10:39:57.240888: step: 188/469, loss: 0.4016167223453522 2023-01-22 10:39:57.831363: step: 190/469, loss: 1.3260960578918457 2023-01-22 10:39:58.438710: step: 192/469, loss: 1.1128637790679932 2023-01-22 10:39:59.036439: step: 194/469, loss: 0.8418487310409546 2023-01-22 10:39:59.729658: step: 196/469, loss: 0.3964131772518158 2023-01-22 10:40:00.361508: step: 198/469, loss: 0.2029745727777481 2023-01-22 10:40:01.003170: step: 200/469, loss: 0.6018311381340027 2023-01-22 10:40:01.674497: step: 202/469, loss: 0.6261540651321411 2023-01-22 10:40:02.281815: step: 204/469, loss: 0.1680295318365097 2023-01-22 10:40:02.941543: step: 206/469, loss: 1.5685604810714722 2023-01-22 10:40:03.539421: step: 208/469, loss: 1.788804531097412 2023-01-22 10:40:04.110689: step: 210/469, loss: 0.24198587238788605 2023-01-22 10:40:04.722125: step: 212/469, loss: 0.2349502146244049 2023-01-22 10:40:05.368666: step: 214/469, loss: 0.4023289680480957 2023-01-22 10:40:05.979747: step: 216/469, loss: 0.44897568225860596 2023-01-22 10:40:06.560210: step: 218/469, loss: 1.668560266494751 2023-01-22 10:40:07.155237: step: 220/469, loss: 0.40680357813835144 2023-01-22 10:40:07.725611: step: 222/469, loss: 1.4389904737472534 2023-01-22 10:40:08.407671: step: 224/469, loss: 1.057631015777588 2023-01-22 10:40:09.091201: step: 226/469, loss: 1.1992883682250977 2023-01-22 10:40:09.812848: step: 228/469, loss: 1.7467793226242065 2023-01-22 10:40:10.446403: step: 230/469, loss: 1.4633595943450928 2023-01-22 10:40:11.049960: step: 232/469, loss: 1.3489941358566284 2023-01-22 10:40:11.596364: step: 234/469, loss: 0.4777461290359497 2023-01-22 10:40:12.247711: step: 236/469, loss: 0.1002981960773468 2023-01-22 10:40:12.876179: step: 238/469, loss: 0.7926585674285889 2023-01-22 10:40:13.501061: step: 240/469, loss: 1.6339585781097412 2023-01-22 10:40:14.145523: step: 242/469, loss: 0.6274259686470032 2023-01-22 10:40:14.745428: step: 244/469, loss: 2.0815725326538086 2023-01-22 10:40:15.391936: step: 246/469, loss: 0.2336321324110031 2023-01-22 10:40:15.999831: step: 248/469, loss: 0.8457404375076294 2023-01-22 10:40:16.621616: step: 250/469, loss: 0.5937406420707703 2023-01-22 10:40:17.308788: step: 252/469, loss: 1.0179897546768188 2023-01-22 10:40:17.932220: step: 254/469, loss: 0.622052013874054 2023-01-22 10:40:18.473314: step: 256/469, loss: 0.14757096767425537 2023-01-22 10:40:19.068857: step: 258/469, loss: 0.9615466594696045 2023-01-22 10:40:19.720912: step: 260/469, loss: 0.49650633335113525 2023-01-22 10:40:20.411072: step: 262/469, loss: 0.6913086175918579 2023-01-22 10:40:21.007435: step: 264/469, loss: 2.780974864959717 2023-01-22 10:40:21.618292: step: 266/469, loss: 0.9193152189254761 2023-01-22 10:40:22.308393: step: 268/469, loss: 1.0149012804031372 2023-01-22 10:40:22.926591: step: 270/469, loss: 1.1764382123947144 2023-01-22 10:40:23.594705: step: 272/469, loss: 1.2303965091705322 2023-01-22 10:40:24.250096: step: 274/469, loss: 0.20664067566394806 2023-01-22 10:40:24.808186: step: 276/469, loss: 0.3211139440536499 2023-01-22 10:40:25.424875: step: 278/469, loss: 2.22507381439209 2023-01-22 10:40:26.103762: step: 280/469, loss: 0.9276461601257324 2023-01-22 10:40:26.901141: step: 282/469, loss: 1.9384804964065552 2023-01-22 10:40:27.572940: step: 284/469, loss: 1.841937780380249 
2023-01-22 10:40:28.148564: step: 286/469, loss: 1.1619877815246582 2023-01-22 10:40:28.807997: step: 288/469, loss: 4.353485107421875 2023-01-22 10:40:29.447280: step: 290/469, loss: 0.3011418581008911 2023-01-22 10:40:30.122345: step: 292/469, loss: 0.760834813117981 2023-01-22 10:40:30.798972: step: 294/469, loss: 0.5934541821479797 2023-01-22 10:40:31.407496: step: 296/469, loss: 0.13107189536094666 2023-01-22 10:40:32.003277: step: 298/469, loss: 1.6270647048950195 2023-01-22 10:40:32.618976: step: 300/469, loss: 0.38889196515083313 2023-01-22 10:40:33.292638: step: 302/469, loss: 0.344219446182251 2023-01-22 10:40:33.956021: step: 304/469, loss: 0.9043945670127869 2023-01-22 10:40:34.695102: step: 306/469, loss: 0.22975823283195496 2023-01-22 10:40:35.341763: step: 308/469, loss: 1.487387776374817 2023-01-22 10:40:35.964653: step: 310/469, loss: 2.59348201751709 2023-01-22 10:40:36.598993: step: 312/469, loss: 1.0137042999267578 2023-01-22 10:40:37.270846: step: 314/469, loss: 0.2761698365211487 2023-01-22 10:40:37.963533: step: 316/469, loss: 0.6512198448181152 2023-01-22 10:40:38.550887: step: 318/469, loss: 0.48551467061042786 2023-01-22 10:40:39.081323: step: 320/469, loss: 0.6216294765472412 2023-01-22 10:40:39.755347: step: 322/469, loss: 0.5735813975334167 2023-01-22 10:40:40.381787: step: 324/469, loss: 0.34954631328582764 2023-01-22 10:40:40.961597: step: 326/469, loss: 0.31153106689453125 2023-01-22 10:40:41.517508: step: 328/469, loss: 0.4782946705818176 2023-01-22 10:40:42.110572: step: 330/469, loss: 0.7630382776260376 2023-01-22 10:40:42.728743: step: 332/469, loss: 0.30100852251052856 2023-01-22 10:40:43.366701: step: 334/469, loss: 0.5501344203948975 2023-01-22 10:40:44.009895: step: 336/469, loss: 0.9008646607398987 2023-01-22 10:40:44.600613: step: 338/469, loss: 0.39844298362731934 2023-01-22 10:40:45.264920: step: 340/469, loss: 0.30655503273010254 2023-01-22 10:40:45.885941: step: 342/469, loss: 0.9962322115898132 2023-01-22 10:40:46.553627: step: 344/469, loss: 0.9260802268981934 2023-01-22 10:40:47.265812: step: 346/469, loss: 3.0620388984680176 2023-01-22 10:40:47.877222: step: 348/469, loss: 1.3196375370025635 2023-01-22 10:40:48.539844: step: 350/469, loss: 3.8971800804138184 2023-01-22 10:40:49.119836: step: 352/469, loss: 0.5359402894973755 2023-01-22 10:40:49.671250: step: 354/469, loss: 0.6907138824462891 2023-01-22 10:40:50.300232: step: 356/469, loss: 0.3582236170768738 2023-01-22 10:40:50.982346: step: 358/469, loss: 2.1482317447662354 2023-01-22 10:40:51.567810: step: 360/469, loss: 0.34017834067344666 2023-01-22 10:40:52.123507: step: 362/469, loss: 1.7577235698699951 2023-01-22 10:40:52.757023: step: 364/469, loss: 0.723415732383728 2023-01-22 10:40:53.385964: step: 366/469, loss: 2.5710692405700684 2023-01-22 10:40:54.082358: step: 368/469, loss: 0.3038272559642792 2023-01-22 10:40:54.797668: step: 370/469, loss: 1.0049059391021729 2023-01-22 10:40:55.401892: step: 372/469, loss: 0.4403672218322754 2023-01-22 10:40:56.045216: step: 374/469, loss: 0.8747326731681824 2023-01-22 10:40:56.673805: step: 376/469, loss: 0.3144104778766632 2023-01-22 10:40:57.327369: step: 378/469, loss: 1.0310653448104858 2023-01-22 10:40:57.954189: step: 380/469, loss: 0.6088466048240662 2023-01-22 10:40:58.550458: step: 382/469, loss: 0.3269631564617157 2023-01-22 10:40:59.182745: step: 384/469, loss: 0.3779875934123993 2023-01-22 10:40:59.819494: step: 386/469, loss: 0.4860456883907318 2023-01-22 10:41:00.377407: step: 388/469, loss: 1.0699275732040405 2023-01-22 
10:41:00.974617: step: 390/469, loss: 0.33538371324539185 2023-01-22 10:41:01.664229: step: 392/469, loss: 0.8100714683532715 2023-01-22 10:41:02.360220: step: 394/469, loss: 0.5360186696052551 2023-01-22 10:41:03.014768: step: 396/469, loss: 0.8737802505493164 2023-01-22 10:41:03.592294: step: 398/469, loss: 1.6417113542556763 2023-01-22 10:41:04.244086: step: 400/469, loss: 0.5677798986434937 2023-01-22 10:41:04.904770: step: 402/469, loss: 0.5191171765327454 2023-01-22 10:41:05.591178: step: 404/469, loss: 1.025195598602295 2023-01-22 10:41:06.211046: step: 406/469, loss: 0.1223532035946846 2023-01-22 10:41:06.892402: step: 408/469, loss: 0.5266261696815491 2023-01-22 10:41:07.458101: step: 410/469, loss: 1.3209253549575806 2023-01-22 10:41:08.084375: step: 412/469, loss: 0.7187099456787109 2023-01-22 10:41:08.760373: step: 414/469, loss: 0.8508545756340027 2023-01-22 10:41:09.436077: step: 416/469, loss: 0.9024240374565125 2023-01-22 10:41:10.062947: step: 418/469, loss: 3.2285799980163574 2023-01-22 10:41:10.662511: step: 420/469, loss: 0.5391601920127869 2023-01-22 10:41:11.353600: step: 422/469, loss: 0.7348294258117676 2023-01-22 10:41:12.028137: step: 424/469, loss: 0.6996200084686279 2023-01-22 10:41:12.724819: step: 426/469, loss: 2.633759021759033 2023-01-22 10:41:13.413382: step: 428/469, loss: 1.1351779699325562 2023-01-22 10:41:14.065560: step: 430/469, loss: 0.646392285823822 2023-01-22 10:41:14.694711: step: 432/469, loss: 1.884870171546936 2023-01-22 10:41:15.290494: step: 434/469, loss: 0.3008376657962799 2023-01-22 10:41:15.924757: step: 436/469, loss: 1.3550143241882324 2023-01-22 10:41:16.580000: step: 438/469, loss: 0.7264467477798462 2023-01-22 10:41:17.195093: step: 440/469, loss: 0.198820561170578 2023-01-22 10:41:17.805670: step: 442/469, loss: 1.4231986999511719 2023-01-22 10:41:18.433552: step: 444/469, loss: 0.7573931813240051 2023-01-22 10:41:19.073003: step: 446/469, loss: 0.7570392489433289 2023-01-22 10:41:19.674545: step: 448/469, loss: 0.6987957954406738 2023-01-22 10:41:20.268981: step: 450/469, loss: 0.960996150970459 2023-01-22 10:41:20.890233: step: 452/469, loss: 0.9436129331588745 2023-01-22 10:41:21.564716: step: 454/469, loss: 2.3032419681549072 2023-01-22 10:41:22.137477: step: 456/469, loss: 0.8013098835945129 2023-01-22 10:41:22.766962: step: 458/469, loss: 1.3922255039215088 2023-01-22 10:41:23.366641: step: 460/469, loss: 0.8197945356369019 2023-01-22 10:41:23.992910: step: 462/469, loss: 0.954828143119812 2023-01-22 10:41:24.591691: step: 464/469, loss: 0.39458319544792175 2023-01-22 10:41:25.190982: step: 466/469, loss: 0.6810545325279236 2023-01-22 10:41:25.822368: step: 468/469, loss: 0.6716473698616028 2023-01-22 10:41:26.455298: step: 470/469, loss: 1.6145966053009033 2023-01-22 10:41:27.046147: step: 472/469, loss: 0.14848540723323822 2023-01-22 10:41:27.662632: step: 474/469, loss: 0.3003862202167511 2023-01-22 10:41:28.219387: step: 476/469, loss: 0.3544888198375702 2023-01-22 10:41:28.914633: step: 478/469, loss: 0.4897359013557434 2023-01-22 10:41:29.550026: step: 480/469, loss: 0.1578856110572815 2023-01-22 10:41:30.162660: step: 482/469, loss: 1.2774384021759033 2023-01-22 10:41:30.814427: step: 484/469, loss: 0.39732471108436584 2023-01-22 10:41:31.361205: step: 486/469, loss: 0.634797215461731 2023-01-22 10:41:31.990257: step: 488/469, loss: 1.6133782863616943 2023-01-22 10:41:32.622655: step: 490/469, loss: 1.0287816524505615 2023-01-22 10:41:33.273645: step: 492/469, loss: 0.3966401219367981 2023-01-22 10:41:33.902913: step: 
494/469, loss: 0.4844699203968048 2023-01-22 10:41:34.534784: step: 496/469, loss: 1.137656569480896 2023-01-22 10:41:35.179106: step: 498/469, loss: 0.879274308681488 2023-01-22 10:41:35.835671: step: 500/469, loss: 2.080451726913452 2023-01-22 10:41:36.460471: step: 502/469, loss: 0.8389492034912109 2023-01-22 10:41:37.097843: step: 504/469, loss: 0.9974808692932129 2023-01-22 10:41:37.742313: step: 506/469, loss: 4.700196266174316 2023-01-22 10:41:38.381031: step: 508/469, loss: 1.077690839767456 2023-01-22 10:41:39.044460: step: 510/469, loss: 3.365203380584717 2023-01-22 10:41:39.689751: step: 512/469, loss: 1.383488655090332 2023-01-22 10:41:40.250225: step: 514/469, loss: 0.41354992985725403 2023-01-22 10:41:40.823534: step: 516/469, loss: 0.3490378260612488 2023-01-22 10:41:41.473037: step: 518/469, loss: 3.3838109970092773 2023-01-22 10:41:42.173891: step: 520/469, loss: 0.788618803024292 2023-01-22 10:41:42.881156: step: 522/469, loss: 2.7722554206848145 2023-01-22 10:41:43.512651: step: 524/469, loss: 0.3936830759048462 2023-01-22 10:41:44.057339: step: 526/469, loss: 1.5183649063110352 2023-01-22 10:41:44.646614: step: 528/469, loss: 0.3742457628250122 2023-01-22 10:41:45.265369: step: 530/469, loss: 1.0776413679122925 2023-01-22 10:41:45.873114: step: 532/469, loss: 0.2397650182247162 2023-01-22 10:41:46.516932: step: 534/469, loss: 0.9025579690933228 2023-01-22 10:41:47.129957: step: 536/469, loss: 0.18482525646686554 2023-01-22 10:41:47.790692: step: 538/469, loss: 1.2417709827423096 2023-01-22 10:41:48.431643: step: 540/469, loss: 0.5819209814071655 2023-01-22 10:41:49.045905: step: 542/469, loss: 0.7533178329467773 2023-01-22 10:41:49.707549: step: 544/469, loss: 0.6205441951751709 2023-01-22 10:41:50.352285: step: 546/469, loss: 0.4174264967441559 2023-01-22 10:41:51.010050: step: 548/469, loss: 0.7254074811935425 2023-01-22 10:41:51.654107: step: 550/469, loss: 0.41729649901390076 2023-01-22 10:41:52.267385: step: 552/469, loss: 3.5037264823913574 2023-01-22 10:41:52.915018: step: 554/469, loss: 0.3591243624687195 2023-01-22 10:41:53.510402: step: 556/469, loss: 6.033249378204346 2023-01-22 10:41:54.097459: step: 558/469, loss: 0.7233971357345581 2023-01-22 10:41:54.758212: step: 560/469, loss: 1.761699914932251 2023-01-22 10:41:55.375007: step: 562/469, loss: 0.5329675674438477 2023-01-22 10:41:55.961930: step: 564/469, loss: 0.46260109543800354 2023-01-22 10:41:56.545339: step: 566/469, loss: 2.472346305847168 2023-01-22 10:41:57.164082: step: 568/469, loss: 1.0746632814407349 2023-01-22 10:41:57.772403: step: 570/469, loss: 1.0300151109695435 2023-01-22 10:41:58.400445: step: 572/469, loss: 1.0891962051391602 2023-01-22 10:41:58.975329: step: 574/469, loss: 0.5832064151763916 2023-01-22 10:41:59.595265: step: 576/469, loss: 0.5408498048782349 2023-01-22 10:42:00.202288: step: 578/469, loss: 1.7607018947601318 2023-01-22 10:42:00.827201: step: 580/469, loss: 0.34821629524230957 2023-01-22 10:42:01.494599: step: 582/469, loss: 0.18294112384319305 2023-01-22 10:42:02.136842: step: 584/469, loss: 1.9501097202301025 2023-01-22 10:42:02.738246: step: 586/469, loss: 0.16994327306747437 2023-01-22 10:42:03.330826: step: 588/469, loss: 0.3547922372817993 2023-01-22 10:42:04.040341: step: 590/469, loss: 0.7541895508766174 2023-01-22 10:42:04.591338: step: 592/469, loss: 1.7084599733352661 2023-01-22 10:42:05.240560: step: 594/469, loss: 0.2464788854122162 2023-01-22 10:42:05.984304: step: 596/469, loss: 1.1210328340530396 2023-01-22 10:42:06.607109: step: 598/469, loss: 
0.2027255892753601 2023-01-22 10:42:07.176246: step: 600/469, loss: 1.2290934324264526 2023-01-22 10:42:07.799911: step: 602/469, loss: 0.9369786381721497 2023-01-22 10:42:08.428683: step: 604/469, loss: 0.700100839138031 2023-01-22 10:42:09.011657: step: 606/469, loss: 0.4026202857494354 2023-01-22 10:42:09.650028: step: 608/469, loss: 0.398833692073822 2023-01-22 10:42:10.317001: step: 610/469, loss: 0.3640027940273285 2023-01-22 10:42:10.890907: step: 612/469, loss: 0.9620557427406311 2023-01-22 10:42:11.538149: step: 614/469, loss: 1.4510769844055176 2023-01-22 10:42:12.098019: step: 616/469, loss: 0.15558823943138123 2023-01-22 10:42:12.708148: step: 618/469, loss: 1.2236813306808472 2023-01-22 10:42:13.378880: step: 620/469, loss: 1.2272909879684448 2023-01-22 10:42:13.932859: step: 622/469, loss: 0.23699456453323364 2023-01-22 10:42:14.490946: step: 624/469, loss: 0.4633136987686157 2023-01-22 10:42:15.134796: step: 626/469, loss: 1.6583020687103271 2023-01-22 10:42:15.711175: step: 628/469, loss: 0.7730790376663208 2023-01-22 10:42:16.344905: step: 630/469, loss: 0.8075206875801086 2023-01-22 10:42:16.925583: step: 632/469, loss: 0.7500634789466858 2023-01-22 10:42:17.569520: step: 634/469, loss: 0.5523080825805664 2023-01-22 10:42:18.185501: step: 636/469, loss: 0.6977297067642212 2023-01-22 10:42:18.782611: step: 638/469, loss: 0.9085575342178345 2023-01-22 10:42:19.403947: step: 640/469, loss: 1.6701316833496094 2023-01-22 10:42:20.005806: step: 642/469, loss: 1.1490122079849243 2023-01-22 10:42:20.591804: step: 644/469, loss: 1.8123161792755127 2023-01-22 10:42:21.186759: step: 646/469, loss: 0.2635463774204254 2023-01-22 10:42:21.728407: step: 648/469, loss: 0.8812653422355652 2023-01-22 10:42:22.330149: step: 650/469, loss: 0.4694075286388397 2023-01-22 10:42:22.995203: step: 652/469, loss: 0.693772554397583 2023-01-22 10:42:23.631541: step: 654/469, loss: 0.6004814505577087 2023-01-22 10:42:24.272136: step: 656/469, loss: 0.9450744390487671 2023-01-22 10:42:24.872631: step: 658/469, loss: 1.323035717010498 2023-01-22 10:42:25.537765: step: 660/469, loss: 1.7586731910705566 2023-01-22 10:42:26.166760: step: 662/469, loss: 3.1064038276672363 2023-01-22 10:42:26.812494: step: 664/469, loss: 2.1606502532958984 2023-01-22 10:42:27.467496: step: 666/469, loss: 0.5083552598953247 2023-01-22 10:42:28.053356: step: 668/469, loss: 0.5356027483940125 2023-01-22 10:42:28.713979: step: 670/469, loss: 0.4719902575016022 2023-01-22 10:42:29.352676: step: 672/469, loss: 2.2638511657714844 2023-01-22 10:42:29.962873: step: 674/469, loss: 0.540315568447113 2023-01-22 10:42:30.722148: step: 676/469, loss: 0.6927760243415833 2023-01-22 10:42:31.268178: step: 678/469, loss: 0.7447448372840881 2023-01-22 10:42:31.854442: step: 680/469, loss: 0.6245925426483154 2023-01-22 10:42:32.497767: step: 682/469, loss: 1.2349224090576172 2023-01-22 10:42:33.098715: step: 684/469, loss: 4.0819854736328125 2023-01-22 10:42:33.713608: step: 686/469, loss: 3.266864538192749 2023-01-22 10:42:34.298078: step: 688/469, loss: 0.9348659515380859 2023-01-22 10:42:35.022544: step: 690/469, loss: 0.36093536019325256 2023-01-22 10:42:35.660207: step: 692/469, loss: 1.21728515625 2023-01-22 10:42:36.303943: step: 694/469, loss: 2.6873040199279785 2023-01-22 10:42:36.937863: step: 696/469, loss: 1.687104344367981 2023-01-22 10:42:37.615879: step: 698/469, loss: 1.1994125843048096 2023-01-22 10:42:38.254587: step: 700/469, loss: 0.6642340421676636 2023-01-22 10:42:38.919732: step: 702/469, loss: 0.1603505164384842 
2023-01-22 10:42:39.619089: step: 704/469, loss: 0.4437829852104187 2023-01-22 10:42:40.334768: step: 706/469, loss: 0.8923883438110352 2023-01-22 10:42:40.979266: step: 708/469, loss: 2.0039939880371094 2023-01-22 10:42:41.532172: step: 710/469, loss: 1.2439526319503784 2023-01-22 10:42:42.240022: step: 712/469, loss: 0.8551022410392761 2023-01-22 10:42:42.943403: step: 714/469, loss: 2.400669574737549 2023-01-22 10:42:43.535227: step: 716/469, loss: 0.8577961325645447 2023-01-22 10:42:44.174940: step: 718/469, loss: 1.4866974353790283 2023-01-22 10:42:44.781968: step: 720/469, loss: 2.898620128631592 2023-01-22 10:42:45.384198: step: 722/469, loss: 1.0206254720687866 2023-01-22 10:42:45.966944: step: 724/469, loss: 0.9223724603652954 2023-01-22 10:42:46.526494: step: 726/469, loss: 1.4209712743759155 2023-01-22 10:42:47.122471: step: 728/469, loss: 0.8991994261741638 2023-01-22 10:42:47.868238: step: 730/469, loss: 1.7733222246170044 2023-01-22 10:42:48.528138: step: 732/469, loss: 0.49927276372909546 2023-01-22 10:42:49.131858: step: 734/469, loss: 1.2421603202819824 2023-01-22 10:42:49.804279: step: 736/469, loss: 0.5845636129379272 2023-01-22 10:42:50.457331: step: 738/469, loss: 0.8476709127426147 2023-01-22 10:42:51.005795: step: 740/469, loss: 0.3768523037433624 2023-01-22 10:42:51.656365: step: 742/469, loss: 0.7133253812789917 2023-01-22 10:42:52.246136: step: 744/469, loss: 1.2328099012374878 2023-01-22 10:42:52.832831: step: 746/469, loss: 0.4945378303527832 2023-01-22 10:42:53.429234: step: 748/469, loss: 0.808287501335144 2023-01-22 10:42:54.098368: step: 750/469, loss: 0.07773157209157944 2023-01-22 10:42:54.771806: step: 752/469, loss: 2.504279375076294 2023-01-22 10:42:55.386182: step: 754/469, loss: 1.210287094116211 2023-01-22 10:42:55.989044: step: 756/469, loss: 0.6139706969261169 2023-01-22 10:42:56.701850: step: 758/469, loss: 3.411437511444092 2023-01-22 10:42:57.350044: step: 760/469, loss: 0.9289321303367615 2023-01-22 10:42:57.930858: step: 762/469, loss: 0.8700323104858398 2023-01-22 10:42:58.549589: step: 764/469, loss: 1.5740787982940674 2023-01-22 10:42:59.160999: step: 766/469, loss: 0.9763445258140564 2023-01-22 10:42:59.760735: step: 768/469, loss: 0.9869160652160645 2023-01-22 10:43:00.384445: step: 770/469, loss: 1.6574498414993286 2023-01-22 10:43:00.925701: step: 772/469, loss: 2.648036003112793 2023-01-22 10:43:01.540743: step: 774/469, loss: 0.25199568271636963 2023-01-22 10:43:02.215683: step: 776/469, loss: 0.5826737284660339 2023-01-22 10:43:02.921291: step: 778/469, loss: 3.072148323059082 2023-01-22 10:43:03.621244: step: 780/469, loss: 1.031497836112976 2023-01-22 10:43:04.186951: step: 782/469, loss: 1.1756263971328735 2023-01-22 10:43:04.905228: step: 784/469, loss: 0.5341236591339111 2023-01-22 10:43:05.583680: step: 786/469, loss: 1.4256523847579956 2023-01-22 10:43:06.214793: step: 788/469, loss: 2.176980495452881 2023-01-22 10:43:06.878639: step: 790/469, loss: 1.3870797157287598 2023-01-22 10:43:07.503066: step: 792/469, loss: 0.4993237853050232 2023-01-22 10:43:08.142989: step: 794/469, loss: 0.2280917912721634 2023-01-22 10:43:08.750212: step: 796/469, loss: 0.6398214101791382 2023-01-22 10:43:09.375174: step: 798/469, loss: 0.5072592496871948 2023-01-22 10:43:10.081414: step: 800/469, loss: 1.4234272241592407 2023-01-22 10:43:10.651941: step: 802/469, loss: 0.8249620795249939 2023-01-22 10:43:11.299675: step: 804/469, loss: 0.5047292113304138 2023-01-22 10:43:11.900585: step: 806/469, loss: 3.447502374649048 2023-01-22 
10:43:12.469559: step: 808/469, loss: 0.21030651032924652 2023-01-22 10:43:13.143874: step: 810/469, loss: 0.21714237332344055 2023-01-22 10:43:13.756754: step: 812/469, loss: 0.5368207097053528 2023-01-22 10:43:14.393323: step: 814/469, loss: 0.8567434549331665 2023-01-22 10:43:14.928515: step: 816/469, loss: 0.6117315888404846 2023-01-22 10:43:15.548499: step: 818/469, loss: 0.6387864351272583 2023-01-22 10:43:16.325223: step: 820/469, loss: 0.467052698135376 2023-01-22 10:43:16.972563: step: 822/469, loss: 0.2626565992832184 2023-01-22 10:43:17.593637: step: 824/469, loss: 0.4992005228996277 2023-01-22 10:43:18.347270: step: 826/469, loss: 0.391232967376709 2023-01-22 10:43:19.026552: step: 828/469, loss: 0.8604095578193665 2023-01-22 10:43:19.582021: step: 830/469, loss: 0.48425501585006714 2023-01-22 10:43:20.217980: step: 832/469, loss: 1.2435221672058105 2023-01-22 10:43:20.836913: step: 834/469, loss: 0.4931100308895111 2023-01-22 10:43:21.389005: step: 836/469, loss: 0.2618055045604706 2023-01-22 10:43:21.998714: step: 838/469, loss: 0.3797784447669983 2023-01-22 10:43:22.593599: step: 840/469, loss: 1.1894060373306274 2023-01-22 10:43:23.228287: step: 842/469, loss: 0.2713502049446106 2023-01-22 10:43:23.895782: step: 844/469, loss: 0.3778339624404907 2023-01-22 10:43:24.492327: step: 846/469, loss: 0.5766203999519348 2023-01-22 10:43:25.125296: step: 848/469, loss: 0.48425954580307007 2023-01-22 10:43:25.858889: step: 850/469, loss: 0.25006476044654846 2023-01-22 10:43:26.474002: step: 852/469, loss: 0.5502465963363647 2023-01-22 10:43:27.120435: step: 854/469, loss: 0.6801631450653076 2023-01-22 10:43:27.765201: step: 856/469, loss: 0.9096823334693909 2023-01-22 10:43:28.380595: step: 858/469, loss: 0.21374744176864624 2023-01-22 10:43:29.031741: step: 860/469, loss: 0.5357890129089355 2023-01-22 10:43:29.593022: step: 862/469, loss: 0.20686137676239014 2023-01-22 10:43:30.196743: step: 864/469, loss: 5.894195079803467 2023-01-22 10:43:30.870693: step: 866/469, loss: 1.6604406833648682 2023-01-22 10:43:31.489113: step: 868/469, loss: 1.2137590646743774 2023-01-22 10:43:32.143272: step: 870/469, loss: 0.36408406496047974 2023-01-22 10:43:32.709285: step: 872/469, loss: 4.091287612915039 2023-01-22 10:43:33.350499: step: 874/469, loss: 0.6501983404159546 2023-01-22 10:43:33.961343: step: 876/469, loss: 0.6695119738578796 2023-01-22 10:43:34.535687: step: 878/469, loss: 0.41327884793281555 2023-01-22 10:43:35.189642: step: 880/469, loss: 1.1272549629211426 2023-01-22 10:43:35.787423: step: 882/469, loss: 0.36916837096214294 2023-01-22 10:43:36.408273: step: 884/469, loss: 0.4161912798881531 2023-01-22 10:43:37.051063: step: 886/469, loss: 0.2770214378833771 2023-01-22 10:43:37.692420: step: 888/469, loss: 0.9130086302757263 2023-01-22 10:43:38.367478: step: 890/469, loss: 0.370594322681427 2023-01-22 10:43:38.971582: step: 892/469, loss: 0.7045907974243164 2023-01-22 10:43:39.623543: step: 894/469, loss: 0.5197343826293945 2023-01-22 10:43:40.182926: step: 896/469, loss: 0.6735163927078247 2023-01-22 10:43:40.849958: step: 898/469, loss: 1.0374263525009155 2023-01-22 10:43:41.488314: step: 900/469, loss: 0.24287647008895874 2023-01-22 10:43:42.141975: step: 902/469, loss: 0.6834206581115723 2023-01-22 10:43:42.792970: step: 904/469, loss: 0.47497767210006714 2023-01-22 10:43:43.435429: step: 906/469, loss: 1.5947412252426147 2023-01-22 10:43:44.099598: step: 908/469, loss: 0.2094680666923523 2023-01-22 10:43:44.669067: step: 910/469, loss: 2.8062334060668945 2023-01-22 
10:43:45.342434: step: 912/469, loss: 2.472537040710449 2023-01-22 10:43:45.987263: step: 914/469, loss: 0.35665011405944824 2023-01-22 10:43:46.614244: step: 916/469, loss: 1.0690548419952393 2023-01-22 10:43:47.294409: step: 918/469, loss: 0.687286376953125 2023-01-22 10:43:47.933808: step: 920/469, loss: 0.5456666350364685 2023-01-22 10:43:48.558984: step: 922/469, loss: 0.7411112785339355 2023-01-22 10:43:49.143109: step: 924/469, loss: 0.8117825388908386 2023-01-22 10:43:49.744485: step: 926/469, loss: 3.4163637161254883 2023-01-22 10:43:50.396390: step: 928/469, loss: 0.5145717859268188 2023-01-22 10:43:50.970554: step: 930/469, loss: 0.7052063941955566 2023-01-22 10:43:51.565332: step: 932/469, loss: 0.7517820000648499 2023-01-22 10:43:52.163992: step: 934/469, loss: 0.48496013879776 2023-01-22 10:43:52.798341: step: 936/469, loss: 1.1040966510772705 2023-01-22 10:43:53.519708: step: 938/469, loss: 12.866398811340332
==================================================
Loss: 1.053
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29348448017950635, 'r': 0.3191017213336948, 'f1': 0.3057574675324676}, 'combined': 0.22529497607655505, 'epoch': 4}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.28644581280788173, 'r': 0.18982926351527812, 'f1': 0.22833778371161548}, 'combined': 0.12454788202451753, 'epoch': 4}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2781530526061776, 'r': 0.31246035510978587, 'f1': 0.2943102897995659}, 'combined': 0.21686021353652224, 'epoch': 4}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2755418046286606, 'r': 0.1906628392174553, 'f1': 0.2253756305822756}, 'combined': 0.12293216213578669, 'epoch': 4}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27656658148779106, 'r': 0.30805423782416197, 'f1': 0.29146244763614604}, 'combined': 0.21476180352137075, 'epoch': 4}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2817963877338877, 'r': 0.19061181620025314, 'f1': 0.22740384615384615}, 'combined': 0.12403846153846153, 'epoch': 4}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23692810457516336, 'r': 0.3452380952380952, 'f1': 0.28100775193798444}, 'combined': 0.18733850129198962, 'epoch': 4}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.18269230769230768, 'r': 0.20652173913043478, 'f1': 0.19387755102040816}, 'combined': 0.09693877551020408, 'epoch': 4}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 4}
New best chinese model...
New best russian model...
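Note: the per-epoch 'Loss' figure printed before each evaluation block (1.053 for this epoch) is presumably the arithmetic mean of that epoch's per-step losses; this has not been verified against train.py. The helper below is a hypothetical sketch for re-deriving such a figure from one epoch's slice of this log, not code from the repository.

    import re

    # Matches entries of the form "step: 912/469, loss: 2.472537040710449".
    STEP_LOSS_RE = re.compile(r"step: \d+/\d+, loss: ([0-9]+(?:\.[0-9]+)?)")

    def mean_step_loss(epoch_log_text):
        # Average every per-step loss found in the given slice of the log.
        losses = [float(m.group(1)) for m in STEP_LOSS_RE.finditer(epoch_log_text)]
        return sum(losses) / len(losses) if losses else float('nan')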
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29348448017950635, 'r': 0.3191017213336948, 'f1': 0.3057574675324676}, 'combined': 0.22529497607655505, 'epoch': 4}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.28644581280788173, 'r': 0.18982926351527812, 'f1': 0.22833778371161548}, 'combined': 0.12454788202451753, 'epoch': 4}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23692810457516336, 'r': 0.3452380952380952, 'f1': 0.28100775193798444}, 'combined': 0.18733850129198962, 'epoch': 4}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2857443909240715, 'r': 0.2922509045694014, 'f1': 0.28896102571871396}, 'combined': 0.2129186505295787, 'epoch': 2}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2391582356549897, 'r': 0.22274756074728227, 'f1': 0.23066137744839368}, 'combined': 0.12581529679003292, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26, 'r': 0.2826086956521739, 'f1': 0.27083333333333337}, 'combined': 0.13541666666666669, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27656658148779106, 'r': 0.30805423782416197, 'f1': 0.29146244763614604}, 'combined': 0.21476180352137075, 'epoch': 4}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2817963877338877, 'r': 0.19061181620025314, 'f1': 0.22740384615384615}, 'combined': 0.12403846153846153, 'epoch': 4}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 4}
******************************
Epoch: 6
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 10:46:51.984017: step: 2/469, loss: 0.23082216084003448 2023-01-22 10:46:52.653894: step: 4/469, loss: 0.24468812346458435 2023-01-22 10:46:53.320415: step: 6/469, loss: 2.715531826019287 2023-01-22 10:46:53.938625: step: 8/469, loss: 0.8829636573791504 2023-01-22 10:46:54.567789: step: 10/469, loss: 0.6191074252128601 2023-01-22 10:46:55.210375: step: 12/469, loss: 0.8923539519309998 2023-01-22 10:46:55.839600: step: 14/469, loss: 0.4938492178916931 2023-01-22 10:46:56.465452: step: 16/469, loss: 8.393678665161133 2023-01-22 10:46:57.028830: step: 18/469, loss: 0.29203730821609497 2023-01-22 10:46:57.696513: step: 20/469, loss: 0.6454170942306519 2023-01-22 10:46:58.329391: step: 22/469, loss: 3.398102283477783 2023-01-22 10:46:58.912603: step: 24/469, loss: 0.4247226417064667 2023-01-22 10:46:59.615034: step: 26/469, loss: 0.14708152413368225 2023-01-22 10:47:00.251048: step: 28/469, loss: 1.7728263139724731 2023-01-22 10:47:00.897497: step: 30/469, loss: 1.058443307876587 2023-01-22 10:47:01.453039: step: 32/469, loss: 2.357333183288574 2023-01-22 10:47:02.009830: step: 34/469, loss: 0.5698264837265015 2023-01-22 10:47:02.602534: step: 36/469,
loss: 0.2533670961856842 2023-01-22 10:47:03.249302: step: 38/469, loss: 0.42915767431259155 2023-01-22 10:47:03.866631: step: 40/469, loss: 1.779348373413086 2023-01-22 10:47:04.504376: step: 42/469, loss: 0.4198766350746155 2023-01-22 10:47:05.150525: step: 44/469, loss: 0.5436288118362427 2023-01-22 10:47:05.801368: step: 46/469, loss: 2.6733551025390625 2023-01-22 10:47:06.400361: step: 48/469, loss: 1.272593379020691 2023-01-22 10:47:07.103003: step: 50/469, loss: 0.5839976072311401 2023-01-22 10:47:07.728118: step: 52/469, loss: 0.22010065615177155 2023-01-22 10:47:08.398613: step: 54/469, loss: 0.8428466320037842 2023-01-22 10:47:09.012439: step: 56/469, loss: 0.21989446878433228 2023-01-22 10:47:09.597349: step: 58/469, loss: 0.2790488600730896 2023-01-22 10:47:10.253160: step: 60/469, loss: 0.5408099293708801 2023-01-22 10:47:10.840502: step: 62/469, loss: 0.35421276092529297 2023-01-22 10:47:11.538387: step: 64/469, loss: 2.1708390712738037 2023-01-22 10:47:12.222950: step: 66/469, loss: 0.29272645711898804 2023-01-22 10:47:12.841447: step: 68/469, loss: 0.8206204771995544 2023-01-22 10:47:13.419213: step: 70/469, loss: 0.4898074269294739 2023-01-22 10:47:14.002948: step: 72/469, loss: 1.111472487449646 2023-01-22 10:47:14.645436: step: 74/469, loss: 0.6634534597396851 2023-01-22 10:47:15.275087: step: 76/469, loss: 0.5694586634635925 2023-01-22 10:47:15.938993: step: 78/469, loss: 0.22259896993637085 2023-01-22 10:47:16.509750: step: 80/469, loss: 0.4513004422187805 2023-01-22 10:47:17.141492: step: 82/469, loss: 2.6729772090911865 2023-01-22 10:47:17.793891: step: 84/469, loss: 1.9189927577972412 2023-01-22 10:47:18.395063: step: 86/469, loss: 0.8211997747421265 2023-01-22 10:47:19.022465: step: 88/469, loss: 0.3043842315673828 2023-01-22 10:47:19.668805: step: 90/469, loss: 0.7644754648208618 2023-01-22 10:47:20.328395: step: 92/469, loss: 0.5473629832267761 2023-01-22 10:47:20.962910: step: 94/469, loss: 0.188741073012352 2023-01-22 10:47:21.607755: step: 96/469, loss: 0.3651156723499298 2023-01-22 10:47:22.262092: step: 98/469, loss: 0.1702229380607605 2023-01-22 10:47:22.913017: step: 100/469, loss: 0.7699720859527588 2023-01-22 10:47:23.467134: step: 102/469, loss: 0.21461914479732513 2023-01-22 10:47:24.031325: step: 104/469, loss: 0.40581464767456055 2023-01-22 10:47:24.627918: step: 106/469, loss: 1.7870793342590332 2023-01-22 10:47:25.225732: step: 108/469, loss: 0.23843681812286377 2023-01-22 10:47:25.838199: step: 110/469, loss: 0.7743473649024963 2023-01-22 10:47:26.498613: step: 112/469, loss: 1.068926215171814 2023-01-22 10:47:27.130234: step: 114/469, loss: 0.3782900869846344 2023-01-22 10:47:27.755366: step: 116/469, loss: 0.15446829795837402 2023-01-22 10:47:28.396417: step: 118/469, loss: 1.8861833810806274 2023-01-22 10:47:29.000241: step: 120/469, loss: 0.6652607321739197 2023-01-22 10:47:29.618234: step: 122/469, loss: 0.3511238694190979 2023-01-22 10:47:30.283550: step: 124/469, loss: 0.26555123925209045 2023-01-22 10:47:30.841391: step: 126/469, loss: 0.3291059136390686 2023-01-22 10:47:31.420316: step: 128/469, loss: 0.493099182844162 2023-01-22 10:47:32.057782: step: 130/469, loss: 2.3179497718811035 2023-01-22 10:47:32.699365: step: 132/469, loss: 0.8168757557868958 2023-01-22 10:47:33.400286: step: 134/469, loss: 1.5501023530960083 2023-01-22 10:47:34.043851: step: 136/469, loss: 0.5716238617897034 2023-01-22 10:47:34.670904: step: 138/469, loss: 0.443341463804245 2023-01-22 10:47:35.225724: step: 140/469, loss: 0.6165660619735718 2023-01-22 
10:47:35.846210: step: 142/469, loss: 0.5413652062416077 2023-01-22 10:47:36.474454: step: 144/469, loss: 0.8951048254966736 2023-01-22 10:47:37.181160: step: 146/469, loss: 1.015242099761963 2023-01-22 10:47:37.797996: step: 148/469, loss: 0.5760049819946289 2023-01-22 10:47:38.377715: step: 150/469, loss: 0.5865218639373779 2023-01-22 10:47:39.042542: step: 152/469, loss: 0.48217886686325073 2023-01-22 10:47:39.690593: step: 154/469, loss: 1.7473310232162476 2023-01-22 10:47:40.211868: step: 156/469, loss: 0.15513287484645844 2023-01-22 10:47:40.816776: step: 158/469, loss: 0.23924881219863892 2023-01-22 10:47:41.502738: step: 160/469, loss: 0.6334220767021179 2023-01-22 10:47:42.090247: step: 162/469, loss: 0.36883997917175293 2023-01-22 10:47:42.757086: step: 164/469, loss: 0.969382107257843 2023-01-22 10:47:43.375406: step: 166/469, loss: 0.9840506315231323 2023-01-22 10:47:44.017050: step: 168/469, loss: 0.5908848643302917 2023-01-22 10:47:44.713482: step: 170/469, loss: 0.16666610538959503 2023-01-22 10:47:45.368226: step: 172/469, loss: 0.6665465831756592 2023-01-22 10:47:46.041785: step: 174/469, loss: 0.3576303720474243 2023-01-22 10:47:46.715349: step: 176/469, loss: 1.0005073547363281 2023-01-22 10:47:47.362088: step: 178/469, loss: 0.20010122656822205 2023-01-22 10:47:48.023710: step: 180/469, loss: 0.4356316328048706 2023-01-22 10:47:48.718780: step: 182/469, loss: 0.518362820148468 2023-01-22 10:47:49.316606: step: 184/469, loss: 0.1921110451221466 2023-01-22 10:47:49.936807: step: 186/469, loss: 0.44512102007865906 2023-01-22 10:47:50.569824: step: 188/469, loss: 0.590453565120697 2023-01-22 10:47:51.150988: step: 190/469, loss: 1.0626299381256104 2023-01-22 10:47:51.821235: step: 192/469, loss: 0.6436001062393188 2023-01-22 10:47:52.416571: step: 194/469, loss: 1.544126272201538 2023-01-22 10:47:53.020423: step: 196/469, loss: 0.32917964458465576 2023-01-22 10:47:53.620770: step: 198/469, loss: 1.4476020336151123 2023-01-22 10:47:54.240737: step: 200/469, loss: 0.7620005011558533 2023-01-22 10:47:54.869490: step: 202/469, loss: 1.2102952003479004 2023-01-22 10:47:55.474551: step: 204/469, loss: 0.9657041430473328 2023-01-22 10:47:56.097856: step: 206/469, loss: 1.2441942691802979 2023-01-22 10:47:56.686676: step: 208/469, loss: 0.22580377757549286 2023-01-22 10:47:57.325532: step: 210/469, loss: 0.20513802766799927 2023-01-22 10:47:58.022008: step: 212/469, loss: 0.5054277777671814 2023-01-22 10:47:58.678792: step: 214/469, loss: 0.5155064463615417 2023-01-22 10:47:59.293762: step: 216/469, loss: 0.5800778865814209 2023-01-22 10:48:00.009396: step: 218/469, loss: 0.5771896839141846 2023-01-22 10:48:00.611302: step: 220/469, loss: 0.6965882182121277 2023-01-22 10:48:01.266741: step: 222/469, loss: 0.36101800203323364 2023-01-22 10:48:01.933288: step: 224/469, loss: 1.1969101428985596 2023-01-22 10:48:02.550925: step: 226/469, loss: 1.0366992950439453 2023-01-22 10:48:03.262901: step: 228/469, loss: 0.6197690963745117 2023-01-22 10:48:03.845998: step: 230/469, loss: 0.4069611728191376 2023-01-22 10:48:04.461226: step: 232/469, loss: 1.0281972885131836 2023-01-22 10:48:05.099786: step: 234/469, loss: 0.6776111125946045 2023-01-22 10:48:05.660936: step: 236/469, loss: 0.3738084137439728 2023-01-22 10:48:06.295718: step: 238/469, loss: 1.2709115743637085 2023-01-22 10:48:06.839105: step: 240/469, loss: 5.531229496002197 2023-01-22 10:48:07.485025: step: 242/469, loss: 1.31396484375 2023-01-22 10:48:08.036056: step: 244/469, loss: 1.396083116531372 2023-01-22 10:48:08.788124: 
step: 246/469, loss: 0.8968508243560791 2023-01-22 10:48:09.417386: step: 248/469, loss: 0.14772929251194 2023-01-22 10:48:10.003282: step: 250/469, loss: 0.845439076423645 2023-01-22 10:48:10.592213: step: 252/469, loss: 1.2677044868469238 2023-01-22 10:48:11.244742: step: 254/469, loss: 0.5776054859161377 2023-01-22 10:48:11.902524: step: 256/469, loss: 4.868602752685547 2023-01-22 10:48:12.536407: step: 258/469, loss: 1.289811134338379 2023-01-22 10:48:13.196771: step: 260/469, loss: 0.9429295659065247 2023-01-22 10:48:13.822635: step: 262/469, loss: 0.18802818655967712 2023-01-22 10:48:14.452641: step: 264/469, loss: 0.7797534465789795 2023-01-22 10:48:15.026484: step: 266/469, loss: 1.340875267982483 2023-01-22 10:48:15.636029: step: 268/469, loss: 1.9435718059539795 2023-01-22 10:48:16.206911: step: 270/469, loss: 0.8291800618171692 2023-01-22 10:48:16.865445: step: 272/469, loss: 0.7410264015197754 2023-01-22 10:48:17.530990: step: 274/469, loss: 0.6898849010467529 2023-01-22 10:48:18.223902: step: 276/469, loss: 3.346698760986328 2023-01-22 10:48:18.826565: step: 278/469, loss: 0.8722501397132874 2023-01-22 10:48:19.487799: step: 280/469, loss: 1.635509729385376 2023-01-22 10:48:20.170239: step: 282/469, loss: 0.8487719297409058 2023-01-22 10:48:20.804469: step: 284/469, loss: 0.6330685615539551 2023-01-22 10:48:21.404868: step: 286/469, loss: 2.2650954723358154 2023-01-22 10:48:21.997098: step: 288/469, loss: 0.660061776638031 2023-01-22 10:48:22.587972: step: 290/469, loss: 1.5336778163909912 2023-01-22 10:48:23.177140: step: 292/469, loss: 0.2699785828590393 2023-01-22 10:48:23.827503: step: 294/469, loss: 0.3881272077560425 2023-01-22 10:48:24.491700: step: 296/469, loss: 1.184231162071228 2023-01-22 10:48:25.134605: step: 298/469, loss: 0.5889768600463867 2023-01-22 10:48:25.742744: step: 300/469, loss: 0.7570130228996277 2023-01-22 10:48:26.329711: step: 302/469, loss: 0.5367091298103333 2023-01-22 10:48:27.029148: step: 304/469, loss: 2.8680503368377686 2023-01-22 10:48:27.646928: step: 306/469, loss: 0.05204400420188904 2023-01-22 10:48:28.246686: step: 308/469, loss: 1.35739004611969 2023-01-22 10:48:28.877515: step: 310/469, loss: 0.3258724510669708 2023-01-22 10:48:29.521090: step: 312/469, loss: 0.29013729095458984 2023-01-22 10:48:30.150104: step: 314/469, loss: 0.27146241068840027 2023-01-22 10:48:30.813025: step: 316/469, loss: 0.49948427081108093 2023-01-22 10:48:31.437128: step: 318/469, loss: 1.1261435747146606 2023-01-22 10:48:31.998351: step: 320/469, loss: 0.25626009702682495 2023-01-22 10:48:32.605124: step: 322/469, loss: 0.22019492089748383 2023-01-22 10:48:33.196479: step: 324/469, loss: 0.3955521583557129 2023-01-22 10:48:33.756221: step: 326/469, loss: 0.3750266134738922 2023-01-22 10:48:34.358227: step: 328/469, loss: 0.5368538498878479 2023-01-22 10:48:34.984510: step: 330/469, loss: 0.5084934234619141 2023-01-22 10:48:35.671602: step: 332/469, loss: 1.0227596759796143 2023-01-22 10:48:36.206884: step: 334/469, loss: 0.6266493201255798 2023-01-22 10:48:36.788577: step: 336/469, loss: 0.8267876505851746 2023-01-22 10:48:37.377262: step: 338/469, loss: 4.548115253448486 2023-01-22 10:48:37.976227: step: 340/469, loss: 0.7014940977096558 2023-01-22 10:48:38.672435: step: 342/469, loss: 2.1303517818450928 2023-01-22 10:48:39.323683: step: 344/469, loss: 0.31942474842071533 2023-01-22 10:48:39.970969: step: 346/469, loss: 0.4026111662387848 2023-01-22 10:48:40.576958: step: 348/469, loss: 1.4867360591888428 2023-01-22 10:48:41.346379: step: 350/469, loss: 
0.27226313948631287 2023-01-22 10:48:42.030848: step: 352/469, loss: 0.6493767499923706 2023-01-22 10:48:42.679586: step: 354/469, loss: 2.735508441925049 2023-01-22 10:48:43.295553: step: 356/469, loss: 1.5636558532714844 2023-01-22 10:48:43.959132: step: 358/469, loss: 0.47091829776763916 2023-01-22 10:48:44.611181: step: 360/469, loss: 1.05391263961792 2023-01-22 10:48:45.262658: step: 362/469, loss: 0.3859108090400696 2023-01-22 10:48:45.886633: step: 364/469, loss: 0.9916458129882812 2023-01-22 10:48:46.517381: step: 366/469, loss: 1.3550721406936646 2023-01-22 10:48:47.216404: step: 368/469, loss: 0.328305184841156 2023-01-22 10:48:47.861698: step: 370/469, loss: 0.8070709705352783 2023-01-22 10:48:48.448403: step: 372/469, loss: 2.137571334838867 2023-01-22 10:48:49.078501: step: 374/469, loss: 0.5778708457946777 2023-01-22 10:48:49.704456: step: 376/469, loss: 0.2450869083404541 2023-01-22 10:48:50.300593: step: 378/469, loss: 0.4169224798679352 2023-01-22 10:48:50.910593: step: 380/469, loss: 0.37350788712501526 2023-01-22 10:48:51.517795: step: 382/469, loss: 1.5586321353912354 2023-01-22 10:48:52.124152: step: 384/469, loss: 0.21327990293502808 2023-01-22 10:48:52.772159: step: 386/469, loss: 0.5003604292869568 2023-01-22 10:48:53.374975: step: 388/469, loss: 0.7882447838783264 2023-01-22 10:48:54.001044: step: 390/469, loss: 1.6546030044555664 2023-01-22 10:48:54.673909: step: 392/469, loss: 0.2584904730319977 2023-01-22 10:48:55.267210: step: 394/469, loss: 1.1590557098388672 2023-01-22 10:48:55.947473: step: 396/469, loss: 1.5481541156768799 2023-01-22 10:48:56.550453: step: 398/469, loss: 0.8572282791137695 2023-01-22 10:48:57.101177: step: 400/469, loss: 1.0574965476989746 2023-01-22 10:48:57.716103: step: 402/469, loss: 10.332171440124512 2023-01-22 10:48:58.393467: step: 404/469, loss: 0.7423107624053955 2023-01-22 10:48:58.981538: step: 406/469, loss: 0.5139293670654297 2023-01-22 10:48:59.636899: step: 408/469, loss: 4.425941467285156 2023-01-22 10:49:00.306979: step: 410/469, loss: 1.8335258960723877 2023-01-22 10:49:00.906340: step: 412/469, loss: 1.4402554035186768 2023-01-22 10:49:01.538646: step: 414/469, loss: 0.42355209589004517 2023-01-22 10:49:02.126860: step: 416/469, loss: 1.0914325714111328 2023-01-22 10:49:02.700898: step: 418/469, loss: 0.9456291198730469 2023-01-22 10:49:03.337906: step: 420/469, loss: 0.2497742921113968 2023-01-22 10:49:03.972008: step: 422/469, loss: 1.0893634557724 2023-01-22 10:49:04.621199: step: 424/469, loss: 0.17269906401634216 2023-01-22 10:49:05.239837: step: 426/469, loss: 1.409543752670288 2023-01-22 10:49:05.866482: step: 428/469, loss: 0.3833792507648468 2023-01-22 10:49:06.480461: step: 430/469, loss: 0.3209575116634369 2023-01-22 10:49:07.111259: step: 432/469, loss: 0.7200053334236145 2023-01-22 10:49:07.690671: step: 434/469, loss: 1.5605002641677856 2023-01-22 10:49:08.340410: step: 436/469, loss: 1.0715489387512207 2023-01-22 10:49:08.981087: step: 438/469, loss: 1.4137178659439087 2023-01-22 10:49:09.689842: step: 440/469, loss: 4.9024529457092285 2023-01-22 10:49:10.524804: step: 442/469, loss: 0.38857245445251465 2023-01-22 10:49:11.172213: step: 444/469, loss: 0.44481369853019714 2023-01-22 10:49:11.827447: step: 446/469, loss: 0.6289005279541016 2023-01-22 10:49:12.435187: step: 448/469, loss: 0.23422524333000183 2023-01-22 10:49:13.085576: step: 450/469, loss: 0.2247019112110138 2023-01-22 10:49:13.690041: step: 452/469, loss: 0.5358929634094238 2023-01-22 10:49:14.322349: step: 454/469, loss: 0.5743767619132996 
2023-01-22 10:49:14.913197: step: 456/469, loss: 0.4127555191516876 2023-01-22 10:49:15.566251: step: 458/469, loss: 1.174877405166626 2023-01-22 10:49:16.122832: step: 460/469, loss: 0.8510198593139648 2023-01-22 10:49:16.740795: step: 462/469, loss: 0.31051793694496155 2023-01-22 10:49:17.331392: step: 464/469, loss: 2.053152084350586 2023-01-22 10:49:17.899003: step: 466/469, loss: 1.6194663047790527 2023-01-22 10:49:18.481684: step: 468/469, loss: 0.8800867199897766 2023-01-22 10:49:19.173660: step: 470/469, loss: 0.22569392621517181 2023-01-22 10:49:19.770721: step: 472/469, loss: 0.4594739079475403 2023-01-22 10:49:20.486037: step: 474/469, loss: 0.5039684176445007 2023-01-22 10:49:21.091393: step: 476/469, loss: 0.3357508182525635 2023-01-22 10:49:21.733207: step: 478/469, loss: 0.2989819347858429 2023-01-22 10:49:22.441121: step: 480/469, loss: 0.8009461164474487 2023-01-22 10:49:23.086762: step: 482/469, loss: 0.4875156283378601 2023-01-22 10:49:23.729711: step: 484/469, loss: 0.7731254696846008 2023-01-22 10:49:24.322950: step: 486/469, loss: 9.65363597869873 2023-01-22 10:49:24.944421: step: 488/469, loss: 1.6249033212661743 2023-01-22 10:49:25.524842: step: 490/469, loss: 0.4438440799713135 2023-01-22 10:49:26.165929: step: 492/469, loss: 1.6996326446533203 2023-01-22 10:49:26.769429: step: 494/469, loss: 1.0711129903793335 2023-01-22 10:49:27.350192: step: 496/469, loss: 0.6992626786231995 2023-01-22 10:49:27.987341: step: 498/469, loss: 0.8322696089744568 2023-01-22 10:49:28.638744: step: 500/469, loss: 0.9370493292808533 2023-01-22 10:49:29.269402: step: 502/469, loss: 4.563638687133789 2023-01-22 10:49:29.868160: step: 504/469, loss: 0.646310031414032 2023-01-22 10:49:30.515256: step: 506/469, loss: 0.26005351543426514 2023-01-22 10:49:31.097457: step: 508/469, loss: 1.821281909942627 2023-01-22 10:49:31.665796: step: 510/469, loss: 0.8955795168876648 2023-01-22 10:49:32.255862: step: 512/469, loss: 0.8382113575935364 2023-01-22 10:49:32.899540: step: 514/469, loss: 0.6199796199798584 2023-01-22 10:49:33.491258: step: 516/469, loss: 0.7629304528236389 2023-01-22 10:49:34.122852: step: 518/469, loss: 0.7643672823905945 2023-01-22 10:49:34.740415: step: 520/469, loss: 2.778000831604004 2023-01-22 10:49:35.437036: step: 522/469, loss: 0.3729744255542755 2023-01-22 10:49:36.045951: step: 524/469, loss: 2.275259256362915 2023-01-22 10:49:36.755273: step: 526/469, loss: 0.3773912489414215 2023-01-22 10:49:37.339038: step: 528/469, loss: 0.48497679829597473 2023-01-22 10:49:38.016706: step: 530/469, loss: 0.49681442975997925 2023-01-22 10:49:38.652158: step: 532/469, loss: 1.2908484935760498 2023-01-22 10:49:39.294340: step: 534/469, loss: 0.43272584676742554 2023-01-22 10:49:39.917569: step: 536/469, loss: 0.5426748991012573 2023-01-22 10:49:40.545254: step: 538/469, loss: 1.1667996644973755 2023-01-22 10:49:41.280348: step: 540/469, loss: 0.8148952722549438 2023-01-22 10:49:41.967933: step: 542/469, loss: 2.1252646446228027 2023-01-22 10:49:42.611389: step: 544/469, loss: 0.38486993312835693 2023-01-22 10:49:43.305551: step: 546/469, loss: 0.33599501848220825 2023-01-22 10:49:44.014203: step: 548/469, loss: 1.1564935445785522 2023-01-22 10:49:44.633160: step: 550/469, loss: 0.3236123025417328 2023-01-22 10:49:45.231138: step: 552/469, loss: 0.4061366617679596 2023-01-22 10:49:45.837320: step: 554/469, loss: 1.3559249639511108 2023-01-22 10:49:46.510141: step: 556/469, loss: 1.3511203527450562 2023-01-22 10:49:47.152849: step: 558/469, loss: 0.9502658843994141 2023-01-22 
10:49:47.775064: step: 560/469, loss: 0.4477919936180115 2023-01-22 10:49:48.424850: step: 562/469, loss: 0.549677848815918 2023-01-22 10:49:49.060980: step: 564/469, loss: 0.6040837168693542 2023-01-22 10:49:49.773624: step: 566/469, loss: 1.6376280784606934 2023-01-22 10:49:50.529082: step: 568/469, loss: 0.6095688939094543 2023-01-22 10:49:51.275557: step: 570/469, loss: 0.9783769249916077 2023-01-22 10:49:51.893762: step: 572/469, loss: 0.24008788168430328 2023-01-22 10:49:52.455650: step: 574/469, loss: 0.3771034777164459 2023-01-22 10:49:53.207657: step: 576/469, loss: 0.20061251521110535 2023-01-22 10:49:53.835338: step: 578/469, loss: 0.9132511615753174 2023-01-22 10:49:54.488003: step: 580/469, loss: 0.503856897354126 2023-01-22 10:49:55.263121: step: 582/469, loss: 1.8077858686447144 2023-01-22 10:49:55.925334: step: 584/469, loss: 0.3756069540977478 2023-01-22 10:49:56.613282: step: 586/469, loss: 0.7689679265022278 2023-01-22 10:49:57.271009: step: 588/469, loss: 0.8368195295333862 2023-01-22 10:49:57.875542: step: 590/469, loss: 0.35350868105888367 2023-01-22 10:49:58.556272: step: 592/469, loss: 0.4454704225063324 2023-01-22 10:49:59.261110: step: 594/469, loss: 0.573936939239502 2023-01-22 10:50:00.029972: step: 596/469, loss: 1.1190050840377808 2023-01-22 10:50:00.697627: step: 598/469, loss: 0.4485684335231781 2023-01-22 10:50:01.345881: step: 600/469, loss: 0.6778914332389832 2023-01-22 10:50:01.991204: step: 602/469, loss: 1.0663076639175415 2023-01-22 10:50:02.610906: step: 604/469, loss: 1.1312828063964844 2023-01-22 10:50:03.233094: step: 606/469, loss: 0.3247070610523224 2023-01-22 10:50:03.857440: step: 608/469, loss: 0.8766698837280273 2023-01-22 10:50:04.508837: step: 610/469, loss: 1.199439287185669 2023-01-22 10:50:05.154725: step: 612/469, loss: 1.1807929277420044 2023-01-22 10:50:05.800053: step: 614/469, loss: 8.480010032653809 2023-01-22 10:50:06.418140: step: 616/469, loss: 0.8339220285415649 2023-01-22 10:50:07.145871: step: 618/469, loss: 0.4360521137714386 2023-01-22 10:50:07.849446: step: 620/469, loss: 3.8431954383850098 2023-01-22 10:50:08.544692: step: 622/469, loss: 1.5351340770721436 2023-01-22 10:50:09.141819: step: 624/469, loss: 0.12041905522346497 2023-01-22 10:50:09.797335: step: 626/469, loss: 0.4511202573776245 2023-01-22 10:50:10.502114: step: 628/469, loss: 0.6246999502182007 2023-01-22 10:50:11.113422: step: 630/469, loss: 1.0157084465026855 2023-01-22 10:50:11.784090: step: 632/469, loss: 0.19886675477027893 2023-01-22 10:50:12.416525: step: 634/469, loss: 1.221996545791626 2023-01-22 10:50:13.056031: step: 636/469, loss: 0.64617919921875 2023-01-22 10:50:13.731368: step: 638/469, loss: 0.5407251119613647 2023-01-22 10:50:14.410259: step: 640/469, loss: 1.7271944284439087 2023-01-22 10:50:15.087274: step: 642/469, loss: 0.2501175105571747 2023-01-22 10:50:15.797690: step: 644/469, loss: 0.4819124937057495 2023-01-22 10:50:16.446424: step: 646/469, loss: 0.628630518913269 2023-01-22 10:50:17.029723: step: 648/469, loss: 0.6487637758255005 2023-01-22 10:50:17.702430: step: 650/469, loss: 0.5462210178375244 2023-01-22 10:50:18.299336: step: 652/469, loss: 1.4214494228363037 2023-01-22 10:50:18.915941: step: 654/469, loss: 1.1150355339050293 2023-01-22 10:50:19.563164: step: 656/469, loss: 0.8862869739532471 2023-01-22 10:50:20.237997: step: 658/469, loss: 0.4711628556251526 2023-01-22 10:50:20.915653: step: 660/469, loss: 0.5369870662689209 2023-01-22 10:50:21.592824: step: 662/469, loss: 0.3013482093811035 2023-01-22 10:50:22.257195: step: 
664/469, loss: 0.8464496731758118 2023-01-22 10:50:22.882392: step: 666/469, loss: 2.2260985374450684 2023-01-22 10:50:23.629137: step: 668/469, loss: 0.6346535086631775 2023-01-22 10:50:24.338849: step: 670/469, loss: 0.40371185541152954 2023-01-22 10:50:25.027177: step: 672/469, loss: 0.46036118268966675 2023-01-22 10:50:25.633581: step: 674/469, loss: 1.002408742904663 2023-01-22 10:50:26.265506: step: 676/469, loss: 0.8121116161346436 2023-01-22 10:50:26.993760: step: 678/469, loss: 2.1659016609191895 2023-01-22 10:50:27.642134: step: 680/469, loss: 1.4213732481002808 2023-01-22 10:50:28.281939: step: 682/469, loss: 9.95286750793457 2023-01-22 10:50:28.981569: step: 684/469, loss: 0.698919951915741 2023-01-22 10:50:29.565890: step: 686/469, loss: 0.5194299817085266 2023-01-22 10:50:30.219329: step: 688/469, loss: 0.5758535265922546 2023-01-22 10:50:30.880223: step: 690/469, loss: 1.1437273025512695 2023-01-22 10:50:31.501349: step: 692/469, loss: 3.149909496307373 2023-01-22 10:50:32.299907: step: 694/469, loss: 3.176224946975708 2023-01-22 10:50:32.975730: step: 696/469, loss: 1.150980830192566 2023-01-22 10:50:33.616174: step: 698/469, loss: 0.47864046692848206 2023-01-22 10:50:34.228856: step: 700/469, loss: 0.32273152470588684 2023-01-22 10:50:34.924021: step: 702/469, loss: 0.6380746364593506 2023-01-22 10:50:35.575657: step: 704/469, loss: 0.8380205631256104 2023-01-22 10:50:36.248795: step: 706/469, loss: 1.3941864967346191 2023-01-22 10:50:36.951458: step: 708/469, loss: 0.4604773223400116 2023-01-22 10:50:37.600734: step: 710/469, loss: 0.4957902133464813 2023-01-22 10:50:38.249110: step: 712/469, loss: 0.32155537605285645 2023-01-22 10:50:38.912724: step: 714/469, loss: 0.30814430117607117 2023-01-22 10:50:39.557932: step: 716/469, loss: 1.67085599899292 2023-01-22 10:50:40.355079: step: 718/469, loss: 0.6058162450790405 2023-01-22 10:50:41.000654: step: 720/469, loss: 1.4921011924743652 2023-01-22 10:50:41.596874: step: 722/469, loss: 0.1933315545320511 2023-01-22 10:50:42.199964: step: 724/469, loss: 0.4738868474960327 2023-01-22 10:50:42.784607: step: 726/469, loss: 0.4650869369506836 2023-01-22 10:50:43.465363: step: 728/469, loss: 0.25048828125 2023-01-22 10:50:44.153218: step: 730/469, loss: 0.3387415409088135 2023-01-22 10:50:44.814208: step: 732/469, loss: 0.3434506058692932 2023-01-22 10:50:45.452682: step: 734/469, loss: 0.253253310918808 2023-01-22 10:50:46.191629: step: 736/469, loss: 0.3523101210594177 2023-01-22 10:50:46.804384: step: 738/469, loss: 0.5128129124641418 2023-01-22 10:50:47.447349: step: 740/469, loss: 0.9110864400863647 2023-01-22 10:50:48.055215: step: 742/469, loss: 3.7775814533233643 2023-01-22 10:50:48.679501: step: 744/469, loss: 0.6204109191894531 2023-01-22 10:50:49.366526: step: 746/469, loss: 1.1810518503189087 2023-01-22 10:50:50.114787: step: 748/469, loss: 0.7742908000946045 2023-01-22 10:50:50.856121: step: 750/469, loss: 2.7484686374664307 2023-01-22 10:50:51.471358: step: 752/469, loss: 0.34478098154067993 2023-01-22 10:50:52.118286: step: 754/469, loss: 0.5299589037895203 2023-01-22 10:50:52.769740: step: 756/469, loss: 0.6918959617614746 2023-01-22 10:50:53.368948: step: 758/469, loss: 0.6955001950263977 2023-01-22 10:50:54.019372: step: 760/469, loss: 0.3608554005622864 2023-01-22 10:50:54.684584: step: 762/469, loss: 2.166154146194458 2023-01-22 10:50:55.417517: step: 764/469, loss: 0.20256467163562775 2023-01-22 10:50:56.070449: step: 766/469, loss: 0.271989643573761 2023-01-22 10:50:56.716229: step: 768/469, loss: 
0.8315707445144653 2023-01-22 10:50:57.345277: step: 770/469, loss: 0.8766003251075745 2023-01-22 10:50:57.979369: step: 772/469, loss: 0.45454347133636475 2023-01-22 10:50:58.540647: step: 774/469, loss: 0.9226114749908447 2023-01-22 10:50:59.241344: step: 776/469, loss: 0.5401996374130249 2023-01-22 10:50:59.968904: step: 778/469, loss: 0.9735084772109985 2023-01-22 10:51:00.631287: step: 780/469, loss: 0.4931544363498688 2023-01-22 10:51:01.284593: step: 782/469, loss: 3.062002658843994 2023-01-22 10:51:01.977275: step: 784/469, loss: 1.9800618886947632 2023-01-22 10:51:02.662637: step: 786/469, loss: 0.2877175509929657 2023-01-22 10:51:03.238638: step: 788/469, loss: 0.6904782056808472 2023-01-22 10:51:03.956376: step: 790/469, loss: 1.2676526308059692 2023-01-22 10:51:04.587923: step: 792/469, loss: 0.347474604845047 2023-01-22 10:51:05.245984: step: 794/469, loss: 0.6748522520065308 2023-01-22 10:51:05.834497: step: 796/469, loss: 0.5393024682998657 2023-01-22 10:51:06.466594: step: 798/469, loss: 0.2017233967781067 2023-01-22 10:51:07.094249: step: 800/469, loss: 0.7778514623641968 2023-01-22 10:51:07.732790: step: 802/469, loss: 0.5172813534736633 2023-01-22 10:51:08.393648: step: 804/469, loss: 0.7103946208953857 2023-01-22 10:51:09.000849: step: 806/469, loss: 0.3048156499862671 2023-01-22 10:51:09.668320: step: 808/469, loss: 0.6835830211639404 2023-01-22 10:51:10.419260: step: 810/469, loss: 0.8040359020233154 2023-01-22 10:51:11.111316: step: 812/469, loss: 0.7363240718841553 2023-01-22 10:51:11.716247: step: 814/469, loss: 0.1961253434419632 2023-01-22 10:51:12.329464: step: 816/469, loss: 0.3886089324951172 2023-01-22 10:51:13.048409: step: 818/469, loss: 1.4280881881713867 2023-01-22 10:51:13.673507: step: 820/469, loss: 0.51703941822052 2023-01-22 10:51:14.360713: step: 822/469, loss: 7.723766803741455 2023-01-22 10:51:15.014442: step: 824/469, loss: 0.4861343801021576 2023-01-22 10:51:15.642235: step: 826/469, loss: 0.2845913767814636 2023-01-22 10:51:16.301482: step: 828/469, loss: 1.5565015077590942 2023-01-22 10:51:16.973635: step: 830/469, loss: 0.479922890663147 2023-01-22 10:51:17.605036: step: 832/469, loss: 0.3722665309906006 2023-01-22 10:51:18.226645: step: 834/469, loss: 0.5464308857917786 2023-01-22 10:51:18.918443: step: 836/469, loss: 1.9788055419921875 2023-01-22 10:51:19.534132: step: 838/469, loss: 1.0019735097885132 2023-01-22 10:51:20.191454: step: 840/469, loss: 1.1319061517715454 2023-01-22 10:51:20.832721: step: 842/469, loss: 0.2502128481864929 2023-01-22 10:51:21.449932: step: 844/469, loss: 1.523635745048523 2023-01-22 10:51:22.144849: step: 846/469, loss: 0.8167132139205933 2023-01-22 10:51:22.792071: step: 848/469, loss: 0.2752529978752136 2023-01-22 10:51:23.484287: step: 850/469, loss: 0.3101945221424103 2023-01-22 10:51:24.172662: step: 852/469, loss: 1.3561729192733765 2023-01-22 10:51:24.835238: step: 854/469, loss: 0.6020959615707397 2023-01-22 10:51:25.481626: step: 856/469, loss: 1.5760339498519897 2023-01-22 10:51:26.172723: step: 858/469, loss: 9.753297805786133 2023-01-22 10:51:26.773326: step: 860/469, loss: 1.0035114288330078 2023-01-22 10:51:27.406239: step: 862/469, loss: 0.16224248707294464 2023-01-22 10:51:28.083771: step: 864/469, loss: 0.3010299503803253 2023-01-22 10:51:28.727944: step: 866/469, loss: 1.1205472946166992 2023-01-22 10:51:29.390277: step: 868/469, loss: 0.31108251214027405 2023-01-22 10:51:30.024827: step: 870/469, loss: 0.3121650218963623 2023-01-22 10:51:30.709680: step: 872/469, loss: 0.4858582019805908 
2023-01-22 10:51:31.320480: step: 874/469, loss: 1.6347750425338745 2023-01-22 10:51:31.959138: step: 876/469, loss: 0.514006495475769 2023-01-22 10:51:32.588104: step: 878/469, loss: 0.19382283091545105 2023-01-22 10:51:33.229711: step: 880/469, loss: 0.16118831932544708 2023-01-22 10:51:33.910029: step: 882/469, loss: 0.7053067684173584 2023-01-22 10:51:34.571132: step: 884/469, loss: 1.0186896324157715 2023-01-22 10:51:35.338411: step: 886/469, loss: 0.8057031631469727 2023-01-22 10:51:36.024754: step: 888/469, loss: 0.9202927350997925 2023-01-22 10:51:36.683594: step: 890/469, loss: 0.4016353189945221 2023-01-22 10:51:37.386339: step: 892/469, loss: 0.5799937844276428 2023-01-22 10:51:38.019202: step: 894/469, loss: 0.4366009831428528 2023-01-22 10:51:38.752850: step: 896/469, loss: 0.2307821810245514 2023-01-22 10:51:39.394282: step: 898/469, loss: 1.2212483882904053 2023-01-22 10:51:40.042441: step: 900/469, loss: 0.5073572397232056 2023-01-22 10:51:40.708464: step: 902/469, loss: 0.9757054448127747 2023-01-22 10:51:41.316061: step: 904/469, loss: 0.21069343388080597 2023-01-22 10:51:41.933302: step: 906/469, loss: 2.8934285640716553 2023-01-22 10:51:42.618571: step: 908/469, loss: 0.9867075085639954 2023-01-22 10:51:43.442693: step: 910/469, loss: 1.1562473773956299 2023-01-22 10:51:44.108663: step: 912/469, loss: 2.1939942836761475 2023-01-22 10:51:44.749326: step: 914/469, loss: 0.48719465732574463 2023-01-22 10:51:45.342750: step: 916/469, loss: 1.0412923097610474 2023-01-22 10:51:46.029103: step: 918/469, loss: 1.0130817890167236 2023-01-22 10:51:46.668135: step: 920/469, loss: 5.039875030517578 2023-01-22 10:51:47.376660: step: 922/469, loss: 0.6794548034667969 2023-01-22 10:51:47.950715: step: 924/469, loss: 0.1540706604719162 2023-01-22 10:51:48.642754: step: 926/469, loss: 2.3417463302612305 2023-01-22 10:51:49.238449: step: 928/469, loss: 0.888870894908905 2023-01-22 10:51:49.918311: step: 930/469, loss: 0.41191956400871277 2023-01-22 10:51:50.511714: step: 932/469, loss: 0.402072012424469 2023-01-22 10:51:51.239300: step: 934/469, loss: 1.3436135053634644 2023-01-22 10:51:51.900900: step: 936/469, loss: 0.31931743025779724 2023-01-22 10:51:52.587671: step: 938/469, loss: 0.8803346157073975
==================================================
Loss: 1.023
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31895915354330706, 'r': 0.3074596774193548, 'f1': 0.31310386473429946}, 'combined': 0.23070811085685222, 'epoch': 5}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29929483395392487, 'r': 0.23494507551003432, 'f1': 0.26324445672216046}, 'combined': 0.1435878854848148, 'epoch': 5}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123600447856686, 'r': 0.30880376344086025, 'f1': 0.31057172391857507}, 'combined': 0.22884232288737108, 'epoch': 5}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29928638434735994, 'r': 0.23575990569357447, 'f1': 0.26375186993150146}, 'combined': 0.14386465632627352, 'epoch': 5}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2980769230769231, 'r': 0.33695652173913043, 'f1': 0.31632653061224486}, 'combined': 0.15816326530612243, 'epoch': 5}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 5}
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31895915354330706, 'r': 0.3074596774193548, 'f1': 0.31310386473429946}, 'combined': 0.23070811085685222, 'epoch': 5}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29929483395392487, 'r': 0.23494507551003432, 'f1': 0.26324445672216046}, 'combined': 0.1435878854848148, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2980769230769231, 'r': 0.33695652173913043, 'f1': 0.31632653061224486}, 'combined': 0.15816326530612243, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123600447856686, 'r': 0.30880376344086025, 'f1': 0.31057172391857507}, 'combined': 0.22884232288737108, 'epoch': 5}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29928638434735994, 'r': 0.23575990569357447, 'f1': 0.26375186993150146}, 'combined': 0.14386465632627352, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 5}
******************************
Epoch: 6
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 10:55:11.540442: step: 2/469, loss: 0.582108199596405 2023-01-22 10:55:12.185182: step: 4/469, loss: 0.6165879368782043 2023-01-22 10:55:12.904962: step: 6/469, loss: 1.1597636938095093 2023-01-22 10:55:13.532992: step: 8/469, loss: 1.2105425596237183 2023-01-22 10:55:14.165775: step: 10/469, loss: 0.9095755815505981 2023-01-22
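The "New best ... model" messages and the "Current best result" block above are maintained per target language: a language's entry is replaced only when its Dev 'combined' score improves, which is why the previous block mixed epoch 4 (Chinese, Russian) with epoch 2 (Korean) before all three converged on epoch 5 here. A minimal sketch of that bookkeeping follows, with names chosen for illustration only (the actual variables inside train.py are not shown in this log):

# Hypothetical per-language bookkeeping mirroring the "New best ..." / "Current best result" output.
best = {}  # language -> {"dev": ..., "test": ..., "sample": ..., "epoch": ...}

def update_best(language, dev, test, sample, epoch):
    # Keep whichever epoch has the highest Dev 'combined' score for this language.
    if language not in best or dev["combined"] > best[language]["dev"]["combined"]:
        best[language] = {"dev": dev, "test": test, "sample": sample, "epoch": epoch}
        print(f"New best {language} model...")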
10:55:14.801266: step: 12/469, loss: 0.36189937591552734 2023-01-22 10:55:15.513911: step: 14/469, loss: 0.36565449833869934 2023-01-22 10:55:16.149481: step: 16/469, loss: 0.11751201003789902 2023-01-22 10:55:16.849046: step: 18/469, loss: 0.4030844569206238 2023-01-22 10:55:17.450017: step: 20/469, loss: 1.0583242177963257 2023-01-22 10:55:18.173162: step: 22/469, loss: 0.5221636295318604 2023-01-22 10:55:18.776060: step: 24/469, loss: 0.2243850976228714 2023-01-22 10:55:19.480972: step: 26/469, loss: 0.16562789678573608 2023-01-22 10:55:20.177674: step: 28/469, loss: 0.6720852851867676 2023-01-22 10:55:20.798230: step: 30/469, loss: 1.2087796926498413 2023-01-22 10:55:21.454235: step: 32/469, loss: 0.3587951064109802 2023-01-22 10:55:22.106938: step: 34/469, loss: 0.1405404657125473 2023-01-22 10:55:22.734069: step: 36/469, loss: 0.5024339556694031 2023-01-22 10:55:23.413979: step: 38/469, loss: 0.18670833110809326 2023-01-22 10:55:24.087547: step: 40/469, loss: 1.147240161895752 2023-01-22 10:55:24.776162: step: 42/469, loss: 1.405549168586731 2023-01-22 10:55:25.427398: step: 44/469, loss: 0.9249565601348877 2023-01-22 10:55:26.147343: step: 46/469, loss: 0.253816157579422 2023-01-22 10:55:26.801082: step: 48/469, loss: 0.2027207314968109 2023-01-22 10:55:27.391394: step: 50/469, loss: 0.11158337444067001 2023-01-22 10:55:28.052804: step: 52/469, loss: 1.1416137218475342 2023-01-22 10:55:28.631488: step: 54/469, loss: 0.6252304315567017 2023-01-22 10:55:29.233455: step: 56/469, loss: 0.17594438791275024 2023-01-22 10:55:29.902993: step: 58/469, loss: 1.0408457517623901 2023-01-22 10:55:30.635405: step: 60/469, loss: 0.2611750662326813 2023-01-22 10:55:31.275993: step: 62/469, loss: 0.5898363590240479 2023-01-22 10:55:31.930912: step: 64/469, loss: 1.304792046546936 2023-01-22 10:55:32.638153: step: 66/469, loss: 0.7455921173095703 2023-01-22 10:55:33.336667: step: 68/469, loss: 0.3708344101905823 2023-01-22 10:55:33.965262: step: 70/469, loss: 0.580329954624176 2023-01-22 10:55:34.578038: step: 72/469, loss: 0.8678571581840515 2023-01-22 10:55:35.275009: step: 74/469, loss: 0.8212671279907227 2023-01-22 10:55:35.912502: step: 76/469, loss: 0.4896354377269745 2023-01-22 10:55:36.594849: step: 78/469, loss: 0.7276855707168579 2023-01-22 10:55:37.185056: step: 80/469, loss: 1.178435206413269 2023-01-22 10:55:37.845649: step: 82/469, loss: 0.3564695119857788 2023-01-22 10:55:38.511607: step: 84/469, loss: 0.3392344117164612 2023-01-22 10:55:39.171569: step: 86/469, loss: 0.36357319355010986 2023-01-22 10:55:39.813337: step: 88/469, loss: 0.4256187379360199 2023-01-22 10:55:40.491996: step: 90/469, loss: 0.6775780916213989 2023-01-22 10:55:41.190504: step: 92/469, loss: 0.4577982425689697 2023-01-22 10:55:41.894111: step: 94/469, loss: 0.4166240990161896 2023-01-22 10:55:42.550108: step: 96/469, loss: 0.14975017309188843 2023-01-22 10:55:43.252559: step: 98/469, loss: 0.5553939938545227 2023-01-22 10:55:43.927082: step: 100/469, loss: 1.331783652305603 2023-01-22 10:55:44.549247: step: 102/469, loss: 0.7636451721191406 2023-01-22 10:55:45.148922: step: 104/469, loss: 0.5785241723060608 2023-01-22 10:55:45.798526: step: 106/469, loss: 1.0434573888778687 2023-01-22 10:55:46.419323: step: 108/469, loss: 0.21206778287887573 2023-01-22 10:55:47.113872: step: 110/469, loss: 0.6471192836761475 2023-01-22 10:55:47.756625: step: 112/469, loss: 0.3738044798374176 2023-01-22 10:55:48.375703: step: 114/469, loss: 0.5797178745269775 2023-01-22 10:55:49.122821: step: 116/469, loss: 0.9984303712844849 
2023-01-22 10:55:49.790257: step: 118/469, loss: 0.5647268295288086 2023-01-22 10:55:50.507840: step: 120/469, loss: 0.67030268907547 2023-01-22 10:55:51.128354: step: 122/469, loss: 0.4455380141735077 2023-01-22 10:55:51.826169: step: 124/469, loss: 0.7645474672317505 2023-01-22 10:55:52.399184: step: 126/469, loss: 0.41484707593917847 2023-01-22 10:55:53.054082: step: 128/469, loss: 0.1937958002090454 2023-01-22 10:55:53.703465: step: 130/469, loss: 0.4437099099159241 2023-01-22 10:55:54.333671: step: 132/469, loss: 0.49780598282814026 2023-01-22 10:55:55.081003: step: 134/469, loss: 0.5588032007217407 2023-01-22 10:55:55.725992: step: 136/469, loss: 0.32195794582366943 2023-01-22 10:55:56.377444: step: 138/469, loss: 0.568348228931427 2023-01-22 10:55:57.029589: step: 140/469, loss: 0.41528013348579407 2023-01-22 10:55:57.720869: step: 142/469, loss: 0.2227383553981781 2023-01-22 10:55:58.348508: step: 144/469, loss: 0.828467607498169 2023-01-22 10:55:58.953030: step: 146/469, loss: 0.3052496314048767 2023-01-22 10:55:59.500203: step: 148/469, loss: 0.32668405771255493 2023-01-22 10:56:00.178320: step: 150/469, loss: 0.8024691939353943 2023-01-22 10:56:00.787205: step: 152/469, loss: 0.7282994985580444 2023-01-22 10:56:01.478741: step: 154/469, loss: 1.111225962638855 2023-01-22 10:56:02.108677: step: 156/469, loss: 0.2026520073413849 2023-01-22 10:56:02.766752: step: 158/469, loss: 0.09721113741397858 2023-01-22 10:56:03.463406: step: 160/469, loss: 0.5165294408798218 2023-01-22 10:56:04.067145: step: 162/469, loss: 0.32028067111968994 2023-01-22 10:56:04.679278: step: 164/469, loss: 0.34433838725090027 2023-01-22 10:56:05.296897: step: 166/469, loss: 0.5954673886299133 2023-01-22 10:56:05.976990: step: 168/469, loss: 0.32776808738708496 2023-01-22 10:56:06.696951: step: 170/469, loss: 0.7825974225997925 2023-01-22 10:56:07.398994: step: 172/469, loss: 0.1546805500984192 2023-01-22 10:56:08.054673: step: 174/469, loss: 1.407179832458496 2023-01-22 10:56:08.679554: step: 176/469, loss: 1.138911485671997 2023-01-22 10:56:09.401682: step: 178/469, loss: 1.2073984146118164 2023-01-22 10:56:10.051410: step: 180/469, loss: 0.39008909463882446 2023-01-22 10:56:10.731937: step: 182/469, loss: 1.1200722455978394 2023-01-22 10:56:11.407097: step: 184/469, loss: 0.18921564519405365 2023-01-22 10:56:12.096214: step: 186/469, loss: 0.32686442136764526 2023-01-22 10:56:12.767377: step: 188/469, loss: 0.4216076135635376 2023-01-22 10:56:13.435245: step: 190/469, loss: 0.6890570521354675 2023-01-22 10:56:14.140326: step: 192/469, loss: 0.7068150639533997 2023-01-22 10:56:14.739210: step: 194/469, loss: 0.2108181267976761 2023-01-22 10:56:15.546506: step: 196/469, loss: 0.23207297921180725 2023-01-22 10:56:16.224534: step: 198/469, loss: 0.8247882127761841 2023-01-22 10:56:16.857606: step: 200/469, loss: 0.44725847244262695 2023-01-22 10:56:17.519456: step: 202/469, loss: 0.19397661089897156 2023-01-22 10:56:18.272831: step: 204/469, loss: 0.40939685702323914 2023-01-22 10:56:18.971611: step: 206/469, loss: 0.3779168128967285 2023-01-22 10:56:19.594244: step: 208/469, loss: 0.48507606983184814 2023-01-22 10:56:20.230042: step: 210/469, loss: 1.1623269319534302 2023-01-22 10:56:20.868175: step: 212/469, loss: 1.4632008075714111 2023-01-22 10:56:21.521506: step: 214/469, loss: 0.5960787534713745 2023-01-22 10:56:22.155138: step: 216/469, loss: 0.6466684341430664 2023-01-22 10:56:22.800259: step: 218/469, loss: 0.3629465103149414 2023-01-22 10:56:23.439945: step: 220/469, loss: 0.5430543422698975 
2023-01-22 10:56:24.099366: step: 222/469, loss: 0.42043596506118774 2023-01-22 10:56:24.741560: step: 224/469, loss: 0.8249005675315857 2023-01-22 10:56:25.387299: step: 226/469, loss: 1.074183702468872 2023-01-22 10:56:26.034840: step: 228/469, loss: 1.0409280061721802 2023-01-22 10:56:26.661104: step: 230/469, loss: 0.6099473237991333 2023-01-22 10:56:27.402385: step: 232/469, loss: 0.995842695236206 2023-01-22 10:56:28.024688: step: 234/469, loss: 1.058698296546936 2023-01-22 10:56:28.696306: step: 236/469, loss: 0.6503398418426514 2023-01-22 10:56:29.365617: step: 238/469, loss: 0.8363496661186218 2023-01-22 10:56:30.089250: step: 240/469, loss: 0.6152865290641785 2023-01-22 10:56:30.726178: step: 242/469, loss: 0.6883412003517151 2023-01-22 10:56:31.322982: step: 244/469, loss: 0.2523246705532074 2023-01-22 10:56:32.015528: step: 246/469, loss: 0.7020666599273682 2023-01-22 10:56:32.709036: step: 248/469, loss: 0.5206868052482605 2023-01-22 10:56:33.311376: step: 250/469, loss: 1.1940706968307495 2023-01-22 10:56:33.916662: step: 252/469, loss: 1.035109519958496 2023-01-22 10:56:34.592099: step: 254/469, loss: 0.22835978865623474 2023-01-22 10:56:35.202251: step: 256/469, loss: 0.3665621876716614 2023-01-22 10:56:35.881846: step: 258/469, loss: 1.1108589172363281 2023-01-22 10:56:36.501846: step: 260/469, loss: 0.31099414825439453 2023-01-22 10:56:37.236196: step: 262/469, loss: 1.4731876850128174 2023-01-22 10:56:37.882236: step: 264/469, loss: 0.10067793726921082 2023-01-22 10:56:38.620758: step: 266/469, loss: 1.7326617240905762 2023-01-22 10:56:39.301412: step: 268/469, loss: 0.31641972064971924 2023-01-22 10:56:39.965735: step: 270/469, loss: 1.152388572692871 2023-01-22 10:56:40.690433: step: 272/469, loss: 0.968422532081604 2023-01-22 10:56:41.306570: step: 274/469, loss: 0.5858252048492432 2023-01-22 10:56:41.931301: step: 276/469, loss: 0.586940586566925 2023-01-22 10:56:42.604450: step: 278/469, loss: 0.5209425687789917 2023-01-22 10:56:43.342688: step: 280/469, loss: 0.40567511320114136 2023-01-22 10:56:44.056062: step: 282/469, loss: 0.6916226744651794 2023-01-22 10:56:44.801986: step: 284/469, loss: 1.2991178035736084 2023-01-22 10:56:45.599948: step: 286/469, loss: 0.5992436408996582 2023-01-22 10:56:46.263883: step: 288/469, loss: 0.3350967764854431 2023-01-22 10:56:46.924966: step: 290/469, loss: 0.3736046254634857 2023-01-22 10:56:47.579215: step: 292/469, loss: 1.1027233600616455 2023-01-22 10:56:48.268045: step: 294/469, loss: 0.7615113854408264 2023-01-22 10:56:48.950617: step: 296/469, loss: 1.3420709371566772 2023-01-22 10:56:49.630153: step: 298/469, loss: 0.3673308491706848 2023-01-22 10:56:50.288530: step: 300/469, loss: 1.106971263885498 2023-01-22 10:56:50.880524: step: 302/469, loss: 0.07255806028842926 2023-01-22 10:56:51.562457: step: 304/469, loss: 0.3658123016357422 2023-01-22 10:56:52.226337: step: 306/469, loss: 0.7242321968078613 2023-01-22 10:56:52.896955: step: 308/469, loss: 1.4762146472930908 2023-01-22 10:56:53.564958: step: 310/469, loss: 0.7645450234413147 2023-01-22 10:56:54.198771: step: 312/469, loss: 0.4076809883117676 2023-01-22 10:56:54.846774: step: 314/469, loss: 0.3468690514564514 2023-01-22 10:56:55.469859: step: 316/469, loss: 0.6406058073043823 2023-01-22 10:56:56.095218: step: 318/469, loss: 0.12084714323282242 2023-01-22 10:56:56.753557: step: 320/469, loss: 1.0068501234054565 2023-01-22 10:56:57.431830: step: 322/469, loss: 1.2782275676727295 2023-01-22 10:56:58.043946: step: 324/469, loss: 1.1477534770965576 2023-01-22 
10:56:58.687409: step: 326/469, loss: 0.27162471413612366 2023-01-22 10:56:59.417825: step: 328/469, loss: 1.0024360418319702 2023-01-22 10:57:00.044750: step: 330/469, loss: 0.9895776510238647 2023-01-22 10:57:00.727693: step: 332/469, loss: 0.6782008409500122 2023-01-22 10:57:01.352008: step: 334/469, loss: 0.26413270831108093 2023-01-22 10:57:02.028190: step: 336/469, loss: 0.37584081292152405 2023-01-22 10:57:02.721250: step: 338/469, loss: 0.48382851481437683 2023-01-22 10:57:03.397763: step: 340/469, loss: 0.940863847732544 2023-01-22 10:57:04.025693: step: 342/469, loss: 0.08170486241579056 2023-01-22 10:57:04.637943: step: 344/469, loss: 0.3922652304172516 2023-01-22 10:57:05.300112: step: 346/469, loss: 0.3113369643688202 2023-01-22 10:57:05.998388: step: 348/469, loss: 0.5442818999290466 2023-01-22 10:57:06.652026: step: 350/469, loss: 0.45570242404937744 2023-01-22 10:57:07.360511: step: 352/469, loss: 0.16213591396808624 2023-01-22 10:57:07.990987: step: 354/469, loss: 0.3619459867477417 2023-01-22 10:57:08.680514: step: 356/469, loss: 0.23970070481300354 2023-01-22 10:57:09.332391: step: 358/469, loss: 0.7487192749977112 2023-01-22 10:57:09.930884: step: 360/469, loss: 0.5412108302116394 2023-01-22 10:57:10.548179: step: 362/469, loss: 1.0828173160552979 2023-01-22 10:57:11.175527: step: 364/469, loss: 0.610897958278656 2023-01-22 10:57:11.896856: step: 366/469, loss: 0.2275417000055313 2023-01-22 10:57:12.545174: step: 368/469, loss: 0.4648996591567993 2023-01-22 10:57:13.250270: step: 370/469, loss: 0.6641898155212402 2023-01-22 10:57:13.887003: step: 372/469, loss: 0.25401371717453003 2023-01-22 10:57:14.612845: step: 374/469, loss: 0.42171090841293335 2023-01-22 10:57:15.248555: step: 376/469, loss: 0.14892850816249847 2023-01-22 10:57:15.946939: step: 378/469, loss: 0.8934428691864014 2023-01-22 10:57:16.561489: step: 380/469, loss: 0.22290942072868347 2023-01-22 10:57:17.144205: step: 382/469, loss: 0.6647469401359558 2023-01-22 10:57:17.823687: step: 384/469, loss: 0.4884270429611206 2023-01-22 10:57:18.440539: step: 386/469, loss: 0.36011263728141785 2023-01-22 10:57:19.087498: step: 388/469, loss: 0.5522686243057251 2023-01-22 10:57:19.668889: step: 390/469, loss: 0.4173738360404968 2023-01-22 10:57:20.303827: step: 392/469, loss: 0.4940659701824188 2023-01-22 10:57:20.940838: step: 394/469, loss: 1.0775045156478882 2023-01-22 10:57:21.611365: step: 396/469, loss: 0.7616162300109863 2023-01-22 10:57:22.203807: step: 398/469, loss: 0.1997879147529602 2023-01-22 10:57:22.834277: step: 400/469, loss: 1.7772319316864014 2023-01-22 10:57:23.490991: step: 402/469, loss: 0.2205466330051422 2023-01-22 10:57:24.089203: step: 404/469, loss: 0.41694650053977966 2023-01-22 10:57:24.702662: step: 406/469, loss: 0.2993175983428955 2023-01-22 10:57:25.333096: step: 408/469, loss: 0.8849889039993286 2023-01-22 10:57:26.185562: step: 410/469, loss: 0.48914721608161926 2023-01-22 10:57:26.860302: step: 412/469, loss: 0.7150527238845825 2023-01-22 10:57:27.528270: step: 414/469, loss: 0.4554973840713501 2023-01-22 10:57:28.112829: step: 416/469, loss: 0.29352641105651855 2023-01-22 10:57:28.770994: step: 418/469, loss: 1.387034296989441 2023-01-22 10:57:29.369998: step: 420/469, loss: 0.1845788061618805 2023-01-22 10:57:29.952748: step: 422/469, loss: 0.12556815147399902 2023-01-22 10:57:30.591112: step: 424/469, loss: 1.3801896572113037 2023-01-22 10:57:31.192129: step: 426/469, loss: 0.8334442377090454 2023-01-22 10:57:31.822524: step: 428/469, loss: 0.692019522190094 2023-01-22 
10:57:32.530404: step: 430/469, loss: 0.30013611912727356 2023-01-22 10:57:33.227976: step: 432/469, loss: 0.4331887364387512 2023-01-22 10:57:33.849868: step: 434/469, loss: 0.6363438367843628 2023-01-22 10:57:34.512073: step: 436/469, loss: 0.5658471584320068 2023-01-22 10:57:35.192695: step: 438/469, loss: 0.1740536391735077 2023-01-22 10:57:35.782802: step: 440/469, loss: 4.930780410766602 2023-01-22 10:57:36.466471: step: 442/469, loss: 1.058233618736267 2023-01-22 10:57:37.188269: step: 444/469, loss: 0.6602569818496704 2023-01-22 10:57:37.856352: step: 446/469, loss: 0.3973201811313629 2023-01-22 10:57:38.452292: step: 448/469, loss: 0.6186159253120422 2023-01-22 10:57:39.066548: step: 450/469, loss: 0.491231769323349 2023-01-22 10:57:39.749412: step: 452/469, loss: 1.0658018589019775 2023-01-22 10:57:40.413859: step: 454/469, loss: 0.8787103891372681 2023-01-22 10:57:41.114308: step: 456/469, loss: 0.5756931900978088 2023-01-22 10:57:41.781010: step: 458/469, loss: 0.5747436881065369 2023-01-22 10:57:42.506236: step: 460/469, loss: 0.6343942880630493 2023-01-22 10:57:43.173341: step: 462/469, loss: 0.6502560973167419 2023-01-22 10:57:43.923214: step: 464/469, loss: 0.6004595756530762 2023-01-22 10:57:44.690214: step: 466/469, loss: 0.34474602341651917 2023-01-22 10:57:45.329615: step: 468/469, loss: 0.321304589509964 2023-01-22 10:57:46.049816: step: 470/469, loss: 0.2675541639328003 2023-01-22 10:57:46.706198: step: 472/469, loss: 0.9362136125564575 2023-01-22 10:57:47.394424: step: 474/469, loss: 0.3079529404640198 2023-01-22 10:57:48.026040: step: 476/469, loss: 3.6796469688415527 2023-01-22 10:57:48.718282: step: 478/469, loss: 1.3912123441696167 2023-01-22 10:57:49.320836: step: 480/469, loss: 2.4084930419921875 2023-01-22 10:57:49.954957: step: 482/469, loss: 0.26777970790863037 2023-01-22 10:57:50.592684: step: 484/469, loss: 0.4960557222366333 2023-01-22 10:57:51.281035: step: 486/469, loss: 0.7903075814247131 2023-01-22 10:57:51.975040: step: 488/469, loss: 0.8850454688072205 2023-01-22 10:57:52.667659: step: 490/469, loss: 0.7258738875389099 2023-01-22 10:57:53.272030: step: 492/469, loss: 0.17257800698280334 2023-01-22 10:57:53.898730: step: 494/469, loss: 0.6114130616188049 2023-01-22 10:57:54.561223: step: 496/469, loss: 0.5821794271469116 2023-01-22 10:57:55.140358: step: 498/469, loss: 0.5853869318962097 2023-01-22 10:57:55.785865: step: 500/469, loss: 0.8860760927200317 2023-01-22 10:57:56.444871: step: 502/469, loss: 1.057038426399231 2023-01-22 10:57:57.087693: step: 504/469, loss: 0.5471564531326294 2023-01-22 10:57:57.741211: step: 506/469, loss: 0.6805905103683472 2023-01-22 10:57:58.373718: step: 508/469, loss: 0.8268406987190247 2023-01-22 10:57:59.016235: step: 510/469, loss: 0.4200073480606079 2023-01-22 10:57:59.668485: step: 512/469, loss: 0.15350694954395294 2023-01-22 10:58:00.322941: step: 514/469, loss: 0.3868025839328766 2023-01-22 10:58:01.008343: step: 516/469, loss: 0.5283547043800354 2023-01-22 10:58:01.717120: step: 518/469, loss: 0.1625261753797531 2023-01-22 10:58:02.381037: step: 520/469, loss: 0.36215347051620483 2023-01-22 10:58:02.957683: step: 522/469, loss: 0.0768042579293251 2023-01-22 10:58:03.590504: step: 524/469, loss: 0.15860725939273834 2023-01-22 10:58:04.211093: step: 526/469, loss: 0.6164580583572388 2023-01-22 10:58:04.820686: step: 528/469, loss: 0.613944947719574 2023-01-22 10:58:05.417285: step: 530/469, loss: 0.27885621786117554 2023-01-22 10:58:06.172669: step: 532/469, loss: 1.2978847026824951 2023-01-22 10:58:06.871628: 
step: 534/469, loss: 0.49276411533355713 2023-01-22 10:58:07.488960: step: 536/469, loss: 0.21811699867248535 2023-01-22 10:58:08.175385: step: 538/469, loss: 0.6028008460998535 2023-01-22 10:58:08.819644: step: 540/469, loss: 1.0664536952972412 2023-01-22 10:58:09.456612: step: 542/469, loss: 1.1393210887908936 2023-01-22 10:58:10.149578: step: 544/469, loss: 0.9323995113372803 2023-01-22 10:58:10.808504: step: 546/469, loss: 0.47356510162353516 2023-01-22 10:58:11.525932: step: 548/469, loss: 1.209115982055664 2023-01-22 10:58:12.135813: step: 550/469, loss: 2.5095744132995605 2023-01-22 10:58:12.791349: step: 552/469, loss: 1.9994875192642212 2023-01-22 10:58:13.547315: step: 554/469, loss: 0.7227581143379211 2023-01-22 10:58:14.184851: step: 556/469, loss: 0.23846879601478577 2023-01-22 10:58:14.825816: step: 558/469, loss: 1.4068973064422607 2023-01-22 10:58:15.480858: step: 560/469, loss: 0.7880046963691711 2023-01-22 10:58:16.150769: step: 562/469, loss: 1.1210275888442993 2023-01-22 10:58:16.818822: step: 564/469, loss: 0.5986941456794739 2023-01-22 10:58:17.474394: step: 566/469, loss: 0.5583336353302002 2023-01-22 10:58:18.069408: step: 568/469, loss: 1.992104172706604 2023-01-22 10:58:18.705747: step: 570/469, loss: 0.7541818022727966 2023-01-22 10:58:19.379797: step: 572/469, loss: 1.3579281568527222 2023-01-22 10:58:20.064245: step: 574/469, loss: 2.0197794437408447 2023-01-22 10:58:20.739110: step: 576/469, loss: 0.7188225388526917 2023-01-22 10:58:21.392997: step: 578/469, loss: 1.6764938831329346 2023-01-22 10:58:22.026875: step: 580/469, loss: 0.7155629396438599 2023-01-22 10:58:22.709324: step: 582/469, loss: 0.28579264879226685 2023-01-22 10:58:23.373895: step: 584/469, loss: 0.20873846113681793 2023-01-22 10:58:24.005057: step: 586/469, loss: 1.1142839193344116 2023-01-22 10:58:24.671621: step: 588/469, loss: 0.480831116437912 2023-01-22 10:58:25.316725: step: 590/469, loss: 0.14513547718524933 2023-01-22 10:58:25.934524: step: 592/469, loss: 1.2173819541931152 2023-01-22 10:58:26.568438: step: 594/469, loss: 0.8949723243713379 2023-01-22 10:58:27.213218: step: 596/469, loss: 0.5596634745597839 2023-01-22 10:58:27.841347: step: 598/469, loss: 0.3700833022594452 2023-01-22 10:58:28.590309: step: 600/469, loss: 0.7241194248199463 2023-01-22 10:58:29.251583: step: 602/469, loss: 0.7003096342086792 2023-01-22 10:58:29.918278: step: 604/469, loss: 1.0163302421569824 2023-01-22 10:58:30.586208: step: 606/469, loss: 0.5491389036178589 2023-01-22 10:58:31.229418: step: 608/469, loss: 0.2459547221660614 2023-01-22 10:58:31.863079: step: 610/469, loss: 0.25904160737991333 2023-01-22 10:58:32.497825: step: 612/469, loss: 0.4976344704627991 2023-01-22 10:58:33.135303: step: 614/469, loss: 0.6955742835998535 2023-01-22 10:58:33.786412: step: 616/469, loss: 0.23495228588581085 2023-01-22 10:58:34.493435: step: 618/469, loss: 0.5154277086257935 2023-01-22 10:58:35.098537: step: 620/469, loss: 1.1025559902191162 2023-01-22 10:58:35.789500: step: 622/469, loss: 0.2690567970275879 2023-01-22 10:58:36.537689: step: 624/469, loss: 0.4573833644390106 2023-01-22 10:58:37.205614: step: 626/469, loss: 0.4949929416179657 2023-01-22 10:58:37.821725: step: 628/469, loss: 0.3546310365200043 2023-01-22 10:58:38.487419: step: 630/469, loss: 0.6758636832237244 2023-01-22 10:58:39.115454: step: 632/469, loss: 0.15792903304100037 2023-01-22 10:58:39.785955: step: 634/469, loss: 0.29518723487854004 2023-01-22 10:58:40.415503: step: 636/469, loss: 0.2186373770236969 2023-01-22 10:58:41.093192: step: 
638/469, loss: 2.310868740081787 2023-01-22 10:58:41.687854: step: 640/469, loss: 0.8885836005210876 2023-01-22 10:58:42.320091: step: 642/469, loss: 0.9277083873748779 2023-01-22 10:58:42.949422: step: 644/469, loss: 0.5045440793037415 2023-01-22 10:58:43.705642: step: 646/469, loss: 1.30831778049469 2023-01-22 10:58:44.316668: step: 648/469, loss: 0.6179578304290771 2023-01-22 10:58:45.002838: step: 650/469, loss: 0.70857834815979 2023-01-22 10:58:45.612512: step: 652/469, loss: 1.2064461708068848 2023-01-22 10:58:46.231917: step: 654/469, loss: 1.025246024131775 2023-01-22 10:58:46.817980: step: 656/469, loss: 0.4187513291835785 2023-01-22 10:58:47.517537: step: 658/469, loss: 0.2228047400712967 2023-01-22 10:58:48.204469: step: 660/469, loss: 0.6203461289405823 2023-01-22 10:58:48.862282: step: 662/469, loss: 0.19843462109565735 2023-01-22 10:58:49.507473: step: 664/469, loss: 0.7199745178222656 2023-01-22 10:58:50.168626: step: 666/469, loss: 1.5590795278549194 2023-01-22 10:58:50.866812: step: 668/469, loss: 1.0396335124969482 2023-01-22 10:58:51.543627: step: 670/469, loss: 0.20521724224090576 2023-01-22 10:58:52.197121: step: 672/469, loss: 0.23070786893367767 2023-01-22 10:58:52.850072: step: 674/469, loss: 0.8485086560249329 2023-01-22 10:58:53.527760: step: 676/469, loss: 0.34147462248802185 2023-01-22 10:58:54.214074: step: 678/469, loss: 0.39903852343559265 2023-01-22 10:58:54.826426: step: 680/469, loss: 1.0546317100524902 2023-01-22 10:58:55.467077: step: 682/469, loss: 0.28590115904808044 2023-01-22 10:58:56.083418: step: 684/469, loss: 0.31622838973999023 2023-01-22 10:58:56.743390: step: 686/469, loss: 0.9028953313827515 2023-01-22 10:58:57.414290: step: 688/469, loss: 0.6884586215019226 2023-01-22 10:58:58.059145: step: 690/469, loss: 0.5982221364974976 2023-01-22 10:58:58.802952: step: 692/469, loss: 0.451449990272522 2023-01-22 10:58:59.434446: step: 694/469, loss: 1.27504301071167 2023-01-22 10:59:00.047423: step: 696/469, loss: 0.8286892175674438 2023-01-22 10:59:00.689218: step: 698/469, loss: 0.23540833592414856 2023-01-22 10:59:01.383284: step: 700/469, loss: 1.447479486465454 2023-01-22 10:59:02.063802: step: 702/469, loss: 0.14541903138160706 2023-01-22 10:59:02.690102: step: 704/469, loss: 0.14279323816299438 2023-01-22 10:59:03.358062: step: 706/469, loss: 3.02829647064209 2023-01-22 10:59:04.046814: step: 708/469, loss: 0.4244763255119324 2023-01-22 10:59:04.823575: step: 710/469, loss: 0.16301007568836212 2023-01-22 10:59:05.386315: step: 712/469, loss: 0.4454129934310913 2023-01-22 10:59:06.120262: step: 714/469, loss: 0.23517531156539917 2023-01-22 10:59:06.775604: step: 716/469, loss: 0.27578550577163696 2023-01-22 10:59:07.480001: step: 718/469, loss: 0.26431217789649963 2023-01-22 10:59:08.085527: step: 720/469, loss: 0.34922391176223755 2023-01-22 10:59:08.755401: step: 722/469, loss: 0.8056902885437012 2023-01-22 10:59:09.420384: step: 724/469, loss: 1.5670617818832397 2023-01-22 10:59:10.105224: step: 726/469, loss: 0.3796059489250183 2023-01-22 10:59:10.851030: step: 728/469, loss: 0.5573244094848633 2023-01-22 10:59:11.542655: step: 730/469, loss: 1.4573637247085571 2023-01-22 10:59:12.226588: step: 732/469, loss: 1.4007136821746826 2023-01-22 10:59:12.885940: step: 734/469, loss: 0.3005386292934418 2023-01-22 10:59:13.528311: step: 736/469, loss: 0.37273505330085754 2023-01-22 10:59:14.211361: step: 738/469, loss: 0.8748267292976379 2023-01-22 10:59:14.902948: step: 740/469, loss: 1.02974271774292 2023-01-22 10:59:15.539263: step: 742/469, loss: 
0.961559534072876 2023-01-22 10:59:16.177106: step: 744/469, loss: 2.440967082977295 2023-01-22 10:59:16.794033: step: 746/469, loss: 0.8475446701049805 2023-01-22 10:59:17.426428: step: 748/469, loss: 0.6550959348678589 2023-01-22 10:59:18.059558: step: 750/469, loss: 0.3683474063873291 2023-01-22 10:59:18.757152: step: 752/469, loss: 1.9358210563659668 2023-01-22 10:59:19.424531: step: 754/469, loss: 0.4114786684513092 2023-01-22 10:59:20.136293: step: 756/469, loss: 0.6471695303916931 2023-01-22 10:59:20.760098: step: 758/469, loss: 0.3643433153629303 2023-01-22 10:59:21.523479: step: 760/469, loss: 0.6222134828567505 2023-01-22 10:59:22.154722: step: 762/469, loss: 0.24578146636486053 2023-01-22 10:59:22.784966: step: 764/469, loss: 0.22691859304904938 2023-01-22 10:59:23.477021: step: 766/469, loss: 1.0229220390319824 2023-01-22 10:59:24.159255: step: 768/469, loss: 0.1557282954454422 2023-01-22 10:59:24.801435: step: 770/469, loss: 0.9089685678482056 2023-01-22 10:59:25.473198: step: 772/469, loss: 1.0979036092758179 2023-01-22 10:59:26.177983: step: 774/469, loss: 1.168755054473877 2023-01-22 10:59:26.830208: step: 776/469, loss: 0.6211987733840942 2023-01-22 10:59:27.509556: step: 778/469, loss: 0.7354152798652649 2023-01-22 10:59:28.175648: step: 780/469, loss: 0.9869888424873352 2023-01-22 10:59:28.896179: step: 782/469, loss: 1.8314378261566162 2023-01-22 10:59:29.569336: step: 784/469, loss: 0.818301796913147 2023-01-22 10:59:30.296087: step: 786/469, loss: 1.2537380456924438 2023-01-22 10:59:30.937530: step: 788/469, loss: 0.8417704701423645 2023-01-22 10:59:31.582392: step: 790/469, loss: 1.2719812393188477 2023-01-22 10:59:32.202137: step: 792/469, loss: 1.78606116771698 2023-01-22 10:59:32.848540: step: 794/469, loss: 0.5059295296669006 2023-01-22 10:59:33.465320: step: 796/469, loss: 0.2696778178215027 2023-01-22 10:59:34.147418: step: 798/469, loss: 1.026119589805603 2023-01-22 10:59:34.755920: step: 800/469, loss: 1.3550070524215698 2023-01-22 10:59:35.456080: step: 802/469, loss: 0.9727777242660522 2023-01-22 10:59:36.085356: step: 804/469, loss: 0.8629355430603027 2023-01-22 10:59:36.695395: step: 806/469, loss: 1.5440435409545898 2023-01-22 10:59:37.295927: step: 808/469, loss: 0.3212146759033203 2023-01-22 10:59:37.942066: step: 810/469, loss: 0.3778342604637146 2023-01-22 10:59:38.663667: step: 812/469, loss: 0.3516838550567627 2023-01-22 10:59:39.346874: step: 814/469, loss: 1.2941991090774536 2023-01-22 10:59:40.062160: step: 816/469, loss: 1.964330792427063 2023-01-22 10:59:40.773964: step: 818/469, loss: 1.1915626525878906 2023-01-22 10:59:41.374284: step: 820/469, loss: 0.3223915994167328 2023-01-22 10:59:41.971624: step: 822/469, loss: 0.3422360420227051 2023-01-22 10:59:42.594235: step: 824/469, loss: 0.5502710342407227 2023-01-22 10:59:43.285827: step: 826/469, loss: 0.3715992271900177 2023-01-22 10:59:43.892439: step: 828/469, loss: 0.5587812662124634 2023-01-22 10:59:44.556453: step: 830/469, loss: 0.5461872816085815 2023-01-22 10:59:45.219741: step: 832/469, loss: 0.49875330924987793 2023-01-22 10:59:45.907096: step: 834/469, loss: 0.6409670114517212 2023-01-22 10:59:46.710870: step: 836/469, loss: 0.6133067607879639 2023-01-22 10:59:47.365738: step: 838/469, loss: 0.9243052005767822 2023-01-22 10:59:48.056443: step: 840/469, loss: 0.3548726439476013 2023-01-22 10:59:48.677303: step: 842/469, loss: 0.5439071655273438 2023-01-22 10:59:49.283528: step: 844/469, loss: 0.49044719338417053 2023-01-22 10:59:49.958427: step: 846/469, loss: 0.2944038510322571 
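The per-step records above follow a fixed "timestamp: step: N/total, loss: value" format, and each epoch closes with a summary block reporting an overall Loss plus per-language template/slot precision, recall, and F1 dictionaries; in those summaries the 'combined' values are consistent with the product of the template F1 and the slot F1, and each f1 field is consistent with the usual harmonic mean of the listed p and r. The sketch below is a minimal, hypothetical reader for this log, not part of train.py: the regex, helper names, and the assumption that the epoch-level Loss is an average of the per-step losses are all assumptions made here for illustration.

```python
import re

# Hypothetical helper (not from train.py): parse per-step records of the form
# "2023-01-22 10:57:32.530404: step: 430/469, loss: 0.30013611912727356"
STEP_RE = re.compile(
    r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+: step: \d+/\d+, loss: ([0-9.]+)"
)

def mean_step_loss(epoch_log_text: str) -> float:
    """Average the per-step losses found in one epoch's chunk of log text."""
    losses = [float(m.group(1)) for m in STEP_RE.finditer(epoch_log_text)]
    return sum(losses) / len(losses) if losses else float("nan")

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall, matching the f1 fields in the summaries."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    """The 'combined' values in the summaries match template F1 times slot F1."""
    return template_f1 * slot_f1

# Checked against the epoch-6 Dev Chinese summary below:
#   template f1 = 0.7368421052631579, slot f1 = 0.3081275331978237
#   logged combined = 0.22704134025102798 (their product)
print(combined(0.7368421052631579, 0.3081275331978237))
```

The epoch-level "Loss: ..." line printed after each block of step records presumably summarizes the per-step values in this way, and the f1/combined relationships can be checked directly against any of the Dev, Test, or Sample summaries that follow.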
2023-01-22 10:59:50.605154: step: 848/469, loss: 0.25485801696777344 2023-01-22 10:59:51.255716: step: 850/469, loss: 1.390325665473938 2023-01-22 10:59:51.874672: step: 852/469, loss: 0.6629309058189392 2023-01-22 10:59:52.531068: step: 854/469, loss: 0.9643487930297852 2023-01-22 10:59:53.225084: step: 856/469, loss: 0.5227541327476501 2023-01-22 10:59:53.878690: step: 858/469, loss: 0.917007565498352 2023-01-22 10:59:54.587283: step: 860/469, loss: 0.38237178325653076 2023-01-22 10:59:55.279885: step: 862/469, loss: 0.728380024433136 2023-01-22 10:59:56.070908: step: 864/469, loss: 0.5563600659370422 2023-01-22 10:59:56.706305: step: 866/469, loss: 0.7234206199645996 2023-01-22 10:59:57.341307: step: 868/469, loss: 1.027193546295166 2023-01-22 10:59:58.115736: step: 870/469, loss: 0.9818785786628723 2023-01-22 10:59:58.865469: step: 872/469, loss: 4.488598823547363 2023-01-22 10:59:59.465330: step: 874/469, loss: 0.3961482346057892 2023-01-22 11:00:00.138268: step: 876/469, loss: 0.3079904019832611 2023-01-22 11:00:00.814216: step: 878/469, loss: 0.29708707332611084 2023-01-22 11:00:01.461194: step: 880/469, loss: 0.23251432180404663 2023-01-22 11:00:02.132063: step: 882/469, loss: 0.17885802686214447 2023-01-22 11:00:02.757714: step: 884/469, loss: 1.7155933380126953 2023-01-22 11:00:03.450005: step: 886/469, loss: 2.7264580726623535 2023-01-22 11:00:04.131629: step: 888/469, loss: 0.21096928417682648 2023-01-22 11:00:04.797572: step: 890/469, loss: 1.1697462797164917 2023-01-22 11:00:05.450349: step: 892/469, loss: 0.892507791519165 2023-01-22 11:00:06.079250: step: 894/469, loss: 0.3625553846359253 2023-01-22 11:00:06.694411: step: 896/469, loss: 0.2119859904050827 2023-01-22 11:00:07.264848: step: 898/469, loss: 0.2572576701641083 2023-01-22 11:00:07.939018: step: 900/469, loss: 0.23684285581111908 2023-01-22 11:00:08.561902: step: 902/469, loss: 0.2484087198972702 2023-01-22 11:00:09.232476: step: 904/469, loss: 0.43317052721977234 2023-01-22 11:00:09.894055: step: 906/469, loss: 0.4847605228424072 2023-01-22 11:00:10.560179: step: 908/469, loss: 0.446374773979187 2023-01-22 11:00:11.240698: step: 910/469, loss: 0.252265989780426 2023-01-22 11:00:11.857540: step: 912/469, loss: 0.5236475467681885 2023-01-22 11:00:12.499159: step: 914/469, loss: 1.2354389429092407 2023-01-22 11:00:13.123707: step: 916/469, loss: 0.32738977670669556 2023-01-22 11:00:13.838548: step: 918/469, loss: 0.958543062210083 2023-01-22 11:00:14.483755: step: 920/469, loss: 0.6848151087760925 2023-01-22 11:00:15.098115: step: 922/469, loss: 0.35125288367271423 2023-01-22 11:00:15.726484: step: 924/469, loss: 0.7038163542747498 2023-01-22 11:00:16.374703: step: 926/469, loss: 0.4657997190952301 2023-01-22 11:00:17.079509: step: 928/469, loss: 1.0136849880218506 2023-01-22 11:00:17.673582: step: 930/469, loss: 0.5984984040260315 2023-01-22 11:00:18.401441: step: 932/469, loss: 0.19931790232658386 2023-01-22 11:00:19.113453: step: 934/469, loss: 2.58497953414917 2023-01-22 11:00:19.728712: step: 936/469, loss: 0.33287858963012695 2023-01-22 11:00:20.369749: step: 938/469, loss: 0.36068013310432434 ================================================== Loss: 0.701 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30441859066859067, 'r': 0.3119279676680056, 'f1': 0.3081275331978237}, 'combined': 0.22704134025102798, 'epoch': 6} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 
0.29184666381522667, 'r': 0.22295696640961965, 'f1': 0.2527924940723177}, 'combined': 0.1378868149485369, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.295257173998293, 'r': 0.3154265445181005, 'f1': 0.30500878708447515}, 'combined': 0.22474331679908693, 'epoch': 6} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2905953443234069, 'r': 0.22545732112191127, 'f1': 0.2539153549574951}, 'combined': 0.13849928452227006, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29421494335956017, 'r': 0.3059388784839449, 'f1': 0.2999623980670492}, 'combined': 0.22102492489150993, 'epoch': 6} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.28981316725978645, 'r': 0.22352470265324792, 'f1': 0.25238894628099173}, 'combined': 0.13766669797145004, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23391812865497075, 'r': 0.38095238095238093, 'f1': 0.2898550724637681}, 'combined': 0.1932367149758454, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27419354838709675, 'r': 0.3695652173913043, 'f1': 0.3148148148148148}, 'combined': 0.1574074074074074, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.2413793103448276, 'f1': 0.3111111111111111}, 'combined': 0.2074074074074074, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31895915354330706, 'r': 0.3074596774193548, 'f1': 0.31310386473429946}, 'combined': 0.23070811085685222, 'epoch': 5} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29929483395392487, 'r': 0.23494507551003432, 'f1': 0.26324445672216046}, 'combined': 0.1435878854848148, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2980769230769231, 'r': 0.33695652173913043, 'f1': 0.31632653061224486}, 'combined': 0.15816326530612243, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123600447856686, 'r': 0.30880376344086025, 'f1': 0.31057172391857507}, 'combined': 0.22884232288737108, 'epoch': 5} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29928638434735994, 'r': 0.23575990569357447, 'f1': 0.26375186993150146}, 'combined': 0.14386465632627352, 'epoch': 
5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:03:15.754945: step: 2/469, loss: 0.3034147024154663 2023-01-22 11:03:16.457250: step: 4/469, loss: 0.503424882888794 2023-01-22 11:03:17.136920: step: 6/469, loss: 0.6823604702949524 2023-01-22 11:03:17.741944: step: 8/469, loss: 0.33306610584259033 2023-01-22 11:03:18.390509: step: 10/469, loss: 0.2602163255214691 2023-01-22 11:03:19.165823: step: 12/469, loss: 0.3141564726829529 2023-01-22 11:03:19.864189: step: 14/469, loss: 0.7219853401184082 2023-01-22 11:03:20.532193: step: 16/469, loss: 0.2112475484609604 2023-01-22 11:03:21.185460: step: 18/469, loss: 0.885100781917572 2023-01-22 11:03:21.841283: step: 20/469, loss: 0.7438501119613647 2023-01-22 11:03:22.591119: step: 22/469, loss: 1.2695364952087402 2023-01-22 11:03:23.214464: step: 24/469, loss: 0.43204641342163086 2023-01-22 11:03:23.900505: step: 26/469, loss: 0.22428709268569946 2023-01-22 11:03:24.611282: step: 28/469, loss: 0.8295562267303467 2023-01-22 11:03:25.279057: step: 30/469, loss: 0.47287508845329285 2023-01-22 11:03:25.940430: step: 32/469, loss: 1.0738602876663208 2023-01-22 11:03:26.555083: step: 34/469, loss: 1.0092536211013794 2023-01-22 11:03:27.168312: step: 36/469, loss: 0.34260115027427673 2023-01-22 11:03:27.815950: step: 38/469, loss: 0.5713402628898621 2023-01-22 11:03:28.563186: step: 40/469, loss: 0.29429081082344055 2023-01-22 11:03:29.245617: step: 42/469, loss: 0.5646462440490723 2023-01-22 11:03:29.879537: step: 44/469, loss: 0.4239635467529297 2023-01-22 11:03:30.549589: step: 46/469, loss: 0.36079859733581543 2023-01-22 11:03:31.239007: step: 48/469, loss: 0.54287189245224 2023-01-22 11:03:31.945592: step: 50/469, loss: 0.838449239730835 2023-01-22 11:03:32.628934: step: 52/469, loss: 0.4343792498111725 2023-01-22 11:03:33.307658: step: 54/469, loss: 0.8721284866333008 2023-01-22 11:03:33.998588: step: 56/469, loss: 0.2720860540866852 2023-01-22 11:03:34.689952: step: 58/469, loss: 0.19316355884075165 2023-01-22 11:03:35.427874: step: 60/469, loss: 0.5932791829109192 2023-01-22 11:03:36.069623: step: 62/469, loss: 0.5315917134284973 2023-01-22 11:03:36.746369: step: 64/469, loss: 1.0495598316192627 2023-01-22 11:03:37.442503: step: 66/469, loss: 0.46429717540740967 2023-01-22 11:03:38.080599: step: 68/469, loss: 0.29560616612434387 2023-01-22 11:03:38.798885: step: 70/469, loss: 0.7473350167274475 2023-01-22 11:03:39.486381: step: 72/469, loss: 0.3408643305301666 2023-01-22 11:03:40.114781: step: 74/469, loss: 0.10760396718978882 2023-01-22 11:03:40.842872: step: 76/469, loss: 1.275162696838379 2023-01-22 11:03:41.505979: step: 78/469, loss: 0.31276509165763855 2023-01-22 11:03:42.112877: step: 80/469, loss: 0.18019674718379974 2023-01-22 11:03:42.772519: step: 82/469, loss: 0.19032791256904602 2023-01-22 11:03:43.496287: step: 84/469, loss: 0.5119924545288086 2023-01-22 11:03:44.189055: step: 86/469, loss: 0.633182168006897 2023-01-22 11:03:44.870953: step: 88/469, loss: 0.6701422333717346 2023-01-22 11:03:45.447873: step: 90/469, loss: 0.31592199206352234 2023-01-22 11:03:46.076435: step: 92/469, loss: 0.1339643895626068 2023-01-22 
11:03:46.679140: step: 94/469, loss: 1.2725943326950073 2023-01-22 11:03:47.325342: step: 96/469, loss: 1.557843565940857 2023-01-22 11:03:48.019190: step: 98/469, loss: 0.16810758411884308 2023-01-22 11:03:48.717812: step: 100/469, loss: 1.3217558860778809 2023-01-22 11:03:49.349672: step: 102/469, loss: 0.16093072295188904 2023-01-22 11:03:49.947126: step: 104/469, loss: 0.39869099855422974 2023-01-22 11:03:50.643094: step: 106/469, loss: 0.2008829414844513 2023-01-22 11:03:51.298795: step: 108/469, loss: 0.46230778098106384 2023-01-22 11:03:51.956885: step: 110/469, loss: 0.25707271695137024 2023-01-22 11:03:52.632588: step: 112/469, loss: 0.5694282650947571 2023-01-22 11:03:53.233435: step: 114/469, loss: 0.1926581710577011 2023-01-22 11:03:53.907100: step: 116/469, loss: 0.2244986742734909 2023-01-22 11:03:54.581549: step: 118/469, loss: 0.19084006547927856 2023-01-22 11:03:55.227293: step: 120/469, loss: 0.2766150236129761 2023-01-22 11:03:55.928389: step: 122/469, loss: 0.2977721095085144 2023-01-22 11:03:56.582828: step: 124/469, loss: 0.1356898695230484 2023-01-22 11:03:57.322994: step: 126/469, loss: 0.5761306285858154 2023-01-22 11:03:57.965128: step: 128/469, loss: 0.38432493805885315 2023-01-22 11:03:58.590217: step: 130/469, loss: 0.26987239718437195 2023-01-22 11:03:59.213892: step: 132/469, loss: 0.6441469788551331 2023-01-22 11:03:59.943887: step: 134/469, loss: 0.3023805022239685 2023-01-22 11:04:00.618907: step: 136/469, loss: 0.5059775114059448 2023-01-22 11:04:01.267665: step: 138/469, loss: 0.3556457459926605 2023-01-22 11:04:01.901166: step: 140/469, loss: 0.39019137620925903 2023-01-22 11:04:02.486578: step: 142/469, loss: 0.13567368686199188 2023-01-22 11:04:03.136771: step: 144/469, loss: 0.3696707785129547 2023-01-22 11:04:03.793024: step: 146/469, loss: 0.23034143447875977 2023-01-22 11:04:04.459099: step: 148/469, loss: 0.4747249186038971 2023-01-22 11:04:05.113842: step: 150/469, loss: 0.5245690941810608 2023-01-22 11:04:05.798591: step: 152/469, loss: 0.45697712898254395 2023-01-22 11:04:06.379538: step: 154/469, loss: 0.11479607969522476 2023-01-22 11:04:06.990329: step: 156/469, loss: 0.4539227783679962 2023-01-22 11:04:07.637183: step: 158/469, loss: 0.6690745949745178 2023-01-22 11:04:08.367935: step: 160/469, loss: 0.44240736961364746 2023-01-22 11:04:08.980867: step: 162/469, loss: 0.7192642688751221 2023-01-22 11:04:09.620527: step: 164/469, loss: 0.21808096766471863 2023-01-22 11:04:10.316617: step: 166/469, loss: 0.38638466596603394 2023-01-22 11:04:10.947127: step: 168/469, loss: 0.5944715142250061 2023-01-22 11:04:11.589355: step: 170/469, loss: 0.5639787316322327 2023-01-22 11:04:12.204697: step: 172/469, loss: 0.9023991823196411 2023-01-22 11:04:12.904532: step: 174/469, loss: 0.7423897981643677 2023-01-22 11:04:13.615711: step: 176/469, loss: 1.2526183128356934 2023-01-22 11:04:14.257633: step: 178/469, loss: 0.31094348430633545 2023-01-22 11:04:14.892576: step: 180/469, loss: 0.4512382745742798 2023-01-22 11:04:15.603375: step: 182/469, loss: 2.056619882583618 2023-01-22 11:04:16.272042: step: 184/469, loss: 0.5389247536659241 2023-01-22 11:04:16.855759: step: 186/469, loss: 0.3956908881664276 2023-01-22 11:04:17.537030: step: 188/469, loss: 1.105033278465271 2023-01-22 11:04:18.239540: step: 190/469, loss: 0.9648261070251465 2023-01-22 11:04:18.857115: step: 192/469, loss: 0.5629838705062866 2023-01-22 11:04:19.547502: step: 194/469, loss: 0.2560095489025116 2023-01-22 11:04:20.295180: step: 196/469, loss: 1.1309959888458252 2023-01-22 
11:04:20.915385: step: 198/469, loss: 0.9821249842643738 2023-01-22 11:04:21.509395: step: 200/469, loss: 0.2722633481025696 2023-01-22 11:04:22.156739: step: 202/469, loss: 0.3103010356426239 2023-01-22 11:04:22.783414: step: 204/469, loss: 0.3793702721595764 2023-01-22 11:04:23.404552: step: 206/469, loss: 0.43455466628074646 2023-01-22 11:04:24.046366: step: 208/469, loss: 0.6203905940055847 2023-01-22 11:04:24.732136: step: 210/469, loss: 0.18608418107032776 2023-01-22 11:04:25.428027: step: 212/469, loss: 2.0353469848632812 2023-01-22 11:04:26.113558: step: 214/469, loss: 0.3603098392486572 2023-01-22 11:04:26.743712: step: 216/469, loss: 0.5063532590866089 2023-01-22 11:04:27.347133: step: 218/469, loss: 0.2816372513771057 2023-01-22 11:04:27.945232: step: 220/469, loss: 0.22815579175949097 2023-01-22 11:04:28.581005: step: 222/469, loss: 0.2961884140968323 2023-01-22 11:04:29.297638: step: 224/469, loss: 0.37092456221580505 2023-01-22 11:04:29.949653: step: 226/469, loss: 0.8792437314987183 2023-01-22 11:04:30.682339: step: 228/469, loss: 1.2625809907913208 2023-01-22 11:04:31.315053: step: 230/469, loss: 0.34319931268692017 2023-01-22 11:04:32.024682: step: 232/469, loss: 0.42412394285202026 2023-01-22 11:04:32.676793: step: 234/469, loss: 0.2557583451271057 2023-01-22 11:04:33.333325: step: 236/469, loss: 0.2378799021244049 2023-01-22 11:04:33.983802: step: 238/469, loss: 0.7149409055709839 2023-01-22 11:04:34.677717: step: 240/469, loss: 0.27111420035362244 2023-01-22 11:04:35.384445: step: 242/469, loss: 0.5395912528038025 2023-01-22 11:04:36.005389: step: 244/469, loss: 0.6727349162101746 2023-01-22 11:04:36.692188: step: 246/469, loss: 0.5664801597595215 2023-01-22 11:04:37.268616: step: 248/469, loss: 0.36088114976882935 2023-01-22 11:04:37.940203: step: 250/469, loss: 2.157541275024414 2023-01-22 11:04:38.691992: step: 252/469, loss: 0.1849389523267746 2023-01-22 11:04:39.314124: step: 254/469, loss: 0.34292373061180115 2023-01-22 11:04:39.967600: step: 256/469, loss: 0.46344757080078125 2023-01-22 11:04:40.645911: step: 258/469, loss: 0.6474393606185913 2023-01-22 11:04:41.298210: step: 260/469, loss: 0.667199432849884 2023-01-22 11:04:42.024057: step: 262/469, loss: 0.3647635877132416 2023-01-22 11:04:42.632939: step: 264/469, loss: 0.48776039481163025 2023-01-22 11:04:43.285627: step: 266/469, loss: 0.35894811153411865 2023-01-22 11:04:43.983399: step: 268/469, loss: 0.4408510625362396 2023-01-22 11:04:44.665948: step: 270/469, loss: 0.1577748954296112 2023-01-22 11:04:45.337427: step: 272/469, loss: 9.59257698059082 2023-01-22 11:04:45.981825: step: 274/469, loss: 0.34344255924224854 2023-01-22 11:04:46.594277: step: 276/469, loss: 0.7591780424118042 2023-01-22 11:04:47.206748: step: 278/469, loss: 0.6208717226982117 2023-01-22 11:04:47.896825: step: 280/469, loss: 1.1715564727783203 2023-01-22 11:04:48.578525: step: 282/469, loss: 0.7485136389732361 2023-01-22 11:04:49.168930: step: 284/469, loss: 0.25127243995666504 2023-01-22 11:04:49.832894: step: 286/469, loss: 0.49830514192581177 2023-01-22 11:04:50.482974: step: 288/469, loss: 0.6897481679916382 2023-01-22 11:04:51.265086: step: 290/469, loss: 0.3550090789794922 2023-01-22 11:04:51.907992: step: 292/469, loss: 0.48031723499298096 2023-01-22 11:04:52.565270: step: 294/469, loss: 0.18158164620399475 2023-01-22 11:04:53.177578: step: 296/469, loss: 0.22862082719802856 2023-01-22 11:04:53.814179: step: 298/469, loss: 0.3542085289955139 2023-01-22 11:04:54.440012: step: 300/469, loss: 1.4608161449432373 2023-01-22 
11:04:55.081395: step: 302/469, loss: 0.24203746020793915 2023-01-22 11:04:55.693874: step: 304/469, loss: 0.3416796922683716 2023-01-22 11:04:56.461136: step: 306/469, loss: 0.1868167668581009 2023-01-22 11:04:57.041317: step: 308/469, loss: 0.9778412580490112 2023-01-22 11:04:57.744690: step: 310/469, loss: 0.13471634685993195 2023-01-22 11:04:58.425929: step: 312/469, loss: 0.4944702088832855 2023-01-22 11:04:59.116024: step: 314/469, loss: 0.7598327994346619 2023-01-22 11:04:59.824346: step: 316/469, loss: 0.38385286927223206 2023-01-22 11:05:00.491719: step: 318/469, loss: 0.13890638947486877 2023-01-22 11:05:01.131132: step: 320/469, loss: 0.3973208963871002 2023-01-22 11:05:01.766820: step: 322/469, loss: 0.3698139488697052 2023-01-22 11:05:02.420620: step: 324/469, loss: 0.5473549962043762 2023-01-22 11:05:03.096220: step: 326/469, loss: 0.36224818229675293 2023-01-22 11:05:03.696015: step: 328/469, loss: 0.20923873782157898 2023-01-22 11:05:04.348196: step: 330/469, loss: 0.5139366388320923 2023-01-22 11:05:05.007148: step: 332/469, loss: 0.6960780024528503 2023-01-22 11:05:05.644026: step: 334/469, loss: 1.0095466375350952 2023-01-22 11:05:06.319570: step: 336/469, loss: 0.6473803520202637 2023-01-22 11:05:06.971325: step: 338/469, loss: 0.22387218475341797 2023-01-22 11:05:07.643905: step: 340/469, loss: 0.21535411477088928 2023-01-22 11:05:08.305053: step: 342/469, loss: 0.24951550364494324 2023-01-22 11:05:08.983258: step: 344/469, loss: 0.7308018803596497 2023-01-22 11:05:09.756763: step: 346/469, loss: 1.2594232559204102 2023-01-22 11:05:10.443145: step: 348/469, loss: 0.3323275148868561 2023-01-22 11:05:11.130095: step: 350/469, loss: 0.2357524037361145 2023-01-22 11:05:11.688834: step: 352/469, loss: 0.40152549743652344 2023-01-22 11:05:12.342420: step: 354/469, loss: 0.29077887535095215 2023-01-22 11:05:12.986737: step: 356/469, loss: 0.2026900053024292 2023-01-22 11:05:13.691803: step: 358/469, loss: 0.5275459885597229 2023-01-22 11:05:14.373040: step: 360/469, loss: 0.5878629684448242 2023-01-22 11:05:15.072051: step: 362/469, loss: 0.5410671234130859 2023-01-22 11:05:15.706816: step: 364/469, loss: 0.27543145418167114 2023-01-22 11:05:16.386396: step: 366/469, loss: 0.07763580977916718 2023-01-22 11:05:17.099474: step: 368/469, loss: 2.3904075622558594 2023-01-22 11:05:17.771821: step: 370/469, loss: 0.9165710210800171 2023-01-22 11:05:18.460993: step: 372/469, loss: 0.5919375419616699 2023-01-22 11:05:19.199316: step: 374/469, loss: 0.24279849231243134 2023-01-22 11:05:19.900581: step: 376/469, loss: 0.5104106664657593 2023-01-22 11:05:20.581536: step: 378/469, loss: 1.4496835470199585 2023-01-22 11:05:21.216935: step: 380/469, loss: 0.19177675247192383 2023-01-22 11:05:21.878411: step: 382/469, loss: 0.7152149081230164 2023-01-22 11:05:22.646403: step: 384/469, loss: 0.21729762852191925 2023-01-22 11:05:23.433066: step: 386/469, loss: 0.47995713353157043 2023-01-22 11:05:24.070044: step: 388/469, loss: 0.2849056124687195 2023-01-22 11:05:24.761085: step: 390/469, loss: 0.30151689052581787 2023-01-22 11:05:25.453745: step: 392/469, loss: 0.9165915250778198 2023-01-22 11:05:26.036200: step: 394/469, loss: 0.1873292773962021 2023-01-22 11:05:26.681004: step: 396/469, loss: 0.133287250995636 2023-01-22 11:05:27.376091: step: 398/469, loss: 0.5160813927650452 2023-01-22 11:05:28.034483: step: 400/469, loss: 1.4087209701538086 2023-01-22 11:05:28.668797: step: 402/469, loss: 0.40327349305152893 2023-01-22 11:05:29.369039: step: 404/469, loss: 0.23506242036819458 2023-01-22 
11:05:30.041379: step: 406/469, loss: 0.38241589069366455 2023-01-22 11:05:30.729492: step: 408/469, loss: 0.4073394536972046 2023-01-22 11:05:31.433364: step: 410/469, loss: 0.26036936044692993 2023-01-22 11:05:32.096674: step: 412/469, loss: 0.8148781061172485 2023-01-22 11:05:32.787627: step: 414/469, loss: 0.20191913843154907 2023-01-22 11:05:33.483944: step: 416/469, loss: 0.22603236138820648 2023-01-22 11:05:34.216512: step: 418/469, loss: 0.900985598564148 2023-01-22 11:05:34.855751: step: 420/469, loss: 0.4961312413215637 2023-01-22 11:05:35.453720: step: 422/469, loss: 0.46023181080818176 2023-01-22 11:05:36.067871: step: 424/469, loss: 0.5713052153587341 2023-01-22 11:05:36.677661: step: 426/469, loss: 1.788339376449585 2023-01-22 11:05:37.409459: step: 428/469, loss: 0.22763192653656006 2023-01-22 11:05:38.075386: step: 430/469, loss: 0.2552791237831116 2023-01-22 11:05:38.767289: step: 432/469, loss: 0.8556559085845947 2023-01-22 11:05:39.400459: step: 434/469, loss: 0.6025184392929077 2023-01-22 11:05:40.182920: step: 436/469, loss: 0.3650805652141571 2023-01-22 11:05:40.851131: step: 438/469, loss: 0.4491713047027588 2023-01-22 11:05:41.577144: step: 440/469, loss: 0.7329959869384766 2023-01-22 11:05:42.265291: step: 442/469, loss: 0.7308976650238037 2023-01-22 11:05:42.958260: step: 444/469, loss: 1.1651263236999512 2023-01-22 11:05:43.667688: step: 446/469, loss: 0.14074893295764923 2023-01-22 11:05:44.364636: step: 448/469, loss: 0.17097920179367065 2023-01-22 11:05:44.981277: step: 450/469, loss: 0.7660448551177979 2023-01-22 11:05:45.581110: step: 452/469, loss: 0.12841929495334625 2023-01-22 11:05:46.263570: step: 454/469, loss: 0.4611152112483978 2023-01-22 11:05:46.859710: step: 456/469, loss: 0.16392871737480164 2023-01-22 11:05:47.518642: step: 458/469, loss: 0.2862749397754669 2023-01-22 11:05:48.197578: step: 460/469, loss: 0.6968491673469543 2023-01-22 11:05:48.814297: step: 462/469, loss: 0.27910956740379333 2023-01-22 11:05:49.518300: step: 464/469, loss: 0.3194659948348999 2023-01-22 11:05:50.113177: step: 466/469, loss: 0.5117559432983398 2023-01-22 11:05:50.780208: step: 468/469, loss: 0.18484525382518768 2023-01-22 11:05:51.382080: step: 470/469, loss: 2.5478875637054443 2023-01-22 11:05:52.011972: step: 472/469, loss: 0.24330207705497742 2023-01-22 11:05:52.643295: step: 474/469, loss: 4.195929050445557 2023-01-22 11:05:53.313078: step: 476/469, loss: 0.2522517740726471 2023-01-22 11:05:53.944820: step: 478/469, loss: 2.127666473388672 2023-01-22 11:05:54.628319: step: 480/469, loss: 0.5200915932655334 2023-01-22 11:05:55.301233: step: 482/469, loss: 0.43802380561828613 2023-01-22 11:05:55.877846: step: 484/469, loss: 0.4296901822090149 2023-01-22 11:05:56.501863: step: 486/469, loss: 0.5036375522613525 2023-01-22 11:05:57.108255: step: 488/469, loss: 0.4596751630306244 2023-01-22 11:05:57.746051: step: 490/469, loss: 0.3468911647796631 2023-01-22 11:05:58.441337: step: 492/469, loss: 2.6914615631103516 2023-01-22 11:05:59.051124: step: 494/469, loss: 0.3425523638725281 2023-01-22 11:05:59.706667: step: 496/469, loss: 0.5966498255729675 2023-01-22 11:06:00.392672: step: 498/469, loss: 0.12489598244428635 2023-01-22 11:06:01.154007: step: 500/469, loss: 0.14763931930065155 2023-01-22 11:06:01.769725: step: 502/469, loss: 0.2640705108642578 2023-01-22 11:06:02.442328: step: 504/469, loss: 0.6138185858726501 2023-01-22 11:06:03.070961: step: 506/469, loss: 0.25276580452919006 2023-01-22 11:06:03.674897: step: 508/469, loss: 0.47454220056533813 2023-01-22 
11:06:04.475298: step: 510/469, loss: 0.2023448944091797 2023-01-22 11:06:05.219070: step: 512/469, loss: 0.3868090510368347 2023-01-22 11:06:05.911405: step: 514/469, loss: 0.7646952271461487 2023-01-22 11:06:06.564657: step: 516/469, loss: 0.2611725926399231 2023-01-22 11:06:07.175286: step: 518/469, loss: 0.29690372943878174 2023-01-22 11:06:07.814334: step: 520/469, loss: 0.9276142716407776 2023-01-22 11:06:08.459355: step: 522/469, loss: 0.3030405640602112 2023-01-22 11:06:09.113042: step: 524/469, loss: 0.3247101604938507 2023-01-22 11:06:09.790167: step: 526/469, loss: 0.1622537225484848 2023-01-22 11:06:10.475750: step: 528/469, loss: 0.32321006059646606 2023-01-22 11:06:11.101811: step: 530/469, loss: 0.15134075284004211 2023-01-22 11:06:11.725557: step: 532/469, loss: 0.24118146300315857 2023-01-22 11:06:12.318897: step: 534/469, loss: 0.22268259525299072 2023-01-22 11:06:12.960166: step: 536/469, loss: 0.3385399281978607 2023-01-22 11:06:13.587887: step: 538/469, loss: 1.840336799621582 2023-01-22 11:06:14.266873: step: 540/469, loss: 0.31376150250434875 2023-01-22 11:06:14.978327: step: 542/469, loss: 0.26314258575439453 2023-01-22 11:06:15.585606: step: 544/469, loss: 0.7010689377784729 2023-01-22 11:06:16.246454: step: 546/469, loss: 0.7424901127815247 2023-01-22 11:06:16.935220: step: 548/469, loss: 0.6966558694839478 2023-01-22 11:06:17.605948: step: 550/469, loss: 0.2281423956155777 2023-01-22 11:06:18.330195: step: 552/469, loss: 0.4474644362926483 2023-01-22 11:06:19.028638: step: 554/469, loss: 0.8803151845932007 2023-01-22 11:06:19.656673: step: 556/469, loss: 0.2599521279335022 2023-01-22 11:06:20.411929: step: 558/469, loss: 0.6703866124153137 2023-01-22 11:06:21.015046: step: 560/469, loss: 0.5517482161521912 2023-01-22 11:06:21.626960: step: 562/469, loss: 0.44817402958869934 2023-01-22 11:06:22.344580: step: 564/469, loss: 0.5980182886123657 2023-01-22 11:06:22.907817: step: 566/469, loss: 0.15622496604919434 2023-01-22 11:06:23.569997: step: 568/469, loss: 0.5188809633255005 2023-01-22 11:06:24.192440: step: 570/469, loss: 0.5812439918518066 2023-01-22 11:06:24.770079: step: 572/469, loss: 0.2790174186229706 2023-01-22 11:06:25.390313: step: 574/469, loss: 0.2066507637500763 2023-01-22 11:06:26.051137: step: 576/469, loss: 0.1465393453836441 2023-01-22 11:06:26.682653: step: 578/469, loss: 0.2461715191602707 2023-01-22 11:06:27.336824: step: 580/469, loss: 0.19781449437141418 2023-01-22 11:06:28.040632: step: 582/469, loss: 0.38093888759613037 2023-01-22 11:06:28.739979: step: 584/469, loss: 0.46796655654907227 2023-01-22 11:06:29.440643: step: 586/469, loss: 0.4513213634490967 2023-01-22 11:06:30.110476: step: 588/469, loss: 0.25914958119392395 2023-01-22 11:06:30.746018: step: 590/469, loss: 0.8821139931678772 2023-01-22 11:06:31.392330: step: 592/469, loss: 0.4917592406272888 2023-01-22 11:06:32.098490: step: 594/469, loss: 0.27000677585601807 2023-01-22 11:06:32.838242: step: 596/469, loss: 0.28437694907188416 2023-01-22 11:06:33.475781: step: 598/469, loss: 0.5301434397697449 2023-01-22 11:06:34.060934: step: 600/469, loss: 0.7056336402893066 2023-01-22 11:06:34.807227: step: 602/469, loss: 0.4812750518321991 2023-01-22 11:06:35.423746: step: 604/469, loss: 1.2242769002914429 2023-01-22 11:06:36.084674: step: 606/469, loss: 0.10403191298246384 2023-01-22 11:06:36.732167: step: 608/469, loss: 0.1673997938632965 2023-01-22 11:06:37.431318: step: 610/469, loss: 0.5051147937774658 2023-01-22 11:06:38.005775: step: 612/469, loss: 0.2315412163734436 2023-01-22 
11:06:38.718050: step: 614/469, loss: 0.2814974784851074 2023-01-22 11:06:39.423672: step: 616/469, loss: 0.9631189107894897 2023-01-22 11:06:40.093961: step: 618/469, loss: 0.25975683331489563 2023-01-22 11:06:40.711541: step: 620/469, loss: 1.003028154373169 2023-01-22 11:06:41.341594: step: 622/469, loss: 0.23933352530002594 2023-01-22 11:06:41.975317: step: 624/469, loss: 0.38538992404937744 2023-01-22 11:06:42.759649: step: 626/469, loss: 0.2421412318944931 2023-01-22 11:06:43.501394: step: 628/469, loss: 0.3802891969680786 2023-01-22 11:06:44.159952: step: 630/469, loss: 0.4327777624130249 2023-01-22 11:06:44.833018: step: 632/469, loss: 1.3673803806304932 2023-01-22 11:06:45.453402: step: 634/469, loss: 0.2792380154132843 2023-01-22 11:06:46.126353: step: 636/469, loss: 0.6696301698684692 2023-01-22 11:06:46.731252: step: 638/469, loss: 0.40074968338012695 2023-01-22 11:06:47.365633: step: 640/469, loss: 1.3802582025527954 2023-01-22 11:06:47.994643: step: 642/469, loss: 0.7367796897888184 2023-01-22 11:06:48.640299: step: 644/469, loss: 0.8440686464309692 2023-01-22 11:06:49.295925: step: 646/469, loss: 0.15096645057201385 2023-01-22 11:06:49.943096: step: 648/469, loss: 0.35317832231521606 2023-01-22 11:06:50.601938: step: 650/469, loss: 0.1401774138212204 2023-01-22 11:06:51.226475: step: 652/469, loss: 0.18275809288024902 2023-01-22 11:06:51.855789: step: 654/469, loss: 1.1960855722427368 2023-01-22 11:06:52.451918: step: 656/469, loss: 0.44214916229248047 2023-01-22 11:06:53.148250: step: 658/469, loss: 0.833315908908844 2023-01-22 11:06:53.734777: step: 660/469, loss: 0.2281283736228943 2023-01-22 11:06:54.324639: step: 662/469, loss: 0.651561975479126 2023-01-22 11:06:55.002421: step: 664/469, loss: 0.7029745578765869 2023-01-22 11:06:55.679446: step: 666/469, loss: 0.6088948845863342 2023-01-22 11:06:56.341745: step: 668/469, loss: 0.4874654710292816 2023-01-22 11:06:57.037726: step: 670/469, loss: 0.3011801838874817 2023-01-22 11:06:57.752110: step: 672/469, loss: 0.26479166746139526 2023-01-22 11:06:58.438163: step: 674/469, loss: 0.2449568808078766 2023-01-22 11:06:59.079085: step: 676/469, loss: 1.486423373222351 2023-01-22 11:06:59.781631: step: 678/469, loss: 0.18135876953601837 2023-01-22 11:07:00.461043: step: 680/469, loss: 0.2924414873123169 2023-01-22 11:07:01.065573: step: 682/469, loss: 1.2524816989898682 2023-01-22 11:07:01.697476: step: 684/469, loss: 0.6766750812530518 2023-01-22 11:07:02.334705: step: 686/469, loss: 0.6198279857635498 2023-01-22 11:07:02.919975: step: 688/469, loss: 0.6449093222618103 2023-01-22 11:07:03.572304: step: 690/469, loss: 0.23987683653831482 2023-01-22 11:07:04.259125: step: 692/469, loss: 0.9177189469337463 2023-01-22 11:07:04.893742: step: 694/469, loss: 0.8281612992286682 2023-01-22 11:07:05.523646: step: 696/469, loss: 0.5510213375091553 2023-01-22 11:07:06.123830: step: 698/469, loss: 0.09462345391511917 2023-01-22 11:07:06.797960: step: 700/469, loss: 0.2250729352235794 2023-01-22 11:07:07.434334: step: 702/469, loss: 0.13198135793209076 2023-01-22 11:07:08.071356: step: 704/469, loss: 0.7246847748756409 2023-01-22 11:07:08.696765: step: 706/469, loss: 0.7825203537940979 2023-01-22 11:07:09.383382: step: 708/469, loss: 0.7200645804405212 2023-01-22 11:07:10.072456: step: 710/469, loss: 0.39589351415634155 2023-01-22 11:07:10.741077: step: 712/469, loss: 0.33914634585380554 2023-01-22 11:07:11.440732: step: 714/469, loss: 0.9127599596977234 2023-01-22 11:07:12.097412: step: 716/469, loss: 0.6256756782531738 2023-01-22 
11:07:12.784153: step: 718/469, loss: 0.6755963563919067 2023-01-22 11:07:13.425047: step: 720/469, loss: 0.6067854762077332 2023-01-22 11:07:14.042057: step: 722/469, loss: 0.2231627255678177 2023-01-22 11:07:14.754645: step: 724/469, loss: 0.5573198795318604 2023-01-22 11:07:15.382807: step: 726/469, loss: 1.5285015106201172 2023-01-22 11:07:15.998879: step: 728/469, loss: 0.2820298671722412 2023-01-22 11:07:16.561616: step: 730/469, loss: 0.335647851228714 2023-01-22 11:07:17.157416: step: 732/469, loss: 0.14330941438674927 2023-01-22 11:07:17.806291: step: 734/469, loss: 0.40038958191871643 2023-01-22 11:07:18.454627: step: 736/469, loss: 0.1632099747657776 2023-01-22 11:07:19.094412: step: 738/469, loss: 0.11054205894470215 2023-01-22 11:07:19.711632: step: 740/469, loss: 0.3448159694671631 2023-01-22 11:07:20.403460: step: 742/469, loss: 1.9047409296035767 2023-01-22 11:07:21.043819: step: 744/469, loss: 0.5617899894714355 2023-01-22 11:07:21.731369: step: 746/469, loss: 0.39616265892982483 2023-01-22 11:07:22.367395: step: 748/469, loss: 0.6567694544792175 2023-01-22 11:07:23.033392: step: 750/469, loss: 3.6219592094421387 2023-01-22 11:07:23.684379: step: 752/469, loss: 0.3712438642978668 2023-01-22 11:07:24.389872: step: 754/469, loss: 0.2743932902812958 2023-01-22 11:07:25.023593: step: 756/469, loss: 0.2320937067270279 2023-01-22 11:07:25.655060: step: 758/469, loss: 0.2250518500804901 2023-01-22 11:07:26.296778: step: 760/469, loss: 0.6834797859191895 2023-01-22 11:07:26.894048: step: 762/469, loss: 1.64217209815979 2023-01-22 11:07:27.561612: step: 764/469, loss: 0.3303803503513336 2023-01-22 11:07:28.185599: step: 766/469, loss: 1.1484766006469727 2023-01-22 11:07:28.871736: step: 768/469, loss: 0.3432031571865082 2023-01-22 11:07:29.533504: step: 770/469, loss: 0.21072743833065033 2023-01-22 11:07:30.150868: step: 772/469, loss: 0.18219800293445587 2023-01-22 11:07:30.794709: step: 774/469, loss: 0.22114837169647217 2023-01-22 11:07:31.431815: step: 776/469, loss: 0.7861373424530029 2023-01-22 11:07:32.075501: step: 778/469, loss: 0.21906259655952454 2023-01-22 11:07:32.798051: step: 780/469, loss: 0.7411484718322754 2023-01-22 11:07:33.447818: step: 782/469, loss: 1.7447690963745117 2023-01-22 11:07:34.083194: step: 784/469, loss: 1.0749149322509766 2023-01-22 11:07:34.736724: step: 786/469, loss: 0.41501766443252563 2023-01-22 11:07:35.385239: step: 788/469, loss: 0.2512631416320801 2023-01-22 11:07:36.046296: step: 790/469, loss: 0.4638896882534027 2023-01-22 11:07:36.635860: step: 792/469, loss: 1.6946717500686646 2023-01-22 11:07:37.231295: step: 794/469, loss: 0.2728288471698761 2023-01-22 11:07:37.893653: step: 796/469, loss: 0.19500260055065155 2023-01-22 11:07:38.551589: step: 798/469, loss: 0.18098615109920502 2023-01-22 11:07:39.241312: step: 800/469, loss: 4.3521904945373535 2023-01-22 11:07:39.904813: step: 802/469, loss: 2.5424928665161133 2023-01-22 11:07:40.482077: step: 804/469, loss: 0.6280380487442017 2023-01-22 11:07:41.122040: step: 806/469, loss: 0.16399222612380981 2023-01-22 11:07:41.746529: step: 808/469, loss: 0.39004218578338623 2023-01-22 11:07:42.502265: step: 810/469, loss: 0.44297417998313904 2023-01-22 11:07:43.181380: step: 812/469, loss: 0.6048476696014404 2023-01-22 11:07:43.815892: step: 814/469, loss: 0.30073437094688416 2023-01-22 11:07:44.488913: step: 816/469, loss: 0.32141587138175964 2023-01-22 11:07:45.087712: step: 818/469, loss: 0.27014002203941345 2023-01-22 11:07:45.724610: step: 820/469, loss: 0.15073515474796295 2023-01-22 
11:07:46.383576: step: 822/469, loss: 0.09293115139007568 2023-01-22 11:07:47.067454: step: 824/469, loss: 0.18213669955730438 2023-01-22 11:07:47.657351: step: 826/469, loss: 0.26865479350090027 2023-01-22 11:07:48.271980: step: 828/469, loss: 1.8637187480926514 2023-01-22 11:07:48.952104: step: 830/469, loss: 1.0831323862075806 2023-01-22 11:07:49.668883: step: 832/469, loss: 1.0688282251358032 2023-01-22 11:07:50.329496: step: 834/469, loss: 0.4998587965965271 2023-01-22 11:07:51.018328: step: 836/469, loss: 0.9172436594963074 2023-01-22 11:07:51.687066: step: 838/469, loss: 0.5721681118011475 2023-01-22 11:07:52.414363: step: 840/469, loss: 1.643479824066162 2023-01-22 11:07:53.028548: step: 842/469, loss: 0.251740425825119 2023-01-22 11:07:53.610930: step: 844/469, loss: 0.3436243236064911 2023-01-22 11:07:54.213215: step: 846/469, loss: 1.3460259437561035 2023-01-22 11:07:54.878471: step: 848/469, loss: 0.17538982629776 2023-01-22 11:07:55.503688: step: 850/469, loss: 0.5882743000984192 2023-01-22 11:07:56.161416: step: 852/469, loss: 0.28470170497894287 2023-01-22 11:07:56.837011: step: 854/469, loss: 0.8674442172050476 2023-01-22 11:07:57.501068: step: 856/469, loss: 0.7559859752655029 2023-01-22 11:07:58.156611: step: 858/469, loss: 0.2747848629951477 2023-01-22 11:07:58.840552: step: 860/469, loss: 0.6750397086143494 2023-01-22 11:07:59.456375: step: 862/469, loss: 0.4371977150440216 2023-01-22 11:07:59.999778: step: 864/469, loss: 1.105844259262085 2023-01-22 11:08:00.675821: step: 866/469, loss: 0.44471046328544617 2023-01-22 11:08:01.283024: step: 868/469, loss: 0.62237948179245 2023-01-22 11:08:01.923433: step: 870/469, loss: 0.30178648233413696 2023-01-22 11:08:02.595209: step: 872/469, loss: 0.31751108169555664 2023-01-22 11:08:03.259997: step: 874/469, loss: 0.19263657927513123 2023-01-22 11:08:03.918912: step: 876/469, loss: 0.5150735378265381 2023-01-22 11:08:04.630653: step: 878/469, loss: 0.6839023232460022 2023-01-22 11:08:05.287295: step: 880/469, loss: 0.5650581121444702 2023-01-22 11:08:05.981216: step: 882/469, loss: 0.7526754140853882 2023-01-22 11:08:06.643617: step: 884/469, loss: 0.201514333486557 2023-01-22 11:08:07.292489: step: 886/469, loss: 0.42249104380607605 2023-01-22 11:08:07.954350: step: 888/469, loss: 0.30510246753692627 2023-01-22 11:08:08.619638: step: 890/469, loss: 0.5134368538856506 2023-01-22 11:08:09.282346: step: 892/469, loss: 0.8806252479553223 2023-01-22 11:08:09.938464: step: 894/469, loss: 1.4703314304351807 2023-01-22 11:08:10.586866: step: 896/469, loss: 0.2756319046020508 2023-01-22 11:08:11.238508: step: 898/469, loss: 0.8927412629127502 2023-01-22 11:08:11.878109: step: 900/469, loss: 0.3346920609474182 2023-01-22 11:08:12.541509: step: 902/469, loss: 0.21807126700878143 2023-01-22 11:08:13.139988: step: 904/469, loss: 0.13336999714374542 2023-01-22 11:08:13.779945: step: 906/469, loss: 0.058336127549409866 2023-01-22 11:08:14.549908: step: 908/469, loss: 0.17020748555660248 2023-01-22 11:08:15.207775: step: 910/469, loss: 0.4886654317378998 2023-01-22 11:08:15.852052: step: 912/469, loss: 0.17047971487045288 2023-01-22 11:08:16.599123: step: 914/469, loss: 0.2284172922372818 2023-01-22 11:08:17.272454: step: 916/469, loss: 1.0500428676605225 2023-01-22 11:08:17.909923: step: 918/469, loss: 2.9921510219573975 2023-01-22 11:08:18.503432: step: 920/469, loss: 0.35923266410827637 2023-01-22 11:08:19.103054: step: 922/469, loss: 0.4361855983734131 2023-01-22 11:08:19.762417: step: 924/469, loss: 0.23665739595890045 2023-01-22 
11:08:20.345716: step: 926/469, loss: 1.2017784118652344 2023-01-22 11:08:20.978075: step: 928/469, loss: 0.22746577858924866 2023-01-22 11:08:21.617530: step: 930/469, loss: 0.16186939179897308 2023-01-22 11:08:22.278065: step: 932/469, loss: 0.32564249634742737 2023-01-22 11:08:22.980286: step: 934/469, loss: 1.1063103675842285 2023-01-22 11:08:23.626820: step: 936/469, loss: 0.36923596262931824 2023-01-22 11:08:24.295177: step: 938/469, loss: 0.5784530639648438 ================================================== Loss: 0.587 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29494800250887965, 'r': 0.3425202609780538, 'f1': 0.31695904747222886}, 'combined': 0.2335487718216423, 'epoch': 7} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2833828324089342, 'r': 0.24708493987714022, 'f1': 0.26399202276218403}, 'combined': 0.1439956487793731, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28406668165643617, 'r': 0.3363521999119852, 'f1': 0.30800627168308636}, 'combined': 0.22695198966122151, 'epoch': 7} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2926828560503639, 'r': 0.2546581849074987, 'f1': 0.2723497026456909}, 'combined': 0.14855438326128595, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29479481623436987, 'r': 0.3423423672399134, 'f1': 0.3167944293861885}, 'combined': 0.23342747428455995, 'epoch': 7} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.28319040194040196, 'r': 0.24924900884415982, 'f1': 0.26513787510137876}, 'combined': 0.1446206591462066, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24293785310734461, 'r': 0.4095238095238095, 'f1': 0.3049645390070922}, 'combined': 0.2033096926713948, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24193548387096775, 'r': 0.32608695652173914, 'f1': 0.2777777777777778}, 'combined': 0.1388888888888889, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31895915354330706, 'r': 0.3074596774193548, 'f1': 0.31310386473429946}, 'combined': 0.23070811085685222, 'epoch': 5} Test for Korean: 
{'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29929483395392487, 'r': 0.23494507551003432, 'f1': 0.26324445672216046}, 'combined': 0.1435878854848148, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2980769230769231, 'r': 0.33695652173913043, 'f1': 0.31632653061224486}, 'combined': 0.15816326530612243, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123600447856686, 'r': 0.30880376344086025, 'f1': 0.31057172391857507}, 'combined': 0.22884232288737108, 'epoch': 5} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29928638434735994, 'r': 0.23575990569357447, 'f1': 0.26375186993150146}, 'combined': 0.14386465632627352, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:11:24.412882: step: 2/469, loss: 0.9961361289024353 2023-01-22 11:11:25.025641: step: 4/469, loss: 0.21724113821983337 2023-01-22 11:11:25.649100: step: 6/469, loss: 0.38133177161216736 2023-01-22 11:11:26.228075: step: 8/469, loss: 0.14478692412376404 2023-01-22 11:11:26.899620: step: 10/469, loss: 0.33363667130470276 2023-01-22 11:11:27.650570: step: 12/469, loss: 0.589388906955719 2023-01-22 11:11:28.345710: step: 14/469, loss: 0.5699613094329834 2023-01-22 11:11:28.983592: step: 16/469, loss: 0.1767663210630417 2023-01-22 11:11:29.633839: step: 18/469, loss: 0.5059394240379333 2023-01-22 11:11:30.360657: step: 20/469, loss: 0.25279659032821655 2023-01-22 11:11:31.074111: step: 22/469, loss: 0.4721870422363281 2023-01-22 11:11:31.680588: step: 24/469, loss: 0.21340186893939972 2023-01-22 11:11:32.386324: step: 26/469, loss: 0.29136303067207336 2023-01-22 11:11:33.056045: step: 28/469, loss: 0.2648336887359619 2023-01-22 11:11:33.679855: step: 30/469, loss: 0.3934226930141449 2023-01-22 11:11:34.395606: step: 32/469, loss: 0.4900856912136078 2023-01-22 11:11:35.054604: step: 34/469, loss: 0.32868123054504395 2023-01-22 11:11:35.705055: step: 36/469, loss: 0.13562451303005219 2023-01-22 11:11:36.355888: step: 38/469, loss: 0.5227346420288086 2023-01-22 11:11:36.970197: step: 40/469, loss: 0.08767542988061905 2023-01-22 11:11:37.653507: step: 42/469, loss: 2.7418696880340576 2023-01-22 11:11:38.285421: step: 44/469, loss: 0.21661318838596344 2023-01-22 11:11:38.923216: step: 46/469, loss: 0.44279196858406067 2023-01-22 11:11:39.594301: step: 48/469, loss: 0.2080720067024231 2023-01-22 11:11:40.283767: step: 50/469, loss: 0.1683308333158493 2023-01-22 11:11:40.903056: step: 52/469, loss: 0.20721568167209625 2023-01-22 11:11:41.544197: step: 54/469, loss: 0.1343807578086853 2023-01-22 11:11:42.139398: step: 56/469, loss: 0.19176188111305237 2023-01-22 11:11:42.757953: step: 58/469, loss: 0.14346924424171448 2023-01-22 11:11:43.445030: step: 60/469, loss: 0.14417722821235657 2023-01-22 11:11:44.070349: step: 62/469, loss: 0.28534001111984253 2023-01-22 11:11:44.671454: step: 64/469, loss: 0.20595861971378326 2023-01-22 11:11:45.379155: step: 66/469, 
loss: 0.39788079261779785 2023-01-22 11:11:46.087757: step: 68/469, loss: 0.42548391222953796 2023-01-22 11:11:46.688676: step: 70/469, loss: 0.1159709244966507 2023-01-22 11:11:47.321284: step: 72/469, loss: 0.18989577889442444 2023-01-22 11:11:48.044243: step: 74/469, loss: 0.5328851342201233 2023-01-22 11:11:48.702256: step: 76/469, loss: 0.19202660024166107 2023-01-22 11:11:49.314362: step: 78/469, loss: 0.11115836352109909 2023-01-22 11:11:49.970653: step: 80/469, loss: 0.20907065272331238 2023-01-22 11:11:50.707873: step: 82/469, loss: 0.5931469202041626 2023-01-22 11:11:51.405446: step: 84/469, loss: 0.4220855236053467 2023-01-22 11:11:52.087861: step: 86/469, loss: 0.5431109070777893 2023-01-22 11:11:52.798779: step: 88/469, loss: 0.40732496976852417 2023-01-22 11:11:53.415623: step: 90/469, loss: 0.20699037611484528 2023-01-22 11:11:54.059079: step: 92/469, loss: 0.47190195322036743 2023-01-22 11:11:54.706127: step: 94/469, loss: 0.7683658003807068 2023-01-22 11:11:55.338261: step: 96/469, loss: 0.22656986117362976 2023-01-22 11:11:55.946438: step: 98/469, loss: 0.2687866687774658 2023-01-22 11:11:56.575838: step: 100/469, loss: 0.17973271012306213 2023-01-22 11:11:57.210685: step: 102/469, loss: 0.44705235958099365 2023-01-22 11:11:57.789869: step: 104/469, loss: 0.40831154584884644 2023-01-22 11:11:58.410003: step: 106/469, loss: 0.16774891316890717 2023-01-22 11:11:59.032289: step: 108/469, loss: 0.1206790879368782 2023-01-22 11:11:59.649772: step: 110/469, loss: 0.22693736851215363 2023-01-22 11:12:00.281001: step: 112/469, loss: 0.37868762016296387 2023-01-22 11:12:00.894715: step: 114/469, loss: 2.0113837718963623 2023-01-22 11:12:01.556411: step: 116/469, loss: 1.5968806743621826 2023-01-22 11:12:02.213877: step: 118/469, loss: 0.6039673686027527 2023-01-22 11:12:02.870517: step: 120/469, loss: 0.877234160900116 2023-01-22 11:12:03.562216: step: 122/469, loss: 0.8293070793151855 2023-01-22 11:12:04.229277: step: 124/469, loss: 0.24055759608745575 2023-01-22 11:12:04.929695: step: 126/469, loss: 0.5461827516555786 2023-01-22 11:12:05.655859: step: 128/469, loss: 0.5503721237182617 2023-01-22 11:12:06.307087: step: 130/469, loss: 0.513506293296814 2023-01-22 11:12:06.992823: step: 132/469, loss: 1.2275398969650269 2023-01-22 11:12:07.663033: step: 134/469, loss: 0.826102614402771 2023-01-22 11:12:08.343423: step: 136/469, loss: 0.38516363501548767 2023-01-22 11:12:09.018163: step: 138/469, loss: 0.4331507384777069 2023-01-22 11:12:09.678493: step: 140/469, loss: 0.17020680010318756 2023-01-22 11:12:10.332288: step: 142/469, loss: 0.6887328624725342 2023-01-22 11:12:10.978623: step: 144/469, loss: 0.13362735509872437 2023-01-22 11:12:11.627142: step: 146/469, loss: 0.49540039896965027 2023-01-22 11:12:12.348732: step: 148/469, loss: 0.26626360416412354 2023-01-22 11:12:12.962677: step: 150/469, loss: 0.36373767256736755 2023-01-22 11:12:13.652177: step: 152/469, loss: 0.40150022506713867 2023-01-22 11:12:14.301069: step: 154/469, loss: 0.3933314085006714 2023-01-22 11:12:14.876576: step: 156/469, loss: 0.2946799397468567 2023-01-22 11:12:15.538362: step: 158/469, loss: 1.396859049797058 2023-01-22 11:12:16.157598: step: 160/469, loss: 0.12313838303089142 2023-01-22 11:12:16.754739: step: 162/469, loss: 0.5091023445129395 2023-01-22 11:12:17.400393: step: 164/469, loss: 0.4060091972351074 2023-01-22 11:12:18.043529: step: 166/469, loss: 0.5097238421440125 2023-01-22 11:12:18.726715: step: 168/469, loss: 0.3354381322860718 2023-01-22 11:12:19.362811: step: 170/469, loss: 
0.13180392980575562 2023-01-22 11:12:20.009924: step: 172/469, loss: 0.22569696605205536 2023-01-22 11:12:20.672990: step: 174/469, loss: 0.043637704104185104 2023-01-22 11:12:21.290120: step: 176/469, loss: 1.1725974082946777 2023-01-22 11:12:21.921116: step: 178/469, loss: 0.29839888215065 2023-01-22 11:12:22.492953: step: 180/469, loss: 0.4605640470981598 2023-01-22 11:12:23.149152: step: 182/469, loss: 0.37565863132476807 2023-01-22 11:12:23.812294: step: 184/469, loss: 0.27172836661338806 2023-01-22 11:12:24.447693: step: 186/469, loss: 0.5739880204200745 2023-01-22 11:12:25.057543: step: 188/469, loss: 0.3598175644874573 2023-01-22 11:12:25.726020: step: 190/469, loss: 0.19782230257987976 2023-01-22 11:12:26.427010: step: 192/469, loss: 0.5162298679351807 2023-01-22 11:12:27.058126: step: 194/469, loss: 0.24618904292583466 2023-01-22 11:12:27.663303: step: 196/469, loss: 0.2614660859107971 2023-01-22 11:12:28.253405: step: 198/469, loss: 0.5913073420524597 2023-01-22 11:12:28.907438: step: 200/469, loss: 0.20946751534938812 2023-01-22 11:12:29.547748: step: 202/469, loss: 0.7523478865623474 2023-01-22 11:12:30.270175: step: 204/469, loss: 0.2856263220310211 2023-01-22 11:12:30.871269: step: 206/469, loss: 0.38644376397132874 2023-01-22 11:12:31.490097: step: 208/469, loss: 0.0799332931637764 2023-01-22 11:12:32.178871: step: 210/469, loss: 0.14310799539089203 2023-01-22 11:12:32.775477: step: 212/469, loss: 0.17182767391204834 2023-01-22 11:12:33.424883: step: 214/469, loss: 0.8383781909942627 2023-01-22 11:12:34.114744: step: 216/469, loss: 0.68065345287323 2023-01-22 11:12:34.790193: step: 218/469, loss: 0.48425042629241943 2023-01-22 11:12:35.461939: step: 220/469, loss: 0.29772573709487915 2023-01-22 11:12:36.129342: step: 222/469, loss: 0.1649387776851654 2023-01-22 11:12:36.720404: step: 224/469, loss: 0.5369493365287781 2023-01-22 11:12:37.394533: step: 226/469, loss: 0.45795154571533203 2023-01-22 11:12:37.996079: step: 228/469, loss: 0.15101347863674164 2023-01-22 11:12:38.629234: step: 230/469, loss: 0.22972552478313446 2023-01-22 11:12:39.269795: step: 232/469, loss: 0.269235759973526 2023-01-22 11:12:39.889905: step: 234/469, loss: 0.6715574860572815 2023-01-22 11:12:40.591767: step: 236/469, loss: 0.6828577518463135 2023-01-22 11:12:41.234542: step: 238/469, loss: 0.07857867330312729 2023-01-22 11:12:41.909659: step: 240/469, loss: 0.16601179540157318 2023-01-22 11:12:42.570732: step: 242/469, loss: 0.29657378792762756 2023-01-22 11:12:43.236179: step: 244/469, loss: 0.1468263566493988 2023-01-22 11:12:43.875847: step: 246/469, loss: 0.3764883875846863 2023-01-22 11:12:44.502724: step: 248/469, loss: 0.2938169538974762 2023-01-22 11:12:45.159927: step: 250/469, loss: 0.135310560464859 2023-01-22 11:12:45.864159: step: 252/469, loss: 0.670644998550415 2023-01-22 11:12:46.451668: step: 254/469, loss: 0.2843133807182312 2023-01-22 11:12:47.076695: step: 256/469, loss: 0.5504776835441589 2023-01-22 11:12:47.744333: step: 258/469, loss: 2.205552577972412 2023-01-22 11:12:48.412935: step: 260/469, loss: 0.5212454795837402 2023-01-22 11:12:49.023370: step: 262/469, loss: 0.16184431314468384 2023-01-22 11:12:49.626430: step: 264/469, loss: 0.9438133835792542 2023-01-22 11:12:50.260911: step: 266/469, loss: 0.5826058387756348 2023-01-22 11:12:50.971215: step: 268/469, loss: 0.2877679467201233 2023-01-22 11:12:51.738320: step: 270/469, loss: 0.21185234189033508 2023-01-22 11:12:52.361894: step: 272/469, loss: 0.31699898838996887 2023-01-22 11:12:53.031867: step: 274/469, loss: 
0.25983649492263794 2023-01-22 11:12:53.679177: step: 276/469, loss: 0.8715678453445435 2023-01-22 11:12:54.449817: step: 278/469, loss: 0.3698544502258301 2023-01-22 11:12:55.067354: step: 280/469, loss: 0.2095068395137787 2023-01-22 11:12:55.713073: step: 282/469, loss: 0.3880450427532196 2023-01-22 11:12:56.394860: step: 284/469, loss: 0.19999688863754272 2023-01-22 11:12:57.047891: step: 286/469, loss: 0.43148836493492126 2023-01-22 11:12:57.695874: step: 288/469, loss: 0.15177132189273834 2023-01-22 11:12:58.409621: step: 290/469, loss: 0.3301406502723694 2023-01-22 11:12:59.035041: step: 292/469, loss: 0.460880845785141 2023-01-22 11:12:59.776804: step: 294/469, loss: 0.17909863591194153 2023-01-22 11:13:00.460303: step: 296/469, loss: 0.33465319871902466 2023-01-22 11:13:01.111097: step: 298/469, loss: 0.11174724251031876 2023-01-22 11:13:01.813446: step: 300/469, loss: 0.1678203046321869 2023-01-22 11:13:02.485019: step: 302/469, loss: 0.22508102655410767 2023-01-22 11:13:03.138456: step: 304/469, loss: 0.1979251652956009 2023-01-22 11:13:03.751371: step: 306/469, loss: 0.2560000419616699 2023-01-22 11:13:04.389649: step: 308/469, loss: 0.28345680236816406 2023-01-22 11:13:05.066512: step: 310/469, loss: 0.9701858162879944 2023-01-22 11:13:05.696178: step: 312/469, loss: 0.8830705881118774 2023-01-22 11:13:06.444565: step: 314/469, loss: 0.6692978143692017 2023-01-22 11:13:07.100419: step: 316/469, loss: 0.2865965962409973 2023-01-22 11:13:07.704453: step: 318/469, loss: 0.33784371614456177 2023-01-22 11:13:08.450132: step: 320/469, loss: 0.22370503842830658 2023-01-22 11:13:09.051538: step: 322/469, loss: 0.4606347382068634 2023-01-22 11:13:09.713414: step: 324/469, loss: 0.23428668081760406 2023-01-22 11:13:10.371051: step: 326/469, loss: 0.09276342391967773 2023-01-22 11:13:11.072911: step: 328/469, loss: 0.5830388069152832 2023-01-22 11:13:11.792682: step: 330/469, loss: 1.0051289796829224 2023-01-22 11:13:12.530231: step: 332/469, loss: 0.30512693524360657 2023-01-22 11:13:13.245283: step: 334/469, loss: 0.2026054561138153 2023-01-22 11:13:13.929103: step: 336/469, loss: 0.18126386404037476 2023-01-22 11:13:14.637062: step: 338/469, loss: 0.23084834218025208 2023-01-22 11:13:15.245851: step: 340/469, loss: 0.13632763922214508 2023-01-22 11:13:15.894992: step: 342/469, loss: 0.39497995376586914 2023-01-22 11:13:16.547163: step: 344/469, loss: 0.16630469262599945 2023-01-22 11:13:17.235005: step: 346/469, loss: 0.2959747910499573 2023-01-22 11:13:17.894522: step: 348/469, loss: 0.10582728683948517 2023-01-22 11:13:18.540705: step: 350/469, loss: 0.7902545928955078 2023-01-22 11:13:19.208232: step: 352/469, loss: 0.785956084728241 2023-01-22 11:13:19.931906: step: 354/469, loss: 0.438224732875824 2023-01-22 11:13:20.623678: step: 356/469, loss: 0.18054988980293274 2023-01-22 11:13:21.222771: step: 358/469, loss: 0.19129571318626404 2023-01-22 11:13:21.864355: step: 360/469, loss: 0.21083766222000122 2023-01-22 11:13:22.527511: step: 362/469, loss: 0.6799478530883789 2023-01-22 11:13:23.047062: step: 364/469, loss: 0.32006406784057617 2023-01-22 11:13:23.727028: step: 366/469, loss: 5.038124084472656 2023-01-22 11:13:24.347575: step: 368/469, loss: 0.5870373249053955 2023-01-22 11:13:24.998358: step: 370/469, loss: 0.3491811454296112 2023-01-22 11:13:25.659248: step: 372/469, loss: 0.49556946754455566 2023-01-22 11:13:26.321477: step: 374/469, loss: 0.5719321966171265 2023-01-22 11:13:27.003719: step: 376/469, loss: 0.25649404525756836 2023-01-22 11:13:27.694322: step: 378/469, 
loss: 0.46808499097824097 2023-01-22 11:13:28.343417: step: 380/469, loss: 0.8488327264785767 2023-01-22 11:13:28.973906: step: 382/469, loss: 0.42529749870300293 2023-01-22 11:13:29.604094: step: 384/469, loss: 0.1451246738433838 2023-01-22 11:13:30.241872: step: 386/469, loss: 0.4919374883174896 2023-01-22 11:13:30.937817: step: 388/469, loss: 0.13510015606880188 2023-01-22 11:13:31.590586: step: 390/469, loss: 0.21749433875083923 2023-01-22 11:13:32.209254: step: 392/469, loss: 0.13819032907485962 2023-01-22 11:13:32.862923: step: 394/469, loss: 0.2085658311843872 2023-01-22 11:13:33.519146: step: 396/469, loss: 0.5387779474258423 2023-01-22 11:13:34.166633: step: 398/469, loss: 0.574633777141571 2023-01-22 11:13:34.786711: step: 400/469, loss: 0.2833281457424164 2023-01-22 11:13:35.424742: step: 402/469, loss: 0.23054522275924683 2023-01-22 11:13:36.012102: step: 404/469, loss: 0.22763407230377197 2023-01-22 11:13:36.691661: step: 406/469, loss: 0.5856924057006836 2023-01-22 11:13:37.261635: step: 408/469, loss: 0.1854855865240097 2023-01-22 11:13:37.891948: step: 410/469, loss: 1.0070726871490479 2023-01-22 11:13:38.508230: step: 412/469, loss: 0.27506905794143677 2023-01-22 11:13:39.149377: step: 414/469, loss: 0.23910748958587646 2023-01-22 11:13:39.832544: step: 416/469, loss: 0.5609286427497864 2023-01-22 11:13:40.493481: step: 418/469, loss: 0.1637156754732132 2023-01-22 11:13:41.065692: step: 420/469, loss: 0.22534653544425964 2023-01-22 11:13:41.714813: step: 422/469, loss: 1.2738531827926636 2023-01-22 11:13:42.340586: step: 424/469, loss: 0.5713269114494324 2023-01-22 11:13:42.975030: step: 426/469, loss: 0.4196932017803192 2023-01-22 11:13:43.581786: step: 428/469, loss: 0.19753192365169525 2023-01-22 11:13:44.282156: step: 430/469, loss: 0.2704198360443115 2023-01-22 11:13:44.963962: step: 432/469, loss: 0.2068815976381302 2023-01-22 11:13:45.624218: step: 434/469, loss: 0.7152396440505981 2023-01-22 11:13:46.286218: step: 436/469, loss: 0.760033905506134 2023-01-22 11:13:46.970230: step: 438/469, loss: 0.8266289830207825 2023-01-22 11:13:47.578779: step: 440/469, loss: 0.2040933072566986 2023-01-22 11:13:48.287204: step: 442/469, loss: 0.19095304608345032 2023-01-22 11:13:48.892994: step: 444/469, loss: 0.25177013874053955 2023-01-22 11:13:49.611793: step: 446/469, loss: 0.7534641623497009 2023-01-22 11:13:50.235863: step: 448/469, loss: 0.19340673089027405 2023-01-22 11:13:50.893463: step: 450/469, loss: 0.1908774971961975 2023-01-22 11:13:51.610316: step: 452/469, loss: 0.2998519241809845 2023-01-22 11:13:52.277350: step: 454/469, loss: 0.34291404485702515 2023-01-22 11:13:52.926605: step: 456/469, loss: 0.18961063027381897 2023-01-22 11:13:53.583017: step: 458/469, loss: 0.2002243995666504 2023-01-22 11:13:54.213808: step: 460/469, loss: 0.30675777792930603 2023-01-22 11:13:54.822254: step: 462/469, loss: 0.1929704248905182 2023-01-22 11:13:55.451743: step: 464/469, loss: 0.3591207265853882 2023-01-22 11:13:56.105322: step: 466/469, loss: 0.13202813267707825 2023-01-22 11:13:56.758723: step: 468/469, loss: 0.7541679739952087 2023-01-22 11:13:57.407660: step: 470/469, loss: 1.4168643951416016 2023-01-22 11:13:58.058388: step: 472/469, loss: 0.5182743072509766 2023-01-22 11:13:58.715892: step: 474/469, loss: 0.7530785202980042 2023-01-22 11:13:59.282086: step: 476/469, loss: 0.5371955037117004 2023-01-22 11:13:59.914187: step: 478/469, loss: 0.156224325299263 2023-01-22 11:14:00.555001: step: 480/469, loss: 1.1019047498703003 2023-01-22 11:14:01.142633: step: 482/469, 
loss: 0.15114130079746246 2023-01-22 11:14:01.876884: step: 484/469, loss: 0.34647971391677856 2023-01-22 11:14:02.570843: step: 486/469, loss: 0.6130858659744263 2023-01-22 11:14:03.168069: step: 488/469, loss: 0.17797544598579407 2023-01-22 11:14:03.797654: step: 490/469, loss: 0.40765994787216187 2023-01-22 11:14:04.559669: step: 492/469, loss: 0.3031141459941864 2023-01-22 11:14:05.244628: step: 494/469, loss: 0.1624971181154251 2023-01-22 11:14:05.951303: step: 496/469, loss: 0.20007124543190002 2023-01-22 11:14:06.607107: step: 498/469, loss: 0.9323029518127441 2023-01-22 11:14:07.303519: step: 500/469, loss: 0.3443988263607025 2023-01-22 11:14:07.917592: step: 502/469, loss: 0.11479859799146652 2023-01-22 11:14:08.587512: step: 504/469, loss: 0.9197838306427002 2023-01-22 11:14:09.288808: step: 506/469, loss: 1.08487069606781 2023-01-22 11:14:09.932144: step: 508/469, loss: 0.4270364046096802 2023-01-22 11:14:10.565615: step: 510/469, loss: 0.2636466920375824 2023-01-22 11:14:11.191331: step: 512/469, loss: 0.6892208456993103 2023-01-22 11:14:11.927672: step: 514/469, loss: 0.10705716162919998 2023-01-22 11:14:12.721032: step: 516/469, loss: 1.1481060981750488 2023-01-22 11:14:13.332362: step: 518/469, loss: 0.34143489599227905 2023-01-22 11:14:13.939357: step: 520/469, loss: 0.6894757151603699 2023-01-22 11:14:14.624602: step: 522/469, loss: 0.5917198657989502 2023-01-22 11:14:15.312455: step: 524/469, loss: 0.2364565134048462 2023-01-22 11:14:15.932963: step: 526/469, loss: 0.11821852624416351 2023-01-22 11:14:16.697033: step: 528/469, loss: 0.9814774394035339 2023-01-22 11:14:17.377360: step: 530/469, loss: 0.4304909110069275 2023-01-22 11:14:18.047041: step: 532/469, loss: 0.1012810468673706 2023-01-22 11:14:18.702026: step: 534/469, loss: 0.4052692949771881 2023-01-22 11:14:19.363391: step: 536/469, loss: 0.24921931326389313 2023-01-22 11:14:20.104081: step: 538/469, loss: 0.13539421558380127 2023-01-22 11:14:20.728246: step: 540/469, loss: 0.3716905415058136 2023-01-22 11:14:21.340043: step: 542/469, loss: 0.32280153036117554 2023-01-22 11:14:22.012602: step: 544/469, loss: 0.06774081289768219 2023-01-22 11:14:22.667167: step: 546/469, loss: 0.18182620406150818 2023-01-22 11:14:23.405040: step: 548/469, loss: 0.26528051495552063 2023-01-22 11:14:24.086634: step: 550/469, loss: 0.2427050769329071 2023-01-22 11:14:24.727557: step: 552/469, loss: 0.2890981137752533 2023-01-22 11:14:25.391213: step: 554/469, loss: 1.08097243309021 2023-01-22 11:14:26.082567: step: 556/469, loss: 0.14689016342163086 2023-01-22 11:14:26.784930: step: 558/469, loss: 0.1632222980260849 2023-01-22 11:14:27.419923: step: 560/469, loss: 0.25574785470962524 2023-01-22 11:14:28.062247: step: 562/469, loss: 0.5995178818702698 2023-01-22 11:14:28.634749: step: 564/469, loss: 0.1721377968788147 2023-01-22 11:14:29.323689: step: 566/469, loss: 1.2596428394317627 2023-01-22 11:14:29.974077: step: 568/469, loss: 0.2343950867652893 2023-01-22 11:14:30.641968: step: 570/469, loss: 0.24954719841480255 2023-01-22 11:14:31.297073: step: 572/469, loss: 0.5721375942230225 2023-01-22 11:14:31.922447: step: 574/469, loss: 0.5895306468009949 2023-01-22 11:14:32.564499: step: 576/469, loss: 0.1783619225025177 2023-01-22 11:14:33.168826: step: 578/469, loss: 0.2190074622631073 2023-01-22 11:14:33.841423: step: 580/469, loss: 0.1833002269268036 2023-01-22 11:14:34.484669: step: 582/469, loss: 0.27503082156181335 2023-01-22 11:14:35.146360: step: 584/469, loss: 0.3686704635620117 2023-01-22 11:14:35.849913: step: 586/469, 
loss: 0.2426997572183609 2023-01-22 11:14:36.510843: step: 588/469, loss: 1.4560201168060303 2023-01-22 11:14:37.153678: step: 590/469, loss: 0.41090160608291626 2023-01-22 11:14:37.849536: step: 592/469, loss: 0.7190005779266357 2023-01-22 11:14:38.500372: step: 594/469, loss: 0.2541142404079437 2023-01-22 11:14:39.137720: step: 596/469, loss: 1.2114495038986206 2023-01-22 11:14:39.818988: step: 598/469, loss: 0.43150845170021057 2023-01-22 11:14:40.501059: step: 600/469, loss: 0.16728082299232483 2023-01-22 11:14:41.241903: step: 602/469, loss: 0.32608771324157715 2023-01-22 11:14:41.954632: step: 604/469, loss: 0.8031672835350037 2023-01-22 11:14:42.546193: step: 606/469, loss: 0.46160343289375305 2023-01-22 11:14:43.206828: step: 608/469, loss: 0.5382565259933472 2023-01-22 11:14:43.867085: step: 610/469, loss: 0.5774716138839722 2023-01-22 11:14:44.500153: step: 612/469, loss: 0.6985758543014526 2023-01-22 11:14:45.089460: step: 614/469, loss: 0.5699069499969482 2023-01-22 11:14:45.670082: step: 616/469, loss: 0.39563262462615967 2023-01-22 11:14:46.300511: step: 618/469, loss: 0.31927981972694397 2023-01-22 11:14:46.960380: step: 620/469, loss: 0.16570867598056793 2023-01-22 11:14:47.604640: step: 622/469, loss: 0.19120311737060547 2023-01-22 11:14:48.291928: step: 624/469, loss: 1.4697961807250977 2023-01-22 11:14:48.965689: step: 626/469, loss: 0.5406908392906189 2023-01-22 11:14:49.661054: step: 628/469, loss: 0.34849169850349426 2023-01-22 11:14:50.333887: step: 630/469, loss: 0.09745623916387558 2023-01-22 11:14:50.949369: step: 632/469, loss: 0.20540249347686768 2023-01-22 11:14:51.570718: step: 634/469, loss: 0.4506654143333435 2023-01-22 11:14:52.211815: step: 636/469, loss: 0.47304269671440125 2023-01-22 11:14:52.863268: step: 638/469, loss: 0.28370389342308044 2023-01-22 11:14:53.508972: step: 640/469, loss: 0.10397260636091232 2023-01-22 11:14:54.188391: step: 642/469, loss: 0.16313014924526215 2023-01-22 11:14:54.972312: step: 644/469, loss: 0.614755392074585 2023-01-22 11:14:55.632052: step: 646/469, loss: 0.19631730020046234 2023-01-22 11:14:56.246600: step: 648/469, loss: 1.504908800125122 2023-01-22 11:14:56.890857: step: 650/469, loss: 0.2620968222618103 2023-01-22 11:14:57.589395: step: 652/469, loss: 0.20982445776462555 2023-01-22 11:14:58.218480: step: 654/469, loss: 0.35562485456466675 2023-01-22 11:14:58.874526: step: 656/469, loss: 0.1485298126935959 2023-01-22 11:14:59.522405: step: 658/469, loss: 0.16371245682239532 2023-01-22 11:15:00.160125: step: 660/469, loss: 0.4689534306526184 2023-01-22 11:15:00.857180: step: 662/469, loss: 0.6594911813735962 2023-01-22 11:15:01.516985: step: 664/469, loss: 0.10823681950569153 2023-01-22 11:15:02.246685: step: 666/469, loss: 0.30061373114585876 2023-01-22 11:15:02.927082: step: 668/469, loss: 0.8325690627098083 2023-01-22 11:15:03.615442: step: 670/469, loss: 0.4599584937095642 2023-01-22 11:15:04.190996: step: 672/469, loss: 0.5109724998474121 2023-01-22 11:15:04.833977: step: 674/469, loss: 0.643949568271637 2023-01-22 11:15:05.558103: step: 676/469, loss: 0.19060009717941284 2023-01-22 11:15:06.227268: step: 678/469, loss: 0.1913621574640274 2023-01-22 11:15:06.963088: step: 680/469, loss: 0.22661826014518738 2023-01-22 11:15:07.622400: step: 682/469, loss: 0.5973053574562073 2023-01-22 11:15:08.319680: step: 684/469, loss: 0.253712922334671 2023-01-22 11:15:09.000475: step: 686/469, loss: 0.47143083810806274 2023-01-22 11:15:09.622618: step: 688/469, loss: 0.3521772623062134 2023-01-22 11:15:10.187947: step: 
690/469, loss: 0.0996263399720192 2023-01-22 11:15:10.826868: step: 692/469, loss: 0.1923132985830307 2023-01-22 11:15:11.470421: step: 694/469, loss: 0.3770560026168823 2023-01-22 11:15:12.242768: step: 696/469, loss: 0.24515292048454285 2023-01-22 11:15:12.937092: step: 698/469, loss: 0.17921769618988037 2023-01-22 11:15:13.604549: step: 700/469, loss: 0.25507134199142456 2023-01-22 11:15:14.185983: step: 702/469, loss: 0.3950207829475403 2023-01-22 11:15:14.871561: step: 704/469, loss: 0.16794122755527496 2023-01-22 11:15:15.582708: step: 706/469, loss: 0.26940253376960754 2023-01-22 11:15:16.314913: step: 708/469, loss: 0.8904720544815063 2023-01-22 11:15:16.967789: step: 710/469, loss: 0.2583911716938019 2023-01-22 11:15:17.636826: step: 712/469, loss: 0.4519367218017578 2023-01-22 11:15:18.363678: step: 714/469, loss: 0.4631194770336151 2023-01-22 11:15:19.174893: step: 716/469, loss: 0.484460711479187 2023-01-22 11:15:19.809831: step: 718/469, loss: 0.2521439492702484 2023-01-22 11:15:20.563469: step: 720/469, loss: 0.2882177531719208 2023-01-22 11:15:21.249573: step: 722/469, loss: 0.35357216000556946 2023-01-22 11:15:21.901900: step: 724/469, loss: 0.3042793869972229 2023-01-22 11:15:22.505505: step: 726/469, loss: 0.1204552948474884 2023-01-22 11:15:23.189582: step: 728/469, loss: 0.532158374786377 2023-01-22 11:15:23.826615: step: 730/469, loss: 0.2990210950374603 2023-01-22 11:15:24.449229: step: 732/469, loss: 1.4791412353515625 2023-01-22 11:15:25.129119: step: 734/469, loss: 0.34803155064582825 2023-01-22 11:15:25.837535: step: 736/469, loss: 0.2030235081911087 2023-01-22 11:15:26.556173: step: 738/469, loss: 1.250240445137024 2023-01-22 11:15:27.170891: step: 740/469, loss: 0.24611452221870422 2023-01-22 11:15:27.839218: step: 742/469, loss: 0.226565420627594 2023-01-22 11:15:28.550453: step: 744/469, loss: 0.47330066561698914 2023-01-22 11:15:29.209456: step: 746/469, loss: 0.710346519947052 2023-01-22 11:15:29.857184: step: 748/469, loss: 0.8516194820404053 2023-01-22 11:15:30.530682: step: 750/469, loss: 0.6422107815742493 2023-01-22 11:15:31.248355: step: 752/469, loss: 1.0098201036453247 2023-01-22 11:15:31.942210: step: 754/469, loss: 0.6223190426826477 2023-01-22 11:15:32.558755: step: 756/469, loss: 0.5177649855613708 2023-01-22 11:15:33.215892: step: 758/469, loss: 0.39691728353500366 2023-01-22 11:15:33.814480: step: 760/469, loss: 0.4080984890460968 2023-01-22 11:15:34.536816: step: 762/469, loss: 0.383847177028656 2023-01-22 11:15:35.183566: step: 764/469, loss: 0.2949841022491455 2023-01-22 11:15:35.806493: step: 766/469, loss: 0.33717209100723267 2023-01-22 11:15:36.469856: step: 768/469, loss: 0.27422723174095154 2023-01-22 11:15:37.033724: step: 770/469, loss: 0.32133570313453674 2023-01-22 11:15:37.687609: step: 772/469, loss: 0.3171902000904083 2023-01-22 11:15:38.348451: step: 774/469, loss: 0.2577151358127594 2023-01-22 11:15:38.982228: step: 776/469, loss: 0.09333942085504532 2023-01-22 11:15:39.679336: step: 778/469, loss: 0.3875003457069397 2023-01-22 11:15:40.300108: step: 780/469, loss: 0.2966225743293762 2023-01-22 11:15:40.958385: step: 782/469, loss: 1.2088453769683838 2023-01-22 11:15:41.587014: step: 784/469, loss: 0.23390939831733704 2023-01-22 11:15:42.246175: step: 786/469, loss: 0.27754467725753784 2023-01-22 11:15:42.880740: step: 788/469, loss: 0.40995508432388306 2023-01-22 11:15:43.538442: step: 790/469, loss: 0.3927914798259735 2023-01-22 11:15:44.179880: step: 792/469, loss: 0.4827421307563782 2023-01-22 11:15:44.786403: step: 
794/469, loss: 0.10398051887750626 2023-01-22 11:15:45.514144: step: 796/469, loss: 0.2812942862510681 2023-01-22 11:15:46.178695: step: 798/469, loss: 0.23930960893630981 2023-01-22 11:15:46.843868: step: 800/469, loss: 0.6100246906280518 2023-01-22 11:15:47.507573: step: 802/469, loss: 0.29489901661872864 2023-01-22 11:15:48.167869: step: 804/469, loss: 0.5952594876289368 2023-01-22 11:15:48.892936: step: 806/469, loss: 0.6478204727172852 2023-01-22 11:15:49.633206: step: 808/469, loss: 0.24434375762939453 2023-01-22 11:15:50.348387: step: 810/469, loss: 0.45539042353630066 2023-01-22 11:15:51.004628: step: 812/469, loss: 0.3725740909576416 2023-01-22 11:15:51.661604: step: 814/469, loss: 0.7191227674484253 2023-01-22 11:15:52.348815: step: 816/469, loss: 0.11773187667131424 2023-01-22 11:15:52.986079: step: 818/469, loss: 0.7031224966049194 2023-01-22 11:15:53.612974: step: 820/469, loss: 0.14404354989528656 2023-01-22 11:15:54.300141: step: 822/469, loss: 1.1291322708129883 2023-01-22 11:15:54.952508: step: 824/469, loss: 0.14968664944171906 2023-01-22 11:15:55.587715: step: 826/469, loss: 0.17701198160648346 2023-01-22 11:15:56.410782: step: 828/469, loss: 0.2988724708557129 2023-01-22 11:15:57.058736: step: 830/469, loss: 0.19408518075942993 2023-01-22 11:15:57.805230: step: 832/469, loss: 0.9875146150588989 2023-01-22 11:15:58.430476: step: 834/469, loss: 0.6374933123588562 2023-01-22 11:15:59.049753: step: 836/469, loss: 0.510638415813446 2023-01-22 11:15:59.687462: step: 838/469, loss: 0.10488545149564743 2023-01-22 11:16:00.363106: step: 840/469, loss: 0.15842315554618835 2023-01-22 11:16:01.172827: step: 842/469, loss: 0.4394344091415405 2023-01-22 11:16:01.805406: step: 844/469, loss: 0.1641053408384323 2023-01-22 11:16:02.501650: step: 846/469, loss: 0.6463753581047058 2023-01-22 11:16:03.134452: step: 848/469, loss: 0.1366187334060669 2023-01-22 11:16:03.781535: step: 850/469, loss: 2.8138809204101562 2023-01-22 11:16:04.423844: step: 852/469, loss: 1.6097359657287598 2023-01-22 11:16:05.165277: step: 854/469, loss: 0.5740014314651489 2023-01-22 11:16:05.781092: step: 856/469, loss: 0.20026065409183502 2023-01-22 11:16:06.409954: step: 858/469, loss: 0.6817509531974792 2023-01-22 11:16:07.121738: step: 860/469, loss: 0.22292299568653107 2023-01-22 11:16:07.807589: step: 862/469, loss: 0.12401055544614792 2023-01-22 11:16:08.416989: step: 864/469, loss: 0.7487518787384033 2023-01-22 11:16:09.029721: step: 866/469, loss: 0.5284701585769653 2023-01-22 11:16:09.709126: step: 868/469, loss: 0.280505895614624 2023-01-22 11:16:10.367406: step: 870/469, loss: 0.24373166263103485 2023-01-22 11:16:11.091429: step: 872/469, loss: 0.16333827376365662 2023-01-22 11:16:11.778483: step: 874/469, loss: 0.5490494966506958 2023-01-22 11:16:12.480670: step: 876/469, loss: 0.2833230197429657 2023-01-22 11:16:13.185208: step: 878/469, loss: 0.8896983861923218 2023-01-22 11:16:13.836710: step: 880/469, loss: 0.324676513671875 2023-01-22 11:16:14.551317: step: 882/469, loss: 0.42407119274139404 2023-01-22 11:16:15.214284: step: 884/469, loss: 0.29535263776779175 2023-01-22 11:16:15.943267: step: 886/469, loss: 0.4275474548339844 2023-01-22 11:16:16.636255: step: 888/469, loss: 0.5988882184028625 2023-01-22 11:16:17.280268: step: 890/469, loss: 0.7113070487976074 2023-01-22 11:16:17.987738: step: 892/469, loss: 0.6220961809158325 2023-01-22 11:16:18.636656: step: 894/469, loss: 0.6407210230827332 2023-01-22 11:16:19.382660: step: 896/469, loss: 0.25663474202156067 2023-01-22 11:16:19.995240: step: 
898/469, loss: 0.7068220376968384 2023-01-22 11:16:20.704578: step: 900/469, loss: 0.266611248254776 2023-01-22 11:16:21.319779: step: 902/469, loss: 0.5101432800292969 2023-01-22 11:16:21.991980: step: 904/469, loss: 0.14897340536117554 2023-01-22 11:16:22.611440: step: 906/469, loss: 0.3603914678096771 2023-01-22 11:16:23.285440: step: 908/469, loss: 0.5295233726501465 2023-01-22 11:16:24.003136: step: 910/469, loss: 0.17674796283245087 2023-01-22 11:16:24.654564: step: 912/469, loss: 0.3394225835800171 2023-01-22 11:16:25.321321: step: 914/469, loss: 0.4201357960700989 2023-01-22 11:16:25.988150: step: 916/469, loss: 0.5441609025001526 2023-01-22 11:16:26.610269: step: 918/469, loss: 0.2413608580827713 2023-01-22 11:16:27.283137: step: 920/469, loss: 2.331967353820801 2023-01-22 11:16:28.017946: step: 922/469, loss: 0.4835992753505707 2023-01-22 11:16:28.666290: step: 924/469, loss: 0.47907790541648865 2023-01-22 11:16:29.358525: step: 926/469, loss: 1.3020224571228027 2023-01-22 11:16:29.986273: step: 928/469, loss: 0.3140222132205963 2023-01-22 11:16:30.657033: step: 930/469, loss: 0.8970685005187988 2023-01-22 11:16:31.399002: step: 932/469, loss: 0.29329001903533936 2023-01-22 11:16:32.031329: step: 934/469, loss: 0.35678309202194214 2023-01-22 11:16:32.655195: step: 936/469, loss: 0.5961318612098694 2023-01-22 11:16:33.333665: step: 938/469, loss: 0.22860561311244965 ================================================== Loss: 0.447 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3004242370929563, 'r': 0.34716956430666107, 'f1': 0.32210978941832813}, 'combined': 0.23734405536087336, 'epoch': 8} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2958460629551878, 'r': 0.2536210073092507, 'f1': 0.2731110945704542}, 'combined': 0.14896968794752044, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2899250167035537, 'r': 0.3421885396387294, 'f1': 0.3138961886677292}, 'combined': 0.231291928492011, 'epoch': 8} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2878462629053366, 'r': 0.254400265294195, 'f1': 0.2700917823861633}, 'combined': 0.14732279039245272, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27777777777777773, 'r': 0.38095238095238093, 'f1': 0.321285140562249}, 'combined': 0.214190093708166, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.23275862068965517, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 8} New best korean model... 
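Note on the logged metrics (editorial sketch, not part of the original log): the per-epoch "Loss" line appears to be the mean of the step losses printed above it, each "f1" field is consistent with the usual harmonic mean of the printed "p" and "r", and every "combined" value matches the product of the template f1 and the slot f1 (e.g. 0.7368421052631579 × 0.32210978941832813 ≈ 0.23734405536087336 for Dev Chinese at epoch 8). "New best korean model..." together with the per-language epochs in the "Current best result" block suggests that a separate best checkpoint is tracked per language on the dev "combined" score. The snippet below is a minimal sketch of those inferred relationships; the function names (f1, combined_score, is_new_best) are illustrative assumptions, not the repository's actual code.

```python
# Minimal sketch of how the logged numbers appear to relate to each other.
# All names here are illustrative; only the arithmetic is checked against the log.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; 0.0 when both are 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """The 'combined' field in the log matches template_f1 * slot_f1."""
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

def is_new_best(dev_combined: float, best_so_far: float) -> bool:
    """Assumed per-language rule behind 'New best korean model...'."""
    return dev_combined > best_so_far

# Example with values copied from the epoch-8 Dev Chinese entry above.
template = {'p': 1.0, 'r': 0.5833333333333334}
slot = {'p': 0.3004242370929563, 'r': 0.34716956430666107}
print(combined_score(template, slot))  # ~0.2373440..., as logged
```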
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123600447856686, 'r': 0.30880376344086025, 'f1': 0.31057172391857507}, 'combined': 0.22884232288737108, 'epoch': 5} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29928638434735994, 'r': 0.23575990569357447, 'f1': 0.26375186993150146}, 'combined': 0.14386465632627352, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:19:42.899799: step: 2/469, loss: 0.39117011427879333 2023-01-22 11:19:43.525285: step: 4/469, loss: 0.0922774225473404 2023-01-22 11:19:44.205507: step: 6/469, loss: 0.2141404002904892 2023-01-22 11:19:44.880996: step: 8/469, loss: 0.13863630592823029 2023-01-22 11:19:45.532825: step: 10/469, loss: 0.29343122243881226 2023-01-22 11:19:46.266521: step: 12/469, loss: 0.23387955129146576 2023-01-22 11:19:46.884075: step: 14/469, loss: 0.6630819439888 2023-01-22 11:19:47.555928: step: 16/469, loss: 0.20282596349716187 2023-01-22 11:19:48.259682: step: 18/469, loss: 0.07033102214336395 2023-01-22 11:19:48.928023: step: 20/469, loss: 0.9723142981529236 2023-01-22 11:19:49.691380: step: 22/469, loss: 0.13188546895980835 2023-01-22 11:19:50.338072: step: 24/469, loss: 0.17766205966472626 2023-01-22 11:19:50.965575: step: 26/469, loss: 0.1398642510175705 2023-01-22 11:19:51.623892: step: 28/469, loss: 0.19310599565505981 2023-01-22 11:19:52.283239: step: 30/469, loss: 0.22215953469276428 2023-01-22 11:19:52.911777: step: 32/469, loss: 0.4158446192741394 2023-01-22 11:19:53.721616: step: 34/469, loss: 0.26109573245048523 2023-01-22 11:19:54.389417: step: 
36/469, loss: 0.6764843463897705 2023-01-22 11:19:55.098844: step: 38/469, loss: 0.18109768629074097 2023-01-22 11:19:55.700475: step: 40/469, loss: 0.4471879303455353 2023-01-22 11:19:56.338871: step: 42/469, loss: 0.025926098227500916 2023-01-22 11:19:57.006967: step: 44/469, loss: 0.1759929656982422 2023-01-22 11:19:57.655796: step: 46/469, loss: 0.9183593988418579 2023-01-22 11:19:58.324519: step: 48/469, loss: 0.23044739663600922 2023-01-22 11:19:58.992714: step: 50/469, loss: 0.08991853892803192 2023-01-22 11:19:59.650746: step: 52/469, loss: 1.2173129320144653 2023-01-22 11:20:00.253060: step: 54/469, loss: 0.20458266139030457 2023-01-22 11:20:00.928518: step: 56/469, loss: 0.07910409569740295 2023-01-22 11:20:01.563548: step: 58/469, loss: 1.1281652450561523 2023-01-22 11:20:02.309022: step: 60/469, loss: 0.16504903137683868 2023-01-22 11:20:02.987156: step: 62/469, loss: 0.4823247492313385 2023-01-22 11:20:03.608825: step: 64/469, loss: 0.3919984698295593 2023-01-22 11:20:04.230730: step: 66/469, loss: 0.08961279690265656 2023-01-22 11:20:04.899734: step: 68/469, loss: 0.44954511523246765 2023-01-22 11:20:05.535080: step: 70/469, loss: 0.11863629519939423 2023-01-22 11:20:06.214249: step: 72/469, loss: 0.47804224491119385 2023-01-22 11:20:06.904113: step: 74/469, loss: 0.23678982257843018 2023-01-22 11:20:07.536954: step: 76/469, loss: 0.21039217710494995 2023-01-22 11:20:08.155817: step: 78/469, loss: 0.13153764605522156 2023-01-22 11:20:08.808178: step: 80/469, loss: 0.31582456827163696 2023-01-22 11:20:09.485077: step: 82/469, loss: 0.09279027581214905 2023-01-22 11:20:10.227761: step: 84/469, loss: 1.699034333229065 2023-01-22 11:20:10.877762: step: 86/469, loss: 0.10394525527954102 2023-01-22 11:20:11.559014: step: 88/469, loss: 0.3462316691875458 2023-01-22 11:20:12.211428: step: 90/469, loss: 0.22357968986034393 2023-01-22 11:20:12.893598: step: 92/469, loss: 0.14341087639331818 2023-01-22 11:20:13.606424: step: 94/469, loss: 0.21398556232452393 2023-01-22 11:20:14.234878: step: 96/469, loss: 0.5410624146461487 2023-01-22 11:20:14.932424: step: 98/469, loss: 0.5251842141151428 2023-01-22 11:20:15.591246: step: 100/469, loss: 0.8311374187469482 2023-01-22 11:20:16.234754: step: 102/469, loss: 0.15862813591957092 2023-01-22 11:20:16.922149: step: 104/469, loss: 0.18727891147136688 2023-01-22 11:20:17.583407: step: 106/469, loss: 0.4002154767513275 2023-01-22 11:20:18.250169: step: 108/469, loss: 0.37686851620674133 2023-01-22 11:20:18.914498: step: 110/469, loss: 0.3620848059654236 2023-01-22 11:20:19.559669: step: 112/469, loss: 0.3019811511039734 2023-01-22 11:20:20.210236: step: 114/469, loss: 0.28944191336631775 2023-01-22 11:20:20.962877: step: 116/469, loss: 0.19414857029914856 2023-01-22 11:20:21.605267: step: 118/469, loss: 0.4151228368282318 2023-01-22 11:20:22.258424: step: 120/469, loss: 0.11766630411148071 2023-01-22 11:20:22.908695: step: 122/469, loss: 0.0966775119304657 2023-01-22 11:20:23.570106: step: 124/469, loss: 0.2307339906692505 2023-01-22 11:20:24.252583: step: 126/469, loss: 0.07661121338605881 2023-01-22 11:20:24.928878: step: 128/469, loss: 1.253353238105774 2023-01-22 11:20:25.537820: step: 130/469, loss: 0.3100510835647583 2023-01-22 11:20:26.153893: step: 132/469, loss: 0.07813211530447006 2023-01-22 11:20:26.831090: step: 134/469, loss: 0.5194969773292542 2023-01-22 11:20:27.514636: step: 136/469, loss: 0.22474873065948486 2023-01-22 11:20:28.143111: step: 138/469, loss: 0.8398063778877258 2023-01-22 11:20:28.760947: step: 140/469, loss: 
0.18400143086910248 2023-01-22 11:20:29.437969: step: 142/469, loss: 0.12026840448379517 2023-01-22 11:20:30.109578: step: 144/469, loss: 0.44883477687835693 2023-01-22 11:20:30.697440: step: 146/469, loss: 0.13671113550662994 2023-01-22 11:20:31.317300: step: 148/469, loss: 0.271941214799881 2023-01-22 11:20:32.072453: step: 150/469, loss: 0.19189752638339996 2023-01-22 11:20:32.654725: step: 152/469, loss: 0.30071648955345154 2023-01-22 11:20:33.332624: step: 154/469, loss: 0.34274008870124817 2023-01-22 11:20:34.001777: step: 156/469, loss: 0.1937110424041748 2023-01-22 11:20:34.694410: step: 158/469, loss: 0.1706651747226715 2023-01-22 11:20:35.362191: step: 160/469, loss: 0.315620481967926 2023-01-22 11:20:35.969928: step: 162/469, loss: 0.5146969556808472 2023-01-22 11:20:36.601180: step: 164/469, loss: 0.22744236886501312 2023-01-22 11:20:37.286257: step: 166/469, loss: 0.7629155516624451 2023-01-22 11:20:37.902807: step: 168/469, loss: 0.1929679811000824 2023-01-22 11:20:38.565557: step: 170/469, loss: 0.05950044095516205 2023-01-22 11:20:39.171699: step: 172/469, loss: 0.38969722390174866 2023-01-22 11:20:39.836478: step: 174/469, loss: 0.2708067297935486 2023-01-22 11:20:40.440314: step: 176/469, loss: 0.3015950918197632 2023-01-22 11:20:41.057758: step: 178/469, loss: 0.8002981543540955 2023-01-22 11:20:41.637943: step: 180/469, loss: 0.0946851596236229 2023-01-22 11:20:42.283355: step: 182/469, loss: 0.09820695966482162 2023-01-22 11:20:42.959220: step: 184/469, loss: 1.2328671216964722 2023-01-22 11:20:43.624533: step: 186/469, loss: 0.012314117513597012 2023-01-22 11:20:44.263704: step: 188/469, loss: 0.13698436319828033 2023-01-22 11:20:44.950811: step: 190/469, loss: 0.2808561623096466 2023-01-22 11:20:45.571033: step: 192/469, loss: 0.16265708208084106 2023-01-22 11:20:46.180510: step: 194/469, loss: 0.1761542707681656 2023-01-22 11:20:46.849444: step: 196/469, loss: 0.3574264347553253 2023-01-22 11:20:47.497793: step: 198/469, loss: 0.31109192967414856 2023-01-22 11:20:48.134680: step: 200/469, loss: 0.1486627757549286 2023-01-22 11:20:48.831139: step: 202/469, loss: 0.33616459369659424 2023-01-22 11:20:49.469377: step: 204/469, loss: 0.2765996754169464 2023-01-22 11:20:50.097018: step: 206/469, loss: 0.32589688897132874 2023-01-22 11:20:50.678651: step: 208/469, loss: 0.21071374416351318 2023-01-22 11:20:51.333292: step: 210/469, loss: 0.23598650097846985 2023-01-22 11:20:52.019307: step: 212/469, loss: 0.6158100962638855 2023-01-22 11:20:52.619166: step: 214/469, loss: 0.25500190258026123 2023-01-22 11:20:53.330640: step: 216/469, loss: 0.6955997347831726 2023-01-22 11:20:53.953534: step: 218/469, loss: 0.21698550879955292 2023-01-22 11:20:54.542189: step: 220/469, loss: 0.2044127732515335 2023-01-22 11:20:55.189603: step: 222/469, loss: 0.2289697825908661 2023-01-22 11:20:55.911964: step: 224/469, loss: 0.6374063491821289 2023-01-22 11:20:56.591823: step: 226/469, loss: 0.25750455260276794 2023-01-22 11:20:57.339372: step: 228/469, loss: 0.22121837735176086 2023-01-22 11:20:57.984300: step: 230/469, loss: 0.26868629455566406 2023-01-22 11:20:58.661020: step: 232/469, loss: 0.16197511553764343 2023-01-22 11:20:59.391926: step: 234/469, loss: 0.33671021461486816 2023-01-22 11:21:00.052367: step: 236/469, loss: 0.18029142916202545 2023-01-22 11:21:00.706796: step: 238/469, loss: 0.08106422424316406 2023-01-22 11:21:01.456688: step: 240/469, loss: 0.7172487378120422 2023-01-22 11:21:02.206279: step: 242/469, loss: 0.36248961091041565 2023-01-22 11:21:02.854608: step: 
244/469, loss: 0.16364188492298126 2023-01-22 11:21:03.492107: step: 246/469, loss: 0.07686292380094528 2023-01-22 11:21:04.105720: step: 248/469, loss: 0.16886407136917114 2023-01-22 11:21:04.707327: step: 250/469, loss: 0.8551551699638367 2023-01-22 11:21:05.347288: step: 252/469, loss: 1.0002883672714233 2023-01-22 11:21:05.956993: step: 254/469, loss: 0.4218061566352844 2023-01-22 11:21:06.675025: step: 256/469, loss: 0.2653208374977112 2023-01-22 11:21:07.376035: step: 258/469, loss: 0.40442734956741333 2023-01-22 11:21:08.020037: step: 260/469, loss: 0.09808026254177094 2023-01-22 11:21:08.627172: step: 262/469, loss: 0.21416310966014862 2023-01-22 11:21:09.291858: step: 264/469, loss: 0.739740788936615 2023-01-22 11:21:09.941155: step: 266/469, loss: 0.18710766732692719 2023-01-22 11:21:10.669585: step: 268/469, loss: 0.38552895188331604 2023-01-22 11:21:11.277587: step: 270/469, loss: 0.15326951444149017 2023-01-22 11:21:11.971352: step: 272/469, loss: 0.10515882074832916 2023-01-22 11:21:12.663514: step: 274/469, loss: 0.3892996609210968 2023-01-22 11:21:13.265733: step: 276/469, loss: 0.40951618552207947 2023-01-22 11:21:13.926987: step: 278/469, loss: 0.1631772369146347 2023-01-22 11:21:14.639799: step: 280/469, loss: 0.17427664995193481 2023-01-22 11:21:15.298178: step: 282/469, loss: 0.16767585277557373 2023-01-22 11:21:15.941853: step: 284/469, loss: 0.2996757924556732 2023-01-22 11:21:16.615794: step: 286/469, loss: 0.4054833948612213 2023-01-22 11:21:17.273723: step: 288/469, loss: 0.19466261565685272 2023-01-22 11:21:17.888237: step: 290/469, loss: 0.5838653445243835 2023-01-22 11:21:18.557139: step: 292/469, loss: 0.18020103871822357 2023-01-22 11:21:19.191265: step: 294/469, loss: 0.7194074988365173 2023-01-22 11:21:19.862702: step: 296/469, loss: 1.1572341918945312 2023-01-22 11:21:20.506495: step: 298/469, loss: 0.24678093194961548 2023-01-22 11:21:21.182168: step: 300/469, loss: 0.13127844035625458 2023-01-22 11:21:21.852603: step: 302/469, loss: 0.1344376504421234 2023-01-22 11:21:22.451047: step: 304/469, loss: 0.5381799340248108 2023-01-22 11:21:23.120810: step: 306/469, loss: 1.9210658073425293 2023-01-22 11:21:23.762294: step: 308/469, loss: 0.6812648177146912 2023-01-22 11:21:24.384901: step: 310/469, loss: 0.29162102937698364 2023-01-22 11:21:25.013759: step: 312/469, loss: 0.17570732533931732 2023-01-22 11:21:25.717469: step: 314/469, loss: 0.21051527559757233 2023-01-22 11:21:26.416612: step: 316/469, loss: 0.4683579206466675 2023-01-22 11:21:27.101202: step: 318/469, loss: 0.1692407876253128 2023-01-22 11:21:27.703600: step: 320/469, loss: 0.08784981817007065 2023-01-22 11:21:28.344541: step: 322/469, loss: 1.063167691230774 2023-01-22 11:21:28.981174: step: 324/469, loss: 0.09959164261817932 2023-01-22 11:21:29.704054: step: 326/469, loss: 0.126796156167984 2023-01-22 11:21:30.339025: step: 328/469, loss: 0.7276470065116882 2023-01-22 11:21:30.935931: step: 330/469, loss: 0.1952531486749649 2023-01-22 11:21:31.657335: step: 332/469, loss: 0.47737717628479004 2023-01-22 11:21:32.376650: step: 334/469, loss: 1.034452199935913 2023-01-22 11:21:33.029874: step: 336/469, loss: 0.6077302694320679 2023-01-22 11:21:33.677563: step: 338/469, loss: 0.3177364766597748 2023-01-22 11:21:34.307235: step: 340/469, loss: 0.3545728027820587 2023-01-22 11:21:34.971597: step: 342/469, loss: 0.33772382140159607 2023-01-22 11:21:35.647582: step: 344/469, loss: 0.22115831077098846 2023-01-22 11:21:36.271272: step: 346/469, loss: 0.23858800530433655 2023-01-22 11:21:36.970268: 
step: 348/469, loss: 0.34981250762939453 2023-01-22 11:21:37.634949: step: 350/469, loss: 0.19385243952274323 2023-01-22 11:21:38.240881: step: 352/469, loss: 0.18082115054130554 2023-01-22 11:21:38.926563: step: 354/469, loss: 0.6923066973686218 2023-01-22 11:21:39.601695: step: 356/469, loss: 0.2674540877342224 2023-01-22 11:21:40.261734: step: 358/469, loss: 0.12808631360530853 2023-01-22 11:21:40.952294: step: 360/469, loss: 0.334553599357605 2023-01-22 11:21:41.594749: step: 362/469, loss: 0.39071792364120483 2023-01-22 11:21:42.272368: step: 364/469, loss: 0.6498745083808899 2023-01-22 11:21:42.910120: step: 366/469, loss: 0.48026883602142334 2023-01-22 11:21:43.613905: step: 368/469, loss: 0.13347609341144562 2023-01-22 11:21:44.257052: step: 370/469, loss: 0.38516297936439514 2023-01-22 11:21:44.954382: step: 372/469, loss: 0.48706355690956116 2023-01-22 11:21:45.674031: step: 374/469, loss: 0.23065999150276184 2023-01-22 11:21:46.270162: step: 376/469, loss: 0.21758006513118744 2023-01-22 11:21:46.797023: step: 378/469, loss: 0.3067559599876404 2023-01-22 11:21:47.579908: step: 380/469, loss: 0.34745267033576965 2023-01-22 11:21:48.261687: step: 382/469, loss: 0.20142170786857605 2023-01-22 11:21:48.938298: step: 384/469, loss: 0.20668385922908783 2023-01-22 11:21:49.577494: step: 386/469, loss: 0.18116232752799988 2023-01-22 11:21:50.246803: step: 388/469, loss: 0.4907574951648712 2023-01-22 11:21:50.903711: step: 390/469, loss: 0.6749672889709473 2023-01-22 11:21:51.549754: step: 392/469, loss: 0.21538802981376648 2023-01-22 11:21:52.291706: step: 394/469, loss: 0.2518099546432495 2023-01-22 11:21:52.952517: step: 396/469, loss: 0.23233908414840698 2023-01-22 11:21:53.635851: step: 398/469, loss: 1.26188063621521 2023-01-22 11:21:54.307033: step: 400/469, loss: 0.4455063045024872 2023-01-22 11:21:54.981236: step: 402/469, loss: 0.25249287486076355 2023-01-22 11:21:55.589636: step: 404/469, loss: 0.0908951386809349 2023-01-22 11:21:56.235433: step: 406/469, loss: 0.33772221207618713 2023-01-22 11:21:56.951419: step: 408/469, loss: 0.28536897897720337 2023-01-22 11:21:57.588422: step: 410/469, loss: 0.22849786281585693 2023-01-22 11:21:58.313531: step: 412/469, loss: 0.2997598946094513 2023-01-22 11:21:58.977047: step: 414/469, loss: 0.45876169204711914 2023-01-22 11:21:59.605682: step: 416/469, loss: 3.180163860321045 2023-01-22 11:22:00.272898: step: 418/469, loss: 0.848159670829773 2023-01-22 11:22:00.918654: step: 420/469, loss: 0.08702873438596725 2023-01-22 11:22:01.592824: step: 422/469, loss: 0.5647095441818237 2023-01-22 11:22:02.206832: step: 424/469, loss: 0.1951759159564972 2023-01-22 11:22:02.888197: step: 426/469, loss: 0.5742413401603699 2023-01-22 11:22:03.568131: step: 428/469, loss: 0.481643944978714 2023-01-22 11:22:04.244063: step: 430/469, loss: 0.18370461463928223 2023-01-22 11:22:04.928552: step: 432/469, loss: 6.539400100708008 2023-01-22 11:22:05.622881: step: 434/469, loss: 0.19780448079109192 2023-01-22 11:22:06.292686: step: 436/469, loss: 0.2445353865623474 2023-01-22 11:22:07.046451: step: 438/469, loss: 0.454621821641922 2023-01-22 11:22:07.718869: step: 440/469, loss: 0.182010218501091 2023-01-22 11:22:08.381579: step: 442/469, loss: 1.381121277809143 2023-01-22 11:22:09.049784: step: 444/469, loss: 0.3562524914741516 2023-01-22 11:22:09.675358: step: 446/469, loss: 0.4234894812107086 2023-01-22 11:22:10.292335: step: 448/469, loss: 0.6700560450553894 2023-01-22 11:22:11.010640: step: 450/469, loss: 0.6063768267631531 2023-01-22 11:22:11.684044: 
step: 452/469, loss: 0.8879229426383972 2023-01-22 11:22:12.366691: step: 454/469, loss: 0.2727753818035126 2023-01-22 11:22:13.004777: step: 456/469, loss: 0.10498542338609695 2023-01-22 11:22:13.631660: step: 458/469, loss: 0.7134118676185608 2023-01-22 11:22:14.365050: step: 460/469, loss: 0.5197572708129883 2023-01-22 11:22:15.044042: step: 462/469, loss: 0.06188105791807175 2023-01-22 11:22:15.679141: step: 464/469, loss: 0.2188866138458252 2023-01-22 11:22:16.383197: step: 466/469, loss: 0.2279590666294098 2023-01-22 11:22:17.090021: step: 468/469, loss: 2.0994179248809814 2023-01-22 11:22:17.737710: step: 470/469, loss: 0.1228613406419754 2023-01-22 11:22:18.395550: step: 472/469, loss: 0.7680542469024658 2023-01-22 11:22:19.052930: step: 474/469, loss: 0.14352768659591675 2023-01-22 11:22:19.630501: step: 476/469, loss: 0.20120570063591003 2023-01-22 11:22:20.238722: step: 478/469, loss: 3.547847270965576 2023-01-22 11:22:20.930597: step: 480/469, loss: 3.7162673473358154 2023-01-22 11:22:21.492740: step: 482/469, loss: 0.5510678291320801 2023-01-22 11:22:22.104307: step: 484/469, loss: 0.3959076702594757 2023-01-22 11:22:22.708651: step: 486/469, loss: 0.10709948092699051 2023-01-22 11:22:23.341489: step: 488/469, loss: 0.12302729487419128 2023-01-22 11:22:24.063292: step: 490/469, loss: 1.2091511487960815 2023-01-22 11:22:24.692148: step: 492/469, loss: 0.15840092301368713 2023-01-22 11:22:25.380024: step: 494/469, loss: 0.5001250505447388 2023-01-22 11:22:25.981138: step: 496/469, loss: 0.33823445439338684 2023-01-22 11:22:26.785072: step: 498/469, loss: 0.28428220748901367 2023-01-22 11:22:27.460878: step: 500/469, loss: 0.5808488130569458 2023-01-22 11:22:28.119355: step: 502/469, loss: 0.51570725440979 2023-01-22 11:22:28.711448: step: 504/469, loss: 0.1313687264919281 2023-01-22 11:22:29.397507: step: 506/469, loss: 0.8132759928703308 2023-01-22 11:22:30.130946: step: 508/469, loss: 1.2488203048706055 2023-01-22 11:22:30.720231: step: 510/469, loss: 0.3761872947216034 2023-01-22 11:22:31.361092: step: 512/469, loss: 0.26774799823760986 2023-01-22 11:22:31.950324: step: 514/469, loss: 0.54317307472229 2023-01-22 11:22:32.568122: step: 516/469, loss: 0.3677058815956116 2023-01-22 11:22:33.225307: step: 518/469, loss: 5.430683135986328 2023-01-22 11:22:33.925828: step: 520/469, loss: 0.0835961401462555 2023-01-22 11:22:34.517430: step: 522/469, loss: 0.5383288264274597 2023-01-22 11:22:35.172231: step: 524/469, loss: 0.34538698196411133 2023-01-22 11:22:35.841119: step: 526/469, loss: 0.2064712792634964 2023-01-22 11:22:36.533412: step: 528/469, loss: 0.1574334055185318 2023-01-22 11:22:37.096855: step: 530/469, loss: 0.6473233103752136 2023-01-22 11:22:37.778750: step: 532/469, loss: 0.4723932445049286 2023-01-22 11:22:38.532699: step: 534/469, loss: 1.2586174011230469 2023-01-22 11:22:39.222206: step: 536/469, loss: 0.11741548031568527 2023-01-22 11:22:39.852218: step: 538/469, loss: 2.808222770690918 2023-01-22 11:22:40.539209: step: 540/469, loss: 0.14677190780639648 2023-01-22 11:22:41.176194: step: 542/469, loss: 0.31913504004478455 2023-01-22 11:22:41.859864: step: 544/469, loss: 0.36843451857566833 2023-01-22 11:22:42.506105: step: 546/469, loss: 0.8397376537322998 2023-01-22 11:22:43.200223: step: 548/469, loss: 0.7211434841156006 2023-01-22 11:22:43.896896: step: 550/469, loss: 0.4781056344509125 2023-01-22 11:22:44.537326: step: 552/469, loss: 0.21089711785316467 2023-01-22 11:22:45.195740: step: 554/469, loss: 0.3777916431427002 2023-01-22 11:22:45.791555: step: 
556/469, loss: 0.286102294921875 2023-01-22 11:22:46.462965: step: 558/469, loss: 0.454565167427063 2023-01-22 11:22:47.061693: step: 560/469, loss: 0.38984936475753784 2023-01-22 11:22:47.717275: step: 562/469, loss: 0.549746036529541 2023-01-22 11:22:48.427119: step: 564/469, loss: 0.5132108926773071 2023-01-22 11:22:49.114548: step: 566/469, loss: 0.4313230812549591 2023-01-22 11:22:49.812835: step: 568/469, loss: 0.34913328289985657 2023-01-22 11:22:50.450596: step: 570/469, loss: 0.24797344207763672 2023-01-22 11:22:51.168611: step: 572/469, loss: 0.15785956382751465 2023-01-22 11:22:51.812793: step: 574/469, loss: 0.10448366403579712 2023-01-22 11:22:52.474775: step: 576/469, loss: 0.3704490065574646 2023-01-22 11:22:53.129358: step: 578/469, loss: 0.13047370314598083 2023-01-22 11:22:53.700536: step: 580/469, loss: 0.21161755919456482 2023-01-22 11:22:54.320519: step: 582/469, loss: 0.7316820025444031 2023-01-22 11:22:54.987616: step: 584/469, loss: 0.19775795936584473 2023-01-22 11:22:55.639400: step: 586/469, loss: 0.399997353553772 2023-01-22 11:22:56.272286: step: 588/469, loss: 0.06698208302259445 2023-01-22 11:22:56.958806: step: 590/469, loss: 0.4157121777534485 2023-01-22 11:22:57.619845: step: 592/469, loss: 1.3520870208740234 2023-01-22 11:22:58.281699: step: 594/469, loss: 0.39281463623046875 2023-01-22 11:22:58.957087: step: 596/469, loss: 0.23385417461395264 2023-01-22 11:22:59.626156: step: 598/469, loss: 0.16917254030704498 2023-01-22 11:23:00.250722: step: 600/469, loss: 0.3146384358406067 2023-01-22 11:23:00.877155: step: 602/469, loss: 0.2427896410226822 2023-01-22 11:23:01.541549: step: 604/469, loss: 0.22958119213581085 2023-01-22 11:23:02.188974: step: 606/469, loss: 0.20856578648090363 2023-01-22 11:23:02.886506: step: 608/469, loss: 0.5497919321060181 2023-01-22 11:23:03.535371: step: 610/469, loss: 0.29124996066093445 2023-01-22 11:23:04.174861: step: 612/469, loss: 0.11491380631923676 2023-01-22 11:23:04.860067: step: 614/469, loss: 1.065051555633545 2023-01-22 11:23:05.536195: step: 616/469, loss: 0.7414249181747437 2023-01-22 11:23:06.162369: step: 618/469, loss: 0.18989019095897675 2023-01-22 11:23:06.794302: step: 620/469, loss: 0.5554919242858887 2023-01-22 11:23:07.380609: step: 622/469, loss: 0.27418890595436096 2023-01-22 11:23:08.120031: step: 624/469, loss: 0.5021735429763794 2023-01-22 11:23:08.788155: step: 626/469, loss: 0.42460668087005615 2023-01-22 11:23:09.425749: step: 628/469, loss: 0.5467215776443481 2023-01-22 11:23:10.062170: step: 630/469, loss: 0.18194285035133362 2023-01-22 11:23:10.743962: step: 632/469, loss: 0.1417941153049469 2023-01-22 11:23:11.366514: step: 634/469, loss: 1.695264220237732 2023-01-22 11:23:12.078935: step: 636/469, loss: 0.05234282463788986 2023-01-22 11:23:12.688563: step: 638/469, loss: 0.26596134901046753 2023-01-22 11:23:13.401107: step: 640/469, loss: 0.9196944236755371 2023-01-22 11:23:13.996724: step: 642/469, loss: 0.6783714890480042 2023-01-22 11:23:14.603003: step: 644/469, loss: 0.6495234966278076 2023-01-22 11:23:15.251626: step: 646/469, loss: 0.2590858042240143 2023-01-22 11:23:15.889148: step: 648/469, loss: 0.2058669477701187 2023-01-22 11:23:16.613404: step: 650/469, loss: 0.6924328804016113 2023-01-22 11:23:17.345324: step: 652/469, loss: 0.11647651344537735 2023-01-22 11:23:18.054188: step: 654/469, loss: 2.0422580242156982 2023-01-22 11:23:18.672067: step: 656/469, loss: 0.7838826179504395 2023-01-22 11:23:19.365251: step: 658/469, loss: 0.29782330989837646 2023-01-22 11:23:20.076217: step: 
660/469, loss: 0.1432504653930664 2023-01-22 11:23:20.728788: step: 662/469, loss: 0.8504621386528015 2023-01-22 11:23:21.320447: step: 664/469, loss: 0.6591264605522156 2023-01-22 11:23:22.011311: step: 666/469, loss: 0.7096976041793823 2023-01-22 11:23:22.662943: step: 668/469, loss: 0.6823020577430725 2023-01-22 11:23:23.335560: step: 670/469, loss: 0.6952880620956421 2023-01-22 11:23:23.958633: step: 672/469, loss: 0.19265688955783844 2023-01-22 11:23:24.626922: step: 674/469, loss: 0.3284139931201935 2023-01-22 11:23:25.201809: step: 676/469, loss: 0.1648135930299759 2023-01-22 11:23:25.873520: step: 678/469, loss: 0.36375126242637634 2023-01-22 11:23:26.633193: step: 680/469, loss: 0.19929485023021698 2023-01-22 11:23:27.246973: step: 682/469, loss: 0.48844996094703674 2023-01-22 11:23:27.916490: step: 684/469, loss: 0.15448260307312012 2023-01-22 11:23:28.529415: step: 686/469, loss: 0.26222851872444153 2023-01-22 11:23:29.204250: step: 688/469, loss: 0.11118326336145401 2023-01-22 11:23:29.833366: step: 690/469, loss: 0.16954626142978668 2023-01-22 11:23:30.496270: step: 692/469, loss: 0.8205360174179077 2023-01-22 11:23:31.138106: step: 694/469, loss: 0.5341930985450745 2023-01-22 11:23:31.747200: step: 696/469, loss: 0.27343830466270447 2023-01-22 11:23:32.373990: step: 698/469, loss: 0.17632602155208588 2023-01-22 11:23:33.043907: step: 700/469, loss: 0.4227660894393921 2023-01-22 11:23:33.683893: step: 702/469, loss: 0.7923645377159119 2023-01-22 11:23:34.372417: step: 704/469, loss: 0.6778087019920349 2023-01-22 11:23:35.092074: step: 706/469, loss: 0.6295285820960999 2023-01-22 11:23:35.723111: step: 708/469, loss: 0.11903341859579086 2023-01-22 11:23:36.392678: step: 710/469, loss: 0.14304491877555847 2023-01-22 11:23:37.037372: step: 712/469, loss: 0.45329543948173523 2023-01-22 11:23:37.657862: step: 714/469, loss: 0.3691706955432892 2023-01-22 11:23:38.263328: step: 716/469, loss: 0.3766706883907318 2023-01-22 11:23:38.950365: step: 718/469, loss: 0.657667338848114 2023-01-22 11:23:39.602122: step: 720/469, loss: 0.10985753685235977 2023-01-22 11:23:40.280311: step: 722/469, loss: 0.6467972993850708 2023-01-22 11:23:40.991816: step: 724/469, loss: 0.46754735708236694 2023-01-22 11:23:41.679786: step: 726/469, loss: 0.15916554629802704 2023-01-22 11:23:42.259182: step: 728/469, loss: 2.8822710514068604 2023-01-22 11:23:42.864894: step: 730/469, loss: 0.42414069175720215 2023-01-22 11:23:43.538071: step: 732/469, loss: 0.2098163366317749 2023-01-22 11:23:44.243719: step: 734/469, loss: 0.20797768235206604 2023-01-22 11:23:44.848284: step: 736/469, loss: 0.3841201364994049 2023-01-22 11:23:45.545255: step: 738/469, loss: 0.05002370849251747 2023-01-22 11:23:46.194598: step: 740/469, loss: 0.29889240860939026 2023-01-22 11:23:46.808387: step: 742/469, loss: 0.17595617473125458 2023-01-22 11:23:47.456353: step: 744/469, loss: 0.4108639657497406 2023-01-22 11:23:48.077490: step: 746/469, loss: 0.3597712516784668 2023-01-22 11:23:48.783284: step: 748/469, loss: 0.23803681135177612 2023-01-22 11:23:49.397933: step: 750/469, loss: 0.18777571618556976 2023-01-22 11:23:49.983534: step: 752/469, loss: 0.10587222874164581 2023-01-22 11:23:50.619524: step: 754/469, loss: 0.7175968885421753 2023-01-22 11:23:51.259253: step: 756/469, loss: 0.4509286880493164 2023-01-22 11:23:51.901113: step: 758/469, loss: 0.7743290066719055 2023-01-22 11:23:52.496288: step: 760/469, loss: 0.9395543932914734 2023-01-22 11:23:53.125893: step: 762/469, loss: 0.231919527053833 2023-01-22 11:23:53.824466: 
step: 764/469, loss: 0.27963942289352417 2023-01-22 11:23:54.529977: step: 766/469, loss: 0.2812337577342987 2023-01-22 11:23:55.159131: step: 768/469, loss: 0.11665716767311096 2023-01-22 11:23:55.783853: step: 770/469, loss: 0.5176973342895508 2023-01-22 11:23:56.407726: step: 772/469, loss: 0.16111698746681213 2023-01-22 11:23:57.046468: step: 774/469, loss: 0.23417110741138458 2023-01-22 11:23:57.678127: step: 776/469, loss: 0.41849949955940247 2023-01-22 11:23:58.336989: step: 778/469, loss: 0.13244368135929108 2023-01-22 11:23:58.949964: step: 780/469, loss: 0.42881375551223755 2023-01-22 11:23:59.621144: step: 782/469, loss: 0.18922825157642365 2023-01-22 11:24:00.297169: step: 784/469, loss: 0.2613990604877472 2023-01-22 11:24:00.960537: step: 786/469, loss: 0.2780478000640869 2023-01-22 11:24:01.606166: step: 788/469, loss: 0.6603026390075684 2023-01-22 11:24:02.354452: step: 790/469, loss: 0.14456990361213684 2023-01-22 11:24:03.018650: step: 792/469, loss: 0.20027607679367065 2023-01-22 11:24:03.674630: step: 794/469, loss: 0.3052690923213959 2023-01-22 11:24:04.349994: step: 796/469, loss: 0.20652316510677338 2023-01-22 11:24:05.033377: step: 798/469, loss: 0.20102040469646454 2023-01-22 11:24:05.648760: step: 800/469, loss: 0.13201051950454712 2023-01-22 11:24:06.343498: step: 802/469, loss: 0.6680241823196411 2023-01-22 11:24:06.901630: step: 804/469, loss: 0.16611360013484955 2023-01-22 11:24:07.577851: step: 806/469, loss: 0.27408918738365173 2023-01-22 11:24:08.184916: step: 808/469, loss: 0.11027664691209793 2023-01-22 11:24:08.796416: step: 810/469, loss: 0.7532581686973572 2023-01-22 11:24:09.518788: step: 812/469, loss: 3.7997052669525146 2023-01-22 11:24:10.179590: step: 814/469, loss: 1.519590973854065 2023-01-22 11:24:10.777551: step: 816/469, loss: 0.0590866357088089 2023-01-22 11:24:11.351411: step: 818/469, loss: 0.7658993005752563 2023-01-22 11:24:11.986815: step: 820/469, loss: 1.321593165397644 2023-01-22 11:24:12.632402: step: 822/469, loss: 0.2512640953063965 2023-01-22 11:24:13.240452: step: 824/469, loss: 0.08269482851028442 2023-01-22 11:24:13.934029: step: 826/469, loss: 0.2748689353466034 2023-01-22 11:24:14.546571: step: 828/469, loss: 0.16837763786315918 2023-01-22 11:24:15.291222: step: 830/469, loss: 0.5090327858924866 2023-01-22 11:24:15.924091: step: 832/469, loss: 0.15456701815128326 2023-01-22 11:24:16.595370: step: 834/469, loss: 1.3860313892364502 2023-01-22 11:24:17.262124: step: 836/469, loss: 0.1489032357931137 2023-01-22 11:24:17.955378: step: 838/469, loss: 0.23921431601047516 2023-01-22 11:24:18.571594: step: 840/469, loss: 0.7365400195121765 2023-01-22 11:24:19.255179: step: 842/469, loss: 0.3689229488372803 2023-01-22 11:24:19.908299: step: 844/469, loss: 0.1368788778781891 2023-01-22 11:24:20.572307: step: 846/469, loss: 0.4069958031177521 2023-01-22 11:24:21.210190: step: 848/469, loss: 0.13186892867088318 2023-01-22 11:24:21.955764: step: 850/469, loss: 0.14404664933681488 2023-01-22 11:24:22.645296: step: 852/469, loss: 0.4834625720977783 2023-01-22 11:24:23.251890: step: 854/469, loss: 0.3989724814891815 2023-01-22 11:24:23.979190: step: 856/469, loss: 0.49936750531196594 2023-01-22 11:24:24.585751: step: 858/469, loss: 0.18716418743133545 2023-01-22 11:24:25.126796: step: 860/469, loss: 0.31274962425231934 2023-01-22 11:24:25.728718: step: 862/469, loss: 0.17767015099525452 2023-01-22 11:24:26.432060: step: 864/469, loss: 0.08762048929929733 2023-01-22 11:24:27.026861: step: 866/469, loss: 0.18887192010879517 2023-01-22 
11:24:27.668897: step: 868/469, loss: 0.2997731566429138 2023-01-22 11:24:28.462524: step: 870/469, loss: 0.10482577234506607 2023-01-22 11:24:29.070577: step: 872/469, loss: 0.22939112782478333 2023-01-22 11:24:29.743457: step: 874/469, loss: 0.9563087821006775 2023-01-22 11:24:30.394229: step: 876/469, loss: 0.15791034698486328 2023-01-22 11:24:31.079500: step: 878/469, loss: 0.1795186847448349 2023-01-22 11:24:31.778750: step: 880/469, loss: 0.45465949177742004 2023-01-22 11:24:32.448714: step: 882/469, loss: 0.10572908818721771 2023-01-22 11:24:33.092048: step: 884/469, loss: 0.2038441002368927 2023-01-22 11:24:33.730317: step: 886/469, loss: 0.34216368198394775 2023-01-22 11:24:34.366248: step: 888/469, loss: 0.43507444858551025 2023-01-22 11:24:35.093611: step: 890/469, loss: 0.21979469060897827 2023-01-22 11:24:35.743798: step: 892/469, loss: 0.09537164121866226 2023-01-22 11:24:36.462011: step: 894/469, loss: 0.6587051153182983 2023-01-22 11:24:37.104376: step: 896/469, loss: 0.27228352427482605 2023-01-22 11:24:37.793575: step: 898/469, loss: 0.2525215148925781 2023-01-22 11:24:38.424277: step: 900/469, loss: 0.15978290140628815 2023-01-22 11:24:39.079092: step: 902/469, loss: 0.236827090382576 2023-01-22 11:24:39.714730: step: 904/469, loss: 0.7649135589599609 2023-01-22 11:24:40.360947: step: 906/469, loss: 0.33343371748924255 2023-01-22 11:24:41.036981: step: 908/469, loss: 0.239822655916214 2023-01-22 11:24:41.671986: step: 910/469, loss: 3.051790475845337 2023-01-22 11:24:42.399093: step: 912/469, loss: 0.4371585547924042 2023-01-22 11:24:43.106359: step: 914/469, loss: 0.30161041021347046 2023-01-22 11:24:43.835457: step: 916/469, loss: 0.3371298313140869 2023-01-22 11:24:44.454149: step: 918/469, loss: 0.19050012528896332 2023-01-22 11:24:45.067330: step: 920/469, loss: 0.6725030541419983 2023-01-22 11:24:45.704638: step: 922/469, loss: 0.2457217574119568 2023-01-22 11:24:46.292273: step: 924/469, loss: 0.16936399042606354 2023-01-22 11:24:46.917226: step: 926/469, loss: 0.11183946579694748 2023-01-22 11:24:47.565457: step: 928/469, loss: 0.15361808240413666 2023-01-22 11:24:48.211612: step: 930/469, loss: 0.42871227860450745 2023-01-22 11:24:48.849030: step: 932/469, loss: 0.43631088733673096 2023-01-22 11:24:49.486741: step: 934/469, loss: 0.4583870768547058 2023-01-22 11:24:50.141995: step: 936/469, loss: 0.34182703495025635 2023-01-22 11:24:50.805247: step: 938/469, loss: 0.5038894414901733 ================================================== Loss: 0.450 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29800384592226875, 'r': 0.31836084488470073, 'f1': 0.307846174778417}, 'combined': 0.22683402352093884, 'epoch': 9} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29176565664433707, 'r': 0.24478417853875306, 'f1': 0.2662180170575692}, 'combined': 0.14520982748594682, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29734019264448336, 'r': 0.322165559772296, 'f1': 0.3092554644808743}, 'combined': 0.22787244751222316, 'epoch': 9} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2902835245355737, 'r': 0.24300953792319302, 'f1': 0.2645512200697709}, 'combined': 0.1443006654926023, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.2963260080625143, 'r': 0.31994212255706006, 'f1': 0.3076815667656399}, 'combined': 0.22671273340626097, 'epoch': 9} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2919551578091449, 'r': 0.24788141486448897, 'f1': 0.26811913552388567}, 'combined': 0.1462468011948467, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2537878787878788, 'r': 0.3988095238095238, 'f1': 0.3101851851851851}, 'combined': 0.20679012345679007, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2578125, 'r': 0.358695652173913, 'f1': 0.29999999999999993}, 'combined': 0.14999999999999997, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.2413793103448276, 'f1': 0.2857142857142857}, 'combined': 0.19047619047619047, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123600447856686, 'r': 0.30880376344086025, 'f1': 0.31057172391857507}, 'combined': 0.22884232288737108, 'epoch': 5} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29928638434735994, 'r': 0.23575990569357447, 'f1': 0.26375186993150146}, 'combined': 0.14386465632627352, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 5} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:27:49.605922: step: 2/469, loss: 0.3260871171951294 2023-01-22 11:27:50.200937: step: 4/469, loss: 0.11803068220615387 2023-01-22 11:27:50.886563: step: 6/469, loss: 0.1941731572151184 2023-01-22 11:27:51.590434: step: 
8/469, loss: 0.09274458140134811 2023-01-22 11:27:52.269301: step: 10/469, loss: 0.15450076758861542 2023-01-22 11:27:52.914755: step: 12/469, loss: 0.3043484687805176 2023-01-22 11:27:53.550975: step: 14/469, loss: 0.0967775285243988 2023-01-22 11:27:54.184424: step: 16/469, loss: 0.32464948296546936 2023-01-22 11:27:54.834674: step: 18/469, loss: 0.2270539104938507 2023-01-22 11:27:55.488258: step: 20/469, loss: 0.06182711943984032 2023-01-22 11:27:56.161004: step: 22/469, loss: 0.10078060626983643 2023-01-22 11:27:56.736682: step: 24/469, loss: 0.13304398953914642 2023-01-22 11:27:57.428845: step: 26/469, loss: 0.46218162775039673 2023-01-22 11:27:58.079979: step: 28/469, loss: 0.18660806119441986 2023-01-22 11:27:58.712615: step: 30/469, loss: 0.07191115617752075 2023-01-22 11:27:59.364571: step: 32/469, loss: 0.1009015217423439 2023-01-22 11:28:00.030683: step: 34/469, loss: 0.06282003223896027 2023-01-22 11:28:00.650419: step: 36/469, loss: 0.05078323930501938 2023-01-22 11:28:01.276514: step: 38/469, loss: 0.16965973377227783 2023-01-22 11:28:01.912560: step: 40/469, loss: 0.2929583191871643 2023-01-22 11:28:02.613165: step: 42/469, loss: 0.4280208349227905 2023-01-22 11:28:03.266907: step: 44/469, loss: 0.22449113428592682 2023-01-22 11:28:03.977404: step: 46/469, loss: 0.08549151569604874 2023-01-22 11:28:04.664734: step: 48/469, loss: 0.13578908145427704 2023-01-22 11:28:05.321263: step: 50/469, loss: 0.21607942879199982 2023-01-22 11:28:05.928782: step: 52/469, loss: 0.7591243982315063 2023-01-22 11:28:06.619102: step: 54/469, loss: 0.17915067076683044 2023-01-22 11:28:07.249301: step: 56/469, loss: 0.1202983483672142 2023-01-22 11:28:07.891365: step: 58/469, loss: 0.1737108677625656 2023-01-22 11:28:08.523529: step: 60/469, loss: 0.13567295670509338 2023-01-22 11:28:09.172658: step: 62/469, loss: 0.584044873714447 2023-01-22 11:28:09.855618: step: 64/469, loss: 0.18515388667583466 2023-01-22 11:28:10.473996: step: 66/469, loss: 0.20488345623016357 2023-01-22 11:28:11.119594: step: 68/469, loss: 0.148570254445076 2023-01-22 11:28:11.916818: step: 70/469, loss: 0.2079058736562729 2023-01-22 11:28:12.533889: step: 72/469, loss: 0.18115873634815216 2023-01-22 11:28:13.211399: step: 74/469, loss: 0.12436431646347046 2023-01-22 11:28:13.913503: step: 76/469, loss: 0.1123075783252716 2023-01-22 11:28:14.570649: step: 78/469, loss: 0.20267552137374878 2023-01-22 11:28:15.172143: step: 80/469, loss: 0.23112595081329346 2023-01-22 11:28:15.844258: step: 82/469, loss: 0.19608274102210999 2023-01-22 11:28:16.459102: step: 84/469, loss: 0.30020779371261597 2023-01-22 11:28:17.075451: step: 86/469, loss: 0.24599343538284302 2023-01-22 11:28:17.740603: step: 88/469, loss: 0.2468404322862625 2023-01-22 11:28:18.350043: step: 90/469, loss: 1.329355001449585 2023-01-22 11:28:18.974333: step: 92/469, loss: 0.5803744792938232 2023-01-22 11:28:19.723297: step: 94/469, loss: 0.32619214057922363 2023-01-22 11:28:20.382476: step: 96/469, loss: 0.17178703844547272 2023-01-22 11:28:21.024655: step: 98/469, loss: 0.5373564958572388 2023-01-22 11:28:21.706241: step: 100/469, loss: 0.28605222702026367 2023-01-22 11:28:22.365737: step: 102/469, loss: 0.08383268117904663 2023-01-22 11:28:23.056916: step: 104/469, loss: 1.9409605264663696 2023-01-22 11:28:23.809897: step: 106/469, loss: 0.17641407251358032 2023-01-22 11:28:24.479248: step: 108/469, loss: 0.773276686668396 2023-01-22 11:28:25.165718: step: 110/469, loss: 0.11978209018707275 2023-01-22 11:28:25.831171: step: 112/469, loss: 0.34194859862327576 
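
[Editor's note] In the per-epoch evaluation blocks above, each reported f1 is consistent with the usual harmonic mean of the printed p and r, and each 'combined' value is consistent with the product of the template f1 and the slot f1 (e.g., for the epoch-9 Dev Chinese block, 0.7368421052631579 × 0.307846174778417 ≈ 0.22683402352093884). The scorer inside train.py is not shown in this log, so the snippet below is only a minimal sketch reproducing the arithmetic that the logged numbers are consistent with; the function names are illustrative, not taken from the repository.

```python
# Minimal sketch (not the project's scorer): reproduces the arithmetic that the
# logged metric dicts are consistent with.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; 0 when both are 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """The logged 'combined' value matches template_f1 * slot_f1."""
    return template["f1"] * slot["f1"]

# Check against the epoch-9 "Dev Chinese" block printed above.
template = {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579}
slot = {"p": 0.29800384592226875, "r": 0.31836084488470073, "f1": 0.307846174778417}

assert abs(f1(template["p"], template["r"]) - template["f1"]) < 1e-9
assert abs(f1(slot["p"], slot["r"]) - slot["f1"]) < 1e-9
assert abs(combined_score(template, slot) - 0.22683402352093884) < 1e-9
```
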
2023-01-22 11:28:26.496773: step: 114/469, loss: 0.18613550066947937 2023-01-22 11:28:27.172850: step: 116/469, loss: 0.12781774997711182 2023-01-22 11:28:27.784193: step: 118/469, loss: 0.16251547634601593 2023-01-22 11:28:28.405049: step: 120/469, loss: 0.3708127439022064 2023-01-22 11:28:29.001700: step: 122/469, loss: 0.14788971841335297 2023-01-22 11:28:29.711799: step: 124/469, loss: 0.2537625730037689 2023-01-22 11:28:30.463412: step: 126/469, loss: 0.5536888241767883 2023-01-22 11:28:31.133646: step: 128/469, loss: 0.16357994079589844 2023-01-22 11:28:31.769624: step: 130/469, loss: 0.24256977438926697 2023-01-22 11:28:32.375828: step: 132/469, loss: 0.08456134796142578 2023-01-22 11:28:33.082006: step: 134/469, loss: 0.24679823219776154 2023-01-22 11:28:33.734395: step: 136/469, loss: 0.31085076928138733 2023-01-22 11:28:34.411694: step: 138/469, loss: 0.10154884308576584 2023-01-22 11:28:35.059698: step: 140/469, loss: 0.6388983130455017 2023-01-22 11:28:35.729149: step: 142/469, loss: 0.4659835696220398 2023-01-22 11:28:36.375797: step: 144/469, loss: 0.2552926242351532 2023-01-22 11:28:37.070085: step: 146/469, loss: 0.2275351583957672 2023-01-22 11:28:37.691636: step: 148/469, loss: 0.14156070351600647 2023-01-22 11:28:38.410896: step: 150/469, loss: 0.10421870648860931 2023-01-22 11:28:39.130365: step: 152/469, loss: 0.08590361475944519 2023-01-22 11:28:39.876372: step: 154/469, loss: 0.19352349638938904 2023-01-22 11:28:40.496507: step: 156/469, loss: 0.20251214504241943 2023-01-22 11:28:41.098466: step: 158/469, loss: 0.37143537402153015 2023-01-22 11:28:41.903610: step: 160/469, loss: 0.29133740067481995 2023-01-22 11:28:42.611310: step: 162/469, loss: 0.18984173238277435 2023-01-22 11:28:43.255712: step: 164/469, loss: 0.127028688788414 2023-01-22 11:28:43.944681: step: 166/469, loss: 0.08901457488536835 2023-01-22 11:28:44.546824: step: 168/469, loss: 0.08538316935300827 2023-01-22 11:28:45.143662: step: 170/469, loss: 1.7103285789489746 2023-01-22 11:28:45.830932: step: 172/469, loss: 0.11412934958934784 2023-01-22 11:28:46.431187: step: 174/469, loss: 0.14428536593914032 2023-01-22 11:28:47.135833: step: 176/469, loss: 0.18397821485996246 2023-01-22 11:28:47.774249: step: 178/469, loss: 0.17066001892089844 2023-01-22 11:28:48.449134: step: 180/469, loss: 0.31434497237205505 2023-01-22 11:28:49.127002: step: 182/469, loss: 0.654872477054596 2023-01-22 11:28:49.770005: step: 184/469, loss: 0.20487254858016968 2023-01-22 11:28:50.450654: step: 186/469, loss: 0.12853318452835083 2023-01-22 11:28:51.101494: step: 188/469, loss: 0.11310946941375732 2023-01-22 11:28:51.722282: step: 190/469, loss: 0.6582525968551636 2023-01-22 11:28:52.363058: step: 192/469, loss: 0.3505593240261078 2023-01-22 11:28:52.965328: step: 194/469, loss: 0.16400399804115295 2023-01-22 11:28:53.610367: step: 196/469, loss: 0.13037331402301788 2023-01-22 11:28:54.294965: step: 198/469, loss: 7.986757755279541 2023-01-22 11:28:54.969099: step: 200/469, loss: 0.15797364711761475 2023-01-22 11:28:55.610534: step: 202/469, loss: 0.7425860166549683 2023-01-22 11:28:56.163969: step: 204/469, loss: 1.7749760150909424 2023-01-22 11:28:56.787820: step: 206/469, loss: 0.4389791786670685 2023-01-22 11:28:57.468108: step: 208/469, loss: 0.14861886203289032 2023-01-22 11:28:58.159240: step: 210/469, loss: 0.08495287597179413 2023-01-22 11:28:58.807812: step: 212/469, loss: 0.13479702174663544 2023-01-22 11:28:59.444780: step: 214/469, loss: 1.7719552516937256 2023-01-22 11:29:00.111171: step: 216/469, loss: 
0.17068809270858765 2023-01-22 11:29:00.784494: step: 218/469, loss: 0.1426297277212143 2023-01-22 11:29:01.395419: step: 220/469, loss: 0.1827821433544159 2023-01-22 11:29:02.029704: step: 222/469, loss: 0.16923899948596954 2023-01-22 11:29:02.724438: step: 224/469, loss: 0.8253857493400574 2023-01-22 11:29:03.313975: step: 226/469, loss: 0.28710123896598816 2023-01-22 11:29:03.972751: step: 228/469, loss: 0.14482896029949188 2023-01-22 11:29:04.561455: step: 230/469, loss: 0.1563882976770401 2023-01-22 11:29:05.211668: step: 232/469, loss: 0.12219942361116409 2023-01-22 11:29:06.000157: step: 234/469, loss: 0.1577947735786438 2023-01-22 11:29:06.640752: step: 236/469, loss: 0.11812220513820648 2023-01-22 11:29:07.273969: step: 238/469, loss: 0.06741271167993546 2023-01-22 11:29:07.863020: step: 240/469, loss: 0.8435651659965515 2023-01-22 11:29:08.604214: step: 242/469, loss: 0.43402570486068726 2023-01-22 11:29:09.258763: step: 244/469, loss: 0.2857842743396759 2023-01-22 11:29:09.920567: step: 246/469, loss: 0.2907649278640747 2023-01-22 11:29:10.546498: step: 248/469, loss: 0.19685302674770355 2023-01-22 11:29:11.209088: step: 250/469, loss: 0.29523003101348877 2023-01-22 11:29:11.846549: step: 252/469, loss: 0.7411558628082275 2023-01-22 11:29:12.502370: step: 254/469, loss: 0.14356470108032227 2023-01-22 11:29:13.201099: step: 256/469, loss: 0.3514004945755005 2023-01-22 11:29:13.843887: step: 258/469, loss: 0.40200075507164 2023-01-22 11:29:14.486119: step: 260/469, loss: 0.18726316094398499 2023-01-22 11:29:15.186735: step: 262/469, loss: 0.5279545187950134 2023-01-22 11:29:15.889532: step: 264/469, loss: 0.2940979599952698 2023-01-22 11:29:16.601158: step: 266/469, loss: 0.1055612862110138 2023-01-22 11:29:17.248902: step: 268/469, loss: 1.7026387453079224 2023-01-22 11:29:17.921950: step: 270/469, loss: 0.5599890351295471 2023-01-22 11:29:18.532812: step: 272/469, loss: 0.3769027888774872 2023-01-22 11:29:19.193634: step: 274/469, loss: 1.31482994556427 2023-01-22 11:29:20.002461: step: 276/469, loss: 0.6392829418182373 2023-01-22 11:29:20.709670: step: 278/469, loss: 0.12252487242221832 2023-01-22 11:29:21.374413: step: 280/469, loss: 0.3937845230102539 2023-01-22 11:29:22.014973: step: 282/469, loss: 0.8877031803131104 2023-01-22 11:29:22.685918: step: 284/469, loss: 0.746691107749939 2023-01-22 11:29:23.366890: step: 286/469, loss: 0.14422902464866638 2023-01-22 11:29:23.990189: step: 288/469, loss: 0.18659165501594543 2023-01-22 11:29:24.696313: step: 290/469, loss: 0.18137985467910767 2023-01-22 11:29:25.361630: step: 292/469, loss: 0.3956001400947571 2023-01-22 11:29:26.013948: step: 294/469, loss: 0.3132602274417877 2023-01-22 11:29:26.675799: step: 296/469, loss: 0.1650928407907486 2023-01-22 11:29:27.287079: step: 298/469, loss: 0.1954052895307541 2023-01-22 11:29:27.893977: step: 300/469, loss: 1.538400411605835 2023-01-22 11:29:28.519666: step: 302/469, loss: 0.395701140165329 2023-01-22 11:29:29.215013: step: 304/469, loss: 0.13635271787643433 2023-01-22 11:29:29.953492: step: 306/469, loss: 0.2199510931968689 2023-01-22 11:29:30.639719: step: 308/469, loss: 0.18347452580928802 2023-01-22 11:29:31.210656: step: 310/469, loss: 0.1973084658384323 2023-01-22 11:29:31.810954: step: 312/469, loss: 0.41131314635276794 2023-01-22 11:29:32.473560: step: 314/469, loss: 0.21307218074798584 2023-01-22 11:29:33.132128: step: 316/469, loss: 0.23374679684638977 2023-01-22 11:29:33.797030: step: 318/469, loss: 0.2610156536102295 2023-01-22 11:29:34.439054: step: 320/469, loss: 
0.250539630651474 2023-01-22 11:29:35.102531: step: 322/469, loss: 0.11684828996658325 2023-01-22 11:29:35.761268: step: 324/469, loss: 0.3343842923641205 2023-01-22 11:29:36.404870: step: 326/469, loss: 0.19394487142562866 2023-01-22 11:29:37.124852: step: 328/469, loss: 0.15986189246177673 2023-01-22 11:29:37.777286: step: 330/469, loss: 0.16270574927330017 2023-01-22 11:29:38.399842: step: 332/469, loss: 0.1303102821111679 2023-01-22 11:29:38.978934: step: 334/469, loss: 0.2709577977657318 2023-01-22 11:29:39.697657: step: 336/469, loss: 0.22591176629066467 2023-01-22 11:29:40.388909: step: 338/469, loss: 0.07989803701639175 2023-01-22 11:29:41.037265: step: 340/469, loss: 0.3379601836204529 2023-01-22 11:29:41.680359: step: 342/469, loss: 2.177950143814087 2023-01-22 11:29:42.414189: step: 344/469, loss: 0.5851849913597107 2023-01-22 11:29:43.134799: step: 346/469, loss: 0.5799577236175537 2023-01-22 11:29:43.826632: step: 348/469, loss: 0.17609424889087677 2023-01-22 11:29:44.423976: step: 350/469, loss: 0.6569993495941162 2023-01-22 11:29:45.079461: step: 352/469, loss: 0.1533515453338623 2023-01-22 11:29:45.720913: step: 354/469, loss: 0.22334958612918854 2023-01-22 11:29:46.374388: step: 356/469, loss: 0.1917479783296585 2023-01-22 11:29:46.992555: step: 358/469, loss: 0.13376808166503906 2023-01-22 11:29:47.643778: step: 360/469, loss: 0.3077527582645416 2023-01-22 11:29:48.227705: step: 362/469, loss: 0.5522488355636597 2023-01-22 11:29:48.891035: step: 364/469, loss: 0.4673190116882324 2023-01-22 11:29:49.535036: step: 366/469, loss: 0.3516501784324646 2023-01-22 11:29:50.254476: step: 368/469, loss: 0.38323748111724854 2023-01-22 11:29:50.865938: step: 370/469, loss: 2.246837615966797 2023-01-22 11:29:51.510108: step: 372/469, loss: 0.1917182058095932 2023-01-22 11:29:52.121858: step: 374/469, loss: 0.15490412712097168 2023-01-22 11:29:52.798413: step: 376/469, loss: 0.21192680299282074 2023-01-22 11:29:53.544934: step: 378/469, loss: 0.2574155032634735 2023-01-22 11:29:54.242943: step: 380/469, loss: 0.2593514323234558 2023-01-22 11:29:54.913925: step: 382/469, loss: 0.4792279601097107 2023-01-22 11:29:55.709068: step: 384/469, loss: 0.1763969212770462 2023-01-22 11:29:56.335562: step: 386/469, loss: 0.2040223926305771 2023-01-22 11:29:57.028229: step: 388/469, loss: 0.2920312285423279 2023-01-22 11:29:57.634875: step: 390/469, loss: 0.2173030525445938 2023-01-22 11:29:58.224716: step: 392/469, loss: 0.15153872966766357 2023-01-22 11:29:58.853709: step: 394/469, loss: 0.5955541133880615 2023-01-22 11:29:59.455924: step: 396/469, loss: 0.2460816651582718 2023-01-22 11:30:00.113792: step: 398/469, loss: 0.4389353394508362 2023-01-22 11:30:00.718744: step: 400/469, loss: 0.28610822558403015 2023-01-22 11:30:01.323905: step: 402/469, loss: 0.09076850116252899 2023-01-22 11:30:01.942305: step: 404/469, loss: 0.4015452563762665 2023-01-22 11:30:02.635441: step: 406/469, loss: 0.2629058063030243 2023-01-22 11:30:03.274935: step: 408/469, loss: 0.1401469111442566 2023-01-22 11:30:03.883936: step: 410/469, loss: 0.1503347009420395 2023-01-22 11:30:04.573500: step: 412/469, loss: 0.1177896112203598 2023-01-22 11:30:05.318944: step: 414/469, loss: 0.2586066722869873 2023-01-22 11:30:05.972817: step: 416/469, loss: 0.22401678562164307 2023-01-22 11:30:06.631164: step: 418/469, loss: 0.4721553921699524 2023-01-22 11:30:07.226780: step: 420/469, loss: 0.10826145112514496 2023-01-22 11:30:07.913001: step: 422/469, loss: 0.14290405809879303 2023-01-22 11:30:08.595308: step: 424/469, loss: 
0.18634554743766785 2023-01-22 11:30:09.248278: step: 426/469, loss: 0.19271937012672424 2023-01-22 11:30:09.955596: step: 428/469, loss: 0.4409353733062744 2023-01-22 11:30:10.612720: step: 430/469, loss: 0.2770993411540985 2023-01-22 11:30:11.281009: step: 432/469, loss: 0.17648662626743317 2023-01-22 11:30:11.899974: step: 434/469, loss: 0.6518418788909912 2023-01-22 11:30:12.653197: step: 436/469, loss: 0.1623111218214035 2023-01-22 11:30:13.325336: step: 438/469, loss: 0.20409198105335236 2023-01-22 11:30:13.995315: step: 440/469, loss: 0.3621858060359955 2023-01-22 11:30:14.714430: step: 442/469, loss: 0.4220235049724579 2023-01-22 11:30:15.341619: step: 444/469, loss: 0.07869580388069153 2023-01-22 11:30:15.987191: step: 446/469, loss: 0.18688485026359558 2023-01-22 11:30:16.561902: step: 448/469, loss: 0.19848749041557312 2023-01-22 11:30:17.237802: step: 450/469, loss: 0.14502377808094025 2023-01-22 11:30:17.920938: step: 452/469, loss: 0.2228161096572876 2023-01-22 11:30:18.680048: step: 454/469, loss: 0.08278413116931915 2023-01-22 11:30:19.358421: step: 456/469, loss: 0.20986926555633545 2023-01-22 11:30:20.014038: step: 458/469, loss: 0.28288328647613525 2023-01-22 11:30:20.716172: step: 460/469, loss: 0.14189527928829193 2023-01-22 11:30:21.406165: step: 462/469, loss: 0.27325865626335144 2023-01-22 11:30:22.074835: step: 464/469, loss: 0.10168289393186569 2023-01-22 11:30:22.682007: step: 466/469, loss: 0.16034327447414398 2023-01-22 11:30:23.338111: step: 468/469, loss: 0.07392926514148712 2023-01-22 11:30:24.040131: step: 470/469, loss: 0.1523371785879135 2023-01-22 11:30:24.697122: step: 472/469, loss: 0.1986953616142273 2023-01-22 11:30:25.347928: step: 474/469, loss: 0.4473738372325897 2023-01-22 11:30:25.982537: step: 476/469, loss: 0.10791049152612686 2023-01-22 11:30:26.618493: step: 478/469, loss: 0.23333097994327545 2023-01-22 11:30:27.263077: step: 480/469, loss: 0.1788780242204666 2023-01-22 11:30:28.012655: step: 482/469, loss: 0.22069504857063293 2023-01-22 11:30:28.661636: step: 484/469, loss: 0.127645343542099 2023-01-22 11:30:29.364743: step: 486/469, loss: 0.18005597591400146 2023-01-22 11:30:30.007552: step: 488/469, loss: 0.17945681512355804 2023-01-22 11:30:30.618836: step: 490/469, loss: 0.19683711230754852 2023-01-22 11:30:31.226545: step: 492/469, loss: 0.15534235537052155 2023-01-22 11:30:31.917372: step: 494/469, loss: 0.5154483914375305 2023-01-22 11:30:32.533011: step: 496/469, loss: 0.11100415885448456 2023-01-22 11:30:33.212487: step: 498/469, loss: 0.2955454885959625 2023-01-22 11:30:33.892761: step: 500/469, loss: 0.10263917595148087 2023-01-22 11:30:34.512947: step: 502/469, loss: 0.3018554747104645 2023-01-22 11:30:35.133800: step: 504/469, loss: 0.37817472219467163 2023-01-22 11:30:35.780292: step: 506/469, loss: 0.20461368560791016 2023-01-22 11:30:36.462716: step: 508/469, loss: 0.25689950585365295 2023-01-22 11:30:37.173028: step: 510/469, loss: 0.2914935350418091 2023-01-22 11:30:37.792962: step: 512/469, loss: 0.05203931778669357 2023-01-22 11:30:38.357482: step: 514/469, loss: 0.16389061510562897 2023-01-22 11:30:39.017609: step: 516/469, loss: 0.2884312868118286 2023-01-22 11:30:39.670502: step: 518/469, loss: 0.13901834189891815 2023-01-22 11:30:40.366125: step: 520/469, loss: 0.2569965720176697 2023-01-22 11:30:41.053706: step: 522/469, loss: 0.26480719447135925 2023-01-22 11:30:41.663174: step: 524/469, loss: 0.24072884023189545 2023-01-22 11:30:42.255735: step: 526/469, loss: 0.2926464080810547 2023-01-22 11:30:42.950590: step: 
528/469, loss: 0.6798343062400818 2023-01-22 11:30:43.612380: step: 530/469, loss: 0.04340603947639465 2023-01-22 11:30:44.225590: step: 532/469, loss: 0.5107023119926453 2023-01-22 11:30:44.939517: step: 534/469, loss: 0.2708114683628082 2023-01-22 11:30:45.638442: step: 536/469, loss: 0.8310455679893494 2023-01-22 11:30:46.292055: step: 538/469, loss: 0.1858428567647934 2023-01-22 11:30:47.048128: step: 540/469, loss: 0.4196905195713043 2023-01-22 11:30:47.701775: step: 542/469, loss: 0.15431804955005646 2023-01-22 11:30:48.372741: step: 544/469, loss: 0.2692755162715912 2023-01-22 11:30:49.005937: step: 546/469, loss: 0.3524225950241089 2023-01-22 11:30:49.711031: step: 548/469, loss: 0.6620578169822693 2023-01-22 11:30:50.307970: step: 550/469, loss: 0.7527962327003479 2023-01-22 11:30:50.965283: step: 552/469, loss: 0.38842910528182983 2023-01-22 11:30:51.554488: step: 554/469, loss: 1.2896281480789185 2023-01-22 11:30:52.240444: step: 556/469, loss: 0.28871390223503113 2023-01-22 11:30:52.954777: step: 558/469, loss: 0.3317926824092865 2023-01-22 11:30:53.667494: step: 560/469, loss: 0.37933608889579773 2023-01-22 11:30:54.379475: step: 562/469, loss: 0.7165369987487793 2023-01-22 11:30:55.053608: step: 564/469, loss: 0.14440712332725525 2023-01-22 11:30:55.690158: step: 566/469, loss: 0.0998615026473999 2023-01-22 11:30:56.344965: step: 568/469, loss: 0.08365899324417114 2023-01-22 11:30:56.978925: step: 570/469, loss: 0.4213893413543701 2023-01-22 11:30:57.681782: step: 572/469, loss: 0.06474947929382324 2023-01-22 11:30:58.332739: step: 574/469, loss: 9.069156646728516 2023-01-22 11:30:59.052278: step: 576/469, loss: 0.46241387724876404 2023-01-22 11:30:59.650009: step: 578/469, loss: 0.09412537515163422 2023-01-22 11:31:00.313737: step: 580/469, loss: 0.31065821647644043 2023-01-22 11:31:01.006206: step: 582/469, loss: 0.18173867464065552 2023-01-22 11:31:01.610032: step: 584/469, loss: 0.12756435573101044 2023-01-22 11:31:02.223987: step: 586/469, loss: 0.13833735883235931 2023-01-22 11:31:02.897772: step: 588/469, loss: 1.476247787475586 2023-01-22 11:31:03.527894: step: 590/469, loss: 0.18005362153053284 2023-01-22 11:31:04.105557: step: 592/469, loss: 0.47371673583984375 2023-01-22 11:31:04.765307: step: 594/469, loss: 0.3745899796485901 2023-01-22 11:31:05.421649: step: 596/469, loss: 1.383629560470581 2023-01-22 11:31:06.063306: step: 598/469, loss: 0.32182469964027405 2023-01-22 11:31:06.703192: step: 600/469, loss: 0.16456946730613708 2023-01-22 11:31:07.318128: step: 602/469, loss: 0.096487857401371 2023-01-22 11:31:07.923921: step: 604/469, loss: 0.07716759294271469 2023-01-22 11:31:08.560883: step: 606/469, loss: 0.20448121428489685 2023-01-22 11:31:09.295182: step: 608/469, loss: 0.2204960435628891 2023-01-22 11:31:09.913019: step: 610/469, loss: 0.31164512038230896 2023-01-22 11:31:10.599757: step: 612/469, loss: 0.5891857147216797 2023-01-22 11:31:11.224273: step: 614/469, loss: 0.17195731401443481 2023-01-22 11:31:11.884126: step: 616/469, loss: 0.3017668128013611 2023-01-22 11:31:12.527570: step: 618/469, loss: 0.29598596692085266 2023-01-22 11:31:13.132533: step: 620/469, loss: 0.09344451874494553 2023-01-22 11:31:13.834040: step: 622/469, loss: 0.4150453209877014 2023-01-22 11:31:14.494961: step: 624/469, loss: 0.14968973398208618 2023-01-22 11:31:15.180478: step: 626/469, loss: 0.18221484124660492 2023-01-22 11:31:15.804860: step: 628/469, loss: 0.18337593972682953 2023-01-22 11:31:16.437716: step: 630/469, loss: 1.3400460481643677 2023-01-22 11:31:17.106470: 
step: 632/469, loss: 1.1392515897750854 2023-01-22 11:31:17.730608: step: 634/469, loss: 0.2779347598552704 2023-01-22 11:31:18.446473: step: 636/469, loss: 0.5234377384185791 2023-01-22 11:31:19.078560: step: 638/469, loss: 0.18483102321624756 2023-01-22 11:31:19.754709: step: 640/469, loss: 0.2284020036458969 2023-01-22 11:31:20.381489: step: 642/469, loss: 0.3503674268722534 2023-01-22 11:31:21.054422: step: 644/469, loss: 0.30721575021743774 2023-01-22 11:31:21.794827: step: 646/469, loss: 0.4204408824443817 2023-01-22 11:31:22.363846: step: 648/469, loss: 0.1218271404504776 2023-01-22 11:31:23.040339: step: 650/469, loss: 0.5836813449859619 2023-01-22 11:31:23.658833: step: 652/469, loss: 1.136457920074463 2023-01-22 11:31:24.402625: step: 654/469, loss: 0.09446422010660172 2023-01-22 11:31:25.050577: step: 656/469, loss: 0.0958859771490097 2023-01-22 11:31:25.674664: step: 658/469, loss: 0.9152986407279968 2023-01-22 11:31:26.351846: step: 660/469, loss: 0.3161552846431732 2023-01-22 11:31:26.990648: step: 662/469, loss: 0.10566958039999008 2023-01-22 11:31:27.683233: step: 664/469, loss: 0.12009944021701813 2023-01-22 11:31:28.309418: step: 666/469, loss: 0.09755319356918335 2023-01-22 11:31:29.007100: step: 668/469, loss: 0.19840706884860992 2023-01-22 11:31:29.682470: step: 670/469, loss: 0.4426737427711487 2023-01-22 11:31:30.322649: step: 672/469, loss: 0.21325407922267914 2023-01-22 11:31:31.025093: step: 674/469, loss: 0.9769893884658813 2023-01-22 11:31:31.692130: step: 676/469, loss: 0.4308414161205292 2023-01-22 11:31:32.335740: step: 678/469, loss: 0.6531209945678711 2023-01-22 11:31:33.013734: step: 680/469, loss: 0.2012004852294922 2023-01-22 11:31:33.696211: step: 682/469, loss: 0.16564176976680756 2023-01-22 11:31:34.334731: step: 684/469, loss: 0.17630264163017273 2023-01-22 11:31:35.043400: step: 686/469, loss: 0.11434575915336609 2023-01-22 11:31:35.748770: step: 688/469, loss: 0.2170795500278473 2023-01-22 11:31:36.427340: step: 690/469, loss: 0.18453888595104218 2023-01-22 11:31:37.247109: step: 692/469, loss: 0.3093826174736023 2023-01-22 11:31:37.847485: step: 694/469, loss: 0.15925228595733643 2023-01-22 11:31:38.481195: step: 696/469, loss: 0.14291183650493622 2023-01-22 11:31:39.137128: step: 698/469, loss: 0.15861262381076813 2023-01-22 11:31:39.772933: step: 700/469, loss: 0.15145477652549744 2023-01-22 11:31:40.427415: step: 702/469, loss: 0.17363622784614563 2023-01-22 11:31:41.021408: step: 704/469, loss: 0.182483971118927 2023-01-22 11:31:41.652059: step: 706/469, loss: 0.25083136558532715 2023-01-22 11:31:42.294367: step: 708/469, loss: 1.0732553005218506 2023-01-22 11:31:42.955948: step: 710/469, loss: 0.5289745330810547 2023-01-22 11:31:43.607188: step: 712/469, loss: 0.261511892080307 2023-01-22 11:31:44.214937: step: 714/469, loss: 0.1922958493232727 2023-01-22 11:31:44.868877: step: 716/469, loss: 0.3829682469367981 2023-01-22 11:31:45.691337: step: 718/469, loss: 0.3605692386627197 2023-01-22 11:31:46.337990: step: 720/469, loss: 0.9098966121673584 2023-01-22 11:31:47.025292: step: 722/469, loss: 0.15860295295715332 2023-01-22 11:31:47.713273: step: 724/469, loss: 0.36241665482521057 2023-01-22 11:31:48.406938: step: 726/469, loss: 0.1308407038450241 2023-01-22 11:31:49.069041: step: 728/469, loss: 0.16541844606399536 2023-01-22 11:31:49.781072: step: 730/469, loss: 0.4544020891189575 2023-01-22 11:31:50.452509: step: 732/469, loss: 0.31413185596466064 2023-01-22 11:31:51.159445: step: 734/469, loss: 0.2734842896461487 2023-01-22 11:31:51.813314: 
step: 736/469, loss: 0.7730116844177246 2023-01-22 11:31:52.452657: step: 738/469, loss: 0.17564791440963745 2023-01-22 11:31:53.093543: step: 740/469, loss: 0.3019658923149109 2023-01-22 11:31:53.796824: step: 742/469, loss: 0.33448800444602966 2023-01-22 11:31:54.396189: step: 744/469, loss: 0.1786222904920578 2023-01-22 11:31:55.032495: step: 746/469, loss: 0.23682421445846558 2023-01-22 11:31:55.639762: step: 748/469, loss: 0.42194151878356934 2023-01-22 11:31:56.273675: step: 750/469, loss: 0.5432416200637817 2023-01-22 11:31:56.889856: step: 752/469, loss: 0.27042850852012634 2023-01-22 11:31:57.545753: step: 754/469, loss: 0.39941948652267456 2023-01-22 11:31:58.192820: step: 756/469, loss: 0.13691657781600952 2023-01-22 11:31:58.825417: step: 758/469, loss: 0.11036242544651031 2023-01-22 11:31:59.607653: step: 760/469, loss: 0.22094962000846863 2023-01-22 11:32:00.267970: step: 762/469, loss: 0.20506402850151062 2023-01-22 11:32:00.950282: step: 764/469, loss: 0.382031112909317 2023-01-22 11:32:01.555809: step: 766/469, loss: 0.6536380052566528 2023-01-22 11:32:02.219158: step: 768/469, loss: 0.37386325001716614 2023-01-22 11:32:02.849427: step: 770/469, loss: 0.12124766409397125 2023-01-22 11:32:03.497399: step: 772/469, loss: 0.2321719527244568 2023-01-22 11:32:04.121341: step: 774/469, loss: 0.2473437637090683 2023-01-22 11:32:04.764263: step: 776/469, loss: 0.25117436051368713 2023-01-22 11:32:05.416151: step: 778/469, loss: 0.369494765996933 2023-01-22 11:32:06.183048: step: 780/469, loss: 0.4436790645122528 2023-01-22 11:32:06.855808: step: 782/469, loss: 0.6400638818740845 2023-01-22 11:32:07.543904: step: 784/469, loss: 0.5189934968948364 2023-01-22 11:32:08.236559: step: 786/469, loss: 0.48824337124824524 2023-01-22 11:32:08.901836: step: 788/469, loss: 0.16848042607307434 2023-01-22 11:32:09.537357: step: 790/469, loss: 0.16847187280654907 2023-01-22 11:32:10.147379: step: 792/469, loss: 0.6210781931877136 2023-01-22 11:32:10.796800: step: 794/469, loss: 0.34443339705467224 2023-01-22 11:32:11.431718: step: 796/469, loss: 0.1160089299082756 2023-01-22 11:32:12.169827: step: 798/469, loss: 0.28355202078819275 2023-01-22 11:32:12.839461: step: 800/469, loss: 0.3433855175971985 2023-01-22 11:32:13.591560: step: 802/469, loss: 0.24356277287006378 2023-01-22 11:32:14.255089: step: 804/469, loss: 8.318450927734375 2023-01-22 11:32:14.906960: step: 806/469, loss: 0.14145974814891815 2023-01-22 11:32:15.614840: step: 808/469, loss: 0.7049055695533752 2023-01-22 11:32:16.248720: step: 810/469, loss: 0.12125016748905182 2023-01-22 11:32:16.890528: step: 812/469, loss: 0.1970774084329605 2023-01-22 11:32:17.521176: step: 814/469, loss: 0.6058725118637085 2023-01-22 11:32:18.148819: step: 816/469, loss: 0.12787580490112305 2023-01-22 11:32:18.894333: step: 818/469, loss: 0.4807584285736084 2023-01-22 11:32:19.535346: step: 820/469, loss: 0.0792413204908371 2023-01-22 11:32:20.140506: step: 822/469, loss: 0.11749916523694992 2023-01-22 11:32:20.764375: step: 824/469, loss: 0.16650857031345367 2023-01-22 11:32:21.455414: step: 826/469, loss: 0.22886303067207336 2023-01-22 11:32:22.077246: step: 828/469, loss: 0.5567625761032104 2023-01-22 11:32:22.733685: step: 830/469, loss: 0.3949889540672302 2023-01-22 11:32:23.360025: step: 832/469, loss: 0.26541900634765625 2023-01-22 11:32:23.999793: step: 834/469, loss: 0.1304713785648346 2023-01-22 11:32:24.727043: step: 836/469, loss: 0.4830150306224823 2023-01-22 11:32:25.347736: step: 838/469, loss: 0.6056756377220154 2023-01-22 
11:32:25.951525: step: 840/469, loss: 0.2323174625635147 2023-01-22 11:32:26.607894: step: 842/469, loss: 0.22079682350158691 2023-01-22 11:32:27.241672: step: 844/469, loss: 0.107571542263031 2023-01-22 11:32:27.923138: step: 846/469, loss: 0.16435562074184418 2023-01-22 11:32:28.526897: step: 848/469, loss: 0.3461824655532837 2023-01-22 11:32:29.249148: step: 850/469, loss: 0.5418260097503662 2023-01-22 11:32:29.902575: step: 852/469, loss: 0.1690748780965805 2023-01-22 11:32:30.553665: step: 854/469, loss: 0.49810123443603516 2023-01-22 11:32:31.228344: step: 856/469, loss: 0.1852003037929535 2023-01-22 11:32:31.892613: step: 858/469, loss: 0.2919261157512665 2023-01-22 11:32:32.554175: step: 860/469, loss: 0.5362018346786499 2023-01-22 11:32:33.194828: step: 862/469, loss: 0.6181149482727051 2023-01-22 11:32:33.876803: step: 864/469, loss: 0.16417354345321655 2023-01-22 11:32:34.543975: step: 866/469, loss: 0.1762048304080963 2023-01-22 11:32:35.308099: step: 868/469, loss: 0.1236971765756607 2023-01-22 11:32:35.916846: step: 870/469, loss: 0.6824985146522522 2023-01-22 11:32:36.562030: step: 872/469, loss: 0.1723754107952118 2023-01-22 11:32:37.173118: step: 874/469, loss: 0.6163443922996521 2023-01-22 11:32:37.791662: step: 876/469, loss: 0.14851923286914825 2023-01-22 11:32:38.437492: step: 878/469, loss: 0.16506151854991913 2023-01-22 11:32:39.103877: step: 880/469, loss: 0.9504268169403076 2023-01-22 11:32:39.732258: step: 882/469, loss: 0.5108989477157593 2023-01-22 11:32:40.379748: step: 884/469, loss: 0.3754722476005554 2023-01-22 11:32:41.053955: step: 886/469, loss: 0.2483329325914383 2023-01-22 11:32:41.653863: step: 888/469, loss: 0.09177801012992859 2023-01-22 11:32:42.425696: step: 890/469, loss: 0.41312384605407715 2023-01-22 11:32:43.128253: step: 892/469, loss: 0.07926923036575317 2023-01-22 11:32:43.763716: step: 894/469, loss: 0.3120330572128296 2023-01-22 11:32:44.394050: step: 896/469, loss: 0.2969527542591095 2023-01-22 11:32:45.024166: step: 898/469, loss: 0.10953614115715027 2023-01-22 11:32:45.690934: step: 900/469, loss: 0.5658564567565918 2023-01-22 11:32:46.367614: step: 902/469, loss: 0.1047172024846077 2023-01-22 11:32:47.003279: step: 904/469, loss: 0.41531217098236084 2023-01-22 11:32:47.673757: step: 906/469, loss: 0.12457849085330963 2023-01-22 11:32:48.287494: step: 908/469, loss: 0.18115508556365967 2023-01-22 11:32:49.016921: step: 910/469, loss: 0.2024720162153244 2023-01-22 11:32:49.675910: step: 912/469, loss: 0.5095484852790833 2023-01-22 11:32:50.362455: step: 914/469, loss: 0.2653312385082245 2023-01-22 11:32:51.011488: step: 916/469, loss: 1.2103748321533203 2023-01-22 11:32:51.641722: step: 918/469, loss: 0.41877707839012146 2023-01-22 11:32:52.250374: step: 920/469, loss: 0.12427324801683426 2023-01-22 11:32:52.849868: step: 922/469, loss: 0.6959134340286255 2023-01-22 11:32:53.389981: step: 924/469, loss: 0.3270226716995239 2023-01-22 11:32:54.026325: step: 926/469, loss: 0.1600121706724167 2023-01-22 11:32:54.686482: step: 928/469, loss: 0.24598374962806702 2023-01-22 11:32:55.371428: step: 930/469, loss: 0.28134551644325256 2023-01-22 11:32:56.028940: step: 932/469, loss: 0.39983755350112915 2023-01-22 11:32:56.680579: step: 934/469, loss: 0.0733414888381958 2023-01-22 11:32:57.333365: step: 936/469, loss: 0.13935169577598572 2023-01-22 11:32:57.965187: step: 938/469, loss: 0.09561121463775635 ================================================== Loss: 0.380 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 
'f1': 0.7368421052631579}, 'slot': {'p': 0.2895240158455063, 'r': 0.3169930875576037, 'f1': 0.30263651656314694}, 'combined': 0.22299532799389774, 'epoch': 10} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3117052140375859, 'r': 0.26558992209777865, 'f1': 0.2868056851055894}, 'combined': 0.15643946460304875, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2819369313990656, 'r': 0.31082610463540256, 'f1': 0.2956775399690562}, 'combined': 0.21786766102983088, 'epoch': 10} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31370997769403064, 'r': 0.2703703558900063, 'f1': 0.2904322348774219}, 'combined': 0.15841758266041195, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28630751626114187, 'r': 0.32216386554621845, 'f1': 0.3031792091836734}, 'combined': 0.22339520676691724, 'epoch': 10} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3080388075802286, 'r': 0.2643553536232886, 'f1': 0.2845301836634706}, 'combined': 0.1551982819982567, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2384751773049645, 'r': 0.3202380952380952, 'f1': 0.2733739837398374}, 'combined': 0.18224932249322492, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2890625, 'r': 0.40217391304347827, 'f1': 0.33636363636363636}, 'combined': 0.16818181818181818, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4305555555555556, 'r': 0.2672413793103448, 'f1': 0.3297872340425532}, 'combined': 0.2198581560283688, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123600447856686, 'r': 0.30880376344086025, 'f1': 
0.31057172391857507}, 'combined': 0.22884232288737108, 'epoch': 5} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29928638434735994, 'r': 0.23575990569357447, 'f1': 0.26375186993150146}, 'combined': 0.14386465632627352, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 5} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:35:55.588387: step: 2/469, loss: 1.3251230716705322 2023-01-22 11:35:56.230839: step: 4/469, loss: 0.3632338047027588 2023-01-22 11:35:56.902324: step: 6/469, loss: 0.12736497819423676 2023-01-22 11:35:57.613612: step: 8/469, loss: 0.07161015272140503 2023-01-22 11:35:58.291868: step: 10/469, loss: 0.1522126942873001 2023-01-22 11:35:58.978275: step: 12/469, loss: 0.35069236159324646 2023-01-22 11:35:59.685964: step: 14/469, loss: 0.13632357120513916 2023-01-22 11:36:00.279544: step: 16/469, loss: 0.16236285865306854 2023-01-22 11:36:00.876829: step: 18/469, loss: 0.9806352853775024 2023-01-22 11:36:01.527807: step: 20/469, loss: 0.10349486768245697 2023-01-22 11:36:02.131635: step: 22/469, loss: 0.07077273726463318 2023-01-22 11:36:02.869337: step: 24/469, loss: 0.11571285128593445 2023-01-22 11:36:03.524640: step: 26/469, loss: 0.1511596441268921 2023-01-22 11:36:04.266206: step: 28/469, loss: 0.35244497656822205 2023-01-22 11:36:04.919424: step: 30/469, loss: 0.22795195877552032 2023-01-22 11:36:05.510103: step: 32/469, loss: 0.08686845749616623 2023-01-22 11:36:06.200965: step: 34/469, loss: 0.15235574543476105 2023-01-22 11:36:06.815392: step: 36/469, loss: 0.19542719423770905 2023-01-22 11:36:07.592501: step: 38/469, loss: 0.22962529957294464 2023-01-22 11:36:08.281796: step: 40/469, loss: 0.2337765395641327 2023-01-22 11:36:08.822546: step: 42/469, loss: 0.18049144744873047 2023-01-22 11:36:09.449013: step: 44/469, loss: 0.16483674943447113 2023-01-22 11:36:10.133935: step: 46/469, loss: 0.1045350506901741 2023-01-22 11:36:10.810184: step: 48/469, loss: 0.11200503259897232 2023-01-22 11:36:11.590542: step: 50/469, loss: 0.3337440490722656 2023-01-22 11:36:12.305958: step: 52/469, loss: 0.4571230709552765 2023-01-22 11:36:12.972886: step: 54/469, loss: 0.30162525177001953 2023-01-22 11:36:13.604697: step: 56/469, loss: 0.2383003532886505 2023-01-22 11:36:14.347509: step: 58/469, loss: 0.36613091826438904 2023-01-22 11:36:14.990180: step: 60/469, loss: 0.14247438311576843 2023-01-22 11:36:15.618983: step: 62/469, loss: 0.5525928735733032 2023-01-22 11:36:16.287961: step: 64/469, loss: 0.11493909358978271 2023-01-22 11:36:16.941100: step: 66/469, loss: 0.34366175532341003 2023-01-22 11:36:17.562587: step: 68/469, loss: 0.14336878061294556 2023-01-22 11:36:18.188735: step: 70/469, loss: 2.229097604751587 2023-01-22 11:36:18.879246: step: 72/469, loss: 0.43957480788230896 2023-01-22 11:36:19.540908: step: 74/469, loss: 0.13142897188663483 2023-01-22 11:36:20.234913: step: 76/469, loss: 0.048468608409166336 2023-01-22 11:36:20.926508: step: 78/469, loss: 0.15741316974163055 2023-01-22 11:36:21.652288: step: 80/469, loss: 0.23230256140232086 2023-01-22 11:36:22.314888: step: 82/469, loss: 0.23593901097774506 2023-01-22 
11:36:22.990144: step: 84/469, loss: 0.028536967933177948 2023-01-22 11:36:23.609009: step: 86/469, loss: 0.27680301666259766 2023-01-22 11:36:24.233522: step: 88/469, loss: 0.17345717549324036 2023-01-22 11:36:24.904371: step: 90/469, loss: 0.10710497200489044 2023-01-22 11:36:25.622468: step: 92/469, loss: 0.1412845402956009 2023-01-22 11:36:26.307423: step: 94/469, loss: 0.15174348652362823 2023-01-22 11:36:26.970008: step: 96/469, loss: 0.1045311912894249 2023-01-22 11:36:27.609756: step: 98/469, loss: 0.2490626722574234 2023-01-22 11:36:28.283992: step: 100/469, loss: 0.4371017813682556 2023-01-22 11:36:28.911765: step: 102/469, loss: 0.19339732825756073 2023-01-22 11:36:29.501679: step: 104/469, loss: 0.3534485101699829 2023-01-22 11:36:30.128521: step: 106/469, loss: 0.1900978982448578 2023-01-22 11:36:30.852431: step: 108/469, loss: 0.35299235582351685 2023-01-22 11:36:31.470453: step: 110/469, loss: 0.11424469947814941 2023-01-22 11:36:32.083087: step: 112/469, loss: 0.18450498580932617 2023-01-22 11:36:32.808680: step: 114/469, loss: 0.059010982513427734 2023-01-22 11:36:33.451300: step: 116/469, loss: 0.11341457813978195 2023-01-22 11:36:34.140013: step: 118/469, loss: 0.15264594554901123 2023-01-22 11:36:34.781591: step: 120/469, loss: 0.18101759254932404 2023-01-22 11:36:35.564682: step: 122/469, loss: 0.11984462291002274 2023-01-22 11:36:36.246817: step: 124/469, loss: 0.204877570271492 2023-01-22 11:36:36.892665: step: 126/469, loss: 0.1533675342798233 2023-01-22 11:36:37.622951: step: 128/469, loss: 0.18753385543823242 2023-01-22 11:36:38.192321: step: 130/469, loss: 0.4244779646396637 2023-01-22 11:36:38.804518: step: 132/469, loss: 0.20273007452487946 2023-01-22 11:36:39.430696: step: 134/469, loss: 0.09168665111064911 2023-01-22 11:36:40.052869: step: 136/469, loss: 0.09097080677747726 2023-01-22 11:36:40.669685: step: 138/469, loss: 0.6875452399253845 2023-01-22 11:36:41.313821: step: 140/469, loss: 0.392515629529953 2023-01-22 11:36:41.942379: step: 142/469, loss: 0.12772968411445618 2023-01-22 11:36:42.561717: step: 144/469, loss: 0.061967186629772186 2023-01-22 11:36:43.199696: step: 146/469, loss: 0.32866227626800537 2023-01-22 11:36:43.939777: step: 148/469, loss: 0.4913073182106018 2023-01-22 11:36:44.589655: step: 150/469, loss: 0.062448833137750626 2023-01-22 11:36:45.245245: step: 152/469, loss: 0.10945477336645126 2023-01-22 11:36:45.946523: step: 154/469, loss: 0.2194628268480301 2023-01-22 11:36:46.744834: step: 156/469, loss: 0.17987900972366333 2023-01-22 11:36:47.321477: step: 158/469, loss: 0.15505167841911316 2023-01-22 11:36:47.992814: step: 160/469, loss: 0.3138388395309448 2023-01-22 11:36:48.651835: step: 162/469, loss: 0.34724023938179016 2023-01-22 11:36:49.320985: step: 164/469, loss: 0.1631927639245987 2023-01-22 11:36:49.983519: step: 166/469, loss: 0.162771075963974 2023-01-22 11:36:50.625082: step: 168/469, loss: 0.08693363517522812 2023-01-22 11:36:51.312321: step: 170/469, loss: 0.20622491836547852 2023-01-22 11:36:52.047442: step: 172/469, loss: 0.1325293332338333 2023-01-22 11:36:52.717345: step: 174/469, loss: 0.12262620031833649 2023-01-22 11:36:53.317638: step: 176/469, loss: 0.37906044721603394 2023-01-22 11:36:54.020417: step: 178/469, loss: 0.20451989769935608 2023-01-22 11:36:54.686063: step: 180/469, loss: 0.2792142629623413 2023-01-22 11:36:55.316052: step: 182/469, loss: 0.10610752552747726 2023-01-22 11:36:55.979132: step: 184/469, loss: 0.11265362799167633 2023-01-22 11:36:56.626247: step: 186/469, loss: 0.14442527294158936 
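
[Editor's note] The "Current best result" block repeated after each epoch is consistent with a simple per-language selection rule: for each of Chinese, Korean, and Russian, keep the epoch whose Dev 'combined' score is highest so far (epoch 5 for Chinese and Russian, epoch 8 for Korean at this point in the log) and reprint its Dev, Test, and sample entries together. The per-epoch "Loss: 0.450" / "Loss: 0.380" summaries likewise read as the mean of that epoch's step losses. train.py's actual bookkeeping is not visible in this log, so the sketch below only illustrates that selection rule under those assumptions, with hypothetical names.

```python
# Minimal sketch (hypothetical names, not from train.py): keep, per language, the
# epoch with the best Dev 'combined' score and reprint it after every epoch.
from statistics import mean

best = {}  # language -> {"dev": {...}, "test": {...}, "sample": {...}, "epoch": int}

def end_of_epoch(epoch: int, step_losses: list[float], results: dict) -> None:
    """results[lang] = {"dev": {...}, "test": {...}, "sample": {...}}, dicts shaped as in the log."""
    # Assumed: the logged "Loss:" line is the average loss over the epoch's steps.
    print(f"Loss: {mean(step_losses):.3f}")
    for lang, res in results.items():
        if lang not in best or res["dev"]["combined"] > best[lang]["dev"]["combined"]:
            best[lang] = {**res, "epoch": epoch}  # new best Dev score for this language
    print("Current best result:")
    for lang, res in best.items():
        print(f"Dev for {lang}: {res['dev']} (epoch {res['epoch']})")
```
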
2023-01-22 11:36:57.233932: step: 188/469, loss: 0.1498418003320694 2023-01-22 11:36:57.857821: step: 190/469, loss: 0.3022550344467163 2023-01-22 11:36:58.424326: step: 192/469, loss: 0.13683943450450897 2023-01-22 11:36:59.099510: step: 194/469, loss: 0.24792000651359558 2023-01-22 11:36:59.749589: step: 196/469, loss: 0.3452637195587158 2023-01-22 11:37:00.436092: step: 198/469, loss: 0.3051605224609375 2023-01-22 11:37:01.114861: step: 200/469, loss: 0.12394502758979797 2023-01-22 11:37:01.853495: step: 202/469, loss: 0.12998688220977783 2023-01-22 11:37:02.550756: step: 204/469, loss: 0.18461468815803528 2023-01-22 11:37:03.214062: step: 206/469, loss: 0.16999229788780212 2023-01-22 11:37:03.856853: step: 208/469, loss: 0.0982012152671814 2023-01-22 11:37:04.511590: step: 210/469, loss: 0.09659772366285324 2023-01-22 11:37:05.156330: step: 212/469, loss: 0.11084352433681488 2023-01-22 11:37:05.755475: step: 214/469, loss: 0.2170136719942093 2023-01-22 11:37:06.411738: step: 216/469, loss: 0.19260884821414948 2023-01-22 11:37:07.112934: step: 218/469, loss: 0.20957617461681366 2023-01-22 11:37:07.807361: step: 220/469, loss: 0.1567096710205078 2023-01-22 11:37:08.481439: step: 222/469, loss: 0.7105504870414734 2023-01-22 11:37:09.151427: step: 224/469, loss: 0.12549707293510437 2023-01-22 11:37:09.766683: step: 226/469, loss: 0.1386052370071411 2023-01-22 11:37:10.449892: step: 228/469, loss: 0.24420537054538727 2023-01-22 11:37:11.065056: step: 230/469, loss: 0.12504376471042633 2023-01-22 11:37:11.684271: step: 232/469, loss: 0.12796583771705627 2023-01-22 11:37:12.343646: step: 234/469, loss: 0.2811918556690216 2023-01-22 11:37:12.971712: step: 236/469, loss: 0.17327384650707245 2023-01-22 11:37:13.622474: step: 238/469, loss: 0.12080953270196915 2023-01-22 11:37:14.253891: step: 240/469, loss: 0.17505046725273132 2023-01-22 11:37:14.917827: step: 242/469, loss: 0.31011468172073364 2023-01-22 11:37:15.536585: step: 244/469, loss: 0.12590456008911133 2023-01-22 11:37:16.188489: step: 246/469, loss: 0.4992937445640564 2023-01-22 11:37:16.889846: step: 248/469, loss: 0.10226418823003769 2023-01-22 11:37:17.534969: step: 250/469, loss: 0.29992449283599854 2023-01-22 11:37:18.218048: step: 252/469, loss: 0.13194029033184052 2023-01-22 11:37:18.924612: step: 254/469, loss: 0.43244099617004395 2023-01-22 11:37:19.614932: step: 256/469, loss: 0.46475687623023987 2023-01-22 11:37:20.266960: step: 258/469, loss: 0.07229424268007278 2023-01-22 11:37:20.948527: step: 260/469, loss: 0.1569201499223709 2023-01-22 11:37:21.606881: step: 262/469, loss: 0.033908870071172714 2023-01-22 11:37:22.233692: step: 264/469, loss: 0.20522554218769073 2023-01-22 11:37:22.875455: step: 266/469, loss: 0.16309337317943573 2023-01-22 11:37:23.515829: step: 268/469, loss: 0.10002125799655914 2023-01-22 11:37:24.191924: step: 270/469, loss: 0.14310507476329803 2023-01-22 11:37:24.812757: step: 272/469, loss: 0.09983787685632706 2023-01-22 11:37:25.443478: step: 274/469, loss: 0.7703390717506409 2023-01-22 11:37:26.186946: step: 276/469, loss: 0.16378170251846313 2023-01-22 11:37:26.798024: step: 278/469, loss: 0.7479329109191895 2023-01-22 11:37:27.381333: step: 280/469, loss: 0.5635196566581726 2023-01-22 11:37:28.023746: step: 282/469, loss: 0.14695723354816437 2023-01-22 11:37:28.617786: step: 284/469, loss: 0.11088906228542328 2023-01-22 11:37:29.216781: step: 286/469, loss: 0.3897385001182556 2023-01-22 11:37:29.855495: step: 288/469, loss: 0.16940830647945404 2023-01-22 11:37:30.434829: step: 290/469, loss: 
0.18020634353160858 2023-01-22 11:37:31.063327: step: 292/469, loss: 0.08611126244068146 2023-01-22 11:37:31.776477: step: 294/469, loss: 0.18027669191360474 2023-01-22 11:37:32.429515: step: 296/469, loss: 0.23933443427085876 2023-01-22 11:37:33.085949: step: 298/469, loss: 0.2507540285587311 2023-01-22 11:37:33.694078: step: 300/469, loss: 0.27553731203079224 2023-01-22 11:37:34.331499: step: 302/469, loss: 0.21468937397003174 2023-01-22 11:37:34.902276: step: 304/469, loss: 0.29174986481666565 2023-01-22 11:37:35.531115: step: 306/469, loss: 0.31253933906555176 2023-01-22 11:37:36.143407: step: 308/469, loss: 0.24606849253177643 2023-01-22 11:37:36.805918: step: 310/469, loss: 0.11470603942871094 2023-01-22 11:37:37.462188: step: 312/469, loss: 0.11838089674711227 2023-01-22 11:37:38.116920: step: 314/469, loss: 0.06810196489095688 2023-01-22 11:37:38.824614: step: 316/469, loss: 0.5085752010345459 2023-01-22 11:37:39.435234: step: 318/469, loss: 0.7989282608032227 2023-01-22 11:37:40.054508: step: 320/469, loss: 0.1279248297214508 2023-01-22 11:37:40.710800: step: 322/469, loss: 1.1774379014968872 2023-01-22 11:37:41.376291: step: 324/469, loss: 1.7839431762695312 2023-01-22 11:37:41.990767: step: 326/469, loss: 0.08726254850625992 2023-01-22 11:37:42.779350: step: 328/469, loss: 0.12255539000034332 2023-01-22 11:37:43.451291: step: 330/469, loss: 0.2260095477104187 2023-01-22 11:37:44.095399: step: 332/469, loss: 0.29294177889823914 2023-01-22 11:37:44.769982: step: 334/469, loss: 0.3267204761505127 2023-01-22 11:37:45.344734: step: 336/469, loss: 0.46942639350891113 2023-01-22 11:37:46.014062: step: 338/469, loss: 0.167597696185112 2023-01-22 11:37:46.644823: step: 340/469, loss: 2.3828961849212646 2023-01-22 11:37:47.252180: step: 342/469, loss: 0.14724482595920563 2023-01-22 11:37:47.897153: step: 344/469, loss: 0.24119175970554352 2023-01-22 11:37:48.538588: step: 346/469, loss: 0.15888085961341858 2023-01-22 11:37:49.169942: step: 348/469, loss: 0.22132325172424316 2023-01-22 11:37:49.795576: step: 350/469, loss: 0.11238407343626022 2023-01-22 11:37:50.411178: step: 352/469, loss: 0.23809415102005005 2023-01-22 11:37:51.098083: step: 354/469, loss: 0.1637771725654602 2023-01-22 11:37:51.773653: step: 356/469, loss: 0.39375394582748413 2023-01-22 11:37:52.471391: step: 358/469, loss: 0.30669301748275757 2023-01-22 11:37:53.113455: step: 360/469, loss: 0.22790245711803436 2023-01-22 11:37:53.757182: step: 362/469, loss: 0.13602589070796967 2023-01-22 11:37:54.410869: step: 364/469, loss: 0.22972773015499115 2023-01-22 11:37:55.129508: step: 366/469, loss: 0.08034169673919678 2023-01-22 11:37:55.742396: step: 368/469, loss: 0.22970351576805115 2023-01-22 11:37:56.412919: step: 370/469, loss: 0.3514385223388672 2023-01-22 11:37:57.120182: step: 372/469, loss: 0.03130142763257027 2023-01-22 11:37:57.733591: step: 374/469, loss: 0.43215981125831604 2023-01-22 11:37:58.338769: step: 376/469, loss: 0.12602467834949493 2023-01-22 11:37:59.007015: step: 378/469, loss: 0.140254408121109 2023-01-22 11:37:59.708078: step: 380/469, loss: 0.6630123853683472 2023-01-22 11:38:00.348356: step: 382/469, loss: 0.0608147494494915 2023-01-22 11:38:01.081908: step: 384/469, loss: 0.4181896448135376 2023-01-22 11:38:01.690207: step: 386/469, loss: 0.13040032982826233 2023-01-22 11:38:02.318975: step: 388/469, loss: 0.14064259827136993 2023-01-22 11:38:02.994644: step: 390/469, loss: 0.42392319440841675 2023-01-22 11:38:03.626701: step: 392/469, loss: 0.11747951805591583 2023-01-22 11:38:04.238864: step: 
394/469, loss: 0.18643918633460999 2023-01-22 11:38:04.843547: step: 396/469, loss: 0.29469653964042664 2023-01-22 11:38:05.524296: step: 398/469, loss: 1.9678469896316528 2023-01-22 11:38:06.180162: step: 400/469, loss: 0.29438015818595886 2023-01-22 11:38:06.842526: step: 402/469, loss: 0.19751419126987457 2023-01-22 11:38:07.480834: step: 404/469, loss: 0.09369108080863953 2023-01-22 11:38:08.074413: step: 406/469, loss: 0.15228500962257385 2023-01-22 11:38:08.697657: step: 408/469, loss: 0.3336558938026428 2023-01-22 11:38:09.298611: step: 410/469, loss: 0.3445265591144562 2023-01-22 11:38:09.874695: step: 412/469, loss: 0.19449476897716522 2023-01-22 11:38:10.502789: step: 414/469, loss: 0.04109570384025574 2023-01-22 11:38:11.250405: step: 416/469, loss: 0.0452982597053051 2023-01-22 11:38:11.896861: step: 418/469, loss: 0.06610807776451111 2023-01-22 11:38:12.548328: step: 420/469, loss: 0.25247299671173096 2023-01-22 11:38:13.168793: step: 422/469, loss: 0.4787249267101288 2023-01-22 11:38:13.798327: step: 424/469, loss: 0.19258639216423035 2023-01-22 11:38:14.456463: step: 426/469, loss: 0.09108295291662216 2023-01-22 11:38:15.042864: step: 428/469, loss: 0.1806219518184662 2023-01-22 11:38:15.698956: step: 430/469, loss: 0.24744611978530884 2023-01-22 11:38:16.306007: step: 432/469, loss: 0.31874167919158936 2023-01-22 11:38:16.971286: step: 434/469, loss: 0.08911796659231186 2023-01-22 11:38:17.633810: step: 436/469, loss: 0.23850701749324799 2023-01-22 11:38:18.262805: step: 438/469, loss: 0.20409946143627167 2023-01-22 11:38:19.007626: step: 440/469, loss: 0.5788114070892334 2023-01-22 11:38:19.653285: step: 442/469, loss: 0.9260547161102295 2023-01-22 11:38:20.313751: step: 444/469, loss: 0.08772344142198563 2023-01-22 11:38:20.938987: step: 446/469, loss: 0.36653298139572144 2023-01-22 11:38:21.622500: step: 448/469, loss: 0.14815068244934082 2023-01-22 11:38:22.249249: step: 450/469, loss: 0.28155285120010376 2023-01-22 11:38:22.956586: step: 452/469, loss: 0.07654920965433121 2023-01-22 11:38:23.581836: step: 454/469, loss: 0.1313786804676056 2023-01-22 11:38:24.209248: step: 456/469, loss: 0.263245552778244 2023-01-22 11:38:24.870236: step: 458/469, loss: 0.11381533741950989 2023-01-22 11:38:25.565064: step: 460/469, loss: 0.16223829984664917 2023-01-22 11:38:26.299114: step: 462/469, loss: 0.2280503362417221 2023-01-22 11:38:26.924757: step: 464/469, loss: 0.5836884379386902 2023-01-22 11:38:27.541512: step: 466/469, loss: 0.3427771329879761 2023-01-22 11:38:28.170680: step: 468/469, loss: 0.15839408338069916 2023-01-22 11:38:28.780072: step: 470/469, loss: 0.4516873061656952 2023-01-22 11:38:29.416161: step: 472/469, loss: 0.056761328130960464 2023-01-22 11:38:30.048021: step: 474/469, loss: 0.13814318180084229 2023-01-22 11:38:30.746687: step: 476/469, loss: 0.42337143421173096 2023-01-22 11:38:31.427782: step: 478/469, loss: 0.40164101123809814 2023-01-22 11:38:32.056598: step: 480/469, loss: 0.10862606018781662 2023-01-22 11:38:32.668888: step: 482/469, loss: 0.2292182892560959 2023-01-22 11:38:33.338013: step: 484/469, loss: 0.5554975271224976 2023-01-22 11:38:34.029532: step: 486/469, loss: 0.09619907289743423 2023-01-22 11:38:34.645053: step: 488/469, loss: 0.05585966631770134 2023-01-22 11:38:35.320749: step: 490/469, loss: 0.2571108341217041 2023-01-22 11:38:36.001515: step: 492/469, loss: 0.45524880290031433 2023-01-22 11:38:36.663384: step: 494/469, loss: 0.15589767694473267 2023-01-22 11:38:37.393424: step: 496/469, loss: 0.09902490675449371 2023-01-22 
11:38:38.020417: step: 498/469, loss: 0.3878598213195801 2023-01-22 11:38:38.665551: step: 500/469, loss: 0.09806180000305176 2023-01-22 11:38:39.365402: step: 502/469, loss: 0.08868704736232758 2023-01-22 11:38:40.003535: step: 504/469, loss: 0.0991825982928276 2023-01-22 11:38:40.644284: step: 506/469, loss: 0.3053373396396637 2023-01-22 11:38:41.291380: step: 508/469, loss: 0.18532225489616394 2023-01-22 11:38:41.960027: step: 510/469, loss: 0.2965945601463318 2023-01-22 11:38:42.613299: step: 512/469, loss: 0.18866781890392303 2023-01-22 11:38:43.369907: step: 514/469, loss: 0.21066582202911377 2023-01-22 11:38:44.006612: step: 516/469, loss: 0.30293014645576477 2023-01-22 11:38:44.706479: step: 518/469, loss: 0.3269627094268799 2023-01-22 11:38:45.289341: step: 520/469, loss: 0.07745165377855301 2023-01-22 11:38:45.885588: step: 522/469, loss: 0.05759430676698685 2023-01-22 11:38:46.502697: step: 524/469, loss: 0.19504407048225403 2023-01-22 11:38:47.143676: step: 526/469, loss: 0.20144660770893097 2023-01-22 11:38:47.834215: step: 528/469, loss: 0.20814774930477142 2023-01-22 11:38:48.466680: step: 530/469, loss: 0.12292114645242691 2023-01-22 11:38:49.177855: step: 532/469, loss: 0.3041295111179352 2023-01-22 11:38:49.815714: step: 534/469, loss: 0.10824405401945114 2023-01-22 11:38:50.414774: step: 536/469, loss: 0.16285809874534607 2023-01-22 11:38:51.136870: step: 538/469, loss: 0.14948678016662598 2023-01-22 11:38:51.874403: step: 540/469, loss: 0.38732603192329407 2023-01-22 11:38:52.471902: step: 542/469, loss: 0.3438386023044586 2023-01-22 11:38:53.117498: step: 544/469, loss: 0.014788689091801643 2023-01-22 11:38:53.870356: step: 546/469, loss: 0.15183618664741516 2023-01-22 11:38:54.562407: step: 548/469, loss: 0.25229960680007935 2023-01-22 11:38:55.171612: step: 550/469, loss: 0.06977144628763199 2023-01-22 11:38:55.765430: step: 552/469, loss: 0.26242679357528687 2023-01-22 11:38:56.411802: step: 554/469, loss: 0.20218858122825623 2023-01-22 11:38:57.069220: step: 556/469, loss: 0.16469812393188477 2023-01-22 11:38:57.695182: step: 558/469, loss: 0.12485162168741226 2023-01-22 11:38:58.347431: step: 560/469, loss: 0.2898695170879364 2023-01-22 11:38:58.951907: step: 562/469, loss: 0.20773828029632568 2023-01-22 11:38:59.614705: step: 564/469, loss: 0.2075953632593155 2023-01-22 11:39:00.279482: step: 566/469, loss: 0.1356946974992752 2023-01-22 11:39:00.969528: step: 568/469, loss: 0.2844195067882538 2023-01-22 11:39:01.600365: step: 570/469, loss: 0.17094819247722626 2023-01-22 11:39:02.281444: step: 572/469, loss: 0.24282239377498627 2023-01-22 11:39:02.897601: step: 574/469, loss: 0.13948263227939606 2023-01-22 11:39:03.537209: step: 576/469, loss: 0.08740594983100891 2023-01-22 11:39:04.200239: step: 578/469, loss: 0.3155202269554138 2023-01-22 11:39:04.771665: step: 580/469, loss: 0.07801327854394913 2023-01-22 11:39:05.539979: step: 582/469, loss: 0.3262689411640167 2023-01-22 11:39:06.166368: step: 584/469, loss: 0.07614899426698685 2023-01-22 11:39:06.877996: step: 586/469, loss: 0.1717415601015091 2023-01-22 11:39:07.540375: step: 588/469, loss: 0.46940499544143677 2023-01-22 11:39:08.191740: step: 590/469, loss: 0.1658959835767746 2023-01-22 11:39:08.869027: step: 592/469, loss: 0.21351441740989685 2023-01-22 11:39:09.516004: step: 594/469, loss: 0.07654142379760742 2023-01-22 11:39:10.195083: step: 596/469, loss: 0.09888766705989838 2023-01-22 11:39:10.843644: step: 598/469, loss: 0.2221735268831253 2023-01-22 11:39:11.495045: step: 600/469, loss: 
0.6418651938438416 2023-01-22 11:39:12.152445: step: 602/469, loss: 0.27026283740997314 2023-01-22 11:39:12.845851: step: 604/469, loss: 0.21834880113601685 2023-01-22 11:39:13.518270: step: 606/469, loss: 0.36651134490966797 2023-01-22 11:39:14.113748: step: 608/469, loss: 0.06847436726093292 2023-01-22 11:39:14.820573: step: 610/469, loss: 0.10016592592000961 2023-01-22 11:39:15.465004: step: 612/469, loss: 0.47368985414505005 2023-01-22 11:39:16.130899: step: 614/469, loss: 0.22600539028644562 2023-01-22 11:39:16.756818: step: 616/469, loss: 0.16001629829406738 2023-01-22 11:39:17.382115: step: 618/469, loss: 0.11494092643260956 2023-01-22 11:39:17.999343: step: 620/469, loss: 0.39927324652671814 2023-01-22 11:39:18.719675: step: 622/469, loss: 0.4548013508319855 2023-01-22 11:39:19.384352: step: 624/469, loss: 0.07355457544326782 2023-01-22 11:39:20.047178: step: 626/469, loss: 0.21411897242069244 2023-01-22 11:39:20.765791: step: 628/469, loss: 0.1988099217414856 2023-01-22 11:39:21.400571: step: 630/469, loss: 0.5104672908782959 2023-01-22 11:39:21.984053: step: 632/469, loss: 0.18827183544635773 2023-01-22 11:39:22.587998: step: 634/469, loss: 0.10365868359804153 2023-01-22 11:39:23.289030: step: 636/469, loss: 0.30276229977607727 2023-01-22 11:39:24.005237: step: 638/469, loss: 0.1437106877565384 2023-01-22 11:39:24.582939: step: 640/469, loss: 0.04100235179066658 2023-01-22 11:39:25.234268: step: 642/469, loss: 1.0136048793792725 2023-01-22 11:39:25.838042: step: 644/469, loss: 0.17712734639644623 2023-01-22 11:39:26.480187: step: 646/469, loss: 0.6398909091949463 2023-01-22 11:39:27.127878: step: 648/469, loss: 0.13892191648483276 2023-01-22 11:39:27.796086: step: 650/469, loss: 0.19111551344394684 2023-01-22 11:39:28.469954: step: 652/469, loss: 0.7705939412117004 2023-01-22 11:39:29.142231: step: 654/469, loss: 0.15863873064517975 2023-01-22 11:39:29.821828: step: 656/469, loss: 0.4326806664466858 2023-01-22 11:39:30.492096: step: 658/469, loss: 0.1146983727812767 2023-01-22 11:39:31.200009: step: 660/469, loss: 0.23201973736286163 2023-01-22 11:39:31.883444: step: 662/469, loss: 0.9465963244438171 2023-01-22 11:39:32.574841: step: 664/469, loss: 0.15898959338665009 2023-01-22 11:39:33.227707: step: 666/469, loss: 0.5527649521827698 2023-01-22 11:39:33.864253: step: 668/469, loss: 0.49680909514427185 2023-01-22 11:39:34.521149: step: 670/469, loss: 0.17138074338436127 2023-01-22 11:39:35.220321: step: 672/469, loss: 0.15890038013458252 2023-01-22 11:39:35.861574: step: 674/469, loss: 0.12234514206647873 2023-01-22 11:39:36.585389: step: 676/469, loss: 0.3505050241947174 2023-01-22 11:39:37.374920: step: 678/469, loss: 0.137950599193573 2023-01-22 11:39:38.174900: step: 680/469, loss: 0.7775441408157349 2023-01-22 11:39:38.894998: step: 682/469, loss: 0.09802941232919693 2023-01-22 11:39:39.637228: step: 684/469, loss: 0.2668774425983429 2023-01-22 11:39:40.253559: step: 686/469, loss: 0.2370632290840149 2023-01-22 11:39:40.886561: step: 688/469, loss: 0.3069482445716858 2023-01-22 11:39:41.570003: step: 690/469, loss: 0.6174138188362122 2023-01-22 11:39:42.166234: step: 692/469, loss: 0.1225072517991066 2023-01-22 11:39:42.862890: step: 694/469, loss: 0.03507116809487343 2023-01-22 11:39:43.532290: step: 696/469, loss: 0.1423187255859375 2023-01-22 11:39:44.083716: step: 698/469, loss: 0.10632891207933426 2023-01-22 11:39:44.774984: step: 700/469, loss: 0.07801169157028198 2023-01-22 11:39:45.448393: step: 702/469, loss: 0.17713706195354462 2023-01-22 11:39:46.042933: step: 
704/469, loss: 0.10139768570661545 2023-01-22 11:39:46.654077: step: 706/469, loss: 0.12162438780069351 2023-01-22 11:39:47.186713: step: 708/469, loss: 0.37717896699905396 2023-01-22 11:39:47.914182: step: 710/469, loss: 0.14772465825080872 2023-01-22 11:39:48.636589: step: 712/469, loss: 0.3026566505432129 2023-01-22 11:39:49.281204: step: 714/469, loss: 0.16358682513237 2023-01-22 11:39:49.919627: step: 716/469, loss: 0.11009292304515839 2023-01-22 11:39:50.586624: step: 718/469, loss: 0.47127994894981384 2023-01-22 11:39:51.258760: step: 720/469, loss: 0.573236882686615 2023-01-22 11:39:51.874782: step: 722/469, loss: 0.26310306787490845 2023-01-22 11:39:52.584822: step: 724/469, loss: 0.14633750915527344 2023-01-22 11:39:53.272222: step: 726/469, loss: 0.39093732833862305 2023-01-22 11:39:54.077371: step: 728/469, loss: 0.09805526584386826 2023-01-22 11:39:54.809731: step: 730/469, loss: 0.07600551098585129 2023-01-22 11:39:55.446766: step: 732/469, loss: 0.30992385745048523 2023-01-22 11:39:56.115803: step: 734/469, loss: 1.5432090759277344 2023-01-22 11:39:56.741507: step: 736/469, loss: 0.25338101387023926 2023-01-22 11:39:57.392231: step: 738/469, loss: 0.42322248220443726 2023-01-22 11:39:58.051537: step: 740/469, loss: 0.4659385085105896 2023-01-22 11:39:58.706865: step: 742/469, loss: 0.14605183899402618 2023-01-22 11:39:59.405511: step: 744/469, loss: 0.08464605361223221 2023-01-22 11:40:00.031423: step: 746/469, loss: 0.8135920763015747 2023-01-22 11:40:00.630423: step: 748/469, loss: 0.1676606982946396 2023-01-22 11:40:01.339341: step: 750/469, loss: 0.1331390142440796 2023-01-22 11:40:02.007969: step: 752/469, loss: 0.10761026293039322 2023-01-22 11:40:02.685302: step: 754/469, loss: 0.13111016154289246 2023-01-22 11:40:03.369868: step: 756/469, loss: 0.25879067182540894 2023-01-22 11:40:04.082070: step: 758/469, loss: 0.30239158868789673 2023-01-22 11:40:04.722103: step: 760/469, loss: 0.14022549986839294 2023-01-22 11:40:05.474200: step: 762/469, loss: 0.16620653867721558 2023-01-22 11:40:06.089486: step: 764/469, loss: 0.18070916831493378 2023-01-22 11:40:06.739539: step: 766/469, loss: 0.25677546858787537 2023-01-22 11:40:07.414780: step: 768/469, loss: 0.10850760340690613 2023-01-22 11:40:08.023639: step: 770/469, loss: 0.6034120917320251 2023-01-22 11:40:08.683385: step: 772/469, loss: 1.017383098602295 2023-01-22 11:40:09.280111: step: 774/469, loss: 0.1369587481021881 2023-01-22 11:40:09.961803: step: 776/469, loss: 0.3048272728919983 2023-01-22 11:40:10.599886: step: 778/469, loss: 0.13232165575027466 2023-01-22 11:40:11.261762: step: 780/469, loss: 0.209258571267128 2023-01-22 11:40:11.974013: step: 782/469, loss: 0.14465820789337158 2023-01-22 11:40:12.617019: step: 784/469, loss: 0.05674801766872406 2023-01-22 11:40:13.256695: step: 786/469, loss: 0.1617845743894577 2023-01-22 11:40:13.917175: step: 788/469, loss: 0.1544911116361618 2023-01-22 11:40:14.538932: step: 790/469, loss: 0.08833402395248413 2023-01-22 11:40:15.155088: step: 792/469, loss: 0.2832323908805847 2023-01-22 11:40:15.770529: step: 794/469, loss: 1.1684081554412842 2023-01-22 11:40:16.435740: step: 796/469, loss: 0.11391933262348175 2023-01-22 11:40:17.034927: step: 798/469, loss: 0.05843877047300339 2023-01-22 11:40:17.708245: step: 800/469, loss: 0.13185347616672516 2023-01-22 11:40:18.376552: step: 802/469, loss: 0.1924135386943817 2023-01-22 11:40:18.988044: step: 804/469, loss: 0.20566807687282562 2023-01-22 11:40:19.607619: step: 806/469, loss: 0.1436580866575241 2023-01-22 
11:40:20.186812: step: 808/469, loss: 0.19666725397109985 2023-01-22 11:40:20.838344: step: 810/469, loss: 0.14203034341335297 2023-01-22 11:40:21.532074: step: 812/469, loss: 0.12521588802337646 2023-01-22 11:40:22.193566: step: 814/469, loss: 0.20856155455112457 2023-01-22 11:40:22.795718: step: 816/469, loss: 0.18280331790447235 2023-01-22 11:40:23.516853: step: 818/469, loss: 0.5503715872764587 2023-01-22 11:40:24.118158: step: 820/469, loss: 0.09337566047906876 2023-01-22 11:40:24.866899: step: 822/469, loss: 0.27005404233932495 2023-01-22 11:40:25.560943: step: 824/469, loss: 0.25442826747894287 2023-01-22 11:40:26.312762: step: 826/469, loss: 0.12735864520072937 2023-01-22 11:40:26.962771: step: 828/469, loss: 1.8254402875900269 2023-01-22 11:40:27.658428: step: 830/469, loss: 0.19316191971302032 2023-01-22 11:40:28.350606: step: 832/469, loss: 0.11748109012842178 2023-01-22 11:40:29.026603: step: 834/469, loss: 1.3834993839263916 2023-01-22 11:40:29.747243: step: 836/469, loss: 0.3081236183643341 2023-01-22 11:40:30.380880: step: 838/469, loss: 0.19454190135002136 2023-01-22 11:40:31.059509: step: 840/469, loss: 0.16552944481372833 2023-01-22 11:40:31.685952: step: 842/469, loss: 0.2136889398097992 2023-01-22 11:40:32.355835: step: 844/469, loss: 0.2318936586380005 2023-01-22 11:40:32.998848: step: 846/469, loss: 0.46329569816589355 2023-01-22 11:40:33.632832: step: 848/469, loss: 0.36109212040901184 2023-01-22 11:40:34.287211: step: 850/469, loss: 0.09724821150302887 2023-01-22 11:40:34.966190: step: 852/469, loss: 0.15499748289585114 2023-01-22 11:40:35.596580: step: 854/469, loss: 0.24015632271766663 2023-01-22 11:40:36.238587: step: 856/469, loss: 0.09081809967756271 2023-01-22 11:40:36.930142: step: 858/469, loss: 0.18688549101352692 2023-01-22 11:40:37.600212: step: 860/469, loss: 0.1367340087890625 2023-01-22 11:40:38.218816: step: 862/469, loss: 0.13588404655456543 2023-01-22 11:40:38.892376: step: 864/469, loss: 1.043573260307312 2023-01-22 11:40:39.506144: step: 866/469, loss: 0.0923236832022667 2023-01-22 11:40:40.125290: step: 868/469, loss: 0.17240412533283234 2023-01-22 11:40:40.712610: step: 870/469, loss: 0.5087149739265442 2023-01-22 11:40:41.351241: step: 872/469, loss: 0.4754045307636261 2023-01-22 11:40:42.039773: step: 874/469, loss: 0.42853158712387085 2023-01-22 11:40:42.726699: step: 876/469, loss: 0.8060194253921509 2023-01-22 11:40:43.360905: step: 878/469, loss: 0.08807642012834549 2023-01-22 11:40:44.121881: step: 880/469, loss: 1.2634409666061401 2023-01-22 11:40:44.732555: step: 882/469, loss: 0.10770653188228607 2023-01-22 11:40:45.349903: step: 884/469, loss: 0.1813407838344574 2023-01-22 11:40:46.016462: step: 886/469, loss: 0.14426329731941223 2023-01-22 11:40:46.636857: step: 888/469, loss: 0.1587604135274887 2023-01-22 11:40:47.335581: step: 890/469, loss: 0.08517385274171829 2023-01-22 11:40:47.987152: step: 892/469, loss: 0.17000234127044678 2023-01-22 11:40:48.690167: step: 894/469, loss: 0.41661056876182556 2023-01-22 11:40:49.351017: step: 896/469, loss: 0.8561276197433472 2023-01-22 11:40:49.975220: step: 898/469, loss: 0.060232289135456085 2023-01-22 11:40:50.595190: step: 900/469, loss: 0.25848907232284546 2023-01-22 11:40:51.254478: step: 902/469, loss: 0.3494167923927307 2023-01-22 11:40:51.911298: step: 904/469, loss: 0.08899218589067459 2023-01-22 11:40:52.635816: step: 906/469, loss: 0.1723947674036026 2023-01-22 11:40:53.334733: step: 908/469, loss: 0.3268420100212097 2023-01-22 11:40:53.980802: step: 910/469, loss: 
0.09955978393554688 2023-01-22 11:40:54.659238: step: 912/469, loss: 0.3480122983455658 2023-01-22 11:40:55.352858: step: 914/469, loss: 0.24432191252708435 2023-01-22 11:40:55.997240: step: 916/469, loss: 0.5372911691665649 2023-01-22 11:40:56.700455: step: 918/469, loss: 0.1267661452293396 2023-01-22 11:40:57.380168: step: 920/469, loss: 0.10970328748226166 2023-01-22 11:40:58.085542: step: 922/469, loss: 0.447407066822052 2023-01-22 11:40:58.729231: step: 924/469, loss: 0.173196479678154 2023-01-22 11:40:59.374401: step: 926/469, loss: 0.4349397122859955 2023-01-22 11:40:59.958712: step: 928/469, loss: 0.10653305798768997 2023-01-22 11:41:00.610237: step: 930/469, loss: 0.1968565434217453 2023-01-22 11:41:01.274172: step: 932/469, loss: 0.17384858429431915 2023-01-22 11:41:01.952895: step: 934/469, loss: 0.21895577013492584 2023-01-22 11:41:02.658047: step: 936/469, loss: 0.09605295211076736 2023-01-22 11:41:03.238961: step: 938/469, loss: 0.15988856554031372
==================================================
Loss: 0.270
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30834489786718866, 'r': 0.3299934011330065, 'f1': 0.31880205755654345}, 'combined': 0.2349067792521899, 'epoch': 11}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30364496130105684, 'r': 0.26399087046448044, 'f1': 0.2824328316156386}, 'combined': 0.1540542717903483, 'epoch': 11}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3092797304106828, 'r': 0.33275447275684467, 'f1': 0.3205879472447114}, 'combined': 0.23622269796978734, 'epoch': 11}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3079957497265661, 'r': 0.27023597803090293, 'f1': 0.28788296684968506}, 'combined': 0.15702707282710093, 'epoch': 11}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22562893081761004, 'r': 0.3416666666666666, 'f1': 0.27178030303030304}, 'combined': 0.18118686868686867, 'epoch': 11}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2833333333333333, 'r': 0.3695652173913043, 'f1': 0.32075471698113206}, 'combined': 0.16037735849056603, 'epoch': 11}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11}
New best russian model...
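The per-language result lines above each report precision ('p'), recall ('r'), and F1 for the template level and the slot level, plus a single 'combined' number per language. A minimal Python sketch follows (it is not taken from train.py; the field names simply mirror the log) showing the arithmetic the printed values are consistent with: F1 as the harmonic mean of precision and recall, and 'combined' as the product of the template F1 and the slot F1. It reproduces the epoch-11 Test Russian entry above.

def f1_score(p, r):
    # Harmonic mean of precision and recall; returns 0.0 when both are zero.
    return 0.0 if (p + r) == 0 else 2 * p * r / (p + r)

def combined_score(result):
    # The logged 'combined' field equals template F1 times slot F1
    # (an assumption, but it matches the printed entries checked here).
    return result['template']['f1'] * result['slot']['f1']

# Epoch-11 "Test Russian" values copied from the log above:
test_russian = {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454},
                'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}}
assert abs(f1_score(test_russian['template']['p'], test_russian['template']['r'])
           - test_russian['template']['f1']) < 1e-9
print(combined_score(test_russian))  # ~0.157100729898..., matches the logged 'combined' value

For inspecting the training curve, the per-step losses can be pulled out of entries like "2023-01-22 11:41:03.238961: step: 938/469, loss: 0.15988856554031372" with a small helper (again an illustration, not part of the training code):

import re

STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.]+)")

def step_losses(log_text):
    # Returns [(step, loss), ...] for every step entry found in the log text.
    return [(int(step), float(loss)) for step, loss in STEP_RE.findall(log_text)]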
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11}
******************************
Epoch: 12
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 11:44:11.734984: step: 2/469, loss: 0.09010709822177887 2023-01-22 11:44:12.397773: step: 4/469, loss: 0.3958519697189331 2023-01-22 11:44:13.128549: step: 6/469, loss: 0.12911579012870789 2023-01-22 11:44:13.800325: step: 8/469, loss: 0.14571508765220642 2023-01-22 11:44:14.481993: step: 10/469, loss: 0.18585599958896637 2023-01-22 11:44:15.108440: step: 12/469, loss: 0.7505022287368774 2023-01-22 11:44:15.656337: step: 14/469, loss: 0.42242422699928284 2023-01-22 11:44:16.310208: step: 16/469, loss: 0.06319308280944824 2023-01-22 11:44:16.943523: step: 18/469, loss: 0.17724671959877014 2023-01-22 11:44:17.516914: step: 20/469, loss: 0.2355174422264099 2023-01-22 11:44:18.140389: step: 22/469, loss: 0.06950865685939789 2023-01-22 11:44:18.759297: step: 24/469, loss: 0.09060060977935791 2023-01-22 11:44:19.531904: step: 26/469, loss: 0.21231645345687866 2023-01-22 11:44:20.160840: step: 28/469, loss: 0.18261641263961792 2023-01-22 11:44:20.839005: step: 30/469, loss: 0.40979400277137756 2023-01-22 11:44:21.549968: step: 32/469, loss: 0.11734183132648468 2023-01-22 11:44:22.197017: step: 34/469, loss: 0.26135605573654175 2023-01-22 11:44:22.812096:
step: 36/469, loss: 0.222601518034935 2023-01-22 11:44:23.437988: step: 38/469, loss: 0.16646534204483032 2023-01-22 11:44:24.015295: step: 40/469, loss: 0.4240712821483612 2023-01-22 11:44:24.738903: step: 42/469, loss: 0.1280793845653534 2023-01-22 11:44:25.467937: step: 44/469, loss: 1.9127705097198486 2023-01-22 11:44:26.103615: step: 46/469, loss: 0.3722122609615326 2023-01-22 11:44:26.766525: step: 48/469, loss: 0.1303377002477646 2023-01-22 11:44:27.429304: step: 50/469, loss: 0.5981952548027039 2023-01-22 11:44:28.111693: step: 52/469, loss: 0.4974333345890045 2023-01-22 11:44:28.734150: step: 54/469, loss: 0.13185831904411316 2023-01-22 11:44:29.357663: step: 56/469, loss: 0.19388312101364136 2023-01-22 11:44:30.024293: step: 58/469, loss: 1.2019239664077759 2023-01-22 11:44:30.645836: step: 60/469, loss: 0.22199219465255737 2023-01-22 11:44:31.324718: step: 62/469, loss: 0.05821703374385834 2023-01-22 11:44:32.065645: step: 64/469, loss: 0.06818190217018127 2023-01-22 11:44:32.776362: step: 66/469, loss: 0.05169495567679405 2023-01-22 11:44:33.427598: step: 68/469, loss: 0.03145342320203781 2023-01-22 11:44:34.068069: step: 70/469, loss: 0.11096139252185822 2023-01-22 11:44:34.708462: step: 72/469, loss: 0.5940859913825989 2023-01-22 11:44:35.305154: step: 74/469, loss: 0.08544835448265076 2023-01-22 11:44:36.006395: step: 76/469, loss: 0.11480628699064255 2023-01-22 11:44:36.698191: step: 78/469, loss: 0.6271916627883911 2023-01-22 11:44:37.340600: step: 80/469, loss: 0.1922295242547989 2023-01-22 11:44:38.085320: step: 82/469, loss: 0.2981243133544922 2023-01-22 11:44:38.720343: step: 84/469, loss: 0.13314944505691528 2023-01-22 11:44:39.403946: step: 86/469, loss: 0.14996854960918427 2023-01-22 11:44:40.057638: step: 88/469, loss: 0.19276639819145203 2023-01-22 11:44:40.775609: step: 90/469, loss: 0.7176858186721802 2023-01-22 11:44:41.360649: step: 92/469, loss: 0.06875741481781006 2023-01-22 11:44:41.999606: step: 94/469, loss: 0.10084113478660583 2023-01-22 11:44:42.731350: step: 96/469, loss: 0.26804348826408386 2023-01-22 11:44:43.429858: step: 98/469, loss: 0.11273273825645447 2023-01-22 11:44:44.065711: step: 100/469, loss: 0.09384886920452118 2023-01-22 11:44:44.719206: step: 102/469, loss: 1.07734215259552 2023-01-22 11:44:45.368182: step: 104/469, loss: 0.09534644335508347 2023-01-22 11:44:46.072656: step: 106/469, loss: 0.06335356831550598 2023-01-22 11:44:46.740089: step: 108/469, loss: 0.044890131801366806 2023-01-22 11:44:47.428901: step: 110/469, loss: 0.4320107102394104 2023-01-22 11:44:48.060813: step: 112/469, loss: 0.15150602161884308 2023-01-22 11:44:48.697958: step: 114/469, loss: 0.1361083835363388 2023-01-22 11:44:49.343380: step: 116/469, loss: 0.08271677792072296 2023-01-22 11:44:50.023649: step: 118/469, loss: 0.16889916360378265 2023-01-22 11:44:50.652343: step: 120/469, loss: 0.3423912525177002 2023-01-22 11:44:51.298272: step: 122/469, loss: 0.10864198952913284 2023-01-22 11:44:51.871761: step: 124/469, loss: 0.13169211149215698 2023-01-22 11:44:52.520900: step: 126/469, loss: 0.08829011768102646 2023-01-22 11:44:53.177655: step: 128/469, loss: 0.028931546956300735 2023-01-22 11:44:53.821832: step: 130/469, loss: 0.7551465034484863 2023-01-22 11:44:54.527340: step: 132/469, loss: 0.17933650314807892 2023-01-22 11:44:55.268749: step: 134/469, loss: 0.16171324253082275 2023-01-22 11:44:55.899263: step: 136/469, loss: 0.04460104927420616 2023-01-22 11:44:56.539838: step: 138/469, loss: 0.4347587525844574 2023-01-22 11:44:57.157587: step: 140/469, 
loss: 0.10114738345146179 2023-01-22 11:44:57.837580: step: 142/469, loss: 0.623106062412262 2023-01-22 11:44:58.531064: step: 144/469, loss: 0.18777582049369812 2023-01-22 11:44:59.235228: step: 146/469, loss: 0.14334578812122345 2023-01-22 11:44:59.886211: step: 148/469, loss: 0.09308890998363495 2023-01-22 11:45:00.586376: step: 150/469, loss: 0.28365811705589294 2023-01-22 11:45:01.202596: step: 152/469, loss: 0.13981464505195618 2023-01-22 11:45:01.841138: step: 154/469, loss: 0.24605931341648102 2023-01-22 11:45:02.513588: step: 156/469, loss: 0.1875181943178177 2023-01-22 11:45:03.168316: step: 158/469, loss: 0.20010283589363098 2023-01-22 11:45:03.753610: step: 160/469, loss: 0.16939972341060638 2023-01-22 11:45:04.403671: step: 162/469, loss: 0.20508363842964172 2023-01-22 11:45:05.005249: step: 164/469, loss: 0.07715775817632675 2023-01-22 11:45:05.626696: step: 166/469, loss: 0.05183357745409012 2023-01-22 11:45:06.194696: step: 168/469, loss: 0.1138022318482399 2023-01-22 11:45:06.809153: step: 170/469, loss: 0.10665109008550644 2023-01-22 11:45:07.477156: step: 172/469, loss: 0.14467765390872955 2023-01-22 11:45:08.069770: step: 174/469, loss: 0.17302070558071136 2023-01-22 11:45:08.719713: step: 176/469, loss: 0.06281275302171707 2023-01-22 11:45:09.382143: step: 178/469, loss: 0.106350839138031 2023-01-22 11:45:10.005835: step: 180/469, loss: 0.12620073556900024 2023-01-22 11:45:10.659806: step: 182/469, loss: 0.15765975415706635 2023-01-22 11:45:11.273933: step: 184/469, loss: 0.1896129697561264 2023-01-22 11:45:12.002315: step: 186/469, loss: 0.1435815989971161 2023-01-22 11:45:12.636652: step: 188/469, loss: 0.04556216672062874 2023-01-22 11:45:13.349319: step: 190/469, loss: 0.3126879632472992 2023-01-22 11:45:14.007268: step: 192/469, loss: 0.6106399297714233 2023-01-22 11:45:14.690941: step: 194/469, loss: 0.07210603356361389 2023-01-22 11:45:15.312959: step: 196/469, loss: 0.09808190912008286 2023-01-22 11:45:15.934353: step: 198/469, loss: 0.07551100105047226 2023-01-22 11:45:16.553991: step: 200/469, loss: 0.08211780339479446 2023-01-22 11:45:17.181586: step: 202/469, loss: 0.15985342860221863 2023-01-22 11:45:17.780561: step: 204/469, loss: 0.2892969846725464 2023-01-22 11:45:18.362866: step: 206/469, loss: 0.10105925798416138 2023-01-22 11:45:19.022313: step: 208/469, loss: 0.1534845530986786 2023-01-22 11:45:19.635933: step: 210/469, loss: 0.152360200881958 2023-01-22 11:45:20.324352: step: 212/469, loss: 0.071222685277462 2023-01-22 11:45:21.011822: step: 214/469, loss: 0.2626939117908478 2023-01-22 11:45:21.619251: step: 216/469, loss: 0.11798075586557388 2023-01-22 11:45:22.291667: step: 218/469, loss: 0.17177097499370575 2023-01-22 11:45:22.885913: step: 220/469, loss: 0.0783061608672142 2023-01-22 11:45:23.498681: step: 222/469, loss: 0.4897153973579407 2023-01-22 11:45:24.154690: step: 224/469, loss: 0.10043233633041382 2023-01-22 11:45:24.789339: step: 226/469, loss: 0.2859938442707062 2023-01-22 11:45:25.426269: step: 228/469, loss: 0.11709606647491455 2023-01-22 11:45:26.059082: step: 230/469, loss: 0.10491877794265747 2023-01-22 11:45:26.680517: step: 232/469, loss: 0.38725748658180237 2023-01-22 11:45:27.428600: step: 234/469, loss: 0.11682604253292084 2023-01-22 11:45:28.058901: step: 236/469, loss: 0.3147163987159729 2023-01-22 11:45:28.746564: step: 238/469, loss: 0.07728300988674164 2023-01-22 11:45:29.401277: step: 240/469, loss: 0.15843608975410461 2023-01-22 11:45:30.021365: step: 242/469, loss: 0.1569214016199112 2023-01-22 11:45:30.624698: 
step: 244/469, loss: 0.23852930963039398 2023-01-22 11:45:31.244947: step: 246/469, loss: 0.37657713890075684 2023-01-22 11:45:31.853226: step: 248/469, loss: 0.27573999762535095 2023-01-22 11:45:32.486969: step: 250/469, loss: 0.33493033051490784 2023-01-22 11:45:33.109587: step: 252/469, loss: 0.094824880361557 2023-01-22 11:45:33.732370: step: 254/469, loss: 0.0782121792435646 2023-01-22 11:45:34.418894: step: 256/469, loss: 0.21393126249313354 2023-01-22 11:45:35.035779: step: 258/469, loss: 0.050006359815597534 2023-01-22 11:45:35.705518: step: 260/469, loss: 0.10235308855772018 2023-01-22 11:45:36.374264: step: 262/469, loss: 0.5770941972732544 2023-01-22 11:45:37.058284: step: 264/469, loss: 0.6711481809616089 2023-01-22 11:45:37.704779: step: 266/469, loss: 0.18015974760055542 2023-01-22 11:45:38.355537: step: 268/469, loss: 0.6386641263961792 2023-01-22 11:45:38.967953: step: 270/469, loss: 0.02753409929573536 2023-01-22 11:45:39.596300: step: 272/469, loss: 0.11631710082292557 2023-01-22 11:45:40.176364: step: 274/469, loss: 0.540796160697937 2023-01-22 11:45:40.882813: step: 276/469, loss: 0.06846290081739426 2023-01-22 11:45:41.568180: step: 278/469, loss: 0.21025104820728302 2023-01-22 11:45:42.232970: step: 280/469, loss: 0.36336255073547363 2023-01-22 11:45:42.908569: step: 282/469, loss: 0.10736263543367386 2023-01-22 11:45:43.603466: step: 284/469, loss: 0.15782277286052704 2023-01-22 11:45:44.212778: step: 286/469, loss: 0.16212287545204163 2023-01-22 11:45:44.946072: step: 288/469, loss: 0.1548091173171997 2023-01-22 11:45:45.652890: step: 290/469, loss: 0.0778033435344696 2023-01-22 11:45:46.330893: step: 292/469, loss: 0.0829491838812828 2023-01-22 11:45:46.980905: step: 294/469, loss: 0.1539023071527481 2023-01-22 11:45:48.385719: step: 296/469, loss: 0.1201256662607193 2023-01-22 11:45:49.045458: step: 298/469, loss: 0.09404920041561127 2023-01-22 11:45:49.739769: step: 300/469, loss: 0.14464302361011505 2023-01-22 11:45:50.441304: step: 302/469, loss: 0.059379640966653824 2023-01-22 11:45:51.167638: step: 304/469, loss: 0.18006101250648499 2023-01-22 11:45:51.799918: step: 306/469, loss: 0.14657913148403168 2023-01-22 11:45:52.441787: step: 308/469, loss: 0.15113556385040283 2023-01-22 11:45:53.031221: step: 310/469, loss: 0.06717038154602051 2023-01-22 11:45:53.708735: step: 312/469, loss: 0.18208912014961243 2023-01-22 11:45:54.372753: step: 314/469, loss: 0.12528879940509796 2023-01-22 11:45:55.059746: step: 316/469, loss: 0.16097323596477509 2023-01-22 11:45:55.738611: step: 318/469, loss: 0.323387086391449 2023-01-22 11:45:56.517970: step: 320/469, loss: 0.054082948714494705 2023-01-22 11:45:57.248923: step: 322/469, loss: 0.04615125432610512 2023-01-22 11:45:57.912747: step: 324/469, loss: 0.3247581124305725 2023-01-22 11:45:58.586464: step: 326/469, loss: 0.3015666604042053 2023-01-22 11:45:59.304831: step: 328/469, loss: 0.12031938880681992 2023-01-22 11:45:59.972799: step: 330/469, loss: 0.823621392250061 2023-01-22 11:46:00.618300: step: 332/469, loss: 0.6472270488739014 2023-01-22 11:46:01.216596: step: 334/469, loss: 0.1509711742401123 2023-01-22 11:46:01.972345: step: 336/469, loss: 0.09899154305458069 2023-01-22 11:46:02.584110: step: 338/469, loss: 0.34739452600479126 2023-01-22 11:46:03.209382: step: 340/469, loss: 0.1512516736984253 2023-01-22 11:46:03.851147: step: 342/469, loss: 0.3168966770172119 2023-01-22 11:46:04.471152: step: 344/469, loss: 0.11273898929357529 2023-01-22 11:46:05.107662: step: 346/469, loss: 0.09030656516551971 2023-01-22 
11:46:05.709991: step: 348/469, loss: 0.12679389119148254 2023-01-22 11:46:06.357523: step: 350/469, loss: 0.22046662867069244 2023-01-22 11:46:07.014008: step: 352/469, loss: 0.2992478311061859 2023-01-22 11:46:07.656888: step: 354/469, loss: 0.2764323651790619 2023-01-22 11:46:08.273902: step: 356/469, loss: 0.16730737686157227 2023-01-22 11:46:08.931263: step: 358/469, loss: 0.4008782207965851 2023-01-22 11:46:09.661688: step: 360/469, loss: 0.08393470197916031 2023-01-22 11:46:10.252401: step: 362/469, loss: 0.023924363777041435 2023-01-22 11:46:10.935509: step: 364/469, loss: 0.027280492708086967 2023-01-22 11:46:11.646132: step: 366/469, loss: 0.5706105828285217 2023-01-22 11:46:12.267092: step: 368/469, loss: 0.12655659019947052 2023-01-22 11:46:12.930015: step: 370/469, loss: 0.1514558345079422 2023-01-22 11:46:13.568202: step: 372/469, loss: 0.16106680035591125 2023-01-22 11:46:14.236296: step: 374/469, loss: 0.06775608658790588 2023-01-22 11:46:14.963667: step: 376/469, loss: 0.13861997425556183 2023-01-22 11:46:15.568403: step: 378/469, loss: 0.02835659123957157 2023-01-22 11:46:16.275476: step: 380/469, loss: 0.11011745780706406 2023-01-22 11:46:16.951054: step: 382/469, loss: 0.5650831460952759 2023-01-22 11:46:17.677314: step: 384/469, loss: 0.08820359408855438 2023-01-22 11:46:18.279409: step: 386/469, loss: 0.06612352281808853 2023-01-22 11:46:18.877210: step: 388/469, loss: 0.06718561053276062 2023-01-22 11:46:19.599607: step: 390/469, loss: 0.5960380434989929 2023-01-22 11:46:20.208086: step: 392/469, loss: 0.2348286658525467 2023-01-22 11:46:20.816401: step: 394/469, loss: 0.14006319642066956 2023-01-22 11:46:21.478065: step: 396/469, loss: 0.18940773606300354 2023-01-22 11:46:22.147457: step: 398/469, loss: 0.13046416640281677 2023-01-22 11:46:22.803229: step: 400/469, loss: 0.1405685991048813 2023-01-22 11:46:23.498207: step: 402/469, loss: 0.10449093580245972 2023-01-22 11:46:24.218408: step: 404/469, loss: 0.2600097358226776 2023-01-22 11:46:24.844714: step: 406/469, loss: 0.2757759988307953 2023-01-22 11:46:25.532644: step: 408/469, loss: 0.2753802239894867 2023-01-22 11:46:26.203407: step: 410/469, loss: 0.18735244870185852 2023-01-22 11:46:26.862647: step: 412/469, loss: 0.18760113418102264 2023-01-22 11:46:27.563438: step: 414/469, loss: 0.13700029253959656 2023-01-22 11:46:28.211253: step: 416/469, loss: 0.27538928389549255 2023-01-22 11:46:28.846901: step: 418/469, loss: 0.05533416569232941 2023-01-22 11:46:29.497072: step: 420/469, loss: 0.37720417976379395 2023-01-22 11:46:30.097012: step: 422/469, loss: 0.16215504705905914 2023-01-22 11:46:30.830060: step: 424/469, loss: 0.1332855224609375 2023-01-22 11:46:31.475471: step: 426/469, loss: 0.044950053095817566 2023-01-22 11:46:32.079668: step: 428/469, loss: 0.07420222461223602 2023-01-22 11:46:32.812797: step: 430/469, loss: 0.17046400904655457 2023-01-22 11:46:33.471708: step: 432/469, loss: 0.1962832808494568 2023-01-22 11:46:34.140558: step: 434/469, loss: 0.08419742435216904 2023-01-22 11:46:34.830849: step: 436/469, loss: 0.23773783445358276 2023-01-22 11:46:35.559805: step: 438/469, loss: 0.20467659831047058 2023-01-22 11:46:36.160226: step: 440/469, loss: 0.5752460956573486 2023-01-22 11:46:36.777376: step: 442/469, loss: 0.1751350611448288 2023-01-22 11:46:37.451159: step: 444/469, loss: 0.23590019345283508 2023-01-22 11:46:38.081199: step: 446/469, loss: 0.11765924096107483 2023-01-22 11:46:38.755977: step: 448/469, loss: 0.12432537972927094 2023-01-22 11:46:39.380224: step: 450/469, loss: 
1.031097412109375 2023-01-22 11:46:40.084636: step: 452/469, loss: 0.5516473054885864 2023-01-22 11:46:40.703073: step: 454/469, loss: 0.7052913308143616 2023-01-22 11:46:41.344545: step: 456/469, loss: 0.13842427730560303 2023-01-22 11:46:41.968464: step: 458/469, loss: 0.15683257579803467 2023-01-22 11:46:42.661673: step: 460/469, loss: 0.21235518157482147 2023-01-22 11:46:43.312971: step: 462/469, loss: 0.18652980029582977 2023-01-22 11:46:44.031597: step: 464/469, loss: 0.08775254338979721 2023-01-22 11:46:44.643591: step: 466/469, loss: 0.16776995360851288 2023-01-22 11:46:45.307738: step: 468/469, loss: 1.0242727994918823 2023-01-22 11:46:45.892808: step: 470/469, loss: 0.25791946053504944 2023-01-22 11:46:46.467425: step: 472/469, loss: 0.09766504168510437 2023-01-22 11:46:47.100819: step: 474/469, loss: 0.2593061923980713 2023-01-22 11:46:47.773254: step: 476/469, loss: 0.15998785197734833 2023-01-22 11:46:48.433951: step: 478/469, loss: 0.33104825019836426 2023-01-22 11:46:49.094970: step: 480/469, loss: 0.2022796869277954 2023-01-22 11:46:49.771505: step: 482/469, loss: 0.14389175176620483 2023-01-22 11:46:50.432151: step: 484/469, loss: 0.43419936299324036 2023-01-22 11:46:51.149462: step: 486/469, loss: 0.10547473281621933 2023-01-22 11:46:51.825611: step: 488/469, loss: 0.14546909928321838 2023-01-22 11:46:52.534836: step: 490/469, loss: 0.26507213711738586 2023-01-22 11:46:53.175359: step: 492/469, loss: 0.11528874933719635 2023-01-22 11:46:53.827801: step: 494/469, loss: 0.12346061319112778 2023-01-22 11:46:54.442087: step: 496/469, loss: 0.14933393895626068 2023-01-22 11:46:55.076604: step: 498/469, loss: 0.1556585431098938 2023-01-22 11:46:55.803674: step: 500/469, loss: 0.11973549425601959 2023-01-22 11:46:56.518006: step: 502/469, loss: 0.09847920387983322 2023-01-22 11:46:57.206043: step: 504/469, loss: 0.11896110326051712 2023-01-22 11:46:57.849419: step: 506/469, loss: 0.29117506742477417 2023-01-22 11:46:58.505883: step: 508/469, loss: 0.16343897581100464 2023-01-22 11:46:59.156208: step: 510/469, loss: 0.06698044389486313 2023-01-22 11:46:59.901697: step: 512/469, loss: 0.317922443151474 2023-01-22 11:47:00.520070: step: 514/469, loss: 0.06957085430622101 2023-01-22 11:47:01.189321: step: 516/469, loss: 0.09205619245767593 2023-01-22 11:47:01.846184: step: 518/469, loss: 0.18013393878936768 2023-01-22 11:47:02.449249: step: 520/469, loss: 0.12401673942804337 2023-01-22 11:47:03.122819: step: 522/469, loss: 0.36328259110450745 2023-01-22 11:47:03.734057: step: 524/469, loss: 0.31907233595848083 2023-01-22 11:47:04.420917: step: 526/469, loss: 0.2107231318950653 2023-01-22 11:47:05.083880: step: 528/469, loss: 0.06706923991441727 2023-01-22 11:47:05.710987: step: 530/469, loss: 0.21510553359985352 2023-01-22 11:47:06.362578: step: 532/469, loss: 0.1253836452960968 2023-01-22 11:47:06.994755: step: 534/469, loss: 0.1771828830242157 2023-01-22 11:47:07.651818: step: 536/469, loss: 0.24723005294799805 2023-01-22 11:47:08.286826: step: 538/469, loss: 0.9632300734519958 2023-01-22 11:47:08.982560: step: 540/469, loss: 0.6564993262290955 2023-01-22 11:47:09.646587: step: 542/469, loss: 0.3365788161754608 2023-01-22 11:47:10.316269: step: 544/469, loss: 0.1955217570066452 2023-01-22 11:47:10.977255: step: 546/469, loss: 0.27033689618110657 2023-01-22 11:47:11.693092: step: 548/469, loss: 0.1297452598810196 2023-01-22 11:47:12.335921: step: 550/469, loss: 0.17727072536945343 2023-01-22 11:47:13.020065: step: 552/469, loss: 0.22598281502723694 2023-01-22 11:47:13.674635: step: 
554/469, loss: 0.09274069219827652 2023-01-22 11:47:14.349311: step: 556/469, loss: 0.17935216426849365 2023-01-22 11:47:14.989158: step: 558/469, loss: 0.09343497455120087 2023-01-22 11:47:15.872210: step: 560/469, loss: 0.14464518427848816 2023-01-22 11:47:16.426259: step: 562/469, loss: 0.10873056948184967 2023-01-22 11:47:17.009092: step: 564/469, loss: 0.0771813839673996 2023-01-22 11:47:17.725481: step: 566/469, loss: 0.16872255504131317 2023-01-22 11:47:18.318751: step: 568/469, loss: 0.12507933378219604 2023-01-22 11:47:19.004279: step: 570/469, loss: 0.14560864865779877 2023-01-22 11:47:19.622508: step: 572/469, loss: 0.35550230741500854 2023-01-22 11:47:20.235254: step: 574/469, loss: 0.2243213653564453 2023-01-22 11:47:20.884528: step: 576/469, loss: 0.12023797631263733 2023-01-22 11:47:21.468413: step: 578/469, loss: 0.12230618298053741 2023-01-22 11:47:22.175435: step: 580/469, loss: 0.10368078202009201 2023-01-22 11:47:22.820737: step: 582/469, loss: 0.2484348863363266 2023-01-22 11:47:23.481840: step: 584/469, loss: 0.1614336222410202 2023-01-22 11:47:24.174107: step: 586/469, loss: 0.20537163317203522 2023-01-22 11:47:24.887834: step: 588/469, loss: 0.3633364737033844 2023-01-22 11:47:25.534269: step: 590/469, loss: 0.10143674910068512 2023-01-22 11:47:26.230164: step: 592/469, loss: 0.40851813554763794 2023-01-22 11:47:26.813717: step: 594/469, loss: 0.13826799392700195 2023-01-22 11:47:27.464042: step: 596/469, loss: 0.1731613278388977 2023-01-22 11:47:28.102401: step: 598/469, loss: 1.3614288568496704 2023-01-22 11:47:28.765591: step: 600/469, loss: 0.2950364947319031 2023-01-22 11:47:29.433737: step: 602/469, loss: 0.15174712240695953 2023-01-22 11:47:30.105946: step: 604/469, loss: 0.17593331634998322 2023-01-22 11:47:30.744585: step: 606/469, loss: 0.23934026062488556 2023-01-22 11:47:31.456076: step: 608/469, loss: 0.20558743178844452 2023-01-22 11:47:32.140893: step: 610/469, loss: 0.039850398898124695 2023-01-22 11:47:32.769687: step: 612/469, loss: 0.5241489410400391 2023-01-22 11:47:33.396123: step: 614/469, loss: 0.3212416470050812 2023-01-22 11:47:34.030075: step: 616/469, loss: 0.17849406599998474 2023-01-22 11:47:34.713169: step: 618/469, loss: 0.1348249614238739 2023-01-22 11:47:35.392774: step: 620/469, loss: 0.11664216965436935 2023-01-22 11:47:36.004431: step: 622/469, loss: 0.22106176614761353 2023-01-22 11:47:36.678611: step: 624/469, loss: 0.08235683292150497 2023-01-22 11:47:37.339434: step: 626/469, loss: 0.3486630320549011 2023-01-22 11:47:37.987116: step: 628/469, loss: 0.13559679687023163 2023-01-22 11:47:38.669470: step: 630/469, loss: 0.13814496994018555 2023-01-22 11:47:39.280697: step: 632/469, loss: 0.10115935653448105 2023-01-22 11:47:39.909560: step: 634/469, loss: 0.15298683941364288 2023-01-22 11:47:40.550686: step: 636/469, loss: 0.05955592542886734 2023-01-22 11:47:41.119281: step: 638/469, loss: 0.10162433236837387 2023-01-22 11:47:41.742459: step: 640/469, loss: 1.179209589958191 2023-01-22 11:47:42.368933: step: 642/469, loss: 0.5895562767982483 2023-01-22 11:47:42.967967: step: 644/469, loss: 0.08995138853788376 2023-01-22 11:47:43.667720: step: 646/469, loss: 0.1272493600845337 2023-01-22 11:47:44.304784: step: 648/469, loss: 0.18245941400527954 2023-01-22 11:47:44.918556: step: 650/469, loss: 0.277253657579422 2023-01-22 11:47:45.565340: step: 652/469, loss: 0.8337023854255676 2023-01-22 11:47:46.255808: step: 654/469, loss: 0.45173418521881104 2023-01-22 11:47:46.895297: step: 656/469, loss: 0.18245379626750946 2023-01-22 
11:47:47.643186: step: 658/469, loss: 0.2845979332923889 2023-01-22 11:47:48.211105: step: 660/469, loss: 0.1612120270729065 2023-01-22 11:47:48.918003: step: 662/469, loss: 0.4129007160663605 2023-01-22 11:47:49.664854: step: 664/469, loss: 0.3531123697757721 2023-01-22 11:47:50.288773: step: 666/469, loss: 0.3081263601779938 2023-01-22 11:47:50.921456: step: 668/469, loss: 1.1761054992675781 2023-01-22 11:47:51.577526: step: 670/469, loss: 0.2595185339450836 2023-01-22 11:47:52.316186: step: 672/469, loss: 0.24489150941371918 2023-01-22 11:47:52.936639: step: 674/469, loss: 0.1660284399986267 2023-01-22 11:47:53.631726: step: 676/469, loss: 0.047580648213624954 2023-01-22 11:47:54.276388: step: 678/469, loss: 0.21147482097148895 2023-01-22 11:47:54.911389: step: 680/469, loss: 0.05559933930635452 2023-01-22 11:47:55.514313: step: 682/469, loss: 0.18751806020736694 2023-01-22 11:47:56.158721: step: 684/469, loss: 0.054852958768606186 2023-01-22 11:47:56.833160: step: 686/469, loss: 0.14558443427085876 2023-01-22 11:47:57.511926: step: 688/469, loss: 0.24283434450626373 2023-01-22 11:47:58.132599: step: 690/469, loss: 0.1446641981601715 2023-01-22 11:47:58.815725: step: 692/469, loss: 0.07388562709093094 2023-01-22 11:47:59.390491: step: 694/469, loss: 0.053731344640254974 2023-01-22 11:48:00.112735: step: 696/469, loss: 0.11538675427436829 2023-01-22 11:48:00.910443: step: 698/469, loss: 0.18003545701503754 2023-01-22 11:48:01.518903: step: 700/469, loss: 0.5147695541381836 2023-01-22 11:48:02.194507: step: 702/469, loss: 0.11726795881986618 2023-01-22 11:48:02.849497: step: 704/469, loss: 0.30476054549217224 2023-01-22 11:48:03.502619: step: 706/469, loss: 0.20852771401405334 2023-01-22 11:48:04.115129: step: 708/469, loss: 0.12997330725193024 2023-01-22 11:48:04.755377: step: 710/469, loss: 0.6460703611373901 2023-01-22 11:48:05.338743: step: 712/469, loss: 0.5039768815040588 2023-01-22 11:48:06.000302: step: 714/469, loss: 0.355660617351532 2023-01-22 11:48:06.657367: step: 716/469, loss: 0.14265164732933044 2023-01-22 11:48:07.333337: step: 718/469, loss: 0.1854066252708435 2023-01-22 11:48:08.000208: step: 720/469, loss: 0.22185933589935303 2023-01-22 11:48:08.661917: step: 722/469, loss: 0.1451837569475174 2023-01-22 11:48:09.279366: step: 724/469, loss: 0.03779136762022972 2023-01-22 11:48:09.896924: step: 726/469, loss: 0.11523859947919846 2023-01-22 11:48:10.567382: step: 728/469, loss: 0.47882261872291565 2023-01-22 11:48:11.223875: step: 730/469, loss: 0.31595316529273987 2023-01-22 11:48:11.842208: step: 732/469, loss: 1.2247695922851562 2023-01-22 11:48:12.537794: step: 734/469, loss: 0.18435610830783844 2023-01-22 11:48:13.346858: step: 736/469, loss: 0.12068726867437363 2023-01-22 11:48:14.006182: step: 738/469, loss: 0.08250205963850021 2023-01-22 11:48:14.674663: step: 740/469, loss: 0.10171730071306229 2023-01-22 11:48:15.371100: step: 742/469, loss: 0.16402065753936768 2023-01-22 11:48:16.033928: step: 744/469, loss: 0.13324828445911407 2023-01-22 11:48:16.697903: step: 746/469, loss: 0.12444313615560532 2023-01-22 11:48:17.340943: step: 748/469, loss: 0.10346558690071106 2023-01-22 11:48:17.946902: step: 750/469, loss: 0.5168923139572144 2023-01-22 11:48:18.577978: step: 752/469, loss: 0.1270841658115387 2023-01-22 11:48:19.280901: step: 754/469, loss: 0.26641565561294556 2023-01-22 11:48:19.933498: step: 756/469, loss: 0.18214882910251617 2023-01-22 11:48:20.607546: step: 758/469, loss: 0.5728313326835632 2023-01-22 11:48:21.225462: step: 760/469, loss: 
0.3768772482872009 2023-01-22 11:48:21.844261: step: 762/469, loss: 0.1931002140045166 2023-01-22 11:48:22.523250: step: 764/469, loss: 0.32628846168518066 2023-01-22 11:48:23.153789: step: 766/469, loss: 0.18113891780376434 2023-01-22 11:48:23.772512: step: 768/469, loss: 0.05588013678789139 2023-01-22 11:48:24.612804: step: 770/469, loss: 0.07006081938743591 2023-01-22 11:48:25.277767: step: 772/469, loss: 0.05329433083534241 2023-01-22 11:48:25.944141: step: 774/469, loss: 0.12903809547424316 2023-01-22 11:48:26.616600: step: 776/469, loss: 0.20359189808368683 2023-01-22 11:48:27.277544: step: 778/469, loss: 0.775844395160675 2023-01-22 11:48:27.889809: step: 780/469, loss: 0.10517729073762894 2023-01-22 11:48:28.471624: step: 782/469, loss: 0.3936934471130371 2023-01-22 11:48:29.060413: step: 784/469, loss: 0.5093246102333069 2023-01-22 11:48:29.756715: step: 786/469, loss: 0.5735411047935486 2023-01-22 11:48:30.446457: step: 788/469, loss: 0.16819803416728973 2023-01-22 11:48:31.065793: step: 790/469, loss: 0.27249181270599365 2023-01-22 11:48:31.687208: step: 792/469, loss: 0.1117519810795784 2023-01-22 11:48:32.381476: step: 794/469, loss: 0.08045224100351334 2023-01-22 11:48:33.048186: step: 796/469, loss: 0.1268969476222992 2023-01-22 11:48:33.688791: step: 798/469, loss: 0.3834597170352936 2023-01-22 11:48:34.345572: step: 800/469, loss: 0.6271202564239502 2023-01-22 11:48:35.066254: step: 802/469, loss: 0.16394072771072388 2023-01-22 11:48:35.692114: step: 804/469, loss: 0.10925667732954025 2023-01-22 11:48:36.390597: step: 806/469, loss: 0.06161484122276306 2023-01-22 11:48:37.121386: step: 808/469, loss: 0.1299452930688858 2023-01-22 11:48:37.842660: step: 810/469, loss: 0.13461150228977203 2023-01-22 11:48:38.552204: step: 812/469, loss: 0.16261617839336395 2023-01-22 11:48:39.189023: step: 814/469, loss: 0.49927422404289246 2023-01-22 11:48:39.868311: step: 816/469, loss: 0.27267399430274963 2023-01-22 11:48:40.589376: step: 818/469, loss: 0.4247361123561859 2023-01-22 11:48:41.246273: step: 820/469, loss: 0.17890776693820953 2023-01-22 11:48:41.897271: step: 822/469, loss: 0.17473213374614716 2023-01-22 11:48:42.583046: step: 824/469, loss: 0.2757745087146759 2023-01-22 11:48:43.243647: step: 826/469, loss: 0.1165783554315567 2023-01-22 11:48:44.000797: step: 828/469, loss: 0.19895058870315552 2023-01-22 11:48:44.590292: step: 830/469, loss: 0.16803598403930664 2023-01-22 11:48:45.235069: step: 832/469, loss: 0.5686655640602112 2023-01-22 11:48:45.891714: step: 834/469, loss: 0.2641165852546692 2023-01-22 11:48:46.529146: step: 836/469, loss: 0.0898580402135849 2023-01-22 11:48:47.143292: step: 838/469, loss: 0.21407265961170197 2023-01-22 11:48:47.816168: step: 840/469, loss: 0.13167890906333923 2023-01-22 11:48:48.451630: step: 842/469, loss: 0.49524736404418945 2023-01-22 11:48:49.033115: step: 844/469, loss: 0.1604488641023636 2023-01-22 11:48:49.666961: step: 846/469, loss: 0.12622714042663574 2023-01-22 11:48:50.363028: step: 848/469, loss: 0.12502378225326538 2023-01-22 11:48:51.025725: step: 850/469, loss: 1.2619335651397705 2023-01-22 11:48:51.717780: step: 852/469, loss: 0.13600540161132812 2023-01-22 11:48:52.366789: step: 854/469, loss: 0.4233364164829254 2023-01-22 11:48:52.962484: step: 856/469, loss: 0.15904124081134796 2023-01-22 11:48:53.527004: step: 858/469, loss: 0.8725924491882324 2023-01-22 11:48:54.173870: step: 860/469, loss: 0.21988923847675323 2023-01-22 11:48:54.765599: step: 862/469, loss: 0.123007632791996 2023-01-22 11:48:55.485696: step: 
864/469, loss: 0.15686100721359253 2023-01-22 11:48:56.132580: step: 866/469, loss: 0.09428158402442932 2023-01-22 11:48:56.788114: step: 868/469, loss: 0.15478846430778503 2023-01-22 11:48:57.441113: step: 870/469, loss: 0.46862301230430603 2023-01-22 11:48:58.088854: step: 872/469, loss: 0.07323585450649261 2023-01-22 11:48:58.708776: step: 874/469, loss: 0.17957401275634766 2023-01-22 11:48:59.336334: step: 876/469, loss: 0.0937713235616684 2023-01-22 11:48:59.957992: step: 878/469, loss: 0.363863080739975 2023-01-22 11:49:00.597953: step: 880/469, loss: 0.11538223922252655 2023-01-22 11:49:01.204579: step: 882/469, loss: 0.030977336689829826 2023-01-22 11:49:01.848483: step: 884/469, loss: 0.4014894664287567 2023-01-22 11:49:02.458481: step: 886/469, loss: 0.227244034409523 2023-01-22 11:49:03.017742: step: 888/469, loss: 0.24971428513526917 2023-01-22 11:49:03.680874: step: 890/469, loss: 1.621824026107788 2023-01-22 11:49:04.381772: step: 892/469, loss: 0.1848357617855072 2023-01-22 11:49:05.017148: step: 894/469, loss: 0.39088448882102966 2023-01-22 11:49:05.702432: step: 896/469, loss: 0.1631229817867279 2023-01-22 11:49:06.356718: step: 898/469, loss: 1.3466644287109375 2023-01-22 11:49:06.951662: step: 900/469, loss: 0.07818323373794556 2023-01-22 11:49:07.611412: step: 902/469, loss: 0.1619444489479065 2023-01-22 11:49:08.244141: step: 904/469, loss: 0.1829451024532318 2023-01-22 11:49:08.934530: step: 906/469, loss: 0.06281502544879913 2023-01-22 11:49:09.514162: step: 908/469, loss: 0.1573103964328766 2023-01-22 11:49:10.109255: step: 910/469, loss: 0.12201010435819626 2023-01-22 11:49:10.817260: step: 912/469, loss: 0.28905412554740906 2023-01-22 11:49:11.504407: step: 914/469, loss: 0.18813428282737732 2023-01-22 11:49:12.162507: step: 916/469, loss: 0.536222517490387 2023-01-22 11:49:12.885345: step: 918/469, loss: 0.03911914303898811 2023-01-22 11:49:13.540996: step: 920/469, loss: 1.827509880065918 2023-01-22 11:49:14.244155: step: 922/469, loss: 0.13335424661636353 2023-01-22 11:49:15.049162: step: 924/469, loss: 0.11065218597650528 2023-01-22 11:49:15.756029: step: 926/469, loss: 0.2032957822084427 2023-01-22 11:49:16.396247: step: 928/469, loss: 1.170350432395935 2023-01-22 11:49:17.074269: step: 930/469, loss: 0.6633148193359375 2023-01-22 11:49:17.721785: step: 932/469, loss: 0.30619141459465027 2023-01-22 11:49:18.446346: step: 934/469, loss: 2.020390748977661 2023-01-22 11:49:19.036489: step: 936/469, loss: 0.23759694397449493 2023-01-22 11:49:19.665478: step: 938/469, loss: 0.6831357479095459
==================================================
Loss: 0.253
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3143055998066662, 'r': 0.34770429731932906, 'f1': 0.33016245889601153}, 'combined': 0.24327760129179796, 'epoch': 12}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2938020376862401, 'r': 0.26288965494706573, 'f1': 0.277487583637994}, 'combined': 0.15135686380254215, 'epoch': 12}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31671128558563333, 'r': 0.34856270519860977, 'f1': 0.331874517867511}, 'combined': 0.2445391184286923, 'epoch': 12}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.28846984454939, 'r': 0.2610467514660842, 'f1': 0.27407403656803847}, 'combined': 0.1494949290371119, 'epoch': 12}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3063987861498328, 'r': 0.34535271152372043, 'f1': 0.32471164847992984}, 'combined': 0.23926121466942196, 'epoch': 12}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.288972986269264, 'r': 0.2627988548505475, 'f1': 0.2752651158137503}, 'combined': 0.15014460862568196, 'epoch': 12}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24113475177304963, 'r': 0.32380952380952377, 'f1': 0.2764227642276422}, 'combined': 0.18428184281842813, 'epoch': 12}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 12}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 12}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11}
******************************
Epoch: 13
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 11:52:18.424983: step: 2/469, loss: 0.2294110655784607 2023-01-22 11:52:19.090232: step: 4/469, loss:
0.10552580654621124 2023-01-22 11:52:19.703737: step: 6/469, loss: 0.15640881657600403 2023-01-22 11:52:20.330213: step: 8/469, loss: 0.034205976873636246 2023-01-22 11:52:20.994322: step: 10/469, loss: 0.06980501860380173 2023-01-22 11:52:21.638467: step: 12/469, loss: 0.095150887966156 2023-01-22 11:52:22.252294: step: 14/469, loss: 0.05807060748338699 2023-01-22 11:52:22.907308: step: 16/469, loss: 0.06640897691249847 2023-01-22 11:52:23.564130: step: 18/469, loss: 0.29062190651893616 2023-01-22 11:52:24.261865: step: 20/469, loss: 0.07286447286605835 2023-01-22 11:52:24.946141: step: 22/469, loss: 0.10701128840446472 2023-01-22 11:52:25.592352: step: 24/469, loss: 0.1147419810295105 2023-01-22 11:52:26.233303: step: 26/469, loss: 0.1374412178993225 2023-01-22 11:52:26.941325: step: 28/469, loss: 0.0968305692076683 2023-01-22 11:52:27.593131: step: 30/469, loss: 0.1279314160346985 2023-01-22 11:52:28.183968: step: 32/469, loss: 0.1515100598335266 2023-01-22 11:52:28.793763: step: 34/469, loss: 0.0876307263970375 2023-01-22 11:52:29.522814: step: 36/469, loss: 0.036387212574481964 2023-01-22 11:52:30.211423: step: 38/469, loss: 0.13604223728179932 2023-01-22 11:52:30.825744: step: 40/469, loss: 0.06580393761396408 2023-01-22 11:52:31.546225: step: 42/469, loss: 0.052817728370428085 2023-01-22 11:52:32.096423: step: 44/469, loss: 0.13042756915092468 2023-01-22 11:52:32.769046: step: 46/469, loss: 0.317474901676178 2023-01-22 11:52:33.519834: step: 48/469, loss: 0.29092055559158325 2023-01-22 11:52:34.216662: step: 50/469, loss: 0.029858211055397987 2023-01-22 11:52:34.848122: step: 52/469, loss: 0.12260261923074722 2023-01-22 11:52:35.451050: step: 54/469, loss: 0.05783361196517944 2023-01-22 11:52:36.102540: step: 56/469, loss: 0.1679166555404663 2023-01-22 11:52:36.801389: step: 58/469, loss: 0.4225844740867615 2023-01-22 11:52:37.487010: step: 60/469, loss: 0.26760512590408325 2023-01-22 11:52:38.106508: step: 62/469, loss: 0.06546816974878311 2023-01-22 11:52:38.776084: step: 64/469, loss: 0.25032976269721985 2023-01-22 11:52:39.353488: step: 66/469, loss: 1.215293288230896 2023-01-22 11:52:39.972452: step: 68/469, loss: 0.09485647827386856 2023-01-22 11:52:40.576091: step: 70/469, loss: 0.1537492573261261 2023-01-22 11:52:41.257773: step: 72/469, loss: 0.21111547946929932 2023-01-22 11:52:41.879604: step: 74/469, loss: 0.40520209074020386 2023-01-22 11:52:42.565815: step: 76/469, loss: 0.09451662749052048 2023-01-22 11:52:43.244849: step: 78/469, loss: 0.06155052036046982 2023-01-22 11:52:43.992307: step: 80/469, loss: 0.1046370193362236 2023-01-22 11:52:44.645283: step: 82/469, loss: 0.15164315700531006 2023-01-22 11:52:45.301043: step: 84/469, loss: 0.22775579988956451 2023-01-22 11:52:45.927811: step: 86/469, loss: 0.05479899421334267 2023-01-22 11:52:46.511902: step: 88/469, loss: 0.07875333726406097 2023-01-22 11:52:47.260145: step: 90/469, loss: 0.21325896680355072 2023-01-22 11:52:47.937074: step: 92/469, loss: 0.11635953187942505 2023-01-22 11:52:48.612383: step: 94/469, loss: 0.2095092236995697 2023-01-22 11:52:49.268499: step: 96/469, loss: 0.1170249804854393 2023-01-22 11:52:49.926780: step: 98/469, loss: 0.04608559235930443 2023-01-22 11:52:50.595507: step: 100/469, loss: 0.03804682567715645 2023-01-22 11:52:51.243178: step: 102/469, loss: 0.36065393686294556 2023-01-22 11:52:52.078898: step: 104/469, loss: 0.08604159951210022 2023-01-22 11:52:52.764152: step: 106/469, loss: 0.07397216558456421 2023-01-22 11:52:53.416602: step: 108/469, loss: 0.186575248837471 2023-01-22 
11:52:54.067537: step: 110/469, loss: 0.3155174255371094 2023-01-22 11:52:54.722187: step: 112/469, loss: 0.579622745513916 2023-01-22 11:52:55.338791: step: 114/469, loss: 0.037597622722387314 2023-01-22 11:52:56.059805: step: 116/469, loss: 0.04044022038578987 2023-01-22 11:52:56.667489: step: 118/469, loss: 0.05252430960536003 2023-01-22 11:52:57.300984: step: 120/469, loss: 0.13036908209323883 2023-01-22 11:52:57.946768: step: 122/469, loss: 0.033417362719774246 2023-01-22 11:52:58.592807: step: 124/469, loss: 0.19477954506874084 2023-01-22 11:52:59.260279: step: 126/469, loss: 0.08658799529075623 2023-01-22 11:52:59.917433: step: 128/469, loss: 0.03600918874144554 2023-01-22 11:53:00.610585: step: 130/469, loss: 0.05813496559858322 2023-01-22 11:53:01.232862: step: 132/469, loss: 0.4854338467121124 2023-01-22 11:53:01.918445: step: 134/469, loss: 0.0947144627571106 2023-01-22 11:53:02.639113: step: 136/469, loss: 0.11194200813770294 2023-01-22 11:53:03.340712: step: 138/469, loss: 0.1804186999797821 2023-01-22 11:53:03.991095: step: 140/469, loss: 0.21446490287780762 2023-01-22 11:53:04.649356: step: 142/469, loss: 0.30432432889938354 2023-01-22 11:53:05.272932: step: 144/469, loss: 0.34499290585517883 2023-01-22 11:53:05.863341: step: 146/469, loss: 0.049611784517765045 2023-01-22 11:53:06.562151: step: 148/469, loss: 0.04391444846987724 2023-01-22 11:53:07.185284: step: 150/469, loss: 0.22266417741775513 2023-01-22 11:53:07.840775: step: 152/469, loss: 0.09504599869251251 2023-01-22 11:53:08.469971: step: 154/469, loss: 0.7208542823791504 2023-01-22 11:53:09.079646: step: 156/469, loss: 0.0622759573161602 2023-01-22 11:53:09.718546: step: 158/469, loss: 0.03672638535499573 2023-01-22 11:53:10.323183: step: 160/469, loss: 0.03442857041954994 2023-01-22 11:53:10.915147: step: 162/469, loss: 0.09264393895864487 2023-01-22 11:53:11.558088: step: 164/469, loss: 0.044600196182727814 2023-01-22 11:53:12.306032: step: 166/469, loss: 0.3165144920349121 2023-01-22 11:53:12.940829: step: 168/469, loss: 0.11863255500793457 2023-01-22 11:53:13.585373: step: 170/469, loss: 0.1762791872024536 2023-01-22 11:53:14.310121: step: 172/469, loss: 0.10699054598808289 2023-01-22 11:53:14.964468: step: 174/469, loss: 0.09779482334852219 2023-01-22 11:53:15.556546: step: 176/469, loss: 0.19619430601596832 2023-01-22 11:53:16.234525: step: 178/469, loss: 0.08160151541233063 2023-01-22 11:53:16.838288: step: 180/469, loss: 0.02518235705792904 2023-01-22 11:53:17.500067: step: 182/469, loss: 0.14843423664569855 2023-01-22 11:53:18.091072: step: 184/469, loss: 0.019742164760828018 2023-01-22 11:53:18.795035: step: 186/469, loss: 0.019009260460734367 2023-01-22 11:53:19.413233: step: 188/469, loss: 0.22973763942718506 2023-01-22 11:53:20.185759: step: 190/469, loss: 0.13438567519187927 2023-01-22 11:53:20.831971: step: 192/469, loss: 0.07546482980251312 2023-01-22 11:53:21.502336: step: 194/469, loss: 0.05986330285668373 2023-01-22 11:53:22.224278: step: 196/469, loss: 0.044918037950992584 2023-01-22 11:53:22.884899: step: 198/469, loss: 0.07734072208404541 2023-01-22 11:53:23.470770: step: 200/469, loss: 0.207444429397583 2023-01-22 11:53:24.171996: step: 202/469, loss: 0.20675028860569 2023-01-22 11:53:24.825298: step: 204/469, loss: 0.22096797823905945 2023-01-22 11:53:25.458311: step: 206/469, loss: 0.11853277683258057 2023-01-22 11:53:26.147023: step: 208/469, loss: 0.1394554078578949 2023-01-22 11:53:26.730066: step: 210/469, loss: 0.12088552862405777 2023-01-22 11:53:27.358285: step: 212/469, loss: 
0.34791815280914307 2023-01-22 11:53:27.938377: step: 214/469, loss: 0.08338740468025208 2023-01-22 11:53:28.593604: step: 216/469, loss: 0.14494885504245758 2023-01-22 11:53:29.286755: step: 218/469, loss: 1.0744656324386597 2023-01-22 11:53:29.924313: step: 220/469, loss: 0.07820221036672592 2023-01-22 11:53:30.576785: step: 222/469, loss: 0.09651470929384232 2023-01-22 11:53:31.244497: step: 224/469, loss: 0.22024601697921753 2023-01-22 11:53:31.907096: step: 226/469, loss: 0.1264672875404358 2023-01-22 11:53:32.544201: step: 228/469, loss: 0.0487193688750267 2023-01-22 11:53:33.300622: step: 230/469, loss: 0.13652971386909485 2023-01-22 11:53:33.986177: step: 232/469, loss: 0.15800170600414276 2023-01-22 11:53:34.636094: step: 234/469, loss: 0.1348744034767151 2023-01-22 11:53:35.343457: step: 236/469, loss: 0.0199300404638052 2023-01-22 11:53:36.050065: step: 238/469, loss: 0.04722470045089722 2023-01-22 11:53:36.665185: step: 240/469, loss: 0.13642564415931702 2023-01-22 11:53:37.307261: step: 242/469, loss: 0.10824882984161377 2023-01-22 11:53:37.936687: step: 244/469, loss: 0.12253447622060776 2023-01-22 11:53:38.603407: step: 246/469, loss: 0.12152257561683655 2023-01-22 11:53:39.240530: step: 248/469, loss: 0.17997433245182037 2023-01-22 11:53:39.878232: step: 250/469, loss: 8.4161958694458 2023-01-22 11:53:40.559249: step: 252/469, loss: 0.08476436883211136 2023-01-22 11:53:41.199907: step: 254/469, loss: 0.07402852922677994 2023-01-22 11:53:41.947367: step: 256/469, loss: 0.36886370182037354 2023-01-22 11:53:42.580847: step: 258/469, loss: 0.09367343783378601 2023-01-22 11:53:43.190149: step: 260/469, loss: 0.18831424415111542 2023-01-22 11:53:43.840621: step: 262/469, loss: 0.13770127296447754 2023-01-22 11:53:44.490202: step: 264/469, loss: 0.0983630120754242 2023-01-22 11:53:45.135677: step: 266/469, loss: 0.1506970226764679 2023-01-22 11:53:45.821202: step: 268/469, loss: 0.07185398042201996 2023-01-22 11:53:46.451205: step: 270/469, loss: 0.3573124408721924 2023-01-22 11:53:47.162414: step: 272/469, loss: 1.5084404945373535 2023-01-22 11:53:47.805180: step: 274/469, loss: 0.07283905893564224 2023-01-22 11:53:48.450492: step: 276/469, loss: 0.12669117748737335 2023-01-22 11:53:49.130241: step: 278/469, loss: 0.18776993453502655 2023-01-22 11:53:49.822352: step: 280/469, loss: 0.429730087518692 2023-01-22 11:53:50.463817: step: 282/469, loss: 0.3037673234939575 2023-01-22 11:53:51.110319: step: 284/469, loss: 0.03288634866476059 2023-01-22 11:53:51.841213: step: 286/469, loss: 0.29557883739471436 2023-01-22 11:53:52.466414: step: 288/469, loss: 0.16269901394844055 2023-01-22 11:53:53.130167: step: 290/469, loss: 0.20090721547603607 2023-01-22 11:53:53.799919: step: 292/469, loss: 0.01363021694123745 2023-01-22 11:53:54.418019: step: 294/469, loss: 0.1702626496553421 2023-01-22 11:53:55.022251: step: 296/469, loss: 0.09344138950109482 2023-01-22 11:53:55.660059: step: 298/469, loss: 0.1148252934217453 2023-01-22 11:53:56.318631: step: 300/469, loss: 0.06467646360397339 2023-01-22 11:53:56.991394: step: 302/469, loss: 0.04325678572058678 2023-01-22 11:53:57.640628: step: 304/469, loss: 0.12897565960884094 2023-01-22 11:53:58.263737: step: 306/469, loss: 0.14464987814426422 2023-01-22 11:53:58.849375: step: 308/469, loss: 0.07649722695350647 2023-01-22 11:53:59.545925: step: 310/469, loss: 0.18311582505702972 2023-01-22 11:54:00.196976: step: 312/469, loss: 0.10752494633197784 2023-01-22 11:54:00.851213: step: 314/469, loss: 0.16310743987560272 2023-01-22 11:54:01.477896: step: 
316/469, loss: 0.16025860607624054 2023-01-22 11:54:02.176327: step: 318/469, loss: 0.06761769205331802 2023-01-22 11:54:02.871421: step: 320/469, loss: 0.24413201212882996 2023-01-22 11:54:03.639128: step: 322/469, loss: 0.08819463849067688 2023-01-22 11:54:04.308024: step: 324/469, loss: 0.055806148797273636 2023-01-22 11:54:04.901132: step: 326/469, loss: 0.09786657243967056 2023-01-22 11:54:05.523635: step: 328/469, loss: 0.20256228744983673 2023-01-22 11:54:06.152828: step: 330/469, loss: 0.10871902108192444 2023-01-22 11:54:06.793378: step: 332/469, loss: 0.7324473261833191 2023-01-22 11:54:07.486603: step: 334/469, loss: 0.4415014684200287 2023-01-22 11:54:08.212348: step: 336/469, loss: 0.18037280440330505 2023-01-22 11:54:08.911955: step: 338/469, loss: 0.13206717371940613 2023-01-22 11:54:09.563597: step: 340/469, loss: 0.13370883464813232 2023-01-22 11:54:10.175077: step: 342/469, loss: 0.04568473622202873 2023-01-22 11:54:10.801133: step: 344/469, loss: 0.10528954863548279 2023-01-22 11:54:11.373008: step: 346/469, loss: 0.06718618422746658 2023-01-22 11:54:12.011338: step: 348/469, loss: 0.4360056519508362 2023-01-22 11:54:12.670449: step: 350/469, loss: 0.20691066980361938 2023-01-22 11:54:13.343261: step: 352/469, loss: 0.22220776975154877 2023-01-22 11:54:14.045604: step: 354/469, loss: 0.14346978068351746 2023-01-22 11:54:14.684052: step: 356/469, loss: 0.022748466581106186 2023-01-22 11:54:15.262304: step: 358/469, loss: 0.04106808826327324 2023-01-22 11:54:16.074265: step: 360/469, loss: 0.0920754000544548 2023-01-22 11:54:16.697260: step: 362/469, loss: 0.1296059489250183 2023-01-22 11:54:17.347536: step: 364/469, loss: 4.278023719787598 2023-01-22 11:54:17.996993: step: 366/469, loss: 0.2297745794057846 2023-01-22 11:54:18.704723: step: 368/469, loss: 0.1554986536502838 2023-01-22 11:54:19.441238: step: 370/469, loss: 0.2550108730792999 2023-01-22 11:54:20.155612: step: 372/469, loss: 0.7753260731697083 2023-01-22 11:54:20.825814: step: 374/469, loss: 0.09382228553295135 2023-01-22 11:54:21.498607: step: 376/469, loss: 0.1605989933013916 2023-01-22 11:54:22.108219: step: 378/469, loss: 0.10666244477033615 2023-01-22 11:54:22.842141: step: 380/469, loss: 0.07900448143482208 2023-01-22 11:54:23.491081: step: 382/469, loss: 0.1279546618461609 2023-01-22 11:54:24.105192: step: 384/469, loss: 0.22945332527160645 2023-01-22 11:54:24.824592: step: 386/469, loss: 0.09080290794372559 2023-01-22 11:54:25.497516: step: 388/469, loss: 0.1418457180261612 2023-01-22 11:54:26.203825: step: 390/469, loss: 0.24508439004421234 2023-01-22 11:54:26.826492: step: 392/469, loss: 0.17116393148899078 2023-01-22 11:54:27.461644: step: 394/469, loss: 0.242109015583992 2023-01-22 11:54:28.121506: step: 396/469, loss: 0.1886899173259735 2023-01-22 11:54:28.778281: step: 398/469, loss: 0.1522700935602188 2023-01-22 11:54:29.411768: step: 400/469, loss: 0.12411347031593323 2023-01-22 11:54:30.045885: step: 402/469, loss: 0.04242609441280365 2023-01-22 11:54:30.696742: step: 404/469, loss: 0.11626464128494263 2023-01-22 11:54:31.350755: step: 406/469, loss: 0.055477697402238846 2023-01-22 11:54:32.010370: step: 408/469, loss: 1.790461778640747 2023-01-22 11:54:32.738381: step: 410/469, loss: 0.07347521930932999 2023-01-22 11:54:33.372459: step: 412/469, loss: 0.26559871435165405 2023-01-22 11:54:34.126375: step: 414/469, loss: 0.14984188973903656 2023-01-22 11:54:34.704239: step: 416/469, loss: 0.09180110692977905 2023-01-22 11:54:35.340007: step: 418/469, loss: 0.020643198862671852 2023-01-22 
11:54:35.998595: step: 420/469, loss: 1.720484733581543 2023-01-22 11:54:36.675383: step: 422/469, loss: 0.2571749985218048 2023-01-22 11:54:37.352191: step: 424/469, loss: 0.729462206363678 2023-01-22 11:54:38.000700: step: 426/469, loss: 0.09000333398580551 2023-01-22 11:54:38.689102: step: 428/469, loss: 0.14303572475910187 2023-01-22 11:54:39.271044: step: 430/469, loss: 0.43912845849990845 2023-01-22 11:54:39.921937: step: 432/469, loss: 0.1708480715751648 2023-01-22 11:54:40.512520: step: 434/469, loss: 0.12771329283714294 2023-01-22 11:54:41.218571: step: 436/469, loss: 0.13828711211681366 2023-01-22 11:54:41.883912: step: 438/469, loss: 0.05810357257723808 2023-01-22 11:54:42.470084: step: 440/469, loss: 0.10804131627082825 2023-01-22 11:54:43.191707: step: 442/469, loss: 0.39033204317092896 2023-01-22 11:54:43.857013: step: 444/469, loss: 0.24288532137870789 2023-01-22 11:54:44.547317: step: 446/469, loss: 0.06511227786540985 2023-01-22 11:54:45.245581: step: 448/469, loss: 0.1720336675643921 2023-01-22 11:54:45.888688: step: 450/469, loss: 0.03472485765814781 2023-01-22 11:54:46.536651: step: 452/469, loss: 0.1822018325328827 2023-01-22 11:54:47.211644: step: 454/469, loss: 0.3212077021598816 2023-01-22 11:54:47.870756: step: 456/469, loss: 0.15403354167938232 2023-01-22 11:54:48.517232: step: 458/469, loss: 0.06975863128900528 2023-01-22 11:54:49.245963: step: 460/469, loss: 0.09824459254741669 2023-01-22 11:54:49.891085: step: 462/469, loss: 0.23142506182193756 2023-01-22 11:54:50.451995: step: 464/469, loss: 0.3873835802078247 2023-01-22 11:54:51.084604: step: 466/469, loss: 0.06888294219970703 2023-01-22 11:54:51.689636: step: 468/469, loss: 0.09126056730747223 2023-01-22 11:54:52.279817: step: 470/469, loss: 0.17858542501926422 2023-01-22 11:54:52.933408: step: 472/469, loss: 0.3323478102684021 2023-01-22 11:54:53.644793: step: 474/469, loss: 0.17380979657173157 2023-01-22 11:54:54.304013: step: 476/469, loss: 0.062146540731191635 2023-01-22 11:54:54.975332: step: 478/469, loss: 0.27766653895378113 2023-01-22 11:54:55.587609: step: 480/469, loss: 0.23081547021865845 2023-01-22 11:54:56.237624: step: 482/469, loss: 0.08713293820619583 2023-01-22 11:54:56.887521: step: 484/469, loss: 0.14310593903064728 2023-01-22 11:54:57.578037: step: 486/469, loss: 0.12722964584827423 2023-01-22 11:54:58.221023: step: 488/469, loss: 0.11158411204814911 2023-01-22 11:54:58.867213: step: 490/469, loss: 0.15808811783790588 2023-01-22 11:54:59.537131: step: 492/469, loss: 0.1756688505411148 2023-01-22 11:55:00.178417: step: 494/469, loss: 0.24683654308319092 2023-01-22 11:55:00.857607: step: 496/469, loss: 0.08902494609355927 2023-01-22 11:55:01.580791: step: 498/469, loss: 0.795413613319397 2023-01-22 11:55:02.189859: step: 500/469, loss: 0.09781542420387268 2023-01-22 11:55:02.786135: step: 502/469, loss: 0.08164943754673004 2023-01-22 11:55:03.431830: step: 504/469, loss: 0.2103188931941986 2023-01-22 11:55:04.067788: step: 506/469, loss: 0.0565982349216938 2023-01-22 11:55:04.659653: step: 508/469, loss: 5.4032769203186035 2023-01-22 11:55:05.372009: step: 510/469, loss: 0.10968939960002899 2023-01-22 11:55:06.021388: step: 512/469, loss: 0.28326985239982605 2023-01-22 11:55:06.667386: step: 514/469, loss: 0.3121934235095978 2023-01-22 11:55:07.374556: step: 516/469, loss: 0.24992839992046356 2023-01-22 11:55:08.026072: step: 518/469, loss: 0.08476626873016357 2023-01-22 11:55:08.673926: step: 520/469, loss: 0.10821449011564255 2023-01-22 11:55:09.300002: step: 522/469, loss: 
0.1344277411699295 2023-01-22 11:55:09.979069: step: 524/469, loss: 0.13020645081996918 2023-01-22 11:55:10.718502: step: 526/469, loss: 0.08565562218427658 2023-01-22 11:55:11.383745: step: 528/469, loss: 0.07130774110555649 2023-01-22 11:55:12.078078: step: 530/469, loss: 0.2628646492958069 2023-01-22 11:55:12.739204: step: 532/469, loss: 0.1671563684940338 2023-01-22 11:55:13.403822: step: 534/469, loss: 0.5488623976707458 2023-01-22 11:55:14.092147: step: 536/469, loss: 0.39646124839782715 2023-01-22 11:55:14.697794: step: 538/469, loss: 0.17689861357212067 2023-01-22 11:55:15.334514: step: 540/469, loss: 0.10598752647638321 2023-01-22 11:55:16.028507: step: 542/469, loss: 0.06343793123960495 2023-01-22 11:55:16.665558: step: 544/469, loss: 0.7502897381782532 2023-01-22 11:55:17.279835: step: 546/469, loss: 0.15774798393249512 2023-01-22 11:55:17.925183: step: 548/469, loss: 0.3987104296684265 2023-01-22 11:55:18.585626: step: 550/469, loss: 0.08389110118150711 2023-01-22 11:55:19.226602: step: 552/469, loss: 0.08007057011127472 2023-01-22 11:55:19.868649: step: 554/469, loss: 0.0611308217048645 2023-01-22 11:55:20.511040: step: 556/469, loss: 0.15443934500217438 2023-01-22 11:55:21.203477: step: 558/469, loss: 0.6160106658935547 2023-01-22 11:55:21.849977: step: 560/469, loss: 0.10961678624153137 2023-01-22 11:55:22.424540: step: 562/469, loss: 0.305407851934433 2023-01-22 11:55:23.120608: step: 564/469, loss: 0.14642421901226044 2023-01-22 11:55:23.774119: step: 566/469, loss: 0.0951080173254013 2023-01-22 11:55:24.462128: step: 568/469, loss: 0.5984731912612915 2023-01-22 11:55:25.059616: step: 570/469, loss: 0.11568977683782578 2023-01-22 11:55:25.712235: step: 572/469, loss: 6.023694038391113 2023-01-22 11:55:26.372971: step: 574/469, loss: 0.09959385544061661 2023-01-22 11:55:27.015853: step: 576/469, loss: 0.18258406221866608 2023-01-22 11:55:27.814832: step: 578/469, loss: 0.11768639832735062 2023-01-22 11:55:28.562992: step: 580/469, loss: 0.09279417246580124 2023-01-22 11:55:29.270564: step: 582/469, loss: 0.24119991064071655 2023-01-22 11:55:29.992386: step: 584/469, loss: 0.09772532433271408 2023-01-22 11:55:30.776663: step: 586/469, loss: 0.08091095834970474 2023-01-22 11:55:31.390806: step: 588/469, loss: 0.13571064174175262 2023-01-22 11:55:32.100395: step: 590/469, loss: 0.2654077112674713 2023-01-22 11:55:32.746778: step: 592/469, loss: 0.2599298655986786 2023-01-22 11:55:33.396597: step: 594/469, loss: 0.2853321433067322 2023-01-22 11:55:33.990978: step: 596/469, loss: 0.22529801726341248 2023-01-22 11:55:34.635665: step: 598/469, loss: 0.030955595895648003 2023-01-22 11:55:35.318400: step: 600/469, loss: 0.4796280860900879 2023-01-22 11:55:36.034658: step: 602/469, loss: 1.3972283601760864 2023-01-22 11:55:36.648809: step: 604/469, loss: 0.03801490366458893 2023-01-22 11:55:37.283471: step: 606/469, loss: 0.15013805031776428 2023-01-22 11:55:37.927844: step: 608/469, loss: 0.14290277659893036 2023-01-22 11:55:38.532879: step: 610/469, loss: 0.2417699247598648 2023-01-22 11:55:39.196435: step: 612/469, loss: 0.14221957325935364 2023-01-22 11:55:39.891218: step: 614/469, loss: 0.10216628015041351 2023-01-22 11:55:40.562595: step: 616/469, loss: 0.05295451357960701 2023-01-22 11:55:41.254935: step: 618/469, loss: 0.30216163396835327 2023-01-22 11:55:41.882599: step: 620/469, loss: 0.15360566973686218 2023-01-22 11:55:42.542810: step: 622/469, loss: 0.05645817890763283 2023-01-22 11:55:43.201813: step: 624/469, loss: 0.2515581250190735 2023-01-22 11:55:43.765521: step: 
626/469, loss: 0.11840242147445679 2023-01-22 11:55:44.387321: step: 628/469, loss: 0.22216592729091644 2023-01-22 11:55:44.997101: step: 630/469, loss: 0.503872811794281 2023-01-22 11:55:45.600826: step: 632/469, loss: 0.19764861464500427 2023-01-22 11:55:46.207845: step: 634/469, loss: 0.1398444026708603 2023-01-22 11:55:46.824521: step: 636/469, loss: 0.055364448577165604 2023-01-22 11:55:47.458758: step: 638/469, loss: 0.25784334540367126 2023-01-22 11:55:48.085355: step: 640/469, loss: 0.10682844370603561 2023-01-22 11:55:48.756830: step: 642/469, loss: 0.06462714076042175 2023-01-22 11:55:49.632948: step: 644/469, loss: 0.22312414646148682 2023-01-22 11:55:50.350135: step: 646/469, loss: 0.15961511433124542 2023-01-22 11:55:51.036961: step: 648/469, loss: 0.20165929198265076 2023-01-22 11:55:51.768872: step: 650/469, loss: 0.06946925818920135 2023-01-22 11:55:52.382390: step: 652/469, loss: 0.12248215079307556 2023-01-22 11:55:53.063954: step: 654/469, loss: 0.05839003622531891 2023-01-22 11:55:53.726130: step: 656/469, loss: 0.21611042320728302 2023-01-22 11:55:54.370899: step: 658/469, loss: 0.11688397824764252 2023-01-22 11:55:54.943369: step: 660/469, loss: 0.06755553185939789 2023-01-22 11:55:55.570103: step: 662/469, loss: 0.7772153615951538 2023-01-22 11:55:56.179959: step: 664/469, loss: 0.026373213157057762 2023-01-22 11:55:56.805103: step: 666/469, loss: 0.5589932203292847 2023-01-22 11:55:57.472264: step: 668/469, loss: 0.13581423461437225 2023-01-22 11:55:58.042458: step: 670/469, loss: 0.03913614898920059 2023-01-22 11:55:58.721992: step: 672/469, loss: 0.04955853521823883 2023-01-22 11:55:59.397308: step: 674/469, loss: 0.11273442953824997 2023-01-22 11:56:00.111110: step: 676/469, loss: 0.23610681295394897 2023-01-22 11:56:00.825467: step: 678/469, loss: 0.3757669925689697 2023-01-22 11:56:01.464482: step: 680/469, loss: 0.10171528160572052 2023-01-22 11:56:02.131866: step: 682/469, loss: 0.16474813222885132 2023-01-22 11:56:02.778017: step: 684/469, loss: 0.08703788369894028 2023-01-22 11:56:03.403097: step: 686/469, loss: 0.08651354163885117 2023-01-22 11:56:04.042959: step: 688/469, loss: 0.06881536543369293 2023-01-22 11:56:04.679221: step: 690/469, loss: 0.058468058705329895 2023-01-22 11:56:05.361794: step: 692/469, loss: 0.21539010107517242 2023-01-22 11:56:05.971596: step: 694/469, loss: 0.1317528784275055 2023-01-22 11:56:06.674530: step: 696/469, loss: 0.10898716002702713 2023-01-22 11:56:07.304026: step: 698/469, loss: 0.0771978348493576 2023-01-22 11:56:07.986501: step: 700/469, loss: 0.2742895483970642 2023-01-22 11:56:08.590437: step: 702/469, loss: 0.6433630585670471 2023-01-22 11:56:09.198616: step: 704/469, loss: 0.09457377344369888 2023-01-22 11:56:09.833996: step: 706/469, loss: 0.1278889775276184 2023-01-22 11:56:10.450555: step: 708/469, loss: 0.18659457564353943 2023-01-22 11:56:11.049633: step: 710/469, loss: 0.02622041292488575 2023-01-22 11:56:11.785096: step: 712/469, loss: 0.20351165533065796 2023-01-22 11:56:12.454923: step: 714/469, loss: 0.1452672928571701 2023-01-22 11:56:13.128158: step: 716/469, loss: 0.05882904306054115 2023-01-22 11:56:13.752615: step: 718/469, loss: 0.37130770087242126 2023-01-22 11:56:14.462949: step: 720/469, loss: 0.31439873576164246 2023-01-22 11:56:15.151842: step: 722/469, loss: 0.3346779942512512 2023-01-22 11:56:15.818156: step: 724/469, loss: 0.10287675261497498 2023-01-22 11:56:16.419665: step: 726/469, loss: 0.12727101147174835 2023-01-22 11:56:17.037667: step: 728/469, loss: 0.11457782983779907 2023-01-22 
11:56:17.672473: step: 730/469, loss: 0.09384400397539139 2023-01-22 11:56:18.339425: step: 732/469, loss: 0.22653897106647491 2023-01-22 11:56:18.960190: step: 734/469, loss: 0.13181161880493164 2023-01-22 11:56:19.609225: step: 736/469, loss: 0.10666980594396591 2023-01-22 11:56:20.279249: step: 738/469, loss: 0.30227920413017273 2023-01-22 11:56:20.931690: step: 740/469, loss: 0.009498314931988716 2023-01-22 11:56:21.621488: step: 742/469, loss: 0.10703202337026596 2023-01-22 11:56:22.264635: step: 744/469, loss: 0.09349490702152252 2023-01-22 11:56:22.882278: step: 746/469, loss: 0.11710042506456375 2023-01-22 11:56:23.551472: step: 748/469, loss: 0.13578684628009796 2023-01-22 11:56:24.216666: step: 750/469, loss: 0.5420204401016235 2023-01-22 11:56:24.913903: step: 752/469, loss: 0.09380146116018295 2023-01-22 11:56:25.645378: step: 754/469, loss: 0.13721023499965668 2023-01-22 11:56:26.412863: step: 756/469, loss: 0.06682857125997543 2023-01-22 11:56:27.047149: step: 758/469, loss: 0.18444392085075378 2023-01-22 11:56:27.710175: step: 760/469, loss: 0.10151823610067368 2023-01-22 11:56:28.279837: step: 762/469, loss: 0.13188521564006805 2023-01-22 11:56:28.953866: step: 764/469, loss: 0.05160628259181976 2023-01-22 11:56:29.646456: step: 766/469, loss: 0.05794839188456535 2023-01-22 11:56:30.343718: step: 768/469, loss: 0.22263142466545105 2023-01-22 11:56:30.975146: step: 770/469, loss: 0.1093907505273819 2023-01-22 11:56:31.565554: step: 772/469, loss: 0.430452823638916 2023-01-22 11:56:32.190046: step: 774/469, loss: 0.26735109090805054 2023-01-22 11:56:32.916997: step: 776/469, loss: 0.07764904201030731 2023-01-22 11:56:33.634979: step: 778/469, loss: 0.1075054332613945 2023-01-22 11:56:34.318939: step: 780/469, loss: 0.08013945072889328 2023-01-22 11:56:34.930147: step: 782/469, loss: 0.11447945237159729 2023-01-22 11:56:35.547126: step: 784/469, loss: 0.1060430034995079 2023-01-22 11:56:36.247542: step: 786/469, loss: 0.05648250877857208 2023-01-22 11:56:37.083357: step: 788/469, loss: 0.18438391387462616 2023-01-22 11:56:37.721539: step: 790/469, loss: 0.7078243494033813 2023-01-22 11:56:38.343235: step: 792/469, loss: 0.151078462600708 2023-01-22 11:56:38.928358: step: 794/469, loss: 0.1268588751554489 2023-01-22 11:56:39.594244: step: 796/469, loss: 0.23044250905513763 2023-01-22 11:56:40.252216: step: 798/469, loss: 0.08206214010715485 2023-01-22 11:56:40.837972: step: 800/469, loss: 4.870852470397949 2023-01-22 11:56:41.459103: step: 802/469, loss: 0.12381969392299652 2023-01-22 11:56:42.045916: step: 804/469, loss: 0.7066172361373901 2023-01-22 11:56:42.728560: step: 806/469, loss: 0.10656490176916122 2023-01-22 11:56:43.397707: step: 808/469, loss: 0.2443525493144989 2023-01-22 11:56:44.041679: step: 810/469, loss: 0.15172456204891205 2023-01-22 11:56:44.712673: step: 812/469, loss: 0.6226106286048889 2023-01-22 11:56:45.387375: step: 814/469, loss: 0.07676031440496445 2023-01-22 11:56:46.096227: step: 816/469, loss: 0.24102750420570374 2023-01-22 11:56:46.781793: step: 818/469, loss: 0.33800965547561646 2023-01-22 11:56:47.422286: step: 820/469, loss: 0.11378297954797745 2023-01-22 11:56:48.157525: step: 822/469, loss: 0.16470292210578918 2023-01-22 11:56:48.845847: step: 824/469, loss: 0.6618489027023315 2023-01-22 11:56:49.456362: step: 826/469, loss: 0.3794376254081726 2023-01-22 11:56:50.098151: step: 828/469, loss: 0.626031756401062 2023-01-22 11:56:50.751097: step: 830/469, loss: 0.07717182487249374 2023-01-22 11:56:51.425568: step: 832/469, loss: 
0.44119834899902344 2023-01-22 11:56:52.056088: step: 834/469, loss: 0.10924064368009567 2023-01-22 11:56:52.694862: step: 836/469, loss: 0.7192559242248535 2023-01-22 11:56:53.455789: step: 838/469, loss: 0.4965914785861969 2023-01-22 11:56:54.068182: step: 840/469, loss: 0.16419176757335663 2023-01-22 11:56:54.723712: step: 842/469, loss: 0.0885121151804924 2023-01-22 11:56:55.416924: step: 844/469, loss: 0.1369175910949707 2023-01-22 11:56:56.070824: step: 846/469, loss: 0.099001444876194 2023-01-22 11:56:56.722982: step: 848/469, loss: 0.10022465139627457 2023-01-22 11:56:57.368928: step: 850/469, loss: 0.08107144385576248 2023-01-22 11:56:58.017086: step: 852/469, loss: 0.17233121395111084 2023-01-22 11:56:58.640328: step: 854/469, loss: 0.2208385467529297 2023-01-22 11:56:59.252656: step: 856/469, loss: 0.06338278204202652 2023-01-22 11:56:59.949855: step: 858/469, loss: 0.16312026977539062 2023-01-22 11:57:00.614084: step: 860/469, loss: 0.060767509043216705 2023-01-22 11:57:01.273909: step: 862/469, loss: 0.1104615107178688 2023-01-22 11:57:01.847544: step: 864/469, loss: 0.12299557030200958 2023-01-22 11:57:02.496375: step: 866/469, loss: 0.4790195822715759 2023-01-22 11:57:03.198759: step: 868/469, loss: 0.186064213514328 2023-01-22 11:57:03.906626: step: 870/469, loss: 0.11863426119089127 2023-01-22 11:57:04.553617: step: 872/469, loss: 0.17843379080295563 2023-01-22 11:57:05.195653: step: 874/469, loss: 0.09139204770326614 2023-01-22 11:57:05.788574: step: 876/469, loss: 0.08099238574504852 2023-01-22 11:57:06.475488: step: 878/469, loss: 0.6278374195098877 2023-01-22 11:57:07.117851: step: 880/469, loss: 0.1151077151298523 2023-01-22 11:57:07.739079: step: 882/469, loss: 0.193647563457489 2023-01-22 11:57:08.386571: step: 884/469, loss: 0.11502914875745773 2023-01-22 11:57:09.058025: step: 886/469, loss: 0.11790961772203445 2023-01-22 11:57:09.767987: step: 888/469, loss: 0.06212463602423668 2023-01-22 11:57:10.498985: step: 890/469, loss: 0.13378936052322388 2023-01-22 11:57:11.151114: step: 892/469, loss: 0.10703791677951813 2023-01-22 11:57:11.793194: step: 894/469, loss: 0.4593944847583771 2023-01-22 11:57:12.481594: step: 896/469, loss: 0.10451531410217285 2023-01-22 11:57:13.160549: step: 898/469, loss: 1.5565106868743896 2023-01-22 11:57:13.783389: step: 900/469, loss: 0.6149523854255676 2023-01-22 11:57:14.445712: step: 902/469, loss: 0.19858333468437195 2023-01-22 11:57:15.152018: step: 904/469, loss: 0.13087432086467743 2023-01-22 11:57:15.785076: step: 906/469, loss: 0.129456028342247 2023-01-22 11:57:16.427866: step: 908/469, loss: 0.08298467099666595 2023-01-22 11:57:17.058500: step: 910/469, loss: 0.061458345502614975 2023-01-22 11:57:17.720629: step: 912/469, loss: 0.10581742227077484 2023-01-22 11:57:18.446753: step: 914/469, loss: 0.11968250572681427 2023-01-22 11:57:19.083290: step: 916/469, loss: 0.061378706246614456 2023-01-22 11:57:19.812004: step: 918/469, loss: 0.10897963494062424 2023-01-22 11:57:20.492713: step: 920/469, loss: 0.053679272532463074 2023-01-22 11:57:21.184245: step: 922/469, loss: 0.12484706193208694 2023-01-22 11:57:21.820455: step: 924/469, loss: 0.052165526896715164 2023-01-22 11:57:22.561070: step: 926/469, loss: 0.09519276767969131 2023-01-22 11:57:23.133161: step: 928/469, loss: 0.11334328353404999 2023-01-22 11:57:23.816797: step: 930/469, loss: 0.14731420576572418 2023-01-22 11:57:24.441156: step: 932/469, loss: 0.27927833795547485 2023-01-22 11:57:25.126688: step: 934/469, loss: 0.12212363630533218 2023-01-22 11:57:25.756292: 
step: 936/469, loss: 0.07453291863203049 2023-01-22 11:57:26.466396: step: 938/469, loss: 0.12962879240512848
==================================================
Loss: 0.253
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2913810721509939, 'r': 0.3350605877106306, 'f1': 0.3116980224598452}, 'combined': 0.2296722270756754, 'epoch': 13}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2951779027802229, 'r': 0.2589895780111928, 'f1': 0.2759021527936002}, 'combined': 0.15049208334196373, 'epoch': 13}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28131652081362346, 'r': 0.3224196936839252, 'f1': 0.30046892762410005}, 'combined': 0.22139815719670528, 'epoch': 13}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29889834681837096, 'r': 0.25951924165657275, 'f1': 0.2778203047312772}, 'combined': 0.1515383480352421, 'epoch': 13}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2785369008714597, 'r': 0.32346220746363064, 'f1': 0.29932323675738953}, 'combined': 0.22055396392649754, 'epoch': 13}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.288375698247605, 'r': 0.25777040913806964, 'f1': 0.27221551419121753}, 'combined': 0.14848118955884593, 'epoch': 13}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19236583522297804, 'r': 0.3462585034013605, 'f1': 0.24732750242954318}, 'combined': 0.16488500161969544, 'epoch': 13}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.3695652173913043, 'f1': 0.2982456140350877}, 'combined': 0.14912280701754385, 'epoch': 13}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.3017241379310345, 'f1': 0.3723404255319149}, 'combined': 0.2482269503546099, 'epoch': 13}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11}
******************************
Epoch: 14
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 12:00:26.301968: step: 2/469, loss: 0.10332942008972168 2023-01-22 12:00:26.930055: step: 4/469, loss: 0.15318186581134796 2023-01-22 12:00:27.518021: step: 6/469, loss: 0.02971917763352394 2023-01-22 12:00:28.151283: step: 8/469, loss: 0.23199088871479034 2023-01-22 12:00:28.827287: step: 10/469, loss: 0.4790060818195343 2023-01-22 12:00:29.537267: step: 12/469, loss: 0.10452570021152496 2023-01-22 12:00:30.131773: step: 14/469, loss: 0.29724445939064026 2023-01-22 12:00:30.759208: step: 16/469, loss: 0.08338034898042679 2023-01-22 12:00:31.426913: step: 18/469, loss: 0.17674116790294647 2023-01-22 12:00:32.110305: step: 20/469, loss: 0.2040647268295288 2023-01-22 12:00:32.814119: step: 22/469, loss: 0.06422922760248184 2023-01-22 12:00:33.463130: step: 24/469, loss: 0.04940135404467583 2023-01-22 12:00:34.198492: step: 26/469, loss: 0.8426660299301147 2023-01-22 12:00:34.846006: step: 28/469, loss: 0.1423974633216858 2023-01-22 12:00:35.514786: step: 30/469, loss: 0.11744549870491028 2023-01-22 12:00:36.209017: step: 32/469, loss: 0.11263231188058853 2023-01-22 12:00:36.889156: step: 34/469, loss: 0.1099047139286995 2023-01-22 12:00:37.536214: step: 36/469, loss: 0.041722141206264496 2023-01-22 12:00:38.200766: step: 38/469, loss: 0.14946915209293365 2023-01-22 12:00:38.833842: step: 40/469, loss: 0.020181039348244667 2023-01-22 12:00:39.501060: step: 42/469, loss: 0.0604877732694149 2023-01-22 12:00:40.173600: step: 44/469, loss: 0.24753527343273163 2023-01-22 12:00:40.842658: step: 46/469, loss: 0.07693887501955032 2023-01-22 12:00:41.476457: step: 48/469, loss: 0.12376142293214798 2023-01-22 12:00:42.063174: step: 50/469, loss: 0.17435595393180847 2023-01-22 12:00:42.747458: step: 52/469, loss: 0.08856833726167679 2023-01-22 12:00:43.424837: step: 54/469, loss: 0.09702426940202713 2023-01-22 12:00:44.076623: step: 56/469, loss: 0.09591094404459 2023-01-22 12:00:44.743310: step: 58/469, loss: 0.23975864052772522 2023-01-22 12:00:45.484072: step: 60/469, loss: 0.12130329757928848 2023-01-22 12:00:46.130952: step: 62/469, loss: 0.5318564176559448 2023-01-22 12:00:46.890249: step: 64/469, loss: 0.03839130699634552 2023-01-22 12:00:47.516976: step: 66/469, loss: 0.1523742824792862 2023-01-22 12:00:48.216128: step: 68/469, loss: 0.07893607020378113 2023-01-22 12:00:48.869178: step: 70/469, loss: 0.03052467294037342 2023-01-22 12:00:49.476606: step: 72/469, loss: 0.07498115301132202 2023-01-22 12:00:50.081999: step: 74/469, loss: 0.17781122028827667 2023-01-22 12:00:50.726713: step: 76/469,
loss: 0.14667977392673492 2023-01-22 12:00:51.462108: step: 78/469, loss: 0.13367553055286407 2023-01-22 12:00:52.121417: step: 80/469, loss: 0.06649041175842285 2023-01-22 12:00:52.712933: step: 82/469, loss: 0.05659692734479904 2023-01-22 12:00:53.366944: step: 84/469, loss: 0.13454948365688324 2023-01-22 12:00:54.015426: step: 86/469, loss: 0.0643838495016098 2023-01-22 12:00:54.651633: step: 88/469, loss: 0.026117002591490746 2023-01-22 12:00:55.302993: step: 90/469, loss: 0.13594715297222137 2023-01-22 12:00:55.949950: step: 92/469, loss: 0.07614948600530624 2023-01-22 12:00:56.547568: step: 94/469, loss: 0.047005388885736465 2023-01-22 12:00:57.164894: step: 96/469, loss: 0.0802338495850563 2023-01-22 12:00:57.843077: step: 98/469, loss: 0.3998774588108063 2023-01-22 12:00:58.494564: step: 100/469, loss: 0.31478893756866455 2023-01-22 12:00:59.221822: step: 102/469, loss: 0.1145135685801506 2023-01-22 12:00:59.869317: step: 104/469, loss: 0.2985832691192627 2023-01-22 12:01:00.518863: step: 106/469, loss: 0.20097951591014862 2023-01-22 12:01:01.095323: step: 108/469, loss: 0.16306962072849274 2023-01-22 12:01:01.759558: step: 110/469, loss: 0.082069993019104 2023-01-22 12:01:02.446477: step: 112/469, loss: 0.06915789842605591 2023-01-22 12:01:03.124456: step: 114/469, loss: 0.15192940831184387 2023-01-22 12:01:03.733493: step: 116/469, loss: 0.0210536178201437 2023-01-22 12:01:04.488808: step: 118/469, loss: 0.088052898645401 2023-01-22 12:01:05.188099: step: 120/469, loss: 0.3189176619052887 2023-01-22 12:01:05.849184: step: 122/469, loss: 0.15892644226551056 2023-01-22 12:01:06.489320: step: 124/469, loss: 0.09577824920415878 2023-01-22 12:01:07.122632: step: 126/469, loss: 0.45442742109298706 2023-01-22 12:01:07.832056: step: 128/469, loss: 0.11800581961870193 2023-01-22 12:01:08.514176: step: 130/469, loss: 0.9997729063034058 2023-01-22 12:01:09.155877: step: 132/469, loss: 0.07901173084974289 2023-01-22 12:01:09.830559: step: 134/469, loss: 0.06526058912277222 2023-01-22 12:01:10.440966: step: 136/469, loss: 0.04950903728604317 2023-01-22 12:01:11.103748: step: 138/469, loss: 0.08496760576963425 2023-01-22 12:01:11.729572: step: 140/469, loss: 0.1285533308982849 2023-01-22 12:01:12.451128: step: 142/469, loss: 0.5144191980361938 2023-01-22 12:01:13.120773: step: 144/469, loss: 0.060713667422533035 2023-01-22 12:01:13.837647: step: 146/469, loss: 0.05216887965798378 2023-01-22 12:01:14.515116: step: 148/469, loss: 0.13779819011688232 2023-01-22 12:01:15.140484: step: 150/469, loss: 0.004605674184858799 2023-01-22 12:01:15.787902: step: 152/469, loss: 0.08874347805976868 2023-01-22 12:01:16.575826: step: 154/469, loss: 0.14072492718696594 2023-01-22 12:01:17.317599: step: 156/469, loss: 0.29532474279403687 2023-01-22 12:01:17.945316: step: 158/469, loss: 0.13115055859088898 2023-01-22 12:01:18.646360: step: 160/469, loss: 0.24021399021148682 2023-01-22 12:01:19.275827: step: 162/469, loss: 0.1281098872423172 2023-01-22 12:01:19.958884: step: 164/469, loss: 0.32909175753593445 2023-01-22 12:01:20.648384: step: 166/469, loss: 0.07739514857530594 2023-01-22 12:01:21.241316: step: 168/469, loss: 0.1669360101222992 2023-01-22 12:01:21.878210: step: 170/469, loss: 0.08854109048843384 2023-01-22 12:01:22.523175: step: 172/469, loss: 0.17195796966552734 2023-01-22 12:01:23.191721: step: 174/469, loss: 0.05586585775017738 2023-01-22 12:01:23.823674: step: 176/469, loss: 0.05410011112689972 2023-01-22 12:01:24.564208: step: 178/469, loss: 0.09139525145292282 2023-01-22 12:01:25.193299: 
step: 180/469, loss: 0.19453096389770508 2023-01-22 12:01:25.795522: step: 182/469, loss: 0.07657577097415924 2023-01-22 12:01:26.413289: step: 184/469, loss: 0.059658583253622055 2023-01-22 12:01:27.091336: step: 186/469, loss: 0.1249004378914833 2023-01-22 12:01:27.755488: step: 188/469, loss: 0.05884033069014549 2023-01-22 12:01:28.381413: step: 190/469, loss: 0.05664648860692978 2023-01-22 12:01:29.029470: step: 192/469, loss: 0.08319837599992752 2023-01-22 12:01:29.667136: step: 194/469, loss: 0.06288447231054306 2023-01-22 12:01:30.365003: step: 196/469, loss: 0.04063792526721954 2023-01-22 12:01:31.055963: step: 198/469, loss: 0.11769529432058334 2023-01-22 12:01:31.679565: step: 200/469, loss: 0.21704784035682678 2023-01-22 12:01:32.353589: step: 202/469, loss: 0.09529057890176773 2023-01-22 12:01:33.043413: step: 204/469, loss: 0.1181119978427887 2023-01-22 12:01:33.700181: step: 206/469, loss: 0.3056814968585968 2023-01-22 12:01:34.319609: step: 208/469, loss: 0.14676477015018463 2023-01-22 12:01:35.002859: step: 210/469, loss: 0.13852152228355408 2023-01-22 12:01:35.603580: step: 212/469, loss: 0.05900540575385094 2023-01-22 12:01:36.330891: step: 214/469, loss: 0.508470892906189 2023-01-22 12:01:36.973039: step: 216/469, loss: 0.08568944782018661 2023-01-22 12:01:37.667272: step: 218/469, loss: 1.2057976722717285 2023-01-22 12:01:38.328175: step: 220/469, loss: 0.13676388561725616 2023-01-22 12:01:38.999118: step: 222/469, loss: 1.3510915040969849 2023-01-22 12:01:39.670333: step: 224/469, loss: 0.061254069209098816 2023-01-22 12:01:40.272724: step: 226/469, loss: 0.06155312433838844 2023-01-22 12:01:40.961924: step: 228/469, loss: 0.12289540469646454 2023-01-22 12:01:41.626147: step: 230/469, loss: 0.10985579341650009 2023-01-22 12:01:42.206959: step: 232/469, loss: 0.06253089755773544 2023-01-22 12:01:42.879912: step: 234/469, loss: 0.0936756283044815 2023-01-22 12:01:43.506025: step: 236/469, loss: 0.22473719716072083 2023-01-22 12:01:44.133039: step: 238/469, loss: 0.17043496668338776 2023-01-22 12:01:44.782720: step: 240/469, loss: 0.17724712193012238 2023-01-22 12:01:45.412926: step: 242/469, loss: 0.19856032729148865 2023-01-22 12:01:46.075603: step: 244/469, loss: 0.14592309296131134 2023-01-22 12:01:46.694418: step: 246/469, loss: 0.2929247319698334 2023-01-22 12:01:47.343871: step: 248/469, loss: 0.03559292107820511 2023-01-22 12:01:48.076575: step: 250/469, loss: 0.18467766046524048 2023-01-22 12:01:48.744346: step: 252/469, loss: 0.22038336098194122 2023-01-22 12:01:49.476760: step: 254/469, loss: 0.2405225783586502 2023-01-22 12:01:50.108838: step: 256/469, loss: 0.06776655465364456 2023-01-22 12:01:50.823987: step: 258/469, loss: 0.39306890964508057 2023-01-22 12:01:51.496599: step: 260/469, loss: 0.197803795337677 2023-01-22 12:01:52.103097: step: 262/469, loss: 0.32039666175842285 2023-01-22 12:01:52.855381: step: 264/469, loss: 0.4617033302783966 2023-01-22 12:01:53.483748: step: 266/469, loss: 0.07465508580207825 2023-01-22 12:01:54.121760: step: 268/469, loss: 0.040636174380779266 2023-01-22 12:01:54.743840: step: 270/469, loss: 0.15625329315662384 2023-01-22 12:01:55.426204: step: 272/469, loss: 0.11528942734003067 2023-01-22 12:01:56.066528: step: 274/469, loss: 0.645728349685669 2023-01-22 12:01:56.739277: step: 276/469, loss: 0.10598640888929367 2023-01-22 12:01:57.385798: step: 278/469, loss: 0.043184954673051834 2023-01-22 12:01:58.036774: step: 280/469, loss: 0.07680285722017288 2023-01-22 12:01:58.706601: step: 282/469, loss: 0.11312734335660934 
2023-01-22 12:01:59.342544: step: 284/469, loss: 0.15231028199195862 2023-01-22 12:02:00.026471: step: 286/469, loss: 0.16574519872665405 2023-01-22 12:02:00.675528: step: 288/469, loss: 0.06007889658212662 2023-01-22 12:02:01.347489: step: 290/469, loss: 0.1410341113805771 2023-01-22 12:02:01.987311: step: 292/469, loss: 0.05999002233147621 2023-01-22 12:02:02.583288: step: 294/469, loss: 0.3854758143424988 2023-01-22 12:02:03.302935: step: 296/469, loss: 0.11680712550878525 2023-01-22 12:02:03.999917: step: 298/469, loss: 0.02858937531709671 2023-01-22 12:02:04.625738: step: 300/469, loss: 0.2283831387758255 2023-01-22 12:02:05.320158: step: 302/469, loss: 0.09304335713386536 2023-01-22 12:02:06.012022: step: 304/469, loss: 0.21670663356781006 2023-01-22 12:02:06.673264: step: 306/469, loss: 0.41721203923225403 2023-01-22 12:02:07.305125: step: 308/469, loss: 0.04842031002044678 2023-01-22 12:02:07.999394: step: 310/469, loss: 0.02417251467704773 2023-01-22 12:02:08.655363: step: 312/469, loss: 0.01337968185544014 2023-01-22 12:02:09.291338: step: 314/469, loss: 1.1121264696121216 2023-01-22 12:02:09.987251: step: 316/469, loss: 0.08640328049659729 2023-01-22 12:02:10.643927: step: 318/469, loss: 0.19282464683055878 2023-01-22 12:02:11.330129: step: 320/469, loss: 0.028221305459737778 2023-01-22 12:02:12.046250: step: 322/469, loss: 0.14507897198200226 2023-01-22 12:02:12.667550: step: 324/469, loss: 1.0240217447280884 2023-01-22 12:02:13.362821: step: 326/469, loss: 0.09105583280324936 2023-01-22 12:02:14.010338: step: 328/469, loss: 0.13023589551448822 2023-01-22 12:02:14.678436: step: 330/469, loss: 0.07880572974681854 2023-01-22 12:02:15.279289: step: 332/469, loss: 0.16874323785305023 2023-01-22 12:02:16.019717: step: 334/469, loss: 0.05689356103539467 2023-01-22 12:02:16.699025: step: 336/469, loss: 0.07699718326330185 2023-01-22 12:02:17.310913: step: 338/469, loss: 0.07749658077955246 2023-01-22 12:02:17.917520: step: 340/469, loss: 0.12395375221967697 2023-01-22 12:02:18.600690: step: 342/469, loss: 0.08001910150051117 2023-01-22 12:02:19.242368: step: 344/469, loss: 0.20075903832912445 2023-01-22 12:02:19.871363: step: 346/469, loss: 0.10846106708049774 2023-01-22 12:02:20.543842: step: 348/469, loss: 0.04375477507710457 2023-01-22 12:02:21.223705: step: 350/469, loss: 0.09713562577962875 2023-01-22 12:02:21.802867: step: 352/469, loss: 0.030647573992609978 2023-01-22 12:02:22.466760: step: 354/469, loss: 0.07991889864206314 2023-01-22 12:02:23.059799: step: 356/469, loss: 0.11926233768463135 2023-01-22 12:02:23.696089: step: 358/469, loss: 0.15816357731819153 2023-01-22 12:02:24.430671: step: 360/469, loss: 0.0826299861073494 2023-01-22 12:02:25.034257: step: 362/469, loss: 0.18181084096431732 2023-01-22 12:02:25.634358: step: 364/469, loss: 0.06405311077833176 2023-01-22 12:02:26.247492: step: 366/469, loss: 0.939147412776947 2023-01-22 12:02:26.869710: step: 368/469, loss: 0.09762299060821533 2023-01-22 12:02:27.463741: step: 370/469, loss: 0.04762081056833267 2023-01-22 12:02:28.111114: step: 372/469, loss: 0.06453923135995865 2023-01-22 12:02:28.746462: step: 374/469, loss: 0.03962532430887222 2023-01-22 12:02:29.411420: step: 376/469, loss: 0.14591509103775024 2023-01-22 12:02:30.103877: step: 378/469, loss: 0.2262595146894455 2023-01-22 12:02:30.752458: step: 380/469, loss: 0.3208789825439453 2023-01-22 12:02:31.495447: step: 382/469, loss: 0.059827160090208054 2023-01-22 12:02:32.105345: step: 384/469, loss: 0.031631890684366226 2023-01-22 12:02:32.730174: step: 
386/469, loss: 0.46747565269470215 2023-01-22 12:02:33.444950: step: 388/469, loss: 0.08008897304534912 2023-01-22 12:02:34.131956: step: 390/469, loss: 0.2304515391588211 2023-01-22 12:02:34.797395: step: 392/469, loss: 0.08552771061658859 2023-01-22 12:02:35.492434: step: 394/469, loss: 0.03198418766260147 2023-01-22 12:02:36.147018: step: 396/469, loss: 0.08952928334474564 2023-01-22 12:02:36.828782: step: 398/469, loss: 0.04365219548344612 2023-01-22 12:02:37.526283: step: 400/469, loss: 2.0542235374450684 2023-01-22 12:02:38.212167: step: 402/469, loss: 0.2224007099866867 2023-01-22 12:02:38.851978: step: 404/469, loss: 0.04883831366896629 2023-01-22 12:02:39.599976: step: 406/469, loss: 0.23871919512748718 2023-01-22 12:02:40.289199: step: 408/469, loss: 0.09538616240024567 2023-01-22 12:02:40.893331: step: 410/469, loss: 0.13070595264434814 2023-01-22 12:02:41.545835: step: 412/469, loss: 0.06503377854824066 2023-01-22 12:02:42.251007: step: 414/469, loss: 0.2566741704940796 2023-01-22 12:02:42.883077: step: 416/469, loss: 0.46768027544021606 2023-01-22 12:02:43.508128: step: 418/469, loss: 0.9209142923355103 2023-01-22 12:02:44.111496: step: 420/469, loss: 0.09707845747470856 2023-01-22 12:02:44.789153: step: 422/469, loss: 0.3248916268348694 2023-01-22 12:02:45.455471: step: 424/469, loss: 0.04956451803445816 2023-01-22 12:02:46.128232: step: 426/469, loss: 0.03982264921069145 2023-01-22 12:02:46.856659: step: 428/469, loss: 0.27753764390945435 2023-01-22 12:02:47.529505: step: 430/469, loss: 0.012592037208378315 2023-01-22 12:02:48.169234: step: 432/469, loss: 1.000629186630249 2023-01-22 12:02:48.892190: step: 434/469, loss: 0.07920895516872406 2023-01-22 12:02:49.556890: step: 436/469, loss: 0.21137556433677673 2023-01-22 12:02:50.176626: step: 438/469, loss: 0.03702995553612709 2023-01-22 12:02:50.834208: step: 440/469, loss: 0.06917298585176468 2023-01-22 12:02:51.470552: step: 442/469, loss: 0.03820595145225525 2023-01-22 12:02:52.190959: step: 444/469, loss: 0.04671890661120415 2023-01-22 12:02:52.855676: step: 446/469, loss: 0.37693044543266296 2023-01-22 12:02:53.434381: step: 448/469, loss: 0.029523273929953575 2023-01-22 12:02:54.038458: step: 450/469, loss: 0.1540946364402771 2023-01-22 12:02:54.674321: step: 452/469, loss: 0.10862761735916138 2023-01-22 12:02:55.320820: step: 454/469, loss: 0.3979809880256653 2023-01-22 12:02:55.926298: step: 456/469, loss: 0.5188076496124268 2023-01-22 12:02:56.560997: step: 458/469, loss: 0.25342944264411926 2023-01-22 12:02:57.218325: step: 460/469, loss: 0.18251484632492065 2023-01-22 12:02:57.925917: step: 462/469, loss: 0.04423234239220619 2023-01-22 12:02:58.521586: step: 464/469, loss: 0.12601114809513092 2023-01-22 12:02:59.165060: step: 466/469, loss: 0.0514974519610405 2023-01-22 12:02:59.872066: step: 468/469, loss: 0.24102641642093658 2023-01-22 12:03:00.520981: step: 470/469, loss: 0.35460788011550903 2023-01-22 12:03:01.225991: step: 472/469, loss: 0.7224856019020081 2023-01-22 12:03:01.813636: step: 474/469, loss: 0.18925578892230988 2023-01-22 12:03:02.513395: step: 476/469, loss: 0.03882612660527229 2023-01-22 12:03:03.141685: step: 478/469, loss: 0.05779165029525757 2023-01-22 12:03:03.813520: step: 480/469, loss: 0.169854998588562 2023-01-22 12:03:04.434145: step: 482/469, loss: 0.45715466141700745 2023-01-22 12:03:05.217080: step: 484/469, loss: 0.44506722688674927 2023-01-22 12:03:05.990887: step: 486/469, loss: 0.12539318203926086 2023-01-22 12:03:06.652077: step: 488/469, loss: 0.32278648018836975 2023-01-22 
12:03:07.291591: step: 490/469, loss: 0.12357121706008911 2023-01-22 12:03:07.892831: step: 492/469, loss: 0.061674635857343674 2023-01-22 12:03:08.518120: step: 494/469, loss: 0.10104820877313614 2023-01-22 12:03:09.210305: step: 496/469, loss: 0.15971069037914276 2023-01-22 12:03:09.784701: step: 498/469, loss: 0.18366794288158417 2023-01-22 12:03:10.412189: step: 500/469, loss: 0.09132558852434158 2023-01-22 12:03:11.078618: step: 502/469, loss: 0.08771282434463501 2023-01-22 12:03:11.758789: step: 504/469, loss: 0.06579511612653732 2023-01-22 12:03:12.437199: step: 506/469, loss: 0.13895754516124725 2023-01-22 12:03:13.085641: step: 508/469, loss: 0.09910660237073898 2023-01-22 12:03:13.801871: step: 510/469, loss: 0.1334264576435089 2023-01-22 12:03:14.427518: step: 512/469, loss: 0.1699783354997635 2023-01-22 12:03:15.124100: step: 514/469, loss: 0.1673431396484375 2023-01-22 12:03:15.836069: step: 516/469, loss: 0.12258341163396835 2023-01-22 12:03:16.545719: step: 518/469, loss: 0.09879224002361298 2023-01-22 12:03:17.219092: step: 520/469, loss: 0.061435267329216 2023-01-22 12:03:17.807140: step: 522/469, loss: 0.19045239686965942 2023-01-22 12:03:18.494111: step: 524/469, loss: 0.0746120736002922 2023-01-22 12:03:19.198820: step: 526/469, loss: 0.030568497255444527 2023-01-22 12:03:19.829818: step: 528/469, loss: 0.08450386673212051 2023-01-22 12:03:20.596702: step: 530/469, loss: 0.13238397240638733 2023-01-22 12:03:21.377611: step: 532/469, loss: 0.21052587032318115 2023-01-22 12:03:22.012176: step: 534/469, loss: 0.11684330552816391 2023-01-22 12:03:22.693436: step: 536/469, loss: 0.17130185663700104 2023-01-22 12:03:23.360210: step: 538/469, loss: 0.3248347043991089 2023-01-22 12:03:23.994811: step: 540/469, loss: 0.09218096733093262 2023-01-22 12:03:24.684826: step: 542/469, loss: 0.07498276978731155 2023-01-22 12:03:25.246304: step: 544/469, loss: 0.05229423567652702 2023-01-22 12:03:25.893151: step: 546/469, loss: 0.09544291347265244 2023-01-22 12:03:26.476115: step: 548/469, loss: 0.09195572882890701 2023-01-22 12:03:27.152274: step: 550/469, loss: 0.16590982675552368 2023-01-22 12:03:27.842486: step: 552/469, loss: 0.06954732537269592 2023-01-22 12:03:28.464859: step: 554/469, loss: 0.11550594866275787 2023-01-22 12:03:29.108273: step: 556/469, loss: 0.06166649982333183 2023-01-22 12:03:29.757568: step: 558/469, loss: 0.11040358245372772 2023-01-22 12:03:30.437672: step: 560/469, loss: 0.18325157463550568 2023-01-22 12:03:31.113408: step: 562/469, loss: 0.07799236476421356 2023-01-22 12:03:31.758322: step: 564/469, loss: 0.17693808674812317 2023-01-22 12:03:32.348724: step: 566/469, loss: 0.08517203480005264 2023-01-22 12:03:33.002383: step: 568/469, loss: 0.37244269251823425 2023-01-22 12:03:33.635991: step: 570/469, loss: 0.16834932565689087 2023-01-22 12:03:34.306859: step: 572/469, loss: 0.14932765066623688 2023-01-22 12:03:34.937896: step: 574/469, loss: 0.015989581122994423 2023-01-22 12:03:35.621701: step: 576/469, loss: 0.12025109678506851 2023-01-22 12:03:36.305712: step: 578/469, loss: 0.07602535933256149 2023-01-22 12:03:37.017454: step: 580/469, loss: 0.2487332969903946 2023-01-22 12:03:37.683231: step: 582/469, loss: 0.2300061285495758 2023-01-22 12:03:38.334039: step: 584/469, loss: 0.05116070806980133 2023-01-22 12:03:39.020954: step: 586/469, loss: 1.4883637428283691 2023-01-22 12:03:39.739491: step: 588/469, loss: 0.030584409832954407 2023-01-22 12:03:40.387114: step: 590/469, loss: 0.07932163029909134 2023-01-22 12:03:41.028268: step: 592/469, loss: 
0.08053095638751984 2023-01-22 12:03:41.609015: step: 594/469, loss: 0.0786694809794426 2023-01-22 12:03:42.291118: step: 596/469, loss: 0.21005229651927948 2023-01-22 12:03:42.962814: step: 598/469, loss: 0.12650232017040253 2023-01-22 12:03:43.643589: step: 600/469, loss: 0.07539433240890503 2023-01-22 12:03:44.339433: step: 602/469, loss: 0.3110903799533844 2023-01-22 12:03:44.969657: step: 604/469, loss: 0.06634613126516342 2023-01-22 12:03:45.613952: step: 606/469, loss: 0.09259108453989029 2023-01-22 12:03:46.260946: step: 608/469, loss: 0.1963626742362976 2023-01-22 12:03:46.885638: step: 610/469, loss: 0.07385430485010147 2023-01-22 12:03:47.534432: step: 612/469, loss: 0.41075727343559265 2023-01-22 12:03:48.214899: step: 614/469, loss: 0.07688222825527191 2023-01-22 12:03:48.819582: step: 616/469, loss: 0.06205300614237785 2023-01-22 12:03:49.474824: step: 618/469, loss: 0.3063707649707794 2023-01-22 12:03:50.101799: step: 620/469, loss: 0.1841689795255661 2023-01-22 12:03:50.789668: step: 622/469, loss: 1.570283055305481 2023-01-22 12:03:51.381649: step: 624/469, loss: 0.05395738407969475 2023-01-22 12:03:52.006179: step: 626/469, loss: 0.11619783937931061 2023-01-22 12:03:52.641735: step: 628/469, loss: 0.04714583978056908 2023-01-22 12:03:53.269925: step: 630/469, loss: 0.04560321196913719 2023-01-22 12:03:53.946388: step: 632/469, loss: 1.0108449459075928 2023-01-22 12:03:54.520096: step: 634/469, loss: 3.4579665660858154 2023-01-22 12:03:55.158314: step: 636/469, loss: 0.09656395763158798 2023-01-22 12:03:55.805106: step: 638/469, loss: 0.11232117563486099 2023-01-22 12:03:56.458585: step: 640/469, loss: 0.4612281322479248 2023-01-22 12:03:57.094788: step: 642/469, loss: 0.09604386240243912 2023-01-22 12:03:57.773688: step: 644/469, loss: 0.1310945302248001 2023-01-22 12:03:58.405829: step: 646/469, loss: 0.057665999978780746 2023-01-22 12:03:59.067362: step: 648/469, loss: 0.02677675150334835 2023-01-22 12:03:59.822055: step: 650/469, loss: 0.116474449634552 2023-01-22 12:04:00.415534: step: 652/469, loss: 0.15412276983261108 2023-01-22 12:04:01.086414: step: 654/469, loss: 0.10318965464830399 2023-01-22 12:04:01.760637: step: 656/469, loss: 0.7221606969833374 2023-01-22 12:04:02.383326: step: 658/469, loss: 0.10014528036117554 2023-01-22 12:04:03.058517: step: 660/469, loss: 0.23871949315071106 2023-01-22 12:04:03.689779: step: 662/469, loss: 0.06811057776212692 2023-01-22 12:04:04.335615: step: 664/469, loss: 0.12492357939481735 2023-01-22 12:04:04.974656: step: 666/469, loss: 0.060884740203619 2023-01-22 12:04:05.633919: step: 668/469, loss: 0.07236185669898987 2023-01-22 12:04:06.400835: step: 670/469, loss: 0.16880032420158386 2023-01-22 12:04:07.014321: step: 672/469, loss: 0.09256527572870255 2023-01-22 12:04:07.699110: step: 674/469, loss: 0.10509645938873291 2023-01-22 12:04:08.365305: step: 676/469, loss: 0.12360022962093353 2023-01-22 12:04:09.030545: step: 678/469, loss: 0.08761170506477356 2023-01-22 12:04:09.731492: step: 680/469, loss: 0.07590088993310928 2023-01-22 12:04:10.480830: step: 682/469, loss: 0.3243628740310669 2023-01-22 12:04:11.099536: step: 684/469, loss: 0.07249685376882553 2023-01-22 12:04:11.735642: step: 686/469, loss: 0.027272187173366547 2023-01-22 12:04:12.424821: step: 688/469, loss: 11.247618675231934 2023-01-22 12:04:13.144401: step: 690/469, loss: 0.5154850482940674 2023-01-22 12:04:13.841408: step: 692/469, loss: 0.06237754598259926 2023-01-22 12:04:14.470346: step: 694/469, loss: 0.07400119304656982 2023-01-22 12:04:15.081405: 
step: 696/469, loss: 0.13597814738750458 2023-01-22 12:04:15.703568: step: 698/469, loss: 0.08082101494073868 2023-01-22 12:04:16.309226: step: 700/469, loss: 0.5380794405937195 2023-01-22 12:04:16.937252: step: 702/469, loss: 0.04616934806108475 2023-01-22 12:04:17.754166: step: 704/469, loss: 0.03679390624165535 2023-01-22 12:04:18.395206: step: 706/469, loss: 8.062114715576172 2023-01-22 12:04:19.005663: step: 708/469, loss: 0.09626629203557968 2023-01-22 12:04:19.635168: step: 710/469, loss: 0.06008496880531311 2023-01-22 12:04:20.243436: step: 712/469, loss: 0.08474749326705933 2023-01-22 12:04:20.915626: step: 714/469, loss: 0.17426146566867828 2023-01-22 12:04:21.597754: step: 716/469, loss: 0.09438607841730118 2023-01-22 12:04:22.259474: step: 718/469, loss: 0.12769052386283875 2023-01-22 12:04:22.931333: step: 720/469, loss: 0.424898624420166 2023-01-22 12:04:23.594853: step: 722/469, loss: 0.08442848175764084 2023-01-22 12:04:24.220335: step: 724/469, loss: 0.24904683232307434 2023-01-22 12:04:24.842725: step: 726/469, loss: 0.19199064373970032 2023-01-22 12:04:25.593232: step: 728/469, loss: 0.18863005936145782 2023-01-22 12:04:26.256027: step: 730/469, loss: 0.056691914796829224 2023-01-22 12:04:26.880631: step: 732/469, loss: 0.06619631499052048 2023-01-22 12:04:27.535669: step: 734/469, loss: 0.12168781459331512 2023-01-22 12:04:28.138905: step: 736/469, loss: 0.1374291628599167 2023-01-22 12:04:28.871158: step: 738/469, loss: 0.31519442796707153 2023-01-22 12:04:29.508054: step: 740/469, loss: 0.053980860859155655 2023-01-22 12:04:30.157423: step: 742/469, loss: 0.144709050655365 2023-01-22 12:04:30.815040: step: 744/469, loss: 0.1715642511844635 2023-01-22 12:04:31.507796: step: 746/469, loss: 0.09068422764539719 2023-01-22 12:04:32.198141: step: 748/469, loss: 0.14486458897590637 2023-01-22 12:04:32.839108: step: 750/469, loss: 0.12170708924531937 2023-01-22 12:04:33.470091: step: 752/469, loss: 0.1538146436214447 2023-01-22 12:04:34.223831: step: 754/469, loss: 0.3749336898326874 2023-01-22 12:04:34.904566: step: 756/469, loss: 0.7911332249641418 2023-01-22 12:04:35.551331: step: 758/469, loss: 0.1352866291999817 2023-01-22 12:04:36.218916: step: 760/469, loss: 0.3471243977546692 2023-01-22 12:04:36.870172: step: 762/469, loss: 0.7950724363327026 2023-01-22 12:04:37.511810: step: 764/469, loss: 0.13323654234409332 2023-01-22 12:04:38.255622: step: 766/469, loss: 0.34459856152534485 2023-01-22 12:04:38.931442: step: 768/469, loss: 0.34449002146720886 2023-01-22 12:04:39.570635: step: 770/469, loss: 0.048628631979227066 2023-01-22 12:04:40.145086: step: 772/469, loss: 0.2818205654621124 2023-01-22 12:04:40.803842: step: 774/469, loss: 0.1073806881904602 2023-01-22 12:04:41.504351: step: 776/469, loss: 0.2336321771144867 2023-01-22 12:04:42.165078: step: 778/469, loss: 0.0834648609161377 2023-01-22 12:04:42.872055: step: 780/469, loss: 0.19987352192401886 2023-01-22 12:04:43.554038: step: 782/469, loss: 0.1378486305475235 2023-01-22 12:04:44.253481: step: 784/469, loss: 0.09632925689220428 2023-01-22 12:04:44.904872: step: 786/469, loss: 0.08961453288793564 2023-01-22 12:04:45.587672: step: 788/469, loss: 0.11353462934494019 2023-01-22 12:04:46.227065: step: 790/469, loss: 0.353186696767807 2023-01-22 12:04:46.867022: step: 792/469, loss: 0.35332679748535156 2023-01-22 12:04:47.565327: step: 794/469, loss: 0.4996044933795929 2023-01-22 12:04:48.189190: step: 796/469, loss: 0.11995209008455276 2023-01-22 12:04:48.809618: step: 798/469, loss: 0.11936438828706741 2023-01-22 
12:04:49.435021: step: 800/469, loss: 0.1804143637418747 2023-01-22 12:04:50.085189: step: 802/469, loss: 0.10060111433267593 2023-01-22 12:04:50.676730: step: 804/469, loss: 0.18791353702545166 2023-01-22 12:04:51.328424: step: 806/469, loss: 0.04781004786491394 2023-01-22 12:04:51.886094: step: 808/469, loss: 0.1525021195411682 2023-01-22 12:04:52.597761: step: 810/469, loss: 0.044144872575998306 2023-01-22 12:04:53.224174: step: 812/469, loss: 0.08868099004030228 2023-01-22 12:04:53.839205: step: 814/469, loss: 0.08810263872146606 2023-01-22 12:04:54.533725: step: 816/469, loss: 0.15583884716033936 2023-01-22 12:04:55.253899: step: 818/469, loss: 0.22324155271053314 2023-01-22 12:04:55.887266: step: 820/469, loss: 0.5118790864944458 2023-01-22 12:04:56.537237: step: 822/469, loss: 0.13022835552692413 2023-01-22 12:04:57.251587: step: 824/469, loss: 0.5483819246292114 2023-01-22 12:04:57.900837: step: 826/469, loss: 0.12466764450073242 2023-01-22 12:04:58.524630: step: 828/469, loss: 0.25863000750541687 2023-01-22 12:04:59.176955: step: 830/469, loss: 0.21080994606018066 2023-01-22 12:04:59.969865: step: 832/469, loss: 0.2235356718301773 2023-01-22 12:05:00.672754: step: 834/469, loss: 0.11213290691375732 2023-01-22 12:05:01.323154: step: 836/469, loss: 0.08789695799350739 2023-01-22 12:05:02.079653: step: 838/469, loss: 0.2734290659427643 2023-01-22 12:05:02.786267: step: 840/469, loss: 0.04577796533703804 2023-01-22 12:05:03.414032: step: 842/469, loss: 0.036078400909900665 2023-01-22 12:05:04.028464: step: 844/469, loss: 0.14965537190437317 2023-01-22 12:05:04.652695: step: 846/469, loss: 0.06764727830886841 2023-01-22 12:05:05.211032: step: 848/469, loss: 0.9656215906143188 2023-01-22 12:05:05.815461: step: 850/469, loss: 0.09858908504247665 2023-01-22 12:05:06.547034: step: 852/469, loss: 0.6077346801757812 2023-01-22 12:05:07.272343: step: 854/469, loss: 0.3041757047176361 2023-01-22 12:05:07.912934: step: 856/469, loss: 0.08208796381950378 2023-01-22 12:05:08.566923: step: 858/469, loss: 0.26135125756263733 2023-01-22 12:05:09.201271: step: 860/469, loss: 0.03435799852013588 2023-01-22 12:05:09.851312: step: 862/469, loss: 0.29631078243255615 2023-01-22 12:05:10.537200: step: 864/469, loss: 0.05846825987100601 2023-01-22 12:05:11.184531: step: 866/469, loss: 0.03833848983049393 2023-01-22 12:05:11.911466: step: 868/469, loss: 0.02671683393418789 2023-01-22 12:05:12.620570: step: 870/469, loss: 0.0951947271823883 2023-01-22 12:05:13.331760: step: 872/469, loss: 0.04014415666460991 2023-01-22 12:05:13.967102: step: 874/469, loss: 1.1296528577804565 2023-01-22 12:05:14.604192: step: 876/469, loss: 0.02556871622800827 2023-01-22 12:05:15.165749: step: 878/469, loss: 0.1425725668668747 2023-01-22 12:05:15.783544: step: 880/469, loss: 0.061103127896785736 2023-01-22 12:05:16.538148: step: 882/469, loss: 0.11434217542409897 2023-01-22 12:05:17.165372: step: 884/469, loss: 0.12422147393226624 2023-01-22 12:05:17.824250: step: 886/469, loss: 0.36362916231155396 2023-01-22 12:05:18.503215: step: 888/469, loss: 0.1230747327208519 2023-01-22 12:05:19.223123: step: 890/469, loss: 0.22097797691822052 2023-01-22 12:05:19.914625: step: 892/469, loss: 0.12863381206989288 2023-01-22 12:05:20.591876: step: 894/469, loss: 0.6501291394233704 2023-01-22 12:05:21.225680: step: 896/469, loss: 0.07268564403057098 2023-01-22 12:05:21.877305: step: 898/469, loss: 0.2855556011199951 2023-01-22 12:05:22.512322: step: 900/469, loss: 0.09046179056167603 2023-01-22 12:05:23.199398: step: 902/469, loss: 
0.040060173720121384 2023-01-22 12:05:23.836205: step: 904/469, loss: 0.12873654067516327 2023-01-22 12:05:24.484133: step: 906/469, loss: 0.19196546077728271 2023-01-22 12:05:25.220372: step: 908/469, loss: 0.36106956005096436 2023-01-22 12:05:25.888383: step: 910/469, loss: 0.13236966729164124 2023-01-22 12:05:26.564366: step: 912/469, loss: 0.08896508067846298 2023-01-22 12:05:27.186923: step: 914/469, loss: 0.06155938282608986 2023-01-22 12:05:27.817221: step: 916/469, loss: 0.17571577429771423 2023-01-22 12:05:28.487229: step: 918/469, loss: 0.1279701441526413 2023-01-22 12:05:29.132788: step: 920/469, loss: 0.09901431202888489 2023-01-22 12:05:29.743488: step: 922/469, loss: 0.10144391655921936 2023-01-22 12:05:30.351394: step: 924/469, loss: 0.08618828654289246 2023-01-22 12:05:31.050108: step: 926/469, loss: 0.08711037039756775 2023-01-22 12:05:31.653674: step: 928/469, loss: 0.07481534034013748 2023-01-22 12:05:32.287229: step: 930/469, loss: 0.26995033025741577 2023-01-22 12:05:32.939126: step: 932/469, loss: 0.0866168811917305 2023-01-22 12:05:33.524575: step: 934/469, loss: 0.11633653193712234 2023-01-22 12:05:34.169410: step: 936/469, loss: 0.7510954141616821 2023-01-22 12:05:34.809458: step: 938/469, loss: 0.8718492984771729 ================================================== Loss: 0.237 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31042326939115933, 'r': 0.32102596170432984, 'f1': 0.3156356004070557}, 'combined': 0.23257360029993576, 'epoch': 14} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32112551304117565, 'r': 0.24385560459668418, 'f1': 0.2772066311223877}, 'combined': 0.15120361697584783, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30142883139991683, 'r': 0.3214478239976343, 'f1': 0.3111166267158003}, 'combined': 0.22924383021164232, 'epoch': 14} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3252281833064402, 'r': 0.24667352603937687, 'f1': 0.2805558417908834}, 'combined': 0.15303045915866365, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2959480558270881, 'r': 0.31335676499338744, 'f1': 0.3044037145650049}, 'combined': 0.22429747389000362, 'epoch': 14} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31752984530158446, 'r': 0.24699990708560182, 'f1': 0.27785902144128366}, 'combined': 0.15155946624070016, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24913194444444442, 'r': 0.3416666666666666, 'f1': 0.28815261044176704}, 'combined': 0.19210174029451135, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29347826086956524, 'r': 0.29347826086956524, 'f1': 0.29347826086956524}, 'combined': 0.14673913043478262, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.23275862068965517, 'f1': 0.3068181818181818}, 'combined': 0.20454545454545453, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 
0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:08:32.915619: step: 2/469, loss: 0.11154310405254364 2023-01-22 12:08:33.601720: step: 4/469, loss: 0.10185451060533524 2023-01-22 12:08:34.225795: step: 6/469, loss: 0.09049303829669952 2023-01-22 12:08:34.964228: step: 8/469, loss: 0.08959893882274628 2023-01-22 12:08:35.713574: step: 10/469, loss: 0.07926318049430847 2023-01-22 12:08:36.327645: step: 12/469, loss: 0.13308602571487427 2023-01-22 12:08:37.018401: step: 14/469, loss: 0.08020301908254623 2023-01-22 12:08:37.788296: step: 16/469, loss: 0.05582220107316971 2023-01-22 12:08:38.484405: step: 18/469, loss: 0.08290795236825943 2023-01-22 12:08:39.156894: step: 20/469, loss: 0.3549378514289856 2023-01-22 12:08:39.928501: step: 22/469, loss: 0.8654332160949707 2023-01-22 12:08:40.636715: step: 24/469, loss: 0.06048708036541939 2023-01-22 12:08:41.293277: step: 26/469, loss: 0.08002801984548569 2023-01-22 12:08:41.969547: step: 28/469, loss: 0.07778247445821762 2023-01-22 12:08:42.598179: step: 30/469, loss: 0.6301714777946472 2023-01-22 12:08:43.225374: step: 32/469, loss: 0.11404713243246078 2023-01-22 12:08:43.897995: step: 34/469, loss: 3.1983845233917236 2023-01-22 12:08:44.574133: step: 36/469, loss: 0.1961783915758133 2023-01-22 12:08:45.199934: step: 38/469, loss: 0.06832145899534225 2023-01-22 12:08:45.815051: step: 40/469, loss: 0.17111967504024506 2023-01-22 12:08:46.457926: step: 42/469, loss: 
0.06059175729751587 2023-01-22 12:08:47.053882: step: 44/469, loss: 0.08661918342113495 2023-01-22 12:08:47.704111: step: 46/469, loss: 0.16389034688472748 2023-01-22 12:08:48.318649: step: 48/469, loss: 0.05933493375778198 2023-01-22 12:08:49.002720: step: 50/469, loss: 0.25778016448020935 2023-01-22 12:08:49.685678: step: 52/469, loss: 0.07134124636650085 2023-01-22 12:08:50.431667: step: 54/469, loss: 0.07459121942520142 2023-01-22 12:08:51.081539: step: 56/469, loss: 0.05604968219995499 2023-01-22 12:08:51.770024: step: 58/469, loss: 0.07312927395105362 2023-01-22 12:08:52.356048: step: 60/469, loss: 0.08122636377811432 2023-01-22 12:08:52.994049: step: 62/469, loss: 0.10961474478244781 2023-01-22 12:08:53.642273: step: 64/469, loss: 0.06546129286289215 2023-01-22 12:08:54.258996: step: 66/469, loss: 0.1335621178150177 2023-01-22 12:08:54.924961: step: 68/469, loss: 0.09232518821954727 2023-01-22 12:08:55.557679: step: 70/469, loss: 0.03416675329208374 2023-01-22 12:08:56.195594: step: 72/469, loss: 0.4200380742549896 2023-01-22 12:08:56.837799: step: 74/469, loss: 0.012624003924429417 2023-01-22 12:08:57.481253: step: 76/469, loss: 0.13828982412815094 2023-01-22 12:08:58.119569: step: 78/469, loss: 0.14082835614681244 2023-01-22 12:08:58.836325: step: 80/469, loss: 1.0979188680648804 2023-01-22 12:08:59.494946: step: 82/469, loss: 0.08209765702486038 2023-01-22 12:09:00.162391: step: 84/469, loss: 0.0791550725698471 2023-01-22 12:09:00.865470: step: 86/469, loss: 0.333895206451416 2023-01-22 12:09:01.486532: step: 88/469, loss: 0.1241508275270462 2023-01-22 12:09:02.148065: step: 90/469, loss: 0.07102589309215546 2023-01-22 12:09:02.803138: step: 92/469, loss: 0.043671127408742905 2023-01-22 12:09:03.454222: step: 94/469, loss: 0.178281769156456 2023-01-22 12:09:04.025264: step: 96/469, loss: 0.02716628648340702 2023-01-22 12:09:04.732253: step: 98/469, loss: 0.2626620829105377 2023-01-22 12:09:05.312525: step: 100/469, loss: 0.5035622119903564 2023-01-22 12:09:06.104088: step: 102/469, loss: 0.057853054255247116 2023-01-22 12:09:06.745169: step: 104/469, loss: 0.06141258776187897 2023-01-22 12:09:07.426660: step: 106/469, loss: 0.0898313969373703 2023-01-22 12:09:08.093251: step: 108/469, loss: 0.06163856014609337 2023-01-22 12:09:08.768549: step: 110/469, loss: 0.14861513674259186 2023-01-22 12:09:09.460344: step: 112/469, loss: 0.04367325082421303 2023-01-22 12:09:10.135144: step: 114/469, loss: 0.12304690480232239 2023-01-22 12:09:10.839979: step: 116/469, loss: 0.09863568842411041 2023-01-22 12:09:11.501431: step: 118/469, loss: 0.2262677103281021 2023-01-22 12:09:12.166250: step: 120/469, loss: 0.14186064898967743 2023-01-22 12:09:12.740384: step: 122/469, loss: 0.11820931732654572 2023-01-22 12:09:13.462484: step: 124/469, loss: 0.23409591615200043 2023-01-22 12:09:14.100179: step: 126/469, loss: 0.04979797080159187 2023-01-22 12:09:14.753580: step: 128/469, loss: 0.1126193255186081 2023-01-22 12:09:15.490746: step: 130/469, loss: 0.175623819231987 2023-01-22 12:09:16.094939: step: 132/469, loss: 0.0966227650642395 2023-01-22 12:09:16.707535: step: 134/469, loss: 0.15038621425628662 2023-01-22 12:09:17.388013: step: 136/469, loss: 0.042864251881837845 2023-01-22 12:09:17.987545: step: 138/469, loss: 0.1660320907831192 2023-01-22 12:09:18.662386: step: 140/469, loss: 0.10542742162942886 2023-01-22 12:09:19.264359: step: 142/469, loss: 0.03676040098071098 2023-01-22 12:09:19.874089: step: 144/469, loss: 0.07163447886705399 2023-01-22 12:09:20.474906: step: 146/469, loss: 
0.0900966227054596 2023-01-22 12:09:21.105483: step: 148/469, loss: 0.059078194200992584 2023-01-22 12:09:21.727601: step: 150/469, loss: 0.06749924272298813 2023-01-22 12:09:22.359511: step: 152/469, loss: 0.07475146651268005 2023-01-22 12:09:23.104802: step: 154/469, loss: 0.3347472548484802 2023-01-22 12:09:23.781706: step: 156/469, loss: 0.1510920375585556 2023-01-22 12:09:24.392025: step: 158/469, loss: 0.06703579425811768 2023-01-22 12:09:25.037442: step: 160/469, loss: 0.0176719781011343 2023-01-22 12:09:25.692996: step: 162/469, loss: 0.13784371316432953 2023-01-22 12:09:26.394053: step: 164/469, loss: 0.03905270993709564 2023-01-22 12:09:27.069663: step: 166/469, loss: 0.14107176661491394 2023-01-22 12:09:27.724387: step: 168/469, loss: 0.13379943370819092 2023-01-22 12:09:28.394559: step: 170/469, loss: 0.04197041317820549 2023-01-22 12:09:29.015126: step: 172/469, loss: 0.10158641636371613 2023-01-22 12:09:29.686882: step: 174/469, loss: 0.12034301459789276 2023-01-22 12:09:30.294226: step: 176/469, loss: 0.07145047187805176 2023-01-22 12:09:31.041170: step: 178/469, loss: 0.10873144865036011 2023-01-22 12:09:31.730337: step: 180/469, loss: 0.05998546630144119 2023-01-22 12:09:32.410716: step: 182/469, loss: 0.09499622881412506 2023-01-22 12:09:33.048139: step: 184/469, loss: 0.0694514662027359 2023-01-22 12:09:33.643846: step: 186/469, loss: 0.09403524547815323 2023-01-22 12:09:34.353708: step: 188/469, loss: 0.19487684965133667 2023-01-22 12:09:35.038162: step: 190/469, loss: 0.2548619210720062 2023-01-22 12:09:35.671198: step: 192/469, loss: 0.1119566336274147 2023-01-22 12:09:36.365836: step: 194/469, loss: 0.2044508457183838 2023-01-22 12:09:37.029318: step: 196/469, loss: 0.4984990656375885 2023-01-22 12:09:37.726089: step: 198/469, loss: 0.2978976368904114 2023-01-22 12:09:38.403778: step: 200/469, loss: 0.06300626695156097 2023-01-22 12:09:39.016129: step: 202/469, loss: 0.09145215153694153 2023-01-22 12:09:39.665506: step: 204/469, loss: 0.06325855106115341 2023-01-22 12:09:40.286067: step: 206/469, loss: 0.15226240456104279 2023-01-22 12:09:40.957398: step: 208/469, loss: 0.15415604412555695 2023-01-22 12:09:41.642877: step: 210/469, loss: 0.042729929089546204 2023-01-22 12:09:42.293783: step: 212/469, loss: 0.15243494510650635 2023-01-22 12:09:42.979461: step: 214/469, loss: 0.1374005228281021 2023-01-22 12:09:43.566870: step: 216/469, loss: 0.0484742633998394 2023-01-22 12:09:44.250219: step: 218/469, loss: 0.1367819905281067 2023-01-22 12:09:44.933147: step: 220/469, loss: 0.060447901487350464 2023-01-22 12:09:45.552869: step: 222/469, loss: 0.0830463171005249 2023-01-22 12:09:46.215381: step: 224/469, loss: 0.1643836349248886 2023-01-22 12:09:46.885331: step: 226/469, loss: 0.04430633410811424 2023-01-22 12:09:47.544979: step: 228/469, loss: 0.05016676336526871 2023-01-22 12:09:48.319721: step: 230/469, loss: 0.10828897356987 2023-01-22 12:09:48.940439: step: 232/469, loss: 0.150638148188591 2023-01-22 12:09:49.601782: step: 234/469, loss: 0.03905714303255081 2023-01-22 12:09:50.248283: step: 236/469, loss: 0.09422975033521652 2023-01-22 12:09:50.877723: step: 238/469, loss: 0.4546284079551697 2023-01-22 12:09:51.506641: step: 240/469, loss: 0.08964105695486069 2023-01-22 12:09:52.093967: step: 242/469, loss: 0.0595172718167305 2023-01-22 12:09:52.686044: step: 244/469, loss: 0.12783460319042206 2023-01-22 12:09:53.410424: step: 246/469, loss: 0.07300969213247299 2023-01-22 12:09:54.111261: step: 248/469, loss: 0.07457521557807922 2023-01-22 12:09:54.802804: step: 
250/469, loss: 0.19303950667381287 2023-01-22 12:09:55.433136: step: 252/469, loss: 0.07444415241479874 2023-01-22 12:09:56.070953: step: 254/469, loss: 0.021184952929615974 2023-01-22 12:09:56.726428: step: 256/469, loss: 0.2046414017677307 2023-01-22 12:09:57.359326: step: 258/469, loss: 0.10218964517116547 2023-01-22 12:09:57.996680: step: 260/469, loss: 0.05558760091662407 2023-01-22 12:09:58.681585: step: 262/469, loss: 0.510052502155304 2023-01-22 12:09:59.502633: step: 264/469, loss: 0.1685366928577423 2023-01-22 12:10:00.140026: step: 266/469, loss: 0.28381720185279846 2023-01-22 12:10:00.821143: step: 268/469, loss: 0.0380496121942997 2023-01-22 12:10:01.450153: step: 270/469, loss: 0.09100587666034698 2023-01-22 12:10:02.144124: step: 272/469, loss: 0.06771304458379745 2023-01-22 12:10:02.808230: step: 274/469, loss: 0.05137787014245987 2023-01-22 12:10:03.491831: step: 276/469, loss: 0.08808694779872894 2023-01-22 12:10:04.155184: step: 278/469, loss: 0.14675672352313995 2023-01-22 12:10:04.814458: step: 280/469, loss: 0.9552887678146362 2023-01-22 12:10:05.485401: step: 282/469, loss: 0.1043744906783104 2023-01-22 12:10:06.100309: step: 284/469, loss: 0.06530353426933289 2023-01-22 12:10:06.763840: step: 286/469, loss: 0.20721475780010223 2023-01-22 12:10:07.431152: step: 288/469, loss: 0.08885453641414642 2023-01-22 12:10:08.077690: step: 290/469, loss: 0.07269501686096191 2023-01-22 12:10:08.697191: step: 292/469, loss: 0.10241696238517761 2023-01-22 12:10:09.327592: step: 294/469, loss: 0.036570511758327484 2023-01-22 12:10:09.961593: step: 296/469, loss: 0.02546403370797634 2023-01-22 12:10:10.587763: step: 298/469, loss: 0.02204526960849762 2023-01-22 12:10:11.256154: step: 300/469, loss: 0.09121332317590714 2023-01-22 12:10:11.916607: step: 302/469, loss: 0.13340415060520172 2023-01-22 12:10:12.539715: step: 304/469, loss: 0.059527646750211716 2023-01-22 12:10:13.217404: step: 306/469, loss: 0.22927415370941162 2023-01-22 12:10:13.920314: step: 308/469, loss: 0.10273823142051697 2023-01-22 12:10:14.528439: step: 310/469, loss: 0.02172967605292797 2023-01-22 12:10:15.185256: step: 312/469, loss: 0.09245534241199493 2023-01-22 12:10:15.873736: step: 314/469, loss: 0.15194593369960785 2023-01-22 12:10:16.553165: step: 316/469, loss: 0.11920594424009323 2023-01-22 12:10:17.228471: step: 318/469, loss: 0.08647657185792923 2023-01-22 12:10:17.887015: step: 320/469, loss: 0.09207209199666977 2023-01-22 12:10:18.540358: step: 322/469, loss: 0.07460210472345352 2023-01-22 12:10:19.255801: step: 324/469, loss: 0.24142161011695862 2023-01-22 12:10:19.909149: step: 326/469, loss: 0.274515300989151 2023-01-22 12:10:20.550715: step: 328/469, loss: 0.08760512620210648 2023-01-22 12:10:21.256471: step: 330/469, loss: 0.13139471411705017 2023-01-22 12:10:21.895358: step: 332/469, loss: 0.07570262253284454 2023-01-22 12:10:22.559614: step: 334/469, loss: 0.18965232372283936 2023-01-22 12:10:23.206562: step: 336/469, loss: 0.021007876843214035 2023-01-22 12:10:23.849899: step: 338/469, loss: 0.5936605930328369 2023-01-22 12:10:24.537783: step: 340/469, loss: 0.06285455077886581 2023-01-22 12:10:25.221996: step: 342/469, loss: 0.3930582106113434 2023-01-22 12:10:25.924599: step: 344/469, loss: 0.17667238414287567 2023-01-22 12:10:26.592470: step: 346/469, loss: 0.28818178176879883 2023-01-22 12:10:27.262090: step: 348/469, loss: 0.08660731464624405 2023-01-22 12:10:27.885746: step: 350/469, loss: 0.09201332926750183 2023-01-22 12:10:28.643169: step: 352/469, loss: 0.46005502343177795 
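Note: each "step: k/469, loss: v" record above is one training update; the step index advances by 2 per record and runs past the /469 denominator (up to 938), and the "Loss:" figure printed at the end of every epoch looks like a plain average of these per-step losses. A minimal parsing sketch under those assumptions (the regular expressions are guesses at the log format, not taken from train.py):

# --- sketch: recover per-epoch mean loss from this raw log (assumptions as stated above) ---
import re

# One training record looks like:
#   2023-01-22 12:10:28.643169: step: 352/469, loss: 0.46005502343177795
STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9]*\.[0-9]+)")
EPOCH_BANNER_RE = re.compile(r"\*{5,} Epoch: \d+")   # "****... Epoch: 15 command: ..."

def epoch_mean_losses(log_text: str) -> list[float]:
    """Mean of the per-step losses inside each epoch chunk of the log.

    Assumption (not verified against train.py): the end-of-epoch "Loss:"
    line is this plain mean, which would reproduce the printed figures
    (0.237 for epoch 14, 0.183 for epoch 15) when run on the full log.
    """
    text = " ".join(log_text.split())              # undo the hard line wrapping
    means = []
    for chunk in EPOCH_BANNER_RE.split(text):
        losses = [float(m.group(1)) for m in STEP_RE.finditer(chunk)]
        if losses:
            means.append(round(sum(losses) / len(losses), 3))
    return means

# usage: epoch_mean_losses(open("train.log").read())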
2023-01-22 12:10:29.284627: step: 354/469, loss: 0.05712362006306648 2023-01-22 12:10:29.933044: step: 356/469, loss: 0.31637391448020935 2023-01-22 12:10:30.573920: step: 358/469, loss: 0.013937942683696747 2023-01-22 12:10:31.194684: step: 360/469, loss: 0.06425005197525024 2023-01-22 12:10:31.846829: step: 362/469, loss: 0.10494048148393631 2023-01-22 12:10:32.486459: step: 364/469, loss: 0.10669367015361786 2023-01-22 12:10:33.123599: step: 366/469, loss: 0.14803123474121094 2023-01-22 12:10:33.755939: step: 368/469, loss: 0.1849016696214676 2023-01-22 12:10:34.419629: step: 370/469, loss: 0.04781125858426094 2023-01-22 12:10:34.996835: step: 372/469, loss: 0.0616668276488781 2023-01-22 12:10:35.610716: step: 374/469, loss: 0.07190537452697754 2023-01-22 12:10:36.320331: step: 376/469, loss: 0.0524645559489727 2023-01-22 12:10:36.977160: step: 378/469, loss: 0.6494707465171814 2023-01-22 12:10:37.638110: step: 380/469, loss: 0.18243408203125 2023-01-22 12:10:38.298544: step: 382/469, loss: 0.21436278522014618 2023-01-22 12:10:38.891163: step: 384/469, loss: 0.01952657476067543 2023-01-22 12:10:39.601593: step: 386/469, loss: 0.07805640250444412 2023-01-22 12:10:40.244952: step: 388/469, loss: 0.16420702636241913 2023-01-22 12:10:40.888413: step: 390/469, loss: 0.22387485206127167 2023-01-22 12:10:41.594696: step: 392/469, loss: 0.16565045714378357 2023-01-22 12:10:42.264183: step: 394/469, loss: 0.13501045107841492 2023-01-22 12:10:42.872586: step: 396/469, loss: 0.08366989344358444 2023-01-22 12:10:43.494680: step: 398/469, loss: 0.2138056457042694 2023-01-22 12:10:44.139659: step: 400/469, loss: 0.8156818747520447 2023-01-22 12:10:44.825225: step: 402/469, loss: 0.07744881510734558 2023-01-22 12:10:45.466613: step: 404/469, loss: 0.7006872892379761 2023-01-22 12:10:46.131478: step: 406/469, loss: 0.10675997287034988 2023-01-22 12:10:46.761270: step: 408/469, loss: 0.041311345994472504 2023-01-22 12:10:47.456650: step: 410/469, loss: 0.09614323079586029 2023-01-22 12:10:48.124639: step: 412/469, loss: 1.5351543426513672 2023-01-22 12:10:48.801172: step: 414/469, loss: 0.24632766842842102 2023-01-22 12:10:49.395407: step: 416/469, loss: 0.025399502366781235 2023-01-22 12:10:49.996393: step: 418/469, loss: 0.12387553602457047 2023-01-22 12:10:50.615524: step: 420/469, loss: 0.09193819761276245 2023-01-22 12:10:51.208792: step: 422/469, loss: 0.12497454136610031 2023-01-22 12:10:51.819194: step: 424/469, loss: 0.05305212363600731 2023-01-22 12:10:52.470288: step: 426/469, loss: 0.07651476562023163 2023-01-22 12:10:53.212799: step: 428/469, loss: 0.12583063542842865 2023-01-22 12:10:53.893457: step: 430/469, loss: 0.596038818359375 2023-01-22 12:10:54.531230: step: 432/469, loss: 0.6904774904251099 2023-01-22 12:10:55.186214: step: 434/469, loss: 0.08197285234928131 2023-01-22 12:10:55.825345: step: 436/469, loss: 0.11234140396118164 2023-01-22 12:10:56.470108: step: 438/469, loss: 0.1009846031665802 2023-01-22 12:10:57.132366: step: 440/469, loss: 0.21016834676265717 2023-01-22 12:10:57.793981: step: 442/469, loss: 0.2865082025527954 2023-01-22 12:10:58.472194: step: 444/469, loss: 0.09421136975288391 2023-01-22 12:10:59.124456: step: 446/469, loss: 0.2870556712150574 2023-01-22 12:10:59.717223: step: 448/469, loss: 0.08144180476665497 2023-01-22 12:11:00.394565: step: 450/469, loss: 0.060530755668878555 2023-01-22 12:11:01.037543: step: 452/469, loss: 0.06883741915225983 2023-01-22 12:11:01.616597: step: 454/469, loss: 0.34555795788764954 2023-01-22 12:11:02.352795: step: 456/469, loss: 
0.5633630156517029 2023-01-22 12:11:02.974546: step: 458/469, loss: 0.048432573676109314 2023-01-22 12:11:03.727061: step: 460/469, loss: 0.4532848000526428 2023-01-22 12:11:04.364070: step: 462/469, loss: 0.21483466029167175 2023-01-22 12:11:05.044557: step: 464/469, loss: 0.13238316774368286 2023-01-22 12:11:05.850633: step: 466/469, loss: 0.12196317315101624 2023-01-22 12:11:06.573201: step: 468/469, loss: 0.09013567864894867 2023-01-22 12:11:07.189276: step: 470/469, loss: 0.18008871376514435 2023-01-22 12:11:07.781362: step: 472/469, loss: 0.02501816675066948 2023-01-22 12:11:08.405392: step: 474/469, loss: 0.34291648864746094 2023-01-22 12:11:08.996052: step: 476/469, loss: 0.07923520356416702 2023-01-22 12:11:09.646796: step: 478/469, loss: 0.03828778117895126 2023-01-22 12:11:10.308698: step: 480/469, loss: 0.12524761259555817 2023-01-22 12:11:10.995218: step: 482/469, loss: 0.5911293029785156 2023-01-22 12:11:11.650305: step: 484/469, loss: 0.0678834468126297 2023-01-22 12:11:12.346189: step: 486/469, loss: 0.019243234768509865 2023-01-22 12:11:12.988197: step: 488/469, loss: 0.01733345352113247 2023-01-22 12:11:13.661280: step: 490/469, loss: 0.1415814459323883 2023-01-22 12:11:14.337662: step: 492/469, loss: 0.17629176378250122 2023-01-22 12:11:14.930035: step: 494/469, loss: 0.41696739196777344 2023-01-22 12:11:15.609503: step: 496/469, loss: 0.06588204205036163 2023-01-22 12:11:16.259501: step: 498/469, loss: 0.09903915226459503 2023-01-22 12:11:16.950503: step: 500/469, loss: 0.1281720995903015 2023-01-22 12:11:17.539445: step: 502/469, loss: 0.01150738075375557 2023-01-22 12:11:18.155485: step: 504/469, loss: 0.094943568110466 2023-01-22 12:11:18.815066: step: 506/469, loss: 0.06442873924970627 2023-01-22 12:11:19.425159: step: 508/469, loss: 0.09710308164358139 2023-01-22 12:11:20.105155: step: 510/469, loss: 0.31544438004493713 2023-01-22 12:11:20.786052: step: 512/469, loss: 0.06528124958276749 2023-01-22 12:11:21.632405: step: 514/469, loss: 0.0361175574362278 2023-01-22 12:11:22.236847: step: 516/469, loss: 0.13634465634822845 2023-01-22 12:11:22.871947: step: 518/469, loss: 0.13550400733947754 2023-01-22 12:11:23.509468: step: 520/469, loss: 0.20977728068828583 2023-01-22 12:11:24.203498: step: 522/469, loss: 0.06899198889732361 2023-01-22 12:11:24.822338: step: 524/469, loss: 0.07991157472133636 2023-01-22 12:11:25.453052: step: 526/469, loss: 0.08164144307374954 2023-01-22 12:11:26.085749: step: 528/469, loss: 0.07033399492502213 2023-01-22 12:11:26.756699: step: 530/469, loss: 0.04005934298038483 2023-01-22 12:11:27.397833: step: 532/469, loss: 0.053803931921720505 2023-01-22 12:11:28.075018: step: 534/469, loss: 0.05650361627340317 2023-01-22 12:11:28.728155: step: 536/469, loss: 0.04314383864402771 2023-01-22 12:11:29.399984: step: 538/469, loss: 0.23455005884170532 2023-01-22 12:11:30.107042: step: 540/469, loss: 0.41306638717651367 2023-01-22 12:11:30.720915: step: 542/469, loss: 0.46649155020713806 2023-01-22 12:11:31.415992: step: 544/469, loss: 0.21067191660404205 2023-01-22 12:11:32.082825: step: 546/469, loss: 0.05309181287884712 2023-01-22 12:11:32.771538: step: 548/469, loss: 0.21252405643463135 2023-01-22 12:11:33.341649: step: 550/469, loss: 0.04839169234037399 2023-01-22 12:11:33.977821: step: 552/469, loss: 0.058077406138181686 2023-01-22 12:11:34.633500: step: 554/469, loss: 0.12673555314540863 2023-01-22 12:11:35.291910: step: 556/469, loss: 0.08447688817977905 2023-01-22 12:11:35.931033: step: 558/469, loss: 0.10869933664798737 2023-01-22 
12:11:36.598213: step: 560/469, loss: 0.09575501084327698 2023-01-22 12:11:37.252041: step: 562/469, loss: 0.0852009505033493 2023-01-22 12:11:37.869710: step: 564/469, loss: 0.03301452100276947 2023-01-22 12:11:38.523019: step: 566/469, loss: 0.2083071917295456 2023-01-22 12:11:39.102775: step: 568/469, loss: 0.10271920263767242 2023-01-22 12:11:39.730534: step: 570/469, loss: 0.07137446850538254 2023-01-22 12:11:40.382159: step: 572/469, loss: 0.04790101200342178 2023-01-22 12:11:41.036035: step: 574/469, loss: 0.099668949842453 2023-01-22 12:11:41.754788: step: 576/469, loss: 0.1455017626285553 2023-01-22 12:11:42.405529: step: 578/469, loss: 0.07785852253437042 2023-01-22 12:11:43.044925: step: 580/469, loss: 0.25632062554359436 2023-01-22 12:11:43.693686: step: 582/469, loss: 0.07538482546806335 2023-01-22 12:11:44.301761: step: 584/469, loss: 0.09727531671524048 2023-01-22 12:11:44.915023: step: 586/469, loss: 0.13240544497966766 2023-01-22 12:11:45.506003: step: 588/469, loss: 0.04293511062860489 2023-01-22 12:11:46.150176: step: 590/469, loss: 0.2791767716407776 2023-01-22 12:11:46.800811: step: 592/469, loss: 0.34410086274147034 2023-01-22 12:11:47.443920: step: 594/469, loss: 0.2836783230304718 2023-01-22 12:11:48.068232: step: 596/469, loss: 0.05330923944711685 2023-01-22 12:11:48.735982: step: 598/469, loss: 0.12309497594833374 2023-01-22 12:11:49.411695: step: 600/469, loss: 0.09553201496601105 2023-01-22 12:11:50.099070: step: 602/469, loss: 0.08250755816698074 2023-01-22 12:11:50.734698: step: 604/469, loss: 1.0477052927017212 2023-01-22 12:11:51.319164: step: 606/469, loss: 0.14281758666038513 2023-01-22 12:11:51.997538: step: 608/469, loss: 0.09342851489782333 2023-01-22 12:11:52.629578: step: 610/469, loss: 0.014582914300262928 2023-01-22 12:11:53.270394: step: 612/469, loss: 0.6999114155769348 2023-01-22 12:11:53.857983: step: 614/469, loss: 0.11366409063339233 2023-01-22 12:11:54.483709: step: 616/469, loss: 0.10880661010742188 2023-01-22 12:11:55.064632: step: 618/469, loss: 0.4414944052696228 2023-01-22 12:11:55.756066: step: 620/469, loss: 0.01519444677978754 2023-01-22 12:11:56.318544: step: 622/469, loss: 0.08215223252773285 2023-01-22 12:11:56.920094: step: 624/469, loss: 0.251505047082901 2023-01-22 12:11:57.558006: step: 626/469, loss: 0.13148269057273865 2023-01-22 12:11:58.182391: step: 628/469, loss: 0.17193777859210968 2023-01-22 12:11:58.820436: step: 630/469, loss: 0.07418045401573181 2023-01-22 12:11:59.544323: step: 632/469, loss: 0.10217420756816864 2023-01-22 12:12:00.218283: step: 634/469, loss: 0.26963257789611816 2023-01-22 12:12:00.896241: step: 636/469, loss: 0.10058349370956421 2023-01-22 12:12:01.509576: step: 638/469, loss: 0.10942566394805908 2023-01-22 12:12:02.155145: step: 640/469, loss: 0.04665057733654976 2023-01-22 12:12:02.799356: step: 642/469, loss: 0.0690917894244194 2023-01-22 12:12:03.465010: step: 644/469, loss: 0.05420218035578728 2023-01-22 12:12:04.163875: step: 646/469, loss: 0.05774494633078575 2023-01-22 12:12:04.834240: step: 648/469, loss: 0.40795964002609253 2023-01-22 12:12:05.468023: step: 650/469, loss: 0.08505664020776749 2023-01-22 12:12:06.186169: step: 652/469, loss: 0.17751897871494293 2023-01-22 12:12:06.914265: step: 654/469, loss: 0.5836828947067261 2023-01-22 12:12:07.622724: step: 656/469, loss: 0.12484271079301834 2023-01-22 12:12:08.331076: step: 658/469, loss: 0.06690538674592972 2023-01-22 12:12:09.049072: step: 660/469, loss: 0.0452662892639637 2023-01-22 12:12:09.712913: step: 662/469, loss: 
0.24605311453342438 2023-01-22 12:12:10.315845: step: 664/469, loss: 0.2452179491519928 2023-01-22 12:12:10.917163: step: 666/469, loss: 0.015766263008117676 2023-01-22 12:12:11.633683: step: 668/469, loss: 0.1808127462863922 2023-01-22 12:12:12.308566: step: 670/469, loss: 0.5678360462188721 2023-01-22 12:12:12.972449: step: 672/469, loss: 0.8446255326271057 2023-01-22 12:12:13.596845: step: 674/469, loss: 0.07731617242097855 2023-01-22 12:12:14.279373: step: 676/469, loss: 0.12699292600154877 2023-01-22 12:12:14.942267: step: 678/469, loss: 0.08847266435623169 2023-01-22 12:12:15.572441: step: 680/469, loss: 1.6162434816360474 2023-01-22 12:12:16.289557: step: 682/469, loss: 0.11218369752168655 2023-01-22 12:12:16.955893: step: 684/469, loss: 0.2254307121038437 2023-01-22 12:12:17.652070: step: 686/469, loss: 0.15231843292713165 2023-01-22 12:12:18.314381: step: 688/469, loss: 0.11132191121578217 2023-01-22 12:12:18.949335: step: 690/469, loss: 0.17959937453269958 2023-01-22 12:12:19.618462: step: 692/469, loss: 0.15276867151260376 2023-01-22 12:12:20.244639: step: 694/469, loss: 0.2105720043182373 2023-01-22 12:12:20.895112: step: 696/469, loss: 0.3055950403213501 2023-01-22 12:12:21.486279: step: 698/469, loss: 0.19766704738140106 2023-01-22 12:12:22.138386: step: 700/469, loss: 0.11132828891277313 2023-01-22 12:12:22.785579: step: 702/469, loss: 0.017213527113199234 2023-01-22 12:12:23.351001: step: 704/469, loss: 0.07785165309906006 2023-01-22 12:12:24.016944: step: 706/469, loss: 0.21710491180419922 2023-01-22 12:12:24.679473: step: 708/469, loss: 0.10139473527669907 2023-01-22 12:12:25.330111: step: 710/469, loss: 0.040440790355205536 2023-01-22 12:12:26.026306: step: 712/469, loss: 0.11204468458890915 2023-01-22 12:12:26.634243: step: 714/469, loss: 0.19281946122646332 2023-01-22 12:12:27.285895: step: 716/469, loss: 0.20367039740085602 2023-01-22 12:12:27.912687: step: 718/469, loss: 0.060095809400081635 2023-01-22 12:12:28.576692: step: 720/469, loss: 0.11108577251434326 2023-01-22 12:12:29.213961: step: 722/469, loss: 0.0680357813835144 2023-01-22 12:12:29.814101: step: 724/469, loss: 0.3617883324623108 2023-01-22 12:12:30.507031: step: 726/469, loss: 0.32320207357406616 2023-01-22 12:12:31.149746: step: 728/469, loss: 0.019802497699856758 2023-01-22 12:12:31.874343: step: 730/469, loss: 0.07566557824611664 2023-01-22 12:12:32.544193: step: 732/469, loss: 0.08792748302221298 2023-01-22 12:12:33.161639: step: 734/469, loss: 0.0913555771112442 2023-01-22 12:12:33.851914: step: 736/469, loss: 0.03404795378446579 2023-01-22 12:12:34.567286: step: 738/469, loss: 0.11625602841377258 2023-01-22 12:12:35.216782: step: 740/469, loss: 0.07428066432476044 2023-01-22 12:12:35.893251: step: 742/469, loss: 0.09052813798189163 2023-01-22 12:12:36.570760: step: 744/469, loss: 0.08908706903457642 2023-01-22 12:12:37.248014: step: 746/469, loss: 0.06369903683662415 2023-01-22 12:12:37.857989: step: 748/469, loss: 0.2825640141963959 2023-01-22 12:12:38.498175: step: 750/469, loss: 0.0785973072052002 2023-01-22 12:12:39.196547: step: 752/469, loss: 0.05707874149084091 2023-01-22 12:12:39.970386: step: 754/469, loss: 0.11375433951616287 2023-01-22 12:12:40.554954: step: 756/469, loss: 0.24645879864692688 2023-01-22 12:12:41.203461: step: 758/469, loss: 0.36877089738845825 2023-01-22 12:12:41.909114: step: 760/469, loss: 0.21835432946681976 2023-01-22 12:12:42.491630: step: 762/469, loss: 0.1805793195962906 2023-01-22 12:12:43.180212: step: 764/469, loss: 0.023476416245102882 2023-01-22 
12:12:43.886274: step: 766/469, loss: 0.10177686810493469 2023-01-22 12:12:44.619267: step: 768/469, loss: 0.07214643061161041 2023-01-22 12:12:45.267564: step: 770/469, loss: 0.09948726743459702 2023-01-22 12:12:45.876549: step: 772/469, loss: 0.08886440843343735 2023-01-22 12:12:46.526946: step: 774/469, loss: 0.07293224334716797 2023-01-22 12:12:47.163700: step: 776/469, loss: 0.08997739851474762 2023-01-22 12:12:47.857011: step: 778/469, loss: 0.07979803532361984 2023-01-22 12:12:48.512969: step: 780/469, loss: 0.41471773386001587 2023-01-22 12:12:49.113120: step: 782/469, loss: 0.08494696021080017 2023-01-22 12:12:49.803604: step: 784/469, loss: 0.3089362680912018 2023-01-22 12:12:50.385608: step: 786/469, loss: 0.0335184782743454 2023-01-22 12:12:51.036887: step: 788/469, loss: 0.665024995803833 2023-01-22 12:12:51.629800: step: 790/469, loss: 0.5938014388084412 2023-01-22 12:12:52.293289: step: 792/469, loss: 0.07881605625152588 2023-01-22 12:12:53.008736: step: 794/469, loss: 0.0684218630194664 2023-01-22 12:12:53.630370: step: 796/469, loss: 0.1532132923603058 2023-01-22 12:12:54.343101: step: 798/469, loss: 0.09592583775520325 2023-01-22 12:12:54.946674: step: 800/469, loss: 0.15151216089725494 2023-01-22 12:12:55.604790: step: 802/469, loss: 0.05796181038022041 2023-01-22 12:12:56.276125: step: 804/469, loss: 0.07139807194471359 2023-01-22 12:12:56.975526: step: 806/469, loss: 0.060344018042087555 2023-01-22 12:12:57.621231: step: 808/469, loss: 0.563513994216919 2023-01-22 12:12:58.338463: step: 810/469, loss: 0.11856386810541153 2023-01-22 12:12:58.952724: step: 812/469, loss: 0.5364812612533569 2023-01-22 12:12:59.644215: step: 814/469, loss: 0.08153276890516281 2023-01-22 12:13:00.223821: step: 816/469, loss: 0.09765459597110748 2023-01-22 12:13:00.905509: step: 818/469, loss: 0.12858854234218597 2023-01-22 12:13:01.613494: step: 820/469, loss: 0.047056447714567184 2023-01-22 12:13:02.309102: step: 822/469, loss: 0.13528740406036377 2023-01-22 12:13:02.931452: step: 824/469, loss: 0.046440061181783676 2023-01-22 12:13:03.619208: step: 826/469, loss: 0.07523783296346664 2023-01-22 12:13:04.309273: step: 828/469, loss: 0.0931289866566658 2023-01-22 12:13:05.014633: step: 830/469, loss: 0.3882063925266266 2023-01-22 12:13:05.698009: step: 832/469, loss: 0.2032158076763153 2023-01-22 12:13:06.305262: step: 834/469, loss: 0.09220840036869049 2023-01-22 12:13:06.971731: step: 836/469, loss: 0.06647709012031555 2023-01-22 12:13:07.642551: step: 838/469, loss: 0.08813027292490005 2023-01-22 12:13:08.327741: step: 840/469, loss: 0.7979458570480347 2023-01-22 12:13:09.054712: step: 842/469, loss: 0.125069260597229 2023-01-22 12:13:09.698521: step: 844/469, loss: 0.05929647758603096 2023-01-22 12:13:10.283827: step: 846/469, loss: 0.7311899662017822 2023-01-22 12:13:10.929736: step: 848/469, loss: 0.07078272849321365 2023-01-22 12:13:11.626477: step: 850/469, loss: 0.15644574165344238 2023-01-22 12:13:12.242959: step: 852/469, loss: 0.309980183839798 2023-01-22 12:13:12.938028: step: 854/469, loss: 0.1734669804573059 2023-01-22 12:13:13.549334: step: 856/469, loss: 0.11677585542201996 2023-01-22 12:13:14.172692: step: 858/469, loss: 0.11954693496227264 2023-01-22 12:13:14.829638: step: 860/469, loss: 0.7869711518287659 2023-01-22 12:13:15.490028: step: 862/469, loss: 0.16252881288528442 2023-01-22 12:13:16.078191: step: 864/469, loss: 0.04953162744641304 2023-01-22 12:13:16.741656: step: 866/469, loss: 0.03249672055244446 2023-01-22 12:13:17.408792: step: 868/469, loss: 
0.06179596856236458 2023-01-22 12:13:18.044537: step: 870/469, loss: 0.11205118149518967 2023-01-22 12:13:18.750211: step: 872/469, loss: 0.0674152597784996 2023-01-22 12:13:19.386539: step: 874/469, loss: 0.685667097568512 2023-01-22 12:13:19.992998: step: 876/469, loss: 0.1429910510778427 2023-01-22 12:13:20.712261: step: 878/469, loss: 0.26832348108291626 2023-01-22 12:13:21.392717: step: 880/469, loss: 0.05928882211446762 2023-01-22 12:13:22.054969: step: 882/469, loss: 0.07628697901964188 2023-01-22 12:13:22.711511: step: 884/469, loss: 0.5264723300933838 2023-01-22 12:13:23.417689: step: 886/469, loss: 0.08741269260644913 2023-01-22 12:13:24.018250: step: 888/469, loss: 0.03884569928050041 2023-01-22 12:13:24.733200: step: 890/469, loss: 0.19749529659748077 2023-01-22 12:13:25.316744: step: 892/469, loss: 0.09642384946346283 2023-01-22 12:13:25.951212: step: 894/469, loss: 0.027745483443140984 2023-01-22 12:13:26.585383: step: 896/469, loss: 0.24407720565795898 2023-01-22 12:13:27.321313: step: 898/469, loss: 0.10440047830343246 2023-01-22 12:13:27.925332: step: 900/469, loss: 0.09144382923841476 2023-01-22 12:13:28.578803: step: 902/469, loss: 0.09869574755430222 2023-01-22 12:13:29.189036: step: 904/469, loss: 0.015571353957057 2023-01-22 12:13:29.933845: step: 906/469, loss: 0.27356794476509094 2023-01-22 12:13:30.647882: step: 908/469, loss: 0.09513689577579498 2023-01-22 12:13:31.304212: step: 910/469, loss: 0.16539962589740753 2023-01-22 12:13:31.975144: step: 912/469, loss: 0.9347312450408936 2023-01-22 12:13:32.620108: step: 914/469, loss: 0.16957257688045502 2023-01-22 12:13:33.314588: step: 916/469, loss: 0.10880465060472488 2023-01-22 12:13:33.934622: step: 918/469, loss: 0.021691981703042984 2023-01-22 12:13:34.595139: step: 920/469, loss: 0.03262234851717949 2023-01-22 12:13:35.255181: step: 922/469, loss: 0.08738932013511658 2023-01-22 12:13:35.852651: step: 924/469, loss: 0.051625512540340424 2023-01-22 12:13:36.477917: step: 926/469, loss: 0.09986802935600281 2023-01-22 12:13:37.089572: step: 928/469, loss: 0.07723455131053925 2023-01-22 12:13:37.711056: step: 930/469, loss: 0.40475204586982727 2023-01-22 12:13:38.323280: step: 932/469, loss: 0.1682518869638443 2023-01-22 12:13:38.914555: step: 934/469, loss: 4.520831108093262 2023-01-22 12:13:39.587504: step: 936/469, loss: 0.018113207072019577 2023-01-22 12:13:40.301793: step: 938/469, loss: 0.07507234811782837 ================================================== Loss: 0.183 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3012612896061593, 'r': 0.35099512678971884, 'f1': 0.324232132897777}, 'combined': 0.238907887398362, 'epoch': 15} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2899916294642857, 'r': 0.2549376962323391, 'f1': 0.27133719715956556}, 'combined': 0.1480021075415812, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29350714809384165, 'r': 0.34530252716922544, 'f1': 0.3173050249663153}, 'combined': 0.23380370260675862, 'epoch': 15} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2921505467868984, 'r': 0.25335766282709965, 'f1': 0.27137475998743776}, 'combined': 0.14802259635678422, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29687600370060047, 
'r': 0.34926588670658876, 'f1': 0.3209470310276762}, 'combined': 0.23648728602039296, 'epoch': 15} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2864113793730835, 'r': 0.25415075697117023, 'f1': 0.2693184149563493}, 'combined': 0.14690095361255415, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22442680776014104, 'r': 0.3462585034013605, 'f1': 0.27233814874264306}, 'combined': 0.1815587658284287, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22727272727272727, 'r': 0.32608695652173914, 'f1': 0.26785714285714285}, 'combined': 0.13392857142857142, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36666666666666664, 'r': 0.1896551724137931, 'f1': 0.25}, 'combined': 0.16666666666666666, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:16:39.433603: step: 2/469, loss: 0.08759817481040955 2023-01-22 12:16:40.084103: step: 4/469, loss: 0.10862677544355392 2023-01-22 12:16:40.711978: step: 6/469, loss: 0.032449740916490555 2023-01-22 12:16:41.447768: step: 
8/469, loss: 0.3330148458480835 2023-01-22 12:16:42.043717: step: 10/469, loss: 0.25631821155548096 2023-01-22 12:16:42.677436: step: 12/469, loss: 0.35224202275276184 2023-01-22 12:16:43.266845: step: 14/469, loss: 0.0253142099827528 2023-01-22 12:16:43.955933: step: 16/469, loss: 0.042935147881507874 2023-01-22 12:16:44.591768: step: 18/469, loss: 0.055103935301303864 2023-01-22 12:16:45.241814: step: 20/469, loss: 0.457619309425354 2023-01-22 12:16:45.959920: step: 22/469, loss: 0.1297680288553238 2023-01-22 12:16:46.537951: step: 24/469, loss: 0.13178087770938873 2023-01-22 12:16:47.141977: step: 26/469, loss: 0.021845370531082153 2023-01-22 12:16:47.804341: step: 28/469, loss: 0.09020737558603287 2023-01-22 12:16:48.441343: step: 30/469, loss: 0.11093532294034958 2023-01-22 12:16:49.069803: step: 32/469, loss: 0.1392078697681427 2023-01-22 12:16:49.681885: step: 34/469, loss: 0.16760289669036865 2023-01-22 12:16:50.281863: step: 36/469, loss: 0.14420682191848755 2023-01-22 12:16:50.939549: step: 38/469, loss: 0.05341527611017227 2023-01-22 12:16:51.562031: step: 40/469, loss: 0.634558379650116 2023-01-22 12:16:52.220563: step: 42/469, loss: 0.09511689841747284 2023-01-22 12:16:52.877208: step: 44/469, loss: 0.05534956604242325 2023-01-22 12:16:53.510993: step: 46/469, loss: 0.05722063034772873 2023-01-22 12:16:54.104403: step: 48/469, loss: 0.067926324903965 2023-01-22 12:16:54.747431: step: 50/469, loss: 0.17107677459716797 2023-01-22 12:16:55.380454: step: 52/469, loss: 0.5963416695594788 2023-01-22 12:16:56.055137: step: 54/469, loss: 0.1161472350358963 2023-01-22 12:16:56.720472: step: 56/469, loss: 0.45169392228126526 2023-01-22 12:16:57.377428: step: 58/469, loss: 0.09179622679948807 2023-01-22 12:16:58.085440: step: 60/469, loss: 0.12391123175621033 2023-01-22 12:16:58.826102: step: 62/469, loss: 0.11153832077980042 2023-01-22 12:16:59.457297: step: 64/469, loss: 0.2734530568122864 2023-01-22 12:17:00.140696: step: 66/469, loss: 0.08586464822292328 2023-01-22 12:17:00.782313: step: 68/469, loss: 0.027291813865303993 2023-01-22 12:17:01.384386: step: 70/469, loss: 0.12208575755357742 2023-01-22 12:17:02.050588: step: 72/469, loss: 0.04789937660098076 2023-01-22 12:17:02.702785: step: 74/469, loss: 0.06785420328378677 2023-01-22 12:17:03.352042: step: 76/469, loss: 0.8196120262145996 2023-01-22 12:17:04.007103: step: 78/469, loss: 0.03966684639453888 2023-01-22 12:17:04.715121: step: 80/469, loss: 0.19919990003108978 2023-01-22 12:17:05.375249: step: 82/469, loss: 1.3733240365982056 2023-01-22 12:17:06.027482: step: 84/469, loss: 0.17070399224758148 2023-01-22 12:17:06.739888: step: 86/469, loss: 0.041026100516319275 2023-01-22 12:17:07.443524: step: 88/469, loss: 0.031869444996118546 2023-01-22 12:17:08.147613: step: 90/469, loss: 0.10911519825458527 2023-01-22 12:17:08.863505: step: 92/469, loss: 0.04875505343079567 2023-01-22 12:17:09.567382: step: 94/469, loss: 0.016226528212428093 2023-01-22 12:17:10.165787: step: 96/469, loss: 0.09847255796194077 2023-01-22 12:17:10.882416: step: 98/469, loss: 0.08935950696468353 2023-01-22 12:17:11.590824: step: 100/469, loss: 0.38560184836387634 2023-01-22 12:17:12.270451: step: 102/469, loss: 0.03775491565465927 2023-01-22 12:17:12.903218: step: 104/469, loss: 0.11571477353572845 2023-01-22 12:17:13.574155: step: 106/469, loss: 0.1439637988805771 2023-01-22 12:17:14.291976: step: 108/469, loss: 0.3509625494480133 2023-01-22 12:17:14.965475: step: 110/469, loss: 0.8873723149299622 2023-01-22 12:17:15.605189: step: 112/469, loss: 
0.030410833656787872 2023-01-22 12:17:16.309815: step: 114/469, loss: 0.1051577776670456 2023-01-22 12:17:16.881799: step: 116/469, loss: 0.24868875741958618 2023-01-22 12:17:17.540301: step: 118/469, loss: 0.027866119518876076 2023-01-22 12:17:18.283484: step: 120/469, loss: 0.05933655425906181 2023-01-22 12:17:18.887473: step: 122/469, loss: 0.058191679418087006 2023-01-22 12:17:19.487520: step: 124/469, loss: 0.04560903459787369 2023-01-22 12:17:20.174760: step: 126/469, loss: 0.1586115062236786 2023-01-22 12:17:20.806484: step: 128/469, loss: 0.0660063773393631 2023-01-22 12:17:21.482092: step: 130/469, loss: 0.07444681227207184 2023-01-22 12:17:22.136544: step: 132/469, loss: 0.03317635506391525 2023-01-22 12:17:22.764404: step: 134/469, loss: 0.027760345488786697 2023-01-22 12:17:23.458918: step: 136/469, loss: 0.034100595861673355 2023-01-22 12:17:24.068166: step: 138/469, loss: 0.07568786293268204 2023-01-22 12:17:24.686477: step: 140/469, loss: 0.03036593459546566 2023-01-22 12:17:25.338634: step: 142/469, loss: 0.05914986878633499 2023-01-22 12:17:26.022477: step: 144/469, loss: 0.06908878684043884 2023-01-22 12:17:26.727529: step: 146/469, loss: 0.10080679506063461 2023-01-22 12:17:27.301791: step: 148/469, loss: 0.04087192565202713 2023-01-22 12:17:27.974860: step: 150/469, loss: 0.10990936309099197 2023-01-22 12:17:28.602967: step: 152/469, loss: 0.07776656746864319 2023-01-22 12:17:29.280316: step: 154/469, loss: 0.10193280130624771 2023-01-22 12:17:29.950071: step: 156/469, loss: 0.1123528778553009 2023-01-22 12:17:30.601829: step: 158/469, loss: 0.0585317388176918 2023-01-22 12:17:31.222485: step: 160/469, loss: 0.04772958159446716 2023-01-22 12:17:31.887511: step: 162/469, loss: 0.03404593840241432 2023-01-22 12:17:32.560587: step: 164/469, loss: 0.061102382838726044 2023-01-22 12:17:33.209304: step: 166/469, loss: 0.14992041885852814 2023-01-22 12:17:33.978246: step: 168/469, loss: 0.0859682634472847 2023-01-22 12:17:34.635496: step: 170/469, loss: 0.10439613461494446 2023-01-22 12:17:35.325679: step: 172/469, loss: 0.326616495847702 2023-01-22 12:17:35.969365: step: 174/469, loss: 0.09904137253761292 2023-01-22 12:17:36.603894: step: 176/469, loss: 0.4188278615474701 2023-01-22 12:17:37.259823: step: 178/469, loss: 0.07928576320409775 2023-01-22 12:17:37.949594: step: 180/469, loss: 0.10259288549423218 2023-01-22 12:17:38.594983: step: 182/469, loss: 0.16187797486782074 2023-01-22 12:17:39.211869: step: 184/469, loss: 0.1374415159225464 2023-01-22 12:17:39.930168: step: 186/469, loss: 0.11020496487617493 2023-01-22 12:17:40.546337: step: 188/469, loss: 0.1418713927268982 2023-01-22 12:17:41.210684: step: 190/469, loss: 0.034615661948919296 2023-01-22 12:17:41.788676: step: 192/469, loss: 0.029697706922888756 2023-01-22 12:17:42.422449: step: 194/469, loss: 0.04615360498428345 2023-01-22 12:17:43.123020: step: 196/469, loss: 0.19317340850830078 2023-01-22 12:17:43.819018: step: 198/469, loss: 0.12580899894237518 2023-01-22 12:17:44.399128: step: 200/469, loss: 0.032612282782793045 2023-01-22 12:17:44.979713: step: 202/469, loss: 0.03660755231976509 2023-01-22 12:17:45.583877: step: 204/469, loss: 0.08443994075059891 2023-01-22 12:17:46.228654: step: 206/469, loss: 0.016526829451322556 2023-01-22 12:17:46.817878: step: 208/469, loss: 0.13248567283153534 2023-01-22 12:17:47.505858: step: 210/469, loss: 0.11488926410675049 2023-01-22 12:17:48.132733: step: 212/469, loss: 0.05002579838037491 2023-01-22 12:17:48.799087: step: 214/469, loss: 0.14463454484939575 2023-01-22 
12:17:49.448501: step: 216/469, loss: 0.036352042108774185 2023-01-22 12:17:50.104558: step: 218/469, loss: 0.10473927855491638 2023-01-22 12:17:50.727280: step: 220/469, loss: 0.07550850510597229 2023-01-22 12:17:51.402873: step: 222/469, loss: 0.08478794991970062 2023-01-22 12:17:51.982531: step: 224/469, loss: 0.14666549861431122 2023-01-22 12:17:52.703140: step: 226/469, loss: 0.12549607455730438 2023-01-22 12:17:53.347357: step: 228/469, loss: 0.0864592120051384 2023-01-22 12:17:53.997105: step: 230/469, loss: 0.890785276889801 2023-01-22 12:17:54.673614: step: 232/469, loss: 0.02598583698272705 2023-01-22 12:17:55.422579: step: 234/469, loss: 0.3216521143913269 2023-01-22 12:17:56.107003: step: 236/469, loss: 0.11896971613168716 2023-01-22 12:17:56.811596: step: 238/469, loss: 0.13987763226032257 2023-01-22 12:17:57.489246: step: 240/469, loss: 0.14266404509544373 2023-01-22 12:17:58.093146: step: 242/469, loss: 0.039546750485897064 2023-01-22 12:17:58.783137: step: 244/469, loss: 0.059781335294246674 2023-01-22 12:17:59.353396: step: 246/469, loss: 0.03459937497973442 2023-01-22 12:18:00.018360: step: 248/469, loss: 0.07345850765705109 2023-01-22 12:18:00.684505: step: 250/469, loss: 0.2378780096769333 2023-01-22 12:18:01.285016: step: 252/469, loss: 0.048793938010931015 2023-01-22 12:18:01.972580: step: 254/469, loss: 0.04427121952176094 2023-01-22 12:18:02.639847: step: 256/469, loss: 0.400473415851593 2023-01-22 12:18:03.269862: step: 258/469, loss: 0.11825111508369446 2023-01-22 12:18:03.883441: step: 260/469, loss: 0.35052287578582764 2023-01-22 12:18:04.569199: step: 262/469, loss: 0.07198025286197662 2023-01-22 12:18:05.209200: step: 264/469, loss: 0.027040036395192146 2023-01-22 12:18:05.858916: step: 266/469, loss: 0.14885684847831726 2023-01-22 12:18:06.531419: step: 268/469, loss: 0.17571820318698883 2023-01-22 12:18:07.169299: step: 270/469, loss: 0.0809444785118103 2023-01-22 12:18:07.788451: step: 272/469, loss: 0.058086540549993515 2023-01-22 12:18:08.494717: step: 274/469, loss: 0.032617341727018356 2023-01-22 12:18:09.201447: step: 276/469, loss: 0.15086044371128082 2023-01-22 12:18:09.902569: step: 278/469, loss: 0.02176734246313572 2023-01-22 12:18:10.541914: step: 280/469, loss: 0.1225871592760086 2023-01-22 12:18:11.164420: step: 282/469, loss: 0.07715824991464615 2023-01-22 12:18:11.905666: step: 284/469, loss: 0.09599995613098145 2023-01-22 12:18:12.494234: step: 286/469, loss: 0.0792052149772644 2023-01-22 12:18:13.132935: step: 288/469, loss: 0.0774022787809372 2023-01-22 12:18:13.786252: step: 290/469, loss: 0.0499991774559021 2023-01-22 12:18:14.425952: step: 292/469, loss: 0.07230868935585022 2023-01-22 12:18:15.105522: step: 294/469, loss: 0.07355786114931107 2023-01-22 12:18:15.675825: step: 296/469, loss: 0.3096159100532532 2023-01-22 12:18:16.342400: step: 298/469, loss: 0.06396185606718063 2023-01-22 12:18:17.028366: step: 300/469, loss: 0.08230949938297272 2023-01-22 12:18:17.680334: step: 302/469, loss: 0.10528390854597092 2023-01-22 12:18:18.325126: step: 304/469, loss: 0.06791137158870697 2023-01-22 12:18:18.988070: step: 306/469, loss: 0.12583978474140167 2023-01-22 12:18:19.711566: step: 308/469, loss: 0.0679018422961235 2023-01-22 12:18:20.368825: step: 310/469, loss: 0.12969493865966797 2023-01-22 12:18:21.035988: step: 312/469, loss: 0.10740697383880615 2023-01-22 12:18:21.812323: step: 314/469, loss: 0.0835144892334938 2023-01-22 12:18:22.485918: step: 316/469, loss: 0.10374576598405838 2023-01-22 12:18:23.190312: step: 318/469, loss: 
0.12236394733190536 2023-01-22 12:18:23.796708: step: 320/469, loss: 0.06672445684671402 2023-01-22 12:18:24.423503: step: 322/469, loss: 0.07186823338270187 2023-01-22 12:18:25.112482: step: 324/469, loss: 0.7088342905044556 2023-01-22 12:18:25.770895: step: 326/469, loss: 0.09231613576412201 2023-01-22 12:18:26.422095: step: 328/469, loss: 0.2536635398864746 2023-01-22 12:18:27.054029: step: 330/469, loss: 0.07841683179140091 2023-01-22 12:18:27.726808: step: 332/469, loss: 0.09141992777585983 2023-01-22 12:18:28.380350: step: 334/469, loss: 0.086724191904068 2023-01-22 12:18:29.050355: step: 336/469, loss: 0.09820393472909927 2023-01-22 12:18:29.774113: step: 338/469, loss: 0.0037140275817364454 2023-01-22 12:18:30.444385: step: 340/469, loss: 0.8518657088279724 2023-01-22 12:18:31.032389: step: 342/469, loss: 0.1746920794248581 2023-01-22 12:18:31.610818: step: 344/469, loss: 0.06727604568004608 2023-01-22 12:18:32.318684: step: 346/469, loss: 0.19728432595729828 2023-01-22 12:18:32.978821: step: 348/469, loss: 0.05319666862487793 2023-01-22 12:18:33.615259: step: 350/469, loss: 2.291508197784424 2023-01-22 12:18:34.238173: step: 352/469, loss: 0.04678662121295929 2023-01-22 12:18:34.890254: step: 354/469, loss: 0.1769683063030243 2023-01-22 12:18:35.506603: step: 356/469, loss: 0.11278193444013596 2023-01-22 12:18:36.182429: step: 358/469, loss: 0.048480868339538574 2023-01-22 12:18:36.873606: step: 360/469, loss: 0.1318223625421524 2023-01-22 12:18:37.509904: step: 362/469, loss: 0.06999413669109344 2023-01-22 12:18:38.172571: step: 364/469, loss: 0.12965962290763855 2023-01-22 12:18:38.819813: step: 366/469, loss: 0.13442222774028778 2023-01-22 12:18:39.478441: step: 368/469, loss: 1.2415266036987305 2023-01-22 12:18:40.084696: step: 370/469, loss: 0.09754236042499542 2023-01-22 12:18:40.785762: step: 372/469, loss: 0.11386800557374954 2023-01-22 12:18:41.415326: step: 374/469, loss: 0.06985150277614594 2023-01-22 12:18:42.083037: step: 376/469, loss: 0.020396117120981216 2023-01-22 12:18:42.755558: step: 378/469, loss: 0.10593730211257935 2023-01-22 12:18:43.425649: step: 380/469, loss: 0.06065959483385086 2023-01-22 12:18:44.094402: step: 382/469, loss: 0.08820229768753052 2023-01-22 12:18:44.699319: step: 384/469, loss: 0.03810331970453262 2023-01-22 12:18:45.424772: step: 386/469, loss: 0.12489712238311768 2023-01-22 12:18:46.064579: step: 388/469, loss: 0.8214848637580872 2023-01-22 12:18:46.730949: step: 390/469, loss: 0.03253836929798126 2023-01-22 12:18:47.371883: step: 392/469, loss: 0.12141628563404083 2023-01-22 12:18:47.988776: step: 394/469, loss: 0.04173850268125534 2023-01-22 12:18:48.670155: step: 396/469, loss: 0.7022337317466736 2023-01-22 12:18:49.363028: step: 398/469, loss: 0.330159068107605 2023-01-22 12:18:50.031659: step: 400/469, loss: 0.1802656650543213 2023-01-22 12:18:50.670739: step: 402/469, loss: 0.09463348984718323 2023-01-22 12:18:51.389538: step: 404/469, loss: 0.16628727316856384 2023-01-22 12:18:52.039718: step: 406/469, loss: 0.08815762400627136 2023-01-22 12:18:52.756853: step: 408/469, loss: 0.5145547389984131 2023-01-22 12:18:53.592021: step: 410/469, loss: 0.18800586462020874 2023-01-22 12:18:54.238854: step: 412/469, loss: 0.15183062851428986 2023-01-22 12:18:54.874928: step: 414/469, loss: 0.05885807424783707 2023-01-22 12:18:55.482128: step: 416/469, loss: 0.07807522267103195 2023-01-22 12:18:56.175811: step: 418/469, loss: 0.10117263346910477 2023-01-22 12:18:56.820003: step: 420/469, loss: 0.11940769851207733 2023-01-22 12:18:57.456616: 
step: 422/469, loss: 0.0769347995519638 2023-01-22 12:18:58.170028: step: 424/469, loss: 0.0977262333035469 2023-01-22 12:18:58.740781: step: 426/469, loss: 0.049495261162519455 2023-01-22 12:18:59.411638: step: 428/469, loss: 1.361379623413086 2023-01-22 12:19:00.015047: step: 430/469, loss: 0.0919654443860054 2023-01-22 12:19:00.653201: step: 432/469, loss: 0.11428482830524445 2023-01-22 12:19:01.283654: step: 434/469, loss: 0.4065563678741455 2023-01-22 12:19:01.938567: step: 436/469, loss: 0.07156356424093246 2023-01-22 12:19:02.624210: step: 438/469, loss: 0.05355653911828995 2023-01-22 12:19:03.288390: step: 440/469, loss: 0.05762242525815964 2023-01-22 12:19:03.963389: step: 442/469, loss: 0.02817855402827263 2023-01-22 12:19:04.582294: step: 444/469, loss: 0.05692010372877121 2023-01-22 12:19:05.238812: step: 446/469, loss: 0.07883297652006149 2023-01-22 12:19:05.913936: step: 448/469, loss: 0.031249063089489937 2023-01-22 12:19:06.585149: step: 450/469, loss: 0.15502671897411346 2023-01-22 12:19:07.151691: step: 452/469, loss: 0.1220131367444992 2023-01-22 12:19:07.847603: step: 454/469, loss: 0.20052069425582886 2023-01-22 12:19:08.575646: step: 456/469, loss: 0.1365944892168045 2023-01-22 12:19:09.248585: step: 458/469, loss: 0.05433924123644829 2023-01-22 12:19:09.916255: step: 460/469, loss: 0.7227022647857666 2023-01-22 12:19:10.614059: step: 462/469, loss: 0.8336557149887085 2023-01-22 12:19:11.248690: step: 464/469, loss: 0.054121725261211395 2023-01-22 12:19:11.893474: step: 466/469, loss: 0.16045308113098145 2023-01-22 12:19:12.533125: step: 468/469, loss: 0.8711228966712952 2023-01-22 12:19:13.183400: step: 470/469, loss: 0.07353794574737549 2023-01-22 12:19:13.876795: step: 472/469, loss: 0.21562889218330383 2023-01-22 12:19:14.540391: step: 474/469, loss: 0.11944035440683365 2023-01-22 12:19:15.214681: step: 476/469, loss: 0.19969020783901215 2023-01-22 12:19:15.825921: step: 478/469, loss: 0.032023049890995026 2023-01-22 12:19:16.473768: step: 480/469, loss: 0.23766747117042542 2023-01-22 12:19:17.127122: step: 482/469, loss: 0.11172518134117126 2023-01-22 12:19:17.796297: step: 484/469, loss: 0.13469244539737701 2023-01-22 12:19:18.473132: step: 486/469, loss: 0.0978257805109024 2023-01-22 12:19:19.127663: step: 488/469, loss: 0.17340059578418732 2023-01-22 12:19:19.749772: step: 490/469, loss: 0.028775939717888832 2023-01-22 12:19:20.436482: step: 492/469, loss: 0.058825958520174026 2023-01-22 12:19:21.138854: step: 494/469, loss: 0.09280989319086075 2023-01-22 12:19:21.775507: step: 496/469, loss: 0.050323184579610825 2023-01-22 12:19:22.413775: step: 498/469, loss: 0.048042405396699905 2023-01-22 12:19:23.044926: step: 500/469, loss: 0.13777221739292145 2023-01-22 12:19:23.768601: step: 502/469, loss: 0.23255488276481628 2023-01-22 12:19:24.412371: step: 504/469, loss: 0.12181255221366882 2023-01-22 12:19:25.052029: step: 506/469, loss: 0.10417280346155167 2023-01-22 12:19:25.719044: step: 508/469, loss: 0.19356317818164825 2023-01-22 12:19:26.415288: step: 510/469, loss: 0.17776364088058472 2023-01-22 12:19:27.064504: step: 512/469, loss: 0.11597738415002823 2023-01-22 12:19:27.770514: step: 514/469, loss: 0.1397777944803238 2023-01-22 12:19:28.399223: step: 516/469, loss: 0.1986369490623474 2023-01-22 12:19:29.092348: step: 518/469, loss: 0.06437753140926361 2023-01-22 12:19:29.837413: step: 520/469, loss: 0.06670169532299042 2023-01-22 12:19:30.572898: step: 522/469, loss: 0.045091353356838226 2023-01-22 12:19:31.221498: step: 524/469, loss: 0.03746681287884712 
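Note on the per-epoch evaluation blocks (e.g. the "Dev Chinese ... 'epoch': 15" entry above): each of 'template' and 'slot' carries precision/recall/F1, the F1 values are the usual harmonic mean 2pr/(p+r), and the logged 'combined' values are consistent with the product template-F1 x slot-F1. A small self-check using numbers copied from the epoch-15 Dev Chinese entry; reading 'combined' as that product is an inference from the logged values, not from train.py:

# --- sketch: relationship between the logged metrics (inferred from the numbers, not from train.py) ---
def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0 when both are 0)."""
    return 2 * p * r / (p + r) if (p + r) else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    """'combined' as printed in the summaries: template F1 times slot F1 (assumed)."""
    return template_f1 * slot_f1

# Spot check against "Dev Chinese", epoch 15 (values copied from the log above):
template = {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579}
slot = {"p": 0.3012612896061593, "r": 0.35099512678971884, "f1": 0.324232132897777}

assert abs(f1(template["p"], template["r"]) - template["f1"]) < 1e-6
assert abs(f1(slot["p"], slot["r"]) - slot["f1"]) < 1e-6
assert abs(combined(template["f1"], slot["f1"]) - 0.238907887398362) < 1e-6

The "Current best result" block keeps a different epoch per language (5 for Chinese, 8 for Korean, 11 for Russian); the selection criterion is not deducible from these summaries alone (later epochs show higher dev 'combined' than the stored best), so no assumption about it is made in the sketch.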
2023-01-22 12:19:31.851917: step: 526/469, loss: 0.09841018915176392 2023-01-22 12:19:32.612007: step: 528/469, loss: 0.08979086577892303 2023-01-22 12:19:33.304599: step: 530/469, loss: 0.14209061861038208 2023-01-22 12:19:33.916942: step: 532/469, loss: 0.1265891194343567 2023-01-22 12:19:34.581646: step: 534/469, loss: 0.3015148937702179 2023-01-22 12:19:35.239415: step: 536/469, loss: 0.05998941510915756 2023-01-22 12:19:35.817819: step: 538/469, loss: 0.04750685766339302 2023-01-22 12:19:36.500313: step: 540/469, loss: 0.07647495716810226 2023-01-22 12:19:37.211371: step: 542/469, loss: 0.01271448377519846 2023-01-22 12:19:37.893164: step: 544/469, loss: 0.3825611472129822 2023-01-22 12:19:38.540654: step: 546/469, loss: 0.1054229736328125 2023-01-22 12:19:39.209326: step: 548/469, loss: 0.06574345380067825 2023-01-22 12:19:39.848683: step: 550/469, loss: 0.07906662672758102 2023-01-22 12:19:40.449829: step: 552/469, loss: 0.07441949099302292 2023-01-22 12:19:41.093414: step: 554/469, loss: 0.038098521530628204 2023-01-22 12:19:41.754592: step: 556/469, loss: 0.5081790685653687 2023-01-22 12:19:42.416273: step: 558/469, loss: 0.2299017757177353 2023-01-22 12:19:43.108095: step: 560/469, loss: 0.02400112897157669 2023-01-22 12:19:43.746480: step: 562/469, loss: 0.05662433058023453 2023-01-22 12:19:44.364750: step: 564/469, loss: 0.056243449449539185 2023-01-22 12:19:45.035940: step: 566/469, loss: 0.167400062084198 2023-01-22 12:19:45.684133: step: 568/469, loss: 0.0932585820555687 2023-01-22 12:19:46.357045: step: 570/469, loss: 0.6299592852592468 2023-01-22 12:19:47.015464: step: 572/469, loss: 0.030439546331763268 2023-01-22 12:19:47.680953: step: 574/469, loss: 0.4335290789604187 2023-01-22 12:19:48.389748: step: 576/469, loss: 0.13615505397319794 2023-01-22 12:19:49.117312: step: 578/469, loss: 0.12170863151550293 2023-01-22 12:19:49.764554: step: 580/469, loss: 0.17706061899662018 2023-01-22 12:19:50.411291: step: 582/469, loss: 0.07675725966691971 2023-01-22 12:19:51.068547: step: 584/469, loss: 0.11557069420814514 2023-01-22 12:19:51.748367: step: 586/469, loss: 0.13605904579162598 2023-01-22 12:19:52.370412: step: 588/469, loss: 0.6559314131736755 2023-01-22 12:19:53.000045: step: 590/469, loss: 0.08097304403781891 2023-01-22 12:19:53.712364: step: 592/469, loss: 0.029244577512145042 2023-01-22 12:19:54.330083: step: 594/469, loss: 0.06050251051783562 2023-01-22 12:19:55.020461: step: 596/469, loss: 0.11156366020441055 2023-01-22 12:19:55.641452: step: 598/469, loss: 0.07792611420154572 2023-01-22 12:19:56.371444: step: 600/469, loss: 0.14440956711769104 2023-01-22 12:19:57.022295: step: 602/469, loss: 0.07254817336797714 2023-01-22 12:19:57.694187: step: 604/469, loss: 0.0520678274333477 2023-01-22 12:19:58.410775: step: 606/469, loss: 0.0892920047044754 2023-01-22 12:19:59.090224: step: 608/469, loss: 0.03410578519105911 2023-01-22 12:19:59.750788: step: 610/469, loss: 0.10717874020338058 2023-01-22 12:20:00.384802: step: 612/469, loss: 0.02829250507056713 2023-01-22 12:20:01.092990: step: 614/469, loss: 0.2779192328453064 2023-01-22 12:20:01.717635: step: 616/469, loss: 0.6068034172058105 2023-01-22 12:20:02.409605: step: 618/469, loss: 0.009849566966295242 2023-01-22 12:20:03.078142: step: 620/469, loss: 0.40928858518600464 2023-01-22 12:20:03.759883: step: 622/469, loss: 0.11569209396839142 2023-01-22 12:20:04.440892: step: 624/469, loss: 0.6247853636741638 2023-01-22 12:20:05.119557: step: 626/469, loss: 0.25124236941337585 2023-01-22 12:20:05.702786: step: 628/469, 
loss: 0.07776015251874924 2023-01-22 12:20:06.349988: step: 630/469, loss: 0.10470916330814362 2023-01-22 12:20:07.027050: step: 632/469, loss: 0.11792066693305969 2023-01-22 12:20:07.657154: step: 634/469, loss: 0.05234258621931076 2023-01-22 12:20:08.331424: step: 636/469, loss: 0.060398586094379425 2023-01-22 12:20:08.920302: step: 638/469, loss: 0.08910928666591644 2023-01-22 12:20:09.604182: step: 640/469, loss: 0.2223726212978363 2023-01-22 12:20:10.216612: step: 642/469, loss: 0.05613243207335472 2023-01-22 12:20:10.820562: step: 644/469, loss: 0.11752267926931381 2023-01-22 12:20:11.497687: step: 646/469, loss: 0.13896889984607697 2023-01-22 12:20:12.160812: step: 648/469, loss: 0.06304564327001572 2023-01-22 12:20:12.878924: step: 650/469, loss: 0.1676894724369049 2023-01-22 12:20:13.537071: step: 652/469, loss: 0.09562228620052338 2023-01-22 12:20:14.167227: step: 654/469, loss: 0.1165279820561409 2023-01-22 12:20:14.759603: step: 656/469, loss: 0.037642642855644226 2023-01-22 12:20:15.486414: step: 658/469, loss: 0.2296856939792633 2023-01-22 12:20:16.071304: step: 660/469, loss: 0.11191833019256592 2023-01-22 12:20:16.757459: step: 662/469, loss: 0.06068938970565796 2023-01-22 12:20:17.398070: step: 664/469, loss: 0.06496302038431168 2023-01-22 12:20:18.085567: step: 666/469, loss: 0.10366479307413101 2023-01-22 12:20:18.746420: step: 668/469, loss: 0.04444766789674759 2023-01-22 12:20:19.362745: step: 670/469, loss: 0.07094904780387878 2023-01-22 12:20:20.078148: step: 672/469, loss: 0.05575162172317505 2023-01-22 12:20:20.745089: step: 674/469, loss: 0.08301182836294174 2023-01-22 12:20:21.404036: step: 676/469, loss: 0.11818315833806992 2023-01-22 12:20:22.095475: step: 678/469, loss: 0.10328830033540726 2023-01-22 12:20:22.747735: step: 680/469, loss: 0.056759145110845566 2023-01-22 12:20:23.446435: step: 682/469, loss: 0.051041826605796814 2023-01-22 12:20:24.117369: step: 684/469, loss: 0.04933523014187813 2023-01-22 12:20:24.790191: step: 686/469, loss: 0.0768188014626503 2023-01-22 12:20:25.405231: step: 688/469, loss: 0.04857207462191582 2023-01-22 12:20:26.031476: step: 690/469, loss: 0.3262181282043457 2023-01-22 12:20:26.611305: step: 692/469, loss: 0.05440818890929222 2023-01-22 12:20:27.238810: step: 694/469, loss: 0.08826557546854019 2023-01-22 12:20:27.903802: step: 696/469, loss: 0.05439550429582596 2023-01-22 12:20:28.576105: step: 698/469, loss: 0.12256674468517303 2023-01-22 12:20:29.207616: step: 700/469, loss: 0.011911711655557156 2023-01-22 12:20:29.835894: step: 702/469, loss: 0.0815141573548317 2023-01-22 12:20:30.471613: step: 704/469, loss: 0.026219012215733528 2023-01-22 12:20:31.146076: step: 706/469, loss: 0.034951936453580856 2023-01-22 12:20:31.779384: step: 708/469, loss: 0.07204972952604294 2023-01-22 12:20:32.471542: step: 710/469, loss: 0.2694944441318512 2023-01-22 12:20:33.127666: step: 712/469, loss: 0.0628601685166359 2023-01-22 12:20:33.839116: step: 714/469, loss: 0.1304217427968979 2023-01-22 12:20:34.446046: step: 716/469, loss: 0.3279553949832916 2023-01-22 12:20:35.222044: step: 718/469, loss: 0.10063020139932632 2023-01-22 12:20:35.902929: step: 720/469, loss: 0.03350721672177315 2023-01-22 12:20:36.544587: step: 722/469, loss: 1.0422238111495972 2023-01-22 12:20:37.189161: step: 724/469, loss: 0.09029338508844376 2023-01-22 12:20:37.788409: step: 726/469, loss: 0.039478544145822525 2023-01-22 12:20:38.511992: step: 728/469, loss: 0.05626051500439644 2023-01-22 12:20:39.111566: step: 730/469, loss: 0.01795123890042305 2023-01-22 
12:20:39.767977: step: 732/469, loss: 0.274331659078598 2023-01-22 12:20:40.319850: step: 734/469, loss: 0.032300472259521484 2023-01-22 12:20:40.928997: step: 736/469, loss: 0.5453379154205322 2023-01-22 12:20:41.511750: step: 738/469, loss: 0.023605981841683388 2023-01-22 12:20:42.182899: step: 740/469, loss: 0.04688532277941704 2023-01-22 12:20:42.799691: step: 742/469, loss: 0.1466190367937088 2023-01-22 12:20:43.628132: step: 744/469, loss: 0.07332953810691833 2023-01-22 12:20:44.279799: step: 746/469, loss: 0.08779364079236984 2023-01-22 12:20:44.949543: step: 748/469, loss: 0.04206997901201248 2023-01-22 12:20:45.710574: step: 750/469, loss: 0.1415180265903473 2023-01-22 12:20:46.438253: step: 752/469, loss: 1.082939624786377 2023-01-22 12:20:47.062462: step: 754/469, loss: 0.11613138020038605 2023-01-22 12:20:47.657435: step: 756/469, loss: 0.06066060811281204 2023-01-22 12:20:48.323276: step: 758/469, loss: 1.5213011503219604 2023-01-22 12:20:48.952157: step: 760/469, loss: 0.059715114533901215 2023-01-22 12:20:49.584091: step: 762/469, loss: 0.07200370728969574 2023-01-22 12:20:50.186319: step: 764/469, loss: 0.04771013930439949 2023-01-22 12:20:50.871764: step: 766/469, loss: 0.03102451004087925 2023-01-22 12:20:51.424287: step: 768/469, loss: 0.6230573654174805 2023-01-22 12:20:52.108612: step: 770/469, loss: 0.0779312327504158 2023-01-22 12:20:52.745496: step: 772/469, loss: 0.08453252166509628 2023-01-22 12:20:53.411264: step: 774/469, loss: 0.12380822002887726 2023-01-22 12:20:54.014580: step: 776/469, loss: 0.06706444174051285 2023-01-22 12:20:54.657402: step: 778/469, loss: 0.05670267343521118 2023-01-22 12:20:55.293532: step: 780/469, loss: 0.0678911879658699 2023-01-22 12:20:55.952607: step: 782/469, loss: 0.0839771181344986 2023-01-22 12:20:56.620406: step: 784/469, loss: 0.06591266393661499 2023-01-22 12:20:57.240406: step: 786/469, loss: 0.06710683554410934 2023-01-22 12:20:57.889544: step: 788/469, loss: 0.11415842920541763 2023-01-22 12:20:58.554343: step: 790/469, loss: 0.12424782663583755 2023-01-22 12:20:59.311431: step: 792/469, loss: 0.22576430439949036 2023-01-22 12:20:59.960918: step: 794/469, loss: 0.05598742142319679 2023-01-22 12:21:00.694828: step: 796/469, loss: 0.12410330027341843 2023-01-22 12:21:01.351072: step: 798/469, loss: 0.1357264667749405 2023-01-22 12:21:02.025704: step: 800/469, loss: 0.4761008620262146 2023-01-22 12:21:02.653784: step: 802/469, loss: 0.10413392633199692 2023-01-22 12:21:03.344949: step: 804/469, loss: 0.07092542946338654 2023-01-22 12:21:04.006956: step: 806/469, loss: 0.11728647351264954 2023-01-22 12:21:04.616822: step: 808/469, loss: 0.10438113659620285 2023-01-22 12:21:05.325999: step: 810/469, loss: 0.10724121332168579 2023-01-22 12:21:05.967803: step: 812/469, loss: 0.023226801306009293 2023-01-22 12:21:06.668820: step: 814/469, loss: 0.08560799062252045 2023-01-22 12:21:07.368798: step: 816/469, loss: 0.9491845369338989 2023-01-22 12:21:07.973225: step: 818/469, loss: 0.023172764107584953 2023-01-22 12:21:08.588696: step: 820/469, loss: 1.796709656715393 2023-01-22 12:21:09.286877: step: 822/469, loss: 0.0500091128051281 2023-01-22 12:21:09.922112: step: 824/469, loss: 0.6142128705978394 2023-01-22 12:21:10.528107: step: 826/469, loss: 0.013409084640443325 2023-01-22 12:21:11.224333: step: 828/469, loss: 0.22463026642799377 2023-01-22 12:21:11.898180: step: 830/469, loss: 0.17779836058616638 2023-01-22 12:21:12.531503: step: 832/469, loss: 0.07775621861219406 2023-01-22 12:21:13.183785: step: 834/469, loss: 
0.13510532677173615 2023-01-22 12:21:13.891798: step: 836/469, loss: 0.5249348282814026 2023-01-22 12:21:14.522972: step: 838/469, loss: 0.10932567715644836 2023-01-22 12:21:15.121057: step: 840/469, loss: 0.08818814903497696 2023-01-22 12:21:15.870553: step: 842/469, loss: 0.10375554859638214 2023-01-22 12:21:16.527252: step: 844/469, loss: 0.4173268973827362 2023-01-22 12:21:17.177990: step: 846/469, loss: 0.06288052350282669 2023-01-22 12:21:17.760702: step: 848/469, loss: 0.09162698686122894 2023-01-22 12:21:18.421910: step: 850/469, loss: 0.30981189012527466 2023-01-22 12:21:19.043530: step: 852/469, loss: 0.047051552683115005 2023-01-22 12:21:19.664137: step: 854/469, loss: 0.31900516152381897 2023-01-22 12:21:20.337042: step: 856/469, loss: 0.13337601721286774 2023-01-22 12:21:21.001443: step: 858/469, loss: 0.20193684101104736 2023-01-22 12:21:21.722747: step: 860/469, loss: 0.07986877113580704 2023-01-22 12:21:22.330149: step: 862/469, loss: 0.10771413892507553 2023-01-22 12:21:22.976362: step: 864/469, loss: 0.07494720816612244 2023-01-22 12:21:23.658555: step: 866/469, loss: 0.15828967094421387 2023-01-22 12:21:24.426006: step: 868/469, loss: 0.05749820917844772 2023-01-22 12:21:25.106342: step: 870/469, loss: 0.169984832406044 2023-01-22 12:21:25.719685: step: 872/469, loss: 0.0744345560669899 2023-01-22 12:21:26.423854: step: 874/469, loss: 0.21342425048351288 2023-01-22 12:21:27.091062: step: 876/469, loss: 0.1242813766002655 2023-01-22 12:21:27.820066: step: 878/469, loss: 0.02060120925307274 2023-01-22 12:21:28.434520: step: 880/469, loss: 0.0629616379737854 2023-01-22 12:21:29.098472: step: 882/469, loss: 0.10362087935209274 2023-01-22 12:21:29.796001: step: 884/469, loss: 0.1605091094970703 2023-01-22 12:21:30.541956: step: 886/469, loss: 0.31162071228027344 2023-01-22 12:21:31.179175: step: 888/469, loss: 0.3976716995239258 2023-01-22 12:21:31.806017: step: 890/469, loss: 0.11037041246891022 2023-01-22 12:21:32.491102: step: 892/469, loss: 0.10884872823953629 2023-01-22 12:21:33.193071: step: 894/469, loss: 0.08175639808177948 2023-01-22 12:21:33.869083: step: 896/469, loss: 0.20332136750221252 2023-01-22 12:21:34.511050: step: 898/469, loss: 0.17498016357421875 2023-01-22 12:21:35.158906: step: 900/469, loss: 0.062188196927309036 2023-01-22 12:21:35.760502: step: 902/469, loss: 0.03288247808814049 2023-01-22 12:21:36.375361: step: 904/469, loss: 1.3207803964614868 2023-01-22 12:21:37.047597: step: 906/469, loss: 0.2592983841896057 2023-01-22 12:21:37.710867: step: 908/469, loss: 0.6696668863296509 2023-01-22 12:21:38.332932: step: 910/469, loss: 0.09069929271936417 2023-01-22 12:21:38.993002: step: 912/469, loss: 0.09416594356298447 2023-01-22 12:21:39.602856: step: 914/469, loss: 0.29435089230537415 2023-01-22 12:21:40.345325: step: 916/469, loss: 0.09426331520080566 2023-01-22 12:21:41.083508: step: 918/469, loss: 0.13866384327411652 2023-01-22 12:21:41.736962: step: 920/469, loss: 0.06049207225441933 2023-01-22 12:21:42.425367: step: 922/469, loss: 0.10886509716510773 2023-01-22 12:21:43.102807: step: 924/469, loss: 0.22822241485118866 2023-01-22 12:21:43.710104: step: 926/469, loss: 0.12083389610052109 2023-01-22 12:21:44.397769: step: 928/469, loss: 0.09527838230133057 2023-01-22 12:21:45.029866: step: 930/469, loss: 0.051837168633937836 2023-01-22 12:21:45.628233: step: 932/469, loss: 0.1378217339515686 2023-01-22 12:21:46.288201: step: 934/469, loss: 0.03517038747668266 2023-01-22 12:21:46.937010: step: 936/469, loss: 0.052448615431785583 2023-01-22 
12:21:47.565141: step: 938/469, loss: 0.3225906491279602 ================================================== Loss: 0.168 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3358199257425743, 'r': 0.3218008776091082, 'f1': 0.3286609738372094}, 'combined': 0.24217124388004901, 'epoch': 16} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30352531040506087, 'r': 0.24692644082312995, 'f1': 0.2723160546980889}, 'combined': 0.14853602983532121, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3267597087378641, 'r': 0.31931925996204935, 'f1': 0.32299664107485604}, 'combined': 0.2379975250025255, 'epoch': 16} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31276734161256137, 'r': 0.25101437471282345, 'f1': 0.27850884983349783}, 'combined': 0.1519139180909988, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3281469929622521, 'r': 0.32441097406704617, 'f1': 0.3262682888040712}, 'combined': 0.24040821280299984, 'epoch': 16} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.302479642017244, 'r': 0.24524629110538884, 'f1': 0.2708727334369464}, 'combined': 0.14774876369287984, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2936507936507936, 'r': 0.35238095238095235, 'f1': 0.3203463203463203}, 'combined': 0.21356421356421354, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.234375, 'r': 0.32608695652173914, 'f1': 0.2727272727272727}, 'combined': 0.13636363636363635, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40625, 'r': 0.22413793103448276, 'f1': 0.28888888888888886}, 'combined': 0.19259259259259257, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} 
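Note: across the metric blocks in this log, each reported f1 is consistent with the harmonic mean of the listed p and r, and each 'combined' value matches template f1 * slot f1. This is inferred from the logged numbers only, not taken from train.py. A minimal Python sketch that reproduces the epoch-16 Dev Chinese figures above (names here are illustrative):

    # Sketch inferred from the logged values; not the actual evaluation code.
    def f1(p, r):
        # harmonic mean of precision and recall
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    template_f1 = f1(1.0, 0.5833333333333334)             # -> 0.7368421052631579
    slot_f1 = f1(0.3358199257425743, 0.3218008776091082)  # -> 0.3286609738372094
    combined = template_f1 * slot_f1                       # -> 0.24217124388004901
    print(template_f1, slot_f1, combined)
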
-------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:24:44.668617: step: 2/469, loss: 0.08634264767169952 2023-01-22 12:24:45.349453: step: 4/469, loss: 0.09594616293907166 2023-01-22 12:24:46.048542: step: 6/469, loss: 0.0449104905128479 2023-01-22 12:24:46.663813: step: 8/469, loss: 0.002346464665606618 2023-01-22 12:24:47.377733: step: 10/469, loss: 0.04650544375181198 2023-01-22 12:24:48.053653: step: 12/469, loss: 0.05193765461444855 2023-01-22 12:24:48.743291: step: 14/469, loss: 0.08693695813417435 2023-01-22 12:24:49.400390: step: 16/469, loss: 0.9412013292312622 2023-01-22 12:24:50.077250: step: 18/469, loss: 0.05715508013963699 2023-01-22 12:24:50.693498: step: 20/469, loss: 0.17629854381084442 2023-01-22 12:24:51.395677: step: 22/469, loss: 0.08444023877382278 2023-01-22 12:24:52.052409: step: 24/469, loss: 0.07957160472869873 2023-01-22 12:24:52.830742: step: 26/469, loss: 0.04437550529837608 2023-01-22 12:24:53.469407: step: 28/469, loss: 0.05539350584149361 2023-01-22 12:24:54.132779: step: 30/469, loss: 0.08054979890584946 2023-01-22 12:24:54.792469: step: 32/469, loss: 0.4293166697025299 2023-01-22 12:24:55.434506: step: 34/469, loss: 0.2442564070224762 2023-01-22 12:24:56.059053: step: 36/469, loss: 0.1444416642189026 2023-01-22 12:24:56.757223: step: 38/469, loss: 0.09923011064529419 2023-01-22 12:24:57.449332: step: 40/469, loss: 0.1501404345035553 2023-01-22 12:24:58.080539: step: 42/469, loss: 0.022317150607705116 2023-01-22 12:24:58.709013: step: 44/469, loss: 0.18897463381290436 2023-01-22 12:24:59.388612: step: 46/469, loss: 0.1063184142112732 2023-01-22 12:24:59.991579: step: 48/469, loss: 0.012038880959153175 2023-01-22 12:25:00.738230: step: 50/469, loss: 0.13240452110767365 2023-01-22 12:25:01.393798: step: 52/469, loss: 0.023567305877804756 2023-01-22 12:25:02.064311: step: 54/469, loss: 0.05520399287343025 2023-01-22 12:25:02.716146: step: 56/469, loss: 0.05590912327170372 2023-01-22 12:25:03.370028: step: 58/469, loss: 0.05357128754258156 2023-01-22 12:25:04.050730: step: 60/469, loss: 0.06377561390399933 2023-01-22 12:25:04.673103: step: 62/469, loss: 0.07197009772062302 2023-01-22 12:25:05.352883: step: 64/469, loss: 0.08526509255170822 2023-01-22 12:25:05.962647: step: 66/469, loss: 0.16183842718601227 2023-01-22 12:25:06.652570: step: 68/469, loss: 0.03890698775649071 2023-01-22 12:25:07.298066: step: 70/469, loss: 0.08364640921354294 2023-01-22 12:25:07.987865: step: 72/469, loss: 0.06541049480438232 2023-01-22 12:25:08.648181: step: 74/469, loss: 0.1643519103527069 2023-01-22 12:25:09.303014: step: 76/469, loss: 0.04508458450436592 2023-01-22 12:25:09.934252: step: 
78/469, loss: 0.027948787435889244 2023-01-22 12:25:10.550633: step: 80/469, loss: 0.12359713017940521 2023-01-22 12:25:11.181874: step: 82/469, loss: 0.09651672095060349 2023-01-22 12:25:11.805388: step: 84/469, loss: 0.47778555750846863 2023-01-22 12:25:12.460039: step: 86/469, loss: 0.35092127323150635 2023-01-22 12:25:13.070810: step: 88/469, loss: 0.05257191136479378 2023-01-22 12:25:13.749639: step: 90/469, loss: 0.03308539092540741 2023-01-22 12:25:14.487081: step: 92/469, loss: 0.22135744988918304 2023-01-22 12:25:15.094706: step: 94/469, loss: 0.08995926380157471 2023-01-22 12:25:15.723402: step: 96/469, loss: 1.2354682683944702 2023-01-22 12:25:16.344107: step: 98/469, loss: 0.024919720366597176 2023-01-22 12:25:16.981104: step: 100/469, loss: 0.06241423636674881 2023-01-22 12:25:17.668128: step: 102/469, loss: 0.07183968275785446 2023-01-22 12:25:18.266617: step: 104/469, loss: 0.1211848109960556 2023-01-22 12:25:18.899457: step: 106/469, loss: 0.11559804528951645 2023-01-22 12:25:19.603395: step: 108/469, loss: 0.12789353728294373 2023-01-22 12:25:20.278242: step: 110/469, loss: 0.08593995869159698 2023-01-22 12:25:20.930716: step: 112/469, loss: 0.13324569165706635 2023-01-22 12:25:21.573986: step: 114/469, loss: 0.15789788961410522 2023-01-22 12:25:22.180137: step: 116/469, loss: 0.04587888345122337 2023-01-22 12:25:22.827147: step: 118/469, loss: 0.23107899725437164 2023-01-22 12:25:23.471681: step: 120/469, loss: 0.20080715417861938 2023-01-22 12:25:24.159991: step: 122/469, loss: 0.3319023847579956 2023-01-22 12:25:24.850271: step: 124/469, loss: 0.08711359649896622 2023-01-22 12:25:25.449460: step: 126/469, loss: 0.07506758719682693 2023-01-22 12:25:26.053071: step: 128/469, loss: 0.062093090265989304 2023-01-22 12:25:26.717035: step: 130/469, loss: 0.06379514187574387 2023-01-22 12:25:27.348845: step: 132/469, loss: 0.02370130456984043 2023-01-22 12:25:27.958200: step: 134/469, loss: 0.0908806174993515 2023-01-22 12:25:28.641107: step: 136/469, loss: 0.1807468831539154 2023-01-22 12:25:29.355240: step: 138/469, loss: 0.8564741015434265 2023-01-22 12:25:30.008895: step: 140/469, loss: 0.15068039298057556 2023-01-22 12:25:30.679392: step: 142/469, loss: 0.10965810716152191 2023-01-22 12:25:31.330519: step: 144/469, loss: 0.05497078225016594 2023-01-22 12:25:32.047661: step: 146/469, loss: 0.055678028613328934 2023-01-22 12:25:32.714231: step: 148/469, loss: 1.1616432666778564 2023-01-22 12:25:33.315277: step: 150/469, loss: 0.16371865570545197 2023-01-22 12:25:33.971167: step: 152/469, loss: 0.03400680050253868 2023-01-22 12:25:34.730160: step: 154/469, loss: 0.06986163556575775 2023-01-22 12:25:35.448425: step: 156/469, loss: 0.04563900828361511 2023-01-22 12:25:36.125197: step: 158/469, loss: 0.0924905389547348 2023-01-22 12:25:36.808734: step: 160/469, loss: 0.1355503350496292 2023-01-22 12:25:37.390707: step: 162/469, loss: 0.03501926735043526 2023-01-22 12:25:38.087123: step: 164/469, loss: 0.05254092067480087 2023-01-22 12:25:38.752891: step: 166/469, loss: 0.10394218564033508 2023-01-22 12:25:39.419511: step: 168/469, loss: 0.1005459800362587 2023-01-22 12:25:40.065922: step: 170/469, loss: 0.23538199067115784 2023-01-22 12:25:40.705343: step: 172/469, loss: 0.09074113517999649 2023-01-22 12:25:41.357966: step: 174/469, loss: 0.04362407326698303 2023-01-22 12:25:42.011952: step: 176/469, loss: 0.061876360327005386 2023-01-22 12:25:42.721965: step: 178/469, loss: 0.05744713544845581 2023-01-22 12:25:43.400852: step: 180/469, loss: 0.041415516287088394 2023-01-22 
12:25:44.064544: step: 182/469, loss: 0.06296899169683456 2023-01-22 12:25:44.701672: step: 184/469, loss: 0.017496343702077866 2023-01-22 12:25:45.363161: step: 186/469, loss: 0.05537179112434387 2023-01-22 12:25:45.979292: step: 188/469, loss: 0.10990407317876816 2023-01-22 12:25:46.601208: step: 190/469, loss: 0.05061742663383484 2023-01-22 12:25:47.232449: step: 192/469, loss: 0.15230831503868103 2023-01-22 12:25:47.914213: step: 194/469, loss: 0.17597559094429016 2023-01-22 12:25:48.552937: step: 196/469, loss: 0.2093985229730606 2023-01-22 12:25:49.249685: step: 198/469, loss: 0.35954588651657104 2023-01-22 12:25:49.857355: step: 200/469, loss: 0.015171553939580917 2023-01-22 12:25:50.526414: step: 202/469, loss: 0.29540735483169556 2023-01-22 12:25:51.160554: step: 204/469, loss: 0.2296929508447647 2023-01-22 12:25:51.830724: step: 206/469, loss: 0.2057746797800064 2023-01-22 12:25:52.457649: step: 208/469, loss: 0.06798412650823593 2023-01-22 12:25:53.077716: step: 210/469, loss: 0.04519176483154297 2023-01-22 12:25:53.723826: step: 212/469, loss: 0.06705667823553085 2023-01-22 12:25:54.368143: step: 214/469, loss: 0.13999922573566437 2023-01-22 12:25:55.022335: step: 216/469, loss: 0.03609000891447067 2023-01-22 12:25:55.641944: step: 218/469, loss: 0.8149121403694153 2023-01-22 12:25:56.402866: step: 220/469, loss: 0.047628432512283325 2023-01-22 12:25:57.053736: step: 222/469, loss: 0.10824434459209442 2023-01-22 12:25:57.639738: step: 224/469, loss: 0.04613751173019409 2023-01-22 12:25:58.231544: step: 226/469, loss: 0.045456916093826294 2023-01-22 12:25:58.819752: step: 228/469, loss: 0.09128410369157791 2023-01-22 12:25:59.557468: step: 230/469, loss: 0.08247118443250656 2023-01-22 12:26:00.215477: step: 232/469, loss: 0.041743699461221695 2023-01-22 12:26:00.876643: step: 234/469, loss: 0.12287967652082443 2023-01-22 12:26:01.549203: step: 236/469, loss: 0.0729488879442215 2023-01-22 12:26:02.182067: step: 238/469, loss: 0.08309607952833176 2023-01-22 12:26:02.804431: step: 240/469, loss: 0.26476842164993286 2023-01-22 12:26:03.496401: step: 242/469, loss: 0.29417094588279724 2023-01-22 12:26:04.134683: step: 244/469, loss: 0.054838959127664566 2023-01-22 12:26:04.823415: step: 246/469, loss: 0.14131005108356476 2023-01-22 12:26:05.462044: step: 248/469, loss: 0.2602645754814148 2023-01-22 12:26:06.146337: step: 250/469, loss: 0.17374669015407562 2023-01-22 12:26:06.673189: step: 252/469, loss: 0.10324373841285706 2023-01-22 12:26:07.422452: step: 254/469, loss: 0.025312131270766258 2023-01-22 12:26:08.047068: step: 256/469, loss: 0.03437881916761398 2023-01-22 12:26:08.707047: step: 258/469, loss: 0.0893375501036644 2023-01-22 12:26:09.332628: step: 260/469, loss: 0.4117080867290497 2023-01-22 12:26:10.008625: step: 262/469, loss: 0.20586228370666504 2023-01-22 12:26:10.651894: step: 264/469, loss: 0.031112268567085266 2023-01-22 12:26:11.315656: step: 266/469, loss: 0.1244141235947609 2023-01-22 12:26:12.008023: step: 268/469, loss: 7.396092414855957 2023-01-22 12:26:12.852349: step: 270/469, loss: 0.007993660867214203 2023-01-22 12:26:13.584197: step: 272/469, loss: 0.29955166578292847 2023-01-22 12:26:14.218549: step: 274/469, loss: 0.5268498659133911 2023-01-22 12:26:14.869244: step: 276/469, loss: 0.06592971831560135 2023-01-22 12:26:15.464710: step: 278/469, loss: 0.12488150596618652 2023-01-22 12:26:16.177917: step: 280/469, loss: 0.06631255149841309 2023-01-22 12:26:16.892177: step: 282/469, loss: 0.13297708332538605 2023-01-22 12:26:17.618340: step: 284/469, loss: 
0.1295710951089859 2023-01-22 12:26:18.313010: step: 286/469, loss: 0.06153430789709091 2023-01-22 12:26:18.924601: step: 288/469, loss: 0.14072281122207642 2023-01-22 12:26:19.559038: step: 290/469, loss: 0.23823054134845734 2023-01-22 12:26:20.261275: step: 292/469, loss: 0.2127118706703186 2023-01-22 12:26:20.963121: step: 294/469, loss: 0.3261911869049072 2023-01-22 12:26:21.648321: step: 296/469, loss: 0.2718924283981323 2023-01-22 12:26:22.398832: step: 298/469, loss: 0.08088023215532303 2023-01-22 12:26:23.067370: step: 300/469, loss: 0.10450479388237 2023-01-22 12:26:23.698829: step: 302/469, loss: 0.04016311839222908 2023-01-22 12:26:24.382158: step: 304/469, loss: 0.2258114218711853 2023-01-22 12:26:25.076688: step: 306/469, loss: 0.1637382060289383 2023-01-22 12:26:25.740092: step: 308/469, loss: 0.07216216623783112 2023-01-22 12:26:26.450022: step: 310/469, loss: 0.07098806649446487 2023-01-22 12:26:27.111552: step: 312/469, loss: 0.009943253360688686 2023-01-22 12:26:27.780253: step: 314/469, loss: 0.03714841231703758 2023-01-22 12:26:28.385776: step: 316/469, loss: 0.3185752034187317 2023-01-22 12:26:29.139920: step: 318/469, loss: 0.11142365634441376 2023-01-22 12:26:29.817854: step: 320/469, loss: 0.1205689013004303 2023-01-22 12:26:30.587345: step: 322/469, loss: 0.04325685650110245 2023-01-22 12:26:31.294544: step: 324/469, loss: 0.09570005536079407 2023-01-22 12:26:31.958454: step: 326/469, loss: 0.5498594045639038 2023-01-22 12:26:32.569651: step: 328/469, loss: 0.07359788566827774 2023-01-22 12:26:33.228665: step: 330/469, loss: 0.049295492470264435 2023-01-22 12:26:33.959944: step: 332/469, loss: 0.04479082301259041 2023-01-22 12:26:34.581072: step: 334/469, loss: 0.11111129075288773 2023-01-22 12:26:35.249128: step: 336/469, loss: 0.16752097010612488 2023-01-22 12:26:35.943146: step: 338/469, loss: 0.1825641393661499 2023-01-22 12:26:36.626293: step: 340/469, loss: 0.0881267786026001 2023-01-22 12:26:37.245285: step: 342/469, loss: 0.10761849582195282 2023-01-22 12:26:37.901199: step: 344/469, loss: 0.1531354784965515 2023-01-22 12:26:38.539608: step: 346/469, loss: 0.03438398614525795 2023-01-22 12:26:39.145995: step: 348/469, loss: 0.02145991101861 2023-01-22 12:26:39.911017: step: 350/469, loss: 0.1290917545557022 2023-01-22 12:26:40.534984: step: 352/469, loss: 0.24007418751716614 2023-01-22 12:26:41.222812: step: 354/469, loss: 0.09991821646690369 2023-01-22 12:26:41.839561: step: 356/469, loss: 0.04672125354409218 2023-01-22 12:26:42.533485: step: 358/469, loss: 0.07012709975242615 2023-01-22 12:26:43.170416: step: 360/469, loss: 0.17604485154151917 2023-01-22 12:26:43.758142: step: 362/469, loss: 0.09747768938541412 2023-01-22 12:26:44.391363: step: 364/469, loss: 0.03829443082213402 2023-01-22 12:26:45.042444: step: 366/469, loss: 0.06037236377596855 2023-01-22 12:26:45.741335: step: 368/469, loss: 0.06923611462116241 2023-01-22 12:26:46.465149: step: 370/469, loss: 0.10725891590118408 2023-01-22 12:26:47.119808: step: 372/469, loss: 0.06752616912126541 2023-01-22 12:26:47.719973: step: 374/469, loss: 0.07115186750888824 2023-01-22 12:26:48.368214: step: 376/469, loss: 0.12045502662658691 2023-01-22 12:26:48.951530: step: 378/469, loss: 0.07575985044240952 2023-01-22 12:26:49.561018: step: 380/469, loss: 0.16566957533359528 2023-01-22 12:26:50.186101: step: 382/469, loss: 0.13173000514507294 2023-01-22 12:26:50.806922: step: 384/469, loss: 0.0986829400062561 2023-01-22 12:26:51.512056: step: 386/469, loss: 0.3410768508911133 2023-01-22 12:26:52.197225: step: 
388/469, loss: 0.010278942063450813 2023-01-22 12:26:52.799139: step: 390/469, loss: 0.0442851297557354 2023-01-22 12:26:53.530204: step: 392/469, loss: 0.14880037307739258 2023-01-22 12:26:54.133535: step: 394/469, loss: 0.05399356782436371 2023-01-22 12:26:54.751961: step: 396/469, loss: 0.36965230107307434 2023-01-22 12:26:55.358021: step: 398/469, loss: 0.13781239092350006 2023-01-22 12:26:55.917876: step: 400/469, loss: 0.06432560086250305 2023-01-22 12:26:56.540607: step: 402/469, loss: 0.5758511424064636 2023-01-22 12:26:57.171969: step: 404/469, loss: 0.02899148315191269 2023-01-22 12:26:57.813071: step: 406/469, loss: 0.13911819458007812 2023-01-22 12:26:58.494324: step: 408/469, loss: 0.04071029648184776 2023-01-22 12:26:59.119870: step: 410/469, loss: 0.09789225459098816 2023-01-22 12:26:59.815246: step: 412/469, loss: 0.14823314547538757 2023-01-22 12:27:00.454679: step: 414/469, loss: 0.04966295510530472 2023-01-22 12:27:01.081867: step: 416/469, loss: 0.17922964692115784 2023-01-22 12:27:01.761707: step: 418/469, loss: 0.10353213548660278 2023-01-22 12:27:02.454838: step: 420/469, loss: 0.6564441323280334 2023-01-22 12:27:03.178136: step: 422/469, loss: 0.17564235627651215 2023-01-22 12:27:03.830614: step: 424/469, loss: 0.12924504280090332 2023-01-22 12:27:04.445475: step: 426/469, loss: 0.024177247658371925 2023-01-22 12:27:05.093740: step: 428/469, loss: 0.031831398606300354 2023-01-22 12:27:05.692516: step: 430/469, loss: 0.07344602793455124 2023-01-22 12:27:06.278351: step: 432/469, loss: 1.1346118450164795 2023-01-22 12:27:06.917073: step: 434/469, loss: 0.06825843453407288 2023-01-22 12:27:07.572355: step: 436/469, loss: 0.2813493311405182 2023-01-22 12:27:08.372538: step: 438/469, loss: 0.12063251435756683 2023-01-22 12:27:09.022740: step: 440/469, loss: 0.08347788453102112 2023-01-22 12:27:09.696701: step: 442/469, loss: 0.1416632980108261 2023-01-22 12:27:10.349910: step: 444/469, loss: 0.25256383419036865 2023-01-22 12:27:11.045015: step: 446/469, loss: 0.10278443247079849 2023-01-22 12:27:11.701081: step: 448/469, loss: 0.04323122277855873 2023-01-22 12:27:12.371264: step: 450/469, loss: 0.07204969972372055 2023-01-22 12:27:13.063859: step: 452/469, loss: 0.09187383204698563 2023-01-22 12:27:13.730712: step: 454/469, loss: 0.015891987830400467 2023-01-22 12:27:14.424818: step: 456/469, loss: 0.09873911738395691 2023-01-22 12:27:15.037552: step: 458/469, loss: 0.11525193601846695 2023-01-22 12:27:15.659364: step: 460/469, loss: 0.10839619487524033 2023-01-22 12:27:16.345269: step: 462/469, loss: 0.09071864932775497 2023-01-22 12:27:16.939500: step: 464/469, loss: 0.2341710329055786 2023-01-22 12:27:17.646792: step: 466/469, loss: 0.07345566898584366 2023-01-22 12:27:18.314708: step: 468/469, loss: 0.09756768494844437 2023-01-22 12:27:18.986667: step: 470/469, loss: 0.049766022711992264 2023-01-22 12:27:19.568880: step: 472/469, loss: 0.017392940819263458 2023-01-22 12:27:20.244521: step: 474/469, loss: 0.6033975481987 2023-01-22 12:27:20.929439: step: 476/469, loss: 0.19872227311134338 2023-01-22 12:27:21.600347: step: 478/469, loss: 0.05665751174092293 2023-01-22 12:27:22.314317: step: 480/469, loss: 0.03284783661365509 2023-01-22 12:27:22.914203: step: 482/469, loss: 0.12822693586349487 2023-01-22 12:27:23.553290: step: 484/469, loss: 0.021034792065620422 2023-01-22 12:27:24.239662: step: 486/469, loss: 0.7361587882041931 2023-01-22 12:27:24.838560: step: 488/469, loss: 0.13983362913131714 2023-01-22 12:27:25.477226: step: 490/469, loss: 0.08609239757061005 
2023-01-22 12:27:26.103654: step: 492/469, loss: 0.1935974508523941 2023-01-22 12:27:26.718831: step: 494/469, loss: 0.10512497276067734 2023-01-22 12:27:27.370419: step: 496/469, loss: 0.09545408189296722 2023-01-22 12:27:28.043958: step: 498/469, loss: 0.031101834028959274 2023-01-22 12:27:28.645505: step: 500/469, loss: 0.015330853872001171 2023-01-22 12:27:29.362441: step: 502/469, loss: 0.10156291723251343 2023-01-22 12:27:30.018544: step: 504/469, loss: 0.07097966223955154 2023-01-22 12:27:30.653382: step: 506/469, loss: 0.04172803834080696 2023-01-22 12:27:31.259390: step: 508/469, loss: 0.027415333315730095 2023-01-22 12:27:31.854259: step: 510/469, loss: 0.30458492040634155 2023-01-22 12:27:32.516569: step: 512/469, loss: 0.1232391893863678 2023-01-22 12:27:33.063475: step: 514/469, loss: 0.04923182725906372 2023-01-22 12:27:33.690782: step: 516/469, loss: 0.03774645924568176 2023-01-22 12:27:34.382312: step: 518/469, loss: 0.057874489575624466 2023-01-22 12:27:35.084904: step: 520/469, loss: 0.0945911705493927 2023-01-22 12:27:35.722697: step: 522/469, loss: 0.060389697551727295 2023-01-22 12:27:36.424792: step: 524/469, loss: 0.13009800016880035 2023-01-22 12:27:37.095838: step: 526/469, loss: 0.1300317347049713 2023-01-22 12:27:37.754933: step: 528/469, loss: 0.6446691751480103 2023-01-22 12:27:38.398056: step: 530/469, loss: 0.03245815634727478 2023-01-22 12:27:39.037005: step: 532/469, loss: 0.7724708318710327 2023-01-22 12:27:39.679564: step: 534/469, loss: 0.07497677952051163 2023-01-22 12:27:40.302831: step: 536/469, loss: 0.12525536119937897 2023-01-22 12:27:40.920432: step: 538/469, loss: 0.051523033529520035 2023-01-22 12:27:41.548979: step: 540/469, loss: 0.03323983773589134 2023-01-22 12:27:42.151929: step: 542/469, loss: 0.07192233949899673 2023-01-22 12:27:42.781679: step: 544/469, loss: 0.03610096499323845 2023-01-22 12:27:43.488426: step: 546/469, loss: 0.39437055587768555 2023-01-22 12:27:44.155098: step: 548/469, loss: 0.04011363908648491 2023-01-22 12:27:44.816265: step: 550/469, loss: 0.03484440967440605 2023-01-22 12:27:45.456944: step: 552/469, loss: 0.08329173177480698 2023-01-22 12:27:46.131127: step: 554/469, loss: 0.02134370617568493 2023-01-22 12:27:46.737220: step: 556/469, loss: 0.10893407464027405 2023-01-22 12:27:47.446224: step: 558/469, loss: 0.07246120274066925 2023-01-22 12:27:48.042406: step: 560/469, loss: 0.07137060165405273 2023-01-22 12:27:48.663645: step: 562/469, loss: 0.04064168781042099 2023-01-22 12:27:49.406162: step: 564/469, loss: 0.14694608747959137 2023-01-22 12:27:50.036524: step: 566/469, loss: 0.14777693152427673 2023-01-22 12:27:50.703081: step: 568/469, loss: 0.06216861680150032 2023-01-22 12:27:51.336441: step: 570/469, loss: 0.4101116955280304 2023-01-22 12:27:51.931714: step: 572/469, loss: 0.05989619717001915 2023-01-22 12:27:52.591813: step: 574/469, loss: 0.5068990588188171 2023-01-22 12:27:53.249176: step: 576/469, loss: 0.038708191365003586 2023-01-22 12:27:53.963762: step: 578/469, loss: 0.1284210979938507 2023-01-22 12:27:54.640102: step: 580/469, loss: 0.1254810243844986 2023-01-22 12:27:55.356098: step: 582/469, loss: 0.09927408397197723 2023-01-22 12:27:56.076420: step: 584/469, loss: 0.053538136184215546 2023-01-22 12:27:56.695577: step: 586/469, loss: 0.28132787346839905 2023-01-22 12:27:57.325093: step: 588/469, loss: 0.040789805352687836 2023-01-22 12:27:58.024766: step: 590/469, loss: 0.33965402841567993 2023-01-22 12:27:58.683345: step: 592/469, loss: 0.4397242069244385 2023-01-22 12:27:59.334012: step: 
594/469, loss: 0.026430943980813026 2023-01-22 12:27:59.983985: step: 596/469, loss: 0.06564439088106155 2023-01-22 12:28:00.654707: step: 598/469, loss: 0.04646633565425873 2023-01-22 12:28:01.319638: step: 600/469, loss: 0.05953690782189369 2023-01-22 12:28:01.919431: step: 602/469, loss: 0.2873972952365875 2023-01-22 12:28:02.633330: step: 604/469, loss: 0.0905509889125824 2023-01-22 12:28:03.280908: step: 606/469, loss: 0.01976645365357399 2023-01-22 12:28:03.873585: step: 608/469, loss: 0.09676215052604675 2023-01-22 12:28:04.595590: step: 610/469, loss: 0.0774192363023758 2023-01-22 12:28:05.210725: step: 612/469, loss: 0.08369574695825577 2023-01-22 12:28:05.893491: step: 614/469, loss: 0.09718339145183563 2023-01-22 12:28:06.581134: step: 616/469, loss: 0.010038639418780804 2023-01-22 12:28:07.214675: step: 618/469, loss: 0.08482632040977478 2023-01-22 12:28:07.854507: step: 620/469, loss: 0.07307785004377365 2023-01-22 12:28:08.490469: step: 622/469, loss: 0.15806318819522858 2023-01-22 12:28:09.093371: step: 624/469, loss: 0.03540217503905296 2023-01-22 12:28:09.725124: step: 626/469, loss: 0.19698740541934967 2023-01-22 12:28:10.291519: step: 628/469, loss: 0.07642629742622375 2023-01-22 12:28:10.960760: step: 630/469, loss: 0.05173496901988983 2023-01-22 12:28:11.626610: step: 632/469, loss: 0.04774576798081398 2023-01-22 12:28:12.235449: step: 634/469, loss: 0.15093399584293365 2023-01-22 12:28:12.948541: step: 636/469, loss: 0.01514753233641386 2023-01-22 12:28:13.645702: step: 638/469, loss: 0.04531419277191162 2023-01-22 12:28:14.300491: step: 640/469, loss: 0.10500968247652054 2023-01-22 12:28:15.018626: step: 642/469, loss: 0.061558909714221954 2023-01-22 12:28:15.741619: step: 644/469, loss: 0.21592919528484344 2023-01-22 12:28:16.497049: step: 646/469, loss: 0.02345101721584797 2023-01-22 12:28:17.181458: step: 648/469, loss: 0.22797884047031403 2023-01-22 12:28:17.894745: step: 650/469, loss: 0.05320119857788086 2023-01-22 12:28:18.545165: step: 652/469, loss: 0.03165804594755173 2023-01-22 12:28:19.221172: step: 654/469, loss: 0.13139890134334564 2023-01-22 12:28:19.929085: step: 656/469, loss: 0.11664785444736481 2023-01-22 12:28:20.565448: step: 658/469, loss: 0.09250274300575256 2023-01-22 12:28:21.297985: step: 660/469, loss: 0.10019751638174057 2023-01-22 12:28:21.948650: step: 662/469, loss: 0.09804801642894745 2023-01-22 12:28:22.526308: step: 664/469, loss: 0.2572440505027771 2023-01-22 12:28:23.210092: step: 666/469, loss: 0.05931685119867325 2023-01-22 12:28:23.932817: step: 668/469, loss: 0.23405978083610535 2023-01-22 12:28:24.605095: step: 670/469, loss: 0.08222799748182297 2023-01-22 12:28:25.227205: step: 672/469, loss: 0.054324980825185776 2023-01-22 12:28:25.873421: step: 674/469, loss: 0.016847794875502586 2023-01-22 12:28:26.479429: step: 676/469, loss: 0.07167066633701324 2023-01-22 12:28:27.119125: step: 678/469, loss: 0.04647509753704071 2023-01-22 12:28:27.750469: step: 680/469, loss: 0.14157341420650482 2023-01-22 12:28:28.383087: step: 682/469, loss: 0.34782132506370544 2023-01-22 12:28:29.045851: step: 684/469, loss: 0.0348874069750309 2023-01-22 12:28:29.672198: step: 686/469, loss: 0.02658282406628132 2023-01-22 12:28:30.370498: step: 688/469, loss: 0.10351291298866272 2023-01-22 12:28:31.023909: step: 690/469, loss: 0.06464104354381561 2023-01-22 12:28:31.676230: step: 692/469, loss: 0.03564321622252464 2023-01-22 12:28:32.321447: step: 694/469, loss: 0.024245334789156914 2023-01-22 12:28:32.924605: step: 696/469, loss: 0.0168426763266325 
2023-01-22 12:28:33.640123: step: 698/469, loss: 0.22257305681705475 2023-01-22 12:28:34.274501: step: 700/469, loss: 0.07273201644420624 2023-01-22 12:28:34.911517: step: 702/469, loss: 0.030290089547634125 2023-01-22 12:28:35.464361: step: 704/469, loss: 0.08865196257829666 2023-01-22 12:28:36.099129: step: 706/469, loss: 0.056786973029375076 2023-01-22 12:28:36.685870: step: 708/469, loss: 0.0938844084739685 2023-01-22 12:28:37.344880: step: 710/469, loss: 0.03709043934941292 2023-01-22 12:28:38.058228: step: 712/469, loss: 0.5903059244155884 2023-01-22 12:28:38.749558: step: 714/469, loss: 0.09487691521644592 2023-01-22 12:28:39.390306: step: 716/469, loss: 0.0641891285777092 2023-01-22 12:28:40.021046: step: 718/469, loss: 0.07937748730182648 2023-01-22 12:28:40.652160: step: 720/469, loss: 0.011846196837723255 2023-01-22 12:28:41.320013: step: 722/469, loss: 0.06773900240659714 2023-01-22 12:28:41.942847: step: 724/469, loss: 0.03862158954143524 2023-01-22 12:28:42.531863: step: 726/469, loss: 0.06925870478153229 2023-01-22 12:28:43.168681: step: 728/469, loss: 0.18467117846012115 2023-01-22 12:28:43.809872: step: 730/469, loss: 0.010339128784835339 2023-01-22 12:28:44.423354: step: 732/469, loss: 0.024314304813742638 2023-01-22 12:28:45.048232: step: 734/469, loss: 0.1926196962594986 2023-01-22 12:28:45.699094: step: 736/469, loss: 0.08903618901968002 2023-01-22 12:28:46.443772: step: 738/469, loss: 0.2919049561023712 2023-01-22 12:28:47.101670: step: 740/469, loss: 0.24509607255458832 2023-01-22 12:28:47.742214: step: 742/469, loss: 0.1600656658411026 2023-01-22 12:28:48.390763: step: 744/469, loss: 0.27825015783309937 2023-01-22 12:28:48.993664: step: 746/469, loss: 0.014659815467894077 2023-01-22 12:28:49.604969: step: 748/469, loss: 0.04364389181137085 2023-01-22 12:28:50.296440: step: 750/469, loss: 0.020093008875846863 2023-01-22 12:28:50.917240: step: 752/469, loss: 0.03338088467717171 2023-01-22 12:28:51.530278: step: 754/469, loss: 0.03906276449561119 2023-01-22 12:28:52.177349: step: 756/469, loss: 0.15745244920253754 2023-01-22 12:28:52.912044: step: 758/469, loss: 0.14484204351902008 2023-01-22 12:28:53.555844: step: 760/469, loss: 0.1907242387533188 2023-01-22 12:28:54.206143: step: 762/469, loss: 0.04675031825900078 2023-01-22 12:28:54.849520: step: 764/469, loss: 0.42667675018310547 2023-01-22 12:28:55.501608: step: 766/469, loss: 0.07432986050844193 2023-01-22 12:28:56.204318: step: 768/469, loss: 0.06683173030614853 2023-01-22 12:28:56.826803: step: 770/469, loss: 0.029909415170550346 2023-01-22 12:28:57.473890: step: 772/469, loss: 0.13913476467132568 2023-01-22 12:28:58.097290: step: 774/469, loss: 0.1444622278213501 2023-01-22 12:28:58.763612: step: 776/469, loss: 0.06301633268594742 2023-01-22 12:28:59.451600: step: 778/469, loss: 0.09481874853372574 2023-01-22 12:29:00.097983: step: 780/469, loss: 0.07913465052843094 2023-01-22 12:29:00.753099: step: 782/469, loss: 0.04400838539004326 2023-01-22 12:29:01.391535: step: 784/469, loss: 1.2909154891967773 2023-01-22 12:29:02.005460: step: 786/469, loss: 0.07545351982116699 2023-01-22 12:29:02.664142: step: 788/469, loss: 0.033128563314676285 2023-01-22 12:29:03.296085: step: 790/469, loss: 0.029645385220646858 2023-01-22 12:29:03.921161: step: 792/469, loss: 0.08934933692216873 2023-01-22 12:29:04.604885: step: 794/469, loss: 0.07333355396986008 2023-01-22 12:29:05.268759: step: 796/469, loss: 0.11897078156471252 2023-01-22 12:29:05.978327: step: 798/469, loss: 0.20713704824447632 2023-01-22 12:29:06.626292: step: 
800/469, loss: 0.061616744846105576 2023-01-22 12:29:07.269541: step: 802/469, loss: 0.11730055510997772 2023-01-22 12:29:07.953955: step: 804/469, loss: 0.01770954020321369 2023-01-22 12:29:08.571156: step: 806/469, loss: 0.038086600601673126 2023-01-22 12:29:09.230052: step: 808/469, loss: 0.4315366744995117 2023-01-22 12:29:09.862756: step: 810/469, loss: 0.11874110996723175 2023-01-22 12:29:10.486251: step: 812/469, loss: 0.037262242287397385 2023-01-22 12:29:11.168020: step: 814/469, loss: 0.12709051370620728 2023-01-22 12:29:11.789476: step: 816/469, loss: 0.042711228132247925 2023-01-22 12:29:12.494463: step: 818/469, loss: 0.1240357831120491 2023-01-22 12:29:13.154885: step: 820/469, loss: 0.40984612703323364 2023-01-22 12:29:13.767185: step: 822/469, loss: 0.07238990813493729 2023-01-22 12:29:14.438222: step: 824/469, loss: 0.023135146126151085 2023-01-22 12:29:15.079040: step: 826/469, loss: 0.09719256311655045 2023-01-22 12:29:15.687088: step: 828/469, loss: 0.2386380136013031 2023-01-22 12:29:16.355442: step: 830/469, loss: 0.06977793574333191 2023-01-22 12:29:17.007739: step: 832/469, loss: 0.06054136902093887 2023-01-22 12:29:17.671744: step: 834/469, loss: 0.031856805086135864 2023-01-22 12:29:18.292078: step: 836/469, loss: 0.07626467198133469 2023-01-22 12:29:18.926090: step: 838/469, loss: 0.033375199884176254 2023-01-22 12:29:19.603891: step: 840/469, loss: 0.056133005768060684 2023-01-22 12:29:20.231997: step: 842/469, loss: 0.5592582821846008 2023-01-22 12:29:20.949366: step: 844/469, loss: 0.09973210841417313 2023-01-22 12:29:21.616204: step: 846/469, loss: 0.03469455987215042 2023-01-22 12:29:22.257626: step: 848/469, loss: 0.07881100475788116 2023-01-22 12:29:22.885356: step: 850/469, loss: 0.21256226301193237 2023-01-22 12:29:23.504551: step: 852/469, loss: 0.4514442980289459 2023-01-22 12:29:24.141987: step: 854/469, loss: 0.1232815682888031 2023-01-22 12:29:24.760901: step: 856/469, loss: 0.04068541154265404 2023-01-22 12:29:25.475246: step: 858/469, loss: 0.06511027365922928 2023-01-22 12:29:26.128266: step: 860/469, loss: 0.11577273905277252 2023-01-22 12:29:26.739463: step: 862/469, loss: 0.054723381996154785 2023-01-22 12:29:27.346829: step: 864/469, loss: 0.039099425077438354 2023-01-22 12:29:27.959993: step: 866/469, loss: 0.11966274678707123 2023-01-22 12:29:28.563758: step: 868/469, loss: 0.03956418111920357 2023-01-22 12:29:29.159915: step: 870/469, loss: 0.06148413568735123 2023-01-22 12:29:29.849844: step: 872/469, loss: 0.0326245054602623 2023-01-22 12:29:30.445072: step: 874/469, loss: 0.09194914251565933 2023-01-22 12:29:31.133867: step: 876/469, loss: 0.017667468637228012 2023-01-22 12:29:31.789958: step: 878/469, loss: 0.054720357060432434 2023-01-22 12:29:32.449514: step: 880/469, loss: 0.07401636242866516 2023-01-22 12:29:32.997235: step: 882/469, loss: 0.10553588718175888 2023-01-22 12:29:33.582454: step: 884/469, loss: 0.020038213580846786 2023-01-22 12:29:34.253041: step: 886/469, loss: 0.3735326826572418 2023-01-22 12:29:34.911036: step: 888/469, loss: 0.03890379145741463 2023-01-22 12:29:35.535709: step: 890/469, loss: 0.2949785590171814 2023-01-22 12:29:36.167051: step: 892/469, loss: 0.029529161751270294 2023-01-22 12:29:36.730964: step: 894/469, loss: 0.03258060663938522 2023-01-22 12:29:37.383168: step: 896/469, loss: 0.12606225907802582 2023-01-22 12:29:38.047806: step: 898/469, loss: 0.15128742158412933 2023-01-22 12:29:38.687491: step: 900/469, loss: 0.2061641663312912 2023-01-22 12:29:39.400051: step: 902/469, loss: 
0.04568546265363693 2023-01-22 12:29:40.082661: step: 904/469, loss: 0.012844710610806942 2023-01-22 12:29:40.735817: step: 906/469, loss: 0.0550975538790226 2023-01-22 12:29:41.372196: step: 908/469, loss: 0.07844427973031998 2023-01-22 12:29:41.958333: step: 910/469, loss: 0.2863953113555908 2023-01-22 12:29:42.586116: step: 912/469, loss: 0.14482593536376953 2023-01-22 12:29:43.191962: step: 914/469, loss: 0.2245837152004242 2023-01-22 12:29:43.898463: step: 916/469, loss: 0.059523265808820724 2023-01-22 12:29:44.609530: step: 918/469, loss: 0.09413760900497437 2023-01-22 12:29:45.255874: step: 920/469, loss: 0.13847032189369202 2023-01-22 12:29:45.886495: step: 922/469, loss: 0.12438680231571198 2023-01-22 12:29:46.565410: step: 924/469, loss: 0.19039230048656464 2023-01-22 12:29:47.202952: step: 926/469, loss: 0.04897954687476158 2023-01-22 12:29:47.887695: step: 928/469, loss: 0.32136696577072144 2023-01-22 12:29:48.478397: step: 930/469, loss: 0.061080172657966614 2023-01-22 12:29:49.134298: step: 932/469, loss: 0.023968001827597618 2023-01-22 12:29:49.810260: step: 934/469, loss: 0.8745543956756592 2023-01-22 12:29:50.477830: step: 936/469, loss: 0.07717323303222656 2023-01-22 12:29:51.078373: step: 938/469, loss: 0.09064988791942596 ================================================== Loss: 0.151 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2946972817133443, 'r': 0.3394331119544592, 'f1': 0.31548721340388003}, 'combined': 0.2324642625081221, 'epoch': 17} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3019928433156702, 'r': 0.26027196560234345, 'f1': 0.2795845291433527}, 'combined': 0.15250065226001056, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2910351335656214, 'r': 0.3396330306316075, 'f1': 0.313461658744058}, 'combined': 0.23097174854825325, 'epoch': 17} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3012506367584181, 'r': 0.26486904110232373, 'f1': 0.2818908100533981}, 'combined': 0.15375862366548987, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2933298898071625, 'r': 0.33674493991144844, 'f1': 0.31354166666666666}, 'combined': 0.23103070175438595, 'epoch': 17} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29952642841688554, 'r': 0.2657885614798187, 'f1': 0.28165076092766816}, 'combined': 0.15362768777872807, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23717948717948717, 'r': 0.35238095238095235, 'f1': 0.2835249042145594}, 'combined': 0.18901660280970625, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22297297297297297, 'r': 0.358695652173913, 'f1': 0.275}, 'combined': 0.1375, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 
0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:32:44.039538: step: 2/469, loss: 0.19739031791687012 2023-01-22 12:32:44.765797: step: 4/469, loss: 0.10926885902881622 2023-01-22 12:32:45.330030: step: 6/469, loss: 0.040194518864154816 2023-01-22 12:32:45.947180: step: 8/469, loss: 0.07396736741065979 2023-01-22 12:32:46.593719: step: 10/469, loss: 0.03645572066307068 2023-01-22 12:32:47.225504: step: 12/469, loss: 0.031731922179460526 2023-01-22 12:32:47.864452: step: 14/469, loss: 0.05556264519691467 2023-01-22 12:32:48.478011: step: 16/469, loss: 0.013722613453865051 2023-01-22 12:32:49.080999: step: 18/469, loss: 0.07098843902349472 2023-01-22 12:32:49.717448: step: 20/469, loss: 0.04062904417514801 2023-01-22 12:32:50.303935: step: 22/469, loss: 0.09956200420856476 2023-01-22 12:32:50.957284: step: 24/469, loss: 0.01884957030415535 2023-01-22 12:32:51.694606: step: 26/469, loss: 0.06453052163124084 2023-01-22 12:32:52.351963: step: 28/469, loss: 0.1731313169002533 2023-01-22 12:32:52.949850: step: 30/469, loss: 0.07862535119056702 2023-01-22 12:32:53.569785: step: 32/469, loss: 0.4284873902797699 2023-01-22 12:32:54.170939: step: 34/469, loss: 0.0994366854429245 2023-01-22 12:32:54.809829: step: 36/469, loss: 0.06237749755382538 2023-01-22 12:32:55.420447: step: 38/469, loss: 0.795730471611023 2023-01-22 12:32:56.048257: step: 40/469, loss: 0.4500488042831421 2023-01-22 12:32:56.710800: step: 42/469, loss: 
0.0233334768563509 2023-01-22 12:32:57.301362: step: 44/469, loss: 0.05385006219148636 2023-01-22 12:32:57.994031: step: 46/469, loss: 0.004826133605092764 2023-01-22 12:32:58.663562: step: 48/469, loss: 0.029154714196920395 2023-01-22 12:32:59.245784: step: 50/469, loss: 0.0385696142911911 2023-01-22 12:32:59.870039: step: 52/469, loss: 0.022908968850970268 2023-01-22 12:33:00.509474: step: 54/469, loss: 0.05257021263241768 2023-01-22 12:33:01.175359: step: 56/469, loss: 0.044094886630773544 2023-01-22 12:33:01.811952: step: 58/469, loss: 0.058982107788324356 2023-01-22 12:33:02.620140: step: 60/469, loss: 0.04889293015003204 2023-01-22 12:33:03.228392: step: 62/469, loss: 0.005112671758979559 2023-01-22 12:33:03.876903: step: 64/469, loss: 0.045796558260917664 2023-01-22 12:33:04.532977: step: 66/469, loss: 0.04271983355283737 2023-01-22 12:33:05.212976: step: 68/469, loss: 0.7052487134933472 2023-01-22 12:33:06.021482: step: 70/469, loss: 0.050847891718149185 2023-01-22 12:33:06.723118: step: 72/469, loss: 0.018044590950012207 2023-01-22 12:33:07.383355: step: 74/469, loss: 0.07699897885322571 2023-01-22 12:33:08.053378: step: 76/469, loss: 0.017341425642371178 2023-01-22 12:33:08.712861: step: 78/469, loss: 0.1727791130542755 2023-01-22 12:33:09.303459: step: 80/469, loss: 0.05113661661744118 2023-01-22 12:33:09.886920: step: 82/469, loss: 0.1645323485136032 2023-01-22 12:33:10.533821: step: 84/469, loss: 0.3509819209575653 2023-01-22 12:33:11.185605: step: 86/469, loss: 0.03935866802930832 2023-01-22 12:33:11.916011: step: 88/469, loss: 0.020971398800611496 2023-01-22 12:33:12.549634: step: 90/469, loss: 0.037076156586408615 2023-01-22 12:33:13.246886: step: 92/469, loss: 0.023122401908040047 2023-01-22 12:33:13.870048: step: 94/469, loss: 0.024980712682008743 2023-01-22 12:33:14.535249: step: 96/469, loss: 0.04867185652256012 2023-01-22 12:33:15.197042: step: 98/469, loss: 0.20667971670627594 2023-01-22 12:33:15.894943: step: 100/469, loss: 0.05252702906727791 2023-01-22 12:33:16.479773: step: 102/469, loss: 0.0459345281124115 2023-01-22 12:33:17.160154: step: 104/469, loss: 0.043612752109766006 2023-01-22 12:33:17.782811: step: 106/469, loss: 1.084824800491333 2023-01-22 12:33:18.402731: step: 108/469, loss: 0.03055424429476261 2023-01-22 12:33:18.992273: step: 110/469, loss: 0.019144048914313316 2023-01-22 12:33:19.583678: step: 112/469, loss: 0.059906549751758575 2023-01-22 12:33:20.206292: step: 114/469, loss: 0.05591762065887451 2023-01-22 12:33:20.816246: step: 116/469, loss: 0.032409392297267914 2023-01-22 12:33:21.459684: step: 118/469, loss: 0.1541178822517395 2023-01-22 12:33:22.059332: step: 120/469, loss: 0.04238314554095268 2023-01-22 12:33:22.695107: step: 122/469, loss: 0.07870107889175415 2023-01-22 12:33:23.317847: step: 124/469, loss: 0.05513609200716019 2023-01-22 12:33:23.918599: step: 126/469, loss: 0.03067687526345253 2023-01-22 12:33:24.548352: step: 128/469, loss: 0.12483498454093933 2023-01-22 12:33:25.193953: step: 130/469, loss: 0.0701747015118599 2023-01-22 12:33:25.728757: step: 132/469, loss: 0.07155679166316986 2023-01-22 12:33:26.403102: step: 134/469, loss: 0.05490366369485855 2023-01-22 12:33:27.069997: step: 136/469, loss: 0.04038708284497261 2023-01-22 12:33:27.685938: step: 138/469, loss: 0.058350540697574615 2023-01-22 12:33:28.258323: step: 140/469, loss: 0.2995723783969879 2023-01-22 12:33:28.926681: step: 142/469, loss: 0.08722950518131256 2023-01-22 12:33:29.523588: step: 144/469, loss: 0.0608787015080452 2023-01-22 12:33:30.177242: step: 
146/469, loss: 0.057359132915735245 2023-01-22 12:33:30.889610: step: 148/469, loss: 1.2829951047897339 2023-01-22 12:33:31.472510: step: 150/469, loss: 0.05402109771966934 2023-01-22 12:33:32.188022: step: 152/469, loss: 0.2886006534099579 2023-01-22 12:33:32.764090: step: 154/469, loss: 0.021217528730630875 2023-01-22 12:33:33.361950: step: 156/469, loss: 0.04665805771946907 2023-01-22 12:33:34.029461: step: 158/469, loss: 0.035669777542352676 2023-01-22 12:33:34.670198: step: 160/469, loss: 0.019087495282292366 2023-01-22 12:33:35.339816: step: 162/469, loss: 0.360222190618515 2023-01-22 12:33:36.058639: step: 164/469, loss: 0.051359906792640686 2023-01-22 12:33:36.680763: step: 166/469, loss: 0.14188361167907715 2023-01-22 12:33:37.294721: step: 168/469, loss: 0.03651672601699829 2023-01-22 12:33:37.919309: step: 170/469, loss: 0.17242644727230072 2023-01-22 12:33:38.533433: step: 172/469, loss: 0.07706030458211899 2023-01-22 12:33:39.115804: step: 174/469, loss: 0.03869843855500221 2023-01-22 12:33:39.774014: step: 176/469, loss: 0.07919766008853912 2023-01-22 12:33:40.332914: step: 178/469, loss: 0.013099395669996738 2023-01-22 12:33:40.936358: step: 180/469, loss: 0.04147225618362427 2023-01-22 12:33:41.519879: step: 182/469, loss: 0.011648968793451786 2023-01-22 12:33:42.120217: step: 184/469, loss: 0.027431631460785866 2023-01-22 12:33:42.749691: step: 186/469, loss: 0.021427946165204048 2023-01-22 12:33:43.405481: step: 188/469, loss: 0.023229064419865608 2023-01-22 12:33:44.010349: step: 190/469, loss: 0.09563391655683517 2023-01-22 12:33:44.644644: step: 192/469, loss: 0.10777001082897186 2023-01-22 12:33:45.262358: step: 194/469, loss: 0.007140179164707661 2023-01-22 12:33:45.834648: step: 196/469, loss: 0.057528965175151825 2023-01-22 12:33:46.479041: step: 198/469, loss: 0.04632630944252014 2023-01-22 12:33:47.163938: step: 200/469, loss: 0.06112738698720932 2023-01-22 12:33:47.762039: step: 202/469, loss: 0.05195877328515053 2023-01-22 12:33:48.441158: step: 204/469, loss: 0.11310620605945587 2023-01-22 12:33:49.063806: step: 206/469, loss: 0.05004100129008293 2023-01-22 12:33:49.676365: step: 208/469, loss: 0.026865439489483833 2023-01-22 12:33:50.386667: step: 210/469, loss: 0.2298719733953476 2023-01-22 12:33:51.060744: step: 212/469, loss: 0.02781570330262184 2023-01-22 12:33:51.661362: step: 214/469, loss: 0.06808027625083923 2023-01-22 12:33:52.292722: step: 216/469, loss: 0.0895877555012703 2023-01-22 12:33:52.964617: step: 218/469, loss: 0.028829757124185562 2023-01-22 12:33:53.643325: step: 220/469, loss: 0.12048055231571198 2023-01-22 12:33:54.261667: step: 222/469, loss: 0.040304020047187805 2023-01-22 12:33:54.830524: step: 224/469, loss: 0.08068827539682388 2023-01-22 12:33:55.436369: step: 226/469, loss: 0.019230065867304802 2023-01-22 12:33:56.044426: step: 228/469, loss: 0.04150177910923958 2023-01-22 12:33:56.670601: step: 230/469, loss: 0.014615689404308796 2023-01-22 12:33:57.270380: step: 232/469, loss: 0.11862577497959137 2023-01-22 12:33:57.919399: step: 234/469, loss: 0.04175734892487526 2023-01-22 12:33:58.543129: step: 236/469, loss: 0.057141244411468506 2023-01-22 12:33:59.146652: step: 238/469, loss: 0.14053361117839813 2023-01-22 12:33:59.807400: step: 240/469, loss: 0.129434272646904 2023-01-22 12:34:00.433558: step: 242/469, loss: 0.071467824280262 2023-01-22 12:34:01.048829: step: 244/469, loss: 0.07568236440420151 2023-01-22 12:34:01.653554: step: 246/469, loss: 0.0067124986089766026 2023-01-22 12:34:02.299342: step: 248/469, loss: 
0.004839214496314526 2023-01-22 12:34:02.997193: step: 250/469, loss: 0.1291448026895523 2023-01-22 12:34:03.635563: step: 252/469, loss: 0.02901940979063511 2023-01-22 12:34:04.355815: step: 254/469, loss: 0.046936552971601486 2023-01-22 12:34:04.955886: step: 256/469, loss: 0.0028923400677740574 2023-01-22 12:34:05.574284: step: 258/469, loss: 0.30275511741638184 2023-01-22 12:34:06.179128: step: 260/469, loss: 0.017310112714767456 2023-01-22 12:34:06.735981: step: 262/469, loss: 0.06175096333026886 2023-01-22 12:34:07.349874: step: 264/469, loss: 0.056193944066762924 2023-01-22 12:34:07.984618: step: 266/469, loss: 0.04413291811943054 2023-01-22 12:34:08.570185: step: 268/469, loss: 0.045018505305051804 2023-01-22 12:34:09.159835: step: 270/469, loss: 1.209478735923767 2023-01-22 12:34:09.834648: step: 272/469, loss: 0.1236867606639862 2023-01-22 12:34:10.506781: step: 274/469, loss: 0.1417955607175827 2023-01-22 12:34:11.165639: step: 276/469, loss: 0.07055442780256271 2023-01-22 12:34:11.914416: step: 278/469, loss: 0.050440967082977295 2023-01-22 12:34:12.489591: step: 280/469, loss: 0.10773095488548279 2023-01-22 12:34:13.141717: step: 282/469, loss: 0.051065798848867416 2023-01-22 12:34:13.790591: step: 284/469, loss: 0.052638184279203415 2023-01-22 12:34:14.414406: step: 286/469, loss: 0.042986348271369934 2023-01-22 12:34:15.086158: step: 288/469, loss: 0.039752598851919174 2023-01-22 12:34:15.648222: step: 290/469, loss: 0.05830540508031845 2023-01-22 12:34:16.277695: step: 292/469, loss: 0.05978779494762421 2023-01-22 12:34:16.898674: step: 294/469, loss: 0.23868657648563385 2023-01-22 12:34:17.508690: step: 296/469, loss: 0.01042854506522417 2023-01-22 12:34:18.105237: step: 298/469, loss: 0.10877780616283417 2023-01-22 12:34:18.673192: step: 300/469, loss: 0.11008822172880173 2023-01-22 12:34:19.288119: step: 302/469, loss: 0.03987894952297211 2023-01-22 12:34:19.959708: step: 304/469, loss: 0.02829763852059841 2023-01-22 12:34:20.581043: step: 306/469, loss: 0.020535774528980255 2023-01-22 12:34:21.206102: step: 308/469, loss: 0.02137993648648262 2023-01-22 12:34:21.838700: step: 310/469, loss: 0.026447640731930733 2023-01-22 12:34:22.436546: step: 312/469, loss: 0.03280575945973396 2023-01-22 12:34:23.080068: step: 314/469, loss: 0.1060236468911171 2023-01-22 12:34:23.696263: step: 316/469, loss: 0.11234564334154129 2023-01-22 12:34:24.321286: step: 318/469, loss: 0.028679821640253067 2023-01-22 12:34:24.971183: step: 320/469, loss: 0.05580789968371391 2023-01-22 12:34:25.511486: step: 322/469, loss: 0.11312264204025269 2023-01-22 12:34:26.125524: step: 324/469, loss: 0.08738502115011215 2023-01-22 12:34:26.753007: step: 326/469, loss: 0.09100356698036194 2023-01-22 12:34:27.365044: step: 328/469, loss: 0.09819810092449188 2023-01-22 12:34:27.995610: step: 330/469, loss: 0.01844073086977005 2023-01-22 12:34:28.629436: step: 332/469, loss: 0.05894249677658081 2023-01-22 12:34:29.218137: step: 334/469, loss: 0.10026882588863373 2023-01-22 12:34:29.962635: step: 336/469, loss: 0.0325361005961895 2023-01-22 12:34:30.586395: step: 338/469, loss: 0.08915646374225616 2023-01-22 12:34:31.194797: step: 340/469, loss: 0.01552667748183012 2023-01-22 12:34:31.804854: step: 342/469, loss: 0.07285908609628677 2023-01-22 12:34:32.432694: step: 344/469, loss: 0.14017005264759064 2023-01-22 12:34:32.966045: step: 346/469, loss: 0.04775165393948555 2023-01-22 12:34:33.619088: step: 348/469, loss: 0.31016653776168823 2023-01-22 12:34:34.195748: step: 350/469, loss: 0.016111040487885475 
2023-01-22 12:34:34.876770: step: 352/469, loss: 0.13392649590969086 2023-01-22 12:34:35.471983: step: 354/469, loss: 0.09390953183174133 2023-01-22 12:34:36.255766: step: 356/469, loss: 0.07702707499265671 2023-01-22 12:34:36.914375: step: 358/469, loss: 0.27382218837738037 2023-01-22 12:34:37.532494: step: 360/469, loss: 0.07126467674970627 2023-01-22 12:34:38.177942: step: 362/469, loss: 0.05109051242470741 2023-01-22 12:34:38.780234: step: 364/469, loss: 0.04137686267495155 2023-01-22 12:34:39.372307: step: 366/469, loss: 0.047433849424123764 2023-01-22 12:34:39.937008: step: 368/469, loss: 0.060302574187517166 2023-01-22 12:34:40.608250: step: 370/469, loss: 0.04694000631570816 2023-01-22 12:34:41.199153: step: 372/469, loss: 0.044354137033224106 2023-01-22 12:34:41.836391: step: 374/469, loss: 0.05950804427266121 2023-01-22 12:34:42.468265: step: 376/469, loss: 0.06666885316371918 2023-01-22 12:34:43.122769: step: 378/469, loss: 0.11413034796714783 2023-01-22 12:34:43.813792: step: 380/469, loss: 0.035895489156246185 2023-01-22 12:34:44.543591: step: 382/469, loss: 0.2797907888889313 2023-01-22 12:34:45.195206: step: 384/469, loss: 0.06916212290525436 2023-01-22 12:34:45.871261: step: 386/469, loss: 0.2177465707063675 2023-01-22 12:34:46.584485: step: 388/469, loss: 0.06521226465702057 2023-01-22 12:34:47.221745: step: 390/469, loss: 0.1160454973578453 2023-01-22 12:34:47.838447: step: 392/469, loss: 0.10225497931241989 2023-01-22 12:34:48.455346: step: 394/469, loss: 0.023697322234511375 2023-01-22 12:34:49.107965: step: 396/469, loss: 0.033800724893808365 2023-01-22 12:34:49.699463: step: 398/469, loss: 0.31401947140693665 2023-01-22 12:34:50.325072: step: 400/469, loss: 0.20775017142295837 2023-01-22 12:34:50.995991: step: 402/469, loss: 0.21333889663219452 2023-01-22 12:34:51.680897: step: 404/469, loss: 0.04311218112707138 2023-01-22 12:34:52.388234: step: 406/469, loss: 0.22380219399929047 2023-01-22 12:34:53.040628: step: 408/469, loss: 0.10484687238931656 2023-01-22 12:34:53.700482: step: 410/469, loss: 0.060690879821777344 2023-01-22 12:34:54.317118: step: 412/469, loss: 0.1567976027727127 2023-01-22 12:34:55.031776: step: 414/469, loss: 0.23510277271270752 2023-01-22 12:34:55.724628: step: 416/469, loss: 0.10009729862213135 2023-01-22 12:34:56.346092: step: 418/469, loss: 0.1717015951871872 2023-01-22 12:34:56.982870: step: 420/469, loss: 0.05372271314263344 2023-01-22 12:34:57.650160: step: 422/469, loss: 0.2950482666492462 2023-01-22 12:34:58.329397: step: 424/469, loss: 0.0671352669596672 2023-01-22 12:34:58.981540: step: 426/469, loss: 0.13671229779720306 2023-01-22 12:34:59.549419: step: 428/469, loss: 0.34977227449417114 2023-01-22 12:35:00.237675: step: 430/469, loss: 0.08556195348501205 2023-01-22 12:35:00.866487: step: 432/469, loss: 0.2947121858596802 2023-01-22 12:35:01.497954: step: 434/469, loss: 0.051985498517751694 2023-01-22 12:35:02.120687: step: 436/469, loss: 0.13081999123096466 2023-01-22 12:35:02.743140: step: 438/469, loss: 0.16685709357261658 2023-01-22 12:35:03.325307: step: 440/469, loss: 0.43547242879867554 2023-01-22 12:35:04.011736: step: 442/469, loss: 0.07016035169363022 2023-01-22 12:35:04.627553: step: 444/469, loss: 0.1425005942583084 2023-01-22 12:35:05.316676: step: 446/469, loss: 0.055922720581293106 2023-01-22 12:35:05.881284: step: 448/469, loss: 0.03692716360092163 2023-01-22 12:35:06.461836: step: 450/469, loss: 0.04559524357318878 2023-01-22 12:35:07.139373: step: 452/469, loss: 0.23842787742614746 2023-01-22 12:35:07.784609: step: 
454/469, loss: 0.16788306832313538 2023-01-22 12:35:08.374480: step: 456/469, loss: 0.07471518963575363 2023-01-22 12:35:08.980557: step: 458/469, loss: 0.06382163614034653 2023-01-22 12:35:09.616531: step: 460/469, loss: 0.025148622691631317 2023-01-22 12:35:10.267514: step: 462/469, loss: 0.09097937494516373 2023-01-22 12:35:10.845527: step: 464/469, loss: 0.02212938666343689 2023-01-22 12:35:11.512190: step: 466/469, loss: 0.057925183326005936 2023-01-22 12:35:12.108393: step: 468/469, loss: 0.04108898341655731 2023-01-22 12:35:12.681126: step: 470/469, loss: 0.0179759431630373 2023-01-22 12:35:13.324426: step: 472/469, loss: 0.04561353102326393 2023-01-22 12:35:13.888495: step: 474/469, loss: 0.020289180800318718 2023-01-22 12:35:14.566995: step: 476/469, loss: 0.07113469392061234 2023-01-22 12:35:15.125236: step: 478/469, loss: 0.019822226837277412 2023-01-22 12:35:15.733999: step: 480/469, loss: 0.07940469682216644 2023-01-22 12:35:16.374402: step: 482/469, loss: 0.06818144768476486 2023-01-22 12:35:17.017645: step: 484/469, loss: 0.028329774737358093 2023-01-22 12:35:17.653994: step: 486/469, loss: 0.1307142972946167 2023-01-22 12:35:18.229624: step: 488/469, loss: 0.03568470850586891 2023-01-22 12:35:18.885586: step: 490/469, loss: 0.1567821055650711 2023-01-22 12:35:19.491402: step: 492/469, loss: 0.2452036738395691 2023-01-22 12:35:20.031400: step: 494/469, loss: 0.4887661635875702 2023-01-22 12:35:20.619863: step: 496/469, loss: 0.23095785081386566 2023-01-22 12:35:21.266230: step: 498/469, loss: 0.33275020122528076 2023-01-22 12:35:21.907152: step: 500/469, loss: 0.25882789492607117 2023-01-22 12:35:22.564007: step: 502/469, loss: 0.08737694472074509 2023-01-22 12:35:23.181720: step: 504/469, loss: 0.034016311168670654 2023-01-22 12:35:23.810759: step: 506/469, loss: 0.024603933095932007 2023-01-22 12:35:24.481275: step: 508/469, loss: 0.042490214109420776 2023-01-22 12:35:25.111445: step: 510/469, loss: 0.0801747739315033 2023-01-22 12:35:25.778836: step: 512/469, loss: 0.030629804357886314 2023-01-22 12:35:26.420242: step: 514/469, loss: 0.08940828591585159 2023-01-22 12:35:27.046089: step: 516/469, loss: 0.17327681183815002 2023-01-22 12:35:27.694351: step: 518/469, loss: 0.08095608651638031 2023-01-22 12:35:28.338497: step: 520/469, loss: 0.05163678526878357 2023-01-22 12:35:28.908758: step: 522/469, loss: 0.6049695014953613 2023-01-22 12:35:29.501247: step: 524/469, loss: 0.05464332178235054 2023-01-22 12:35:30.120567: step: 526/469, loss: 0.07438329607248306 2023-01-22 12:35:30.725377: step: 528/469, loss: 0.01273268274962902 2023-01-22 12:35:31.515186: step: 530/469, loss: 0.05413738265633583 2023-01-22 12:35:32.125420: step: 532/469, loss: 0.1312224566936493 2023-01-22 12:35:32.768744: step: 534/469, loss: 0.031502481549978256 2023-01-22 12:35:33.364279: step: 536/469, loss: 0.027119850739836693 2023-01-22 12:35:33.957328: step: 538/469, loss: 0.029854027554392815 2023-01-22 12:35:34.569214: step: 540/469, loss: 0.047307178378105164 2023-01-22 12:35:35.185969: step: 542/469, loss: 0.23962295055389404 2023-01-22 12:35:35.781518: step: 544/469, loss: 0.10581046342849731 2023-01-22 12:35:36.382337: step: 546/469, loss: 0.11889756470918655 2023-01-22 12:35:37.009829: step: 548/469, loss: 0.021439019590616226 2023-01-22 12:35:37.636495: step: 550/469, loss: 0.0780877098441124 2023-01-22 12:35:38.262969: step: 552/469, loss: 0.12498989701271057 2023-01-22 12:35:38.852801: step: 554/469, loss: 0.025048013776540756 2023-01-22 12:35:39.489884: step: 556/469, loss: 
0.09084697812795639 2023-01-22 12:35:40.117479: step: 558/469, loss: 0.2633206844329834 2023-01-22 12:35:40.765963: step: 560/469, loss: 0.33507969975471497 2023-01-22 12:35:41.350233: step: 562/469, loss: 0.05256320908665657 2023-01-22 12:35:41.929223: step: 564/469, loss: 0.09941021353006363 2023-01-22 12:35:42.572573: step: 566/469, loss: 1.0583281517028809 2023-01-22 12:35:43.158151: step: 568/469, loss: 0.085181824862957 2023-01-22 12:35:43.753221: step: 570/469, loss: 0.016662290319800377 2023-01-22 12:35:44.396324: step: 572/469, loss: 0.15421420335769653 2023-01-22 12:35:44.967722: step: 574/469, loss: 0.028988031670451164 2023-01-22 12:35:45.596468: step: 576/469, loss: 0.043274663388729095 2023-01-22 12:35:46.203647: step: 578/469, loss: 0.0947754830121994 2023-01-22 12:35:46.880205: step: 580/469, loss: 0.6166610717773438 2023-01-22 12:35:47.524368: step: 582/469, loss: 0.24353112280368805 2023-01-22 12:35:48.114103: step: 584/469, loss: 0.047448042780160904 2023-01-22 12:35:48.725120: step: 586/469, loss: 0.08495884388685226 2023-01-22 12:35:49.341483: step: 588/469, loss: 0.07763383537530899 2023-01-22 12:35:49.901365: step: 590/469, loss: 0.009391697123646736 2023-01-22 12:35:50.512586: step: 592/469, loss: 0.0663059800863266 2023-01-22 12:35:51.212294: step: 594/469, loss: 0.08508490025997162 2023-01-22 12:35:51.855950: step: 596/469, loss: 0.07409289479255676 2023-01-22 12:35:52.490281: step: 598/469, loss: 0.14681270718574524 2023-01-22 12:35:53.116051: step: 600/469, loss: 0.09062065184116364 2023-01-22 12:35:53.803833: step: 602/469, loss: 0.14468273520469666 2023-01-22 12:35:54.496371: step: 604/469, loss: 0.34246498346328735 2023-01-22 12:35:55.153278: step: 606/469, loss: 0.040256209671497345 2023-01-22 12:35:55.824490: step: 608/469, loss: 0.13727182149887085 2023-01-22 12:35:56.385135: step: 610/469, loss: 0.0571071021258831 2023-01-22 12:35:56.998127: step: 612/469, loss: 0.03979555889964104 2023-01-22 12:35:57.625786: step: 614/469, loss: 0.054563213139772415 2023-01-22 12:35:58.225838: step: 616/469, loss: 0.06938362121582031 2023-01-22 12:35:58.860830: step: 618/469, loss: 0.1778077483177185 2023-01-22 12:35:59.474364: step: 620/469, loss: 0.03375286981463432 2023-01-22 12:36:00.103748: step: 622/469, loss: 0.1744992434978485 2023-01-22 12:36:00.675685: step: 624/469, loss: 0.06956393271684647 2023-01-22 12:36:01.265952: step: 626/469, loss: 0.04866023361682892 2023-01-22 12:36:01.873538: step: 628/469, loss: 0.029239829629659653 2023-01-22 12:36:02.513514: step: 630/469, loss: 0.03168150782585144 2023-01-22 12:36:03.100713: step: 632/469, loss: 0.1776774525642395 2023-01-22 12:36:03.729660: step: 634/469, loss: 0.13768786191940308 2023-01-22 12:36:04.314882: step: 636/469, loss: 0.11766307801008224 2023-01-22 12:36:04.934063: step: 638/469, loss: 1.0542603731155396 2023-01-22 12:36:05.593647: step: 640/469, loss: 0.04736362025141716 2023-01-22 12:36:06.174608: step: 642/469, loss: 0.014209137298166752 2023-01-22 12:36:06.722864: step: 644/469, loss: 0.09848280996084213 2023-01-22 12:36:07.342902: step: 646/469, loss: 0.0741153210401535 2023-01-22 12:36:08.031417: step: 648/469, loss: 0.07008747011423111 2023-01-22 12:36:08.619138: step: 650/469, loss: 0.33517831563949585 2023-01-22 12:36:09.249800: step: 652/469, loss: 0.10690117627382278 2023-01-22 12:36:09.842750: step: 654/469, loss: 0.031678080558776855 2023-01-22 12:36:10.491923: step: 656/469, loss: 0.13064999878406525 2023-01-22 12:36:11.103598: step: 658/469, loss: 0.09064306318759918 2023-01-22 
12:36:11.731896: step: 660/469, loss: 0.07991311699151993 2023-01-22 12:36:12.361650: step: 662/469, loss: 0.11285699903964996 2023-01-22 12:36:13.077304: step: 664/469, loss: 0.04991097375750542 2023-01-22 12:36:13.680967: step: 666/469, loss: 0.039659518748521805 2023-01-22 12:36:14.364075: step: 668/469, loss: 0.034752923995256424 2023-01-22 12:36:14.983809: step: 670/469, loss: 0.035021405667066574 2023-01-22 12:36:15.588221: step: 672/469, loss: 0.15889544785022736 2023-01-22 12:36:16.230970: step: 674/469, loss: 0.5652160048484802 2023-01-22 12:36:16.849173: step: 676/469, loss: 0.020960049703717232 2023-01-22 12:36:17.417329: step: 678/469, loss: 0.07413636893033981 2023-01-22 12:36:18.075198: step: 680/469, loss: 0.03034192882478237 2023-01-22 12:36:18.734395: step: 682/469, loss: 0.018978828564286232 2023-01-22 12:36:19.376061: step: 684/469, loss: 0.05151795968413353 2023-01-22 12:36:19.916354: step: 686/469, loss: 0.06589885801076889 2023-01-22 12:36:20.532318: step: 688/469, loss: 0.03908786550164223 2023-01-22 12:36:21.239166: step: 690/469, loss: 0.11488750576972961 2023-01-22 12:36:21.889773: step: 692/469, loss: 0.06972968578338623 2023-01-22 12:36:22.525995: step: 694/469, loss: 0.30625277757644653 2023-01-22 12:36:23.124990: step: 696/469, loss: 0.038411471992731094 2023-01-22 12:36:23.862568: step: 698/469, loss: 0.18217110633850098 2023-01-22 12:36:24.502238: step: 700/469, loss: 0.060849037021398544 2023-01-22 12:36:25.197232: step: 702/469, loss: 0.10439438372850418 2023-01-22 12:36:25.834627: step: 704/469, loss: 0.044746894389390945 2023-01-22 12:36:26.500870: step: 706/469, loss: 0.037626005709171295 2023-01-22 12:36:27.058663: step: 708/469, loss: 0.08274061977863312 2023-01-22 12:36:27.675305: step: 710/469, loss: 0.4294675588607788 2023-01-22 12:36:28.366181: step: 712/469, loss: 0.3394537568092346 2023-01-22 12:36:29.004422: step: 714/469, loss: 0.12252811342477798 2023-01-22 12:36:29.616685: step: 716/469, loss: 0.1321982890367508 2023-01-22 12:36:30.226265: step: 718/469, loss: 0.14044569432735443 2023-01-22 12:36:30.837904: step: 720/469, loss: 0.057032596319913864 2023-01-22 12:36:31.437629: step: 722/469, loss: 0.05181776359677315 2023-01-22 12:36:32.068867: step: 724/469, loss: 0.022135816514492035 2023-01-22 12:36:32.849666: step: 726/469, loss: 0.33438247442245483 2023-01-22 12:36:33.592672: step: 728/469, loss: 0.11679305881261826 2023-01-22 12:36:34.243097: step: 730/469, loss: 0.06900320947170258 2023-01-22 12:36:34.896930: step: 732/469, loss: 0.006501945201307535 2023-01-22 12:36:35.536913: step: 734/469, loss: 0.13831335306167603 2023-01-22 12:36:36.256835: step: 736/469, loss: 0.12699875235557556 2023-01-22 12:36:36.859080: step: 738/469, loss: 0.07961904257535934 2023-01-22 12:36:37.465881: step: 740/469, loss: 0.15678110718727112 2023-01-22 12:36:38.085424: step: 742/469, loss: 0.04896744340658188 2023-01-22 12:36:38.617698: step: 744/469, loss: 0.0030765822157263756 2023-01-22 12:36:39.290224: step: 746/469, loss: 0.1713375300168991 2023-01-22 12:36:39.940233: step: 748/469, loss: 0.11107548326253891 2023-01-22 12:36:40.537212: step: 750/469, loss: 0.11765393614768982 2023-01-22 12:36:41.166885: step: 752/469, loss: 1.6821019649505615 2023-01-22 12:36:41.767638: step: 754/469, loss: 0.04893473535776138 2023-01-22 12:36:42.333965: step: 756/469, loss: 0.009994837455451488 2023-01-22 12:36:42.951212: step: 758/469, loss: 0.03376145660877228 2023-01-22 12:36:43.544707: step: 760/469, loss: 0.005881036631762981 2023-01-22 12:36:44.171043: step: 
762/469, loss: 0.40421125292778015 2023-01-22 12:36:44.791196: step: 764/469, loss: 0.04221811890602112 2023-01-22 12:36:45.384566: step: 766/469, loss: 0.06776021420955658 2023-01-22 12:36:46.098373: step: 768/469, loss: 0.0718628540635109 2023-01-22 12:36:46.672699: step: 770/469, loss: 0.03564952686429024 2023-01-22 12:36:47.292455: step: 772/469, loss: 0.03369288519024849 2023-01-22 12:36:47.919784: step: 774/469, loss: 0.07828546315431595 2023-01-22 12:36:48.549464: step: 776/469, loss: 8.004802703857422 2023-01-22 12:36:49.190165: step: 778/469, loss: 0.024500887840986252 2023-01-22 12:36:49.819458: step: 780/469, loss: 0.09504243731498718 2023-01-22 12:36:50.413132: step: 782/469, loss: 0.0616409070789814 2023-01-22 12:36:51.050004: step: 784/469, loss: 0.06164541840553284 2023-01-22 12:36:51.694162: step: 786/469, loss: 0.03968252241611481 2023-01-22 12:36:52.273738: step: 788/469, loss: 0.0373171903192997 2023-01-22 12:36:52.914679: step: 790/469, loss: 0.2389187514781952 2023-01-22 12:36:53.482831: step: 792/469, loss: 0.3723146915435791 2023-01-22 12:36:54.123482: step: 794/469, loss: 0.17175130546092987 2023-01-22 12:36:54.831683: step: 796/469, loss: 0.10480652004480362 2023-01-22 12:36:55.449692: step: 798/469, loss: 0.06773378700017929 2023-01-22 12:36:56.025898: step: 800/469, loss: 0.07876768708229065 2023-01-22 12:36:56.741243: step: 802/469, loss: 0.2700870931148529 2023-01-22 12:36:57.406925: step: 804/469, loss: 0.43686532974243164 2023-01-22 12:36:57.964122: step: 806/469, loss: 0.04280206188559532 2023-01-22 12:36:58.646147: step: 808/469, loss: 0.033104076981544495 2023-01-22 12:36:59.273341: step: 810/469, loss: 0.029829207807779312 2023-01-22 12:36:59.868342: step: 812/469, loss: 0.10313430428504944 2023-01-22 12:37:00.527542: step: 814/469, loss: 0.42771920561790466 2023-01-22 12:37:01.125073: step: 816/469, loss: 0.08053378760814667 2023-01-22 12:37:01.794716: step: 818/469, loss: 0.09287554770708084 2023-01-22 12:37:02.368306: step: 820/469, loss: 0.06805162876844406 2023-01-22 12:37:03.050776: step: 822/469, loss: 0.2878781855106354 2023-01-22 12:37:03.682998: step: 824/469, loss: 0.03173014521598816 2023-01-22 12:37:04.328416: step: 826/469, loss: 0.11142918467521667 2023-01-22 12:37:04.918138: step: 828/469, loss: 0.08547961711883545 2023-01-22 12:37:05.557459: step: 830/469, loss: 0.162267804145813 2023-01-22 12:37:06.170086: step: 832/469, loss: 0.028734391555190086 2023-01-22 12:37:06.747550: step: 834/469, loss: 0.06410735845565796 2023-01-22 12:37:07.380885: step: 836/469, loss: 0.05783946439623833 2023-01-22 12:37:08.003464: step: 838/469, loss: 0.03225059434771538 2023-01-22 12:37:08.588690: step: 840/469, loss: 0.025671130046248436 2023-01-22 12:37:09.261123: step: 842/469, loss: 0.10866212099790573 2023-01-22 12:37:09.886350: step: 844/469, loss: 0.025387661531567574 2023-01-22 12:37:10.488747: step: 846/469, loss: 0.03339289501309395 2023-01-22 12:37:11.093378: step: 848/469, loss: 0.08237258344888687 2023-01-22 12:37:11.748121: step: 850/469, loss: 0.17484375834465027 2023-01-22 12:37:12.388733: step: 852/469, loss: 0.4390253722667694 2023-01-22 12:37:13.069605: step: 854/469, loss: 0.022874411195516586 2023-01-22 12:37:13.678558: step: 856/469, loss: 0.009351754561066628 2023-01-22 12:37:14.277653: step: 858/469, loss: 0.057364121079444885 2023-01-22 12:37:14.880213: step: 860/469, loss: 0.0667007640004158 2023-01-22 12:37:15.506545: step: 862/469, loss: 0.0047980910167098045 2023-01-22 12:37:16.157221: step: 864/469, loss: 0.07276533544063568 
2023-01-22 12:37:16.715366: step: 866/469, loss: 1.0267325639724731 2023-01-22 12:37:17.273460: step: 868/469, loss: 0.017963893711566925 2023-01-22 12:37:17.898271: step: 870/469, loss: 0.004995882976800203 2023-01-22 12:37:18.450478: step: 872/469, loss: 0.10431088507175446 2023-01-22 12:37:19.107371: step: 874/469, loss: 0.10473817586898804 2023-01-22 12:37:19.700136: step: 876/469, loss: 0.09909619390964508 2023-01-22 12:37:20.332047: step: 878/469, loss: 0.10541045665740967 2023-01-22 12:37:20.949102: step: 880/469, loss: 0.07159728556871414 2023-01-22 12:37:21.626660: step: 882/469, loss: 0.03213709220290184 2023-01-22 12:37:22.323482: step: 884/469, loss: 0.10682906210422516 2023-01-22 12:37:23.014384: step: 886/469, loss: 0.042151015251874924 2023-01-22 12:37:23.616699: step: 888/469, loss: 0.10631813108921051 2023-01-22 12:37:24.224757: step: 890/469, loss: 0.07333306223154068 2023-01-22 12:37:24.865403: step: 892/469, loss: 0.04904847592115402 2023-01-22 12:37:25.454469: step: 894/469, loss: 0.03820941969752312 2023-01-22 12:37:26.091478: step: 896/469, loss: 0.04050300270318985 2023-01-22 12:37:26.738498: step: 898/469, loss: 0.07567108422517776 2023-01-22 12:37:27.425900: step: 900/469, loss: 0.06121153384447098 2023-01-22 12:37:28.037642: step: 902/469, loss: 0.012447827495634556 2023-01-22 12:37:28.777425: step: 904/469, loss: 0.08087782561779022 2023-01-22 12:37:29.388246: step: 906/469, loss: 0.05531115084886551 2023-01-22 12:37:30.013505: step: 908/469, loss: 0.049140430986881256 2023-01-22 12:37:30.673696: step: 910/469, loss: 0.07221703976392746 2023-01-22 12:37:31.302852: step: 912/469, loss: 0.015738114714622498 2023-01-22 12:37:31.919089: step: 914/469, loss: 0.08920235186815262 2023-01-22 12:37:32.514985: step: 916/469, loss: 0.1762600690126419 2023-01-22 12:37:33.121169: step: 918/469, loss: 0.06889771670103073 2023-01-22 12:37:33.748715: step: 920/469, loss: 0.0878787487745285 2023-01-22 12:37:34.382072: step: 922/469, loss: 0.023091405630111694 2023-01-22 12:37:35.045468: step: 924/469, loss: 0.11318391561508179 2023-01-22 12:37:35.750510: step: 926/469, loss: 0.030088823288679123 2023-01-22 12:37:36.379631: step: 928/469, loss: 0.11263003945350647 2023-01-22 12:37:37.037058: step: 930/469, loss: 0.1936572790145874 2023-01-22 12:37:37.635808: step: 932/469, loss: 0.5855045914649963 2023-01-22 12:37:38.248270: step: 934/469, loss: 0.022066129371523857 2023-01-22 12:37:38.900648: step: 936/469, loss: 0.05266191437840462 2023-01-22 12:37:39.602233: step: 938/469, loss: 0.0818169042468071 ================================================== Loss: 0.133 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31960280773840094, 'r': 0.3220286355011212, 'f1': 0.32081113593400923}, 'combined': 0.23638715279348047, 'epoch': 18} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30517676405971506, 'r': 0.26776259536437946, 'f1': 0.285248066991488}, 'combined': 0.15558985472262982, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32139188495120696, 'r': 0.32383129204761085, 'f1': 0.3226069771438392}, 'combined': 0.23771040421124992, 'epoch': 18} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.301201241502149, 'r': 0.2703370703692664, 'f1': 0.2849357935521776}, 'combined': 0.1554195237557332, 'epoch': 
18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31335608835608836, 'r': 0.32465355643723764, 'f1': 0.31890479821514306}, 'combined': 0.23498248289536855, 'epoch': 18} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29377490559242336, 'r': 0.2679721691451474, 'f1': 0.2802809386369819}, 'combined': 0.15288051198380828, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2847222222222222, 'r': 0.3416666666666666, 'f1': 0.31060606060606055}, 'combined': 0.20707070707070702, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25862068965517243, 'r': 0.32608695652173914, 'f1': 0.2884615384615385}, 'combined': 0.14423076923076925, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38235294117647056, 'r': 0.22413793103448276, 'f1': 0.28260869565217395}, 'combined': 0.18840579710144928, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28932143369249047, 'r': 0.34806411567559575, 'f1': 0.31598585523004125}, 'combined': 0.23283168280108302, 'epoch': 8} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2915539886359941, 'r': 0.2547429635383114, 'f1': 0.27190826088610776}, 'combined': 0.14831359684696785, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:40:27.893151: step: 2/469, loss: 0.018929224461317062 2023-01-22 
12:40:28.510946: step: 4/469, loss: 0.07124216854572296 2023-01-22 12:40:29.118026: step: 6/469, loss: 0.012717709876596928 2023-01-22 12:40:29.822096: step: 8/469, loss: 0.2143086940050125 2023-01-22 12:40:30.462646: step: 10/469, loss: 0.13849379122257233 2023-01-22 12:40:31.073131: step: 12/469, loss: 0.6477763652801514 2023-01-22 12:40:31.661795: step: 14/469, loss: 0.017020374536514282 2023-01-22 12:40:32.263985: step: 16/469, loss: 0.012251926586031914 2023-01-22 12:40:32.922761: step: 18/469, loss: 0.19579367339611053 2023-01-22 12:40:33.481635: step: 20/469, loss: 0.028920970857143402 2023-01-22 12:40:34.116091: step: 22/469, loss: 0.027898535132408142 2023-01-22 12:40:34.657967: step: 24/469, loss: 0.6761677265167236 2023-01-22 12:40:35.256890: step: 26/469, loss: 0.047446198761463165 2023-01-22 12:40:35.893851: step: 28/469, loss: 0.05728962644934654 2023-01-22 12:40:36.530600: step: 30/469, loss: 0.078557088971138 2023-01-22 12:40:37.202417: step: 32/469, loss: 0.004781062249094248 2023-01-22 12:40:37.969227: step: 34/469, loss: 0.02646014466881752 2023-01-22 12:40:38.595552: step: 36/469, loss: 0.14936614036560059 2023-01-22 12:40:39.275712: step: 38/469, loss: 0.550441324710846 2023-01-22 12:40:39.960414: step: 40/469, loss: 0.02781039848923683 2023-01-22 12:40:40.643553: step: 42/469, loss: 0.06678663194179535 2023-01-22 12:40:41.265953: step: 44/469, loss: 0.006737399846315384 2023-01-22 12:40:41.908794: step: 46/469, loss: 0.05446932837367058 2023-01-22 12:40:42.494814: step: 48/469, loss: 0.5689120292663574 2023-01-22 12:40:43.209855: step: 50/469, loss: 0.08718938380479813 2023-01-22 12:40:43.851381: step: 52/469, loss: 0.05114837363362312 2023-01-22 12:40:44.439033: step: 54/469, loss: 0.03601820394396782 2023-01-22 12:40:45.035939: step: 56/469, loss: 0.03832560405135155 2023-01-22 12:40:45.590883: step: 58/469, loss: 0.047735635191202164 2023-01-22 12:40:46.187575: step: 60/469, loss: 0.019638512283563614 2023-01-22 12:40:46.863072: step: 62/469, loss: 0.044903937727212906 2023-01-22 12:40:47.435410: step: 64/469, loss: 0.05493366718292236 2023-01-22 12:40:48.066509: step: 66/469, loss: 0.050387073308229446 2023-01-22 12:40:48.740201: step: 68/469, loss: 0.14744219183921814 2023-01-22 12:40:49.393676: step: 70/469, loss: 0.15553677082061768 2023-01-22 12:40:50.004967: step: 72/469, loss: 0.022997574880719185 2023-01-22 12:40:50.647360: step: 74/469, loss: 0.09190671890974045 2023-01-22 12:40:51.284199: step: 76/469, loss: 0.033066220581531525 2023-01-22 12:40:51.899895: step: 78/469, loss: 0.16749121248722076 2023-01-22 12:40:52.530190: step: 80/469, loss: 0.04895635321736336 2023-01-22 12:40:53.089402: step: 82/469, loss: 0.07094690948724747 2023-01-22 12:40:53.807574: step: 84/469, loss: 0.0862123891711235 2023-01-22 12:40:54.459659: step: 86/469, loss: 0.060779884457588196 2023-01-22 12:40:55.121138: step: 88/469, loss: 0.048247966915369034 2023-01-22 12:40:55.759626: step: 90/469, loss: 0.0992327481508255 2023-01-22 12:40:56.339096: step: 92/469, loss: 0.036462124437093735 2023-01-22 12:40:57.042105: step: 94/469, loss: 0.1935470998287201 2023-01-22 12:40:57.678429: step: 96/469, loss: 0.14669057726860046 2023-01-22 12:40:58.305884: step: 98/469, loss: 0.04491839185357094 2023-01-22 12:40:58.892599: step: 100/469, loss: 0.047788821160793304 2023-01-22 12:40:59.533917: step: 102/469, loss: 0.04154706001281738 2023-01-22 12:41:00.155606: step: 104/469, loss: 0.23734036087989807 2023-01-22 12:41:00.812495: step: 106/469, loss: 0.03720643371343613 2023-01-22 
12:41:01.490931: step: 108/469, loss: 0.12194491922855377 2023-01-22 12:41:02.079970: step: 110/469, loss: 0.011648233979940414 2023-01-22 12:41:02.726626: step: 112/469, loss: 0.12670712172985077 2023-01-22 12:41:03.335164: step: 114/469, loss: 0.037595465779304504 2023-01-22 12:41:03.986402: step: 116/469, loss: 0.20394590497016907 2023-01-22 12:41:04.633577: step: 118/469, loss: 0.08047802746295929 2023-01-22 12:41:05.281400: step: 120/469, loss: 0.08986532688140869 2023-01-22 12:41:05.924365: step: 122/469, loss: 0.03558919206261635 2023-01-22 12:41:06.592456: step: 124/469, loss: 0.6305689811706543 2023-01-22 12:41:07.166655: step: 126/469, loss: 0.0626261755824089 2023-01-22 12:41:07.809352: step: 128/469, loss: 0.12280867993831635 2023-01-22 12:41:08.378547: step: 130/469, loss: 0.005101164802908897 2023-01-22 12:41:08.975779: step: 132/469, loss: 0.08405684679746628 2023-01-22 12:41:09.638011: step: 134/469, loss: 0.02904880978167057 2023-01-22 12:41:10.272363: step: 136/469, loss: 0.09489244222640991 2023-01-22 12:41:10.869349: step: 138/469, loss: 0.023128828033804893 2023-01-22 12:41:11.489718: step: 140/469, loss: 0.3496091067790985 2023-01-22 12:41:12.218162: step: 142/469, loss: 0.10941021889448166 2023-01-22 12:41:12.799034: step: 144/469, loss: 0.06683848798274994 2023-01-22 12:41:13.447502: step: 146/469, loss: 0.0493616908788681 2023-01-22 12:41:14.047841: step: 148/469, loss: 0.05016420781612396 2023-01-22 12:41:14.702745: step: 150/469, loss: 0.018090449273586273 2023-01-22 12:41:15.483744: step: 152/469, loss: 0.056030020117759705 2023-01-22 12:41:16.103179: step: 154/469, loss: 0.023635540157556534 2023-01-22 12:41:16.736518: step: 156/469, loss: 0.1983170062303543 2023-01-22 12:41:17.323235: step: 158/469, loss: 0.06616871803998947 2023-01-22 12:41:17.918921: step: 160/469, loss: 0.03202993422746658 2023-01-22 12:41:18.607812: step: 162/469, loss: 0.052251845598220825 2023-01-22 12:41:19.211961: step: 164/469, loss: 0.03300376981496811 2023-01-22 12:41:19.838480: step: 166/469, loss: 0.02477295882999897 2023-01-22 12:41:20.431119: step: 168/469, loss: 0.09509236365556717 2023-01-22 12:41:21.046751: step: 170/469, loss: 0.032363831996917725 2023-01-22 12:41:21.595109: step: 172/469, loss: 0.057401143014431 2023-01-22 12:41:22.206428: step: 174/469, loss: 0.06967420876026154 2023-01-22 12:41:22.822616: step: 176/469, loss: 0.044526536017656326 2023-01-22 12:41:23.391148: step: 178/469, loss: 0.033370111137628555 2023-01-22 12:41:23.976718: step: 180/469, loss: 0.12208925187587738 2023-01-22 12:41:24.601929: step: 182/469, loss: 0.02538391202688217 2023-01-22 12:41:25.304632: step: 184/469, loss: 0.13505543768405914 2023-01-22 12:41:25.905465: step: 186/469, loss: 0.03145289421081543 2023-01-22 12:41:26.608265: step: 188/469, loss: 0.044486016035079956 2023-01-22 12:41:27.261162: step: 190/469, loss: 0.0313517265021801 2023-01-22 12:41:27.971530: step: 192/469, loss: 0.06423280388116837 2023-01-22 12:41:28.518324: step: 194/469, loss: 0.01541607454419136 2023-01-22 12:41:29.159952: step: 196/469, loss: 0.05059881880879402 2023-01-22 12:41:29.833919: step: 198/469, loss: 0.12829285860061646 2023-01-22 12:41:30.451938: step: 200/469, loss: 0.05785709246993065 2023-01-22 12:41:31.060248: step: 202/469, loss: 0.12866155803203583 2023-01-22 12:41:31.615281: step: 204/469, loss: 0.04007976874709129 2023-01-22 12:41:32.268010: step: 206/469, loss: 0.06201403960585594 2023-01-22 12:41:32.912626: step: 208/469, loss: 0.1389927864074707 2023-01-22 12:41:33.521213: step: 210/469, 
loss: 0.03748631477355957 2023-01-22 12:41:34.139373: step: 212/469, loss: 0.024779140949249268 2023-01-22 12:41:34.745091: step: 214/469, loss: 0.052954625338315964 2023-01-22 12:41:35.327230: step: 216/469, loss: 0.01910155639052391 2023-01-22 12:41:35.921918: step: 218/469, loss: 0.011671827174723148 2023-01-22 12:41:36.584360: step: 220/469, loss: 0.09201599657535553 2023-01-22 12:41:37.205840: step: 222/469, loss: 0.06708290427923203 2023-01-22 12:41:37.877527: step: 224/469, loss: 0.04028232395648956 2023-01-22 12:41:38.475122: step: 226/469, loss: 0.003531606635078788 2023-01-22 12:41:39.117647: step: 228/469, loss: 0.1363021582365036 2023-01-22 12:41:39.781185: step: 230/469, loss: 0.05296606943011284 2023-01-22 12:41:40.436956: step: 232/469, loss: 0.02427266724407673 2023-01-22 12:41:41.134335: step: 234/469, loss: 0.03873661160469055 2023-01-22 12:41:41.766033: step: 236/469, loss: 0.05802200734615326 2023-01-22 12:41:42.361417: step: 238/469, loss: 0.20271986722946167 2023-01-22 12:41:42.957261: step: 240/469, loss: 0.01666843146085739 2023-01-22 12:41:43.614486: step: 242/469, loss: 0.169260635972023 2023-01-22 12:41:44.266724: step: 244/469, loss: 0.042294830083847046 2023-01-22 12:41:44.852553: step: 246/469, loss: 0.42812588810920715 2023-01-22 12:41:45.476572: step: 248/469, loss: 0.03527240827679634 2023-01-22 12:41:46.054564: step: 250/469, loss: 0.0908576101064682 2023-01-22 12:41:46.691591: step: 252/469, loss: 0.33815500140190125 2023-01-22 12:41:47.284569: step: 254/469, loss: 0.09581713378429413 2023-01-22 12:41:47.919848: step: 256/469, loss: 0.05028152093291283 2023-01-22 12:41:48.590803: step: 258/469, loss: 0.011085795238614082 2023-01-22 12:41:49.346518: step: 260/469, loss: 0.009318886324763298 2023-01-22 12:41:50.018483: step: 262/469, loss: 0.15508833527565002 2023-01-22 12:41:50.682101: step: 264/469, loss: 0.03315415978431702 2023-01-22 12:41:51.288559: step: 266/469, loss: 0.059173837304115295 2023-01-22 12:41:51.878100: step: 268/469, loss: 0.11708611994981766 2023-01-22 12:41:52.486491: step: 270/469, loss: 0.007663907017558813 2023-01-22 12:41:53.122955: step: 272/469, loss: 0.11967768520116806 2023-01-22 12:41:53.757809: step: 274/469, loss: 0.07220719754695892 2023-01-22 12:41:54.416507: step: 276/469, loss: 0.09503638744354248 2023-01-22 12:41:55.031021: step: 278/469, loss: 0.017050940543413162 2023-01-22 12:41:55.772865: step: 280/469, loss: 0.029534464702010155 2023-01-22 12:41:56.396338: step: 282/469, loss: 0.026098623871803284 2023-01-22 12:41:56.963553: step: 284/469, loss: 0.08735086768865585 2023-01-22 12:41:57.631673: step: 286/469, loss: 0.03122876212000847 2023-01-22 12:41:58.195212: step: 288/469, loss: 0.0493474155664444 2023-01-22 12:41:58.794415: step: 290/469, loss: 0.11733286827802658 2023-01-22 12:41:59.470184: step: 292/469, loss: 0.05277036875486374 2023-01-22 12:42:00.084254: step: 294/469, loss: 0.1768314391374588 2023-01-22 12:42:00.691917: step: 296/469, loss: 0.03973815590143204 2023-01-22 12:42:01.216413: step: 298/469, loss: 0.09797105193138123 2023-01-22 12:42:01.819615: step: 300/469, loss: 0.06105200573801994 2023-01-22 12:42:02.432529: step: 302/469, loss: 0.31381016969680786 2023-01-22 12:42:03.199995: step: 304/469, loss: 0.22844883799552917 2023-01-22 12:42:03.850575: step: 306/469, loss: 0.07330922037363052 2023-01-22 12:42:04.526604: step: 308/469, loss: 0.038831692188978195 2023-01-22 12:42:05.102296: step: 310/469, loss: 0.02562355436384678 2023-01-22 12:42:05.697833: step: 312/469, loss: 0.039489179849624634 
2023-01-22 12:42:06.382825: step: 314/469, loss: 0.07543668150901794 2023-01-22 12:42:07.122939: step: 316/469, loss: 0.03310641646385193 2023-01-22 12:42:07.811193: step: 318/469, loss: 0.08928314596414566 2023-01-22 12:42:08.459592: step: 320/469, loss: 0.026486938819289207 2023-01-22 12:42:09.135535: step: 322/469, loss: 0.01166245061904192 2023-01-22 12:42:09.777973: step: 324/469, loss: 0.06775911152362823 2023-01-22 12:42:10.420046: step: 326/469, loss: 0.14716047048568726 2023-01-22 12:42:11.071775: step: 328/469, loss: 0.026105301454663277 2023-01-22 12:42:11.681062: step: 330/469, loss: 0.12618499994277954 2023-01-22 12:42:12.292678: step: 332/469, loss: 0.015240269713103771 2023-01-22 12:42:12.947508: step: 334/469, loss: 0.09497291594743729 2023-01-22 12:42:13.525819: step: 336/469, loss: 0.07686266303062439 2023-01-22 12:42:14.148573: step: 338/469, loss: 0.08008912950754166 2023-01-22 12:42:14.763159: step: 340/469, loss: 0.1150628849864006 2023-01-22 12:42:15.396995: step: 342/469, loss: 0.09120073169469833 2023-01-22 12:42:16.097618: step: 344/469, loss: 0.04809346795082092 2023-01-22 12:42:16.745010: step: 346/469, loss: 0.07628106325864792 2023-01-22 12:42:17.360361: step: 348/469, loss: 0.13730362057685852 2023-01-22 12:42:18.017146: step: 350/469, loss: 0.24316351115703583 2023-01-22 12:42:18.567200: step: 352/469, loss: 0.047220487147569656 2023-01-22 12:42:19.209297: step: 354/469, loss: 0.02312900871038437 2023-01-22 12:42:19.838046: step: 356/469, loss: 0.045622896403074265 2023-01-22 12:42:20.482884: step: 358/469, loss: 0.042554911226034164 2023-01-22 12:42:21.154593: step: 360/469, loss: 0.1307445615530014 2023-01-22 12:42:21.804059: step: 362/469, loss: 0.002914142096415162 2023-01-22 12:42:22.447495: step: 364/469, loss: 0.012220010161399841 2023-01-22 12:42:23.103282: step: 366/469, loss: 0.13123634457588196 2023-01-22 12:42:23.840576: step: 368/469, loss: 0.054937299340963364 2023-01-22 12:42:24.425732: step: 370/469, loss: 0.03938545286655426 2023-01-22 12:42:25.037597: step: 372/469, loss: 0.0792742520570755 2023-01-22 12:42:25.641673: step: 374/469, loss: 0.0925876721739769 2023-01-22 12:42:26.236700: step: 376/469, loss: 0.054532501846551895 2023-01-22 12:42:26.832323: step: 378/469, loss: 0.041149165481328964 2023-01-22 12:42:27.473059: step: 380/469, loss: 0.038725558668375015 2023-01-22 12:42:28.067694: step: 382/469, loss: 0.15061764419078827 2023-01-22 12:42:28.658132: step: 384/469, loss: 0.7279587984085083 2023-01-22 12:42:29.295981: step: 386/469, loss: 0.02412373758852482 2023-01-22 12:42:30.007740: step: 388/469, loss: 0.10511638969182968 2023-01-22 12:42:30.691154: step: 390/469, loss: 0.03942331299185753 2023-01-22 12:42:31.385941: step: 392/469, loss: 0.09832076728343964 2023-01-22 12:42:31.956424: step: 394/469, loss: 0.07461059838533401 2023-01-22 12:42:32.583067: step: 396/469, loss: 0.1348385512828827 2023-01-22 12:42:33.238845: step: 398/469, loss: 0.2990141212940216 2023-01-22 12:42:33.859665: step: 400/469, loss: 0.06430903822183609 2023-01-22 12:42:34.560893: step: 402/469, loss: 0.06335035711526871 2023-01-22 12:42:35.189925: step: 404/469, loss: 0.12693028151988983 2023-01-22 12:42:35.955506: step: 406/469, loss: 0.09448190778493881 2023-01-22 12:42:36.613300: step: 408/469, loss: 0.006209980696439743 2023-01-22 12:42:37.220312: step: 410/469, loss: 0.35782331228256226 2023-01-22 12:42:37.868898: step: 412/469, loss: 0.337869793176651 2023-01-22 12:42:38.429801: step: 414/469, loss: 0.05059871822595596 2023-01-22 12:42:39.023519: 
step: 416/469, loss: 0.5286617279052734 2023-01-22 12:42:39.618084: step: 418/469, loss: 0.04364926367998123 2023-01-22 12:42:40.217975: step: 420/469, loss: 0.08338271826505661 2023-01-22 12:42:40.834206: step: 422/469, loss: 0.06525425612926483 2023-01-22 12:42:41.599798: step: 424/469, loss: 0.07388177514076233 2023-01-22 12:42:42.217573: step: 426/469, loss: 0.07868321239948273 2023-01-22 12:42:42.790518: step: 428/469, loss: 0.14537185430526733 2023-01-22 12:42:43.431100: step: 430/469, loss: 0.05447091907262802 2023-01-22 12:42:44.093413: step: 432/469, loss: 0.12325700372457504 2023-01-22 12:42:44.761599: step: 434/469, loss: 0.02149268426001072 2023-01-22 12:42:45.447808: step: 436/469, loss: 0.01476992852985859 2023-01-22 12:42:46.033121: step: 438/469, loss: 0.09142114222049713 2023-01-22 12:42:46.670596: step: 440/469, loss: 0.019309429451823235 2023-01-22 12:42:47.252888: step: 442/469, loss: 0.11012211441993713 2023-01-22 12:42:47.874147: step: 444/469, loss: 0.05216379091143608 2023-01-22 12:42:48.627977: step: 446/469, loss: 0.08407919108867645 2023-01-22 12:42:49.309508: step: 448/469, loss: 0.0027140469755977392 2023-01-22 12:42:49.958371: step: 450/469, loss: 0.07924195379018784 2023-01-22 12:42:50.564599: step: 452/469, loss: 0.0822845846414566 2023-01-22 12:42:51.158823: step: 454/469, loss: 0.5457114577293396 2023-01-22 12:42:51.739652: step: 456/469, loss: 0.01041316892951727 2023-01-22 12:42:52.366149: step: 458/469, loss: 0.20511150360107422 2023-01-22 12:42:52.991768: step: 460/469, loss: 0.09668281674385071 2023-01-22 12:42:53.656925: step: 462/469, loss: 0.09260792285203934 2023-01-22 12:42:54.330576: step: 464/469, loss: 0.529384195804596 2023-01-22 12:42:54.876670: step: 466/469, loss: 0.03033657744526863 2023-01-22 12:42:55.530641: step: 468/469, loss: 0.18193933367729187 2023-01-22 12:42:56.119480: step: 470/469, loss: 0.03623465076088905 2023-01-22 12:42:56.710178: step: 472/469, loss: 0.0743374228477478 2023-01-22 12:42:57.280307: step: 474/469, loss: 0.14177709817886353 2023-01-22 12:42:58.003248: step: 476/469, loss: 0.11012189835309982 2023-01-22 12:42:58.654389: step: 478/469, loss: 0.023035774007439613 2023-01-22 12:42:59.375889: step: 480/469, loss: 0.01576581783592701 2023-01-22 12:43:00.008314: step: 482/469, loss: 0.006357547827064991 2023-01-22 12:43:00.631768: step: 484/469, loss: 0.01665729470551014 2023-01-22 12:43:01.209498: step: 486/469, loss: 0.0113474540412426 2023-01-22 12:43:01.860383: step: 488/469, loss: 0.015080654062330723 2023-01-22 12:43:02.428565: step: 490/469, loss: 0.03376530110836029 2023-01-22 12:43:03.121541: step: 492/469, loss: 0.19623930752277374 2023-01-22 12:43:03.723231: step: 494/469, loss: 0.042401209473609924 2023-01-22 12:43:04.328972: step: 496/469, loss: 0.06177276372909546 2023-01-22 12:43:04.985176: step: 498/469, loss: 0.04665808379650116 2023-01-22 12:43:05.639916: step: 500/469, loss: 0.03664287179708481 2023-01-22 12:43:06.325020: step: 502/469, loss: 0.01018795557320118 2023-01-22 12:43:06.896115: step: 504/469, loss: 0.025340456515550613 2023-01-22 12:43:07.552123: step: 506/469, loss: 0.02340005151927471 2023-01-22 12:43:08.245912: step: 508/469, loss: 0.037303630262613297 2023-01-22 12:43:08.856425: step: 510/469, loss: 0.18455928564071655 2023-01-22 12:43:09.513946: step: 512/469, loss: 0.07283520698547363 2023-01-22 12:43:10.103693: step: 514/469, loss: 0.07054881006479263 2023-01-22 12:43:10.698235: step: 516/469, loss: 0.058443740010261536 2023-01-22 12:43:11.280134: step: 518/469, loss: 
0.26618072390556335 2023-01-22 12:43:11.886997: step: 520/469, loss: 0.06527213007211685 2023-01-22 12:43:12.523348: step: 522/469, loss: 0.29998257756233215 2023-01-22 12:43:13.096000: step: 524/469, loss: 0.1015496775507927 2023-01-22 12:43:13.767412: step: 526/469, loss: 0.036368899047374725 2023-01-22 12:43:14.425907: step: 528/469, loss: 0.0736686959862709 2023-01-22 12:43:15.044552: step: 530/469, loss: 0.07465191185474396 2023-01-22 12:43:15.689077: step: 532/469, loss: 0.047213707119226456 2023-01-22 12:43:16.299424: step: 534/469, loss: 0.2612151503562927 2023-01-22 12:43:16.881026: step: 536/469, loss: 1.1983392238616943 2023-01-22 12:43:17.502126: step: 538/469, loss: 0.043709646910429 2023-01-22 12:43:18.175150: step: 540/469, loss: 0.035532061010599136 2023-01-22 12:43:18.729063: step: 542/469, loss: 0.08896960318088531 2023-01-22 12:43:19.410138: step: 544/469, loss: 0.07250703871250153 2023-01-22 12:43:20.023262: step: 546/469, loss: 0.10077698528766632 2023-01-22 12:43:20.627159: step: 548/469, loss: 0.07578820735216141 2023-01-22 12:43:21.266082: step: 550/469, loss: 0.04982491955161095 2023-01-22 12:43:21.848171: step: 552/469, loss: 0.15638543665409088 2023-01-22 12:43:22.511353: step: 554/469, loss: 0.1810380220413208 2023-01-22 12:43:23.095437: step: 556/469, loss: 0.08028022944927216 2023-01-22 12:43:23.709515: step: 558/469, loss: 0.09091662615537643 2023-01-22 12:43:24.404189: step: 560/469, loss: 0.3924740254878998 2023-01-22 12:43:25.118865: step: 562/469, loss: 0.04054803401231766 2023-01-22 12:43:25.735920: step: 564/469, loss: 0.06324775516986847 2023-01-22 12:43:26.413355: step: 566/469, loss: 0.012863770127296448 2023-01-22 12:43:27.143507: step: 568/469, loss: 0.008546368218958378 2023-01-22 12:43:27.812619: step: 570/469, loss: 0.24474714696407318 2023-01-22 12:43:28.397354: step: 572/469, loss: 0.06525484472513199 2023-01-22 12:43:29.061339: step: 574/469, loss: 0.12211792916059494 2023-01-22 12:43:29.660519: step: 576/469, loss: 0.07190366089344025 2023-01-22 12:43:30.291292: step: 578/469, loss: 0.05851425230503082 2023-01-22 12:43:30.847990: step: 580/469, loss: 0.04802209883928299 2023-01-22 12:43:31.454080: step: 582/469, loss: 0.35618269443511963 2023-01-22 12:43:32.028388: step: 584/469, loss: 0.12805816531181335 2023-01-22 12:43:32.731978: step: 586/469, loss: 0.05296118184924126 2023-01-22 12:43:33.309376: step: 588/469, loss: 0.032306186854839325 2023-01-22 12:43:33.928371: step: 590/469, loss: 0.09688515961170197 2023-01-22 12:43:34.525891: step: 592/469, loss: 0.2450237274169922 2023-01-22 12:43:35.126098: step: 594/469, loss: 0.07718102633953094 2023-01-22 12:43:35.737866: step: 596/469, loss: 0.029232105240225792 2023-01-22 12:43:36.371271: step: 598/469, loss: 0.15836860239505768 2023-01-22 12:43:37.006893: step: 600/469, loss: 0.038774676620960236 2023-01-22 12:43:37.599469: step: 602/469, loss: 0.045867323875427246 2023-01-22 12:43:38.224959: step: 604/469, loss: 0.09482081979513168 2023-01-22 12:43:38.825344: step: 606/469, loss: 0.05104031786322594 2023-01-22 12:43:39.429453: step: 608/469, loss: 0.19461925327777863 2023-01-22 12:43:40.009779: step: 610/469, loss: 0.03330017998814583 2023-01-22 12:43:40.566436: step: 612/469, loss: 0.049013689160346985 2023-01-22 12:43:41.193265: step: 614/469, loss: 0.03244650363922119 2023-01-22 12:43:41.802575: step: 616/469, loss: 0.15915901958942413 2023-01-22 12:43:42.439030: step: 618/469, loss: 0.09447059780359268 2023-01-22 12:43:43.101942: step: 620/469, loss: 0.039652321487665176 2023-01-22 
12:43:43.798349: step: 622/469, loss: 0.02297951653599739 2023-01-22 12:43:44.412585: step: 624/469, loss: 0.07330049574375153 2023-01-22 12:43:44.951453: step: 626/469, loss: 0.06333385407924652 2023-01-22 12:43:45.565441: step: 628/469, loss: 0.02523375116288662 2023-01-22 12:43:46.186503: step: 630/469, loss: 0.13307152688503265 2023-01-22 12:43:46.800323: step: 632/469, loss: 0.03397560864686966 2023-01-22 12:43:47.477010: step: 634/469, loss: 0.144926056265831 2023-01-22 12:43:48.042366: step: 636/469, loss: 0.4149293005466461 2023-01-22 12:43:48.695131: step: 638/469, loss: 0.03584090992808342 2023-01-22 12:43:49.312282: step: 640/469, loss: 0.15646037459373474 2023-01-22 12:43:49.923109: step: 642/469, loss: 0.3996548652648926 2023-01-22 12:43:50.517798: step: 644/469, loss: 0.03151587396860123 2023-01-22 12:43:51.205487: step: 646/469, loss: 0.07914189994335175 2023-01-22 12:43:51.847625: step: 648/469, loss: 0.13150370121002197 2023-01-22 12:43:52.408576: step: 650/469, loss: 0.0494757704436779 2023-01-22 12:43:53.022907: step: 652/469, loss: 0.040536068379879 2023-01-22 12:43:53.653254: step: 654/469, loss: 0.021079814061522484 2023-01-22 12:43:54.269824: step: 656/469, loss: 0.04168560355901718 2023-01-22 12:43:54.934676: step: 658/469, loss: 0.02781490981578827 2023-01-22 12:43:55.655135: step: 660/469, loss: 0.033152054995298386 2023-01-22 12:43:56.345173: step: 662/469, loss: 0.024538125842809677 2023-01-22 12:43:56.953670: step: 664/469, loss: 0.20348501205444336 2023-01-22 12:43:57.558065: step: 666/469, loss: 0.09490671008825302 2023-01-22 12:43:58.158308: step: 668/469, loss: 0.0413602776825428 2023-01-22 12:43:58.804054: step: 670/469, loss: 0.1584605574607849 2023-01-22 12:43:59.455760: step: 672/469, loss: 0.044671401381492615 2023-01-22 12:44:00.068621: step: 674/469, loss: 0.04014156013727188 2023-01-22 12:44:00.730278: step: 676/469, loss: 0.05848986282944679 2023-01-22 12:44:01.335536: step: 678/469, loss: 0.029924491420388222 2023-01-22 12:44:01.999447: step: 680/469, loss: 0.05231635272502899 2023-01-22 12:44:02.594545: step: 682/469, loss: 0.02725483849644661 2023-01-22 12:44:03.186108: step: 684/469, loss: 0.05583032965660095 2023-01-22 12:44:03.830458: step: 686/469, loss: 0.10990659892559052 2023-01-22 12:44:04.481840: step: 688/469, loss: 2.1754565238952637 2023-01-22 12:44:05.143245: step: 690/469, loss: 1.2818113565444946 2023-01-22 12:44:05.738530: step: 692/469, loss: 0.012957548722624779 2023-01-22 12:44:06.338678: step: 694/469, loss: 1.400787115097046 2023-01-22 12:44:06.962043: step: 696/469, loss: 0.26420241594314575 2023-01-22 12:44:07.622024: step: 698/469, loss: 0.04386802390217781 2023-01-22 12:44:08.236790: step: 700/469, loss: 0.12666882574558258 2023-01-22 12:44:08.842512: step: 702/469, loss: 0.029134169220924377 2023-01-22 12:44:09.451391: step: 704/469, loss: 0.052265509963035583 2023-01-22 12:44:10.008283: step: 706/469, loss: 0.06697551161050797 2023-01-22 12:44:10.589278: step: 708/469, loss: 1.072249412536621 2023-01-22 12:44:11.204434: step: 710/469, loss: 0.047012750059366226 2023-01-22 12:44:11.840285: step: 712/469, loss: 0.07216726988554001 2023-01-22 12:44:12.471300: step: 714/469, loss: 0.09166297316551208 2023-01-22 12:44:13.085585: step: 716/469, loss: 0.0377233549952507 2023-01-22 12:44:13.694308: step: 718/469, loss: 0.11211255192756653 2023-01-22 12:44:14.349644: step: 720/469, loss: 0.013600067235529423 2023-01-22 12:44:14.972468: step: 722/469, loss: 0.057414460927248 2023-01-22 12:44:15.607702: step: 724/469, loss: 
0.04966941848397255 2023-01-22 12:44:16.229481: step: 726/469, loss: 0.19014638662338257 2023-01-22 12:44:16.808388: step: 728/469, loss: 0.05566960200667381 2023-01-22 12:44:17.432471: step: 730/469, loss: 0.02851691283285618 2023-01-22 12:44:18.140171: step: 732/469, loss: 0.007818255573511124 2023-01-22 12:44:18.842826: step: 734/469, loss: 0.16576926410198212 2023-01-22 12:44:19.443672: step: 736/469, loss: 0.19522695243358612 2023-01-22 12:44:20.134943: step: 738/469, loss: 0.10008224099874496 2023-01-22 12:44:20.677973: step: 740/469, loss: 0.27343299984931946 2023-01-22 12:44:21.294149: step: 742/469, loss: 0.07809916138648987 2023-01-22 12:44:21.895817: step: 744/469, loss: 0.048946261405944824 2023-01-22 12:44:22.559240: step: 746/469, loss: 0.04034539684653282 2023-01-22 12:44:23.224219: step: 748/469, loss: 0.5958285331726074 2023-01-22 12:44:23.798602: step: 750/469, loss: 0.13611815869808197 2023-01-22 12:44:24.482553: step: 752/469, loss: 0.14315664768218994 2023-01-22 12:44:25.094966: step: 754/469, loss: 0.057231444865465164 2023-01-22 12:44:25.729542: step: 756/469, loss: 0.04199300333857536 2023-01-22 12:44:26.339940: step: 758/469, loss: 0.6869280338287354 2023-01-22 12:44:26.983493: step: 760/469, loss: 0.5277479290962219 2023-01-22 12:44:27.621132: step: 762/469, loss: 0.04274006560444832 2023-01-22 12:44:28.298173: step: 764/469, loss: 0.16983969509601593 2023-01-22 12:44:28.974554: step: 766/469, loss: 0.05037027969956398 2023-01-22 12:44:29.648521: step: 768/469, loss: 0.08371130377054214 2023-01-22 12:44:30.260976: step: 770/469, loss: 0.04516316205263138 2023-01-22 12:44:30.858319: step: 772/469, loss: 0.08145400881767273 2023-01-22 12:44:31.507143: step: 774/469, loss: 0.09240426123142242 2023-01-22 12:44:32.144483: step: 776/469, loss: 0.09877298027276993 2023-01-22 12:44:32.770393: step: 778/469, loss: 0.03335174545645714 2023-01-22 12:44:33.458744: step: 780/469, loss: 0.10467829555273056 2023-01-22 12:44:34.113889: step: 782/469, loss: 0.0621492974460125 2023-01-22 12:44:34.783317: step: 784/469, loss: 0.17362046241760254 2023-01-22 12:44:35.420583: step: 786/469, loss: 0.03309227153658867 2023-01-22 12:44:36.090599: step: 788/469, loss: 0.013662155717611313 2023-01-22 12:44:36.739699: step: 790/469, loss: 0.09494443982839584 2023-01-22 12:44:37.326954: step: 792/469, loss: 0.47435706853866577 2023-01-22 12:44:37.941917: step: 794/469, loss: 0.1012163907289505 2023-01-22 12:44:38.605089: step: 796/469, loss: 0.03549069166183472 2023-01-22 12:44:39.264639: step: 798/469, loss: 0.21971246600151062 2023-01-22 12:44:39.857654: step: 800/469, loss: 0.03983674198389053 2023-01-22 12:44:40.446459: step: 802/469, loss: 0.10834245383739471 2023-01-22 12:44:41.108238: step: 804/469, loss: 0.1180226057767868 2023-01-22 12:44:41.711637: step: 806/469, loss: 0.06949001550674438 2023-01-22 12:44:42.364986: step: 808/469, loss: 0.14765363931655884 2023-01-22 12:44:42.975886: step: 810/469, loss: 0.013524406589567661 2023-01-22 12:44:43.702218: step: 812/469, loss: 0.016234934329986572 2023-01-22 12:44:44.327779: step: 814/469, loss: 0.096228688955307 2023-01-22 12:44:44.956449: step: 816/469, loss: 0.05443071573972702 2023-01-22 12:44:45.557684: step: 818/469, loss: 0.0037955818697810173 2023-01-22 12:44:46.117696: step: 820/469, loss: 0.018657274544239044 2023-01-22 12:44:46.765223: step: 822/469, loss: 0.0553579144179821 2023-01-22 12:44:47.330685: step: 824/469, loss: 0.07699181139469147 2023-01-22 12:44:47.968826: step: 826/469, loss: 0.046694591641426086 2023-01-22 
12:44:48.690806: step: 828/469, loss: 0.02631101943552494 2023-01-22 12:44:49.380737: step: 830/469, loss: 0.04948659613728523 2023-01-22 12:44:50.023741: step: 832/469, loss: 0.12073986977338791 2023-01-22 12:44:50.618294: step: 834/469, loss: 0.14491772651672363 2023-01-22 12:44:51.231585: step: 836/469, loss: 0.014389042742550373 2023-01-22 12:44:51.818200: step: 838/469, loss: 0.03447839617729187 2023-01-22 12:44:52.381233: step: 840/469, loss: 0.08541145920753479 2023-01-22 12:44:52.990501: step: 842/469, loss: 0.046071138232946396 2023-01-22 12:44:53.665808: step: 844/469, loss: 0.04955539479851723 2023-01-22 12:44:54.244204: step: 846/469, loss: 0.14975903928279877 2023-01-22 12:44:54.931789: step: 848/469, loss: 0.2417968213558197 2023-01-22 12:44:55.554113: step: 850/469, loss: 0.10400690883398056 2023-01-22 12:44:56.185001: step: 852/469, loss: 0.053718503564596176 2023-01-22 12:44:56.816386: step: 854/469, loss: 0.6984888315200806 2023-01-22 12:44:57.449967: step: 856/469, loss: 0.011281779035925865 2023-01-22 12:44:58.062243: step: 858/469, loss: 0.1161065623164177 2023-01-22 12:44:58.705631: step: 860/469, loss: 0.08705775439739227 2023-01-22 12:44:59.349381: step: 862/469, loss: 0.1465783566236496 2023-01-22 12:45:00.006057: step: 864/469, loss: 0.03391014039516449 2023-01-22 12:45:00.577181: step: 866/469, loss: 0.04508693516254425 2023-01-22 12:45:01.193604: step: 868/469, loss: 0.13874170184135437 2023-01-22 12:45:01.745492: step: 870/469, loss: 0.02841523289680481 2023-01-22 12:45:02.392873: step: 872/469, loss: 0.032101888209581375 2023-01-22 12:45:03.031200: step: 874/469, loss: 0.05137887969613075 2023-01-22 12:45:03.687928: step: 876/469, loss: 0.06377363950014114 2023-01-22 12:45:04.342386: step: 878/469, loss: 0.09423363953828812 2023-01-22 12:45:04.960173: step: 880/469, loss: 0.07486584037542343 2023-01-22 12:45:05.496904: step: 882/469, loss: 0.029756108298897743 2023-01-22 12:45:06.129843: step: 884/469, loss: 0.2824748456478119 2023-01-22 12:45:06.748112: step: 886/469, loss: 0.0750042125582695 2023-01-22 12:45:07.343124: step: 888/469, loss: 0.017358288168907166 2023-01-22 12:45:07.963889: step: 890/469, loss: 0.08023647964000702 2023-01-22 12:45:08.574352: step: 892/469, loss: 0.28052639961242676 2023-01-22 12:45:09.194869: step: 894/469, loss: 0.0015651097055524588 2023-01-22 12:45:09.906034: step: 896/469, loss: 0.1067228838801384 2023-01-22 12:45:10.515871: step: 898/469, loss: 0.028684962540864944 2023-01-22 12:45:11.176002: step: 900/469, loss: 0.04417888820171356 2023-01-22 12:45:11.842302: step: 902/469, loss: 0.06779541075229645 2023-01-22 12:45:12.446290: step: 904/469, loss: 0.02422582544386387 2023-01-22 12:45:13.124112: step: 906/469, loss: 0.008805841207504272 2023-01-22 12:45:13.743746: step: 908/469, loss: 0.13440413773059845 2023-01-22 12:45:14.483298: step: 910/469, loss: 0.10930266976356506 2023-01-22 12:45:15.122305: step: 912/469, loss: 0.2258804440498352 2023-01-22 12:45:15.737349: step: 914/469, loss: 0.051467251032590866 2023-01-22 12:45:16.442038: step: 916/469, loss: 0.051162056624889374 2023-01-22 12:45:17.086714: step: 918/469, loss: 0.2344721555709839 2023-01-22 12:45:17.716392: step: 920/469, loss: 0.02741030976176262 2023-01-22 12:45:18.287736: step: 922/469, loss: 0.10001194477081299 2023-01-22 12:45:18.938583: step: 924/469, loss: 0.10465575754642487 2023-01-22 12:45:19.515129: step: 926/469, loss: 0.06605175137519836 2023-01-22 12:45:20.115372: step: 928/469, loss: 0.016095291823148727 2023-01-22 12:45:20.775117: step: 
930/469, loss: 0.032779280096292496 2023-01-22 12:45:21.345279: step: 932/469, loss: 0.17856554687023163 2023-01-22 12:45:22.055145: step: 934/469, loss: 0.1468569040298462 2023-01-22 12:45:22.665419: step: 936/469, loss: 0.008212044835090637 2023-01-22 12:45:23.335709: step: 938/469, loss: 0.1104971319437027 ================================================== Loss: 0.112 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3028719190140845, 'r': 0.326435009487666, 'f1': 0.31421232876712324}, 'combined': 0.23152487382840659, 'epoch': 19} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31306411963581077, 'r': 0.26809516558016366, 'f1': 0.2888398383234292}, 'combined': 0.15754900272187045, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2933188465783665, 'r': 0.3361756799493992, 'f1': 0.31328838785735347}, 'combined': 0.23084407526331308, 'epoch': 19} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30389880414795106, 'r': 0.2666413112331977, 'f1': 0.2840535606022272}, 'combined': 0.154938305783033, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.274074074074074, 'r': 0.35238095238095235, 'f1': 0.3083333333333333}, 'combined': 0.20555555555555552, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 19} New best korean model... 
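Note on the metric dicts above: each 'f1' is the usual harmonic mean of 'p' and 'r', and the per-language 'combined' score is consistent with the product of the template F1 and the slot F1 (e.g. 0.7368421 * 0.3142123 ≈ 0.231525 for Dev Chinese at epoch 19, matching the reported 'combined'). The short Python sketch below reproduces the reported numbers from the dicts as printed; it is a reading of the log, not code taken from train.py, and the function names are illustrative.

def f1(p, r):
    # Harmonic mean of precision and recall; defined as 0 when p + r == 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template, slot):
    # The logged 'combined' value matches template F1 multiplied by slot F1.
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Dev Chinese, epoch 19 (precision/recall values copied from the log above):
template = {'p': 1.0, 'r': 0.5833333333333334}
slot = {'p': 0.3028719190140845, 'r': 0.326435009487666}
print(combined_score(template, slot))  # ~0.23152, matching the reported 'combined'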
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:48:19.427246: step: 2/469, loss: 0.03047036938369274 2023-01-22 12:48:20.043198: step: 4/469, loss: 0.05100470036268234 2023-01-22 12:48:20.681599: step: 6/469, loss: 0.042807795107364655 2023-01-22 12:48:21.382635: step: 8/469, loss: 0.09654735773801804 2023-01-22 12:48:22.036515: step: 10/469, loss: 0.06212793290615082 2023-01-22 12:48:22.664844: step: 12/469, loss: 0.05861250311136246 2023-01-22 12:48:23.233942: step: 14/469, loss: 0.003254307433962822 2023-01-22 12:48:23.885565: step: 16/469, loss: 0.2485588937997818 2023-01-22 12:48:24.490288: step: 18/469, loss: 0.007442878559231758 2023-01-22 12:48:25.090768: step: 20/469, loss: 0.0058508082292973995 2023-01-22 12:48:25.722514: step: 22/469, loss: 0.06172182410955429 2023-01-22 12:48:26.339621: step: 24/469, loss: 0.008548441343009472 2023-01-22 12:48:26.899482: step: 26/469, loss: 0.016516586765646935 2023-01-22 12:48:27.629713: step: 28/469, loss: 0.22133564949035645 2023-01-22 12:48:28.247971: step: 30/469, loss: 0.04281838238239288 2023-01-22 12:48:28.915549: step: 32/469, loss: 0.03429471328854561 2023-01-22 12:48:29.524986: step: 34/469, loss: 0.1065850630402565 2023-01-22 
12:48:30.183546: step: 36/469, loss: 1.7446966171264648 2023-01-22 12:48:30.822694: step: 38/469, loss: 0.026223506778478622 2023-01-22 12:48:31.487031: step: 40/469, loss: 0.02093130722641945 2023-01-22 12:48:32.151289: step: 42/469, loss: 0.10036730021238327 2023-01-22 12:48:32.767165: step: 44/469, loss: 0.10938212275505066 2023-01-22 12:48:33.423741: step: 46/469, loss: 0.10657904297113419 2023-01-22 12:48:34.089552: step: 48/469, loss: 0.038341887295246124 2023-01-22 12:48:34.723497: step: 50/469, loss: 0.07101305574178696 2023-01-22 12:48:35.431333: step: 52/469, loss: 0.056344833225011826 2023-01-22 12:48:36.063768: step: 54/469, loss: 0.10123838484287262 2023-01-22 12:48:36.709706: step: 56/469, loss: 0.08034262806177139 2023-01-22 12:48:37.322399: step: 58/469, loss: 0.21524883806705475 2023-01-22 12:48:37.915915: step: 60/469, loss: 0.044186532497406006 2023-01-22 12:48:38.576193: step: 62/469, loss: 0.13733619451522827 2023-01-22 12:48:39.239847: step: 64/469, loss: 0.367906779050827 2023-01-22 12:48:39.861916: step: 66/469, loss: 0.08909911662340164 2023-01-22 12:48:40.391353: step: 68/469, loss: 0.0975717082619667 2023-01-22 12:48:41.015386: step: 70/469, loss: 0.027646176517009735 2023-01-22 12:48:41.651447: step: 72/469, loss: 0.1450171321630478 2023-01-22 12:48:42.311888: step: 74/469, loss: 0.21010150015354156 2023-01-22 12:48:42.971149: step: 76/469, loss: 0.004874587524682283 2023-01-22 12:48:43.605598: step: 78/469, loss: 0.10334936529397964 2023-01-22 12:48:44.171234: step: 80/469, loss: 0.08851306885480881 2023-01-22 12:48:44.772711: step: 82/469, loss: 0.06980957835912704 2023-01-22 12:48:45.387037: step: 84/469, loss: 0.02645646035671234 2023-01-22 12:48:45.941430: step: 86/469, loss: 0.02716509811580181 2023-01-22 12:48:46.620017: step: 88/469, loss: 0.0896650105714798 2023-01-22 12:48:47.240750: step: 90/469, loss: 0.09851191192865372 2023-01-22 12:48:47.841245: step: 92/469, loss: 0.009893715381622314 2023-01-22 12:48:48.434889: step: 94/469, loss: 0.027347829192876816 2023-01-22 12:48:49.129624: step: 96/469, loss: 0.7567307353019714 2023-01-22 12:48:49.790883: step: 98/469, loss: 0.11436711251735687 2023-01-22 12:48:50.404411: step: 100/469, loss: 0.008082532323896885 2023-01-22 12:48:51.013169: step: 102/469, loss: 0.021137690171599388 2023-01-22 12:48:51.613109: step: 104/469, loss: 0.08700376749038696 2023-01-22 12:48:52.209413: step: 106/469, loss: 0.06389729678630829 2023-01-22 12:48:52.807833: step: 108/469, loss: 0.032811857759952545 2023-01-22 12:48:53.460257: step: 110/469, loss: 0.026620911434292793 2023-01-22 12:48:53.970618: step: 112/469, loss: 0.02649720199406147 2023-01-22 12:48:54.574028: step: 114/469, loss: 0.03372364863753319 2023-01-22 12:48:55.287819: step: 116/469, loss: 0.0719013512134552 2023-01-22 12:48:55.975888: step: 118/469, loss: 0.046262580901384354 2023-01-22 12:48:56.562908: step: 120/469, loss: 0.08907566964626312 2023-01-22 12:48:57.154549: step: 122/469, loss: 0.011383886449038982 2023-01-22 12:48:57.857448: step: 124/469, loss: 0.0537852980196476 2023-01-22 12:48:58.446278: step: 126/469, loss: 0.11244861781597137 2023-01-22 12:48:59.065763: step: 128/469, loss: 0.06547487527132034 2023-01-22 12:48:59.720411: step: 130/469, loss: 0.034582287073135376 2023-01-22 12:49:00.282380: step: 132/469, loss: 0.052909255027770996 2023-01-22 12:49:00.965484: step: 134/469, loss: 0.012787334620952606 2023-01-22 12:49:01.594061: step: 136/469, loss: 0.038171760737895966 2023-01-22 12:49:02.202541: step: 138/469, loss: 0.040770165622234344 
2023-01-22 12:49:02.814232: step: 140/469, loss: 0.07857181131839752 2023-01-22 12:49:03.438276: step: 142/469, loss: 0.07648767530918121 2023-01-22 12:49:04.291323: step: 144/469, loss: 0.016063101589679718 2023-01-22 12:49:04.870080: step: 146/469, loss: 0.018572650849819183 2023-01-22 12:49:05.467200: step: 148/469, loss: 0.027876758947968483 2023-01-22 12:49:06.104378: step: 150/469, loss: 0.020173287019133568 2023-01-22 12:49:06.690272: step: 152/469, loss: 0.036739833652973175 2023-01-22 12:49:07.293846: step: 154/469, loss: 0.03203282877802849 2023-01-22 12:49:07.900090: step: 156/469, loss: 0.007034297101199627 2023-01-22 12:49:08.481337: step: 158/469, loss: 0.14537259936332703 2023-01-22 12:49:09.105835: step: 160/469, loss: 0.3501514792442322 2023-01-22 12:49:09.835796: step: 162/469, loss: 0.08837326616048813 2023-01-22 12:49:10.396379: step: 164/469, loss: 0.007120220456272364 2023-01-22 12:49:10.950632: step: 166/469, loss: 0.07574304938316345 2023-01-22 12:49:11.561790: step: 168/469, loss: 0.02238650806248188 2023-01-22 12:49:12.179227: step: 170/469, loss: 0.06542760878801346 2023-01-22 12:49:12.785271: step: 172/469, loss: 0.08846171945333481 2023-01-22 12:49:13.393314: step: 174/469, loss: 0.015145599842071533 2023-01-22 12:49:13.980317: step: 176/469, loss: 0.09192009270191193 2023-01-22 12:49:14.631203: step: 178/469, loss: 0.039436232298612595 2023-01-22 12:49:15.214043: step: 180/469, loss: 0.02832731418311596 2023-01-22 12:49:15.865460: step: 182/469, loss: 0.039854180067777634 2023-01-22 12:49:16.521902: step: 184/469, loss: 0.08265795558691025 2023-01-22 12:49:17.130119: step: 186/469, loss: 0.42919960618019104 2023-01-22 12:49:17.777193: step: 188/469, loss: 0.13242118060588837 2023-01-22 12:49:18.417717: step: 190/469, loss: 0.045264072716236115 2023-01-22 12:49:19.027956: step: 192/469, loss: 0.03616106137633324 2023-01-22 12:49:19.702171: step: 194/469, loss: 0.03622110188007355 2023-01-22 12:49:20.289164: step: 196/469, loss: 0.08620048314332962 2023-01-22 12:49:20.910046: step: 198/469, loss: 0.10974568128585815 2023-01-22 12:49:21.553706: step: 200/469, loss: 0.5377477407455444 2023-01-22 12:49:22.199692: step: 202/469, loss: 0.05585261806845665 2023-01-22 12:49:22.811983: step: 204/469, loss: 0.12974366545677185 2023-01-22 12:49:23.403241: step: 206/469, loss: 0.013405867852270603 2023-01-22 12:49:24.015767: step: 208/469, loss: 0.009080920368432999 2023-01-22 12:49:24.625804: step: 210/469, loss: 0.03891652822494507 2023-01-22 12:49:25.240304: step: 212/469, loss: 0.03136589750647545 2023-01-22 12:49:25.839307: step: 214/469, loss: 0.37084153294563293 2023-01-22 12:49:26.472060: step: 216/469, loss: 0.01203848049044609 2023-01-22 12:49:27.117999: step: 218/469, loss: 0.055246707051992416 2023-01-22 12:49:27.711226: step: 220/469, loss: 0.039372049272060394 2023-01-22 12:49:28.347600: step: 222/469, loss: 0.17466673254966736 2023-01-22 12:49:28.990170: step: 224/469, loss: 0.07062588632106781 2023-01-22 12:49:29.570886: step: 226/469, loss: 0.05622119456529617 2023-01-22 12:49:30.168841: step: 228/469, loss: 0.025270016863942146 2023-01-22 12:49:30.798442: step: 230/469, loss: 0.04417979717254639 2023-01-22 12:49:31.416110: step: 232/469, loss: 0.07498948276042938 2023-01-22 12:49:32.002767: step: 234/469, loss: 0.05173995718359947 2023-01-22 12:49:32.634379: step: 236/469, loss: 0.07530031353235245 2023-01-22 12:49:33.277118: step: 238/469, loss: 0.009907389990985394 2023-01-22 12:49:33.868947: step: 240/469, loss: 0.026475517079234123 2023-01-22 
12:49:34.482491: step: 242/469, loss: 0.027065781876444817 2023-01-22 12:49:35.131878: step: 244/469, loss: 0.03740336373448372 2023-01-22 12:49:35.766387: step: 246/469, loss: 0.02053188905119896 2023-01-22 12:49:36.420572: step: 248/469, loss: 0.02678452804684639 2023-01-22 12:49:37.019840: step: 250/469, loss: 0.2260783165693283 2023-01-22 12:49:37.626398: step: 252/469, loss: 0.06816305220127106 2023-01-22 12:49:38.241591: step: 254/469, loss: 0.07353334873914719 2023-01-22 12:49:38.861513: step: 256/469, loss: 0.09397629648447037 2023-01-22 12:49:39.457539: step: 258/469, loss: 0.009656384587287903 2023-01-22 12:49:40.096982: step: 260/469, loss: 0.03368005156517029 2023-01-22 12:49:40.708418: step: 262/469, loss: 0.053902022540569305 2023-01-22 12:49:41.381784: step: 264/469, loss: 0.2720509171485901 2023-01-22 12:49:41.992477: step: 266/469, loss: 0.056828852742910385 2023-01-22 12:49:42.640427: step: 268/469, loss: 0.21532544493675232 2023-01-22 12:49:43.297864: step: 270/469, loss: 0.09514371305704117 2023-01-22 12:49:43.952424: step: 272/469, loss: 0.008572185412049294 2023-01-22 12:49:44.713628: step: 274/469, loss: 0.14297889173030853 2023-01-22 12:49:45.394053: step: 276/469, loss: 0.01746690645813942 2023-01-22 12:49:45.982975: step: 278/469, loss: 0.026987843215465546 2023-01-22 12:49:46.562029: step: 280/469, loss: 0.05808243900537491 2023-01-22 12:49:47.154200: step: 282/469, loss: 0.002617048332467675 2023-01-22 12:49:47.725707: step: 284/469, loss: 0.03153465315699577 2023-01-22 12:49:48.430559: step: 286/469, loss: 0.039475131779909134 2023-01-22 12:49:49.071973: step: 288/469, loss: 0.03739848732948303 2023-01-22 12:49:49.652019: step: 290/469, loss: 0.05013378709554672 2023-01-22 12:49:50.287703: step: 292/469, loss: 0.08018413931131363 2023-01-22 12:49:50.864552: step: 294/469, loss: 0.010611064732074738 2023-01-22 12:49:51.595361: step: 296/469, loss: 0.10344908386468887 2023-01-22 12:49:52.391677: step: 298/469, loss: 0.07918503880500793 2023-01-22 12:49:53.016890: step: 300/469, loss: 0.4584823250770569 2023-01-22 12:49:53.686897: step: 302/469, loss: 0.02735855244100094 2023-01-22 12:49:54.311343: step: 304/469, loss: 0.09685743600130081 2023-01-22 12:49:54.890443: step: 306/469, loss: 0.06576409190893173 2023-01-22 12:49:55.515151: step: 308/469, loss: 0.04262363165616989 2023-01-22 12:49:56.093462: step: 310/469, loss: 0.018736232072114944 2023-01-22 12:49:56.707822: step: 312/469, loss: 0.04727477207779884 2023-01-22 12:49:57.312259: step: 314/469, loss: 0.028145043179392815 2023-01-22 12:49:57.996352: step: 316/469, loss: 0.022955063730478287 2023-01-22 12:49:58.619287: step: 318/469, loss: 0.1341588795185089 2023-01-22 12:49:59.338606: step: 320/469, loss: 0.02380920760333538 2023-01-22 12:49:59.994905: step: 322/469, loss: 0.007822670973837376 2023-01-22 12:50:00.658944: step: 324/469, loss: 0.03533836826682091 2023-01-22 12:50:01.339847: step: 326/469, loss: 0.014371704310178757 2023-01-22 12:50:01.992467: step: 328/469, loss: 0.05644243583083153 2023-01-22 12:50:02.649812: step: 330/469, loss: 0.05656706169247627 2023-01-22 12:50:03.365209: step: 332/469, loss: 0.43775734305381775 2023-01-22 12:50:04.021393: step: 334/469, loss: 0.00509157869964838 2023-01-22 12:50:04.648711: step: 336/469, loss: 0.10166650265455246 2023-01-22 12:50:05.282205: step: 338/469, loss: 0.01550217717885971 2023-01-22 12:50:05.907592: step: 340/469, loss: 0.0271731186658144 2023-01-22 12:50:06.549670: step: 342/469, loss: 0.07601156085729599 2023-01-22 12:50:07.164759: step: 
344/469, loss: 0.006699007004499435 2023-01-22 12:50:07.790567: step: 346/469, loss: 0.024352973327040672 2023-01-22 12:50:08.470484: step: 348/469, loss: 0.09689823538064957 2023-01-22 12:50:09.045154: step: 350/469, loss: 0.029600057750940323 2023-01-22 12:50:09.732988: step: 352/469, loss: 0.10261412709951401 2023-01-22 12:50:10.257096: step: 354/469, loss: 0.1763852834701538 2023-01-22 12:50:10.859817: step: 356/469, loss: 0.06254316121339798 2023-01-22 12:50:11.475975: step: 358/469, loss: 3.0973660945892334 2023-01-22 12:50:12.081109: step: 360/469, loss: 0.010241741314530373 2023-01-22 12:50:12.746335: step: 362/469, loss: 0.0581340491771698 2023-01-22 12:50:13.486168: step: 364/469, loss: 0.037835244089365005 2023-01-22 12:50:14.049643: step: 366/469, loss: 0.15622061491012573 2023-01-22 12:50:14.637531: step: 368/469, loss: 0.023382317274808884 2023-01-22 12:50:15.242789: step: 370/469, loss: 0.0449327677488327 2023-01-22 12:50:15.865609: step: 372/469, loss: 0.07265295833349228 2023-01-22 12:50:16.486228: step: 374/469, loss: 0.07179847359657288 2023-01-22 12:50:17.140576: step: 376/469, loss: 0.10519182682037354 2023-01-22 12:50:17.699928: step: 378/469, loss: 0.0063442569226026535 2023-01-22 12:50:18.271955: step: 380/469, loss: 0.027087777853012085 2023-01-22 12:50:18.903168: step: 382/469, loss: 0.0908999815583229 2023-01-22 12:50:19.530432: step: 384/469, loss: 0.017228519544005394 2023-01-22 12:50:20.129527: step: 386/469, loss: 0.13921339809894562 2023-01-22 12:50:20.873102: step: 388/469, loss: 0.04351751506328583 2023-01-22 12:50:21.465113: step: 390/469, loss: 0.06499644368886948 2023-01-22 12:50:22.114305: step: 392/469, loss: 0.6581273674964905 2023-01-22 12:50:22.714676: step: 394/469, loss: 0.043974511325359344 2023-01-22 12:50:23.347698: step: 396/469, loss: 0.15624815225601196 2023-01-22 12:50:23.970793: step: 398/469, loss: 0.0968884751200676 2023-01-22 12:50:24.678362: step: 400/469, loss: 0.08363330364227295 2023-01-22 12:50:25.317206: step: 402/469, loss: 0.13590368628501892 2023-01-22 12:50:25.840590: step: 404/469, loss: 0.04422525316476822 2023-01-22 12:50:26.550679: step: 406/469, loss: 0.023506473749876022 2023-01-22 12:50:27.209423: step: 408/469, loss: 0.04093289375305176 2023-01-22 12:50:27.810840: step: 410/469, loss: 0.008529514074325562 2023-01-22 12:50:28.433443: step: 412/469, loss: 0.11678679287433624 2023-01-22 12:50:29.066536: step: 414/469, loss: 0.07076162099838257 2023-01-22 12:50:29.684673: step: 416/469, loss: 0.04140174388885498 2023-01-22 12:50:30.357408: step: 418/469, loss: 0.14785248041152954 2023-01-22 12:50:31.009763: step: 420/469, loss: 0.05215369164943695 2023-01-22 12:50:31.618065: step: 422/469, loss: 0.04422905296087265 2023-01-22 12:50:32.200867: step: 424/469, loss: 0.010338985361158848 2023-01-22 12:50:32.813663: step: 426/469, loss: 0.0835152268409729 2023-01-22 12:50:33.453373: step: 428/469, loss: 0.053154680877923965 2023-01-22 12:50:34.109731: step: 430/469, loss: 0.025039631873369217 2023-01-22 12:50:34.761905: step: 432/469, loss: 0.05907084420323372 2023-01-22 12:50:35.387389: step: 434/469, loss: 0.05137882009148598 2023-01-22 12:50:36.025429: step: 436/469, loss: 0.010029279626905918 2023-01-22 12:50:36.694071: step: 438/469, loss: 0.09758265316486359 2023-01-22 12:50:37.320764: step: 440/469, loss: 0.012394366785883904 2023-01-22 12:50:38.034808: step: 442/469, loss: 0.24575424194335938 2023-01-22 12:50:38.757371: step: 444/469, loss: 0.02452407404780388 2023-01-22 12:50:39.390203: step: 446/469, loss: 
0.07089190185070038 2023-01-22 12:50:40.089856: step: 448/469, loss: 0.07185640931129456 2023-01-22 12:50:40.696109: step: 450/469, loss: 0.0554225891828537 2023-01-22 12:50:41.273673: step: 452/469, loss: 0.01690843142569065 2023-01-22 12:50:41.941970: step: 454/469, loss: 0.05314760282635689 2023-01-22 12:50:42.596877: step: 456/469, loss: 0.11660072207450867 2023-01-22 12:50:43.149805: step: 458/469, loss: 0.03748758137226105 2023-01-22 12:50:43.784236: step: 460/469, loss: 0.012262248434126377 2023-01-22 12:50:44.433551: step: 462/469, loss: 0.013085789978504181 2023-01-22 12:50:44.994052: step: 464/469, loss: 0.017730453982949257 2023-01-22 12:50:45.695388: step: 466/469, loss: 0.1118144616484642 2023-01-22 12:50:46.265698: step: 468/469, loss: 0.06439069658517838 2023-01-22 12:50:46.832942: step: 470/469, loss: 0.021769510582089424 2023-01-22 12:50:47.537696: step: 472/469, loss: 0.04408632591366768 2023-01-22 12:50:48.141807: step: 474/469, loss: 0.0401419959962368 2023-01-22 12:50:48.767857: step: 476/469, loss: 0.12251318991184235 2023-01-22 12:50:49.396508: step: 478/469, loss: 0.015362037345767021 2023-01-22 12:50:50.040983: step: 480/469, loss: 0.34135302901268005 2023-01-22 12:50:50.719157: step: 482/469, loss: 0.44757020473480225 2023-01-22 12:50:51.321680: step: 484/469, loss: 0.005609733052551746 2023-01-22 12:50:51.987376: step: 486/469, loss: 0.2649969458580017 2023-01-22 12:50:52.584922: step: 488/469, loss: 0.12505537271499634 2023-01-22 12:50:53.132185: step: 490/469, loss: 0.029974080622196198 2023-01-22 12:50:53.771747: step: 492/469, loss: 0.03520739823579788 2023-01-22 12:50:54.366690: step: 494/469, loss: 0.017011066898703575 2023-01-22 12:50:55.020442: step: 496/469, loss: 0.16647648811340332 2023-01-22 12:50:55.664060: step: 498/469, loss: 0.030338849872350693 2023-01-22 12:50:56.257402: step: 500/469, loss: 0.04297623038291931 2023-01-22 12:50:56.864698: step: 502/469, loss: 0.07026848942041397 2023-01-22 12:50:57.466712: step: 504/469, loss: 0.06991925090551376 2023-01-22 12:50:58.113999: step: 506/469, loss: 0.03671690821647644 2023-01-22 12:50:58.757423: step: 508/469, loss: 0.0319366492331028 2023-01-22 12:50:59.375342: step: 510/469, loss: 0.02633335255086422 2023-01-22 12:51:00.037413: step: 512/469, loss: 0.0641055777668953 2023-01-22 12:51:00.701567: step: 514/469, loss: 0.034385669976472855 2023-01-22 12:51:01.310631: step: 516/469, loss: 0.21503835916519165 2023-01-22 12:51:02.018851: step: 518/469, loss: 0.04159834608435631 2023-01-22 12:51:02.652560: step: 520/469, loss: 0.06328455358743668 2023-01-22 12:51:03.257428: step: 522/469, loss: 0.12873534858226776 2023-01-22 12:51:03.913080: step: 524/469, loss: 0.07574222981929779 2023-01-22 12:51:04.532893: step: 526/469, loss: 0.30191078782081604 2023-01-22 12:51:05.128749: step: 528/469, loss: 0.010558957234025002 2023-01-22 12:51:05.812757: step: 530/469, loss: 0.01265162043273449 2023-01-22 12:51:06.427112: step: 532/469, loss: 0.25568991899490356 2023-01-22 12:51:07.027976: step: 534/469, loss: 0.01884031482040882 2023-01-22 12:51:07.642034: step: 536/469, loss: 0.068267323076725 2023-01-22 12:51:08.309692: step: 538/469, loss: 0.43168628215789795 2023-01-22 12:51:08.963352: step: 540/469, loss: 0.041933536529541016 2023-01-22 12:51:09.595321: step: 542/469, loss: 0.01796361431479454 2023-01-22 12:51:10.173103: step: 544/469, loss: 0.06711485981941223 2023-01-22 12:51:10.858383: step: 546/469, loss: 0.18310546875 2023-01-22 12:51:11.436509: step: 548/469, loss: 0.03469279408454895 2023-01-22 
12:51:12.126807: step: 550/469, loss: 0.25317585468292236 2023-01-22 12:51:12.776275: step: 552/469, loss: 0.08416194468736649 2023-01-22 12:51:13.446668: step: 554/469, loss: 0.10092373937368393 2023-01-22 12:51:14.050375: step: 556/469, loss: 0.022695761173963547 2023-01-22 12:51:14.619296: step: 558/469, loss: 0.018745919689536095 2023-01-22 12:51:15.268336: step: 560/469, loss: 0.04302777349948883 2023-01-22 12:51:15.888329: step: 562/469, loss: 0.036813780665397644 2023-01-22 12:51:16.527890: step: 564/469, loss: 0.21906541287899017 2023-01-22 12:51:17.129091: step: 566/469, loss: 0.02576979622244835 2023-01-22 12:51:17.738337: step: 568/469, loss: 0.04847199097275734 2023-01-22 12:51:18.408210: step: 570/469, loss: 0.21244098246097565 2023-01-22 12:51:19.064620: step: 572/469, loss: 0.018672136589884758 2023-01-22 12:51:19.720194: step: 574/469, loss: 0.07828520238399506 2023-01-22 12:51:20.358009: step: 576/469, loss: 0.024596504867076874 2023-01-22 12:51:21.003361: step: 578/469, loss: 0.02514028549194336 2023-01-22 12:51:21.627457: step: 580/469, loss: 0.10482020676136017 2023-01-22 12:51:22.246899: step: 582/469, loss: 0.04049116373062134 2023-01-22 12:51:22.838250: step: 584/469, loss: 0.01845908910036087 2023-01-22 12:51:23.511205: step: 586/469, loss: 0.06948646157979965 2023-01-22 12:51:24.201945: step: 588/469, loss: 0.1130063384771347 2023-01-22 12:51:24.794542: step: 590/469, loss: 0.030685599893331528 2023-01-22 12:51:25.407890: step: 592/469, loss: 0.03612568974494934 2023-01-22 12:51:26.014475: step: 594/469, loss: 0.03292441368103027 2023-01-22 12:51:26.678607: step: 596/469, loss: 0.026473393663764 2023-01-22 12:51:27.316402: step: 598/469, loss: 0.007028055377304554 2023-01-22 12:51:27.920967: step: 600/469, loss: 0.09396176040172577 2023-01-22 12:51:28.565465: step: 602/469, loss: 0.009699969552457333 2023-01-22 12:51:29.199951: step: 604/469, loss: 0.06945517659187317 2023-01-22 12:51:29.772982: step: 606/469, loss: 0.04944532737135887 2023-01-22 12:51:30.413074: step: 608/469, loss: 0.02853280119597912 2023-01-22 12:51:31.108555: step: 610/469, loss: 0.05194087699055672 2023-01-22 12:51:31.772979: step: 612/469, loss: 0.01059517078101635 2023-01-22 12:51:32.361071: step: 614/469, loss: 0.028247060254216194 2023-01-22 12:51:32.955745: step: 616/469, loss: 0.08249138295650482 2023-01-22 12:51:33.580241: step: 618/469, loss: 0.11553860455751419 2023-01-22 12:51:34.219535: step: 620/469, loss: 1.2491940259933472 2023-01-22 12:51:34.937630: step: 622/469, loss: 0.044590745121240616 2023-01-22 12:51:35.580019: step: 624/469, loss: 0.03426634520292282 2023-01-22 12:51:36.340446: step: 626/469, loss: 0.14192095398902893 2023-01-22 12:51:36.958015: step: 628/469, loss: 0.08071822673082352 2023-01-22 12:51:37.543788: step: 630/469, loss: 0.08231334388256073 2023-01-22 12:51:38.142342: step: 632/469, loss: 0.10219614207744598 2023-01-22 12:51:38.770341: step: 634/469, loss: 0.040496576577425 2023-01-22 12:51:39.415871: step: 636/469, loss: 0.11740805953741074 2023-01-22 12:51:40.044258: step: 638/469, loss: 0.4533992111682892 2023-01-22 12:51:40.651260: step: 640/469, loss: 0.012210090644657612 2023-01-22 12:51:41.300644: step: 642/469, loss: 0.09341727197170258 2023-01-22 12:51:41.965111: step: 644/469, loss: 0.11014952510595322 2023-01-22 12:51:42.557916: step: 646/469, loss: 0.06683366745710373 2023-01-22 12:51:43.195437: step: 648/469, loss: 0.05329553037881851 2023-01-22 12:51:43.827428: step: 650/469, loss: 0.00910886935889721 2023-01-22 12:51:44.454401: step: 652/469, 
loss: 0.11005696654319763 2023-01-22 12:51:45.092895: step: 654/469, loss: 0.029744772240519524 2023-01-22 12:51:45.759095: step: 656/469, loss: 0.2145126760005951 2023-01-22 12:51:46.427099: step: 658/469, loss: 0.09931598603725433 2023-01-22 12:51:46.963527: step: 660/469, loss: 0.020429594442248344 2023-01-22 12:51:47.565136: step: 662/469, loss: 0.016593294218182564 2023-01-22 12:51:48.180931: step: 664/469, loss: 0.04810403287410736 2023-01-22 12:51:48.820604: step: 666/469, loss: 0.05423254147171974 2023-01-22 12:51:49.410083: step: 668/469, loss: 0.06478362530469894 2023-01-22 12:51:49.968161: step: 670/469, loss: 0.3711676299571991 2023-01-22 12:51:50.689824: step: 672/469, loss: 0.07412133365869522 2023-01-22 12:51:51.399265: step: 674/469, loss: 1.1227519512176514 2023-01-22 12:51:52.043783: step: 676/469, loss: 0.04790123179554939 2023-01-22 12:51:52.630451: step: 678/469, loss: 0.5525271892547607 2023-01-22 12:51:53.213064: step: 680/469, loss: 0.02655475027859211 2023-01-22 12:51:53.918296: step: 682/469, loss: 0.13081642985343933 2023-01-22 12:51:54.480623: step: 684/469, loss: 0.005491634830832481 2023-01-22 12:51:55.147709: step: 686/469, loss: 0.01846947707235813 2023-01-22 12:51:55.764556: step: 688/469, loss: 0.08562768995761871 2023-01-22 12:51:56.356785: step: 690/469, loss: 0.05332883074879646 2023-01-22 12:51:57.008122: step: 692/469, loss: 0.016642436385154724 2023-01-22 12:51:57.626860: step: 694/469, loss: 0.04887961596250534 2023-01-22 12:51:58.253212: step: 696/469, loss: 0.09840194880962372 2023-01-22 12:51:58.885655: step: 698/469, loss: 0.07446332275867462 2023-01-22 12:51:59.523525: step: 700/469, loss: 0.044743407517671585 2023-01-22 12:52:00.112863: step: 702/469, loss: 0.03826627880334854 2023-01-22 12:52:00.720657: step: 704/469, loss: 0.15556564927101135 2023-01-22 12:52:01.335891: step: 706/469, loss: 0.16377954185009003 2023-01-22 12:52:01.964602: step: 708/469, loss: 0.061064738780260086 2023-01-22 12:52:02.592561: step: 710/469, loss: 0.1336861401796341 2023-01-22 12:52:03.209032: step: 712/469, loss: 0.10853223502635956 2023-01-22 12:52:03.855890: step: 714/469, loss: 0.03462926298379898 2023-01-22 12:52:04.441901: step: 716/469, loss: 0.0771002247929573 2023-01-22 12:52:05.077271: step: 718/469, loss: 0.03590358421206474 2023-01-22 12:52:05.697169: step: 720/469, loss: 0.012811330147087574 2023-01-22 12:52:06.300788: step: 722/469, loss: 0.24002225697040558 2023-01-22 12:52:06.952477: step: 724/469, loss: 0.08062081784009933 2023-01-22 12:52:07.583226: step: 726/469, loss: 0.06410007178783417 2023-01-22 12:52:08.284122: step: 728/469, loss: 0.09805223345756531 2023-01-22 12:52:09.025723: step: 730/469, loss: 0.02737535536289215 2023-01-22 12:52:09.631952: step: 732/469, loss: 0.07163655757904053 2023-01-22 12:52:10.395439: step: 734/469, loss: 0.02471967041492462 2023-01-22 12:52:10.991171: step: 736/469, loss: 0.32227209210395813 2023-01-22 12:52:11.604521: step: 738/469, loss: 0.9016312956809998 2023-01-22 12:52:12.277120: step: 740/469, loss: 0.05750349536538124 2023-01-22 12:52:12.946292: step: 742/469, loss: 0.08736299723386765 2023-01-22 12:52:13.518101: step: 744/469, loss: 0.05938255414366722 2023-01-22 12:52:14.203293: step: 746/469, loss: 0.046954963356256485 2023-01-22 12:52:14.740928: step: 748/469, loss: 0.05033650994300842 2023-01-22 12:52:15.343494: step: 750/469, loss: 0.3463425934314728 2023-01-22 12:52:16.010385: step: 752/469, loss: 0.05690761283040047 2023-01-22 12:52:16.686979: step: 754/469, loss: 0.06557939946651459 
2023-01-22 12:52:17.254881: step: 756/469, loss: 0.04945741966366768 2023-01-22 12:52:17.827037: step: 758/469, loss: 0.08173562586307526 2023-01-22 12:52:18.406035: step: 760/469, loss: 0.034592051059007645 2023-01-22 12:52:19.041030: step: 762/469, loss: 0.03938683494925499 2023-01-22 12:52:19.584559: step: 764/469, loss: 0.10660931468009949 2023-01-22 12:52:20.206559: step: 766/469, loss: 0.06772271543741226 2023-01-22 12:52:20.752571: step: 768/469, loss: 0.13811947405338287 2023-01-22 12:52:21.323521: step: 770/469, loss: 0.032920584082603455 2023-01-22 12:52:21.921214: step: 772/469, loss: 0.021953996270895004 2023-01-22 12:52:22.560159: step: 774/469, loss: 0.05240635573863983 2023-01-22 12:52:23.178396: step: 776/469, loss: 0.20037932693958282 2023-01-22 12:52:23.870656: step: 778/469, loss: 0.04232468828558922 2023-01-22 12:52:24.572088: step: 780/469, loss: 0.039521485567092896 2023-01-22 12:52:25.245784: step: 782/469, loss: 0.009970891289412975 2023-01-22 12:52:25.948335: step: 784/469, loss: 0.03995884954929352 2023-01-22 12:52:26.623376: step: 786/469, loss: 0.011412148363888264 2023-01-22 12:52:27.223392: step: 788/469, loss: 0.1112995371222496 2023-01-22 12:52:27.826499: step: 790/469, loss: 0.12103532254695892 2023-01-22 12:52:28.406189: step: 792/469, loss: 0.21943050622940063 2023-01-22 12:52:28.996968: step: 794/469, loss: 0.004217034671455622 2023-01-22 12:52:29.553568: step: 796/469, loss: 0.14052830636501312 2023-01-22 12:52:30.182104: step: 798/469, loss: 0.016933167353272438 2023-01-22 12:52:30.816968: step: 800/469, loss: 0.12630292773246765 2023-01-22 12:52:31.482194: step: 802/469, loss: 0.14411765336990356 2023-01-22 12:52:32.091330: step: 804/469, loss: 0.0805254876613617 2023-01-22 12:52:32.690980: step: 806/469, loss: 0.030944321304559708 2023-01-22 12:52:33.289486: step: 808/469, loss: 0.04941081628203392 2023-01-22 12:52:33.877510: step: 810/469, loss: 0.12576352059841156 2023-01-22 12:52:34.457601: step: 812/469, loss: 0.11210504174232483 2023-01-22 12:52:35.041643: step: 814/469, loss: 0.09459513425827026 2023-01-22 12:52:35.683778: step: 816/469, loss: 0.05020858719944954 2023-01-22 12:52:36.278989: step: 818/469, loss: 0.02079484611749649 2023-01-22 12:52:36.947259: step: 820/469, loss: 0.12901917099952698 2023-01-22 12:52:37.566554: step: 822/469, loss: 0.35205814242362976 2023-01-22 12:52:38.177687: step: 824/469, loss: 0.014864898286759853 2023-01-22 12:52:38.782700: step: 826/469, loss: 0.03730570897459984 2023-01-22 12:52:39.418764: step: 828/469, loss: 0.41891026496887207 2023-01-22 12:52:40.096609: step: 830/469, loss: 0.01630174182355404 2023-01-22 12:52:40.748585: step: 832/469, loss: 0.0619516558945179 2023-01-22 12:52:41.403320: step: 834/469, loss: 0.03761196881532669 2023-01-22 12:52:42.017562: step: 836/469, loss: 0.028952032327651978 2023-01-22 12:52:42.575880: step: 838/469, loss: 0.03119269758462906 2023-01-22 12:52:43.186498: step: 840/469, loss: 0.014045340940356255 2023-01-22 12:52:43.791976: step: 842/469, loss: 0.0033642761409282684 2023-01-22 12:52:44.426280: step: 844/469, loss: 0.4119490385055542 2023-01-22 12:52:45.124872: step: 846/469, loss: 0.09381299465894699 2023-01-22 12:52:45.761186: step: 848/469, loss: 0.05417989194393158 2023-01-22 12:52:46.450987: step: 850/469, loss: 0.10595797747373581 2023-01-22 12:52:47.093825: step: 852/469, loss: 0.1034916415810585 2023-01-22 12:52:47.721957: step: 854/469, loss: 0.04896261543035507 2023-01-22 12:52:48.330198: step: 856/469, loss: 0.4186862111091614 2023-01-22 12:52:48.990805: 
step: 858/469, loss: 0.06940488517284393 2023-01-22 12:52:49.561105: step: 860/469, loss: 0.5892264246940613 2023-01-22 12:52:50.275915: step: 862/469, loss: 0.030064817517995834 2023-01-22 12:52:50.895559: step: 864/469, loss: 0.058277588337659836 2023-01-22 12:52:51.486489: step: 866/469, loss: 0.1297154426574707 2023-01-22 12:52:52.054510: step: 868/469, loss: 0.11072082072496414 2023-01-22 12:52:52.669568: step: 870/469, loss: 0.03493410348892212 2023-01-22 12:52:53.341162: step: 872/469, loss: 0.10401593893766403 2023-01-22 12:52:53.998883: step: 874/469, loss: 0.0552351213991642 2023-01-22 12:52:54.715254: step: 876/469, loss: 0.04945047199726105 2023-01-22 12:52:55.399799: step: 878/469, loss: 0.07825034856796265 2023-01-22 12:52:56.045610: step: 880/469, loss: 0.10247155278921127 2023-01-22 12:52:56.668634: step: 882/469, loss: 0.026932446286082268 2023-01-22 12:52:57.263104: step: 884/469, loss: 0.05905936658382416 2023-01-22 12:52:57.874172: step: 886/469, loss: 0.03196587786078453 2023-01-22 12:52:58.478343: step: 888/469, loss: 0.03644465282559395 2023-01-22 12:52:59.190398: step: 890/469, loss: 0.038065653294324875 2023-01-22 12:52:59.845113: step: 892/469, loss: 0.032983966171741486 2023-01-22 12:53:00.439961: step: 894/469, loss: 0.03079654462635517 2023-01-22 12:53:01.022430: step: 896/469, loss: 0.5434918999671936 2023-01-22 12:53:01.632065: step: 898/469, loss: 0.03318360075354576 2023-01-22 12:53:02.262335: step: 900/469, loss: 0.040477193892002106 2023-01-22 12:53:02.842966: step: 902/469, loss: 0.062321316450834274 2023-01-22 12:53:03.453386: step: 904/469, loss: 0.07525224983692169 2023-01-22 12:53:04.077665: step: 906/469, loss: 0.045330729335546494 2023-01-22 12:53:04.654706: step: 908/469, loss: 0.061386313289403915 2023-01-22 12:53:05.287026: step: 910/469, loss: 0.10755674540996552 2023-01-22 12:53:05.880668: step: 912/469, loss: 0.007984045892953873 2023-01-22 12:53:06.505968: step: 914/469, loss: 0.13335135579109192 2023-01-22 12:53:07.091144: step: 916/469, loss: 0.047902025282382965 2023-01-22 12:53:07.734930: step: 918/469, loss: 0.04972464591264725 2023-01-22 12:53:08.373714: step: 920/469, loss: 0.09323792904615402 2023-01-22 12:53:09.003161: step: 922/469, loss: 0.010326355695724487 2023-01-22 12:53:09.704364: step: 924/469, loss: 4.266698360443115 2023-01-22 12:53:10.347185: step: 926/469, loss: 0.04964417964220047 2023-01-22 12:53:10.955786: step: 928/469, loss: 0.02595529705286026 2023-01-22 12:53:11.723577: step: 930/469, loss: 0.09018541872501373 2023-01-22 12:53:12.339935: step: 932/469, loss: 0.024961672723293304 2023-01-22 12:53:12.940907: step: 934/469, loss: 0.0686643049120903 2023-01-22 12:53:13.607019: step: 936/469, loss: 0.04848860204219818 2023-01-22 12:53:14.295215: step: 938/469, loss: 0.21141712367534637 ================================================== Loss: 0.109 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3056884644101543, 'r': 0.3329510029818379, 'f1': 0.31873783573374853}, 'combined': 0.23485945790907786, 'epoch': 20} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30902866998314077, 'r': 0.2759487483106545, 'f1': 0.29155338995026137}, 'combined': 0.15902912179105164, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29554128198898605, 'r': 0.329750045179362, 'f1': 0.311709908178518}, 'combined': 
0.22968098497364484, 'epoch': 20} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3128615645298447, 'r': 0.27736766333890167, 'f1': 0.29404738703144473}, 'combined': 0.16038948383533347, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2904121267150928, 'r': 0.32512932592391797, 'f1': 0.3067916826533657}, 'combined': 0.2260570293235326, 'epoch': 20} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3065924962506269, 'r': 0.27882245313186016, 'f1': 0.29204881770304086}, 'combined': 0.15929935511074955, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21448087431693988, 'r': 0.37380952380952376, 'f1': 0.2725694444444444}, 'combined': 0.18171296296296294, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2661290322580645, 'r': 0.358695652173913, 'f1': 0.30555555555555547}, 'combined': 0.15277777777777773, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40789473684210525, 'r': 0.2672413793103448, 'f1': 0.3229166666666667}, 'combined': 0.2152777777777778, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 21 command: python train.py --model_name slot 
--xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:56:03.028386: step: 2/469, loss: 0.02392192929983139 2023-01-22 12:56:03.678262: step: 4/469, loss: 0.01966254599392414 2023-01-22 12:56:04.340421: step: 6/469, loss: 0.05299926921725273 2023-01-22 12:56:04.966145: step: 8/469, loss: 0.044818371534347534 2023-01-22 12:56:05.569790: step: 10/469, loss: 0.022025851532816887 2023-01-22 12:56:06.267244: step: 12/469, loss: 0.012700133956968784 2023-01-22 12:56:06.860796: step: 14/469, loss: 0.037772346287965775 2023-01-22 12:56:07.482312: step: 16/469, loss: 0.02254267781972885 2023-01-22 12:56:08.132541: step: 18/469, loss: 0.07363095134496689 2023-01-22 12:56:08.784787: step: 20/469, loss: 0.008026142604649067 2023-01-22 12:56:09.498798: step: 22/469, loss: 0.005653323605656624 2023-01-22 12:56:10.167847: step: 24/469, loss: 0.07869891822338104 2023-01-22 12:56:10.778920: step: 26/469, loss: 0.028066422790288925 2023-01-22 12:56:11.415941: step: 28/469, loss: 0.010528797283768654 2023-01-22 12:56:11.997082: step: 30/469, loss: 0.07387077063322067 2023-01-22 12:56:12.629344: step: 32/469, loss: 0.032999083399772644 2023-01-22 12:56:13.274259: step: 34/469, loss: 0.023119498044252396 2023-01-22 12:56:13.913010: step: 36/469, loss: 0.04699590429663658 2023-01-22 12:56:14.528075: step: 38/469, loss: 0.00793388020247221 2023-01-22 12:56:15.136466: step: 40/469, loss: 0.10146583616733551 2023-01-22 12:56:15.806764: step: 42/469, loss: 0.006985010579228401 2023-01-22 12:56:16.404158: step: 44/469, loss: 0.02706150896847248 2023-01-22 12:56:16.997386: step: 46/469, loss: 0.00024393167404923588 2023-01-22 12:56:17.655798: step: 48/469, loss: 0.028532344847917557 2023-01-22 12:56:18.244894: step: 50/469, loss: 0.0009933767141774297 2023-01-22 12:56:18.823019: step: 52/469, loss: 0.044913098216056824 2023-01-22 12:56:19.446107: step: 54/469, loss: 0.03143681585788727 2023-01-22 12:56:20.093395: step: 56/469, loss: 0.037025924772024155 2023-01-22 12:56:20.773894: step: 58/469, loss: 0.037975575774908066 2023-01-22 12:56:21.433996: step: 60/469, loss: 0.020727310329675674 2023-01-22 12:56:22.044200: step: 62/469, loss: 0.020771190524101257 2023-01-22 12:56:22.623016: step: 64/469, loss: 0.003391948761418462 2023-01-22 12:56:23.312935: step: 66/469, loss: 0.038785479962825775 2023-01-22 12:56:23.948273: step: 68/469, loss: 0.09054183959960938 2023-01-22 12:56:24.544830: step: 70/469, loss: 0.024149758741259575 2023-01-22 12:56:25.192982: step: 72/469, loss: 0.010785935446619987 2023-01-22 12:56:25.841168: step: 74/469, loss: 0.1472344696521759 2023-01-22 12:56:26.430438: step: 76/469, loss: 0.021318616345524788 2023-01-22 12:56:27.052367: step: 78/469, loss: 0.06855437159538269 2023-01-22 12:56:27.719143: step: 80/469, loss: 0.03193723410367966 2023-01-22 12:56:28.367454: step: 82/469, loss: 0.03750784695148468 2023-01-22 12:56:28.990774: step: 84/469, loss: 0.7567758560180664 2023-01-22 12:56:29.579288: step: 86/469, loss: 0.06039344519376755 2023-01-22 12:56:30.244015: step: 88/469, loss: 0.04281849414110184 2023-01-22 12:56:30.903902: step: 90/469, loss: 0.0049997554160654545 2023-01-22 12:56:31.493893: step: 92/469, loss: 0.04772511497139931 2023-01-22 12:56:32.065025: step: 94/469, loss: 0.036539532244205475 2023-01-22 12:56:32.665820: step: 96/469, loss: 0.013883134350180626 2023-01-22 12:56:33.281767: step: 98/469, loss: 0.0762481614947319 2023-01-22 
12:56:33.955925: step: 100/469, loss: 0.05380294471979141 2023-01-22 12:56:34.551173: step: 102/469, loss: 0.05062882974743843 2023-01-22 12:56:35.216370: step: 104/469, loss: 0.16701483726501465 2023-01-22 12:56:35.825032: step: 106/469, loss: 0.014210863038897514 2023-01-22 12:56:36.451257: step: 108/469, loss: 0.01780262403190136 2023-01-22 12:56:37.064633: step: 110/469, loss: 0.13959628343582153 2023-01-22 12:56:37.678064: step: 112/469, loss: 0.024026265367865562 2023-01-22 12:56:38.231947: step: 114/469, loss: 0.03967355564236641 2023-01-22 12:56:38.856136: step: 116/469, loss: 0.06304628401994705 2023-01-22 12:56:39.452541: step: 118/469, loss: 0.01931256242096424 2023-01-22 12:56:40.044832: step: 120/469, loss: 0.7055705189704895 2023-01-22 12:56:40.644254: step: 122/469, loss: 0.015860218554735184 2023-01-22 12:56:41.247890: step: 124/469, loss: 0.08320516347885132 2023-01-22 12:56:41.850990: step: 126/469, loss: 0.05166981741786003 2023-01-22 12:56:42.468198: step: 128/469, loss: 0.15676382184028625 2023-01-22 12:56:43.100068: step: 130/469, loss: 0.014630008488893509 2023-01-22 12:56:43.781706: step: 132/469, loss: 0.1120273545384407 2023-01-22 12:56:44.409899: step: 134/469, loss: 0.029118932783603668 2023-01-22 12:56:45.065776: step: 136/469, loss: 0.07675378769636154 2023-01-22 12:56:45.630972: step: 138/469, loss: 0.028277577832341194 2023-01-22 12:56:46.246142: step: 140/469, loss: 0.022590328007936478 2023-01-22 12:56:46.911558: step: 142/469, loss: 0.020666001364588737 2023-01-22 12:56:47.654033: step: 144/469, loss: 0.069100521504879 2023-01-22 12:56:48.232292: step: 146/469, loss: 0.00901170913130045 2023-01-22 12:56:48.874342: step: 148/469, loss: 0.0521109439432621 2023-01-22 12:56:49.523922: step: 150/469, loss: 0.02426365204155445 2023-01-22 12:56:50.082258: step: 152/469, loss: 0.06741529703140259 2023-01-22 12:56:50.702366: step: 154/469, loss: 0.07119188457727432 2023-01-22 12:56:51.290415: step: 156/469, loss: 0.0225388091057539 2023-01-22 12:56:51.857922: step: 158/469, loss: 0.06181115657091141 2023-01-22 12:56:52.450059: step: 160/469, loss: 0.07040660083293915 2023-01-22 12:56:53.068291: step: 162/469, loss: 0.058279506862163544 2023-01-22 12:56:53.725134: step: 164/469, loss: 0.024268848821520805 2023-01-22 12:56:54.476029: step: 166/469, loss: 0.02673419937491417 2023-01-22 12:56:55.115811: step: 168/469, loss: 0.1298261433839798 2023-01-22 12:56:55.680459: step: 170/469, loss: 0.09414707124233246 2023-01-22 12:56:56.249206: step: 172/469, loss: 0.008704627864062786 2023-01-22 12:56:56.860094: step: 174/469, loss: 0.040511857718229294 2023-01-22 12:56:57.493272: step: 176/469, loss: 0.0404052659869194 2023-01-22 12:56:58.146649: step: 178/469, loss: 0.5321192741394043 2023-01-22 12:56:58.740624: step: 180/469, loss: 0.0034266209695488214 2023-01-22 12:56:59.323853: step: 182/469, loss: 0.018502982333302498 2023-01-22 12:57:00.008730: step: 184/469, loss: 0.05716400220990181 2023-01-22 12:57:00.659771: step: 186/469, loss: 0.03221050277352333 2023-01-22 12:57:01.243559: step: 188/469, loss: 0.06699027866125107 2023-01-22 12:57:01.832127: step: 190/469, loss: 0.08274269104003906 2023-01-22 12:57:02.347695: step: 192/469, loss: 0.005418492015451193 2023-01-22 12:57:02.970796: step: 194/469, loss: 0.010212346911430359 2023-01-22 12:57:03.658143: step: 196/469, loss: 0.07760020345449448 2023-01-22 12:57:04.293767: step: 198/469, loss: 0.30351677536964417 2023-01-22 12:57:04.934041: step: 200/469, loss: 0.012618208304047585 2023-01-22 12:57:05.662599: step: 
202/469, loss: 0.49273350834846497 2023-01-22 12:57:06.304452: step: 204/469, loss: 0.04685742035508156 2023-01-22 12:57:06.942246: step: 206/469, loss: 0.04335838556289673 2023-01-22 12:57:07.524579: step: 208/469, loss: 0.01687638834118843 2023-01-22 12:57:08.143954: step: 210/469, loss: 0.0890517309308052 2023-01-22 12:57:08.786280: step: 212/469, loss: 0.1212993636727333 2023-01-22 12:57:09.396299: step: 214/469, loss: 0.036799754947423935 2023-01-22 12:57:10.025357: step: 216/469, loss: 0.03313329443335533 2023-01-22 12:57:10.616804: step: 218/469, loss: 0.11265157163143158 2023-01-22 12:57:11.194936: step: 220/469, loss: 0.011664781719446182 2023-01-22 12:57:11.835765: step: 222/469, loss: 0.0214131698012352 2023-01-22 12:57:12.428484: step: 224/469, loss: 0.004781034775078297 2023-01-22 12:57:13.138018: step: 226/469, loss: 0.002530965954065323 2023-01-22 12:57:13.729455: step: 228/469, loss: 0.024235527962446213 2023-01-22 12:57:14.463816: step: 230/469, loss: 0.039171766489744186 2023-01-22 12:57:15.119148: step: 232/469, loss: 0.05174962803721428 2023-01-22 12:57:15.764003: step: 234/469, loss: 0.05852152407169342 2023-01-22 12:57:16.339611: step: 236/469, loss: 0.018330590799450874 2023-01-22 12:57:16.980228: step: 238/469, loss: 0.0845969170331955 2023-01-22 12:57:17.547833: step: 240/469, loss: 0.04136737063527107 2023-01-22 12:57:18.183592: step: 242/469, loss: 0.013539737090468407 2023-01-22 12:57:18.797096: step: 244/469, loss: 0.07967480272054672 2023-01-22 12:57:19.395445: step: 246/469, loss: 0.008916055783629417 2023-01-22 12:57:19.996817: step: 248/469, loss: 0.012129005044698715 2023-01-22 12:57:20.547774: step: 250/469, loss: 0.014636692591011524 2023-01-22 12:57:21.272345: step: 252/469, loss: 0.024400966241955757 2023-01-22 12:57:21.841020: step: 254/469, loss: 0.03725671023130417 2023-01-22 12:57:22.410019: step: 256/469, loss: 0.00875252578407526 2023-01-22 12:57:22.983127: step: 258/469, loss: 0.18569916486740112 2023-01-22 12:57:23.638590: step: 260/469, loss: 0.02596977911889553 2023-01-22 12:57:24.282122: step: 262/469, loss: 0.8206031322479248 2023-01-22 12:57:24.891548: step: 264/469, loss: 0.09460817277431488 2023-01-22 12:57:25.497193: step: 266/469, loss: 0.032924480736255646 2023-01-22 12:57:26.127115: step: 268/469, loss: 0.0335528738796711 2023-01-22 12:57:26.698440: step: 270/469, loss: 0.060713693499565125 2023-01-22 12:57:27.327487: step: 272/469, loss: 0.07159701734781265 2023-01-22 12:57:27.931459: step: 274/469, loss: 0.013019309379160404 2023-01-22 12:57:28.569772: step: 276/469, loss: 0.07129477709531784 2023-01-22 12:57:29.266264: step: 278/469, loss: 0.07874827086925507 2023-01-22 12:57:29.870436: step: 280/469, loss: 0.029299231246113777 2023-01-22 12:57:30.564719: step: 282/469, loss: 0.05381292477250099 2023-01-22 12:57:31.212485: step: 284/469, loss: 0.13097001612186432 2023-01-22 12:57:31.888114: step: 286/469, loss: 0.06882507354021072 2023-01-22 12:57:32.492354: step: 288/469, loss: 0.12335500866174698 2023-01-22 12:57:33.116010: step: 290/469, loss: 0.04126376658678055 2023-01-22 12:57:33.751338: step: 292/469, loss: 0.039324551820755005 2023-01-22 12:57:34.460060: step: 294/469, loss: 0.05773017555475235 2023-01-22 12:57:35.081143: step: 296/469, loss: 0.03303098306059837 2023-01-22 12:57:35.682133: step: 298/469, loss: 0.05265083163976669 2023-01-22 12:57:36.277716: step: 300/469, loss: 0.08067907392978668 2023-01-22 12:57:36.943442: step: 302/469, loss: 0.0575513057410717 2023-01-22 12:57:37.555644: step: 304/469, loss: 
0.9869254231452942 2023-01-22 12:57:38.151311: step: 306/469, loss: 0.011143622919917107 2023-01-22 12:57:38.815512: step: 308/469, loss: 0.049705423414707184 2023-01-22 12:57:39.425569: step: 310/469, loss: 0.02404535748064518 2023-01-22 12:57:40.109138: step: 312/469, loss: 0.05674991384148598 2023-01-22 12:57:40.755625: step: 314/469, loss: 0.10494145005941391 2023-01-22 12:57:41.362000: step: 316/469, loss: 0.06712942570447922 2023-01-22 12:57:41.917632: step: 318/469, loss: 0.04084033891558647 2023-01-22 12:57:42.549579: step: 320/469, loss: 0.03611866012215614 2023-01-22 12:57:43.102429: step: 322/469, loss: 0.008003677241504192 2023-01-22 12:57:43.751492: step: 324/469, loss: 0.034904543310403824 2023-01-22 12:57:44.384391: step: 326/469, loss: 0.03714032843708992 2023-01-22 12:57:45.039674: step: 328/469, loss: 0.012168901041150093 2023-01-22 12:57:45.689029: step: 330/469, loss: 0.014416170306503773 2023-01-22 12:57:46.335604: step: 332/469, loss: 0.07741773873567581 2023-01-22 12:57:46.991894: step: 334/469, loss: 0.0032909305300563574 2023-01-22 12:57:47.710129: step: 336/469, loss: 0.11048717796802521 2023-01-22 12:57:48.351147: step: 338/469, loss: 0.07588531076908112 2023-01-22 12:57:49.003434: step: 340/469, loss: 0.010511383414268494 2023-01-22 12:57:49.617971: step: 342/469, loss: 0.022397121414542198 2023-01-22 12:57:50.290907: step: 344/469, loss: 0.025274138897657394 2023-01-22 12:57:50.904500: step: 346/469, loss: 0.005095488857477903 2023-01-22 12:57:51.597057: step: 348/469, loss: 0.03323239088058472 2023-01-22 12:57:52.203178: step: 350/469, loss: 0.015544925816357136 2023-01-22 12:57:52.888192: step: 352/469, loss: 0.06461065262556076 2023-01-22 12:57:53.497161: step: 354/469, loss: 0.07638489454984665 2023-01-22 12:57:54.148997: step: 356/469, loss: 0.13434520363807678 2023-01-22 12:57:54.827346: step: 358/469, loss: 0.08030161261558533 2023-01-22 12:57:55.527985: step: 360/469, loss: 0.06590965390205383 2023-01-22 12:57:56.223821: step: 362/469, loss: 0.012448490597307682 2023-01-22 12:57:56.842382: step: 364/469, loss: 0.05046309903264046 2023-01-22 12:57:57.435093: step: 366/469, loss: 0.018466822803020477 2023-01-22 12:57:58.080548: step: 368/469, loss: 0.040025223046541214 2023-01-22 12:57:58.695829: step: 370/469, loss: 0.05510979890823364 2023-01-22 12:57:59.424633: step: 372/469, loss: 0.012401694431900978 2023-01-22 12:58:00.022958: step: 374/469, loss: 0.9642502069473267 2023-01-22 12:58:00.599836: step: 376/469, loss: 0.026485169306397438 2023-01-22 12:58:01.310257: step: 378/469, loss: 0.03587673231959343 2023-01-22 12:58:01.971118: step: 380/469, loss: 0.0007051278371363878 2023-01-22 12:58:02.583179: step: 382/469, loss: 0.3616332411766052 2023-01-22 12:58:03.156534: step: 384/469, loss: 0.044319137930870056 2023-01-22 12:58:03.713220: step: 386/469, loss: 0.08458127081394196 2023-01-22 12:58:04.421697: step: 388/469, loss: 0.10217348486185074 2023-01-22 12:58:05.077887: step: 390/469, loss: 0.26924359798431396 2023-01-22 12:58:05.637712: step: 392/469, loss: 0.012389730662107468 2023-01-22 12:58:06.261769: step: 394/469, loss: 0.013179104775190353 2023-01-22 12:58:06.936351: step: 396/469, loss: 0.09027823060750961 2023-01-22 12:58:07.615073: step: 398/469, loss: 0.03433922305703163 2023-01-22 12:58:08.226562: step: 400/469, loss: 0.06290686130523682 2023-01-22 12:58:08.871377: step: 402/469, loss: 0.02589227445423603 2023-01-22 12:58:09.464098: step: 404/469, loss: 0.0005919756949879229 2023-01-22 12:58:10.132352: step: 406/469, loss: 
0.13433203101158142 2023-01-22 12:58:10.786370: step: 408/469, loss: 0.07053697109222412 2023-01-22 12:58:11.481309: step: 410/469, loss: 0.04509681835770607 2023-01-22 12:58:12.088078: step: 412/469, loss: 0.04402168467640877 2023-01-22 12:58:12.683117: step: 414/469, loss: 0.06334051489830017 2023-01-22 12:58:13.313209: step: 416/469, loss: 0.0007466504466719925 2023-01-22 12:58:13.875196: step: 418/469, loss: 0.43050992488861084 2023-01-22 12:58:14.517446: step: 420/469, loss: 0.10803550481796265 2023-01-22 12:58:15.068467: step: 422/469, loss: 0.05320751667022705 2023-01-22 12:58:15.712764: step: 424/469, loss: 0.0337304063141346 2023-01-22 12:58:16.338849: step: 426/469, loss: 0.0012051883386448026 2023-01-22 12:58:16.996583: step: 428/469, loss: 0.14188238978385925 2023-01-22 12:58:17.540494: step: 430/469, loss: 0.008196497336030006 2023-01-22 12:58:18.191203: step: 432/469, loss: 0.060212597250938416 2023-01-22 12:58:18.811298: step: 434/469, loss: 0.1244427040219307 2023-01-22 12:58:19.453105: step: 436/469, loss: 0.08918793499469757 2023-01-22 12:58:20.095344: step: 438/469, loss: 0.48906829953193665 2023-01-22 12:58:20.716376: step: 440/469, loss: 0.016950242221355438 2023-01-22 12:58:21.280479: step: 442/469, loss: 0.21700793504714966 2023-01-22 12:58:21.887179: step: 444/469, loss: 0.01460797619074583 2023-01-22 12:58:22.437516: step: 446/469, loss: 0.05856382101774216 2023-01-22 12:58:23.047105: step: 448/469, loss: 0.06313794106245041 2023-01-22 12:58:23.745236: step: 450/469, loss: 0.05432180315256119 2023-01-22 12:58:24.361469: step: 452/469, loss: 0.06948701292276382 2023-01-22 12:58:25.038602: step: 454/469, loss: 0.037344638258218765 2023-01-22 12:58:25.688209: step: 456/469, loss: 0.3675992786884308 2023-01-22 12:58:26.246899: step: 458/469, loss: 0.10851266235113144 2023-01-22 12:58:26.871752: step: 460/469, loss: 0.04810580983757973 2023-01-22 12:58:27.518330: step: 462/469, loss: 0.3988592028617859 2023-01-22 12:58:28.163423: step: 464/469, loss: 0.04201856255531311 2023-01-22 12:58:28.782338: step: 466/469, loss: 0.103286512196064 2023-01-22 12:58:29.362459: step: 468/469, loss: 0.032137639820575714 2023-01-22 12:58:29.969098: step: 470/469, loss: 0.048732005059719086 2023-01-22 12:58:30.525691: step: 472/469, loss: 0.043681900948286057 2023-01-22 12:58:31.078924: step: 474/469, loss: 0.061638057231903076 2023-01-22 12:58:31.669519: step: 476/469, loss: 0.009564572013914585 2023-01-22 12:58:32.257327: step: 478/469, loss: 0.020961610600352287 2023-01-22 12:58:32.879692: step: 480/469, loss: 0.08033375442028046 2023-01-22 12:58:33.548815: step: 482/469, loss: 0.4921906590461731 2023-01-22 12:58:34.235373: step: 484/469, loss: 0.039049189537763596 2023-01-22 12:58:34.848246: step: 486/469, loss: 0.45716163516044617 2023-01-22 12:58:35.476777: step: 488/469, loss: 0.3450915813446045 2023-01-22 12:58:36.176290: step: 490/469, loss: 0.00998485367745161 2023-01-22 12:58:36.765779: step: 492/469, loss: 0.03579733520746231 2023-01-22 12:58:37.454094: step: 494/469, loss: 0.05593695491552353 2023-01-22 12:58:38.179750: step: 496/469, loss: 0.048308905214071274 2023-01-22 12:58:38.815708: step: 498/469, loss: 0.014730180613696575 2023-01-22 12:58:39.449471: step: 500/469, loss: 0.021537592634558678 2023-01-22 12:58:40.090534: step: 502/469, loss: 0.007521749008446932 2023-01-22 12:58:40.714996: step: 504/469, loss: 0.048355430364608765 2023-01-22 12:58:41.367367: step: 506/469, loss: 3.7182867527008057 2023-01-22 12:58:41.927824: step: 508/469, loss: 0.04229341074824333 
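Note: every training entry in this log follows the pattern "<timestamp>: step: <k>/469, loss: <value>", and the step counter apparently advances by 2 per logged batch, which is why an epoch of 469 batches ends at "step: 938/469". Below is a minimal sketch, with helper names of my own choosing (not part of train.py), for pulling those triples out of the raw text, e.g. to average them and compare against the "Loss: ..." line printed at the end of each epoch, which looks like such a mean.

import re

# Matches entries such as "2023-01-22 12:58:42.511389: step: 510/469, loss: 0.032623041421175"
STEP_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/\d+, loss: ([0-9.eE+-]+)"
)

def parse_steps(log_text):
    """Yield (timestamp, step, loss) for every step entry found in the text."""
    for ts, step, loss in STEP_RE.findall(log_text):
        yield ts, int(step), float(loss)

def mean_loss(log_text):
    """Average of all parsed step losses; assumed to correspond to the per-epoch 'Loss:' line."""
    losses = [loss for _, _, loss in parse_steps(log_text)]
    return sum(losses) / len(losses) if losses else float("nan")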
2023-01-22 12:58:42.511389: step: 510/469, loss: 0.032623041421175 2023-01-22 12:58:43.082734: step: 512/469, loss: 0.018214251846075058 2023-01-22 12:58:43.707534: step: 514/469, loss: 0.06210203468799591 2023-01-22 12:58:44.401257: step: 516/469, loss: 0.1362147480249405 2023-01-22 12:58:44.994498: step: 518/469, loss: 0.01085751224309206 2023-01-22 12:58:45.565368: step: 520/469, loss: 0.02598940208554268 2023-01-22 12:58:46.159216: step: 522/469, loss: 0.11556785553693771 2023-01-22 12:58:46.763930: step: 524/469, loss: 0.012101628817617893 2023-01-22 12:58:47.348347: step: 526/469, loss: 0.0612773634493351 2023-01-22 12:58:47.990981: step: 528/469, loss: 1.120747447013855 2023-01-22 12:58:48.614695: step: 530/469, loss: 0.0069404831156134605 2023-01-22 12:58:49.231498: step: 532/469, loss: 0.033291835337877274 2023-01-22 12:58:49.871997: step: 534/469, loss: 0.06330261379480362 2023-01-22 12:58:50.495840: step: 536/469, loss: 0.027323000133037567 2023-01-22 12:58:51.113017: step: 538/469, loss: 0.20305876433849335 2023-01-22 12:58:51.834282: step: 540/469, loss: 0.2994983196258545 2023-01-22 12:58:52.493108: step: 542/469, loss: 1.0734171867370605 2023-01-22 12:58:53.114378: step: 544/469, loss: 0.024813175201416016 2023-01-22 12:58:53.706127: step: 546/469, loss: 0.03625921159982681 2023-01-22 12:58:54.376936: step: 548/469, loss: 0.13224072754383087 2023-01-22 12:58:54.967462: step: 550/469, loss: 0.040598031133413315 2023-01-22 12:58:55.722795: step: 552/469, loss: 0.0528695173561573 2023-01-22 12:58:56.313103: step: 554/469, loss: 0.23352941870689392 2023-01-22 12:58:56.949757: step: 556/469, loss: 0.06219625845551491 2023-01-22 12:58:57.552650: step: 558/469, loss: 0.03036361373960972 2023-01-22 12:58:58.238197: step: 560/469, loss: 0.05718938261270523 2023-01-22 12:58:58.861709: step: 562/469, loss: 0.3490472137928009 2023-01-22 12:58:59.460408: step: 564/469, loss: 0.31674161553382874 2023-01-22 12:59:00.108914: step: 566/469, loss: 1.1303584575653076 2023-01-22 12:59:00.754557: step: 568/469, loss: 0.3321295976638794 2023-01-22 12:59:01.353230: step: 570/469, loss: 0.052285049110651016 2023-01-22 12:59:01.961567: step: 572/469, loss: 0.010169393382966518 2023-01-22 12:59:02.629624: step: 574/469, loss: 0.05551881715655327 2023-01-22 12:59:03.282162: step: 576/469, loss: 0.03533104434609413 2023-01-22 12:59:03.882647: step: 578/469, loss: 0.05030536651611328 2023-01-22 12:59:04.477174: step: 580/469, loss: 0.1418551802635193 2023-01-22 12:59:05.174561: step: 582/469, loss: 0.06159890070557594 2023-01-22 12:59:05.773541: step: 584/469, loss: 0.018290333449840546 2023-01-22 12:59:06.512698: step: 586/469, loss: 0.04992254823446274 2023-01-22 12:59:07.229637: step: 588/469, loss: 0.07574943453073502 2023-01-22 12:59:07.899708: step: 590/469, loss: 0.05323129519820213 2023-01-22 12:59:08.533008: step: 592/469, loss: 0.07767683267593384 2023-01-22 12:59:09.205972: step: 594/469, loss: 0.021485527977347374 2023-01-22 12:59:09.821111: step: 596/469, loss: 0.04382946342229843 2023-01-22 12:59:10.470718: step: 598/469, loss: 0.029458319768309593 2023-01-22 12:59:11.163331: step: 600/469, loss: 0.03222047537565231 2023-01-22 12:59:11.706606: step: 602/469, loss: 0.030031898990273476 2023-01-22 12:59:12.279882: step: 604/469, loss: 0.14416635036468506 2023-01-22 12:59:12.841114: step: 606/469, loss: 0.010015937499701977 2023-01-22 12:59:13.461202: step: 608/469, loss: 0.059417419135570526 2023-01-22 12:59:14.254223: step: 610/469, loss: 0.01093235332518816 2023-01-22 12:59:14.846330: 
step: 612/469, loss: 0.013218702748417854 2023-01-22 12:59:15.481436: step: 614/469, loss: 0.03595352917909622 2023-01-22 12:59:16.156734: step: 616/469, loss: 0.05864059180021286 2023-01-22 12:59:16.740920: step: 618/469, loss: 0.0648789331316948 2023-01-22 12:59:17.413996: step: 620/469, loss: 0.017844097688794136 2023-01-22 12:59:17.972858: step: 622/469, loss: 0.05244258791208267 2023-01-22 12:59:18.515626: step: 624/469, loss: 0.04297476261854172 2023-01-22 12:59:19.166041: step: 626/469, loss: 0.033403102308511734 2023-01-22 12:59:19.801090: step: 628/469, loss: 0.014146773144602776 2023-01-22 12:59:20.434612: step: 630/469, loss: 0.27720707654953003 2023-01-22 12:59:21.053973: step: 632/469, loss: 0.2061290740966797 2023-01-22 12:59:21.681319: step: 634/469, loss: 0.18586492538452148 2023-01-22 12:59:22.299972: step: 636/469, loss: 0.0966130793094635 2023-01-22 12:59:22.885837: step: 638/469, loss: 0.1111782118678093 2023-01-22 12:59:23.475048: step: 640/469, loss: 0.05226125568151474 2023-01-22 12:59:24.114476: step: 642/469, loss: 0.01457973476499319 2023-01-22 12:59:24.734318: step: 644/469, loss: 0.1761954128742218 2023-01-22 12:59:25.330552: step: 646/469, loss: 0.02660352550446987 2023-01-22 12:59:26.041188: step: 648/469, loss: 0.2710324823856354 2023-01-22 12:59:26.688295: step: 650/469, loss: 0.0830530896782875 2023-01-22 12:59:27.332953: step: 652/469, loss: 0.2343921959400177 2023-01-22 12:59:27.997634: step: 654/469, loss: 0.09646414965391159 2023-01-22 12:59:28.605982: step: 656/469, loss: 0.0021139108575880527 2023-01-22 12:59:29.258685: step: 658/469, loss: 0.03969826176762581 2023-01-22 12:59:29.894303: step: 660/469, loss: 0.025182340294122696 2023-01-22 12:59:30.557428: step: 662/469, loss: 0.02854195050895214 2023-01-22 12:59:31.195107: step: 664/469, loss: 0.007115752901881933 2023-01-22 12:59:31.800861: step: 666/469, loss: 0.01536788884550333 2023-01-22 12:59:32.428451: step: 668/469, loss: 0.04252452030777931 2023-01-22 12:59:33.073525: step: 670/469, loss: 0.03679046407341957 2023-01-22 12:59:33.674265: step: 672/469, loss: 0.0022139910142868757 2023-01-22 12:59:34.277623: step: 674/469, loss: 0.016556719318032265 2023-01-22 12:59:34.893700: step: 676/469, loss: 0.017600931227207184 2023-01-22 12:59:35.557103: step: 678/469, loss: 0.04440116137266159 2023-01-22 12:59:36.282453: step: 680/469, loss: 0.0637761726975441 2023-01-22 12:59:36.928874: step: 682/469, loss: 0.02740737795829773 2023-01-22 12:59:37.549789: step: 684/469, loss: 0.033616483211517334 2023-01-22 12:59:38.222825: step: 686/469, loss: 0.03198402374982834 2023-01-22 12:59:38.879079: step: 688/469, loss: 0.025264738127589226 2023-01-22 12:59:39.488637: step: 690/469, loss: 0.03330082446336746 2023-01-22 12:59:40.167127: step: 692/469, loss: 0.05013738200068474 2023-01-22 12:59:40.795357: step: 694/469, loss: 0.1575414389371872 2023-01-22 12:59:41.388508: step: 696/469, loss: 0.016313966363668442 2023-01-22 12:59:42.077956: step: 698/469, loss: 0.05271385610103607 2023-01-22 12:59:42.730615: step: 700/469, loss: 0.10756105184555054 2023-01-22 12:59:43.356098: step: 702/469, loss: 0.03324884548783302 2023-01-22 12:59:43.921278: step: 704/469, loss: 0.08443838357925415 2023-01-22 12:59:44.627281: step: 706/469, loss: 0.03254329413175583 2023-01-22 12:59:45.302791: step: 708/469, loss: 0.0392867811024189 2023-01-22 12:59:45.929901: step: 710/469, loss: 0.0397937186062336 2023-01-22 12:59:46.569040: step: 712/469, loss: 0.04455344378948212 2023-01-22 12:59:47.210178: step: 714/469, loss: 
0.04998619481921196 2023-01-22 12:59:47.841308: step: 716/469, loss: 0.09658350050449371 2023-01-22 12:59:48.459900: step: 718/469, loss: 0.028297990560531616 2023-01-22 12:59:49.048064: step: 720/469, loss: 0.036358363926410675 2023-01-22 12:59:49.674703: step: 722/469, loss: 0.08025605231523514 2023-01-22 12:59:50.329657: step: 724/469, loss: 0.12209325283765793 2023-01-22 12:59:50.967463: step: 726/469, loss: 0.07784384489059448 2023-01-22 12:59:51.574506: step: 728/469, loss: 0.04587997868657112 2023-01-22 12:59:52.231658: step: 730/469, loss: 0.13514229655265808 2023-01-22 12:59:52.853274: step: 732/469, loss: 0.04976901039481163 2023-01-22 12:59:53.466312: step: 734/469, loss: 0.017125915735960007 2023-01-22 12:59:54.165825: step: 736/469, loss: 0.04468010738492012 2023-01-22 12:59:54.826349: step: 738/469, loss: 0.09711789339780807 2023-01-22 12:59:55.456173: step: 740/469, loss: 0.040078382939100266 2023-01-22 12:59:56.052038: step: 742/469, loss: 0.08487293869256973 2023-01-22 12:59:56.671488: step: 744/469, loss: 0.06645983457565308 2023-01-22 12:59:57.328513: step: 746/469, loss: 0.05511220172047615 2023-01-22 12:59:57.909897: step: 748/469, loss: 0.053987130522727966 2023-01-22 12:59:58.542587: step: 750/469, loss: 0.024653350934386253 2023-01-22 12:59:59.136945: step: 752/469, loss: 0.0641026571393013 2023-01-22 12:59:59.758798: step: 754/469, loss: 0.07033500075340271 2023-01-22 13:00:00.477230: step: 756/469, loss: 0.07802742719650269 2023-01-22 13:00:01.156813: step: 758/469, loss: 0.1480104774236679 2023-01-22 13:00:01.761698: step: 760/469, loss: 0.03959917649626732 2023-01-22 13:00:02.377188: step: 762/469, loss: 0.07692936807870865 2023-01-22 13:00:03.003988: step: 764/469, loss: 0.1016165241599083 2023-01-22 13:00:03.619384: step: 766/469, loss: 0.06488461792469025 2023-01-22 13:00:04.248182: step: 768/469, loss: 0.3187233507633209 2023-01-22 13:00:04.925357: step: 770/469, loss: 0.06554952263832092 2023-01-22 13:00:05.561324: step: 772/469, loss: 0.04240870475769043 2023-01-22 13:00:06.194868: step: 774/469, loss: 0.13569919764995575 2023-01-22 13:00:06.715743: step: 776/469, loss: 0.05393702909350395 2023-01-22 13:00:07.420333: step: 778/469, loss: 0.02817782759666443 2023-01-22 13:00:08.061918: step: 780/469, loss: 0.10576433688402176 2023-01-22 13:00:08.697915: step: 782/469, loss: 0.21694518625736237 2023-01-22 13:00:09.307885: step: 784/469, loss: 0.029813582077622414 2023-01-22 13:00:09.901448: step: 786/469, loss: 0.04769245162606239 2023-01-22 13:00:10.606163: step: 788/469, loss: 0.032447561621665955 2023-01-22 13:00:11.326622: step: 790/469, loss: 0.10353009402751923 2023-01-22 13:00:11.953494: step: 792/469, loss: 0.07201661169528961 2023-01-22 13:00:12.532554: step: 794/469, loss: 0.15413999557495117 2023-01-22 13:00:13.150027: step: 796/469, loss: 0.030728263780474663 2023-01-22 13:00:13.823184: step: 798/469, loss: 0.031079277396202087 2023-01-22 13:00:14.465175: step: 800/469, loss: 0.015821930021047592 2023-01-22 13:00:15.121900: step: 802/469, loss: 0.13217857480049133 2023-01-22 13:00:15.731960: step: 804/469, loss: 0.02876206487417221 2023-01-22 13:00:16.342437: step: 806/469, loss: 0.038718193769454956 2023-01-22 13:00:17.028010: step: 808/469, loss: 0.09459910541772842 2023-01-22 13:00:17.739593: step: 810/469, loss: 0.059684351086616516 2023-01-22 13:00:18.323515: step: 812/469, loss: 0.05107962340116501 2023-01-22 13:00:18.878096: step: 814/469, loss: 0.022224560379981995 2023-01-22 13:00:19.489360: step: 816/469, loss: 0.0248254407197237 
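For reading the per-epoch summaries a little further below (the blocks between the "==========" rules): each reports precision (p), recall (r) and F1 at the template and slot levels per language, plus a "combined" score. The printed numbers are consistent with the standard F1 = 2pr/(p+r) and with combined = template F1 multiplied by slot F1. The sketch below is my own illustration, not the project's evaluation code; it reproduces the epoch-21 "Dev Chinese" values.

def f1(p, r):
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)              # -> 0.7368421052631579
slot_f1 = f1(0.3086890243902439, 0.31220351043643263)  # -> ~0.310436320754717
combined = template_f1 * slot_f1                       # -> ~0.22874255213505462
print(template_f1, slot_f1, combined)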
2023-01-22 13:00:20.081562: step: 818/469, loss: 0.05781036987900734 2023-01-22 13:00:20.751708: step: 820/469, loss: 0.10669608414173126 2023-01-22 13:00:21.351018: step: 822/469, loss: 0.07365518063306808 2023-01-22 13:00:21.993123: step: 824/469, loss: 0.05389543995261192 2023-01-22 13:00:22.652560: step: 826/469, loss: 0.20074480772018433 2023-01-22 13:00:23.228418: step: 828/469, loss: 0.03945460543036461 2023-01-22 13:00:23.854430: step: 830/469, loss: 0.03820425644516945 2023-01-22 13:00:24.508517: step: 832/469, loss: 0.10944437980651855 2023-01-22 13:00:25.091223: step: 834/469, loss: 0.029513105750083923 2023-01-22 13:00:25.790503: step: 836/469, loss: 0.49039581418037415 2023-01-22 13:00:26.395973: step: 838/469, loss: 0.002449015388265252 2023-01-22 13:00:26.987242: step: 840/469, loss: 0.08510282635688782 2023-01-22 13:00:27.611561: step: 842/469, loss: 0.0468197725713253 2023-01-22 13:00:28.248662: step: 844/469, loss: 0.025860905647277832 2023-01-22 13:00:28.833036: step: 846/469, loss: 0.04489691182971001 2023-01-22 13:00:29.512330: step: 848/469, loss: 0.03902563452720642 2023-01-22 13:00:30.163991: step: 850/469, loss: 0.02157416380941868 2023-01-22 13:00:30.793845: step: 852/469, loss: 0.039949797093868256 2023-01-22 13:00:31.449659: step: 854/469, loss: 0.06636688858270645 2023-01-22 13:00:32.042120: step: 856/469, loss: 0.04155225306749344 2023-01-22 13:00:32.703677: step: 858/469, loss: 0.06414211541414261 2023-01-22 13:00:33.305320: step: 860/469, loss: 0.027421142905950546 2023-01-22 13:00:33.884396: step: 862/469, loss: 0.04800093546509743 2023-01-22 13:00:34.538204: step: 864/469, loss: 0.027248848229646683 2023-01-22 13:00:35.172347: step: 866/469, loss: 0.16070668399333954 2023-01-22 13:00:35.753086: step: 868/469, loss: 0.0034537664614617825 2023-01-22 13:00:36.405376: step: 870/469, loss: 0.059498563408851624 2023-01-22 13:00:37.141593: step: 872/469, loss: 0.0485636331140995 2023-01-22 13:00:37.806860: step: 874/469, loss: 0.029997479170560837 2023-01-22 13:00:38.495990: step: 876/469, loss: 0.034448787569999695 2023-01-22 13:00:39.225767: step: 878/469, loss: 0.1755153238773346 2023-01-22 13:00:39.850073: step: 880/469, loss: 0.04570706933736801 2023-01-22 13:00:40.472090: step: 882/469, loss: 0.02426147647202015 2023-01-22 13:00:41.131666: step: 884/469, loss: 0.03767477720975876 2023-01-22 13:00:41.799035: step: 886/469, loss: 0.07098328322172165 2023-01-22 13:00:42.421957: step: 888/469, loss: 0.09545578807592392 2023-01-22 13:00:43.025144: step: 890/469, loss: 0.02542886696755886 2023-01-22 13:00:43.644448: step: 892/469, loss: 0.05639605596661568 2023-01-22 13:00:44.307714: step: 894/469, loss: 0.006311480421572924 2023-01-22 13:00:44.863049: step: 896/469, loss: 0.02327331341803074 2023-01-22 13:00:45.507428: step: 898/469, loss: 0.0260337945073843 2023-01-22 13:00:46.131961: step: 900/469, loss: 0.03944583982229233 2023-01-22 13:00:46.735343: step: 902/469, loss: 0.04809301719069481 2023-01-22 13:00:47.305817: step: 904/469, loss: 0.02306533046066761 2023-01-22 13:00:47.986957: step: 906/469, loss: 0.0661945566534996 2023-01-22 13:00:48.697448: step: 908/469, loss: 0.0746571347117424 2023-01-22 13:00:49.498125: step: 910/469, loss: 0.38557612895965576 2023-01-22 13:00:50.139183: step: 912/469, loss: 0.052983105182647705 2023-01-22 13:00:50.761810: step: 914/469, loss: 0.8118378520011902 2023-01-22 13:00:51.418802: step: 916/469, loss: 0.02027197740972042 2023-01-22 13:00:52.054143: step: 918/469, loss: 0.041854795068502426 2023-01-22 13:00:52.788795: 
step: 920/469, loss: 0.04107408598065376 2023-01-22 13:00:53.459743: step: 922/469, loss: 0.035452499985694885 2023-01-22 13:00:54.138263: step: 924/469, loss: 0.035532962530851364 2023-01-22 13:00:54.723364: step: 926/469, loss: 0.4863441586494446 2023-01-22 13:00:55.358619: step: 928/469, loss: 0.059065476059913635 2023-01-22 13:00:56.002102: step: 930/469, loss: 0.0265822671353817 2023-01-22 13:00:56.673324: step: 932/469, loss: 0.08679469674825668 2023-01-22 13:00:57.404156: step: 934/469, loss: 0.1726817637681961 2023-01-22 13:00:58.050613: step: 936/469, loss: 0.007782823871821165 2023-01-22 13:00:58.707739: step: 938/469, loss: 0.08940225094556808 ================================================== Loss: 0.093 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3086890243902439, 'r': 0.31220351043643263, 'f1': 0.310436320754717}, 'combined': 0.22874255213505462, 'epoch': 21} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31928046532825943, 'r': 0.28101354770886144, 'f1': 0.29892730671122686}, 'combined': 0.16305125820612373, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30765021135265697, 'r': 0.32224462365591394, 'f1': 0.314778344145814}, 'combined': 0.23194193779165242, 'epoch': 21} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3141793440365199, 'r': 0.2742607619331595, 'f1': 0.2928660552024173}, 'combined': 0.15974512101950034, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30001138433515484, 'r': 0.3125355787476281, 'f1': 0.30614544609665434}, 'combined': 0.2255808550185874, 'epoch': 21} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3090444857413183, 'r': 0.2715075756010486, 'f1': 0.28906251364428603}, 'combined': 0.15767046198779236, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24666666666666665, 'r': 0.35238095238095235, 'f1': 0.2901960784313725}, 'combined': 0.19346405228758168, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22058823529411764, 'r': 0.32608695652173914, 'f1': 0.2631578947368421}, 'combined': 0.13157894736842105, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:03:46.046834: step: 2/469, loss: 0.039380770176649094 2023-01-22 13:03:46.760877: step: 4/469, loss: 0.05055861920118332 2023-01-22 13:03:47.378455: step: 6/469, loss: 0.04295489937067032 2023-01-22 13:03:48.044558: step: 8/469, loss: 0.023237884044647217 2023-01-22 13:03:48.650257: step: 10/469, loss: 0.28028494119644165 2023-01-22 13:03:49.297136: step: 12/469, loss: 0.07559418678283691 2023-01-22 13:03:49.956810: step: 14/469, loss: 0.11430711299180984 2023-01-22 13:03:50.590389: step: 16/469, loss: 0.01595250517129898 2023-01-22 13:03:51.194963: step: 18/469, loss: 0.018117446452379227 2023-01-22 13:03:51.862567: step: 20/469, loss: 1.387865424156189 2023-01-22 13:03:52.444209: step: 22/469, loss: 0.024295097216963768 2023-01-22 13:03:53.158393: step: 24/469, loss: 1.1460556983947754 2023-01-22 13:03:53.837962: step: 26/469, loss: 0.13597992062568665 2023-01-22 13:03:54.464850: step: 28/469, loss: 0.01898580975830555 2023-01-22 13:03:55.058066: step: 30/469, loss: 0.015223619528114796 2023-01-22 13:03:55.668677: step: 32/469, loss: 0.0550190769135952 2023-01-22 13:03:56.338620: step: 34/469, loss: 0.005130401346832514 2023-01-22 13:03:56.997229: step: 36/469, loss: 0.02761279046535492 2023-01-22 13:03:57.607282: step: 38/469, loss: 0.0677400454878807 2023-01-22 13:03:58.320853: step: 40/469, loss: 0.048578161746263504 2023-01-22 13:03:59.057162: step: 42/469, loss: 0.07426793873310089 2023-01-22 13:03:59.720063: step: 44/469, loss: 0.04773702472448349 2023-01-22 13:04:00.449812: step: 46/469, loss: 0.3359375298023224 2023-01-22 13:04:01.079653: step: 48/469, loss: 0.08309541642665863 2023-01-22 13:04:01.660306: step: 50/469, loss: 0.17198117077350616 2023-01-22 13:04:02.309777: step: 52/469, loss: 0.06714746356010437 2023-01-22 13:04:02.924197: step: 54/469, loss: 0.0135366665199399 2023-01-22 13:04:03.583179: step: 56/469, loss: 0.5882316827774048 2023-01-22 13:04:04.154813: step: 58/469, loss: 0.008281073532998562 2023-01-22 13:04:04.771464: step: 
60/469, loss: 0.0044734301045536995 2023-01-22 13:04:05.405464: step: 62/469, loss: 0.012628215365111828 2023-01-22 13:04:06.079236: step: 64/469, loss: 0.013408358208835125 2023-01-22 13:04:06.668065: step: 66/469, loss: 0.027022888883948326 2023-01-22 13:04:07.317156: step: 68/469, loss: 0.035244572907686234 2023-01-22 13:04:07.912709: step: 70/469, loss: 0.0042418683879077435 2023-01-22 13:04:08.564562: step: 72/469, loss: 0.03192221373319626 2023-01-22 13:04:09.185424: step: 74/469, loss: 0.05043495073914528 2023-01-22 13:04:09.738693: step: 76/469, loss: 0.03084174543619156 2023-01-22 13:04:10.328927: step: 78/469, loss: 0.05266093462705612 2023-01-22 13:04:10.964716: step: 80/469, loss: 0.04924841970205307 2023-01-22 13:04:11.606353: step: 82/469, loss: 0.014719842933118343 2023-01-22 13:04:12.267326: step: 84/469, loss: 0.0027282529044896364 2023-01-22 13:04:12.900353: step: 86/469, loss: 0.02334756590425968 2023-01-22 13:04:13.573889: step: 88/469, loss: 0.043558232486248016 2023-01-22 13:04:14.160970: step: 90/469, loss: 0.02834555320441723 2023-01-22 13:04:14.753484: step: 92/469, loss: 0.09036640077829361 2023-01-22 13:04:15.422300: step: 94/469, loss: 0.018504438921809196 2023-01-22 13:04:16.082677: step: 96/469, loss: 0.06946828961372375 2023-01-22 13:04:16.690849: step: 98/469, loss: 0.09055262058973312 2023-01-22 13:04:17.254267: step: 100/469, loss: 0.02907070517539978 2023-01-22 13:04:17.836212: step: 102/469, loss: 0.015320273116230965 2023-01-22 13:04:18.475250: step: 104/469, loss: 0.040162622928619385 2023-01-22 13:04:19.050594: step: 106/469, loss: 0.021767625585198402 2023-01-22 13:04:19.689975: step: 108/469, loss: 0.09671736508607864 2023-01-22 13:04:20.385821: step: 110/469, loss: 0.01166476495563984 2023-01-22 13:04:20.986882: step: 112/469, loss: 0.06064381077885628 2023-01-22 13:04:21.554736: step: 114/469, loss: 0.030871480703353882 2023-01-22 13:04:22.216234: step: 116/469, loss: 0.05562804266810417 2023-01-22 13:04:22.806179: step: 118/469, loss: 0.03468940407037735 2023-01-22 13:04:23.458846: step: 120/469, loss: 0.04261632263660431 2023-01-22 13:04:24.140059: step: 122/469, loss: 0.24910223484039307 2023-01-22 13:04:24.791271: step: 124/469, loss: 0.007574675139039755 2023-01-22 13:04:25.449320: step: 126/469, loss: 0.017516538500785828 2023-01-22 13:04:26.108778: step: 128/469, loss: 0.059154730290174484 2023-01-22 13:04:26.698522: step: 130/469, loss: 0.16113242506980896 2023-01-22 13:04:27.292623: step: 132/469, loss: 0.01714664325118065 2023-01-22 13:04:27.885243: step: 134/469, loss: 0.005969333928078413 2023-01-22 13:04:28.552596: step: 136/469, loss: 0.0019294844241812825 2023-01-22 13:04:29.176483: step: 138/469, loss: 0.06720604002475739 2023-01-22 13:04:29.793087: step: 140/469, loss: 0.14247657358646393 2023-01-22 13:04:30.433475: step: 142/469, loss: 0.020634878426790237 2023-01-22 13:04:31.120010: step: 144/469, loss: 0.020726803690195084 2023-01-22 13:04:31.738734: step: 146/469, loss: 0.0766528770327568 2023-01-22 13:04:32.296837: step: 148/469, loss: 0.04846583306789398 2023-01-22 13:04:32.924503: step: 150/469, loss: 0.07275208085775375 2023-01-22 13:04:33.547287: step: 152/469, loss: 0.06970221549272537 2023-01-22 13:04:34.156170: step: 154/469, loss: 0.2876651883125305 2023-01-22 13:04:34.785489: step: 156/469, loss: 0.018488343805074692 2023-01-22 13:04:35.402445: step: 158/469, loss: 0.04454474151134491 2023-01-22 13:04:35.963840: step: 160/469, loss: 0.03576549515128136 2023-01-22 13:04:36.607492: step: 162/469, loss: 
0.05528002977371216 2023-01-22 13:04:37.206478: step: 164/469, loss: 0.03665146976709366 2023-01-22 13:04:37.786513: step: 166/469, loss: 0.005062991287559271 2023-01-22 13:04:38.344283: step: 168/469, loss: 0.0907004177570343 2023-01-22 13:04:38.917930: step: 170/469, loss: 0.1247393786907196 2023-01-22 13:04:39.546120: step: 172/469, loss: 0.034603241831064224 2023-01-22 13:04:40.187053: step: 174/469, loss: 0.018291473388671875 2023-01-22 13:04:40.839591: step: 176/469, loss: 0.010822205804288387 2023-01-22 13:04:41.476258: step: 178/469, loss: 0.041395403444767 2023-01-22 13:04:42.077877: step: 180/469, loss: 0.2562817931175232 2023-01-22 13:04:42.713654: step: 182/469, loss: 0.02315080165863037 2023-01-22 13:04:43.316077: step: 184/469, loss: 0.10349332541227341 2023-01-22 13:04:43.939449: step: 186/469, loss: 0.04429632052779198 2023-01-22 13:04:44.609074: step: 188/469, loss: 0.052627675235271454 2023-01-22 13:04:45.230080: step: 190/469, loss: 0.052082035690546036 2023-01-22 13:04:46.002610: step: 192/469, loss: 0.09045175462961197 2023-01-22 13:04:46.655324: step: 194/469, loss: 0.056290898472070694 2023-01-22 13:04:47.276030: step: 196/469, loss: 0.06224917992949486 2023-01-22 13:04:47.891171: step: 198/469, loss: 0.08666723966598511 2023-01-22 13:04:48.455261: step: 200/469, loss: 0.02088574692606926 2023-01-22 13:04:49.091507: step: 202/469, loss: 0.17314031720161438 2023-01-22 13:04:49.698865: step: 204/469, loss: 0.045591890811920166 2023-01-22 13:04:50.322969: step: 206/469, loss: 0.03817299008369446 2023-01-22 13:04:50.916647: step: 208/469, loss: 0.1506112813949585 2023-01-22 13:04:51.535214: step: 210/469, loss: 0.02801366150379181 2023-01-22 13:04:52.128220: step: 212/469, loss: 0.03334649279713631 2023-01-22 13:04:52.779148: step: 214/469, loss: 0.07692849636077881 2023-01-22 13:04:53.440458: step: 216/469, loss: 0.029438933357596397 2023-01-22 13:04:54.059026: step: 218/469, loss: 0.04559696093201637 2023-01-22 13:04:54.661244: step: 220/469, loss: 0.029989738017320633 2023-01-22 13:04:55.263811: step: 222/469, loss: 0.02871067449450493 2023-01-22 13:04:55.949386: step: 224/469, loss: 0.028774617239832878 2023-01-22 13:04:56.505999: step: 226/469, loss: 0.06132190674543381 2023-01-22 13:04:57.115417: step: 228/469, loss: 0.03399735689163208 2023-01-22 13:04:57.785881: step: 230/469, loss: 0.029686376452445984 2023-01-22 13:04:58.396853: step: 232/469, loss: 0.025587785989046097 2023-01-22 13:04:58.969262: step: 234/469, loss: 0.0662321075797081 2023-01-22 13:04:59.601545: step: 236/469, loss: 0.009135673753917217 2023-01-22 13:05:00.248427: step: 238/469, loss: 0.005674504209309816 2023-01-22 13:05:00.863982: step: 240/469, loss: 0.1720525622367859 2023-01-22 13:05:01.496116: step: 242/469, loss: 0.5190364122390747 2023-01-22 13:05:02.168577: step: 244/469, loss: 0.009142519906163216 2023-01-22 13:05:02.843068: step: 246/469, loss: 0.4977860748767853 2023-01-22 13:05:03.478507: step: 248/469, loss: 0.06799748539924622 2023-01-22 13:05:04.062198: step: 250/469, loss: 0.016992144286632538 2023-01-22 13:05:04.692249: step: 252/469, loss: 0.03562434762716293 2023-01-22 13:05:05.285126: step: 254/469, loss: 0.02499387413263321 2023-01-22 13:05:05.867207: step: 256/469, loss: 7.44908902561292e-05 2023-01-22 13:05:06.537614: step: 258/469, loss: 0.02635972946882248 2023-01-22 13:05:07.213893: step: 260/469, loss: 0.05244665965437889 2023-01-22 13:05:07.856715: step: 262/469, loss: 0.015961477532982826 2023-01-22 13:05:08.440516: step: 264/469, loss: 0.09574796259403229 
2023-01-22 13:05:09.078784: step: 266/469, loss: 0.008326071314513683 2023-01-22 13:05:09.668523: step: 268/469, loss: 0.0605514831840992 2023-01-22 13:05:10.275922: step: 270/469, loss: 0.0383342020213604 2023-01-22 13:05:10.910072: step: 272/469, loss: 0.022514592856168747 2023-01-22 13:05:11.499429: step: 274/469, loss: 0.004070336930453777 2023-01-22 13:05:12.100962: step: 276/469, loss: 0.004935351200401783 2023-01-22 13:05:12.789172: step: 278/469, loss: 0.10180534422397614 2023-01-22 13:05:13.460915: step: 280/469, loss: 0.07068388909101486 2023-01-22 13:05:14.143220: step: 282/469, loss: 0.014246614649891853 2023-01-22 13:05:14.790356: step: 284/469, loss: 0.11330433934926987 2023-01-22 13:05:15.465149: step: 286/469, loss: 0.0123306754976511 2023-01-22 13:05:16.046227: step: 288/469, loss: 0.03695213422179222 2023-01-22 13:05:16.646396: step: 290/469, loss: 0.009263748303055763 2023-01-22 13:05:17.327924: step: 292/469, loss: 0.06397242099046707 2023-01-22 13:05:17.899872: step: 294/469, loss: 0.014196955598890781 2023-01-22 13:05:18.489824: step: 296/469, loss: 0.0032813590951263905 2023-01-22 13:05:19.083993: step: 298/469, loss: 0.009251163341104984 2023-01-22 13:05:19.696946: step: 300/469, loss: 0.040932126343250275 2023-01-22 13:05:20.316365: step: 302/469, loss: 0.003713840153068304 2023-01-22 13:05:21.028185: step: 304/469, loss: 0.10758811235427856 2023-01-22 13:05:21.672498: step: 306/469, loss: 0.024556109681725502 2023-01-22 13:05:22.307357: step: 308/469, loss: 0.08184278011322021 2023-01-22 13:05:22.847202: step: 310/469, loss: 0.012405350804328918 2023-01-22 13:05:23.469856: step: 312/469, loss: 1.9847286939620972 2023-01-22 13:05:24.150485: step: 314/469, loss: 0.15689538419246674 2023-01-22 13:05:24.767132: step: 316/469, loss: 0.025493750348687172 2023-01-22 13:05:25.404479: step: 318/469, loss: 0.08086427301168442 2023-01-22 13:05:26.048952: step: 320/469, loss: 0.031102390959858894 2023-01-22 13:05:26.848490: step: 322/469, loss: 0.040684446692466736 2023-01-22 13:05:27.406654: step: 324/469, loss: 0.018266992643475533 2023-01-22 13:05:28.030917: step: 326/469, loss: 0.032480835914611816 2023-01-22 13:05:28.649427: step: 328/469, loss: 0.054183073341846466 2023-01-22 13:05:29.253052: step: 330/469, loss: 0.03766085207462311 2023-01-22 13:05:30.068015: step: 332/469, loss: 0.06161145493388176 2023-01-22 13:05:30.739515: step: 334/469, loss: 0.01634340174496174 2023-01-22 13:05:31.345234: step: 336/469, loss: 0.06446312367916107 2023-01-22 13:05:31.992884: step: 338/469, loss: 0.002547625219449401 2023-01-22 13:05:32.601756: step: 340/469, loss: 0.015411713160574436 2023-01-22 13:05:33.263908: step: 342/469, loss: 0.4192122519016266 2023-01-22 13:05:33.923445: step: 344/469, loss: 0.08557723462581635 2023-01-22 13:05:34.506228: step: 346/469, loss: 0.04641762748360634 2023-01-22 13:05:35.119636: step: 348/469, loss: 0.04900040850043297 2023-01-22 13:05:35.803907: step: 350/469, loss: 0.02108554169535637 2023-01-22 13:05:36.398194: step: 352/469, loss: 0.5605222582817078 2023-01-22 13:05:37.037434: step: 354/469, loss: 0.008658409118652344 2023-01-22 13:05:37.677462: step: 356/469, loss: 0.050972625613212585 2023-01-22 13:05:38.336302: step: 358/469, loss: 0.1746513843536377 2023-01-22 13:05:38.908216: step: 360/469, loss: 0.022396810352802277 2023-01-22 13:05:39.541838: step: 362/469, loss: 0.07565449178218842 2023-01-22 13:05:40.228477: step: 364/469, loss: 0.034683264791965485 2023-01-22 13:05:40.814896: step: 366/469, loss: 0.190207377076149 2023-01-22 
13:05:41.387671: step: 368/469, loss: 0.03791588172316551 2023-01-22 13:05:41.942499: step: 370/469, loss: 0.023350585252046585 2023-01-22 13:05:42.619426: step: 372/469, loss: 0.01141371950507164 2023-01-22 13:05:43.239264: step: 374/469, loss: 0.061606645584106445 2023-01-22 13:05:43.821569: step: 376/469, loss: 0.014597347006201744 2023-01-22 13:05:44.487954: step: 378/469, loss: 0.0035952257458120584 2023-01-22 13:05:45.197263: step: 380/469, loss: 0.08695082366466522 2023-01-22 13:05:45.734798: step: 382/469, loss: 0.25980886816978455 2023-01-22 13:05:46.335467: step: 384/469, loss: 0.014322969131171703 2023-01-22 13:05:46.917370: step: 386/469, loss: 0.02906735986471176 2023-01-22 13:05:47.561070: step: 388/469, loss: 0.016713572666049004 2023-01-22 13:05:48.148229: step: 390/469, loss: 0.026671504601836205 2023-01-22 13:05:48.709536: step: 392/469, loss: 0.027209313586354256 2023-01-22 13:05:49.387974: step: 394/469, loss: 0.08768302947282791 2023-01-22 13:05:49.984275: step: 396/469, loss: 0.05510547012090683 2023-01-22 13:05:50.561565: step: 398/469, loss: 0.018466763198375702 2023-01-22 13:05:51.178146: step: 400/469, loss: 0.01697220467031002 2023-01-22 13:05:51.786779: step: 402/469, loss: 0.03475261107087135 2023-01-22 13:05:52.358674: step: 404/469, loss: 0.04525039345026016 2023-01-22 13:05:52.939376: step: 406/469, loss: 0.02501925826072693 2023-01-22 13:05:53.544292: step: 408/469, loss: 0.006451896391808987 2023-01-22 13:05:54.166796: step: 410/469, loss: 0.04958367347717285 2023-01-22 13:05:54.744238: step: 412/469, loss: 0.015868689864873886 2023-01-22 13:05:55.415702: step: 414/469, loss: 0.011564032174646854 2023-01-22 13:05:56.073160: step: 416/469, loss: 0.16424602270126343 2023-01-22 13:05:56.791608: step: 418/469, loss: 0.050051528960466385 2023-01-22 13:05:57.363792: step: 420/469, loss: 0.05874348431825638 2023-01-22 13:05:58.013314: step: 422/469, loss: 0.26968201994895935 2023-01-22 13:05:58.527982: step: 424/469, loss: 0.0008949778275564313 2023-01-22 13:05:59.147212: step: 426/469, loss: 0.02027851715683937 2023-01-22 13:05:59.747069: step: 428/469, loss: 0.0919344499707222 2023-01-22 13:06:00.352720: step: 430/469, loss: 0.0019553264137357473 2023-01-22 13:06:01.021644: step: 432/469, loss: 0.09591136872768402 2023-01-22 13:06:01.700884: step: 434/469, loss: 0.023648599162697792 2023-01-22 13:06:02.379545: step: 436/469, loss: 0.0690121278166771 2023-01-22 13:06:02.999353: step: 438/469, loss: 0.009107696823775768 2023-01-22 13:06:03.611649: step: 440/469, loss: 0.07664472609758377 2023-01-22 13:06:04.293391: step: 442/469, loss: 0.058984462171792984 2023-01-22 13:06:04.914570: step: 444/469, loss: 0.1856195330619812 2023-01-22 13:06:05.559767: step: 446/469, loss: 0.025817250832915306 2023-01-22 13:06:06.189053: step: 448/469, loss: 0.008321426808834076 2023-01-22 13:06:06.807107: step: 450/469, loss: 0.10565192252397537 2023-01-22 13:06:07.425425: step: 452/469, loss: 0.026837600395083427 2023-01-22 13:06:07.989690: step: 454/469, loss: 0.04784046486020088 2023-01-22 13:06:08.543906: step: 456/469, loss: 0.002765099285170436 2023-01-22 13:06:09.130974: step: 458/469, loss: 0.05832146480679512 2023-01-22 13:06:09.749304: step: 460/469, loss: 0.0680270865559578 2023-01-22 13:06:10.335059: step: 462/469, loss: 0.06666941195726395 2023-01-22 13:06:10.969292: step: 464/469, loss: 0.13499917089939117 2023-01-22 13:06:11.646657: step: 466/469, loss: 0.02597840130329132 2023-01-22 13:06:12.290385: step: 468/469, loss: 0.07810312509536743 2023-01-22 
13:06:12.894970: step: 470/469, loss: 0.04390675574541092 2023-01-22 13:06:13.546445: step: 472/469, loss: 0.01453032810240984 2023-01-22 13:06:14.164488: step: 474/469, loss: 0.025088896974921227 2023-01-22 13:06:14.868741: step: 476/469, loss: 0.09662743657827377 2023-01-22 13:06:15.512899: step: 478/469, loss: 0.035653889179229736 2023-01-22 13:06:16.132839: step: 480/469, loss: 0.02839694544672966 2023-01-22 13:06:16.762056: step: 482/469, loss: 0.036323752254247665 2023-01-22 13:06:17.512174: step: 484/469, loss: 0.0936509370803833 2023-01-22 13:06:18.094365: step: 486/469, loss: 0.07789144665002823 2023-01-22 13:06:18.734642: step: 488/469, loss: 0.045089107006788254 2023-01-22 13:06:19.343988: step: 490/469, loss: 0.006903240457177162 2023-01-22 13:06:20.040916: step: 492/469, loss: 0.023016300052404404 2023-01-22 13:06:20.720679: step: 494/469, loss: 0.08976443111896515 2023-01-22 13:06:21.351453: step: 496/469, loss: 0.38846355676651 2023-01-22 13:06:21.897015: step: 498/469, loss: 0.018937893211841583 2023-01-22 13:06:22.486735: step: 500/469, loss: 0.022624827921390533 2023-01-22 13:06:23.119970: step: 502/469, loss: 0.04228555038571358 2023-01-22 13:06:23.751809: step: 504/469, loss: 0.07831253111362457 2023-01-22 13:06:24.525091: step: 506/469, loss: 0.0061602178029716015 2023-01-22 13:06:25.195596: step: 508/469, loss: 0.009053698740899563 2023-01-22 13:06:25.810764: step: 510/469, loss: 0.028699707239866257 2023-01-22 13:06:26.503310: step: 512/469, loss: 0.05716666206717491 2023-01-22 13:06:27.084565: step: 514/469, loss: 0.05631757900118828 2023-01-22 13:06:27.785893: step: 516/469, loss: 0.01383176352828741 2023-01-22 13:06:28.445999: step: 518/469, loss: 0.07788243889808655 2023-01-22 13:06:29.108599: step: 520/469, loss: 0.11175289750099182 2023-01-22 13:06:29.738825: step: 522/469, loss: 0.12618686258792877 2023-01-22 13:06:30.385937: step: 524/469, loss: 0.0342743918299675 2023-01-22 13:06:31.009355: step: 526/469, loss: 0.0502798929810524 2023-01-22 13:06:31.667000: step: 528/469, loss: 0.030329961329698563 2023-01-22 13:06:32.303905: step: 530/469, loss: 0.03982508182525635 2023-01-22 13:06:32.845192: step: 532/469, loss: 0.2980542778968811 2023-01-22 13:06:33.467936: step: 534/469, loss: 0.03202463313937187 2023-01-22 13:06:34.161336: step: 536/469, loss: 0.03793339058756828 2023-01-22 13:06:34.752754: step: 538/469, loss: 0.12230332940816879 2023-01-22 13:06:35.412634: step: 540/469, loss: 0.13345609605312347 2023-01-22 13:06:36.074039: step: 542/469, loss: 0.012067705392837524 2023-01-22 13:06:36.672967: step: 544/469, loss: 0.029101772233843803 2023-01-22 13:06:37.321560: step: 546/469, loss: 0.033134300261735916 2023-01-22 13:06:37.978834: step: 548/469, loss: 0.027724526822566986 2023-01-22 13:06:38.655485: step: 550/469, loss: 0.03329896926879883 2023-01-22 13:06:39.254613: step: 552/469, loss: 0.40814974904060364 2023-01-22 13:06:39.868083: step: 554/469, loss: 0.16827653348445892 2023-01-22 13:06:40.540026: step: 556/469, loss: 0.037233296781778336 2023-01-22 13:06:41.118666: step: 558/469, loss: 0.025751609355211258 2023-01-22 13:06:41.775876: step: 560/469, loss: 0.030713330954313278 2023-01-22 13:06:42.392706: step: 562/469, loss: 0.10262533277273178 2023-01-22 13:06:43.026732: step: 564/469, loss: 0.07401344925165176 2023-01-22 13:06:43.639677: step: 566/469, loss: 0.0704004168510437 2023-01-22 13:06:44.268473: step: 568/469, loss: 0.13814091682434082 2023-01-22 13:06:44.889433: step: 570/469, loss: 0.06557688862085342 2023-01-22 13:06:45.520251: step: 
572/469, loss: 0.02300243265926838 2023-01-22 13:06:46.134341: step: 574/469, loss: 0.0038402474019676447 2023-01-22 13:06:46.717352: step: 576/469, loss: 0.04973436892032623 2023-01-22 13:06:47.354249: step: 578/469, loss: 0.05711760371923447 2023-01-22 13:06:47.964604: step: 580/469, loss: 0.06349515914916992 2023-01-22 13:06:48.605208: step: 582/469, loss: 0.030135851353406906 2023-01-22 13:06:49.224497: step: 584/469, loss: 0.04700420796871185 2023-01-22 13:06:49.828601: step: 586/469, loss: 0.075089231133461 2023-01-22 13:06:50.502120: step: 588/469, loss: 0.08240267634391785 2023-01-22 13:06:51.163726: step: 590/469, loss: 0.15979255735874176 2023-01-22 13:06:51.760867: step: 592/469, loss: 0.011902032420039177 2023-01-22 13:06:52.370338: step: 594/469, loss: 0.04692215099930763 2023-01-22 13:06:52.983254: step: 596/469, loss: 0.056545473635196686 2023-01-22 13:06:53.674828: step: 598/469, loss: 0.04550258442759514 2023-01-22 13:06:54.326005: step: 600/469, loss: 0.07504047453403473 2023-01-22 13:06:54.962149: step: 602/469, loss: 0.006439357530325651 2023-01-22 13:06:55.772706: step: 604/469, loss: 0.175323486328125 2023-01-22 13:06:56.414872: step: 606/469, loss: 0.05155857652425766 2023-01-22 13:06:57.049688: step: 608/469, loss: 0.10046927630901337 2023-01-22 13:06:57.720647: step: 610/469, loss: 0.03589603677392006 2023-01-22 13:06:58.363041: step: 612/469, loss: 0.2100471705198288 2023-01-22 13:06:59.005905: step: 614/469, loss: 0.016872920095920563 2023-01-22 13:06:59.632300: step: 616/469, loss: 0.09207411110401154 2023-01-22 13:07:00.266982: step: 618/469, loss: 0.019891807809472084 2023-01-22 13:07:00.853231: step: 620/469, loss: 0.015381723642349243 2023-01-22 13:07:01.500038: step: 622/469, loss: 0.08633366227149963 2023-01-22 13:07:02.143295: step: 624/469, loss: 0.030681993812322617 2023-01-22 13:07:02.756601: step: 626/469, loss: 0.019833922386169434 2023-01-22 13:07:03.392172: step: 628/469, loss: 0.05075858160853386 2023-01-22 13:07:03.991759: step: 630/469, loss: 0.02489897422492504 2023-01-22 13:07:04.614667: step: 632/469, loss: 0.08151629567146301 2023-01-22 13:07:05.245983: step: 634/469, loss: 0.2602803409099579 2023-01-22 13:07:05.850666: step: 636/469, loss: 0.028771480545401573 2023-01-22 13:07:06.517096: step: 638/469, loss: 0.10833048075437546 2023-01-22 13:07:07.142307: step: 640/469, loss: 0.22582930326461792 2023-01-22 13:07:07.800290: step: 642/469, loss: 0.011523720808327198 2023-01-22 13:07:08.518060: step: 644/469, loss: 0.018657146021723747 2023-01-22 13:07:09.168250: step: 646/469, loss: 0.04349122568964958 2023-01-22 13:07:09.794871: step: 648/469, loss: 0.2706346809864044 2023-01-22 13:07:10.363885: step: 650/469, loss: 0.03992457315325737 2023-01-22 13:07:11.021219: step: 652/469, loss: 0.03795793280005455 2023-01-22 13:07:11.591120: step: 654/469, loss: 0.04529811441898346 2023-01-22 13:07:12.190384: step: 656/469, loss: 0.08681243658065796 2023-01-22 13:07:12.850118: step: 658/469, loss: 0.035051338374614716 2023-01-22 13:07:13.448499: step: 660/469, loss: 0.033167291432619095 2023-01-22 13:07:14.056519: step: 662/469, loss: 0.03456336259841919 2023-01-22 13:07:14.662641: step: 664/469, loss: 0.012135052122175694 2023-01-22 13:07:15.268662: step: 666/469, loss: 0.0567280538380146 2023-01-22 13:07:15.931396: step: 668/469, loss: 0.003694563638418913 2023-01-22 13:07:16.557710: step: 670/469, loss: 0.04029498249292374 2023-01-22 13:07:17.160322: step: 672/469, loss: 0.04340934008359909 2023-01-22 13:07:17.781366: step: 674/469, loss: 
0.044494953006505966 2023-01-22 13:07:18.327658: step: 676/469, loss: 0.005487445276230574 2023-01-22 13:07:18.924731: step: 678/469, loss: 0.11133415997028351 2023-01-22 13:07:19.573828: step: 680/469, loss: 0.06047096475958824 2023-01-22 13:07:20.279460: step: 682/469, loss: 0.03617478907108307 2023-01-22 13:07:20.879775: step: 684/469, loss: 0.03409360349178314 2023-01-22 13:07:21.535182: step: 686/469, loss: 0.07524754852056503 2023-01-22 13:07:22.141754: step: 688/469, loss: 0.11170642077922821 2023-01-22 13:07:22.715467: step: 690/469, loss: 0.06437705457210541 2023-01-22 13:07:23.436363: step: 692/469, loss: 0.0658484622836113 2023-01-22 13:07:24.018423: step: 694/469, loss: 0.03594261035323143 2023-01-22 13:07:24.626912: step: 696/469, loss: 0.11162786185741425 2023-01-22 13:07:25.350962: step: 698/469, loss: 0.03382645919919014 2023-01-22 13:07:25.978255: step: 700/469, loss: 0.8750171065330505 2023-01-22 13:07:26.647966: step: 702/469, loss: 0.023832466453313828 2023-01-22 13:07:27.131213: step: 704/469, loss: 0.029626909643411636 2023-01-22 13:07:27.739985: step: 706/469, loss: 0.058364421129226685 2023-01-22 13:07:28.345131: step: 708/469, loss: 0.08198067545890808 2023-01-22 13:07:28.998367: step: 710/469, loss: 0.0586685948073864 2023-01-22 13:07:29.643972: step: 712/469, loss: 0.17847940325737 2023-01-22 13:07:30.216881: step: 714/469, loss: 0.040565043687820435 2023-01-22 13:07:30.844924: step: 716/469, loss: 0.11069989949464798 2023-01-22 13:07:31.494009: step: 718/469, loss: 0.29169994592666626 2023-01-22 13:07:32.137888: step: 720/469, loss: 0.034519705921411514 2023-01-22 13:07:32.847610: step: 722/469, loss: 0.008544232696294785 2023-01-22 13:07:33.490414: step: 724/469, loss: 0.02701670303940773 2023-01-22 13:07:34.162998: step: 726/469, loss: 0.07301875203847885 2023-01-22 13:07:34.830554: step: 728/469, loss: 0.07606630027294159 2023-01-22 13:07:35.414335: step: 730/469, loss: 0.041152600198984146 2023-01-22 13:07:36.011582: step: 732/469, loss: 0.05740166828036308 2023-01-22 13:07:36.650328: step: 734/469, loss: 0.08332406729459763 2023-01-22 13:07:37.265753: step: 736/469, loss: 0.2419954389333725 2023-01-22 13:07:37.890582: step: 738/469, loss: 0.03665747493505478 2023-01-22 13:07:38.525954: step: 740/469, loss: 0.025403790175914764 2023-01-22 13:07:39.129705: step: 742/469, loss: 0.008551633916795254 2023-01-22 13:07:39.683078: step: 744/469, loss: 0.002937992801889777 2023-01-22 13:07:40.301872: step: 746/469, loss: 0.11794460564851761 2023-01-22 13:07:40.994426: step: 748/469, loss: 0.0707455426454544 2023-01-22 13:07:41.613664: step: 750/469, loss: 0.029475955292582512 2023-01-22 13:07:42.188972: step: 752/469, loss: 0.19640345871448517 2023-01-22 13:07:42.881695: step: 754/469, loss: 0.012563933618366718 2023-01-22 13:07:43.553678: step: 756/469, loss: 0.05138123780488968 2023-01-22 13:07:44.144833: step: 758/469, loss: 0.27541208267211914 2023-01-22 13:07:44.748912: step: 760/469, loss: 0.016575973480939865 2023-01-22 13:07:45.471760: step: 762/469, loss: 0.056157324463129044 2023-01-22 13:07:46.107939: step: 764/469, loss: 0.19806243479251862 2023-01-22 13:07:46.719827: step: 766/469, loss: 0.03232935070991516 2023-01-22 13:07:47.348178: step: 768/469, loss: 0.01980048045516014 2023-01-22 13:07:48.016244: step: 770/469, loss: 0.4653545022010803 2023-01-22 13:07:48.623148: step: 772/469, loss: 0.07322318851947784 2023-01-22 13:07:49.243577: step: 774/469, loss: 0.0887472853064537 2023-01-22 13:07:49.903885: step: 776/469, loss: 0.012273302301764488 
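The command echoed at each epoch header (e.g. the "Epoch: 23 command: ..." line further below) fixes the run's hyperparameters. The following argparse sketch is purely illustrative and only mirrors the flags visible in that command; the real train.py may declare them differently, and the split between the two learning rates (encoder vs. task heads) is an assumption.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_name", default="slot")
parser.add_argument("--xlmr_model_name", default="xlm-roberta-large")
parser.add_argument("--batch_size", type=int, default=16)
parser.add_argument("--xlmr_learning_rate", type=float, default=2e-5)  # presumably the XLM-R encoder LR
parser.add_argument("--learning_rate", type=float, default=9e-4)       # presumably the LR for the task-specific layers
parser.add_argument("--max_epoch", type=int, default=40)
parser.add_argument("--event_hidden_num", type=int, default=450)
parser.add_argument("--role_hidden_num", type=int, default=350)
parser.add_argument("--p1_data_weight", type=float, default=0.1)

args = parser.parse_args([])  # defaults mirror the values in the logged command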
2023-01-22 13:07:50.529569: step: 778/469, loss: 0.02280343882739544 2023-01-22 13:07:51.166485: step: 780/469, loss: 0.012945477850735188 2023-01-22 13:07:51.747596: step: 782/469, loss: 0.09715475142002106 2023-01-22 13:07:52.393384: step: 784/469, loss: 0.13165117800235748 2023-01-22 13:07:52.982178: step: 786/469, loss: 0.02064213901758194 2023-01-22 13:07:53.671433: step: 788/469, loss: 0.018043173477053642 2023-01-22 13:07:54.304350: step: 790/469, loss: 0.05820675566792488 2023-01-22 13:07:54.915518: step: 792/469, loss: 0.012131190858781338 2023-01-22 13:07:55.548860: step: 794/469, loss: 0.035868994891643524 2023-01-22 13:07:56.131058: step: 796/469, loss: 0.01868510991334915 2023-01-22 13:07:56.765923: step: 798/469, loss: 0.4150649905204773 2023-01-22 13:07:57.364283: step: 800/469, loss: 0.028201915323734283 2023-01-22 13:07:57.897180: step: 802/469, loss: 0.016570623964071274 2023-01-22 13:07:58.543130: step: 804/469, loss: 0.06812107563018799 2023-01-22 13:07:59.169118: step: 806/469, loss: 0.06283491104841232 2023-01-22 13:07:59.812678: step: 808/469, loss: 0.0259453933686018 2023-01-22 13:08:00.457210: step: 810/469, loss: 0.05941562354564667 2023-01-22 13:08:01.045491: step: 812/469, loss: 0.023522358387708664 2023-01-22 13:08:01.672776: step: 814/469, loss: 0.044914744794368744 2023-01-22 13:08:02.274379: step: 816/469, loss: 0.013948260806500912 2023-01-22 13:08:02.871749: step: 818/469, loss: 0.06239095330238342 2023-01-22 13:08:03.510080: step: 820/469, loss: 0.09571448713541031 2023-01-22 13:08:04.210076: step: 822/469, loss: 0.06513001769781113 2023-01-22 13:08:04.910615: step: 824/469, loss: 0.029995225369930267 2023-01-22 13:08:05.526177: step: 826/469, loss: 0.007673286367207766 2023-01-22 13:08:06.165130: step: 828/469, loss: 0.029067866504192352 2023-01-22 13:08:06.797144: step: 830/469, loss: 0.04245278239250183 2023-01-22 13:08:07.442882: step: 832/469, loss: 0.021359845995903015 2023-01-22 13:08:08.098256: step: 834/469, loss: 0.030331697314977646 2023-01-22 13:08:08.704100: step: 836/469, loss: 0.02838105894625187 2023-01-22 13:08:09.342103: step: 838/469, loss: 0.10588711500167847 2023-01-22 13:08:10.021279: step: 840/469, loss: 0.044374190270900726 2023-01-22 13:08:10.732354: step: 842/469, loss: 0.5629298686981201 2023-01-22 13:08:11.378168: step: 844/469, loss: 0.05076729506254196 2023-01-22 13:08:11.984210: step: 846/469, loss: 0.02145289070904255 2023-01-22 13:08:12.641363: step: 848/469, loss: 0.035037655383348465 2023-01-22 13:08:13.295395: step: 850/469, loss: 0.029345309361815453 2023-01-22 13:08:13.917109: step: 852/469, loss: 0.04547927901148796 2023-01-22 13:08:14.582784: step: 854/469, loss: 0.019605614244937897 2023-01-22 13:08:15.299448: step: 856/469, loss: 0.061750419437885284 2023-01-22 13:08:15.935990: step: 858/469, loss: 0.00015733565669506788 2023-01-22 13:08:16.667184: step: 860/469, loss: 0.09106114506721497 2023-01-22 13:08:17.184735: step: 862/469, loss: 0.038240257650613785 2023-01-22 13:08:17.816618: step: 864/469, loss: 0.032368253916502 2023-01-22 13:08:18.477686: step: 866/469, loss: 0.02331375889480114 2023-01-22 13:08:19.165125: step: 868/469, loss: 0.0046402099542319775 2023-01-22 13:08:19.870176: step: 870/469, loss: 0.01011233776807785 2023-01-22 13:08:20.488655: step: 872/469, loss: 0.03311903774738312 2023-01-22 13:08:21.047619: step: 874/469, loss: 0.008461453020572662 2023-01-22 13:08:21.705987: step: 876/469, loss: 0.029028356075286865 2023-01-22 13:08:22.287099: step: 878/469, loss: 0.021870478987693787 2023-01-22 
13:08:22.907674: step: 880/469, loss: 0.020664963871240616 2023-01-22 13:08:23.452288: step: 882/469, loss: 0.06315284967422485 2023-01-22 13:08:24.005781: step: 884/469, loss: 0.08060536533594131 2023-01-22 13:08:24.612874: step: 886/469, loss: 0.0008277355809696019 2023-01-22 13:08:25.262749: step: 888/469, loss: 0.08211325109004974 2023-01-22 13:08:25.871926: step: 890/469, loss: 0.12384329736232758 2023-01-22 13:08:26.534537: step: 892/469, loss: 0.02858704701066017 2023-01-22 13:08:27.160957: step: 894/469, loss: 0.06107771769165993 2023-01-22 13:08:27.797562: step: 896/469, loss: 0.017599692568182945 2023-01-22 13:08:28.433868: step: 898/469, loss: 0.016021881252527237 2023-01-22 13:08:29.064703: step: 900/469, loss: 0.12623955309391022 2023-01-22 13:08:29.735972: step: 902/469, loss: 0.01892252080142498 2023-01-22 13:08:30.387213: step: 904/469, loss: 0.04684660956263542 2023-01-22 13:08:31.025703: step: 906/469, loss: 0.027284299954771996 2023-01-22 13:08:31.635610: step: 908/469, loss: 0.05409190058708191 2023-01-22 13:08:32.289749: step: 910/469, loss: 0.038611020892858505 2023-01-22 13:08:32.889263: step: 912/469, loss: 0.008061953820288181 2023-01-22 13:08:33.486453: step: 914/469, loss: 0.14487463235855103 2023-01-22 13:08:34.166754: step: 916/469, loss: 0.2800885736942291 2023-01-22 13:08:34.835307: step: 918/469, loss: 0.037585340440273285 2023-01-22 13:08:35.448780: step: 920/469, loss: 0.2513177990913391 2023-01-22 13:08:36.055608: step: 922/469, loss: 0.05311236158013344 2023-01-22 13:08:36.750549: step: 924/469, loss: 0.3178267180919647 2023-01-22 13:08:37.427106: step: 926/469, loss: 0.08004887402057648 2023-01-22 13:08:38.035646: step: 928/469, loss: 0.06739316135644913 2023-01-22 13:08:38.669622: step: 930/469, loss: 0.04459620267152786 2023-01-22 13:08:39.323639: step: 932/469, loss: 0.055536992847919464 2023-01-22 13:08:39.952680: step: 934/469, loss: 0.04985564574599266 2023-01-22 13:08:40.535419: step: 936/469, loss: 0.22679363191127777 2023-01-22 13:08:41.152203: step: 938/469, loss: 0.10756853967905045 ================================================== Loss: 0.078 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3153078111209179, 'r': 0.3081281266172158, 'f1': 0.3116766271156866}, 'combined': 0.22965646208524276, 'epoch': 22} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2992935202622275, 'r': 0.25904086932119597, 'f1': 0.2777162041864318}, 'combined': 0.15148156591987186, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3051332664456465, 'r': 0.3016592634121097, 'f1': 0.30338632026370577}, 'combined': 0.2235478149311516, 'epoch': 22} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30437381331314173, 'r': 0.26037467104097667, 'f1': 0.2806602716447609}, 'combined': 0.15308742089714228, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2978526183623949, 'r': 0.3035044707032373, 'f1': 0.30065198507632723}, 'combined': 0.22153304163518847, 'epoch': 22} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2946521384403305, 'r': 0.2569107849347072, 'f1': 0.2744902130338563}, 'combined': 0.14972193438210343, 'epoch': 22} Sample Chinese: {'template': 
{'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2955426356589147, 'r': 0.3630952380952381, 'f1': 0.32585470085470086}, 'combined': 0.21723646723646722, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27586206896551724, 'r': 0.34782608695652173, 'f1': 0.3076923076923077}, 'combined': 0.15384615384615385, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38235294117647056, 'r': 0.22413793103448276, 'f1': 0.28260869565217395}, 'combined': 0.18840579710144928, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:11:29.054407: step: 2/469, loss: 1.2357395887374878 2023-01-22 13:11:29.666998: step: 4/469, loss: 0.024183563888072968 2023-01-22 13:11:30.258230: step: 6/469, loss: 0.0776687040925026 2023-01-22 13:11:30.861245: step: 8/469, loss: 0.0249489713460207 2023-01-22 13:11:31.475209: step: 10/469, loss: 0.0044220928102731705 2023-01-22 13:11:32.129811: step: 12/469, loss: 0.006003873888403177 2023-01-22 13:11:32.793065: step: 14/469, loss: 0.004591218661516905 2023-01-22 13:11:33.371568: step: 16/469, loss: 0.017270060256123543 2023-01-22 13:11:34.064258: step: 18/469, 
loss: 0.04863632470369339 2023-01-22 13:11:34.712430: step: 20/469, loss: 0.016850149258971214 2023-01-22 13:11:35.269037: step: 22/469, loss: 0.005202325060963631 2023-01-22 13:11:35.906790: step: 24/469, loss: 0.027514085173606873 2023-01-22 13:11:36.516249: step: 26/469, loss: 0.026645144447684288 2023-01-22 13:11:37.144399: step: 28/469, loss: 0.01198261883109808 2023-01-22 13:11:37.818417: step: 30/469, loss: 0.00844186544418335 2023-01-22 13:11:38.441618: step: 32/469, loss: 0.013920240104198456 2023-01-22 13:11:39.055416: step: 34/469, loss: 0.029003195464611053 2023-01-22 13:11:39.707954: step: 36/469, loss: 0.027369927614927292 2023-01-22 13:11:40.388021: step: 38/469, loss: 0.029376836493611336 2023-01-22 13:11:41.016220: step: 40/469, loss: 0.06202325597405434 2023-01-22 13:11:41.606563: step: 42/469, loss: 0.036938123404979706 2023-01-22 13:11:42.167830: step: 44/469, loss: 0.04467153921723366 2023-01-22 13:11:42.719372: step: 46/469, loss: 0.009959367103874683 2023-01-22 13:11:43.433150: step: 48/469, loss: 0.021179791539907455 2023-01-22 13:11:44.117748: step: 50/469, loss: 0.050611015409231186 2023-01-22 13:11:44.672686: step: 52/469, loss: 0.004724889528006315 2023-01-22 13:11:45.277167: step: 54/469, loss: 0.05585126578807831 2023-01-22 13:11:45.959715: step: 56/469, loss: 0.015907835215330124 2023-01-22 13:11:46.592793: step: 58/469, loss: 0.00852515920996666 2023-01-22 13:11:47.243034: step: 60/469, loss: 0.03204323351383209 2023-01-22 13:11:47.855385: step: 62/469, loss: 0.016114724799990654 2023-01-22 13:11:48.408722: step: 64/469, loss: 0.024006161838769913 2023-01-22 13:11:48.951455: step: 66/469, loss: 0.04767401143908501 2023-01-22 13:11:49.581088: step: 68/469, loss: 0.11314443498849869 2023-01-22 13:11:50.176424: step: 70/469, loss: 0.01779489405453205 2023-01-22 13:11:50.757371: step: 72/469, loss: 0.0358414426445961 2023-01-22 13:11:51.468871: step: 74/469, loss: 0.10481178015470505 2023-01-22 13:11:52.089092: step: 76/469, loss: 0.021338336169719696 2023-01-22 13:11:52.696559: step: 78/469, loss: 0.06205512583255768 2023-01-22 13:11:53.383945: step: 80/469, loss: 0.0201254952698946 2023-01-22 13:11:54.011240: step: 82/469, loss: 0.7630947828292847 2023-01-22 13:11:54.641697: step: 84/469, loss: 0.02397053875029087 2023-01-22 13:11:55.264462: step: 86/469, loss: 0.09846276789903641 2023-01-22 13:11:55.812672: step: 88/469, loss: 0.05467109754681587 2023-01-22 13:11:56.434825: step: 90/469, loss: 0.04410193860530853 2023-01-22 13:11:57.062860: step: 92/469, loss: 0.03433063253760338 2023-01-22 13:11:57.688264: step: 94/469, loss: 0.0014956542290747166 2023-01-22 13:11:58.325505: step: 96/469, loss: 0.017814695835113525 2023-01-22 13:11:58.990388: step: 98/469, loss: 0.061385128647089005 2023-01-22 13:11:59.669846: step: 100/469, loss: 0.07646140456199646 2023-01-22 13:12:00.252595: step: 102/469, loss: 0.05072186887264252 2023-01-22 13:12:00.875454: step: 104/469, loss: 0.011625583283603191 2023-01-22 13:12:01.516823: step: 106/469, loss: 0.006602386943995953 2023-01-22 13:12:02.130848: step: 108/469, loss: 0.05438736826181412 2023-01-22 13:12:02.731237: step: 110/469, loss: 0.0855780616402626 2023-01-22 13:12:03.361124: step: 112/469, loss: 0.047850873321294785 2023-01-22 13:12:04.086821: step: 114/469, loss: 0.04076141491532326 2023-01-22 13:12:04.690654: step: 116/469, loss: 0.019435521215200424 2023-01-22 13:12:05.329810: step: 118/469, loss: 0.028983084484934807 2023-01-22 13:12:05.891512: step: 120/469, loss: 0.016191082075238228 2023-01-22 
13:12:06.543902: step: 122/469, loss: 0.04412242770195007 2023-01-22 13:12:07.162339: step: 124/469, loss: 0.08400098234415054 2023-01-22 13:12:07.759644: step: 126/469, loss: 0.029566580429673195 2023-01-22 13:12:08.400307: step: 128/469, loss: 0.3515491783618927 2023-01-22 13:12:09.046687: step: 130/469, loss: 0.11477603018283844 2023-01-22 13:12:09.632395: step: 132/469, loss: 0.03230782970786095 2023-01-22 13:12:10.257154: step: 134/469, loss: 0.010962975211441517 2023-01-22 13:12:10.841906: step: 136/469, loss: 0.01593821682035923 2023-01-22 13:12:11.437201: step: 138/469, loss: 0.01611877791583538 2023-01-22 13:12:12.042797: step: 140/469, loss: 0.04281977564096451 2023-01-22 13:12:12.671646: step: 142/469, loss: 0.005105625372380018 2023-01-22 13:12:13.321333: step: 144/469, loss: 0.06341095268726349 2023-01-22 13:12:13.996871: step: 146/469, loss: 0.04355448856949806 2023-01-22 13:12:14.688201: step: 148/469, loss: 0.0710495337843895 2023-01-22 13:12:15.279358: step: 150/469, loss: 0.5553318858146667 2023-01-22 13:12:15.919861: step: 152/469, loss: 0.0023809100966900587 2023-01-22 13:12:16.502874: step: 154/469, loss: 0.08436071127653122 2023-01-22 13:12:17.106588: step: 156/469, loss: 0.061742544174194336 2023-01-22 13:12:17.796462: step: 158/469, loss: 0.025910230353474617 2023-01-22 13:12:18.458813: step: 160/469, loss: 0.04920036345720291 2023-01-22 13:12:19.082066: step: 162/469, loss: 0.02608680911362171 2023-01-22 13:12:19.727203: step: 164/469, loss: 0.03181438520550728 2023-01-22 13:12:20.386857: step: 166/469, loss: 0.14423991739749908 2023-01-22 13:12:20.996965: step: 168/469, loss: 0.1029396802186966 2023-01-22 13:12:21.611566: step: 170/469, loss: 0.02797544188797474 2023-01-22 13:12:22.298484: step: 172/469, loss: 0.07917007803916931 2023-01-22 13:12:22.853590: step: 174/469, loss: 0.10432194918394089 2023-01-22 13:12:23.524031: step: 176/469, loss: 0.03921284154057503 2023-01-22 13:12:24.174812: step: 178/469, loss: 0.07324020564556122 2023-01-22 13:12:24.901441: step: 180/469, loss: 0.04576383903622627 2023-01-22 13:12:25.606085: step: 182/469, loss: 0.010903830640017986 2023-01-22 13:12:26.205195: step: 184/469, loss: 0.05258685722947121 2023-01-22 13:12:26.833703: step: 186/469, loss: 0.02395584061741829 2023-01-22 13:12:27.423991: step: 188/469, loss: 0.026599610224366188 2023-01-22 13:12:28.015894: step: 190/469, loss: 0.0020521513652056456 2023-01-22 13:12:28.655076: step: 192/469, loss: 0.011747482232749462 2023-01-22 13:12:29.260616: step: 194/469, loss: 0.01697644405066967 2023-01-22 13:12:29.880709: step: 196/469, loss: 0.03752192109823227 2023-01-22 13:12:30.467155: step: 198/469, loss: 0.031641364097595215 2023-01-22 13:12:31.237344: step: 200/469, loss: 0.01779523678123951 2023-01-22 13:12:31.894851: step: 202/469, loss: 0.22139151394367218 2023-01-22 13:12:32.585034: step: 204/469, loss: 0.013042815029621124 2023-01-22 13:12:33.137113: step: 206/469, loss: 0.017464587464928627 2023-01-22 13:12:33.745633: step: 208/469, loss: 0.006439428776502609 2023-01-22 13:12:34.503850: step: 210/469, loss: 0.018046630546450615 2023-01-22 13:12:35.184461: step: 212/469, loss: 0.13874879479408264 2023-01-22 13:12:35.771829: step: 214/469, loss: 0.032536838203668594 2023-01-22 13:12:36.460911: step: 216/469, loss: 0.03146125376224518 2023-01-22 13:12:37.106789: step: 218/469, loss: 0.02002551779150963 2023-01-22 13:12:37.713513: step: 220/469, loss: 0.020317958667874336 2023-01-22 13:12:38.299938: step: 222/469, loss: 0.35585200786590576 2023-01-22 13:12:39.031799: 
step: 224/469, loss: 0.026581084355711937 2023-01-22 13:12:39.626432: step: 226/469, loss: 0.001094404375180602 2023-01-22 13:12:40.302842: step: 228/469, loss: 0.018660511821508408 2023-01-22 13:12:40.928436: step: 230/469, loss: 0.023422103375196457 2023-01-22 13:12:41.580607: step: 232/469, loss: 0.05262136459350586 2023-01-22 13:12:42.220937: step: 234/469, loss: 0.015403765253722668 2023-01-22 13:12:42.941861: step: 236/469, loss: 0.013600052334368229 2023-01-22 13:12:43.529756: step: 238/469, loss: 0.05427788570523262 2023-01-22 13:12:44.106448: step: 240/469, loss: 0.08543679863214493 2023-01-22 13:12:44.762436: step: 242/469, loss: 0.0645485520362854 2023-01-22 13:12:45.498970: step: 244/469, loss: 0.00648009916767478 2023-01-22 13:12:46.150487: step: 246/469, loss: 0.03014432266354561 2023-01-22 13:12:46.681126: step: 248/469, loss: 0.004051559139043093 2023-01-22 13:12:47.279048: step: 250/469, loss: 0.010756065137684345 2023-01-22 13:12:47.851535: step: 252/469, loss: 0.012942900881171227 2023-01-22 13:12:48.489763: step: 254/469, loss: 0.3782583773136139 2023-01-22 13:12:49.074610: step: 256/469, loss: 0.006241234950721264 2023-01-22 13:12:49.769901: step: 258/469, loss: 0.03076982870697975 2023-01-22 13:12:50.356722: step: 260/469, loss: 0.04138808324933052 2023-01-22 13:12:50.950947: step: 262/469, loss: 0.018180401995778084 2023-01-22 13:12:51.660875: step: 264/469, loss: 0.023767195641994476 2023-01-22 13:12:52.336905: step: 266/469, loss: 0.0489799790084362 2023-01-22 13:12:53.013395: step: 268/469, loss: 0.006461685989052057 2023-01-22 13:12:53.685982: step: 270/469, loss: 0.0074934507720172405 2023-01-22 13:12:54.302034: step: 272/469, loss: 0.01630806177854538 2023-01-22 13:12:54.990980: step: 274/469, loss: 0.6623429656028748 2023-01-22 13:12:55.785833: step: 276/469, loss: 0.03137192502617836 2023-01-22 13:12:56.411810: step: 278/469, loss: 0.011359496042132378 2023-01-22 13:12:56.990202: step: 280/469, loss: 0.06348668038845062 2023-01-22 13:12:57.575375: step: 282/469, loss: 0.019177217036485672 2023-01-22 13:12:58.123459: step: 284/469, loss: 0.019112030044198036 2023-01-22 13:12:58.715206: step: 286/469, loss: 0.008358408696949482 2023-01-22 13:12:59.295818: step: 288/469, loss: 0.047255173325538635 2023-01-22 13:12:59.912855: step: 290/469, loss: 0.05162154510617256 2023-01-22 13:13:00.597455: step: 292/469, loss: 0.037955235689878464 2023-01-22 13:13:01.209546: step: 294/469, loss: 0.06755347549915314 2023-01-22 13:13:01.898053: step: 296/469, loss: 0.026413334533572197 2023-01-22 13:13:02.550764: step: 298/469, loss: 0.007649126462638378 2023-01-22 13:13:03.194331: step: 300/469, loss: 0.08559884130954742 2023-01-22 13:13:03.799297: step: 302/469, loss: 0.03458913415670395 2023-01-22 13:13:04.402670: step: 304/469, loss: 0.04814169183373451 2023-01-22 13:13:05.005764: step: 306/469, loss: 0.03001989610493183 2023-01-22 13:13:05.636114: step: 308/469, loss: 0.02405017614364624 2023-01-22 13:13:06.223093: step: 310/469, loss: 0.0034029295202344656 2023-01-22 13:13:06.850247: step: 312/469, loss: 0.006519317161291838 2023-01-22 13:13:07.494005: step: 314/469, loss: 0.13232645392417908 2023-01-22 13:13:08.130220: step: 316/469, loss: 0.029131615534424782 2023-01-22 13:13:08.761078: step: 318/469, loss: 0.056847065687179565 2023-01-22 13:13:09.406905: step: 320/469, loss: 0.02224601060152054 2023-01-22 13:13:10.008127: step: 322/469, loss: 0.03697815164923668 2023-01-22 13:13:10.657676: step: 324/469, loss: 0.001828978885896504 2023-01-22 13:13:11.357920: step: 
326/469, loss: 0.020824763923883438 2023-01-22 13:13:11.967470: step: 328/469, loss: 0.03082248568534851 2023-01-22 13:13:12.595429: step: 330/469, loss: 0.03613508865237236 2023-01-22 13:13:13.205941: step: 332/469, loss: 2.1351606845855713 2023-01-22 13:13:13.838044: step: 334/469, loss: 0.07682400941848755 2023-01-22 13:13:14.487563: step: 336/469, loss: 0.006083210464566946 2023-01-22 13:13:15.099996: step: 338/469, loss: 0.04656771197915077 2023-01-22 13:13:15.685087: step: 340/469, loss: 0.7680062055587769 2023-01-22 13:13:16.314460: step: 342/469, loss: 0.02434537373483181 2023-01-22 13:13:16.997685: step: 344/469, loss: 0.0006408179178833961 2023-01-22 13:13:17.647894: step: 346/469, loss: 0.09377032518386841 2023-01-22 13:13:18.252744: step: 348/469, loss: 0.020529458299279213 2023-01-22 13:13:18.917136: step: 350/469, loss: 0.09350648522377014 2023-01-22 13:13:19.588965: step: 352/469, loss: 0.04236915335059166 2023-01-22 13:13:20.185784: step: 354/469, loss: 0.04024454951286316 2023-01-22 13:13:20.797609: step: 356/469, loss: 0.08462707698345184 2023-01-22 13:13:21.487387: step: 358/469, loss: 0.001549765351228416 2023-01-22 13:13:22.086157: step: 360/469, loss: 0.008456028997898102 2023-01-22 13:13:22.684656: step: 362/469, loss: 0.054597094655036926 2023-01-22 13:13:23.327771: step: 364/469, loss: 0.035022981464862823 2023-01-22 13:13:23.936692: step: 366/469, loss: 0.0033466024324297905 2023-01-22 13:13:24.511101: step: 368/469, loss: 0.05445101857185364 2023-01-22 13:13:25.125426: step: 370/469, loss: 0.01942160353064537 2023-01-22 13:13:25.765023: step: 372/469, loss: 0.038416557013988495 2023-01-22 13:13:26.402888: step: 374/469, loss: 0.002653640927746892 2023-01-22 13:13:27.037492: step: 376/469, loss: 0.045215360820293427 2023-01-22 13:13:27.648061: step: 378/469, loss: 0.05492890998721123 2023-01-22 13:13:28.230910: step: 380/469, loss: 0.005246720276772976 2023-01-22 13:13:28.833543: step: 382/469, loss: 0.013209211640059948 2023-01-22 13:13:29.447837: step: 384/469, loss: 0.0067526474595069885 2023-01-22 13:13:30.022189: step: 386/469, loss: 0.05400647222995758 2023-01-22 13:13:30.710931: step: 388/469, loss: 0.07380294054746628 2023-01-22 13:13:31.297997: step: 390/469, loss: 0.09125782549381256 2023-01-22 13:13:31.931751: step: 392/469, loss: 0.02457476034760475 2023-01-22 13:13:32.540142: step: 394/469, loss: 0.04653559625148773 2023-01-22 13:13:33.116698: step: 396/469, loss: 0.025619275867938995 2023-01-22 13:13:33.784962: step: 398/469, loss: 0.014898593537509441 2023-01-22 13:13:34.388519: step: 400/469, loss: 0.018561387434601784 2023-01-22 13:13:35.106200: step: 402/469, loss: 0.06717865914106369 2023-01-22 13:13:35.803355: step: 404/469, loss: 0.07287240773439407 2023-01-22 13:13:36.483158: step: 406/469, loss: 0.051496099680662155 2023-01-22 13:13:37.135216: step: 408/469, loss: 0.24909788370132446 2023-01-22 13:13:37.786736: step: 410/469, loss: 0.08912375569343567 2023-01-22 13:13:38.383470: step: 412/469, loss: 0.030857156962156296 2023-01-22 13:13:39.059900: step: 414/469, loss: 0.08198633044958115 2023-01-22 13:13:39.731196: step: 416/469, loss: 0.03507792949676514 2023-01-22 13:13:40.365535: step: 418/469, loss: 0.02260504849255085 2023-01-22 13:13:41.023884: step: 420/469, loss: 0.01254622358828783 2023-01-22 13:13:41.649165: step: 422/469, loss: 0.016889186576008797 2023-01-22 13:13:42.317037: step: 424/469, loss: 0.014178567565977573 2023-01-22 13:13:42.946032: step: 426/469, loss: 0.03221070393919945 2023-01-22 13:13:43.603211: step: 428/469, 
loss: 0.020412391051650047 2023-01-22 13:13:44.286738: step: 430/469, loss: 0.03339565172791481 2023-01-22 13:13:44.983515: step: 432/469, loss: 0.009506194852292538 2023-01-22 13:13:45.604246: step: 434/469, loss: 0.029422499239444733 2023-01-22 13:13:46.216569: step: 436/469, loss: 0.04744797199964523 2023-01-22 13:13:46.945823: step: 438/469, loss: 0.014477276243269444 2023-01-22 13:13:47.603781: step: 440/469, loss: 0.05468200519680977 2023-01-22 13:13:48.169510: step: 442/469, loss: 0.029213834553956985 2023-01-22 13:13:48.771973: step: 444/469, loss: 0.03252070024609566 2023-01-22 13:13:49.408347: step: 446/469, loss: 0.02223104238510132 2023-01-22 13:13:50.078139: step: 448/469, loss: 0.003474658355116844 2023-01-22 13:13:50.674775: step: 450/469, loss: 0.06686118990182877 2023-01-22 13:13:51.242387: step: 452/469, loss: 0.021839521825313568 2023-01-22 13:13:51.959048: step: 454/469, loss: 0.20771893858909607 2023-01-22 13:13:52.565851: step: 456/469, loss: 0.008947457186877728 2023-01-22 13:13:53.205999: step: 458/469, loss: 0.020264342427253723 2023-01-22 13:13:53.836585: step: 460/469, loss: 0.04001079499721527 2023-01-22 13:13:54.541279: step: 462/469, loss: 0.10093475133180618 2023-01-22 13:13:55.159061: step: 464/469, loss: 0.038687583059072495 2023-01-22 13:13:55.840113: step: 466/469, loss: 0.033658649772405624 2023-01-22 13:13:56.523626: step: 468/469, loss: 0.11991773545742035 2023-01-22 13:13:57.172608: step: 470/469, loss: 0.06612705439329147 2023-01-22 13:13:57.777797: step: 472/469, loss: 0.003712458536028862 2023-01-22 13:13:58.394345: step: 474/469, loss: 0.03467660769820213 2023-01-22 13:13:58.965764: step: 476/469, loss: 0.02436213009059429 2023-01-22 13:13:59.516751: step: 478/469, loss: 0.10271177440881729 2023-01-22 13:14:00.138424: step: 480/469, loss: 0.026616176590323448 2023-01-22 13:14:00.699544: step: 482/469, loss: 0.03505128622055054 2023-01-22 13:14:01.311891: step: 484/469, loss: 0.009781846776604652 2023-01-22 13:14:01.881664: step: 486/469, loss: 0.009833168238401413 2023-01-22 13:14:02.520595: step: 488/469, loss: 0.11808013170957565 2023-01-22 13:14:03.097475: step: 490/469, loss: 0.0261073037981987 2023-01-22 13:14:03.697433: step: 492/469, loss: 0.037615641951560974 2023-01-22 13:14:04.381564: step: 494/469, loss: 0.056649237871170044 2023-01-22 13:14:05.067933: step: 496/469, loss: 0.02912292256951332 2023-01-22 13:14:05.691444: step: 498/469, loss: 0.02152920514345169 2023-01-22 13:14:06.377027: step: 500/469, loss: 0.015567835420370102 2023-01-22 13:14:07.059962: step: 502/469, loss: 0.006007621064782143 2023-01-22 13:14:07.640686: step: 504/469, loss: 0.030542412772774696 2023-01-22 13:14:08.257536: step: 506/469, loss: 0.009135408326983452 2023-01-22 13:14:08.854618: step: 508/469, loss: 0.07108505070209503 2023-01-22 13:14:09.555379: step: 510/469, loss: 0.09674370288848877 2023-01-22 13:14:10.130610: step: 512/469, loss: 0.02525142952799797 2023-01-22 13:14:10.726536: step: 514/469, loss: 0.0644746795296669 2023-01-22 13:14:11.359091: step: 516/469, loss: 0.048930805176496506 2023-01-22 13:14:12.010641: step: 518/469, loss: 0.07319412380456924 2023-01-22 13:14:12.595052: step: 520/469, loss: 0.036929722875356674 2023-01-22 13:14:13.303182: step: 522/469, loss: 0.3169757127761841 2023-01-22 13:14:13.931348: step: 524/469, loss: 0.053899671882390976 2023-01-22 13:14:14.554722: step: 526/469, loss: 0.06712814420461655 2023-01-22 13:14:15.194828: step: 528/469, loss: 0.01026841625571251 2023-01-22 13:14:15.823036: step: 530/469, loss: 
0.003544917330145836 2023-01-22 13:14:16.493607: step: 532/469, loss: 0.06460683792829514 2023-01-22 13:14:17.141609: step: 534/469, loss: 0.0171480905264616 2023-01-22 13:14:17.807913: step: 536/469, loss: 0.0022561487276107073 2023-01-22 13:14:18.440503: step: 538/469, loss: 0.06266505271196365 2023-01-22 13:14:19.043371: step: 540/469, loss: 0.014372874982655048 2023-01-22 13:14:19.698227: step: 542/469, loss: 0.03709370642900467 2023-01-22 13:14:20.347169: step: 544/469, loss: 0.0486343652009964 2023-01-22 13:14:20.930736: step: 546/469, loss: 0.08830500394105911 2023-01-22 13:14:21.624016: step: 548/469, loss: 0.0234213937073946 2023-01-22 13:14:22.251174: step: 550/469, loss: 0.02622537687420845 2023-01-22 13:14:22.879371: step: 552/469, loss: 0.011505834758281708 2023-01-22 13:14:23.578537: step: 554/469, loss: 0.10963188111782074 2023-01-22 13:14:24.227192: step: 556/469, loss: 0.3771672248840332 2023-01-22 13:14:24.875322: step: 558/469, loss: 0.23346242308616638 2023-01-22 13:14:25.531008: step: 560/469, loss: 0.038544341921806335 2023-01-22 13:14:26.172212: step: 562/469, loss: 0.0968988761305809 2023-01-22 13:14:26.804228: step: 564/469, loss: 0.02138388901948929 2023-01-22 13:14:27.375306: step: 566/469, loss: 0.20941877365112305 2023-01-22 13:14:27.983710: step: 568/469, loss: 0.04067647084593773 2023-01-22 13:14:28.678615: step: 570/469, loss: 0.04401589184999466 2023-01-22 13:14:29.334462: step: 572/469, loss: 0.005905947647988796 2023-01-22 13:14:29.957605: step: 574/469, loss: 0.02408822439610958 2023-01-22 13:14:30.631669: step: 576/469, loss: 0.08960024267435074 2023-01-22 13:14:31.219368: step: 578/469, loss: 0.0563046857714653 2023-01-22 13:14:31.850220: step: 580/469, loss: 0.1482042819261551 2023-01-22 13:14:32.509768: step: 582/469, loss: 0.030881470069289207 2023-01-22 13:14:33.169877: step: 584/469, loss: 0.016311822459101677 2023-01-22 13:14:33.800271: step: 586/469, loss: 0.023087259382009506 2023-01-22 13:14:34.453835: step: 588/469, loss: 0.36545512080192566 2023-01-22 13:14:35.121297: step: 590/469, loss: 0.010560513474047184 2023-01-22 13:14:35.719172: step: 592/469, loss: 0.20198041200637817 2023-01-22 13:14:36.467285: step: 594/469, loss: 0.049684807658195496 2023-01-22 13:14:37.119990: step: 596/469, loss: 0.03809770196676254 2023-01-22 13:14:37.718089: step: 598/469, loss: 0.008861164562404156 2023-01-22 13:14:38.317989: step: 600/469, loss: 0.03218526393175125 2023-01-22 13:14:38.966899: step: 602/469, loss: 0.022556880488991737 2023-01-22 13:14:39.597821: step: 604/469, loss: 0.011322306469082832 2023-01-22 13:14:40.152805: step: 606/469, loss: 0.030001727864146233 2023-01-22 13:14:40.777621: step: 608/469, loss: 0.045465029776096344 2023-01-22 13:14:41.435457: step: 610/469, loss: 0.057701513171195984 2023-01-22 13:14:42.081134: step: 612/469, loss: 0.02023312821984291 2023-01-22 13:14:42.707731: step: 614/469, loss: 0.016423925757408142 2023-01-22 13:14:43.372163: step: 616/469, loss: 0.02925160899758339 2023-01-22 13:14:43.948184: step: 618/469, loss: 0.028430957347154617 2023-01-22 13:14:44.589761: step: 620/469, loss: 0.05649249255657196 2023-01-22 13:14:45.196657: step: 622/469, loss: 0.03616967424750328 2023-01-22 13:14:45.815640: step: 624/469, loss: 0.02274082601070404 2023-01-22 13:14:46.490355: step: 626/469, loss: 0.037107981741428375 2023-01-22 13:14:47.175742: step: 628/469, loss: 0.03349355235695839 2023-01-22 13:14:47.821438: step: 630/469, loss: 0.06326816976070404 2023-01-22 13:14:48.417237: step: 632/469, loss: 0.02004053257405758 
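Every training entry in this log follows the same pattern, a timestamp followed by `step: N/469, loss: value`, and the printed step counter visibly advances by 2 per batch, so the last entry of an epoch reads 938/469. For anyone post-processing a capture like this, below is a minimal, hypothetical Python sketch that pulls the per-step losses back out and averages them per epoch; the file name train.log is illustrative, and the idea that the `Loss:` line in each epoch summary is a plain mean of these values is an assumption, not something the trainer states.

import re
from collections import defaultdict

# each training entry looks like:
#   2023-01-22 13:14:16.493607: step: 532/469, loss: 0.06460683792829514
# the printed step counter advances by 2, so 938/469 is the last batch of an epoch
ENTRY = re.compile(r"step: (\d+)/469, loss: ([0-9.]+)")

def mean_loss_per_epoch(path="train.log"):  # hypothetical file name for this captured output
    epochs, current, last_step = defaultdict(list), 0, 0
    with open(path, encoding="utf-8") as fh:
        for match in ENTRY.finditer(fh.read()):
            step, loss = int(match.group(1)), float(match.group(2))
            if step < last_step:  # counter reset, so a new epoch has started
                current += 1
            last_step = step
            epochs[current].append(loss)
    return {epoch: sum(values) / len(values) for epoch, values in epochs.items()}

If the trainer does report a plain mean, the value this returns for an epoch should land close to the corresponding `Loss:` line in the summaries; any gap would point to weighting or smoothing that the log itself does not reveal.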
2023-01-22 13:14:49.026087: step: 634/469, loss: 0.05118941143155098 2023-01-22 13:14:49.679018: step: 636/469, loss: 0.03605641424655914 2023-01-22 13:14:50.309013: step: 638/469, loss: 0.008025464601814747 2023-01-22 13:14:50.968836: step: 640/469, loss: 0.03873804584145546 2023-01-22 13:14:51.550768: step: 642/469, loss: 0.003371611936017871 2023-01-22 13:14:52.141192: step: 644/469, loss: 0.005725967697799206 2023-01-22 13:14:52.729006: step: 646/469, loss: 0.11167502403259277 2023-01-22 13:14:53.365641: step: 648/469, loss: 0.05364027991890907 2023-01-22 13:14:53.951040: step: 650/469, loss: 0.03828307241201401 2023-01-22 13:14:54.575459: step: 652/469, loss: 0.027975894510746002 2023-01-22 13:14:55.177395: step: 654/469, loss: 0.015445971861481667 2023-01-22 13:14:55.802271: step: 656/469, loss: 0.01446506381034851 2023-01-22 13:14:56.382399: step: 658/469, loss: 0.0030841310508549213 2023-01-22 13:14:57.042198: step: 660/469, loss: 0.018016504123806953 2023-01-22 13:14:57.624462: step: 662/469, loss: 0.019011082127690315 2023-01-22 13:14:58.248352: step: 664/469, loss: 0.12268608808517456 2023-01-22 13:14:58.876230: step: 666/469, loss: 0.003342668293043971 2023-01-22 13:14:59.531249: step: 668/469, loss: 0.01909763738512993 2023-01-22 13:15:00.179969: step: 670/469, loss: 0.20439517498016357 2023-01-22 13:15:00.795263: step: 672/469, loss: 0.03834576532244682 2023-01-22 13:15:01.411803: step: 674/469, loss: 0.024597445502877235 2023-01-22 13:15:02.050351: step: 676/469, loss: 0.03728759288787842 2023-01-22 13:15:02.664576: step: 678/469, loss: 0.023696452379226685 2023-01-22 13:15:03.279684: step: 680/469, loss: 0.2461635023355484 2023-01-22 13:15:03.891134: step: 682/469, loss: 0.009932891465723515 2023-01-22 13:15:04.604228: step: 684/469, loss: 0.020177103579044342 2023-01-22 13:15:05.215464: step: 686/469, loss: 0.16025130450725555 2023-01-22 13:15:05.800312: step: 688/469, loss: 0.0043271807953715324 2023-01-22 13:15:06.424174: step: 690/469, loss: 0.023122074082493782 2023-01-22 13:15:07.053277: step: 692/469, loss: 0.42845121026039124 2023-01-22 13:15:07.723431: step: 694/469, loss: 0.023784488439559937 2023-01-22 13:15:08.423766: step: 696/469, loss: 0.045418478548526764 2023-01-22 13:15:09.167779: step: 698/469, loss: 0.0701347142457962 2023-01-22 13:15:09.798660: step: 700/469, loss: 0.029570408165454865 2023-01-22 13:15:10.375956: step: 702/469, loss: 0.04777064174413681 2023-01-22 13:15:10.960030: step: 704/469, loss: 0.08903773128986359 2023-01-22 13:15:11.616795: step: 706/469, loss: 0.07514233887195587 2023-01-22 13:15:12.267034: step: 708/469, loss: 0.0469592846930027 2023-01-22 13:15:12.900812: step: 710/469, loss: 0.02193559519946575 2023-01-22 13:15:13.558712: step: 712/469, loss: 0.11656536161899567 2023-01-22 13:15:14.172540: step: 714/469, loss: 0.04079541563987732 2023-01-22 13:15:14.864180: step: 716/469, loss: 0.03533748537302017 2023-01-22 13:15:15.500601: step: 718/469, loss: 0.03869449719786644 2023-01-22 13:15:16.122809: step: 720/469, loss: 0.04969077557325363 2023-01-22 13:15:16.700697: step: 722/469, loss: 0.01292459573596716 2023-01-22 13:15:17.338522: step: 724/469, loss: 0.004090246744453907 2023-01-22 13:15:17.965761: step: 726/469, loss: 0.023417063057422638 2023-01-22 13:15:18.598535: step: 728/469, loss: 0.0638405904173851 2023-01-22 13:15:19.260316: step: 730/469, loss: 0.10894934087991714 2023-01-22 13:15:19.890848: step: 732/469, loss: 0.035131413489580154 2023-01-22 13:15:20.458434: step: 734/469, loss: 0.04368053376674652 2023-01-22 
13:15:21.054720: step: 736/469, loss: 0.8348058462142944 2023-01-22 13:15:21.702630: step: 738/469, loss: 0.003947824705392122 2023-01-22 13:15:22.330337: step: 740/469, loss: 0.0029385206289589405 2023-01-22 13:15:22.987607: step: 742/469, loss: 0.009950543753802776 2023-01-22 13:15:23.556080: step: 744/469, loss: 0.01953227072954178 2023-01-22 13:15:24.257722: step: 746/469, loss: 0.09103751927614212 2023-01-22 13:15:24.855793: step: 748/469, loss: 0.04214608296751976 2023-01-22 13:15:25.462902: step: 750/469, loss: 0.017283525317907333 2023-01-22 13:15:26.105687: step: 752/469, loss: 0.18202316761016846 2023-01-22 13:15:26.739434: step: 754/469, loss: 0.07312453538179398 2023-01-22 13:15:27.374839: step: 756/469, loss: 0.24903231859207153 2023-01-22 13:15:27.969493: step: 758/469, loss: 0.06482073664665222 2023-01-22 13:15:28.582632: step: 760/469, loss: 0.07405403256416321 2023-01-22 13:15:29.250304: step: 762/469, loss: 0.014474152587354183 2023-01-22 13:15:29.932389: step: 764/469, loss: 0.026526834815740585 2023-01-22 13:15:30.624365: step: 766/469, loss: 0.02918674424290657 2023-01-22 13:15:31.215327: step: 768/469, loss: 0.0171043761074543 2023-01-22 13:15:31.925803: step: 770/469, loss: 0.16759347915649414 2023-01-22 13:15:32.543050: step: 772/469, loss: 0.005510732065886259 2023-01-22 13:15:33.143995: step: 774/469, loss: 0.032438408583402634 2023-01-22 13:15:33.736983: step: 776/469, loss: 0.037898868322372437 2023-01-22 13:15:34.358477: step: 778/469, loss: 0.09436165541410446 2023-01-22 13:15:34.963327: step: 780/469, loss: 0.045958563685417175 2023-01-22 13:15:35.575624: step: 782/469, loss: 0.05432308465242386 2023-01-22 13:15:36.206100: step: 784/469, loss: 0.03569307550787926 2023-01-22 13:15:36.867783: step: 786/469, loss: 0.025533713400363922 2023-01-22 13:15:37.535493: step: 788/469, loss: 0.05924192816019058 2023-01-22 13:15:38.120651: step: 790/469, loss: 0.005669349804520607 2023-01-22 13:15:38.770530: step: 792/469, loss: 0.04133680835366249 2023-01-22 13:15:39.351594: step: 794/469, loss: 0.049105141311883926 2023-01-22 13:15:39.980966: step: 796/469, loss: 0.02192024327814579 2023-01-22 13:15:40.555460: step: 798/469, loss: 0.026449689641594887 2023-01-22 13:15:41.151528: step: 800/469, loss: 0.10335677117109299 2023-01-22 13:15:41.788124: step: 802/469, loss: 0.016659628599882126 2023-01-22 13:15:42.389319: step: 804/469, loss: 0.08267058432102203 2023-01-22 13:15:43.070539: step: 806/469, loss: 0.009828275069594383 2023-01-22 13:15:43.667558: step: 808/469, loss: 0.030231472104787827 2023-01-22 13:15:44.423689: step: 810/469, loss: 0.023600690066814423 2023-01-22 13:15:45.050435: step: 812/469, loss: 0.10526539385318756 2023-01-22 13:15:45.657253: step: 814/469, loss: 0.09193176031112671 2023-01-22 13:15:46.271723: step: 816/469, loss: 0.03997723013162613 2023-01-22 13:15:46.887706: step: 818/469, loss: 0.046001747250556946 2023-01-22 13:15:47.455773: step: 820/469, loss: 1.0909194946289062 2023-01-22 13:15:48.096038: step: 822/469, loss: 0.02829616330564022 2023-01-22 13:15:48.780530: step: 824/469, loss: 0.0725482627749443 2023-01-22 13:15:49.403256: step: 826/469, loss: 0.045401617884635925 2023-01-22 13:15:50.040060: step: 828/469, loss: 0.13206619024276733 2023-01-22 13:15:50.650259: step: 830/469, loss: 0.03091304562985897 2023-01-22 13:15:51.302693: step: 832/469, loss: 0.029082871973514557 2023-01-22 13:15:51.884272: step: 834/469, loss: 0.07198868691921234 2023-01-22 13:15:52.530854: step: 836/469, loss: 0.18165065348148346 2023-01-22 13:15:53.133869: 
step: 838/469, loss: 0.34014299511909485 2023-01-22 13:15:53.771980: step: 840/469, loss: 0.027846289798617363 2023-01-22 13:15:54.381818: step: 842/469, loss: 0.09241542220115662 2023-01-22 13:15:54.981815: step: 844/469, loss: 0.027130432426929474 2023-01-22 13:15:55.605155: step: 846/469, loss: 0.021871939301490784 2023-01-22 13:15:56.258717: step: 848/469, loss: 0.03874404728412628 2023-01-22 13:15:56.892886: step: 850/469, loss: 0.05512149631977081 2023-01-22 13:15:57.465227: step: 852/469, loss: 0.014337156899273396 2023-01-22 13:15:58.099536: step: 854/469, loss: 0.305141419172287 2023-01-22 13:15:58.799829: step: 856/469, loss: 0.07611072063446045 2023-01-22 13:15:59.393696: step: 858/469, loss: 0.029356474056839943 2023-01-22 13:16:00.017073: step: 860/469, loss: 0.04575691372156143 2023-01-22 13:16:00.634355: step: 862/469, loss: 0.09294658899307251 2023-01-22 13:16:01.301384: step: 864/469, loss: 0.06593305617570877 2023-01-22 13:16:01.921563: step: 866/469, loss: 0.035524748265743256 2023-01-22 13:16:02.491653: step: 868/469, loss: 0.10471679270267487 2023-01-22 13:16:03.168915: step: 870/469, loss: 0.03528767451643944 2023-01-22 13:16:03.773100: step: 872/469, loss: 0.03296990692615509 2023-01-22 13:16:04.434272: step: 874/469, loss: 0.026981398463249207 2023-01-22 13:16:05.124158: step: 876/469, loss: 0.02476923167705536 2023-01-22 13:16:05.771492: step: 878/469, loss: 0.029789147898554802 2023-01-22 13:16:06.404364: step: 880/469, loss: 0.27299654483795166 2023-01-22 13:16:07.061181: step: 882/469, loss: 0.026675784960389137 2023-01-22 13:16:07.729768: step: 884/469, loss: 0.2162950336933136 2023-01-22 13:16:08.311579: step: 886/469, loss: 0.001604490913450718 2023-01-22 13:16:08.857494: step: 888/469, loss: 0.0254694614559412 2023-01-22 13:16:09.490233: step: 890/469, loss: 0.008233447559177876 2023-01-22 13:16:10.147028: step: 892/469, loss: 0.03157993406057358 2023-01-22 13:16:10.797984: step: 894/469, loss: 0.13843785226345062 2023-01-22 13:16:11.429219: step: 896/469, loss: 0.013640285469591618 2023-01-22 13:16:12.017198: step: 898/469, loss: 0.0152322668582201 2023-01-22 13:16:12.651611: step: 900/469, loss: 0.055900149047374725 2023-01-22 13:16:13.351706: step: 902/469, loss: 0.028205275535583496 2023-01-22 13:16:13.928457: step: 904/469, loss: 0.03686301037669182 2023-01-22 13:16:14.671979: step: 906/469, loss: 0.04179917648434639 2023-01-22 13:16:15.271358: step: 908/469, loss: 0.040270671248435974 2023-01-22 13:16:15.895186: step: 910/469, loss: 0.007458284962922335 2023-01-22 13:16:16.543097: step: 912/469, loss: 0.5592674612998962 2023-01-22 13:16:17.182301: step: 914/469, loss: 0.007352380082011223 2023-01-22 13:16:17.789794: step: 916/469, loss: 0.05365872010588646 2023-01-22 13:16:18.410930: step: 918/469, loss: 0.11493711173534393 2023-01-22 13:16:19.188126: step: 920/469, loss: 0.02752826176583767 2023-01-22 13:16:19.837717: step: 922/469, loss: 0.0778980702161789 2023-01-22 13:16:20.414222: step: 924/469, loss: 0.18797804415225983 2023-01-22 13:16:20.944875: step: 926/469, loss: 0.02664988860487938 2023-01-22 13:16:21.488238: step: 928/469, loss: 0.11907251179218292 2023-01-22 13:16:22.161059: step: 930/469, loss: 0.024364197626709938 2023-01-22 13:16:22.751607: step: 932/469, loss: 0.0443686842918396 2023-01-22 13:16:23.300999: step: 934/469, loss: 0.0018686829134821892 2023-01-22 13:16:23.904540: step: 936/469, loss: 0.022116629406809807 2023-01-22 13:16:24.561137: step: 938/469, loss: 0.035270195454359055 
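To make the evaluation block that follows easier to read: each entry reports precision, recall and F1 twice, once at the template level and once at the slot level, and the printed numbers are consistent with the standard F1 formula and with `combined` being the product of the two F1 values (for Dev Chinese below, 0.7368421 x 0.3091143 is approximately 0.2277685, which matches the logged `combined`). A small illustrative check of that arithmetic, written for this note rather than taken from train.py:

def f1(p, r):
    # standard F1 from precision and recall
    return 2 * p * r / (p + r) if (p + r) else 0.0

# slot precision/recall and template F1 for Dev Chinese, epoch 23, copied from the block below
slot_p, slot_r = 0.2930803571428572, 0.3270042694497154
template_f1 = 0.7368421052631579

slot_f1 = f1(slot_p, slot_r)        # ~0.309114, matching the logged slot 'f1'
combined = template_f1 * slot_f1    # ~0.227768, matching the logged 'combined'
print(round(slot_f1, 6), round(combined, 6))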
==================================================
Loss: 0.068
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2930803571428572, 'r': 0.3270042694497154, 'f1': 0.3091143497757848}, 'combined': 0.2277684682558414, 'epoch': 23}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3070273122465261, 'r': 0.27026261580749966, 'f1': 0.28747428458279495}, 'combined': 0.15680415522697905, 'epoch': 23}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28266095317725753, 'r': 0.3207424098671727, 'f1': 0.3005}, 'combined': 0.22142105263157894, 'epoch': 23}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2995308221171761, 'r': 0.26256860915024854, 'f1': 0.279834445601921}, 'combined': 0.15263697032832055, 'epoch': 23}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2888900501672241, 'r': 0.3278107210626186, 'f1': 0.3071222222222222}, 'combined': 0.22630058479532159, 'epoch': 23}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30129457323487396, 'r': 0.26824580834622236, 'f1': 0.283811329140007}, 'combined': 0.1548061795309129, 'epoch': 23}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24166666666666664, 'r': 0.3452380952380952, 'f1': 0.28431372549019607}, 'combined': 0.1895424836601307, 'epoch': 23}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2642857142857143, 'r': 0.40217391304347827, 'f1': 0.31896551724137934}, 'combined': 0.15948275862068967, 'epoch': 23}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40789473684210525, 'r': 0.2672413793103448, 'f1': 0.3229166666666667}, 'combined': 0.2152777777777778, 'epoch': 23}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11}
******************************
Epoch: 24
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-22 13:19:13.335030: step: 2/469, loss: 0.016417326405644417 2023-01-22 13:19:14.069534: step: 4/469, loss: 0.047688115388154984 2023-01-22 13:19:14.714059: step: 6/469, loss: 0.0862254723906517 2023-01-22 13:19:15.322510: step: 8/469, loss: 0.23898805677890778 2023-01-22 13:19:15.928956: step: 10/469, loss: 0.030291911214590073 2023-01-22 13:19:16.592208: step: 12/469, loss: 0.01121998205780983 2023-01-22 13:19:17.229687: step: 14/469, loss: 0.034028246998786926 2023-01-22 13:19:17.784016: step: 16/469, loss: 0.01717783696949482 2023-01-22 13:19:18.532487: step: 18/469, loss: 0.0826980322599411 2023-01-22 13:19:19.198815: step: 20/469, loss: 0.05500301718711853 2023-01-22 13:19:19.859291: step: 22/469, loss: 0.04233062267303467 2023-01-22 13:19:20.522131: step: 24/469, loss: 0.0036251835990697145 2023-01-22 13:19:21.148126: step: 26/469, loss: 0.09086892753839493 2023-01-22 13:19:21.768056: step: 28/469, loss: 0.019768664613366127 2023-01-22 13:19:22.385597: step: 30/469, loss: 0.016958897933363914 2023-01-22 13:19:22.935882: step: 32/469, loss: 0.015197346918284893 2023-01-22 13:19:23.554865: step: 34/469, loss: 0.01870288886129856 2023-01-22 13:19:24.198696: step: 36/469, loss: 0.03923607990145683 2023-01-22 13:19:24.862655: step: 38/469, loss: 0.01410847157239914 2023-01-22 13:19:25.598471: step: 40/469, loss: 0.055641189217567444 2023-01-22 13:19:26.253217: step: 42/469, loss: 0.012252534739673138 2023-01-22 13:19:26.955207: step: 44/469, loss: 0.32044076919555664 2023-01-22 13:19:27.574517: step: 46/469, loss: 0.018998123705387115 2023-01-22 13:19:28.235087: step: 48/469, loss: 0.08631859719753265 2023-01-22 13:19:28.867672: step: 50/469, loss: 0.019391590729355812 2023-01-22 13:19:29.426711: step: 52/469, loss: 0.013424401171505451 2023-01-22 13:19:30.096994: step: 54/469, loss: 0.00611753948032856 2023-01-22 13:19:30.752702: step: 56/469, loss: 0.007324477192014456 2023-01-22 13:19:31.304898: step: 58/469, loss: 0.007927605882287025 2023-01-22 13:19:31.897414: step: 60/469, loss: 0.023863786831498146 2023-01-22 13:19:32.555595: step: 62/469, loss: 0.15221978724002838 2023-01-22 13:19:33.196023: step: 64/469, loss: 0.0205460824072361 2023-01-22 13:19:33.850631: step: 66/469, loss: 0.05236423760652542 2023-01-22 13:19:34.511674: step: 68/469, loss: 0.05775609612464905 2023-01-22 13:19:35.204505: step: 70/469, loss: 0.013726840727031231 2023-01-22 13:19:35.831352: step: 72/469, loss: 0.003370016347616911 2023-01-22 13:19:36.473128: step: 74/469, loss: 0.03200198709964752 2023-01-22 13:19:37.077192: step: 76/469, loss: 0.0227058008313179 2023-01-22 13:19:37.722770: step: 78/469, loss: 0.1415403038263321
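The `Current best result` block above is reprinted after every epoch and keeps, for each language independently, the scores of whichever earlier epoch the trainer still considers best (epoch 5 for Chinese, 19 for Korean, 11 for Russian at this point). Note that it is not simply the highest dev `combined` seen so far: epoch 22's Dev Chinese combined (about 0.2297) exceeds epoch 5's (about 0.2274), yet epoch 5 is retained, so the actual selection metric is not visible in this log. A minimal, purely hypothetical sketch of that kind of per-language bookkeeping, with the selection score left abstract:

# hypothetical per-language "current best" bookkeeping mirroring the blocks printed above;
# which scalar train.py actually uses to pick the best epoch is not visible in this log,
# so `selection_score` below is a stand-in, not the real criterion
best = {}  # language -> results of the best epoch so far

def update_best(language, epoch_results):
    # keep the epoch with the highest selection score for this language
    previous = best.get(language)
    if previous is None or epoch_results["selection_score"] > previous["selection_score"]:
        best[language] = epoch_results

# illustrative inputs only (reusing the Sample Chinese 'combined' values from epochs 5 and 22)
update_best("Chinese", {"epoch": 5, "selection_score": 0.23088})
update_best("Chinese", {"epoch": 22, "selection_score": 0.21724})
print(best["Chinese"]["epoch"])  # still 5 under this hypothetical criterion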
2023-01-22 13:19:38.385545: step: 80/469, loss: 0.01559305377304554 2023-01-22 13:19:39.066516: step: 82/469, loss: 0.026849525049328804 2023-01-22 13:19:39.761293: step: 84/469, loss: 0.0059050279669463634 2023-01-22 13:19:40.364496: step: 86/469, loss: 0.01893472485244274 2023-01-22 13:19:41.024313: step: 88/469, loss: 0.021686840802431107 2023-01-22 13:19:41.696560: step: 90/469, loss: 0.01580534689128399 2023-01-22 13:19:42.399819: step: 92/469, loss: 0.02733035571873188 2023-01-22 13:19:43.058066: step: 94/469, loss: 0.04168585687875748 2023-01-22 13:19:43.680120: step: 96/469, loss: 0.030311888083815575 2023-01-22 13:19:44.353405: step: 98/469, loss: 0.05860369652509689 2023-01-22 13:19:44.961098: step: 100/469, loss: 0.021574096754193306 2023-01-22 13:19:45.621648: step: 102/469, loss: 0.04766014590859413 2023-01-22 13:19:46.175444: step: 104/469, loss: 0.003634591354057193 2023-01-22 13:19:46.791747: step: 106/469, loss: 0.29936927556991577 2023-01-22 13:19:47.410009: step: 108/469, loss: 0.029460735619068146 2023-01-22 13:19:47.972215: step: 110/469, loss: 0.024939091876149178 2023-01-22 13:19:48.577677: step: 112/469, loss: 0.010890143923461437 2023-01-22 13:19:49.211240: step: 114/469, loss: 0.11194773018360138 2023-01-22 13:19:49.888377: step: 116/469, loss: 0.02466609515249729 2023-01-22 13:19:50.447632: step: 118/469, loss: 0.03958394005894661 2023-01-22 13:19:51.175621: step: 120/469, loss: 0.14312510192394257 2023-01-22 13:19:51.797212: step: 122/469, loss: 0.012246928177773952 2023-01-22 13:19:52.370176: step: 124/469, loss: 0.017965009436011314 2023-01-22 13:19:53.000426: step: 126/469, loss: 0.04019022732973099 2023-01-22 13:19:53.602747: step: 128/469, loss: 0.01370336301624775 2023-01-22 13:19:54.184524: step: 130/469, loss: 0.010952872224152088 2023-01-22 13:19:54.805338: step: 132/469, loss: 0.030317997559905052 2023-01-22 13:19:55.446477: step: 134/469, loss: 0.05731196328997612 2023-01-22 13:19:56.074270: step: 136/469, loss: 0.029016038402915 2023-01-22 13:19:56.728540: step: 138/469, loss: 0.0025959822814911604 2023-01-22 13:19:57.425411: step: 140/469, loss: 0.012712554074823856 2023-01-22 13:19:58.073705: step: 142/469, loss: 0.0649077370762825 2023-01-22 13:19:58.716784: step: 144/469, loss: 0.013003380969166756 2023-01-22 13:19:59.328483: step: 146/469, loss: 0.008440962992608547 2023-01-22 13:19:59.916394: step: 148/469, loss: 0.01160487625747919 2023-01-22 13:20:00.552398: step: 150/469, loss: 0.0455317497253418 2023-01-22 13:20:01.173191: step: 152/469, loss: 0.03124481812119484 2023-01-22 13:20:01.741458: step: 154/469, loss: 0.05350888893008232 2023-01-22 13:20:02.385140: step: 156/469, loss: 0.03994572535157204 2023-01-22 13:20:03.003701: step: 158/469, loss: 0.010522032156586647 2023-01-22 13:20:03.626333: step: 160/469, loss: 0.03227447345852852 2023-01-22 13:20:04.191883: step: 162/469, loss: 0.008141381666064262 2023-01-22 13:20:04.781883: step: 164/469, loss: 0.004845090676099062 2023-01-22 13:20:05.421333: step: 166/469, loss: 0.013959623873233795 2023-01-22 13:20:06.034640: step: 168/469, loss: 0.0075279464945197105 2023-01-22 13:20:06.629748: step: 170/469, loss: 0.04734662547707558 2023-01-22 13:20:07.237326: step: 172/469, loss: 0.03917761892080307 2023-01-22 13:20:07.871506: step: 174/469, loss: 0.00599240604788065 2023-01-22 13:20:08.484414: step: 176/469, loss: 0.0036602281033992767 2023-01-22 13:20:09.149109: step: 178/469, loss: 0.013128047809004784 2023-01-22 13:20:09.776073: step: 180/469, loss: 0.029274987056851387 2023-01-22 
13:20:10.357191: step: 182/469, loss: 0.021258031949400902 2023-01-22 13:20:11.006265: step: 184/469, loss: 0.006865369621664286 2023-01-22 13:20:11.589419: step: 186/469, loss: 0.027012838050723076 2023-01-22 13:20:12.274652: step: 188/469, loss: 0.10580946505069733 2023-01-22 13:20:12.936850: step: 190/469, loss: 0.11646991968154907 2023-01-22 13:20:13.495901: step: 192/469, loss: 0.02122093364596367 2023-01-22 13:20:14.115272: step: 194/469, loss: 0.007417526561766863 2023-01-22 13:20:14.697928: step: 196/469, loss: 0.00789575930684805 2023-01-22 13:20:15.345608: step: 198/469, loss: 0.004790593404322863 2023-01-22 13:20:16.002204: step: 200/469, loss: 0.6533937454223633 2023-01-22 13:20:16.625606: step: 202/469, loss: 0.29181212186813354 2023-01-22 13:20:17.298282: step: 204/469, loss: 0.023737413808703423 2023-01-22 13:20:17.969963: step: 206/469, loss: 0.07275974005460739 2023-01-22 13:20:18.598935: step: 208/469, loss: 0.05138004943728447 2023-01-22 13:20:19.248235: step: 210/469, loss: 0.05183066427707672 2023-01-22 13:20:19.889852: step: 212/469, loss: 0.01253726426512003 2023-01-22 13:20:20.499619: step: 214/469, loss: 0.021247226744890213 2023-01-22 13:20:21.220886: step: 216/469, loss: 0.022669294849038124 2023-01-22 13:20:21.842081: step: 218/469, loss: 0.020281096920371056 2023-01-22 13:20:22.425990: step: 220/469, loss: 0.005160744767636061 2023-01-22 13:20:23.041792: step: 222/469, loss: 0.11030631512403488 2023-01-22 13:20:23.800565: step: 224/469, loss: 0.02367747761309147 2023-01-22 13:20:24.535196: step: 226/469, loss: 0.012560294009745121 2023-01-22 13:20:25.199507: step: 228/469, loss: 0.007990298792719841 2023-01-22 13:20:25.750005: step: 230/469, loss: 0.06881830096244812 2023-01-22 13:20:26.501787: step: 232/469, loss: 0.010329904966056347 2023-01-22 13:20:27.150089: step: 234/469, loss: 0.006145569961518049 2023-01-22 13:20:27.705733: step: 236/469, loss: 0.0172202717512846 2023-01-22 13:20:28.351226: step: 238/469, loss: 0.0024612518027424812 2023-01-22 13:20:28.932905: step: 240/469, loss: 0.008188467472791672 2023-01-22 13:20:29.611677: step: 242/469, loss: 0.20481272041797638 2023-01-22 13:20:30.170317: step: 244/469, loss: 0.014653651043772697 2023-01-22 13:20:30.928693: step: 246/469, loss: 0.28711986541748047 2023-01-22 13:20:31.588617: step: 248/469, loss: 0.005164872854948044 2023-01-22 13:20:32.222863: step: 250/469, loss: 0.0071144504472613335 2023-01-22 13:20:32.800370: step: 252/469, loss: 0.0035280201118439436 2023-01-22 13:20:33.458317: step: 254/469, loss: 0.0887732207775116 2023-01-22 13:20:34.030399: step: 256/469, loss: 0.02170315757393837 2023-01-22 13:20:34.627403: step: 258/469, loss: 0.02189589850604534 2023-01-22 13:20:35.261089: step: 260/469, loss: 0.042867522686719894 2023-01-22 13:20:35.895041: step: 262/469, loss: 0.09971924126148224 2023-01-22 13:20:36.592869: step: 264/469, loss: 0.26245683431625366 2023-01-22 13:20:37.190003: step: 266/469, loss: 0.05709851160645485 2023-01-22 13:20:37.802926: step: 268/469, loss: 0.03264932334423065 2023-01-22 13:20:38.481720: step: 270/469, loss: 0.015331503935158253 2023-01-22 13:20:39.083391: step: 272/469, loss: 0.02993532083928585 2023-01-22 13:20:39.694297: step: 274/469, loss: 0.08994031697511673 2023-01-22 13:20:40.283004: step: 276/469, loss: 0.027369476854801178 2023-01-22 13:20:40.907988: step: 278/469, loss: 0.026092637330293655 2023-01-22 13:20:41.508629: step: 280/469, loss: 0.013567878864705563 2023-01-22 13:20:42.219178: step: 282/469, loss: 0.037978872656822205 2023-01-22 
13:20:42.841820: step: 284/469, loss: 0.00607453566044569 2023-01-22 13:20:43.459443: step: 286/469, loss: 0.020827503874897957 2023-01-22 13:20:44.163506: step: 288/469, loss: 0.03754419833421707 2023-01-22 13:20:44.746428: step: 290/469, loss: 0.12021613866090775 2023-01-22 13:20:45.346067: step: 292/469, loss: 0.03460229933261871 2023-01-22 13:20:45.981925: step: 294/469, loss: 0.3516269624233246 2023-01-22 13:20:46.634590: step: 296/469, loss: 0.027158131822943687 2023-01-22 13:20:47.203875: step: 298/469, loss: 0.004540742374956608 2023-01-22 13:20:47.840614: step: 300/469, loss: 0.03442658856511116 2023-01-22 13:20:48.448417: step: 302/469, loss: 0.01600290648639202 2023-01-22 13:20:49.045482: step: 304/469, loss: 0.07805667072534561 2023-01-22 13:20:49.648683: step: 306/469, loss: 0.022994069382548332 2023-01-22 13:20:50.337661: step: 308/469, loss: 0.010473649017512798 2023-01-22 13:20:50.951231: step: 310/469, loss: 0.0978112742304802 2023-01-22 13:20:51.625241: step: 312/469, loss: 0.024772867560386658 2023-01-22 13:20:52.250174: step: 314/469, loss: 0.03395106643438339 2023-01-22 13:20:52.908042: step: 316/469, loss: 0.1460859328508377 2023-01-22 13:20:53.532253: step: 318/469, loss: 0.02039787359535694 2023-01-22 13:20:54.127611: step: 320/469, loss: 0.011497749015688896 2023-01-22 13:20:54.794107: step: 322/469, loss: 0.018590491265058517 2023-01-22 13:20:55.386429: step: 324/469, loss: 0.07504917681217194 2023-01-22 13:20:55.962315: step: 326/469, loss: 0.02037842757999897 2023-01-22 13:20:56.618238: step: 328/469, loss: 0.028747988864779472 2023-01-22 13:20:57.260775: step: 330/469, loss: 0.027681805193424225 2023-01-22 13:20:57.894537: step: 332/469, loss: 0.004579153377562761 2023-01-22 13:20:58.564797: step: 334/469, loss: 0.008732077665627003 2023-01-22 13:20:59.157052: step: 336/469, loss: 0.014567781239748001 2023-01-22 13:20:59.764560: step: 338/469, loss: 0.09077747166156769 2023-01-22 13:21:00.394349: step: 340/469, loss: 0.026898130774497986 2023-01-22 13:21:00.969505: step: 342/469, loss: 0.02545371651649475 2023-01-22 13:21:01.553058: step: 344/469, loss: 0.01812591962516308 2023-01-22 13:21:02.281649: step: 346/469, loss: 0.03316383808851242 2023-01-22 13:21:02.940360: step: 348/469, loss: 0.023934369906783104 2023-01-22 13:21:03.490982: step: 350/469, loss: 0.05916007608175278 2023-01-22 13:21:04.149426: step: 352/469, loss: 0.07270391285419464 2023-01-22 13:21:04.749219: step: 354/469, loss: 0.024173913523554802 2023-01-22 13:21:05.463837: step: 356/469, loss: 0.09916363656520844 2023-01-22 13:21:06.075035: step: 358/469, loss: 0.019212665036320686 2023-01-22 13:21:06.819653: step: 360/469, loss: 0.0030976892448961735 2023-01-22 13:21:07.488902: step: 362/469, loss: 0.0708962231874466 2023-01-22 13:21:08.139923: step: 364/469, loss: 0.09258852899074554 2023-01-22 13:21:08.722594: step: 366/469, loss: 0.012380612082779408 2023-01-22 13:21:09.365959: step: 368/469, loss: 0.04000595211982727 2023-01-22 13:21:10.010294: step: 370/469, loss: 0.03125681355595589 2023-01-22 13:21:10.612474: step: 372/469, loss: 0.029831333085894585 2023-01-22 13:21:11.231786: step: 374/469, loss: 0.02931158058345318 2023-01-22 13:21:11.833244: step: 376/469, loss: 0.12362349033355713 2023-01-22 13:21:12.499252: step: 378/469, loss: 0.1478091925382614 2023-01-22 13:21:13.134229: step: 380/469, loss: 0.011295045726001263 2023-01-22 13:21:13.753631: step: 382/469, loss: 0.00720637571066618 2023-01-22 13:21:14.356046: step: 384/469, loss: 7.650078296661377 2023-01-22 13:21:15.030449: 
step: 386/469, loss: 0.05155227705836296 2023-01-22 13:21:15.673671: step: 388/469, loss: 0.040322039276361465 2023-01-22 13:21:16.399354: step: 390/469, loss: 0.05368424952030182 2023-01-22 13:21:17.094861: step: 392/469, loss: 0.008382214233279228 2023-01-22 13:21:17.644719: step: 394/469, loss: 0.0029755679424852133 2023-01-22 13:21:18.259098: step: 396/469, loss: 0.09491639584302902 2023-01-22 13:21:18.871783: step: 398/469, loss: 0.0021750153973698616 2023-01-22 13:21:19.459249: step: 400/469, loss: 0.01303351204842329 2023-01-22 13:21:20.103177: step: 402/469, loss: 0.02852119877934456 2023-01-22 13:21:20.764980: step: 404/469, loss: 0.16635991632938385 2023-01-22 13:21:21.414683: step: 406/469, loss: 0.019529711455106735 2023-01-22 13:21:21.987703: step: 408/469, loss: 0.0358879417181015 2023-01-22 13:21:22.579404: step: 410/469, loss: 0.46126437187194824 2023-01-22 13:21:23.178550: step: 412/469, loss: 0.03125284984707832 2023-01-22 13:21:23.809334: step: 414/469, loss: 0.003533415962010622 2023-01-22 13:21:24.432315: step: 416/469, loss: 0.08269832283258438 2023-01-22 13:21:25.062230: step: 418/469, loss: 0.01909209042787552 2023-01-22 13:21:25.672236: step: 420/469, loss: 0.009479396045207977 2023-01-22 13:21:26.380267: step: 422/469, loss: 0.014674525707960129 2023-01-22 13:21:27.019370: step: 424/469, loss: 0.202433779835701 2023-01-22 13:21:27.674489: step: 426/469, loss: 0.03065209463238716 2023-01-22 13:21:28.279227: step: 428/469, loss: 0.007127057760953903 2023-01-22 13:21:28.852098: step: 430/469, loss: 0.004880867432802916 2023-01-22 13:21:29.482607: step: 432/469, loss: 0.030944108963012695 2023-01-22 13:21:30.128833: step: 434/469, loss: 0.7026104927062988 2023-01-22 13:21:30.776921: step: 436/469, loss: 0.036813121289014816 2023-01-22 13:21:31.454981: step: 438/469, loss: 0.05058964714407921 2023-01-22 13:21:32.025682: step: 440/469, loss: 0.025473378598690033 2023-01-22 13:21:32.559044: step: 442/469, loss: 0.02426227368414402 2023-01-22 13:21:33.200312: step: 444/469, loss: 0.02645028941333294 2023-01-22 13:21:33.786602: step: 446/469, loss: 0.4938548803329468 2023-01-22 13:21:34.426558: step: 448/469, loss: 0.022870637476444244 2023-01-22 13:21:35.002697: step: 450/469, loss: 0.0035457240883260965 2023-01-22 13:21:35.624063: step: 452/469, loss: 0.06122088059782982 2023-01-22 13:21:36.261317: step: 454/469, loss: 0.0231733750551939 2023-01-22 13:21:36.903376: step: 456/469, loss: 0.032441675662994385 2023-01-22 13:21:37.576877: step: 458/469, loss: 0.013426247052848339 2023-01-22 13:21:38.199187: step: 460/469, loss: 0.013612991198897362 2023-01-22 13:21:38.834005: step: 462/469, loss: 0.027152452617883682 2023-01-22 13:21:39.533859: step: 464/469, loss: 0.02933356910943985 2023-01-22 13:21:40.268195: step: 466/469, loss: 0.13497425615787506 2023-01-22 13:21:40.797833: step: 468/469, loss: 0.05727699398994446 2023-01-22 13:21:41.423775: step: 470/469, loss: 0.017669206485152245 2023-01-22 13:21:42.081317: step: 472/469, loss: 0.07330697774887085 2023-01-22 13:21:42.756392: step: 474/469, loss: 0.07063091546297073 2023-01-22 13:21:43.459914: step: 476/469, loss: 0.010941785760223866 2023-01-22 13:21:44.037677: step: 478/469, loss: 0.0388040654361248 2023-01-22 13:21:44.731374: step: 480/469, loss: 0.008385878056287766 2023-01-22 13:21:45.420963: step: 482/469, loss: 0.040492381900548935 2023-01-22 13:21:46.047284: step: 484/469, loss: 0.017449242994189262 2023-01-22 13:21:46.712960: step: 486/469, loss: 0.016774604097008705 2023-01-22 13:21:47.347711: step: 488/469, 
loss: 0.009150327183306217 2023-01-22 13:21:47.941670: step: 490/469, loss: 0.015866735950112343 2023-01-22 13:21:48.535195: step: 492/469, loss: 0.0018570665270090103 2023-01-22 13:21:49.180112: step: 494/469, loss: 0.03161801025271416 2023-01-22 13:21:49.848654: step: 496/469, loss: 0.14599068462848663 2023-01-22 13:21:50.478732: step: 498/469, loss: 0.061882343143224716 2023-01-22 13:21:51.146166: step: 500/469, loss: 0.016102727502584457 2023-01-22 13:21:51.700938: step: 502/469, loss: 0.09188220649957657 2023-01-22 13:21:52.365166: step: 504/469, loss: 0.04177233204245567 2023-01-22 13:21:52.949059: step: 506/469, loss: 0.02355528436601162 2023-01-22 13:21:53.556162: step: 508/469, loss: 0.052326660603284836 2023-01-22 13:21:54.246048: step: 510/469, loss: 0.03700276464223862 2023-01-22 13:21:54.948053: step: 512/469, loss: 0.022019846364855766 2023-01-22 13:21:55.515335: step: 514/469, loss: 0.07263131439685822 2023-01-22 13:21:56.220719: step: 516/469, loss: 0.01748010143637657 2023-01-22 13:21:56.853967: step: 518/469, loss: 0.062186408787965775 2023-01-22 13:21:57.453976: step: 520/469, loss: 0.004920444451272488 2023-01-22 13:21:58.088663: step: 522/469, loss: 0.0015854996163398027 2023-01-22 13:21:58.739695: step: 524/469, loss: 0.041216686367988586 2023-01-22 13:21:59.405546: step: 526/469, loss: 0.14086870849132538 2023-01-22 13:22:00.050470: step: 528/469, loss: 0.028211597353219986 2023-01-22 13:22:00.655799: step: 530/469, loss: 0.5521446466445923 2023-01-22 13:22:01.300023: step: 532/469, loss: 0.13783621788024902 2023-01-22 13:22:01.875657: step: 534/469, loss: 0.008081533946096897 2023-01-22 13:22:02.543999: step: 536/469, loss: 0.002966032363474369 2023-01-22 13:22:03.133849: step: 538/469, loss: 0.3086635172367096 2023-01-22 13:22:03.757200: step: 540/469, loss: 0.022557653486728668 2023-01-22 13:22:04.460824: step: 542/469, loss: 0.013709179125726223 2023-01-22 13:22:05.049470: step: 544/469, loss: 0.11992644518613815 2023-01-22 13:22:05.677253: step: 546/469, loss: 0.029493257403373718 2023-01-22 13:22:06.328041: step: 548/469, loss: 0.042251765727996826 2023-01-22 13:22:06.936144: step: 550/469, loss: 0.018960410729050636 2023-01-22 13:22:07.524345: step: 552/469, loss: 0.0232393816113472 2023-01-22 13:22:08.077647: step: 554/469, loss: 0.028066672384738922 2023-01-22 13:22:08.807956: step: 556/469, loss: 0.010841749608516693 2023-01-22 13:22:09.352520: step: 558/469, loss: 0.011582441627979279 2023-01-22 13:22:09.937426: step: 560/469, loss: 0.03709997981786728 2023-01-22 13:22:10.573987: step: 562/469, loss: 0.280786395072937 2023-01-22 13:22:11.186839: step: 564/469, loss: 0.02346476912498474 2023-01-22 13:22:11.800864: step: 566/469, loss: 0.12232447415590286 2023-01-22 13:22:12.512226: step: 568/469, loss: 0.020489519461989403 2023-01-22 13:22:13.198513: step: 570/469, loss: 0.2667163014411926 2023-01-22 13:22:13.784968: step: 572/469, loss: 0.012609031982719898 2023-01-22 13:22:14.347177: step: 574/469, loss: 0.02504895254969597 2023-01-22 13:22:14.998098: step: 576/469, loss: 0.04171283543109894 2023-01-22 13:22:15.551178: step: 578/469, loss: 0.022249821573495865 2023-01-22 13:22:16.142791: step: 580/469, loss: 0.023193230852484703 2023-01-22 13:22:16.735921: step: 582/469, loss: 0.010279330424964428 2023-01-22 13:22:17.332709: step: 584/469, loss: 0.03859834745526314 2023-01-22 13:22:18.027968: step: 586/469, loss: 0.020164670422673225 2023-01-22 13:22:18.742303: step: 588/469, loss: 0.12396000325679779 2023-01-22 13:22:19.455586: step: 590/469, loss: 
0.778117835521698 2023-01-22 13:22:20.047504: step: 592/469, loss: 0.016898328438401222 2023-01-22 13:22:20.700998: step: 594/469, loss: 0.12879402935504913 2023-01-22 13:22:21.224376: step: 596/469, loss: 0.0073245819658041 2023-01-22 13:22:21.827351: step: 598/469, loss: 0.09994330257177353 2023-01-22 13:22:22.526392: step: 600/469, loss: 0.09125112742185593 2023-01-22 13:22:23.107731: step: 602/469, loss: 0.010347902774810791 2023-01-22 13:22:23.740901: step: 604/469, loss: 0.042560476809740067 2023-01-22 13:22:24.331805: step: 606/469, loss: 0.005342626478523016 2023-01-22 13:22:24.936263: step: 608/469, loss: 0.0492556095123291 2023-01-22 13:22:25.627551: step: 610/469, loss: 0.09419920295476913 2023-01-22 13:22:26.257945: step: 612/469, loss: 0.13855132460594177 2023-01-22 13:22:26.883258: step: 614/469, loss: 0.002914676209911704 2023-01-22 13:22:27.489375: step: 616/469, loss: 0.37900644540786743 2023-01-22 13:22:28.152226: step: 618/469, loss: 0.006001289002597332 2023-01-22 13:22:28.684527: step: 620/469, loss: 0.02134389989078045 2023-01-22 13:22:29.314239: step: 622/469, loss: 0.03038940764963627 2023-01-22 13:22:29.944605: step: 624/469, loss: 0.038485899567604065 2023-01-22 13:22:30.552536: step: 626/469, loss: 0.03308103606104851 2023-01-22 13:22:31.200536: step: 628/469, loss: 0.03451153263449669 2023-01-22 13:22:31.715709: step: 630/469, loss: 0.023783240467309952 2023-01-22 13:22:32.335991: step: 632/469, loss: 0.022690072655677795 2023-01-22 13:22:32.966172: step: 634/469, loss: 0.02024712786078453 2023-01-22 13:22:33.558172: step: 636/469, loss: 0.0010295177344232798 2023-01-22 13:22:34.282613: step: 638/469, loss: 0.0438709557056427 2023-01-22 13:22:34.897778: step: 640/469, loss: 0.055278677493333817 2023-01-22 13:22:35.489524: step: 642/469, loss: 0.01443219929933548 2023-01-22 13:22:36.157859: step: 644/469, loss: 0.023407230153679848 2023-01-22 13:22:36.716599: step: 646/469, loss: 0.00030136972782202065 2023-01-22 13:22:37.339022: step: 648/469, loss: 0.05775486305356026 2023-01-22 13:22:37.951151: step: 650/469, loss: 0.12905217707157135 2023-01-22 13:22:38.537711: step: 652/469, loss: 0.04188220947980881 2023-01-22 13:22:39.168084: step: 654/469, loss: 0.11069829016923904 2023-01-22 13:22:39.811319: step: 656/469, loss: 0.03923364728689194 2023-01-22 13:22:40.414860: step: 658/469, loss: 0.0034383300226181746 2023-01-22 13:22:41.095546: step: 660/469, loss: 0.04181593656539917 2023-01-22 13:22:41.735880: step: 662/469, loss: 0.04227343201637268 2023-01-22 13:22:42.342344: step: 664/469, loss: 0.007153121288865805 2023-01-22 13:22:42.975326: step: 666/469, loss: 0.023828310891985893 2023-01-22 13:22:43.608866: step: 668/469, loss: 0.03760472312569618 2023-01-22 13:22:44.304762: step: 670/469, loss: 0.07202967256307602 2023-01-22 13:22:44.943575: step: 672/469, loss: 0.0027448469772934914 2023-01-22 13:22:45.567218: step: 674/469, loss: 0.14921295642852783 2023-01-22 13:22:46.227139: step: 676/469, loss: 0.047992900013923645 2023-01-22 13:22:46.852226: step: 678/469, loss: 0.017281245440244675 2023-01-22 13:22:47.436231: step: 680/469, loss: 0.005759479012340307 2023-01-22 13:22:48.051692: step: 682/469, loss: 0.03225100040435791 2023-01-22 13:22:48.633426: step: 684/469, loss: 0.019822152331471443 2023-01-22 13:22:49.270471: step: 686/469, loss: 0.027294326573610306 2023-01-22 13:22:49.975545: step: 688/469, loss: 0.04073766991496086 2023-01-22 13:22:50.589798: step: 690/469, loss: 0.007256670854985714 2023-01-22 13:22:51.200273: step: 692/469, loss: 
0.10709620267152786 2023-01-22 13:22:51.873854: step: 694/469, loss: 0.05638296529650688 2023-01-22 13:22:52.437373: step: 696/469, loss: 0.003282777965068817 2023-01-22 13:22:53.083687: step: 698/469, loss: 0.09598791599273682 2023-01-22 13:22:53.730322: step: 700/469, loss: 0.0482390932738781 2023-01-22 13:22:54.279345: step: 702/469, loss: 0.01908688433468342 2023-01-22 13:22:54.941597: step: 704/469, loss: 0.00163744087330997 2023-01-22 13:22:55.515916: step: 706/469, loss: 0.02473783679306507 2023-01-22 13:22:56.180084: step: 708/469, loss: 0.020040497183799744 2023-01-22 13:22:56.802058: step: 710/469, loss: 0.024155493825674057 2023-01-22 13:22:57.543988: step: 712/469, loss: 0.03475001081824303 2023-01-22 13:22:58.123076: step: 714/469, loss: 0.10461072623729706 2023-01-22 13:22:58.737107: step: 716/469, loss: 0.0498565211892128 2023-01-22 13:22:59.365147: step: 718/469, loss: 0.04575449228286743 2023-01-22 13:22:59.987920: step: 720/469, loss: 0.014970526099205017 2023-01-22 13:23:00.622621: step: 722/469, loss: 0.2002747803926468 2023-01-22 13:23:01.268457: step: 724/469, loss: 0.05371290445327759 2023-01-22 13:23:01.899363: step: 726/469, loss: 0.039565615355968475 2023-01-22 13:23:02.548366: step: 728/469, loss: 0.016365688294172287 2023-01-22 13:23:03.155491: step: 730/469, loss: 0.05834020674228668 2023-01-22 13:23:03.766694: step: 732/469, loss: 0.009845305234193802 2023-01-22 13:23:04.350632: step: 734/469, loss: 0.041016723960638046 2023-01-22 13:23:04.974336: step: 736/469, loss: 0.015585620887577534 2023-01-22 13:23:05.644566: step: 738/469, loss: 0.16015508770942688 2023-01-22 13:23:06.264495: step: 740/469, loss: 0.14360953867435455 2023-01-22 13:23:06.924633: step: 742/469, loss: 0.10342170298099518 2023-01-22 13:23:07.537827: step: 744/469, loss: 0.013389134779572487 2023-01-22 13:23:08.201557: step: 746/469, loss: 0.11928045749664307 2023-01-22 13:23:08.812619: step: 748/469, loss: 0.05421704798936844 2023-01-22 13:23:09.434493: step: 750/469, loss: 0.5256090760231018 2023-01-22 13:23:10.050571: step: 752/469, loss: 0.023640748113393784 2023-01-22 13:23:10.678584: step: 754/469, loss: 0.037213560193777084 2023-01-22 13:23:11.351041: step: 756/469, loss: 0.018608342856168747 2023-01-22 13:23:12.026548: step: 758/469, loss: 0.0015264227986335754 2023-01-22 13:23:12.616730: step: 760/469, loss: 0.04242260754108429 2023-01-22 13:23:13.228258: step: 762/469, loss: 0.7380765676498413 2023-01-22 13:23:13.962204: step: 764/469, loss: 0.0545666366815567 2023-01-22 13:23:14.654396: step: 766/469, loss: 0.1233232393860817 2023-01-22 13:23:15.291283: step: 768/469, loss: 0.7233437895774841 2023-01-22 13:23:15.907576: step: 770/469, loss: 0.00846213661134243 2023-01-22 13:23:16.546335: step: 772/469, loss: 0.025160931050777435 2023-01-22 13:23:17.180441: step: 774/469, loss: 0.023392528295516968 2023-01-22 13:23:17.819531: step: 776/469, loss: 0.09777527302503586 2023-01-22 13:23:18.382484: step: 778/469, loss: 0.03297801688313484 2023-01-22 13:23:19.003100: step: 780/469, loss: 0.03944766893982887 2023-01-22 13:23:19.608743: step: 782/469, loss: 0.05732549726963043 2023-01-22 13:23:20.220022: step: 784/469, loss: 0.015613374300301075 2023-01-22 13:23:20.856533: step: 786/469, loss: 0.04868251085281372 2023-01-22 13:23:21.466405: step: 788/469, loss: 0.03899196535348892 2023-01-22 13:23:22.057929: step: 790/469, loss: 0.036485206335783005 2023-01-22 13:23:22.688018: step: 792/469, loss: 0.03200719133019447 2023-01-22 13:23:23.342050: step: 794/469, loss: 0.01654466614127159 
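Note on reading the epoch-end summaries below (the "Dev/Test/Sample" blocks printed after each "====" separator): every 'template' and 'slot' entry lists precision (p), recall (r) and their harmonic-mean F1, and the logged 'combined' score equals the template F1 multiplied by the slot F1; this can be checked directly against the printed numbers. A minimal Python sketch of that check, using only values copied from the epoch-24 Dev Chinese block; the helper names are illustrative and are not taken from train.py:

def f1(p, r):
    # Harmonic mean of precision and recall; returns 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) else 0.0

# Values copied verbatim from the epoch-24 "Dev Chinese" summary below.
dev_chinese_epoch24 = {
    "template": {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579},
    "slot": {"p": 0.29060799455825864, "r": 0.3132169656718993, "f1": 0.301489207139892},
    "combined": 0.2221499421030783,
}

t = dev_chinese_epoch24["template"]
s = dev_chinese_epoch24["slot"]
# F1 is the harmonic mean of the printed p and r.
assert abs(f1(t["p"], t["r"]) - t["f1"]) < 1e-12
# The printed "combined" value is the product of the two F1 scores.
assert abs(t["f1"] * s["f1"] - dev_chinese_epoch24["combined"]) < 1e-12

The same product relation holds for every block in this log, e.g. Sample Korean at epoch 24: 0.5 * 0.25 = 0.125.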
2023-01-22 13:23:23.962737: step: 796/469, loss: 0.05316543206572533 2023-01-22 13:23:24.540946: step: 798/469, loss: 0.0019444635836407542 2023-01-22 13:23:25.138717: step: 800/469, loss: 0.06956598162651062 2023-01-22 13:23:25.758657: step: 802/469, loss: 0.015713702887296677 2023-01-22 13:23:26.478577: step: 804/469, loss: 0.018174387514591217 2023-01-22 13:23:27.109278: step: 806/469, loss: 0.034022312611341476 2023-01-22 13:23:27.702789: step: 808/469, loss: 0.009265918284654617 2023-01-22 13:23:28.335696: step: 810/469, loss: 0.04342231526970863 2023-01-22 13:23:28.976967: step: 812/469, loss: 0.19386352598667145 2023-01-22 13:23:29.594393: step: 814/469, loss: 0.0654096007347107 2023-01-22 13:23:30.242678: step: 816/469, loss: 0.03987232968211174 2023-01-22 13:23:30.886889: step: 818/469, loss: 0.1549762338399887 2023-01-22 13:23:31.528421: step: 820/469, loss: 0.015902718529105186 2023-01-22 13:23:32.145715: step: 822/469, loss: 0.012863630428910255 2023-01-22 13:23:32.774864: step: 824/469, loss: 0.0011162246810272336 2023-01-22 13:23:33.364626: step: 826/469, loss: 0.022756634280085564 2023-01-22 13:23:33.979168: step: 828/469, loss: 0.04327286779880524 2023-01-22 13:23:34.537588: step: 830/469, loss: 0.006346752867102623 2023-01-22 13:23:35.206149: step: 832/469, loss: 0.029777828603982925 2023-01-22 13:23:35.865524: step: 834/469, loss: 0.05840518698096275 2023-01-22 13:23:36.514506: step: 836/469, loss: 0.03870542719960213 2023-01-22 13:23:37.210237: step: 838/469, loss: 0.07472429424524307 2023-01-22 13:23:37.816084: step: 840/469, loss: 0.02006162516772747 2023-01-22 13:23:38.501370: step: 842/469, loss: 0.030478358268737793 2023-01-22 13:23:39.120171: step: 844/469, loss: 0.04837769269943237 2023-01-22 13:23:39.751523: step: 846/469, loss: 0.022878864780068398 2023-01-22 13:23:40.326294: step: 848/469, loss: 0.007214287295937538 2023-01-22 13:23:40.907710: step: 850/469, loss: 0.025364549830555916 2023-01-22 13:23:41.506881: step: 852/469, loss: 0.053298626095056534 2023-01-22 13:23:42.144312: step: 854/469, loss: 0.07309738546609879 2023-01-22 13:23:42.759711: step: 856/469, loss: 0.025104938074946404 2023-01-22 13:23:43.392972: step: 858/469, loss: 0.03455242142081261 2023-01-22 13:23:44.011934: step: 860/469, loss: 0.008442571386694908 2023-01-22 13:23:44.658321: step: 862/469, loss: 0.04814957454800606 2023-01-22 13:23:45.352184: step: 864/469, loss: 0.008963426575064659 2023-01-22 13:23:45.937600: step: 866/469, loss: 0.09332709014415741 2023-01-22 13:23:46.569640: step: 868/469, loss: 0.019229233264923096 2023-01-22 13:23:47.152817: step: 870/469, loss: 0.07128330320119858 2023-01-22 13:23:47.711552: step: 872/469, loss: 0.035470034927129745 2023-01-22 13:23:48.366184: step: 874/469, loss: 0.0392182283103466 2023-01-22 13:23:48.966654: step: 876/469, loss: 0.0073399618268013 2023-01-22 13:23:49.582276: step: 878/469, loss: 0.035065118223428726 2023-01-22 13:23:50.183060: step: 880/469, loss: 0.005196990445256233 2023-01-22 13:23:50.842469: step: 882/469, loss: 0.0597517192363739 2023-01-22 13:23:51.508854: step: 884/469, loss: 0.0620705671608448 2023-01-22 13:23:52.208525: step: 886/469, loss: 0.028478804975748062 2023-01-22 13:23:52.793087: step: 888/469, loss: 0.06184910610318184 2023-01-22 13:23:53.432986: step: 890/469, loss: 0.04905041679739952 2023-01-22 13:23:54.125419: step: 892/469, loss: 0.007581349927932024 2023-01-22 13:23:54.766845: step: 894/469, loss: 0.017444944009184837 2023-01-22 13:23:55.392805: step: 896/469, loss: 0.011200396344065666 2023-01-22 
13:23:56.058198: step: 898/469, loss: 0.04748990014195442 2023-01-22 13:23:56.676498: step: 900/469, loss: 0.04625925421714783 2023-01-22 13:23:57.298539: step: 902/469, loss: 0.07253258675336838 2023-01-22 13:23:57.892898: step: 904/469, loss: 0.00748829310759902 2023-01-22 13:23:58.533716: step: 906/469, loss: 0.22255004942417145 2023-01-22 13:23:59.165089: step: 908/469, loss: 0.043984923511743546 2023-01-22 13:23:59.780656: step: 910/469, loss: 0.02163475565612316 2023-01-22 13:24:00.371534: step: 912/469, loss: 0.2576966881752014 2023-01-22 13:24:01.076012: step: 914/469, loss: 0.07093063741922379 2023-01-22 13:24:01.736792: step: 916/469, loss: 0.2424660623073578 2023-01-22 13:24:02.321737: step: 918/469, loss: 0.067013680934906 2023-01-22 13:24:02.934551: step: 920/469, loss: 0.00853918306529522 2023-01-22 13:24:03.545839: step: 922/469, loss: 0.18369174003601074 2023-01-22 13:24:04.225600: step: 924/469, loss: 0.011981997638940811 2023-01-22 13:24:04.837690: step: 926/469, loss: 0.07305097579956055 2023-01-22 13:24:05.386849: step: 928/469, loss: 0.0101209981366992 2023-01-22 13:24:05.975992: step: 930/469, loss: 0.013684126548469067 2023-01-22 13:24:06.576690: step: 932/469, loss: 0.05986110866069794 2023-01-22 13:24:07.233065: step: 934/469, loss: 0.12811024487018585 2023-01-22 13:24:07.838934: step: 936/469, loss: 0.043509408831596375 2023-01-22 13:24:08.464384: step: 938/469, loss: 0.00022222570260055363 ================================================== Loss: 0.075 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29060799455825864, 'r': 0.3132169656718993, 'f1': 0.301489207139892}, 'combined': 0.2221499421030783, 'epoch': 24} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29382331391284633, 'r': 0.25753223671409586, 'f1': 0.27448340782887054}, 'combined': 0.1497182224521112, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28564270703611455, 'r': 0.3165376487838537, 'f1': 0.30029764340070364}, 'combined': 0.22127194776893952, 'epoch': 24} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.29245745956601893, 'r': 0.25446207140648125, 'f1': 0.2721399648212173}, 'combined': 0.14843998081157306, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28430566255778117, 'r': 0.31829286700017245, 'f1': 0.3003408073573695}, 'combined': 0.22130375278964068, 'epoch': 24} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.2938176818864943, 'r': 0.25645202975271325, 'f1': 0.2738662125253694}, 'combined': 0.1493815704683833, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2569444444444444, 'r': 0.35238095238095235, 'f1': 0.2971887550200803}, 'combined': 0.1981258366800535, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20270270270270271, 'r': 0.32608695652173914, 'f1': 0.25}, 'combined': 0.125, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4305555555555556, 'r': 0.2672413793103448, 'f1': 0.3297872340425532}, 'combined': 0.2198581560283688, 'epoch': 24} ================================================== Current best 
result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:26:57.118054: step: 2/469, loss: 0.04856976494193077 2023-01-22 13:26:57.794737: step: 4/469, loss: 0.02147574909031391 2023-01-22 13:26:58.454165: step: 6/469, loss: 0.042533762753009796 2023-01-22 13:26:59.012287: step: 8/469, loss: 0.026998911052942276 2023-01-22 13:26:59.577154: step: 10/469, loss: 0.34051206707954407 2023-01-22 13:27:00.241991: step: 12/469, loss: 0.01712040603160858 2023-01-22 13:27:00.830397: step: 14/469, loss: 0.015525841154158115 2023-01-22 13:27:01.443808: step: 16/469, loss: 0.03011523000895977 2023-01-22 13:27:02.079397: step: 18/469, loss: 0.05591309815645218 2023-01-22 13:27:02.742267: step: 20/469, loss: 0.04188614338636398 2023-01-22 13:27:03.372079: step: 22/469, loss: 0.0298788920044899 2023-01-22 13:27:04.003571: step: 24/469, loss: 0.06907416880130768 2023-01-22 13:27:04.652840: step: 26/469, loss: 0.03653291240334511 2023-01-22 13:27:05.260871: step: 28/469, loss: 0.5556086301803589 2023-01-22 13:27:05.879975: step: 30/469, loss: 0.00822235643863678 2023-01-22 13:27:06.523382: step: 32/469, loss: 0.06540173292160034 2023-01-22 13:27:07.176439: step: 34/469, loss: 0.011102447286248207 2023-01-22 13:27:07.792064: step: 36/469, loss: 0.013518033549189568 2023-01-22 
13:27:08.417251: step: 38/469, loss: 0.006230557803064585 2023-01-22 13:27:09.113334: step: 40/469, loss: 0.10722088813781738 2023-01-22 13:27:09.738606: step: 42/469, loss: 0.0031198596116155386 2023-01-22 13:27:10.353908: step: 44/469, loss: 0.0341133289039135 2023-01-22 13:27:10.992790: step: 46/469, loss: 0.0076862904243171215 2023-01-22 13:27:11.586415: step: 48/469, loss: 0.016710253432393074 2023-01-22 13:27:12.255047: step: 50/469, loss: 0.021328141912817955 2023-01-22 13:27:12.861452: step: 52/469, loss: 0.009631364606320858 2023-01-22 13:27:13.504984: step: 54/469, loss: 0.01578831858932972 2023-01-22 13:27:14.162601: step: 56/469, loss: 0.0020905991550534964 2023-01-22 13:27:14.862288: step: 58/469, loss: 0.42499473690986633 2023-01-22 13:27:15.483658: step: 60/469, loss: 0.029958128929138184 2023-01-22 13:27:16.107796: step: 62/469, loss: 0.030102305114269257 2023-01-22 13:27:16.704789: step: 64/469, loss: 0.053664810955524445 2023-01-22 13:27:17.287651: step: 66/469, loss: 0.01745413988828659 2023-01-22 13:27:17.866985: step: 68/469, loss: 0.08032667636871338 2023-01-22 13:27:18.493311: step: 70/469, loss: 0.06182575970888138 2023-01-22 13:27:19.109829: step: 72/469, loss: 0.005048101767897606 2023-01-22 13:27:19.748096: step: 74/469, loss: 0.05123775824904442 2023-01-22 13:27:20.361601: step: 76/469, loss: 4.262962341308594 2023-01-22 13:27:21.024555: step: 78/469, loss: 0.01811578869819641 2023-01-22 13:27:21.591164: step: 80/469, loss: 0.020840588957071304 2023-01-22 13:27:22.260205: step: 82/469, loss: 0.007778041996061802 2023-01-22 13:27:22.843500: step: 84/469, loss: 0.09622455388307571 2023-01-22 13:27:23.501224: step: 86/469, loss: 0.04122927784919739 2023-01-22 13:27:24.083842: step: 88/469, loss: 0.051051631569862366 2023-01-22 13:27:24.728275: step: 90/469, loss: 0.017610173672437668 2023-01-22 13:27:25.332360: step: 92/469, loss: 0.03190859407186508 2023-01-22 13:27:25.977352: step: 94/469, loss: 0.005970291793346405 2023-01-22 13:27:26.543694: step: 96/469, loss: 0.05467331036925316 2023-01-22 13:27:27.158311: step: 98/469, loss: 0.12147871404886246 2023-01-22 13:27:27.789882: step: 100/469, loss: 0.01909748837351799 2023-01-22 13:27:28.316078: step: 102/469, loss: 0.04550115019083023 2023-01-22 13:27:28.962535: step: 104/469, loss: 0.034840092062950134 2023-01-22 13:27:29.571926: step: 106/469, loss: 0.005601534619927406 2023-01-22 13:27:30.313969: step: 108/469, loss: 0.0599069744348526 2023-01-22 13:27:30.904697: step: 110/469, loss: 0.0008288627723231912 2023-01-22 13:27:31.504246: step: 112/469, loss: 0.04758596792817116 2023-01-22 13:27:32.134949: step: 114/469, loss: 0.2026187628507614 2023-01-22 13:27:32.683291: step: 116/469, loss: 0.02390963025391102 2023-01-22 13:27:33.274061: step: 118/469, loss: 0.007931917905807495 2023-01-22 13:27:33.992699: step: 120/469, loss: 0.04644008353352547 2023-01-22 13:27:34.602272: step: 122/469, loss: 0.03678201138973236 2023-01-22 13:27:35.230088: step: 124/469, loss: 0.017658861353993416 2023-01-22 13:27:35.849073: step: 126/469, loss: 0.023343989625573158 2023-01-22 13:27:36.522349: step: 128/469, loss: 0.03157182037830353 2023-01-22 13:27:37.140327: step: 130/469, loss: 0.015042303130030632 2023-01-22 13:27:37.695857: step: 132/469, loss: 0.005432233680039644 2023-01-22 13:27:38.205898: step: 134/469, loss: 0.008275623433291912 2023-01-22 13:27:38.773642: step: 136/469, loss: 0.00010147449938813224 2023-01-22 13:27:39.476094: step: 138/469, loss: 0.018918149173259735 2023-01-22 13:27:40.151414: step: 140/469, loss: 
0.016375940293073654 2023-01-22 13:27:40.797775: step: 142/469, loss: 0.041358571499586105 2023-01-22 13:27:41.407316: step: 144/469, loss: 0.01368478499352932 2023-01-22 13:27:42.014132: step: 146/469, loss: 0.004914687480777502 2023-01-22 13:27:42.625899: step: 148/469, loss: 0.0020167571492493153 2023-01-22 13:27:43.235271: step: 150/469, loss: 0.0745353177189827 2023-01-22 13:27:43.861757: step: 152/469, loss: 0.019189924001693726 2023-01-22 13:27:44.409207: step: 154/469, loss: 0.002326143207028508 2023-01-22 13:27:45.045631: step: 156/469, loss: 0.012304248288273811 2023-01-22 13:27:45.631146: step: 158/469, loss: 0.020376984030008316 2023-01-22 13:27:46.261793: step: 160/469, loss: 0.004673916380852461 2023-01-22 13:27:46.819774: step: 162/469, loss: 0.04110435023903847 2023-01-22 13:27:47.409539: step: 164/469, loss: 0.04092944413423538 2023-01-22 13:27:48.013467: step: 166/469, loss: 0.012793663889169693 2023-01-22 13:27:48.664643: step: 168/469, loss: 0.022109219804406166 2023-01-22 13:27:49.273666: step: 170/469, loss: 0.007726403884589672 2023-01-22 13:27:49.899720: step: 172/469, loss: 0.03344342112541199 2023-01-22 13:27:50.516532: step: 174/469, loss: 0.0014511216431856155 2023-01-22 13:27:51.154685: step: 176/469, loss: 0.006137159187346697 2023-01-22 13:27:51.799363: step: 178/469, loss: 0.10614075511693954 2023-01-22 13:27:52.372430: step: 180/469, loss: 0.014324543066322803 2023-01-22 13:27:53.012544: step: 182/469, loss: 0.08042246848344803 2023-01-22 13:27:53.594781: step: 184/469, loss: 0.032187726348638535 2023-01-22 13:27:54.166724: step: 186/469, loss: 0.04879891872406006 2023-01-22 13:27:54.795218: step: 188/469, loss: 0.139285147190094 2023-01-22 13:27:55.459573: step: 190/469, loss: 0.005954607855528593 2023-01-22 13:27:56.048811: step: 192/469, loss: 0.011356913484632969 2023-01-22 13:27:56.704030: step: 194/469, loss: 0.004321274347603321 2023-01-22 13:27:57.402266: step: 196/469, loss: 0.013530028983950615 2023-01-22 13:27:57.980720: step: 198/469, loss: 0.0008386899717152119 2023-01-22 13:27:58.533325: step: 200/469, loss: 0.01096635963767767 2023-01-22 13:27:59.101916: step: 202/469, loss: 0.009184669703245163 2023-01-22 13:27:59.741371: step: 204/469, loss: 0.03868536278605461 2023-01-22 13:28:00.415505: step: 206/469, loss: 0.04301360249519348 2023-01-22 13:28:01.023857: step: 208/469, loss: 0.0010095011675730348 2023-01-22 13:28:01.582274: step: 210/469, loss: 0.000283485947875306 2023-01-22 13:28:02.189973: step: 212/469, loss: 0.0070633552968502045 2023-01-22 13:28:02.822461: step: 214/469, loss: 0.030808549374341965 2023-01-22 13:28:03.458897: step: 216/469, loss: 0.0026243298780173063 2023-01-22 13:28:04.108162: step: 218/469, loss: 0.0005290028639137745 2023-01-22 13:28:04.748292: step: 220/469, loss: 0.015227171592414379 2023-01-22 13:28:05.490218: step: 222/469, loss: 0.024634400382637978 2023-01-22 13:28:06.124222: step: 224/469, loss: 0.03855723887681961 2023-01-22 13:28:06.673638: step: 226/469, loss: 0.00018793513299897313 2023-01-22 13:28:07.345751: step: 228/469, loss: 0.01885269209742546 2023-01-22 13:28:07.977213: step: 230/469, loss: 0.049848005175590515 2023-01-22 13:28:08.604239: step: 232/469, loss: 0.0272691510617733 2023-01-22 13:28:09.228103: step: 234/469, loss: 0.00755769619718194 2023-01-22 13:28:09.882172: step: 236/469, loss: 0.049853160977363586 2023-01-22 13:28:10.513428: step: 238/469, loss: 0.10203791409730911 2023-01-22 13:28:11.152053: step: 240/469, loss: 0.05088697001338005 2023-01-22 13:28:11.697721: step: 242/469, 
loss: 0.00043119274778291583 2023-01-22 13:28:12.291144: step: 244/469, loss: 0.11058246344327927 2023-01-22 13:28:12.946296: step: 246/469, loss: 0.03444642946124077 2023-01-22 13:28:13.609912: step: 248/469, loss: 0.013254709541797638 2023-01-22 13:28:14.193322: step: 250/469, loss: 0.05417263135313988 2023-01-22 13:28:14.783895: step: 252/469, loss: 0.013920310884714127 2023-01-22 13:28:15.377525: step: 254/469, loss: 0.04292064905166626 2023-01-22 13:28:15.937178: step: 256/469, loss: 0.030953271314501762 2023-01-22 13:28:16.522841: step: 258/469, loss: 0.00650059524923563 2023-01-22 13:28:17.161219: step: 260/469, loss: 0.020494060590863228 2023-01-22 13:28:17.749889: step: 262/469, loss: 0.05571705847978592 2023-01-22 13:28:18.331026: step: 264/469, loss: 0.03857578709721565 2023-01-22 13:28:18.930306: step: 266/469, loss: 0.0627986490726471 2023-01-22 13:28:19.528738: step: 268/469, loss: 0.01743331365287304 2023-01-22 13:28:20.163293: step: 270/469, loss: 0.003057928988710046 2023-01-22 13:28:20.780975: step: 272/469, loss: 0.0327683687210083 2023-01-22 13:28:21.406813: step: 274/469, loss: 0.036044642329216 2023-01-22 13:28:22.026013: step: 276/469, loss: 0.018472548574209213 2023-01-22 13:28:22.616703: step: 278/469, loss: 0.0030957930721342564 2023-01-22 13:28:23.221783: step: 280/469, loss: 0.06196119263768196 2023-01-22 13:28:23.915496: step: 282/469, loss: 0.012512745335698128 2023-01-22 13:28:24.503773: step: 284/469, loss: 0.026475517079234123 2023-01-22 13:28:25.121960: step: 286/469, loss: 0.015970377251505852 2023-01-22 13:28:25.730228: step: 288/469, loss: 0.004397625103592873 2023-01-22 13:28:26.316708: step: 290/469, loss: 0.010436036624014378 2023-01-22 13:28:27.066593: step: 292/469, loss: 0.015440810471773148 2023-01-22 13:28:27.604116: step: 294/469, loss: 0.0022039152681827545 2023-01-22 13:28:28.182961: step: 296/469, loss: 0.042669110000133514 2023-01-22 13:28:28.825908: step: 298/469, loss: 0.011528090573847294 2023-01-22 13:28:29.489518: step: 300/469, loss: 0.025446249172091484 2023-01-22 13:28:30.143433: step: 302/469, loss: 0.049084506928920746 2023-01-22 13:28:30.756269: step: 304/469, loss: 0.029263099655508995 2023-01-22 13:28:31.370790: step: 306/469, loss: 0.005450424738228321 2023-01-22 13:28:31.992221: step: 308/469, loss: 0.008434198796749115 2023-01-22 13:28:32.617206: step: 310/469, loss: 0.01223000418394804 2023-01-22 13:28:33.258552: step: 312/469, loss: 0.05077080428600311 2023-01-22 13:28:33.895695: step: 314/469, loss: 0.004749195650219917 2023-01-22 13:28:34.483511: step: 316/469, loss: 0.048470769077539444 2023-01-22 13:28:35.127471: step: 318/469, loss: 0.04480629786849022 2023-01-22 13:28:35.705930: step: 320/469, loss: 0.044698476791381836 2023-01-22 13:28:36.381357: step: 322/469, loss: 0.01952251046895981 2023-01-22 13:28:37.010099: step: 324/469, loss: 0.01769523322582245 2023-01-22 13:28:37.663012: step: 326/469, loss: 0.02703937515616417 2023-01-22 13:28:38.284817: step: 328/469, loss: 0.1337839663028717 2023-01-22 13:28:38.881329: step: 330/469, loss: 0.05273359268903732 2023-01-22 13:28:39.532319: step: 332/469, loss: 0.02908332273364067 2023-01-22 13:28:40.142046: step: 334/469, loss: 0.04517734795808792 2023-01-22 13:28:40.728802: step: 336/469, loss: 0.014714795164763927 2023-01-22 13:28:41.366288: step: 338/469, loss: 0.021900169551372528 2023-01-22 13:28:41.986826: step: 340/469, loss: 0.015135765075683594 2023-01-22 13:28:42.569763: step: 342/469, loss: 0.010647600516676903 2023-01-22 13:28:43.188187: step: 344/469, loss: 
0.0033777577336877584 2023-01-22 13:28:43.780589: step: 346/469, loss: 0.02138627879321575 2023-01-22 13:28:44.384907: step: 348/469, loss: 0.03390428051352501 2023-01-22 13:28:45.080060: step: 350/469, loss: 0.00392957916483283 2023-01-22 13:28:45.664589: step: 352/469, loss: 0.0018901792354881763 2023-01-22 13:28:46.288153: step: 354/469, loss: 0.05929534509778023 2023-01-22 13:28:46.992638: step: 356/469, loss: 0.013390686362981796 2023-01-22 13:28:47.545585: step: 358/469, loss: 0.05661235377192497 2023-01-22 13:28:48.198752: step: 360/469, loss: 0.036856770515441895 2023-01-22 13:28:48.780852: step: 362/469, loss: 0.027632741257548332 2023-01-22 13:28:49.358120: step: 364/469, loss: 0.002649830188602209 2023-01-22 13:28:50.002038: step: 366/469, loss: 0.20955486595630646 2023-01-22 13:28:50.570283: step: 368/469, loss: 0.0019603234250098467 2023-01-22 13:28:51.264260: step: 370/469, loss: 0.015419835224747658 2023-01-22 13:28:51.873304: step: 372/469, loss: 0.04204192012548447 2023-01-22 13:28:52.453744: step: 374/469, loss: 0.10948891192674637 2023-01-22 13:28:53.097579: step: 376/469, loss: 0.045809097588062286 2023-01-22 13:28:53.677794: step: 378/469, loss: 0.007452741265296936 2023-01-22 13:28:54.466583: step: 380/469, loss: 0.013179030269384384 2023-01-22 13:28:55.091002: step: 382/469, loss: 0.11277803778648376 2023-01-22 13:28:55.798863: step: 384/469, loss: 0.0019561247900128365 2023-01-22 13:28:56.451100: step: 386/469, loss: 0.00523748341947794 2023-01-22 13:28:57.131540: step: 388/469, loss: 0.10067883133888245 2023-01-22 13:28:57.802740: step: 390/469, loss: 0.054992761462926865 2023-01-22 13:28:58.405649: step: 392/469, loss: 0.007033468224108219 2023-01-22 13:28:59.020759: step: 394/469, loss: 0.2416965365409851 2023-01-22 13:28:59.637007: step: 396/469, loss: 0.023505480960011482 2023-01-22 13:29:00.290426: step: 398/469, loss: 0.008563170209527016 2023-01-22 13:29:00.982635: step: 400/469, loss: 0.003997010178864002 2023-01-22 13:29:01.634029: step: 402/469, loss: 0.023158133029937744 2023-01-22 13:29:02.306547: step: 404/469, loss: 0.03962918743491173 2023-01-22 13:29:02.965980: step: 406/469, loss: 0.054418012499809265 2023-01-22 13:29:03.523279: step: 408/469, loss: 0.02258702740073204 2023-01-22 13:29:04.105471: step: 410/469, loss: 0.007732185069471598 2023-01-22 13:29:04.691474: step: 412/469, loss: 0.056680869311094284 2023-01-22 13:29:05.337105: step: 414/469, loss: 0.6087327003479004 2023-01-22 13:29:05.972485: step: 416/469, loss: 0.06544509530067444 2023-01-22 13:29:06.607323: step: 418/469, loss: 0.019155997782945633 2023-01-22 13:29:07.167699: step: 420/469, loss: 0.045098453760147095 2023-01-22 13:29:07.879740: step: 422/469, loss: 0.008726864121854305 2023-01-22 13:29:08.523908: step: 424/469, loss: 0.023709028959274292 2023-01-22 13:29:09.244131: step: 426/469, loss: 0.043676912784576416 2023-01-22 13:29:09.945961: step: 428/469, loss: 0.008577961474657059 2023-01-22 13:29:10.524182: step: 430/469, loss: 0.00293761701323092 2023-01-22 13:29:11.105409: step: 432/469, loss: 0.02273917943239212 2023-01-22 13:29:11.734306: step: 434/469, loss: 0.005073876585811377 2023-01-22 13:29:12.290670: step: 436/469, loss: 0.00121480249799788 2023-01-22 13:29:12.899464: step: 438/469, loss: 0.012796197086572647 2023-01-22 13:29:13.588540: step: 440/469, loss: 0.12014244496822357 2023-01-22 13:29:14.459052: step: 442/469, loss: 0.018803711980581284 2023-01-22 13:29:15.062930: step: 444/469, loss: 0.006569478195160627 2023-01-22 13:29:15.687639: step: 446/469, loss: 
0.0064140004105865955 2023-01-22 13:29:16.293734: step: 448/469, loss: 0.034201301634311676 2023-01-22 13:29:16.893427: step: 450/469, loss: 0.0773974359035492 2023-01-22 13:29:17.462940: step: 452/469, loss: 0.006095223128795624 2023-01-22 13:29:18.034655: step: 454/469, loss: 0.024928433820605278 2023-01-22 13:29:18.665696: step: 456/469, loss: 0.008130456320941448 2023-01-22 13:29:19.248164: step: 458/469, loss: 0.010952129028737545 2023-01-22 13:29:19.895409: step: 460/469, loss: 0.034331656992435455 2023-01-22 13:29:20.556977: step: 462/469, loss: 0.024890027940273285 2023-01-22 13:29:21.221872: step: 464/469, loss: 0.013754414394497871 2023-01-22 13:29:21.865717: step: 466/469, loss: 0.21725188195705414 2023-01-22 13:29:22.479698: step: 468/469, loss: 0.03119678795337677 2023-01-22 13:29:23.153055: step: 470/469, loss: 0.010625576600432396 2023-01-22 13:29:23.846160: step: 472/469, loss: 0.38016849756240845 2023-01-22 13:29:24.518101: step: 474/469, loss: 0.012717713601887226 2023-01-22 13:29:25.198472: step: 476/469, loss: 0.07365652173757553 2023-01-22 13:29:25.840433: step: 478/469, loss: 0.0703604593873024 2023-01-22 13:29:26.429591: step: 480/469, loss: 0.027157843112945557 2023-01-22 13:29:27.053636: step: 482/469, loss: 0.008624759502708912 2023-01-22 13:29:28.384190: step: 484/469, loss: 0.20557545125484467 2023-01-22 13:29:28.992475: step: 486/469, loss: 0.02057848870754242 2023-01-22 13:29:29.607295: step: 488/469, loss: 0.010233267210423946 2023-01-22 13:29:30.296809: step: 490/469, loss: 0.006442150566726923 2023-01-22 13:29:30.937898: step: 492/469, loss: 0.02376784197986126 2023-01-22 13:29:31.556145: step: 494/469, loss: 0.03933355212211609 2023-01-22 13:29:32.210752: step: 496/469, loss: 0.03800881281495094 2023-01-22 13:29:32.835443: step: 498/469, loss: 0.10246901214122772 2023-01-22 13:29:33.456482: step: 500/469, loss: 0.0008079497492872179 2023-01-22 13:29:34.054499: step: 502/469, loss: 0.1171233206987381 2023-01-22 13:29:34.699453: step: 504/469, loss: 1.5556166172027588 2023-01-22 13:29:35.345925: step: 506/469, loss: 0.07243640720844269 2023-01-22 13:29:35.955680: step: 508/469, loss: 0.0018142522312700748 2023-01-22 13:29:36.526321: step: 510/469, loss: 0.01611032709479332 2023-01-22 13:29:37.118876: step: 512/469, loss: 1.1219016313552856 2023-01-22 13:29:37.726205: step: 514/469, loss: 0.0015680743381381035 2023-01-22 13:29:38.344980: step: 516/469, loss: 0.03115461766719818 2023-01-22 13:29:38.985262: step: 518/469, loss: 0.12085239589214325 2023-01-22 13:29:39.630821: step: 520/469, loss: 0.00013592492905445397 2023-01-22 13:29:40.257207: step: 522/469, loss: 0.022860264405608177 2023-01-22 13:29:40.931577: step: 524/469, loss: 0.24189890921115875 2023-01-22 13:29:41.578368: step: 526/469, loss: 0.6601332426071167 2023-01-22 13:29:42.194108: step: 528/469, loss: 0.0065761953592300415 2023-01-22 13:29:42.839128: step: 530/469, loss: 0.018679099157452583 2023-01-22 13:29:43.487497: step: 532/469, loss: 0.08067703992128372 2023-01-22 13:29:44.050744: step: 534/469, loss: 0.00792071782052517 2023-01-22 13:29:44.612483: step: 536/469, loss: 0.01419886201620102 2023-01-22 13:29:45.272292: step: 538/469, loss: 0.08376763761043549 2023-01-22 13:29:45.883722: step: 540/469, loss: 0.014224591664969921 2023-01-22 13:29:46.652675: step: 542/469, loss: 0.014264407567679882 2023-01-22 13:29:47.376236: step: 544/469, loss: 0.017167195677757263 2023-01-22 13:29:47.998738: step: 546/469, loss: 0.054914992302656174 2023-01-22 13:29:48.606037: step: 548/469, loss: 
0.0004956695483997464 2023-01-22 13:29:49.219724: step: 550/469, loss: 0.017324771732091904 2023-01-22 13:29:49.809957: step: 552/469, loss: 0.008702096529304981 2023-01-22 13:29:50.402178: step: 554/469, loss: 0.006109388079494238 2023-01-22 13:29:51.029830: step: 556/469, loss: 0.13674870133399963 2023-01-22 13:29:51.672953: step: 558/469, loss: 0.09270133823156357 2023-01-22 13:29:52.284204: step: 560/469, loss: 0.11514364182949066 2023-01-22 13:29:52.948034: step: 562/469, loss: 0.007401920389384031 2023-01-22 13:29:53.583260: step: 564/469, loss: 0.009537674486637115 2023-01-22 13:29:54.253537: step: 566/469, loss: 0.015438495203852654 2023-01-22 13:29:54.842232: step: 568/469, loss: 0.014768391847610474 2023-01-22 13:29:55.527999: step: 570/469, loss: 0.03671162202954292 2023-01-22 13:29:56.139963: step: 572/469, loss: 0.007807845249772072 2023-01-22 13:29:56.702663: step: 574/469, loss: 0.07082484662532806 2023-01-22 13:29:57.341349: step: 576/469, loss: 0.03761536255478859 2023-01-22 13:29:57.937539: step: 578/469, loss: 0.009655756875872612 2023-01-22 13:29:58.543983: step: 580/469, loss: 0.002102601807564497 2023-01-22 13:29:59.161590: step: 582/469, loss: 0.05156554654240608 2023-01-22 13:29:59.855395: step: 584/469, loss: 0.1121736615896225 2023-01-22 13:30:00.462419: step: 586/469, loss: 0.2778625190258026 2023-01-22 13:30:01.075442: step: 588/469, loss: 0.009841597639024258 2023-01-22 13:30:01.789526: step: 590/469, loss: 0.028299439698457718 2023-01-22 13:30:02.463284: step: 592/469, loss: 0.05079028010368347 2023-01-22 13:30:03.067422: step: 594/469, loss: 0.01051055733114481 2023-01-22 13:30:03.715746: step: 596/469, loss: 0.015717197209596634 2023-01-22 13:30:04.383872: step: 598/469, loss: 0.046247448772192 2023-01-22 13:30:04.986658: step: 600/469, loss: 0.026593945920467377 2023-01-22 13:30:05.542462: step: 602/469, loss: 0.05360126122832298 2023-01-22 13:30:06.156026: step: 604/469, loss: 0.02141428180038929 2023-01-22 13:30:06.768127: step: 606/469, loss: 0.3677946925163269 2023-01-22 13:30:07.408537: step: 608/469, loss: 0.00097944017034024 2023-01-22 13:30:08.026699: step: 610/469, loss: 0.0052940682508051395 2023-01-22 13:30:08.671777: step: 612/469, loss: 0.006692413706332445 2023-01-22 13:30:09.282262: step: 614/469, loss: 0.0067467219196259975 2023-01-22 13:30:09.854109: step: 616/469, loss: 0.016620740294456482 2023-01-22 13:30:10.456647: step: 618/469, loss: 0.06428956240415573 2023-01-22 13:30:11.137245: step: 620/469, loss: 0.03241540119051933 2023-01-22 13:30:11.796654: step: 622/469, loss: 0.08981288969516754 2023-01-22 13:30:12.444732: step: 624/469, loss: 0.03582325950264931 2023-01-22 13:30:13.065656: step: 626/469, loss: 0.0794992595911026 2023-01-22 13:30:13.700240: step: 628/469, loss: 0.060099996626377106 2023-01-22 13:30:14.232592: step: 630/469, loss: 0.035620734095573425 2023-01-22 13:30:14.813804: step: 632/469, loss: 0.0037603736855089664 2023-01-22 13:30:15.449075: step: 634/469, loss: 0.001754170167259872 2023-01-22 13:30:16.132459: step: 636/469, loss: 0.04415275529026985 2023-01-22 13:30:16.697833: step: 638/469, loss: 0.022454949095845222 2023-01-22 13:30:17.318744: step: 640/469, loss: 0.012613899074494839 2023-01-22 13:30:17.941242: step: 642/469, loss: 0.005072041414678097 2023-01-22 13:30:18.602630: step: 644/469, loss: 0.04319910705089569 2023-01-22 13:30:19.245589: step: 646/469, loss: 0.18360324203968048 2023-01-22 13:30:19.917068: step: 648/469, loss: 0.046460311859846115 2023-01-22 13:30:20.488223: step: 650/469, loss: 
0.009786998853087425 2023-01-22 13:30:21.118785: step: 652/469, loss: 0.1228475421667099 2023-01-22 13:30:21.799896: step: 654/469, loss: 0.020434217527508736 2023-01-22 13:30:22.477367: step: 656/469, loss: 0.0253992211073637 2023-01-22 13:30:23.075867: step: 658/469, loss: 0.027405569329857826 2023-01-22 13:30:23.702266: step: 660/469, loss: 0.01696774736046791 2023-01-22 13:30:24.343817: step: 662/469, loss: 0.017147373408079147 2023-01-22 13:30:24.948321: step: 664/469, loss: 0.025060776621103287 2023-01-22 13:30:25.624520: step: 666/469, loss: 0.009904792532324791 2023-01-22 13:30:26.253546: step: 668/469, loss: 0.002904832363128662 2023-01-22 13:30:26.927839: step: 670/469, loss: 0.027811652049422264 2023-01-22 13:30:27.547276: step: 672/469, loss: 0.013813911005854607 2023-01-22 13:30:28.211022: step: 674/469, loss: 0.010885735973715782 2023-01-22 13:30:28.850777: step: 676/469, loss: 0.27891072630882263 2023-01-22 13:30:29.468843: step: 678/469, loss: 0.01488383486866951 2023-01-22 13:30:30.128632: step: 680/469, loss: 0.030160052701830864 2023-01-22 13:30:30.772633: step: 682/469, loss: 0.03998572379350662 2023-01-22 13:30:31.383814: step: 684/469, loss: 0.03636665642261505 2023-01-22 13:30:31.993532: step: 686/469, loss: 0.07142338901758194 2023-01-22 13:30:32.706986: step: 688/469, loss: 0.0981699749827385 2023-01-22 13:30:33.314242: step: 690/469, loss: 0.0968250259757042 2023-01-22 13:30:33.888580: step: 692/469, loss: 0.014923705719411373 2023-01-22 13:30:34.523289: step: 694/469, loss: 0.03728951886296272 2023-01-22 13:30:35.202567: step: 696/469, loss: 0.038896434009075165 2023-01-22 13:30:35.864777: step: 698/469, loss: 0.07678399235010147 2023-01-22 13:30:36.464935: step: 700/469, loss: 0.01881502754986286 2023-01-22 13:30:37.084296: step: 702/469, loss: 0.008039776235818863 2023-01-22 13:30:37.820944: step: 704/469, loss: 0.013041947036981583 2023-01-22 13:30:38.359411: step: 706/469, loss: 0.00392870930954814 2023-01-22 13:30:39.056986: step: 708/469, loss: 0.08560433983802795 2023-01-22 13:30:39.652779: step: 710/469, loss: 0.0139212841168046 2023-01-22 13:30:40.286571: step: 712/469, loss: 0.09286808222532272 2023-01-22 13:30:40.958182: step: 714/469, loss: 0.03638756275177002 2023-01-22 13:30:41.609532: step: 716/469, loss: 0.01713269017636776 2023-01-22 13:30:42.242717: step: 718/469, loss: 0.13275638222694397 2023-01-22 13:30:42.899776: step: 720/469, loss: 0.05306063964962959 2023-01-22 13:30:43.485013: step: 722/469, loss: 0.02417629025876522 2023-01-22 13:30:44.139826: step: 724/469, loss: 0.013635202310979366 2023-01-22 13:30:44.723111: step: 726/469, loss: 0.12524113059043884 2023-01-22 13:30:45.332420: step: 728/469, loss: 0.1346540004014969 2023-01-22 13:30:45.939840: step: 730/469, loss: 0.01829395443201065 2023-01-22 13:30:46.607923: step: 732/469, loss: 0.3671169579029083 2023-01-22 13:30:47.232121: step: 734/469, loss: 0.004386854358017445 2023-01-22 13:30:47.813604: step: 736/469, loss: 0.039212267845869064 2023-01-22 13:30:48.473796: step: 738/469, loss: 0.0047495318576693535 2023-01-22 13:30:49.099567: step: 740/469, loss: 0.03000878542661667 2023-01-22 13:30:49.750300: step: 742/469, loss: 0.004853927996009588 2023-01-22 13:30:50.400494: step: 744/469, loss: 0.0021590818651020527 2023-01-22 13:30:51.058481: step: 746/469, loss: 0.02374895289540291 2023-01-22 13:30:51.683339: step: 748/469, loss: 0.009065737947821617 2023-01-22 13:30:52.248513: step: 750/469, loss: 0.008073159493505955 2023-01-22 13:30:52.811005: step: 752/469, loss: 
0.11257588118314743 2023-01-22 13:30:53.449070: step: 754/469, loss: 0.04057428240776062 2023-01-22 13:30:54.106389: step: 756/469, loss: 0.10555069148540497 2023-01-22 13:30:54.797834: step: 758/469, loss: 0.023752979934215546 2023-01-22 13:30:55.454296: step: 760/469, loss: 0.048274967819452286 2023-01-22 13:30:56.128733: step: 762/469, loss: 1.2696187496185303 2023-01-22 13:30:56.773542: step: 764/469, loss: 0.012262381613254547 2023-01-22 13:30:57.401186: step: 766/469, loss: 0.03396160155534744 2023-01-22 13:30:58.029539: step: 768/469, loss: 0.1591954231262207 2023-01-22 13:30:58.689343: step: 770/469, loss: 0.05642412602901459 2023-01-22 13:30:59.342500: step: 772/469, loss: 0.3960493803024292 2023-01-22 13:31:00.001934: step: 774/469, loss: 0.026414884254336357 2023-01-22 13:31:00.579552: step: 776/469, loss: 0.026224391534924507 2023-01-22 13:31:01.194582: step: 778/469, loss: 0.03593488037586212 2023-01-22 13:31:01.834081: step: 780/469, loss: 0.0621548630297184 2023-01-22 13:31:02.552334: step: 782/469, loss: 0.0032862641382962465 2023-01-22 13:31:03.246500: step: 784/469, loss: 0.006173399742692709 2023-01-22 13:31:03.915895: step: 786/469, loss: 0.09690824151039124 2023-01-22 13:31:04.554990: step: 788/469, loss: 0.026896344497799873 2023-01-22 13:31:05.142094: step: 790/469, loss: 0.030217744410037994 2023-01-22 13:31:05.787069: step: 792/469, loss: 0.00029961648397147655 2023-01-22 13:31:06.484539: step: 794/469, loss: 0.025184880942106247 2023-01-22 13:31:07.072428: step: 796/469, loss: 0.1708069145679474 2023-01-22 13:31:07.669898: step: 798/469, loss: 0.013849414885044098 2023-01-22 13:31:08.247346: step: 800/469, loss: 0.03732619807124138 2023-01-22 13:31:08.878398: step: 802/469, loss: 0.018460582941770554 2023-01-22 13:31:09.533230: step: 804/469, loss: 0.003849827451631427 2023-01-22 13:31:10.148312: step: 806/469, loss: 0.03699662536382675 2023-01-22 13:31:10.714376: step: 808/469, loss: 0.04430724307894707 2023-01-22 13:31:11.307358: step: 810/469, loss: 0.0428469218313694 2023-01-22 13:31:11.917511: step: 812/469, loss: 0.028586532920598984 2023-01-22 13:31:12.537691: step: 814/469, loss: 0.05149797722697258 2023-01-22 13:31:13.147061: step: 816/469, loss: 0.013561070896685123 2023-01-22 13:31:13.780909: step: 818/469, loss: 0.10084158927202225 2023-01-22 13:31:14.469834: step: 820/469, loss: 0.014455013908445835 2023-01-22 13:31:15.097202: step: 822/469, loss: 0.01934192143380642 2023-01-22 13:31:15.691834: step: 824/469, loss: 0.03623116388916969 2023-01-22 13:31:16.316802: step: 826/469, loss: 0.0033513649832457304 2023-01-22 13:31:16.941635: step: 828/469, loss: 0.06103557348251343 2023-01-22 13:31:17.583373: step: 830/469, loss: 0.09281483292579651 2023-01-22 13:31:18.222611: step: 832/469, loss: 0.02423933893442154 2023-01-22 13:31:18.900816: step: 834/469, loss: 0.0684056505560875 2023-01-22 13:31:19.516737: step: 836/469, loss: 0.09093356877565384 2023-01-22 13:31:20.246673: step: 838/469, loss: 0.023410648107528687 2023-01-22 13:31:20.870800: step: 840/469, loss: 0.08767075836658478 2023-01-22 13:31:21.409271: step: 842/469, loss: 0.007343295030295849 2023-01-22 13:31:22.010822: step: 844/469, loss: 0.00962060783058405 2023-01-22 13:31:22.655508: step: 846/469, loss: 0.11975578963756561 2023-01-22 13:31:23.223097: step: 848/469, loss: 0.006536045111715794 2023-01-22 13:31:23.896634: step: 850/469, loss: 0.01408056914806366 2023-01-22 13:31:24.509389: step: 852/469, loss: 0.01377950794994831 2023-01-22 13:31:25.150531: step: 854/469, loss: 
0.008818451315164566 2023-01-22 13:31:25.738440: step: 856/469, loss: 0.0555163249373436 2023-01-22 13:31:26.436438: step: 858/469, loss: 0.1449873000383377 2023-01-22 13:31:27.069966: step: 860/469, loss: 0.028125213459134102 2023-01-22 13:31:27.781232: step: 862/469, loss: 0.036477431654930115 2023-01-22 13:31:28.482756: step: 864/469, loss: 0.10478488355875015 2023-01-22 13:31:29.076570: step: 866/469, loss: 0.0466022826731205 2023-01-22 13:31:29.672881: step: 868/469, loss: 0.05031195282936096 2023-01-22 13:31:30.281814: step: 870/469, loss: 0.015532007440924644 2023-01-22 13:31:30.944583: step: 872/469, loss: 0.0006207653786987066 2023-01-22 13:31:31.595699: step: 874/469, loss: 0.05724656581878662 2023-01-22 13:31:32.177364: step: 876/469, loss: 0.09066110849380493 2023-01-22 13:31:32.838735: step: 878/469, loss: 0.01952778361737728 2023-01-22 13:31:33.534389: step: 880/469, loss: 0.020550131797790527 2023-01-22 13:31:34.136560: step: 882/469, loss: 0.02035904861986637 2023-01-22 13:31:34.760420: step: 884/469, loss: 0.0837024450302124 2023-01-22 13:31:35.357480: step: 886/469, loss: 0.02065715566277504 2023-01-22 13:31:35.942832: step: 888/469, loss: 0.061157356947660446 2023-01-22 13:31:36.572833: step: 890/469, loss: 0.0634181872010231 2023-01-22 13:31:37.173119: step: 892/469, loss: 0.07618377357721329 2023-01-22 13:31:37.867884: step: 894/469, loss: 0.006980047095566988 2023-01-22 13:31:38.524610: step: 896/469, loss: 0.07628069072961807 2023-01-22 13:31:39.240044: step: 898/469, loss: 0.03572295978665352 2023-01-22 13:31:39.932457: step: 900/469, loss: 0.0065027568489313126 2023-01-22 13:31:40.546392: step: 902/469, loss: 0.05485999584197998 2023-01-22 13:31:41.199664: step: 904/469, loss: 0.016958028078079224 2023-01-22 13:31:41.948938: step: 906/469, loss: 0.07176735252141953 2023-01-22 13:31:42.624529: step: 908/469, loss: 0.008512450382113457 2023-01-22 13:31:43.331680: step: 910/469, loss: 0.015240861102938652 2023-01-22 13:31:44.020183: step: 912/469, loss: 0.01660452038049698 2023-01-22 13:31:44.707765: step: 914/469, loss: 0.001401496701873839 2023-01-22 13:31:45.398837: step: 916/469, loss: 0.013293241150677204 2023-01-22 13:31:45.988682: step: 918/469, loss: 0.01657457835972309 2023-01-22 13:31:46.619166: step: 920/469, loss: 0.08381480723619461 2023-01-22 13:31:47.216811: step: 922/469, loss: 0.0026067576836794615 2023-01-22 13:31:47.879269: step: 924/469, loss: 0.008933331817388535 2023-01-22 13:31:48.446446: step: 926/469, loss: 0.011091455817222595 2023-01-22 13:31:49.168200: step: 928/469, loss: 0.01633446104824543 2023-01-22 13:31:49.772682: step: 930/469, loss: 0.027701539918780327 2023-01-22 13:31:50.418014: step: 932/469, loss: 0.08012156188488007 2023-01-22 13:31:51.073839: step: 934/469, loss: 0.01416864525526762 2023-01-22 13:31:51.712311: step: 936/469, loss: 0.009045025333762169 2023-01-22 13:31:52.349023: step: 938/469, loss: 0.03131178393959999 ================================================== Loss: 0.063 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3012100355239787, 'r': 0.32178605313092984, 'f1': 0.31115825688073395}, 'combined': 0.22927450507001448, 'epoch': 25} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3104830683604645, 'r': 0.26673705506905415, 'f1': 0.28695236337645297}, 'combined': 0.1565194709326107, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.2976741622574956, 'r': 0.3202680265654649, 'f1': 0.30855804387568553}, 'combined': 0.22735855864524196, 'epoch': 25} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31641455921607675, 'r': 0.2718328189422654, 'f1': 0.2924343219526536}, 'combined': 0.15950963015599287, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2829288373424972, 'r': 0.31245651486401016, 'f1': 0.2969604749023144}, 'combined': 0.2188129815069685, 'epoch': 25} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30678691807840913, 'r': 0.2663685135008328, 'f1': 0.28515258105427055}, 'combined': 0.15553777148414755, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2647569444444444, 'r': 0.3630952380952381, 'f1': 0.30622489959839355}, 'combined': 0.2041499330655957, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3888888888888889, 'r': 0.2413793103448276, 'f1': 0.2978723404255319}, 'combined': 0.19858156028368792, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 
'epoch': 11} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:34:40.354509: step: 2/469, loss: 0.052119068801403046 2023-01-22 13:34:41.078223: step: 4/469, loss: 0.057875435799360275 2023-01-22 13:34:41.685296: step: 6/469, loss: 0.01828203722834587 2023-01-22 13:34:42.340843: step: 8/469, loss: 0.03723328188061714 2023-01-22 13:34:42.945201: step: 10/469, loss: 0.049895111471414566 2023-01-22 13:34:43.549501: step: 12/469, loss: 0.03184844180941582 2023-01-22 13:34:44.105427: step: 14/469, loss: 0.020090363919734955 2023-01-22 13:34:44.681816: step: 16/469, loss: 0.05181034654378891 2023-01-22 13:34:45.339554: step: 18/469, loss: 0.05390797555446625 2023-01-22 13:34:45.921941: step: 20/469, loss: 0.02640211582183838 2023-01-22 13:34:46.539250: step: 22/469, loss: 0.00038729762309230864 2023-01-22 13:34:47.133418: step: 24/469, loss: 0.017456218600273132 2023-01-22 13:34:47.754068: step: 26/469, loss: 0.018573587760329247 2023-01-22 13:34:48.325161: step: 28/469, loss: 0.03029969520866871 2023-01-22 13:34:48.978083: step: 30/469, loss: 0.07916606217622757 2023-01-22 13:34:49.602575: step: 32/469, loss: 0.02013191021978855 2023-01-22 13:34:50.207848: step: 34/469, loss: 0.03712153434753418 2023-01-22 13:34:50.890092: step: 36/469, loss: 0.021866630762815475 2023-01-22 13:34:51.597432: step: 38/469, loss: 0.0033951029181480408 2023-01-22 13:34:52.276724: step: 40/469, loss: 0.03701892867684364 2023-01-22 13:34:52.982661: step: 42/469, loss: 0.00207902560941875 2023-01-22 13:34:53.552640: step: 44/469, loss: 0.05544053018093109 2023-01-22 13:34:54.141373: step: 46/469, loss: 0.01703200861811638 2023-01-22 13:34:54.803731: step: 48/469, loss: 0.004666522610932589 2023-01-22 13:34:55.427657: step: 50/469, loss: 0.024404587224125862 2023-01-22 13:34:56.080476: step: 52/469, loss: 0.00701537961140275 2023-01-22 13:34:56.675852: step: 54/469, loss: 0.02445834130048752 2023-01-22 13:34:57.319632: step: 56/469, loss: 0.1930391788482666 2023-01-22 13:34:57.894545: step: 58/469, loss: 0.02732478454709053 2023-01-22 13:34:58.519193: step: 60/469, loss: 0.04507780447602272 2023-01-22 13:34:59.164579: step: 62/469, loss: 0.12123608589172363 2023-01-22 13:34:59.837031: step: 64/469, loss: 0.11656873673200607 2023-01-22 13:35:00.435668: step: 66/469, loss: 0.009603939950466156 2023-01-22 13:35:01.192588: step: 68/469, loss: 0.013164698146283627 2023-01-22 13:35:01.826414: step: 70/469, loss: 0.020418571308255196 2023-01-22 13:35:02.471473: step: 72/469, loss: 0.02749587595462799 2023-01-22 13:35:03.045927: step: 74/469, loss: 0.003771075513213873 2023-01-22 13:35:03.639178: step: 76/469, loss: 0.022811949253082275 2023-01-22 13:35:04.355319: step: 78/469, loss: 0.0032443255186080933 2023-01-22 13:35:04.929690: step: 80/469, loss: 0.061158884316682816 2023-01-22 13:35:05.565416: step: 82/469, loss: 0.033115629106760025 2023-01-22 13:35:06.168908: step: 84/469, loss: 0.008742800913751125 2023-01-22 13:35:06.823874: step: 86/469, loss: 0.07537149637937546 2023-01-22 13:35:07.479686: step: 88/469, loss: 0.2844546437263489 2023-01-22 13:35:08.027698: step: 90/469, loss: 0.02401832304894924 2023-01-22 13:35:08.687391: step: 92/469, loss: 0.01951124705374241 2023-01-22 13:35:09.328966: step: 94/469, loss: 0.027710849419236183 2023-01-22 13:35:10.022856: step: 96/469, loss: 
0.04442556947469711 2023-01-22 13:35:10.677752: step: 98/469, loss: 0.019059425219893456 2023-01-22 13:35:11.254983: step: 100/469, loss: 0.09212709963321686 2023-01-22 13:35:11.869485: step: 102/469, loss: 0.01231745257973671 2023-01-22 13:35:12.589229: step: 104/469, loss: 0.0010406281799077988 2023-01-22 13:35:13.154645: step: 106/469, loss: 0.0013047197135165334 2023-01-22 13:35:13.816570: step: 108/469, loss: 0.08898918330669403 2023-01-22 13:35:14.455694: step: 110/469, loss: 0.013732579536736012 2023-01-22 13:35:15.066404: step: 112/469, loss: 0.03436555713415146 2023-01-22 13:35:15.713067: step: 114/469, loss: 0.003770434530451894 2023-01-22 13:35:16.304617: step: 116/469, loss: 0.019989464432001114 2023-01-22 13:35:16.921467: step: 118/469, loss: 0.04549114778637886 2023-01-22 13:35:17.533255: step: 120/469, loss: 0.02905629761517048 2023-01-22 13:35:18.210252: step: 122/469, loss: 0.015283535234630108 2023-01-22 13:35:18.820592: step: 124/469, loss: 0.030058445408940315 2023-01-22 13:35:19.427291: step: 126/469, loss: 0.012366805225610733 2023-01-22 13:35:20.054809: step: 128/469, loss: 0.01682085543870926 2023-01-22 13:35:20.642485: step: 130/469, loss: 0.5060863494873047 2023-01-22 13:35:21.250484: step: 132/469, loss: 0.050181202590465546 2023-01-22 13:35:21.903378: step: 134/469, loss: 0.005061844363808632 2023-01-22 13:35:22.528521: step: 136/469, loss: 0.023658951744437218 2023-01-22 13:35:23.150717: step: 138/469, loss: 0.029817741364240646 2023-01-22 13:35:23.751221: step: 140/469, loss: 0.006416229996830225 2023-01-22 13:35:24.354287: step: 142/469, loss: 2.4560458660125732 2023-01-22 13:35:24.959722: step: 144/469, loss: 0.05049045756459236 2023-01-22 13:35:25.629244: step: 146/469, loss: 2.2055299282073975 2023-01-22 13:35:26.229339: step: 148/469, loss: 0.0047340369783341885 2023-01-22 13:35:26.973953: step: 150/469, loss: 0.008636378683149815 2023-01-22 13:35:27.666795: step: 152/469, loss: 0.08580818772315979 2023-01-22 13:35:28.371358: step: 154/469, loss: 0.04944342374801636 2023-01-22 13:35:28.982159: step: 156/469, loss: 0.025038911029696465 2023-01-22 13:35:29.574649: step: 158/469, loss: 0.0020327784586697817 2023-01-22 13:35:30.197153: step: 160/469, loss: 0.03520830348134041 2023-01-22 13:35:30.803795: step: 162/469, loss: 0.007501641754060984 2023-01-22 13:35:31.465269: step: 164/469, loss: 0.6785876154899597 2023-01-22 13:35:32.091093: step: 166/469, loss: 0.03633784130215645 2023-01-22 13:35:32.714136: step: 168/469, loss: 0.018300127238035202 2023-01-22 13:35:33.285279: step: 170/469, loss: 0.011675868183374405 2023-01-22 13:35:33.896659: step: 172/469, loss: 0.06578411906957626 2023-01-22 13:35:34.563220: step: 174/469, loss: 0.05599719285964966 2023-01-22 13:35:35.195953: step: 176/469, loss: 0.06624706834554672 2023-01-22 13:35:35.834959: step: 178/469, loss: 0.23955385386943817 2023-01-22 13:35:36.441866: step: 180/469, loss: 0.2692038416862488 2023-01-22 13:35:37.101803: step: 182/469, loss: 0.011191947385668755 2023-01-22 13:35:37.705524: step: 184/469, loss: 0.0032901111990213394 2023-01-22 13:35:38.305833: step: 186/469, loss: 0.33437827229499817 2023-01-22 13:35:38.980067: step: 188/469, loss: 0.02446429431438446 2023-01-22 13:35:39.597363: step: 190/469, loss: 0.004033736884593964 2023-01-22 13:35:40.211880: step: 192/469, loss: 0.003232074435800314 2023-01-22 13:35:40.811620: step: 194/469, loss: 0.012413287535309792 2023-01-22 13:35:41.437735: step: 196/469, loss: 0.00035754716373048723 2023-01-22 13:35:42.021753: step: 198/469, loss: 
0.0028858347795903683 2023-01-22 13:35:42.704502: step: 200/469, loss: 0.011671126820147038 2023-01-22 13:35:43.330028: step: 202/469, loss: 0.03840034082531929 2023-01-22 13:35:44.101993: step: 204/469, loss: 0.039894383400678635 2023-01-22 13:35:44.702348: step: 206/469, loss: 0.032491691410541534 2023-01-22 13:35:45.310182: step: 208/469, loss: 0.031528256833553314 2023-01-22 13:35:45.968419: step: 210/469, loss: 0.05007137358188629 2023-01-22 13:35:46.552087: step: 212/469, loss: 0.05403231456875801 2023-01-22 13:35:47.149078: step: 214/469, loss: 0.010872680693864822 2023-01-22 13:35:47.761477: step: 216/469, loss: 0.0020133310463279486 2023-01-22 13:35:48.451082: step: 218/469, loss: 0.1598788946866989 2023-01-22 13:35:49.023754: step: 220/469, loss: 0.0006900245207361877 2023-01-22 13:35:49.657031: step: 222/469, loss: 0.020223049446940422 2023-01-22 13:35:50.345545: step: 224/469, loss: 0.03611772507429123 2023-01-22 13:35:51.013355: step: 226/469, loss: 0.0020694381091743708 2023-01-22 13:35:51.621688: step: 228/469, loss: 0.019679654389619827 2023-01-22 13:35:52.235069: step: 230/469, loss: 1.0678807497024536 2023-01-22 13:35:52.846347: step: 232/469, loss: 0.017862658947706223 2023-01-22 13:35:53.489048: step: 234/469, loss: 0.007371433079242706 2023-01-22 13:35:54.051773: step: 236/469, loss: 0.0022500392515212297 2023-01-22 13:35:54.692482: step: 238/469, loss: 0.031100109219551086 2023-01-22 13:35:55.282715: step: 240/469, loss: 0.0032845602836459875 2023-01-22 13:35:56.000387: step: 242/469, loss: 0.2349967211484909 2023-01-22 13:35:56.654900: step: 244/469, loss: 0.006704397965222597 2023-01-22 13:35:57.266067: step: 246/469, loss: 0.002046919660642743 2023-01-22 13:35:57.882243: step: 248/469, loss: 0.048905618488788605 2023-01-22 13:35:58.485007: step: 250/469, loss: 0.02675948478281498 2023-01-22 13:35:59.131319: step: 252/469, loss: 0.013306604698300362 2023-01-22 13:35:59.773354: step: 254/469, loss: 0.04287441447377205 2023-01-22 13:36:00.378740: step: 256/469, loss: 0.0009932884713634849 2023-01-22 13:36:01.009246: step: 258/469, loss: 0.0010622227564454079 2023-01-22 13:36:01.658398: step: 260/469, loss: 0.006308846175670624 2023-01-22 13:36:02.307093: step: 262/469, loss: 0.039348725229501724 2023-01-22 13:36:02.958320: step: 264/469, loss: 0.00859423354268074 2023-01-22 13:36:03.614215: step: 266/469, loss: 0.0015562705229967833 2023-01-22 13:36:04.226967: step: 268/469, loss: 0.03566424921154976 2023-01-22 13:36:04.927043: step: 270/469, loss: 0.6679173707962036 2023-01-22 13:36:05.554366: step: 272/469, loss: 0.01110112201422453 2023-01-22 13:36:06.142591: step: 274/469, loss: 0.026794644072651863 2023-01-22 13:36:06.761855: step: 276/469, loss: 0.019411958754062653 2023-01-22 13:36:07.373018: step: 278/469, loss: 0.030573224648833275 2023-01-22 13:36:08.057921: step: 280/469, loss: 0.00015785006689839065 2023-01-22 13:36:08.674643: step: 282/469, loss: 0.02100604772567749 2023-01-22 13:36:09.322731: step: 284/469, loss: 0.004133254289627075 2023-01-22 13:36:09.972151: step: 286/469, loss: 0.05037087947130203 2023-01-22 13:36:10.565047: step: 288/469, loss: 0.029138660058379173 2023-01-22 13:36:11.194754: step: 290/469, loss: 0.02972107194364071 2023-01-22 13:36:11.797699: step: 292/469, loss: 0.1522054374217987 2023-01-22 13:36:12.377715: step: 294/469, loss: 0.5802236199378967 2023-01-22 13:36:13.024043: step: 296/469, loss: 0.003168710507452488 2023-01-22 13:36:13.634795: step: 298/469, loss: 0.003728737821802497 2023-01-22 13:36:14.192825: step: 300/469, 
loss: 0.135112926363945 2023-01-22 13:36:14.907543: step: 302/469, loss: 0.0040602353401482105 2023-01-22 13:36:15.590599: step: 304/469, loss: 0.014956161379814148 2023-01-22 13:36:16.213738: step: 306/469, loss: 0.06096882000565529 2023-01-22 13:36:16.755905: step: 308/469, loss: 0.004549896810203791 2023-01-22 13:36:17.356954: step: 310/469, loss: 0.012716942466795444 2023-01-22 13:36:17.975398: step: 312/469, loss: 0.01619807258248329 2023-01-22 13:36:18.573783: step: 314/469, loss: 0.011792403645813465 2023-01-22 13:36:19.137802: step: 316/469, loss: 0.012099232524633408 2023-01-22 13:36:19.750810: step: 318/469, loss: 0.1219257116317749 2023-01-22 13:36:20.397007: step: 320/469, loss: 0.0035128050949424505 2023-01-22 13:36:21.049015: step: 322/469, loss: 0.05700281262397766 2023-01-22 13:36:21.628953: step: 324/469, loss: 0.033953264355659485 2023-01-22 13:36:22.256909: step: 326/469, loss: 0.7913249135017395 2023-01-22 13:36:22.914289: step: 328/469, loss: 0.0027427980676293373 2023-01-22 13:36:23.613413: step: 330/469, loss: 0.014402883127331734 2023-01-22 13:36:24.218559: step: 332/469, loss: 0.0012608341639861465 2023-01-22 13:36:24.837764: step: 334/469, loss: 0.005000323057174683 2023-01-22 13:36:25.481049: step: 336/469, loss: 0.042454447597265244 2023-01-22 13:36:26.043984: step: 338/469, loss: 0.021152367815375328 2023-01-22 13:36:26.659531: step: 340/469, loss: 0.0006268061115406454 2023-01-22 13:36:27.208133: step: 342/469, loss: 0.01574760116636753 2023-01-22 13:36:27.826755: step: 344/469, loss: 0.7566384673118591 2023-01-22 13:36:28.575703: step: 346/469, loss: 0.06349111348390579 2023-01-22 13:36:29.119304: step: 348/469, loss: 0.0023796861059963703 2023-01-22 13:36:29.743763: step: 350/469, loss: 0.08010248094797134 2023-01-22 13:36:30.375206: step: 352/469, loss: 0.05025271326303482 2023-01-22 13:36:31.013402: step: 354/469, loss: 0.005450638476759195 2023-01-22 13:36:31.598630: step: 356/469, loss: 0.0031277972739189863 2023-01-22 13:36:32.263696: step: 358/469, loss: 0.01332804374396801 2023-01-22 13:36:32.968199: step: 360/469, loss: 0.0004827117663808167 2023-01-22 13:36:33.632619: step: 362/469, loss: 0.08021761476993561 2023-01-22 13:36:34.296438: step: 364/469, loss: 0.00970520917326212 2023-01-22 13:36:35.007022: step: 366/469, loss: 0.025385988876223564 2023-01-22 13:36:35.608905: step: 368/469, loss: 0.0004828694509342313 2023-01-22 13:36:36.190073: step: 370/469, loss: 0.021221010014414787 2023-01-22 13:36:36.834436: step: 372/469, loss: 0.014321565628051758 2023-01-22 13:36:37.499119: step: 374/469, loss: 0.010588493198156357 2023-01-22 13:36:38.126244: step: 376/469, loss: 0.04768862575292587 2023-01-22 13:36:38.673655: step: 378/469, loss: 0.0011960220290347934 2023-01-22 13:36:39.352849: step: 380/469, loss: 0.019489774480462074 2023-01-22 13:36:39.992873: step: 382/469, loss: 0.004130158107727766 2023-01-22 13:36:40.620945: step: 384/469, loss: 0.043059807270765305 2023-01-22 13:36:41.403634: step: 386/469, loss: 0.0044572739861905575 2023-01-22 13:36:42.019963: step: 388/469, loss: 0.01584039442241192 2023-01-22 13:36:42.660850: step: 390/469, loss: 0.0034310214687138796 2023-01-22 13:36:43.310553: step: 392/469, loss: 0.04041663184762001 2023-01-22 13:36:43.920289: step: 394/469, loss: 0.025866210460662842 2023-01-22 13:36:44.581225: step: 396/469, loss: 0.025049636140465736 2023-01-22 13:36:45.203491: step: 398/469, loss: 0.019481627270579338 2023-01-22 13:36:45.776427: step: 400/469, loss: 0.005536247044801712 2023-01-22 13:36:46.411967: step: 
402/469, loss: 0.04828262701630592 2023-01-22 13:36:47.067748: step: 404/469, loss: 0.019890954717993736 2023-01-22 13:36:47.725742: step: 406/469, loss: 0.017756346613168716 2023-01-22 13:36:48.337410: step: 408/469, loss: 0.0208131093531847 2023-01-22 13:36:48.940946: step: 410/469, loss: 0.010058622807264328 2023-01-22 13:36:49.609584: step: 412/469, loss: 2.3890333977760747e-05 2023-01-22 13:36:50.255191: step: 414/469, loss: 0.027549250051379204 2023-01-22 13:36:50.948119: step: 416/469, loss: 0.01741381548345089 2023-01-22 13:36:51.549599: step: 418/469, loss: 0.03779299184679985 2023-01-22 13:36:52.198808: step: 420/469, loss: 0.02548857033252716 2023-01-22 13:36:52.904179: step: 422/469, loss: 0.04002354294061661 2023-01-22 13:36:53.542385: step: 424/469, loss: 0.052142735570669174 2023-01-22 13:36:54.123116: step: 426/469, loss: 0.01683874800801277 2023-01-22 13:36:54.761925: step: 428/469, loss: 0.015530227683484554 2023-01-22 13:36:55.354755: step: 430/469, loss: 0.004677011165767908 2023-01-22 13:36:55.970581: step: 432/469, loss: 0.024919092655181885 2023-01-22 13:36:56.579157: step: 434/469, loss: 0.028229011222720146 2023-01-22 13:36:57.210216: step: 436/469, loss: 0.07605470716953278 2023-01-22 13:36:57.864766: step: 438/469, loss: 0.053130775690078735 2023-01-22 13:36:58.513700: step: 440/469, loss: 1.0160630941390991 2023-01-22 13:36:59.151361: step: 442/469, loss: 0.029920082539319992 2023-01-22 13:36:59.785168: step: 444/469, loss: 0.014652331359684467 2023-01-22 13:37:00.431962: step: 446/469, loss: 0.0997944325208664 2023-01-22 13:37:01.073829: step: 448/469, loss: 0.0088579673320055 2023-01-22 13:37:01.784201: step: 450/469, loss: 0.05553983896970749 2023-01-22 13:37:02.459599: step: 452/469, loss: 0.05492016673088074 2023-01-22 13:37:03.176228: step: 454/469, loss: 0.05790337920188904 2023-01-22 13:37:03.812796: step: 456/469, loss: 0.010022619739174843 2023-01-22 13:37:04.458776: step: 458/469, loss: 0.0025393322575837374 2023-01-22 13:37:05.049724: step: 460/469, loss: 0.07157866656780243 2023-01-22 13:37:05.705997: step: 462/469, loss: 0.0258535984903574 2023-01-22 13:37:06.357644: step: 464/469, loss: 0.03282848000526428 2023-01-22 13:37:06.976554: step: 466/469, loss: 0.0360444150865078 2023-01-22 13:37:07.657043: step: 468/469, loss: 0.06956269592046738 2023-01-22 13:37:08.296170: step: 470/469, loss: 0.03313492611050606 2023-01-22 13:37:08.906434: step: 472/469, loss: 0.024751227349042892 2023-01-22 13:37:09.511920: step: 474/469, loss: 0.011004014872014523 2023-01-22 13:37:10.145840: step: 476/469, loss: 0.05356544628739357 2023-01-22 13:37:10.773858: step: 478/469, loss: 0.05753597989678383 2023-01-22 13:37:11.413281: step: 480/469, loss: 0.2702380120754242 2023-01-22 13:37:12.045456: step: 482/469, loss: 3.117340326309204 2023-01-22 13:37:12.631497: step: 484/469, loss: 0.018965883180499077 2023-01-22 13:37:13.287285: step: 486/469, loss: 0.14978134632110596 2023-01-22 13:37:13.927114: step: 488/469, loss: 0.0010173512855544686 2023-01-22 13:37:14.557043: step: 490/469, loss: 0.010677210055291653 2023-01-22 13:37:15.194615: step: 492/469, loss: 0.1280421018600464 2023-01-22 13:37:15.778058: step: 494/469, loss: 0.07581206411123276 2023-01-22 13:37:16.385876: step: 496/469, loss: 0.06245987489819527 2023-01-22 13:37:17.048044: step: 498/469, loss: 0.009609738364815712 2023-01-22 13:37:17.696546: step: 500/469, loss: 0.005049293395131826 2023-01-22 13:37:18.307890: step: 502/469, loss: 0.009780517779290676 2023-01-22 13:37:18.950189: step: 504/469, loss: 
0.06654675304889679 2023-01-22 13:37:19.708028: step: 506/469, loss: 0.01880868338048458 2023-01-22 13:37:20.277753: step: 508/469, loss: 0.017586544156074524 2023-01-22 13:37:20.856102: step: 510/469, loss: 0.014270694926381111 2023-01-22 13:37:21.450512: step: 512/469, loss: 0.05923789367079735 2023-01-22 13:37:22.002502: step: 514/469, loss: 0.002917267382144928 2023-01-22 13:37:22.541601: step: 516/469, loss: 0.0029033832252025604 2023-01-22 13:37:23.161501: step: 518/469, loss: 0.03822728246450424 2023-01-22 13:37:23.823772: step: 520/469, loss: 6.42087984085083 2023-01-22 13:37:24.449908: step: 522/469, loss: 0.015480708330869675 2023-01-22 13:37:25.057184: step: 524/469, loss: 0.04548148065805435 2023-01-22 13:37:25.677535: step: 526/469, loss: 0.007104057818651199 2023-01-22 13:37:26.371220: step: 528/469, loss: 0.05192127078771591 2023-01-22 13:37:27.052196: step: 530/469, loss: 0.015678923577070236 2023-01-22 13:37:27.687548: step: 532/469, loss: 0.02728353999555111 2023-01-22 13:37:28.299731: step: 534/469, loss: 0.07811377942562103 2023-01-22 13:37:28.887784: step: 536/469, loss: 0.006859962362796068 2023-01-22 13:37:29.499496: step: 538/469, loss: 0.010269735008478165 2023-01-22 13:37:30.057907: step: 540/469, loss: 0.026836326345801353 2023-01-22 13:37:30.650110: step: 542/469, loss: 0.021811068058013916 2023-01-22 13:37:31.259664: step: 544/469, loss: 0.06388245522975922 2023-01-22 13:37:31.826676: step: 546/469, loss: 0.035730913281440735 2023-01-22 13:37:32.514090: step: 548/469, loss: 0.0647575780749321 2023-01-22 13:37:33.123253: step: 550/469, loss: 0.047171153128147125 2023-01-22 13:37:33.745013: step: 552/469, loss: 0.0014567658072337508 2023-01-22 13:37:34.325314: step: 554/469, loss: 0.06879657506942749 2023-01-22 13:37:34.961905: step: 556/469, loss: 0.08878390491008759 2023-01-22 13:37:35.561129: step: 558/469, loss: 0.02075077034533024 2023-01-22 13:37:36.154246: step: 560/469, loss: 0.0844666063785553 2023-01-22 13:37:36.761839: step: 562/469, loss: 0.006936375983059406 2023-01-22 13:37:37.463314: step: 564/469, loss: 0.03022335097193718 2023-01-22 13:37:38.092533: step: 566/469, loss: 0.018008532002568245 2023-01-22 13:37:38.725303: step: 568/469, loss: 0.057579126209020615 2023-01-22 13:37:39.329363: step: 570/469, loss: 0.0805489718914032 2023-01-22 13:37:39.938478: step: 572/469, loss: 0.07772126793861389 2023-01-22 13:37:40.571127: step: 574/469, loss: 0.031969670206308365 2023-01-22 13:37:41.185062: step: 576/469, loss: 0.009727970696985722 2023-01-22 13:37:41.801720: step: 578/469, loss: 0.09800968319177628 2023-01-22 13:37:42.396965: step: 580/469, loss: 0.014431802555918694 2023-01-22 13:37:43.035562: step: 582/469, loss: 0.011403510347008705 2023-01-22 13:37:43.650307: step: 584/469, loss: 0.02918725647032261 2023-01-22 13:37:44.289665: step: 586/469, loss: 0.009596250019967556 2023-01-22 13:37:44.885941: step: 588/469, loss: 0.020912984386086464 2023-01-22 13:37:45.525166: step: 590/469, loss: 0.037036627531051636 2023-01-22 13:37:46.198203: step: 592/469, loss: 0.022930650040507317 2023-01-22 13:37:46.730558: step: 594/469, loss: 0.020664162933826447 2023-01-22 13:37:47.313462: step: 596/469, loss: 0.04543563351035118 2023-01-22 13:37:47.992779: step: 598/469, loss: 0.0345572903752327 2023-01-22 13:37:48.570752: step: 600/469, loss: 0.0032024672254920006 2023-01-22 13:37:49.200874: step: 602/469, loss: 0.0560622401535511 2023-01-22 13:37:49.824769: step: 604/469, loss: 0.08288592100143433 2023-01-22 13:37:50.461512: step: 606/469, loss: 
0.05112718790769577 2023-01-22 13:37:51.068634: step: 608/469, loss: 0.011430895887315273 2023-01-22 13:37:51.595222: step: 610/469, loss: 0.004481939598917961 2023-01-22 13:37:52.199761: step: 612/469, loss: 0.010696549899876118 2023-01-22 13:37:52.820195: step: 614/469, loss: 0.04353227838873863 2023-01-22 13:37:53.369040: step: 616/469, loss: 0.00513804005458951 2023-01-22 13:37:53.968111: step: 618/469, loss: 0.04890283942222595 2023-01-22 13:37:54.594538: step: 620/469, loss: 0.007950910367071629 2023-01-22 13:37:55.176699: step: 622/469, loss: 0.003297393675893545 2023-01-22 13:37:55.872414: step: 624/469, loss: 0.0445278026163578 2023-01-22 13:37:56.566115: step: 626/469, loss: 0.3941957950592041 2023-01-22 13:37:57.229906: step: 628/469, loss: 0.003935977350920439 2023-01-22 13:37:57.786855: step: 630/469, loss: 0.13316921889781952 2023-01-22 13:37:58.440794: step: 632/469, loss: 0.037868812680244446 2023-01-22 13:37:59.045426: step: 634/469, loss: 0.011567690409719944 2023-01-22 13:37:59.627017: step: 636/469, loss: 0.0038243380840867758 2023-01-22 13:38:00.186810: step: 638/469, loss: 0.011350546032190323 2023-01-22 13:38:00.757516: step: 640/469, loss: 0.053472843021154404 2023-01-22 13:38:01.323055: step: 642/469, loss: 0.04042840003967285 2023-01-22 13:38:02.063309: step: 644/469, loss: 0.025590164586901665 2023-01-22 13:38:02.672457: step: 646/469, loss: 0.4160204529762268 2023-01-22 13:38:03.256883: step: 648/469, loss: 0.016708774492144585 2023-01-22 13:38:03.880850: step: 650/469, loss: 0.0036376859061419964 2023-01-22 13:38:04.602210: step: 652/469, loss: 0.24705639481544495 2023-01-22 13:38:05.182556: step: 654/469, loss: 0.026834789663553238 2023-01-22 13:38:05.808846: step: 656/469, loss: 0.013414159417152405 2023-01-22 13:38:06.461541: step: 658/469, loss: 0.010435991920530796 2023-01-22 13:38:07.086773: step: 660/469, loss: 0.01883425936102867 2023-01-22 13:38:07.726642: step: 662/469, loss: 0.012636534869670868 2023-01-22 13:38:08.429248: step: 664/469, loss: 0.019053366035223007 2023-01-22 13:38:09.015891: step: 666/469, loss: 0.002981501864269376 2023-01-22 13:38:09.618606: step: 668/469, loss: 0.01738005504012108 2023-01-22 13:38:10.228220: step: 670/469, loss: 0.07976853847503662 2023-01-22 13:38:10.852032: step: 672/469, loss: 0.014995337463915348 2023-01-22 13:38:11.496375: step: 674/469, loss: 0.013884548097848892 2023-01-22 13:38:12.065889: step: 676/469, loss: 0.010399105958640575 2023-01-22 13:38:12.651555: step: 678/469, loss: 0.012271115556359291 2023-01-22 13:38:13.285761: step: 680/469, loss: 0.0009961036266759038 2023-01-22 13:38:13.930371: step: 682/469, loss: 0.023565329611301422 2023-01-22 13:38:14.561376: step: 684/469, loss: 0.035644806921482086 2023-01-22 13:38:15.219713: step: 686/469, loss: 1.081357479095459 2023-01-22 13:38:15.826954: step: 688/469, loss: 0.08901568502187729 2023-01-22 13:38:16.471140: step: 690/469, loss: 0.15900513529777527 2023-01-22 13:38:17.136202: step: 692/469, loss: 0.08483231067657471 2023-01-22 13:38:17.848468: step: 694/469, loss: 0.04530998691916466 2023-01-22 13:38:18.479430: step: 696/469, loss: 0.026291202753782272 2023-01-22 13:38:19.112309: step: 698/469, loss: 0.02551412396132946 2023-01-22 13:38:19.748795: step: 700/469, loss: 0.02575918473303318 2023-01-22 13:38:20.408871: step: 702/469, loss: 0.0030414615757763386 2023-01-22 13:38:21.024459: step: 704/469, loss: 0.06050509586930275 2023-01-22 13:38:21.601149: step: 706/469, loss: 0.007125751581043005 2023-01-22 13:38:22.226564: step: 708/469, loss: 
0.053902920335531235 2023-01-22 13:38:22.828073: step: 710/469, loss: 0.009892820380628109 2023-01-22 13:38:23.390796: step: 712/469, loss: 0.026334915310144424 2023-01-22 13:38:24.009134: step: 714/469, loss: 0.007749361451715231 2023-01-22 13:38:24.673499: step: 716/469, loss: 0.1088370531797409 2023-01-22 13:38:25.286206: step: 718/469, loss: 0.29399850964546204 2023-01-22 13:38:25.944248: step: 720/469, loss: 0.004869928117841482 2023-01-22 13:38:26.623289: step: 722/469, loss: 0.000951407419051975 2023-01-22 13:38:27.306015: step: 724/469, loss: 0.05764957517385483 2023-01-22 13:38:27.937838: step: 726/469, loss: 0.035510048270225525 2023-01-22 13:38:28.532437: step: 728/469, loss: 0.022459859028458595 2023-01-22 13:38:29.127105: step: 730/469, loss: 0.1560434103012085 2023-01-22 13:38:29.743103: step: 732/469, loss: 0.020503107458353043 2023-01-22 13:38:30.357960: step: 734/469, loss: 0.020482506603002548 2023-01-22 13:38:30.995808: step: 736/469, loss: 0.0142503771930933 2023-01-22 13:38:31.608272: step: 738/469, loss: 0.8064932823181152 2023-01-22 13:38:32.134811: step: 740/469, loss: 0.026013199239969254 2023-01-22 13:38:32.666045: step: 742/469, loss: 0.1833120733499527 2023-01-22 13:38:33.288305: step: 744/469, loss: 0.014799002557992935 2023-01-22 13:38:33.888899: step: 746/469, loss: 0.011398497968912125 2023-01-22 13:38:34.489748: step: 748/469, loss: 0.053761355578899384 2023-01-22 13:38:35.113586: step: 750/469, loss: 0.03485002741217613 2023-01-22 13:38:35.824160: step: 752/469, loss: 0.014673682861030102 2023-01-22 13:38:36.377343: step: 754/469, loss: 0.10965120792388916 2023-01-22 13:38:36.949410: step: 756/469, loss: 0.05517612770199776 2023-01-22 13:38:37.574483: step: 758/469, loss: 0.09887915104627609 2023-01-22 13:38:38.247637: step: 760/469, loss: 0.01138286292552948 2023-01-22 13:38:38.845446: step: 762/469, loss: 0.03641868755221367 2023-01-22 13:38:39.499134: step: 764/469, loss: 0.003509464440867305 2023-01-22 13:38:40.132252: step: 766/469, loss: 0.014076833613216877 2023-01-22 13:38:40.765034: step: 768/469, loss: 0.00853697769343853 2023-01-22 13:38:41.428996: step: 770/469, loss: 0.03722076117992401 2023-01-22 13:38:42.107001: step: 772/469, loss: 0.002302593318745494 2023-01-22 13:38:42.670369: step: 774/469, loss: 0.029911432415246964 2023-01-22 13:38:43.310564: step: 776/469, loss: 0.06319554150104523 2023-01-22 13:38:43.932181: step: 778/469, loss: 0.03323550522327423 2023-01-22 13:38:44.621565: step: 780/469, loss: 0.006586946547031403 2023-01-22 13:38:45.223806: step: 782/469, loss: 0.013489141128957272 2023-01-22 13:38:45.821542: step: 784/469, loss: 0.030085476115345955 2023-01-22 13:38:46.454905: step: 786/469, loss: 0.04462684690952301 2023-01-22 13:38:47.118489: step: 788/469, loss: 0.02085179276764393 2023-01-22 13:38:47.765325: step: 790/469, loss: 0.010906665585935116 2023-01-22 13:38:48.387634: step: 792/469, loss: 0.02653900533914566 2023-01-22 13:38:48.995209: step: 794/469, loss: 0.1314384639263153 2023-01-22 13:38:49.562766: step: 796/469, loss: 0.15087509155273438 2023-01-22 13:38:50.170243: step: 798/469, loss: 0.000667657470330596 2023-01-22 13:38:50.882736: step: 800/469, loss: 0.5286304354667664 2023-01-22 13:38:51.488118: step: 802/469, loss: 0.01442254614084959 2023-01-22 13:38:52.220204: step: 804/469, loss: 0.006482763681560755 2023-01-22 13:38:52.827263: step: 806/469, loss: 0.007659719791263342 2023-01-22 13:38:53.396354: step: 808/469, loss: 0.05417191982269287 2023-01-22 13:38:54.055275: step: 810/469, loss: 
0.06836564838886261 2023-01-22 13:38:54.649846: step: 812/469, loss: 0.041151028126478195 2023-01-22 13:38:55.261626: step: 814/469, loss: 0.03098500706255436 2023-01-22 13:38:55.858613: step: 816/469, loss: 0.002014571102336049 2023-01-22 13:38:56.513245: step: 818/469, loss: 0.4861396551132202 2023-01-22 13:38:57.187050: step: 820/469, loss: 0.07447970658540726 2023-01-22 13:38:57.763725: step: 822/469, loss: 0.011415109969675541 2023-01-22 13:38:58.284650: step: 824/469, loss: 0.0076906392350792885 2023-01-22 13:38:58.889924: step: 826/469, loss: 0.004792527295649052 2023-01-22 13:38:59.576479: step: 828/469, loss: 0.014225320890545845 2023-01-22 13:39:00.145757: step: 830/469, loss: 0.043766554445028305 2023-01-22 13:39:00.794672: step: 832/469, loss: 0.06967785209417343 2023-01-22 13:39:01.451845: step: 834/469, loss: 0.05522594973444939 2023-01-22 13:39:02.147633: step: 836/469, loss: 0.04994479939341545 2023-01-22 13:39:02.845499: step: 838/469, loss: 0.026540275663137436 2023-01-22 13:39:03.579912: step: 840/469, loss: 0.10919288545846939 2023-01-22 13:39:04.179867: step: 842/469, loss: 0.1099095568060875 2023-01-22 13:39:04.839344: step: 844/469, loss: 0.05464242398738861 2023-01-22 13:39:05.450640: step: 846/469, loss: 0.041132453829050064 2023-01-22 13:39:06.157887: step: 848/469, loss: 0.05038027837872505 2023-01-22 13:39:06.807193: step: 850/469, loss: 0.0027275551110506058 2023-01-22 13:39:07.450776: step: 852/469, loss: 0.018887832760810852 2023-01-22 13:39:08.078449: step: 854/469, loss: 0.0044850511476397514 2023-01-22 13:39:08.680665: step: 856/469, loss: 1.9406450986862183 2023-01-22 13:39:09.286143: step: 858/469, loss: 0.08518712967634201 2023-01-22 13:39:09.927394: step: 860/469, loss: 0.04969101771712303 2023-01-22 13:39:10.510138: step: 862/469, loss: 0.02239733561873436 2023-01-22 13:39:11.074557: step: 864/469, loss: 0.03769238665699959 2023-01-22 13:39:11.691159: step: 866/469, loss: 0.053547605872154236 2023-01-22 13:39:12.411320: step: 868/469, loss: 0.008203242905437946 2023-01-22 13:39:13.023475: step: 870/469, loss: 0.05329376831650734 2023-01-22 13:39:13.695000: step: 872/469, loss: 0.17371654510498047 2023-01-22 13:39:14.311151: step: 874/469, loss: 0.017412085086107254 2023-01-22 13:39:14.943670: step: 876/469, loss: 0.02849368192255497 2023-01-22 13:39:15.544638: step: 878/469, loss: 0.006044892594218254 2023-01-22 13:39:16.263340: step: 880/469, loss: 0.010606056079268456 2023-01-22 13:39:16.935171: step: 882/469, loss: 0.022346163168549538 2023-01-22 13:39:17.574442: step: 884/469, loss: 0.02221456728875637 2023-01-22 13:39:18.217882: step: 886/469, loss: 0.01554245874285698 2023-01-22 13:39:18.827915: step: 888/469, loss: 0.038454942405223846 2023-01-22 13:39:19.494350: step: 890/469, loss: 0.15954706072807312 2023-01-22 13:39:20.090517: step: 892/469, loss: 0.013759526424109936 2023-01-22 13:39:20.695716: step: 894/469, loss: 0.02033253014087677 2023-01-22 13:39:21.268457: step: 896/469, loss: 0.00540660135447979 2023-01-22 13:39:21.864000: step: 898/469, loss: 0.010812917724251747 2023-01-22 13:39:22.491446: step: 900/469, loss: 0.16664300858974457 2023-01-22 13:39:23.119160: step: 902/469, loss: 0.03643622249364853 2023-01-22 13:39:23.765906: step: 904/469, loss: 0.7900838851928711 2023-01-22 13:39:24.435441: step: 906/469, loss: 0.09270291775465012 2023-01-22 13:39:25.051809: step: 908/469, loss: 0.027008721604943275 2023-01-22 13:39:25.699831: step: 910/469, loss: 0.11897628754377365 2023-01-22 13:39:26.382584: step: 912/469, loss: 
0.02917761169373989 2023-01-22 13:39:27.067243: step: 914/469, loss: 0.003338885260745883 2023-01-22 13:39:27.687100: step: 916/469, loss: 0.03401242569088936 2023-01-22 13:39:28.307295: step: 918/469, loss: 0.1241544559597969 2023-01-22 13:39:28.866606: step: 920/469, loss: 0.09011563658714294 2023-01-22 13:39:29.529783: step: 922/469, loss: 0.014786790125072002 2023-01-22 13:39:30.163541: step: 924/469, loss: 0.19662529230117798 2023-01-22 13:39:30.759922: step: 926/469, loss: 0.028263317421078682 2023-01-22 13:39:31.386970: step: 928/469, loss: 0.0006891288794577122 2023-01-22 13:39:32.043682: step: 930/469, loss: 0.021288258954882622 2023-01-22 13:39:32.675463: step: 932/469, loss: 0.02567538060247898 2023-01-22 13:39:33.369608: step: 934/469, loss: 0.031597234308719635 2023-01-22 13:39:34.029605: step: 936/469, loss: 0.04455246776342392 2023-01-22 13:39:34.634528: step: 938/469, loss: 0.029010459780693054 ================================================== Loss: 0.094 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30018131399317405, 'r': 0.33378795066413663, 'f1': 0.3160938903863432}, 'combined': 0.232911287653095, 'epoch': 26} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30879205295687595, 'r': 0.2873206933734152, 'f1': 0.29766968517264725}, 'combined': 0.16236528282144394, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2994322729091637, 'r': 0.33806869522002353, 'f1': 0.31757968338850695}, 'combined': 0.23400608249679458, 'epoch': 26} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30613489863794746, 'r': 0.2870889946055774, 'f1': 0.2963062050084005}, 'combined': 0.16162156636821845, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2884951790633609, 'r': 0.33119465528146746, 'f1': 0.3083738221436985}, 'combined': 0.22722281631640942, 'epoch': 26} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3089419770799593, 'r': 0.2857642624225424, 'f1': 0.29690146276410534}, 'combined': 0.16194625241678473, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25443262411347517, 'r': 0.3416666666666666, 'f1': 0.29166666666666663}, 'combined': 0.19444444444444442, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.32608695652173914, 'f1': 0.2830188679245283}, 'combined': 0.14150943396226415, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3684210526315789, 'r': 0.2413793103448276, 'f1': 0.2916666666666667}, 'combined': 0.19444444444444445, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} 
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:42:22.890644: step: 2/469, loss: 0.010263964533805847 2023-01-22 13:42:23.480360: step: 4/469, loss: 0.011231492273509502 2023-01-22 13:42:24.156653: step: 6/469, loss: 0.015762705355882645 2023-01-22 13:42:24.823995: step: 8/469, loss: 0.0012724900152534246 2023-01-22 13:42:25.443612: step: 10/469, loss: 0.053235072642564774 2023-01-22 13:42:26.161470: step: 12/469, loss: 0.010204012505710125 2023-01-22 13:42:26.762574: step: 14/469, loss: 0.030783262103796005 2023-01-22 13:42:27.410576: step: 16/469, loss: 0.005242544692009687 2023-01-22 13:42:28.027902: step: 18/469, loss: 0.08665913343429565 2023-01-22 13:42:28.685763: step: 20/469, loss: 0.011185991577804089 2023-01-22 13:42:29.318405: step: 22/469, loss: 0.009902694262564182 2023-01-22 13:42:29.877344: step: 24/469, loss: 0.017803888767957687 2023-01-22 13:42:30.535722: step: 26/469, loss: 0.003923527430742979 2023-01-22 13:42:31.158931: step: 28/469, loss: 0.029844868928194046 2023-01-22 13:42:31.791552: step: 30/469, loss: 0.03673786669969559 2023-01-22 13:42:32.374569: step: 32/469, loss: 0.01843520998954773 2023-01-22 13:42:33.008417: step: 34/469, loss: 0.006193936336785555 2023-01-22 13:42:33.705984: step: 36/469, loss: 0.014202727936208248 2023-01-22 13:42:34.387736: step: 38/469, loss: 0.05356700345873833 2023-01-22 13:42:35.022534: step: 40/469, loss: 0.008297438733279705 2023-01-22 13:42:35.668378: step: 42/469, loss: 0.0147783812135458 2023-01-22 13:42:36.335074: step: 44/469, loss: 0.010029271245002747 2023-01-22 13:42:36.944085: step: 46/469, loss: 0.011169832199811935 2023-01-22 13:42:37.598586: step: 48/469, loss: 0.041271597146987915 2023-01-22 13:42:38.280421: step: 50/469, loss: 0.0684984028339386 2023-01-22 
13:42:38.878604: step: 52/469, loss: 0.013210831210017204 2023-01-22 13:42:39.509396: step: 54/469, loss: 0.026933982968330383 2023-01-22 13:42:40.119857: step: 56/469, loss: 0.001984068425372243 2023-01-22 13:42:40.755632: step: 58/469, loss: 0.026761960238218307 2023-01-22 13:42:41.406177: step: 60/469, loss: 0.12014544755220413 2023-01-22 13:42:41.980712: step: 62/469, loss: 0.02620898000895977 2023-01-22 13:42:42.503811: step: 64/469, loss: 0.0016914040315896273 2023-01-22 13:42:43.161232: step: 66/469, loss: 0.04991891607642174 2023-01-22 13:42:43.877376: step: 68/469, loss: 0.025870252400636673 2023-01-22 13:42:44.424510: step: 70/469, loss: 0.02388407662510872 2023-01-22 13:42:45.217517: step: 72/469, loss: 0.001007211278192699 2023-01-22 13:42:45.816705: step: 74/469, loss: 0.15625114738941193 2023-01-22 13:42:46.371612: step: 76/469, loss: 0.001346201985143125 2023-01-22 13:42:47.011302: step: 78/469, loss: 0.05820954218506813 2023-01-22 13:42:47.550648: step: 80/469, loss: 0.01771903596818447 2023-01-22 13:42:48.109002: step: 82/469, loss: 0.02378823608160019 2023-01-22 13:42:48.691289: step: 84/469, loss: 0.00656943628564477 2023-01-22 13:42:49.252204: step: 86/469, loss: 0.005551417823880911 2023-01-22 13:42:49.911432: step: 88/469, loss: 0.03339490294456482 2023-01-22 13:42:50.587301: step: 90/469, loss: 0.08753277361392975 2023-01-22 13:42:51.181647: step: 92/469, loss: 0.009791675955057144 2023-01-22 13:42:51.813894: step: 94/469, loss: 0.48874059319496155 2023-01-22 13:42:52.437124: step: 96/469, loss: 0.007072238251566887 2023-01-22 13:42:53.049814: step: 98/469, loss: 0.015402862802147865 2023-01-22 13:42:53.785093: step: 100/469, loss: 0.1414964199066162 2023-01-22 13:42:54.458990: step: 102/469, loss: 0.017023388296365738 2023-01-22 13:42:55.073652: step: 104/469, loss: 0.00987032800912857 2023-01-22 13:42:55.678531: step: 106/469, loss: 0.08945617079734802 2023-01-22 13:42:56.218028: step: 108/469, loss: 0.004312312696129084 2023-01-22 13:42:56.833151: step: 110/469, loss: 0.02816644497215748 2023-01-22 13:42:57.413979: step: 112/469, loss: 0.015106548555195332 2023-01-22 13:42:58.057822: step: 114/469, loss: 0.2121334969997406 2023-01-22 13:42:58.645156: step: 116/469, loss: 0.01742878556251526 2023-01-22 13:42:59.283816: step: 118/469, loss: 0.009169898927211761 2023-01-22 13:42:59.892484: step: 120/469, loss: 0.012686403468251228 2023-01-22 13:43:00.588332: step: 122/469, loss: 0.0169672854244709 2023-01-22 13:43:01.205629: step: 124/469, loss: 0.010977535508573055 2023-01-22 13:43:01.830470: step: 126/469, loss: 0.04842076823115349 2023-01-22 13:43:02.437957: step: 128/469, loss: 0.03538428992033005 2023-01-22 13:43:03.140678: step: 130/469, loss: 0.02807338535785675 2023-01-22 13:43:03.798519: step: 132/469, loss: 0.015565113164484501 2023-01-22 13:43:04.478614: step: 134/469, loss: 0.005185325630009174 2023-01-22 13:43:04.991607: step: 136/469, loss: 0.011364608071744442 2023-01-22 13:43:05.610259: step: 138/469, loss: 0.039853207767009735 2023-01-22 13:43:06.227120: step: 140/469, loss: 0.005194418597966433 2023-01-22 13:43:06.808264: step: 142/469, loss: 0.012483472935855389 2023-01-22 13:43:07.464377: step: 144/469, loss: 0.022440873086452484 2023-01-22 13:43:08.069125: step: 146/469, loss: 0.0964784175157547 2023-01-22 13:43:08.740177: step: 148/469, loss: 0.036113470792770386 2023-01-22 13:43:09.291233: step: 150/469, loss: 0.00023906411661300808 2023-01-22 13:43:09.861439: step: 152/469, loss: 0.003979787230491638 2023-01-22 13:43:10.433326: step: 154/469, 
loss: 0.052820540964603424 2023-01-22 13:43:11.110221: step: 156/469, loss: 0.012995469383895397 2023-01-22 13:43:11.674794: step: 158/469, loss: 0.00227616960182786 2023-01-22 13:43:12.273241: step: 160/469, loss: 0.043756499886512756 2023-01-22 13:43:12.946151: step: 162/469, loss: 0.08687310665845871 2023-01-22 13:43:13.542773: step: 164/469, loss: 0.011782361194491386 2023-01-22 13:43:14.151484: step: 166/469, loss: 0.05526575446128845 2023-01-22 13:43:14.751035: step: 168/469, loss: 0.0008925764705054462 2023-01-22 13:43:15.428873: step: 170/469, loss: 0.18294069170951843 2023-01-22 13:43:16.009620: step: 172/469, loss: 0.03835257142782211 2023-01-22 13:43:16.601474: step: 174/469, loss: 0.16800366342067719 2023-01-22 13:43:17.244127: step: 176/469, loss: 0.03162071853876114 2023-01-22 13:43:17.869286: step: 178/469, loss: 0.027066271752119064 2023-01-22 13:43:18.529502: step: 180/469, loss: 0.13620692491531372 2023-01-22 13:43:19.224441: step: 182/469, loss: 0.010903574526309967 2023-01-22 13:43:19.800561: step: 184/469, loss: 0.049468494951725006 2023-01-22 13:43:20.449405: step: 186/469, loss: 0.004295628517866135 2023-01-22 13:43:21.102909: step: 188/469, loss: 0.005311970598995686 2023-01-22 13:43:21.710966: step: 190/469, loss: 0.0048162611201405525 2023-01-22 13:43:22.301158: step: 192/469, loss: 0.01803344301879406 2023-01-22 13:43:22.867125: step: 194/469, loss: 0.011363444849848747 2023-01-22 13:43:23.467115: step: 196/469, loss: 0.0033321427181363106 2023-01-22 13:43:24.116064: step: 198/469, loss: 0.012740414589643478 2023-01-22 13:43:24.699371: step: 200/469, loss: 0.018664730712771416 2023-01-22 13:43:25.365578: step: 202/469, loss: 0.0025006842333823442 2023-01-22 13:43:25.943075: step: 204/469, loss: 0.00245764316059649 2023-01-22 13:43:26.575871: step: 206/469, loss: 0.2508716583251953 2023-01-22 13:43:27.181533: step: 208/469, loss: 0.022152552381157875 2023-01-22 13:43:27.829384: step: 210/469, loss: 0.027070006355643272 2023-01-22 13:43:28.500674: step: 212/469, loss: 0.1623942106962204 2023-01-22 13:43:29.204159: step: 214/469, loss: 0.32966285943984985 2023-01-22 13:43:29.820836: step: 216/469, loss: 0.005950678139925003 2023-01-22 13:43:30.437575: step: 218/469, loss: 0.003281219396740198 2023-01-22 13:43:31.022811: step: 220/469, loss: 0.019649382680654526 2023-01-22 13:43:31.644251: step: 222/469, loss: 0.040834516286849976 2023-01-22 13:43:32.189484: step: 224/469, loss: 0.025422558188438416 2023-01-22 13:43:32.806685: step: 226/469, loss: 0.00022721791174262762 2023-01-22 13:43:33.342565: step: 228/469, loss: 0.04407082125544548 2023-01-22 13:43:33.910185: step: 230/469, loss: 0.017822694033384323 2023-01-22 13:43:34.528769: step: 232/469, loss: 0.04084165394306183 2023-01-22 13:43:35.161231: step: 234/469, loss: 0.005138130858540535 2023-01-22 13:43:35.789603: step: 236/469, loss: 0.008525789715349674 2023-01-22 13:43:36.436353: step: 238/469, loss: 0.031569741666316986 2023-01-22 13:43:37.080899: step: 240/469, loss: 0.006945348810404539 2023-01-22 13:43:37.753596: step: 242/469, loss: 0.21676471829414368 2023-01-22 13:43:38.397810: step: 244/469, loss: 0.0058443257585167885 2023-01-22 13:43:39.039318: step: 246/469, loss: 0.021715538576245308 2023-01-22 13:43:39.683285: step: 248/469, loss: 0.01268727146089077 2023-01-22 13:43:40.294975: step: 250/469, loss: 0.0061350250616669655 2023-01-22 13:43:40.909723: step: 252/469, loss: 0.039857491850852966 2023-01-22 13:43:41.539536: step: 254/469, loss: 0.009696245193481445 2023-01-22 13:43:42.240568: step: 
256/469, loss: 0.0034504346549510956 2023-01-22 13:43:42.885632: step: 258/469, loss: 0.030834296718239784 2023-01-22 13:43:43.468786: step: 260/469, loss: 1.4202287197113037 2023-01-22 13:43:44.212736: step: 262/469, loss: 0.011240133084356785 2023-01-22 13:43:44.758500: step: 264/469, loss: 0.0023286775685846806 2023-01-22 13:43:45.427040: step: 266/469, loss: 0.006101185455918312 2023-01-22 13:43:46.023644: step: 268/469, loss: 0.02342948503792286 2023-01-22 13:43:46.629525: step: 270/469, loss: 0.005187559872865677 2023-01-22 13:43:47.263351: step: 272/469, loss: 0.24962659180164337 2023-01-22 13:43:47.924856: step: 274/469, loss: 1.4215294122695923 2023-01-22 13:43:48.518564: step: 276/469, loss: 0.0015044870087876916 2023-01-22 13:43:49.143304: step: 278/469, loss: 0.040224362164735794 2023-01-22 13:43:49.740833: step: 280/469, loss: 0.0005229779053479433 2023-01-22 13:43:50.297744: step: 282/469, loss: 0.04526732861995697 2023-01-22 13:43:50.978285: step: 284/469, loss: 0.010339884087443352 2023-01-22 13:43:51.574002: step: 286/469, loss: 0.02542593516409397 2023-01-22 13:43:52.175367: step: 288/469, loss: 0.014497010968625546 2023-01-22 13:43:52.764008: step: 290/469, loss: 0.4429609179496765 2023-01-22 13:43:53.359563: step: 292/469, loss: 0.0052616591565310955 2023-01-22 13:43:53.955041: step: 294/469, loss: 0.04630669206380844 2023-01-22 13:43:54.671731: step: 296/469, loss: 0.01808157004415989 2023-01-22 13:43:55.260366: step: 298/469, loss: 0.15663926303386688 2023-01-22 13:43:55.959390: step: 300/469, loss: 0.017160143703222275 2023-01-22 13:43:56.665843: step: 302/469, loss: 0.0031339225824922323 2023-01-22 13:43:57.312347: step: 304/469, loss: 0.005071716848760843 2023-01-22 13:43:57.900054: step: 306/469, loss: 0.0012592441635206342 2023-01-22 13:43:58.539578: step: 308/469, loss: 0.15066708624362946 2023-01-22 13:43:59.196440: step: 310/469, loss: 0.027801720425486565 2023-01-22 13:43:59.837485: step: 312/469, loss: 0.03207564353942871 2023-01-22 13:44:00.456571: step: 314/469, loss: 0.015848476439714432 2023-01-22 13:44:01.065140: step: 316/469, loss: 0.0015576331643387675 2023-01-22 13:44:01.731470: step: 318/469, loss: 0.024646896868944168 2023-01-22 13:44:02.346464: step: 320/469, loss: 0.017506342381238937 2023-01-22 13:44:02.993506: step: 322/469, loss: 0.0037719248794019222 2023-01-22 13:44:03.682875: step: 324/469, loss: 0.43416082859039307 2023-01-22 13:44:04.333575: step: 326/469, loss: 0.04574159160256386 2023-01-22 13:44:04.994962: step: 328/469, loss: 0.013402159325778484 2023-01-22 13:44:05.567593: step: 330/469, loss: 0.009678997099399567 2023-01-22 13:44:06.177361: step: 332/469, loss: 0.01342147309333086 2023-01-22 13:44:06.790174: step: 334/469, loss: 0.03259721025824547 2023-01-22 13:44:07.377232: step: 336/469, loss: 0.01209324225783348 2023-01-22 13:44:08.052158: step: 338/469, loss: 0.0022710200864821672 2023-01-22 13:44:08.630054: step: 340/469, loss: 0.022515149787068367 2023-01-22 13:44:09.216575: step: 342/469, loss: 0.04635896533727646 2023-01-22 13:44:09.870569: step: 344/469, loss: 0.0037869280204176903 2023-01-22 13:44:10.517465: step: 346/469, loss: 0.021152865141630173 2023-01-22 13:44:11.118852: step: 348/469, loss: 0.01859697513282299 2023-01-22 13:44:11.700360: step: 350/469, loss: 0.02358274720609188 2023-01-22 13:44:12.358869: step: 352/469, loss: 0.008010893128812313 2023-01-22 13:44:13.076061: step: 354/469, loss: 0.009056910872459412 2023-01-22 13:44:13.709146: step: 356/469, loss: 0.32193630933761597 2023-01-22 13:44:14.372698: 
step: 358/469, loss: 0.07568494975566864 2023-01-22 13:44:14.984314: step: 360/469, loss: 0.09271309524774551 2023-01-22 13:44:15.572541: step: 362/469, loss: 0.10162200778722763 2023-01-22 13:44:16.214542: step: 364/469, loss: 0.03486338630318642 2023-01-22 13:44:16.869515: step: 366/469, loss: 0.030468272045254707 2023-01-22 13:44:17.503914: step: 368/469, loss: 0.009875318966805935 2023-01-22 13:44:18.233987: step: 370/469, loss: 0.008483083918690681 2023-01-22 13:44:18.843183: step: 372/469, loss: 0.046357907354831696 2023-01-22 13:44:19.489733: step: 374/469, loss: 0.01245014276355505 2023-01-22 13:44:20.066693: step: 376/469, loss: 0.04683561623096466 2023-01-22 13:44:20.763439: step: 378/469, loss: 0.0032857325859367847 2023-01-22 13:44:21.433793: step: 380/469, loss: 0.4295358657836914 2023-01-22 13:44:22.055093: step: 382/469, loss: 0.03776291757822037 2023-01-22 13:44:22.667153: step: 384/469, loss: 0.0042632948607206345 2023-01-22 13:44:23.457062: step: 386/469, loss: 0.06727524846792221 2023-01-22 13:44:24.128646: step: 388/469, loss: 0.0033810532186180353 2023-01-22 13:44:24.758942: step: 390/469, loss: 0.00856588501483202 2023-01-22 13:44:25.403342: step: 392/469, loss: 0.0437612310051918 2023-01-22 13:44:26.039251: step: 394/469, loss: 0.022761067375540733 2023-01-22 13:44:26.619245: step: 396/469, loss: 0.0060792299918830395 2023-01-22 13:44:27.148239: step: 398/469, loss: 0.008533630520105362 2023-01-22 13:44:27.680922: step: 400/469, loss: 0.002531493781134486 2023-01-22 13:44:28.333161: step: 402/469, loss: 0.01378880999982357 2023-01-22 13:44:28.985638: step: 404/469, loss: 0.012417015619575977 2023-01-22 13:44:29.581182: step: 406/469, loss: 0.007826639339327812 2023-01-22 13:44:30.147939: step: 408/469, loss: 0.05615496262907982 2023-01-22 13:44:30.734859: step: 410/469, loss: 0.015419812873005867 2023-01-22 13:44:31.387790: step: 412/469, loss: 0.01269865408539772 2023-01-22 13:44:32.060883: step: 414/469, loss: 0.006402693688869476 2023-01-22 13:44:32.646780: step: 416/469, loss: 0.007301743142306805 2023-01-22 13:44:33.287057: step: 418/469, loss: 0.08711107075214386 2023-01-22 13:44:33.864192: step: 420/469, loss: 0.3633323013782501 2023-01-22 13:44:34.488800: step: 422/469, loss: 0.03807739168405533 2023-01-22 13:44:35.064753: step: 424/469, loss: 0.04179481044411659 2023-01-22 13:44:35.680238: step: 426/469, loss: 0.10723355412483215 2023-01-22 13:44:36.372298: step: 428/469, loss: 0.3118813931941986 2023-01-22 13:44:37.018140: step: 430/469, loss: 0.021025799214839935 2023-01-22 13:44:37.647655: step: 432/469, loss: 0.017781071364879608 2023-01-22 13:44:38.261142: step: 434/469, loss: 0.0011829964350908995 2023-01-22 13:44:38.833381: step: 436/469, loss: 0.002774057677015662 2023-01-22 13:44:39.452173: step: 438/469, loss: 0.029620174318552017 2023-01-22 13:44:40.115246: step: 440/469, loss: 0.022168945521116257 2023-01-22 13:44:40.695419: step: 442/469, loss: 0.006440779659897089 2023-01-22 13:44:41.302564: step: 444/469, loss: 0.0011556058889254928 2023-01-22 13:44:41.917485: step: 446/469, loss: 0.2666653096675873 2023-01-22 13:44:42.505241: step: 448/469, loss: 0.0335611067712307 2023-01-22 13:44:43.161346: step: 450/469, loss: 0.06425870209932327 2023-01-22 13:44:43.794214: step: 452/469, loss: 0.011099952273070812 2023-01-22 13:44:44.402354: step: 454/469, loss: 0.0860430896282196 2023-01-22 13:44:45.018720: step: 456/469, loss: 0.6219310164451599 2023-01-22 13:44:45.599484: step: 458/469, loss: 0.0025105306413024664 2023-01-22 13:44:46.221501: step: 
460/469, loss: 0.004365452565252781 2023-01-22 13:44:46.893536: step: 462/469, loss: 0.00031475472496822476 2023-01-22 13:44:47.523431: step: 464/469, loss: 0.07625474035739899 2023-01-22 13:44:48.087173: step: 466/469, loss: 0.0357365645468235 2023-01-22 13:44:48.674337: step: 468/469, loss: 0.04635073244571686 2023-01-22 13:44:49.282817: step: 470/469, loss: 0.010422706604003906 2023-01-22 13:44:49.905840: step: 472/469, loss: 0.015965810045599937 2023-01-22 13:44:50.516511: step: 474/469, loss: 0.05657362565398216 2023-01-22 13:44:51.113489: step: 476/469, loss: 0.01111332606524229 2023-01-22 13:44:51.797260: step: 478/469, loss: 2.8216209411621094 2023-01-22 13:44:52.383887: step: 480/469, loss: 0.03001786395907402 2023-01-22 13:44:53.023430: step: 482/469, loss: 0.009279635734856129 2023-01-22 13:44:53.717554: step: 484/469, loss: 0.011910689994692802 2023-01-22 13:44:54.374223: step: 486/469, loss: 0.07531367242336273 2023-01-22 13:44:54.989620: step: 488/469, loss: 0.04005114734172821 2023-01-22 13:44:55.649181: step: 490/469, loss: 0.029068103060126305 2023-01-22 13:44:56.264677: step: 492/469, loss: 0.007307740859687328 2023-01-22 13:44:56.943567: step: 494/469, loss: 0.013578913174569607 2023-01-22 13:44:57.502373: step: 496/469, loss: 0.014166039414703846 2023-01-22 13:44:58.108329: step: 498/469, loss: 0.005432970356196165 2023-01-22 13:44:58.709400: step: 500/469, loss: 0.438414603471756 2023-01-22 13:44:59.309353: step: 502/469, loss: 0.009488563053309917 2023-01-22 13:44:59.935454: step: 504/469, loss: 0.026535330340266228 2023-01-22 13:45:00.533694: step: 506/469, loss: 0.0009407024481333792 2023-01-22 13:45:01.089043: step: 508/469, loss: 0.021527405828237534 2023-01-22 13:45:01.721665: step: 510/469, loss: 0.0013636683579534292 2023-01-22 13:45:02.406529: step: 512/469, loss: 0.00748690078034997 2023-01-22 13:45:03.070359: step: 514/469, loss: 0.06145711988210678 2023-01-22 13:45:03.745657: step: 516/469, loss: 0.0016967600677162409 2023-01-22 13:45:04.372204: step: 518/469, loss: 0.01326613500714302 2023-01-22 13:45:05.013857: step: 520/469, loss: 0.017016036435961723 2023-01-22 13:45:05.582267: step: 522/469, loss: 0.2631334364414215 2023-01-22 13:45:06.239315: step: 524/469, loss: 0.0008613273384980857 2023-01-22 13:45:06.921406: step: 526/469, loss: 0.008722303435206413 2023-01-22 13:45:07.508574: step: 528/469, loss: 0.0010988791473209858 2023-01-22 13:45:08.130008: step: 530/469, loss: 0.025261837989091873 2023-01-22 13:45:08.785428: step: 532/469, loss: 0.108428455889225 2023-01-22 13:45:09.400146: step: 534/469, loss: 0.04474363103508949 2023-01-22 13:45:09.990308: step: 536/469, loss: 0.022395450621843338 2023-01-22 13:45:10.556291: step: 538/469, loss: 0.0051158517599105835 2023-01-22 13:45:11.186446: step: 540/469, loss: 0.07086169719696045 2023-01-22 13:45:11.828339: step: 542/469, loss: 0.1461431086063385 2023-01-22 13:45:12.478730: step: 544/469, loss: 0.02447369508445263 2023-01-22 13:45:13.111646: step: 546/469, loss: 0.003372251521795988 2023-01-22 13:45:13.804199: step: 548/469, loss: 0.022394711151719093 2023-01-22 13:45:14.451978: step: 550/469, loss: 0.05098595470190048 2023-01-22 13:45:15.076694: step: 552/469, loss: 0.03485753759741783 2023-01-22 13:45:15.685333: step: 554/469, loss: 0.01913411170244217 2023-01-22 13:45:16.399505: step: 556/469, loss: 0.0005106988828629255 2023-01-22 13:45:17.097650: step: 558/469, loss: 0.024273579940199852 2023-01-22 13:45:17.788994: step: 560/469, loss: 0.06003189459443092 2023-01-22 13:45:18.418041: step: 
562/469, loss: 0.015975648537278175 2023-01-22 13:45:19.073188: step: 564/469, loss: 0.04889070242643356 2023-01-22 13:45:19.741320: step: 566/469, loss: 0.004329674877226353 2023-01-22 13:45:20.422635: step: 568/469, loss: 0.20665863156318665 2023-01-22 13:45:21.161403: step: 570/469, loss: 0.046717625111341476 2023-01-22 13:45:21.779067: step: 572/469, loss: 0.005358706694096327 2023-01-22 13:45:22.410069: step: 574/469, loss: 0.05420185253024101 2023-01-22 13:45:22.974865: step: 576/469, loss: 0.045543890446424484 2023-01-22 13:45:23.591268: step: 578/469, loss: 0.051338620483875275 2023-01-22 13:45:24.167656: step: 580/469, loss: 0.09271926432847977 2023-01-22 13:45:24.796515: step: 582/469, loss: 0.07600134611129761 2023-01-22 13:45:25.411624: step: 584/469, loss: 0.0850319117307663 2023-01-22 13:45:26.101936: step: 586/469, loss: 0.047790274024009705 2023-01-22 13:45:26.757023: step: 588/469, loss: 0.01662716642022133 2023-01-22 13:45:27.425529: step: 590/469, loss: 0.01776893064379692 2023-01-22 13:45:28.020184: step: 592/469, loss: 0.010264355689287186 2023-01-22 13:45:28.628763: step: 594/469, loss: 0.03647656738758087 2023-01-22 13:45:29.223096: step: 596/469, loss: 0.0029565831646323204 2023-01-22 13:45:29.895250: step: 598/469, loss: 0.005822004284709692 2023-01-22 13:45:30.517935: step: 600/469, loss: 0.07133756577968597 2023-01-22 13:45:31.079306: step: 602/469, loss: 0.05447622016072273 2023-01-22 13:45:31.655416: step: 604/469, loss: 0.0001856352319009602 2023-01-22 13:45:32.319311: step: 606/469, loss: 0.0003206977271474898 2023-01-22 13:45:32.830892: step: 608/469, loss: 0.007646747399121523 2023-01-22 13:45:33.492445: step: 610/469, loss: 0.015947137027978897 2023-01-22 13:45:34.115747: step: 612/469, loss: 0.031069105491042137 2023-01-22 13:45:34.724101: step: 614/469, loss: 0.09184547513723373 2023-01-22 13:45:35.550006: step: 616/469, loss: 0.020485946908593178 2023-01-22 13:45:36.184262: step: 618/469, loss: 0.2241358458995819 2023-01-22 13:45:36.816266: step: 620/469, loss: 0.005171081516891718 2023-01-22 13:45:37.481208: step: 622/469, loss: 0.034143850207328796 2023-01-22 13:45:38.119952: step: 624/469, loss: 0.02039930410683155 2023-01-22 13:45:38.906652: step: 626/469, loss: 0.061198413372039795 2023-01-22 13:45:39.507017: step: 628/469, loss: 0.0069563682191073895 2023-01-22 13:45:40.137112: step: 630/469, loss: 0.0026992070488631725 2023-01-22 13:45:40.793872: step: 632/469, loss: 0.01687643676996231 2023-01-22 13:45:41.414739: step: 634/469, loss: 0.023891648277640343 2023-01-22 13:45:41.978680: step: 636/469, loss: 0.04390119016170502 2023-01-22 13:45:42.625361: step: 638/469, loss: 0.2554837167263031 2023-01-22 13:45:43.308527: step: 640/469, loss: 0.04864421859383583 2023-01-22 13:45:43.955268: step: 642/469, loss: 0.004678256344050169 2023-01-22 13:45:44.557943: step: 644/469, loss: 0.0076864478178322315 2023-01-22 13:45:45.180077: step: 646/469, loss: 0.015554619953036308 2023-01-22 13:45:45.752044: step: 648/469, loss: 0.0730988010764122 2023-01-22 13:45:46.293135: step: 650/469, loss: 0.04674271121621132 2023-01-22 13:45:46.973642: step: 652/469, loss: 0.013844741508364677 2023-01-22 13:45:47.519541: step: 654/469, loss: 0.01298499759286642 2023-01-22 13:45:48.167232: step: 656/469, loss: 0.010526896454393864 2023-01-22 13:45:48.874468: step: 658/469, loss: 0.0166607778519392 2023-01-22 13:45:49.514006: step: 660/469, loss: 0.013099784031510353 2023-01-22 13:45:50.125974: step: 662/469, loss: 0.023929951712489128 2023-01-22 13:45:50.804295: step: 
664/469, loss: 0.029083797708153725 2023-01-22 13:45:51.403439: step: 666/469, loss: 0.031284451484680176 2023-01-22 13:45:52.090884: step: 668/469, loss: 0.23350965976715088 2023-01-22 13:45:52.777799: step: 670/469, loss: 0.04141067713499069 2023-01-22 13:45:53.444595: step: 672/469, loss: 0.04890824109315872 2023-01-22 13:45:54.070361: step: 674/469, loss: 0.2251994013786316 2023-01-22 13:45:54.670447: step: 676/469, loss: 0.01100002322345972 2023-01-22 13:45:55.270205: step: 678/469, loss: 0.02639543078839779 2023-01-22 13:45:55.859858: step: 680/469, loss: 0.026796657592058182 2023-01-22 13:45:56.485495: step: 682/469, loss: 0.020167997106909752 2023-01-22 13:45:57.097361: step: 684/469, loss: 0.004455122631043196 2023-01-22 13:45:57.707008: step: 686/469, loss: 0.012984787113964558 2023-01-22 13:45:58.350956: step: 688/469, loss: 0.28461185097694397 2023-01-22 13:45:59.002537: step: 690/469, loss: 0.004374017007648945 2023-01-22 13:45:59.636488: step: 692/469, loss: 0.015682077035307884 2023-01-22 13:46:00.240960: step: 694/469, loss: 0.016279974952340126 2023-01-22 13:46:00.911513: step: 696/469, loss: 0.059418100863695145 2023-01-22 13:46:01.507781: step: 698/469, loss: 0.008805235847830772 2023-01-22 13:46:02.170170: step: 700/469, loss: 0.037468321621418 2023-01-22 13:46:02.760357: step: 702/469, loss: 0.03351537883281708 2023-01-22 13:46:03.385099: step: 704/469, loss: 0.2406420260667801 2023-01-22 13:46:04.058280: step: 706/469, loss: 0.0076634762808680534 2023-01-22 13:46:04.688492: step: 708/469, loss: 0.12576699256896973 2023-01-22 13:46:05.271217: step: 710/469, loss: 0.0019828625954687595 2023-01-22 13:46:05.877713: step: 712/469, loss: 0.015728894621133804 2023-01-22 13:46:06.430147: step: 714/469, loss: 0.009912285022437572 2023-01-22 13:46:07.080447: step: 716/469, loss: 0.02390599250793457 2023-01-22 13:46:07.756401: step: 718/469, loss: 0.6465012431144714 2023-01-22 13:46:08.381383: step: 720/469, loss: 0.0433785654604435 2023-01-22 13:46:08.981505: step: 722/469, loss: 0.0016821925528347492 2023-01-22 13:46:09.613689: step: 724/469, loss: 0.015557276085019112 2023-01-22 13:46:10.252942: step: 726/469, loss: 0.006185324862599373 2023-01-22 13:46:10.864110: step: 728/469, loss: 0.007912787608802319 2023-01-22 13:46:11.464092: step: 730/469, loss: 1.372776985168457 2023-01-22 13:46:12.101761: step: 732/469, loss: 0.10331526398658752 2023-01-22 13:46:12.680385: step: 734/469, loss: 0.08862237632274628 2023-01-22 13:46:13.317574: step: 736/469, loss: 0.05291207879781723 2023-01-22 13:46:13.905577: step: 738/469, loss: 0.014255165122449398 2023-01-22 13:46:14.539486: step: 740/469, loss: 0.03309673070907593 2023-01-22 13:46:15.109089: step: 742/469, loss: 0.003737740684300661 2023-01-22 13:46:15.762496: step: 744/469, loss: 0.10176929831504822 2023-01-22 13:46:16.421476: step: 746/469, loss: 0.0867793932557106 2023-01-22 13:46:17.012561: step: 748/469, loss: 0.050306957215070724 2023-01-22 13:46:17.690532: step: 750/469, loss: 0.02482243999838829 2023-01-22 13:46:18.318207: step: 752/469, loss: 0.03686288744211197 2023-01-22 13:46:18.980430: step: 754/469, loss: 0.019275685772299767 2023-01-22 13:46:19.578366: step: 756/469, loss: 0.0424811951816082 2023-01-22 13:46:20.104247: step: 758/469, loss: 0.015101857483386993 2023-01-22 13:46:20.824421: step: 760/469, loss: 0.0010853593703359365 2023-01-22 13:46:21.417670: step: 762/469, loss: 0.07512016594409943 2023-01-22 13:46:22.059638: step: 764/469, loss: 0.2834002375602722 2023-01-22 13:46:22.665843: step: 766/469, loss: 
0.03509870544075966 2023-01-22 13:46:23.300282: step: 768/469, loss: 0.030473843216896057 2023-01-22 13:46:23.978701: step: 770/469, loss: 0.0468730665743351 2023-01-22 13:46:24.632155: step: 772/469, loss: 0.6759230494499207 2023-01-22 13:46:25.207380: step: 774/469, loss: 0.011631988920271397 2023-01-22 13:46:25.806991: step: 776/469, loss: 0.0067085642367601395 2023-01-22 13:46:26.447272: step: 778/469, loss: 0.025166219100356102 2023-01-22 13:46:27.150767: step: 780/469, loss: 0.03705146163702011 2023-01-22 13:46:27.794603: step: 782/469, loss: 0.044877778738737106 2023-01-22 13:46:28.446301: step: 784/469, loss: 0.05136854574084282 2023-01-22 13:46:29.040478: step: 786/469, loss: 0.16610364615917206 2023-01-22 13:46:29.739464: step: 788/469, loss: 0.04840927943587303 2023-01-22 13:46:30.447294: step: 790/469, loss: 0.19635668396949768 2023-01-22 13:46:31.007194: step: 792/469, loss: 0.01864774152636528 2023-01-22 13:46:31.708877: step: 794/469, loss: 0.002800745191052556 2023-01-22 13:46:32.335404: step: 796/469, loss: 0.012739913538098335 2023-01-22 13:46:32.980192: step: 798/469, loss: 0.012379863299429417 2023-01-22 13:46:33.632158: step: 800/469, loss: 0.0013234555954113603 2023-01-22 13:46:34.289905: step: 802/469, loss: 0.14927874505519867 2023-01-22 13:46:34.918718: step: 804/469, loss: 0.03492793068289757 2023-01-22 13:46:35.595813: step: 806/469, loss: 0.05809061974287033 2023-01-22 13:46:36.198528: step: 808/469, loss: 0.004753398708999157 2023-01-22 13:46:36.782775: step: 810/469, loss: 0.006861999165266752 2023-01-22 13:46:37.456780: step: 812/469, loss: 0.014548441395163536 2023-01-22 13:46:38.105276: step: 814/469, loss: 0.07043583691120148 2023-01-22 13:46:38.722919: step: 816/469, loss: 0.02126930095255375 2023-01-22 13:46:39.340517: step: 818/469, loss: 0.02161935158073902 2023-01-22 13:46:40.037149: step: 820/469, loss: 0.003955075982958078 2023-01-22 13:46:40.669409: step: 822/469, loss: 0.03301957994699478 2023-01-22 13:46:41.361178: step: 824/469, loss: 0.012437576428055763 2023-01-22 13:46:42.010916: step: 826/469, loss: 0.007403786759823561 2023-01-22 13:46:42.630229: step: 828/469, loss: 0.019394539296627045 2023-01-22 13:46:43.239521: step: 830/469, loss: 0.03379194810986519 2023-01-22 13:46:43.910873: step: 832/469, loss: 0.03000115603208542 2023-01-22 13:46:44.582840: step: 834/469, loss: 0.010721656493842602 2023-01-22 13:46:45.160479: step: 836/469, loss: 0.05591261014342308 2023-01-22 13:46:45.763368: step: 838/469, loss: 0.0024415398947894573 2023-01-22 13:46:46.377379: step: 840/469, loss: 0.04237809777259827 2023-01-22 13:46:47.021486: step: 842/469, loss: 0.004956814460456371 2023-01-22 13:46:47.641655: step: 844/469, loss: 0.021096479147672653 2023-01-22 13:46:48.296562: step: 846/469, loss: 0.0444808229804039 2023-01-22 13:46:48.836951: step: 848/469, loss: 0.0549582801759243 2023-01-22 13:46:49.462787: step: 850/469, loss: 0.049095068126916885 2023-01-22 13:46:50.074504: step: 852/469, loss: 0.0007168870070017874 2023-01-22 13:46:50.702806: step: 854/469, loss: 0.007447755895555019 2023-01-22 13:46:51.300209: step: 856/469, loss: 0.013007380068302155 2023-01-22 13:46:51.954189: step: 858/469, loss: 0.07912391424179077 2023-01-22 13:46:52.584189: step: 860/469, loss: 0.007616672199219465 2023-01-22 13:46:53.199318: step: 862/469, loss: 0.06354168802499771 2023-01-22 13:46:53.814558: step: 864/469, loss: 0.01765485294163227 2023-01-22 13:46:54.417713: step: 866/469, loss: 0.08060652017593384 2023-01-22 13:46:55.001417: step: 868/469, loss: 
0.04794704541563988 2023-01-22 13:46:55.613371: step: 870/469, loss: 0.03857680410146713 2023-01-22 13:46:56.277516: step: 872/469, loss: 0.06660234928131104 2023-01-22 13:46:56.933666: step: 874/469, loss: 0.04524796083569527 2023-01-22 13:46:57.563092: step: 876/469, loss: 0.0294257253408432 2023-01-22 13:46:58.203691: step: 878/469, loss: 0.048293184489011765 2023-01-22 13:46:58.837398: step: 880/469, loss: 0.022815581411123276 2023-01-22 13:46:59.451595: step: 882/469, loss: 0.012829248793423176 2023-01-22 13:47:00.097132: step: 884/469, loss: 0.14886873960494995 2023-01-22 13:47:00.781200: step: 886/469, loss: 0.025759056210517883 2023-01-22 13:47:01.355102: step: 888/469, loss: 0.412566602230072 2023-01-22 13:47:01.991913: step: 890/469, loss: 0.033732280135154724 2023-01-22 13:47:02.667372: step: 892/469, loss: 0.011866229586303234 2023-01-22 13:47:03.249682: step: 894/469, loss: 0.003136869752779603 2023-01-22 13:47:03.836676: step: 896/469, loss: 0.028152301907539368 2023-01-22 13:47:04.572732: step: 898/469, loss: 0.07520636916160583 2023-01-22 13:47:05.183640: step: 900/469, loss: 0.055856283754110336 2023-01-22 13:47:05.844472: step: 902/469, loss: 0.01485371496528387 2023-01-22 13:47:06.439075: step: 904/469, loss: 0.06036660075187683 2023-01-22 13:47:07.105554: step: 906/469, loss: 0.11419340968132019 2023-01-22 13:47:07.696761: step: 908/469, loss: 0.02651139162480831 2023-01-22 13:47:08.346276: step: 910/469, loss: 0.0018147239461541176 2023-01-22 13:47:08.928196: step: 912/469, loss: 0.0162180345505476 2023-01-22 13:47:09.607430: step: 914/469, loss: 0.009250929579138756 2023-01-22 13:47:10.206771: step: 916/469, loss: 0.07815077155828476 2023-01-22 13:47:10.858064: step: 918/469, loss: 0.0694354996085167 2023-01-22 13:47:11.481075: step: 920/469, loss: 0.009638773277401924 2023-01-22 13:47:12.126401: step: 922/469, loss: 0.015771687030792236 2023-01-22 13:47:12.666136: step: 924/469, loss: 0.13897131383419037 2023-01-22 13:47:13.405608: step: 926/469, loss: 0.0017463566036894917 2023-01-22 13:47:14.045301: step: 928/469, loss: 0.04525983706116676 2023-01-22 13:47:14.752804: step: 930/469, loss: 0.011975876986980438 2023-01-22 13:47:15.351982: step: 932/469, loss: 0.006134568713605404 2023-01-22 13:47:15.933783: step: 934/469, loss: 0.0049422550946474075 2023-01-22 13:47:16.534275: step: 936/469, loss: 0.08714479207992554 2023-01-22 13:47:17.156304: step: 938/469, loss: 0.10087117552757263 ================================================== Loss: 0.064 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3079413082437276, 'r': 0.3260555028462998, 'f1': 0.3167396313364056}, 'combined': 0.2333870967741936, 'epoch': 27} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30643377368977576, 'r': 0.2685851374151923, 'f1': 0.28626382759122887}, 'combined': 0.1561439059588521, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29927677029360966, 'r': 0.3288069259962049, 'f1': 0.31334764918625674}, 'combined': 0.23088774150566285, 'epoch': 27} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3101600441312889, 'r': 0.27014854712990577, 'f1': 0.2887749261740704}, 'combined': 0.15751359609494747, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.2995862624487405, 'r': 0.32346220746363064, 'f1': 0.3110667579075426}, 'combined': 0.22920708477397878, 'epoch': 27} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3042300478234243, 'r': 0.27333385815425676, 'f1': 0.28795557297600255}, 'combined': 0.15706667616872866, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2803030303030303, 'r': 0.35238095238095235, 'f1': 0.3122362869198312}, 'combined': 0.2081575246132208, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2661290322580645, 'r': 0.358695652173913, 'f1': 0.30555555555555547}, 'combined': 0.15277777777777773, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:50:06.136152: step: 2/469, loss: 0.046117667108774185 2023-01-22 13:50:06.725702: step: 4/469, loss: 0.021690616384148598 2023-01-22 13:50:07.288506: step: 6/469, loss: 0.001044116448611021 
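[Editor's note] For readers scanning the evaluation dumps above: the printed numbers are internally consistent with the usual F1 definition and with the 'combined' field being the product of the template F1 and the slot F1 (e.g. for Dev Chinese at epoch 27, 0.7368421052631579 x 0.3167396313364056 = 0.2333870967741936). The snippet below is only a minimal sketch of that arithmetic, reconstructed from the printed values; it is not the repository's own evaluation code and the function names are illustrative.

```python
# Minimal sketch of the arithmetic behind the printed eval dicts.
# NOT the repository's evaluation code; names are illustrative only.

def f1(p: float, r: float) -> float:
    """Standard F1 from precision and recall."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template_f1: float, slot_f1: float) -> float:
    """The printed 'combined' field matches template F1 times slot F1."""
    return template_f1 * slot_f1

# Check against the Dev Chinese numbers printed for epoch 27 above.
template = f1(1.0, 0.5833333333333334)             # ~0.7368421052631579
slot = f1(0.3079413082437276, 0.3260555028462998)  # ~0.3167396313364056
print(combined_score(template, slot))              # ~0.2333870967741936
```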
2023-01-22 13:50:07.949617: step: 8/469, loss: 0.023402441293001175 2023-01-22 13:50:08.566248: step: 10/469, loss: 0.00445956876501441 2023-01-22 13:50:09.158544: step: 12/469, loss: 0.09092497825622559 2023-01-22 13:50:09.724288: step: 14/469, loss: 0.0051290481351315975 2023-01-22 13:50:10.351130: step: 16/469, loss: 0.0009732205071486533 2023-01-22 13:50:10.902177: step: 18/469, loss: 0.004005457740277052 2023-01-22 13:50:11.515158: step: 20/469, loss: 0.003026357153430581 2023-01-22 13:50:12.095490: step: 22/469, loss: 0.005434069782495499 2023-01-22 13:50:12.752565: step: 24/469, loss: 0.012082789093255997 2023-01-22 13:50:13.351804: step: 26/469, loss: 0.11068152636289597 2023-01-22 13:50:13.960576: step: 28/469, loss: 0.014834997244179249 2023-01-22 13:50:14.585836: step: 30/469, loss: 0.0019150963053107262 2023-01-22 13:50:15.156945: step: 32/469, loss: 0.0034351758658885956 2023-01-22 13:50:15.919346: step: 34/469, loss: 0.044433578848838806 2023-01-22 13:50:16.566478: step: 36/469, loss: 0.0034827005583792925 2023-01-22 13:50:17.170736: step: 38/469, loss: 0.03252704441547394 2023-01-22 13:50:17.776418: step: 40/469, loss: 0.0008543062140233815 2023-01-22 13:50:18.434348: step: 42/469, loss: 0.036822181195020676 2023-01-22 13:50:19.146433: step: 44/469, loss: 0.4671904742717743 2023-01-22 13:50:19.764979: step: 46/469, loss: 0.01179991103708744 2023-01-22 13:50:20.387654: step: 48/469, loss: 0.0029738720040768385 2023-01-22 13:50:20.986220: step: 50/469, loss: 0.025945907458662987 2023-01-22 13:50:21.610535: step: 52/469, loss: 0.24279694259166718 2023-01-22 13:50:22.256481: step: 54/469, loss: 0.04112083464860916 2023-01-22 13:50:22.888901: step: 56/469, loss: 0.0020899134688079357 2023-01-22 13:50:23.530241: step: 58/469, loss: 0.016317013651132584 2023-01-22 13:50:24.158919: step: 60/469, loss: 0.026673637330532074 2023-01-22 13:50:24.848839: step: 62/469, loss: 0.0037051585968583822 2023-01-22 13:50:25.470863: step: 64/469, loss: 0.000197053566807881 2023-01-22 13:50:26.177305: step: 66/469, loss: 0.003966174554079771 2023-01-22 13:50:26.782163: step: 68/469, loss: 0.014426983892917633 2023-01-22 13:50:27.423877: step: 70/469, loss: 0.015052501112222672 2023-01-22 13:50:28.069954: step: 72/469, loss: 0.029321715235710144 2023-01-22 13:50:28.664996: step: 74/469, loss: 0.010223906487226486 2023-01-22 13:50:29.409775: step: 76/469, loss: 0.020182477310299873 2023-01-22 13:50:30.092905: step: 78/469, loss: 0.0006092899711802602 2023-01-22 13:50:30.750342: step: 80/469, loss: 0.08710543066263199 2023-01-22 13:50:31.416505: step: 82/469, loss: 0.5180069804191589 2023-01-22 13:50:32.035069: step: 84/469, loss: 0.0765908807516098 2023-01-22 13:50:32.615832: step: 86/469, loss: 0.04816500097513199 2023-01-22 13:50:33.257498: step: 88/469, loss: 0.009879494085907936 2023-01-22 13:50:33.932854: step: 90/469, loss: 0.009175695478916168 2023-01-22 13:50:34.542534: step: 92/469, loss: 0.0030323462560772896 2023-01-22 13:50:35.163157: step: 94/469, loss: 0.016966333612799644 2023-01-22 13:50:35.822303: step: 96/469, loss: 1.4426153898239136 2023-01-22 13:50:36.425128: step: 98/469, loss: 0.019680799916386604 2023-01-22 13:50:37.027779: step: 100/469, loss: 0.007532022427767515 2023-01-22 13:50:37.677736: step: 102/469, loss: 0.003947995137423277 2023-01-22 13:50:38.325461: step: 104/469, loss: 0.02941165491938591 2023-01-22 13:50:38.942644: step: 106/469, loss: 0.04238269105553627 2023-01-22 13:50:39.517670: step: 108/469, loss: 0.00334938894957304 2023-01-22 13:50:40.301935: step: 
110/469, loss: 0.04745030403137207 2023-01-22 13:50:40.940216: step: 112/469, loss: 0.005659888032823801 2023-01-22 13:50:41.585144: step: 114/469, loss: 0.004309596959501505 2023-01-22 13:50:42.245141: step: 116/469, loss: 0.013013780117034912 2023-01-22 13:50:42.893123: step: 118/469, loss: 0.0148171903565526 2023-01-22 13:50:43.490185: step: 120/469, loss: 0.0932435691356659 2023-01-22 13:50:44.092289: step: 122/469, loss: 0.03964155539870262 2023-01-22 13:50:44.718554: step: 124/469, loss: 0.03424255922436714 2023-01-22 13:50:45.332639: step: 126/469, loss: 0.35389021039009094 2023-01-22 13:50:45.890861: step: 128/469, loss: 0.008723670616745949 2023-01-22 13:50:46.530240: step: 130/469, loss: 0.14223450422286987 2023-01-22 13:50:47.160984: step: 132/469, loss: 0.0004380336031317711 2023-01-22 13:50:47.753032: step: 134/469, loss: 0.03252764046192169 2023-01-22 13:50:48.360761: step: 136/469, loss: 0.050242744386196136 2023-01-22 13:50:48.922630: step: 138/469, loss: 0.023888027295470238 2023-01-22 13:50:49.485362: step: 140/469, loss: 0.007920670323073864 2023-01-22 13:50:50.108070: step: 142/469, loss: 0.0038254917599260807 2023-01-22 13:50:50.688424: step: 144/469, loss: 0.0032616383396089077 2023-01-22 13:50:51.347712: step: 146/469, loss: 0.007968848571181297 2023-01-22 13:50:51.982492: step: 148/469, loss: 0.002290271455422044 2023-01-22 13:50:52.616615: step: 150/469, loss: 0.03925652429461479 2023-01-22 13:50:53.221431: step: 152/469, loss: 0.0185690987855196 2023-01-22 13:50:53.816372: step: 154/469, loss: 0.0018010850762948394 2023-01-22 13:50:54.533540: step: 156/469, loss: 0.009231813251972198 2023-01-22 13:50:55.138399: step: 158/469, loss: 0.011754135601222515 2023-01-22 13:50:55.749838: step: 160/469, loss: 0.06417781859636307 2023-01-22 13:50:56.371763: step: 162/469, loss: 0.032035015523433685 2023-01-22 13:50:56.961055: step: 164/469, loss: 0.14094367623329163 2023-01-22 13:50:57.577808: step: 166/469, loss: 0.03806191310286522 2023-01-22 13:50:58.273665: step: 168/469, loss: 0.028705600649118423 2023-01-22 13:50:58.851147: step: 170/469, loss: 0.012910599820315838 2023-01-22 13:50:59.386166: step: 172/469, loss: 0.002661576960235834 2023-01-22 13:51:00.018941: step: 174/469, loss: 0.011819063685834408 2023-01-22 13:51:00.719382: step: 176/469, loss: 0.029520921409130096 2023-01-22 13:51:01.345570: step: 178/469, loss: 0.05733266845345497 2023-01-22 13:51:01.946853: step: 180/469, loss: 0.011158952489495277 2023-01-22 13:51:02.600117: step: 182/469, loss: 0.021350566297769547 2023-01-22 13:51:03.280284: step: 184/469, loss: 0.052336230874061584 2023-01-22 13:51:03.906006: step: 186/469, loss: 0.026848940178751945 2023-01-22 13:51:04.480482: step: 188/469, loss: 0.0014062923146411777 2023-01-22 13:51:05.043105: step: 190/469, loss: 0.03976031020283699 2023-01-22 13:51:05.689702: step: 192/469, loss: 0.04806031659245491 2023-01-22 13:51:06.331756: step: 194/469, loss: 0.03805208206176758 2023-01-22 13:51:06.987854: step: 196/469, loss: 0.012083997018635273 2023-01-22 13:51:07.592905: step: 198/469, loss: 0.0216117724776268 2023-01-22 13:51:08.157406: step: 200/469, loss: 0.0507252961397171 2023-01-22 13:51:08.797043: step: 202/469, loss: 0.00042968595516867936 2023-01-22 13:51:09.392502: step: 204/469, loss: 0.06810729950666428 2023-01-22 13:51:10.036622: step: 206/469, loss: 0.013265375979244709 2023-01-22 13:51:10.761176: step: 208/469, loss: 0.02576095052063465 2023-01-22 13:51:11.405231: step: 210/469, loss: 0.023638907819986343 2023-01-22 13:51:12.013539: step: 
212/469, loss: 0.11431443691253662 2023-01-22 13:51:12.624824: step: 214/469, loss: 0.00093061663210392 2023-01-22 13:51:13.351858: step: 216/469, loss: 0.11402209103107452 2023-01-22 13:51:13.960754: step: 218/469, loss: 0.3777497410774231 2023-01-22 13:51:14.541454: step: 220/469, loss: 0.0003449449432082474 2023-01-22 13:51:15.107447: step: 222/469, loss: 0.007000977173447609 2023-01-22 13:51:15.703832: step: 224/469, loss: 0.003302902216091752 2023-01-22 13:51:16.296106: step: 226/469, loss: 0.025939274579286575 2023-01-22 13:51:16.857441: step: 228/469, loss: 0.019885417073965073 2023-01-22 13:51:17.504015: step: 230/469, loss: 0.018146274611353874 2023-01-22 13:51:18.151065: step: 232/469, loss: 0.002075487282127142 2023-01-22 13:51:18.762659: step: 234/469, loss: 0.06297841668128967 2023-01-22 13:51:19.409184: step: 236/469, loss: 0.006929186638444662 2023-01-22 13:51:19.980810: step: 238/469, loss: 0.011928737163543701 2023-01-22 13:51:20.574818: step: 240/469, loss: 0.006712186615914106 2023-01-22 13:51:21.206443: step: 242/469, loss: 0.005772011820226908 2023-01-22 13:51:21.873263: step: 244/469, loss: 0.05793805792927742 2023-01-22 13:51:22.467868: step: 246/469, loss: 0.004826302640140057 2023-01-22 13:51:23.108938: step: 248/469, loss: 0.008750352077186108 2023-01-22 13:51:23.717463: step: 250/469, loss: 0.003474102122709155 2023-01-22 13:51:24.375698: step: 252/469, loss: 0.010111937299370766 2023-01-22 13:51:24.995648: step: 254/469, loss: 0.0006807201425544918 2023-01-22 13:51:25.623628: step: 256/469, loss: 0.005595608148723841 2023-01-22 13:51:26.182062: step: 258/469, loss: 0.013326623477041721 2023-01-22 13:51:26.776099: step: 260/469, loss: 0.7546446323394775 2023-01-22 13:51:27.427975: step: 262/469, loss: 0.019674133509397507 2023-01-22 13:51:28.123620: step: 264/469, loss: 0.024653123691678047 2023-01-22 13:51:28.675682: step: 266/469, loss: 0.030251184478402138 2023-01-22 13:51:29.296758: step: 268/469, loss: 0.021238503977656364 2023-01-22 13:51:29.915783: step: 270/469, loss: 0.06220995634794235 2023-01-22 13:51:30.538766: step: 272/469, loss: 0.18594741821289062 2023-01-22 13:51:31.205844: step: 274/469, loss: 0.006447215564548969 2023-01-22 13:51:31.796542: step: 276/469, loss: 0.020789941772818565 2023-01-22 13:51:32.360785: step: 278/469, loss: 3.72604736185167e-05 2023-01-22 13:51:33.066700: step: 280/469, loss: 0.006562444381415844 2023-01-22 13:51:33.676773: step: 282/469, loss: 0.007398547139018774 2023-01-22 13:51:34.237655: step: 284/469, loss: 0.026809178292751312 2023-01-22 13:51:34.878715: step: 286/469, loss: 0.061946723610162735 2023-01-22 13:51:35.465277: step: 288/469, loss: 0.0020723463967442513 2023-01-22 13:51:36.095113: step: 290/469, loss: 0.027895238250494003 2023-01-22 13:51:36.737449: step: 292/469, loss: 0.23240278661251068 2023-01-22 13:51:37.358608: step: 294/469, loss: 0.02472991682589054 2023-01-22 13:51:37.939161: step: 296/469, loss: 1.0509834289550781 2023-01-22 13:51:38.522639: step: 298/469, loss: 0.0015113805420696735 2023-01-22 13:51:39.142055: step: 300/469, loss: 0.017315786331892014 2023-01-22 13:51:39.742625: step: 302/469, loss: 0.021042747423052788 2023-01-22 13:51:40.313642: step: 304/469, loss: 0.011752031743526459 2023-01-22 13:51:41.011858: step: 306/469, loss: 0.513655424118042 2023-01-22 13:51:41.656739: step: 308/469, loss: 0.004860830493271351 2023-01-22 13:51:42.311462: step: 310/469, loss: 0.005903632380068302 2023-01-22 13:51:42.946839: step: 312/469, loss: 0.014028006233274937 2023-01-22 13:51:43.557503: 
step: 314/469, loss: 0.0006413854425773025 2023-01-22 13:51:44.144515: step: 316/469, loss: 0.03341635316610336 2023-01-22 13:51:44.758057: step: 318/469, loss: 0.004747896920889616 2023-01-22 13:51:45.480385: step: 320/469, loss: 0.015307956375181675 2023-01-22 13:51:46.033594: step: 322/469, loss: 0.0030243617948144674 2023-01-22 13:51:46.677041: step: 324/469, loss: 0.0326656699180603 2023-01-22 13:51:47.287536: step: 326/469, loss: 0.00802603829652071 2023-01-22 13:51:47.880332: step: 328/469, loss: 0.03192810341715813 2023-01-22 13:51:48.452960: step: 330/469, loss: 0.06231268122792244 2023-01-22 13:51:49.151604: step: 332/469, loss: 0.00647707236930728 2023-01-22 13:51:49.724298: step: 334/469, loss: 0.003140086540952325 2023-01-22 13:51:50.328494: step: 336/469, loss: 0.004049165640026331 2023-01-22 13:51:50.980282: step: 338/469, loss: 0.7120697498321533 2023-01-22 13:51:51.615622: step: 340/469, loss: 0.024741338565945625 2023-01-22 13:51:52.308587: step: 342/469, loss: 0.015725327655673027 2023-01-22 13:51:52.945871: step: 344/469, loss: 0.0012354745995253325 2023-01-22 13:51:53.592663: step: 346/469, loss: 0.013057139702141285 2023-01-22 13:51:54.083541: step: 348/469, loss: 0.008503837510943413 2023-01-22 13:51:54.781807: step: 350/469, loss: 0.03578595072031021 2023-01-22 13:51:55.394469: step: 352/469, loss: 0.014671429991722107 2023-01-22 13:51:55.999823: step: 354/469, loss: 0.020837299525737762 2023-01-22 13:51:56.618177: step: 356/469, loss: 0.012281851842999458 2023-01-22 13:51:57.190732: step: 358/469, loss: 0.5292306542396545 2023-01-22 13:51:57.796274: step: 360/469, loss: 0.021828878670930862 2023-01-22 13:51:58.498868: step: 362/469, loss: 0.004975995514541864 2023-01-22 13:51:59.115298: step: 364/469, loss: 0.01794261671602726 2023-01-22 13:51:59.760165: step: 366/469, loss: 0.007526268716901541 2023-01-22 13:52:00.400761: step: 368/469, loss: 0.6245337724685669 2023-01-22 13:52:01.073559: step: 370/469, loss: 0.017351172864437103 2023-01-22 13:52:01.710647: step: 372/469, loss: 0.009775765240192413 2023-01-22 13:52:02.301157: step: 374/469, loss: 0.013930571265518665 2023-01-22 13:52:02.956774: step: 376/469, loss: 0.102723628282547 2023-01-22 13:52:03.629014: step: 378/469, loss: 0.05071789026260376 2023-01-22 13:52:04.185835: step: 380/469, loss: 0.009899248369038105 2023-01-22 13:52:04.854328: step: 382/469, loss: 0.005348144564777613 2023-01-22 13:52:05.414942: step: 384/469, loss: 0.0845874771475792 2023-01-22 13:52:06.117334: step: 386/469, loss: 0.005308009684085846 2023-01-22 13:52:06.814489: step: 388/469, loss: 0.08498001098632812 2023-01-22 13:52:07.444086: step: 390/469, loss: 0.03851882740855217 2023-01-22 13:52:08.098146: step: 392/469, loss: 0.00036290474236011505 2023-01-22 13:52:08.697595: step: 394/469, loss: 0.03963419795036316 2023-01-22 13:52:09.340712: step: 396/469, loss: 0.03450312465429306 2023-01-22 13:52:09.940989: step: 398/469, loss: 0.0025501695927232504 2023-01-22 13:52:10.562938: step: 400/469, loss: 0.04440242052078247 2023-01-22 13:52:11.176321: step: 402/469, loss: 0.004292868077754974 2023-01-22 13:52:11.720633: step: 404/469, loss: 0.03436756879091263 2023-01-22 13:52:12.291269: step: 406/469, loss: 0.0028362928424030542 2023-01-22 13:52:12.972536: step: 408/469, loss: 0.0009868261404335499 2023-01-22 13:52:13.665420: step: 410/469, loss: 0.03605183959007263 2023-01-22 13:52:14.227343: step: 412/469, loss: 0.024166947230696678 2023-01-22 13:52:14.823062: step: 414/469, loss: 0.01576944626867771 2023-01-22 13:52:15.468702: step: 
416/469, loss: 0.028224492445588112 2023-01-22 13:52:16.091189: step: 418/469, loss: 0.05126165971159935 2023-01-22 13:52:16.665570: step: 420/469, loss: 0.15945526957511902 2023-01-22 13:52:17.268240: step: 422/469, loss: 0.001844732672907412 2023-01-22 13:52:17.932787: step: 424/469, loss: 0.00355209456756711 2023-01-22 13:52:18.551603: step: 426/469, loss: 0.03788159415125847 2023-01-22 13:52:19.153756: step: 428/469, loss: 0.0040191118605434895 2023-01-22 13:52:19.834430: step: 430/469, loss: 0.018151845782995224 2023-01-22 13:52:20.430030: step: 432/469, loss: 0.03129963204264641 2023-01-22 13:52:21.012901: step: 434/469, loss: 0.010316013358533382 2023-01-22 13:52:21.632562: step: 436/469, loss: 0.07308047264814377 2023-01-22 13:52:22.287903: step: 438/469, loss: 0.0064715552143752575 2023-01-22 13:52:22.906878: step: 440/469, loss: 0.03209478035569191 2023-01-22 13:52:23.505577: step: 442/469, loss: 0.004542089533060789 2023-01-22 13:52:24.198571: step: 444/469, loss: 0.0011712894774973392 2023-01-22 13:52:24.812473: step: 446/469, loss: 0.1002793163061142 2023-01-22 13:52:25.487364: step: 448/469, loss: 0.07509500533342361 2023-01-22 13:52:26.114621: step: 450/469, loss: 0.04948216304183006 2023-01-22 13:52:26.705789: step: 452/469, loss: 0.12339958548545837 2023-01-22 13:52:27.334824: step: 454/469, loss: 0.001356293330900371 2023-01-22 13:52:28.032529: step: 456/469, loss: 0.001529977540485561 2023-01-22 13:52:28.602264: step: 458/469, loss: 0.0015764759154990315 2023-01-22 13:52:29.248203: step: 460/469, loss: 0.008430872112512589 2023-01-22 13:52:29.896955: step: 462/469, loss: 0.015910552814602852 2023-01-22 13:52:30.527400: step: 464/469, loss: 0.206633061170578 2023-01-22 13:52:31.106395: step: 466/469, loss: 0.050532370805740356 2023-01-22 13:52:31.727190: step: 468/469, loss: 0.0410938486456871 2023-01-22 13:52:32.336367: step: 470/469, loss: 0.01015803124755621 2023-01-22 13:52:32.955234: step: 472/469, loss: 0.10376623272895813 2023-01-22 13:52:33.693104: step: 474/469, loss: 0.010095294564962387 2023-01-22 13:52:34.307896: step: 476/469, loss: 0.047576677054166794 2023-01-22 13:52:35.032995: step: 478/469, loss: 0.008052426390349865 2023-01-22 13:52:35.664189: step: 480/469, loss: 2.4106077034957707e-05 2023-01-22 13:52:36.300565: step: 482/469, loss: 0.005809403490275145 2023-01-22 13:52:36.902935: step: 484/469, loss: 0.025287888944149017 2023-01-22 13:52:37.475959: step: 486/469, loss: 0.005380943417549133 2023-01-22 13:52:38.092029: step: 488/469, loss: 0.009135490283370018 2023-01-22 13:52:38.668736: step: 490/469, loss: 0.023876287043094635 2023-01-22 13:52:39.272509: step: 492/469, loss: 0.0030396936926990747 2023-01-22 13:52:39.896640: step: 494/469, loss: 0.015187445096671581 2023-01-22 13:52:40.575460: step: 496/469, loss: 0.01837237924337387 2023-01-22 13:52:41.246040: step: 498/469, loss: 0.0049912696704268456 2023-01-22 13:52:41.894565: step: 500/469, loss: 0.009158773347735405 2023-01-22 13:52:42.599970: step: 502/469, loss: 0.043934985995292664 2023-01-22 13:52:43.221323: step: 504/469, loss: 0.062133997678756714 2023-01-22 13:52:43.899529: step: 506/469, loss: 0.007055279333144426 2023-01-22 13:52:44.577063: step: 508/469, loss: 0.01807880960404873 2023-01-22 13:52:45.160689: step: 510/469, loss: 0.008635953068733215 2023-01-22 13:52:45.755058: step: 512/469, loss: 0.00672085490077734 2023-01-22 13:52:46.341123: step: 514/469, loss: 0.007145282346755266 2023-01-22 13:52:46.947461: step: 516/469, loss: 0.010456304997205734 2023-01-22 13:52:47.543162: 
step: 518/469, loss: 0.004864528309553862 2023-01-22 13:52:48.144863: step: 520/469, loss: 0.6717926263809204 2023-01-22 13:52:48.750550: step: 522/469, loss: 0.02913685329258442 2023-01-22 13:52:49.278610: step: 524/469, loss: 0.0020940680988132954 2023-01-22 13:52:49.938987: step: 526/469, loss: 0.004378907382488251 2023-01-22 13:52:50.577943: step: 528/469, loss: 0.1069122850894928 2023-01-22 13:52:51.203442: step: 530/469, loss: 0.07529553771018982 2023-01-22 13:52:51.835731: step: 532/469, loss: 0.015649432316422462 2023-01-22 13:52:52.581352: step: 534/469, loss: 0.00882435217499733 2023-01-22 13:52:53.168838: step: 536/469, loss: 0.030467107892036438 2023-01-22 13:52:53.833437: step: 538/469, loss: 0.013383789919316769 2023-01-22 13:52:54.553060: step: 540/469, loss: 0.2872244417667389 2023-01-22 13:52:55.215105: step: 542/469, loss: 0.0003488498041406274 2023-01-22 13:52:55.785702: step: 544/469, loss: 0.047618526965379715 2023-01-22 13:52:56.420571: step: 546/469, loss: 0.05529698356986046 2023-01-22 13:52:57.101204: step: 548/469, loss: 0.01042431965470314 2023-01-22 13:52:57.748650: step: 550/469, loss: 0.001183462212793529 2023-01-22 13:52:58.372059: step: 552/469, loss: 0.0013073893496766686 2023-01-22 13:52:58.949134: step: 554/469, loss: 0.0003614505403675139 2023-01-22 13:52:59.716273: step: 556/469, loss: 0.01786225475370884 2023-01-22 13:53:00.326768: step: 558/469, loss: 0.06564614921808243 2023-01-22 13:53:00.952429: step: 560/469, loss: 0.0533469133079052 2023-01-22 13:53:01.583914: step: 562/469, loss: 0.009879239834845066 2023-01-22 13:53:02.221022: step: 564/469, loss: 0.033112458884716034 2023-01-22 13:53:02.808747: step: 566/469, loss: 0.024811945855617523 2023-01-22 13:53:03.411963: step: 568/469, loss: 0.009929421357810497 2023-01-22 13:53:04.106494: step: 570/469, loss: 1.0463825464248657 2023-01-22 13:53:04.738650: step: 572/469, loss: 0.045280393213033676 2023-01-22 13:53:05.329460: step: 574/469, loss: 0.08431726694107056 2023-01-22 13:53:05.979908: step: 576/469, loss: 0.014226955361664295 2023-01-22 13:53:06.546439: step: 578/469, loss: 0.00147697061765939 2023-01-22 13:53:07.165454: step: 580/469, loss: 0.0017435989575460553 2023-01-22 13:53:07.865579: step: 582/469, loss: 0.015671757981181145 2023-01-22 13:53:08.467797: step: 584/469, loss: 0.05729326233267784 2023-01-22 13:53:09.035329: step: 586/469, loss: 0.005357309710234404 2023-01-22 13:53:09.724120: step: 588/469, loss: 0.08900397270917892 2023-01-22 13:53:10.325603: step: 590/469, loss: 0.007743714842945337 2023-01-22 13:53:10.952344: step: 592/469, loss: 0.0074604833498597145 2023-01-22 13:53:11.588159: step: 594/469, loss: 0.02177645079791546 2023-01-22 13:53:12.224238: step: 596/469, loss: 0.04581688717007637 2023-01-22 13:53:12.868936: step: 598/469, loss: 0.020270923152565956 2023-01-22 13:53:13.561681: step: 600/469, loss: 0.026205964386463165 2023-01-22 13:53:14.208513: step: 602/469, loss: 0.09614122658967972 2023-01-22 13:53:14.772690: step: 604/469, loss: 0.006960897706449032 2023-01-22 13:53:15.414007: step: 606/469, loss: 0.002453114604577422 2023-01-22 13:53:16.038465: step: 608/469, loss: 0.0012141662882640958 2023-01-22 13:53:16.619074: step: 610/469, loss: 0.9516305923461914 2023-01-22 13:53:17.286387: step: 612/469, loss: 0.0357881635427475 2023-01-22 13:53:17.851112: step: 614/469, loss: 0.014198594726622105 2023-01-22 13:53:18.508811: step: 616/469, loss: 0.21801839768886566 2023-01-22 13:53:19.152201: step: 618/469, loss: 0.004060262348502874 2023-01-22 13:53:19.887466: step: 
620/469, loss: 0.045317068696022034 2023-01-22 13:53:20.413838: step: 622/469, loss: 0.22143952548503876 2023-01-22 13:53:21.084704: step: 624/469, loss: 0.01167630311101675 2023-01-22 13:53:21.720034: step: 626/469, loss: 0.05861573666334152 2023-01-22 13:53:22.355261: step: 628/469, loss: 0.016020627692341805 2023-01-22 13:53:22.956578: step: 630/469, loss: 0.02167559415102005 2023-01-22 13:53:23.503429: step: 632/469, loss: 0.001943980692885816 2023-01-22 13:53:24.126311: step: 634/469, loss: 0.006525107193738222 2023-01-22 13:53:24.705528: step: 636/469, loss: 0.029377026483416557 2023-01-22 13:53:25.317593: step: 638/469, loss: 0.024579958990216255 2023-01-22 13:53:25.908854: step: 640/469, loss: 0.09940575808286667 2023-01-22 13:53:26.533764: step: 642/469, loss: 0.015262838453054428 2023-01-22 13:53:27.189333: step: 644/469, loss: 0.020658176392316818 2023-01-22 13:53:27.836242: step: 646/469, loss: 0.12778401374816895 2023-01-22 13:53:28.397929: step: 648/469, loss: 0.007055656518787146 2023-01-22 13:53:28.981557: step: 650/469, loss: 0.03938606008887291 2023-01-22 13:53:29.547789: step: 652/469, loss: 0.00116272596642375 2023-01-22 13:53:30.117152: step: 654/469, loss: 0.03565958887338638 2023-01-22 13:53:30.682081: step: 656/469, loss: 0.02677679993212223 2023-01-22 13:53:31.218590: step: 658/469, loss: 0.00022557307966053486 2023-01-22 13:53:31.844568: step: 660/469, loss: 0.04465703293681145 2023-01-22 13:53:32.518167: step: 662/469, loss: 1.6279860734939575 2023-01-22 13:53:33.151765: step: 664/469, loss: 0.009028756059706211 2023-01-22 13:53:33.719863: step: 666/469, loss: 0.020242320373654366 2023-01-22 13:53:34.331515: step: 668/469, loss: 0.005034836940467358 2023-01-22 13:53:34.896092: step: 670/469, loss: 0.012029912322759628 2023-01-22 13:53:35.468024: step: 672/469, loss: 0.03289627283811569 2023-01-22 13:53:36.092823: step: 674/469, loss: 0.013846637681126595 2023-01-22 13:53:36.704079: step: 676/469, loss: 0.02020134963095188 2023-01-22 13:53:37.365446: step: 678/469, loss: 0.05965556204319 2023-01-22 13:53:37.970056: step: 680/469, loss: 0.025320544838905334 2023-01-22 13:53:38.627641: step: 682/469, loss: 0.03602776303887367 2023-01-22 13:53:39.244352: step: 684/469, loss: 0.008313469588756561 2023-01-22 13:53:39.863042: step: 686/469, loss: 0.01549284253269434 2023-01-22 13:53:40.529707: step: 688/469, loss: 0.04114497825503349 2023-01-22 13:53:41.234267: step: 690/469, loss: 0.03416604921221733 2023-01-22 13:53:41.995279: step: 692/469, loss: 0.04226415976881981 2023-01-22 13:53:42.706057: step: 694/469, loss: 0.06959113478660583 2023-01-22 13:53:43.313898: step: 696/469, loss: 0.0008838659268803895 2023-01-22 13:53:44.021291: step: 698/469, loss: 0.0439516082406044 2023-01-22 13:53:44.589261: step: 700/469, loss: 0.0051228865049779415 2023-01-22 13:53:45.140837: step: 702/469, loss: 0.022063951939344406 2023-01-22 13:53:45.748050: step: 704/469, loss: 0.006934995297342539 2023-01-22 13:53:46.430717: step: 706/469, loss: 0.05567527562379837 2023-01-22 13:53:47.022131: step: 708/469, loss: 0.014823506586253643 2023-01-22 13:53:47.670233: step: 710/469, loss: 0.010614674538373947 2023-01-22 13:53:48.240094: step: 712/469, loss: 0.030289940536022186 2023-01-22 13:53:48.859689: step: 714/469, loss: 0.00467514805495739 2023-01-22 13:53:49.452961: step: 716/469, loss: 0.009180083870887756 2023-01-22 13:53:50.121291: step: 718/469, loss: 0.05070386454463005 2023-01-22 13:53:50.780353: step: 720/469, loss: 0.02567620389163494 2023-01-22 13:53:51.390380: step: 722/469, 
loss: 0.032591812312603 2023-01-22 13:53:52.039930: step: 724/469, loss: 0.015123724937438965 2023-01-22 13:53:52.679372: step: 726/469, loss: 0.0711151584982872 2023-01-22 13:53:53.361702: step: 728/469, loss: 0.02317347377538681 2023-01-22 13:53:54.015445: step: 730/469, loss: 0.019069934263825417 2023-01-22 13:53:54.658988: step: 732/469, loss: 0.014179736375808716 2023-01-22 13:53:55.250723: step: 734/469, loss: 0.04434696584939957 2023-01-22 13:53:55.853700: step: 736/469, loss: 0.013942303135991096 2023-01-22 13:53:56.464328: step: 738/469, loss: 0.12939247488975525 2023-01-22 13:53:57.104978: step: 740/469, loss: 0.15470831096172333 2023-01-22 13:53:57.739278: step: 742/469, loss: 0.044453032314777374 2023-01-22 13:53:58.403892: step: 744/469, loss: 0.02451922744512558 2023-01-22 13:53:59.079660: step: 746/469, loss: 0.30026310682296753 2023-01-22 13:53:59.765565: step: 748/469, loss: 0.04077666997909546 2023-01-22 13:54:00.450354: step: 750/469, loss: 0.1663493514060974 2023-01-22 13:54:01.030744: step: 752/469, loss: 0.002196589717641473 2023-01-22 13:54:01.676386: step: 754/469, loss: 0.04906152933835983 2023-01-22 13:54:02.343098: step: 756/469, loss: 0.0015373738715425134 2023-01-22 13:54:02.965966: step: 758/469, loss: 0.005340177100151777 2023-01-22 13:54:03.603413: step: 760/469, loss: 0.005104701966047287 2023-01-22 13:54:04.252675: step: 762/469, loss: 0.015469688922166824 2023-01-22 13:54:04.852821: step: 764/469, loss: 0.007918402552604675 2023-01-22 13:54:05.452808: step: 766/469, loss: 0.024146132171154022 2023-01-22 13:54:06.058759: step: 768/469, loss: 0.05916436389088631 2023-01-22 13:54:06.711629: step: 770/469, loss: 0.006385270971804857 2023-01-22 13:54:07.321570: step: 772/469, loss: 0.06721234321594238 2023-01-22 13:54:08.028735: step: 774/469, loss: 0.050477322190999985 2023-01-22 13:54:08.710672: step: 776/469, loss: 0.008046678267419338 2023-01-22 13:54:09.296255: step: 778/469, loss: 0.09385431557893753 2023-01-22 13:54:09.923839: step: 780/469, loss: 0.03446837142109871 2023-01-22 13:54:10.546997: step: 782/469, loss: 0.02484111115336418 2023-01-22 13:54:11.170161: step: 784/469, loss: 0.05750124529004097 2023-01-22 13:54:11.804342: step: 786/469, loss: 0.002741179894655943 2023-01-22 13:54:12.469299: step: 788/469, loss: 0.015425169840455055 2023-01-22 13:54:13.166874: step: 790/469, loss: 0.04131796956062317 2023-01-22 13:54:13.826926: step: 792/469, loss: 0.006841577589511871 2023-01-22 13:54:14.537292: step: 794/469, loss: 0.003700926434248686 2023-01-22 13:54:15.113565: step: 796/469, loss: 0.00742322439327836 2023-01-22 13:54:15.714598: step: 798/469, loss: 0.061236098408699036 2023-01-22 13:54:16.267137: step: 800/469, loss: 0.0407230518758297 2023-01-22 13:54:16.951383: step: 802/469, loss: 0.003105613635852933 2023-01-22 13:54:17.564972: step: 804/469, loss: 0.02933841198682785 2023-01-22 13:54:18.187119: step: 806/469, loss: 0.02100670151412487 2023-01-22 13:54:18.893583: step: 808/469, loss: 0.01120324619114399 2023-01-22 13:54:19.465039: step: 810/469, loss: 0.0010781317250803113 2023-01-22 13:54:20.129394: step: 812/469, loss: 0.03038143739104271 2023-01-22 13:54:20.725687: step: 814/469, loss: 0.012821342796087265 2023-01-22 13:54:21.453514: step: 816/469, loss: 0.023509899154305458 2023-01-22 13:54:22.060084: step: 818/469, loss: 0.02559499256312847 2023-01-22 13:54:22.690027: step: 820/469, loss: 0.041714951395988464 2023-01-22 13:54:23.270745: step: 822/469, loss: 0.0006077050347812474 2023-01-22 13:54:23.891724: step: 824/469, loss: 
0.03724521026015282 2023-01-22 13:54:24.581875: step: 826/469, loss: 0.005431410390883684 2023-01-22 13:54:25.189406: step: 828/469, loss: 0.02351495437324047 2023-01-22 13:54:25.917377: step: 830/469, loss: 0.009260783903300762 2023-01-22 13:54:26.537497: step: 832/469, loss: 0.0022510855924338102 2023-01-22 13:54:27.187098: step: 834/469, loss: 0.04596267640590668 2023-01-22 13:54:27.855945: step: 836/469, loss: 0.03593933954834938 2023-01-22 13:54:28.475929: step: 838/469, loss: 0.042275525629520416 2023-01-22 13:54:29.127221: step: 840/469, loss: 0.0019201339455321431 2023-01-22 13:54:29.748724: step: 842/469, loss: 0.03970102220773697 2023-01-22 13:54:30.454462: step: 844/469, loss: 0.01932855136692524 2023-01-22 13:54:31.106415: step: 846/469, loss: 0.022237861528992653 2023-01-22 13:54:31.753078: step: 848/469, loss: 0.05673278123140335 2023-01-22 13:54:32.347275: step: 850/469, loss: 0.0016504751984030008 2023-01-22 13:54:33.013637: step: 852/469, loss: 0.028248030692338943 2023-01-22 13:54:33.627890: step: 854/469, loss: 0.13463911414146423 2023-01-22 13:54:34.350074: step: 856/469, loss: 0.0360083132982254 2023-01-22 13:54:34.965765: step: 858/469, loss: 0.004419809672981501 2023-01-22 13:54:35.673907: step: 860/469, loss: 0.10130942612886429 2023-01-22 13:54:36.352788: step: 862/469, loss: 0.006401387508958578 2023-01-22 13:54:36.923415: step: 864/469, loss: 0.023610521107912064 2023-01-22 13:54:37.526187: step: 866/469, loss: 0.01691424660384655 2023-01-22 13:54:38.106397: step: 868/469, loss: 0.012129467912018299 2023-01-22 13:54:38.759295: step: 870/469, loss: 0.026763200759887695 2023-01-22 13:54:39.388259: step: 872/469, loss: 0.005129052326083183 2023-01-22 13:54:40.004045: step: 874/469, loss: 0.17912110686302185 2023-01-22 13:54:40.611859: step: 876/469, loss: 0.035643480718135834 2023-01-22 13:54:41.221409: step: 878/469, loss: 0.035767074674367905 2023-01-22 13:54:41.888172: step: 880/469, loss: 0.0043825372122228146 2023-01-22 13:54:42.495012: step: 882/469, loss: 0.011669031344354153 2023-01-22 13:54:43.143520: step: 884/469, loss: 0.055026717483997345 2023-01-22 13:54:43.789016: step: 886/469, loss: 0.0007716281106695533 2023-01-22 13:54:44.467816: step: 888/469, loss: 0.04984692856669426 2023-01-22 13:54:45.090598: step: 890/469, loss: 0.008310409262776375 2023-01-22 13:54:45.672101: step: 892/469, loss: 0.02318243868649006 2023-01-22 13:54:46.287188: step: 894/469, loss: 0.0009560533799231052 2023-01-22 13:54:47.020035: step: 896/469, loss: 0.031211044639348984 2023-01-22 13:54:47.623350: step: 898/469, loss: 0.038294319063425064 2023-01-22 13:54:48.256528: step: 900/469, loss: 0.04682275652885437 2023-01-22 13:54:48.877463: step: 902/469, loss: 0.006399987731128931 2023-01-22 13:54:49.483306: step: 904/469, loss: 0.004942836239933968 2023-01-22 13:54:50.068160: step: 906/469, loss: 0.0034050687681883574 2023-01-22 13:54:50.645346: step: 908/469, loss: 0.011395362205803394 2023-01-22 13:54:51.275416: step: 910/469, loss: 0.10199712961912155 2023-01-22 13:54:51.907328: step: 912/469, loss: 0.015820639207959175 2023-01-22 13:54:52.540789: step: 914/469, loss: 0.016284726560115814 2023-01-22 13:54:53.229291: step: 916/469, loss: 0.0012991942930966616 2023-01-22 13:54:53.892552: step: 918/469, loss: 0.0005135639803484082 2023-01-22 13:54:54.473606: step: 920/469, loss: 0.0022157044149935246 2023-01-22 13:54:55.129853: step: 922/469, loss: 0.0037811293732374907 2023-01-22 13:54:55.720490: step: 924/469, loss: 0.017572833225131035 2023-01-22 13:54:56.358864: step: 
926/469, loss: 0.013367361389100552 2023-01-22 13:54:57.017871: step: 928/469, loss: 0.031215744093060493 2023-01-22 13:54:57.620440: step: 930/469, loss: 0.010102222673594952 2023-01-22 13:54:58.257068: step: 932/469, loss: 0.03398963436484337 2023-01-22 13:54:58.855541: step: 934/469, loss: 0.03642801567912102 2023-01-22 13:54:59.457526: step: 936/469, loss: 0.05083474516868591 2023-01-22 13:55:00.119581: step: 938/469, loss: 0.010094666853547096 ================================================== Loss: 0.054 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3030324377457405, 'r': 0.31338269178639194, 'f1': 0.30812066897654583}, 'combined': 0.2270362824037706, 'epoch': 28} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31687428371783966, 'r': 0.26642952125955593, 'f1': 0.28947064287941815}, 'combined': 0.15789307793422808, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3058907121407122, 'r': 0.3221429700912623, 'f1': 0.3138065531203239}, 'combined': 0.23122588124655444, 'epoch': 28} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3257576629300902, 'r': 0.2747928318586854, 'f1': 0.2981127198228716}, 'combined': 0.16260693808520266, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2974684009406232, 'r': 0.3200466476913346, 'f1': 0.308344759293114}, 'combined': 0.22720140158439978, 'epoch': 28} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3158386869012299, 'r': 0.2719160149808393, 'f1': 0.29223618915836513}, 'combined': 0.1594015577227446, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21637426900584794, 'r': 0.35238095238095235, 'f1': 0.2681159420289855}, 'combined': 0.17874396135265697, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4117647058823529, 'r': 0.2413793103448276, 'f1': 0.3043478260869565}, 'combined': 0.20289855072463764, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': 
{'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:57:47.602742: step: 2/469, loss: 0.011397200636565685 2023-01-22 13:57:48.209392: step: 4/469, loss: 0.09441093355417252 2023-01-22 13:57:48.833454: step: 6/469, loss: 0.009555439464747906 2023-01-22 13:57:49.405036: step: 8/469, loss: 0.035223279148340225 2023-01-22 13:57:50.019864: step: 10/469, loss: 0.0031088590621948242 2023-01-22 13:57:50.629003: step: 12/469, loss: 0.0036763264797627926 2023-01-22 13:57:51.231694: step: 14/469, loss: 0.010613823309540749 2023-01-22 13:57:51.931043: step: 16/469, loss: 0.003453663783147931 2023-01-22 13:57:52.565405: step: 18/469, loss: 0.004090846516191959 2023-01-22 13:57:53.190168: step: 20/469, loss: 0.016755182296037674 2023-01-22 13:57:53.816550: step: 22/469, loss: 0.01178661361336708 2023-01-22 13:57:54.410656: step: 24/469, loss: 0.0036587894428521395 2023-01-22 13:57:55.123751: step: 26/469, loss: 0.034375809133052826 2023-01-22 13:57:55.810802: step: 28/469, loss: 0.09999600052833557 2023-01-22 13:57:56.496949: step: 30/469, loss: 0.004642448853701353 2023-01-22 13:57:57.123078: step: 32/469, loss: 0.04244396463036537 2023-01-22 13:57:57.774766: step: 34/469, loss: 0.003256869036704302 2023-01-22 13:57:58.374930: step: 36/469, loss: 0.004867095500230789 2023-01-22 13:57:58.967233: step: 38/469, loss: 0.05503746494650841 2023-01-22 13:57:59.601610: step: 40/469, loss: 0.0006913591641932726 2023-01-22 13:58:00.275439: step: 42/469, loss: 0.0014958296669647098 2023-01-22 13:58:00.894198: step: 44/469, loss: 0.0031275958754122257 2023-01-22 13:58:01.545360: step: 46/469, loss: 0.024859173223376274 2023-01-22 13:58:02.170318: step: 48/469, loss: 0.08497133105993271 2023-01-22 13:58:02.740870: step: 50/469, loss: 0.03626275435090065 2023-01-22 13:58:03.303868: step: 52/469, loss: 0.032655492424964905 2023-01-22 13:58:04.014503: step: 54/469, loss: 0.009692434221506119 2023-01-22 13:58:04.700426: step: 56/469, loss: 0.005447814706712961 2023-01-22 13:58:05.322265: step: 58/469, loss: 0.0020807625260204077 2023-01-22 13:58:06.005488: step: 60/469, loss: 0.029967639595270157 2023-01-22 13:58:06.626344: step: 62/469, loss: 0.004841005429625511 2023-01-22 13:58:07.290112: step: 64/469, loss: 0.013195697218179703 2023-01-22 
13:58:07.850397: step: 66/469, loss: 0.0005663955816999078 2023-01-22 13:58:08.459319: step: 68/469, loss: 0.01343944575637579 2023-01-22 13:58:09.085889: step: 70/469, loss: 0.08714127540588379 2023-01-22 13:58:09.628106: step: 72/469, loss: 0.002277157036587596 2023-01-22 13:58:10.196344: step: 74/469, loss: 0.022743044421076775 2023-01-22 13:58:10.875570: step: 76/469, loss: 0.007360660005360842 2023-01-22 13:58:11.481535: step: 78/469, loss: 0.011254142969846725 2023-01-22 13:58:12.088485: step: 80/469, loss: 0.028145454823970795 2023-01-22 13:58:12.711919: step: 82/469, loss: 0.016802730038762093 2023-01-22 13:58:13.410390: step: 84/469, loss: 0.04198892042040825 2023-01-22 13:58:14.134379: step: 86/469, loss: 0.1045631393790245 2023-01-22 13:58:14.766251: step: 88/469, loss: 0.01818367838859558 2023-01-22 13:58:15.360816: step: 90/469, loss: 0.009146373718976974 2023-01-22 13:58:16.047241: step: 92/469, loss: 0.040817372500896454 2023-01-22 13:58:16.629904: step: 94/469, loss: 0.04876149818301201 2023-01-22 13:58:17.226446: step: 96/469, loss: 0.06437990069389343 2023-01-22 13:58:17.790347: step: 98/469, loss: 0.014572279527783394 2023-01-22 13:58:18.423109: step: 100/469, loss: 0.01077072974294424 2023-01-22 13:58:19.042045: step: 102/469, loss: 0.236786887049675 2023-01-22 13:58:19.636271: step: 104/469, loss: 0.006526426877826452 2023-01-22 13:58:20.226232: step: 106/469, loss: 0.01405851449817419 2023-01-22 13:58:20.888237: step: 108/469, loss: 0.010559209622442722 2023-01-22 13:58:21.501149: step: 110/469, loss: 0.002049786504358053 2023-01-22 13:58:22.149242: step: 112/469, loss: 0.011228427290916443 2023-01-22 13:58:22.755461: step: 114/469, loss: 0.05261453241109848 2023-01-22 13:58:23.332070: step: 116/469, loss: 0.0029814711306244135 2023-01-22 13:58:23.972253: step: 118/469, loss: 0.04157707095146179 2023-01-22 13:58:24.618976: step: 120/469, loss: 0.005800592713057995 2023-01-22 13:58:25.164373: step: 122/469, loss: 0.0075937239453196526 2023-01-22 13:58:25.871589: step: 124/469, loss: 0.003897598246112466 2023-01-22 13:58:26.538754: step: 126/469, loss: 0.001507570850662887 2023-01-22 13:58:27.284716: step: 128/469, loss: 0.004470352549105883 2023-01-22 13:58:27.940151: step: 130/469, loss: 0.020746994763612747 2023-01-22 13:58:28.564578: step: 132/469, loss: 0.06914812326431274 2023-01-22 13:58:29.174765: step: 134/469, loss: 0.05455198138952255 2023-01-22 13:58:29.854388: step: 136/469, loss: 0.002455746755003929 2023-01-22 13:58:30.503577: step: 138/469, loss: 0.050567299127578735 2023-01-22 13:58:31.090802: step: 140/469, loss: 0.0071621243841946125 2023-01-22 13:58:31.755414: step: 142/469, loss: 0.029519788920879364 2023-01-22 13:58:32.406377: step: 144/469, loss: 0.046111345291137695 2023-01-22 13:58:33.088246: step: 146/469, loss: 0.03253140673041344 2023-01-22 13:58:33.692916: step: 148/469, loss: 0.01767793670296669 2023-01-22 13:58:34.379818: step: 150/469, loss: 0.0021391334012150764 2023-01-22 13:58:35.036556: step: 152/469, loss: 0.005098097492009401 2023-01-22 13:58:35.613185: step: 154/469, loss: 0.033101294189691544 2023-01-22 13:58:36.198455: step: 156/469, loss: 0.05333928018808365 2023-01-22 13:58:36.743699: step: 158/469, loss: 0.02332524210214615 2023-01-22 13:58:37.400012: step: 160/469, loss: 0.006876158528029919 2023-01-22 13:58:37.987100: step: 162/469, loss: 0.012489285320043564 2023-01-22 13:58:38.635700: step: 164/469, loss: 0.02170495130121708 2023-01-22 13:58:39.215480: step: 166/469, loss: 0.0003414927050471306 2023-01-22 13:58:39.854159: 
step: 168/469, loss: 2.6231969968648627e-05 2023-01-22 13:58:40.388720: step: 170/469, loss: 0.08443411439657211 2023-01-22 13:58:40.990854: step: 172/469, loss: 0.1412838250398636 2023-01-22 13:58:41.530059: step: 174/469, loss: 0.0012679558712989092 2023-01-22 13:58:42.100772: step: 176/469, loss: 0.00448417104780674 2023-01-22 13:58:42.658648: step: 178/469, loss: 0.0009893211536109447 2023-01-22 13:58:43.299966: step: 180/469, loss: 0.028251152485609055 2023-01-22 13:58:43.923760: step: 182/469, loss: 0.0014473890187218785 2023-01-22 13:58:44.560500: step: 184/469, loss: 0.03571750223636627 2023-01-22 13:58:45.262221: step: 186/469, loss: 0.012102787382900715 2023-01-22 13:58:45.860300: step: 188/469, loss: 0.21775077283382416 2023-01-22 13:58:46.519749: step: 190/469, loss: 0.07623061537742615 2023-01-22 13:58:47.224062: step: 192/469, loss: 0.5434526205062866 2023-01-22 13:58:47.855722: step: 194/469, loss: 0.021269680932164192 2023-01-22 13:58:48.512995: step: 196/469, loss: 0.2258983701467514 2023-01-22 13:58:49.132090: step: 198/469, loss: 0.017458146438002586 2023-01-22 13:58:49.771984: step: 200/469, loss: 0.0072042066603899 2023-01-22 13:58:50.356363: step: 202/469, loss: 0.000873247510753572 2023-01-22 13:58:50.926352: step: 204/469, loss: 0.00014553866640198976 2023-01-22 13:58:51.573548: step: 206/469, loss: 0.01660745218396187 2023-01-22 13:58:52.178437: step: 208/469, loss: 0.015552261844277382 2023-01-22 13:58:52.787251: step: 210/469, loss: 0.004793266765773296 2023-01-22 13:58:53.424151: step: 212/469, loss: 0.0006335374200716615 2023-01-22 13:58:54.092148: step: 214/469, loss: 0.027401519939303398 2023-01-22 13:58:54.717175: step: 216/469, loss: 0.043808966875076294 2023-01-22 13:58:55.364159: step: 218/469, loss: 2.8400772862369195e-05 2023-01-22 13:58:55.979158: step: 220/469, loss: 0.023992571979761124 2023-01-22 13:58:56.612302: step: 222/469, loss: 0.002939148573204875 2023-01-22 13:58:57.338483: step: 224/469, loss: 0.00699267303571105 2023-01-22 13:58:57.925611: step: 226/469, loss: 0.02978898026049137 2023-01-22 13:58:58.545603: step: 228/469, loss: 0.013289243914186954 2023-01-22 13:58:59.175908: step: 230/469, loss: 0.02610580436885357 2023-01-22 13:58:59.813923: step: 232/469, loss: 0.007343559060245752 2023-01-22 13:59:00.479273: step: 234/469, loss: 0.001022710232064128 2023-01-22 13:59:01.122198: step: 236/469, loss: 0.01243559829890728 2023-01-22 13:59:01.776074: step: 238/469, loss: 0.06458567827939987 2023-01-22 13:59:02.406307: step: 240/469, loss: 0.045231886208057404 2023-01-22 13:59:03.003905: step: 242/469, loss: 0.032268546521663666 2023-01-22 13:59:03.571570: step: 244/469, loss: 0.011744252406060696 2023-01-22 13:59:04.249269: step: 246/469, loss: 0.00015904102474451065 2023-01-22 13:59:04.906381: step: 248/469, loss: 0.16161437332630157 2023-01-22 13:59:05.555839: step: 250/469, loss: 0.20700424909591675 2023-01-22 13:59:06.186141: step: 252/469, loss: 0.014303024858236313 2023-01-22 13:59:06.836028: step: 254/469, loss: 0.012318290770053864 2023-01-22 13:59:07.403258: step: 256/469, loss: 0.002054626354947686 2023-01-22 13:59:08.006715: step: 258/469, loss: 0.20263001322746277 2023-01-22 13:59:08.625474: step: 260/469, loss: 0.0022341187577694654 2023-01-22 13:59:09.238498: step: 262/469, loss: 0.014170438051223755 2023-01-22 13:59:09.839769: step: 264/469, loss: 0.018478937447071075 2023-01-22 13:59:10.418211: step: 266/469, loss: 0.022423433139920235 2023-01-22 13:59:11.035050: step: 268/469, loss: 0.0031005688942968845 2023-01-22 
13:59:11.615732: step: 270/469, loss: 0.004753394518047571 2023-01-22 13:59:12.300113: step: 272/469, loss: 0.005259358789771795 2023-01-22 13:59:12.944816: step: 274/469, loss: 0.15629994869232178 2023-01-22 13:59:13.573802: step: 276/469, loss: 0.07923565059900284 2023-01-22 13:59:14.215570: step: 278/469, loss: 0.008050136268138885 2023-01-22 13:59:14.855075: step: 280/469, loss: 0.0004991943133063614 2023-01-22 13:59:15.521652: step: 282/469, loss: 0.22852376103401184 2023-01-22 13:59:16.128717: step: 284/469, loss: 0.016664106398820877 2023-01-22 13:59:16.761401: step: 286/469, loss: 0.02770352177321911 2023-01-22 13:59:17.417788: step: 288/469, loss: 0.04874853044748306 2023-01-22 13:59:18.046085: step: 290/469, loss: 0.01245155930519104 2023-01-22 13:59:18.699683: step: 292/469, loss: 0.024859843775629997 2023-01-22 13:59:19.356221: step: 294/469, loss: 0.00643170066177845 2023-01-22 13:59:20.041821: step: 296/469, loss: 0.08221544325351715 2023-01-22 13:59:20.578634: step: 298/469, loss: 0.00019639221136458218 2023-01-22 13:59:21.205354: step: 300/469, loss: 0.0065299165435135365 2023-01-22 13:59:21.790416: step: 302/469, loss: 0.002853293204680085 2023-01-22 13:59:22.394113: step: 304/469, loss: 0.014048060402274132 2023-01-22 13:59:22.942345: step: 306/469, loss: 0.0005820858059450984 2023-01-22 13:59:23.557644: step: 308/469, loss: 0.002236904576420784 2023-01-22 13:59:24.185546: step: 310/469, loss: 0.009890853427350521 2023-01-22 13:59:24.837512: step: 312/469, loss: 0.03145544230937958 2023-01-22 13:59:25.586041: step: 314/469, loss: 0.010448926128447056 2023-01-22 13:59:26.258495: step: 316/469, loss: 0.001130950404331088 2023-01-22 13:59:26.924248: step: 318/469, loss: 0.013824332505464554 2023-01-22 13:59:27.550870: step: 320/469, loss: 0.004797209519892931 2023-01-22 13:59:28.154818: step: 322/469, loss: 0.05765450373291969 2023-01-22 13:59:28.746537: step: 324/469, loss: 0.0034388245549052954 2023-01-22 13:59:29.299202: step: 326/469, loss: 0.000766948563978076 2023-01-22 13:59:29.903173: step: 328/469, loss: 0.022361118346452713 2023-01-22 13:59:30.516980: step: 330/469, loss: 0.005188967566937208 2023-01-22 13:59:31.132743: step: 332/469, loss: 0.00784236192703247 2023-01-22 13:59:31.680289: step: 334/469, loss: 0.012940814718604088 2023-01-22 13:59:32.326783: step: 336/469, loss: 0.0010617084335535765 2023-01-22 13:59:32.946379: step: 338/469, loss: 0.040706247091293335 2023-01-22 13:59:33.610866: step: 340/469, loss: 0.02976055070757866 2023-01-22 13:59:34.273433: step: 342/469, loss: 0.06347508728504181 2023-01-22 13:59:34.886862: step: 344/469, loss: 0.017345333471894264 2023-01-22 13:59:35.491478: step: 346/469, loss: 0.003586218459531665 2023-01-22 13:59:36.137092: step: 348/469, loss: 0.13524411618709564 2023-01-22 13:59:36.800445: step: 350/469, loss: 0.004465170670300722 2023-01-22 13:59:37.492444: step: 352/469, loss: 0.0479794479906559 2023-01-22 13:59:38.080835: step: 354/469, loss: 0.0004911953583359718 2023-01-22 13:59:38.684040: step: 356/469, loss: 0.01962626911699772 2023-01-22 13:59:39.302016: step: 358/469, loss: 0.01667158678174019 2023-01-22 13:59:39.940437: step: 360/469, loss: 0.005676074884831905 2023-01-22 13:59:40.593593: step: 362/469, loss: 0.05340810865163803 2023-01-22 13:59:41.227618: step: 364/469, loss: 0.017531901597976685 2023-01-22 13:59:41.786821: step: 366/469, loss: 0.07758454233407974 2023-01-22 13:59:42.506515: step: 368/469, loss: 0.4954358637332916 2023-01-22 13:59:43.173175: step: 370/469, loss: 0.004969179630279541 
2023-01-22 13:59:43.822730: step: 372/469, loss: 0.004571582190692425 2023-01-22 13:59:44.462532: step: 374/469, loss: 0.024722153320908546 2023-01-22 13:59:45.053673: step: 376/469, loss: 0.02770611271262169 2023-01-22 13:59:45.673461: step: 378/469, loss: 0.0033734701573848724 2023-01-22 13:59:46.315103: step: 380/469, loss: 0.025060933083295822 2023-01-22 13:59:46.936538: step: 382/469, loss: 0.0038945425767451525 2023-01-22 13:59:47.569467: step: 384/469, loss: 0.05608239024877548 2023-01-22 13:59:48.149755: step: 386/469, loss: 0.20016659796237946 2023-01-22 13:59:48.795405: step: 388/469, loss: 0.0029896865598857403 2023-01-22 13:59:49.518766: step: 390/469, loss: 0.019879590719938278 2023-01-22 13:59:50.150495: step: 392/469, loss: 0.041897982358932495 2023-01-22 13:59:50.754949: step: 394/469, loss: 0.014243301004171371 2023-01-22 13:59:51.441012: step: 396/469, loss: 0.014828565530478954 2023-01-22 13:59:52.061567: step: 398/469, loss: 0.016474423930048943 2023-01-22 13:59:52.662400: step: 400/469, loss: 0.006784925237298012 2023-01-22 13:59:53.282967: step: 402/469, loss: 0.08661936223506927 2023-01-22 13:59:53.947470: step: 404/469, loss: 0.0553220696747303 2023-01-22 13:59:54.515054: step: 406/469, loss: 0.03310636803507805 2023-01-22 13:59:55.111475: step: 408/469, loss: 0.008349345996975899 2023-01-22 13:59:55.660240: step: 410/469, loss: 0.017226219177246094 2023-01-22 13:59:56.318384: step: 412/469, loss: 0.03554794192314148 2023-01-22 13:59:56.945257: step: 414/469, loss: 0.04668092727661133 2023-01-22 13:59:57.533471: step: 416/469, loss: 0.0037377693224698305 2023-01-22 13:59:58.094565: step: 418/469, loss: 0.013941371813416481 2023-01-22 13:59:58.695334: step: 420/469, loss: 0.0028607724234461784 2023-01-22 13:59:59.384402: step: 422/469, loss: 0.04367578402161598 2023-01-22 14:00:00.002851: step: 424/469, loss: 0.08808927983045578 2023-01-22 14:00:00.654839: step: 426/469, loss: 0.1437286138534546 2023-01-22 14:00:01.299177: step: 428/469, loss: 0.01248926855623722 2023-01-22 14:00:01.908910: step: 430/469, loss: 0.02520914562046528 2023-01-22 14:00:02.483322: step: 432/469, loss: 0.009373209439218044 2023-01-22 14:00:03.076635: step: 434/469, loss: 0.037377070635557175 2023-01-22 14:00:03.700237: step: 436/469, loss: 0.0019492616411298513 2023-01-22 14:00:04.471709: step: 438/469, loss: 0.01320639718323946 2023-01-22 14:00:05.127298: step: 440/469, loss: 0.020629139617085457 2023-01-22 14:00:05.724428: step: 442/469, loss: 0.04263303801417351 2023-01-22 14:00:06.396233: step: 444/469, loss: 0.017304904758930206 2023-01-22 14:00:07.005946: step: 446/469, loss: 0.0016037187306210399 2023-01-22 14:00:07.636170: step: 448/469, loss: 0.0705556645989418 2023-01-22 14:00:08.249780: step: 450/469, loss: 1.679777979850769 2023-01-22 14:00:08.868919: step: 452/469, loss: 0.021190904080867767 2023-01-22 14:00:09.492193: step: 454/469, loss: 0.005444503854960203 2023-01-22 14:00:10.076640: step: 456/469, loss: 0.007108920253813267 2023-01-22 14:00:10.723089: step: 458/469, loss: 0.045692723244428635 2023-01-22 14:00:11.277898: step: 460/469, loss: 0.023028597235679626 2023-01-22 14:00:11.927424: step: 462/469, loss: 0.049323033541440964 2023-01-22 14:00:12.491623: step: 464/469, loss: 0.0716588944196701 2023-01-22 14:00:13.101513: step: 466/469, loss: 0.3417256474494934 2023-01-22 14:00:13.705960: step: 468/469, loss: 0.14080263674259186 2023-01-22 14:00:14.356781: step: 470/469, loss: 0.08692007511854172 2023-01-22 14:00:14.974211: step: 472/469, loss: 0.10216516256332397 
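[Editor's note] On reading the per-step records in this stream: each one has the form "<timestamp>: step: <i>/469, loss: <value>", with the step counter advancing by 2 and running past the printed denominator (up to 938/469), and the "Loss:" line emitted at the end of each epoch (e.g. 0.054 for epoch 28 above) is presumably an aggregate, most likely the mean, of these per-step values. The helper below is a hypothetical convenience for pulling the losses back out of a saved copy of this log; it assumes the exact line format shown here and is not part of train.py.

```python
# Hypothetical helper for extracting per-step losses from this log format.
# Assumes the exact "<timestamp>: step: i/469, loss: x" layout seen above.
import re
from statistics import mean

STEP_RE = re.compile(
    r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+: step: (\d+)/\d+, loss: ([0-9.e+-]+)"
)

def step_losses(log_text: str) -> list[float]:
    """Return every per-step loss value found in the given log text."""
    return [float(m.group(2)) for m in STEP_RE.finditer(log_text)]

# Usage (file name is hypothetical): averaging one epoch's values should
# land close to the "Loss:" summary printed at the end of that epoch.
losses = step_losses(open("train.log").read())
print(len(losses), mean(losses))
```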
2023-01-22 14:00:15.527382: step: 474/469, loss: 0.00489381980150938 2023-01-22 14:00:16.198639: step: 476/469, loss: 0.07432972639799118 2023-01-22 14:00:16.870037: step: 478/469, loss: 0.03337600454688072 2023-01-22 14:00:17.522625: step: 480/469, loss: 0.005669097416102886 2023-01-22 14:00:18.108805: step: 482/469, loss: 0.271613210439682 2023-01-22 14:00:18.712178: step: 484/469, loss: 0.015623623505234718 2023-01-22 14:00:19.333236: step: 486/469, loss: 0.01844528317451477 2023-01-22 14:00:19.925993: step: 488/469, loss: 0.006291560363024473 2023-01-22 14:00:20.484273: step: 490/469, loss: 0.01965421251952648 2023-01-22 14:00:21.140873: step: 492/469, loss: 0.005876512266695499 2023-01-22 14:00:21.778573: step: 494/469, loss: 0.0049910470843315125 2023-01-22 14:00:22.423894: step: 496/469, loss: 1.1827524900436401 2023-01-22 14:00:23.033569: step: 498/469, loss: 0.020476648584008217 2023-01-22 14:00:23.607536: step: 500/469, loss: 0.01108358334749937 2023-01-22 14:00:24.309417: step: 502/469, loss: 0.02751188538968563 2023-01-22 14:00:24.985906: step: 504/469, loss: 0.15873494744300842 2023-01-22 14:00:25.527340: step: 506/469, loss: 0.0060341982170939445 2023-01-22 14:00:26.183262: step: 508/469, loss: 0.027051256969571114 2023-01-22 14:00:26.713446: step: 510/469, loss: 0.012410561554133892 2023-01-22 14:00:27.366621: step: 512/469, loss: 0.4625912308692932 2023-01-22 14:00:28.058524: step: 514/469, loss: 0.036333683878183365 2023-01-22 14:00:28.697754: step: 516/469, loss: 0.009291158989071846 2023-01-22 14:00:29.346380: step: 518/469, loss: 0.013837597332894802 2023-01-22 14:00:29.922072: step: 520/469, loss: 0.02988453023135662 2023-01-22 14:00:30.536400: step: 522/469, loss: 0.022480791434645653 2023-01-22 14:00:31.123045: step: 524/469, loss: 0.03669856861233711 2023-01-22 14:00:31.742595: step: 526/469, loss: 0.011793060228228569 2023-01-22 14:00:32.392050: step: 528/469, loss: 0.008280466310679913 2023-01-22 14:00:33.024836: step: 530/469, loss: 0.00646325433626771 2023-01-22 14:00:33.611781: step: 532/469, loss: 0.05213792249560356 2023-01-22 14:00:34.206220: step: 534/469, loss: 0.0025119639467447996 2023-01-22 14:00:34.784136: step: 536/469, loss: 0.0056335278786718845 2023-01-22 14:00:35.346506: step: 538/469, loss: 0.0007014645962044597 2023-01-22 14:00:36.012222: step: 540/469, loss: 0.01949908398091793 2023-01-22 14:00:36.583536: step: 542/469, loss: 0.006616770289838314 2023-01-22 14:00:37.146736: step: 544/469, loss: 0.0036147721111774445 2023-01-22 14:00:37.745276: step: 546/469, loss: 2.7988462448120117 2023-01-22 14:00:38.342655: step: 548/469, loss: 0.2557152509689331 2023-01-22 14:00:39.009817: step: 550/469, loss: 0.07260492444038391 2023-01-22 14:00:39.624041: step: 552/469, loss: 0.0028305102605372667 2023-01-22 14:00:40.213792: step: 554/469, loss: 0.001115532242693007 2023-01-22 14:00:40.788401: step: 556/469, loss: 0.010525760240852833 2023-01-22 14:00:41.373699: step: 558/469, loss: 0.0026127370074391365 2023-01-22 14:00:42.019320: step: 560/469, loss: 0.08996759355068207 2023-01-22 14:00:42.615773: step: 562/469, loss: 0.007148308679461479 2023-01-22 14:00:43.269540: step: 564/469, loss: 0.025730041787028313 2023-01-22 14:00:43.962904: step: 566/469, loss: 0.07166583836078644 2023-01-22 14:00:44.767284: step: 568/469, loss: 0.019982781261205673 2023-01-22 14:00:45.369376: step: 570/469, loss: 0.025221683084964752 2023-01-22 14:00:46.067441: step: 572/469, loss: 0.020528538152575493 2023-01-22 14:00:46.687536: step: 574/469, loss: 0.02145366370677948 
2023-01-22 14:00:47.268533: step: 576/469, loss: 0.012887056916952133 2023-01-22 14:00:47.883977: step: 578/469, loss: 0.018775420263409615 2023-01-22 14:00:48.470152: step: 580/469, loss: 1.0892153978347778 2023-01-22 14:00:49.110466: step: 582/469, loss: 0.002186531899496913 2023-01-22 14:00:49.665609: step: 584/469, loss: 0.00845075398683548 2023-01-22 14:00:50.239175: step: 586/469, loss: 0.0008586874464526772 2023-01-22 14:00:50.862134: step: 588/469, loss: 0.0210244283080101 2023-01-22 14:00:51.434910: step: 590/469, loss: 0.0013312355149537325 2023-01-22 14:00:52.072192: step: 592/469, loss: 0.0015349240275099874 2023-01-22 14:00:52.672295: step: 594/469, loss: 0.04331725090742111 2023-01-22 14:00:53.311837: step: 596/469, loss: 0.07671844214200974 2023-01-22 14:00:53.962757: step: 598/469, loss: 0.03747640177607536 2023-01-22 14:00:54.542684: step: 600/469, loss: 0.0502481535077095 2023-01-22 14:00:55.196759: step: 602/469, loss: 0.02473577857017517 2023-01-22 14:00:55.883956: step: 604/469, loss: 0.019558290019631386 2023-01-22 14:00:56.535286: step: 606/469, loss: 0.03300067037343979 2023-01-22 14:00:57.198638: step: 608/469, loss: 0.007178252562880516 2023-01-22 14:00:57.814408: step: 610/469, loss: 0.016332488507032394 2023-01-22 14:00:58.378152: step: 612/469, loss: 7.859564902901184e-06 2023-01-22 14:00:58.985662: step: 614/469, loss: 0.004172973334789276 2023-01-22 14:00:59.647414: step: 616/469, loss: 0.013322818093001842 2023-01-22 14:01:00.198162: step: 618/469, loss: 0.09358158707618713 2023-01-22 14:01:00.779678: step: 620/469, loss: 0.0145272146910429 2023-01-22 14:01:01.449979: step: 622/469, loss: 0.004783676005899906 2023-01-22 14:01:02.063444: step: 624/469, loss: 0.01821512170135975 2023-01-22 14:01:02.678049: step: 626/469, loss: 0.01578216254711151 2023-01-22 14:01:03.417039: step: 628/469, loss: 0.002338086487725377 2023-01-22 14:01:04.121291: step: 630/469, loss: 0.06430988758802414 2023-01-22 14:01:04.710378: step: 632/469, loss: 0.019697776064276695 2023-01-22 14:01:05.322168: step: 634/469, loss: 0.0008986774482764304 2023-01-22 14:01:05.942216: step: 636/469, loss: 0.002800026908516884 2023-01-22 14:01:06.639906: step: 638/469, loss: 0.013280363753437996 2023-01-22 14:01:07.297166: step: 640/469, loss: 0.023753050714731216 2023-01-22 14:01:07.940158: step: 642/469, loss: 0.013031180016696453 2023-01-22 14:01:08.531709: step: 644/469, loss: 0.0015544953057542443 2023-01-22 14:01:09.104453: step: 646/469, loss: 0.007948714308440685 2023-01-22 14:01:09.746332: step: 648/469, loss: 0.029600001871585846 2023-01-22 14:01:10.330636: step: 650/469, loss: 0.05922909826040268 2023-01-22 14:01:10.956910: step: 652/469, loss: 0.013812258839607239 2023-01-22 14:01:11.576171: step: 654/469, loss: 0.15025420486927032 2023-01-22 14:01:12.145662: step: 656/469, loss: 0.585685670375824 2023-01-22 14:01:12.846938: step: 658/469, loss: 1.0335520505905151 2023-01-22 14:01:13.459627: step: 660/469, loss: 0.0057723610661923885 2023-01-22 14:01:14.154929: step: 662/469, loss: 0.07760661095380783 2023-01-22 14:01:14.780631: step: 664/469, loss: 0.006530494894832373 2023-01-22 14:01:15.446931: step: 666/469, loss: 0.0018100393936038017 2023-01-22 14:01:16.122030: step: 668/469, loss: 0.023965319618582726 2023-01-22 14:01:16.735063: step: 670/469, loss: 0.018816273659467697 2023-01-22 14:01:17.454120: step: 672/469, loss: 0.07420765608549118 2023-01-22 14:01:18.124544: step: 674/469, loss: 0.002655893098562956 2023-01-22 14:01:18.808037: step: 676/469, loss: 0.020108599215745926 
2023-01-22 14:01:19.401199: step: 678/469, loss: 0.02379756048321724 2023-01-22 14:01:20.057695: step: 680/469, loss: 0.0020835783798247576 2023-01-22 14:01:20.694777: step: 682/469, loss: 0.039586957544088364 2023-01-22 14:01:21.351777: step: 684/469, loss: 0.04710690677165985 2023-01-22 14:01:21.972941: step: 686/469, loss: 0.003648559795692563 2023-01-22 14:01:22.593124: step: 688/469, loss: 0.023693419992923737 2023-01-22 14:01:23.313554: step: 690/469, loss: 0.04521752521395683 2023-01-22 14:01:23.929702: step: 692/469, loss: 0.005997614003717899 2023-01-22 14:01:24.560930: step: 694/469, loss: 0.038455672562122345 2023-01-22 14:01:25.153007: step: 696/469, loss: 0.01075368095189333 2023-01-22 14:01:25.756133: step: 698/469, loss: 0.01868843100965023 2023-01-22 14:01:26.382018: step: 700/469, loss: 0.0031358441337943077 2023-01-22 14:01:27.029458: step: 702/469, loss: 0.10962771624326706 2023-01-22 14:01:27.589242: step: 704/469, loss: 0.001154505298472941 2023-01-22 14:01:28.188056: step: 706/469, loss: 6.791084289550781 2023-01-22 14:01:28.726794: step: 708/469, loss: 0.0009272179449908435 2023-01-22 14:01:29.258571: step: 710/469, loss: 0.0019114745082333684 2023-01-22 14:01:29.860910: step: 712/469, loss: 0.04137321561574936 2023-01-22 14:01:30.485059: step: 714/469, loss: 0.006473363842815161 2023-01-22 14:01:31.086901: step: 716/469, loss: 0.004562808200716972 2023-01-22 14:01:31.796602: step: 718/469, loss: 0.04581043869256973 2023-01-22 14:01:32.401072: step: 720/469, loss: 0.008559256792068481 2023-01-22 14:01:33.057057: step: 722/469, loss: 0.010527494363486767 2023-01-22 14:01:33.695195: step: 724/469, loss: 0.004464718513190746 2023-01-22 14:01:34.283270: step: 726/469, loss: 0.04944245144724846 2023-01-22 14:01:34.904995: step: 728/469, loss: 0.005690377205610275 2023-01-22 14:01:35.590164: step: 730/469, loss: 0.2028210610151291 2023-01-22 14:01:36.247234: step: 732/469, loss: 0.012636340223252773 2023-01-22 14:01:36.949505: step: 734/469, loss: 0.047626569867134094 2023-01-22 14:01:37.588737: step: 736/469, loss: 0.05612828582525253 2023-01-22 14:01:38.210479: step: 738/469, loss: 0.04176720604300499 2023-01-22 14:01:38.882632: step: 740/469, loss: 0.00456358352676034 2023-01-22 14:01:39.545705: step: 742/469, loss: 0.03201710805296898 2023-01-22 14:01:40.122055: step: 744/469, loss: 0.007990165613591671 2023-01-22 14:01:40.813529: step: 746/469, loss: 0.1465800553560257 2023-01-22 14:01:41.479328: step: 748/469, loss: 0.06553951650857925 2023-01-22 14:01:42.096074: step: 750/469, loss: 0.006729306187480688 2023-01-22 14:01:42.742822: step: 752/469, loss: 0.00826718844473362 2023-01-22 14:01:43.411381: step: 754/469, loss: 0.002610917203128338 2023-01-22 14:01:44.091990: step: 756/469, loss: 0.03906663507223129 2023-01-22 14:01:44.684391: step: 758/469, loss: 0.005207396112382412 2023-01-22 14:01:45.367064: step: 760/469, loss: 0.006388232111930847 2023-01-22 14:01:46.029836: step: 762/469, loss: 0.013333003968000412 2023-01-22 14:01:46.632541: step: 764/469, loss: 0.005317576229572296 2023-01-22 14:01:47.224205: step: 766/469, loss: 0.02116086333990097 2023-01-22 14:01:47.846128: step: 768/469, loss: 0.008979404345154762 2023-01-22 14:01:48.483556: step: 770/469, loss: 0.02597903087735176 2023-01-22 14:01:49.080770: step: 772/469, loss: 0.6848384141921997 2023-01-22 14:01:49.746218: step: 774/469, loss: 0.0012179943732917309 2023-01-22 14:01:50.399408: step: 776/469, loss: 0.0016474841395393014 2023-01-22 14:01:50.994565: step: 778/469, loss: 0.0005874739727005363 
2023-01-22 14:01:51.575706: step: 780/469, loss: 0.052955713123083115 2023-01-22 14:01:52.140252: step: 782/469, loss: 0.04274573177099228 2023-01-22 14:01:52.791776: step: 784/469, loss: 0.012886938638985157 2023-01-22 14:01:53.427627: step: 786/469, loss: 0.009848181158304214 2023-01-22 14:01:54.002680: step: 788/469, loss: 0.05092054232954979 2023-01-22 14:01:54.632770: step: 790/469, loss: 0.00875928346067667 2023-01-22 14:01:55.253618: step: 792/469, loss: 0.022496996447443962 2023-01-22 14:01:55.901847: step: 794/469, loss: 0.02337433211505413 2023-01-22 14:01:56.512051: step: 796/469, loss: 0.03319117799401283 2023-01-22 14:01:57.125296: step: 798/469, loss: 0.0005383278476074338 2023-01-22 14:01:57.810680: step: 800/469, loss: 0.031798552721738815 2023-01-22 14:01:58.496040: step: 802/469, loss: 0.08176372200250626 2023-01-22 14:01:59.068653: step: 804/469, loss: 0.0004847792733926326 2023-01-22 14:01:59.627655: step: 806/469, loss: 0.015916161239147186 2023-01-22 14:02:00.277044: step: 808/469, loss: 0.029505586251616478 2023-01-22 14:02:00.931493: step: 810/469, loss: 0.5855922698974609 2023-01-22 14:02:01.517136: step: 812/469, loss: 0.006037293933331966 2023-01-22 14:02:02.073870: step: 814/469, loss: 0.028843944892287254 2023-01-22 14:02:02.712760: step: 816/469, loss: 0.07339662313461304 2023-01-22 14:02:03.359774: step: 818/469, loss: 0.011284264735877514 2023-01-22 14:02:03.951927: step: 820/469, loss: 0.01086319237947464 2023-01-22 14:02:04.532964: step: 822/469, loss: 0.002898276085034013 2023-01-22 14:02:05.147909: step: 824/469, loss: 0.0077510373666882515 2023-01-22 14:02:05.785720: step: 826/469, loss: 0.025537706911563873 2023-01-22 14:02:06.403332: step: 828/469, loss: 0.003802065970376134 2023-01-22 14:02:06.998904: step: 830/469, loss: 0.07798043638467789 2023-01-22 14:02:07.654255: step: 832/469, loss: 0.0394209548830986 2023-01-22 14:02:08.311883: step: 834/469, loss: 0.2973405718803406 2023-01-22 14:02:08.913086: step: 836/469, loss: 0.0050156936049461365 2023-01-22 14:02:09.558805: step: 838/469, loss: 0.003936249762773514 2023-01-22 14:02:10.219893: step: 840/469, loss: 0.004344089888036251 2023-01-22 14:02:10.948667: step: 842/469, loss: 0.031523656100034714 2023-01-22 14:02:11.668021: step: 844/469, loss: 0.0013185730203986168 2023-01-22 14:02:12.283457: step: 846/469, loss: 0.009157379157841206 2023-01-22 14:02:12.958244: step: 848/469, loss: 0.1362275928258896 2023-01-22 14:02:13.526341: step: 850/469, loss: 0.002761450130492449 2023-01-22 14:02:14.117408: step: 852/469, loss: 0.012748624198138714 2023-01-22 14:02:14.759732: step: 854/469, loss: 0.3057138919830322 2023-01-22 14:02:15.421519: step: 856/469, loss: 0.03490421175956726 2023-01-22 14:02:16.000830: step: 858/469, loss: 0.013592280447483063 2023-01-22 14:02:16.610529: step: 860/469, loss: 0.013455227017402649 2023-01-22 14:02:17.189689: step: 862/469, loss: 6.178225885378197e-05 2023-01-22 14:02:17.877210: step: 864/469, loss: 0.1289229542016983 2023-01-22 14:02:18.522499: step: 866/469, loss: 0.014098123647272587 2023-01-22 14:02:19.118865: step: 868/469, loss: 0.015085606835782528 2023-01-22 14:02:19.728767: step: 870/469, loss: 0.0155626917257905 2023-01-22 14:02:20.373627: step: 872/469, loss: 0.9027851223945618 2023-01-22 14:02:20.973348: step: 874/469, loss: 0.04441506788134575 2023-01-22 14:02:21.620930: step: 876/469, loss: 0.004249469377100468 2023-01-22 14:02:22.241125: step: 878/469, loss: 0.002969399094581604 2023-01-22 14:02:22.818123: step: 880/469, loss: 0.015422014519572258 
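The Dev/Test/Sample blocks printed at the end of this epoch (a few lines below) report precision, recall, and F1 at the template and slot levels plus a 'combined' score. The printed numbers are consistent with F1 being the usual harmonic mean of p and r, and with 'combined' being the product of the template F1 and the slot F1; a minimal sketch of that arithmetic follows, assuming this interpretation (the function and variable names are illustrative and not taken from train.py).

# Minimal sketch: how the per-language scores below appear to relate.
# Assumption: f1 is the harmonic mean of p and r, and 'combined' is
# template_f1 * slot_f1; the names here are illustrative only.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0.0 when both are 0)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Dev Chinese, epoch 29 (values copied from the block below).
template_f1 = f1(1.0, 0.5833333333333334)                  # ~0.7368
slot_f1 = f1(0.3027986725663717, 0.3246323529411765)       # ~0.3133
combined = template_f1 * slot_f1                           # ~0.2309, matching 'combined'
print(template_f1, slot_f1, combined)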
2023-01-22 14:02:23.452522: step: 882/469, loss: 0.010858754627406597 2023-01-22 14:02:24.146148: step: 884/469, loss: 0.0024042685981839895 2023-01-22 14:02:24.737943: step: 886/469, loss: 0.010281093418598175 2023-01-22 14:02:25.364744: step: 888/469, loss: 0.011785667389631271 2023-01-22 14:02:25.977353: step: 890/469, loss: 0.4762597978115082 2023-01-22 14:02:26.571605: step: 892/469, loss: 0.011760367080569267 2023-01-22 14:02:27.182045: step: 894/469, loss: 0.1458122581243515 2023-01-22 14:02:27.961540: step: 896/469, loss: 0.08357082307338715 2023-01-22 14:02:28.635660: step: 898/469, loss: 0.01763766258955002 2023-01-22 14:02:29.308006: step: 900/469, loss: 0.017989760264754295 2023-01-22 14:02:29.987405: step: 902/469, loss: 0.055756352841854095 2023-01-22 14:02:30.679374: step: 904/469, loss: 0.009300785139203072 2023-01-22 14:02:31.331989: step: 906/469, loss: 0.1509122997522354 2023-01-22 14:02:32.037361: step: 908/469, loss: 0.008779053576290607 2023-01-22 14:02:32.711519: step: 910/469, loss: 0.005239448044449091 2023-01-22 14:02:33.327352: step: 912/469, loss: 0.15467186272144318 2023-01-22 14:02:33.970111: step: 914/469, loss: 0.007002062164247036 2023-01-22 14:02:34.620767: step: 916/469, loss: 0.0002871248871088028 2023-01-22 14:02:35.257840: step: 918/469, loss: 0.009084869176149368 2023-01-22 14:02:35.873543: step: 920/469, loss: 0.008464104495942593 2023-01-22 14:02:36.462301: step: 922/469, loss: 0.08195966482162476 2023-01-22 14:02:37.083126: step: 924/469, loss: 0.028196517378091812 2023-01-22 14:02:37.741123: step: 926/469, loss: 0.0013524334644898772 2023-01-22 14:02:38.367809: step: 928/469, loss: 0.004344878252595663 2023-01-22 14:02:39.003840: step: 930/469, loss: 0.015616672113537788 2023-01-22 14:02:39.595915: step: 932/469, loss: 0.01837651990354061 2023-01-22 14:02:40.130212: step: 934/469, loss: 0.0018815413350239396 2023-01-22 14:02:40.716773: step: 936/469, loss: 0.003958356566727161 2023-01-22 14:02:41.353416: step: 938/469, loss: 0.0240335613489151 ================================================== Loss: 0.072 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3027986725663717, 'r': 0.3246323529411765, 'f1': 0.31333562271062276}, 'combined': 0.2308788798920378, 'epoch': 29} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32707119193964884, 'r': 0.2744046505111235, 'f1': 0.2984321223966746}, 'combined': 0.16278115767091342, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3011806759098787, 'r': 0.3297556925996205, 'f1': 0.31482110507246375}, 'combined': 0.231973445842868, 'epoch': 29} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32801219712417357, 'r': 0.2778950544711663, 'f1': 0.30088092574243164}, 'combined': 0.16411686858678087, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28928017484489565, 'r': 0.32441097406704617, 'f1': 0.3058400417412045}, 'combined': 0.2253558202303612, 'epoch': 29} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32724832534615145, 'r': 0.28024193277584425, 'f1': 0.3019264982986671}, 'combined': 0.16468718089018203, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.2353395061728395, 'r': 0.3630952380952381, 'f1': 0.2855805243445693}, 'combined': 0.19038701622971288, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.358695652173913, 'f1': 0.2946428571428571}, 'combined': 0.14732142857142855, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35714285714285715, 'r': 0.1724137931034483, 'f1': 0.23255813953488377}, 'combined': 0.1550387596899225, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:05:30.206457: step: 2/469, loss: 0.030204234644770622 2023-01-22 14:05:30.808355: step: 4/469, loss: 0.000214090890949592 2023-01-22 14:05:31.355354: step: 6/469, loss: 0.02446596510708332 2023-01-22 14:05:31.945758: step: 8/469, loss: 0.016314657405018806 2023-01-22 14:05:32.576833: step: 10/469, loss: 0.02458830736577511 2023-01-22 14:05:33.185370: step: 12/469, loss: 0.12970979511737823 2023-01-22 14:05:33.724366: step: 14/469, loss: 0.0018961385358124971 2023-01-22 14:05:34.310362: step: 16/469, loss: 0.01764444075524807 2023-01-22 14:05:34.923101: step: 18/469, loss: 1.2081280829079333e-06 2023-01-22 
14:05:35.500174: step: 20/469, loss: 0.020217763260006905 2023-01-22 14:05:36.106142: step: 22/469, loss: 0.0008690494578331709 2023-01-22 14:05:36.746923: step: 24/469, loss: 0.04564937949180603 2023-01-22 14:05:37.375907: step: 26/469, loss: 0.01216245535761118 2023-01-22 14:05:37.943169: step: 28/469, loss: 0.040008775889873505 2023-01-22 14:05:38.541446: step: 30/469, loss: 0.011112927459180355 2023-01-22 14:05:39.201190: step: 32/469, loss: 0.011521357111632824 2023-01-22 14:05:39.770655: step: 34/469, loss: 0.004028141498565674 2023-01-22 14:05:40.439902: step: 36/469, loss: 0.0675557479262352 2023-01-22 14:05:41.038858: step: 38/469, loss: 0.0011803836096078157 2023-01-22 14:05:41.663004: step: 40/469, loss: 0.023845933377742767 2023-01-22 14:05:42.301156: step: 42/469, loss: 0.02379044145345688 2023-01-22 14:05:42.935722: step: 44/469, loss: 0.005500413477420807 2023-01-22 14:05:43.525216: step: 46/469, loss: 0.027780411764979362 2023-01-22 14:05:44.166050: step: 48/469, loss: 0.005657805595546961 2023-01-22 14:05:44.824491: step: 50/469, loss: 0.004490879364311695 2023-01-22 14:05:45.408371: step: 52/469, loss: 0.00833278987556696 2023-01-22 14:05:46.018677: step: 54/469, loss: 0.024658534675836563 2023-01-22 14:05:46.632439: step: 56/469, loss: 0.03944602236151695 2023-01-22 14:05:47.235350: step: 58/469, loss: 0.009471878409385681 2023-01-22 14:05:47.837709: step: 60/469, loss: 0.021279895678162575 2023-01-22 14:05:48.477863: step: 62/469, loss: 0.0003297892981208861 2023-01-22 14:05:49.181544: step: 64/469, loss: 0.25596874952316284 2023-01-22 14:05:49.766135: step: 66/469, loss: 0.0013570016017183661 2023-01-22 14:05:50.377208: step: 68/469, loss: 0.005010408349335194 2023-01-22 14:05:51.001284: step: 70/469, loss: 0.00613814452663064 2023-01-22 14:05:51.614226: step: 72/469, loss: 0.04745108261704445 2023-01-22 14:05:52.292759: step: 74/469, loss: 0.07794447988271713 2023-01-22 14:05:52.941173: step: 76/469, loss: 0.0014230242231860757 2023-01-22 14:05:53.527521: step: 78/469, loss: 0.01976405270397663 2023-01-22 14:05:54.081441: step: 80/469, loss: 0.007515359669923782 2023-01-22 14:05:54.699891: step: 82/469, loss: 0.20509494841098785 2023-01-22 14:05:55.415152: step: 84/469, loss: 0.02520422264933586 2023-01-22 14:05:55.999629: step: 86/469, loss: 0.002537622582167387 2023-01-22 14:05:56.577389: step: 88/469, loss: 0.017039865255355835 2023-01-22 14:05:57.282747: step: 90/469, loss: 0.012986699119210243 2023-01-22 14:05:57.907959: step: 92/469, loss: 0.022357499226927757 2023-01-22 14:05:58.549801: step: 94/469, loss: 0.004211435094475746 2023-01-22 14:05:59.197047: step: 96/469, loss: 0.041084833443164825 2023-01-22 14:05:59.822647: step: 98/469, loss: 0.07905570417642593 2023-01-22 14:06:00.478143: step: 100/469, loss: 0.01296793483197689 2023-01-22 14:06:01.051763: step: 102/469, loss: 0.015302873216569424 2023-01-22 14:06:01.789491: step: 104/469, loss: 0.4981491267681122 2023-01-22 14:06:02.540976: step: 106/469, loss: 0.006403472274541855 2023-01-22 14:06:03.158255: step: 108/469, loss: 0.00457416009157896 2023-01-22 14:06:03.734112: step: 110/469, loss: 0.35185667872428894 2023-01-22 14:06:04.452283: step: 112/469, loss: 0.029439128935337067 2023-01-22 14:06:05.069321: step: 114/469, loss: 0.002741925185546279 2023-01-22 14:06:05.708817: step: 116/469, loss: 0.0441625602543354 2023-01-22 14:06:06.509976: step: 118/469, loss: 0.16306741535663605 2023-01-22 14:06:07.087944: step: 120/469, loss: 0.003895440837368369 2023-01-22 14:06:07.695434: step: 122/469, loss: 
0.006960265804082155 2023-01-22 14:06:08.333226: step: 124/469, loss: 0.003254164708778262 2023-01-22 14:06:08.936908: step: 126/469, loss: 0.08026694506406784 2023-01-22 14:06:09.626373: step: 128/469, loss: 0.00878845900297165 2023-01-22 14:06:10.207190: step: 130/469, loss: 0.01168108731508255 2023-01-22 14:06:10.858706: step: 132/469, loss: 0.04680744558572769 2023-01-22 14:06:11.423238: step: 134/469, loss: 0.00019920070189982653 2023-01-22 14:06:12.048971: step: 136/469, loss: 0.0009741685353219509 2023-01-22 14:06:12.614095: step: 138/469, loss: 0.004376692231744528 2023-01-22 14:06:13.203504: step: 140/469, loss: 0.0072433315217494965 2023-01-22 14:06:13.844260: step: 142/469, loss: 0.00038019128260202706 2023-01-22 14:06:14.466558: step: 144/469, loss: 0.0019010391552001238 2023-01-22 14:06:15.158183: step: 146/469, loss: 0.0016824081540107727 2023-01-22 14:06:15.781725: step: 148/469, loss: 0.008423411287367344 2023-01-22 14:06:16.422843: step: 150/469, loss: 0.017467912286520004 2023-01-22 14:06:17.041813: step: 152/469, loss: 0.005334971472620964 2023-01-22 14:06:17.671528: step: 154/469, loss: 0.008914156816899776 2023-01-22 14:06:18.274568: step: 156/469, loss: 0.0430353507399559 2023-01-22 14:06:18.901159: step: 158/469, loss: 0.012414567172527313 2023-01-22 14:06:19.500222: step: 160/469, loss: 0.00021787987498100847 2023-01-22 14:06:20.146601: step: 162/469, loss: 0.057112645357847214 2023-01-22 14:06:20.835632: step: 164/469, loss: 0.02861364744603634 2023-01-22 14:06:21.453665: step: 166/469, loss: 0.008333291858434677 2023-01-22 14:06:22.158519: step: 168/469, loss: 0.06528560817241669 2023-01-22 14:06:22.750263: step: 170/469, loss: 0.00824511144310236 2023-01-22 14:06:23.313990: step: 172/469, loss: 0.07460235059261322 2023-01-22 14:06:23.914365: step: 174/469, loss: 0.005715370178222656 2023-01-22 14:06:24.470047: step: 176/469, loss: 0.026749148964881897 2023-01-22 14:06:25.075418: step: 178/469, loss: 0.0014731278643012047 2023-01-22 14:06:25.732202: step: 180/469, loss: 0.047200821340084076 2023-01-22 14:06:26.350554: step: 182/469, loss: 0.01909456215798855 2023-01-22 14:06:26.953927: step: 184/469, loss: 0.00663789501413703 2023-01-22 14:06:27.598877: step: 186/469, loss: 0.04863253980875015 2023-01-22 14:06:28.201486: step: 188/469, loss: 0.010813768953084946 2023-01-22 14:06:28.909525: step: 190/469, loss: 0.012834246270358562 2023-01-22 14:06:29.546267: step: 192/469, loss: 0.01888222061097622 2023-01-22 14:06:30.179551: step: 194/469, loss: 0.024609556421637535 2023-01-22 14:06:30.771141: step: 196/469, loss: 0.003927430137991905 2023-01-22 14:06:31.408917: step: 198/469, loss: 0.001142520341090858 2023-01-22 14:06:32.079142: step: 200/469, loss: 0.05040375888347626 2023-01-22 14:06:32.669545: step: 202/469, loss: 6.32699957350269e-05 2023-01-22 14:06:33.283635: step: 204/469, loss: 0.018740428611636162 2023-01-22 14:06:33.874222: step: 206/469, loss: 0.010919380933046341 2023-01-22 14:06:34.456615: step: 208/469, loss: 0.015744278207421303 2023-01-22 14:06:35.057469: step: 210/469, loss: 0.04160604253411293 2023-01-22 14:06:35.695039: step: 212/469, loss: 0.32554858922958374 2023-01-22 14:06:36.314309: step: 214/469, loss: 0.02820652909576893 2023-01-22 14:06:36.969684: step: 216/469, loss: 0.007355737499892712 2023-01-22 14:06:37.502815: step: 218/469, loss: 0.014366361312568188 2023-01-22 14:06:38.102802: step: 220/469, loss: 0.05089385434985161 2023-01-22 14:06:38.679252: step: 222/469, loss: 0.0067719887010753155 2023-01-22 14:06:39.322866: step: 
224/469, loss: 0.02073615975677967 2023-01-22 14:06:39.989583: step: 226/469, loss: 0.001756730256602168 2023-01-22 14:06:40.609069: step: 228/469, loss: 0.02269658073782921 2023-01-22 14:06:41.254664: step: 230/469, loss: 0.004856404848396778 2023-01-22 14:06:41.899459: step: 232/469, loss: 0.034278079867362976 2023-01-22 14:06:42.516444: step: 234/469, loss: 0.012941844761371613 2023-01-22 14:06:43.072383: step: 236/469, loss: 0.004036817234009504 2023-01-22 14:06:43.694434: step: 238/469, loss: 0.5611745715141296 2023-01-22 14:06:44.378951: step: 240/469, loss: 0.0008843239047564566 2023-01-22 14:06:44.983045: step: 242/469, loss: 0.004814642481505871 2023-01-22 14:06:45.628320: step: 244/469, loss: 0.012764296494424343 2023-01-22 14:06:46.216278: step: 246/469, loss: 0.4942607879638672 2023-01-22 14:06:46.917968: step: 248/469, loss: 0.09036976099014282 2023-01-22 14:06:47.510763: step: 250/469, loss: 0.0005032708868384361 2023-01-22 14:06:48.102748: step: 252/469, loss: 0.013998867943882942 2023-01-22 14:06:48.783791: step: 254/469, loss: 0.0005572093068622053 2023-01-22 14:06:49.380860: step: 256/469, loss: 0.16987258195877075 2023-01-22 14:06:50.037167: step: 258/469, loss: 0.0029405662789940834 2023-01-22 14:06:50.641510: step: 260/469, loss: 1.712666630744934 2023-01-22 14:06:51.299209: step: 262/469, loss: 0.022773411124944687 2023-01-22 14:06:51.870388: step: 264/469, loss: 0.010688270442187786 2023-01-22 14:06:52.497781: step: 266/469, loss: 0.008014222607016563 2023-01-22 14:06:53.093739: step: 268/469, loss: 0.00755898654460907 2023-01-22 14:06:53.713344: step: 270/469, loss: 0.002323312219232321 2023-01-22 14:06:54.386209: step: 272/469, loss: 0.016693051904439926 2023-01-22 14:06:54.996139: step: 274/469, loss: 0.02540774643421173 2023-01-22 14:06:55.574207: step: 276/469, loss: 0.03135130926966667 2023-01-22 14:06:56.207952: step: 278/469, loss: 0.061762865632772446 2023-01-22 14:06:56.888964: step: 280/469, loss: 0.005214679054915905 2023-01-22 14:06:57.485974: step: 282/469, loss: 0.0032812608405947685 2023-01-22 14:06:58.088029: step: 284/469, loss: 0.017116745933890343 2023-01-22 14:06:58.625234: step: 286/469, loss: 0.0011244808556511998 2023-01-22 14:06:59.263669: step: 288/469, loss: 0.01831858418881893 2023-01-22 14:06:59.840599: step: 290/469, loss: 0.010637855157256126 2023-01-22 14:07:00.501152: step: 292/469, loss: 0.0061506847850978374 2023-01-22 14:07:01.176425: step: 294/469, loss: 0.013292726129293442 2023-01-22 14:07:01.853451: step: 296/469, loss: 0.003598094917833805 2023-01-22 14:07:02.533953: step: 298/469, loss: 0.008599359542131424 2023-01-22 14:07:03.147597: step: 300/469, loss: 0.003592181019484997 2023-01-22 14:07:03.836086: step: 302/469, loss: 0.01342946756631136 2023-01-22 14:07:04.457155: step: 304/469, loss: 0.2819414436817169 2023-01-22 14:07:05.152220: step: 306/469, loss: 0.11808431893587112 2023-01-22 14:07:05.775254: step: 308/469, loss: 0.013801461085677147 2023-01-22 14:07:06.403780: step: 310/469, loss: 0.021975615993142128 2023-01-22 14:07:07.036743: step: 312/469, loss: 0.003579105716198683 2023-01-22 14:07:07.675371: step: 314/469, loss: 0.006908521521836519 2023-01-22 14:07:08.290863: step: 316/469, loss: 0.017278479412198067 2023-01-22 14:07:08.898023: step: 318/469, loss: 0.01671069860458374 2023-01-22 14:07:09.610327: step: 320/469, loss: 0.11681130528450012 2023-01-22 14:07:10.154411: step: 322/469, loss: 0.03637412562966347 2023-01-22 14:07:10.801220: step: 324/469, loss: 0.037795957177877426 2023-01-22 14:07:11.445527: step: 
326/469, loss: 0.0647817924618721 2023-01-22 14:07:12.086767: step: 328/469, loss: 0.07110146433115005 2023-01-22 14:07:12.714854: step: 330/469, loss: 0.002806697739288211 2023-01-22 14:07:13.406781: step: 332/469, loss: 0.017226919531822205 2023-01-22 14:07:14.046434: step: 334/469, loss: 0.0007886120001785457 2023-01-22 14:07:14.674703: step: 336/469, loss: 0.40470314025878906 2023-01-22 14:07:15.250723: step: 338/469, loss: 0.0025602150708436966 2023-01-22 14:07:15.895301: step: 340/469, loss: 0.0430583655834198 2023-01-22 14:07:16.515241: step: 342/469, loss: 0.004851692821830511 2023-01-22 14:07:17.139582: step: 344/469, loss: 0.021595578640699387 2023-01-22 14:07:17.719741: step: 346/469, loss: 0.012612909078598022 2023-01-22 14:07:18.297068: step: 348/469, loss: 0.4985911250114441 2023-01-22 14:07:18.899482: step: 350/469, loss: 0.02535969950258732 2023-01-22 14:07:19.514428: step: 352/469, loss: 0.0025849139783531427 2023-01-22 14:07:20.190895: step: 354/469, loss: 0.006168850697577 2023-01-22 14:07:20.866514: step: 356/469, loss: 0.023574557155370712 2023-01-22 14:07:21.416209: step: 358/469, loss: 0.0024570091627538204 2023-01-22 14:07:22.034588: step: 360/469, loss: 0.014448233880102634 2023-01-22 14:07:22.740129: step: 362/469, loss: 0.027336303144693375 2023-01-22 14:07:23.367831: step: 364/469, loss: 0.04359082505106926 2023-01-22 14:07:24.013594: step: 366/469, loss: 0.5205985903739929 2023-01-22 14:07:24.620202: step: 368/469, loss: 0.015982870012521744 2023-01-22 14:07:25.233218: step: 370/469, loss: 0.011232663877308369 2023-01-22 14:07:25.794400: step: 372/469, loss: 0.033528730273246765 2023-01-22 14:07:26.475352: step: 374/469, loss: 0.002884415676817298 2023-01-22 14:07:27.083047: step: 376/469, loss: 0.04277842864394188 2023-01-22 14:07:27.677159: step: 378/469, loss: 0.0007292425725609064 2023-01-22 14:07:28.324840: step: 380/469, loss: 0.025297727435827255 2023-01-22 14:07:28.992455: step: 382/469, loss: 0.009050133638083935 2023-01-22 14:07:29.605660: step: 384/469, loss: 0.07818689197301865 2023-01-22 14:07:30.173423: step: 386/469, loss: 0.012338213622570038 2023-01-22 14:07:30.796111: step: 388/469, loss: 0.01873023808002472 2023-01-22 14:07:31.362573: step: 390/469, loss: 0.08117516338825226 2023-01-22 14:07:32.029631: step: 392/469, loss: 0.012470938265323639 2023-01-22 14:07:32.624908: step: 394/469, loss: 0.01541326753795147 2023-01-22 14:07:33.272490: step: 396/469, loss: 0.0018816686933860183 2023-01-22 14:07:33.868172: step: 398/469, loss: 0.0008937534876167774 2023-01-22 14:07:34.489436: step: 400/469, loss: 0.04712964966893196 2023-01-22 14:07:35.122429: step: 402/469, loss: 0.0015105074271559715 2023-01-22 14:07:35.747438: step: 404/469, loss: 0.00022589498257730156 2023-01-22 14:07:36.487751: step: 406/469, loss: 0.03625385835766792 2023-01-22 14:07:37.102007: step: 408/469, loss: 0.1721838265657425 2023-01-22 14:07:37.842343: step: 410/469, loss: 0.015009745955467224 2023-01-22 14:07:38.493331: step: 412/469, loss: 0.018819235265254974 2023-01-22 14:07:39.214696: step: 414/469, loss: 0.004143711645156145 2023-01-22 14:07:39.793254: step: 416/469, loss: 0.005448823794722557 2023-01-22 14:07:40.539410: step: 418/469, loss: 0.0025705129373818636 2023-01-22 14:07:41.178054: step: 420/469, loss: 0.01444432232528925 2023-01-22 14:07:41.804200: step: 422/469, loss: 0.001175693585537374 2023-01-22 14:07:42.526240: step: 424/469, loss: 0.009207737632095814 2023-01-22 14:07:43.221113: step: 426/469, loss: 0.022723522037267685 2023-01-22 14:07:43.833575: 
step: 428/469, loss: 0.008806123398244381 2023-01-22 14:07:44.484575: step: 430/469, loss: 0.11665830761194229 2023-01-22 14:07:45.113949: step: 432/469, loss: 0.005949435289949179 2023-01-22 14:07:45.745960: step: 434/469, loss: 0.016641709953546524 2023-01-22 14:07:46.274339: step: 436/469, loss: 0.004267572425305843 2023-01-22 14:07:46.897460: step: 438/469, loss: 0.0026941681280732155 2023-01-22 14:07:47.515384: step: 440/469, loss: 0.2649521827697754 2023-01-22 14:07:48.221963: step: 442/469, loss: 0.0063835857436060905 2023-01-22 14:07:48.842009: step: 444/469, loss: 0.006116997450590134 2023-01-22 14:07:49.477082: step: 446/469, loss: 0.006502763833850622 2023-01-22 14:07:50.112013: step: 448/469, loss: 0.027114741504192352 2023-01-22 14:07:50.725331: step: 450/469, loss: 0.0666317492723465 2023-01-22 14:07:51.346719: step: 452/469, loss: 0.04511849582195282 2023-01-22 14:07:52.015655: step: 454/469, loss: 0.0022478022146970034 2023-01-22 14:07:52.710101: step: 456/469, loss: 0.011447342112660408 2023-01-22 14:07:53.303188: step: 458/469, loss: 0.055655594915151596 2023-01-22 14:07:53.999835: step: 460/469, loss: 0.026209073141217232 2023-01-22 14:07:54.745743: step: 462/469, loss: 0.005066557787358761 2023-01-22 14:07:55.393419: step: 464/469, loss: 0.0262867771089077 2023-01-22 14:07:56.004758: step: 466/469, loss: 3.727868170244619e-05 2023-01-22 14:07:56.641453: step: 468/469, loss: 0.011682676151394844 2023-01-22 14:07:57.256231: step: 470/469, loss: 0.013487524352967739 2023-01-22 14:07:57.854660: step: 472/469, loss: 0.0008283891365863383 2023-01-22 14:07:58.508062: step: 474/469, loss: 5.688061355613172e-05 2023-01-22 14:07:59.091034: step: 476/469, loss: 0.003044802462682128 2023-01-22 14:07:59.709235: step: 478/469, loss: 0.07766813039779663 2023-01-22 14:08:00.360329: step: 480/469, loss: 0.2414858639240265 2023-01-22 14:08:00.988783: step: 482/469, loss: 0.0015074929688125849 2023-01-22 14:08:01.593129: step: 484/469, loss: 0.010486260987818241 2023-01-22 14:08:02.234749: step: 486/469, loss: 0.011371143162250519 2023-01-22 14:08:02.809802: step: 488/469, loss: 0.0038459154311567545 2023-01-22 14:08:03.450551: step: 490/469, loss: 0.0009269219590350986 2023-01-22 14:08:04.090551: step: 492/469, loss: 0.00967847928404808 2023-01-22 14:08:04.728336: step: 494/469, loss: 0.003559316508471966 2023-01-22 14:08:05.354200: step: 496/469, loss: 0.048895612359046936 2023-01-22 14:08:05.929682: step: 498/469, loss: 0.05181241035461426 2023-01-22 14:08:06.608997: step: 500/469, loss: 0.007987109944224358 2023-01-22 14:08:07.233436: step: 502/469, loss: 0.16030405461788177 2023-01-22 14:08:07.874672: step: 504/469, loss: 0.0008819969370961189 2023-01-22 14:08:08.488716: step: 506/469, loss: 0.28211885690689087 2023-01-22 14:08:09.114338: step: 508/469, loss: 0.001571113127283752 2023-01-22 14:08:09.672250: step: 510/469, loss: 0.00629191379994154 2023-01-22 14:08:10.282669: step: 512/469, loss: 0.02589668706059456 2023-01-22 14:08:10.912459: step: 514/469, loss: 0.0009901889134198427 2023-01-22 14:08:11.571908: step: 516/469, loss: 0.07391221076250076 2023-01-22 14:08:12.230308: step: 518/469, loss: 0.04188664257526398 2023-01-22 14:08:12.876500: step: 520/469, loss: 0.010917184874415398 2023-01-22 14:08:13.505672: step: 522/469, loss: 0.012965405359864235 2023-01-22 14:08:14.064668: step: 524/469, loss: 0.32013627886772156 2023-01-22 14:08:14.776160: step: 526/469, loss: 0.024639640003442764 2023-01-22 14:08:15.409379: step: 528/469, loss: 0.00801227055490017 2023-01-22 
14:08:15.987991: step: 530/469, loss: 0.020771576091647148 2023-01-22 14:08:16.639011: step: 532/469, loss: 0.0076049817726016045 2023-01-22 14:08:17.325521: step: 534/469, loss: 0.007345182821154594 2023-01-22 14:08:17.969821: step: 536/469, loss: 0.20593872666358948 2023-01-22 14:08:18.607235: step: 538/469, loss: 0.23821468651294708 2023-01-22 14:08:19.263449: step: 540/469, loss: 0.0007799813756719232 2023-01-22 14:08:19.866573: step: 542/469, loss: 0.025101082399487495 2023-01-22 14:08:20.406817: step: 544/469, loss: 0.0236770361661911 2023-01-22 14:08:21.034472: step: 546/469, loss: 0.02644025720655918 2023-01-22 14:08:21.634546: step: 548/469, loss: 0.02457072027027607 2023-01-22 14:08:22.256370: step: 550/469, loss: 0.0788365826010704 2023-01-22 14:08:22.880095: step: 552/469, loss: 0.010176182724535465 2023-01-22 14:08:23.447707: step: 554/469, loss: 0.005486792419105768 2023-01-22 14:08:24.110450: step: 556/469, loss: 0.026093045249581337 2023-01-22 14:08:24.913372: step: 558/469, loss: 0.0605422779917717 2023-01-22 14:08:25.508880: step: 560/469, loss: 0.025924276560544968 2023-01-22 14:08:26.149505: step: 562/469, loss: 0.06749466806650162 2023-01-22 14:08:26.778946: step: 564/469, loss: 0.042776819318532944 2023-01-22 14:08:27.362900: step: 566/469, loss: 0.010451863519847393 2023-01-22 14:08:27.971007: step: 568/469, loss: 0.01529045682400465 2023-01-22 14:08:28.566323: step: 570/469, loss: 0.011241276748478413 2023-01-22 14:08:29.176541: step: 572/469, loss: 0.3424895405769348 2023-01-22 14:08:29.784159: step: 574/469, loss: 0.019531693309545517 2023-01-22 14:08:30.421463: step: 576/469, loss: 0.0014761670026928186 2023-01-22 14:08:30.998464: step: 578/469, loss: 0.048141270875930786 2023-01-22 14:08:31.651440: step: 580/469, loss: 0.004486797843128443 2023-01-22 14:08:32.279963: step: 582/469, loss: 0.0013610408641397953 2023-01-22 14:08:32.845367: step: 584/469, loss: 0.0013765976764261723 2023-01-22 14:08:33.460838: step: 586/469, loss: 0.02723407931625843 2023-01-22 14:08:34.121924: step: 588/469, loss: 0.0032612651120871305 2023-01-22 14:08:34.735070: step: 590/469, loss: 0.03588743880391121 2023-01-22 14:08:35.427135: step: 592/469, loss: 0.0017066128784790635 2023-01-22 14:08:36.026760: step: 594/469, loss: 0.007653994485735893 2023-01-22 14:08:36.641404: step: 596/469, loss: 0.023221269249916077 2023-01-22 14:08:37.256971: step: 598/469, loss: 0.000273090903647244 2023-01-22 14:08:37.923857: step: 600/469, loss: 0.003898853901773691 2023-01-22 14:08:38.487160: step: 602/469, loss: 0.054680511355400085 2023-01-22 14:08:39.103068: step: 604/469, loss: 0.6385080814361572 2023-01-22 14:08:39.710981: step: 606/469, loss: 0.0015598267782479525 2023-01-22 14:08:40.370141: step: 608/469, loss: 0.01506998110562563 2023-01-22 14:08:40.973045: step: 610/469, loss: 0.0008091532508842647 2023-01-22 14:08:41.684818: step: 612/469, loss: 0.014290979132056236 2023-01-22 14:08:42.287317: step: 614/469, loss: 0.029819728806614876 2023-01-22 14:08:42.869883: step: 616/469, loss: 0.013888353481888771 2023-01-22 14:08:43.481625: step: 618/469, loss: 0.007446426432579756 2023-01-22 14:08:44.097318: step: 620/469, loss: 0.0053463405929505825 2023-01-22 14:08:44.690257: step: 622/469, loss: 0.015953373163938522 2023-01-22 14:08:45.303266: step: 624/469, loss: 0.005202372092753649 2023-01-22 14:08:45.897712: step: 626/469, loss: 0.338346004486084 2023-01-22 14:08:46.533933: step: 628/469, loss: 0.06557228416204453 2023-01-22 14:08:47.098835: step: 630/469, loss: 0.13257935643196106 
2023-01-22 14:08:47.703460: step: 632/469, loss: 0.0065249791368842125 2023-01-22 14:08:48.359677: step: 634/469, loss: 0.04262690991163254 2023-01-22 14:08:48.980767: step: 636/469, loss: 0.00965095590800047 2023-01-22 14:08:49.589796: step: 638/469, loss: 0.8289828896522522 2023-01-22 14:08:50.236966: step: 640/469, loss: 0.030092066153883934 2023-01-22 14:08:50.856961: step: 642/469, loss: 0.032380178570747375 2023-01-22 14:08:51.420050: step: 644/469, loss: 0.008327679708600044 2023-01-22 14:08:52.031378: step: 646/469, loss: 0.016449350863695145 2023-01-22 14:08:52.616498: step: 648/469, loss: 0.0007658099639229476 2023-01-22 14:08:53.301714: step: 650/469, loss: 0.009851734153926373 2023-01-22 14:08:53.922634: step: 652/469, loss: 0.05094342306256294 2023-01-22 14:08:54.527638: step: 654/469, loss: 0.0020685922354459763 2023-01-22 14:08:55.246098: step: 656/469, loss: 0.05595378950238228 2023-01-22 14:08:55.874377: step: 658/469, loss: 0.009969577193260193 2023-01-22 14:08:56.598792: step: 660/469, loss: 0.07338903099298477 2023-01-22 14:08:57.345206: step: 662/469, loss: 0.28325212001800537 2023-01-22 14:08:57.905159: step: 664/469, loss: 0.011265905573964119 2023-01-22 14:08:58.536177: step: 666/469, loss: 0.0010473066940903664 2023-01-22 14:08:59.199307: step: 668/469, loss: 0.03323003277182579 2023-01-22 14:08:59.728071: step: 670/469, loss: 0.001536346971988678 2023-01-22 14:09:00.335035: step: 672/469, loss: 0.06809452176094055 2023-01-22 14:09:00.993932: step: 674/469, loss: 0.6119838953018188 2023-01-22 14:09:01.688977: step: 676/469, loss: 0.06336773931980133 2023-01-22 14:09:02.291393: step: 678/469, loss: 0.0024896988179534674 2023-01-22 14:09:02.911419: step: 680/469, loss: 0.0852007046341896 2023-01-22 14:09:03.558043: step: 682/469, loss: 0.006763627752661705 2023-01-22 14:09:04.121755: step: 684/469, loss: 0.007324565201997757 2023-01-22 14:09:04.906538: step: 686/469, loss: 0.0406450591981411 2023-01-22 14:09:05.475937: step: 688/469, loss: 0.045363061130046844 2023-01-22 14:09:06.072756: step: 690/469, loss: 0.040597084909677505 2023-01-22 14:09:06.641684: step: 692/469, loss: 0.05471392348408699 2023-01-22 14:09:07.270219: step: 694/469, loss: 0.02417490817606449 2023-01-22 14:09:07.911337: step: 696/469, loss: 0.038300611078739166 2023-01-22 14:09:08.546255: step: 698/469, loss: 0.03155882656574249 2023-01-22 14:09:09.138761: step: 700/469, loss: 0.010796644724905491 2023-01-22 14:09:09.778967: step: 702/469, loss: 0.02432398311793804 2023-01-22 14:09:10.476089: step: 704/469, loss: 0.5379462838172913 2023-01-22 14:09:11.145881: step: 706/469, loss: 0.005173343233764172 2023-01-22 14:09:11.745309: step: 708/469, loss: 0.028695274144411087 2023-01-22 14:09:12.296777: step: 710/469, loss: 0.08584290742874146 2023-01-22 14:09:13.023472: step: 712/469, loss: 0.0008699421887286007 2023-01-22 14:09:13.698376: step: 714/469, loss: 0.009589890018105507 2023-01-22 14:09:14.283941: step: 716/469, loss: 0.0009711757302284241 2023-01-22 14:09:14.920984: step: 718/469, loss: 0.013375996612012386 2023-01-22 14:09:15.457855: step: 720/469, loss: 0.022644609212875366 2023-01-22 14:09:16.042079: step: 722/469, loss: 0.5862990617752075 2023-01-22 14:09:16.694459: step: 724/469, loss: 0.009702946059405804 2023-01-22 14:09:17.300455: step: 726/469, loss: 0.061791542917490005 2023-01-22 14:09:17.934112: step: 728/469, loss: 0.01918252743780613 2023-01-22 14:09:18.561643: step: 730/469, loss: 0.010699711740016937 2023-01-22 14:09:19.258678: step: 732/469, loss: 0.043368566781282425 
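The per-epoch evaluation summaries (like the one that closed epoch 29 above and the one that closes this epoch below) embed each result as a Python dict literal. A minimal sketch of pulling those payloads out of the log for later comparison, assuming the literal format is exactly as printed here; the regex, file name, and function names are illustrative only.

# Minimal sketch: extracting the evaluation dicts from this log.
# Assumption: every "<Split> <Language>: {...}" payload is a one-line
# Python dict literal as printed below; regex and names are illustrative.
import ast
import re

PATTERN = re.compile(
    r"(Dev|Test|Sample) (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})"
)

def extract_scores(log_text: str):
    """Yield (split, language, result_dict) for each evaluation entry."""
    for split, lang, payload in PATTERN.findall(log_text):
        yield split, lang, ast.literal_eval(payload)

# Example: keep only the combined score per (split, language).
# combined = {(s, l): d["combined"]
#             for s, l, d in extract_scores(open("train.log").read())}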
2023-01-22 14:09:19.879154: step: 734/469, loss: 0.0138897355645895 2023-01-22 14:09:20.509680: step: 736/469, loss: 0.021746980026364326 2023-01-22 14:09:21.135174: step: 738/469, loss: 0.07260733097791672 2023-01-22 14:09:21.782270: step: 740/469, loss: 0.0003846806939691305 2023-01-22 14:09:22.383591: step: 742/469, loss: 0.1339012086391449 2023-01-22 14:09:23.092260: step: 744/469, loss: 0.00799509510397911 2023-01-22 14:09:23.729744: step: 746/469, loss: 0.001403206493705511 2023-01-22 14:09:24.407509: step: 748/469, loss: 0.019522899761795998 2023-01-22 14:09:25.111185: step: 750/469, loss: 0.0902690514922142 2023-01-22 14:09:25.705261: step: 752/469, loss: 0.011594656854867935 2023-01-22 14:09:26.320860: step: 754/469, loss: 0.013745458796620369 2023-01-22 14:09:26.955430: step: 756/469, loss: 0.012667307630181313 2023-01-22 14:09:27.571608: step: 758/469, loss: 0.0009552580304443836 2023-01-22 14:09:28.215820: step: 760/469, loss: 0.0006107121589593589 2023-01-22 14:09:28.780921: step: 762/469, loss: 0.027866194024682045 2023-01-22 14:09:29.405469: step: 764/469, loss: 0.0951715037226677 2023-01-22 14:09:30.045524: step: 766/469, loss: 0.016138773411512375 2023-01-22 14:09:30.752922: step: 768/469, loss: 0.005201226100325584 2023-01-22 14:09:31.350652: step: 770/469, loss: 0.00589489471167326 2023-01-22 14:09:31.912020: step: 772/469, loss: 0.02527569979429245 2023-01-22 14:09:32.520083: step: 774/469, loss: 0.0347880944609642 2023-01-22 14:09:33.143608: step: 776/469, loss: 0.013205569237470627 2023-01-22 14:09:33.774960: step: 778/469, loss: 0.011248763650655746 2023-01-22 14:09:34.464476: step: 780/469, loss: 0.00997249223291874 2023-01-22 14:09:35.003389: step: 782/469, loss: 0.0025223700795322657 2023-01-22 14:09:35.553301: step: 784/469, loss: 0.009769073687493801 2023-01-22 14:09:36.204530: step: 786/469, loss: 0.003616972593590617 2023-01-22 14:09:36.922384: step: 788/469, loss: 0.022250380367040634 2023-01-22 14:09:37.587031: step: 790/469, loss: 0.012565650045871735 2023-01-22 14:09:38.188436: step: 792/469, loss: 0.046210065484046936 2023-01-22 14:09:38.801553: step: 794/469, loss: 0.023500768467783928 2023-01-22 14:09:39.399346: step: 796/469, loss: 0.0485650934278965 2023-01-22 14:09:39.986985: step: 798/469, loss: 0.00012500408047344536 2023-01-22 14:09:40.592070: step: 800/469, loss: 0.007613717578351498 2023-01-22 14:09:41.159241: step: 802/469, loss: 0.003843215061351657 2023-01-22 14:09:41.758755: step: 804/469, loss: 0.00727503839880228 2023-01-22 14:09:42.401140: step: 806/469, loss: 0.017243437469005585 2023-01-22 14:09:43.022281: step: 808/469, loss: 0.01734367199242115 2023-01-22 14:09:43.661574: step: 810/469, loss: 0.012736249715089798 2023-01-22 14:09:44.283690: step: 812/469, loss: 0.057649098336696625 2023-01-22 14:09:44.906684: step: 814/469, loss: 0.019087396562099457 2023-01-22 14:09:45.560495: step: 816/469, loss: 0.04761027917265892 2023-01-22 14:09:46.204921: step: 818/469, loss: 0.03003348782658577 2023-01-22 14:09:46.825943: step: 820/469, loss: 0.005879759788513184 2023-01-22 14:09:47.460517: step: 822/469, loss: 0.0019286437891423702 2023-01-22 14:09:48.152569: step: 824/469, loss: 0.012159345671534538 2023-01-22 14:09:48.823533: step: 826/469, loss: 0.07695124298334122 2023-01-22 14:09:49.454015: step: 828/469, loss: 0.011008651927113533 2023-01-22 14:09:50.156589: step: 830/469, loss: 0.027759000658988953 2023-01-22 14:09:50.734794: step: 832/469, loss: 0.0030974114779382944 2023-01-22 14:09:51.352555: step: 834/469, loss: 0.07664292305707932 
2023-01-22 14:09:51.910311: step: 836/469, loss: 0.09113086014986038 2023-01-22 14:09:52.537836: step: 838/469, loss: 0.0007742611924186349 2023-01-22 14:09:53.207258: step: 840/469, loss: 0.01921161636710167 2023-01-22 14:09:53.827466: step: 842/469, loss: 0.006564127281308174 2023-01-22 14:09:54.514468: step: 844/469, loss: 0.015580904670059681 2023-01-22 14:09:55.154211: step: 846/469, loss: 0.00860374141484499 2023-01-22 14:09:55.822660: step: 848/469, loss: 0.005637663416564465 2023-01-22 14:09:56.476830: step: 850/469, loss: 0.03962088003754616 2023-01-22 14:09:57.072302: step: 852/469, loss: 0.005060709081590176 2023-01-22 14:09:57.695439: step: 854/469, loss: 0.00484490767121315 2023-01-22 14:09:58.324603: step: 856/469, loss: 0.0007542431703768671 2023-01-22 14:09:58.936245: step: 858/469, loss: 0.02857186645269394 2023-01-22 14:09:59.567393: step: 860/469, loss: 0.020907428115606308 2023-01-22 14:10:00.155036: step: 862/469, loss: 0.02676711417734623 2023-01-22 14:10:00.694539: step: 864/469, loss: 0.0011087771272286773 2023-01-22 14:10:01.340956: step: 866/469, loss: 0.025923844426870346 2023-01-22 14:10:01.941373: step: 868/469, loss: 0.06489332020282745 2023-01-22 14:10:02.563691: step: 870/469, loss: 0.02352488972246647 2023-01-22 14:10:03.169664: step: 872/469, loss: 0.030912941321730614 2023-01-22 14:10:03.757450: step: 874/469, loss: 0.004885273054242134 2023-01-22 14:10:04.331598: step: 876/469, loss: 0.07296209037303925 2023-01-22 14:10:04.973246: step: 878/469, loss: 0.02735503576695919 2023-01-22 14:10:05.523632: step: 880/469, loss: 0.0836150050163269 2023-01-22 14:10:06.096967: step: 882/469, loss: 0.018567459657788277 2023-01-22 14:10:06.667056: step: 884/469, loss: 0.0021437390241771936 2023-01-22 14:10:07.212458: step: 886/469, loss: 0.017340516671538353 2023-01-22 14:10:07.808847: step: 888/469, loss: 0.009366720914840698 2023-01-22 14:10:08.389347: step: 890/469, loss: 0.01768156886100769 2023-01-22 14:10:08.979519: step: 892/469, loss: 0.029670976102352142 2023-01-22 14:10:09.615219: step: 894/469, loss: 0.0538417287170887 2023-01-22 14:10:10.293178: step: 896/469, loss: 0.021779846400022507 2023-01-22 14:10:10.937034: step: 898/469, loss: 0.013282307423651218 2023-01-22 14:10:11.491801: step: 900/469, loss: 0.0017287018708884716 2023-01-22 14:10:12.095412: step: 902/469, loss: 0.012488807551562786 2023-01-22 14:10:12.739255: step: 904/469, loss: 1.4560414552688599 2023-01-22 14:10:13.443707: step: 906/469, loss: 6.52432645438239e-05 2023-01-22 14:10:14.035197: step: 908/469, loss: 0.004614822566509247 2023-01-22 14:10:14.711087: step: 910/469, loss: 0.011488398537039757 2023-01-22 14:10:15.365054: step: 912/469, loss: 0.0010576223721727729 2023-01-22 14:10:15.944336: step: 914/469, loss: 0.053319886326789856 2023-01-22 14:10:16.551715: step: 916/469, loss: 0.012150132097303867 2023-01-22 14:10:17.243743: step: 918/469, loss: 0.11468244343996048 2023-01-22 14:10:17.878768: step: 920/469, loss: 0.0085227582603693 2023-01-22 14:10:18.438400: step: 922/469, loss: 0.026661816984415054 2023-01-22 14:10:19.015005: step: 924/469, loss: 0.039426226168870926 2023-01-22 14:10:19.599537: step: 926/469, loss: 0.013353646732866764 2023-01-22 14:10:20.367338: step: 928/469, loss: 0.04049745202064514 2023-01-22 14:10:20.958488: step: 930/469, loss: 0.02001982554793358 2023-01-22 14:10:21.581220: step: 932/469, loss: 0.00044286841875873506 2023-01-22 14:10:22.194795: step: 934/469, loss: 0.00044436060125008225 2023-01-22 14:10:22.745696: step: 936/469, loss: 1.128492832183838 
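Each step line above reports the loss of one optimization step; the epoch-level "Loss:" line in the summary just below (0.053 for this epoch) appears to be the average of those per-step values. A minimal sketch of recovering that average from the raw log text, under that assumption; the regex and function names are illustrative only.

# Minimal sketch: averaging the per-step losses of one epoch.
# Assumption: the epoch-level "Loss:" line below is (roughly) the mean of
# the "step: N/469, loss: X" entries above; regex and names are illustrative.
import re
from statistics import mean

STEP_LOSS = re.compile(r"step: \d+/\d+, loss: ([0-9.eE+-]+)")

def epoch_mean_loss(epoch_log: str) -> float:
    """Mean of all per-step losses found in one epoch's worth of log text."""
    losses = [float(x) for x in STEP_LOSS.findall(epoch_log)]
    return mean(losses)

# For this epoch the result should land near the reported "Loss: 0.053".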
2023-01-22 14:10:23.434435: step: 938/469, loss: 0.0041632517240941525 ================================================== Loss: 0.053 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29623096144693, 'r': 0.3220879334138347, 'f1': 0.3086188016528925}, 'combined': 0.22740332753371026, 'epoch': 30} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3124048848262709, 'r': 0.268673917416921, 'f1': 0.28889384332188356}, 'combined': 0.15757845999375467, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29887621960662847, 'r': 0.3329038726927721, 'f1': 0.31497368206299986}, 'combined': 0.23208587099378936, 'epoch': 30} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3161198793599519, 'r': 0.2716203716621159, 'f1': 0.2921855325450883}, 'combined': 0.1593739268427754, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28760281385281383, 'r': 0.3247128543499511, 'f1': 0.3050332874196511}, 'combined': 0.22476136967763763, 'epoch': 30} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3124372939995096, 'r': 0.27070276067478094, 'f1': 0.2900765857034662}, 'combined': 0.15822359220189067, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23717948717948717, 'r': 0.35238095238095235, 'f1': 0.2835249042145594}, 'combined': 0.18901660280970625, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2642857142857143, 'r': 0.40217391304347827, 'f1': 0.31896551724137934}, 'combined': 0.15948275862068967, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 
19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:13:10.999524: step: 2/469, loss: 0.0005729378899559379 2023-01-22 14:13:11.622015: step: 4/469, loss: 0.021582650020718575 2023-01-22 14:13:12.386534: step: 6/469, loss: 0.019079118967056274 2023-01-22 14:13:13.107888: step: 8/469, loss: 0.0162974800914526 2023-01-22 14:13:13.741742: step: 10/469, loss: 0.01126583106815815 2023-01-22 14:13:14.351286: step: 12/469, loss: 0.007237909361720085 2023-01-22 14:13:14.999701: step: 14/469, loss: 0.025096124038100243 2023-01-22 14:13:15.617794: step: 16/469, loss: 0.003229469293728471 2023-01-22 14:13:16.235886: step: 18/469, loss: 0.012669836170971394 2023-01-22 14:13:16.803544: step: 20/469, loss: 0.07041142880916595 2023-01-22 14:13:17.425420: step: 22/469, loss: 0.028156736865639687 2023-01-22 14:13:18.162309: step: 24/469, loss: 0.010577772743999958 2023-01-22 14:13:18.822015: step: 26/469, loss: 0.019591083750128746 2023-01-22 14:13:19.543801: step: 28/469, loss: 0.003482040949165821 2023-01-22 14:13:20.158380: step: 30/469, loss: 0.0009129349491558969 2023-01-22 14:13:20.770912: step: 32/469, loss: 0.024413228034973145 2023-01-22 14:13:21.423659: step: 34/469, loss: 0.01993262954056263 2023-01-22 14:13:22.009248: step: 36/469, loss: 0.01959720440208912 2023-01-22 14:13:22.618705: step: 38/469, loss: 0.0008238955633714795 2023-01-22 14:13:23.173404: step: 40/469, loss: 0.002699420787394047 2023-01-22 14:13:23.864377: step: 42/469, loss: 0.7757253050804138 2023-01-22 14:13:24.522465: step: 44/469, loss: 0.0028708865866065025 2023-01-22 14:13:25.132526: step: 46/469, loss: 0.018478717654943466 2023-01-22 14:13:25.788254: step: 48/469, loss: 0.055832114070653915 2023-01-22 14:13:26.445129: step: 50/469, loss: 0.0019766055047512054 2023-01-22 14:13:27.119359: step: 52/469, loss: 0.016218986362218857 2023-01-22 14:13:27.718241: step: 54/469, loss: 0.03432663530111313 2023-01-22 14:13:28.295324: step: 56/469, loss: 0.001772875664755702 2023-01-22 14:13:28.888956: step: 58/469, loss: 0.02379467710852623 2023-01-22 14:13:29.552650: step: 60/469, loss: 0.015501943416893482 2023-01-22 14:13:30.190815: step: 62/469, loss: 0.0018933971878141165 2023-01-22 14:13:30.837331: step: 64/469, loss: 0.005271086934953928 2023-01-22 14:13:31.503949: step: 66/469, loss: 0.026647893711924553 2023-01-22 14:13:32.144894: step: 68/469, loss: 0.009883650578558445 2023-01-22 14:13:32.793128: step: 70/469, loss: 0.01539866253733635 2023-01-22 14:13:33.389279: step: 72/469, loss: 0.0025817533023655415 2023-01-22 14:13:34.037274: step: 74/469, loss: 0.004883921705186367 2023-01-22 14:13:34.688037: step: 76/469, loss: 
0.0011225785128772259 2023-01-22 14:13:35.305908: step: 78/469, loss: 0.04822733998298645 2023-01-22 14:13:35.971972: step: 80/469, loss: 0.03262604400515556 2023-01-22 14:13:36.615937: step: 82/469, loss: 0.015143095515668392 2023-01-22 14:13:37.173847: step: 84/469, loss: 0.10950957983732224 2023-01-22 14:13:37.756694: step: 86/469, loss: 0.0032710139639675617 2023-01-22 14:13:38.419575: step: 88/469, loss: 0.05682629719376564 2023-01-22 14:13:39.057048: step: 90/469, loss: 0.07679912447929382 2023-01-22 14:13:39.690635: step: 92/469, loss: 0.0005978215485811234 2023-01-22 14:13:40.341844: step: 94/469, loss: 0.0337165929377079 2023-01-22 14:13:41.025041: step: 96/469, loss: 0.006347418297082186 2023-01-22 14:13:41.660559: step: 98/469, loss: 0.06465981900691986 2023-01-22 14:13:42.292510: step: 100/469, loss: 0.00465664267539978 2023-01-22 14:13:42.953025: step: 102/469, loss: 0.024802401661872864 2023-01-22 14:13:43.597990: step: 104/469, loss: 0.011391839943826199 2023-01-22 14:13:44.222388: step: 106/469, loss: 0.0023167754989117384 2023-01-22 14:13:44.865939: step: 108/469, loss: 0.047045741230249405 2023-01-22 14:13:45.483076: step: 110/469, loss: 0.0008440035162493587 2023-01-22 14:13:46.123385: step: 112/469, loss: 0.010709509253501892 2023-01-22 14:13:46.701578: step: 114/469, loss: 0.016167698428034782 2023-01-22 14:13:47.324122: step: 116/469, loss: 0.0020748646929860115 2023-01-22 14:13:47.934371: step: 118/469, loss: 0.002652397146448493 2023-01-22 14:13:48.581819: step: 120/469, loss: 0.004655083175748587 2023-01-22 14:13:49.201291: step: 122/469, loss: 0.02868480049073696 2023-01-22 14:13:49.851180: step: 124/469, loss: 0.04327602684497833 2023-01-22 14:13:50.486898: step: 126/469, loss: 0.05765855684876442 2023-01-22 14:13:51.071776: step: 128/469, loss: 0.01431294996291399 2023-01-22 14:13:51.637676: step: 130/469, loss: 0.05785543471574783 2023-01-22 14:13:52.242103: step: 132/469, loss: 0.003356471424922347 2023-01-22 14:13:52.883890: step: 134/469, loss: 0.0007477797335013747 2023-01-22 14:13:53.572436: step: 136/469, loss: 0.007177515886723995 2023-01-22 14:13:54.136874: step: 138/469, loss: 0.01197109092026949 2023-01-22 14:13:54.779528: step: 140/469, loss: 0.0010844813659787178 2023-01-22 14:13:55.401958: step: 142/469, loss: 0.018617521971464157 2023-01-22 14:13:56.070083: step: 144/469, loss: 0.031321339309215546 2023-01-22 14:13:56.661854: step: 146/469, loss: 0.010097871534526348 2023-01-22 14:13:57.258632: step: 148/469, loss: 0.0375542975962162 2023-01-22 14:13:57.965969: step: 150/469, loss: 0.0012660510838031769 2023-01-22 14:13:58.535944: step: 152/469, loss: 0.00766314473003149 2023-01-22 14:13:59.156534: step: 154/469, loss: 0.040173135697841644 2023-01-22 14:13:59.741380: step: 156/469, loss: 0.01005063857883215 2023-01-22 14:14:00.344586: step: 158/469, loss: 0.012250649742782116 2023-01-22 14:14:00.989997: step: 160/469, loss: 0.0040445332415401936 2023-01-22 14:14:01.628845: step: 162/469, loss: 0.002032180316746235 2023-01-22 14:14:02.238747: step: 164/469, loss: 0.07697049528360367 2023-01-22 14:14:02.809336: step: 166/469, loss: 0.00881399679929018 2023-01-22 14:14:03.431343: step: 168/469, loss: 0.0013901349157094955 2023-01-22 14:14:04.091066: step: 170/469, loss: 0.0011047078296542168 2023-01-22 14:14:04.752904: step: 172/469, loss: 0.04833559691905975 2023-01-22 14:14:05.436704: step: 174/469, loss: 0.06360644847154617 2023-01-22 14:14:06.188216: step: 176/469, loss: 0.023506218567490578 2023-01-22 14:14:06.793225: step: 178/469, loss: 
0.0009571845876052976 2023-01-22 14:14:07.368463: step: 180/469, loss: 0.019779067486524582 2023-01-22 14:14:07.999438: step: 182/469, loss: 0.011791273951530457 2023-01-22 14:14:08.642423: step: 184/469, loss: 0.004171493463218212 2023-01-22 14:14:09.270124: step: 186/469, loss: 0.05008388310670853 2023-01-22 14:14:09.970831: step: 188/469, loss: 0.007591721601784229 2023-01-22 14:14:10.633189: step: 190/469, loss: 0.00032156784436665475 2023-01-22 14:14:11.264392: step: 192/469, loss: 0.0032910103909671307 2023-01-22 14:14:11.841107: step: 194/469, loss: 0.01623852550983429 2023-01-22 14:14:12.492966: step: 196/469, loss: 0.006585577968508005 2023-01-22 14:14:13.150966: step: 198/469, loss: 0.008083397522568703 2023-01-22 14:14:13.708341: step: 200/469, loss: 0.00017901387764140964 2023-01-22 14:14:14.340653: step: 202/469, loss: 0.0354151576757431 2023-01-22 14:14:14.947883: step: 204/469, loss: 0.004744931124150753 2023-01-22 14:14:15.604419: step: 206/469, loss: 0.01003217976540327 2023-01-22 14:14:16.246819: step: 208/469, loss: 8.596295811003074e-05 2023-01-22 14:14:16.843764: step: 210/469, loss: 0.012387464754283428 2023-01-22 14:14:17.467171: step: 212/469, loss: 0.14620867371559143 2023-01-22 14:14:18.150557: step: 214/469, loss: 0.007369412574917078 2023-01-22 14:14:18.676155: step: 216/469, loss: 0.024283574894070625 2023-01-22 14:14:19.325155: step: 218/469, loss: 0.004432167857885361 2023-01-22 14:14:19.882263: step: 220/469, loss: 0.005264786072075367 2023-01-22 14:14:20.535836: step: 222/469, loss: 0.003752398304641247 2023-01-22 14:14:21.174434: step: 224/469, loss: 0.021850887686014175 2023-01-22 14:14:21.846363: step: 226/469, loss: 0.0037360461428761482 2023-01-22 14:14:22.477460: step: 228/469, loss: 0.20684556663036346 2023-01-22 14:14:23.088740: step: 230/469, loss: 0.017796510830521584 2023-01-22 14:14:23.720133: step: 232/469, loss: 0.019183460623025894 2023-01-22 14:14:24.278236: step: 234/469, loss: 0.011985360644757748 2023-01-22 14:14:24.963231: step: 236/469, loss: 0.0029038565699011087 2023-01-22 14:14:25.585965: step: 238/469, loss: 0.03439052030444145 2023-01-22 14:14:26.241513: step: 240/469, loss: 0.0347650982439518 2023-01-22 14:14:26.823161: step: 242/469, loss: 0.025752931833267212 2023-01-22 14:14:27.442914: step: 244/469, loss: 0.00030208344105631113 2023-01-22 14:14:28.034200: step: 246/469, loss: 0.00015331061149481684 2023-01-22 14:14:28.665425: step: 248/469, loss: 0.0011783675290644169 2023-01-22 14:14:29.291616: step: 250/469, loss: 0.04272384196519852 2023-01-22 14:14:29.937297: step: 252/469, loss: 0.0030883518047630787 2023-01-22 14:14:30.605825: step: 254/469, loss: 0.00023785419762134552 2023-01-22 14:14:31.278614: step: 256/469, loss: 0.0015512205427512527 2023-01-22 14:14:31.877229: step: 258/469, loss: 0.005846394691616297 2023-01-22 14:14:32.458888: step: 260/469, loss: 0.016543468460440636 2023-01-22 14:14:33.033219: step: 262/469, loss: 0.024850815534591675 2023-01-22 14:14:33.742206: step: 264/469, loss: 0.002375295152887702 2023-01-22 14:14:34.373875: step: 266/469, loss: 0.004793278872966766 2023-01-22 14:14:34.955072: step: 268/469, loss: 0.009141725488007069 2023-01-22 14:14:35.550534: step: 270/469, loss: 0.01785050705075264 2023-01-22 14:14:36.175686: step: 272/469, loss: 0.004698670469224453 2023-01-22 14:14:36.808840: step: 274/469, loss: 0.002224231604486704 2023-01-22 14:14:37.443218: step: 276/469, loss: 0.006143860053271055 2023-01-22 14:14:38.137435: step: 278/469, loss: 1.6983529329299927 2023-01-22 14:14:38.752068: 
step: 280/469, loss: 2.326651883777231e-05 2023-01-22 14:14:39.419083: step: 282/469, loss: 0.0031944005750119686 2023-01-22 14:14:40.064415: step: 284/469, loss: 0.00045949631021358073 2023-01-22 14:14:40.617311: step: 286/469, loss: 0.025172626599669456 2023-01-22 14:14:41.233412: step: 288/469, loss: 0.005660675000399351 2023-01-22 14:14:41.848837: step: 290/469, loss: 0.02098415233194828 2023-01-22 14:14:42.552317: step: 292/469, loss: 0.013500221073627472 2023-01-22 14:14:43.268959: step: 294/469, loss: 0.001041474286466837 2023-01-22 14:14:43.951193: step: 296/469, loss: 0.009058577008545399 2023-01-22 14:14:44.551342: step: 298/469, loss: 0.0028848727233707905 2023-01-22 14:14:45.166115: step: 300/469, loss: 0.006596582476049662 2023-01-22 14:14:45.820945: step: 302/469, loss: 0.002710898406803608 2023-01-22 14:14:46.448325: step: 304/469, loss: 0.026905842125415802 2023-01-22 14:14:47.108936: step: 306/469, loss: 0.037932656705379486 2023-01-22 14:14:47.729420: step: 308/469, loss: 0.05146189033985138 2023-01-22 14:14:48.359747: step: 310/469, loss: 0.00023282418260350823 2023-01-22 14:14:48.955142: step: 312/469, loss: 0.004080084152519703 2023-01-22 14:14:49.546570: step: 314/469, loss: 0.01171773113310337 2023-01-22 14:14:50.174902: step: 316/469, loss: 0.0002971935027744621 2023-01-22 14:14:50.778608: step: 318/469, loss: 0.015443394891917706 2023-01-22 14:14:51.409620: step: 320/469, loss: 0.033965498208999634 2023-01-22 14:14:52.052115: step: 322/469, loss: 0.0036953482776880264 2023-01-22 14:14:52.640943: step: 324/469, loss: 0.021460488438606262 2023-01-22 14:14:53.348560: step: 326/469, loss: 0.008619173429906368 2023-01-22 14:14:54.010809: step: 328/469, loss: 0.009122327901422977 2023-01-22 14:14:54.600383: step: 330/469, loss: 0.013629604130983353 2023-01-22 14:14:55.184237: step: 332/469, loss: 0.008984750136733055 2023-01-22 14:14:55.796408: step: 334/469, loss: 0.057154733687639236 2023-01-22 14:14:56.460335: step: 336/469, loss: 0.45392361283302307 2023-01-22 14:14:57.141641: step: 338/469, loss: 0.010387986898422241 2023-01-22 14:14:57.716385: step: 340/469, loss: 0.00038042268715798855 2023-01-22 14:14:58.371435: step: 342/469, loss: 0.029774561524391174 2023-01-22 14:14:58.995636: step: 344/469, loss: 0.0258974377065897 2023-01-22 14:14:59.586626: step: 346/469, loss: 0.04096565395593643 2023-01-22 14:15:00.216957: step: 348/469, loss: 0.010139081627130508 2023-01-22 14:15:00.835906: step: 350/469, loss: 0.05517276003956795 2023-01-22 14:15:01.495385: step: 352/469, loss: 0.02398163639008999 2023-01-22 14:15:02.109857: step: 354/469, loss: 0.03484689071774483 2023-01-22 14:15:02.713885: step: 356/469, loss: 0.07194016873836517 2023-01-22 14:15:03.444156: step: 358/469, loss: 0.02317383699119091 2023-01-22 14:15:04.019141: step: 360/469, loss: 0.001363103510811925 2023-01-22 14:15:04.633344: step: 362/469, loss: 0.008959420025348663 2023-01-22 14:15:05.308312: step: 364/469, loss: 0.0260955560952425 2023-01-22 14:15:05.914092: step: 366/469, loss: 0.011731689795851707 2023-01-22 14:15:06.590484: step: 368/469, loss: 0.00877874344587326 2023-01-22 14:15:07.206581: step: 370/469, loss: 0.010402548126876354 2023-01-22 14:15:07.798605: step: 372/469, loss: 0.0006272983155213296 2023-01-22 14:15:08.350225: step: 374/469, loss: 0.01702445186674595 2023-01-22 14:15:08.969478: step: 376/469, loss: 0.00828280858695507 2023-01-22 14:15:09.554228: step: 378/469, loss: 0.040847986936569214 2023-01-22 14:15:10.139384: step: 380/469, loss: 0.009576273150742054 2023-01-22 
14:15:10.802891: step: 382/469, loss: 0.015475092455744743 2023-01-22 14:15:11.412798: step: 384/469, loss: 0.0035011260770261288 2023-01-22 14:15:11.978932: step: 386/469, loss: 0.001737115322612226 2023-01-22 14:15:12.655399: step: 388/469, loss: 0.009717884473502636 2023-01-22 14:15:13.292381: step: 390/469, loss: 0.011555822566151619 2023-01-22 14:15:13.894288: step: 392/469, loss: 0.003415114479139447 2023-01-22 14:15:14.587610: step: 394/469, loss: 0.0021645131055265665 2023-01-22 14:15:15.242151: step: 396/469, loss: 0.0073388307355344296 2023-01-22 14:15:15.879445: step: 398/469, loss: 0.030330680310726166 2023-01-22 14:15:16.471342: step: 400/469, loss: 0.007304188329726458 2023-01-22 14:15:17.092434: step: 402/469, loss: 0.03339911997318268 2023-01-22 14:15:17.716989: step: 404/469, loss: 0.005928943865001202 2023-01-22 14:15:18.455409: step: 406/469, loss: 0.011872672475874424 2023-01-22 14:15:19.077753: step: 408/469, loss: 0.006231411825865507 2023-01-22 14:15:19.685370: step: 410/469, loss: 0.00125815998762846 2023-01-22 14:15:20.305130: step: 412/469, loss: 0.007493156939744949 2023-01-22 14:15:20.961148: step: 414/469, loss: 0.014170171692967415 2023-01-22 14:15:21.583052: step: 416/469, loss: 0.03449777141213417 2023-01-22 14:15:22.203755: step: 418/469, loss: 0.0023047924041748047 2023-01-22 14:15:22.799025: step: 420/469, loss: 0.023593971505761147 2023-01-22 14:15:23.518288: step: 422/469, loss: 0.10681014508008957 2023-01-22 14:15:24.139178: step: 424/469, loss: 0.020520251244306564 2023-01-22 14:15:24.705635: step: 426/469, loss: 0.0009596815798431635 2023-01-22 14:15:25.342132: step: 428/469, loss: 0.026223665103316307 2023-01-22 14:15:25.945674: step: 430/469, loss: 0.013746678829193115 2023-01-22 14:15:26.621184: step: 432/469, loss: 0.004561530891805887 2023-01-22 14:15:27.322968: step: 434/469, loss: 0.01142672449350357 2023-01-22 14:15:27.885967: step: 436/469, loss: 0.0047954595647752285 2023-01-22 14:15:28.440418: step: 438/469, loss: 0.0055572581477463245 2023-01-22 14:15:29.164186: step: 440/469, loss: 0.033278938382864 2023-01-22 14:15:29.780961: step: 442/469, loss: 0.04738524928689003 2023-01-22 14:15:30.393078: step: 444/469, loss: 0.024530841037631035 2023-01-22 14:15:30.975928: step: 446/469, loss: 0.007115528918802738 2023-01-22 14:15:31.624244: step: 448/469, loss: 0.15645791590213776 2023-01-22 14:15:32.306938: step: 450/469, loss: 0.06723763048648834 2023-01-22 14:15:32.885978: step: 452/469, loss: 0.006464261561632156 2023-01-22 14:15:33.490008: step: 454/469, loss: 0.0035614504013210535 2023-01-22 14:15:34.141071: step: 456/469, loss: 0.05196283385157585 2023-01-22 14:15:34.785121: step: 458/469, loss: 0.04066628962755203 2023-01-22 14:15:35.384294: step: 460/469, loss: 0.011683044955134392 2023-01-22 14:15:36.024541: step: 462/469, loss: 0.0032291521783918142 2023-01-22 14:15:36.547000: step: 464/469, loss: 0.0010921609355136752 2023-01-22 14:15:37.104831: step: 466/469, loss: 0.0028939854819327593 2023-01-22 14:15:37.755906: step: 468/469, loss: 0.02308109775185585 2023-01-22 14:15:38.369254: step: 470/469, loss: 0.001572037348523736 2023-01-22 14:15:38.987334: step: 472/469, loss: 0.002705646213144064 2023-01-22 14:15:39.566144: step: 474/469, loss: 0.0002364056126680225 2023-01-22 14:15:40.154449: step: 476/469, loss: 0.04919661581516266 2023-01-22 14:15:40.723259: step: 478/469, loss: 0.015851834788918495 2023-01-22 14:15:41.332095: step: 480/469, loss: 0.0008903385605663061 2023-01-22 14:15:41.872692: step: 482/469, loss: 
0.0006177516188472509 2023-01-22 14:15:42.452146: step: 484/469, loss: 0.009375235997140408 2023-01-22 14:15:43.106642: step: 486/469, loss: 0.004601579159498215 2023-01-22 14:15:43.729018: step: 488/469, loss: 0.015804223716259003 2023-01-22 14:15:44.419596: step: 490/469, loss: 0.018202630802989006 2023-01-22 14:15:45.095867: step: 492/469, loss: 0.00025559458299539983 2023-01-22 14:15:45.782897: step: 494/469, loss: 0.004796760622411966 2023-01-22 14:15:46.483009: step: 496/469, loss: 0.012133373878896236 2023-01-22 14:15:47.095794: step: 498/469, loss: 0.19531692564487457 2023-01-22 14:15:47.707273: step: 500/469, loss: 0.007474957499653101 2023-01-22 14:15:48.284454: step: 502/469, loss: 0.0008940544212237 2023-01-22 14:15:49.089351: step: 504/469, loss: 0.0025691776536405087 2023-01-22 14:15:49.682642: step: 506/469, loss: 0.050084542483091354 2023-01-22 14:15:50.294363: step: 508/469, loss: 0.005334364250302315 2023-01-22 14:15:50.893150: step: 510/469, loss: 0.0012737170327454805 2023-01-22 14:15:51.506094: step: 512/469, loss: 0.02926657348871231 2023-01-22 14:15:52.133373: step: 514/469, loss: 0.005524573847651482 2023-01-22 14:15:52.683204: step: 516/469, loss: 0.022212618961930275 2023-01-22 14:15:53.326628: step: 518/469, loss: 0.02893757075071335 2023-01-22 14:15:53.958416: step: 520/469, loss: 0.005629885010421276 2023-01-22 14:15:54.606456: step: 522/469, loss: 0.024496033787727356 2023-01-22 14:15:55.289701: step: 524/469, loss: 0.010469109751284122 2023-01-22 14:15:55.970092: step: 526/469, loss: 0.001399482600390911 2023-01-22 14:15:56.622851: step: 528/469, loss: 0.008184348233044147 2023-01-22 14:15:57.242511: step: 530/469, loss: 0.0072659580036997795 2023-01-22 14:15:57.880020: step: 532/469, loss: 0.004504271317273378 2023-01-22 14:15:58.439165: step: 534/469, loss: 0.0009724801639094949 2023-01-22 14:15:59.081451: step: 536/469, loss: 0.06959446519613266 2023-01-22 14:15:59.687645: step: 538/469, loss: 0.00033104713656939566 2023-01-22 14:16:00.285170: step: 540/469, loss: 0.009299339726567268 2023-01-22 14:16:00.870003: step: 542/469, loss: 0.008532840758562088 2023-01-22 14:16:01.503202: step: 544/469, loss: 0.00047709012869745493 2023-01-22 14:16:02.094993: step: 546/469, loss: 0.0034982587676495314 2023-01-22 14:16:02.741831: step: 548/469, loss: 0.013692804612219334 2023-01-22 14:16:03.336175: step: 550/469, loss: 0.006670364178717136 2023-01-22 14:16:03.979404: step: 552/469, loss: 0.0017971234628930688 2023-01-22 14:16:04.673193: step: 554/469, loss: 0.0021280001383274794 2023-01-22 14:16:05.293014: step: 556/469, loss: 0.05228687450289726 2023-01-22 14:16:05.971106: step: 558/469, loss: 0.03364037722349167 2023-01-22 14:16:06.569915: step: 560/469, loss: 0.04131297022104263 2023-01-22 14:16:07.166907: step: 562/469, loss: 0.019409582018852234 2023-01-22 14:16:07.900656: step: 564/469, loss: 0.004515229724347591 2023-01-22 14:16:08.573988: step: 566/469, loss: 0.001105062779970467 2023-01-22 14:16:09.190231: step: 568/469, loss: 0.00010040286724688485 2023-01-22 14:16:09.838943: step: 570/469, loss: 0.02487722598016262 2023-01-22 14:16:10.501801: step: 572/469, loss: 0.0003732540353666991 2023-01-22 14:16:11.104080: step: 574/469, loss: 0.021247070282697678 2023-01-22 14:16:11.723137: step: 576/469, loss: 0.043238986283540726 2023-01-22 14:16:12.351431: step: 578/469, loss: 0.0455329567193985 2023-01-22 14:16:13.047569: step: 580/469, loss: 0.043944958597421646 2023-01-22 14:16:13.588624: step: 582/469, loss: 0.021928537636995316 2023-01-22 14:16:14.157987: 
step: 584/469, loss: 0.019972575828433037 2023-01-22 14:16:14.719732: step: 586/469, loss: 0.05875847488641739 2023-01-22 14:16:15.398557: step: 588/469, loss: 0.020967772230505943 2023-01-22 14:16:16.042202: step: 590/469, loss: 0.03154616057872772 2023-01-22 14:16:16.794829: step: 592/469, loss: 0.049560993909835815 2023-01-22 14:16:17.449703: step: 594/469, loss: 0.004503656178712845 2023-01-22 14:16:18.059563: step: 596/469, loss: 0.026225287467241287 2023-01-22 14:16:18.649999: step: 598/469, loss: 0.012035707011818886 2023-01-22 14:16:19.266446: step: 600/469, loss: 0.052976544946432114 2023-01-22 14:16:19.932135: step: 602/469, loss: 0.004996637813746929 2023-01-22 14:16:20.521911: step: 604/469, loss: 0.004497432615607977 2023-01-22 14:16:21.159914: step: 606/469, loss: 0.0003385764721315354 2023-01-22 14:16:21.794395: step: 608/469, loss: 0.007032254710793495 2023-01-22 14:16:22.427660: step: 610/469, loss: 0.00252163247205317 2023-01-22 14:16:23.051772: step: 612/469, loss: 0.035394471138715744 2023-01-22 14:16:23.676766: step: 614/469, loss: 0.0033063183072954416 2023-01-22 14:16:24.386876: step: 616/469, loss: 0.0018908550264313817 2023-01-22 14:16:25.049510: step: 618/469, loss: 0.01915391907095909 2023-01-22 14:16:25.636199: step: 620/469, loss: 0.010002137161791325 2023-01-22 14:16:26.201216: step: 622/469, loss: 0.013907020911574364 2023-01-22 14:16:26.827122: step: 624/469, loss: 0.15572293102741241 2023-01-22 14:16:27.496605: step: 626/469, loss: 0.03410392627120018 2023-01-22 14:16:28.102875: step: 628/469, loss: 0.012967683374881744 2023-01-22 14:16:28.689910: step: 630/469, loss: 0.06055464968085289 2023-01-22 14:16:29.340347: step: 632/469, loss: 0.03310849890112877 2023-01-22 14:16:29.944170: step: 634/469, loss: 0.0019108057022094727 2023-01-22 14:16:30.698757: step: 636/469, loss: 0.004113891627639532 2023-01-22 14:16:31.349410: step: 638/469, loss: 0.07549567520618439 2023-01-22 14:16:31.967369: step: 640/469, loss: 0.016415109857916832 2023-01-22 14:16:32.573536: step: 642/469, loss: 0.04454901069402695 2023-01-22 14:16:33.193314: step: 644/469, loss: 7.959224603837356e-05 2023-01-22 14:16:33.830645: step: 646/469, loss: 0.21042108535766602 2023-01-22 14:16:34.542710: step: 648/469, loss: 0.02510662004351616 2023-01-22 14:16:35.132145: step: 650/469, loss: 0.002376752905547619 2023-01-22 14:16:35.789583: step: 652/469, loss: 0.040509264916181564 2023-01-22 14:16:36.406593: step: 654/469, loss: 0.019773663952946663 2023-01-22 14:16:36.962993: step: 656/469, loss: 0.0060656084679067135 2023-01-22 14:16:37.617493: step: 658/469, loss: 0.004794066771864891 2023-01-22 14:16:38.234492: step: 660/469, loss: 0.3114969730377197 2023-01-22 14:16:38.853865: step: 662/469, loss: 0.011619108729064465 2023-01-22 14:16:39.539825: step: 664/469, loss: 0.0051984163001179695 2023-01-22 14:16:40.092204: step: 666/469, loss: 0.034609321504831314 2023-01-22 14:16:40.716475: step: 668/469, loss: 0.011906139552593231 2023-01-22 14:16:41.336390: step: 670/469, loss: 0.007531465031206608 2023-01-22 14:16:41.990881: step: 672/469, loss: 0.008113306947052479 2023-01-22 14:16:42.595385: step: 674/469, loss: 0.009066373109817505 2023-01-22 14:16:43.191956: step: 676/469, loss: 0.020657051354646683 2023-01-22 14:16:43.743354: step: 678/469, loss: 0.007757657673209906 2023-01-22 14:16:44.398563: step: 680/469, loss: 0.0023668238427489996 2023-01-22 14:16:45.135621: step: 682/469, loss: 0.01625860668718815 2023-01-22 14:16:45.728371: step: 684/469, loss: 0.05641509220004082 2023-01-22 
14:16:46.293509: step: 686/469, loss: 0.0011308124521747231 2023-01-22 14:16:46.899455: step: 688/469, loss: 0.004692739807069302 2023-01-22 14:16:47.573911: step: 690/469, loss: 0.006711769849061966 2023-01-22 14:16:48.277706: step: 692/469, loss: 0.028482874855399132 2023-01-22 14:16:48.913785: step: 694/469, loss: 0.0388733334839344 2023-01-22 14:16:49.479838: step: 696/469, loss: 0.0009056212147697806 2023-01-22 14:16:50.137871: step: 698/469, loss: 0.05017658323049545 2023-01-22 14:16:50.732375: step: 700/469, loss: 0.04181431978940964 2023-01-22 14:16:51.386987: step: 702/469, loss: 0.005831209011375904 2023-01-22 14:16:51.986540: step: 704/469, loss: 0.00044027509284205735 2023-01-22 14:16:52.588371: step: 706/469, loss: 0.008657765574753284 2023-01-22 14:16:53.194751: step: 708/469, loss: 0.011135912500321865 2023-01-22 14:16:53.792646: step: 710/469, loss: 0.001807711785659194 2023-01-22 14:16:54.375001: step: 712/469, loss: 0.8846816420555115 2023-01-22 14:16:54.989690: step: 714/469, loss: 0.0006951538962312043 2023-01-22 14:16:55.530599: step: 716/469, loss: 0.014210399240255356 2023-01-22 14:16:56.282899: step: 718/469, loss: 0.018556321039795876 2023-01-22 14:16:56.911756: step: 720/469, loss: 0.014850894920527935 2023-01-22 14:16:57.533194: step: 722/469, loss: 0.001306203892454505 2023-01-22 14:16:58.148782: step: 724/469, loss: 0.01426657848060131 2023-01-22 14:16:58.722539: step: 726/469, loss: 0.0008523244177922606 2023-01-22 14:16:59.413476: step: 728/469, loss: 0.047879546880722046 2023-01-22 14:17:00.058487: step: 730/469, loss: 0.11118033528327942 2023-01-22 14:17:00.694688: step: 732/469, loss: 0.022916695103049278 2023-01-22 14:17:01.326685: step: 734/469, loss: 0.0035462328232824802 2023-01-22 14:17:01.959469: step: 736/469, loss: 0.036676790565252304 2023-01-22 14:17:02.639494: step: 738/469, loss: 0.08634741604328156 2023-01-22 14:17:03.322182: step: 740/469, loss: 0.02391703985631466 2023-01-22 14:17:03.902430: step: 742/469, loss: 0.0008842243114486337 2023-01-22 14:17:04.500396: step: 744/469, loss: 0.0008636997663415968 2023-01-22 14:17:05.121602: step: 746/469, loss: 0.0009479793952777982 2023-01-22 14:17:05.734325: step: 748/469, loss: 0.0065277642570436 2023-01-22 14:17:06.317125: step: 750/469, loss: 0.0010879590408876538 2023-01-22 14:17:06.952380: step: 752/469, loss: 0.04227452352643013 2023-01-22 14:17:07.579880: step: 754/469, loss: 0.027240905910730362 2023-01-22 14:17:08.249880: step: 756/469, loss: 0.026949815452098846 2023-01-22 14:17:08.845639: step: 758/469, loss: 0.010922363959252834 2023-01-22 14:17:09.442323: step: 760/469, loss: 0.4779205322265625 2023-01-22 14:17:10.011823: step: 762/469, loss: 0.02142428234219551 2023-01-22 14:17:10.684015: step: 764/469, loss: 0.007394219283014536 2023-01-22 14:17:11.259534: step: 766/469, loss: 0.004700392950326204 2023-01-22 14:17:11.859263: step: 768/469, loss: 0.02452561818063259 2023-01-22 14:17:12.468098: step: 770/469, loss: 0.09465805441141129 2023-01-22 14:17:13.135109: step: 772/469, loss: 0.04306073486804962 2023-01-22 14:17:13.749958: step: 774/469, loss: 0.013418346643447876 2023-01-22 14:17:14.355181: step: 776/469, loss: 0.01829378493130207 2023-01-22 14:17:14.974386: step: 778/469, loss: 0.0048456392250955105 2023-01-22 14:17:15.615254: step: 780/469, loss: 0.07697334885597229 2023-01-22 14:17:16.212540: step: 782/469, loss: 0.01613239385187626 2023-01-22 14:17:16.914805: step: 784/469, loss: 0.007246014196425676 2023-01-22 14:17:17.525891: step: 786/469, loss: 0.06801823526620865 
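The epoch-end summary further below (e.g. "Loss: 0.034" for this epoch) is presumably an aggregate of the per-step losses logged above; a minimal parsing sketch under that assumption (the regex and helper name are illustrative, not part of train.py):

import re
from statistics import mean

# Hypothetical helper: collect the "step: N/469, loss: X" entries from a chunk
# of this log and average them; the result is assumed to correspond to the
# epoch-end "Loss:" summary line.
STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9eE.+-]+)")

def epoch_mean_loss(log_text: str) -> float:
    losses = [float(m.group(1)) for m in STEP_RE.finditer(log_text)]
    return mean(losses)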
2023-01-22 14:17:18.124787: step: 788/469, loss: 0.011144820600748062 2023-01-22 14:17:18.727761: step: 790/469, loss: 0.012843788601458073 2023-01-22 14:17:19.371646: step: 792/469, loss: 0.01382353063672781 2023-01-22 14:17:19.944982: step: 794/469, loss: 0.001624260563403368 2023-01-22 14:17:20.676607: step: 796/469, loss: 0.0030050971545279026 2023-01-22 14:17:21.244623: step: 798/469, loss: 0.001356614171527326 2023-01-22 14:17:21.843455: step: 800/469, loss: 0.07368076592683792 2023-01-22 14:17:22.526126: step: 802/469, loss: 0.01569214276969433 2023-01-22 14:17:23.140179: step: 804/469, loss: 0.024293113499879837 2023-01-22 14:17:23.783414: step: 806/469, loss: 0.013290208764374256 2023-01-22 14:17:24.370767: step: 808/469, loss: 0.04374516382813454 2023-01-22 14:17:25.057059: step: 810/469, loss: 0.04349565505981445 2023-01-22 14:17:25.739153: step: 812/469, loss: 0.022485224530100822 2023-01-22 14:17:26.377214: step: 814/469, loss: 0.030861996114253998 2023-01-22 14:17:26.937254: step: 816/469, loss: 3.837492840830237e-05 2023-01-22 14:17:27.551422: step: 818/469, loss: 0.026942269876599312 2023-01-22 14:17:28.161698: step: 820/469, loss: 0.029813630506396294 2023-01-22 14:17:28.784512: step: 822/469, loss: 0.04226364195346832 2023-01-22 14:17:29.514415: step: 824/469, loss: 0.00019053473079111427 2023-01-22 14:17:30.135124: step: 826/469, loss: 0.6574000120162964 2023-01-22 14:17:30.769803: step: 828/469, loss: 0.010332663543522358 2023-01-22 14:17:31.379418: step: 830/469, loss: 0.04169304668903351 2023-01-22 14:17:32.017854: step: 832/469, loss: 0.0004893785226158798 2023-01-22 14:17:32.628704: step: 834/469, loss: 0.005573753267526627 2023-01-22 14:17:33.320260: step: 836/469, loss: 0.3874082565307617 2023-01-22 14:17:34.015011: step: 838/469, loss: 0.02727372571825981 2023-01-22 14:17:34.701366: step: 840/469, loss: 0.018294041976332664 2023-01-22 14:17:35.358657: step: 842/469, loss: 0.025934560224413872 2023-01-22 14:17:35.978613: step: 844/469, loss: 0.47449129819869995 2023-01-22 14:17:36.542722: step: 846/469, loss: 0.0019088794942945242 2023-01-22 14:17:37.219114: step: 848/469, loss: 0.0978962630033493 2023-01-22 14:17:37.796364: step: 850/469, loss: 0.0023534628562629223 2023-01-22 14:17:38.424969: step: 852/469, loss: 0.024740392342209816 2023-01-22 14:17:39.051826: step: 854/469, loss: 0.0012690417934209108 2023-01-22 14:17:39.642092: step: 856/469, loss: 0.014940101653337479 2023-01-22 14:17:40.302931: step: 858/469, loss: 0.053182702511548996 2023-01-22 14:17:40.940512: step: 860/469, loss: 0.04499712586402893 2023-01-22 14:17:41.489027: step: 862/469, loss: 0.0002868360315915197 2023-01-22 14:17:42.194819: step: 864/469, loss: 0.015457911416888237 2023-01-22 14:17:42.855658: step: 866/469, loss: 0.07289420068264008 2023-01-22 14:17:43.534860: step: 868/469, loss: 0.38829493522644043 2023-01-22 14:17:44.094065: step: 870/469, loss: 0.001543375663459301 2023-01-22 14:17:44.733953: step: 872/469, loss: 0.001180908177047968 2023-01-22 14:17:45.402618: step: 874/469, loss: 0.011447690427303314 2023-01-22 14:17:46.072509: step: 876/469, loss: 0.0020356171298772097 2023-01-22 14:17:46.721305: step: 878/469, loss: 0.0011502087581902742 2023-01-22 14:17:47.352219: step: 880/469, loss: 0.001193191739730537 2023-01-22 14:17:47.986080: step: 882/469, loss: 0.01965019293129444 2023-01-22 14:17:48.518903: step: 884/469, loss: 0.0037854595575481653 2023-01-22 14:17:49.119616: step: 886/469, loss: 0.021804021671414375 2023-01-22 14:17:49.760984: step: 888/469, loss: 
0.04215393215417862 2023-01-22 14:17:50.381092: step: 890/469, loss: 0.016078893095254898 2023-01-22 14:17:51.069661: step: 892/469, loss: 0.0015113918343558908 2023-01-22 14:17:51.695396: step: 894/469, loss: 0.005867231171578169 2023-01-22 14:17:52.329773: step: 896/469, loss: 0.005106255877763033 2023-01-22 14:17:52.936135: step: 898/469, loss: 0.008018449880182743 2023-01-22 14:17:53.636120: step: 900/469, loss: 0.03355848044157028 2023-01-22 14:17:54.236500: step: 902/469, loss: 0.015441610477864742 2023-01-22 14:17:54.829262: step: 904/469, loss: 0.011782408691942692 2023-01-22 14:17:55.482400: step: 906/469, loss: 0.001954123843461275 2023-01-22 14:17:56.147156: step: 908/469, loss: 0.04112967476248741 2023-01-22 14:17:56.784261: step: 910/469, loss: 0.04850506782531738 2023-01-22 14:17:57.395601: step: 912/469, loss: 0.0682452842593193 2023-01-22 14:17:57.976549: step: 914/469, loss: 0.01938783936202526 2023-01-22 14:17:58.611819: step: 916/469, loss: 0.012103945016860962 2023-01-22 14:17:59.248600: step: 918/469, loss: 0.06571812927722931 2023-01-22 14:17:59.882010: step: 920/469, loss: 0.005671871360391378 2023-01-22 14:18:00.485039: step: 922/469, loss: 0.006516584195196629 2023-01-22 14:18:01.037489: step: 924/469, loss: 0.0010232835775241256 2023-01-22 14:18:01.574902: step: 926/469, loss: 0.0010701266583055258 2023-01-22 14:18:02.145897: step: 928/469, loss: 0.0016947818221524358 2023-01-22 14:18:02.738289: step: 930/469, loss: 0.0014523242134600878 2023-01-22 14:18:03.452523: step: 932/469, loss: 0.00045429577585309744 2023-01-22 14:18:04.030842: step: 934/469, loss: 0.023384835571050644 2023-01-22 14:18:04.632947: step: 936/469, loss: 0.0554439052939415 2023-01-22 14:18:05.269369: step: 938/469, loss: 0.0295626912266016 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2894797585227272, 'r': 0.31639533379334134, 'f1': 0.30233969339817024}, 'combined': 0.22277661618812541, 'epoch': 31} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3218553001128336, 'r': 0.2638447839900264, 'f1': 0.28997722363106976}, 'combined': 0.1581693947078562, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28383220087472666, 'r': 0.31345415732275317, 'f1': 0.2979086400524633}, 'combined': 0.21951162951234138, 'epoch': 31} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31850806314644103, 'r': 0.2613922531037124, 'f1': 0.2871374197410629}, 'combined': 0.15662041076785246, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27939707993642005, 'r': 0.31332765510896443, 'f1': 0.2953911882690953}, 'combined': 0.21765666504038597, 'epoch': 31} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31821260800268436, 'r': 0.26697251741853756, 'f1': 0.2903492154611558}, 'combined': 0.1583722993424486, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23363095238095236, 'r': 0.37380952380952376, 'f1': 0.2875457875457875}, 'combined': 0.19169719169719168, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 
0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:20:53.961685: step: 2/469, loss: 0.03749991953372955 2023-01-22 14:20:54.585580: step: 4/469, loss: 0.017268238589167595 2023-01-22 14:20:55.170046: step: 6/469, loss: 0.001208505011163652 2023-01-22 14:20:55.769899: step: 8/469, loss: 0.01648283377289772 2023-01-22 14:20:56.437723: step: 10/469, loss: 0.00220747129060328 2023-01-22 14:20:57.088803: step: 12/469, loss: 0.00031065926305018365 2023-01-22 14:20:57.745597: step: 14/469, loss: 0.1470402330160141 2023-01-22 14:20:58.304738: step: 16/469, loss: 0.020634595304727554 2023-01-22 14:20:58.961995: step: 18/469, loss: 0.0004752243112307042 2023-01-22 14:20:59.597022: step: 20/469, loss: 0.016507146880030632 2023-01-22 14:21:00.167673: step: 22/469, loss: 0.07745123654603958 2023-01-22 14:21:00.752583: step: 24/469, loss: 0.04096395522356033 2023-01-22 14:21:01.403672: step: 26/469, loss: 2.915346522058826e-05 
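Each evaluation entry above reports template and slot precision/recall/F1 plus a "combined" score; the printed values are consistent with the standard F1 = 2pr/(p + r) and with combined = template-F1 x slot-F1. A short sketch reproducing the epoch-31 "Test Russian" figures from this log (function names are illustrative only, not taken from train.py):

def f1(p: float, r: float) -> float:
    # Standard harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    # The "combined" score appears to be the product of the two F1 values.
    return template_f1 * slot_f1

template = f1(0.8059701492537313, 0.4122137404580153)    # ~0.545455, as printed
slot = f1(0.31821260800268436, 0.26697251741853756)      # ~0.290349, as printed
print(combined(template, slot))                          # ~0.158372, as printed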
2023-01-22 14:21:02.023904: step: 28/469, loss: 0.006948579102754593 2023-01-22 14:21:02.615599: step: 30/469, loss: 0.030988536775112152 2023-01-22 14:21:03.276178: step: 32/469, loss: 0.001262485166080296 2023-01-22 14:21:03.878028: step: 34/469, loss: 0.02379172295331955 2023-01-22 14:21:04.463823: step: 36/469, loss: 0.006459326948970556 2023-01-22 14:21:05.101322: step: 38/469, loss: 0.00017157568072434515 2023-01-22 14:21:05.758085: step: 40/469, loss: 0.0030621082987636328 2023-01-22 14:21:06.452459: step: 42/469, loss: 8.642600732855499e-05 2023-01-22 14:21:07.087002: step: 44/469, loss: 0.08777878433465958 2023-01-22 14:21:07.635464: step: 46/469, loss: 0.0026939958333969116 2023-01-22 14:21:08.246068: step: 48/469, loss: 0.4211970269680023 2023-01-22 14:21:08.872517: step: 50/469, loss: 3.9609494706382975e-05 2023-01-22 14:21:09.467031: step: 52/469, loss: 0.02289861999452114 2023-01-22 14:21:10.104148: step: 54/469, loss: 0.006645299959927797 2023-01-22 14:21:10.722674: step: 56/469, loss: 0.026295255869627 2023-01-22 14:21:11.368656: step: 58/469, loss: 0.0011924994178116322 2023-01-22 14:21:11.963517: step: 60/469, loss: 0.08294352889060974 2023-01-22 14:21:12.584162: step: 62/469, loss: 0.004066620022058487 2023-01-22 14:21:13.188863: step: 64/469, loss: 0.22339172661304474 2023-01-22 14:21:13.782088: step: 66/469, loss: 0.007126346230506897 2023-01-22 14:21:14.408926: step: 68/469, loss: 0.01773212105035782 2023-01-22 14:21:15.008678: step: 70/469, loss: 0.0011778019834309816 2023-01-22 14:21:15.640875: step: 72/469, loss: 0.1522241234779358 2023-01-22 14:21:16.212680: step: 74/469, loss: 0.0004932772717438638 2023-01-22 14:21:16.788944: step: 76/469, loss: 0.011974997818470001 2023-01-22 14:21:17.379617: step: 78/469, loss: 0.03133000060915947 2023-01-22 14:21:18.115455: step: 80/469, loss: 0.023759806528687477 2023-01-22 14:21:18.786144: step: 82/469, loss: 0.017190126702189445 2023-01-22 14:21:19.353120: step: 84/469, loss: 0.044721778482198715 2023-01-22 14:21:20.038754: step: 86/469, loss: 0.019774969667196274 2023-01-22 14:21:20.684435: step: 88/469, loss: 0.012061896733939648 2023-01-22 14:21:21.270045: step: 90/469, loss: 0.004765648394823074 2023-01-22 14:21:21.940440: step: 92/469, loss: 0.08470029383897781 2023-01-22 14:21:22.568222: step: 94/469, loss: 0.4178975224494934 2023-01-22 14:21:23.153500: step: 96/469, loss: 0.005485690664499998 2023-01-22 14:21:23.731236: step: 98/469, loss: 0.0009674173779785633 2023-01-22 14:21:24.338703: step: 100/469, loss: 0.004112924449145794 2023-01-22 14:21:24.897632: step: 102/469, loss: 0.00045645286445505917 2023-01-22 14:21:25.510307: step: 104/469, loss: 0.0058275870978832245 2023-01-22 14:21:26.075520: step: 106/469, loss: 0.004125871229916811 2023-01-22 14:21:26.679239: step: 108/469, loss: 0.03037143498659134 2023-01-22 14:21:27.286817: step: 110/469, loss: 0.06216638907790184 2023-01-22 14:21:27.877310: step: 112/469, loss: 0.005116002634167671 2023-01-22 14:21:28.485565: step: 114/469, loss: 0.0014115545200183988 2023-01-22 14:21:29.049483: step: 116/469, loss: 0.0010772172827273607 2023-01-22 14:21:29.663323: step: 118/469, loss: 0.00050545041449368 2023-01-22 14:21:30.412571: step: 120/469, loss: 0.000938965764362365 2023-01-22 14:21:31.067242: step: 122/469, loss: 0.007019012235105038 2023-01-22 14:21:31.634567: step: 124/469, loss: 0.0028248117305338383 2023-01-22 14:21:32.167780: step: 126/469, loss: 0.03765726834535599 2023-01-22 14:21:32.851191: step: 128/469, loss: 0.002961465623229742 2023-01-22 
14:21:33.525203: step: 130/469, loss: 0.023719433695077896 2023-01-22 14:21:34.151114: step: 132/469, loss: 0.027861934155225754 2023-01-22 14:21:34.735788: step: 134/469, loss: 0.0019629402086138725 2023-01-22 14:21:35.321116: step: 136/469, loss: 0.018162770196795464 2023-01-22 14:21:35.923086: step: 138/469, loss: 0.008055283688008785 2023-01-22 14:21:36.577716: step: 140/469, loss: 0.007368750870227814 2023-01-22 14:21:37.223587: step: 142/469, loss: 0.041566379368305206 2023-01-22 14:21:37.828406: step: 144/469, loss: 0.016668105497956276 2023-01-22 14:21:38.429044: step: 146/469, loss: 0.0253001619130373 2023-01-22 14:21:39.019039: step: 148/469, loss: 0.005218131933361292 2023-01-22 14:21:39.682244: step: 150/469, loss: 0.0009144657524302602 2023-01-22 14:21:40.335410: step: 152/469, loss: 0.017045224085450172 2023-01-22 14:21:41.077745: step: 154/469, loss: 0.03626452386379242 2023-01-22 14:21:41.643544: step: 156/469, loss: 0.00015720822557341307 2023-01-22 14:21:42.229011: step: 158/469, loss: 0.013244305737316608 2023-01-22 14:21:42.881456: step: 160/469, loss: 0.002182579832151532 2023-01-22 14:21:43.509686: step: 162/469, loss: 0.005379943177103996 2023-01-22 14:21:44.138250: step: 164/469, loss: 0.00024298630887642503 2023-01-22 14:21:44.738978: step: 166/469, loss: 0.002628966700285673 2023-01-22 14:21:45.355340: step: 168/469, loss: 0.18549330532550812 2023-01-22 14:21:45.935795: step: 170/469, loss: 0.009937564842402935 2023-01-22 14:21:46.643690: step: 172/469, loss: 0.0010264319134876132 2023-01-22 14:21:47.376177: step: 174/469, loss: 0.005788552109152079 2023-01-22 14:21:48.039936: step: 176/469, loss: 0.0041469489224255085 2023-01-22 14:21:48.678329: step: 178/469, loss: 0.003415116108953953 2023-01-22 14:21:49.382252: step: 180/469, loss: 0.007839956320822239 2023-01-22 14:21:50.021754: step: 182/469, loss: 0.0027799762319773436 2023-01-22 14:21:50.663630: step: 184/469, loss: 0.016197774559259415 2023-01-22 14:21:51.187804: step: 186/469, loss: 0.25818222761154175 2023-01-22 14:21:51.872524: step: 188/469, loss: 0.025767633691430092 2023-01-22 14:21:52.444504: step: 190/469, loss: 0.005767365917563438 2023-01-22 14:21:53.040428: step: 192/469, loss: 0.010001887567341328 2023-01-22 14:21:53.669648: step: 194/469, loss: 0.014682997018098831 2023-01-22 14:21:54.304533: step: 196/469, loss: 0.0005096681998111308 2023-01-22 14:21:55.000730: step: 198/469, loss: 0.00389700080268085 2023-01-22 14:21:55.682022: step: 200/469, loss: 0.01446735579520464 2023-01-22 14:21:56.293468: step: 202/469, loss: 0.005991010461002588 2023-01-22 14:21:56.919129: step: 204/469, loss: 0.0007895565358921885 2023-01-22 14:21:57.588197: step: 206/469, loss: 0.02897719107568264 2023-01-22 14:21:58.318286: step: 208/469, loss: 0.004441920667886734 2023-01-22 14:21:58.919306: step: 210/469, loss: 0.012744102627038956 2023-01-22 14:21:59.559361: step: 212/469, loss: 0.0015204461524263024 2023-01-22 14:22:00.180089: step: 214/469, loss: 0.005202990025281906 2023-01-22 14:22:00.754327: step: 216/469, loss: 0.036313001066446304 2023-01-22 14:22:01.390282: step: 218/469, loss: 0.04497510567307472 2023-01-22 14:22:02.025057: step: 220/469, loss: 0.004818841814994812 2023-01-22 14:22:02.660799: step: 222/469, loss: 0.0025633235927671194 2023-01-22 14:22:03.217973: step: 224/469, loss: 0.0008717576856724918 2023-01-22 14:22:03.927127: step: 226/469, loss: 0.013081683777272701 2023-01-22 14:22:04.508498: step: 228/469, loss: 0.03065168298780918 2023-01-22 14:22:05.125996: step: 230/469, loss: 
0.0028354807291179895 2023-01-22 14:22:05.720096: step: 232/469, loss: 0.0025457723531872034 2023-01-22 14:22:06.432468: step: 234/469, loss: 0.002082462888211012 2023-01-22 14:22:07.019135: step: 236/469, loss: 0.001847420004196465 2023-01-22 14:22:07.596909: step: 238/469, loss: 5.1690582040464506e-05 2023-01-22 14:22:08.188078: step: 240/469, loss: 0.006741645745933056 2023-01-22 14:22:08.767616: step: 242/469, loss: 0.013064516708254814 2023-01-22 14:22:09.464006: step: 244/469, loss: 0.018380461260676384 2023-01-22 14:22:10.122766: step: 246/469, loss: 0.004879888147115707 2023-01-22 14:22:10.720218: step: 248/469, loss: 0.017948638647794724 2023-01-22 14:22:11.292828: step: 250/469, loss: 0.018873173743486404 2023-01-22 14:22:11.979726: step: 252/469, loss: 0.0069093527272343636 2023-01-22 14:22:12.637583: step: 254/469, loss: 0.1195477694272995 2023-01-22 14:22:13.222474: step: 256/469, loss: 0.010812277905642986 2023-01-22 14:22:13.898873: step: 258/469, loss: 0.018889935687184334 2023-01-22 14:22:14.562862: step: 260/469, loss: 0.017336001619696617 2023-01-22 14:22:15.209661: step: 262/469, loss: 0.0010860683396458626 2023-01-22 14:22:15.831946: step: 264/469, loss: 0.0021146717481315136 2023-01-22 14:22:16.491561: step: 266/469, loss: 0.02262805961072445 2023-01-22 14:22:17.125198: step: 268/469, loss: 0.002259948058053851 2023-01-22 14:22:17.752253: step: 270/469, loss: 0.0009311236790381372 2023-01-22 14:22:18.416315: step: 272/469, loss: 0.01470889151096344 2023-01-22 14:22:19.075939: step: 274/469, loss: 0.01056294422596693 2023-01-22 14:22:19.620332: step: 276/469, loss: 0.06171771138906479 2023-01-22 14:22:20.255956: step: 278/469, loss: 0.008005499839782715 2023-01-22 14:22:20.869255: step: 280/469, loss: 0.017640594393014908 2023-01-22 14:22:21.469849: step: 282/469, loss: 0.014186647720634937 2023-01-22 14:22:22.019821: step: 284/469, loss: 0.008060427382588387 2023-01-22 14:22:22.612325: step: 286/469, loss: 0.01983780786395073 2023-01-22 14:22:23.168222: step: 288/469, loss: 0.015098470263183117 2023-01-22 14:22:23.731297: step: 290/469, loss: 0.0008657456492073834 2023-01-22 14:22:24.337220: step: 292/469, loss: 0.0006265053525567055 2023-01-22 14:22:24.937825: step: 294/469, loss: 0.024693096056580544 2023-01-22 14:22:25.570493: step: 296/469, loss: 0.2753787636756897 2023-01-22 14:22:26.192555: step: 298/469, loss: 0.003063612151890993 2023-01-22 14:22:26.770259: step: 300/469, loss: 0.0044922903180122375 2023-01-22 14:22:27.493760: step: 302/469, loss: 0.05268428847193718 2023-01-22 14:22:28.095734: step: 304/469, loss: 0.0007777509745210409 2023-01-22 14:22:28.713297: step: 306/469, loss: 0.08876866847276688 2023-01-22 14:22:29.334248: step: 308/469, loss: 0.01490736287087202 2023-01-22 14:22:30.070995: step: 310/469, loss: 0.005947615019977093 2023-01-22 14:22:30.720693: step: 312/469, loss: 0.019858727231621742 2023-01-22 14:22:31.359957: step: 314/469, loss: 0.004382960963994265 2023-01-22 14:22:31.961094: step: 316/469, loss: 0.01166087668389082 2023-01-22 14:22:32.527315: step: 318/469, loss: 0.0028572576120495796 2023-01-22 14:22:33.087021: step: 320/469, loss: 0.11967552453279495 2023-01-22 14:22:33.745566: step: 322/469, loss: 0.007672153413295746 2023-01-22 14:22:34.397948: step: 324/469, loss: 0.023025119677186012 2023-01-22 14:22:35.011491: step: 326/469, loss: 0.012373638339340687 2023-01-22 14:22:35.676883: step: 328/469, loss: 0.9530372023582458 2023-01-22 14:22:36.338779: step: 330/469, loss: 0.6637327075004578 2023-01-22 14:22:36.918281: step: 
332/469, loss: 0.009954139590263367 2023-01-22 14:22:37.533268: step: 334/469, loss: 8.420260564889759e-05 2023-01-22 14:22:38.104685: step: 336/469, loss: 0.00999933946877718 2023-01-22 14:22:38.707148: step: 338/469, loss: 0.01693165861070156 2023-01-22 14:22:39.363710: step: 340/469, loss: 0.010749445296823978 2023-01-22 14:22:40.092950: step: 342/469, loss: 2.6861298084259033 2023-01-22 14:22:40.731990: step: 344/469, loss: 0.03646576404571533 2023-01-22 14:22:41.344182: step: 346/469, loss: 9.060656157089397e-05 2023-01-22 14:22:41.992307: step: 348/469, loss: 0.020453309640288353 2023-01-22 14:22:42.603055: step: 350/469, loss: 0.0018417125102132559 2023-01-22 14:22:43.215964: step: 352/469, loss: 0.00860718172043562 2023-01-22 14:22:43.869743: step: 354/469, loss: 0.025369614362716675 2023-01-22 14:22:44.454102: step: 356/469, loss: 0.0002613769320305437 2023-01-22 14:22:45.109621: step: 358/469, loss: 0.007920288480818272 2023-01-22 14:22:45.764786: step: 360/469, loss: 0.02583874575793743 2023-01-22 14:22:46.384409: step: 362/469, loss: 0.0003160881169606 2023-01-22 14:22:46.996379: step: 364/469, loss: 0.007488342467695475 2023-01-22 14:22:47.659898: step: 366/469, loss: 0.009340872056782246 2023-01-22 14:22:48.324117: step: 368/469, loss: 0.04050293192267418 2023-01-22 14:22:48.966971: step: 370/469, loss: 0.0694957748055458 2023-01-22 14:22:49.555812: step: 372/469, loss: 0.0023636994883418083 2023-01-22 14:22:50.120013: step: 374/469, loss: 0.007173544727265835 2023-01-22 14:22:50.722381: step: 376/469, loss: 0.010378501377999783 2023-01-22 14:22:51.370812: step: 378/469, loss: 0.002246354939416051 2023-01-22 14:22:51.998220: step: 380/469, loss: 0.03717915341258049 2023-01-22 14:22:52.613528: step: 382/469, loss: 0.006874177139252424 2023-01-22 14:22:53.178588: step: 384/469, loss: 0.009456251747906208 2023-01-22 14:22:53.779525: step: 386/469, loss: 0.01630944013595581 2023-01-22 14:22:54.401267: step: 388/469, loss: 0.015650790184736252 2023-01-22 14:22:55.034668: step: 390/469, loss: 0.04911442846059799 2023-01-22 14:22:55.659860: step: 392/469, loss: 0.0035679202992469072 2023-01-22 14:22:56.261435: step: 394/469, loss: 0.0008459281525574625 2023-01-22 14:22:56.862886: step: 396/469, loss: 0.03436840698122978 2023-01-22 14:22:57.395731: step: 398/469, loss: 0.009653203189373016 2023-01-22 14:22:58.077511: step: 400/469, loss: 0.002274241531267762 2023-01-22 14:22:58.677895: step: 402/469, loss: 0.006488419137895107 2023-01-22 14:22:59.252278: step: 404/469, loss: 0.0010931938886642456 2023-01-22 14:22:59.917776: step: 406/469, loss: 0.00029615702806040645 2023-01-22 14:23:00.541137: step: 408/469, loss: 0.013120145536959171 2023-01-22 14:23:01.263931: step: 410/469, loss: 0.008972086012363434 2023-01-22 14:23:01.915730: step: 412/469, loss: 0.09995332360267639 2023-01-22 14:23:02.495102: step: 414/469, loss: 0.01970064640045166 2023-01-22 14:23:03.114972: step: 416/469, loss: 0.00018883164739236236 2023-01-22 14:23:03.671738: step: 418/469, loss: 0.0018334169872105122 2023-01-22 14:23:04.328202: step: 420/469, loss: 0.011028261855244637 2023-01-22 14:23:04.932071: step: 422/469, loss: 0.03585797920823097 2023-01-22 14:23:05.585286: step: 424/469, loss: 0.007904846221208572 2023-01-22 14:23:06.175841: step: 426/469, loss: 0.00872550718486309 2023-01-22 14:23:06.812145: step: 428/469, loss: 0.023253824561834335 2023-01-22 14:23:07.496128: step: 430/469, loss: 0.00245981034822762 2023-01-22 14:23:08.062099: step: 432/469, loss: 0.01047658734023571 2023-01-22 14:23:08.636884: 
step: 434/469, loss: 3.3655032893875614e-07 2023-01-22 14:23:09.329834: step: 436/469, loss: 0.04564597085118294 2023-01-22 14:23:09.964348: step: 438/469, loss: 0.30991101264953613 2023-01-22 14:23:10.627442: step: 440/469, loss: 0.007484719157218933 2023-01-22 14:23:11.227282: step: 442/469, loss: 0.022249378263950348 2023-01-22 14:23:11.831301: step: 444/469, loss: 0.018054088577628136 2023-01-22 14:23:12.429693: step: 446/469, loss: 0.0015145221259444952 2023-01-22 14:23:13.095623: step: 448/469, loss: 0.0004005207447335124 2023-01-22 14:23:13.733721: step: 450/469, loss: 0.0007542406092397869 2023-01-22 14:23:14.400326: step: 452/469, loss: 0.009223207831382751 2023-01-22 14:23:15.012437: step: 454/469, loss: 0.01755375787615776 2023-01-22 14:23:15.599482: step: 456/469, loss: 0.004072573967278004 2023-01-22 14:23:16.188769: step: 458/469, loss: 0.05217577517032623 2023-01-22 14:23:16.792822: step: 460/469, loss: 0.012858073227107525 2023-01-22 14:23:17.441385: step: 462/469, loss: 0.001152542419731617 2023-01-22 14:23:18.061200: step: 464/469, loss: 0.0006960227619856596 2023-01-22 14:23:18.689669: step: 466/469, loss: 0.031560566276311874 2023-01-22 14:23:19.306448: step: 468/469, loss: 0.0060020796954631805 2023-01-22 14:23:19.946787: step: 470/469, loss: 0.049895279109478 2023-01-22 14:23:20.537009: step: 472/469, loss: 0.03689471632242203 2023-01-22 14:23:21.238263: step: 474/469, loss: 0.06906048953533173 2023-01-22 14:23:21.810912: step: 476/469, loss: 0.015291198156774044 2023-01-22 14:23:22.372633: step: 478/469, loss: 0.004548948258161545 2023-01-22 14:23:23.058234: step: 480/469, loss: 0.03188430890440941 2023-01-22 14:23:23.639946: step: 482/469, loss: 0.05921867862343788 2023-01-22 14:23:24.276538: step: 484/469, loss: 0.002240013564005494 2023-01-22 14:23:24.918080: step: 486/469, loss: 0.01722046174108982 2023-01-22 14:23:25.533085: step: 488/469, loss: 0.00045966083416715264 2023-01-22 14:23:26.148825: step: 490/469, loss: 0.004585472866892815 2023-01-22 14:23:26.766052: step: 492/469, loss: 0.005104638170450926 2023-01-22 14:23:27.368857: step: 494/469, loss: 0.012179567478597164 2023-01-22 14:23:27.976697: step: 496/469, loss: 0.0025584574323147535 2023-01-22 14:23:28.594706: step: 498/469, loss: 0.02211659587919712 2023-01-22 14:23:29.266124: step: 500/469, loss: 0.4852209687232971 2023-01-22 14:23:29.821896: step: 502/469, loss: 0.00184117432218045 2023-01-22 14:23:30.431039: step: 504/469, loss: 0.0005285548395477235 2023-01-22 14:23:31.034601: step: 506/469, loss: 0.029422659426927567 2023-01-22 14:23:31.686295: step: 508/469, loss: 0.025827063247561455 2023-01-22 14:23:32.285401: step: 510/469, loss: 0.0012686802074313164 2023-01-22 14:23:32.876972: step: 512/469, loss: 0.05435323715209961 2023-01-22 14:23:33.476468: step: 514/469, loss: 0.03335980325937271 2023-01-22 14:23:34.084566: step: 516/469, loss: 0.0006537021836265922 2023-01-22 14:23:34.718156: step: 518/469, loss: 0.006101388018578291 2023-01-22 14:23:35.334695: step: 520/469, loss: 0.004528559744358063 2023-01-22 14:23:35.957398: step: 522/469, loss: 0.6522846817970276 2023-01-22 14:23:36.661986: step: 524/469, loss: 0.05915721505880356 2023-01-22 14:23:37.338043: step: 526/469, loss: 0.018689153715968132 2023-01-22 14:23:37.999994: step: 528/469, loss: 0.03787685185670853 2023-01-22 14:23:38.631657: step: 530/469, loss: 1.9913724827347323e-05 2023-01-22 14:23:39.311265: step: 532/469, loss: 0.013306287117302418 2023-01-22 14:23:39.898418: step: 534/469, loss: 0.02387900836765766 2023-01-22 
14:23:40.617353: step: 536/469, loss: 0.07774024456739426 2023-01-22 14:23:41.163017: step: 538/469, loss: 0.005904316436499357 2023-01-22 14:23:41.771699: step: 540/469, loss: 0.04864424094557762 2023-01-22 14:23:42.485786: step: 542/469, loss: 0.01240241527557373 2023-01-22 14:23:43.130292: step: 544/469, loss: 0.03410211578011513 2023-01-22 14:23:43.742802: step: 546/469, loss: 0.03649758920073509 2023-01-22 14:23:44.340263: step: 548/469, loss: 0.03038223646581173 2023-01-22 14:23:45.006101: step: 550/469, loss: 0.09271855652332306 2023-01-22 14:23:45.684696: step: 552/469, loss: 0.0013210264733061194 2023-01-22 14:23:46.347138: step: 554/469, loss: 0.05411539226770401 2023-01-22 14:23:47.000346: step: 556/469, loss: 0.035361193120479584 2023-01-22 14:23:47.758619: step: 558/469, loss: 0.03385433554649353 2023-01-22 14:23:48.306206: step: 560/469, loss: 8.091592462733388e-05 2023-01-22 14:23:49.023864: step: 562/469, loss: 0.019768020138144493 2023-01-22 14:23:49.706950: step: 564/469, loss: 0.0021043112501502037 2023-01-22 14:23:50.290614: step: 566/469, loss: 0.0042312839068472385 2023-01-22 14:23:50.900505: step: 568/469, loss: 0.0024475210811942816 2023-01-22 14:23:51.490335: step: 570/469, loss: 0.0007352540269494057 2023-01-22 14:23:52.098932: step: 572/469, loss: 0.12982802093029022 2023-01-22 14:23:52.662902: step: 574/469, loss: 0.020285388454794884 2023-01-22 14:23:53.330895: step: 576/469, loss: 0.00894717313349247 2023-01-22 14:23:53.915994: step: 578/469, loss: 0.017607485875487328 2023-01-22 14:23:54.545469: step: 580/469, loss: 0.017055517062544823 2023-01-22 14:23:55.131279: step: 582/469, loss: 0.000573902390897274 2023-01-22 14:23:55.750432: step: 584/469, loss: 0.14255721867084503 2023-01-22 14:23:56.435062: step: 586/469, loss: 0.0030318961944431067 2023-01-22 14:23:57.090430: step: 588/469, loss: 0.004560540895909071 2023-01-22 14:23:57.729882: step: 590/469, loss: 0.032422054558992386 2023-01-22 14:23:58.380350: step: 592/469, loss: 0.2888505458831787 2023-01-22 14:23:59.023070: step: 594/469, loss: 0.09623157232999802 2023-01-22 14:23:59.612886: step: 596/469, loss: 0.008272957056760788 2023-01-22 14:24:00.326559: step: 598/469, loss: 0.07348811626434326 2023-01-22 14:24:00.896278: step: 600/469, loss: 0.005834984127432108 2023-01-22 14:24:01.539546: step: 602/469, loss: 0.01735871471464634 2023-01-22 14:24:02.230999: step: 604/469, loss: 0.0485711395740509 2023-01-22 14:24:02.820995: step: 606/469, loss: 0.059332795441150665 2023-01-22 14:24:03.409008: step: 608/469, loss: 0.005800224840641022 2023-01-22 14:24:04.051203: step: 610/469, loss: 0.019524747505784035 2023-01-22 14:24:04.663822: step: 612/469, loss: 0.76934415102005 2023-01-22 14:24:05.228080: step: 614/469, loss: 0.007277317810803652 2023-01-22 14:24:05.841903: step: 616/469, loss: 0.004504975862801075 2023-01-22 14:24:06.425454: step: 618/469, loss: 0.0012157695600762963 2023-01-22 14:24:06.989610: step: 620/469, loss: 0.10344929993152618 2023-01-22 14:24:07.564268: step: 622/469, loss: 0.10154633224010468 2023-01-22 14:24:08.176201: step: 624/469, loss: 0.00018853195069823414 2023-01-22 14:24:08.776903: step: 626/469, loss: 0.048401981592178345 2023-01-22 14:24:09.365273: step: 628/469, loss: 0.002542890375480056 2023-01-22 14:24:10.066533: step: 630/469, loss: 0.005759979598224163 2023-01-22 14:24:10.719401: step: 632/469, loss: 0.01339081022888422 2023-01-22 14:24:11.385508: step: 634/469, loss: 0.029422728344798088 2023-01-22 14:24:12.137208: step: 636/469, loss: 0.014681156724691391 2023-01-22 
14:24:12.775872: step: 638/469, loss: 0.001315159723162651 2023-01-22 14:24:13.388054: step: 640/469, loss: 0.017636211588978767 2023-01-22 14:24:14.026645: step: 642/469, loss: 0.0009735087514854968 2023-01-22 14:24:14.727050: step: 644/469, loss: 0.05150126293301582 2023-01-22 14:24:15.377423: step: 646/469, loss: 0.009876486845314503 2023-01-22 14:24:15.982908: step: 648/469, loss: 0.017833251506090164 2023-01-22 14:24:16.651711: step: 650/469, loss: 0.20279279351234436 2023-01-22 14:24:17.272125: step: 652/469, loss: 0.005821316037327051 2023-01-22 14:24:17.902016: step: 654/469, loss: 0.002447586040943861 2023-01-22 14:24:18.490515: step: 656/469, loss: 0.0080407140776515 2023-01-22 14:24:19.159039: step: 658/469, loss: 0.03799927979707718 2023-01-22 14:24:19.824752: step: 660/469, loss: 0.021526480093598366 2023-01-22 14:24:20.463775: step: 662/469, loss: 0.0017205958720296621 2023-01-22 14:24:21.118132: step: 664/469, loss: 0.05159081146121025 2023-01-22 14:24:21.722235: step: 666/469, loss: 0.019654128700494766 2023-01-22 14:24:22.357449: step: 668/469, loss: 0.0027194879949092865 2023-01-22 14:24:22.964655: step: 670/469, loss: 0.005255649797618389 2023-01-22 14:24:23.598119: step: 672/469, loss: 0.01566912792623043 2023-01-22 14:24:24.229546: step: 674/469, loss: 0.14635688066482544 2023-01-22 14:24:24.901045: step: 676/469, loss: 0.006894399411976337 2023-01-22 14:24:25.506875: step: 678/469, loss: 0.0023598771076649427 2023-01-22 14:24:26.066561: step: 680/469, loss: 0.050585679709911346 2023-01-22 14:24:26.652125: step: 682/469, loss: 0.0012097225990146399 2023-01-22 14:24:27.258047: step: 684/469, loss: 0.019620677456259727 2023-01-22 14:24:27.922306: step: 686/469, loss: 0.0015850631752982736 2023-01-22 14:24:28.583652: step: 688/469, loss: 0.0761529803276062 2023-01-22 14:24:29.223568: step: 690/469, loss: 0.00974776316434145 2023-01-22 14:24:29.823201: step: 692/469, loss: 0.44478705525398254 2023-01-22 14:24:30.486351: step: 694/469, loss: 0.001052939216606319 2023-01-22 14:24:31.132645: step: 696/469, loss: 0.011028924025595188 2023-01-22 14:24:31.757828: step: 698/469, loss: 0.004014064557850361 2023-01-22 14:24:32.436791: step: 700/469, loss: 0.007433253340423107 2023-01-22 14:24:33.078954: step: 702/469, loss: 0.018054557964205742 2023-01-22 14:24:33.720141: step: 704/469, loss: 0.0019376386189833283 2023-01-22 14:24:34.344909: step: 706/469, loss: 0.01151854544878006 2023-01-22 14:24:34.965329: step: 708/469, loss: 0.0018493456300348043 2023-01-22 14:24:35.557865: step: 710/469, loss: 0.057258240878582 2023-01-22 14:24:36.215207: step: 712/469, loss: 0.00707352114841342 2023-01-22 14:24:36.887003: step: 714/469, loss: 0.2225046455860138 2023-01-22 14:24:37.479320: step: 716/469, loss: 0.0035385992377996445 2023-01-22 14:24:38.038708: step: 718/469, loss: 0.0035661403089761734 2023-01-22 14:24:38.615294: step: 720/469, loss: 0.5725209712982178 2023-01-22 14:24:39.206684: step: 722/469, loss: 0.04594719037413597 2023-01-22 14:24:39.880908: step: 724/469, loss: 0.01260649785399437 2023-01-22 14:24:40.487173: step: 726/469, loss: 0.03628157079219818 2023-01-22 14:24:41.143722: step: 728/469, loss: 0.01140190102159977 2023-01-22 14:24:41.792014: step: 730/469, loss: 0.005689607001841068 2023-01-22 14:24:42.413406: step: 732/469, loss: 0.04229084029793739 2023-01-22 14:24:42.987454: step: 734/469, loss: 0.04005708917975426 2023-01-22 14:24:43.699945: step: 736/469, loss: 0.0002078147226711735 2023-01-22 14:24:44.351788: step: 738/469, loss: 0.006827342323958874 2023-01-22 
14:24:44.932593: step: 740/469, loss: 0.02837841771543026 2023-01-22 14:24:45.656458: step: 742/469, loss: 0.03563904017210007 2023-01-22 14:24:46.327548: step: 744/469, loss: 0.048966579139232635 2023-01-22 14:24:46.903430: step: 746/469, loss: 0.0012211324647068977 2023-01-22 14:24:47.528952: step: 748/469, loss: 0.0002296100283274427 2023-01-22 14:24:48.144466: step: 750/469, loss: 0.002885879948735237 2023-01-22 14:24:48.765298: step: 752/469, loss: 0.03161158412694931 2023-01-22 14:24:49.393086: step: 754/469, loss: 0.021520603448152542 2023-01-22 14:24:50.020504: step: 756/469, loss: 0.025058690458536148 2023-01-22 14:24:50.700873: step: 758/469, loss: 0.0621785968542099 2023-01-22 14:24:51.311266: step: 760/469, loss: 0.009903007186949253 2023-01-22 14:24:51.994695: step: 762/469, loss: 0.014340068213641644 2023-01-22 14:24:52.653417: step: 764/469, loss: 0.035435933619737625 2023-01-22 14:24:53.260716: step: 766/469, loss: 0.08457788079977036 2023-01-22 14:24:53.864000: step: 768/469, loss: 0.022854575887322426 2023-01-22 14:24:54.496073: step: 770/469, loss: 0.009248269721865654 2023-01-22 14:24:55.043018: step: 772/469, loss: 0.0012875997927039862 2023-01-22 14:24:55.629928: step: 774/469, loss: 0.03336193412542343 2023-01-22 14:24:56.265697: step: 776/469, loss: 0.0009478839347139001 2023-01-22 14:24:56.897950: step: 778/469, loss: 0.10989048331975937 2023-01-22 14:24:57.517733: step: 780/469, loss: 0.18429778516292572 2023-01-22 14:24:58.134507: step: 782/469, loss: 0.01059026550501585 2023-01-22 14:24:58.687233: step: 784/469, loss: 0.15679648518562317 2023-01-22 14:24:59.283490: step: 786/469, loss: 0.001708191935904324 2023-01-22 14:24:59.855993: step: 788/469, loss: 0.006603969726711512 2023-01-22 14:25:00.501255: step: 790/469, loss: 0.03528481721878052 2023-01-22 14:25:01.080121: step: 792/469, loss: 0.005352017469704151 2023-01-22 14:25:01.748388: step: 794/469, loss: 0.01346633117645979 2023-01-22 14:25:02.319530: step: 796/469, loss: 0.03727249801158905 2023-01-22 14:25:02.897811: step: 798/469, loss: 1.8572876453399658 2023-01-22 14:25:03.507607: step: 800/469, loss: 0.03379129245877266 2023-01-22 14:25:04.162792: step: 802/469, loss: 0.0027329474687576294 2023-01-22 14:25:04.769831: step: 804/469, loss: 0.018761103972792625 2023-01-22 14:25:05.357047: step: 806/469, loss: 0.026164662092924118 2023-01-22 14:25:05.964183: step: 808/469, loss: 0.39833980798721313 2023-01-22 14:25:06.596057: step: 810/469, loss: 1.3219311237335205 2023-01-22 14:25:07.253302: step: 812/469, loss: 0.04336877167224884 2023-01-22 14:25:07.824807: step: 814/469, loss: 0.13728487491607666 2023-01-22 14:25:08.482214: step: 816/469, loss: 0.004919958300888538 2023-01-22 14:25:09.148590: step: 818/469, loss: 0.025538092479109764 2023-01-22 14:25:09.814163: step: 820/469, loss: 0.028856776654720306 2023-01-22 14:25:10.427100: step: 822/469, loss: 0.021210791543126106 2023-01-22 14:25:11.050193: step: 824/469, loss: 0.005643091630190611 2023-01-22 14:25:11.689256: step: 826/469, loss: 0.0005824893596582115 2023-01-22 14:25:12.313293: step: 828/469, loss: 0.048885468393564224 2023-01-22 14:25:12.993981: step: 830/469, loss: 0.004670221824198961 2023-01-22 14:25:13.583271: step: 832/469, loss: 0.0495980940759182 2023-01-22 14:25:14.196426: step: 834/469, loss: 0.05524533987045288 2023-01-22 14:25:14.827364: step: 836/469, loss: 0.013411085121333599 2023-01-22 14:25:15.444919: step: 838/469, loss: 0.020150167867541313 2023-01-22 14:25:16.068800: step: 840/469, loss: 0.0058477455750107765 2023-01-22 
14:25:16.703667: step: 842/469, loss: 0.04638752341270447 2023-01-22 14:25:17.283707: step: 844/469, loss: 0.00031704845605418086 2023-01-22 14:25:17.929876: step: 846/469, loss: 0.039171431213617325 2023-01-22 14:25:18.539776: step: 848/469, loss: 0.023008722811937332 2023-01-22 14:25:19.092596: step: 850/469, loss: 0.0039032783824950457 2023-01-22 14:25:19.740964: step: 852/469, loss: 0.007801303174346685 2023-01-22 14:25:20.427315: step: 854/469, loss: 0.03950684145092964 2023-01-22 14:25:21.043338: step: 856/469, loss: 0.01630246639251709 2023-01-22 14:25:21.680369: step: 858/469, loss: 0.0004967916756868362 2023-01-22 14:25:22.222819: step: 860/469, loss: 0.003600027645006776 2023-01-22 14:25:22.947831: step: 862/469, loss: 0.005115168169140816 2023-01-22 14:25:23.587315: step: 864/469, loss: 0.008133250288665295 2023-01-22 14:25:24.146833: step: 866/469, loss: 0.02947641722857952 2023-01-22 14:25:24.825197: step: 868/469, loss: 0.07142122089862823 2023-01-22 14:25:25.433724: step: 870/469, loss: 0.00017969335021916777 2023-01-22 14:25:26.068675: step: 872/469, loss: 0.03394768387079239 2023-01-22 14:25:26.859955: step: 874/469, loss: 0.10148341208696365 2023-01-22 14:25:27.455397: step: 876/469, loss: 0.0012569662649184465 2023-01-22 14:25:28.098747: step: 878/469, loss: 0.04769293963909149 2023-01-22 14:25:28.738778: step: 880/469, loss: 0.0037581499200314283 2023-01-22 14:25:29.303649: step: 882/469, loss: 0.007814634591341019 2023-01-22 14:25:29.985165: step: 884/469, loss: 0.061906807124614716 2023-01-22 14:25:30.671593: step: 886/469, loss: 0.0010234395740553737 2023-01-22 14:25:31.231425: step: 888/469, loss: 0.010987556539475918 2023-01-22 14:25:31.808740: step: 890/469, loss: 0.03246670588850975 2023-01-22 14:25:32.407732: step: 892/469, loss: 0.03887225687503815 2023-01-22 14:25:33.002089: step: 894/469, loss: 0.03355664387345314 2023-01-22 14:25:33.607960: step: 896/469, loss: 0.015674414113163948 2023-01-22 14:25:34.259689: step: 898/469, loss: 0.020127814263105392 2023-01-22 14:25:34.863434: step: 900/469, loss: 0.010915447026491165 2023-01-22 14:25:35.493041: step: 902/469, loss: 0.032397475093603134 2023-01-22 14:25:36.070927: step: 904/469, loss: 0.0015398976393043995 2023-01-22 14:25:36.675952: step: 906/469, loss: 0.029909886419773102 2023-01-22 14:25:37.303234: step: 908/469, loss: 0.0016637513181194663 2023-01-22 14:25:37.914486: step: 910/469, loss: 0.002773152431473136 2023-01-22 14:25:38.491068: step: 912/469, loss: 0.025767548009753227 2023-01-22 14:25:39.110913: step: 914/469, loss: 0.021626248955726624 2023-01-22 14:25:39.673789: step: 916/469, loss: 0.012449540197849274 2023-01-22 14:25:40.270461: step: 918/469, loss: 0.05381845310330391 2023-01-22 14:25:40.841308: step: 920/469, loss: 0.14493398368358612 2023-01-22 14:25:41.496176: step: 922/469, loss: 0.3225865066051483 2023-01-22 14:25:42.184608: step: 924/469, loss: 0.013396164402365685 2023-01-22 14:25:42.755823: step: 926/469, loss: 0.03985230252146721 2023-01-22 14:25:43.418581: step: 928/469, loss: 0.01934659294784069 2023-01-22 14:25:44.056212: step: 930/469, loss: 0.016271311789751053 2023-01-22 14:25:44.691555: step: 932/469, loss: 0.0010511681903153658 2023-01-22 14:25:45.277292: step: 934/469, loss: 0.03172919526696205 2023-01-22 14:25:45.909340: step: 936/469, loss: 0.0014140321873128414 2023-01-22 14:25:46.570251: step: 938/469, loss: 0.010791617445647717 ================================================== Loss: 0.050 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2884293489226936, 'r': 0.3283825604432944, 'f1': 0.3071119953036666}, 'combined': 0.22629304917112275, 'epoch': 32} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31079027756826066, 'r': 0.2692757482682002, 'f1': 0.28854744397759097}, 'combined': 0.15738951489686778, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28671237751702455, 'r': 0.3286039393174247, 'f1': 0.30623214150359473}, 'combined': 0.225644735844754, 'epoch': 32} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31108754144791373, 'r': 0.26754097800644, 'f1': 0.2876756408864131}, 'combined': 0.1569139859380435, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28027328792607686, 'r': 0.3254786569464118, 'f1': 0.30118920493548557}, 'combined': 0.22192888784719988, 'epoch': 32} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.307717295551314, 'r': 0.27393314599398766, 'f1': 0.2898440741252938}, 'combined': 0.15809676770470568, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20617816091954022, 'r': 0.3416666666666666, 'f1': 0.25716845878136196}, 'combined': 0.17144563918757463, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22297297297297297, 'r': 0.358695652173913, 'f1': 0.275}, 'combined': 0.1375, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3888888888888889, 'r': 0.2413793103448276, 'f1': 0.2978723404255319}, 'combined': 0.19858156028368792, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 
'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:28:35.461989: step: 2/469, loss: 0.013351435773074627 2023-01-22 14:28:36.025426: step: 4/469, loss: 0.005639699287712574 2023-01-22 14:28:36.617890: step: 6/469, loss: 0.006113546900451183 2023-01-22 14:28:37.253953: step: 8/469, loss: 0.0021835421212017536 2023-01-22 14:28:37.882162: step: 10/469, loss: 0.033465608954429626 2023-01-22 14:28:38.511220: step: 12/469, loss: 0.007830152288079262 2023-01-22 14:28:39.141487: step: 14/469, loss: 0.004056346137076616 2023-01-22 14:28:39.754118: step: 16/469, loss: 0.00999542698264122 2023-01-22 14:28:40.382171: step: 18/469, loss: 0.004410950932651758 2023-01-22 14:28:41.027588: step: 20/469, loss: 0.04339294135570526 2023-01-22 14:28:41.678281: step: 22/469, loss: 0.03698296472430229 2023-01-22 14:28:42.335143: step: 24/469, loss: 0.040439456701278687 2023-01-22 14:28:42.945403: step: 26/469, loss: 0.04639560729265213 2023-01-22 14:28:43.561056: step: 28/469, loss: 0.0019108380656689405 2023-01-22 14:28:44.162527: step: 30/469, loss: 0.011149045079946518 2023-01-22 14:28:44.748085: step: 32/469, loss: 0.0009831722127273679 2023-01-22 14:28:45.400090: step: 34/469, loss: 0.0006967243971303105 2023-01-22 14:28:46.090699: step: 36/469, loss: 0.039589859545230865 2023-01-22 14:28:46.715251: step: 38/469, loss: 0.00748983770608902 2023-01-22 14:28:47.334935: step: 40/469, loss: 0.24827691912651062 2023-01-22 14:28:47.918629: step: 42/469, loss: 0.04059219732880592 2023-01-22 14:28:48.571634: step: 44/469, loss: 0.0077805048786103725 2023-01-22 14:28:49.109819: step: 46/469, loss: 0.00036753417225554585 2023-01-22 14:28:49.693462: step: 48/469, loss: 2.3846070766448975 2023-01-22 14:28:50.303125: step: 50/469, loss: 0.012917861342430115 2023-01-22 14:28:50.968802: step: 52/469, loss: 0.0037191000301390886 2023-01-22 14:28:51.586195: step: 54/469, loss: 0.021560192108154297 2023-01-22 14:28:52.228123: step: 56/469, loss: 0.5750148892402649 2023-01-22 14:28:52.803767: step: 58/469, loss: 0.0033462175633758307 2023-01-22 14:28:53.440640: step: 60/469, loss: 0.12610109150409698 2023-01-22 14:28:54.059998: step: 62/469, loss: 0.004488074220716953 2023-01-22 14:28:54.577932: step: 64/469, loss: 0.002650371752679348 2023-01-22 14:28:55.243311: step: 66/469, loss: 0.0027625400107353926 2023-01-22 14:28:55.889778: step: 68/469, loss: 0.10583145171403885 2023-01-22 14:28:56.538685: step: 70/469, loss: 0.019448785111308098 2023-01-22 14:28:57.205410: step: 72/469, loss: 0.11890777945518494 2023-01-22 14:28:57.937106: step: 74/469, loss: 0.0055470094084739685 2023-01-22 14:28:58.596746: step: 76/469, loss: 0.040658287703990936 2023-01-22 14:28:59.217032: step: 78/469, loss: 0.007026447914540768 2023-01-22 14:28:59.821543: step: 80/469, loss: 0.0038187941536307335 2023-01-22 14:29:00.442789: step: 82/469, loss: 
0.005367457401007414 2023-01-22 14:29:01.062128: step: 84/469, loss: 0.016662539914250374 2023-01-22 14:29:01.651512: step: 86/469, loss: 0.0029779854230582714 2023-01-22 14:29:02.259609: step: 88/469, loss: 0.06508389860391617 2023-01-22 14:29:02.976514: step: 90/469, loss: 0.0022850006353110075 2023-01-22 14:29:03.555339: step: 92/469, loss: 0.005730548873543739 2023-01-22 14:29:04.153272: step: 94/469, loss: 0.0008074180223047733 2023-01-22 14:29:04.718741: step: 96/469, loss: 0.0062136054039001465 2023-01-22 14:29:05.332491: step: 98/469, loss: 0.000551339122466743 2023-01-22 14:29:05.892371: step: 100/469, loss: 0.019677409902215004 2023-01-22 14:29:06.513537: step: 102/469, loss: 0.0033649744000285864 2023-01-22 14:29:07.124939: step: 104/469, loss: 0.008814056403934956 2023-01-22 14:29:07.748293: step: 106/469, loss: 0.03335726633667946 2023-01-22 14:29:08.464013: step: 108/469, loss: 0.06300927698612213 2023-01-22 14:29:09.067705: step: 110/469, loss: 0.0029953571502119303 2023-01-22 14:29:09.730848: step: 112/469, loss: 0.0060960473492741585 2023-01-22 14:29:10.371636: step: 114/469, loss: 0.011273249052464962 2023-01-22 14:29:10.971438: step: 116/469, loss: 0.004299314226955175 2023-01-22 14:29:11.592746: step: 118/469, loss: 0.12154749035835266 2023-01-22 14:29:12.198097: step: 120/469, loss: 0.018587274476885796 2023-01-22 14:29:12.943111: step: 122/469, loss: 0.016417136415839195 2023-01-22 14:29:13.526680: step: 124/469, loss: 0.012194930575788021 2023-01-22 14:29:14.194389: step: 126/469, loss: 0.04684147238731384 2023-01-22 14:29:14.769551: step: 128/469, loss: 0.005757268518209457 2023-01-22 14:29:15.428917: step: 130/469, loss: 0.003936708439141512 2023-01-22 14:29:16.042189: step: 132/469, loss: 0.004670368507504463 2023-01-22 14:29:16.687167: step: 134/469, loss: 0.03482309728860855 2023-01-22 14:29:17.337257: step: 136/469, loss: 0.00046595456660725176 2023-01-22 14:29:17.918506: step: 138/469, loss: 0.00020312923879828304 2023-01-22 14:29:18.506029: step: 140/469, loss: 0.0028754393570125103 2023-01-22 14:29:19.140268: step: 142/469, loss: 0.005672778934240341 2023-01-22 14:29:19.750017: step: 144/469, loss: 0.0037217920180410147 2023-01-22 14:29:20.294081: step: 146/469, loss: 0.003972527105361223 2023-01-22 14:29:20.904988: step: 148/469, loss: 0.00446277717128396 2023-01-22 14:29:21.474993: step: 150/469, loss: 0.001574783120304346 2023-01-22 14:29:22.080561: step: 152/469, loss: 0.0018017555121332407 2023-01-22 14:29:22.697822: step: 154/469, loss: 0.034334566444158554 2023-01-22 14:29:23.325292: step: 156/469, loss: 0.0006047360948286951 2023-01-22 14:29:23.974203: step: 158/469, loss: 0.0045813340693712234 2023-01-22 14:29:24.580913: step: 160/469, loss: 0.0003530083631630987 2023-01-22 14:29:25.217061: step: 162/469, loss: 1.590933788975235e-05 2023-01-22 14:29:25.798452: step: 164/469, loss: 0.010642945766448975 2023-01-22 14:29:26.388827: step: 166/469, loss: 0.011005950160324574 2023-01-22 14:29:27.012650: step: 168/469, loss: 0.0013642124831676483 2023-01-22 14:29:27.672114: step: 170/469, loss: 0.022932417690753937 2023-01-22 14:29:28.250276: step: 172/469, loss: 0.005808479152619839 2023-01-22 14:29:28.934867: step: 174/469, loss: 0.003486677072942257 2023-01-22 14:29:29.527682: step: 176/469, loss: 0.03442490100860596 2023-01-22 14:29:30.141713: step: 178/469, loss: 0.0015917201526463032 2023-01-22 14:29:30.796607: step: 180/469, loss: 0.0567733459174633 2023-01-22 14:29:31.353609: step: 182/469, loss: 0.007270394824445248 2023-01-22 14:29:31.933420: 
step: 184/469, loss: 0.00015969171363394707 2023-01-22 14:29:32.570178: step: 186/469, loss: 0.00758505892008543 2023-01-22 14:29:33.212342: step: 188/469, loss: 0.004748107399791479 2023-01-22 14:29:33.786013: step: 190/469, loss: 0.009574953466653824 2023-01-22 14:29:34.396994: step: 192/469, loss: 0.00833309069275856 2023-01-22 14:29:35.004577: step: 194/469, loss: 0.00844356045126915 2023-01-22 14:29:35.589891: step: 196/469, loss: 0.0020320776384323835 2023-01-22 14:29:36.176961: step: 198/469, loss: 0.013346420601010323 2023-01-22 14:29:36.742120: step: 200/469, loss: 0.014664324931800365 2023-01-22 14:29:37.359584: step: 202/469, loss: 0.01814393512904644 2023-01-22 14:29:37.967407: step: 204/469, loss: 0.0004453635192476213 2023-01-22 14:29:38.563239: step: 206/469, loss: 0.039727482944726944 2023-01-22 14:29:39.192849: step: 208/469, loss: 0.0053256177343428135 2023-01-22 14:29:39.902659: step: 210/469, loss: 0.0034098029136657715 2023-01-22 14:29:40.487535: step: 212/469, loss: 0.004800884984433651 2023-01-22 14:29:41.102226: step: 214/469, loss: 0.0006688708672299981 2023-01-22 14:29:41.745990: step: 216/469, loss: 0.00014004700642544776 2023-01-22 14:29:42.369649: step: 218/469, loss: 0.0251908116042614 2023-01-22 14:29:42.951488: step: 220/469, loss: 0.009037350304424763 2023-01-22 14:29:43.536533: step: 222/469, loss: 0.01472912821918726 2023-01-22 14:29:44.203349: step: 224/469, loss: 0.5599769949913025 2023-01-22 14:29:44.798825: step: 226/469, loss: 0.0030028442852199078 2023-01-22 14:29:45.354014: step: 228/469, loss: 0.00796330813318491 2023-01-22 14:29:46.000181: step: 230/469, loss: 0.0050033507868647575 2023-01-22 14:29:46.638132: step: 232/469, loss: 0.01153347548097372 2023-01-22 14:29:47.282305: step: 234/469, loss: 0.12970635294914246 2023-01-22 14:29:47.839223: step: 236/469, loss: 0.0012679616920650005 2023-01-22 14:29:48.388670: step: 238/469, loss: 0.0248167272657156 2023-01-22 14:29:49.027057: step: 240/469, loss: 0.00013923125516157597 2023-01-22 14:29:49.698513: step: 242/469, loss: 0.002921437146142125 2023-01-22 14:29:50.273912: step: 244/469, loss: 0.015390458516776562 2023-01-22 14:29:50.954898: step: 246/469, loss: 0.035339586436748505 2023-01-22 14:29:51.558205: step: 248/469, loss: 0.0014699893072247505 2023-01-22 14:29:52.128826: step: 250/469, loss: 0.023447921499609947 2023-01-22 14:29:52.738571: step: 252/469, loss: 0.002986697945743799 2023-01-22 14:29:53.391453: step: 254/469, loss: 0.003254184266552329 2023-01-22 14:29:54.205520: step: 256/469, loss: 0.02767689898610115 2023-01-22 14:29:54.816815: step: 258/469, loss: 0.00126143719535321 2023-01-22 14:29:55.445068: step: 260/469, loss: 0.005399410612881184 2023-01-22 14:29:56.023694: step: 262/469, loss: 0.0018525384366512299 2023-01-22 14:29:56.710211: step: 264/469, loss: 0.01274352241307497 2023-01-22 14:29:57.313462: step: 266/469, loss: 0.08559960871934891 2023-01-22 14:29:57.881332: step: 268/469, loss: 0.0021140091121196747 2023-01-22 14:29:58.466812: step: 270/469, loss: 0.0023341989144682884 2023-01-22 14:29:59.064940: step: 272/469, loss: 0.0004674943338613957 2023-01-22 14:29:59.659063: step: 274/469, loss: 0.32528895139694214 2023-01-22 14:30:00.323630: step: 276/469, loss: 0.001879091956652701 2023-01-22 14:30:00.939583: step: 278/469, loss: 0.017344696447253227 2023-01-22 14:30:01.649028: step: 280/469, loss: 0.011400858871638775 2023-01-22 14:30:02.320108: step: 282/469, loss: 0.01149826031178236 2023-01-22 14:30:02.934325: step: 284/469, loss: 0.039661381393671036 2023-01-22 
14:30:03.557845: step: 286/469, loss: 0.01687077060341835 2023-01-22 14:30:04.220126: step: 288/469, loss: 0.004539964254945517 2023-01-22 14:30:04.801374: step: 290/469, loss: 0.00014690875832457095 2023-01-22 14:30:05.386903: step: 292/469, loss: 0.002088255016133189 2023-01-22 14:30:06.020832: step: 294/469, loss: 0.04087628051638603 2023-01-22 14:30:06.598352: step: 296/469, loss: 0.00926870759576559 2023-01-22 14:30:07.300628: step: 298/469, loss: 0.004287716932594776 2023-01-22 14:30:07.890016: step: 300/469, loss: 0.0011729313991963863 2023-01-22 14:30:08.468617: step: 302/469, loss: 0.04164522886276245 2023-01-22 14:30:09.161274: step: 304/469, loss: 0.02392064593732357 2023-01-22 14:30:09.819536: step: 306/469, loss: 0.0016163551481440663 2023-01-22 14:30:10.520238: step: 308/469, loss: 0.000714242400135845 2023-01-22 14:30:11.148503: step: 310/469, loss: 0.03168698027729988 2023-01-22 14:30:11.761684: step: 312/469, loss: 0.004886186681687832 2023-01-22 14:30:12.402010: step: 314/469, loss: 0.05792257934808731 2023-01-22 14:30:13.088736: step: 316/469, loss: 0.0011326250387355685 2023-01-22 14:30:13.655992: step: 318/469, loss: 0.00026391472783870995 2023-01-22 14:30:14.250292: step: 320/469, loss: 0.03720565885305405 2023-01-22 14:30:14.835071: step: 322/469, loss: 0.0004022291686851531 2023-01-22 14:30:15.409405: step: 324/469, loss: 0.0037682843394577503 2023-01-22 14:30:15.981703: step: 326/469, loss: 0.00714307464659214 2023-01-22 14:30:16.589871: step: 328/469, loss: 0.002341343555599451 2023-01-22 14:30:17.225168: step: 330/469, loss: 0.00014244038902688771 2023-01-22 14:30:17.874734: step: 332/469, loss: 0.006061086896806955 2023-01-22 14:30:18.488192: step: 334/469, loss: 0.03425901010632515 2023-01-22 14:30:19.093888: step: 336/469, loss: 0.007496016565710306 2023-01-22 14:30:19.741609: step: 338/469, loss: 0.03554781898856163 2023-01-22 14:30:20.337700: step: 340/469, loss: 0.006188980303704739 2023-01-22 14:30:20.992160: step: 342/469, loss: 0.009236977435648441 2023-01-22 14:30:21.629183: step: 344/469, loss: 0.010148722678422928 2023-01-22 14:30:22.344783: step: 346/469, loss: 0.007321231998503208 2023-01-22 14:30:22.972243: step: 348/469, loss: 0.10858845710754395 2023-01-22 14:30:23.553140: step: 350/469, loss: 0.000323133310303092 2023-01-22 14:30:24.220659: step: 352/469, loss: 0.00132815632969141 2023-01-22 14:30:24.782188: step: 354/469, loss: 0.0009524194174446166 2023-01-22 14:30:25.414384: step: 356/469, loss: 0.03454100340604782 2023-01-22 14:30:26.028714: step: 358/469, loss: 0.006098690442740917 2023-01-22 14:30:26.638040: step: 360/469, loss: 0.04598555341362953 2023-01-22 14:30:27.239803: step: 362/469, loss: 0.004837253130972385 2023-01-22 14:30:27.851943: step: 364/469, loss: 0.041017018258571625 2023-01-22 14:30:28.422883: step: 366/469, loss: 0.004117204807698727 2023-01-22 14:30:28.967379: step: 368/469, loss: 0.032427240163087845 2023-01-22 14:30:29.631350: step: 370/469, loss: 0.00023781275376677513 2023-01-22 14:30:30.261220: step: 372/469, loss: 0.05958930775523186 2023-01-22 14:30:30.841537: step: 374/469, loss: 0.01593131013214588 2023-01-22 14:30:31.529100: step: 376/469, loss: 0.015888068825006485 2023-01-22 14:30:32.178113: step: 378/469, loss: 0.01922130398452282 2023-01-22 14:30:32.779357: step: 380/469, loss: 0.19295734167099 2023-01-22 14:30:33.409311: step: 382/469, loss: 0.00452197901904583 2023-01-22 14:30:34.050512: step: 384/469, loss: 0.0188754890114069 2023-01-22 14:30:34.733894: step: 386/469, loss: 0.0028828668873757124 
2023-01-22 14:30:35.387711: step: 388/469, loss: 0.002730908105149865 2023-01-22 14:30:35.960141: step: 390/469, loss: 0.037013109773397446 2023-01-22 14:30:36.640158: step: 392/469, loss: 0.00012813137436751276 2023-01-22 14:30:37.287132: step: 394/469, loss: 0.00018990701937582344 2023-01-22 14:30:37.935496: step: 396/469, loss: 0.00619854312390089 2023-01-22 14:30:38.605191: step: 398/469, loss: 0.018023747950792313 2023-01-22 14:30:39.264314: step: 400/469, loss: 0.003330582519993186 2023-01-22 14:30:39.952977: step: 402/469, loss: 0.019715260714292526 2023-01-22 14:30:40.582837: step: 404/469, loss: 0.00561181316152215 2023-01-22 14:30:41.190068: step: 406/469, loss: 0.006778828799724579 2023-01-22 14:30:41.888425: step: 408/469, loss: 0.01337337028235197 2023-01-22 14:30:42.526676: step: 410/469, loss: 0.0035241665318608284 2023-01-22 14:30:43.189045: step: 412/469, loss: 0.004474778193980455 2023-01-22 14:30:43.800048: step: 414/469, loss: 0.07631265372037888 2023-01-22 14:30:44.402717: step: 416/469, loss: 0.00584753230214119 2023-01-22 14:30:45.072918: step: 418/469, loss: 0.002276880666613579 2023-01-22 14:30:45.695064: step: 420/469, loss: 0.03101464733481407 2023-01-22 14:30:46.246930: step: 422/469, loss: 0.012025087140500546 2023-01-22 14:30:46.872834: step: 424/469, loss: 0.0015342243714258075 2023-01-22 14:30:47.488614: step: 426/469, loss: 0.015713496133685112 2023-01-22 14:30:48.110449: step: 428/469, loss: 0.0010290158679708838 2023-01-22 14:30:48.738252: step: 430/469, loss: 0.005436266772449017 2023-01-22 14:30:49.383890: step: 432/469, loss: 0.039580777287483215 2023-01-22 14:30:50.002616: step: 434/469, loss: 0.005971907638013363 2023-01-22 14:30:50.715999: step: 436/469, loss: 0.08663719892501831 2023-01-22 14:30:51.323425: step: 438/469, loss: 0.003683008486405015 2023-01-22 14:30:51.969449: step: 440/469, loss: 0.002901983680203557 2023-01-22 14:30:52.572146: step: 442/469, loss: 0.007728767581284046 2023-01-22 14:30:53.186410: step: 444/469, loss: 0.02300255000591278 2023-01-22 14:30:53.831276: step: 446/469, loss: 0.0006015023100189865 2023-01-22 14:30:54.461761: step: 448/469, loss: 0.03230655938386917 2023-01-22 14:30:55.054048: step: 450/469, loss: 0.0007482889341190457 2023-01-22 14:30:55.690342: step: 452/469, loss: 0.008743666112422943 2023-01-22 14:30:56.308220: step: 454/469, loss: 0.02133469097316265 2023-01-22 14:30:56.920415: step: 456/469, loss: 0.0022225086577236652 2023-01-22 14:30:57.590371: step: 458/469, loss: 0.0029493970796465874 2023-01-22 14:30:58.274442: step: 460/469, loss: 0.01647314988076687 2023-01-22 14:30:58.864314: step: 462/469, loss: 0.0008527342579327524 2023-01-22 14:30:59.507411: step: 464/469, loss: 0.005008402280509472 2023-01-22 14:31:00.103524: step: 466/469, loss: 0.0014473085757344961 2023-01-22 14:31:00.713377: step: 468/469, loss: 0.03902801126241684 2023-01-22 14:31:01.371588: step: 470/469, loss: 0.10863203555345535 2023-01-22 14:31:01.980941: step: 472/469, loss: 0.03507671505212784 2023-01-22 14:31:02.621573: step: 474/469, loss: 2.4033501148223877 2023-01-22 14:31:03.266860: step: 476/469, loss: 0.0023762674536556005 2023-01-22 14:31:03.907608: step: 478/469, loss: 0.006410107482224703 2023-01-22 14:31:04.535416: step: 480/469, loss: 0.010130060836672783 2023-01-22 14:31:05.126942: step: 482/469, loss: 0.0028522307984530926 2023-01-22 14:31:05.764256: step: 484/469, loss: 0.0016838125884532928 2023-01-22 14:31:06.385554: step: 486/469, loss: 0.18121619522571564 2023-01-22 14:31:06.965050: step: 488/469, loss: 
0.0028508505783975124 2023-01-22 14:31:07.539771: step: 490/469, loss: 0.006383610889315605 2023-01-22 14:31:08.135568: step: 492/469, loss: 0.012291405349969864 2023-01-22 14:31:08.741558: step: 494/469, loss: 0.021269096061587334 2023-01-22 14:31:09.350252: step: 496/469, loss: 0.000360461650416255 2023-01-22 14:31:09.933587: step: 498/469, loss: 0.01742173545062542 2023-01-22 14:31:10.513016: step: 500/469, loss: 0.029842935502529144 2023-01-22 14:31:11.200787: step: 502/469, loss: 0.09552260488271713 2023-01-22 14:31:11.800997: step: 504/469, loss: 0.03839993476867676 2023-01-22 14:31:12.434625: step: 506/469, loss: 0.0005510871997103095 2023-01-22 14:31:13.130490: step: 508/469, loss: 0.007790790870785713 2023-01-22 14:31:13.697902: step: 510/469, loss: 0.003291425062343478 2023-01-22 14:31:14.357528: step: 512/469, loss: 0.0027130248490720987 2023-01-22 14:31:15.069018: step: 514/469, loss: 0.03849266842007637 2023-01-22 14:31:15.642008: step: 516/469, loss: 0.015371840447187424 2023-01-22 14:31:16.270959: step: 518/469, loss: 0.00024459429550915956 2023-01-22 14:31:16.837006: step: 520/469, loss: 0.011851261369884014 2023-01-22 14:31:17.368958: step: 522/469, loss: 0.0003780556144192815 2023-01-22 14:31:18.098225: step: 524/469, loss: 1.996565333683975e-05 2023-01-22 14:31:18.713927: step: 526/469, loss: 0.33478713035583496 2023-01-22 14:31:19.255609: step: 528/469, loss: 0.0013824690831825137 2023-01-22 14:31:19.831498: step: 530/469, loss: 0.013079002499580383 2023-01-22 14:31:20.405088: step: 532/469, loss: 0.0014266233192756772 2023-01-22 14:31:21.024411: step: 534/469, loss: 0.012344690039753914 2023-01-22 14:31:21.674983: step: 536/469, loss: 0.0005016499781049788 2023-01-22 14:31:22.337608: step: 538/469, loss: 0.0035884634125977755 2023-01-22 14:31:22.911497: step: 540/469, loss: 0.0975341945886612 2023-01-22 14:31:23.508171: step: 542/469, loss: 0.0015002776635810733 2023-01-22 14:31:24.162885: step: 544/469, loss: 0.017281167209148407 2023-01-22 14:31:24.664739: step: 546/469, loss: 0.0033554481342434883 2023-01-22 14:31:25.261481: step: 548/469, loss: 0.0021954441908746958 2023-01-22 14:31:25.921751: step: 550/469, loss: 0.02514529973268509 2023-01-22 14:31:26.622900: step: 552/469, loss: 0.011842131614685059 2023-01-22 14:31:27.246565: step: 554/469, loss: 0.002635958604514599 2023-01-22 14:31:27.859056: step: 556/469, loss: 0.0016326900804415345 2023-01-22 14:31:28.458807: step: 558/469, loss: 0.025699403136968613 2023-01-22 14:31:29.136470: step: 560/469, loss: 0.0009642823715694249 2023-01-22 14:31:29.706127: step: 562/469, loss: 0.04040439799427986 2023-01-22 14:31:30.474638: step: 564/469, loss: 0.00034277039230801165 2023-01-22 14:31:31.166620: step: 566/469, loss: 0.012492691166698933 2023-01-22 14:31:31.795508: step: 568/469, loss: 0.024846741929650307 2023-01-22 14:31:32.414821: step: 570/469, loss: 0.0007408543024212122 2023-01-22 14:31:33.027566: step: 572/469, loss: 8.127120236167684e-05 2023-01-22 14:31:33.683022: step: 574/469, loss: 0.16272634267807007 2023-01-22 14:31:34.298798: step: 576/469, loss: 0.0019044807413592935 2023-01-22 14:31:34.902164: step: 578/469, loss: 0.00011966226156800985 2023-01-22 14:31:35.471635: step: 580/469, loss: 0.0273868590593338 2023-01-22 14:31:36.046730: step: 582/469, loss: 0.00020728286472149193 2023-01-22 14:31:36.674588: step: 584/469, loss: 0.39482250809669495 2023-01-22 14:31:37.295273: step: 586/469, loss: 0.002915006596595049 2023-01-22 14:31:37.949799: step: 588/469, loss: 0.004416953772306442 2023-01-22 
14:31:38.588183: step: 590/469, loss: 0.0629315972328186 2023-01-22 14:31:39.229821: step: 592/469, loss: 0.003230934264138341 2023-01-22 14:31:39.858183: step: 594/469, loss: 0.09500297158956528 2023-01-22 14:31:40.471694: step: 596/469, loss: 0.002657938050106168 2023-01-22 14:31:41.078001: step: 598/469, loss: 0.06940101087093353 2023-01-22 14:31:41.691393: step: 600/469, loss: 0.014296120032668114 2023-01-22 14:31:42.311476: step: 602/469, loss: 0.36395740509033203 2023-01-22 14:31:42.938986: step: 604/469, loss: 0.02228420600295067 2023-01-22 14:31:43.571935: step: 606/469, loss: 0.000938447832595557 2023-01-22 14:31:44.217229: step: 608/469, loss: 0.016886886209249496 2023-01-22 14:31:44.831031: step: 610/469, loss: 0.024989686906337738 2023-01-22 14:31:45.428629: step: 612/469, loss: 0.03809540346264839 2023-01-22 14:31:45.963492: step: 614/469, loss: 0.0003656471089925617 2023-01-22 14:31:46.558321: step: 616/469, loss: 0.11770421266555786 2023-01-22 14:31:47.230978: step: 618/469, loss: 0.03488276153802872 2023-01-22 14:31:47.772673: step: 620/469, loss: 4.1198763938155025e-05 2023-01-22 14:31:48.409760: step: 622/469, loss: 0.08880746364593506 2023-01-22 14:31:49.066654: step: 624/469, loss: 0.034829068928956985 2023-01-22 14:31:49.695931: step: 626/469, loss: 0.0013449967373162508 2023-01-22 14:31:50.313840: step: 628/469, loss: 0.03821936994791031 2023-01-22 14:31:50.908869: step: 630/469, loss: 0.019052352756261826 2023-01-22 14:31:51.494086: step: 632/469, loss: 0.0005967976176179945 2023-01-22 14:31:52.090495: step: 634/469, loss: 0.08901707082986832 2023-01-22 14:31:52.697281: step: 636/469, loss: 0.0010007378878071904 2023-01-22 14:31:53.298847: step: 638/469, loss: 0.014912023209035397 2023-01-22 14:31:53.913735: step: 640/469, loss: 0.17074042558670044 2023-01-22 14:31:54.572992: step: 642/469, loss: 0.01104023028165102 2023-01-22 14:31:55.256184: step: 644/469, loss: 0.04529206454753876 2023-01-22 14:31:55.941685: step: 646/469, loss: 0.02859463356435299 2023-01-22 14:31:56.588988: step: 648/469, loss: 0.031995393335819244 2023-01-22 14:31:57.346857: step: 650/469, loss: 2.4087648391723633 2023-01-22 14:31:58.042979: step: 652/469, loss: 0.009420580230653286 2023-01-22 14:31:58.691169: step: 654/469, loss: 0.02887788787484169 2023-01-22 14:31:59.352549: step: 656/469, loss: 0.12417895346879959 2023-01-22 14:31:59.942266: step: 658/469, loss: 0.0006238478235900402 2023-01-22 14:32:00.527486: step: 660/469, loss: 0.005185308866202831 2023-01-22 14:32:01.093006: step: 662/469, loss: 0.0008076611557044089 2023-01-22 14:32:01.721760: step: 664/469, loss: 0.046926502138376236 2023-01-22 14:32:02.369248: step: 666/469, loss: 0.019344089552760124 2023-01-22 14:32:02.964089: step: 668/469, loss: 0.0002813950413838029 2023-01-22 14:32:03.626706: step: 670/469, loss: 0.07090133428573608 2023-01-22 14:32:04.251002: step: 672/469, loss: 0.02977340668439865 2023-01-22 14:32:04.857979: step: 674/469, loss: 0.0018980741733685136 2023-01-22 14:32:05.478487: step: 676/469, loss: 0.04407316818833351 2023-01-22 14:32:06.099439: step: 678/469, loss: 0.02806190773844719 2023-01-22 14:32:06.655203: step: 680/469, loss: 0.01468958705663681 2023-01-22 14:32:07.467283: step: 682/469, loss: 0.008281126618385315 2023-01-22 14:32:08.059156: step: 684/469, loss: 0.002183744451031089 2023-01-22 14:32:08.654836: step: 686/469, loss: 0.0006071201642043889 2023-01-22 14:32:09.213193: step: 688/469, loss: 0.022162703797221184 2023-01-22 14:32:09.882214: step: 690/469, loss: 0.006789313163608313 
2023-01-22 14:32:10.503822: step: 692/469, loss: 0.014217977412045002 2023-01-22 14:32:11.170113: step: 694/469, loss: 0.015008599497377872 2023-01-22 14:32:11.738460: step: 696/469, loss: 0.03592699393630028 2023-01-22 14:32:12.330807: step: 698/469, loss: 0.3247458040714264 2023-01-22 14:32:12.899474: step: 700/469, loss: 0.01957259327173233 2023-01-22 14:32:13.668749: step: 702/469, loss: 0.0026686498895287514 2023-01-22 14:32:14.323629: step: 704/469, loss: 0.02792477421462536 2023-01-22 14:32:14.925961: step: 706/469, loss: 0.0254361554980278 2023-01-22 14:32:15.528054: step: 708/469, loss: 0.0011668233200907707 2023-01-22 14:32:16.264667: step: 710/469, loss: 0.06137216463685036 2023-01-22 14:32:16.866982: step: 712/469, loss: 0.05560300126671791 2023-01-22 14:32:17.525740: step: 714/469, loss: 0.0025397774297744036 2023-01-22 14:32:18.176576: step: 716/469, loss: 0.001246861182153225 2023-01-22 14:32:18.761810: step: 718/469, loss: 0.0632171556353569 2023-01-22 14:32:19.390343: step: 720/469, loss: 0.000753260450437665 2023-01-22 14:32:20.018308: step: 722/469, loss: 0.03895358741283417 2023-01-22 14:32:20.694679: step: 724/469, loss: 0.013898639008402824 2023-01-22 14:32:21.292496: step: 726/469, loss: 0.04265652224421501 2023-01-22 14:32:21.833148: step: 728/469, loss: 0.004402386024594307 2023-01-22 14:32:22.430779: step: 730/469, loss: 0.004465318284928799 2023-01-22 14:32:22.997992: step: 732/469, loss: 0.01439233310520649 2023-01-22 14:32:23.628020: step: 734/469, loss: 0.014482861384749413 2023-01-22 14:32:24.229547: step: 736/469, loss: 0.059373173862695694 2023-01-22 14:32:24.918557: step: 738/469, loss: 0.02686934918165207 2023-01-22 14:32:25.531813: step: 740/469, loss: 0.001371812541037798 2023-01-22 14:32:26.153948: step: 742/469, loss: 0.023025471717119217 2023-01-22 14:32:26.789065: step: 744/469, loss: 0.00024758256040513515 2023-01-22 14:32:27.394156: step: 746/469, loss: 0.0034194202162325382 2023-01-22 14:32:27.989724: step: 748/469, loss: 0.001260263379663229 2023-01-22 14:32:28.685156: step: 750/469, loss: 0.020660411566495895 2023-01-22 14:32:29.456582: step: 752/469, loss: 0.03583719581365585 2023-01-22 14:32:30.115594: step: 754/469, loss: 0.021274235099554062 2023-01-22 14:32:30.782189: step: 756/469, loss: 0.011315690353512764 2023-01-22 14:32:31.382309: step: 758/469, loss: 0.027749918401241302 2023-01-22 14:32:32.007856: step: 760/469, loss: 0.023145675659179688 2023-01-22 14:32:32.648972: step: 762/469, loss: 0.0783037319779396 2023-01-22 14:32:33.241821: step: 764/469, loss: 0.00954751018434763 2023-01-22 14:32:33.868865: step: 766/469, loss: 0.0516485720872879 2023-01-22 14:32:34.516803: step: 768/469, loss: 0.03406033292412758 2023-01-22 14:32:35.250022: step: 770/469, loss: 0.0791030302643776 2023-01-22 14:32:35.915174: step: 772/469, loss: 0.0046530370600521564 2023-01-22 14:32:36.525903: step: 774/469, loss: 0.012602507136762142 2023-01-22 14:32:37.101423: step: 776/469, loss: 0.00102899968624115 2023-01-22 14:32:37.765875: step: 778/469, loss: 0.040264785289764404 2023-01-22 14:32:38.338968: step: 780/469, loss: 0.0050149839371442795 2023-01-22 14:32:38.879813: step: 782/469, loss: 0.07066849619150162 2023-01-22 14:32:39.514419: step: 784/469, loss: 0.017565440386533737 2023-01-22 14:32:40.220509: step: 786/469, loss: 0.013721856288611889 2023-01-22 14:32:40.821900: step: 788/469, loss: 0.03092535398900509 2023-01-22 14:32:41.430402: step: 790/469, loss: 0.04674604907631874 2023-01-22 14:32:42.060662: step: 792/469, loss: 0.02690579555928707 
2023-01-22 14:32:42.682898: step: 794/469, loss: 0.07567117363214493 2023-01-22 14:32:43.334939: step: 796/469, loss: 0.08812756836414337 2023-01-22 14:32:44.012595: step: 798/469, loss: 0.10162734240293503 2023-01-22 14:32:44.647081: step: 800/469, loss: 0.13834482431411743 2023-01-22 14:32:45.405813: step: 802/469, loss: 0.010010241530835629 2023-01-22 14:32:46.018000: step: 804/469, loss: 0.01298542320728302 2023-01-22 14:32:46.689471: step: 806/469, loss: 0.017113426700234413 2023-01-22 14:32:47.351477: step: 808/469, loss: 0.013750889338552952 2023-01-22 14:32:47.946766: step: 810/469, loss: 0.006098391953855753 2023-01-22 14:32:48.547107: step: 812/469, loss: 0.03663692995905876 2023-01-22 14:32:49.164643: step: 814/469, loss: 0.017231501638889313 2023-01-22 14:32:49.806523: step: 816/469, loss: 0.010893937200307846 2023-01-22 14:32:50.433460: step: 818/469, loss: 0.00018118097796104848 2023-01-22 14:32:51.102586: step: 820/469, loss: 0.023833217099308968 2023-01-22 14:32:51.722354: step: 822/469, loss: 0.009173400700092316 2023-01-22 14:32:52.405070: step: 824/469, loss: 0.02096126414835453 2023-01-22 14:32:53.002440: step: 826/469, loss: 0.005791542120277882 2023-01-22 14:32:53.644108: step: 828/469, loss: 0.00048988894559443 2023-01-22 14:32:54.312885: step: 830/469, loss: 0.011068285442888737 2023-01-22 14:32:54.897950: step: 832/469, loss: 0.00469591747969389 2023-01-22 14:32:55.554357: step: 834/469, loss: 0.00930104125291109 2023-01-22 14:32:56.214892: step: 836/469, loss: 0.00478220684453845 2023-01-22 14:32:56.834468: step: 838/469, loss: 0.02467329427599907 2023-01-22 14:32:57.480089: step: 840/469, loss: 0.006275024730712175 2023-01-22 14:32:58.121833: step: 842/469, loss: 0.014605286531150341 2023-01-22 14:32:58.727803: step: 844/469, loss: 0.0015325166750699282 2023-01-22 14:32:59.364681: step: 846/469, loss: 0.0023205613251775503 2023-01-22 14:33:00.010481: step: 848/469, loss: 0.005115923937410116 2023-01-22 14:33:00.605232: step: 850/469, loss: 0.02142981067299843 2023-01-22 14:33:01.284852: step: 852/469, loss: 0.016367793083190918 2023-01-22 14:33:01.971106: step: 854/469, loss: 0.005425679963082075 2023-01-22 14:33:02.581745: step: 856/469, loss: 0.02080382965505123 2023-01-22 14:33:03.152857: step: 858/469, loss: 0.01139050628989935 2023-01-22 14:33:03.730496: step: 860/469, loss: 0.026312697678804398 2023-01-22 14:33:04.362040: step: 862/469, loss: 0.01092403382062912 2023-01-22 14:33:05.067331: step: 864/469, loss: 0.006747534964233637 2023-01-22 14:33:05.699773: step: 866/469, loss: 0.15142342448234558 2023-01-22 14:33:06.302687: step: 868/469, loss: 0.0016730048228055239 2023-01-22 14:33:06.980813: step: 870/469, loss: 0.05006123334169388 2023-01-22 14:33:07.549512: step: 872/469, loss: 0.014254732057452202 2023-01-22 14:33:08.131262: step: 874/469, loss: 0.04299959912896156 2023-01-22 14:33:08.720974: step: 876/469, loss: 0.027509279549121857 2023-01-22 14:33:09.363209: step: 878/469, loss: 0.01997573859989643 2023-01-22 14:33:09.954217: step: 880/469, loss: 0.00031374391983263195 2023-01-22 14:33:10.585622: step: 882/469, loss: 0.016268735751509666 2023-01-22 14:33:11.198892: step: 884/469, loss: 0.0013883366482332349 2023-01-22 14:33:11.789451: step: 886/469, loss: 0.025057921186089516 2023-01-22 14:33:12.384423: step: 888/469, loss: 0.2622474431991577 2023-01-22 14:33:13.021362: step: 890/469, loss: 0.0010785538470372558 2023-01-22 14:33:13.626106: step: 892/469, loss: 0.012118016369640827 2023-01-22 14:33:14.296607: step: 894/469, loss: 
0.09365305304527283 2023-01-22 14:33:14.900200: step: 896/469, loss: 2.3522001356468536e-05 2023-01-22 14:33:15.518515: step: 898/469, loss: 0.0015640523051843047 2023-01-22 14:33:16.223398: step: 900/469, loss: 0.7320762276649475 2023-01-22 14:33:16.919266: step: 902/469, loss: 0.06162048131227493 2023-01-22 14:33:17.508143: step: 904/469, loss: 0.0066713797859847546 2023-01-22 14:33:18.211696: step: 906/469, loss: 0.016309410333633423 2023-01-22 14:33:18.927779: step: 908/469, loss: 0.21170613169670105 2023-01-22 14:33:19.541213: step: 910/469, loss: 0.033934880048036575 2023-01-22 14:33:20.134408: step: 912/469, loss: 0.01726914942264557 2023-01-22 14:33:20.773107: step: 914/469, loss: 0.0049497890286147594 2023-01-22 14:33:21.422106: step: 916/469, loss: 0.016548756510019302 2023-01-22 14:33:22.068764: step: 918/469, loss: 0.0022880916949361563 2023-01-22 14:33:22.742898: step: 920/469, loss: 0.000545771443285048 2023-01-22 14:33:23.342749: step: 922/469, loss: 0.02944769337773323 2023-01-22 14:33:23.984113: step: 924/469, loss: 0.0009377841488458216 2023-01-22 14:33:24.543346: step: 926/469, loss: 0.17907419800758362 2023-01-22 14:33:25.165669: step: 928/469, loss: 0.014677022583782673 2023-01-22 14:33:25.769514: step: 930/469, loss: 0.036170970648527145 2023-01-22 14:33:26.380221: step: 932/469, loss: 0.03944196552038193 2023-01-22 14:33:27.007710: step: 934/469, loss: 0.005004808772355318 2023-01-22 14:33:27.604678: step: 936/469, loss: 0.003991291392594576 2023-01-22 14:33:28.307372: step: 938/469, loss: 0.004129363689571619 ================================================== Loss: 0.045 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3026070127504554, 'r': 0.31523956356736244, 'f1': 0.3087941449814126}, 'combined': 0.22753252788104086, 'epoch': 33} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31918886409967634, 'r': 0.26579621132980363, 'f1': 0.29005591540628944}, 'combined': 0.15821231749433967, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29600756227758007, 'r': 0.3156665085388994, 'f1': 0.3055211202938476}, 'combined': 0.22512082547967716, 'epoch': 33} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3234982078916707, 'r': 0.27027181171205394, 'f1': 0.29449936455476794}, 'combined': 0.1606360170298734, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2903066606389391, 'r': 0.30462919038583175, 'f1': 0.297295524691358}, 'combined': 0.21905986029889535, 'epoch': 33} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3152750170515444, 'r': 0.26887713977064986, 'f1': 0.2902334394761627}, 'combined': 0.15830914880517966, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2703900709219858, 'r': 0.3630952380952381, 'f1': 0.3099593495934959}, 'combined': 0.20663956639566394, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27205882352941174, 'r': 0.40217391304347827, 'f1': 0.32456140350877194}, 'combined': 0.16228070175438597, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4117647058823529, 'r': 
0.2413793103448276, 'f1': 0.3043478260869565}, 'combined': 0.20289855072463764, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:36:20.318874: step: 2/469, loss: 0.006031720899045467 2023-01-22 14:36:20.956071: step: 4/469, loss: 0.0019197453511878848 2023-01-22 14:36:21.599803: step: 6/469, loss: 0.004195861984044313 2023-01-22 14:36:22.184263: step: 8/469, loss: 0.006084569729864597 2023-01-22 14:36:22.894668: step: 10/469, loss: 0.03573630005121231 2023-01-22 14:36:23.682983: step: 12/469, loss: 0.005998925771564245 2023-01-22 14:36:24.278937: step: 14/469, loss: 0.02235507406294346 2023-01-22 14:36:24.921157: step: 16/469, loss: 0.004785803612321615 2023-01-22 14:36:25.602233: step: 18/469, loss: 0.013863739557564259 2023-01-22 14:36:26.258147: step: 20/469, loss: 0.017171066254377365 2023-01-22 14:36:26.888307: step: 22/469, loss: 0.005527122877538204 2023-01-22 14:36:27.514082: step: 24/469, loss: 0.0009547692025080323 2023-01-22 14:36:28.159960: step: 26/469, loss: 0.02117086946964264 2023-01-22 14:36:28.906695: step: 28/469, loss: 0.00163931492716074 2023-01-22 14:36:29.545170: step: 30/469, loss: 0.025652950629591942 2023-01-22 14:36:30.221764: step: 32/469, loss: 
0.00017634050163906068 2023-01-22 14:36:30.781374: step: 34/469, loss: 0.000924905703868717 2023-01-22 14:36:31.367210: step: 36/469, loss: 0.05202510952949524 2023-01-22 14:36:32.013374: step: 38/469, loss: 0.008343975991010666 2023-01-22 14:36:32.577868: step: 40/469, loss: 0.0006487302598543465 2023-01-22 14:36:33.173516: step: 42/469, loss: 0.07102520018815994 2023-01-22 14:36:33.819648: step: 44/469, loss: 0.01853327825665474 2023-01-22 14:36:34.455039: step: 46/469, loss: 0.0006005986942909658 2023-01-22 14:36:35.076226: step: 48/469, loss: 0.00398049782961607 2023-01-22 14:36:35.674687: step: 50/469, loss: 0.012305492535233498 2023-01-22 14:36:36.284706: step: 52/469, loss: 0.0011470906902104616 2023-01-22 14:36:36.982202: step: 54/469, loss: 0.0023668729700148106 2023-01-22 14:36:37.579109: step: 56/469, loss: 0.0054066115990281105 2023-01-22 14:36:38.145625: step: 58/469, loss: 0.012622066773474216 2023-01-22 14:36:38.802898: step: 60/469, loss: 0.00028697348898276687 2023-01-22 14:36:39.423954: step: 62/469, loss: 0.04201425984501839 2023-01-22 14:36:40.104612: step: 64/469, loss: 0.012655284255743027 2023-01-22 14:36:40.737729: step: 66/469, loss: 0.13666579127311707 2023-01-22 14:36:41.397311: step: 68/469, loss: 0.00934639759361744 2023-01-22 14:36:42.086695: step: 70/469, loss: 0.0013036837335675955 2023-01-22 14:36:42.738106: step: 72/469, loss: 0.00669926218688488 2023-01-22 14:36:43.361706: step: 74/469, loss: 0.0067839352414011955 2023-01-22 14:36:43.950201: step: 76/469, loss: 0.0014320772606879473 2023-01-22 14:36:44.602516: step: 78/469, loss: 0.010338111780583858 2023-01-22 14:36:45.194097: step: 80/469, loss: 0.0016123708337545395 2023-01-22 14:36:45.828481: step: 82/469, loss: 0.0005422577378340065 2023-01-22 14:36:46.401458: step: 84/469, loss: 0.008865961804986 2023-01-22 14:36:47.015303: step: 86/469, loss: 0.20689965784549713 2023-01-22 14:36:47.650304: step: 88/469, loss: 0.05947402864694595 2023-01-22 14:36:48.299345: step: 90/469, loss: 0.008328648284077644 2023-01-22 14:36:48.957558: step: 92/469, loss: 0.059641528874635696 2023-01-22 14:36:49.617618: step: 94/469, loss: 0.007071740925312042 2023-01-22 14:36:50.213690: step: 96/469, loss: 0.00270385411567986 2023-01-22 14:36:50.845587: step: 98/469, loss: 0.2607606053352356 2023-01-22 14:36:51.513187: step: 100/469, loss: 0.05911825969815254 2023-01-22 14:36:52.148287: step: 102/469, loss: 0.16916383802890778 2023-01-22 14:36:52.706688: step: 104/469, loss: 0.002343447646126151 2023-01-22 14:36:53.310980: step: 106/469, loss: 0.011622349731624126 2023-01-22 14:36:53.954328: step: 108/469, loss: 0.000993871595710516 2023-01-22 14:36:54.554950: step: 110/469, loss: 0.004134451039135456 2023-01-22 14:36:55.183785: step: 112/469, loss: 0.011134440079331398 2023-01-22 14:36:55.781989: step: 114/469, loss: 0.00017363010556437075 2023-01-22 14:36:56.362528: step: 116/469, loss: 0.0010013064602389932 2023-01-22 14:36:56.963513: step: 118/469, loss: 0.0020272599067538977 2023-01-22 14:36:57.631403: step: 120/469, loss: 0.009724320843815804 2023-01-22 14:36:58.240793: step: 122/469, loss: 0.000933723000343889 2023-01-22 14:36:58.851206: step: 124/469, loss: 0.0011233205441385508 2023-01-22 14:36:59.501516: step: 126/469, loss: 0.0980379581451416 2023-01-22 14:37:00.054910: step: 128/469, loss: 0.0008008508011698723 2023-01-22 14:37:00.671403: step: 130/469, loss: 0.4928361475467682 2023-01-22 14:37:01.303594: step: 132/469, loss: 0.00046625922550447285 2023-01-22 14:37:01.878602: step: 134/469, loss: 
0.018281705677509308 2023-01-22 14:37:02.467754: step: 136/469, loss: 17.673542022705078 2023-01-22 14:37:03.042121: step: 138/469, loss: 0.0004628832102753222 2023-01-22 14:37:03.609602: step: 140/469, loss: 0.029018081724643707 2023-01-22 14:37:04.234487: step: 142/469, loss: 0.03466082364320755 2023-01-22 14:37:04.826296: step: 144/469, loss: 0.006776665803045034 2023-01-22 14:37:05.413747: step: 146/469, loss: 0.0028881519101560116 2023-01-22 14:37:06.069942: step: 148/469, loss: 0.10274806618690491 2023-01-22 14:37:06.701113: step: 150/469, loss: 0.0005523923900909722 2023-01-22 14:37:07.334407: step: 152/469, loss: 0.0441497340798378 2023-01-22 14:37:07.962128: step: 154/469, loss: 0.036486443132162094 2023-01-22 14:37:08.628571: step: 156/469, loss: 0.0028733036015182734 2023-01-22 14:37:09.202842: step: 158/469, loss: 6.04199267399963e-05 2023-01-22 14:37:09.774465: step: 160/469, loss: 0.022768253460526466 2023-01-22 14:37:10.405130: step: 162/469, loss: 6.1829574406147e-05 2023-01-22 14:37:11.038554: step: 164/469, loss: 0.0013987660640850663 2023-01-22 14:37:11.658564: step: 166/469, loss: 0.0006711300811730325 2023-01-22 14:37:12.279476: step: 168/469, loss: 0.002706859027966857 2023-01-22 14:37:12.896159: step: 170/469, loss: 0.032152675092220306 2023-01-22 14:37:13.547575: step: 172/469, loss: 7.145424751797691e-05 2023-01-22 14:37:14.168391: step: 174/469, loss: 0.01663138158619404 2023-01-22 14:37:14.752245: step: 176/469, loss: 0.007105072028934956 2023-01-22 14:37:15.455117: step: 178/469, loss: 0.004928879905492067 2023-01-22 14:37:16.051876: step: 180/469, loss: 0.010595102794468403 2023-01-22 14:37:16.682424: step: 182/469, loss: 0.08913377672433853 2023-01-22 14:37:17.485297: step: 184/469, loss: 0.012893940322101116 2023-01-22 14:37:18.112486: step: 186/469, loss: 0.0009624919039197266 2023-01-22 14:37:18.654541: step: 188/469, loss: 0.0019938363693654537 2023-01-22 14:37:19.270437: step: 190/469, loss: 0.02950909361243248 2023-01-22 14:37:19.885381: step: 192/469, loss: 0.009299580939114094 2023-01-22 14:37:20.512723: step: 194/469, loss: 4.469795021577738e-05 2023-01-22 14:37:21.185638: step: 196/469, loss: 0.017768926918506622 2023-01-22 14:37:21.777262: step: 198/469, loss: 0.0019421406323090196 2023-01-22 14:37:22.410736: step: 200/469, loss: 0.003053602995350957 2023-01-22 14:37:23.130775: step: 202/469, loss: 0.020354555919766426 2023-01-22 14:37:23.716189: step: 204/469, loss: 0.012567868456244469 2023-01-22 14:37:24.397977: step: 206/469, loss: 0.007447153329849243 2023-01-22 14:37:25.020332: step: 208/469, loss: 0.34254640340805054 2023-01-22 14:37:25.673641: step: 210/469, loss: 0.012624618597328663 2023-01-22 14:37:26.272811: step: 212/469, loss: 0.05136948823928833 2023-01-22 14:37:26.897241: step: 214/469, loss: 0.6681893467903137 2023-01-22 14:37:27.511789: step: 216/469, loss: 0.024027040228247643 2023-01-22 14:37:28.143812: step: 218/469, loss: 0.00382420071400702 2023-01-22 14:37:28.817971: step: 220/469, loss: 0.0004367214278317988 2023-01-22 14:37:29.443383: step: 222/469, loss: 0.00037103978684172034 2023-01-22 14:37:30.074342: step: 224/469, loss: 0.0037761121056973934 2023-01-22 14:37:30.702928: step: 226/469, loss: 0.029214244335889816 2023-01-22 14:37:31.307375: step: 228/469, loss: 0.002395816845819354 2023-01-22 14:37:31.979550: step: 230/469, loss: 0.17381082475185394 2023-01-22 14:37:32.621111: step: 232/469, loss: 0.030532490462064743 2023-01-22 14:37:33.256623: step: 234/469, loss: 0.012518075294792652 2023-01-22 14:37:33.857257: step: 
236/469, loss: 0.004888856317847967 2023-01-22 14:37:34.427472: step: 238/469, loss: 0.013113731518387794 2023-01-22 14:37:35.012723: step: 240/469, loss: 0.008494261652231216 2023-01-22 14:37:35.597307: step: 242/469, loss: 0.016662968322634697 2023-01-22 14:37:36.228353: step: 244/469, loss: 0.008665439672768116 2023-01-22 14:37:36.777053: step: 246/469, loss: 0.001021687057800591 2023-01-22 14:37:37.461806: step: 248/469, loss: 0.0008591831428930163 2023-01-22 14:37:38.096860: step: 250/469, loss: 0.023028653115034103 2023-01-22 14:37:38.677980: step: 252/469, loss: 0.001024438999593258 2023-01-22 14:37:39.299280: step: 254/469, loss: 0.010340651497244835 2023-01-22 14:37:39.888895: step: 256/469, loss: 0.0374983474612236 2023-01-22 14:37:40.522657: step: 258/469, loss: 0.02440524473786354 2023-01-22 14:37:41.142822: step: 260/469, loss: 0.03594852238893509 2023-01-22 14:37:41.800071: step: 262/469, loss: 0.0031431352254003286 2023-01-22 14:37:42.474330: step: 264/469, loss: 0.3929257392883301 2023-01-22 14:37:43.090467: step: 266/469, loss: 0.005691472440958023 2023-01-22 14:37:43.632676: step: 268/469, loss: 0.001135129015892744 2023-01-22 14:37:44.276034: step: 270/469, loss: 0.005055820103734732 2023-01-22 14:37:44.896148: step: 272/469, loss: 0.001676631742157042 2023-01-22 14:37:45.546156: step: 274/469, loss: 0.0556795559823513 2023-01-22 14:37:46.134888: step: 276/469, loss: 0.009158093482255936 2023-01-22 14:37:46.776871: step: 278/469, loss: 0.0013554584002122283 2023-01-22 14:37:47.385582: step: 280/469, loss: 0.013460691086947918 2023-01-22 14:37:47.971633: step: 282/469, loss: 0.0020840729121118784 2023-01-22 14:37:48.573151: step: 284/469, loss: 0.00486515648663044 2023-01-22 14:37:49.246844: step: 286/469, loss: 0.0008479985990561545 2023-01-22 14:37:49.878064: step: 288/469, loss: 0.002161717973649502 2023-01-22 14:37:50.540386: step: 290/469, loss: 0.011881375685334206 2023-01-22 14:37:51.129130: step: 292/469, loss: 0.00757069606333971 2023-01-22 14:37:51.743109: step: 294/469, loss: 0.01817019283771515 2023-01-22 14:37:52.338050: step: 296/469, loss: 0.0027619285974651575 2023-01-22 14:37:53.002267: step: 298/469, loss: 0.001362512819468975 2023-01-22 14:37:53.609330: step: 300/469, loss: 0.006482349708676338 2023-01-22 14:37:54.272942: step: 302/469, loss: 0.0038111312314867973 2023-01-22 14:37:54.953081: step: 304/469, loss: 0.13215026259422302 2023-01-22 14:37:55.577858: step: 306/469, loss: 0.004076431971043348 2023-01-22 14:37:56.286170: step: 308/469, loss: 0.01719863899052143 2023-01-22 14:37:56.955478: step: 310/469, loss: 0.0044835517182946205 2023-01-22 14:37:57.550296: step: 312/469, loss: 0.0033766133710741997 2023-01-22 14:37:58.085377: step: 314/469, loss: 0.020229041576385498 2023-01-22 14:37:58.704739: step: 316/469, loss: 0.029720574617385864 2023-01-22 14:37:59.276923: step: 318/469, loss: 0.012220481410622597 2023-01-22 14:37:59.904722: step: 320/469, loss: 0.001467244466766715 2023-01-22 14:38:00.496324: step: 322/469, loss: 0.009732890874147415 2023-01-22 14:38:01.094426: step: 324/469, loss: 0.003811521688476205 2023-01-22 14:38:01.665283: step: 326/469, loss: 9.424789459444582e-05 2023-01-22 14:38:02.278516: step: 328/469, loss: 1.0046617984771729 2023-01-22 14:38:02.939706: step: 330/469, loss: 0.0038575257640331984 2023-01-22 14:38:03.551979: step: 332/469, loss: 0.09175248444080353 2023-01-22 14:38:04.098499: step: 334/469, loss: 2.0372726917266846 2023-01-22 14:38:04.696850: step: 336/469, loss: 0.17822694778442383 2023-01-22 
14:38:05.237434: step: 338/469, loss: 0.0017835474573075771 2023-01-22 14:38:05.866110: step: 340/469, loss: 0.021845972165465355 2023-01-22 14:38:06.473509: step: 342/469, loss: 0.0038313264958560467 2023-01-22 14:38:07.044113: step: 344/469, loss: 0.002929736627265811 2023-01-22 14:38:07.638694: step: 346/469, loss: 0.0031008594669401646 2023-01-22 14:38:08.266573: step: 348/469, loss: 0.03419598937034607 2023-01-22 14:38:08.865419: step: 350/469, loss: 0.08123939484357834 2023-01-22 14:38:09.458345: step: 352/469, loss: 0.00441707344725728 2023-01-22 14:38:10.059389: step: 354/469, loss: 0.00400394294410944 2023-01-22 14:38:10.677983: step: 356/469, loss: 0.017485061660408974 2023-01-22 14:38:11.330217: step: 358/469, loss: 0.0006666892440989614 2023-01-22 14:38:11.917736: step: 360/469, loss: 0.021029051393270493 2023-01-22 14:38:12.513469: step: 362/469, loss: 0.2805744409561157 2023-01-22 14:38:13.177430: step: 364/469, loss: 0.004257042892277241 2023-01-22 14:38:13.817613: step: 366/469, loss: 0.00023133697686716914 2023-01-22 14:38:14.435511: step: 368/469, loss: 0.07727959007024765 2023-01-22 14:38:15.111234: step: 370/469, loss: 0.006901529151946306 2023-01-22 14:38:15.694257: step: 372/469, loss: 0.017377611249685287 2023-01-22 14:38:16.284048: step: 374/469, loss: 0.001473445212468505 2023-01-22 14:38:16.885401: step: 376/469, loss: 0.0023926731664687395 2023-01-22 14:38:17.550007: step: 378/469, loss: 0.003412291407585144 2023-01-22 14:38:18.202201: step: 380/469, loss: 0.00804143212735653 2023-01-22 14:38:18.819139: step: 382/469, loss: 0.0014265509089455009 2023-01-22 14:38:19.454943: step: 384/469, loss: 0.042633287608623505 2023-01-22 14:38:20.035803: step: 386/469, loss: 0.022622903808951378 2023-01-22 14:38:20.711504: step: 388/469, loss: 0.021721651777625084 2023-01-22 14:38:21.317172: step: 390/469, loss: 0.0018895902903750539 2023-01-22 14:38:21.931145: step: 392/469, loss: 0.0019854996353387833 2023-01-22 14:38:22.546519: step: 394/469, loss: 0.01369100995361805 2023-01-22 14:38:23.153307: step: 396/469, loss: 0.00621099304407835 2023-01-22 14:38:23.842526: step: 398/469, loss: 0.01120622456073761 2023-01-22 14:38:24.572754: step: 400/469, loss: 0.0005261891055852175 2023-01-22 14:38:25.229017: step: 402/469, loss: 0.00047795631689950824 2023-01-22 14:38:25.852295: step: 404/469, loss: 0.02254605107009411 2023-01-22 14:38:26.465905: step: 406/469, loss: 0.04985782504081726 2023-01-22 14:38:27.101536: step: 408/469, loss: 0.0027778656221926212 2023-01-22 14:38:27.680443: step: 410/469, loss: 0.059414394199848175 2023-01-22 14:38:28.246972: step: 412/469, loss: 0.024252986535429955 2023-01-22 14:38:28.874879: step: 414/469, loss: 0.019016612321138382 2023-01-22 14:38:29.545853: step: 416/469, loss: 0.004256725311279297 2023-01-22 14:38:30.191778: step: 418/469, loss: 0.0007422525086440146 2023-01-22 14:38:30.767970: step: 420/469, loss: 0.05341136455535889 2023-01-22 14:38:31.443487: step: 422/469, loss: 0.007369490340352058 2023-01-22 14:38:32.027214: step: 424/469, loss: 0.06091062352061272 2023-01-22 14:38:32.706442: step: 426/469, loss: 0.007739037275314331 2023-01-22 14:38:33.303077: step: 428/469, loss: 0.0036941147409379482 2023-01-22 14:38:33.917142: step: 430/469, loss: 0.027182970196008682 2023-01-22 14:38:34.512288: step: 432/469, loss: 0.002020519459620118 2023-01-22 14:38:35.099377: step: 434/469, loss: 0.12288780510425568 2023-01-22 14:38:35.715513: step: 436/469, loss: 0.014308992773294449 2023-01-22 14:38:36.353817: step: 438/469, loss: 
0.03507082536816597 2023-01-22 14:38:36.960994: step: 440/469, loss: 0.001033384003676474 2023-01-22 14:38:37.565243: step: 442/469, loss: 0.9164943099021912 2023-01-22 14:38:38.210466: step: 444/469, loss: 0.004102073609828949 2023-01-22 14:38:38.836081: step: 446/469, loss: 0.0014659215230494738 2023-01-22 14:38:39.498158: step: 448/469, loss: 0.016013145446777344 2023-01-22 14:38:40.105000: step: 450/469, loss: 0.0547676607966423 2023-01-22 14:38:40.705921: step: 452/469, loss: 0.02253960631787777 2023-01-22 14:38:41.265004: step: 454/469, loss: 0.0020860759541392326 2023-01-22 14:38:41.819477: step: 456/469, loss: 0.0037214504554867744 2023-01-22 14:38:42.386777: step: 458/469, loss: 0.0073936269618570805 2023-01-22 14:38:42.959308: step: 460/469, loss: 0.010454141534864902 2023-01-22 14:38:43.521600: step: 462/469, loss: 0.006855126470327377 2023-01-22 14:38:44.158448: step: 464/469, loss: 0.022780634462833405 2023-01-22 14:38:44.817020: step: 466/469, loss: 0.03849603235721588 2023-01-22 14:38:45.456318: step: 468/469, loss: 0.012857901863753796 2023-01-22 14:38:46.137691: step: 470/469, loss: 0.0005129786441102624 2023-01-22 14:38:46.805867: step: 472/469, loss: 0.0010221077827736735 2023-01-22 14:38:47.398318: step: 474/469, loss: 0.017566831782460213 2023-01-22 14:38:47.998178: step: 476/469, loss: 0.008294445462524891 2023-01-22 14:38:48.682824: step: 478/469, loss: 0.025740107521414757 2023-01-22 14:38:49.333467: step: 480/469, loss: 0.005647765938192606 2023-01-22 14:38:49.897845: step: 482/469, loss: 0.00023753277491778135 2023-01-22 14:38:50.511545: step: 484/469, loss: 0.036121148616075516 2023-01-22 14:38:51.160592: step: 486/469, loss: 0.11313173919916153 2023-01-22 14:38:51.789548: step: 488/469, loss: 0.009546004235744476 2023-01-22 14:38:52.417390: step: 490/469, loss: 0.012960994616150856 2023-01-22 14:38:53.064472: step: 492/469, loss: 0.022116269916296005 2023-01-22 14:38:53.677623: step: 494/469, loss: 0.002578374929726124 2023-01-22 14:38:54.229006: step: 496/469, loss: 0.001413803081959486 2023-01-22 14:38:54.806256: step: 498/469, loss: 3.757523518288508e-05 2023-01-22 14:38:55.504871: step: 500/469, loss: 0.005428260657936335 2023-01-22 14:38:56.036890: step: 502/469, loss: 0.010336990468204021 2023-01-22 14:38:56.641455: step: 504/469, loss: 0.013869226910173893 2023-01-22 14:38:57.284569: step: 506/469, loss: 0.017277058213949203 2023-01-22 14:38:57.935378: step: 508/469, loss: 0.0038197608664631844 2023-01-22 14:38:58.500508: step: 510/469, loss: 0.025294004008173943 2023-01-22 14:38:59.126078: step: 512/469, loss: 0.485908567905426 2023-01-22 14:38:59.741083: step: 514/469, loss: 0.01573188789188862 2023-01-22 14:39:00.305132: step: 516/469, loss: 0.0028253868222236633 2023-01-22 14:39:00.992670: step: 518/469, loss: 0.007987121120095253 2023-01-22 14:39:01.660934: step: 520/469, loss: 0.010225597769021988 2023-01-22 14:39:02.254351: step: 522/469, loss: 0.0044867186807096004 2023-01-22 14:39:02.958918: step: 524/469, loss: 0.004424052778631449 2023-01-22 14:39:03.523556: step: 526/469, loss: 1.942397102538962e-05 2023-01-22 14:39:04.206670: step: 528/469, loss: 0.007998321205377579 2023-01-22 14:39:04.847305: step: 530/469, loss: 0.01795625314116478 2023-01-22 14:39:05.501703: step: 532/469, loss: 0.09316721558570862 2023-01-22 14:39:06.073812: step: 534/469, loss: 0.001042116666212678 2023-01-22 14:39:06.761215: step: 536/469, loss: 0.00992762204259634 2023-01-22 14:39:07.348634: step: 538/469, loss: 0.029562314972281456 2023-01-22 14:39:07.970404: step: 
540/469, loss: 0.0009023420279845595 2023-01-22 14:39:08.542793: step: 542/469, loss: 0.028390206396579742 2023-01-22 14:39:09.083637: step: 544/469, loss: 6.388746260199696e-05 2023-01-22 14:39:09.700666: step: 546/469, loss: 0.0031936070881783962 2023-01-22 14:39:10.341598: step: 548/469, loss: 0.008089176379144192 2023-01-22 14:39:11.008300: step: 550/469, loss: 0.029060736298561096 2023-01-22 14:39:11.614251: step: 552/469, loss: 0.01640424132347107 2023-01-22 14:39:12.318156: step: 554/469, loss: 0.0007548629655502737 2023-01-22 14:39:13.048253: step: 556/469, loss: 0.0028010231908410788 2023-01-22 14:39:13.664280: step: 558/469, loss: 0.0002140920259989798 2023-01-22 14:39:14.302991: step: 560/469, loss: 0.005593898706138134 2023-01-22 14:39:14.902815: step: 562/469, loss: 0.00156761787366122 2023-01-22 14:39:15.521306: step: 564/469, loss: 0.00020123986178077757 2023-01-22 14:39:16.121254: step: 566/469, loss: 0.009417970664799213 2023-01-22 14:39:16.788746: step: 568/469, loss: 0.00011312395508866757 2023-01-22 14:39:17.379328: step: 570/469, loss: 0.006780230440199375 2023-01-22 14:39:18.000703: step: 572/469, loss: 0.011906866915524006 2023-01-22 14:39:18.592378: step: 574/469, loss: 0.018123114481568336 2023-01-22 14:39:19.373539: step: 576/469, loss: 0.017633073031902313 2023-01-22 14:39:20.047387: step: 578/469, loss: 0.0015887359622865915 2023-01-22 14:39:20.647114: step: 580/469, loss: 0.00014775626186747104 2023-01-22 14:39:21.204005: step: 582/469, loss: 0.044115468859672546 2023-01-22 14:39:21.849026: step: 584/469, loss: 0.002225038595497608 2023-01-22 14:39:22.438114: step: 586/469, loss: 0.006928933784365654 2023-01-22 14:39:23.081812: step: 588/469, loss: 0.0031385808251798153 2023-01-22 14:39:23.705390: step: 590/469, loss: 0.0014847946586087346 2023-01-22 14:39:24.279888: step: 592/469, loss: 0.004237988963723183 2023-01-22 14:39:25.026083: step: 594/469, loss: 0.003485941793769598 2023-01-22 14:39:25.652063: step: 596/469, loss: 0.0012962986947968602 2023-01-22 14:39:26.271706: step: 598/469, loss: 0.008641515858471394 2023-01-22 14:39:26.830009: step: 600/469, loss: 0.01856308989226818 2023-01-22 14:39:27.473947: step: 602/469, loss: 0.12201689928770065 2023-01-22 14:39:28.119342: step: 604/469, loss: 0.00418493989855051 2023-01-22 14:39:28.697039: step: 606/469, loss: 0.008976966142654419 2023-01-22 14:39:29.301464: step: 608/469, loss: 0.010338297113776207 2023-01-22 14:39:29.918112: step: 610/469, loss: 0.004966519307345152 2023-01-22 14:39:30.555940: step: 612/469, loss: 0.006739511154592037 2023-01-22 14:39:31.208300: step: 614/469, loss: 0.00052741909166798 2023-01-22 14:39:31.759108: step: 616/469, loss: 0.0007422165363095701 2023-01-22 14:39:32.385854: step: 618/469, loss: 0.0005756103782914579 2023-01-22 14:39:33.001242: step: 620/469, loss: 0.002179077360779047 2023-01-22 14:39:33.690192: step: 622/469, loss: 0.000697400770150125 2023-01-22 14:39:34.279262: step: 624/469, loss: 0.005185157526284456 2023-01-22 14:39:34.887001: step: 626/469, loss: 0.0017763691721484065 2023-01-22 14:39:35.496973: step: 628/469, loss: 0.0018852520734071732 2023-01-22 14:39:36.080586: step: 630/469, loss: 0.03680843114852905 2023-01-22 14:39:36.706385: step: 632/469, loss: 0.02520536445081234 2023-01-22 14:39:37.391522: step: 634/469, loss: 0.008047816343605518 2023-01-22 14:39:38.017096: step: 636/469, loss: 0.05942419543862343 2023-01-22 14:39:38.619030: step: 638/469, loss: 0.03507499769330025 2023-01-22 14:39:39.160057: step: 640/469, loss: 0.00015529656957369298 
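The "Loss:" figure printed in each epoch summary (0.091 at the end of this epoch) appears to be the arithmetic mean of the per-step losses streamed above and below; a minimal sketch of how it could be re-derived from a captured copy of this output follows. The log file name and the regular expression are assumptions for illustration, not part of train.py.

# Minimal sketch (not part of train.py): the epoch summary "Loss: ..." printed
# after the last step appears to be the arithmetic mean of the per-step losses.
# "train.log" and the regex are assumptions about how this console output might
# be captured, not something taken from the training code.
import re

STEP_LOSS_RE = re.compile(r"step: \d+/469, loss: ([0-9eE+.-]+)")

def mean_step_loss(log_text: str) -> float:
    """Average every per-step loss value found in a chunk of this log."""
    losses = [float(m.group(1)) for m in STEP_LOSS_RE.finditer(log_text)]
    return sum(losses) / len(losses) if losses else float("nan")

if __name__ == "__main__":
    # Hypothetical capture of this console output.
    with open("train.log", encoding="utf-8") as f:
        print(f"mean step loss: {mean_step_loss(f.read()):.3f}")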
2023-01-22 14:39:39.736417: step: 642/469, loss: 0.010113165713846684 2023-01-22 14:39:40.440499: step: 644/469, loss: 0.0006901027518324554 2023-01-22 14:39:41.009789: step: 646/469, loss: 0.015838168561458588 2023-01-22 14:39:41.648640: step: 648/469, loss: 0.010356945917010307 2023-01-22 14:39:42.293533: step: 650/469, loss: 0.00045444778515957296 2023-01-22 14:39:42.927936: step: 652/469, loss: 0.04789530485868454 2023-01-22 14:39:43.574595: step: 654/469, loss: 0.00017765616939868778 2023-01-22 14:39:44.184732: step: 656/469, loss: 0.0028024199418723583 2023-01-22 14:39:44.798446: step: 658/469, loss: 0.046418748795986176 2023-01-22 14:39:45.448056: step: 660/469, loss: 0.01530068926513195 2023-01-22 14:39:46.129168: step: 662/469, loss: 0.028189966455101967 2023-01-22 14:39:46.752064: step: 664/469, loss: 0.012885896489024162 2023-01-22 14:39:47.368173: step: 666/469, loss: 0.004930654540657997 2023-01-22 14:39:48.026093: step: 668/469, loss: 0.004085281863808632 2023-01-22 14:39:48.641561: step: 670/469, loss: 0.03574523702263832 2023-01-22 14:39:49.363737: step: 672/469, loss: 0.0017322733765468001 2023-01-22 14:39:49.989287: step: 674/469, loss: 0.00633661774918437 2023-01-22 14:39:50.621541: step: 676/469, loss: 0.006138347554951906 2023-01-22 14:39:51.200358: step: 678/469, loss: 0.05056126415729523 2023-01-22 14:39:51.823056: step: 680/469, loss: 0.07599984109401703 2023-01-22 14:39:52.537179: step: 682/469, loss: 0.0013540622312575579 2023-01-22 14:39:53.139666: step: 684/469, loss: 0.005420427769422531 2023-01-22 14:39:53.787424: step: 686/469, loss: 0.17848405241966248 2023-01-22 14:39:54.547643: step: 688/469, loss: 0.025562219321727753 2023-01-22 14:39:55.152489: step: 690/469, loss: 0.08172006160020828 2023-01-22 14:39:55.812392: step: 692/469, loss: 0.003798913676291704 2023-01-22 14:39:56.462953: step: 694/469, loss: 0.007975392043590546 2023-01-22 14:39:57.086207: step: 696/469, loss: 0.015071935951709747 2023-01-22 14:39:57.659427: step: 698/469, loss: 0.0039918553084135056 2023-01-22 14:39:58.351742: step: 700/469, loss: 0.009902305901050568 2023-01-22 14:39:59.003411: step: 702/469, loss: 0.6302534937858582 2023-01-22 14:39:59.575941: step: 704/469, loss: 8.2736587501131e-05 2023-01-22 14:40:00.253231: step: 706/469, loss: 0.056186988949775696 2023-01-22 14:40:00.909820: step: 708/469, loss: 0.0038653018418699503 2023-01-22 14:40:01.550531: step: 710/469, loss: 0.004778795875608921 2023-01-22 14:40:02.187260: step: 712/469, loss: 0.008922592736780643 2023-01-22 14:40:02.887380: step: 714/469, loss: 0.0406455434858799 2023-01-22 14:40:03.519063: step: 716/469, loss: 0.35129010677337646 2023-01-22 14:40:04.090927: step: 718/469, loss: 0.00021591174299828708 2023-01-22 14:40:04.785286: step: 720/469, loss: 0.14848856627941132 2023-01-22 14:40:05.410281: step: 722/469, loss: 0.015957769006490707 2023-01-22 14:40:05.998223: step: 724/469, loss: 0.6531515717506409 2023-01-22 14:40:06.581098: step: 726/469, loss: 0.006518089212477207 2023-01-22 14:40:07.180295: step: 728/469, loss: 0.0049733794294297695 2023-01-22 14:40:07.784274: step: 730/469, loss: 0.002887943759560585 2023-01-22 14:40:08.499988: step: 732/469, loss: 0.0022062307689338923 2023-01-22 14:40:09.054640: step: 734/469, loss: 0.0024291295558214188 2023-01-22 14:40:09.653217: step: 736/469, loss: 0.054791562259197235 2023-01-22 14:40:10.293444: step: 738/469, loss: 0.028176110237836838 2023-01-22 14:40:10.928410: step: 740/469, loss: 0.026619803160429 2023-01-22 14:40:11.662371: step: 742/469, loss: 
0.004758811555802822 2023-01-22 14:40:12.238215: step: 744/469, loss: 0.00128581328317523 2023-01-22 14:40:12.942663: step: 746/469, loss: 0.04105202481150627 2023-01-22 14:40:13.634867: step: 748/469, loss: 0.0015457069966942072 2023-01-22 14:40:14.259106: step: 750/469, loss: 0.04305998608469963 2023-01-22 14:40:14.918143: step: 752/469, loss: 0.009557138197124004 2023-01-22 14:40:15.525453: step: 754/469, loss: 0.001405681949108839 2023-01-22 14:40:16.187575: step: 756/469, loss: 0.02468080446124077 2023-01-22 14:40:16.850646: step: 758/469, loss: 0.011018931865692139 2023-01-22 14:40:17.493650: step: 760/469, loss: 0.019617140293121338 2023-01-22 14:40:18.102013: step: 762/469, loss: 0.005259671714156866 2023-01-22 14:40:18.738451: step: 764/469, loss: 0.0014008820289745927 2023-01-22 14:40:19.349317: step: 766/469, loss: 0.004322248511016369 2023-01-22 14:40:19.963899: step: 768/469, loss: 0.0024822531268000603 2023-01-22 14:40:20.582429: step: 770/469, loss: 0.0076635791920125484 2023-01-22 14:40:21.203935: step: 772/469, loss: 0.0390038900077343 2023-01-22 14:40:21.837620: step: 774/469, loss: 0.02912021242082119 2023-01-22 14:40:22.448442: step: 776/469, loss: 0.0012746142456308007 2023-01-22 14:40:23.070856: step: 778/469, loss: 3.890005350112915 2023-01-22 14:40:23.681209: step: 780/469, loss: 0.033169832080602646 2023-01-22 14:40:24.293433: step: 782/469, loss: 0.0016558844363316894 2023-01-22 14:40:24.973001: step: 784/469, loss: 0.22162684798240662 2023-01-22 14:40:25.615892: step: 786/469, loss: 0.012074566446244717 2023-01-22 14:40:26.203740: step: 788/469, loss: 0.023014012724161148 2023-01-22 14:40:26.798132: step: 790/469, loss: 0.0008739625918678939 2023-01-22 14:40:27.383373: step: 792/469, loss: 0.00013586315617430955 2023-01-22 14:40:27.935961: step: 794/469, loss: 0.011108554899692535 2023-01-22 14:40:28.563285: step: 796/469, loss: 0.0015684902900829911 2023-01-22 14:40:29.167041: step: 798/469, loss: 0.03064385987818241 2023-01-22 14:40:29.770052: step: 800/469, loss: 0.00010309765639249235 2023-01-22 14:40:30.451732: step: 802/469, loss: 0.0014354257145896554 2023-01-22 14:40:31.086955: step: 804/469, loss: 2.1678266525268555 2023-01-22 14:40:31.749996: step: 806/469, loss: 0.017343001440167427 2023-01-22 14:40:32.369454: step: 808/469, loss: 0.04927444830536842 2023-01-22 14:40:33.012770: step: 810/469, loss: 0.0027439019177109003 2023-01-22 14:40:33.663964: step: 812/469, loss: 6.236750050447881e-05 2023-01-22 14:40:34.255438: step: 814/469, loss: 0.0031373342499136925 2023-01-22 14:40:34.903744: step: 816/469, loss: 0.008151421323418617 2023-01-22 14:40:35.522055: step: 818/469, loss: 0.00015864340821281075 2023-01-22 14:40:36.152053: step: 820/469, loss: 0.02975013665854931 2023-01-22 14:40:36.804240: step: 822/469, loss: 0.005652218125760555 2023-01-22 14:40:37.445001: step: 824/469, loss: 0.0030411549378186464 2023-01-22 14:40:38.053109: step: 826/469, loss: 0.03713679686188698 2023-01-22 14:40:38.639056: step: 828/469, loss: 0.0015378013486042619 2023-01-22 14:40:39.238417: step: 830/469, loss: 0.0037213508039712906 2023-01-22 14:40:39.780861: step: 832/469, loss: 0.0023648878559470177 2023-01-22 14:40:40.324883: step: 834/469, loss: 4.139569227845641e-06 2023-01-22 14:40:40.945107: step: 836/469, loss: 0.004509124904870987 2023-01-22 14:40:41.494912: step: 838/469, loss: 0.13292941451072693 2023-01-22 14:40:42.141242: step: 840/469, loss: 0.0561220645904541 2023-01-22 14:40:42.789351: step: 842/469, loss: 0.0004946504486724734 2023-01-22 14:40:43.477161: 
step: 844/469, loss: 0.027973726391792297 2023-01-22 14:40:44.156548: step: 846/469, loss: 0.20878294110298157 2023-01-22 14:40:44.740665: step: 848/469, loss: 0.009020388126373291 2023-01-22 14:40:45.338393: step: 850/469, loss: 0.0171761903911829 2023-01-22 14:40:46.017443: step: 852/469, loss: 0.02266085147857666 2023-01-22 14:40:46.617081: step: 854/469, loss: 0.005098353605717421 2023-01-22 14:40:47.204844: step: 856/469, loss: 1.4842417840554845e-05 2023-01-22 14:40:47.866209: step: 858/469, loss: 0.04865461215376854 2023-01-22 14:40:48.553522: step: 860/469, loss: 0.003949211910367012 2023-01-22 14:40:49.155438: step: 862/469, loss: 1.0983926057815552 2023-01-22 14:40:49.758135: step: 864/469, loss: 0.037031140178442 2023-01-22 14:40:50.397504: step: 866/469, loss: 0.222388356924057 2023-01-22 14:40:51.030692: step: 868/469, loss: 0.006978880148380995 2023-01-22 14:40:51.658932: step: 870/469, loss: 0.0023100259713828564 2023-01-22 14:40:52.298504: step: 872/469, loss: 0.008067827671766281 2023-01-22 14:40:52.916096: step: 874/469, loss: 0.0067926631309092045 2023-01-22 14:40:53.540880: step: 876/469, loss: 0.04275653511285782 2023-01-22 14:40:54.202750: step: 878/469, loss: 0.002950613386929035 2023-01-22 14:40:54.843836: step: 880/469, loss: 0.004311679862439632 2023-01-22 14:40:55.440495: step: 882/469, loss: 0.017032163217663765 2023-01-22 14:40:56.046260: step: 884/469, loss: 0.0003411119687370956 2023-01-22 14:40:56.700638: step: 886/469, loss: 0.052657030522823334 2023-01-22 14:40:57.314994: step: 888/469, loss: 0.019283462315797806 2023-01-22 14:40:57.985833: step: 890/469, loss: 0.048589691519737244 2023-01-22 14:40:58.638338: step: 892/469, loss: 0.0272402074187994 2023-01-22 14:40:59.258180: step: 894/469, loss: 0.578173816204071 2023-01-22 14:40:59.870279: step: 896/469, loss: 0.0022126534022390842 2023-01-22 14:41:00.443054: step: 898/469, loss: 0.015187570825219154 2023-01-22 14:41:01.055075: step: 900/469, loss: 0.007477788254618645 2023-01-22 14:41:01.694723: step: 902/469, loss: 0.00683231744915247 2023-01-22 14:41:02.302898: step: 904/469, loss: 0.0017801914364099503 2023-01-22 14:41:02.839603: step: 906/469, loss: 0.004318722523748875 2023-01-22 14:41:03.461956: step: 908/469, loss: 0.0005664682830683887 2023-01-22 14:41:04.017625: step: 910/469, loss: 0.040004484355449677 2023-01-22 14:41:04.655869: step: 912/469, loss: 0.013206937350332737 2023-01-22 14:41:05.267356: step: 914/469, loss: 0.009563728235661983 2023-01-22 14:41:05.831747: step: 916/469, loss: 0.003190231043845415 2023-01-22 14:41:06.395890: step: 918/469, loss: 0.0006425548926927149 2023-01-22 14:41:07.001642: step: 920/469, loss: 0.025662649422883987 2023-01-22 14:41:07.765049: step: 922/469, loss: 0.013986133970320225 2023-01-22 14:41:08.351724: step: 924/469, loss: 0.11339568346738815 2023-01-22 14:41:08.949900: step: 926/469, loss: 0.0007033753208816051 2023-01-22 14:41:09.554252: step: 928/469, loss: 0.006073335651308298 2023-01-22 14:41:10.217641: step: 930/469, loss: 0.03721383213996887 2023-01-22 14:41:10.771113: step: 932/469, loss: 0.04197828844189644 2023-01-22 14:41:11.334486: step: 934/469, loss: 0.0018439313862472773 2023-01-22 14:41:11.960983: step: 936/469, loss: 0.001097707194276154 2023-01-22 14:41:12.549660: step: 938/469, loss: 0.03952580317854881 ================================================== Loss: 0.091 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29446679877062437, 'r': 0.31402341728480243, 
'f1': 0.3039308372986058}, 'combined': 0.223949038009499, 'epoch': 34} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30750686556506185, 'r': 0.2737458190254393, 'f1': 0.28964586659710084}, 'combined': 0.15798865450750954, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2935819046870085, 'r': 0.32199305675349316, 'f1': 0.3071318387494858}, 'combined': 0.22630767065751584, 'epoch': 34} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30867806619401006, 'r': 0.27676532924989006, 'f1': 0.291851910149667}, 'combined': 0.15919195099072744, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2871492582620631, 'r': 0.31275839514691506, 'f1': 0.2994072193322874}, 'combined': 0.2206158458237907, 'epoch': 34} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30359845547597564, 'r': 0.2755440693798425, 'f1': 0.2888917677047173}, 'combined': 0.1575773278389367, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2312925170068027, 'r': 0.32380952380952377, 'f1': 0.26984126984126977}, 'combined': 0.17989417989417983, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2426470588235294, 'r': 0.358695652173913, 'f1': 0.2894736842105263}, 'combined': 0.14473684210526316, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': 
{'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:44:03.227594: step: 2/469, loss: 0.06223827227950096 2023-01-22 14:44:03.839620: step: 4/469, loss: 0.00801390316337347 2023-01-22 14:44:04.603063: step: 6/469, loss: 0.007537577301263809 2023-01-22 14:44:05.247275: step: 8/469, loss: 0.00014759607438463718 2023-01-22 14:44:05.781276: step: 10/469, loss: 0.007370145991444588 2023-01-22 14:44:06.347719: step: 12/469, loss: 0.0005001530516892672 2023-01-22 14:44:06.977502: step: 14/469, loss: 0.01761670596897602 2023-01-22 14:44:07.601363: step: 16/469, loss: 0.12962032854557037 2023-01-22 14:44:08.196502: step: 18/469, loss: 0.0010319643188267946 2023-01-22 14:44:08.856205: step: 20/469, loss: 0.016550837084650993 2023-01-22 14:44:09.434038: step: 22/469, loss: 0.30565378069877625 2023-01-22 14:44:10.158362: step: 24/469, loss: 0.00024738864158280194 2023-01-22 14:44:10.852448: step: 26/469, loss: 0.001660690875723958 2023-01-22 14:44:11.481738: step: 28/469, loss: 0.036150235682725906 2023-01-22 14:44:12.092548: step: 30/469, loss: 0.0013579098740592599 2023-01-22 14:44:12.656620: step: 32/469, loss: 0.005291574168950319 2023-01-22 14:44:13.259302: step: 34/469, loss: 0.010724920779466629 2023-01-22 14:44:13.911027: step: 36/469, loss: 0.003547102678567171 2023-01-22 14:44:14.484624: step: 38/469, loss: 0.00706341490149498 2023-01-22 14:44:15.118828: step: 40/469, loss: 0.0016088581178337336 2023-01-22 14:44:15.722121: step: 42/469, loss: 0.0027645586524158716 2023-01-22 14:44:16.264600: step: 44/469, loss: 0.0011111802887171507 2023-01-22 14:44:16.835364: step: 46/469, loss: 0.0009060550946742296 2023-01-22 14:44:17.404347: step: 48/469, loss: 0.0012177587486803532 2023-01-22 14:44:18.052210: step: 50/469, loss: 0.002270347438752651 2023-01-22 14:44:18.680485: step: 52/469, loss: 0.013230101205408573 2023-01-22 14:44:19.366276: step: 54/469, loss: 0.0015403461875393987 2023-01-22 14:44:19.950257: step: 56/469, loss: 0.035884175449609756 2023-01-22 14:44:20.557973: step: 58/469, loss: 1.0156863927841187 2023-01-22 14:44:21.202214: step: 60/469, loss: 0.01794080063700676 2023-01-22 14:44:21.783668: step: 62/469, loss: 0.012344960123300552 2023-01-22 14:44:22.377998: step: 64/469, loss: 0.029343632981181145 2023-01-22 14:44:22.977246: step: 66/469, loss: 0.0009511752869002521 2023-01-22 14:44:23.546684: step: 68/469, loss: 0.003018788993358612 2023-01-22 14:44:24.179156: step: 70/469, loss: 0.01511884294450283 2023-01-22 14:44:24.796904: step: 72/469, loss: 0.025081027299165726 2023-01-22 14:44:25.416282: step: 74/469, loss: 0.0034823277965188026 2023-01-22 14:44:26.075539: step: 76/469, loss: 0.00952210370451212 2023-01-22 14:44:26.705654: step: 78/469, loss: 0.028778832405805588 2023-01-22 14:44:27.302235: step: 80/469, loss: 0.02010970562696457 2023-01-22 14:44:28.054631: step: 82/469, loss: 0.3724760115146637 2023-01-22 14:44:28.644248: step: 84/469, loss: 
0.001976816216483712 2023-01-22 14:44:29.222926: step: 86/469, loss: 0.01593942940235138 2023-01-22 14:44:29.905586: step: 88/469, loss: 0.0011511326301842928 2023-01-22 14:44:30.550171: step: 90/469, loss: 0.012217110954225063 2023-01-22 14:44:31.207888: step: 92/469, loss: 0.007778637111186981 2023-01-22 14:44:31.866369: step: 94/469, loss: 0.0013589736772701144 2023-01-22 14:44:32.502861: step: 96/469, loss: 0.011407505720853806 2023-01-22 14:44:33.143409: step: 98/469, loss: 0.01613622158765793 2023-01-22 14:44:33.763829: step: 100/469, loss: 0.010910450480878353 2023-01-22 14:44:34.368516: step: 102/469, loss: 0.033466242253780365 2023-01-22 14:44:34.981670: step: 104/469, loss: 0.03147251531481743 2023-01-22 14:44:35.647267: step: 106/469, loss: 0.003176672151312232 2023-01-22 14:44:36.227665: step: 108/469, loss: 0.1680041402578354 2023-01-22 14:44:36.835824: step: 110/469, loss: 0.09557179361581802 2023-01-22 14:44:37.405185: step: 112/469, loss: 0.001714197569526732 2023-01-22 14:44:38.029316: step: 114/469, loss: 0.026560502126812935 2023-01-22 14:44:38.620843: step: 116/469, loss: 0.00048767152475193143 2023-01-22 14:44:39.346360: step: 118/469, loss: 0.0006235787877812982 2023-01-22 14:44:39.871161: step: 120/469, loss: 0.0025874904822558165 2023-01-22 14:44:40.547505: step: 122/469, loss: 0.004461661912500858 2023-01-22 14:44:41.255740: step: 124/469, loss: 0.013387758284807205 2023-01-22 14:44:41.846421: step: 126/469, loss: 0.03437449410557747 2023-01-22 14:44:42.474954: step: 128/469, loss: 0.01547978539019823 2023-01-22 14:44:43.105651: step: 130/469, loss: 0.027353087440133095 2023-01-22 14:44:43.734388: step: 132/469, loss: 0.10551943629980087 2023-01-22 14:44:44.330801: step: 134/469, loss: 0.0008355679456144571 2023-01-22 14:44:44.973009: step: 136/469, loss: 5.5443837482016534e-05 2023-01-22 14:44:45.598519: step: 138/469, loss: 0.01803562231361866 2023-01-22 14:44:46.213147: step: 140/469, loss: 0.0072751473635435104 2023-01-22 14:44:46.861843: step: 142/469, loss: 0.0002711582346819341 2023-01-22 14:44:47.478085: step: 144/469, loss: 0.010057334788143635 2023-01-22 14:44:48.116396: step: 146/469, loss: 0.0020914659835398197 2023-01-22 14:44:48.689536: step: 148/469, loss: 0.027402479201555252 2023-01-22 14:44:49.334106: step: 150/469, loss: 0.07805662602186203 2023-01-22 14:44:49.992833: step: 152/469, loss: 0.05145958438515663 2023-01-22 14:44:50.625401: step: 154/469, loss: 0.0231634508818388 2023-01-22 14:44:51.170654: step: 156/469, loss: 7.571755850221962e-05 2023-01-22 14:44:51.737986: step: 158/469, loss: 0.0025138002820312977 2023-01-22 14:44:52.378428: step: 160/469, loss: 0.02510700188577175 2023-01-22 14:44:53.025373: step: 162/469, loss: 0.0085305105894804 2023-01-22 14:44:53.691579: step: 164/469, loss: 0.0009671675506979227 2023-01-22 14:44:54.361939: step: 166/469, loss: 0.03375331312417984 2023-01-22 14:44:54.949295: step: 168/469, loss: 0.004886567126959562 2023-01-22 14:44:55.530903: step: 170/469, loss: 0.04269058629870415 2023-01-22 14:44:56.064731: step: 172/469, loss: 0.016756601631641388 2023-01-22 14:44:56.660607: step: 174/469, loss: 0.002597453538328409 2023-01-22 14:44:57.233823: step: 176/469, loss: 0.010407566092908382 2023-01-22 14:44:57.863574: step: 178/469, loss: 0.022121120244264603 2023-01-22 14:44:58.470599: step: 180/469, loss: 0.5732991099357605 2023-01-22 14:44:59.081784: step: 182/469, loss: 0.036545529961586 2023-01-22 14:44:59.681451: step: 184/469, loss: 0.004315868951380253 2023-01-22 14:45:00.295365: step: 186/469, loss: 
0.020468683913350105 2023-01-22 14:45:00.958622: step: 188/469, loss: 0.006902128458023071 2023-01-22 14:45:01.568618: step: 190/469, loss: 0.04524805769324303 2023-01-22 14:45:02.177695: step: 192/469, loss: 0.00020645791664719582 2023-01-22 14:45:02.809166: step: 194/469, loss: 0.002660428173840046 2023-01-22 14:45:03.402104: step: 196/469, loss: 0.06269151717424393 2023-01-22 14:45:04.000303: step: 198/469, loss: 0.0013125533005222678 2023-01-22 14:45:04.601270: step: 200/469, loss: 0.0731223076581955 2023-01-22 14:45:05.257778: step: 202/469, loss: 0.010240744799375534 2023-01-22 14:45:05.890232: step: 204/469, loss: 0.003099207766354084 2023-01-22 14:45:06.457596: step: 206/469, loss: 0.0004563714028336108 2023-01-22 14:45:07.063471: step: 208/469, loss: 0.004949130583554506 2023-01-22 14:45:07.769327: step: 210/469, loss: 0.013902345672249794 2023-01-22 14:45:08.510185: step: 212/469, loss: 0.0035656061954796314 2023-01-22 14:45:09.146894: step: 214/469, loss: 0.025212017819285393 2023-01-22 14:45:09.713856: step: 216/469, loss: 0.003123203292489052 2023-01-22 14:45:10.403218: step: 218/469, loss: 0.06892810761928558 2023-01-22 14:45:11.032345: step: 220/469, loss: 0.011356232687830925 2023-01-22 14:45:11.622757: step: 222/469, loss: 0.005522533785551786 2023-01-22 14:45:12.268348: step: 224/469, loss: 0.0005612036329694092 2023-01-22 14:45:12.906319: step: 226/469, loss: 0.010729297995567322 2023-01-22 14:45:13.539311: step: 228/469, loss: 0.010812995955348015 2023-01-22 14:45:14.091078: step: 230/469, loss: 0.00023187536862678826 2023-01-22 14:45:14.613789: step: 232/469, loss: 0.0005325591191649437 2023-01-22 14:45:15.282770: step: 234/469, loss: 0.0056231459602713585 2023-01-22 14:45:15.888895: step: 236/469, loss: 0.002099360106512904 2023-01-22 14:45:16.487823: step: 238/469, loss: 0.035660479217767715 2023-01-22 14:45:17.031253: step: 240/469, loss: 0.017806263640522957 2023-01-22 14:45:17.597319: step: 242/469, loss: 0.00563768669962883 2023-01-22 14:45:18.225394: step: 244/469, loss: 0.010401152074337006 2023-01-22 14:45:18.855057: step: 246/469, loss: 0.01913202367722988 2023-01-22 14:45:19.544624: step: 248/469, loss: 4.502174851950258e-05 2023-01-22 14:45:20.136542: step: 250/469, loss: 7.243495929287747e-05 2023-01-22 14:45:20.792683: step: 252/469, loss: 0.0009222208173014224 2023-01-22 14:45:21.493669: step: 254/469, loss: 0.07658590376377106 2023-01-22 14:45:22.092910: step: 256/469, loss: 0.004743319004774094 2023-01-22 14:45:22.723456: step: 258/469, loss: 0.0024721897207200527 2023-01-22 14:45:23.305766: step: 260/469, loss: 0.0011867209104821086 2023-01-22 14:45:24.013490: step: 262/469, loss: 0.0561700165271759 2023-01-22 14:45:24.586802: step: 264/469, loss: 0.014542055316269398 2023-01-22 14:45:25.206161: step: 266/469, loss: 0.418611615896225 2023-01-22 14:45:25.795860: step: 268/469, loss: 0.0036808205768465996 2023-01-22 14:45:26.442106: step: 270/469, loss: 0.01344411913305521 2023-01-22 14:45:27.051543: step: 272/469, loss: 0.010014382191002369 2023-01-22 14:45:27.677389: step: 274/469, loss: 0.0018394882790744305 2023-01-22 14:45:28.445120: step: 276/469, loss: 0.06465846300125122 2023-01-22 14:45:29.095463: step: 278/469, loss: 0.007905179634690285 2023-01-22 14:45:29.691929: step: 280/469, loss: 0.0169659536331892 2023-01-22 14:45:30.287981: step: 282/469, loss: 0.0152912437915802 2023-01-22 14:45:30.936663: step: 284/469, loss: 0.04542376101016998 2023-01-22 14:45:31.576340: step: 286/469, loss: 0.004321933723986149 2023-01-22 14:45:32.235250: step: 
288/469, loss: 0.044069863855838776 2023-01-22 14:45:32.827128: step: 290/469, loss: 0.0033424466382712126 2023-01-22 14:45:33.516187: step: 292/469, loss: 0.01769493706524372 2023-01-22 14:45:34.125658: step: 294/469, loss: 0.00044977504876442254 2023-01-22 14:45:34.703922: step: 296/469, loss: 0.0023002258967608213 2023-01-22 14:45:35.361223: step: 298/469, loss: 0.0354275219142437 2023-01-22 14:45:35.920279: step: 300/469, loss: 0.0009577972814440727 2023-01-22 14:45:36.523461: step: 302/469, loss: 0.010117345489561558 2023-01-22 14:45:37.185345: step: 304/469, loss: 0.029526468366384506 2023-01-22 14:45:37.931852: step: 306/469, loss: 0.037232935428619385 2023-01-22 14:45:38.568360: step: 308/469, loss: 0.0014969746116548777 2023-01-22 14:45:39.158215: step: 310/469, loss: 0.001418725703842938 2023-01-22 14:45:39.758458: step: 312/469, loss: 0.039661943912506104 2023-01-22 14:45:40.370538: step: 314/469, loss: 0.03849601745605469 2023-01-22 14:45:41.046224: step: 316/469, loss: 0.004174413625150919 2023-01-22 14:45:41.657614: step: 318/469, loss: 0.0023285315837711096 2023-01-22 14:45:42.307947: step: 320/469, loss: 0.0018768302397802472 2023-01-22 14:45:42.909282: step: 322/469, loss: 0.0007896666647866368 2023-01-22 14:45:43.513776: step: 324/469, loss: 0.21864452958106995 2023-01-22 14:45:44.176199: step: 326/469, loss: 0.013234913349151611 2023-01-22 14:45:44.802063: step: 328/469, loss: 0.0040725478902459145 2023-01-22 14:45:45.456882: step: 330/469, loss: 0.12069471180438995 2023-01-22 14:45:46.082559: step: 332/469, loss: 0.006533664185553789 2023-01-22 14:45:46.734182: step: 334/469, loss: 0.0002847716095857322 2023-01-22 14:45:47.334454: step: 336/469, loss: 0.0018084668554365635 2023-01-22 14:45:47.954898: step: 338/469, loss: 0.11005932092666626 2023-01-22 14:45:48.544138: step: 340/469, loss: 0.019758908078074455 2023-01-22 14:45:49.187091: step: 342/469, loss: 0.018155261874198914 2023-01-22 14:45:49.918315: step: 344/469, loss: 0.006524763535708189 2023-01-22 14:45:50.514826: step: 346/469, loss: 0.07354401797056198 2023-01-22 14:45:51.086233: step: 348/469, loss: 0.009025816805660725 2023-01-22 14:45:51.692614: step: 350/469, loss: 0.002713108202442527 2023-01-22 14:45:52.338293: step: 352/469, loss: 0.03147520497441292 2023-01-22 14:45:52.982623: step: 354/469, loss: 0.0009897833224385977 2023-01-22 14:45:53.581433: step: 356/469, loss: 0.0012717392528429627 2023-01-22 14:45:54.218632: step: 358/469, loss: 0.032479457557201385 2023-01-22 14:45:54.842766: step: 360/469, loss: 0.0005192225216887891 2023-01-22 14:45:55.398341: step: 362/469, loss: 0.005973644554615021 2023-01-22 14:45:55.991296: step: 364/469, loss: 0.009888162836432457 2023-01-22 14:45:56.614832: step: 366/469, loss: 0.011599777266383171 2023-01-22 14:45:57.171387: step: 368/469, loss: 0.01079634577035904 2023-01-22 14:45:57.784249: step: 370/469, loss: 0.001837767194956541 2023-01-22 14:45:58.426842: step: 372/469, loss: 0.008005251176655293 2023-01-22 14:45:59.010014: step: 374/469, loss: 0.010463404469192028 2023-01-22 14:45:59.653211: step: 376/469, loss: 0.019618261605501175 2023-01-22 14:46:00.280163: step: 378/469, loss: 0.005425151903182268 2023-01-22 14:46:00.911171: step: 380/469, loss: 0.0055457246489822865 2023-01-22 14:46:01.538192: step: 382/469, loss: 0.0004786673525813967 2023-01-22 14:46:02.164306: step: 384/469, loss: 0.007368323393166065 2023-01-22 14:46:02.859505: step: 386/469, loss: 0.05301499366760254 2023-01-22 14:46:03.474533: step: 388/469, loss: 0.001236757030710578 2023-01-22 
14:46:04.099645: step: 390/469, loss: 0.02682354487478733 2023-01-22 14:46:04.796412: step: 392/469, loss: 0.013161814771592617 2023-01-22 14:46:05.433602: step: 394/469, loss: 0.034417010843753815 2023-01-22 14:46:06.081965: step: 396/469, loss: 0.021855738013982773 2023-01-22 14:46:06.711694: step: 398/469, loss: 0.11348588764667511 2023-01-22 14:46:07.264252: step: 400/469, loss: 0.05384557694196701 2023-01-22 14:46:07.914584: step: 402/469, loss: 0.0439118817448616 2023-01-22 14:46:08.551550: step: 404/469, loss: 0.022635243833065033 2023-01-22 14:46:09.130114: step: 406/469, loss: 0.0011401812080293894 2023-01-22 14:46:09.735495: step: 408/469, loss: 0.02124967612326145 2023-01-22 14:46:10.342960: step: 410/469, loss: 0.005235178861767054 2023-01-22 14:46:10.970300: step: 412/469, loss: 0.06608827412128448 2023-01-22 14:46:11.586202: step: 414/469, loss: 0.009553633630275726 2023-01-22 14:46:12.227320: step: 416/469, loss: 0.1105639860033989 2023-01-22 14:46:12.844094: step: 418/469, loss: 0.004002057481557131 2023-01-22 14:46:13.496657: step: 420/469, loss: 0.0009886876214295626 2023-01-22 14:46:14.102828: step: 422/469, loss: 0.0018955428386107087 2023-01-22 14:46:14.733419: step: 424/469, loss: 0.014699460938572884 2023-01-22 14:46:15.395149: step: 426/469, loss: 0.0007934740860946476 2023-01-22 14:46:15.978812: step: 428/469, loss: 0.0005282569909468293 2023-01-22 14:46:16.610554: step: 430/469, loss: 0.002740419702604413 2023-01-22 14:46:17.205715: step: 432/469, loss: 0.06098758429288864 2023-01-22 14:46:17.778361: step: 434/469, loss: 0.0027008354663848877 2023-01-22 14:46:18.400114: step: 436/469, loss: 0.020581193268299103 2023-01-22 14:46:19.032664: step: 438/469, loss: 0.0008362382650375366 2023-01-22 14:46:19.629251: step: 440/469, loss: 0.009131846949458122 2023-01-22 14:46:20.276177: step: 442/469, loss: 0.0007631309563294053 2023-01-22 14:46:20.827252: step: 444/469, loss: 0.006298655178397894 2023-01-22 14:46:21.451736: step: 446/469, loss: 0.00956434290856123 2023-01-22 14:46:22.091609: step: 448/469, loss: 0.0022332214284688234 2023-01-22 14:46:22.697425: step: 450/469, loss: 0.03460382670164108 2023-01-22 14:46:23.309959: step: 452/469, loss: 0.010300195775926113 2023-01-22 14:46:23.923302: step: 454/469, loss: 0.05888713151216507 2023-01-22 14:46:24.512562: step: 456/469, loss: 5.7322900829603896e-05 2023-01-22 14:46:25.164544: step: 458/469, loss: 0.00104769435711205 2023-01-22 14:46:25.769945: step: 460/469, loss: 0.018517985939979553 2023-01-22 14:46:26.366105: step: 462/469, loss: 0.0035709377843886614 2023-01-22 14:46:26.963753: step: 464/469, loss: 0.0015351761830970645 2023-01-22 14:46:27.659088: step: 466/469, loss: 0.04311732202768326 2023-01-22 14:46:28.220239: step: 468/469, loss: 0.0011024208506569266 2023-01-22 14:46:28.870973: step: 470/469, loss: 0.01289608795195818 2023-01-22 14:46:29.453421: step: 472/469, loss: 0.003955214750021696 2023-01-22 14:46:30.098485: step: 474/469, loss: 0.002936768811196089 2023-01-22 14:46:30.740287: step: 476/469, loss: 0.038597188889980316 2023-01-22 14:46:31.333296: step: 478/469, loss: 0.0023800497874617577 2023-01-22 14:46:31.946037: step: 480/469, loss: 0.005549492314457893 2023-01-22 14:46:32.556606: step: 482/469, loss: 0.011222630739212036 2023-01-22 14:46:33.148213: step: 484/469, loss: 0.04048134759068489 2023-01-22 14:46:33.800261: step: 486/469, loss: 0.019612450152635574 2023-01-22 14:46:34.403117: step: 488/469, loss: 0.003918325528502464 2023-01-22 14:46:34.980866: step: 490/469, loss: 
0.0009998517343774438 2023-01-22 14:46:35.591143: step: 492/469, loss: 0.0001641609414946288 2023-01-22 14:46:36.234488: step: 494/469, loss: 0.0014647396747022867 2023-01-22 14:46:36.808488: step: 496/469, loss: 0.0015139882452785969 2023-01-22 14:46:37.358914: step: 498/469, loss: 0.017345961183309555 2023-01-22 14:46:38.015852: step: 500/469, loss: 0.007861591875553131 2023-01-22 14:46:38.663491: step: 502/469, loss: 8.483230340061709e-05 2023-01-22 14:46:39.286097: step: 504/469, loss: 0.03512251004576683 2023-01-22 14:46:39.873050: step: 506/469, loss: 0.019959785044193268 2023-01-22 14:46:40.571975: step: 508/469, loss: 0.01568104699254036 2023-01-22 14:46:41.170750: step: 510/469, loss: 0.03988457843661308 2023-01-22 14:46:41.792594: step: 512/469, loss: 0.008457482792437077 2023-01-22 14:46:42.375675: step: 514/469, loss: 0.00017965841107070446 2023-01-22 14:46:43.066464: step: 516/469, loss: 4.432436253409833e-05 2023-01-22 14:46:43.665596: step: 518/469, loss: 0.037693727761507034 2023-01-22 14:46:44.255694: step: 520/469, loss: 0.00730451475828886 2023-01-22 14:46:44.893572: step: 522/469, loss: 0.009752505458891392 2023-01-22 14:46:45.568674: step: 524/469, loss: 0.01299829687923193 2023-01-22 14:46:46.173063: step: 526/469, loss: 0.0004542673414107412 2023-01-22 14:46:46.759032: step: 528/469, loss: 0.0005608549690805376 2023-01-22 14:46:47.328685: step: 530/469, loss: 3.6528388591250405e-05 2023-01-22 14:46:47.954772: step: 532/469, loss: 0.020629290491342545 2023-01-22 14:46:48.623736: step: 534/469, loss: 0.03824291378259659 2023-01-22 14:46:49.181039: step: 536/469, loss: 0.0007615429349243641 2023-01-22 14:46:49.753680: step: 538/469, loss: 0.0003051344829145819 2023-01-22 14:46:50.390625: step: 540/469, loss: 0.07173144817352295 2023-01-22 14:46:50.983652: step: 542/469, loss: 0.004043711815029383 2023-01-22 14:46:51.676242: step: 544/469, loss: 0.004293826408684254 2023-01-22 14:46:52.296757: step: 546/469, loss: 0.0028995154425501823 2023-01-22 14:46:52.899303: step: 548/469, loss: 0.02083742432296276 2023-01-22 14:46:53.501725: step: 550/469, loss: 0.019430281594395638 2023-01-22 14:46:54.074243: step: 552/469, loss: 0.014926875941455364 2023-01-22 14:46:54.719628: step: 554/469, loss: 0.002948716515675187 2023-01-22 14:46:55.317712: step: 556/469, loss: 0.020838819444179535 2023-01-22 14:46:55.913330: step: 558/469, loss: 0.0014127717586234212 2023-01-22 14:46:56.560796: step: 560/469, loss: 0.0012188029941171408 2023-01-22 14:46:57.164353: step: 562/469, loss: 0.004385044332593679 2023-01-22 14:46:57.729530: step: 564/469, loss: 0.002608982613310218 2023-01-22 14:46:58.327759: step: 566/469, loss: 0.003080811118707061 2023-01-22 14:46:58.928375: step: 568/469, loss: 0.005462948698550463 2023-01-22 14:46:59.515522: step: 570/469, loss: 0.0015820726985111833 2023-01-22 14:47:00.097141: step: 572/469, loss: 0.2151658535003662 2023-01-22 14:47:00.670410: step: 574/469, loss: 0.023377683013677597 2023-01-22 14:47:01.263230: step: 576/469, loss: 0.013376289047300816 2023-01-22 14:47:01.881656: step: 578/469, loss: 0.0034325220622122288 2023-01-22 14:47:02.511121: step: 580/469, loss: 0.038481228053569794 2023-01-22 14:47:03.115635: step: 582/469, loss: 0.003627503290772438 2023-01-22 14:47:03.732493: step: 584/469, loss: 0.092597596347332 2023-01-22 14:47:04.416808: step: 586/469, loss: 0.006897504907101393 2023-01-22 14:47:04.986892: step: 588/469, loss: 0.008465752005577087 2023-01-22 14:47:05.579361: step: 590/469, loss: 0.001693866797722876 2023-01-22 14:47:06.275170: 
step: 592/469, loss: 0.007011231034994125 2023-01-22 14:47:06.863358: step: 594/469, loss: 0.0005285179358907044 2023-01-22 14:47:07.494562: step: 596/469, loss: 0.0416235513985157 2023-01-22 14:47:08.085858: step: 598/469, loss: 0.0005588795174844563 2023-01-22 14:47:08.742530: step: 600/469, loss: 0.038418516516685486 2023-01-22 14:47:09.378842: step: 602/469, loss: 0.003238404169678688 2023-01-22 14:47:10.019776: step: 604/469, loss: 5.870701716048643e-05 2023-01-22 14:47:10.579614: step: 606/469, loss: 0.004645701963454485 2023-01-22 14:47:11.251502: step: 608/469, loss: 0.010543636046350002 2023-01-22 14:47:11.944081: step: 610/469, loss: 0.0011761996429413557 2023-01-22 14:47:12.567058: step: 612/469, loss: 0.005731097888201475 2023-01-22 14:47:13.212491: step: 614/469, loss: 0.022229019552469254 2023-01-22 14:47:13.840874: step: 616/469, loss: 0.0005212425021454692 2023-01-22 14:47:14.499209: step: 618/469, loss: 0.001753071672283113 2023-01-22 14:47:15.228653: step: 620/469, loss: 0.0012778625823557377 2023-01-22 14:47:15.769864: step: 622/469, loss: 0.020835336297750473 2023-01-22 14:47:16.383157: step: 624/469, loss: 0.0034977321047335863 2023-01-22 14:47:16.998872: step: 626/469, loss: 0.024047844111919403 2023-01-22 14:47:17.624981: step: 628/469, loss: 0.006701468024402857 2023-01-22 14:47:18.266880: step: 630/469, loss: 0.000508688623085618 2023-01-22 14:47:18.936070: step: 632/469, loss: 0.0008328788680955768 2023-01-22 14:47:19.637641: step: 634/469, loss: 0.0007181655382737517 2023-01-22 14:47:20.263874: step: 636/469, loss: 0.0021388118620961905 2023-01-22 14:47:20.856958: step: 638/469, loss: 0.029636982828378677 2023-01-22 14:47:21.502940: step: 640/469, loss: 0.05095604434609413 2023-01-22 14:47:22.126173: step: 642/469, loss: 0.004727157298475504 2023-01-22 14:47:22.713469: step: 644/469, loss: 1.4363168478012085 2023-01-22 14:47:23.340470: step: 646/469, loss: 0.0017516098450869322 2023-01-22 14:47:23.939677: step: 648/469, loss: 0.07704787701368332 2023-01-22 14:47:24.506088: step: 650/469, loss: 0.015268268994987011 2023-01-22 14:47:25.193625: step: 652/469, loss: 0.006294921040534973 2023-01-22 14:47:25.784990: step: 654/469, loss: 0.029882969334721565 2023-01-22 14:47:26.442542: step: 656/469, loss: 0.02858980931341648 2023-01-22 14:47:27.097773: step: 658/469, loss: 0.04057031497359276 2023-01-22 14:47:27.754832: step: 660/469, loss: 0.003819218371063471 2023-01-22 14:47:28.405607: step: 662/469, loss: 0.04323418438434601 2023-01-22 14:47:28.983070: step: 664/469, loss: 0.0002988446212839335 2023-01-22 14:47:29.656693: step: 666/469, loss: 0.18204519152641296 2023-01-22 14:47:30.300542: step: 668/469, loss: 0.0025522124487906694 2023-01-22 14:47:30.875473: step: 670/469, loss: 0.00962864700704813 2023-01-22 14:47:31.467086: step: 672/469, loss: 0.004951891489326954 2023-01-22 14:47:32.044238: step: 674/469, loss: 0.0006246576085686684 2023-01-22 14:47:32.671087: step: 676/469, loss: 0.0008598135318607092 2023-01-22 14:47:33.345057: step: 678/469, loss: 2.0176716134301387e-05 2023-01-22 14:47:33.950902: step: 680/469, loss: 0.024525316432118416 2023-01-22 14:47:34.537350: step: 682/469, loss: 0.0039021417032927275 2023-01-22 14:47:35.169295: step: 684/469, loss: 0.001621451461687684 2023-01-22 14:47:35.752963: step: 686/469, loss: 0.001344478689134121 2023-01-22 14:47:36.328891: step: 688/469, loss: 0.03194309026002884 2023-01-22 14:47:36.959371: step: 690/469, loss: 0.017155833542346954 2023-01-22 14:47:37.648331: step: 692/469, loss: 0.0018762206891551614 
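In the Dev/Test/Sample blocks printed at each epoch boundary, the 'combined' score consistently matches the product of the template F1 and the slot F1, with each F1 equal to the harmonic mean of the printed precision and recall. The short sketch below reproduces the epoch-34 Dev Chinese value from its p/r entries; this relationship is inferred from the logged numbers, and the names used are illustrative rather than taken from the evaluation code.

# Minimal sketch: the printed 'combined' value matches template_f1 * slot_f1,
# and each f1 matches the harmonic mean of the printed p and r. Inferred from
# the numbers in this log; function names are illustrative only.
def f1(p: float, r: float) -> float:
    """Standard F1 from precision and recall (0.0 when both are zero)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template_f1: float, slot_f1: float) -> float:
    """Apparent definition of 'combined': product of template F1 and slot F1."""
    return template_f1 * slot_f1

# Check against the epoch-34 Dev Chinese block:
#   template p=1.0,      r=0.583333... -> f1 ~ 0.736842
#   slot     p=0.294467, r=0.314023    -> f1 ~ 0.303931
#   product  ~ 0.223949, matching the printed 'combined': 0.223949038009499
print(combined_score(f1(1.0, 0.5833333333333334),
                     f1(0.29446679877062437, 0.31402341728480243)))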
2023-01-22 14:47:38.328201: step: 694/469, loss: 0.00497461436316371 2023-01-22 14:47:39.020942: step: 696/469, loss: 0.01276924554258585 2023-01-22 14:47:39.675686: step: 698/469, loss: 0.0005737309111282229 2023-01-22 14:47:40.260242: step: 700/469, loss: 0.0007732072845101357 2023-01-22 14:47:40.902682: step: 702/469, loss: 0.0014760341728106141 2023-01-22 14:47:41.505178: step: 704/469, loss: 0.003231868613511324 2023-01-22 14:47:42.175583: step: 706/469, loss: 0.003730586264282465 2023-01-22 14:47:42.771814: step: 708/469, loss: 0.03210756555199623 2023-01-22 14:47:43.360305: step: 710/469, loss: 0.015252848155796528 2023-01-22 14:47:44.013122: step: 712/469, loss: 0.06766407936811447 2023-01-22 14:47:44.678782: step: 714/469, loss: 0.007349667139351368 2023-01-22 14:47:45.521036: step: 716/469, loss: 0.009399761445820332 2023-01-22 14:47:46.174069: step: 718/469, loss: 0.10491109639406204 2023-01-22 14:47:46.830468: step: 720/469, loss: 0.002760874340310693 2023-01-22 14:47:47.589413: step: 722/469, loss: 0.02332591637969017 2023-01-22 14:47:48.178889: step: 724/469, loss: 0.004359787795692682 2023-01-22 14:47:48.806106: step: 726/469, loss: 0.008645017631351948 2023-01-22 14:47:49.454913: step: 728/469, loss: 0.0009867568733170629 2023-01-22 14:47:50.131472: step: 730/469, loss: 0.0003513229312375188 2023-01-22 14:47:50.698242: step: 732/469, loss: 2.46677303314209 2023-01-22 14:47:51.311028: step: 734/469, loss: 0.02201024256646633 2023-01-22 14:47:51.883592: step: 736/469, loss: 0.016579758375883102 2023-01-22 14:47:52.508295: step: 738/469, loss: 0.038843315094709396 2023-01-22 14:47:53.195509: step: 740/469, loss: 0.00020381015201564878 2023-01-22 14:47:53.803167: step: 742/469, loss: 0.012041660025715828 2023-01-22 14:47:54.506282: step: 744/469, loss: 0.20232078433036804 2023-01-22 14:47:55.116875: step: 746/469, loss: 0.003205152228474617 2023-01-22 14:47:55.710259: step: 748/469, loss: 0.004757591988891363 2023-01-22 14:47:56.328933: step: 750/469, loss: 6.730795576004311e-05 2023-01-22 14:47:56.888033: step: 752/469, loss: 0.004922729916870594 2023-01-22 14:47:57.505307: step: 754/469, loss: 8.53805904625915e-05 2023-01-22 14:47:58.056313: step: 756/469, loss: 0.012571812607347965 2023-01-22 14:47:58.735981: step: 758/469, loss: 0.002861930988729 2023-01-22 14:47:59.383128: step: 760/469, loss: 0.0002941860584542155 2023-01-22 14:47:59.957734: step: 762/469, loss: 0.005963773466646671 2023-01-22 14:48:00.597868: step: 764/469, loss: 0.13207204639911652 2023-01-22 14:48:01.162053: step: 766/469, loss: 0.009327885694801807 2023-01-22 14:48:01.814322: step: 768/469, loss: 0.0031073433347046375 2023-01-22 14:48:02.443935: step: 770/469, loss: 0.02418460138142109 2023-01-22 14:48:03.189066: step: 772/469, loss: 0.01933264173567295 2023-01-22 14:48:03.843753: step: 774/469, loss: 0.007950271479785442 2023-01-22 14:48:04.444468: step: 776/469, loss: 0.0024820449762046337 2023-01-22 14:48:05.050371: step: 778/469, loss: 0.030494293197989464 2023-01-22 14:48:05.779076: step: 780/469, loss: 0.00631794473156333 2023-01-22 14:48:06.441209: step: 782/469, loss: 0.002698679454624653 2023-01-22 14:48:07.018092: step: 784/469, loss: 2.5327946787001565e-05 2023-01-22 14:48:07.661912: step: 786/469, loss: 0.04262625426054001 2023-01-22 14:48:08.262487: step: 788/469, loss: 0.0002699931210372597 2023-01-22 14:48:08.877485: step: 790/469, loss: 0.0003126077353954315 2023-01-22 14:48:09.487111: step: 792/469, loss: 0.00014261103933677077 2023-01-22 14:48:10.100139: step: 794/469, loss: 
0.10336020588874817 2023-01-22 14:48:10.729265: step: 796/469, loss: 0.003584097372367978 2023-01-22 14:48:11.355877: step: 798/469, loss: 0.011599306017160416 2023-01-22 14:48:11.956723: step: 800/469, loss: 0.018106209114193916 2023-01-22 14:48:12.627040: step: 802/469, loss: 0.013742119073867798 2023-01-22 14:48:13.357908: step: 804/469, loss: 0.009712567552924156 2023-01-22 14:48:14.037067: step: 806/469, loss: 0.018879614770412445 2023-01-22 14:48:14.634896: step: 808/469, loss: 0.004268000368028879 2023-01-22 14:48:15.168159: step: 810/469, loss: 0.0032297568395733833 2023-01-22 14:48:15.869422: step: 812/469, loss: 0.0032134992070496082 2023-01-22 14:48:16.486091: step: 814/469, loss: 0.0198863185942173 2023-01-22 14:48:17.114927: step: 816/469, loss: 0.0004367715446278453 2023-01-22 14:48:17.763085: step: 818/469, loss: 0.00891380850225687 2023-01-22 14:48:18.369809: step: 820/469, loss: 0.025122186169028282 2023-01-22 14:48:18.992955: step: 822/469, loss: 0.0024353195913136005 2023-01-22 14:48:19.618678: step: 824/469, loss: 0.003031976753845811 2023-01-22 14:48:20.282324: step: 826/469, loss: 0.014371187426149845 2023-01-22 14:48:20.964371: step: 828/469, loss: 0.0011106326710432768 2023-01-22 14:48:21.646707: step: 830/469, loss: 0.008817785419523716 2023-01-22 14:48:22.276627: step: 832/469, loss: 0.024798348546028137 2023-01-22 14:48:22.840176: step: 834/469, loss: 0.011286124587059021 2023-01-22 14:48:23.440423: step: 836/469, loss: 0.029052378609776497 2023-01-22 14:48:24.095772: step: 838/469, loss: 0.10058391094207764 2023-01-22 14:48:24.718182: step: 840/469, loss: 0.006272433791309595 2023-01-22 14:48:25.315815: step: 842/469, loss: 0.0011993744410574436 2023-01-22 14:48:25.949021: step: 844/469, loss: 0.009213746525347233 2023-01-22 14:48:26.515088: step: 846/469, loss: 0.016731230542063713 2023-01-22 14:48:27.118675: step: 848/469, loss: 0.0027808707673102617 2023-01-22 14:48:27.746428: step: 850/469, loss: 0.0035659456625580788 2023-01-22 14:48:28.310463: step: 852/469, loss: 0.0008063454879447818 2023-01-22 14:48:28.908707: step: 854/469, loss: 0.00021601474145427346 2023-01-22 14:48:29.563014: step: 856/469, loss: 0.013462109491229057 2023-01-22 14:48:30.185543: step: 858/469, loss: 0.17038434743881226 2023-01-22 14:48:30.797150: step: 860/469, loss: 0.0026259678415954113 2023-01-22 14:48:31.417938: step: 862/469, loss: 0.14731523394584656 2023-01-22 14:48:32.026546: step: 864/469, loss: 0.006066069006919861 2023-01-22 14:48:32.725961: step: 866/469, loss: 0.0005654181586578488 2023-01-22 14:48:33.347360: step: 868/469, loss: 0.003503442509099841 2023-01-22 14:48:33.981766: step: 870/469, loss: 0.0025665946304798126 2023-01-22 14:48:34.590417: step: 872/469, loss: 0.011841288767755032 2023-01-22 14:48:35.204713: step: 874/469, loss: 0.00535447197034955 2023-01-22 14:48:35.844678: step: 876/469, loss: 0.000537946994882077 2023-01-22 14:48:36.455244: step: 878/469, loss: 0.0285461638122797 2023-01-22 14:48:37.059708: step: 880/469, loss: 0.0005378602654673159 2023-01-22 14:48:37.696512: step: 882/469, loss: 0.013773929327726364 2023-01-22 14:48:38.472776: step: 884/469, loss: 0.0024970925878733397 2023-01-22 14:48:39.079131: step: 886/469, loss: 0.0207919180393219 2023-01-22 14:48:39.759608: step: 888/469, loss: 0.0146642429754138 2023-01-22 14:48:40.431886: step: 890/469, loss: 0.0031988827977329493 2023-01-22 14:48:41.029996: step: 892/469, loss: 0.02267240174114704 2023-01-22 14:48:41.721640: step: 894/469, loss: 0.004538672976195812 2023-01-22 14:48:42.348038: 
step: 896/469, loss: 0.015491476282477379 2023-01-22 14:48:42.981362: step: 898/469, loss: 7.6398289820645e-05 2023-01-22 14:48:43.607126: step: 900/469, loss: 0.0008502332493662834 2023-01-22 14:48:44.216700: step: 902/469, loss: 0.010820634663105011 2023-01-22 14:48:44.893594: step: 904/469, loss: 0.14744719862937927 2023-01-22 14:48:45.637150: step: 906/469, loss: 0.21440817415714264 2023-01-22 14:48:46.349739: step: 908/469, loss: 0.014432581141591072 2023-01-22 14:48:47.014196: step: 910/469, loss: 0.005767323076725006 2023-01-22 14:48:47.655889: step: 912/469, loss: 0.0068262433633208275 2023-01-22 14:48:48.239069: step: 914/469, loss: 0.0001664624287514016 2023-01-22 14:48:48.836395: step: 916/469, loss: 0.19408603012561798 2023-01-22 14:48:49.423164: step: 918/469, loss: 0.005819531623274088 2023-01-22 14:48:50.042332: step: 920/469, loss: 0.010460923425853252 2023-01-22 14:48:50.665173: step: 922/469, loss: 0.010691720061004162 2023-01-22 14:48:51.228178: step: 924/469, loss: 0.0006093971896916628 2023-01-22 14:48:51.837653: step: 926/469, loss: 0.02577994391322136 2023-01-22 14:48:52.405270: step: 928/469, loss: 0.042842525988817215 2023-01-22 14:48:53.042395: step: 930/469, loss: 0.03465908020734787 2023-01-22 14:48:53.717826: step: 932/469, loss: 0.002047451911494136 2023-01-22 14:48:54.328272: step: 934/469, loss: 0.01731567084789276 2023-01-22 14:48:54.930378: step: 936/469, loss: 0.12593920528888702 2023-01-22 14:48:55.522232: step: 938/469, loss: 0.041484981775283813 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3021616944376378, 'r': 0.30388177998472116, 'f1': 0.3030192962193908}, 'combined': 0.22327737616165635, 'epoch': 35} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.32567904177707563, 'r': 0.2723427668657339, 'f1': 0.29663243067687806}, 'combined': 0.16179950764193346, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3071259153618967, 'r': 0.3117881683465175, 'f1': 0.30943948157931217}, 'combined': 0.22800803905844053, 'epoch': 35} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.33036152093560656, 'r': 0.2717246178601192, 'f1': 0.29818775835452843}, 'combined': 0.16264786819337912, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2908940651906519, 'r': 0.2991737824161923, 'f1': 0.29497583411287814}, 'combined': 0.21735061460948915, 'epoch': 35} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.323979349766826, 'r': 0.2756640396003186, 'f1': 0.2978752301365775}, 'combined': 0.162477398256315, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2764227642276422, 'r': 0.32380952380952377, 'f1': 0.29824561403508765}, 'combined': 0.19883040935672508, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4305555555555556, 'r': 0.2672413793103448, 'f1': 0.3297872340425532}, 'combined': 0.2198581560283688, 
'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:51:43.412102: step: 2/469, loss: 0.004019667394459248 2023-01-22 14:51:44.016642: step: 4/469, loss: 0.0004227577883284539 2023-01-22 14:51:44.645981: step: 6/469, loss: 0.027020292356610298 2023-01-22 14:51:45.276628: step: 8/469, loss: 0.06948929280042648 2023-01-22 14:51:45.928616: step: 10/469, loss: 0.0002911464835051447 2023-01-22 14:51:46.576490: step: 12/469, loss: 0.10993222147226334 2023-01-22 14:51:47.209608: step: 14/469, loss: 0.000966193329077214 2023-01-22 14:51:47.817106: step: 16/469, loss: 0.0005814554169774055 2023-01-22 14:51:48.427114: step: 18/469, loss: 0.04141689091920853 2023-01-22 14:51:49.187922: step: 20/469, loss: 0.042358919978141785 2023-01-22 14:51:49.873976: step: 22/469, loss: 0.2076289802789688 2023-01-22 14:51:50.468585: step: 24/469, loss: 0.006613573059439659 2023-01-22 14:51:51.023023: step: 26/469, loss: 2.0684610717580654e-05 2023-01-22 14:51:51.541154: step: 28/469, loss: 0.004543894436210394 2023-01-22 14:51:52.144683: step: 30/469, loss: 0.08396077156066895 2023-01-22 14:51:52.716969: step: 32/469, loss: 0.012454559095203876 2023-01-22 14:51:53.421180: step: 34/469, loss: 
0.006128770299255848 2023-01-22 14:51:54.004832: step: 36/469, loss: 0.006237713620066643 2023-01-22 14:51:54.633480: step: 38/469, loss: 0.00046029267832636833 2023-01-22 14:51:55.217179: step: 40/469, loss: 0.00116886873729527 2023-01-22 14:51:55.913987: step: 42/469, loss: 0.001049504615366459 2023-01-22 14:51:56.556125: step: 44/469, loss: 0.0021261069923639297 2023-01-22 14:51:57.158336: step: 46/469, loss: 0.03951739892363548 2023-01-22 14:51:57.733435: step: 48/469, loss: 0.00025101398932747543 2023-01-22 14:51:58.365749: step: 50/469, loss: 0.019808761775493622 2023-01-22 14:51:58.980287: step: 52/469, loss: 0.000916816818062216 2023-01-22 14:51:59.630287: step: 54/469, loss: 0.006958385929465294 2023-01-22 14:52:00.256375: step: 56/469, loss: 0.004789789207279682 2023-01-22 14:52:00.872544: step: 58/469, loss: 0.0009373460779897869 2023-01-22 14:52:01.534439: step: 60/469, loss: 0.017457688227295876 2023-01-22 14:52:02.129460: step: 62/469, loss: 0.000646613712888211 2023-01-22 14:52:02.829945: step: 64/469, loss: 0.032429322600364685 2023-01-22 14:52:03.460626: step: 66/469, loss: 0.011531285010278225 2023-01-22 14:52:04.145379: step: 68/469, loss: 0.0032940825913101435 2023-01-22 14:52:04.715443: step: 70/469, loss: 0.05203739181160927 2023-01-22 14:52:05.327874: step: 72/469, loss: 0.022996407002210617 2023-01-22 14:52:05.904677: step: 74/469, loss: 0.009048468433320522 2023-01-22 14:52:06.536489: step: 76/469, loss: 0.03836231678724289 2023-01-22 14:52:07.180260: step: 78/469, loss: 0.0057999067939817905 2023-01-22 14:52:07.783866: step: 80/469, loss: 0.0004627261951100081 2023-01-22 14:52:08.453885: step: 82/469, loss: 8.897064253687859e-05 2023-01-22 14:52:09.061916: step: 84/469, loss: 0.0003895910340361297 2023-01-22 14:52:09.665769: step: 86/469, loss: 8.814774628262967e-05 2023-01-22 14:52:10.317185: step: 88/469, loss: 0.00035210661008022726 2023-01-22 14:52:10.913805: step: 90/469, loss: 0.004196563735604286 2023-01-22 14:52:11.598479: step: 92/469, loss: 0.00043823086889460683 2023-01-22 14:52:12.213184: step: 94/469, loss: 0.013972615823149681 2023-01-22 14:52:12.763513: step: 96/469, loss: 0.012797524221241474 2023-01-22 14:52:13.348907: step: 98/469, loss: 0.09215812385082245 2023-01-22 14:52:13.956676: step: 100/469, loss: 0.01694154553115368 2023-01-22 14:52:14.582246: step: 102/469, loss: 0.00024399500398430973 2023-01-22 14:52:15.212074: step: 104/469, loss: 0.001303322147578001 2023-01-22 14:52:15.775416: step: 106/469, loss: 0.017214186489582062 2023-01-22 14:52:16.433221: step: 108/469, loss: 0.005354368593543768 2023-01-22 14:52:16.994650: step: 110/469, loss: 0.11762009561061859 2023-01-22 14:52:17.578461: step: 112/469, loss: 9.76502924459055e-05 2023-01-22 14:52:18.191076: step: 114/469, loss: 0.0015003300504758954 2023-01-22 14:52:18.784060: step: 116/469, loss: 0.0009911570232361555 2023-01-22 14:52:19.416358: step: 118/469, loss: 0.05354335531592369 2023-01-22 14:52:20.082411: step: 120/469, loss: 0.010274798609316349 2023-01-22 14:52:20.684598: step: 122/469, loss: 0.003682586131617427 2023-01-22 14:52:21.273785: step: 124/469, loss: 0.0016012543346732855 2023-01-22 14:52:21.826168: step: 126/469, loss: 0.008140754885971546 2023-01-22 14:52:22.519047: step: 128/469, loss: 0.11502741277217865 2023-01-22 14:52:23.173562: step: 130/469, loss: 0.014009936712682247 2023-01-22 14:52:23.956621: step: 132/469, loss: 0.003206227906048298 2023-01-22 14:52:24.578184: step: 134/469, loss: 0.10316529124975204 2023-01-22 14:52:25.182828: step: 136/469, loss: 
0.0004017339670099318 2023-01-22 14:52:25.777687: step: 138/469, loss: 0.0013377888826653361 2023-01-22 14:52:26.393361: step: 140/469, loss: 0.9314048290252686 2023-01-22 14:52:27.031658: step: 142/469, loss: 0.0010301253059878945 2023-01-22 14:52:27.643273: step: 144/469, loss: 0.04562880098819733 2023-01-22 14:52:28.290404: step: 146/469, loss: 0.035739898681640625 2023-01-22 14:52:29.016928: step: 148/469, loss: 0.0039064460434019566 2023-01-22 14:52:29.716807: step: 150/469, loss: 0.012470165267586708 2023-01-22 14:52:30.322975: step: 152/469, loss: 0.005693660117685795 2023-01-22 14:52:30.893105: step: 154/469, loss: 0.00013863515050616115 2023-01-22 14:52:31.572205: step: 156/469, loss: 0.007109920494258404 2023-01-22 14:52:32.130907: step: 158/469, loss: 0.005907388404011726 2023-01-22 14:52:32.702954: step: 160/469, loss: 0.0011227307841181755 2023-01-22 14:52:33.364431: step: 162/469, loss: 0.0019335340475663543 2023-01-22 14:52:33.967515: step: 164/469, loss: 0.0009818810503929853 2023-01-22 14:52:34.613423: step: 166/469, loss: 0.0016895406879484653 2023-01-22 14:52:35.225462: step: 168/469, loss: 0.022339507937431335 2023-01-22 14:52:35.891392: step: 170/469, loss: 0.009019142016768456 2023-01-22 14:52:36.520614: step: 172/469, loss: 0.002969138091430068 2023-01-22 14:52:37.142540: step: 174/469, loss: 0.002767864614725113 2023-01-22 14:52:37.785452: step: 176/469, loss: 0.005112234968692064 2023-01-22 14:52:38.377849: step: 178/469, loss: 0.0011714731808751822 2023-01-22 14:52:38.929443: step: 180/469, loss: 0.21712468564510345 2023-01-22 14:52:39.557613: step: 182/469, loss: 0.0027430085465312004 2023-01-22 14:52:40.209752: step: 184/469, loss: 0.00548455910757184 2023-01-22 14:52:40.845633: step: 186/469, loss: 0.019656404852867126 2023-01-22 14:52:41.449032: step: 188/469, loss: 0.0007380904862657189 2023-01-22 14:52:42.053572: step: 190/469, loss: 0.00356236076913774 2023-01-22 14:52:42.675608: step: 192/469, loss: 0.008898664265871048 2023-01-22 14:52:43.320361: step: 194/469, loss: 0.010636312887072563 2023-01-22 14:52:44.036181: step: 196/469, loss: 0.001877523260191083 2023-01-22 14:52:44.658056: step: 198/469, loss: 0.017142169177532196 2023-01-22 14:52:45.233252: step: 200/469, loss: 0.0031895486172288656 2023-01-22 14:52:45.816992: step: 202/469, loss: 0.008416732773184776 2023-01-22 14:52:46.457634: step: 204/469, loss: 0.001715382793918252 2023-01-22 14:52:47.059690: step: 206/469, loss: 0.004539210349321365 2023-01-22 14:52:47.786250: step: 208/469, loss: 0.024577531963586807 2023-01-22 14:52:48.360743: step: 210/469, loss: 0.009009582921862602 2023-01-22 14:52:48.980443: step: 212/469, loss: 0.011423132382333279 2023-01-22 14:52:49.575775: step: 214/469, loss: 0.08204792439937592 2023-01-22 14:52:50.287671: step: 216/469, loss: 0.002255804371088743 2023-01-22 14:52:50.960820: step: 218/469, loss: 0.002182943746447563 2023-01-22 14:52:51.583261: step: 220/469, loss: 0.0008874643826857209 2023-01-22 14:52:52.152684: step: 222/469, loss: 0.00824873335659504 2023-01-22 14:52:52.733581: step: 224/469, loss: 0.000958276039455086 2023-01-22 14:52:53.327369: step: 226/469, loss: 0.0070266565307974815 2023-01-22 14:52:54.026778: step: 228/469, loss: 0.17838431894779205 2023-01-22 14:52:54.692231: step: 230/469, loss: 0.11022572219371796 2023-01-22 14:52:55.288978: step: 232/469, loss: 0.04407728835940361 2023-01-22 14:52:55.963798: step: 234/469, loss: 0.0008073513163253665 2023-01-22 14:52:56.662116: step: 236/469, loss: 0.01689794845879078 2023-01-22 14:52:57.253650: 
step: 238/469, loss: 0.004012030549347401 2023-01-22 14:52:57.867678: step: 240/469, loss: 0.014662004075944424 2023-01-22 14:52:58.499642: step: 242/469, loss: 0.0031540084164589643 2023-01-22 14:52:59.153665: step: 244/469, loss: 0.06471965461969376 2023-01-22 14:52:59.785473: step: 246/469, loss: 0.004902043845504522 2023-01-22 14:53:00.428544: step: 248/469, loss: 0.02574852854013443 2023-01-22 14:53:01.000392: step: 250/469, loss: 0.0011984813027083874 2023-01-22 14:53:01.619258: step: 252/469, loss: 0.0012043439783155918 2023-01-22 14:53:02.291042: step: 254/469, loss: 0.018357571214437485 2023-01-22 14:53:02.931413: step: 256/469, loss: 0.0015557422302663326 2023-01-22 14:53:03.611132: step: 258/469, loss: 0.049370232969522476 2023-01-22 14:53:04.202293: step: 260/469, loss: 0.005529611371457577 2023-01-22 14:53:04.778192: step: 262/469, loss: 0.0017110251355916262 2023-01-22 14:53:05.410639: step: 264/469, loss: 0.004286629147827625 2023-01-22 14:53:06.018279: step: 266/469, loss: 0.000177061214344576 2023-01-22 14:53:06.622819: step: 268/469, loss: 0.0700095072388649 2023-01-22 14:53:07.251264: step: 270/469, loss: 0.00356339942663908 2023-01-22 14:53:07.882109: step: 272/469, loss: 0.03956741467118263 2023-01-22 14:53:08.454003: step: 274/469, loss: 0.010773099958896637 2023-01-22 14:53:09.058677: step: 276/469, loss: 0.005284388083964586 2023-01-22 14:53:09.697902: step: 278/469, loss: 0.00043513590935617685 2023-01-22 14:53:10.507343: step: 280/469, loss: 0.01611909084022045 2023-01-22 14:53:11.048143: step: 282/469, loss: 0.0007956814370118082 2023-01-22 14:53:11.631422: step: 284/469, loss: 0.0034125293605029583 2023-01-22 14:53:12.295097: step: 286/469, loss: 0.000292459677439183 2023-01-22 14:53:12.954945: step: 288/469, loss: 0.004325747489929199 2023-01-22 14:53:13.646986: step: 290/469, loss: 0.08130623400211334 2023-01-22 14:53:14.233713: step: 292/469, loss: 8.02692084107548e-05 2023-01-22 14:53:14.835446: step: 294/469, loss: 0.0011682234471663833 2023-01-22 14:53:15.436031: step: 296/469, loss: 0.3660926818847656 2023-01-22 14:53:16.065533: step: 298/469, loss: 0.00048090022755786777 2023-01-22 14:53:16.651289: step: 300/469, loss: 0.08668415248394012 2023-01-22 14:53:17.280653: step: 302/469, loss: 0.08002693951129913 2023-01-22 14:53:17.934402: step: 304/469, loss: 0.0442228727042675 2023-01-22 14:53:18.538130: step: 306/469, loss: 0.0017019023653119802 2023-01-22 14:53:19.159038: step: 308/469, loss: 0.024139491841197014 2023-01-22 14:53:19.783907: step: 310/469, loss: 0.0019354281248524785 2023-01-22 14:53:20.424905: step: 312/469, loss: 0.006996998563408852 2023-01-22 14:53:21.039624: step: 314/469, loss: 0.00355371437035501 2023-01-22 14:53:21.716875: step: 316/469, loss: 0.01877163164317608 2023-01-22 14:53:22.364792: step: 318/469, loss: 0.0008296903106383979 2023-01-22 14:53:22.947565: step: 320/469, loss: 9.87905150395818e-05 2023-01-22 14:53:23.530719: step: 322/469, loss: 0.025181597098708153 2023-01-22 14:53:24.164833: step: 324/469, loss: 0.01067360583692789 2023-01-22 14:53:24.806243: step: 326/469, loss: 0.0005482613923959434 2023-01-22 14:53:25.370071: step: 328/469, loss: 0.005046092439442873 2023-01-22 14:53:25.983426: step: 330/469, loss: 0.02470463700592518 2023-01-22 14:53:26.667076: step: 332/469, loss: 0.009912683628499508 2023-01-22 14:53:27.311373: step: 334/469, loss: 0.005101479589939117 2023-01-22 14:53:27.901482: step: 336/469, loss: 0.0015234098536893725 2023-01-22 14:53:28.550525: step: 338/469, loss: 0.015306186862289906 2023-01-22 
14:53:29.205933: step: 340/469, loss: 0.00023860990768298507 2023-01-22 14:53:29.786440: step: 342/469, loss: 0.0034003914333879948 2023-01-22 14:53:30.373894: step: 344/469, loss: 0.00551243033260107 2023-01-22 14:53:30.983406: step: 346/469, loss: 0.0004621801199391484 2023-01-22 14:53:31.622997: step: 348/469, loss: 0.0009943271288648248 2023-01-22 14:53:32.264230: step: 350/469, loss: 0.3635597229003906 2023-01-22 14:53:32.942858: step: 352/469, loss: 0.003187666879966855 2023-01-22 14:53:33.547775: step: 354/469, loss: 0.0033788057044148445 2023-01-22 14:53:34.238174: step: 356/469, loss: 0.006172339431941509 2023-01-22 14:53:34.831392: step: 358/469, loss: 0.01572144590318203 2023-01-22 14:53:35.382910: step: 360/469, loss: 0.005256704054772854 2023-01-22 14:53:35.994494: step: 362/469, loss: 0.012815205380320549 2023-01-22 14:53:36.663917: step: 364/469, loss: 0.0021572893019765615 2023-01-22 14:53:37.231494: step: 366/469, loss: 0.0014868644066154957 2023-01-22 14:53:37.916864: step: 368/469, loss: 0.014277968555688858 2023-01-22 14:53:38.530604: step: 370/469, loss: 0.00176100037060678 2023-01-22 14:53:39.130912: step: 372/469, loss: 0.00022385075862985104 2023-01-22 14:53:39.717706: step: 374/469, loss: 0.004147999454289675 2023-01-22 14:53:40.327765: step: 376/469, loss: 0.0004994391347281635 2023-01-22 14:53:41.067364: step: 378/469, loss: 0.006284559611231089 2023-01-22 14:53:41.723687: step: 380/469, loss: 0.0011346520623192191 2023-01-22 14:53:42.347685: step: 382/469, loss: 0.00012142006744397804 2023-01-22 14:53:42.950138: step: 384/469, loss: 0.0016428964445367455 2023-01-22 14:53:43.541144: step: 386/469, loss: 0.01398802176117897 2023-01-22 14:53:44.126455: step: 388/469, loss: 0.0004344326152931899 2023-01-22 14:53:44.755256: step: 390/469, loss: 0.0009233256569132209 2023-01-22 14:53:45.344699: step: 392/469, loss: 0.0014322545612230897 2023-01-22 14:53:45.871090: step: 394/469, loss: 5.282333222567104e-05 2023-01-22 14:53:46.586972: step: 396/469, loss: 0.0038123372942209244 2023-01-22 14:53:47.237340: step: 398/469, loss: 0.04511203616857529 2023-01-22 14:53:47.869333: step: 400/469, loss: 0.0031361912842839956 2023-01-22 14:53:48.448208: step: 402/469, loss: 0.0008506365120410919 2023-01-22 14:53:49.031491: step: 404/469, loss: 0.38688597083091736 2023-01-22 14:53:49.650497: step: 406/469, loss: 0.023370549082756042 2023-01-22 14:53:50.271431: step: 408/469, loss: 0.010454434901475906 2023-01-22 14:53:50.907038: step: 410/469, loss: 0.0018446693429723382 2023-01-22 14:53:51.489217: step: 412/469, loss: 0.0007001546327956021 2023-01-22 14:53:52.128699: step: 414/469, loss: 0.00025169330183416605 2023-01-22 14:53:52.829604: step: 416/469, loss: 0.0010023503564298153 2023-01-22 14:53:53.406252: step: 418/469, loss: 0.00013269903138279915 2023-01-22 14:53:54.001773: step: 420/469, loss: 0.013020424172282219 2023-01-22 14:53:54.610969: step: 422/469, loss: 0.019305003806948662 2023-01-22 14:53:55.306760: step: 424/469, loss: 0.07851661741733551 2023-01-22 14:53:55.931633: step: 426/469, loss: 0.020300403237342834 2023-01-22 14:53:56.537060: step: 428/469, loss: 0.03077412024140358 2023-01-22 14:53:57.177712: step: 430/469, loss: 0.010809487663209438 2023-01-22 14:53:57.786200: step: 432/469, loss: 0.026068199425935745 2023-01-22 14:53:58.376892: step: 434/469, loss: 0.0007637391099706292 2023-01-22 14:53:59.017821: step: 436/469, loss: 0.0024585032369941473 2023-01-22 14:53:59.570500: step: 438/469, loss: 0.0008235553978011012 2023-01-22 14:54:00.181052: step: 440/469, 
loss: 0.02056199684739113 2023-01-22 14:54:00.862592: step: 442/469, loss: 0.02932426705956459 2023-01-22 14:54:01.452787: step: 444/469, loss: 0.0018069082871079445 2023-01-22 14:54:02.015380: step: 446/469, loss: 0.007590556051582098 2023-01-22 14:54:02.626928: step: 448/469, loss: 0.007134605664759874 2023-01-22 14:54:03.380281: step: 450/469, loss: 0.018807299435138702 2023-01-22 14:54:03.973245: step: 452/469, loss: 0.020430870354175568 2023-01-22 14:54:04.646941: step: 454/469, loss: 0.004038804676383734 2023-01-22 14:54:05.274903: step: 456/469, loss: 0.0058199018239974976 2023-01-22 14:54:05.874570: step: 458/469, loss: 0.005377876106649637 2023-01-22 14:54:06.532342: step: 460/469, loss: 0.007059148512780666 2023-01-22 14:54:07.152137: step: 462/469, loss: 0.006550138350576162 2023-01-22 14:54:07.735462: step: 464/469, loss: 0.00034577693440951407 2023-01-22 14:54:08.357681: step: 466/469, loss: 0.0036921261344105005 2023-01-22 14:54:08.969158: step: 468/469, loss: 0.022593403235077858 2023-01-22 14:54:09.595455: step: 470/469, loss: 0.0671863704919815 2023-01-22 14:54:10.197294: step: 472/469, loss: 0.0019509991398081183 2023-01-22 14:54:10.867661: step: 474/469, loss: 0.001830831984989345 2023-01-22 14:54:11.484555: step: 476/469, loss: 0.0001018723487504758 2023-01-22 14:54:12.055319: step: 478/469, loss: 0.010821687057614326 2023-01-22 14:54:12.697638: step: 480/469, loss: 0.05585511028766632 2023-01-22 14:54:13.356063: step: 482/469, loss: 0.003776567755267024 2023-01-22 14:54:13.959805: step: 484/469, loss: 0.04578500613570213 2023-01-22 14:54:14.727352: step: 486/469, loss: 0.02981276996433735 2023-01-22 14:54:15.287505: step: 488/469, loss: 0.0014807283878326416 2023-01-22 14:54:15.939412: step: 490/469, loss: 0.0056166574358940125 2023-01-22 14:54:16.547331: step: 492/469, loss: 0.11408372223377228 2023-01-22 14:54:17.139880: step: 494/469, loss: 0.005550468806177378 2023-01-22 14:54:17.725601: step: 496/469, loss: 0.002481278032064438 2023-01-22 14:54:18.377796: step: 498/469, loss: 0.005701960064470768 2023-01-22 14:54:18.988102: step: 500/469, loss: 0.00721729127690196 2023-01-22 14:54:19.572429: step: 502/469, loss: 0.004936212673783302 2023-01-22 14:54:20.245463: step: 504/469, loss: 0.045986518263816833 2023-01-22 14:54:20.816230: step: 506/469, loss: 0.0019094212912023067 2023-01-22 14:54:21.432594: step: 508/469, loss: 0.049983784556388855 2023-01-22 14:54:21.989170: step: 510/469, loss: 0.018032558262348175 2023-01-22 14:54:22.602629: step: 512/469, loss: 1.561031058372464e-05 2023-01-22 14:54:23.342359: step: 514/469, loss: 0.07706205546855927 2023-01-22 14:54:24.032755: step: 516/469, loss: 0.007279384881258011 2023-01-22 14:54:24.653808: step: 518/469, loss: 0.0040094079449772835 2023-01-22 14:54:25.228517: step: 520/469, loss: 0.027229076251387596 2023-01-22 14:54:25.855450: step: 522/469, loss: 0.0019564018584787846 2023-01-22 14:54:26.568355: step: 524/469, loss: 0.0011849193833768368 2023-01-22 14:54:27.256797: step: 526/469, loss: 0.01664084754884243 2023-01-22 14:54:27.844881: step: 528/469, loss: 0.00618576118722558 2023-01-22 14:54:28.480907: step: 530/469, loss: 0.013897284865379333 2023-01-22 14:54:29.109683: step: 532/469, loss: 0.0008726078667677939 2023-01-22 14:54:29.744275: step: 534/469, loss: 0.0031327398028224707 2023-01-22 14:54:30.368669: step: 536/469, loss: 0.02323629893362522 2023-01-22 14:54:31.028769: step: 538/469, loss: 0.044767554849386215 2023-01-22 14:54:31.622021: step: 540/469, loss: 0.00033543503377586603 2023-01-22 
14:54:32.240164: step: 542/469, loss: 0.0008319690823554993 2023-01-22 14:54:32.861491: step: 544/469, loss: 0.06147266924381256 2023-01-22 14:54:33.524460: step: 546/469, loss: 0.010405710898339748 2023-01-22 14:54:34.130069: step: 548/469, loss: 0.02598422020673752 2023-01-22 14:54:34.728701: step: 550/469, loss: 2.1834668586961925e-05 2023-01-22 14:54:35.320362: step: 552/469, loss: 0.0034023048356175423 2023-01-22 14:54:35.954533: step: 554/469, loss: 0.03261549025774002 2023-01-22 14:54:36.515692: step: 556/469, loss: 0.002278990810737014 2023-01-22 14:54:37.145014: step: 558/469, loss: 0.03563765808939934 2023-01-22 14:54:37.744964: step: 560/469, loss: 0.028120949864387512 2023-01-22 14:54:38.348141: step: 562/469, loss: 0.007851927541196346 2023-01-22 14:54:38.944022: step: 564/469, loss: 1.195573804579908e-05 2023-01-22 14:54:39.602743: step: 566/469, loss: 0.00018038078269455582 2023-01-22 14:54:40.204394: step: 568/469, loss: 0.03586490824818611 2023-01-22 14:54:40.748318: step: 570/469, loss: 0.009922190569341183 2023-01-22 14:54:41.340935: step: 572/469, loss: 0.08011331409215927 2023-01-22 14:54:42.036653: step: 574/469, loss: 0.023162007331848145 2023-01-22 14:54:42.725216: step: 576/469, loss: 0.0010554376058280468 2023-01-22 14:54:43.382867: step: 578/469, loss: 0.0033900949638336897 2023-01-22 14:54:44.012439: step: 580/469, loss: 0.036971576511859894 2023-01-22 14:54:44.617296: step: 582/469, loss: 0.025155462324619293 2023-01-22 14:54:45.224601: step: 584/469, loss: 9.353594577987678e-06 2023-01-22 14:54:45.784955: step: 586/469, loss: 0.001067722332663834 2023-01-22 14:54:46.440244: step: 588/469, loss: 0.0021462542936205864 2023-01-22 14:54:47.084327: step: 590/469, loss: 0.022367671132087708 2023-01-22 14:54:47.688067: step: 592/469, loss: 0.0014849365688860416 2023-01-22 14:54:48.244524: step: 594/469, loss: 3.283873593318276e-05 2023-01-22 14:54:48.866220: step: 596/469, loss: 0.0026768064126372337 2023-01-22 14:54:49.531997: step: 598/469, loss: 0.00786451157182455 2023-01-22 14:54:50.157575: step: 600/469, loss: 0.005423974711447954 2023-01-22 14:54:50.737493: step: 602/469, loss: 0.005975480657070875 2023-01-22 14:54:51.293953: step: 604/469, loss: 0.02671235427260399 2023-01-22 14:54:51.811882: step: 606/469, loss: 0.001727751805447042 2023-01-22 14:54:52.431068: step: 608/469, loss: 0.15199093520641327 2023-01-22 14:54:53.110764: step: 610/469, loss: 0.0003186521935276687 2023-01-22 14:54:53.772060: step: 612/469, loss: 0.009734037332236767 2023-01-22 14:54:54.350620: step: 614/469, loss: 0.001794076873920858 2023-01-22 14:54:55.041596: step: 616/469, loss: 0.00023365308879874647 2023-01-22 14:54:55.677460: step: 618/469, loss: 0.08687198162078857 2023-01-22 14:54:56.294096: step: 620/469, loss: 0.011066783219575882 2023-01-22 14:54:56.959165: step: 622/469, loss: 0.004672329872846603 2023-01-22 14:54:57.562284: step: 624/469, loss: 0.003951750695705414 2023-01-22 14:54:58.166549: step: 626/469, loss: 0.0007730473298579454 2023-01-22 14:54:58.778641: step: 628/469, loss: 0.00032559860846959054 2023-01-22 14:54:59.377399: step: 630/469, loss: 7.381993782473728e-05 2023-01-22 14:54:59.970613: step: 632/469, loss: 0.0011772344587370753 2023-01-22 14:55:00.563268: step: 634/469, loss: 0.03232504427433014 2023-01-22 14:55:01.220409: step: 636/469, loss: 0.0838412195444107 2023-01-22 14:55:01.796301: step: 638/469, loss: 0.4180150628089905 2023-01-22 14:55:02.406624: step: 640/469, loss: 0.0005095831584185362 2023-01-22 14:55:03.043079: step: 642/469, loss: 
0.36279064416885376 2023-01-22 14:55:03.735677: step: 644/469, loss: 0.0027092904783785343 2023-01-22 14:55:04.339220: step: 646/469, loss: 0.005490084178745747 2023-01-22 14:55:04.976865: step: 648/469, loss: 0.0017521940171718597 2023-01-22 14:55:05.612753: step: 650/469, loss: 0.036064352840185165 2023-01-22 14:55:06.238797: step: 652/469, loss: 0.09220781177282333 2023-01-22 14:55:06.875496: step: 654/469, loss: 0.04325910657644272 2023-01-22 14:55:07.515557: step: 656/469, loss: 0.012073627673089504 2023-01-22 14:55:08.304646: step: 658/469, loss: 0.036337390542030334 2023-01-22 14:55:08.888028: step: 660/469, loss: 0.0017401073127985 2023-01-22 14:55:09.513148: step: 662/469, loss: 0.0011992332292720675 2023-01-22 14:55:10.115012: step: 664/469, loss: 0.02882547304034233 2023-01-22 14:55:10.734823: step: 666/469, loss: 0.026680851355195045 2023-01-22 14:55:11.391696: step: 668/469, loss: 0.004989529959857464 2023-01-22 14:55:11.996567: step: 670/469, loss: 0.01681157574057579 2023-01-22 14:55:12.576193: step: 672/469, loss: 6.639528146479279e-05 2023-01-22 14:55:13.151166: step: 674/469, loss: 0.008588061667978764 2023-01-22 14:55:13.796166: step: 676/469, loss: 6.242917152121663e-05 2023-01-22 14:55:14.431855: step: 678/469, loss: 6.337384547805414e-05 2023-01-22 14:55:15.087553: step: 680/469, loss: 0.0051832362078130245 2023-01-22 14:55:15.740828: step: 682/469, loss: 0.02174840122461319 2023-01-22 14:55:16.412856: step: 684/469, loss: 0.002919914200901985 2023-01-22 14:55:16.944206: step: 686/469, loss: 0.0022616484202444553 2023-01-22 14:55:17.520522: step: 688/469, loss: 0.007699252106249332 2023-01-22 14:55:18.074519: step: 690/469, loss: 0.0007571104797534645 2023-01-22 14:55:18.651407: step: 692/469, loss: 0.0009577810415066779 2023-01-22 14:55:19.260127: step: 694/469, loss: 0.0014563316944986582 2023-01-22 14:55:19.872572: step: 696/469, loss: 0.007509433664381504 2023-01-22 14:55:20.509860: step: 698/469, loss: 0.018059106543660164 2023-01-22 14:55:21.124371: step: 700/469, loss: 0.0019540274515748024 2023-01-22 14:55:21.731359: step: 702/469, loss: 0.18619872629642487 2023-01-22 14:55:22.357046: step: 704/469, loss: 0.0012100540334358811 2023-01-22 14:55:22.974043: step: 706/469, loss: 0.007862688042223454 2023-01-22 14:55:23.527496: step: 708/469, loss: 0.0008311189012601972 2023-01-22 14:55:24.111638: step: 710/469, loss: 0.063021220266819 2023-01-22 14:55:24.722288: step: 712/469, loss: 0.2638273239135742 2023-01-22 14:55:25.266184: step: 714/469, loss: 0.0015841509448364377 2023-01-22 14:55:25.895762: step: 716/469, loss: 0.00010814674897119403 2023-01-22 14:55:26.452964: step: 718/469, loss: 0.016875075176358223 2023-01-22 14:55:27.060403: step: 720/469, loss: 0.0034369411878287792 2023-01-22 14:55:27.600394: step: 722/469, loss: 0.0003486261412035674 2023-01-22 14:55:28.249279: step: 724/469, loss: 0.014958810061216354 2023-01-22 14:55:28.868606: step: 726/469, loss: 0.0028847274370491505 2023-01-22 14:55:29.475801: step: 728/469, loss: 0.010769457556307316 2023-01-22 14:55:30.066832: step: 730/469, loss: 0.19564558565616608 2023-01-22 14:55:30.762682: step: 732/469, loss: 0.0037767256144434214 2023-01-22 14:55:31.454945: step: 734/469, loss: 0.010133241303265095 2023-01-22 14:55:32.057028: step: 736/469, loss: 0.0005434389458969235 2023-01-22 14:55:32.699080: step: 738/469, loss: 0.000599593564402312 2023-01-22 14:55:33.323996: step: 740/469, loss: 0.00011581474245758727 2023-01-22 14:55:33.888824: step: 742/469, loss: 0.054639704525470734 2023-01-22 
14:55:34.531563: step: 744/469, loss: 0.0030861583072692156 2023-01-22 14:55:35.158769: step: 746/469, loss: 0.0017878111684694886 2023-01-22 14:55:35.805064: step: 748/469, loss: 0.0074324156157672405 2023-01-22 14:55:36.449364: step: 750/469, loss: 0.12366275489330292 2023-01-22 14:55:37.069115: step: 752/469, loss: 0.021732477471232414 2023-01-22 14:55:37.705635: step: 754/469, loss: 0.0007377371075563133 2023-01-22 14:55:38.309855: step: 756/469, loss: 0.0034300272818654776 2023-01-22 14:55:38.927829: step: 758/469, loss: 0.004186233971267939 2023-01-22 14:55:39.586161: step: 760/469, loss: 0.06287708878517151 2023-01-22 14:55:40.234504: step: 762/469, loss: 0.007598173804581165 2023-01-22 14:55:40.856169: step: 764/469, loss: 0.15563441812992096 2023-01-22 14:55:41.473392: step: 766/469, loss: 0.11742037534713745 2023-01-22 14:55:42.207290: step: 768/469, loss: 0.022891785949468613 2023-01-22 14:55:42.789344: step: 770/469, loss: 0.00043143340735696256 2023-01-22 14:55:43.613525: step: 772/469, loss: 0.0017112481873482466 2023-01-22 14:55:44.223473: step: 774/469, loss: 0.00031356822000816464 2023-01-22 14:55:44.786256: step: 776/469, loss: 0.005490230396389961 2023-01-22 14:55:45.397813: step: 778/469, loss: 0.01457175798714161 2023-01-22 14:55:46.060922: step: 780/469, loss: 0.025646094232797623 2023-01-22 14:55:46.677724: step: 782/469, loss: 0.007112429942935705 2023-01-22 14:55:47.293949: step: 784/469, loss: 0.01074180193245411 2023-01-22 14:55:47.885357: step: 786/469, loss: 0.0004196114023216069 2023-01-22 14:55:48.495361: step: 788/469, loss: 0.00041283125756308436 2023-01-22 14:55:49.097115: step: 790/469, loss: 0.008841020986437798 2023-01-22 14:55:49.735892: step: 792/469, loss: 0.008139640092849731 2023-01-22 14:55:50.306405: step: 794/469, loss: 0.03408011049032211 2023-01-22 14:55:50.879249: step: 796/469, loss: 0.02207968197762966 2023-01-22 14:55:51.524263: step: 798/469, loss: 0.005294452887028456 2023-01-22 14:55:52.160941: step: 800/469, loss: 0.047961167991161346 2023-01-22 14:55:52.835483: step: 802/469, loss: 0.0017440533265471458 2023-01-22 14:55:53.480270: step: 804/469, loss: 0.013308239169418812 2023-01-22 14:55:54.085168: step: 806/469, loss: 0.005262637510895729 2023-01-22 14:55:54.713608: step: 808/469, loss: 0.001996168866753578 2023-01-22 14:55:55.411992: step: 810/469, loss: 0.027486082166433334 2023-01-22 14:55:56.017382: step: 812/469, loss: 0.0007017867756076157 2023-01-22 14:55:56.633144: step: 814/469, loss: 0.0018342012772336602 2023-01-22 14:55:57.231723: step: 816/469, loss: 9.853844676399603e-05 2023-01-22 14:55:57.796576: step: 818/469, loss: 0.0020896398928016424 2023-01-22 14:55:58.358425: step: 820/469, loss: 0.0050257109105587006 2023-01-22 14:55:59.024962: step: 822/469, loss: 0.030479149892926216 2023-01-22 14:55:59.707849: step: 824/469, loss: 0.0023915020283311605 2023-01-22 14:56:00.342303: step: 826/469, loss: 0.005647624377161264 2023-01-22 14:56:00.977696: step: 828/469, loss: 0.0007674493826925755 2023-01-22 14:56:01.537266: step: 830/469, loss: 0.004160327836871147 2023-01-22 14:56:02.181966: step: 832/469, loss: 0.021543780341744423 2023-01-22 14:56:02.785426: step: 834/469, loss: 0.019983643665909767 2023-01-22 14:56:03.391343: step: 836/469, loss: 0.0033185044303536415 2023-01-22 14:56:04.071942: step: 838/469, loss: 0.00011398903734516352 2023-01-22 14:56:04.686625: step: 840/469, loss: 0.0050304424948990345 2023-01-22 14:56:05.378619: step: 842/469, loss: 0.07564875483512878 2023-01-22 14:56:06.036672: step: 844/469, loss: 
0.00016987166600301862 2023-01-22 14:56:06.678786: step: 846/469, loss: 0.022639740258455276 2023-01-22 14:56:07.354494: step: 848/469, loss: 0.010631454177200794 2023-01-22 14:56:07.963147: step: 850/469, loss: 0.0017994737718254328 2023-01-22 14:56:08.641853: step: 852/469, loss: 0.002849370939657092 2023-01-22 14:56:09.258701: step: 854/469, loss: 0.01692868396639824 2023-01-22 14:56:09.847696: step: 856/469, loss: 0.028368575498461723 2023-01-22 14:56:10.456088: step: 858/469, loss: 0.02622413821518421 2023-01-22 14:56:11.060429: step: 860/469, loss: 0.08661984652280807 2023-01-22 14:56:11.720576: step: 862/469, loss: 0.0007552942261099815 2023-01-22 14:56:12.392799: step: 864/469, loss: 0.1911463439464569 2023-01-22 14:56:13.012685: step: 866/469, loss: 0.002511322032660246 2023-01-22 14:56:13.599795: step: 868/469, loss: 0.0032366435043513775 2023-01-22 14:56:14.344501: step: 870/469, loss: 0.0002490816987119615 2023-01-22 14:56:14.925895: step: 872/469, loss: 0.0011198052670806646 2023-01-22 14:56:15.482648: step: 874/469, loss: 0.001496673095971346 2023-01-22 14:56:16.094304: step: 876/469, loss: 0.005319307558238506 2023-01-22 14:56:16.733108: step: 878/469, loss: 0.015571960248053074 2023-01-22 14:56:17.294673: step: 880/469, loss: 6.340535037452355e-05 2023-01-22 14:56:17.890130: step: 882/469, loss: 0.0018311298917979002 2023-01-22 14:56:18.576748: step: 884/469, loss: 7.11813336238265e-05 2023-01-22 14:56:19.218544: step: 886/469, loss: 0.009444230236113071 2023-01-22 14:56:19.794708: step: 888/469, loss: 0.01700914278626442 2023-01-22 14:56:20.355631: step: 890/469, loss: 0.006454721093177795 2023-01-22 14:56:20.940593: step: 892/469, loss: 0.056100994348526 2023-01-22 14:56:21.539014: step: 894/469, loss: 0.8182767629623413 2023-01-22 14:56:22.171547: step: 896/469, loss: 0.018168237060308456 2023-01-22 14:56:22.858300: step: 898/469, loss: 0.04914424940943718 2023-01-22 14:56:23.572841: step: 900/469, loss: 0.001316792331635952 2023-01-22 14:56:24.244746: step: 902/469, loss: 0.008519035764038563 2023-01-22 14:56:24.821234: step: 904/469, loss: 0.003834921633824706 2023-01-22 14:56:25.450611: step: 906/469, loss: 0.015359274111688137 2023-01-22 14:56:26.106328: step: 908/469, loss: 0.018616918474435806 2023-01-22 14:56:26.762306: step: 910/469, loss: 0.000498221896123141 2023-01-22 14:56:27.412690: step: 912/469, loss: 0.03173353523015976 2023-01-22 14:56:28.138522: step: 914/469, loss: 0.023717835545539856 2023-01-22 14:56:28.821005: step: 916/469, loss: 0.01838405802845955 2023-01-22 14:56:29.388745: step: 918/469, loss: 0.07607328146696091 2023-01-22 14:56:30.018081: step: 920/469, loss: 0.015997862443327904 2023-01-22 14:56:30.611052: step: 922/469, loss: 0.005880296695977449 2023-01-22 14:56:31.345048: step: 924/469, loss: 0.0033639695029705763 2023-01-22 14:56:31.968526: step: 926/469, loss: 0.0015219607157632709 2023-01-22 14:56:32.610008: step: 928/469, loss: 0.0019990154542028904 2023-01-22 14:56:33.242148: step: 930/469, loss: 0.01711413264274597 2023-01-22 14:56:33.819973: step: 932/469, loss: 0.006104788277298212 2023-01-22 14:56:34.479043: step: 934/469, loss: 0.004206092096865177 2023-01-22 14:56:35.070313: step: 936/469, loss: 0.0012522918405011296 2023-01-22 14:56:35.647464: step: 938/469, loss: 0.003964857663959265 ================================================== Loss: 0.026 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2993057320793086, 'r': 0.31918372187584715, 'f1': 
0.30892529187983736}, 'combined': 0.2276291624377749, 'epoch': 36} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3054674364880787, 'r': 0.27416610722306056, 'f1': 0.28897160578091147}, 'combined': 0.15762087588049714, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.292573236632537, 'r': 0.3253281151170146, 'f1': 0.3080825097334532}, 'combined': 0.22700816506675497, 'epoch': 36} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3140060492491048, 'r': 0.27924417188483974, 'f1': 0.2956066633124744}, 'combined': 0.16123999817044057, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.286747656379625, 'r': 0.3123209767778079, 'f1': 0.2989884736819342}, 'combined': 0.22030729639721466, 'epoch': 36} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3033002363899542, 'r': 0.2752734075927123, 'f1': 0.2886079947231026}, 'combined': 0.15742254257623778, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2353395061728395, 'r': 0.3630952380952381, 'f1': 0.2855805243445693}, 'combined': 0.19038701622971288, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27586206896551724, 'r': 0.34782608695652173, 'f1': 0.3076923076923077}, 'combined': 0.15384615384615385, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 
0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:59:24.091878: step: 2/469, loss: 0.02334951050579548 2023-01-22 14:59:24.675366: step: 4/469, loss: 0.013603771105408669 2023-01-22 14:59:25.247528: step: 6/469, loss: 0.05895049870014191 2023-01-22 14:59:25.898427: step: 8/469, loss: 0.009032522328197956 2023-01-22 14:59:26.536415: step: 10/469, loss: 0.002515010302886367 2023-01-22 14:59:27.217659: step: 12/469, loss: 0.0003157924802508205 2023-01-22 14:59:27.863175: step: 14/469, loss: 0.00029328028904274106 2023-01-22 14:59:28.421790: step: 16/469, loss: 3.104509960394353e-05 2023-01-22 14:59:29.040354: step: 18/469, loss: 0.0002855995553545654 2023-01-22 14:59:29.643999: step: 20/469, loss: 0.001551100634969771 2023-01-22 14:59:30.328037: step: 22/469, loss: 0.006941393483430147 2023-01-22 14:59:30.975920: step: 24/469, loss: 0.006952290423214436 2023-01-22 14:59:31.564598: step: 26/469, loss: 7.099486538209021e-05 2023-01-22 14:59:32.192624: step: 28/469, loss: 0.007216090802103281 2023-01-22 14:59:32.776040: step: 30/469, loss: 0.003178994171321392 2023-01-22 14:59:33.400718: step: 32/469, loss: 0.0009747220319695771 2023-01-22 14:59:33.981851: step: 34/469, loss: 0.0005233795964159071 2023-01-22 14:59:34.655295: step: 36/469, loss: 0.008877452462911606 2023-01-22 14:59:35.252389: step: 38/469, loss: 0.019093405455350876 2023-01-22 14:59:35.801848: step: 40/469, loss: 5.362175215850584e-05 2023-01-22 14:59:36.459810: step: 42/469, loss: 0.0002662398910615593 2023-01-22 14:59:37.066731: step: 44/469, loss: 0.0019493541913107038 2023-01-22 14:59:37.687256: step: 46/469, loss: 0.015421044081449509 2023-01-22 14:59:38.316219: step: 48/469, loss: 0.0008357454207725823 2023-01-22 14:59:38.940819: step: 50/469, loss: 0.008192282170057297 2023-01-22 14:59:39.620441: step: 52/469, loss: 0.00021694099996238947 2023-01-22 14:59:40.242733: step: 54/469, loss: 0.0005123814335092902 2023-01-22 14:59:40.880393: step: 56/469, loss: 0.005808388814330101 2023-01-22 14:59:41.531222: step: 58/469, loss: 0.005942604038864374 2023-01-22 14:59:42.115757: step: 60/469, loss: 0.00048650658573023975 2023-01-22 14:59:42.713863: step: 62/469, loss: 0.028779789805412292 2023-01-22 14:59:43.446789: step: 64/469, loss: 0.001439630868844688 2023-01-22 14:59:44.115847: step: 66/469, loss: 0.00027584107010625303 2023-01-22 14:59:44.824630: step: 68/469, loss: 0.0037879787851125 2023-01-22 14:59:45.491332: step: 70/469, loss: 0.018322013318538666 2023-01-22 14:59:46.148664: step: 72/469, loss: 0.0068619451485574245 2023-01-22 14:59:46.824815: step: 74/469, loss: 0.00015405187150463462 2023-01-22 14:59:47.428986: step: 76/469, loss: 0.0002149700012523681 2023-01-22 14:59:47.953471: step: 78/469, loss: 0.0016966273542493582 2023-01-22 14:59:48.644967: step: 80/469, loss: 0.011040701530873775 2023-01-22 14:59:49.361189: step: 82/469, loss: 0.004825273063033819 2023-01-22 14:59:50.065976: step: 84/469, loss: 
0.000932644703425467 2023-01-22 14:59:50.726247: step: 86/469, loss: 0.015768012031912804 2023-01-22 14:59:51.357270: step: 88/469, loss: 0.0015958852600306273 2023-01-22 14:59:52.114617: step: 90/469, loss: 0.00022211599571164697 2023-01-22 14:59:52.671334: step: 92/469, loss: 0.003766988404095173 2023-01-22 14:59:53.338070: step: 94/469, loss: 0.00632568821310997 2023-01-22 14:59:53.978559: step: 96/469, loss: 3.9818834920879453e-05 2023-01-22 14:59:54.646874: step: 98/469, loss: 0.00012792499910574406 2023-01-22 14:59:55.276366: step: 100/469, loss: 0.0056974878534674644 2023-01-22 14:59:55.872438: step: 102/469, loss: 0.0001498083584010601 2023-01-22 14:59:56.516993: step: 104/469, loss: 0.004224066622555256 2023-01-22 14:59:57.155596: step: 106/469, loss: 0.001001994707621634 2023-01-22 14:59:57.770399: step: 108/469, loss: 0.0001033278604154475 2023-01-22 14:59:58.396902: step: 110/469, loss: 0.008249464444816113 2023-01-22 14:59:58.989343: step: 112/469, loss: 2.1856059902347624e-05 2023-01-22 14:59:59.637201: step: 114/469, loss: 2.4825678337947465e-05 2023-01-22 15:00:00.241121: step: 116/469, loss: 0.0014028786681592464 2023-01-22 15:00:00.923131: step: 118/469, loss: 6.449964712373912e-05 2023-01-22 15:00:01.525395: step: 120/469, loss: 0.021299973130226135 2023-01-22 15:00:02.115059: step: 122/469, loss: 0.01827310584485531 2023-01-22 15:00:02.828644: step: 124/469, loss: 0.01773155853152275 2023-01-22 15:00:03.450899: step: 126/469, loss: 0.0010096313199028373 2023-01-22 15:00:04.080732: step: 128/469, loss: 0.0034883213229477406 2023-01-22 15:00:04.752816: step: 130/469, loss: 0.031068120151758194 2023-01-22 15:00:05.314502: step: 132/469, loss: 0.17981663346290588 2023-01-22 15:00:05.944452: step: 134/469, loss: 0.011230730451643467 2023-01-22 15:00:06.650226: step: 136/469, loss: 0.11118075251579285 2023-01-22 15:00:07.336872: step: 138/469, loss: 0.00017911367467604578 2023-01-22 15:00:07.994961: step: 140/469, loss: 0.07104042172431946 2023-01-22 15:00:08.641003: step: 142/469, loss: 0.012461545877158642 2023-01-22 15:00:09.246007: step: 144/469, loss: 0.00015260098734870553 2023-01-22 15:00:09.877905: step: 146/469, loss: 0.003665447235107422 2023-01-22 15:00:10.587893: step: 148/469, loss: 0.007120559923350811 2023-01-22 15:00:11.224778: step: 150/469, loss: 0.0008562491275370121 2023-01-22 15:00:11.853845: step: 152/469, loss: 0.03952468931674957 2023-01-22 15:00:12.458560: step: 154/469, loss: 0.00041526317363604903 2023-01-22 15:00:13.130951: step: 156/469, loss: 0.00015986785001587123 2023-01-22 15:00:13.703394: step: 158/469, loss: 0.00011045135033782572 2023-01-22 15:00:14.279395: step: 160/469, loss: 0.0034824644681066275 2023-01-22 15:00:14.872194: step: 162/469, loss: 0.0001812041155062616 2023-01-22 15:00:15.492295: step: 164/469, loss: 0.005615279544144869 2023-01-22 15:00:16.121591: step: 166/469, loss: 0.02610810473561287 2023-01-22 15:00:16.736265: step: 168/469, loss: 0.035773828625679016 2023-01-22 15:00:17.338951: step: 170/469, loss: 0.3308396339416504 2023-01-22 15:00:17.947789: step: 172/469, loss: 7.553344767075032e-05 2023-01-22 15:00:18.575458: step: 174/469, loss: 0.09016861766576767 2023-01-22 15:00:19.173921: step: 176/469, loss: 0.0012278578942641616 2023-01-22 15:00:19.806340: step: 178/469, loss: 0.0007366069476120174 2023-01-22 15:00:20.418615: step: 180/469, loss: 0.00018095181440003216 2023-01-22 15:00:21.022761: step: 182/469, loss: 8.926808732212521e-06 2023-01-22 15:00:21.700132: step: 184/469, loss: 0.0029862928204238415 2023-01-22 
15:00:22.294046: step: 186/469, loss: 0.1733856350183487 2023-01-22 15:00:22.906318: step: 188/469, loss: 0.020981404930353165 2023-01-22 15:00:23.550164: step: 190/469, loss: 0.00013106894039083272 2023-01-22 15:00:24.223269: step: 192/469, loss: 0.015181186608970165 2023-01-22 15:00:24.766152: step: 194/469, loss: 3.205590837751515e-05 2023-01-22 15:00:25.443452: step: 196/469, loss: 0.0026021336670964956 2023-01-22 15:00:26.015808: step: 198/469, loss: 0.00040086277294903994 2023-01-22 15:00:26.639188: step: 200/469, loss: 0.0012865004828199744 2023-01-22 15:00:27.236712: step: 202/469, loss: 0.0005586662446148694 2023-01-22 15:00:27.863868: step: 204/469, loss: 0.012114351615309715 2023-01-22 15:00:28.459128: step: 206/469, loss: 0.050020214170217514 2023-01-22 15:00:29.035903: step: 208/469, loss: 0.01910516247153282 2023-01-22 15:00:29.623274: step: 210/469, loss: 0.00251079467125237 2023-01-22 15:00:30.213773: step: 212/469, loss: 0.011612393893301487 2023-01-22 15:00:30.811217: step: 214/469, loss: 0.004886446986347437 2023-01-22 15:00:31.461698: step: 216/469, loss: 0.005088290199637413 2023-01-22 15:00:32.064201: step: 218/469, loss: 0.017661696299910545 2023-01-22 15:00:32.727291: step: 220/469, loss: 0.0006772956112399697 2023-01-22 15:00:33.461402: step: 222/469, loss: 0.00578831322491169 2023-01-22 15:00:34.263345: step: 224/469, loss: 0.0015098800649866462 2023-01-22 15:00:34.867194: step: 226/469, loss: 0.0048980871215462685 2023-01-22 15:00:35.546155: step: 228/469, loss: 0.05263599008321762 2023-01-22 15:00:36.149834: step: 230/469, loss: 0.0008351249853149056 2023-01-22 15:00:36.799669: step: 232/469, loss: 0.0065918113104999065 2023-01-22 15:00:37.441620: step: 234/469, loss: 0.00393585255369544 2023-01-22 15:00:38.075662: step: 236/469, loss: 0.004989714361727238 2023-01-22 15:00:38.652213: step: 238/469, loss: 0.00044002989307045937 2023-01-22 15:00:39.251439: step: 240/469, loss: 0.0006200528587214649 2023-01-22 15:00:39.938662: step: 242/469, loss: 0.004732328932732344 2023-01-22 15:00:40.556961: step: 244/469, loss: 0.06446399539709091 2023-01-22 15:00:41.199802: step: 246/469, loss: 0.018771782517433167 2023-01-22 15:00:41.801222: step: 248/469, loss: 0.0024162803310900927 2023-01-22 15:00:42.469135: step: 250/469, loss: 0.0005391480517573655 2023-01-22 15:00:43.100181: step: 252/469, loss: 0.0045122853480279446 2023-01-22 15:00:43.851644: step: 254/469, loss: 0.00261093582957983 2023-01-22 15:00:44.445624: step: 256/469, loss: 0.000743781216442585 2023-01-22 15:00:45.127693: step: 258/469, loss: 0.00010305748583050445 2023-01-22 15:00:45.819420: step: 260/469, loss: 5.486128884513164e-07 2023-01-22 15:00:46.453242: step: 262/469, loss: 0.014851206913590431 2023-01-22 15:00:47.031961: step: 264/469, loss: 0.007517075631767511 2023-01-22 15:00:47.633475: step: 266/469, loss: 0.0023233280517160892 2023-01-22 15:00:48.276972: step: 268/469, loss: 0.009786678478121758 2023-01-22 15:00:48.882734: step: 270/469, loss: 0.0003873548412229866 2023-01-22 15:00:49.521379: step: 272/469, loss: 0.006946471985429525 2023-01-22 15:00:50.123567: step: 274/469, loss: 0.003758813254535198 2023-01-22 15:00:50.828090: step: 276/469, loss: 0.003824097802862525 2023-01-22 15:00:51.424532: step: 278/469, loss: 0.0009378312970511615 2023-01-22 15:00:51.966891: step: 280/469, loss: 0.0016444976208731532 2023-01-22 15:00:52.713515: step: 282/469, loss: 6.573781865881756e-05 2023-01-22 15:00:53.399779: step: 284/469, loss: 0.012586846947669983 2023-01-22 15:00:54.043134: step: 286/469, 
loss: 0.0011832331074401736 2023-01-22 15:00:54.621414: step: 288/469, loss: 0.017640678212046623 2023-01-22 15:00:55.237486: step: 290/469, loss: 0.0009549250244162977 2023-01-22 15:00:55.892300: step: 292/469, loss: 0.05993243679404259 2023-01-22 15:00:56.505751: step: 294/469, loss: 0.00037836923729628325 2023-01-22 15:00:57.153506: step: 296/469, loss: 0.000849375850521028 2023-01-22 15:00:57.863470: step: 298/469, loss: 0.5837039351463318 2023-01-22 15:00:58.439485: step: 300/469, loss: 0.00045387932914309204 2023-01-22 15:00:59.025529: step: 302/469, loss: 1.7525017028674483e-05 2023-01-22 15:00:59.656604: step: 304/469, loss: 0.010772112756967545 2023-01-22 15:01:00.277867: step: 306/469, loss: 0.0204091165214777 2023-01-22 15:01:00.910452: step: 308/469, loss: 0.0002220183814642951 2023-01-22 15:01:01.480659: step: 310/469, loss: 0.0054563977755606174 2023-01-22 15:01:02.135280: step: 312/469, loss: 0.005815763492137194 2023-01-22 15:01:02.751583: step: 314/469, loss: 0.0002758143236860633 2023-01-22 15:01:03.396980: step: 316/469, loss: 0.07313418388366699 2023-01-22 15:01:04.026251: step: 318/469, loss: 0.007582417689263821 2023-01-22 15:01:04.691519: step: 320/469, loss: 0.03991956263780594 2023-01-22 15:01:05.317373: step: 322/469, loss: 0.03166396915912628 2023-01-22 15:01:05.912751: step: 324/469, loss: 0.011305739171802998 2023-01-22 15:01:06.513882: step: 326/469, loss: 0.01131292525678873 2023-01-22 15:01:07.142944: step: 328/469, loss: 0.001254220143891871 2023-01-22 15:01:07.798003: step: 330/469, loss: 0.00580606609582901 2023-01-22 15:01:08.474804: step: 332/469, loss: 0.0020498523954302073 2023-01-22 15:01:09.159424: step: 334/469, loss: 7.834314601495862e-05 2023-01-22 15:01:09.716986: step: 336/469, loss: 0.008076257072389126 2023-01-22 15:01:10.266706: step: 338/469, loss: 0.0029463532846421003 2023-01-22 15:01:10.901408: step: 340/469, loss: 0.02310442179441452 2023-01-22 15:01:11.586849: step: 342/469, loss: 0.16581524908542633 2023-01-22 15:01:12.211517: step: 344/469, loss: 0.003446395741775632 2023-01-22 15:01:12.789951: step: 346/469, loss: 9.655807389208348e-07 2023-01-22 15:01:13.577600: step: 348/469, loss: 0.02838398888707161 2023-01-22 15:01:14.209417: step: 350/469, loss: 0.013428416103124619 2023-01-22 15:01:14.870267: step: 352/469, loss: 0.0110907768830657 2023-01-22 15:01:15.480621: step: 354/469, loss: 0.0035205520689487457 2023-01-22 15:01:16.076787: step: 356/469, loss: 0.011168516241014004 2023-01-22 15:01:16.688609: step: 358/469, loss: 0.02038724534213543 2023-01-22 15:01:17.297614: step: 360/469, loss: 1.6953126760199666e-05 2023-01-22 15:01:17.880518: step: 362/469, loss: 0.0064309025183320045 2023-01-22 15:01:18.479296: step: 364/469, loss: 0.00035900474176742136 2023-01-22 15:01:19.089550: step: 366/469, loss: 0.0017374753952026367 2023-01-22 15:01:19.722072: step: 368/469, loss: 0.035388797521591187 2023-01-22 15:01:20.405804: step: 370/469, loss: 0.0004407311789691448 2023-01-22 15:01:21.028877: step: 372/469, loss: 0.000823917449451983 2023-01-22 15:01:21.754215: step: 374/469, loss: 0.00952114351093769 2023-01-22 15:01:22.372575: step: 376/469, loss: 0.00024248902627732605 2023-01-22 15:01:22.986242: step: 378/469, loss: 0.07362029701471329 2023-01-22 15:01:23.608619: step: 380/469, loss: 0.5559163093566895 2023-01-22 15:01:24.241814: step: 382/469, loss: 0.0021526487544178963 2023-01-22 15:01:24.792890: step: 384/469, loss: 0.0008400852675549686 2023-01-22 15:01:25.378659: step: 386/469, loss: 0.0013083710800856352 2023-01-22 
15:01:26.003056: step: 388/469, loss: 4.7370594984386116e-05 2023-01-22 15:01:26.651132: step: 390/469, loss: 9.946803402272053e-06 2023-01-22 15:01:27.264169: step: 392/469, loss: 0.025393834337592125 2023-01-22 15:01:27.873967: step: 394/469, loss: 0.0015814976068213582 2023-01-22 15:01:28.412260: step: 396/469, loss: 0.017286112532019615 2023-01-22 15:01:29.067270: step: 398/469, loss: 0.4820334017276764 2023-01-22 15:01:29.666651: step: 400/469, loss: 0.0005268231616355479 2023-01-22 15:01:30.238762: step: 402/469, loss: 0.000354180607246235 2023-01-22 15:01:30.858743: step: 404/469, loss: 0.0019696198869496584 2023-01-22 15:01:31.455460: step: 406/469, loss: 0.014124004170298576 2023-01-22 15:01:32.053767: step: 408/469, loss: 0.0011038167867809534 2023-01-22 15:01:32.674022: step: 410/469, loss: 0.03709586709737778 2023-01-22 15:01:33.311032: step: 412/469, loss: 0.007256742566823959 2023-01-22 15:01:33.937184: step: 414/469, loss: 0.0031886377837508917 2023-01-22 15:01:34.652190: step: 416/469, loss: 0.0014762443024665117 2023-01-22 15:01:35.325910: step: 418/469, loss: 0.017940755933523178 2023-01-22 15:01:35.890669: step: 420/469, loss: 0.005015256814658642 2023-01-22 15:01:36.560476: step: 422/469, loss: 0.002521911868825555 2023-01-22 15:01:37.143038: step: 424/469, loss: 0.0002531056525185704 2023-01-22 15:01:37.785517: step: 426/469, loss: 0.00305581814609468 2023-01-22 15:01:38.415238: step: 428/469, loss: 0.01845567487180233 2023-01-22 15:01:39.064232: step: 430/469, loss: 0.010437524877488613 2023-01-22 15:01:39.767788: step: 432/469, loss: 0.01325505506247282 2023-01-22 15:01:40.392256: step: 434/469, loss: 0.002705496968701482 2023-01-22 15:01:40.990186: step: 436/469, loss: 0.027814706787467003 2023-01-22 15:01:41.552115: step: 438/469, loss: 0.0008967772591859102 2023-01-22 15:01:42.179146: step: 440/469, loss: 0.036574140191078186 2023-01-22 15:01:42.779669: step: 442/469, loss: 0.0014592144871130586 2023-01-22 15:01:43.446538: step: 444/469, loss: 0.006886797957122326 2023-01-22 15:01:44.076715: step: 446/469, loss: 0.004467473365366459 2023-01-22 15:01:44.702134: step: 448/469, loss: 0.0009431479265913367 2023-01-22 15:01:45.345245: step: 450/469, loss: 0.0025406964123249054 2023-01-22 15:01:45.966046: step: 452/469, loss: 0.018055371940135956 2023-01-22 15:01:46.569992: step: 454/469, loss: 0.010064304806292057 2023-01-22 15:01:47.148277: step: 456/469, loss: 0.0036551090888679028 2023-01-22 15:01:47.826442: step: 458/469, loss: 0.004080642480403185 2023-01-22 15:01:48.425317: step: 460/469, loss: 0.019044430926442146 2023-01-22 15:01:49.014901: step: 462/469, loss: 0.0016572517342865467 2023-01-22 15:01:49.633531: step: 464/469, loss: 0.0060867383144795895 2023-01-22 15:01:50.221952: step: 466/469, loss: 0.000536244479008019 2023-01-22 15:01:50.882471: step: 468/469, loss: 0.012677637860178947 2023-01-22 15:01:51.455339: step: 470/469, loss: 1.4633342289016582e-05 2023-01-22 15:01:52.089597: step: 472/469, loss: 0.0130271315574646 2023-01-22 15:01:52.685934: step: 474/469, loss: 0.01142079383134842 2023-01-22 15:01:53.279571: step: 476/469, loss: 0.0062408470548689365 2023-01-22 15:01:53.883177: step: 478/469, loss: 0.06313908100128174 2023-01-22 15:01:54.466314: step: 480/469, loss: 0.001148531213402748 2023-01-22 15:01:55.116497: step: 482/469, loss: 0.006836864165961742 2023-01-22 15:01:55.712731: step: 484/469, loss: 0.014268173836171627 2023-01-22 15:01:56.374809: step: 486/469, loss: 0.0007623200654052198 2023-01-22 15:01:56.982655: step: 488/469, loss: 
0.43366900086402893 2023-01-22 15:01:57.625702: step: 490/469, loss: 0.016756707802414894 2023-01-22 15:01:58.296285: step: 492/469, loss: 0.0017298609018325806 2023-01-22 15:01:58.900400: step: 494/469, loss: 0.007773790508508682 2023-01-22 15:01:59.571558: step: 496/469, loss: 0.14746889472007751 2023-01-22 15:02:00.164182: step: 498/469, loss: 0.00592234218493104 2023-01-22 15:02:00.746940: step: 500/469, loss: 0.008632895536720753 2023-01-22 15:02:01.338651: step: 502/469, loss: 0.020562294870615005 2023-01-22 15:02:02.027857: step: 504/469, loss: 0.00138269760645926 2023-01-22 15:02:02.606776: step: 506/469, loss: 0.039450839161872864 2023-01-22 15:02:03.230074: step: 508/469, loss: 0.12444482743740082 2023-01-22 15:02:03.811736: step: 510/469, loss: 0.003615305759012699 2023-01-22 15:02:04.409602: step: 512/469, loss: 0.010039028711616993 2023-01-22 15:02:04.941407: step: 514/469, loss: 0.0007576293428428471 2023-01-22 15:02:05.478712: step: 516/469, loss: 6.50722358841449e-05 2023-01-22 15:02:06.100562: step: 518/469, loss: 0.0018354099011048675 2023-01-22 15:02:06.738078: step: 520/469, loss: 0.0002575975959189236 2023-01-22 15:02:07.334114: step: 522/469, loss: 0.0008082690183073282 2023-01-22 15:02:07.922122: step: 524/469, loss: 0.00040417019044980407 2023-01-22 15:02:08.487133: step: 526/469, loss: 0.0014887795550748706 2023-01-22 15:02:09.080774: step: 528/469, loss: 0.4752001166343689 2023-01-22 15:02:09.723400: step: 530/469, loss: 0.0003663224051706493 2023-01-22 15:02:10.380986: step: 532/469, loss: 0.05763047933578491 2023-01-22 15:02:11.044645: step: 534/469, loss: 0.02389954961836338 2023-01-22 15:02:11.654709: step: 536/469, loss: 0.0033645592629909515 2023-01-22 15:02:12.299800: step: 538/469, loss: 3.387061042303685e-06 2023-01-22 15:02:12.962737: step: 540/469, loss: 0.01993384398519993 2023-01-22 15:02:13.524120: step: 542/469, loss: 0.009130529128015041 2023-01-22 15:02:14.096030: step: 544/469, loss: 0.03648785874247551 2023-01-22 15:02:14.681036: step: 546/469, loss: 0.011563469655811787 2023-01-22 15:02:15.219287: step: 548/469, loss: 0.0023125088773667812 2023-01-22 15:02:15.793255: step: 550/469, loss: 0.0007673653890378773 2023-01-22 15:02:16.375969: step: 552/469, loss: 0.003212862415239215 2023-01-22 15:02:16.952101: step: 554/469, loss: 0.015062791295349598 2023-01-22 15:02:17.517050: step: 556/469, loss: 0.004490231163799763 2023-01-22 15:02:18.213911: step: 558/469, loss: 0.009565253742039204 2023-01-22 15:02:18.856622: step: 560/469, loss: 0.00046789570478722453 2023-01-22 15:02:19.493245: step: 562/469, loss: 0.0007870469125919044 2023-01-22 15:02:20.123291: step: 564/469, loss: 2.5715095034684055e-05 2023-01-22 15:02:20.764718: step: 566/469, loss: 0.004579667001962662 2023-01-22 15:02:21.419566: step: 568/469, loss: 0.0014246586943045259 2023-01-22 15:02:22.054196: step: 570/469, loss: 0.0003634264867287129 2023-01-22 15:02:22.627564: step: 572/469, loss: 0.0011543008731678128 2023-01-22 15:02:23.245280: step: 574/469, loss: 0.008317421190440655 2023-01-22 15:02:23.833158: step: 576/469, loss: 0.008216132409870625 2023-01-22 15:02:24.414738: step: 578/469, loss: 0.011049808003008366 2023-01-22 15:02:25.059893: step: 580/469, loss: 0.4795406460762024 2023-01-22 15:02:25.688544: step: 582/469, loss: 0.02651878446340561 2023-01-22 15:02:26.254324: step: 584/469, loss: 0.016705136746168137 2023-01-22 15:02:26.882072: step: 586/469, loss: 0.07096443325281143 2023-01-22 15:02:27.421873: step: 588/469, loss: 0.00020236516138538718 2023-01-22 
15:02:28.058313: step: 590/469, loss: 0.011545329354703426 2023-01-22 15:02:28.699518: step: 592/469, loss: 0.0005384701071307063 2023-01-22 15:02:29.294246: step: 594/469, loss: 0.007825718261301517 2023-01-22 15:02:29.881414: step: 596/469, loss: 0.01749524660408497 2023-01-22 15:02:30.520284: step: 598/469, loss: 0.0029937727376818657 2023-01-22 15:02:31.107959: step: 600/469, loss: 0.0022448524832725525 2023-01-22 15:02:31.667587: step: 602/469, loss: 0.000597543315961957 2023-01-22 15:02:32.392554: step: 604/469, loss: 0.09895917028188705 2023-01-22 15:02:33.021002: step: 606/469, loss: 0.019120363518595695 2023-01-22 15:02:33.649240: step: 608/469, loss: 0.006476159673184156 2023-01-22 15:02:34.205310: step: 610/469, loss: 0.0002013736084336415 2023-01-22 15:02:34.774682: step: 612/469, loss: 0.04919161647558212 2023-01-22 15:02:35.381966: step: 614/469, loss: 0.0032582608982920647 2023-01-22 15:02:36.092468: step: 616/469, loss: 0.025594206526875496 2023-01-22 15:02:36.681797: step: 618/469, loss: 0.0001824445789679885 2023-01-22 15:02:37.340057: step: 620/469, loss: 0.0008693342097103596 2023-01-22 15:02:37.932700: step: 622/469, loss: 0.008048921823501587 2023-01-22 15:02:38.553818: step: 624/469, loss: 0.0008903730195015669 2023-01-22 15:02:39.184790: step: 626/469, loss: 0.0020310492254793644 2023-01-22 15:02:39.916535: step: 628/469, loss: 0.0005097966059111059 2023-01-22 15:02:40.547491: step: 630/469, loss: 0.00037166595575399697 2023-01-22 15:02:41.124565: step: 632/469, loss: 0.0008686490473337471 2023-01-22 15:02:41.727164: step: 634/469, loss: 0.0005579735152423382 2023-01-22 15:02:42.335268: step: 636/469, loss: 0.005188541021198034 2023-01-22 15:02:42.899267: step: 638/469, loss: 0.002648049732670188 2023-01-22 15:02:43.560677: step: 640/469, loss: 0.03972799703478813 2023-01-22 15:02:44.127518: step: 642/469, loss: 0.07975295186042786 2023-01-22 15:02:44.764899: step: 644/469, loss: 0.004514096304774284 2023-01-22 15:02:45.433424: step: 646/469, loss: 0.006104631349444389 2023-01-22 15:02:46.046529: step: 648/469, loss: 0.002300059190019965 2023-01-22 15:02:46.640545: step: 650/469, loss: 0.03398158773779869 2023-01-22 15:02:47.233277: step: 652/469, loss: 0.003969356417655945 2023-01-22 15:02:47.914706: step: 654/469, loss: 0.0026101006660610437 2023-01-22 15:02:48.517539: step: 656/469, loss: 0.0012079097796231508 2023-01-22 15:02:49.158005: step: 658/469, loss: 0.0003898576833307743 2023-01-22 15:02:49.780203: step: 660/469, loss: 0.012895661406219006 2023-01-22 15:02:50.457873: step: 662/469, loss: 0.011776790954172611 2023-01-22 15:02:51.133526: step: 664/469, loss: 0.028253180906176567 2023-01-22 15:02:51.815733: step: 666/469, loss: 0.005696744192391634 2023-01-22 15:02:52.379369: step: 668/469, loss: 0.004100794438272715 2023-01-22 15:02:52.952240: step: 670/469, loss: 0.004053592216223478 2023-01-22 15:02:53.649269: step: 672/469, loss: 0.0008256476721726358 2023-01-22 15:02:54.273098: step: 674/469, loss: 0.014969659969210625 2023-01-22 15:02:54.925762: step: 676/469, loss: 0.019797423854470253 2023-01-22 15:02:55.540838: step: 678/469, loss: 0.003951985388994217 2023-01-22 15:02:56.067305: step: 680/469, loss: 0.0038019197527319193 2023-01-22 15:02:56.664624: step: 682/469, loss: 0.47740209102630615 2023-01-22 15:02:57.235793: step: 684/469, loss: 0.0016613781917840242 2023-01-22 15:02:57.872866: step: 686/469, loss: 0.01725945807993412 2023-01-22 15:02:58.525531: step: 688/469, loss: 0.05918346717953682 2023-01-22 15:02:59.073730: step: 690/469, loss: 
0.002689868677407503 2023-01-22 15:02:59.638013: step: 692/469, loss: 0.0017075049690902233 2023-01-22 15:03:00.277957: step: 694/469, loss: 0.005542844533920288 2023-01-22 15:03:00.936865: step: 696/469, loss: 0.020259235054254532 2023-01-22 15:03:01.714787: step: 698/469, loss: 0.0008228069636970758 2023-01-22 15:03:02.364403: step: 700/469, loss: 0.00013446218508761376 2023-01-22 15:03:02.974709: step: 702/469, loss: 0.02992377057671547 2023-01-22 15:03:03.591214: step: 704/469, loss: 0.00042253901483491063 2023-01-22 15:03:04.235818: step: 706/469, loss: 0.0281726885586977 2023-01-22 15:03:04.874569: step: 708/469, loss: 0.0012305629206821322 2023-01-22 15:03:05.477956: step: 710/469, loss: 0.001158147701062262 2023-01-22 15:03:06.098193: step: 712/469, loss: 0.0007472842116840184 2023-01-22 15:03:06.717855: step: 714/469, loss: 0.013940438628196716 2023-01-22 15:03:07.326609: step: 716/469, loss: 0.0005880473181605339 2023-01-22 15:03:07.885798: step: 718/469, loss: 0.0005283617065288126 2023-01-22 15:03:08.456269: step: 720/469, loss: 0.06430403143167496 2023-01-22 15:03:09.030849: step: 722/469, loss: 0.005483551416546106 2023-01-22 15:03:09.697143: step: 724/469, loss: 0.0002046038134722039 2023-01-22 15:03:10.406716: step: 726/469, loss: 0.018971361219882965 2023-01-22 15:03:11.018507: step: 728/469, loss: 0.0009785378351807594 2023-01-22 15:03:11.634503: step: 730/469, loss: 0.0009495049598626792 2023-01-22 15:03:12.302751: step: 732/469, loss: 0.024526935070753098 2023-01-22 15:03:12.960208: step: 734/469, loss: 0.004857623018324375 2023-01-22 15:03:13.591704: step: 736/469, loss: 0.0015608284156769514 2023-01-22 15:03:14.231668: step: 738/469, loss: 0.01461784914135933 2023-01-22 15:03:14.836381: step: 740/469, loss: 0.02820703573524952 2023-01-22 15:03:15.438567: step: 742/469, loss: 0.0005968028563074768 2023-01-22 15:03:16.037616: step: 744/469, loss: 0.000444865960162133 2023-01-22 15:03:16.659439: step: 746/469, loss: 0.028595704585313797 2023-01-22 15:03:17.263109: step: 748/469, loss: 0.8248428702354431 2023-01-22 15:03:17.828300: step: 750/469, loss: 0.00023765770311001688 2023-01-22 15:03:18.439953: step: 752/469, loss: 0.0004923766828142107 2023-01-22 15:03:19.041098: step: 754/469, loss: 0.000211478240089491 2023-01-22 15:03:19.627796: step: 756/469, loss: 6.25285247224383e-05 2023-01-22 15:03:20.226186: step: 758/469, loss: 0.003036908805370331 2023-01-22 15:03:20.879775: step: 760/469, loss: 0.06792990118265152 2023-01-22 15:03:21.493321: step: 762/469, loss: 0.05790035054087639 2023-01-22 15:03:22.063685: step: 764/469, loss: 0.009418771602213383 2023-01-22 15:03:22.701827: step: 766/469, loss: 0.008413447998464108 2023-01-22 15:03:23.348454: step: 768/469, loss: 0.0005410366575233638 2023-01-22 15:03:23.945547: step: 770/469, loss: 0.003446622285991907 2023-01-22 15:03:24.600202: step: 772/469, loss: 0.0041089304722845554 2023-01-22 15:03:25.188845: step: 774/469, loss: 0.0015499526634812355 2023-01-22 15:03:25.841873: step: 776/469, loss: 0.003994627855718136 2023-01-22 15:03:26.478660: step: 778/469, loss: 0.007136105094105005 2023-01-22 15:03:27.095341: step: 780/469, loss: 0.038794975727796555 2023-01-22 15:03:27.691811: step: 782/469, loss: 7.773537618049886e-06 2023-01-22 15:03:28.231972: step: 784/469, loss: 0.02531854249536991 2023-01-22 15:03:28.804264: step: 786/469, loss: 0.014359182678163052 2023-01-22 15:03:29.379119: step: 788/469, loss: 0.002040768973529339 2023-01-22 15:03:30.029959: step: 790/469, loss: 0.003035011002793908 2023-01-22 
15:03:30.600471: step: 792/469, loss: 8.428594446741045e-05 2023-01-22 15:03:31.206249: step: 794/469, loss: 0.0001258325792150572 2023-01-22 15:03:31.865689: step: 796/469, loss: 0.024884812533855438 2023-01-22 15:03:32.493957: step: 798/469, loss: 0.0002696172159630805 2023-01-22 15:03:33.146173: step: 800/469, loss: 0.0005056412192061543 2023-01-22 15:03:33.756423: step: 802/469, loss: 0.0027451585046947002 2023-01-22 15:03:34.369172: step: 804/469, loss: 0.003099893219769001 2023-01-22 15:03:34.981025: step: 806/469, loss: 0.0025701054837554693 2023-01-22 15:03:35.644678: step: 808/469, loss: 0.003196586621925235 2023-01-22 15:03:36.313604: step: 810/469, loss: 0.01178525947034359 2023-01-22 15:03:36.978487: step: 812/469, loss: 0.040642641484737396 2023-01-22 15:03:37.641574: step: 814/469, loss: 7.571535110473633 2023-01-22 15:03:38.234539: step: 816/469, loss: 0.005943898111581802 2023-01-22 15:03:38.877245: step: 818/469, loss: 0.022087549790740013 2023-01-22 15:03:39.488031: step: 820/469, loss: 0.01484906766563654 2023-01-22 15:03:40.122552: step: 822/469, loss: 0.070688396692276 2023-01-22 15:03:40.731853: step: 824/469, loss: 0.002011804608628154 2023-01-22 15:03:41.400057: step: 826/469, loss: 0.002348933834582567 2023-01-22 15:03:42.091834: step: 828/469, loss: 0.002215781481936574 2023-01-22 15:03:42.779419: step: 830/469, loss: 0.0008301714551635087 2023-01-22 15:03:43.355921: step: 832/469, loss: 0.0051965718157589436 2023-01-22 15:03:44.043083: step: 834/469, loss: 0.06248040497303009 2023-01-22 15:03:44.605788: step: 836/469, loss: 0.0007692843209952116 2023-01-22 15:03:45.208950: step: 838/469, loss: 0.0013292405055835843 2023-01-22 15:03:45.801371: step: 840/469, loss: 0.0032913689501583576 2023-01-22 15:03:46.384862: step: 842/469, loss: 0.0073381816036999226 2023-01-22 15:03:47.063064: step: 844/469, loss: 0.02456783689558506 2023-01-22 15:03:47.654572: step: 846/469, loss: 0.05738891288638115 2023-01-22 15:03:48.258545: step: 848/469, loss: 0.0019288643961772323 2023-01-22 15:03:48.898403: step: 850/469, loss: 0.003935445565730333 2023-01-22 15:03:49.538435: step: 852/469, loss: 0.014951786957681179 2023-01-22 15:03:50.128108: step: 854/469, loss: 0.0013995375484228134 2023-01-22 15:03:50.753824: step: 856/469, loss: 0.0019490347476676106 2023-01-22 15:03:51.340009: step: 858/469, loss: 0.02062826231122017 2023-01-22 15:03:51.936489: step: 860/469, loss: 0.0016663874266669154 2023-01-22 15:03:52.518259: step: 862/469, loss: 0.470892995595932 2023-01-22 15:03:53.146166: step: 864/469, loss: 0.004955326206982136 2023-01-22 15:03:53.680439: step: 866/469, loss: 0.0010713455267250538 2023-01-22 15:03:54.299899: step: 868/469, loss: 0.006135144736617804 2023-01-22 15:03:54.962984: step: 870/469, loss: 0.017733190208673477 2023-01-22 15:03:55.532243: step: 872/469, loss: 0.017774365842342377 2023-01-22 15:03:56.129375: step: 874/469, loss: 0.0013761044247075915 2023-01-22 15:03:56.662086: step: 876/469, loss: 0.00018988788360729814 2023-01-22 15:03:57.236888: step: 878/469, loss: 0.0005332918372005224 2023-01-22 15:03:57.872303: step: 880/469, loss: 0.10966512560844421 2023-01-22 15:03:58.476072: step: 882/469, loss: 0.009215809404850006 2023-01-22 15:03:59.086472: step: 884/469, loss: 0.021503997966647148 2023-01-22 15:03:59.726805: step: 886/469, loss: 0.03035183809697628 2023-01-22 15:04:00.344984: step: 888/469, loss: 0.0015832387143746018 2023-01-22 15:04:01.076608: step: 890/469, loss: 0.0521547831594944 2023-01-22 15:04:01.649840: step: 892/469, loss: 
0.004446502309292555 2023-01-22 15:04:02.256794: step: 894/469, loss: 0.0028156894259154797 2023-01-22 15:04:02.883716: step: 896/469, loss: 0.007497936021536589 2023-01-22 15:04:03.524352: step: 898/469, loss: 0.013218437321484089 2023-01-22 15:04:04.133033: step: 900/469, loss: 0.026874646544456482 2023-01-22 15:04:04.816255: step: 902/469, loss: 0.01914488524198532 2023-01-22 15:04:05.427782: step: 904/469, loss: 0.00360128958709538 2023-01-22 15:04:06.179907: step: 906/469, loss: 0.011574283242225647 2023-01-22 15:04:06.795871: step: 908/469, loss: 0.0013154788175597787 2023-01-22 15:04:07.391541: step: 910/469, loss: 0.003331080311909318 2023-01-22 15:04:08.025894: step: 912/469, loss: 0.00806096289306879 2023-01-22 15:04:08.630032: step: 914/469, loss: 0.00961526483297348 2023-01-22 15:04:09.260780: step: 916/469, loss: 0.02750217728316784 2023-01-22 15:04:09.865455: step: 918/469, loss: 0.009093781001865864 2023-01-22 15:04:10.470632: step: 920/469, loss: 0.0008323579095304012 2023-01-22 15:04:11.111903: step: 922/469, loss: 0.0016904020449146628 2023-01-22 15:04:11.711519: step: 924/469, loss: 0.010574395768344402 2023-01-22 15:04:12.313353: step: 926/469, loss: 0.060014065355062485 2023-01-22 15:04:12.927008: step: 928/469, loss: 0.260230153799057 2023-01-22 15:04:13.545873: step: 930/469, loss: 0.010599013417959213 2023-01-22 15:04:14.194866: step: 932/469, loss: 0.17041371762752533 2023-01-22 15:04:14.769493: step: 934/469, loss: 0.0018142808694392443 2023-01-22 15:04:15.398376: step: 936/469, loss: 0.01914832927286625 2023-01-22 15:04:15.991620: step: 938/469, loss: 0.48395004868507385 ================================================== Loss: 0.041 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30503739137700536, 'r': 0.3148773072278765, 'f1': 0.3098792547321959}, 'combined': 0.2283320824342496, 'epoch': 37} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3131245737351909, 'r': 0.27387657135484217, 'f1': 0.2921884748568497}, 'combined': 0.1593755317400998, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3068707172951067, 'r': 0.3272511254646109, 'f1': 0.316733412524977}, 'combined': 0.2333825144920883, 'epoch': 37} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3143454668794837, 'r': 0.27638242787848477, 'f1': 0.29414410289307097}, 'combined': 0.16044223794167506, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3010714891048347, 'r': 0.3170676972546172, 'f1': 0.3088626182129081}, 'combined': 0.2275829818410902, 'epoch': 37} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30745162856715974, 'r': 0.2723544106941963, 'f1': 0.28884075516721064}, 'combined': 0.15754950281847852, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2936507936507936, 'r': 0.35238095238095235, 'f1': 0.3203463203463203}, 'combined': 0.21356421356421354, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2265625, 'r': 0.31521739130434784, 'f1': 0.2636363636363636}, 'combined': 0.1318181818181818, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:07:03.235870: step: 2/469, loss: 0.00012753703049384058 2023-01-22 15:07:03.831061: step: 4/469, loss: 0.0003398454573471099 2023-01-22 15:07:04.492214: step: 6/469, loss: 0.0039046138990670443 2023-01-22 15:07:05.126138: step: 8/469, loss: 0.0031364429742097855 2023-01-22 15:07:05.712844: step: 10/469, loss: 0.0007567218272015452 2023-01-22 15:07:06.389063: step: 12/469, loss: 0.0025369087234139442 2023-01-22 15:07:07.018321: step: 14/469, loss: 0.016507478430867195 2023-01-22 15:07:07.565996: step: 16/469, loss: 0.051569197326898575 2023-01-22 15:07:08.184822: step: 18/469, loss: 0.0052370913326740265 2023-01-22 15:07:08.787714: step: 20/469, loss: 4.446284583536908e-05 2023-01-22 15:07:09.387759: step: 22/469, loss: 0.00018782868573907763 2023-01-22 15:07:10.067400: step: 24/469, loss: 0.0011781727662310004 2023-01-22 15:07:10.664942: step: 26/469, loss: 0.0026432471349835396 2023-01-22 15:07:11.233161: step: 28/469, loss: 0.00699997553601861 2023-01-22 15:07:11.852554: step: 30/469, loss: 0.011381281539797783 
2023-01-22 15:07:12.451135: step: 32/469, loss: 0.00010784477490233257 2023-01-22 15:07:13.141200: step: 34/469, loss: 0.0006362306303344667 2023-01-22 15:07:13.788215: step: 36/469, loss: 0.05683068186044693 2023-01-22 15:07:14.421506: step: 38/469, loss: 0.5970094203948975 2023-01-22 15:07:15.024387: step: 40/469, loss: 0.01271666307002306 2023-01-22 15:07:15.658164: step: 42/469, loss: 0.012536873109638691 2023-01-22 15:07:16.255823: step: 44/469, loss: 0.5063958168029785 2023-01-22 15:07:16.884659: step: 46/469, loss: 0.011953196488320827 2023-01-22 15:07:17.446034: step: 48/469, loss: 0.010440017096698284 2023-01-22 15:07:18.083353: step: 50/469, loss: 0.0017809176351875067 2023-01-22 15:07:18.686845: step: 52/469, loss: 0.00205801404081285 2023-01-22 15:07:19.299381: step: 54/469, loss: 0.0007887992542237043 2023-01-22 15:07:19.916932: step: 56/469, loss: 0.0034836807753890753 2023-01-22 15:07:20.503261: step: 58/469, loss: 0.0007971806917339563 2023-01-22 15:07:21.190723: step: 60/469, loss: 0.004591221455484629 2023-01-22 15:07:21.782004: step: 62/469, loss: 0.008477076888084412 2023-01-22 15:07:22.379839: step: 64/469, loss: 0.03826672211289406 2023-01-22 15:07:23.011685: step: 66/469, loss: 0.06503720581531525 2023-01-22 15:07:23.598895: step: 68/469, loss: 0.003739919513463974 2023-01-22 15:07:24.292291: step: 70/469, loss: 0.0002656341530382633 2023-01-22 15:07:24.875356: step: 72/469, loss: 0.0011306344531476498 2023-01-22 15:07:25.496963: step: 74/469, loss: 0.006940985564142466 2023-01-22 15:07:26.031497: step: 76/469, loss: 0.00018072024977300316 2023-01-22 15:07:26.690958: step: 78/469, loss: 0.0032506794668734074 2023-01-22 15:07:27.305036: step: 80/469, loss: 0.02015191689133644 2023-01-22 15:07:27.937414: step: 82/469, loss: 0.30333465337753296 2023-01-22 15:07:28.595125: step: 84/469, loss: 0.03256223350763321 2023-01-22 15:07:29.179891: step: 86/469, loss: 0.000770751794334501 2023-01-22 15:07:29.850703: step: 88/469, loss: 0.00010630900214891881 2023-01-22 15:07:30.433067: step: 90/469, loss: 0.0025739348493516445 2023-01-22 15:07:30.965257: step: 92/469, loss: 0.0006421981379389763 2023-01-22 15:07:31.627801: step: 94/469, loss: 0.0023326845839619637 2023-01-22 15:07:32.320381: step: 96/469, loss: 0.0004707353364210576 2023-01-22 15:07:32.967466: step: 98/469, loss: 0.003543284721672535 2023-01-22 15:07:33.545044: step: 100/469, loss: 0.006560937501490116 2023-01-22 15:07:34.147752: step: 102/469, loss: 0.0035761953331530094 2023-01-22 15:07:34.719628: step: 104/469, loss: 0.005540677346289158 2023-01-22 15:07:35.453418: step: 106/469, loss: 0.015196459367871284 2023-01-22 15:07:36.040425: step: 108/469, loss: 0.003005946520715952 2023-01-22 15:07:36.653628: step: 110/469, loss: 0.0031792670488357544 2023-01-22 15:07:37.234997: step: 112/469, loss: 0.008570600301027298 2023-01-22 15:07:37.854599: step: 114/469, loss: 0.002000159118324518 2023-01-22 15:07:38.479593: step: 116/469, loss: 0.06860151886940002 2023-01-22 15:07:39.139333: step: 118/469, loss: 0.007608199026435614 2023-01-22 15:07:39.737579: step: 120/469, loss: 0.08729299902915955 2023-01-22 15:07:40.293340: step: 122/469, loss: 0.00823879987001419 2023-01-22 15:07:40.880039: step: 124/469, loss: 0.017600657418370247 2023-01-22 15:07:41.416886: step: 126/469, loss: 0.017034798860549927 2023-01-22 15:07:42.059585: step: 128/469, loss: 0.044462304562330246 2023-01-22 15:07:42.690910: step: 130/469, loss: 0.18946333229541779 2023-01-22 15:07:43.283491: step: 132/469, loss: 0.0031678848899900913 2023-01-22 
15:07:43.829676: step: 134/469, loss: 0.0029597370885312557 2023-01-22 15:07:44.418302: step: 136/469, loss: 0.011893881484866142 2023-01-22 15:07:45.018116: step: 138/469, loss: 0.054351650178432465 2023-01-22 15:07:45.596179: step: 140/469, loss: 1.7306028894381598e-05 2023-01-22 15:07:46.273635: step: 142/469, loss: 0.04485905170440674 2023-01-22 15:07:46.901492: step: 144/469, loss: 0.002444647019729018 2023-01-22 15:07:47.517649: step: 146/469, loss: 4.622690200805664 2023-01-22 15:07:48.170401: step: 148/469, loss: 0.05602993816137314 2023-01-22 15:07:48.868950: step: 150/469, loss: 0.07476110756397247 2023-01-22 15:07:49.498505: step: 152/469, loss: 0.0045829168520867825 2023-01-22 15:07:50.076441: step: 154/469, loss: 0.016624649986624718 2023-01-22 15:07:50.743891: step: 156/469, loss: 0.007370179984718561 2023-01-22 15:07:51.416316: step: 158/469, loss: 0.01567571982741356 2023-01-22 15:07:52.089113: step: 160/469, loss: 0.0013074036687612534 2023-01-22 15:07:52.724615: step: 162/469, loss: 0.0014897125074639916 2023-01-22 15:07:53.333241: step: 164/469, loss: 0.006558215711265802 2023-01-22 15:07:53.913924: step: 166/469, loss: 0.0064345537684857845 2023-01-22 15:07:54.501312: step: 168/469, loss: 0.0007995741325430572 2023-01-22 15:07:55.139736: step: 170/469, loss: 0.014650707133114338 2023-01-22 15:07:55.839225: step: 172/469, loss: 0.04634474590420723 2023-01-22 15:07:56.534018: step: 174/469, loss: 0.0007460745400749147 2023-01-22 15:07:57.149212: step: 176/469, loss: 0.0076140668243169785 2023-01-22 15:07:57.766423: step: 178/469, loss: 3.6223441384208854e-06 2023-01-22 15:07:58.382009: step: 180/469, loss: 0.0013359755976125598 2023-01-22 15:07:59.029942: step: 182/469, loss: 0.0037875857669860125 2023-01-22 15:07:59.730593: step: 184/469, loss: 0.005390759091824293 2023-01-22 15:08:00.306241: step: 186/469, loss: 0.005834782030433416 2023-01-22 15:08:01.019670: step: 188/469, loss: 6.37365592410788e-05 2023-01-22 15:08:01.667271: step: 190/469, loss: 0.026993609964847565 2023-01-22 15:08:02.242261: step: 192/469, loss: 0.000593524076975882 2023-01-22 15:08:02.812032: step: 194/469, loss: 0.0006906589260324836 2023-01-22 15:08:03.484963: step: 196/469, loss: 0.0038057200144976377 2023-01-22 15:08:04.214650: step: 198/469, loss: 0.033612195402383804 2023-01-22 15:08:04.919779: step: 200/469, loss: 0.005429660901427269 2023-01-22 15:08:05.564234: step: 202/469, loss: 0.031010059639811516 2023-01-22 15:08:06.223309: step: 204/469, loss: 8.553740917705e-05 2023-01-22 15:08:06.857082: step: 206/469, loss: 0.009626995772123337 2023-01-22 15:08:07.512410: step: 208/469, loss: 0.05773867294192314 2023-01-22 15:08:08.101663: step: 210/469, loss: 0.003889423096552491 2023-01-22 15:08:08.748748: step: 212/469, loss: 0.03576301038265228 2023-01-22 15:08:09.407687: step: 214/469, loss: 0.008747031912207603 2023-01-22 15:08:10.070789: step: 216/469, loss: 0.00959421880543232 2023-01-22 15:08:10.712070: step: 218/469, loss: 0.0017345475498586893 2023-01-22 15:08:11.311621: step: 220/469, loss: 0.019519519060850143 2023-01-22 15:08:11.909165: step: 222/469, loss: 0.016517382115125656 2023-01-22 15:08:12.525063: step: 224/469, loss: 0.007057040464133024 2023-01-22 15:08:13.169062: step: 226/469, loss: 0.00028294912772253156 2023-01-22 15:08:13.875465: step: 228/469, loss: 0.0016824085032567382 2023-01-22 15:08:14.493889: step: 230/469, loss: 0.005338934250175953 2023-01-22 15:08:15.062757: step: 232/469, loss: 0.005753990728408098 2023-01-22 15:08:15.756258: step: 234/469, loss: 
0.04637658968567848 2023-01-22 15:08:16.420572: step: 236/469, loss: 0.003527577966451645 2023-01-22 15:08:16.966977: step: 238/469, loss: 0.0018008939223363996 2023-01-22 15:08:17.578259: step: 240/469, loss: 0.0035127070732414722 2023-01-22 15:08:18.187493: step: 242/469, loss: 0.0016487244283780456 2023-01-22 15:08:18.783862: step: 244/469, loss: 0.0003422746667638421 2023-01-22 15:08:19.353293: step: 246/469, loss: 0.004949332680553198 2023-01-22 15:08:19.999716: step: 248/469, loss: 0.0001981878449441865 2023-01-22 15:08:20.636091: step: 250/469, loss: 0.024412959814071655 2023-01-22 15:08:21.267833: step: 252/469, loss: 0.009894313290715218 2023-01-22 15:08:21.814650: step: 254/469, loss: 0.005194500088691711 2023-01-22 15:08:22.462551: step: 256/469, loss: 0.0006939319428056479 2023-01-22 15:08:23.064363: step: 258/469, loss: 0.01006841380149126 2023-01-22 15:08:23.704958: step: 260/469, loss: 0.0022171945311129093 2023-01-22 15:08:24.306019: step: 262/469, loss: 0.0011323319049552083 2023-01-22 15:08:24.915325: step: 264/469, loss: 0.028261585161089897 2023-01-22 15:08:25.636065: step: 266/469, loss: 0.003775307908654213 2023-01-22 15:08:26.351317: step: 268/469, loss: 0.007662794087082148 2023-01-22 15:08:26.893753: step: 270/469, loss: 4.843069473281503e-05 2023-01-22 15:08:27.536685: step: 272/469, loss: 0.00922419410198927 2023-01-22 15:08:28.170518: step: 274/469, loss: 0.025499649345874786 2023-01-22 15:08:28.793203: step: 276/469, loss: 0.00046690506860613823 2023-01-22 15:08:29.450142: step: 278/469, loss: 0.015777206048369408 2023-01-22 15:08:30.024059: step: 280/469, loss: 0.00016869693354237825 2023-01-22 15:08:30.643407: step: 282/469, loss: 0.03249003365635872 2023-01-22 15:08:31.254836: step: 284/469, loss: 0.00014499310054816306 2023-01-22 15:08:31.854991: step: 286/469, loss: 9.50759495026432e-05 2023-01-22 15:08:32.553495: step: 288/469, loss: 0.01701691746711731 2023-01-22 15:08:33.205346: step: 290/469, loss: 0.012654634192585945 2023-01-22 15:08:33.756049: step: 292/469, loss: 0.0005531099741347134 2023-01-22 15:08:34.389251: step: 294/469, loss: 0.012154090218245983 2023-01-22 15:08:34.990812: step: 296/469, loss: 0.7882398366928101 2023-01-22 15:08:35.714068: step: 298/469, loss: 0.0001749203074723482 2023-01-22 15:08:36.338114: step: 300/469, loss: 0.017260221764445305 2023-01-22 15:08:36.990593: step: 302/469, loss: 0.002861148677766323 2023-01-22 15:08:37.625378: step: 304/469, loss: 0.012813948094844818 2023-01-22 15:08:38.311297: step: 306/469, loss: 0.03126691281795502 2023-01-22 15:08:38.973957: step: 308/469, loss: 0.0042886883020401 2023-01-22 15:08:39.565899: step: 310/469, loss: 0.0005286370287649333 2023-01-22 15:08:40.220139: step: 312/469, loss: 0.0025916260201483965 2023-01-22 15:08:40.916149: step: 314/469, loss: 0.00874342955648899 2023-01-22 15:08:41.522924: step: 316/469, loss: 0.001825582468882203 2023-01-22 15:08:42.187969: step: 318/469, loss: 0.0037278085947036743 2023-01-22 15:08:42.876595: step: 320/469, loss: 0.00016280909767374396 2023-01-22 15:08:43.460599: step: 322/469, loss: 0.04597877711057663 2023-01-22 15:08:44.056308: step: 324/469, loss: 0.0024294147733598948 2023-01-22 15:08:44.683414: step: 326/469, loss: 0.022655285894870758 2023-01-22 15:08:45.292760: step: 328/469, loss: 0.019259842112660408 2023-01-22 15:08:45.926652: step: 330/469, loss: 0.003929068800061941 2023-01-22 15:08:46.610529: step: 332/469, loss: 0.0030545189511030912 2023-01-22 15:08:47.194893: step: 334/469, loss: 0.03086966834962368 2023-01-22 
15:08:47.841953: step: 336/469, loss: 0.010108484886586666 2023-01-22 15:08:48.442918: step: 338/469, loss: 0.004394331015646458 2023-01-22 15:08:49.001057: step: 340/469, loss: 0.0010387523798272014 2023-01-22 15:08:49.597933: step: 342/469, loss: 0.0018661613576114178 2023-01-22 15:08:50.199561: step: 344/469, loss: 0.00034492459963075817 2023-01-22 15:08:50.844316: step: 346/469, loss: 0.014092102646827698 2023-01-22 15:08:51.459388: step: 348/469, loss: 0.00043628690764307976 2023-01-22 15:08:52.099210: step: 350/469, loss: 0.04604126140475273 2023-01-22 15:08:52.766546: step: 352/469, loss: 0.016869962215423584 2023-01-22 15:08:53.391338: step: 354/469, loss: 0.00908180233091116 2023-01-22 15:08:53.963812: step: 356/469, loss: 0.008587555028498173 2023-01-22 15:08:54.536502: step: 358/469, loss: 0.00016373336256947368 2023-01-22 15:08:55.129681: step: 360/469, loss: 0.003951522056013346 2023-01-22 15:08:55.740015: step: 362/469, loss: 0.000977140269242227 2023-01-22 15:08:56.339062: step: 364/469, loss: 0.00026099587557837367 2023-01-22 15:08:56.883076: step: 366/469, loss: 0.01923340931534767 2023-01-22 15:08:57.484897: step: 368/469, loss: 0.28835055232048035 2023-01-22 15:08:58.087840: step: 370/469, loss: 0.0004068380512762815 2023-01-22 15:08:58.718748: step: 372/469, loss: 0.0068396651186048985 2023-01-22 15:08:59.365145: step: 374/469, loss: 0.03188212215900421 2023-01-22 15:08:59.976645: step: 376/469, loss: 0.011523513123393059 2023-01-22 15:09:00.569546: step: 378/469, loss: 0.0038248023483902216 2023-01-22 15:09:01.200734: step: 380/469, loss: 0.0013167018769308925 2023-01-22 15:09:01.842880: step: 382/469, loss: 0.004772607237100601 2023-01-22 15:09:02.513394: step: 384/469, loss: 0.0043232315219938755 2023-01-22 15:09:03.136655: step: 386/469, loss: 0.001649960526265204 2023-01-22 15:09:03.782314: step: 388/469, loss: 0.0001340229791821912 2023-01-22 15:09:04.425085: step: 390/469, loss: 0.0026363946963101625 2023-01-22 15:09:05.072327: step: 392/469, loss: 0.0008659258601255715 2023-01-22 15:09:05.711924: step: 394/469, loss: 0.0011113679502159357 2023-01-22 15:09:06.464190: step: 396/469, loss: 0.01586460880935192 2023-01-22 15:09:07.057197: step: 398/469, loss: 0.6813976168632507 2023-01-22 15:09:07.715386: step: 400/469, loss: 0.006336328107863665 2023-01-22 15:09:08.454277: step: 402/469, loss: 0.027061088010668755 2023-01-22 15:09:09.068253: step: 404/469, loss: 0.006307321134954691 2023-01-22 15:09:09.707688: step: 406/469, loss: 0.06695932894945145 2023-01-22 15:09:10.320852: step: 408/469, loss: 0.012269905768334866 2023-01-22 15:09:10.994703: step: 410/469, loss: 0.005150848068296909 2023-01-22 15:09:11.563769: step: 412/469, loss: 0.005380583461374044 2023-01-22 15:09:12.172171: step: 414/469, loss: 0.011634535156190395 2023-01-22 15:09:12.742252: step: 416/469, loss: 0.03634500131011009 2023-01-22 15:09:13.363714: step: 418/469, loss: 0.02824183739721775 2023-01-22 15:09:13.909968: step: 420/469, loss: 0.015918325632810593 2023-01-22 15:09:14.470642: step: 422/469, loss: 0.23165559768676758 2023-01-22 15:09:15.073387: step: 424/469, loss: 0.006573736201971769 2023-01-22 15:09:15.638601: step: 426/469, loss: 0.0016460868064314127 2023-01-22 15:09:16.256706: step: 428/469, loss: 0.20384475588798523 2023-01-22 15:09:16.894902: step: 430/469, loss: 0.0012258534552529454 2023-01-22 15:09:17.523485: step: 432/469, loss: 0.013706815429031849 2023-01-22 15:09:18.155816: step: 434/469, loss: 0.042897991836071014 2023-01-22 15:09:18.796858: step: 436/469, loss: 
0.02255186066031456 2023-01-22 15:09:19.397028: step: 438/469, loss: 0.0777776837348938 2023-01-22 15:09:19.973049: step: 440/469, loss: 0.007089119870215654 2023-01-22 15:09:20.665752: step: 442/469, loss: 0.007612396962940693 2023-01-22 15:09:21.350426: step: 444/469, loss: 0.04386671259999275 2023-01-22 15:09:21.988983: step: 446/469, loss: 0.00020659025176428258 2023-01-22 15:09:22.601755: step: 448/469, loss: 0.000501752074342221 2023-01-22 15:09:23.222720: step: 450/469, loss: 0.003374500432983041 2023-01-22 15:09:23.839918: step: 452/469, loss: 0.08201678842306137 2023-01-22 15:09:24.449475: step: 454/469, loss: 0.01573663018643856 2023-01-22 15:09:25.073021: step: 456/469, loss: 0.00046490647946484387 2023-01-22 15:09:25.700016: step: 458/469, loss: 0.0004637721576727927 2023-01-22 15:09:26.365023: step: 460/469, loss: 0.07756218314170837 2023-01-22 15:09:26.966914: step: 462/469, loss: 0.002419006312265992 2023-01-22 15:09:27.508671: step: 464/469, loss: 1.2328708180575632e-05 2023-01-22 15:09:28.179623: step: 466/469, loss: 0.005360405892133713 2023-01-22 15:09:28.805879: step: 468/469, loss: 0.0007136064814403653 2023-01-22 15:09:29.402605: step: 470/469, loss: 0.0019342441810294986 2023-01-22 15:09:30.038789: step: 472/469, loss: 0.0008165700710378587 2023-01-22 15:09:30.691837: step: 474/469, loss: 0.002735595451667905 2023-01-22 15:09:31.345936: step: 476/469, loss: 0.023120015859603882 2023-01-22 15:09:31.968785: step: 478/469, loss: 0.00038176102680154145 2023-01-22 15:09:32.612340: step: 480/469, loss: 0.023487141355872154 2023-01-22 15:09:33.306350: step: 482/469, loss: 0.8848490715026855 2023-01-22 15:09:33.925261: step: 484/469, loss: 0.0044706896878778934 2023-01-22 15:09:34.491181: step: 486/469, loss: 0.014502924866974354 2023-01-22 15:09:35.054255: step: 488/469, loss: 0.007735269609838724 2023-01-22 15:09:35.635350: step: 490/469, loss: 0.01634342409670353 2023-01-22 15:09:36.324017: step: 492/469, loss: 0.004997485317289829 2023-01-22 15:09:36.928164: step: 494/469, loss: 0.2246975153684616 2023-01-22 15:09:37.547854: step: 496/469, loss: 0.014086057431995869 2023-01-22 15:09:38.166824: step: 498/469, loss: 0.00013090622087474912 2023-01-22 15:09:38.700676: step: 500/469, loss: 0.12599119544029236 2023-01-22 15:09:39.331166: step: 502/469, loss: 0.004129430279135704 2023-01-22 15:09:39.977024: step: 504/469, loss: 0.029582533985376358 2023-01-22 15:09:40.592482: step: 506/469, loss: 0.009352168999612331 2023-01-22 15:09:41.207463: step: 508/469, loss: 0.007790922187268734 2023-01-22 15:09:41.805320: step: 510/469, loss: 1.2287652492523193 2023-01-22 15:09:42.467204: step: 512/469, loss: 0.0032730766106396914 2023-01-22 15:09:43.115016: step: 514/469, loss: 0.0025268718600273132 2023-01-22 15:09:43.796490: step: 516/469, loss: 0.009384892880916595 2023-01-22 15:09:44.319328: step: 518/469, loss: 0.0007026235107332468 2023-01-22 15:09:44.913258: step: 520/469, loss: 0.0014882617397233844 2023-01-22 15:09:45.573070: step: 522/469, loss: 0.03958877548575401 2023-01-22 15:09:46.165933: step: 524/469, loss: 0.007507308851927519 2023-01-22 15:09:46.888655: step: 526/469, loss: 0.007090691011399031 2023-01-22 15:09:47.497533: step: 528/469, loss: 0.027186617255210876 2023-01-22 15:09:48.127837: step: 530/469, loss: 0.012380541302263737 2023-01-22 15:09:48.737418: step: 532/469, loss: 0.015014434233307838 2023-01-22 15:09:49.358577: step: 534/469, loss: 0.0004832313279621303 2023-01-22 15:09:49.993911: step: 536/469, loss: 0.012977574951946735 2023-01-22 15:09:50.642891: 
step: 538/469, loss: 0.01994369551539421 2023-01-22 15:09:51.219520: step: 540/469, loss: 0.0012370588956400752 2023-01-22 15:09:51.889432: step: 542/469, loss: 0.002112650079652667 2023-01-22 15:09:52.489052: step: 544/469, loss: 0.00645564217120409 2023-01-22 15:09:53.051318: step: 546/469, loss: 0.012297061271965504 2023-01-22 15:09:53.653455: step: 548/469, loss: 0.0006454386166296899 2023-01-22 15:09:54.248450: step: 550/469, loss: 0.0014267746591940522 2023-01-22 15:09:54.851808: step: 552/469, loss: 0.009479164145886898 2023-01-22 15:09:55.435386: step: 554/469, loss: 0.07529570907354355 2023-01-22 15:09:56.077649: step: 556/469, loss: 0.053799357265233994 2023-01-22 15:09:56.714825: step: 558/469, loss: 0.002171176951378584 2023-01-22 15:09:57.342769: step: 560/469, loss: 0.0010962956584990025 2023-01-22 15:09:57.984241: step: 562/469, loss: 0.012753731571137905 2023-01-22 15:09:58.580377: step: 564/469, loss: 0.08210617303848267 2023-01-22 15:09:59.176059: step: 566/469, loss: 0.07643083482980728 2023-01-22 15:09:59.937482: step: 568/469, loss: 0.0483868233859539 2023-01-22 15:10:00.538928: step: 570/469, loss: 2.3927494112285785e-05 2023-01-22 15:10:01.146141: step: 572/469, loss: 0.23457857966423035 2023-01-22 15:10:01.813157: step: 574/469, loss: 0.002064449479803443 2023-01-22 15:10:02.476620: step: 576/469, loss: 0.003332695923745632 2023-01-22 15:10:03.113984: step: 578/469, loss: 0.006315347272902727 2023-01-22 15:10:03.726268: step: 580/469, loss: 0.17449022829532623 2023-01-22 15:10:04.318912: step: 582/469, loss: 0.0014316319720819592 2023-01-22 15:10:04.927688: step: 584/469, loss: 0.1128283143043518 2023-01-22 15:10:05.551170: step: 586/469, loss: 0.011270860210061073 2023-01-22 15:10:06.147150: step: 588/469, loss: 1.1858852303703316e-05 2023-01-22 15:10:06.820962: step: 590/469, loss: 0.027867184951901436 2023-01-22 15:10:07.471218: step: 592/469, loss: 0.15390801429748535 2023-01-22 15:10:08.080956: step: 594/469, loss: 0.0001531842863187194 2023-01-22 15:10:08.661687: step: 596/469, loss: 0.0030719523783773184 2023-01-22 15:10:09.278501: step: 598/469, loss: 0.01468351949006319 2023-01-22 15:10:09.847623: step: 600/469, loss: 0.002717547584325075 2023-01-22 15:10:10.430336: step: 602/469, loss: 0.0015363650163635612 2023-01-22 15:10:11.058146: step: 604/469, loss: 0.06810019165277481 2023-01-22 15:10:11.777180: step: 606/469, loss: 0.007474309764802456 2023-01-22 15:10:12.380302: step: 608/469, loss: 0.010088734328746796 2023-01-22 15:10:12.992755: step: 610/469, loss: 0.05422927066683769 2023-01-22 15:10:13.624194: step: 612/469, loss: 0.011467122472822666 2023-01-22 15:10:14.251106: step: 614/469, loss: 0.040612462908029556 2023-01-22 15:10:14.845194: step: 616/469, loss: 0.000156102076289244 2023-01-22 15:10:15.461141: step: 618/469, loss: 0.024036496877670288 2023-01-22 15:10:16.133430: step: 620/469, loss: 0.06760215014219284 2023-01-22 15:10:16.715114: step: 622/469, loss: 0.052559491246938705 2023-01-22 15:10:17.317000: step: 624/469, loss: 0.002878944156691432 2023-01-22 15:10:17.921874: step: 626/469, loss: 0.030936047434806824 2023-01-22 15:10:18.403313: step: 628/469, loss: 0.04616130515933037 2023-01-22 15:10:19.019544: step: 630/469, loss: 0.0014087165473029017 2023-01-22 15:10:19.651423: step: 632/469, loss: 0.0077975899912416935 2023-01-22 15:10:20.259833: step: 634/469, loss: 0.023471761494874954 2023-01-22 15:10:20.863096: step: 636/469, loss: 0.00047905463725328445 2023-01-22 15:10:21.448003: step: 638/469, loss: 0.00016119459178298712 2023-01-22 
15:10:22.147099: step: 640/469, loss: 0.003902548924088478 2023-01-22 15:10:22.727322: step: 642/469, loss: 0.001081079593859613 2023-01-22 15:10:23.312376: step: 644/469, loss: 0.009610828012228012 2023-01-22 15:10:24.049132: step: 646/469, loss: 0.005661234725266695 2023-01-22 15:10:24.706893: step: 648/469, loss: 0.0032922604586929083 2023-01-22 15:10:25.359440: step: 650/469, loss: 0.018884431570768356 2023-01-22 15:10:26.012388: step: 652/469, loss: 0.023012464866042137 2023-01-22 15:10:26.633931: step: 654/469, loss: 0.03778444975614548 2023-01-22 15:10:27.292079: step: 656/469, loss: 0.33450689911842346 2023-01-22 15:10:27.945688: step: 658/469, loss: 0.0032547968439757824 2023-01-22 15:10:28.486500: step: 660/469, loss: 0.002117619151249528 2023-01-22 15:10:29.156625: step: 662/469, loss: 0.015873834490776062 2023-01-22 15:10:29.861390: step: 664/469, loss: 0.001754995551891625 2023-01-22 15:10:30.421025: step: 666/469, loss: 0.0003140690387226641 2023-01-22 15:10:31.061300: step: 668/469, loss: 0.00021501169248949736 2023-01-22 15:10:31.714408: step: 670/469, loss: 0.07802244275808334 2023-01-22 15:10:32.339140: step: 672/469, loss: 0.017197560518980026 2023-01-22 15:10:33.031475: step: 674/469, loss: 0.004624032415449619 2023-01-22 15:10:33.684125: step: 676/469, loss: 0.001015166286379099 2023-01-22 15:10:34.337445: step: 678/469, loss: 0.0028869023080915213 2023-01-22 15:10:34.883940: step: 680/469, loss: 0.0017006745329126716 2023-01-22 15:10:35.495003: step: 682/469, loss: 0.014343960210680962 2023-01-22 15:10:36.155464: step: 684/469, loss: 0.05828974395990372 2023-01-22 15:10:36.759711: step: 686/469, loss: 0.0001194150245282799 2023-01-22 15:10:37.349284: step: 688/469, loss: 4.410809924593195e-05 2023-01-22 15:10:37.959459: step: 690/469, loss: 0.0006212394800968468 2023-01-22 15:10:38.523256: step: 692/469, loss: 0.0012261979281902313 2023-01-22 15:10:39.152617: step: 694/469, loss: 0.005106035619974136 2023-01-22 15:10:39.798102: step: 696/469, loss: 0.024821607396006584 2023-01-22 15:10:40.428556: step: 698/469, loss: 0.2126006931066513 2023-01-22 15:10:41.062791: step: 700/469, loss: 0.0022250772453844547 2023-01-22 15:10:41.688757: step: 702/469, loss: 0.00044866380630992353 2023-01-22 15:10:42.375569: step: 704/469, loss: 0.027074845507740974 2023-01-22 15:10:43.038835: step: 706/469, loss: 0.04755845293402672 2023-01-22 15:10:43.645301: step: 708/469, loss: 0.018422473222017288 2023-01-22 15:10:44.284102: step: 710/469, loss: 0.00020046424469910562 2023-01-22 15:10:44.921234: step: 712/469, loss: 0.00556178530678153 2023-01-22 15:10:45.686314: step: 714/469, loss: 0.013901887461543083 2023-01-22 15:10:46.234746: step: 716/469, loss: 0.010391877964138985 2023-01-22 15:10:46.874814: step: 718/469, loss: 0.007928375154733658 2023-01-22 15:10:47.519472: step: 720/469, loss: 0.010893300175666809 2023-01-22 15:10:48.153209: step: 722/469, loss: 0.008237047120928764 2023-01-22 15:10:48.735191: step: 724/469, loss: 0.0003001186123583466 2023-01-22 15:10:49.400864: step: 726/469, loss: 0.4318578243255615 2023-01-22 15:10:49.943265: step: 728/469, loss: 0.00014769600238651037 2023-01-22 15:10:50.544329: step: 730/469, loss: 0.07802700996398926 2023-01-22 15:10:51.082133: step: 732/469, loss: 0.000807570933829993 2023-01-22 15:10:51.694965: step: 734/469, loss: 0.0030142159666866064 2023-01-22 15:10:52.239886: step: 736/469, loss: 0.0005414392799139023 2023-01-22 15:10:52.857652: step: 738/469, loss: 0.018303856253623962 2023-01-22 15:10:53.477142: step: 740/469, loss: 
0.11187437921762466 2023-01-22 15:10:54.074535: step: 742/469, loss: 0.007149531971663237 2023-01-22 15:10:54.665550: step: 744/469, loss: 0.003846829291433096 2023-01-22 15:10:55.198679: step: 746/469, loss: 0.004802349954843521 2023-01-22 15:10:55.778268: step: 748/469, loss: 0.001068518846295774 2023-01-22 15:10:56.425762: step: 750/469, loss: 0.0032422940712422132 2023-01-22 15:10:57.070330: step: 752/469, loss: 0.0010948532726615667 2023-01-22 15:10:57.743702: step: 754/469, loss: 0.0011246935464441776 2023-01-22 15:10:58.392509: step: 756/469, loss: 0.0019724294543266296 2023-01-22 15:10:58.972554: step: 758/469, loss: 0.006466195918619633 2023-01-22 15:10:59.595716: step: 760/469, loss: 0.03887542709708214 2023-01-22 15:11:00.294692: step: 762/469, loss: 0.004948553163558245 2023-01-22 15:11:01.022384: step: 764/469, loss: 0.004445853643119335 2023-01-22 15:11:01.615774: step: 766/469, loss: 0.01145121268928051 2023-01-22 15:11:02.208835: step: 768/469, loss: 0.009653294458985329 2023-01-22 15:11:02.789957: step: 770/469, loss: 0.0009770940523594618 2023-01-22 15:11:03.427375: step: 772/469, loss: 0.03544906899333 2023-01-22 15:11:04.040426: step: 774/469, loss: 0.00742124579846859 2023-01-22 15:11:04.683251: step: 776/469, loss: 0.016965607181191444 2023-01-22 15:11:05.219643: step: 778/469, loss: 0.012413175776600838 2023-01-22 15:11:06.001447: step: 780/469, loss: 0.000973120448179543 2023-01-22 15:11:06.666675: step: 782/469, loss: 0.00030484615126624703 2023-01-22 15:11:07.322188: step: 784/469, loss: 0.015686696395277977 2023-01-22 15:11:07.891448: step: 786/469, loss: 0.0005647270008921623 2023-01-22 15:11:08.545145: step: 788/469, loss: 3.056981222471222e-05 2023-01-22 15:11:09.186395: step: 790/469, loss: 0.05142643675208092 2023-01-22 15:11:09.859116: step: 792/469, loss: 0.0009963170159608126 2023-01-22 15:11:10.445925: step: 794/469, loss: 0.01668229140341282 2023-01-22 15:11:11.168163: step: 796/469, loss: 0.6107051968574524 2023-01-22 15:11:11.797192: step: 798/469, loss: 0.02283414453268051 2023-01-22 15:11:12.413769: step: 800/469, loss: 0.0021810675971210003 2023-01-22 15:11:13.119060: step: 802/469, loss: 0.024435829371213913 2023-01-22 15:11:13.673064: step: 804/469, loss: 0.27842965722084045 2023-01-22 15:11:14.314649: step: 806/469, loss: 0.007593396585434675 2023-01-22 15:11:14.910347: step: 808/469, loss: 0.00018909874779637903 2023-01-22 15:11:15.537696: step: 810/469, loss: 0.0016665005823597312 2023-01-22 15:11:16.204953: step: 812/469, loss: 0.009211063385009766 2023-01-22 15:11:16.898535: step: 814/469, loss: 0.004168577026575804 2023-01-22 15:11:17.494754: step: 816/469, loss: 0.044412530958652496 2023-01-22 15:11:18.216795: step: 818/469, loss: 2.7484076023101807 2023-01-22 15:11:18.797319: step: 820/469, loss: 0.059764403849840164 2023-01-22 15:11:19.439959: step: 822/469, loss: 0.0003559777105692774 2023-01-22 15:11:20.120978: step: 824/469, loss: 0.0015374531503766775 2023-01-22 15:11:20.837252: step: 826/469, loss: 0.02299226075410843 2023-01-22 15:11:21.525949: step: 828/469, loss: 0.001887224498204887 2023-01-22 15:11:22.185158: step: 830/469, loss: 0.001413281774148345 2023-01-22 15:11:22.754705: step: 832/469, loss: 0.01570207253098488 2023-01-22 15:11:23.391912: step: 834/469, loss: 0.0008477639057673514 2023-01-22 15:11:23.988311: step: 836/469, loss: 0.018604883924126625 2023-01-22 15:11:24.597690: step: 838/469, loss: 0.02100706286728382 2023-01-22 15:11:25.235426: step: 840/469, loss: 0.0008020714740268886 2023-01-22 15:11:25.864016: step: 
842/469, loss: 0.21190530061721802 2023-01-22 15:11:26.509229: step: 844/469, loss: 0.00027793191839009523 2023-01-22 15:11:27.064719: step: 846/469, loss: 0.0073113772086799145 2023-01-22 15:11:27.668299: step: 848/469, loss: 0.002676371717825532 2023-01-22 15:11:28.334829: step: 850/469, loss: 0.004364520311355591 2023-01-22 15:11:28.942021: step: 852/469, loss: 0.041826117783784866 2023-01-22 15:11:29.545724: step: 854/469, loss: 0.025660106912255287 2023-01-22 15:11:30.142178: step: 856/469, loss: 0.0005329117411747575 2023-01-22 15:11:30.802899: step: 858/469, loss: 0.01842450723052025 2023-01-22 15:11:31.435013: step: 860/469, loss: 0.11757441610097885 2023-01-22 15:11:32.004160: step: 862/469, loss: 0.04275765269994736 2023-01-22 15:11:32.601143: step: 864/469, loss: 0.010624093934893608 2023-01-22 15:11:33.190626: step: 866/469, loss: 0.01629812642931938 2023-01-22 15:11:33.780333: step: 868/469, loss: 0.19480657577514648 2023-01-22 15:11:34.377107: step: 870/469, loss: 0.19016137719154358 2023-01-22 15:11:35.038743: step: 872/469, loss: 0.0025323720183223486 2023-01-22 15:11:35.667530: step: 874/469, loss: 0.007500536274164915 2023-01-22 15:11:36.263642: step: 876/469, loss: 0.04277237877249718 2023-01-22 15:11:36.880088: step: 878/469, loss: 0.0017568464390933514 2023-01-22 15:11:37.506597: step: 880/469, loss: 0.002035855781286955 2023-01-22 15:11:38.114075: step: 882/469, loss: 0.0010857965098693967 2023-01-22 15:11:38.691161: step: 884/469, loss: 0.020509520545601845 2023-01-22 15:11:39.307710: step: 886/469, loss: 0.01151403971016407 2023-01-22 15:11:39.929685: step: 888/469, loss: 0.006275097373872995 2023-01-22 15:11:40.565150: step: 890/469, loss: 0.0022157912608236074 2023-01-22 15:11:41.120096: step: 892/469, loss: 0.003706187242642045 2023-01-22 15:11:41.709113: step: 894/469, loss: 0.01411911379545927 2023-01-22 15:11:42.317992: step: 896/469, loss: 0.01766563020646572 2023-01-22 15:11:42.936962: step: 898/469, loss: 0.00021050726354587823 2023-01-22 15:11:43.530349: step: 900/469, loss: 0.0180773064494133 2023-01-22 15:11:44.248224: step: 902/469, loss: 0.005115542560815811 2023-01-22 15:11:44.837439: step: 904/469, loss: 0.0007388739613816142 2023-01-22 15:11:45.461422: step: 906/469, loss: 0.00018669040582608432 2023-01-22 15:11:46.127871: step: 908/469, loss: 0.0010879577603191137 2023-01-22 15:11:46.793352: step: 910/469, loss: 0.014332884922623634 2023-01-22 15:11:47.417756: step: 912/469, loss: 0.0029448089189827442 2023-01-22 15:11:48.047075: step: 914/469, loss: 0.0010465006344020367 2023-01-22 15:11:48.647809: step: 916/469, loss: 0.004090795759111643 2023-01-22 15:11:49.213180: step: 918/469, loss: 0.11881612241268158 2023-01-22 15:11:49.891117: step: 920/469, loss: 0.35682061314582825 2023-01-22 15:11:50.530086: step: 922/469, loss: 0.056122783571481705 2023-01-22 15:11:51.174701: step: 924/469, loss: 0.0008888035663403571 2023-01-22 15:11:51.732103: step: 926/469, loss: 0.004167707171291113 2023-01-22 15:11:52.345735: step: 928/469, loss: 0.0038449056446552277 2023-01-22 15:11:52.980494: step: 930/469, loss: 0.0010881380876526237 2023-01-22 15:11:53.626426: step: 932/469, loss: 0.01623956300318241 2023-01-22 15:11:54.303386: step: 934/469, loss: 0.019334888085722923 2023-01-22 15:11:54.888516: step: 936/469, loss: 0.0003507888759486377 2023-01-22 15:11:55.455556: step: 938/469, loss: 0.0006681834347546101 ================================================== Loss: 0.049 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.31226208641182135, 'r': 0.33537066586165254, 'f1': 0.32340410047409124}, 'combined': 0.2382977582440672, 'epoch': 38} Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.311506463906179, 'r': 0.2721762790763046, 'f1': 0.2905162822562509}, 'combined': 0.15846342668522775, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30481606486659213, 'r': 0.3291088062791099, 'f1': 0.31649697246184477}, 'combined': 0.2332082954982014, 'epoch': 38} Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3136914655740459, 'r': 0.2783904131809923, 'f1': 0.2949885812960006}, 'combined': 0.16090286252509123, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3069049451736618, 'r': 0.3267052642171238, 'f1': 0.3164957247103387}, 'combined': 0.2332073761023548, 'epoch': 38} Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3116912918655, 'r': 0.2775333420720205, 'f1': 0.2936222314675}, 'combined': 0.16015758080045453, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24666666666666665, 'r': 0.35238095238095235, 'f1': 0.2901960784313725}, 'combined': 0.19346405228758168, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5} Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19} Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11} 
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:14:43.916561: step: 2/469, loss: 0.0030119754374027252 2023-01-22 15:14:44.451882: step: 4/469, loss: 0.002477311994880438 2023-01-22 15:14:45.005467: step: 6/469, loss: 0.013294542208313942 2023-01-22 15:14:45.620288: step: 8/469, loss: 0.000540114298928529 2023-01-22 15:14:46.202300: step: 10/469, loss: 0.0003245100087951869 2023-01-22 15:14:46.826137: step: 12/469, loss: 0.0332648903131485 2023-01-22 15:14:47.480951: step: 14/469, loss: 0.0003781408304348588 2023-01-22 15:14:48.144231: step: 16/469, loss: 0.011412395164370537 2023-01-22 15:14:48.768586: step: 18/469, loss: 0.07069984078407288 2023-01-22 15:14:49.407637: step: 20/469, loss: 0.017332054674625397 2023-01-22 15:14:50.045970: step: 22/469, loss: 0.001704319496639073 2023-01-22 15:14:50.659890: step: 24/469, loss: 0.004425748251378536 2023-01-22 15:14:51.259760: step: 26/469, loss: 0.0450458750128746 2023-01-22 15:14:51.841751: step: 28/469, loss: 0.003014008281752467 2023-01-22 15:14:52.420130: step: 30/469, loss: 0.008909519761800766 2023-01-22 15:14:53.034276: step: 32/469, loss: 0.014741238206624985 2023-01-22 15:14:53.669130: step: 34/469, loss: 0.005667850840836763 2023-01-22 15:14:54.312939: step: 36/469, loss: 0.02003190480172634 2023-01-22 15:14:54.941889: step: 38/469, loss: 0.03284147009253502 2023-01-22 15:14:55.529800: step: 40/469, loss: 0.000386177736800164 2023-01-22 15:14:56.167630: step: 42/469, loss: 0.00019314800738357008 2023-01-22 15:14:56.814857: step: 44/469, loss: 0.0005215840646997094 2023-01-22 15:14:57.510897: step: 46/469, loss: 0.0014168774941936135 2023-01-22 15:14:58.147347: step: 48/469, loss: 0.0005476261139847338 2023-01-22 15:14:58.739359: step: 50/469, loss: 0.04120873659849167 2023-01-22 15:14:59.345294: step: 52/469, loss: 0.00021077453857287765 2023-01-22 15:14:59.978641: step: 54/469, loss: 0.06059630587697029 2023-01-22 15:15:00.715471: step: 56/469, loss: 0.0011616606498137116 2023-01-22 15:15:01.334510: step: 58/469, loss: 0.027168288826942444 2023-01-22 15:15:02.040222: step: 60/469, loss: 0.010715505108237267 2023-01-22 15:15:02.668798: step: 62/469, loss: 0.006376428063958883 2023-01-22 15:15:03.221257: step: 64/469, loss: 0.006124431733042002 2023-01-22 15:15:03.807721: step: 66/469, loss: 0.020459473133087158 2023-01-22 15:15:04.409797: step: 68/469, loss: 0.0039391596801579 2023-01-22 15:15:05.046231: step: 70/469, loss: 0.08821238577365875 2023-01-22 15:15:05.696221: step: 72/469, loss: 0.010975842364132404 2023-01-22 15:15:06.263163: step: 74/469, loss: 0.00035055732587352395 2023-01-22 15:15:06.944682: step: 76/469, loss: 0.006269215140491724 2023-01-22 15:15:07.592734: step: 78/469, loss: 0.09885379672050476 2023-01-22 15:15:08.242181: step: 80/469, loss: 0.0010260924464091659 2023-01-22 15:15:08.898179: step: 82/469, loss: 0.0006054006516933441 2023-01-22 15:15:09.439183: 
step: 84/469, loss: 0.02720540389418602 2023-01-22 15:15:10.008029: step: 86/469, loss: 0.014896119013428688 2023-01-22 15:15:10.585488: step: 88/469, loss: 0.011236756108701229 2023-01-22 15:15:11.224535: step: 90/469, loss: 0.0060309115797281265 2023-01-22 15:15:11.804955: step: 92/469, loss: 0.006958479061722755 2023-01-22 15:15:12.440668: step: 94/469, loss: 0.0016738763079047203 2023-01-22 15:15:13.050217: step: 96/469, loss: 0.05706104636192322 2023-01-22 15:15:13.627440: step: 98/469, loss: 0.0050797052681446075 2023-01-22 15:15:14.202116: step: 100/469, loss: 0.00014971641940064728 2023-01-22 15:15:14.822186: step: 102/469, loss: 0.028395086526870728 2023-01-22 15:15:15.596033: step: 104/469, loss: 0.009888138622045517 2023-01-22 15:15:16.209082: step: 106/469, loss: 0.00012206560495542362 2023-01-22 15:15:16.881017: step: 108/469, loss: 0.0033363548573106527 2023-01-22 15:15:17.535222: step: 110/469, loss: 0.03384239599108696 2023-01-22 15:15:18.111932: step: 112/469, loss: 0.0065839351154863834 2023-01-22 15:15:18.717512: step: 114/469, loss: 0.0011425679549574852 2023-01-22 15:15:19.325628: step: 116/469, loss: 0.003612453117966652 2023-01-22 15:15:19.944028: step: 118/469, loss: 0.49945658445358276 2023-01-22 15:15:20.531225: step: 120/469, loss: 0.0940798670053482 2023-01-22 15:15:21.163280: step: 122/469, loss: 0.0006863982416689396 2023-01-22 15:15:21.764499: step: 124/469, loss: 0.0466776005923748 2023-01-22 15:15:22.382902: step: 126/469, loss: 0.004530573263764381 2023-01-22 15:15:22.948546: step: 128/469, loss: 0.030442632734775543 2023-01-22 15:15:23.549899: step: 130/469, loss: 0.0047458927147090435 2023-01-22 15:15:24.201940: step: 132/469, loss: 0.010832609608769417 2023-01-22 15:15:24.840002: step: 134/469, loss: 0.015890762209892273 2023-01-22 15:15:25.565377: step: 136/469, loss: 0.005582538899034262 2023-01-22 15:15:26.177250: step: 138/469, loss: 0.0020973137579858303 2023-01-22 15:15:26.743857: step: 140/469, loss: 0.023380955681204796 2023-01-22 15:15:27.430364: step: 142/469, loss: 0.01180985663086176 2023-01-22 15:15:28.072890: step: 144/469, loss: 0.0029723222833126783 2023-01-22 15:15:28.737486: step: 146/469, loss: 0.017057470977306366 2023-01-22 15:15:29.348292: step: 148/469, loss: 0.0005902117700316012 2023-01-22 15:15:30.026628: step: 150/469, loss: 0.0032326197251677513 2023-01-22 15:15:30.640035: step: 152/469, loss: 0.07234001904726028 2023-01-22 15:15:31.237698: step: 154/469, loss: 0.002089154440909624 2023-01-22 15:15:31.863311: step: 156/469, loss: 0.0006694797775708139 2023-01-22 15:15:32.535325: step: 158/469, loss: 6.99316369718872e-05 2023-01-22 15:15:33.186416: step: 160/469, loss: 0.00043517854646779597 2023-01-22 15:15:33.816331: step: 162/469, loss: 0.03860674053430557 2023-01-22 15:15:34.435575: step: 164/469, loss: 0.0001615319197298959 2023-01-22 15:15:35.116485: step: 166/469, loss: 0.17923331260681152 2023-01-22 15:15:35.722665: step: 168/469, loss: 0.011622265912592411 2023-01-22 15:15:36.319280: step: 170/469, loss: 0.006952633615583181 2023-01-22 15:15:36.948003: step: 172/469, loss: 0.039613474160432816 2023-01-22 15:15:37.576407: step: 174/469, loss: 0.03972364589571953 2023-01-22 15:15:38.119355: step: 176/469, loss: 0.0008014689665287733 2023-01-22 15:15:38.770102: step: 178/469, loss: 0.03004515916109085 2023-01-22 15:15:39.387295: step: 180/469, loss: 0.013035529293119907 2023-01-22 15:15:40.086018: step: 182/469, loss: 0.011972688138484955 2023-01-22 15:15:40.882486: step: 184/469, loss: 0.009005329571664333 2023-01-22 
15:15:41.493214: step: 186/469, loss: 0.0024666080716997385 2023-01-22 15:15:42.085675: step: 188/469, loss: 8.075930963968858e-05 2023-01-22 15:15:42.764225: step: 190/469, loss: 0.0006661887746304274 2023-01-22 15:15:43.460027: step: 192/469, loss: 0.023676201701164246 2023-01-22 15:15:44.169526: step: 194/469, loss: 0.004343714565038681 2023-01-22 15:15:44.832029: step: 196/469, loss: 0.006186078302562237 2023-01-22 15:15:45.484026: step: 198/469, loss: 0.00859355740249157 2023-01-22 15:15:46.152331: step: 200/469, loss: 0.0005010629538446665 2023-01-22 15:15:46.784571: step: 202/469, loss: 0.007475322112441063 2023-01-22 15:15:47.418509: step: 204/469, loss: 0.004866512026637793 2023-01-22 15:15:47.990708: step: 206/469, loss: 0.001928465673699975 2023-01-22 15:15:48.624986: step: 208/469, loss: 0.007554308976978064 2023-01-22 15:15:49.240280: step: 210/469, loss: 0.02795448526740074 2023-01-22 15:15:49.871710: step: 212/469, loss: 0.019564524292945862 2023-01-22 15:15:50.462807: step: 214/469, loss: 0.003723745234310627 2023-01-22 15:15:51.079339: step: 216/469, loss: 0.016949478536844254 2023-01-22 15:15:51.699342: step: 218/469, loss: 0.0032471558079123497 2023-01-22 15:15:52.296479: step: 220/469, loss: 0.010439512319862843 2023-01-22 15:15:52.963821: step: 222/469, loss: 0.0035251828376203775 2023-01-22 15:15:53.571139: step: 224/469, loss: 0.011053825728595257 2023-01-22 15:15:54.243964: step: 226/469, loss: 0.0009576024021953344 2023-01-22 15:15:54.889334: step: 228/469, loss: 0.056902024894952774 2023-01-22 15:15:55.575204: step: 230/469, loss: 0.002920982427895069 2023-01-22 15:15:56.159263: step: 232/469, loss: 0.6969336271286011 2023-01-22 15:15:56.822647: step: 234/469, loss: 0.008771332912147045 2023-01-22 15:15:57.470884: step: 236/469, loss: 0.0016970186261460185 2023-01-22 15:15:58.056526: step: 238/469, loss: 0.0005713562131859362 2023-01-22 15:15:58.630045: step: 240/469, loss: 0.005612066015601158 2023-01-22 15:15:59.275224: step: 242/469, loss: 0.001563920290209353 2023-01-22 15:15:59.909145: step: 244/469, loss: 0.02681146189570427 2023-01-22 15:16:00.535489: step: 246/469, loss: 3.165794987580739e-05 2023-01-22 15:16:01.095709: step: 248/469, loss: 0.0004317841667216271 2023-01-22 15:16:01.719585: step: 250/469, loss: 0.00024566068896092474 2023-01-22 15:16:02.385301: step: 252/469, loss: 0.051570042967796326 2023-01-22 15:16:02.964190: step: 254/469, loss: 2.16726530197775e-05 2023-01-22 15:16:03.611774: step: 256/469, loss: 0.00011379853822290897 2023-01-22 15:16:04.245862: step: 258/469, loss: 0.0030371742323040962 2023-01-22 15:16:04.878037: step: 260/469, loss: 0.0005059054819867015 2023-01-22 15:16:05.549364: step: 262/469, loss: 0.0034952748101204634 2023-01-22 15:16:06.135765: step: 264/469, loss: 0.0011962997959926724 2023-01-22 15:16:06.722659: step: 266/469, loss: 9.325591963715851e-05 2023-01-22 15:16:07.299553: step: 268/469, loss: 0.0006678030476905406 2023-01-22 15:16:07.979913: step: 270/469, loss: 0.0006335260113701224 2023-01-22 15:16:08.535622: step: 272/469, loss: 0.002706658560782671 2023-01-22 15:16:09.128213: step: 274/469, loss: 0.008834761567413807 2023-01-22 15:16:09.741035: step: 276/469, loss: 0.003155779791995883 2023-01-22 15:16:10.381350: step: 278/469, loss: 0.002005981747061014 2023-01-22 15:16:10.960139: step: 280/469, loss: 0.9146357774734497 2023-01-22 15:16:11.652803: step: 282/469, loss: 0.03377266228199005 2023-01-22 15:16:12.217523: step: 284/469, loss: 0.008990660309791565 2023-01-22 15:16:12.877107: step: 286/469, loss: 
0.02674446813762188 2023-01-22 15:16:13.519936: step: 288/469, loss: 0.00018079759320244193 2023-01-22 15:16:14.125084: step: 290/469, loss: 0.0032577719539403915 2023-01-22 15:16:14.774704: step: 292/469, loss: 0.00022912300482857972 2023-01-22 15:16:15.441068: step: 294/469, loss: 0.04188700392842293 2023-01-22 15:16:15.988555: step: 296/469, loss: 0.0006541315233334899 2023-01-22 15:16:16.678541: step: 298/469, loss: 0.00455709733068943 2023-01-22 15:16:17.377304: step: 300/469, loss: 0.04880499839782715 2023-01-22 15:16:17.979609: step: 302/469, loss: 0.016444897279143333 2023-01-22 15:16:18.620100: step: 304/469, loss: 0.021372556686401367 2023-01-22 15:16:19.252916: step: 306/469, loss: 0.00823103729635477 2023-01-22 15:16:19.911100: step: 308/469, loss: 0.002552240388467908 2023-01-22 15:16:20.567818: step: 310/469, loss: 0.0269322469830513 2023-01-22 15:16:21.143466: step: 312/469, loss: 0.002857362385839224 2023-01-22 15:16:21.830966: step: 314/469, loss: 0.04465809836983681 2023-01-22 15:16:22.464907: step: 316/469, loss: 0.0011126541066914797 2023-01-22 15:16:23.099994: step: 318/469, loss: 0.003672410501167178 2023-01-22 15:16:23.785420: step: 320/469, loss: 0.006917336490005255 2023-01-22 15:16:24.391937: step: 322/469, loss: 0.0008404464460909367 2023-01-22 15:16:24.967233: step: 324/469, loss: 0.025422999635338783 2023-01-22 15:16:25.612945: step: 326/469, loss: 0.0834602490067482 2023-01-22 15:16:26.220732: step: 328/469, loss: 0.0010484386002644897 2023-01-22 15:16:26.989697: step: 330/469, loss: 0.20623639225959778 2023-01-22 15:16:27.660906: step: 332/469, loss: 0.07975319772958755 2023-01-22 15:16:28.295437: step: 334/469, loss: 0.00032162253046408296 2023-01-22 15:16:28.831709: step: 336/469, loss: 0.03202521428465843 2023-01-22 15:16:29.452303: step: 338/469, loss: 0.018832555040717125 2023-01-22 15:16:30.041512: step: 340/469, loss: 0.0001598072558408603 2023-01-22 15:16:30.663106: step: 342/469, loss: 0.03233061358332634 2023-01-22 15:16:31.298044: step: 344/469, loss: 0.003593015717342496 2023-01-22 15:16:31.915092: step: 346/469, loss: 0.030679835006594658 2023-01-22 15:16:32.548998: step: 348/469, loss: 0.003391830250620842 2023-01-22 15:16:33.228616: step: 350/469, loss: 0.018660377711057663 2023-01-22 15:16:33.893700: step: 352/469, loss: 0.0004935087054036558 2023-01-22 15:16:34.480202: step: 354/469, loss: 5.612889435724355e-05 2023-01-22 15:16:35.058024: step: 356/469, loss: 7.583058322779834e-05 2023-01-22 15:16:35.613050: step: 358/469, loss: 0.0029486180283129215 2023-01-22 15:16:36.260859: step: 360/469, loss: 0.0005886894068680704 2023-01-22 15:16:36.897099: step: 362/469, loss: 0.016660086810588837 2023-01-22 15:16:37.521023: step: 364/469, loss: 0.0030485393945127726 2023-01-22 15:16:38.155720: step: 366/469, loss: 2.1959168910980225 2023-01-22 15:16:38.819942: step: 368/469, loss: 0.006272064987570047 2023-01-22 15:16:39.429904: step: 370/469, loss: 0.0006402720464393497 2023-01-22 15:16:39.989601: step: 372/469, loss: 0.0006316850194707513 2023-01-22 15:16:40.689357: step: 374/469, loss: 0.007134428713470697 2023-01-22 15:16:41.326081: step: 376/469, loss: 0.0076501937583088875 2023-01-22 15:16:41.951172: step: 378/469, loss: 0.003553960006684065 2023-01-22 15:16:42.562923: step: 380/469, loss: 0.02285035513341427 2023-01-22 15:16:43.252557: step: 382/469, loss: 0.003922155126929283 2023-01-22 15:16:43.895717: step: 384/469, loss: 0.7899556756019592 2023-01-22 15:16:44.487356: step: 386/469, loss: 0.0021147632505744696 2023-01-22 15:16:45.124862: 
step: 388/469, loss: 0.00772074144333601 2023-01-22 15:16:45.764313: step: 390/469, loss: 0.003524455474689603 2023-01-22 15:16:46.385873: step: 392/469, loss: 0.12726449966430664 2023-01-22 15:16:46.987275: step: 394/469, loss: 0.0011023666011169553 2023-01-22 15:16:47.611166: step: 396/469, loss: 0.043037544935941696 2023-01-22 15:16:48.227010: step: 398/469, loss: 0.029298389330506325 2023-01-22 15:16:48.807234: step: 400/469, loss: 0.002133794827386737 2023-01-22 15:16:49.440856: step: 402/469, loss: 0.010272110812366009 2023-01-22 15:16:50.043478: step: 404/469, loss: 0.00018153723794966936 2023-01-22 15:16:50.620668: step: 406/469, loss: 0.009023654274642467 2023-01-22 15:16:51.163816: step: 408/469, loss: 0.00046717398799955845 2023-01-22 15:16:51.783654: step: 410/469, loss: 0.017269618809223175 2023-01-22 15:16:52.427124: step: 412/469, loss: 0.004932466894388199 2023-01-22 15:16:53.058637: step: 414/469, loss: 0.3704630434513092 2023-01-22 15:16:53.674471: step: 416/469, loss: 0.06960838288068771 2023-01-22 15:16:54.378585: step: 418/469, loss: 0.03317670524120331 2023-01-22 15:16:54.990187: step: 420/469, loss: 0.014792066998779774 2023-01-22 15:16:55.566062: step: 422/469, loss: 0.012164157815277576 2023-01-22 15:16:56.147974: step: 424/469, loss: 0.0005053103668615222 2023-01-22 15:16:56.793276: step: 426/469, loss: 4.545390765997581e-05 2023-01-22 15:16:57.446219: step: 428/469, loss: 0.39047083258628845 2023-01-22 15:16:58.051746: step: 430/469, loss: 0.1179732158780098 2023-01-22 15:16:58.621129: step: 432/469, loss: 0.002892301417887211 2023-01-22 15:16:59.321160: step: 434/469, loss: 0.011080923490226269 2023-01-22 15:16:59.943318: step: 436/469, loss: 0.004454146604984999 2023-01-22 15:17:00.500653: step: 438/469, loss: 0.00016548072744626552 2023-01-22 15:17:01.136778: step: 440/469, loss: 0.0068971929140388966 2023-01-22 15:17:01.770358: step: 442/469, loss: 0.16627870500087738 2023-01-22 15:17:02.354067: step: 444/469, loss: 0.0004422983038239181 2023-01-22 15:17:02.954240: step: 446/469, loss: 0.0016291597858071327 2023-01-22 15:17:03.575910: step: 448/469, loss: 0.008061086758971214 2023-01-22 15:17:04.136724: step: 450/469, loss: 0.0023519310634583235 2023-01-22 15:17:04.787237: step: 452/469, loss: 0.883834183216095 2023-01-22 15:17:05.417029: step: 454/469, loss: 0.01137097179889679 2023-01-22 15:17:06.064545: step: 456/469, loss: 0.06367877125740051 2023-01-22 15:17:06.671145: step: 458/469, loss: 0.002710080938413739 2023-01-22 15:17:07.297616: step: 460/469, loss: 0.040760233998298645 2023-01-22 15:17:07.835115: step: 462/469, loss: 0.0007366880308836699 2023-01-22 15:17:08.446438: step: 464/469, loss: 0.005463487934321165 2023-01-22 15:17:09.040260: step: 466/469, loss: 0.05713071674108505 2023-01-22 15:17:09.643069: step: 468/469, loss: 0.02852305769920349 2023-01-22 15:17:10.301468: step: 470/469, loss: 0.021332694217562675 2023-01-22 15:17:10.905023: step: 472/469, loss: 0.06143294274806976 2023-01-22 15:17:11.550436: step: 474/469, loss: 0.003490180941298604 2023-01-22 15:17:12.137444: step: 476/469, loss: 0.007113657426089048 2023-01-22 15:17:12.748770: step: 478/469, loss: 0.029517650604248047 2023-01-22 15:17:13.485945: step: 480/469, loss: 0.19607967138290405 2023-01-22 15:17:14.084674: step: 482/469, loss: 0.004462911281734705 2023-01-22 15:17:14.663353: step: 484/469, loss: 0.0014543115394189954 2023-01-22 15:17:15.284059: step: 486/469, loss: 5.302381396177225e-05 2023-01-22 15:17:15.931824: step: 488/469, loss: 0.04611814022064209 2023-01-22 
15:17:16.496803: step: 490/469, loss: 0.0006607295363210142 2023-01-22 15:17:17.186385: step: 492/469, loss: 0.017223069444298744 2023-01-22 15:17:17.783359: step: 494/469, loss: 0.008610890246927738 2023-01-22 15:17:18.473871: step: 496/469, loss: 0.002424276201054454 2023-01-22 15:17:19.074408: step: 498/469, loss: 0.0024730777367949486 2023-01-22 15:17:19.740457: step: 500/469, loss: 0.0046568987891077995 2023-01-22 15:17:20.357758: step: 502/469, loss: 0.025810427963733673 2023-01-22 15:17:21.001864: step: 504/469, loss: 3.631127765402198e-05 2023-01-22 15:17:21.708698: step: 506/469, loss: 0.001211229944601655 2023-01-22 15:17:22.345610: step: 508/469, loss: 0.0042308964766561985 2023-01-22 15:17:22.961527: step: 510/469, loss: 2.4700466383364983e-05 2023-01-22 15:17:23.603545: step: 512/469, loss: 0.007518894970417023 2023-01-22 15:17:24.194662: step: 514/469, loss: 0.001220062025822699 2023-01-22 15:17:24.856010: step: 516/469, loss: 0.16600191593170166 2023-01-22 15:17:25.544603: step: 518/469, loss: 0.001965184463188052 2023-01-22 15:17:26.105835: step: 520/469, loss: 0.09088757634162903 2023-01-22 15:17:26.713983: step: 522/469, loss: 0.038489799946546555 2023-01-22 15:17:27.311786: step: 524/469, loss: 0.0004704766906797886 2023-01-22 15:17:28.003157: step: 526/469, loss: 0.019209854304790497 2023-01-22 15:17:28.546619: step: 528/469, loss: 0.0010908718686550856 2023-01-22 15:17:29.176648: step: 530/469, loss: 0.02724377065896988 2023-01-22 15:17:29.843286: step: 532/469, loss: 0.012407144531607628 2023-01-22 15:17:30.509730: step: 534/469, loss: 0.010673681274056435 2023-01-22 15:17:31.055262: step: 536/469, loss: 0.0015231132274493575 2023-01-22 15:17:31.706255: step: 538/469, loss: 0.6964307427406311 2023-01-22 15:17:32.265771: step: 540/469, loss: 0.0008318874170072377 2023-01-22 15:17:32.873650: step: 542/469, loss: 0.03426213562488556 2023-01-22 15:17:33.490746: step: 544/469, loss: 0.005462720058858395 2023-01-22 15:17:34.101398: step: 546/469, loss: 0.0018660806817933917 2023-01-22 15:17:34.753677: step: 548/469, loss: 0.015423593111336231 2023-01-22 15:17:35.374435: step: 550/469, loss: 0.007194102741777897 2023-01-22 15:17:35.974879: step: 552/469, loss: 0.00035338601446710527 2023-01-22 15:17:36.623997: step: 554/469, loss: 0.002550520235672593 2023-01-22 15:17:37.213650: step: 556/469, loss: 0.07456482201814651 2023-01-22 15:17:37.853660: step: 558/469, loss: 0.01086405199021101 2023-01-22 15:17:38.527682: step: 560/469, loss: 0.0004567425057757646 2023-01-22 15:17:39.151883: step: 562/469, loss: 0.06614360958337784 2023-01-22 15:17:39.794233: step: 564/469, loss: 0.00047429444384761155 2023-01-22 15:17:40.402562: step: 566/469, loss: 0.024727847427129745 2023-01-22 15:17:40.999766: step: 568/469, loss: 0.0026039599906653166 2023-01-22 15:17:41.569224: step: 570/469, loss: 0.0047595687210559845 2023-01-22 15:17:42.202560: step: 572/469, loss: 0.0011343127116560936 2023-01-22 15:17:42.899012: step: 574/469, loss: 0.02800801768898964 2023-01-22 15:17:43.544871: step: 576/469, loss: 0.001546868123114109 2023-01-22 15:17:44.172507: step: 578/469, loss: 0.0015743788098916411 2023-01-22 15:17:44.814981: step: 580/469, loss: 0.0419471301138401 2023-01-22 15:17:45.366599: step: 582/469, loss: 0.014405900612473488 2023-01-22 15:17:46.027641: step: 584/469, loss: 0.00592692568898201 2023-01-22 15:17:46.653002: step: 586/469, loss: 0.004139040131121874 2023-01-22 15:17:47.188300: step: 588/469, loss: 0.02994312159717083 2023-01-22 15:17:47.791783: step: 590/469, loss: 
0.0006825344171375036 2023-01-22 15:17:48.382886: step: 592/469, loss: 0.017383169382810593 2023-01-22 15:17:49.119841: step: 594/469, loss: 0.2890271246433258 2023-01-22 15:17:49.845467: step: 596/469, loss: 3.8460842915810645e-05 2023-01-22 15:17:50.466510: step: 598/469, loss: 0.02232646755874157 2023-01-22 15:17:51.079170: step: 600/469, loss: 0.00019514025188982487 2023-01-22 15:17:51.724579: step: 602/469, loss: 0.0007909578853286803 2023-01-22 15:17:52.270261: step: 604/469, loss: 0.014241098426282406 2023-01-22 15:17:52.856889: step: 606/469, loss: 0.001265901722945273 2023-01-22 15:17:53.505042: step: 608/469, loss: 0.004157170653343201 2023-01-22 15:17:54.191639: step: 610/469, loss: 0.02941458858549595 2023-01-22 15:17:54.835203: step: 612/469, loss: 0.22352951765060425 2023-01-22 15:17:55.441966: step: 614/469, loss: 0.01254553347826004 2023-01-22 15:17:56.026490: step: 616/469, loss: 0.0010224079014733434 2023-01-22 15:17:56.631997: step: 618/469, loss: 0.005898221395909786 2023-01-22 15:17:57.242976: step: 620/469, loss: 0.00041182778659276664 2023-01-22 15:17:57.843811: step: 622/469, loss: 0.01132738683372736 2023-01-22 15:17:58.437085: step: 624/469, loss: 0.02262009121477604 2023-01-22 15:17:59.068998: step: 626/469, loss: 0.002695016795769334 2023-01-22 15:17:59.758447: step: 628/469, loss: 0.04061281308531761 2023-01-22 15:18:00.374867: step: 630/469, loss: 0.014679528772830963 2023-01-22 15:18:00.968734: step: 632/469, loss: 0.0009139236062765121 2023-01-22 15:18:01.592657: step: 634/469, loss: 0.03422641009092331 2023-01-22 15:18:02.216072: step: 636/469, loss: 0.00804446917027235 2023-01-22 15:18:02.845532: step: 638/469, loss: 0.006519542541354895 2023-01-22 15:18:03.463631: step: 640/469, loss: 0.026553060859441757 2023-01-22 15:18:04.041789: step: 642/469, loss: 0.019603505730628967 2023-01-22 15:18:04.800379: step: 644/469, loss: 0.038403891026973724 2023-01-22 15:18:05.362430: step: 646/469, loss: 0.0019963104277849197 2023-01-22 15:18:06.000241: step: 648/469, loss: 0.006005199626088142 2023-01-22 15:18:06.638076: step: 650/469, loss: 0.0007043545483611524 2023-01-22 15:18:07.302463: step: 652/469, loss: 0.001996937906369567 2023-01-22 15:18:07.886063: step: 654/469, loss: 0.00488081993535161 2023-01-22 15:18:08.532138: step: 656/469, loss: 0.0031579190399497747 2023-01-22 15:18:09.120947: step: 658/469, loss: 4.6396431571338326e-05 2023-01-22 15:18:09.858040: step: 660/469, loss: 0.022864099591970444 2023-01-22 15:18:10.443605: step: 662/469, loss: 0.00975657906383276 2023-01-22 15:18:11.024946: step: 664/469, loss: 5.5786069424357265e-05 2023-01-22 15:18:11.690120: step: 666/469, loss: 0.0014591752551496029 2023-01-22 15:18:12.284154: step: 668/469, loss: 0.029773162677884102 2023-01-22 15:18:12.899163: step: 670/469, loss: 0.0008616966078989208 2023-01-22 15:18:13.504278: step: 672/469, loss: 0.00490968581289053 2023-01-22 15:18:14.123142: step: 674/469, loss: 0.00019307725597172976 2023-01-22 15:18:14.727957: step: 676/469, loss: 0.0011322894133627415 2023-01-22 15:18:15.356192: step: 678/469, loss: 0.00022908096434548497 2023-01-22 15:18:15.997061: step: 680/469, loss: 0.000859872146975249 2023-01-22 15:18:16.583271: step: 682/469, loss: 0.011646516621112823 2023-01-22 15:18:17.170656: step: 684/469, loss: 7.980142981978133e-05 2023-01-22 15:18:17.726105: step: 686/469, loss: 0.0006243081297725439 2023-01-22 15:18:18.313300: step: 688/469, loss: 0.002105759922415018 2023-01-22 15:18:18.934654: step: 690/469, loss: 0.5540243983268738 2023-01-22 
15:18:19.515146: step: 692/469, loss: 0.0010243217693641782 2023-01-22 15:18:20.153827: step: 694/469, loss: 0.03987590968608856 2023-01-22 15:18:20.799359: step: 696/469, loss: 0.03170924633741379 2023-01-22 15:18:21.446077: step: 698/469, loss: 4.712785448646173e-05 2023-01-22 15:18:22.040670: step: 700/469, loss: 0.0008510553743690252 2023-01-22 15:18:22.738542: step: 702/469, loss: 0.00013377561117522418 2023-01-22 15:18:23.332654: step: 704/469, loss: 0.0004218009707983583 2023-01-22 15:18:23.927999: step: 706/469, loss: 0.012497241608798504 2023-01-22 15:18:24.484363: step: 708/469, loss: 0.00014099344843998551 2023-01-22 15:18:25.080461: step: 710/469, loss: 0.001278732088394463 2023-01-22 15:18:25.662476: step: 712/469, loss: 0.002304119523614645 2023-01-22 15:18:26.261782: step: 714/469, loss: 0.0002952862996608019 2023-01-22 15:18:26.940966: step: 716/469, loss: 0.0002496714296285063 2023-01-22 15:18:27.547957: step: 718/469, loss: 0.0008478966192342341 2023-01-22 15:18:28.217674: step: 720/469, loss: 0.002209728816524148 2023-01-22 15:18:28.868104: step: 722/469, loss: 0.009437385946512222 2023-01-22 15:18:29.484770: step: 724/469, loss: 0.0032731054816395044 2023-01-22 15:18:30.097352: step: 726/469, loss: 0.005128795281052589 2023-01-22 15:18:30.765676: step: 728/469, loss: 0.0031885921489447355 2023-01-22 15:18:31.357003: step: 730/469, loss: 0.07506471127271652 2023-01-22 15:18:31.926358: step: 732/469, loss: 0.0030589159578084946 2023-01-22 15:18:32.575334: step: 734/469, loss: 0.0003622500516939908 2023-01-22 15:18:33.117036: step: 736/469, loss: 0.00045982073061168194 2023-01-22 15:18:33.789550: step: 738/469, loss: 0.0006731986650265753 2023-01-22 15:18:34.411283: step: 740/469, loss: 0.0002361301303608343 2023-01-22 15:18:35.090984: step: 742/469, loss: 6.249495345400646e-05 2023-01-22 15:18:35.638341: step: 744/469, loss: 0.0003132132114842534 2023-01-22 15:18:36.263017: step: 746/469, loss: 0.1405199021100998 2023-01-22 15:18:36.828971: step: 748/469, loss: 6.839424167992547e-05 2023-01-22 15:18:37.383685: step: 750/469, loss: 0.002533519407734275 2023-01-22 15:18:38.044261: step: 752/469, loss: 0.001349869417026639 2023-01-22 15:18:38.667096: step: 754/469, loss: 0.01379681471735239 2023-01-22 15:18:39.260287: step: 756/469, loss: 0.00016496462922077626 2023-01-22 15:18:39.932139: step: 758/469, loss: 0.001254603615961969 2023-01-22 15:18:40.609238: step: 760/469, loss: 0.0010512936860322952 2023-01-22 15:18:41.278322: step: 762/469, loss: 0.024918323382735252 2023-01-22 15:18:42.012384: step: 764/469, loss: 0.06979934871196747 2023-01-22 15:18:42.633412: step: 766/469, loss: 0.003951192833483219 2023-01-22 15:18:43.328482: step: 768/469, loss: 0.0005764127126894891 2023-01-22 15:18:44.005365: step: 770/469, loss: 0.10904546082019806 2023-01-22 15:18:44.677252: step: 772/469, loss: 0.0023626761976629496 2023-01-22 15:18:45.313097: step: 774/469, loss: 0.0029543868731707335 2023-01-22 15:18:45.946606: step: 776/469, loss: 0.06157940998673439 2023-01-22 15:18:46.531480: step: 778/469, loss: 0.0031244659330695868 2023-01-22 15:18:47.161873: step: 780/469, loss: 0.00601613987237215 2023-01-22 15:18:47.772139: step: 782/469, loss: 0.026845060288906097 2023-01-22 15:18:48.415651: step: 784/469, loss: 0.0007251825300045311 2023-01-22 15:18:49.115743: step: 786/469, loss: 0.02268672175705433 2023-01-22 15:18:49.760715: step: 788/469, loss: 0.0004044454835820943 2023-01-22 15:18:50.369163: step: 790/469, loss: 0.008996649645268917 2023-01-22 15:18:50.962138: step: 792/469, 
loss: 0.00010959096107399091 2023-01-22 15:18:51.570948: step: 794/469, loss: 0.0003383233561180532 2023-01-22 15:18:52.201869: step: 796/469, loss: 0.00963415578007698 2023-01-22 15:18:52.765402: step: 798/469, loss: 0.005896236281841993 2023-01-22 15:18:53.361285: step: 800/469, loss: 0.0011253239354118705 2023-01-22 15:18:53.985370: step: 802/469, loss: 0.0005202133324928582 2023-01-22 15:18:54.565115: step: 804/469, loss: 0.03894947096705437 2023-01-22 15:18:55.151597: step: 806/469, loss: 0.0006364258006215096 2023-01-22 15:18:55.702131: step: 808/469, loss: 0.0010266718454658985 2023-01-22 15:18:56.306429: step: 810/469, loss: 0.0001264224702026695 2023-01-22 15:18:56.891386: step: 812/469, loss: 0.008374189957976341 2023-01-22 15:18:57.491288: step: 814/469, loss: 0.0027772036846727133 2023-01-22 15:18:58.039467: step: 816/469, loss: 0.001290840096771717 2023-01-22 15:18:58.734964: step: 818/469, loss: 0.0027226342353969812 2023-01-22 15:18:59.318630: step: 820/469, loss: 0.003282391233369708 2023-01-22 15:18:59.882769: step: 822/469, loss: 0.005161920562386513 2023-01-22 15:19:00.588129: step: 824/469, loss: 0.052473343908786774 2023-01-22 15:19:01.194518: step: 826/469, loss: 0.005467752460390329 2023-01-22 15:19:01.815049: step: 828/469, loss: 0.00017220845620613545 2023-01-22 15:19:02.458308: step: 830/469, loss: 0.021755851805210114 2023-01-22 15:19:03.105589: step: 832/469, loss: 0.021843472495675087 2023-01-22 15:19:03.808953: step: 834/469, loss: 0.06006266549229622 2023-01-22 15:19:04.426760: step: 836/469, loss: 0.09590421617031097 2023-01-22 15:19:05.134841: step: 838/469, loss: 0.013243522495031357 2023-01-22 15:19:05.774003: step: 840/469, loss: 0.020998096093535423 2023-01-22 15:19:06.396907: step: 842/469, loss: 0.01351924054324627 2023-01-22 15:19:07.071106: step: 844/469, loss: 0.0011390522122383118 2023-01-22 15:19:07.693072: step: 846/469, loss: 0.0002918808604590595 2023-01-22 15:19:08.265521: step: 848/469, loss: 0.005110482685267925 2023-01-22 15:19:08.835562: step: 850/469, loss: 0.002068703528493643 2023-01-22 15:19:09.462616: step: 852/469, loss: 0.0002787143748719245 2023-01-22 15:19:10.048892: step: 854/469, loss: 0.030446400865912437 2023-01-22 15:19:10.636816: step: 856/469, loss: 0.02337447553873062 2023-01-22 15:19:11.244902: step: 858/469, loss: 0.004919606260955334 2023-01-22 15:19:11.946812: step: 860/469, loss: 0.11173321306705475 2023-01-22 15:19:12.579709: step: 862/469, loss: 0.0001346757635474205 2023-01-22 15:19:13.224903: step: 864/469, loss: 0.061282794922590256 2023-01-22 15:19:13.853075: step: 866/469, loss: 0.00018122194160241634 2023-01-22 15:19:14.429783: step: 868/469, loss: 0.003808876732364297 2023-01-22 15:19:14.980843: step: 870/469, loss: 0.003564674872905016 2023-01-22 15:19:15.605608: step: 872/469, loss: 0.0012337930966168642 2023-01-22 15:19:16.193129: step: 874/469, loss: 0.003552865469828248 2023-01-22 15:19:16.772944: step: 876/469, loss: 0.009105992503464222 2023-01-22 15:19:17.393875: step: 878/469, loss: 0.024139082059264183 2023-01-22 15:19:18.025214: step: 880/469, loss: 0.0015041761798784137 2023-01-22 15:19:18.588619: step: 882/469, loss: 0.0035050634760409594 2023-01-22 15:19:19.204833: step: 884/469, loss: 0.0010745482286438346 2023-01-22 15:19:19.830268: step: 886/469, loss: 4.355171768111177e-05 2023-01-22 15:19:20.478558: step: 888/469, loss: 0.38475853204727173 2023-01-22 15:19:21.107420: step: 890/469, loss: 0.00010159765224670991 2023-01-22 15:19:21.767972: step: 892/469, loss: 0.0024965463671833277 
2023-01-22 15:19:22.383924: step: 894/469, loss: 0.002298385603353381 2023-01-22 15:19:23.030675: step: 896/469, loss: 0.004638176877051592 2023-01-22 15:19:23.585015: step: 898/469, loss: 0.0022129861172288656 2023-01-22 15:19:24.223886: step: 900/469, loss: 0.01527074258774519 2023-01-22 15:19:24.913354: step: 902/469, loss: 0.45652323961257935 2023-01-22 15:19:25.597290: step: 904/469, loss: 0.004197975154966116 2023-01-22 15:19:26.226857: step: 906/469, loss: 0.007649663835763931 2023-01-22 15:19:26.830747: step: 908/469, loss: 0.00026691131643019617 2023-01-22 15:19:27.455863: step: 910/469, loss: 0.055212393403053284 2023-01-22 15:19:28.067363: step: 912/469, loss: 0.007090070750564337 2023-01-22 15:19:28.693166: step: 914/469, loss: 0.28060656785964966 2023-01-22 15:19:29.301954: step: 916/469, loss: 0.022623805329203606 2023-01-22 15:19:29.990045: step: 918/469, loss: 0.007064515259116888 2023-01-22 15:19:30.634776: step: 920/469, loss: 0.010560913942754269 2023-01-22 15:19:31.295222: step: 922/469, loss: 0.005171039141714573 2023-01-22 15:19:31.934992: step: 924/469, loss: 9.960651368601248e-05 2023-01-22 15:19:32.614803: step: 926/469, loss: 0.0042560952715575695 2023-01-22 15:19:33.171396: step: 928/469, loss: 0.0013860042672604322 2023-01-22 15:19:33.801807: step: 930/469, loss: 0.03449440747499466 2023-01-22 15:19:34.423882: step: 932/469, loss: 0.0010424494976177812 2023-01-22 15:19:35.034038: step: 934/469, loss: 0.00033884294680319726 2023-01-22 15:19:35.622218: step: 936/469, loss: 0.007781301159411669 2023-01-22 15:19:36.204110: step: 938/469, loss: 0.004696676507592201
==================================================
Loss: 0.036
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31905507167483216, 'r': 0.3033142142487493, 'f1': 0.3109855854262469}, 'combined': 0.22914727347197136, 'epoch': 39}
Test Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3254036890919158, 'r': 0.2560358395416721, 'f1': 0.28658184600004877}, 'combined': 0.15631737054548114, 'epoch': 39}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31984411136808394, 'r': 0.3101334742107987, 'f1': 0.31491395165528113}, 'combined': 0.23204185911441766, 'epoch': 39}
Test Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3318611079466343, 'r': 0.2595985794093068, 'f1': 0.2913154489675281}, 'combined': 0.15889933580046986, 'epoch': 39}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3061626438806671, 'r': 0.29977215226266457, 'f1': 0.3029336994102095}, 'combined': 0.2232143048285754, 'epoch': 39}
Test Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.31818342418736123, 'r': 0.25879694794379154, 'f1': 0.28543396983104347}, 'combined': 0.15569125627147823, 'epoch': 39}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27416666666666667, 'r': 0.3916666666666666, 'f1': 0.3225490196078431}, 'combined': 0.21503267973856205, 'epoch': 39}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2578125, 'r': 0.358695652173913, 'f1': 0.29999999999999993}, 'combined': 0.14999999999999997, 'epoch': 39}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.484375, 'r': 0.2672413793103448, 'f1': 0.34444444444444444}, 'combined': 0.22962962962962963, 'epoch': 39}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31888917004048584, 'r': 0.2989207779886148, 'f1': 0.30858227228207646}, 'combined': 0.22737641115521423, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.30513050261426883, 'r': 0.23645520193438765, 'f1': 0.26643869661266567}, 'combined': 0.1453301981523631, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31746031746031744, 'r': 0.38095238095238093, 'f1': 0.3463203463203463}, 'combined': 0.23088023088023085, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2941166102650874, 'r': 0.32983475648323846, 'f1': 0.31095333929636254}, 'combined': 0.2291235131657408, 'epoch': 19}
Test for Korean: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3106598550207057, 'r': 0.26660470632152056, 'f1': 0.2869512004031728}, 'combined': 0.1565188365835488, 'epoch': 19}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31896551724137934, 'r': 0.40217391304347827, 'f1': 0.3557692307692308}, 'combined': 0.1778846153846154, 'epoch': 19}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30242135144673826, 'r': 0.32939251561751, 'f1': 0.3153312547328388}, 'combined': 0.23234934559261805, 'epoch': 11}
Test for Russian: {'template': {'p': 0.8059701492537313, 'r': 0.4122137404580153, 'f1': 0.5454545454545454}, 'slot': {'p': 0.3081402220425713, 'r': 0.2703627382788892, 'f1': 0.28801800481367046}, 'combined': 0.15710072989836568, 'epoch': 11}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 11}
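For reference, the numbers in the summaries above appear to be internally consistent: each 'f1' is the harmonic mean of its 'p' and 'r', and each 'combined' value matches the product of the template F1 and the slot F1 in the same entry. The short Python sketch below reproduces the epoch-39 Dev Chinese line from this log; the helper names are hypothetical and this is not the training code itself, only an illustration of how the logged fields relate.

    def f1(p, r):
        # Harmonic mean of precision and recall; 0.0 when both are zero.
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    def combined_score(template_f1, slot_f1):
        # The logged 'combined' field matches template F1 multiplied by slot F1.
        return template_f1 * slot_f1

    template_f1 = f1(1.0, 0.5833333333333334)               # ~0.7368421052631579
    slot_f1 = f1(0.31905507167483216, 0.3033142142487493)   # ~0.3109855854262469
    print(combined_score(template_f1, slot_f1))             # ~0.22914727347197136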