Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:27:23.188532: step: 2/470, loss: 18.837427139282227 2023-01-22 16:27:23.928473: step: 4/470, loss: 6.08452033996582 2023-01-22 16:27:24.682540: step: 6/470, loss: 14.960529327392578 2023-01-22 16:27:25.382974: step: 8/470, loss: 9.693341255187988 2023-01-22 16:27:26.082320: step: 10/470, loss: 20.36423683166504 2023-01-22 16:27:26.906731: step: 12/470, loss: 12.639719009399414 2023-01-22 16:27:27.683800: step: 14/470, loss: 10.887170791625977 2023-01-22 16:27:28.549523: step: 16/470, loss: 30.509140014648438 2023-01-22 16:27:29.303140: step: 18/470, loss: 21.185964584350586 2023-01-22 16:27:30.008308: step: 20/470, loss: 22.507814407348633 2023-01-22 16:27:30.748837: step: 22/470, loss: 13.584888458251953 2023-01-22 16:27:31.435133: step: 24/470, loss: 10.651383399963379 2023-01-22 16:27:32.239185: step: 26/470, loss: 19.368906021118164 2023-01-22 16:27:32.983728: step: 28/470, loss: 14.308650970458984 2023-01-22 16:27:33.760010: step: 30/470, loss: 7.055563926696777 2023-01-22 16:27:34.577458: step: 32/470, loss: 14.257768630981445 2023-01-22 16:27:35.298869: step: 34/470, loss: 8.895671844482422 2023-01-22 16:27:35.990386: step: 36/470, loss: 26.354061126708984 2023-01-22 16:27:36.708135: step: 38/470, loss: 8.183581352233887 2023-01-22 16:27:37.452019: step: 40/470, loss: 23.308879852294922 2023-01-22 16:27:38.162965: step: 42/470, loss: 9.376198768615723 2023-01-22 16:27:38.937739: step: 44/470, loss: 40.81726837158203 2023-01-22 16:27:39.677703: step: 46/470, loss: 15.613691329956055 2023-01-22 16:27:40.352485: step: 48/470, loss: 13.526906967163086 2023-01-22 16:27:41.102713: step: 50/470, loss: 24.901996612548828 2023-01-22 16:27:41.891856: step: 52/470, loss: 12.065844535827637 2023-01-22 16:27:42.587691: step: 54/470, loss: 42.65087127685547 2023-01-22 16:27:43.451164: step: 56/470, loss: 27.784313201904297 2023-01-22 16:27:44.162427: step: 58/470, loss: 16.313859939575195 2023-01-22 16:27:44.899937: step: 60/470, loss: 23.540321350097656 2023-01-22 16:27:45.731984: step: 62/470, loss: 13.617783546447754 2023-01-22 16:27:46.456987: step: 64/470, loss: 22.79100799560547 2023-01-22 16:27:47.179711: step: 66/470, loss: 19.00048065185547 2023-01-22 16:27:47.993737: step: 68/470, loss: 19.12494468688965 2023-01-22 16:27:48.675237: step: 70/470, loss: 12.588727951049805 2023-01-22 16:27:49.396308: step: 72/470, loss: 16.181161880493164 2023-01-22 16:27:50.161263: step: 74/470, loss: 17.37213897705078 2023-01-22 16:27:50.990736: step: 76/470, loss: 13.227319717407227 2023-01-22 16:27:51.709336: step: 78/470, loss: 21.182538986206055 2023-01-22 16:27:52.491492: step: 80/470, loss: 16.062942504882812 2023-01-22 16:27:53.239410: step: 82/470, loss: 10.652351379394531 2023-01-22 16:27:53.989607: step: 84/470, loss: 5.419546604156494 2023-01-22 16:27:54.758943: step: 86/470, loss: 10.083927154541016 2023-01-22 16:27:55.524953: step: 88/470, loss: 6.871520042419434 2023-01-22 16:27:56.197567: step: 90/470, loss: 11.351171493530273 2023-01-22 16:27:56.996164: step: 92/470, loss: 20.639118194580078 2023-01-22 16:27:57.720791: step: 94/470, loss: 4.432590484619141 2023-01-22 16:27:58.491136: step: 96/470, loss: 18.61043357849121 2023-01-22 16:27:59.381480: step: 98/470, loss: 5.183197021484375 2023-01-22 16:28:00.068143: step: 100/470, loss: 22.156150817871094 2023-01-22 16:28:00.844068: step: 102/470, loss: 11.253484725952148 2023-01-22 16:28:01.541074: step: 104/470, loss: 9.191667556762695 2023-01-22 16:28:02.187121: step: 106/470, loss: 17.978792190551758 2023-01-22 16:28:02.859419: step: 108/470, loss: 26.32501983642578 2023-01-22 16:28:03.571578: step: 110/470, loss: 15.27033805847168 2023-01-22 16:28:04.489190: step: 112/470, loss: 18.11617660522461 2023-01-22 16:28:05.259528: step: 114/470, loss: 5.689704895019531 2023-01-22 16:28:06.007020: step: 116/470, loss: 17.882823944091797 2023-01-22 16:28:06.809628: step: 118/470, loss: 14.880837440490723 2023-01-22 16:28:07.535154: step: 120/470, loss: 3.9773387908935547 2023-01-22 16:28:08.512757: step: 122/470, loss: 17.60595703125 2023-01-22 16:28:09.228130: step: 124/470, loss: 13.977578163146973 2023-01-22 16:28:09.919190: step: 126/470, loss: 7.939039707183838 2023-01-22 16:28:10.712887: step: 128/470, loss: 5.822059631347656 2023-01-22 16:28:11.401094: step: 130/470, loss: 4.221154689788818 2023-01-22 16:28:12.066365: step: 132/470, loss: 11.863361358642578 2023-01-22 16:28:12.832178: step: 134/470, loss: 7.579117774963379 2023-01-22 16:28:13.507370: step: 136/470, loss: 16.241559982299805 2023-01-22 16:28:14.232419: step: 138/470, loss: 13.835257530212402 2023-01-22 16:28:14.964643: step: 140/470, loss: 11.795804977416992 2023-01-22 16:28:15.713329: step: 142/470, loss: 10.111894607543945 2023-01-22 16:28:16.554334: step: 144/470, loss: 3.8433728218078613 2023-01-22 16:28:17.347879: step: 146/470, loss: 8.96932315826416 2023-01-22 16:28:18.118820: step: 148/470, loss: 3.0903584957122803 2023-01-22 16:28:18.891588: step: 150/470, loss: 9.47050666809082 2023-01-22 16:28:19.576388: step: 152/470, loss: 12.27602767944336 2023-01-22 16:28:20.273841: step: 154/470, loss: 5.426934242248535 2023-01-22 16:28:20.988554: step: 156/470, loss: 12.116996765136719 2023-01-22 16:28:21.685073: step: 158/470, loss: 9.07990550994873 2023-01-22 16:28:22.407529: step: 160/470, loss: 4.529577732086182 2023-01-22 16:28:23.177636: step: 162/470, loss: 10.44383430480957 2023-01-22 16:28:23.896487: step: 164/470, loss: 6.473989963531494 2023-01-22 16:28:24.649490: step: 166/470, loss: 4.3379807472229 2023-01-22 16:28:25.464335: step: 168/470, loss: 9.986034393310547 2023-01-22 16:28:26.283094: step: 170/470, loss: 3.3363029956817627 2023-01-22 16:28:27.020335: step: 172/470, loss: 7.920467376708984 2023-01-22 16:28:27.849067: step: 174/470, loss: 8.45045280456543 2023-01-22 16:28:28.630175: step: 176/470, loss: 12.504642486572266 2023-01-22 16:28:29.337500: step: 178/470, loss: 5.040508270263672 2023-01-22 16:28:30.074902: step: 180/470, loss: 7.040112018585205 2023-01-22 16:28:30.879465: step: 182/470, loss: 10.85146713256836 2023-01-22 16:28:31.682333: step: 184/470, loss: 10.689022064208984 2023-01-22 16:28:32.450454: step: 186/470, loss: 3.291524648666382 2023-01-22 16:28:33.195383: step: 188/470, loss: 6.111932277679443 2023-01-22 16:28:33.866935: step: 190/470, loss: 2.9675354957580566 2023-01-22 16:28:34.697411: step: 192/470, loss: 4.350892066955566 2023-01-22 16:28:35.429128: step: 194/470, loss: 6.658123970031738 2023-01-22 16:28:36.183075: step: 196/470, loss: 7.661910057067871 2023-01-22 16:28:36.889397: step: 198/470, loss: 4.766957759857178 2023-01-22 16:28:37.678148: step: 200/470, loss: 4.938291549682617 2023-01-22 16:28:38.373913: step: 202/470, loss: 12.754507064819336 2023-01-22 16:28:39.208218: step: 204/470, loss: 8.91333293914795 2023-01-22 16:28:40.003633: step: 206/470, loss: 17.913904190063477 2023-01-22 16:28:40.712404: step: 208/470, loss: 5.756092071533203 2023-01-22 16:28:41.472342: step: 210/470, loss: 7.458624839782715 2023-01-22 16:28:42.248374: step: 212/470, loss: 15.039164543151855 2023-01-22 16:28:42.987571: step: 214/470, loss: 7.663404941558838 2023-01-22 16:28:43.691446: step: 216/470, loss: 2.9406890869140625 2023-01-22 16:28:44.530191: step: 218/470, loss: 7.196639060974121 2023-01-22 16:28:45.265493: step: 220/470, loss: 11.003211975097656 2023-01-22 16:28:45.952031: step: 222/470, loss: 2.7577972412109375 2023-01-22 16:28:46.710761: step: 224/470, loss: 13.001374244689941 2023-01-22 16:28:47.488612: step: 226/470, loss: 8.773453712463379 2023-01-22 16:28:48.160728: step: 228/470, loss: 8.044055938720703 2023-01-22 16:28:48.870435: step: 230/470, loss: 3.6388256549835205 2023-01-22 16:28:49.597482: step: 232/470, loss: 10.504514694213867 2023-01-22 16:28:50.380633: step: 234/470, loss: 8.363357543945312 2023-01-22 16:28:51.046537: step: 236/470, loss: 13.068225860595703 2023-01-22 16:28:51.765012: step: 238/470, loss: 9.169629096984863 2023-01-22 16:28:52.453797: step: 240/470, loss: 10.499161720275879 2023-01-22 16:28:53.252635: step: 242/470, loss: 2.7228548526763916 2023-01-22 16:28:53.988321: step: 244/470, loss: 2.8359575271606445 2023-01-22 16:28:54.649786: step: 246/470, loss: 11.094869613647461 2023-01-22 16:28:55.375917: step: 248/470, loss: 6.501433849334717 2023-01-22 16:28:56.087563: step: 250/470, loss: 5.333680629730225 2023-01-22 16:28:56.889556: step: 252/470, loss: 8.988299369812012 2023-01-22 16:28:57.652488: step: 254/470, loss: 3.2130463123321533 2023-01-22 16:28:58.401825: step: 256/470, loss: 6.5167083740234375 2023-01-22 16:28:59.155204: step: 258/470, loss: 5.343503952026367 2023-01-22 16:28:59.950275: step: 260/470, loss: 4.025961399078369 2023-01-22 16:29:00.662541: step: 262/470, loss: 5.961394786834717 2023-01-22 16:29:01.355804: step: 264/470, loss: 2.129112482070923 2023-01-22 16:29:02.020693: step: 266/470, loss: 3.658996820449829 2023-01-22 16:29:02.742560: step: 268/470, loss: 6.239549160003662 2023-01-22 16:29:03.485714: step: 270/470, loss: 10.180370330810547 2023-01-22 16:29:04.228993: step: 272/470, loss: 7.914863586425781 2023-01-22 16:29:05.073304: step: 274/470, loss: 6.526845932006836 2023-01-22 16:29:05.809096: step: 276/470, loss: 9.641494750976562 2023-01-22 16:29:06.499254: step: 278/470, loss: 8.146503448486328 2023-01-22 16:29:07.197110: step: 280/470, loss: 2.807060480117798 2023-01-22 16:29:07.829879: step: 282/470, loss: 7.910976409912109 2023-01-22 16:29:08.646478: step: 284/470, loss: 9.166391372680664 2023-01-22 16:29:09.533059: step: 286/470, loss: 12.144872665405273 2023-01-22 16:29:10.252539: step: 288/470, loss: 12.169351577758789 2023-01-22 16:29:10.966573: step: 290/470, loss: 5.240389823913574 2023-01-22 16:29:11.611659: step: 292/470, loss: 11.680598258972168 2023-01-22 16:29:12.390791: step: 294/470, loss: 5.746752738952637 2023-01-22 16:29:13.205692: step: 296/470, loss: 4.9092020988464355 2023-01-22 16:29:13.923262: step: 298/470, loss: 4.1507954597473145 2023-01-22 16:29:14.670115: step: 300/470, loss: 13.095625877380371 2023-01-22 16:29:15.421897: step: 302/470, loss: 7.79514217376709 2023-01-22 16:29:16.242091: step: 304/470, loss: 1.9198462963104248 2023-01-22 16:29:16.999039: step: 306/470, loss: 4.214913845062256 2023-01-22 16:29:17.728738: step: 308/470, loss: 5.49434757232666 2023-01-22 16:29:18.479185: step: 310/470, loss: 2.4591972827911377 2023-01-22 16:29:19.245714: step: 312/470, loss: 7.487868309020996 2023-01-22 16:29:20.101303: step: 314/470, loss: 7.941952228546143 2023-01-22 16:29:20.855168: step: 316/470, loss: 12.883258819580078 2023-01-22 16:29:21.636823: step: 318/470, loss: 12.775262832641602 2023-01-22 16:29:22.380116: step: 320/470, loss: 5.390825271606445 2023-01-22 16:29:23.137230: step: 322/470, loss: 7.754670143127441 2023-01-22 16:29:23.834271: step: 324/470, loss: 8.214757919311523 2023-01-22 16:29:24.522749: step: 326/470, loss: 8.570985794067383 2023-01-22 16:29:25.267945: step: 328/470, loss: 2.6725447177886963 2023-01-22 16:29:26.114588: step: 330/470, loss: 2.4087343215942383 2023-01-22 16:29:26.949310: step: 332/470, loss: 8.013860702514648 2023-01-22 16:29:27.685708: step: 334/470, loss: 7.368355751037598 2023-01-22 16:29:28.506755: step: 336/470, loss: 2.468770742416382 2023-01-22 16:29:29.234250: step: 338/470, loss: 11.629497528076172 2023-01-22 16:29:29.928776: step: 340/470, loss: 3.3276002407073975 2023-01-22 16:29:30.823570: step: 342/470, loss: 19.408966064453125 2023-01-22 16:29:31.520897: step: 344/470, loss: 2.7381513118743896 2023-01-22 16:29:32.351441: step: 346/470, loss: 12.861856460571289 2023-01-22 16:29:33.074814: step: 348/470, loss: 5.905899524688721 2023-01-22 16:29:33.866485: step: 350/470, loss: 8.441301345825195 2023-01-22 16:29:34.632993: step: 352/470, loss: 3.920839309692383 2023-01-22 16:29:35.399534: step: 354/470, loss: 9.670235633850098 2023-01-22 16:29:36.252348: step: 356/470, loss: 1.8189231157302856 2023-01-22 16:29:37.045720: step: 358/470, loss: 1.7387617826461792 2023-01-22 16:29:37.837546: step: 360/470, loss: 10.194907188415527 2023-01-22 16:29:38.524034: step: 362/470, loss: 2.97975492477417 2023-01-22 16:29:39.318213: step: 364/470, loss: 5.879580497741699 2023-01-22 16:29:40.131944: step: 366/470, loss: 6.046767711639404 2023-01-22 16:29:40.898824: step: 368/470, loss: 5.067502975463867 2023-01-22 16:29:41.664476: step: 370/470, loss: 9.547842025756836 2023-01-22 16:29:42.460262: step: 372/470, loss: 8.196964263916016 2023-01-22 16:29:43.135825: step: 374/470, loss: 5.261054992675781 2023-01-22 16:29:43.828395: step: 376/470, loss: 2.565000534057617 2023-01-22 16:29:44.522886: step: 378/470, loss: 2.8714776039123535 2023-01-22 16:29:45.202679: step: 380/470, loss: 3.206409454345703 2023-01-22 16:29:45.974493: step: 382/470, loss: 6.930261611938477 2023-01-22 16:29:46.811889: step: 384/470, loss: 2.868443012237549 2023-01-22 16:29:47.529632: step: 386/470, loss: 3.763054132461548 2023-01-22 16:29:48.233470: step: 388/470, loss: 5.591135025024414 2023-01-22 16:29:48.994970: step: 390/470, loss: 6.832804203033447 2023-01-22 16:29:49.706770: step: 392/470, loss: 2.2691705226898193 2023-01-22 16:29:50.464897: step: 394/470, loss: 2.894192695617676 2023-01-22 16:29:51.250484: step: 396/470, loss: 3.6917879581451416 2023-01-22 16:29:51.986949: step: 398/470, loss: 1.6198911666870117 2023-01-22 16:29:52.754094: step: 400/470, loss: 2.115755558013916 2023-01-22 16:29:53.490915: step: 402/470, loss: 3.108741283416748 2023-01-22 16:29:54.209066: step: 404/470, loss: 0.9105663895606995 2023-01-22 16:29:55.011807: step: 406/470, loss: 4.219695091247559 2023-01-22 16:29:55.756999: step: 408/470, loss: 2.287531852722168 2023-01-22 16:29:56.493274: step: 410/470, loss: 4.253011226654053 2023-01-22 16:29:57.215899: step: 412/470, loss: 4.190493106842041 2023-01-22 16:29:57.907376: step: 414/470, loss: 2.540191411972046 2023-01-22 16:29:58.665468: step: 416/470, loss: 1.3966948986053467 2023-01-22 16:29:59.452729: step: 418/470, loss: 2.3591842651367188 2023-01-22 16:30:00.224830: step: 420/470, loss: 2.5741522312164307 2023-01-22 16:30:01.091795: step: 422/470, loss: 0.9306092262268066 2023-01-22 16:30:01.847055: step: 424/470, loss: 1.9692203998565674 2023-01-22 16:30:02.609863: step: 426/470, loss: 6.810105323791504 2023-01-22 16:30:03.262593: step: 428/470, loss: 3.282905101776123 2023-01-22 16:30:04.028854: step: 430/470, loss: 1.8343316316604614 2023-01-22 16:30:04.752933: step: 432/470, loss: 1.6074039936065674 2023-01-22 16:30:05.481926: step: 434/470, loss: 0.9173816442489624 2023-01-22 16:30:06.184416: step: 436/470, loss: 2.9239602088928223 2023-01-22 16:30:06.906799: step: 438/470, loss: 2.229067802429199 2023-01-22 16:30:07.571028: step: 440/470, loss: 6.852571487426758 2023-01-22 16:30:08.304921: step: 442/470, loss: 3.8243494033813477 2023-01-22 16:30:09.019601: step: 444/470, loss: 5.331119537353516 2023-01-22 16:30:09.792816: step: 446/470, loss: 1.5648657083511353 2023-01-22 16:30:10.543117: step: 448/470, loss: 5.23274040222168 2023-01-22 16:30:11.339273: step: 450/470, loss: 2.8647255897521973 2023-01-22 16:30:12.070170: step: 452/470, loss: 4.566137790679932 2023-01-22 16:30:12.787693: step: 454/470, loss: 2.325453042984009 2023-01-22 16:30:13.479228: step: 456/470, loss: 1.3142560720443726 2023-01-22 16:30:14.268975: step: 458/470, loss: 1.2290973663330078 2023-01-22 16:30:15.000948: step: 460/470, loss: 0.9515959620475769 2023-01-22 16:30:15.718109: step: 462/470, loss: 0.4390300512313843 2023-01-22 16:30:16.646435: step: 464/470, loss: 5.895995140075684 2023-01-22 16:30:17.405628: step: 466/470, loss: 10.302230834960938 2023-01-22 16:30:18.198417: step: 468/470, loss: 0.4556967318058014 2023-01-22 16:30:19.025299: step: 470/470, loss: 4.270013332366943 2023-01-22 16:30:19.715443: step: 472/470, loss: 1.8166708946228027 2023-01-22 16:30:20.408166: step: 474/470, loss: 1.0261110067367554 2023-01-22 16:30:21.214472: step: 476/470, loss: 7.151349067687988 2023-01-22 16:30:22.022588: step: 478/470, loss: 1.8550636768341064 2023-01-22 16:30:22.783505: step: 480/470, loss: 4.943986415863037 2023-01-22 16:30:23.463961: step: 482/470, loss: 1.3664697408676147 2023-01-22 16:30:24.100662: step: 484/470, loss: 0.2491106390953064 2023-01-22 16:30:24.921499: step: 486/470, loss: 2.626844644546509 2023-01-22 16:30:25.616102: step: 488/470, loss: 2.570115804672241 2023-01-22 16:30:26.323434: step: 490/470, loss: 4.202301502227783 2023-01-22 16:30:27.086509: step: 492/470, loss: 0.742546021938324 2023-01-22 16:30:27.811807: step: 494/470, loss: 2.54813814163208 2023-01-22 16:30:28.559560: step: 496/470, loss: 2.840670585632324 2023-01-22 16:30:29.262445: step: 498/470, loss: 1.0759106874465942 2023-01-22 16:30:30.040558: step: 500/470, loss: 3.8876378536224365 2023-01-22 16:30:30.703454: step: 502/470, loss: 1.4235085248947144 2023-01-22 16:30:31.377426: step: 504/470, loss: 0.9331973195075989 2023-01-22 16:30:32.078520: step: 506/470, loss: 0.9420661926269531 2023-01-22 16:30:32.856341: step: 508/470, loss: 3.353362798690796 2023-01-22 16:30:33.666044: step: 510/470, loss: 7.415338516235352 2023-01-22 16:30:34.440345: step: 512/470, loss: 1.1503777503967285 2023-01-22 16:30:35.210540: step: 514/470, loss: 0.5799806118011475 2023-01-22 16:30:35.981582: step: 516/470, loss: 2.527512788772583 2023-01-22 16:30:36.810028: step: 518/470, loss: 3.385176181793213 2023-01-22 16:30:37.519092: step: 520/470, loss: 1.6425435543060303 2023-01-22 16:30:38.220403: step: 522/470, loss: 4.16226863861084 2023-01-22 16:30:39.114462: step: 524/470, loss: 6.3848772048950195 2023-01-22 16:30:39.861030: step: 526/470, loss: 4.581264019012451 2023-01-22 16:30:40.596644: step: 528/470, loss: 1.923591136932373 2023-01-22 16:30:41.402848: step: 530/470, loss: 4.6132893562316895 2023-01-22 16:30:42.199208: step: 532/470, loss: 2.3744194507598877 2023-01-22 16:30:42.933268: step: 534/470, loss: 1.7940552234649658 2023-01-22 16:30:43.707864: step: 536/470, loss: 1.6240293979644775 2023-01-22 16:30:44.393610: step: 538/470, loss: 5.111881256103516 2023-01-22 16:30:45.158489: step: 540/470, loss: 6.182251453399658 2023-01-22 16:30:45.902046: step: 542/470, loss: 5.470218181610107 2023-01-22 16:30:46.682547: step: 544/470, loss: 3.7869865894317627 2023-01-22 16:30:47.434421: step: 546/470, loss: 2.6353919506073 2023-01-22 16:30:48.136726: step: 548/470, loss: 3.2712767124176025 2023-01-22 16:30:48.975020: step: 550/470, loss: 6.047873497009277 2023-01-22 16:30:49.723292: step: 552/470, loss: 5.375014305114746 2023-01-22 16:30:50.387313: step: 554/470, loss: 4.62559175491333 2023-01-22 16:30:51.085633: step: 556/470, loss: 2.0044353008270264 2023-01-22 16:30:51.806604: step: 558/470, loss: 1.5262951850891113 2023-01-22 16:30:52.532422: step: 560/470, loss: 5.368670463562012 2023-01-22 16:30:53.374343: step: 562/470, loss: 4.242199897766113 2023-01-22 16:30:54.117420: step: 564/470, loss: 0.8480384349822998 2023-01-22 16:30:54.869162: step: 566/470, loss: 4.037248134613037 2023-01-22 16:30:55.547625: step: 568/470, loss: 3.518568515777588 2023-01-22 16:30:56.282705: step: 570/470, loss: 2.1327133178710938 2023-01-22 16:30:56.996615: step: 572/470, loss: 2.1160264015197754 2023-01-22 16:30:57.669873: step: 574/470, loss: 3.609269618988037 2023-01-22 16:30:58.371686: step: 576/470, loss: 1.870286226272583 2023-01-22 16:30:59.084207: step: 578/470, loss: 2.7097601890563965 2023-01-22 16:30:59.860679: step: 580/470, loss: 14.343367576599121 2023-01-22 16:31:00.644839: step: 582/470, loss: 2.831430196762085 2023-01-22 16:31:01.423512: step: 584/470, loss: 1.094900369644165 2023-01-22 16:31:02.165464: step: 586/470, loss: 1.2436193227767944 2023-01-22 16:31:02.920754: step: 588/470, loss: 2.6703548431396484 2023-01-22 16:31:03.764347: step: 590/470, loss: 5.652037143707275 2023-01-22 16:31:04.548369: step: 592/470, loss: 0.5362025499343872 2023-01-22 16:31:05.287076: step: 594/470, loss: 2.1328539848327637 2023-01-22 16:31:05.998940: step: 596/470, loss: 1.8717509508132935 2023-01-22 16:31:06.696600: step: 598/470, loss: 0.5863834619522095 2023-01-22 16:31:07.430776: step: 600/470, loss: 1.6512610912322998 2023-01-22 16:31:08.252931: step: 602/470, loss: 2.197679281234741 2023-01-22 16:31:08.991604: step: 604/470, loss: 1.5977303981781006 2023-01-22 16:31:09.722879: step: 606/470, loss: 0.8425391316413879 2023-01-22 16:31:10.492873: step: 608/470, loss: 2.4796876907348633 2023-01-22 16:31:11.272637: step: 610/470, loss: 0.7105655670166016 2023-01-22 16:31:12.041649: step: 612/470, loss: 4.145379066467285 2023-01-22 16:31:12.798499: step: 614/470, loss: 1.9634544849395752 2023-01-22 16:31:13.535670: step: 616/470, loss: 3.2371010780334473 2023-01-22 16:31:14.343769: step: 618/470, loss: 0.9649048447608948 2023-01-22 16:31:15.132795: step: 620/470, loss: 2.4109811782836914 2023-01-22 16:31:15.910393: step: 622/470, loss: 2.8423123359680176 2023-01-22 16:31:16.621541: step: 624/470, loss: 0.7191053628921509 2023-01-22 16:31:17.341439: step: 626/470, loss: 1.9427146911621094 2023-01-22 16:31:17.985096: step: 628/470, loss: 5.957315921783447 2023-01-22 16:31:18.725722: step: 630/470, loss: 1.467832088470459 2023-01-22 16:31:19.427851: step: 632/470, loss: 0.9858548641204834 2023-01-22 16:31:20.217627: step: 634/470, loss: 1.0836055278778076 2023-01-22 16:31:20.970959: step: 636/470, loss: 2.614570140838623 2023-01-22 16:31:21.649260: step: 638/470, loss: 0.810189425945282 2023-01-22 16:31:22.441410: step: 640/470, loss: 3.5685133934020996 2023-01-22 16:31:23.237034: step: 642/470, loss: 2.486414909362793 2023-01-22 16:31:23.907230: step: 644/470, loss: 1.3245995044708252 2023-01-22 16:31:24.667074: step: 646/470, loss: 1.389783263206482 2023-01-22 16:31:25.435723: step: 648/470, loss: 4.989058494567871 2023-01-22 16:31:26.121941: step: 650/470, loss: 0.5294803977012634 2023-01-22 16:31:26.783510: step: 652/470, loss: 2.751239776611328 2023-01-22 16:31:27.441769: step: 654/470, loss: 3.698434829711914 2023-01-22 16:31:28.216439: step: 656/470, loss: 1.105159878730774 2023-01-22 16:31:28.905011: step: 658/470, loss: 1.0109115839004517 2023-01-22 16:31:29.651101: step: 660/470, loss: 3.3653693199157715 2023-01-22 16:31:30.369297: step: 662/470, loss: 3.7181482315063477 2023-01-22 16:31:31.123494: step: 664/470, loss: 2.8250913619995117 2023-01-22 16:31:31.879566: step: 666/470, loss: 0.444732666015625 2023-01-22 16:31:32.575249: step: 668/470, loss: 1.967690348625183 2023-01-22 16:31:33.357618: step: 670/470, loss: 2.671586513519287 2023-01-22 16:31:34.094967: step: 672/470, loss: 0.917386531829834 2023-01-22 16:31:34.893735: step: 674/470, loss: 4.624493598937988 2023-01-22 16:31:35.586949: step: 676/470, loss: 1.2864645719528198 2023-01-22 16:31:36.356469: step: 678/470, loss: 2.9388926029205322 2023-01-22 16:31:37.106340: step: 680/470, loss: 2.476867437362671 2023-01-22 16:31:37.842192: step: 682/470, loss: 1.6455367803573608 2023-01-22 16:31:38.588505: step: 684/470, loss: 0.6645324230194092 2023-01-22 16:31:39.370172: step: 686/470, loss: 2.182851791381836 2023-01-22 16:31:40.092267: step: 688/470, loss: 3.1452040672302246 2023-01-22 16:31:40.839818: step: 690/470, loss: 0.7404079437255859 2023-01-22 16:31:41.616871: step: 692/470, loss: 0.627692461013794 2023-01-22 16:31:42.284344: step: 694/470, loss: 1.6905364990234375 2023-01-22 16:31:43.077003: step: 696/470, loss: 1.0452758073806763 2023-01-22 16:31:43.850733: step: 698/470, loss: 0.6959218382835388 2023-01-22 16:31:44.643254: step: 700/470, loss: 1.0421323776245117 2023-01-22 16:31:45.390008: step: 702/470, loss: 1.0812638998031616 2023-01-22 16:31:46.134785: step: 704/470, loss: 3.0148813724517822 2023-01-22 16:31:46.943820: step: 706/470, loss: 0.7072650790214539 2023-01-22 16:31:47.699108: step: 708/470, loss: 0.5093586444854736 2023-01-22 16:31:48.423684: step: 710/470, loss: 1.3678839206695557 2023-01-22 16:31:49.120486: step: 712/470, loss: 1.5004351139068604 2023-01-22 16:31:49.887001: step: 714/470, loss: 1.0333999395370483 2023-01-22 16:31:50.610095: step: 716/470, loss: 5.288928985595703 2023-01-22 16:31:51.392424: step: 718/470, loss: 4.261580467224121 2023-01-22 16:31:52.119851: step: 720/470, loss: 1.5531013011932373 2023-01-22 16:31:52.827818: step: 722/470, loss: 4.859399795532227 2023-01-22 16:31:53.582500: step: 724/470, loss: 7.980612754821777 2023-01-22 16:31:54.379342: step: 726/470, loss: 4.004562854766846 2023-01-22 16:31:55.107696: step: 728/470, loss: 2.1123268604278564 2023-01-22 16:31:55.923822: step: 730/470, loss: 0.8399812579154968 2023-01-22 16:31:56.756895: step: 732/470, loss: 0.6894631385803223 2023-01-22 16:31:57.501716: step: 734/470, loss: 6.760388374328613 2023-01-22 16:31:58.199683: step: 736/470, loss: 1.1757826805114746 2023-01-22 16:31:58.890396: step: 738/470, loss: 8.832976341247559 2023-01-22 16:31:59.653402: step: 740/470, loss: 2.382441997528076 2023-01-22 16:32:00.409764: step: 742/470, loss: 1.6580554246902466 2023-01-22 16:32:01.126756: step: 744/470, loss: 1.966712236404419 2023-01-22 16:32:01.761866: step: 746/470, loss: 0.506031334400177 2023-01-22 16:32:02.521690: step: 748/470, loss: 1.3848555088043213 2023-01-22 16:32:03.238906: step: 750/470, loss: 1.399755597114563 2023-01-22 16:32:03.974439: step: 752/470, loss: 2.190246343612671 2023-01-22 16:32:04.817809: step: 754/470, loss: 3.1123580932617188 2023-01-22 16:32:05.508410: step: 756/470, loss: 0.292216420173645 2023-01-22 16:32:06.232658: step: 758/470, loss: 5.910808086395264 2023-01-22 16:32:06.876528: step: 760/470, loss: 3.2733473777770996 2023-01-22 16:32:07.708278: step: 762/470, loss: 2.373502254486084 2023-01-22 16:32:08.430636: step: 764/470, loss: 1.994886875152588 2023-01-22 16:32:09.144739: step: 766/470, loss: 8.774368286132812 2023-01-22 16:32:09.812452: step: 768/470, loss: 1.556334376335144 2023-01-22 16:32:10.635718: step: 770/470, loss: 1.234019160270691 2023-01-22 16:32:11.513019: step: 772/470, loss: 2.650935649871826 2023-01-22 16:32:12.229966: step: 774/470, loss: 2.818275213241577 2023-01-22 16:32:12.933528: step: 776/470, loss: 1.808180570602417 2023-01-22 16:32:13.651253: step: 778/470, loss: 1.791804552078247 2023-01-22 16:32:14.374025: step: 780/470, loss: 6.736164569854736 2023-01-22 16:32:15.155031: step: 782/470, loss: 1.3359016180038452 2023-01-22 16:32:15.993911: step: 784/470, loss: 2.8063719272613525 2023-01-22 16:32:16.722229: step: 786/470, loss: 1.5591973066329956 2023-01-22 16:32:17.418800: step: 788/470, loss: 2.6075632572174072 2023-01-22 16:32:18.088539: step: 790/470, loss: 6.299186706542969 2023-01-22 16:32:18.882353: step: 792/470, loss: 2.2583096027374268 2023-01-22 16:32:19.685266: step: 794/470, loss: 0.5692673921585083 2023-01-22 16:32:20.460777: step: 796/470, loss: 1.2285269498825073 2023-01-22 16:32:21.287296: step: 798/470, loss: 3.3300468921661377 2023-01-22 16:32:22.022983: step: 800/470, loss: 0.6767564415931702 2023-01-22 16:32:22.782446: step: 802/470, loss: 1.033461570739746 2023-01-22 16:32:23.515708: step: 804/470, loss: 0.8145455121994019 2023-01-22 16:32:24.255500: step: 806/470, loss: 1.6333547830581665 2023-01-22 16:32:25.017180: step: 808/470, loss: 0.8114295601844788 2023-01-22 16:32:25.740039: step: 810/470, loss: 1.497851014137268 2023-01-22 16:32:26.432625: step: 812/470, loss: 6.200095176696777 2023-01-22 16:32:27.292376: step: 814/470, loss: 6.356876373291016 2023-01-22 16:32:27.996134: step: 816/470, loss: 1.4136056900024414 2023-01-22 16:32:28.735159: step: 818/470, loss: 0.7242187261581421 2023-01-22 16:32:29.480871: step: 820/470, loss: 4.6752166748046875 2023-01-22 16:32:30.202247: step: 822/470, loss: 1.517682433128357 2023-01-22 16:32:30.932655: step: 824/470, loss: 1.7950592041015625 2023-01-22 16:32:31.630654: step: 826/470, loss: 0.6983448266983032 2023-01-22 16:32:32.361197: step: 828/470, loss: 6.089503288269043 2023-01-22 16:32:33.167800: step: 830/470, loss: 0.9322970509529114 2023-01-22 16:32:33.848661: step: 832/470, loss: 1.1994924545288086 2023-01-22 16:32:34.563719: step: 834/470, loss: 7.563248157501221 2023-01-22 16:32:35.268991: step: 836/470, loss: 1.9371384382247925 2023-01-22 16:32:36.049996: step: 838/470, loss: 0.46454671025276184 2023-01-22 16:32:36.758405: step: 840/470, loss: 11.879871368408203 2023-01-22 16:32:37.463320: step: 842/470, loss: 1.3663816452026367 2023-01-22 16:32:38.193096: step: 844/470, loss: 1.1602144241333008 2023-01-22 16:32:38.903091: step: 846/470, loss: 1.8294084072113037 2023-01-22 16:32:39.729822: step: 848/470, loss: 1.413466453552246 2023-01-22 16:32:40.492514: step: 850/470, loss: 0.6919409036636353 2023-01-22 16:32:41.212288: step: 852/470, loss: 1.45651113986969 2023-01-22 16:32:41.908094: step: 854/470, loss: 1.881666898727417 2023-01-22 16:32:42.657191: step: 856/470, loss: 1.315659999847412 2023-01-22 16:32:43.347426: step: 858/470, loss: 1.2165385484695435 2023-01-22 16:32:44.127594: step: 860/470, loss: 0.6869033575057983 2023-01-22 16:32:44.888417: step: 862/470, loss: 4.98539924621582 2023-01-22 16:32:45.663258: step: 864/470, loss: 1.0015603303909302 2023-01-22 16:32:46.391415: step: 866/470, loss: 0.30697131156921387 2023-01-22 16:32:47.107423: step: 868/470, loss: 1.4575281143188477 2023-01-22 16:32:47.845475: step: 870/470, loss: 0.506115734577179 2023-01-22 16:32:48.585493: step: 872/470, loss: 8.884336471557617 2023-01-22 16:32:49.304712: step: 874/470, loss: 1.0489484071731567 2023-01-22 16:32:50.092137: step: 876/470, loss: 3.248522996902466 2023-01-22 16:32:50.877645: step: 878/470, loss: 0.8829824924468994 2023-01-22 16:32:51.685089: step: 880/470, loss: 2.079348564147949 2023-01-22 16:32:52.356020: step: 882/470, loss: 1.2602165937423706 2023-01-22 16:32:53.135681: step: 884/470, loss: 2.2233707904815674 2023-01-22 16:32:54.092858: step: 886/470, loss: 1.2698169946670532 2023-01-22 16:32:54.860337: step: 888/470, loss: 3.4488766193389893 2023-01-22 16:32:55.569146: step: 890/470, loss: 0.7761567831039429 2023-01-22 16:32:56.258650: step: 892/470, loss: 3.291733741760254 2023-01-22 16:32:56.972565: step: 894/470, loss: 1.81295907497406 2023-01-22 16:32:57.663074: step: 896/470, loss: 1.6111295223236084 2023-01-22 16:32:58.395846: step: 898/470, loss: 1.894487977027893 2023-01-22 16:32:59.145052: step: 900/470, loss: 0.7100313901901245 2023-01-22 16:32:59.857498: step: 902/470, loss: 5.061795234680176 2023-01-22 16:33:00.605170: step: 904/470, loss: 1.990469217300415 2023-01-22 16:33:01.376509: step: 906/470, loss: 0.8769509196281433 2023-01-22 16:33:02.168573: step: 908/470, loss: 1.2958335876464844 2023-01-22 16:33:02.863280: step: 910/470, loss: 1.5320318937301636 2023-01-22 16:33:03.635387: step: 912/470, loss: 8.486604690551758 2023-01-22 16:33:04.368716: step: 914/470, loss: 0.8106694221496582 2023-01-22 16:33:05.060248: step: 916/470, loss: 0.887906551361084 2023-01-22 16:33:05.705194: step: 918/470, loss: 2.665186882019043 2023-01-22 16:33:06.373978: step: 920/470, loss: 1.0016103982925415 2023-01-22 16:33:07.149438: step: 922/470, loss: 0.6781519651412964 2023-01-22 16:33:08.017337: step: 924/470, loss: 3.2051515579223633 2023-01-22 16:33:08.784486: step: 926/470, loss: 1.7840149402618408 2023-01-22 16:33:09.461718: step: 928/470, loss: 2.064391851425171 2023-01-22 16:33:10.241116: step: 930/470, loss: 2.9013218879699707 2023-01-22 16:33:10.975961: step: 932/470, loss: 0.4819917678833008 2023-01-22 16:33:11.738411: step: 934/470, loss: 4.124395847320557 2023-01-22 16:33:12.505384: step: 936/470, loss: 2.3460421562194824 2023-01-22 16:33:13.236799: step: 938/470, loss: 8.79629135131836 2023-01-22 16:33:13.990526: step: 940/470, loss: 1.3817631006240845 2023-01-22 16:33:14.699665: step: 942/470, loss: 1.4883410930633545 ================================================== Loss: 5.731 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33518311838624343, 'r': 0.09124077324691691, 'f1': 0.14343646076558408}, 'combined': 0.10569002372200932, 'epoch': 0} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.2739053126711471, 'r': 0.09969733602972788, 'f1': 0.1461854196840392}, 'combined': 0.10182069032719149, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30097601784718353, 'r': 0.0927393022856161, 'f1': 0.1417892800840778}, 'combined': 0.10447631164089942, 'epoch': 0} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.27416930397516703, 'r': 0.10845969592121071, 'f1': 0.1554316026654001}, 'combined': 0.1082608177768956, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31097432659932656, 'r': 0.09523221343873517, 'f1': 0.14581140639389553}, 'combined': 0.10743998365865987, 'epoch': 0} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.2418413267358432, 'r': 0.10470524874003939, 'f1': 0.14613941134304964}, 'combined': 0.101788644716552, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.578125, 'r': 0.13214285714285715, 'f1': 0.21511627906976744}, 'combined': 0.14341085271317827, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.06896551724137931, 'f1': 0.1212121212121212}, 'combined': 0.0808080808080808, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33518311838624343, 'r': 0.09124077324691691, 'f1': 0.14343646076558408}, 'combined': 0.10569002372200932, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.2739053126711471, 'r': 0.09969733602972788, 'f1': 0.1461854196840392}, 'combined': 0.10182069032719149, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.578125, 'r': 0.13214285714285715, 'f1': 0.21511627906976744}, 'combined': 0.14341085271317827, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30097601784718353, 'r': 0.0927393022856161, 'f1': 0.1417892800840778}, 'combined': 0.10447631164089942, 'epoch': 0} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.27416930397516703, 'r': 0.10845969592121071, 'f1': 0.1554316026654001}, 'combined': 0.1082608177768956, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31097432659932656, 'r': 0.09523221343873517, 'f1': 0.14581140639389553}, 'combined': 0.10743998365865987, 'epoch': 0} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.2418413267358432, 'r': 0.10470524874003939, 'f1': 0.14613941134304964}, 'combined': 0.101788644716552, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.06896551724137931, 'f1': 0.1212121212121212}, 'combined': 0.0808080808080808, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:36:16.480651: step: 2/470, loss: 1.862076997756958 2023-01-22 16:36:17.243062: step: 4/470, loss: 8.447455406188965 2023-01-22 16:36:17.943504: step: 6/470, loss: 2.0555386543273926 2023-01-22 16:36:18.647672: step: 8/470, loss: 1.5882792472839355 2023-01-22 16:36:19.443746: step: 10/470, loss: 3.0211076736450195 2023-01-22 16:36:20.226033: step: 12/470, loss: 6.071506500244141 2023-01-22 16:36:20.930406: step: 14/470, loss: 2.025646209716797 2023-01-22 16:36:21.711121: step: 16/470, loss: 5.890591621398926 2023-01-22 16:36:22.453696: step: 18/470, loss: 0.7686997056007385 2023-01-22 16:36:23.137618: step: 20/470, loss: 0.4861363172531128 2023-01-22 16:36:23.889004: step: 22/470, loss: 4.2360734939575195 2023-01-22 16:36:24.675758: step: 24/470, loss: 2.479112386703491 2023-01-22 16:36:25.434981: step: 26/470, loss: 1.051332712173462 2023-01-22 16:36:26.155653: step: 28/470, loss: 1.299823522567749 2023-01-22 16:36:26.875184: step: 30/470, loss: 1.6598849296569824 2023-01-22 16:36:27.556974: step: 32/470, loss: 1.582511067390442 2023-01-22 16:36:28.311975: step: 34/470, loss: 2.467061758041382 2023-01-22 16:36:29.039736: step: 36/470, loss: 1.9889912605285645 2023-01-22 16:36:29.797325: step: 38/470, loss: 2.0867104530334473 2023-01-22 16:36:30.564853: step: 40/470, loss: 1.2533296346664429 2023-01-22 16:36:31.345157: step: 42/470, loss: 1.3080697059631348 2023-01-22 16:36:32.121979: step: 44/470, loss: 1.3241928815841675 2023-01-22 16:36:32.831472: step: 46/470, loss: 5.139832019805908 2023-01-22 16:36:33.584460: step: 48/470, loss: 0.5771458148956299 2023-01-22 16:36:34.309918: step: 50/470, loss: 0.29020386934280396 2023-01-22 16:36:35.021415: step: 52/470, loss: 0.8561909198760986 2023-01-22 16:36:35.817069: step: 54/470, loss: 1.4810162782669067 2023-01-22 16:36:36.579345: step: 56/470, loss: 2.2414615154266357 2023-01-22 16:36:37.598999: step: 58/470, loss: 2.6271400451660156 2023-01-22 16:36:38.346322: step: 60/470, loss: 17.625469207763672 2023-01-22 16:36:39.057294: step: 62/470, loss: 2.8227107524871826 2023-01-22 16:36:39.864386: step: 64/470, loss: 0.8661047220230103 2023-01-22 16:36:40.577751: step: 66/470, loss: 1.7071223258972168 2023-01-22 16:36:41.288113: step: 68/470, loss: 2.8164873123168945 2023-01-22 16:36:41.996801: step: 70/470, loss: 0.5790960788726807 2023-01-22 16:36:42.703320: step: 72/470, loss: 2.1545395851135254 2023-01-22 16:36:43.390082: step: 74/470, loss: 1.5863559246063232 2023-01-22 16:36:44.163005: step: 76/470, loss: 2.1607186794281006 2023-01-22 16:36:45.050838: step: 78/470, loss: 9.826147079467773 2023-01-22 16:36:45.872374: step: 80/470, loss: 0.798759937286377 2023-01-22 16:36:46.666782: step: 82/470, loss: 0.9904079437255859 2023-01-22 16:36:47.469221: step: 84/470, loss: 1.9608020782470703 2023-01-22 16:36:48.255117: step: 86/470, loss: 1.5990160703659058 2023-01-22 16:36:49.011709: step: 88/470, loss: 0.8718377351760864 2023-01-22 16:36:49.923266: step: 90/470, loss: 2.45064640045166 2023-01-22 16:36:50.577887: step: 92/470, loss: 1.367138385772705 2023-01-22 16:36:51.305732: step: 94/470, loss: 0.5553440451622009 2023-01-22 16:36:52.020158: step: 96/470, loss: 2.1003284454345703 2023-01-22 16:36:52.704784: step: 98/470, loss: 0.6784709692001343 2023-01-22 16:36:53.418185: step: 100/470, loss: 4.6824727058410645 2023-01-22 16:36:54.179187: step: 102/470, loss: 3.344106674194336 2023-01-22 16:36:54.870687: step: 104/470, loss: 2.412670612335205 2023-01-22 16:36:55.644845: step: 106/470, loss: 2.828399419784546 2023-01-22 16:36:56.441170: step: 108/470, loss: 6.7077202796936035 2023-01-22 16:36:57.173473: step: 110/470, loss: 0.7992213368415833 2023-01-22 16:36:57.854893: step: 112/470, loss: 2.1331422328948975 2023-01-22 16:36:58.691028: step: 114/470, loss: 3.3567557334899902 2023-01-22 16:36:59.434708: step: 116/470, loss: 0.6175652742385864 2023-01-22 16:37:00.177098: step: 118/470, loss: 1.5179295539855957 2023-01-22 16:37:00.908697: step: 120/470, loss: 10.409992218017578 2023-01-22 16:37:01.680010: step: 122/470, loss: 2.6204042434692383 2023-01-22 16:37:02.427872: step: 124/470, loss: 2.7609641551971436 2023-01-22 16:37:03.260865: step: 126/470, loss: 2.3943400382995605 2023-01-22 16:37:03.958394: step: 128/470, loss: 2.3017160892486572 2023-01-22 16:37:04.715156: step: 130/470, loss: 1.209132194519043 2023-01-22 16:37:05.533541: step: 132/470, loss: 0.7397157549858093 2023-01-22 16:37:06.253807: step: 134/470, loss: 0.7620857954025269 2023-01-22 16:37:06.990011: step: 136/470, loss: 1.7000327110290527 2023-01-22 16:37:07.680720: step: 138/470, loss: 10.088142395019531 2023-01-22 16:37:08.419747: step: 140/470, loss: 1.530067801475525 2023-01-22 16:37:09.109989: step: 142/470, loss: 2.131448984146118 2023-01-22 16:37:09.805160: step: 144/470, loss: 5.388104438781738 2023-01-22 16:37:10.507813: step: 146/470, loss: 0.946931004524231 2023-01-22 16:37:11.235671: step: 148/470, loss: 0.22896254062652588 2023-01-22 16:37:11.953197: step: 150/470, loss: 1.9135855436325073 2023-01-22 16:37:12.685474: step: 152/470, loss: 6.019766807556152 2023-01-22 16:37:13.414582: step: 154/470, loss: 1.4003325700759888 2023-01-22 16:37:14.200022: step: 156/470, loss: 2.2941765785217285 2023-01-22 16:37:15.017230: step: 158/470, loss: 1.0491797924041748 2023-01-22 16:37:15.725751: step: 160/470, loss: 4.415127754211426 2023-01-22 16:37:16.428412: step: 162/470, loss: 1.0620689392089844 2023-01-22 16:37:17.241300: step: 164/470, loss: 1.4155352115631104 2023-01-22 16:37:17.966728: step: 166/470, loss: 1.2267463207244873 2023-01-22 16:37:18.713181: step: 168/470, loss: 1.4995251893997192 2023-01-22 16:37:19.449004: step: 170/470, loss: 0.8310667872428894 2023-01-22 16:37:20.134312: step: 172/470, loss: 1.5570124387741089 2023-01-22 16:37:20.911733: step: 174/470, loss: 0.6260913610458374 2023-01-22 16:37:21.621370: step: 176/470, loss: 1.448913335800171 2023-01-22 16:37:22.351334: step: 178/470, loss: 1.0051169395446777 2023-01-22 16:37:23.074239: step: 180/470, loss: 1.780196189880371 2023-01-22 16:37:23.841874: step: 182/470, loss: 6.025088310241699 2023-01-22 16:37:24.650685: step: 184/470, loss: 0.4942382574081421 2023-01-22 16:37:25.448578: step: 186/470, loss: 1.7729337215423584 2023-01-22 16:37:26.229082: step: 188/470, loss: 3.7398040294647217 2023-01-22 16:37:26.964544: step: 190/470, loss: 1.550480604171753 2023-01-22 16:37:27.707941: step: 192/470, loss: 4.191380023956299 2023-01-22 16:37:28.477994: step: 194/470, loss: 6.224770545959473 2023-01-22 16:37:29.272164: step: 196/470, loss: 4.869048595428467 2023-01-22 16:37:29.961144: step: 198/470, loss: 4.839520454406738 2023-01-22 16:37:30.822519: step: 200/470, loss: 0.34329086542129517 2023-01-22 16:37:31.548507: step: 202/470, loss: 3.1021952629089355 2023-01-22 16:37:32.368864: step: 204/470, loss: 1.2622747421264648 2023-01-22 16:37:33.185121: step: 206/470, loss: 1.1812101602554321 2023-01-22 16:37:33.922310: step: 208/470, loss: 2.3265373706817627 2023-01-22 16:37:34.715657: step: 210/470, loss: 1.3936223983764648 2023-01-22 16:37:35.537620: step: 212/470, loss: 0.3111218810081482 2023-01-22 16:37:36.297423: step: 214/470, loss: 0.7424604892730713 2023-01-22 16:37:37.044623: step: 216/470, loss: 3.502570152282715 2023-01-22 16:37:37.810294: step: 218/470, loss: 0.8425999283790588 2023-01-22 16:37:38.600108: step: 220/470, loss: 5.331447601318359 2023-01-22 16:37:39.319989: step: 222/470, loss: 1.8785395622253418 2023-01-22 16:37:40.158038: step: 224/470, loss: 3.651517868041992 2023-01-22 16:37:40.874448: step: 226/470, loss: 1.545884370803833 2023-01-22 16:37:41.731970: step: 228/470, loss: 0.853326141834259 2023-01-22 16:37:42.479541: step: 230/470, loss: 0.5514891147613525 2023-01-22 16:37:43.287015: step: 232/470, loss: 7.522525787353516 2023-01-22 16:37:43.995357: step: 234/470, loss: 1.4783458709716797 2023-01-22 16:37:44.718938: step: 236/470, loss: 1.1576764583587646 2023-01-22 16:37:45.369881: step: 238/470, loss: 1.4029746055603027 2023-01-22 16:37:46.081386: step: 240/470, loss: 0.9465068578720093 2023-01-22 16:37:46.762277: step: 242/470, loss: 1.4843324422836304 2023-01-22 16:37:47.472108: step: 244/470, loss: 4.074316501617432 2023-01-22 16:37:48.227010: step: 246/470, loss: 0.47795766592025757 2023-01-22 16:37:48.991417: step: 248/470, loss: 0.9381461143493652 2023-01-22 16:37:49.753688: step: 250/470, loss: 1.1530299186706543 2023-01-22 16:37:50.507869: step: 252/470, loss: 5.419069290161133 2023-01-22 16:37:51.323636: step: 254/470, loss: 2.582890272140503 2023-01-22 16:37:52.029152: step: 256/470, loss: 0.4698558449745178 2023-01-22 16:37:52.787400: step: 258/470, loss: 1.4230782985687256 2023-01-22 16:37:53.525622: step: 260/470, loss: 2.6051998138427734 2023-01-22 16:37:54.279753: step: 262/470, loss: 6.613999366760254 2023-01-22 16:37:55.061803: step: 264/470, loss: 0.8363248705863953 2023-01-22 16:37:55.826763: step: 266/470, loss: 2.7337307929992676 2023-01-22 16:37:56.545468: step: 268/470, loss: 0.22095702588558197 2023-01-22 16:37:57.272338: step: 270/470, loss: 3.376373291015625 2023-01-22 16:37:57.996057: step: 272/470, loss: 2.5851681232452393 2023-01-22 16:37:58.747057: step: 274/470, loss: 2.131282091140747 2023-01-22 16:37:59.547735: step: 276/470, loss: 1.776853322982788 2023-01-22 16:38:00.266291: step: 278/470, loss: 1.506990909576416 2023-01-22 16:38:01.070104: step: 280/470, loss: 6.9677934646606445 2023-01-22 16:38:01.831167: step: 282/470, loss: 1.1140801906585693 2023-01-22 16:38:02.536032: step: 284/470, loss: 2.3784890174865723 2023-01-22 16:38:03.245122: step: 286/470, loss: 0.3859087824821472 2023-01-22 16:38:04.088481: step: 288/470, loss: 4.417560577392578 2023-01-22 16:38:04.858312: step: 290/470, loss: 2.1486141681671143 2023-01-22 16:38:05.607534: step: 292/470, loss: 1.0960376262664795 2023-01-22 16:38:06.308601: step: 294/470, loss: 3.9408044815063477 2023-01-22 16:38:07.045096: step: 296/470, loss: 0.7118826508522034 2023-01-22 16:38:07.744909: step: 298/470, loss: 0.3212831914424896 2023-01-22 16:38:08.462910: step: 300/470, loss: 1.278481364250183 2023-01-22 16:38:09.176845: step: 302/470, loss: 5.880828380584717 2023-01-22 16:38:09.867927: step: 304/470, loss: 0.6283556222915649 2023-01-22 16:38:10.705854: step: 306/470, loss: 2.122545003890991 2023-01-22 16:38:11.447865: step: 308/470, loss: 0.7743858098983765 2023-01-22 16:38:12.306976: step: 310/470, loss: 4.460456848144531 2023-01-22 16:38:13.188679: step: 312/470, loss: 2.6492180824279785 2023-01-22 16:38:13.928898: step: 314/470, loss: 1.6523792743682861 2023-01-22 16:38:14.652476: step: 316/470, loss: 0.35584282875061035 2023-01-22 16:38:15.368898: step: 318/470, loss: 0.9775692820549011 2023-01-22 16:38:16.083500: step: 320/470, loss: 1.9322388172149658 2023-01-22 16:38:16.895964: step: 322/470, loss: 1.2334778308868408 2023-01-22 16:38:17.617338: step: 324/470, loss: 0.7135185599327087 2023-01-22 16:38:18.361376: step: 326/470, loss: 2.28690242767334 2023-01-22 16:38:19.099702: step: 328/470, loss: 1.8421454429626465 2023-01-22 16:38:19.822964: step: 330/470, loss: 0.3285214900970459 2023-01-22 16:38:20.486833: step: 332/470, loss: 1.2045437097549438 2023-01-22 16:38:21.168430: step: 334/470, loss: 0.5976580381393433 2023-01-22 16:38:21.920847: step: 336/470, loss: 0.5577397346496582 2023-01-22 16:38:22.644772: step: 338/470, loss: 1.911345362663269 2023-01-22 16:38:23.423178: step: 340/470, loss: 1.0533931255340576 2023-01-22 16:38:24.139054: step: 342/470, loss: 1.1119427680969238 2023-01-22 16:38:24.833854: step: 344/470, loss: 3.2432820796966553 2023-01-22 16:38:25.662084: step: 346/470, loss: 1.752776861190796 2023-01-22 16:38:26.377238: step: 348/470, loss: 0.7811027765274048 2023-01-22 16:38:27.113963: step: 350/470, loss: 9.563486099243164 2023-01-22 16:38:27.833739: step: 352/470, loss: 1.5302809476852417 2023-01-22 16:38:28.595763: step: 354/470, loss: 0.256759375333786 2023-01-22 16:38:29.337535: step: 356/470, loss: 0.8103635311126709 2023-01-22 16:38:30.046702: step: 358/470, loss: 1.6359080076217651 2023-01-22 16:38:30.752202: step: 360/470, loss: 1.4214690923690796 2023-01-22 16:38:31.476896: step: 362/470, loss: 1.7611870765686035 2023-01-22 16:38:32.117941: step: 364/470, loss: 1.1694140434265137 2023-01-22 16:38:32.796208: step: 366/470, loss: 0.360617458820343 2023-01-22 16:38:33.577542: step: 368/470, loss: 1.3414287567138672 2023-01-22 16:38:34.368528: step: 370/470, loss: 3.1062469482421875 2023-01-22 16:38:35.131476: step: 372/470, loss: 1.2116090059280396 2023-01-22 16:38:35.923124: step: 374/470, loss: 2.1322052478790283 2023-01-22 16:38:36.736202: step: 376/470, loss: 2.370061159133911 2023-01-22 16:38:37.474454: step: 378/470, loss: 0.9331423044204712 2023-01-22 16:38:38.273222: step: 380/470, loss: 2.5580132007598877 2023-01-22 16:38:39.026412: step: 382/470, loss: 0.35974249243736267 2023-01-22 16:38:39.872905: step: 384/470, loss: 3.127124786376953 2023-01-22 16:38:40.603476: step: 386/470, loss: 0.8177119493484497 2023-01-22 16:38:41.388091: step: 388/470, loss: 2.3154079914093018 2023-01-22 16:38:42.097386: step: 390/470, loss: 1.1731178760528564 2023-01-22 16:38:42.892609: step: 392/470, loss: 2.1064229011535645 2023-01-22 16:38:43.644901: step: 394/470, loss: 2.3349862098693848 2023-01-22 16:38:44.432356: step: 396/470, loss: 1.1178443431854248 2023-01-22 16:38:45.125670: step: 398/470, loss: 0.28772637248039246 2023-01-22 16:38:45.914803: step: 400/470, loss: 0.7768524289131165 2023-01-22 16:38:46.615056: step: 402/470, loss: 1.5863418579101562 2023-01-22 16:38:47.320917: step: 404/470, loss: 2.128065347671509 2023-01-22 16:38:48.072209: step: 406/470, loss: 0.3368366062641144 2023-01-22 16:38:48.830776: step: 408/470, loss: 1.258863091468811 2023-01-22 16:38:49.561647: step: 410/470, loss: 2.4128575325012207 2023-01-22 16:38:50.297131: step: 412/470, loss: 1.5281002521514893 2023-01-22 16:38:50.944529: step: 414/470, loss: 1.2488266229629517 2023-01-22 16:38:51.666392: step: 416/470, loss: 3.86458683013916 2023-01-22 16:38:52.392572: step: 418/470, loss: 1.467484474182129 2023-01-22 16:38:53.128758: step: 420/470, loss: 6.8370680809021 2023-01-22 16:38:53.912977: step: 422/470, loss: 1.9299228191375732 2023-01-22 16:38:54.625262: step: 424/470, loss: 3.437924861907959 2023-01-22 16:38:55.359794: step: 426/470, loss: 3.256579875946045 2023-01-22 16:38:56.144839: step: 428/470, loss: 7.5270185470581055 2023-01-22 16:38:56.926836: step: 430/470, loss: 2.0738301277160645 2023-01-22 16:38:57.654696: step: 432/470, loss: 1.6853046417236328 2023-01-22 16:38:58.390263: step: 434/470, loss: 1.0764847993850708 2023-01-22 16:38:59.205039: step: 436/470, loss: 2.1314823627471924 2023-01-22 16:38:59.998476: step: 438/470, loss: 0.8061450719833374 2023-01-22 16:39:00.730704: step: 440/470, loss: 0.616207480430603 2023-01-22 16:39:01.546403: step: 442/470, loss: 3.1026275157928467 2023-01-22 16:39:02.379796: step: 444/470, loss: 2.10392689704895 2023-01-22 16:39:03.101934: step: 446/470, loss: 0.5284572839736938 2023-01-22 16:39:03.847071: step: 448/470, loss: 0.8803726434707642 2023-01-22 16:39:04.602063: step: 450/470, loss: 4.2363104820251465 2023-01-22 16:39:05.420261: step: 452/470, loss: 3.7615818977355957 2023-01-22 16:39:06.170882: step: 454/470, loss: 1.4120464324951172 2023-01-22 16:39:06.890624: step: 456/470, loss: 2.512500524520874 2023-01-22 16:39:07.698233: step: 458/470, loss: 0.9251059889793396 2023-01-22 16:39:08.404764: step: 460/470, loss: 1.114088535308838 2023-01-22 16:39:09.101020: step: 462/470, loss: 0.8354222178459167 2023-01-22 16:39:09.782504: step: 464/470, loss: 4.140608310699463 2023-01-22 16:39:10.554608: step: 466/470, loss: 1.2718843221664429 2023-01-22 16:39:11.365229: step: 468/470, loss: 0.8437068462371826 2023-01-22 16:39:12.174162: step: 470/470, loss: 0.7166956663131714 2023-01-22 16:39:12.844803: step: 472/470, loss: 0.8796911835670471 2023-01-22 16:39:13.587049: step: 474/470, loss: 1.966931700706482 2023-01-22 16:39:14.407797: step: 476/470, loss: 1.254460334777832 2023-01-22 16:39:15.092802: step: 478/470, loss: 0.2941884994506836 2023-01-22 16:39:15.859772: step: 480/470, loss: 0.5314381122589111 2023-01-22 16:39:16.687115: step: 482/470, loss: 0.2590300142765045 2023-01-22 16:39:17.528386: step: 484/470, loss: 0.6093426942825317 2023-01-22 16:39:18.239579: step: 486/470, loss: 0.6896905303001404 2023-01-22 16:39:19.087210: step: 488/470, loss: 6.51729679107666 2023-01-22 16:39:19.925379: step: 490/470, loss: 1.2721233367919922 2023-01-22 16:39:20.651677: step: 492/470, loss: 0.5004115700721741 2023-01-22 16:39:21.368082: step: 494/470, loss: 1.5959545373916626 2023-01-22 16:39:22.117633: step: 496/470, loss: 2.6702709197998047 2023-01-22 16:39:22.837111: step: 498/470, loss: 0.9216657876968384 2023-01-22 16:39:23.554869: step: 500/470, loss: 1.1959245204925537 2023-01-22 16:39:24.346099: step: 502/470, loss: 3.9152817726135254 2023-01-22 16:39:25.103867: step: 504/470, loss: 1.635777473449707 2023-01-22 16:39:25.815093: step: 506/470, loss: 0.926995038986206 2023-01-22 16:39:26.574470: step: 508/470, loss: 0.5806446075439453 2023-01-22 16:39:27.293021: step: 510/470, loss: 1.9550533294677734 2023-01-22 16:39:28.094116: step: 512/470, loss: 0.8963161706924438 2023-01-22 16:39:28.897136: step: 514/470, loss: 0.9910989999771118 2023-01-22 16:39:29.540477: step: 516/470, loss: 1.1451404094696045 2023-01-22 16:39:30.262389: step: 518/470, loss: 0.4220796227455139 2023-01-22 16:39:31.075924: step: 520/470, loss: 0.37119582295417786 2023-01-22 16:39:31.939219: step: 522/470, loss: 0.8361790180206299 2023-01-22 16:39:32.700795: step: 524/470, loss: 0.5909321308135986 2023-01-22 16:39:33.484287: step: 526/470, loss: 1.3367969989776611 2023-01-22 16:39:34.202730: step: 528/470, loss: 0.8631378412246704 2023-01-22 16:39:34.964166: step: 530/470, loss: 0.7489875555038452 2023-01-22 16:39:35.740248: step: 532/470, loss: 0.8519724607467651 2023-01-22 16:39:36.442662: step: 534/470, loss: 1.9926557540893555 2023-01-22 16:39:37.177999: step: 536/470, loss: 9.287836074829102 2023-01-22 16:39:37.867771: step: 538/470, loss: 0.34389472007751465 2023-01-22 16:39:38.649745: step: 540/470, loss: 0.8343438506126404 2023-01-22 16:39:39.396849: step: 542/470, loss: 0.582844614982605 2023-01-22 16:39:40.165118: step: 544/470, loss: 8.287338256835938 2023-01-22 16:39:40.868119: step: 546/470, loss: 0.3143865764141083 2023-01-22 16:39:41.605619: step: 548/470, loss: 2.0470499992370605 2023-01-22 16:39:42.296940: step: 550/470, loss: 0.6878257989883423 2023-01-22 16:39:43.035157: step: 552/470, loss: 0.37249213457107544 2023-01-22 16:39:43.781913: step: 554/470, loss: 0.5701853036880493 2023-01-22 16:39:44.540337: step: 556/470, loss: 0.40355125069618225 2023-01-22 16:39:45.215830: step: 558/470, loss: 3.7175612449645996 2023-01-22 16:39:45.844679: step: 560/470, loss: 0.47289979457855225 2023-01-22 16:39:46.587997: step: 562/470, loss: 0.6108911633491516 2023-01-22 16:39:47.301309: step: 564/470, loss: 3.1750166416168213 2023-01-22 16:39:48.099568: step: 566/470, loss: 0.4857003092765808 2023-01-22 16:39:48.824953: step: 568/470, loss: 2.3505191802978516 2023-01-22 16:39:49.619440: step: 570/470, loss: 1.422705888748169 2023-01-22 16:39:50.353230: step: 572/470, loss: 5.138402938842773 2023-01-22 16:39:51.108494: step: 574/470, loss: 3.7409205436706543 2023-01-22 16:39:51.853706: step: 576/470, loss: 4.080702781677246 2023-01-22 16:39:52.571907: step: 578/470, loss: 1.1940529346466064 2023-01-22 16:39:53.344435: step: 580/470, loss: 1.1898057460784912 2023-01-22 16:39:54.186976: step: 582/470, loss: 4.188379764556885 2023-01-22 16:39:55.117984: step: 584/470, loss: 2.8826589584350586 2023-01-22 16:39:55.820956: step: 586/470, loss: 2.7253611087799072 2023-01-22 16:39:56.507538: step: 588/470, loss: 1.339220643043518 2023-01-22 16:39:57.202162: step: 590/470, loss: 0.9713016748428345 2023-01-22 16:39:57.957761: step: 592/470, loss: 0.6565048098564148 2023-01-22 16:39:58.695598: step: 594/470, loss: 0.6998676061630249 2023-01-22 16:39:59.444234: step: 596/470, loss: 5.920351028442383 2023-01-22 16:40:00.169315: step: 598/470, loss: 1.8115699291229248 2023-01-22 16:40:00.897973: step: 600/470, loss: 1.0007771253585815 2023-01-22 16:40:01.651516: step: 602/470, loss: 0.6656914949417114 2023-01-22 16:40:02.377448: step: 604/470, loss: 1.722856879234314 2023-01-22 16:40:03.205231: step: 606/470, loss: 1.1696670055389404 2023-01-22 16:40:03.936984: step: 608/470, loss: 1.6859041452407837 2023-01-22 16:40:04.722245: step: 610/470, loss: 1.470185399055481 2023-01-22 16:40:05.573498: step: 612/470, loss: 1.2091671228408813 2023-01-22 16:40:06.245327: step: 614/470, loss: 0.2507552206516266 2023-01-22 16:40:07.055353: step: 616/470, loss: 0.7892252802848816 2023-01-22 16:40:07.871513: step: 618/470, loss: 5.00029182434082 2023-01-22 16:40:08.536833: step: 620/470, loss: 0.6601501703262329 2023-01-22 16:40:09.292391: step: 622/470, loss: 0.43022409081459045 2023-01-22 16:40:10.110239: step: 624/470, loss: 0.3008459806442261 2023-01-22 16:40:10.903048: step: 626/470, loss: 3.7314624786376953 2023-01-22 16:40:11.633637: step: 628/470, loss: 1.765567421913147 2023-01-22 16:40:12.490497: step: 630/470, loss: 1.6932260990142822 2023-01-22 16:40:13.313237: step: 632/470, loss: 1.9966813325881958 2023-01-22 16:40:14.041122: step: 634/470, loss: 0.7442370653152466 2023-01-22 16:40:14.759189: step: 636/470, loss: 0.33508580923080444 2023-01-22 16:40:15.524477: step: 638/470, loss: 0.5471466779708862 2023-01-22 16:40:16.359262: step: 640/470, loss: 0.6237709522247314 2023-01-22 16:40:17.137071: step: 642/470, loss: 1.7720106840133667 2023-01-22 16:40:17.989491: step: 644/470, loss: 0.4505968987941742 2023-01-22 16:40:18.671054: step: 646/470, loss: 1.4006216526031494 2023-01-22 16:40:19.442014: step: 648/470, loss: 2.9941649436950684 2023-01-22 16:40:20.217256: step: 650/470, loss: 0.3443487286567688 2023-01-22 16:40:20.902034: step: 652/470, loss: 0.7548519968986511 2023-01-22 16:40:21.641671: step: 654/470, loss: 7.085939884185791 2023-01-22 16:40:22.386029: step: 656/470, loss: 1.421372652053833 2023-01-22 16:40:23.133938: step: 658/470, loss: 0.9636402130126953 2023-01-22 16:40:23.809436: step: 660/470, loss: 5.655693054199219 2023-01-22 16:40:24.560409: step: 662/470, loss: 1.7296228408813477 2023-01-22 16:40:25.210168: step: 664/470, loss: 1.15548837184906 2023-01-22 16:40:26.015216: step: 666/470, loss: 5.414955139160156 2023-01-22 16:40:26.763097: step: 668/470, loss: 1.2120192050933838 2023-01-22 16:40:27.586784: step: 670/470, loss: 0.7238351106643677 2023-01-22 16:40:28.290061: step: 672/470, loss: 0.3124326169490814 2023-01-22 16:40:29.064118: step: 674/470, loss: 0.9182902574539185 2023-01-22 16:40:29.838223: step: 676/470, loss: 1.8736960887908936 2023-01-22 16:40:30.582066: step: 678/470, loss: 2.540367841720581 2023-01-22 16:40:31.268371: step: 680/470, loss: 3.663296699523926 2023-01-22 16:40:32.084071: step: 682/470, loss: 0.9578647613525391 2023-01-22 16:40:32.804228: step: 684/470, loss: 1.4884737730026245 2023-01-22 16:40:33.591825: step: 686/470, loss: 0.906496524810791 2023-01-22 16:40:34.300161: step: 688/470, loss: 1.3751282691955566 2023-01-22 16:40:35.054196: step: 690/470, loss: 0.4115382134914398 2023-01-22 16:40:35.867852: step: 692/470, loss: 1.414757251739502 2023-01-22 16:40:36.597172: step: 694/470, loss: 0.8882244825363159 2023-01-22 16:40:37.413530: step: 696/470, loss: 0.3683246076107025 2023-01-22 16:40:38.136134: step: 698/470, loss: 1.179138422012329 2023-01-22 16:40:38.822630: step: 700/470, loss: 1.4897336959838867 2023-01-22 16:40:39.656782: step: 702/470, loss: 0.3474949300289154 2023-01-22 16:40:40.489985: step: 704/470, loss: 2.322219133377075 2023-01-22 16:40:41.232433: step: 706/470, loss: 2.4184319972991943 2023-01-22 16:40:41.918011: step: 708/470, loss: 0.44905775785446167 2023-01-22 16:40:42.680462: step: 710/470, loss: 1.451400637626648 2023-01-22 16:40:43.375655: step: 712/470, loss: 2.693408489227295 2023-01-22 16:40:44.108014: step: 714/470, loss: 0.6166913509368896 2023-01-22 16:40:44.797719: step: 716/470, loss: 1.744429349899292 2023-01-22 16:40:45.559015: step: 718/470, loss: 0.4033623933792114 2023-01-22 16:40:46.251384: step: 720/470, loss: 1.1890370845794678 2023-01-22 16:40:46.958877: step: 722/470, loss: 1.769646406173706 2023-01-22 16:40:47.771597: step: 724/470, loss: 7.846860885620117 2023-01-22 16:40:48.481300: step: 726/470, loss: 1.7710165977478027 2023-01-22 16:40:49.274383: step: 728/470, loss: 0.9685930013656616 2023-01-22 16:40:50.018328: step: 730/470, loss: 1.263967514038086 2023-01-22 16:40:50.864383: step: 732/470, loss: 0.39430445432662964 2023-01-22 16:40:51.609329: step: 734/470, loss: 5.113945960998535 2023-01-22 16:40:52.374693: step: 736/470, loss: 0.29610970616340637 2023-01-22 16:40:53.049152: step: 738/470, loss: 0.3253304660320282 2023-01-22 16:40:53.813317: step: 740/470, loss: 0.6527763605117798 2023-01-22 16:40:54.593481: step: 742/470, loss: 1.0859628915786743 2023-01-22 16:40:55.337445: step: 744/470, loss: 0.3612285256385803 2023-01-22 16:40:56.040284: step: 746/470, loss: 1.7214696407318115 2023-01-22 16:40:56.759563: step: 748/470, loss: 3.29121470451355 2023-01-22 16:40:57.555948: step: 750/470, loss: 2.983771324157715 2023-01-22 16:40:58.298680: step: 752/470, loss: 1.9223625659942627 2023-01-22 16:40:58.990225: step: 754/470, loss: 1.0342754125595093 2023-01-22 16:40:59.727821: step: 756/470, loss: 0.21807289123535156 2023-01-22 16:41:00.458394: step: 758/470, loss: 0.3551430106163025 2023-01-22 16:41:01.160133: step: 760/470, loss: 6.2613372802734375 2023-01-22 16:41:01.856556: step: 762/470, loss: 2.6288414001464844 2023-01-22 16:41:02.510366: step: 764/470, loss: 0.3011544942855835 2023-01-22 16:41:03.230366: step: 766/470, loss: 1.0295944213867188 2023-01-22 16:41:04.020502: step: 768/470, loss: 1.5300159454345703 2023-01-22 16:41:04.803411: step: 770/470, loss: 1.7910230159759521 2023-01-22 16:41:05.580752: step: 772/470, loss: 3.126624584197998 2023-01-22 16:41:06.315093: step: 774/470, loss: 1.3450465202331543 2023-01-22 16:41:07.078216: step: 776/470, loss: 2.3319666385650635 2023-01-22 16:41:07.830642: step: 778/470, loss: 0.3326357901096344 2023-01-22 16:41:08.606111: step: 780/470, loss: 3.0053279399871826 2023-01-22 16:41:09.412323: step: 782/470, loss: 1.9255592823028564 2023-01-22 16:41:10.124161: step: 784/470, loss: 3.3029167652130127 2023-01-22 16:41:10.871759: step: 786/470, loss: 1.8911656141281128 2023-01-22 16:41:11.662456: step: 788/470, loss: 1.2011070251464844 2023-01-22 16:41:12.375729: step: 790/470, loss: 1.5344570875167847 2023-01-22 16:41:13.085648: step: 792/470, loss: 0.8595614433288574 2023-01-22 16:41:13.861565: step: 794/470, loss: 0.2718544006347656 2023-01-22 16:41:14.638311: step: 796/470, loss: 0.30841192603111267 2023-01-22 16:41:15.419287: step: 798/470, loss: 2.8022773265838623 2023-01-22 16:41:16.176594: step: 800/470, loss: 1.4509482383728027 2023-01-22 16:41:16.981681: step: 802/470, loss: 0.4220521152019501 2023-01-22 16:41:17.721390: step: 804/470, loss: 3.620166301727295 2023-01-22 16:41:18.469097: step: 806/470, loss: 0.8263979554176331 2023-01-22 16:41:19.288530: step: 808/470, loss: 3.336337089538574 2023-01-22 16:41:20.093274: step: 810/470, loss: 0.8141534924507141 2023-01-22 16:41:20.856087: step: 812/470, loss: 1.7943094968795776 2023-01-22 16:41:21.685692: step: 814/470, loss: 3.9635958671569824 2023-01-22 16:41:22.435318: step: 816/470, loss: 1.4038106203079224 2023-01-22 16:41:23.113727: step: 818/470, loss: 0.2446841448545456 2023-01-22 16:41:23.846775: step: 820/470, loss: 0.7909644246101379 2023-01-22 16:41:24.542201: step: 822/470, loss: 1.7075693607330322 2023-01-22 16:41:25.253390: step: 824/470, loss: 1.881824254989624 2023-01-22 16:41:25.989064: step: 826/470, loss: 0.8060756325721741 2023-01-22 16:41:26.725231: step: 828/470, loss: 1.0279686450958252 2023-01-22 16:41:27.487315: step: 830/470, loss: 0.47068899869918823 2023-01-22 16:41:28.208745: step: 832/470, loss: 0.6771692633628845 2023-01-22 16:41:28.976221: step: 834/470, loss: 1.3380112648010254 2023-01-22 16:41:29.669634: step: 836/470, loss: 1.1310175657272339 2023-01-22 16:41:30.368588: step: 838/470, loss: 0.6593484878540039 2023-01-22 16:41:31.075063: step: 840/470, loss: 2.9913806915283203 2023-01-22 16:41:31.826241: step: 842/470, loss: 2.463867664337158 2023-01-22 16:41:32.543529: step: 844/470, loss: 1.755760669708252 2023-01-22 16:41:33.291778: step: 846/470, loss: 0.5958117246627808 2023-01-22 16:41:34.049013: step: 848/470, loss: 1.926990032196045 2023-01-22 16:41:34.799749: step: 850/470, loss: 1.5289037227630615 2023-01-22 16:41:35.527099: step: 852/470, loss: 4.594353199005127 2023-01-22 16:41:36.299770: step: 854/470, loss: 1.013411283493042 2023-01-22 16:41:37.095653: step: 856/470, loss: 14.002585411071777 2023-01-22 16:41:37.796822: step: 858/470, loss: 0.3551334738731384 2023-01-22 16:41:38.633821: step: 860/470, loss: 3.323481559753418 2023-01-22 16:41:39.384518: step: 862/470, loss: 0.4271811842918396 2023-01-22 16:41:40.096011: step: 864/470, loss: 2.759626865386963 2023-01-22 16:41:40.747969: step: 866/470, loss: 2.3287715911865234 2023-01-22 16:41:41.476176: step: 868/470, loss: 1.9157121181488037 2023-01-22 16:41:42.288617: step: 870/470, loss: 0.848456859588623 2023-01-22 16:41:42.999863: step: 872/470, loss: 9.034523963928223 2023-01-22 16:41:43.759967: step: 874/470, loss: 3.3581533432006836 2023-01-22 16:41:44.517449: step: 876/470, loss: 0.41928038001060486 2023-01-22 16:41:45.161043: step: 878/470, loss: 4.671534538269043 2023-01-22 16:41:45.965089: step: 880/470, loss: 1.3843767642974854 2023-01-22 16:41:46.743902: step: 882/470, loss: 0.8925577998161316 2023-01-22 16:41:47.458195: step: 884/470, loss: 0.2983015179634094 2023-01-22 16:41:48.127601: step: 886/470, loss: 1.8470473289489746 2023-01-22 16:41:48.867931: step: 888/470, loss: 2.3493099212646484 2023-01-22 16:41:49.653195: step: 890/470, loss: 1.196245551109314 2023-01-22 16:41:50.364962: step: 892/470, loss: 1.8996955156326294 2023-01-22 16:41:51.119935: step: 894/470, loss: 4.634321212768555 2023-01-22 16:41:51.888415: step: 896/470, loss: 2.013730764389038 2023-01-22 16:41:52.730426: step: 898/470, loss: 0.5299916863441467 2023-01-22 16:41:53.462327: step: 900/470, loss: 1.3865522146224976 2023-01-22 16:41:54.205229: step: 902/470, loss: 0.5649352073669434 2023-01-22 16:41:54.938985: step: 904/470, loss: 0.6543347835540771 2023-01-22 16:41:55.600907: step: 906/470, loss: 1.2292380332946777 2023-01-22 16:41:56.345373: step: 908/470, loss: 0.953291654586792 2023-01-22 16:41:57.038126: step: 910/470, loss: 5.457536220550537 2023-01-22 16:41:57.787227: step: 912/470, loss: 3.373645782470703 2023-01-22 16:41:58.445121: step: 914/470, loss: 3.1682276725769043 2023-01-22 16:41:59.151155: step: 916/470, loss: 1.7389862537384033 2023-01-22 16:41:59.876527: step: 918/470, loss: 0.5257987976074219 2023-01-22 16:42:00.646478: step: 920/470, loss: 0.9832305908203125 2023-01-22 16:42:01.386751: step: 922/470, loss: 1.7744770050048828 2023-01-22 16:42:02.135966: step: 924/470, loss: 0.5747044086456299 2023-01-22 16:42:02.916331: step: 926/470, loss: 0.39155885577201843 2023-01-22 16:42:03.727205: step: 928/470, loss: 0.668175220489502 2023-01-22 16:42:04.447596: step: 930/470, loss: 0.4773980975151062 2023-01-22 16:42:05.284718: step: 932/470, loss: 0.8903824090957642 2023-01-22 16:42:06.041042: step: 934/470, loss: 1.2178103923797607 2023-01-22 16:42:06.795271: step: 936/470, loss: 2.6373496055603027 2023-01-22 16:42:07.586132: step: 938/470, loss: 1.7452850341796875 2023-01-22 16:42:08.253538: step: 940/470, loss: 0.6965129375457764 2023-01-22 16:42:08.886575: step: 942/470, loss: 0.40335753560066223 ================================================== Loss: 2.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2259406997788764, 'r': 0.2511401351039307, 'f1': 0.23787489967767114}, 'combined': 0.17527624186775767, 'epoch': 1} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.29535803921270615, 'r': 0.2254122715372139, 'f1': 0.2556878729889223}, 'combined': 0.17809105581317974, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2176302451673739, 'r': 0.2493079935187686, 'f1': 0.23239458779106356}, 'combined': 0.17123811731973104, 'epoch': 1} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.29179700553582955, 'r': 0.22856965821934105, 'f1': 0.25634210056212126}, 'combined': 0.1785467367099352, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20295208490866637, 'r': 0.24783940803591395, 'f1': 0.22316093081021016}, 'combined': 0.16443437007068115, 'epoch': 1} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.279769607766213, 'r': 0.2325601629274273, 'f1': 0.25398979050608034}, 'combined': 0.17690831179527985, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.15079365079365079, 'r': 0.2714285714285714, 'f1': 0.19387755102040818}, 'combined': 0.1292517006802721, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.16176470588235295, 'r': 0.2391304347826087, 'f1': 0.1929824561403509}, 'combined': 0.09649122807017545, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2259406997788764, 'r': 0.2511401351039307, 'f1': 0.23787489967767114}, 'combined': 0.17527624186775767, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.29535803921270615, 'r': 0.2254122715372139, 'f1': 0.2556878729889223}, 'combined': 0.17809105581317974, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.15079365079365079, 'r': 0.2714285714285714, 'f1': 0.19387755102040818}, 'combined': 0.1292517006802721, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2176302451673739, 'r': 0.2493079935187686, 'f1': 0.23239458779106356}, 'combined': 0.17123811731973104, 'epoch': 1} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.29179700553582955, 'r': 0.22856965821934105, 'f1': 0.25634210056212126}, 'combined': 0.1785467367099352, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.16176470588235295, 'r': 0.2391304347826087, 'f1': 0.1929824561403509}, 'combined': 0.09649122807017545, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20295208490866637, 'r': 0.24783940803591395, 'f1': 0.22316093081021016}, 'combined': 0.16443437007068115, 'epoch': 1} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.279769607766213, 'r': 0.2325601629274273, 'f1': 0.25398979050608034}, 'combined': 0.17690831179527985, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:45:16.664772: step: 2/470, loss: 2.46811842918396 2023-01-22 16:45:17.460751: step: 4/470, loss: 1.7362735271453857 2023-01-22 16:45:18.179607: step: 6/470, loss: 0.5531305074691772 2023-01-22 16:45:18.945327: step: 8/470, loss: 1.096218228340149 2023-01-22 16:45:19.715510: step: 10/470, loss: 0.9702210426330566 2023-01-22 16:45:20.459498: step: 12/470, loss: 1.731630802154541 2023-01-22 16:45:21.179657: step: 14/470, loss: 2.6520066261291504 2023-01-22 16:45:21.995681: step: 16/470, loss: 0.960263729095459 2023-01-22 16:45:22.626672: step: 18/470, loss: 2.0434067249298096 2023-01-22 16:45:23.343549: step: 20/470, loss: 1.127629280090332 2023-01-22 16:45:24.075024: step: 22/470, loss: 0.27968132495880127 2023-01-22 16:45:24.783281: step: 24/470, loss: 0.7146601676940918 2023-01-22 16:45:25.552160: step: 26/470, loss: 0.5010353922843933 2023-01-22 16:45:26.334503: step: 28/470, loss: 0.7857263088226318 2023-01-22 16:45:27.053639: step: 30/470, loss: 6.040136337280273 2023-01-22 16:45:27.922762: step: 32/470, loss: 0.8596094846725464 2023-01-22 16:45:28.693604: step: 34/470, loss: 0.8374332189559937 2023-01-22 16:45:29.447761: step: 36/470, loss: 0.49315130710601807 2023-01-22 16:45:30.212324: step: 38/470, loss: 0.49098867177963257 2023-01-22 16:45:30.957558: step: 40/470, loss: 0.5622952580451965 2023-01-22 16:45:31.701586: step: 42/470, loss: 0.7310096621513367 2023-01-22 16:45:32.528326: step: 44/470, loss: 0.5263378024101257 2023-01-22 16:45:33.291912: step: 46/470, loss: 1.4505245685577393 2023-01-22 16:45:34.061245: step: 48/470, loss: 0.34201517701148987 2023-01-22 16:45:34.840968: step: 50/470, loss: 8.089656829833984 2023-01-22 16:45:35.617162: step: 52/470, loss: 0.4995441138744354 2023-01-22 16:45:36.568161: step: 54/470, loss: 2.23051118850708 2023-01-22 16:45:37.375644: step: 56/470, loss: 0.6585686206817627 2023-01-22 16:45:38.079660: step: 58/470, loss: 1.034457802772522 2023-01-22 16:45:38.872008: step: 60/470, loss: 1.8960305452346802 2023-01-22 16:45:39.649072: step: 62/470, loss: 1.0263447761535645 2023-01-22 16:45:40.441378: step: 64/470, loss: 1.5714144706726074 2023-01-22 16:45:41.183356: step: 66/470, loss: 2.10054612159729 2023-01-22 16:45:41.950117: step: 68/470, loss: 0.5159252882003784 2023-01-22 16:45:42.699549: step: 70/470, loss: 0.5332794189453125 2023-01-22 16:45:43.539223: step: 72/470, loss: 1.1697885990142822 2023-01-22 16:45:44.241700: step: 74/470, loss: 0.3312620222568512 2023-01-22 16:45:44.921672: step: 76/470, loss: 1.2939296960830688 2023-01-22 16:45:45.656438: step: 78/470, loss: 1.7509288787841797 2023-01-22 16:45:46.312975: step: 80/470, loss: 0.5027635097503662 2023-01-22 16:45:47.091652: step: 82/470, loss: 1.1433383226394653 2023-01-22 16:45:47.833744: step: 84/470, loss: 0.35809245705604553 2023-01-22 16:45:48.566917: step: 86/470, loss: 0.18918129801750183 2023-01-22 16:45:49.375712: step: 88/470, loss: 0.9970061779022217 2023-01-22 16:45:50.120790: step: 90/470, loss: 3.602595090866089 2023-01-22 16:45:50.846612: step: 92/470, loss: 0.8049639463424683 2023-01-22 16:45:51.590277: step: 94/470, loss: 0.3445468544960022 2023-01-22 16:45:52.377373: step: 96/470, loss: 4.520965576171875 2023-01-22 16:45:53.162394: step: 98/470, loss: 1.303832769393921 2023-01-22 16:45:53.888483: step: 100/470, loss: 1.4521981477737427 2023-01-22 16:45:54.556047: step: 102/470, loss: 0.2904055118560791 2023-01-22 16:45:55.313007: step: 104/470, loss: 1.4000365734100342 2023-01-22 16:45:56.090474: step: 106/470, loss: 0.7857203483581543 2023-01-22 16:45:56.914970: step: 108/470, loss: 0.29761266708374023 2023-01-22 16:45:57.666561: step: 110/470, loss: 1.4033076763153076 2023-01-22 16:45:58.386875: step: 112/470, loss: 1.4501266479492188 2023-01-22 16:45:59.148969: step: 114/470, loss: 1.0425773859024048 2023-01-22 16:45:59.854770: step: 116/470, loss: 0.5747327208518982 2023-01-22 16:46:00.562925: step: 118/470, loss: 1.055211067199707 2023-01-22 16:46:01.314134: step: 120/470, loss: 2.8011343479156494 2023-01-22 16:46:02.076401: step: 122/470, loss: 1.3757091760635376 2023-01-22 16:46:02.927559: step: 124/470, loss: 0.9192894101142883 2023-01-22 16:46:03.649847: step: 126/470, loss: 1.9871468544006348 2023-01-22 16:46:04.470186: step: 128/470, loss: 0.4054987132549286 2023-01-22 16:46:05.198885: step: 130/470, loss: 0.4849776327610016 2023-01-22 16:46:05.974132: step: 132/470, loss: 1.0256551504135132 2023-01-22 16:46:06.616585: step: 134/470, loss: 0.4269829988479614 2023-01-22 16:46:07.414300: step: 136/470, loss: 0.9165063500404358 2023-01-22 16:46:08.175107: step: 138/470, loss: 1.5846391916275024 2023-01-22 16:46:08.973714: step: 140/470, loss: 0.4875439703464508 2023-01-22 16:46:09.653024: step: 142/470, loss: 4.090658187866211 2023-01-22 16:46:10.376594: step: 144/470, loss: 2.0160574913024902 2023-01-22 16:46:11.095519: step: 146/470, loss: 0.6893378496170044 2023-01-22 16:46:11.946596: step: 148/470, loss: 9.704105377197266 2023-01-22 16:46:12.708908: step: 150/470, loss: 2.638101100921631 2023-01-22 16:46:13.413871: step: 152/470, loss: 1.744828462600708 2023-01-22 16:46:14.153405: step: 154/470, loss: 0.3475514054298401 2023-01-22 16:46:14.864351: step: 156/470, loss: 0.37182146310806274 2023-01-22 16:46:15.669818: step: 158/470, loss: 2.7568507194519043 2023-01-22 16:46:16.346653: step: 160/470, loss: 4.175437927246094 2023-01-22 16:46:17.157382: step: 162/470, loss: 0.8821776509284973 2023-01-22 16:46:17.907379: step: 164/470, loss: 1.03183913230896 2023-01-22 16:46:18.640631: step: 166/470, loss: 1.8309249877929688 2023-01-22 16:46:19.352362: step: 168/470, loss: 0.4933227300643921 2023-01-22 16:46:20.126633: step: 170/470, loss: 1.0841269493103027 2023-01-22 16:46:20.852654: step: 172/470, loss: 5.927791595458984 2023-01-22 16:46:21.700043: step: 174/470, loss: 1.4856324195861816 2023-01-22 16:46:22.436466: step: 176/470, loss: 0.46990150213241577 2023-01-22 16:46:23.147908: step: 178/470, loss: 1.1630284786224365 2023-01-22 16:46:23.901623: step: 180/470, loss: 0.9261336922645569 2023-01-22 16:46:24.614088: step: 182/470, loss: 0.8008025884628296 2023-01-22 16:46:25.428501: step: 184/470, loss: 6.333198547363281 2023-01-22 16:46:26.226209: step: 186/470, loss: 0.42174702882766724 2023-01-22 16:46:26.974621: step: 188/470, loss: 0.4755881726741791 2023-01-22 16:46:27.748140: step: 190/470, loss: 1.147223949432373 2023-01-22 16:46:28.549111: step: 192/470, loss: 0.19824132323265076 2023-01-22 16:46:29.274847: step: 194/470, loss: 4.473010540008545 2023-01-22 16:46:29.957562: step: 196/470, loss: 0.8954963684082031 2023-01-22 16:46:30.662748: step: 198/470, loss: 0.2654518783092499 2023-01-22 16:46:31.410483: step: 200/470, loss: 2.4173974990844727 2023-01-22 16:46:32.202745: step: 202/470, loss: 0.2253369241952896 2023-01-22 16:46:33.060930: step: 204/470, loss: 0.5632342100143433 2023-01-22 16:46:33.871325: step: 206/470, loss: 1.0098700523376465 2023-01-22 16:46:34.565547: step: 208/470, loss: 0.7151339054107666 2023-01-22 16:46:35.297248: step: 210/470, loss: 1.4566631317138672 2023-01-22 16:46:36.004820: step: 212/470, loss: 1.1144832372665405 2023-01-22 16:46:36.763192: step: 214/470, loss: 0.6245443224906921 2023-01-22 16:46:37.479495: step: 216/470, loss: 3.356032371520996 2023-01-22 16:46:38.223434: step: 218/470, loss: 0.36491915583610535 2023-01-22 16:46:38.968292: step: 220/470, loss: 0.3796614408493042 2023-01-22 16:46:39.748784: step: 222/470, loss: 9.564358711242676 2023-01-22 16:46:40.528899: step: 224/470, loss: 1.751694679260254 2023-01-22 16:46:41.217077: step: 226/470, loss: 0.19614216685295105 2023-01-22 16:46:42.027165: step: 228/470, loss: 1.9366538524627686 2023-01-22 16:46:42.780918: step: 230/470, loss: 2.795724391937256 2023-01-22 16:46:43.456326: step: 232/470, loss: 1.4865249395370483 2023-01-22 16:46:44.169767: step: 234/470, loss: 0.4379088282585144 2023-01-22 16:46:44.903756: step: 236/470, loss: 0.5391400456428528 2023-01-22 16:46:45.707821: step: 238/470, loss: 1.4847371578216553 2023-01-22 16:46:46.537123: step: 240/470, loss: 3.0173943042755127 2023-01-22 16:46:47.315063: step: 242/470, loss: 2.1348071098327637 2023-01-22 16:46:47.978476: step: 244/470, loss: 0.5090938806533813 2023-01-22 16:46:48.743346: step: 246/470, loss: 2.2077693939208984 2023-01-22 16:46:49.492678: step: 248/470, loss: 1.9846214056015015 2023-01-22 16:46:50.279947: step: 250/470, loss: 0.22293855249881744 2023-01-22 16:46:50.980131: step: 252/470, loss: 7.160884857177734 2023-01-22 16:46:51.742296: step: 254/470, loss: 1.7741347551345825 2023-01-22 16:46:52.543465: step: 256/470, loss: 1.3045562505722046 2023-01-22 16:46:53.266679: step: 258/470, loss: 1.059791922569275 2023-01-22 16:46:54.062686: step: 260/470, loss: 1.5100021362304688 2023-01-22 16:46:54.872524: step: 262/470, loss: 0.9539613723754883 2023-01-22 16:46:55.650928: step: 264/470, loss: 0.6949822902679443 2023-01-22 16:46:56.400392: step: 266/470, loss: 0.7878126502037048 2023-01-22 16:46:57.183372: step: 268/470, loss: 0.5556519031524658 2023-01-22 16:46:57.953216: step: 270/470, loss: 0.49442341923713684 2023-01-22 16:46:58.815799: step: 272/470, loss: 1.6977099180221558 2023-01-22 16:46:59.555398: step: 274/470, loss: 0.9501678347587585 2023-01-22 16:47:00.359047: step: 276/470, loss: 0.25947466492652893 2023-01-22 16:47:01.195677: step: 278/470, loss: 8.976275444030762 2023-01-22 16:47:01.973996: step: 280/470, loss: 0.5047112703323364 2023-01-22 16:47:02.751764: step: 282/470, loss: 1.2007783651351929 2023-01-22 16:47:03.563291: step: 284/470, loss: 3.0851497650146484 2023-01-22 16:47:04.407408: step: 286/470, loss: 1.024082064628601 2023-01-22 16:47:05.189433: step: 288/470, loss: 0.8622781038284302 2023-01-22 16:47:05.919796: step: 290/470, loss: 0.44212666153907776 2023-01-22 16:47:06.683615: step: 292/470, loss: 1.0162142515182495 2023-01-22 16:47:07.439857: step: 294/470, loss: 1.515852689743042 2023-01-22 16:47:08.202335: step: 296/470, loss: 0.5383193492889404 2023-01-22 16:47:08.937781: step: 298/470, loss: 1.2464834451675415 2023-01-22 16:47:09.687042: step: 300/470, loss: 4.228458881378174 2023-01-22 16:47:10.539840: step: 302/470, loss: 2.1866626739501953 2023-01-22 16:47:11.224681: step: 304/470, loss: 2.0377941131591797 2023-01-22 16:47:11.942966: step: 306/470, loss: 1.7903894186019897 2023-01-22 16:47:12.687434: step: 308/470, loss: 2.7119550704956055 2023-01-22 16:47:13.531081: step: 310/470, loss: 4.348893642425537 2023-01-22 16:47:14.255732: step: 312/470, loss: 0.4842892289161682 2023-01-22 16:47:15.049579: step: 314/470, loss: 2.1156063079833984 2023-01-22 16:47:15.754398: step: 316/470, loss: 0.7636069059371948 2023-01-22 16:47:16.465761: step: 318/470, loss: 1.8225529193878174 2023-01-22 16:47:17.210282: step: 320/470, loss: 1.8462271690368652 2023-01-22 16:47:17.867670: step: 322/470, loss: 1.2222322225570679 2023-01-22 16:47:18.644975: step: 324/470, loss: 1.3601785898208618 2023-01-22 16:47:19.425498: step: 326/470, loss: 1.628795862197876 2023-01-22 16:47:20.131496: step: 328/470, loss: 0.41737183928489685 2023-01-22 16:47:20.935340: step: 330/470, loss: 0.7214322686195374 2023-01-22 16:47:21.726155: step: 332/470, loss: 0.3991130590438843 2023-01-22 16:47:22.360664: step: 334/470, loss: 1.107088565826416 2023-01-22 16:47:23.087918: step: 336/470, loss: 0.8664273023605347 2023-01-22 16:47:23.774504: step: 338/470, loss: 1.5518001317977905 2023-01-22 16:47:24.501173: step: 340/470, loss: 0.44015198945999146 2023-01-22 16:47:25.263280: step: 342/470, loss: 0.8017760515213013 2023-01-22 16:47:25.946079: step: 344/470, loss: 2.1953208446502686 2023-01-22 16:47:26.689985: step: 346/470, loss: 0.18744251132011414 2023-01-22 16:47:27.412680: step: 348/470, loss: 0.693583071231842 2023-01-22 16:47:28.188304: step: 350/470, loss: 3.926697015762329 2023-01-22 16:47:28.927522: step: 352/470, loss: 2.6467742919921875 2023-01-22 16:47:29.716124: step: 354/470, loss: 0.4489498734474182 2023-01-22 16:47:30.496382: step: 356/470, loss: 0.6220064163208008 2023-01-22 16:47:31.202032: step: 358/470, loss: 0.6211023330688477 2023-01-22 16:47:32.056868: step: 360/470, loss: 1.920097827911377 2023-01-22 16:47:32.787877: step: 362/470, loss: 0.7365934252738953 2023-01-22 16:47:33.515784: step: 364/470, loss: 0.5808885097503662 2023-01-22 16:47:34.259180: step: 366/470, loss: 0.21604259312152863 2023-01-22 16:47:35.037346: step: 368/470, loss: 0.4554995000362396 2023-01-22 16:47:35.775713: step: 370/470, loss: 0.5395200848579407 2023-01-22 16:47:36.579245: step: 372/470, loss: 3.7871570587158203 2023-01-22 16:47:37.318971: step: 374/470, loss: 1.1446315050125122 2023-01-22 16:47:38.042105: step: 376/470, loss: 3.1051321029663086 2023-01-22 16:47:38.758559: step: 378/470, loss: 0.9708766341209412 2023-01-22 16:47:39.494713: step: 380/470, loss: 5.53814697265625 2023-01-22 16:47:40.259706: step: 382/470, loss: 1.891498327255249 2023-01-22 16:47:40.985069: step: 384/470, loss: 0.2550835609436035 2023-01-22 16:47:41.768054: step: 386/470, loss: 0.6128339171409607 2023-01-22 16:47:42.483583: step: 388/470, loss: 0.6527276635169983 2023-01-22 16:47:43.182053: step: 390/470, loss: 1.466606855392456 2023-01-22 16:47:43.924244: step: 392/470, loss: 0.995391845703125 2023-01-22 16:47:44.656721: step: 394/470, loss: 1.0292932987213135 2023-01-22 16:47:45.411679: step: 396/470, loss: 0.5598565340042114 2023-01-22 16:47:46.159185: step: 398/470, loss: 5.429760932922363 2023-01-22 16:47:46.958284: step: 400/470, loss: 0.38686808943748474 2023-01-22 16:47:47.677216: step: 402/470, loss: 1.3042653799057007 2023-01-22 16:47:48.362056: step: 404/470, loss: 0.5991760492324829 2023-01-22 16:47:49.103177: step: 406/470, loss: 0.37910622358322144 2023-01-22 16:47:49.804721: step: 408/470, loss: 1.0450007915496826 2023-01-22 16:47:50.548711: step: 410/470, loss: 0.5937882661819458 2023-01-22 16:47:51.278479: step: 412/470, loss: 9.856136322021484 2023-01-22 16:47:51.969258: step: 414/470, loss: 1.9305442571640015 2023-01-22 16:47:52.691638: step: 416/470, loss: 0.7384703159332275 2023-01-22 16:47:53.477406: step: 418/470, loss: 0.9498000144958496 2023-01-22 16:47:54.274564: step: 420/470, loss: 0.6850473880767822 2023-01-22 16:47:54.998117: step: 422/470, loss: 6.5229716300964355 2023-01-22 16:47:55.707383: step: 424/470, loss: 1.0934593677520752 2023-01-22 16:47:56.459741: step: 426/470, loss: 1.6166236400604248 2023-01-22 16:47:57.153934: step: 428/470, loss: 2.7700743675231934 2023-01-22 16:47:57.915976: step: 430/470, loss: 1.4552366733551025 2023-01-22 16:47:58.597091: step: 432/470, loss: 1.5347661972045898 2023-01-22 16:47:59.331094: step: 434/470, loss: 0.6893869042396545 2023-01-22 16:48:00.157384: step: 436/470, loss: 0.4312663972377777 2023-01-22 16:48:00.924525: step: 438/470, loss: 0.7760770320892334 2023-01-22 16:48:01.621369: step: 440/470, loss: 0.6980346441268921 2023-01-22 16:48:02.409664: step: 442/470, loss: 4.75489616394043 2023-01-22 16:48:03.208715: step: 444/470, loss: 0.6150807738304138 2023-01-22 16:48:04.142138: step: 446/470, loss: 0.39284422993659973 2023-01-22 16:48:04.933273: step: 448/470, loss: 0.1718873381614685 2023-01-22 16:48:05.701288: step: 450/470, loss: 0.7158945798873901 2023-01-22 16:48:06.427759: step: 452/470, loss: 1.1925913095474243 2023-01-22 16:48:07.167962: step: 454/470, loss: 0.5722603797912598 2023-01-22 16:48:07.887175: step: 456/470, loss: 0.9510567784309387 2023-01-22 16:48:08.678351: step: 458/470, loss: 1.4540069103240967 2023-01-22 16:48:09.391150: step: 460/470, loss: 0.47908085584640503 2023-01-22 16:48:10.120182: step: 462/470, loss: 0.5568799376487732 2023-01-22 16:48:10.815306: step: 464/470, loss: 0.4244616627693176 2023-01-22 16:48:11.553303: step: 466/470, loss: 4.02734899520874 2023-01-22 16:48:12.366547: step: 468/470, loss: 0.6745584011077881 2023-01-22 16:48:13.156043: step: 470/470, loss: 1.6514837741851807 2023-01-22 16:48:13.886100: step: 472/470, loss: 1.545167326927185 2023-01-22 16:48:14.655076: step: 474/470, loss: 0.4058355391025543 2023-01-22 16:48:15.406998: step: 476/470, loss: 1.8532984256744385 2023-01-22 16:48:16.129617: step: 478/470, loss: 1.1376888751983643 2023-01-22 16:48:16.888799: step: 480/470, loss: 0.7555640339851379 2023-01-22 16:48:17.661842: step: 482/470, loss: 0.19936880469322205 2023-01-22 16:48:18.424424: step: 484/470, loss: 0.494382381439209 2023-01-22 16:48:19.172815: step: 486/470, loss: 1.0066436529159546 2023-01-22 16:48:20.072950: step: 488/470, loss: 0.6097040772438049 2023-01-22 16:48:20.932692: step: 490/470, loss: 0.35165679454803467 2023-01-22 16:48:21.648491: step: 492/470, loss: 0.39196884632110596 2023-01-22 16:48:22.366240: step: 494/470, loss: 2.287322998046875 2023-01-22 16:48:23.128854: step: 496/470, loss: 3.783491611480713 2023-01-22 16:48:23.843542: step: 498/470, loss: 2.2058379650115967 2023-01-22 16:48:24.651853: step: 500/470, loss: 1.644723653793335 2023-01-22 16:48:25.378720: step: 502/470, loss: 1.0776886940002441 2023-01-22 16:48:26.170243: step: 504/470, loss: 0.33019930124282837 2023-01-22 16:48:26.907795: step: 506/470, loss: 1.2233861684799194 2023-01-22 16:48:27.548170: step: 508/470, loss: 1.0420408248901367 2023-01-22 16:48:28.241555: step: 510/470, loss: 1.1336497068405151 2023-01-22 16:48:28.914754: step: 512/470, loss: 0.4762951731681824 2023-01-22 16:48:29.724914: step: 514/470, loss: 1.8861290216445923 2023-01-22 16:48:30.419106: step: 516/470, loss: 0.4970181882381439 2023-01-22 16:48:31.099806: step: 518/470, loss: 1.4443624019622803 2023-01-22 16:48:31.840208: step: 520/470, loss: 0.6937436461448669 2023-01-22 16:48:32.624412: step: 522/470, loss: 0.9865797758102417 2023-01-22 16:48:33.378387: step: 524/470, loss: 1.8651926517486572 2023-01-22 16:48:34.096785: step: 526/470, loss: 1.1333483457565308 2023-01-22 16:48:34.815927: step: 528/470, loss: 0.9567941427230835 2023-01-22 16:48:35.578392: step: 530/470, loss: 5.231558322906494 2023-01-22 16:48:36.356331: step: 532/470, loss: 2.1264209747314453 2023-01-22 16:48:37.078568: step: 534/470, loss: 1.152343511581421 2023-01-22 16:48:37.849490: step: 536/470, loss: 1.2699055671691895 2023-01-22 16:48:38.612848: step: 538/470, loss: 2.440457820892334 2023-01-22 16:48:39.348578: step: 540/470, loss: 12.110101699829102 2023-01-22 16:48:40.054287: step: 542/470, loss: 0.8827940225601196 2023-01-22 16:48:40.805168: step: 544/470, loss: 4.373961925506592 2023-01-22 16:48:41.545494: step: 546/470, loss: 0.31021592020988464 2023-01-22 16:48:42.225856: step: 548/470, loss: 4.370842456817627 2023-01-22 16:48:42.969507: step: 550/470, loss: 1.5527100563049316 2023-01-22 16:48:43.728335: step: 552/470, loss: 0.6078340411186218 2023-01-22 16:48:44.537604: step: 554/470, loss: 0.5594943761825562 2023-01-22 16:48:45.209979: step: 556/470, loss: 0.26643770933151245 2023-01-22 16:48:45.992292: step: 558/470, loss: 0.5052505731582642 2023-01-22 16:48:46.953989: step: 560/470, loss: 2.0178771018981934 2023-01-22 16:48:47.737293: step: 562/470, loss: 5.040163993835449 2023-01-22 16:48:48.479302: step: 564/470, loss: 1.0909911394119263 2023-01-22 16:48:49.240533: step: 566/470, loss: 0.4415287375450134 2023-01-22 16:48:50.076686: step: 568/470, loss: 2.973421096801758 2023-01-22 16:48:50.727905: step: 570/470, loss: 0.12367182970046997 2023-01-22 16:48:51.550923: step: 572/470, loss: 1.0943719148635864 2023-01-22 16:48:52.320944: step: 574/470, loss: 0.7173348069190979 2023-01-22 16:48:53.040583: step: 576/470, loss: 1.5143998861312866 2023-01-22 16:48:53.761118: step: 578/470, loss: 1.2005535364151 2023-01-22 16:48:54.496932: step: 580/470, loss: 1.2034974098205566 2023-01-22 16:48:55.175883: step: 582/470, loss: 1.0075057744979858 2023-01-22 16:48:55.881779: step: 584/470, loss: 1.3117833137512207 2023-01-22 16:48:56.626069: step: 586/470, loss: 0.3217265009880066 2023-01-22 16:48:57.365051: step: 588/470, loss: 1.2585625648498535 2023-01-22 16:48:58.161874: step: 590/470, loss: 1.6573166847229004 2023-01-22 16:48:58.862339: step: 592/470, loss: 0.7473699450492859 2023-01-22 16:48:59.670027: step: 594/470, loss: 1.4826210737228394 2023-01-22 16:49:00.429787: step: 596/470, loss: 3.1206932067871094 2023-01-22 16:49:01.219332: step: 598/470, loss: 1.286102056503296 2023-01-22 16:49:01.924283: step: 600/470, loss: 0.4325481653213501 2023-01-22 16:49:02.645895: step: 602/470, loss: 0.6148621439933777 2023-01-22 16:49:03.462766: step: 604/470, loss: 0.8197581768035889 2023-01-22 16:49:04.142001: step: 606/470, loss: 1.3260382413864136 2023-01-22 16:49:04.927925: step: 608/470, loss: 0.716848611831665 2023-01-22 16:49:05.598466: step: 610/470, loss: 1.1792848110198975 2023-01-22 16:49:06.395310: step: 612/470, loss: 1.3996143341064453 2023-01-22 16:49:07.085037: step: 614/470, loss: 1.7410094738006592 2023-01-22 16:49:07.875104: step: 616/470, loss: 3.7007956504821777 2023-01-22 16:49:08.577824: step: 618/470, loss: 0.2247745543718338 2023-01-22 16:49:09.360488: step: 620/470, loss: 2.267205238342285 2023-01-22 16:49:10.067911: step: 622/470, loss: 0.4186290204524994 2023-01-22 16:49:10.789386: step: 624/470, loss: 0.8166482448577881 2023-01-22 16:49:11.465073: step: 626/470, loss: 0.4580449163913727 2023-01-22 16:49:12.285706: step: 628/470, loss: 2.071941375732422 2023-01-22 16:49:13.106678: step: 630/470, loss: 1.0789272785186768 2023-01-22 16:49:13.872331: step: 632/470, loss: 1.5344969034194946 2023-01-22 16:49:14.676872: step: 634/470, loss: 1.948580026626587 2023-01-22 16:49:15.425895: step: 636/470, loss: 3.033902168273926 2023-01-22 16:49:16.212890: step: 638/470, loss: 0.23131220042705536 2023-01-22 16:49:16.886598: step: 640/470, loss: 5.123125076293945 2023-01-22 16:49:17.652069: step: 642/470, loss: 0.3747403919696808 2023-01-22 16:49:18.341181: step: 644/470, loss: 0.9803649187088013 2023-01-22 16:49:19.068257: step: 646/470, loss: 1.3845829963684082 2023-01-22 16:49:19.806671: step: 648/470, loss: 0.9319378137588501 2023-01-22 16:49:20.565371: step: 650/470, loss: 1.185803771018982 2023-01-22 16:49:21.314683: step: 652/470, loss: 1.5447531938552856 2023-01-22 16:49:22.052791: step: 654/470, loss: 0.3655240833759308 2023-01-22 16:49:22.713824: step: 656/470, loss: 1.5592364072799683 2023-01-22 16:49:23.466131: step: 658/470, loss: 0.8640881776809692 2023-01-22 16:49:24.214419: step: 660/470, loss: 2.0770931243896484 2023-01-22 16:49:24.982146: step: 662/470, loss: 0.6485579013824463 2023-01-22 16:49:25.747202: step: 664/470, loss: 0.644680380821228 2023-01-22 16:49:26.406102: step: 666/470, loss: 1.5386685132980347 2023-01-22 16:49:27.176717: step: 668/470, loss: 1.689470648765564 2023-01-22 16:49:27.880049: step: 670/470, loss: 0.8974085450172424 2023-01-22 16:49:28.584893: step: 672/470, loss: 2.1679720878601074 2023-01-22 16:49:29.272679: step: 674/470, loss: 0.3004220426082611 2023-01-22 16:49:30.083114: step: 676/470, loss: 1.7437026500701904 2023-01-22 16:49:30.835719: step: 678/470, loss: 1.1159954071044922 2023-01-22 16:49:31.558251: step: 680/470, loss: 0.47660908102989197 2023-01-22 16:49:32.311565: step: 682/470, loss: 7.534053325653076 2023-01-22 16:49:33.084753: step: 684/470, loss: 0.7392002940177917 2023-01-22 16:49:33.809888: step: 686/470, loss: 4.810300350189209 2023-01-22 16:49:34.579906: step: 688/470, loss: 0.2002626359462738 2023-01-22 16:49:35.392996: step: 690/470, loss: 0.3717239499092102 2023-01-22 16:49:36.099607: step: 692/470, loss: 0.9321846961975098 2023-01-22 16:49:36.827077: step: 694/470, loss: 0.8831678032875061 2023-01-22 16:49:37.524009: step: 696/470, loss: 1.0070817470550537 2023-01-22 16:49:38.247026: step: 698/470, loss: 1.1700540781021118 2023-01-22 16:49:38.982005: step: 700/470, loss: 1.3304771184921265 2023-01-22 16:49:39.700192: step: 702/470, loss: 1.0372437238693237 2023-01-22 16:49:40.426629: step: 704/470, loss: 0.636731743812561 2023-01-22 16:49:41.177082: step: 706/470, loss: 0.39378777146339417 2023-01-22 16:49:41.934643: step: 708/470, loss: 2.0515360832214355 2023-01-22 16:49:42.687206: step: 710/470, loss: 0.6573086977005005 2023-01-22 16:49:43.446238: step: 712/470, loss: 2.0945732593536377 2023-01-22 16:49:44.150070: step: 714/470, loss: 0.41838696599006653 2023-01-22 16:49:44.874506: step: 716/470, loss: 0.5202152729034424 2023-01-22 16:49:45.654954: step: 718/470, loss: 1.1282581090927124 2023-01-22 16:49:46.370598: step: 720/470, loss: 0.8712818622589111 2023-01-22 16:49:47.091897: step: 722/470, loss: 0.28079456090927124 2023-01-22 16:49:47.898650: step: 724/470, loss: 1.146299123764038 2023-01-22 16:49:48.608596: step: 726/470, loss: 2.3962433338165283 2023-01-22 16:49:49.340298: step: 728/470, loss: 1.109757423400879 2023-01-22 16:49:50.064993: step: 730/470, loss: 0.8224062323570251 2023-01-22 16:49:50.779120: step: 732/470, loss: 0.716911792755127 2023-01-22 16:49:51.506555: step: 734/470, loss: 0.7856378555297852 2023-01-22 16:49:52.201324: step: 736/470, loss: 0.9323542714118958 2023-01-22 16:49:52.926032: step: 738/470, loss: 1.7010040283203125 2023-01-22 16:49:53.771665: step: 740/470, loss: 0.9458099603652954 2023-01-22 16:49:54.496914: step: 742/470, loss: 0.4535662531852722 2023-01-22 16:49:55.287095: step: 744/470, loss: 1.6949098110198975 2023-01-22 16:49:56.204156: step: 746/470, loss: 0.36973050236701965 2023-01-22 16:49:57.073351: step: 748/470, loss: 1.6900949478149414 2023-01-22 16:49:57.821082: step: 750/470, loss: 0.700052797794342 2023-01-22 16:49:58.701454: step: 752/470, loss: 2.680393934249878 2023-01-22 16:49:59.536635: step: 754/470, loss: 2.4601190090179443 2023-01-22 16:50:00.287815: step: 756/470, loss: 1.6700736284255981 2023-01-22 16:50:01.085950: step: 758/470, loss: 1.34128737449646 2023-01-22 16:50:01.876756: step: 760/470, loss: 1.392914056777954 2023-01-22 16:50:02.676777: step: 762/470, loss: 0.7549951076507568 2023-01-22 16:50:03.380591: step: 764/470, loss: 3.097191333770752 2023-01-22 16:50:04.024930: step: 766/470, loss: 1.1415493488311768 2023-01-22 16:50:04.755224: step: 768/470, loss: 1.2913892269134521 2023-01-22 16:50:05.444411: step: 770/470, loss: 1.9585628509521484 2023-01-22 16:50:06.219182: step: 772/470, loss: 1.1048572063446045 2023-01-22 16:50:06.914161: step: 774/470, loss: 1.6778151988983154 2023-01-22 16:50:07.671228: step: 776/470, loss: 0.4695655107498169 2023-01-22 16:50:08.321872: step: 778/470, loss: 1.7176390886306763 2023-01-22 16:50:09.146810: step: 780/470, loss: 0.1662791669368744 2023-01-22 16:50:09.899080: step: 782/470, loss: 3.580739736557007 2023-01-22 16:50:10.615384: step: 784/470, loss: 0.5405029058456421 2023-01-22 16:50:11.353940: step: 786/470, loss: 0.7567795515060425 2023-01-22 16:50:12.132606: step: 788/470, loss: 1.8015867471694946 2023-01-22 16:50:12.845011: step: 790/470, loss: 3.115386724472046 2023-01-22 16:50:13.607575: step: 792/470, loss: 1.5635459423065186 2023-01-22 16:50:14.350423: step: 794/470, loss: 0.3208344578742981 2023-01-22 16:50:15.118453: step: 796/470, loss: 3.5830130577087402 2023-01-22 16:50:15.846574: step: 798/470, loss: 1.7164645195007324 2023-01-22 16:50:16.588941: step: 800/470, loss: 0.2683294117450714 2023-01-22 16:50:17.259273: step: 802/470, loss: 9.538615226745605 2023-01-22 16:50:18.058307: step: 804/470, loss: 4.4917073249816895 2023-01-22 16:50:18.797985: step: 806/470, loss: 2.0354855060577393 2023-01-22 16:50:19.520375: step: 808/470, loss: 1.513418197631836 2023-01-22 16:50:20.235014: step: 810/470, loss: 1.2347862720489502 2023-01-22 16:50:21.005639: step: 812/470, loss: 2.0256099700927734 2023-01-22 16:50:21.713354: step: 814/470, loss: 2.208359718322754 2023-01-22 16:50:22.473059: step: 816/470, loss: 0.9855014681816101 2023-01-22 16:50:23.212086: step: 818/470, loss: 0.9821873903274536 2023-01-22 16:50:23.993277: step: 820/470, loss: 17.047348022460938 2023-01-22 16:50:24.743323: step: 822/470, loss: 1.7741284370422363 2023-01-22 16:50:25.489331: step: 824/470, loss: 0.6360207200050354 2023-01-22 16:50:26.223565: step: 826/470, loss: 1.1706583499908447 2023-01-22 16:50:27.027321: step: 828/470, loss: 1.1831610202789307 2023-01-22 16:50:27.765275: step: 830/470, loss: 0.9164629578590393 2023-01-22 16:50:28.565788: step: 832/470, loss: 0.7324165105819702 2023-01-22 16:50:29.270988: step: 834/470, loss: 2.8356175422668457 2023-01-22 16:50:29.963208: step: 836/470, loss: 1.6835747957229614 2023-01-22 16:50:30.661194: step: 838/470, loss: 1.1602541208267212 2023-01-22 16:50:31.378727: step: 840/470, loss: 0.8675392866134644 2023-01-22 16:50:32.115063: step: 842/470, loss: 0.9252186417579651 2023-01-22 16:50:32.877515: step: 844/470, loss: 2.3158793449401855 2023-01-22 16:50:33.630949: step: 846/470, loss: 6.0092453956604 2023-01-22 16:50:34.280750: step: 848/470, loss: 1.1773701906204224 2023-01-22 16:50:35.010549: step: 850/470, loss: 2.684903144836426 2023-01-22 16:50:35.802268: step: 852/470, loss: 1.871997356414795 2023-01-22 16:50:36.641077: step: 854/470, loss: 3.4746434688568115 2023-01-22 16:50:37.401720: step: 856/470, loss: 3.444838523864746 2023-01-22 16:50:38.121560: step: 858/470, loss: 2.6179463863372803 2023-01-22 16:50:38.908517: step: 860/470, loss: 0.5549417734146118 2023-01-22 16:50:39.674573: step: 862/470, loss: 1.6258676052093506 2023-01-22 16:50:40.355811: step: 864/470, loss: 0.6284773349761963 2023-01-22 16:50:41.079066: step: 866/470, loss: 2.572608232498169 2023-01-22 16:50:41.745898: step: 868/470, loss: 1.005090594291687 2023-01-22 16:50:42.455391: step: 870/470, loss: 0.31928566098213196 2023-01-22 16:50:43.193877: step: 872/470, loss: 0.3751024603843689 2023-01-22 16:50:43.955087: step: 874/470, loss: 2.8960084915161133 2023-01-22 16:50:44.714753: step: 876/470, loss: 0.4217081069946289 2023-01-22 16:50:45.472320: step: 878/470, loss: 2.551321029663086 2023-01-22 16:50:46.197453: step: 880/470, loss: 5.909186363220215 2023-01-22 16:50:46.975290: step: 882/470, loss: 2.9134228229522705 2023-01-22 16:50:47.806633: step: 884/470, loss: 2.1783947944641113 2023-01-22 16:50:48.546663: step: 886/470, loss: 0.5688473582267761 2023-01-22 16:50:49.298381: step: 888/470, loss: 3.83414888381958 2023-01-22 16:50:50.036664: step: 890/470, loss: 1.6998573541641235 2023-01-22 16:50:50.964272: step: 892/470, loss: 1.6830124855041504 2023-01-22 16:50:51.764611: step: 894/470, loss: 1.0185949802398682 2023-01-22 16:50:52.454568: step: 896/470, loss: 1.8073221445083618 2023-01-22 16:50:53.186981: step: 898/470, loss: 0.8843210339546204 2023-01-22 16:50:53.960777: step: 900/470, loss: 4.365537166595459 2023-01-22 16:50:54.702892: step: 902/470, loss: 0.808355987071991 2023-01-22 16:50:55.583384: step: 904/470, loss: 1.7078111171722412 2023-01-22 16:50:56.344435: step: 906/470, loss: 0.526005208492279 2023-01-22 16:50:57.110576: step: 908/470, loss: 0.30686283111572266 2023-01-22 16:50:57.856907: step: 910/470, loss: 0.9184078574180603 2023-01-22 16:50:58.614122: step: 912/470, loss: 0.605542778968811 2023-01-22 16:50:59.359385: step: 914/470, loss: 0.6337424516677856 2023-01-22 16:51:00.074604: step: 916/470, loss: 1.6130006313323975 2023-01-22 16:51:00.858632: step: 918/470, loss: 4.33975887298584 2023-01-22 16:51:01.684683: step: 920/470, loss: 3.3465535640716553 2023-01-22 16:51:02.478729: step: 922/470, loss: 0.68430495262146 2023-01-22 16:51:03.218735: step: 924/470, loss: 1.328958511352539 2023-01-22 16:51:04.015816: step: 926/470, loss: 2.2398757934570312 2023-01-22 16:51:04.779356: step: 928/470, loss: 1.0955443382263184 2023-01-22 16:51:05.377670: step: 930/470, loss: 1.5726451873779297 2023-01-22 16:51:06.069577: step: 932/470, loss: 1.2137433290481567 2023-01-22 16:51:06.809633: step: 934/470, loss: 1.729867935180664 2023-01-22 16:51:07.707372: step: 936/470, loss: 0.5839881300926208 2023-01-22 16:51:08.401954: step: 938/470, loss: 0.5170664191246033 2023-01-22 16:51:09.146441: step: 940/470, loss: 0.8649019002914429 2023-01-22 16:51:09.924160: step: 942/470, loss: 2.5765528678894043 ================================================== Loss: 1.597 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27756423515352086, 'r': 0.27282404897442847, 'f1': 0.2751737297789929}, 'combined': 0.20275959036346844, 'epoch': 2} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3282026888474822, 'r': 0.25190581017208286, 'f1': 0.28503690042297636}, 'combined': 0.19853316447371488, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2694339847182892, 'r': 0.2719902843835481, 'f1': 0.2707060998491593}, 'combined': 0.19946765252043314, 'epoch': 2} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32578715195855573, 'r': 0.2528684138160548, 'f1': 0.2847333897052602}, 'combined': 0.19832176397381307, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24997564703352598, 'r': 0.268000456497044, 'f1': 0.2586744332856084}, 'combined': 0.19060221399992197, 'epoch': 2} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32089141554509126, 'r': 0.2626315908207663, 'f1': 0.28885312841459876}, 'combined': 0.2011912337216111, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24695121951219512, 'r': 0.2892857142857143, 'f1': 0.2664473684210527}, 'combined': 0.17763157894736847, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21153846153846154, 'r': 0.2391304347826087, 'f1': 0.22448979591836737}, 'combined': 0.11224489795918369, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.2413793103448276, 'f1': 0.2857142857142857}, 'combined': 0.19047619047619047, 'epoch': 2} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27756423515352086, 'r': 0.27282404897442847, 'f1': 0.2751737297789929}, 'combined': 0.20275959036346844, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3282026888474822, 'r': 0.25190581017208286, 'f1': 0.28503690042297636}, 'combined': 0.19853316447371488, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24695121951219512, 'r': 0.2892857142857143, 'f1': 0.2664473684210527}, 'combined': 0.17763157894736847, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2694339847182892, 'r': 0.2719902843835481, 'f1': 0.2707060998491593}, 'combined': 0.19946765252043314, 'epoch': 2} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32578715195855573, 'r': 0.2528684138160548, 'f1': 0.2847333897052602}, 'combined': 0.19832176397381307, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21153846153846154, 'r': 0.2391304347826087, 'f1': 0.22448979591836737}, 'combined': 0.11224489795918369, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20295208490866637, 'r': 0.24783940803591395, 'f1': 0.22316093081021016}, 'combined': 0.16443437007068115, 'epoch': 1} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.279769607766213, 'r': 0.2325601629274273, 'f1': 0.25398979050608034}, 'combined': 0.17690831179527985, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:54:08.458371: step: 2/470, loss: 0.36363697052001953 2023-01-22 16:54:09.203976: step: 4/470, loss: 1.200259804725647 2023-01-22 16:54:09.898907: step: 6/470, loss: 0.9495527744293213 2023-01-22 16:54:10.661096: step: 8/470, loss: 6.474288463592529 2023-01-22 16:54:11.426638: step: 10/470, loss: 0.4756370186805725 2023-01-22 16:54:12.184026: step: 12/470, loss: 0.5976176261901855 2023-01-22 16:54:12.915743: step: 14/470, loss: 1.184709906578064 2023-01-22 16:54:13.647706: step: 16/470, loss: 0.3212374746799469 2023-01-22 16:54:14.369824: step: 18/470, loss: 0.17789015173912048 2023-01-22 16:54:15.062485: step: 20/470, loss: 1.2034302949905396 2023-01-22 16:54:15.868036: step: 22/470, loss: 0.7665480375289917 2023-01-22 16:54:16.570729: step: 24/470, loss: 0.40206414461135864 2023-01-22 16:54:17.297221: step: 26/470, loss: 1.6805636882781982 2023-01-22 16:54:18.092256: step: 28/470, loss: 1.1735949516296387 2023-01-22 16:54:18.838141: step: 30/470, loss: 1.2147589921951294 2023-01-22 16:54:19.524715: step: 32/470, loss: 1.4510881900787354 2023-01-22 16:54:20.208098: step: 34/470, loss: 0.7579923272132874 2023-01-22 16:54:20.900316: step: 36/470, loss: 0.22519704699516296 2023-01-22 16:54:21.753219: step: 38/470, loss: 0.5341430306434631 2023-01-22 16:54:22.490532: step: 40/470, loss: 0.6423546075820923 2023-01-22 16:54:23.158413: step: 42/470, loss: 0.5371485948562622 2023-01-22 16:54:23.882928: step: 44/470, loss: 1.21221923828125 2023-01-22 16:54:24.640312: step: 46/470, loss: 2.2409815788269043 2023-01-22 16:54:25.392288: step: 48/470, loss: 2.1587605476379395 2023-01-22 16:54:26.084168: step: 50/470, loss: 0.41026097536087036 2023-01-22 16:54:26.835993: step: 52/470, loss: 1.3984534740447998 2023-01-22 16:54:27.653697: step: 54/470, loss: 0.902085542678833 2023-01-22 16:54:28.424165: step: 56/470, loss: 0.4346156120300293 2023-01-22 16:54:29.184736: step: 58/470, loss: 1.1000165939331055 2023-01-22 16:54:29.958012: step: 60/470, loss: 0.9447662830352783 2023-01-22 16:54:30.614794: step: 62/470, loss: 4.6900763511657715 2023-01-22 16:54:31.330863: step: 64/470, loss: 1.7766426801681519 2023-01-22 16:54:32.065526: step: 66/470, loss: 0.977912187576294 2023-01-22 16:54:32.811410: step: 68/470, loss: 0.8247696161270142 2023-01-22 16:54:33.521916: step: 70/470, loss: 2.7817070484161377 2023-01-22 16:54:34.236756: step: 72/470, loss: 3.9839916229248047 2023-01-22 16:54:35.085210: step: 74/470, loss: 1.374387264251709 2023-01-22 16:54:35.809245: step: 76/470, loss: 0.7330376505851746 2023-01-22 16:54:36.653829: step: 78/470, loss: 2.2703309059143066 2023-01-22 16:54:37.376223: step: 80/470, loss: 0.8914951682090759 2023-01-22 16:54:38.102250: step: 82/470, loss: 1.1255903244018555 2023-01-22 16:54:38.916268: step: 84/470, loss: 2.9205687046051025 2023-01-22 16:54:39.692776: step: 86/470, loss: 0.7798607349395752 2023-01-22 16:54:40.382948: step: 88/470, loss: 1.1443251371383667 2023-01-22 16:54:41.117653: step: 90/470, loss: 1.046618938446045 2023-01-22 16:54:41.870100: step: 92/470, loss: 4.348773956298828 2023-01-22 16:54:42.568803: step: 94/470, loss: 3.3680717945098877 2023-01-22 16:54:43.339918: step: 96/470, loss: 1.9639177322387695 2023-01-22 16:54:44.078196: step: 98/470, loss: 0.4387917220592499 2023-01-22 16:54:44.795592: step: 100/470, loss: 0.3733503222465515 2023-01-22 16:54:45.542425: step: 102/470, loss: 0.8334224224090576 2023-01-22 16:54:46.282724: step: 104/470, loss: 0.9581044912338257 2023-01-22 16:54:47.016630: step: 106/470, loss: 1.0296529531478882 2023-01-22 16:54:47.801773: step: 108/470, loss: 5.17448091506958 2023-01-22 16:54:48.515727: step: 110/470, loss: 0.4054220914840698 2023-01-22 16:54:49.281759: step: 112/470, loss: 0.47016581892967224 2023-01-22 16:54:50.014527: step: 114/470, loss: 0.7138015031814575 2023-01-22 16:54:50.718072: step: 116/470, loss: 0.9061262607574463 2023-01-22 16:54:51.402512: step: 118/470, loss: 0.30310964584350586 2023-01-22 16:54:52.169172: step: 120/470, loss: 0.6186302304267883 2023-01-22 16:54:52.934153: step: 122/470, loss: 1.7593357563018799 2023-01-22 16:54:53.601873: step: 124/470, loss: 0.7808341979980469 2023-01-22 16:54:54.316485: step: 126/470, loss: 1.6488226652145386 2023-01-22 16:54:55.180845: step: 128/470, loss: 0.45947107672691345 2023-01-22 16:54:55.955149: step: 130/470, loss: 1.057709813117981 2023-01-22 16:54:56.621631: step: 132/470, loss: 0.8940787315368652 2023-01-22 16:54:57.356623: step: 134/470, loss: 0.5826739072799683 2023-01-22 16:54:58.103720: step: 136/470, loss: 1.4600317478179932 2023-01-22 16:54:58.879149: step: 138/470, loss: 0.3595348298549652 2023-01-22 16:54:59.681019: step: 140/470, loss: 1.4183976650238037 2023-01-22 16:55:00.382789: step: 142/470, loss: 0.9755016565322876 2023-01-22 16:55:01.137790: step: 144/470, loss: 0.27329689264297485 2023-01-22 16:55:01.876212: step: 146/470, loss: 1.2449324131011963 2023-01-22 16:55:02.619939: step: 148/470, loss: 0.7816654443740845 2023-01-22 16:55:03.471443: step: 150/470, loss: 0.28573817014694214 2023-01-22 16:55:04.266693: step: 152/470, loss: 0.9601019620895386 2023-01-22 16:55:05.061692: step: 154/470, loss: 0.4332647919654846 2023-01-22 16:55:05.820168: step: 156/470, loss: 0.21661068499088287 2023-01-22 16:55:06.470888: step: 158/470, loss: 1.6688331365585327 2023-01-22 16:55:07.206830: step: 160/470, loss: 0.8690783977508545 2023-01-22 16:55:07.879838: step: 162/470, loss: 0.490153968334198 2023-01-22 16:55:08.674468: step: 164/470, loss: 1.6668365001678467 2023-01-22 16:55:09.428850: step: 166/470, loss: 0.7929226160049438 2023-01-22 16:55:10.141740: step: 168/470, loss: 1.0394725799560547 2023-01-22 16:55:10.943651: step: 170/470, loss: 0.30205702781677246 2023-01-22 16:55:11.701839: step: 172/470, loss: 0.9545096158981323 2023-01-22 16:55:12.433073: step: 174/470, loss: 0.2452886998653412 2023-01-22 16:55:13.181172: step: 176/470, loss: 0.3460700213909149 2023-01-22 16:55:13.990481: step: 178/470, loss: 1.1051923036575317 2023-01-22 16:55:14.702219: step: 180/470, loss: 1.239685297012329 2023-01-22 16:55:15.523086: step: 182/470, loss: 0.9798608422279358 2023-01-22 16:55:16.236899: step: 184/470, loss: 2.91426420211792 2023-01-22 16:55:16.960531: step: 186/470, loss: 1.605899453163147 2023-01-22 16:55:17.777311: step: 188/470, loss: 0.4200703501701355 2023-01-22 16:55:18.489947: step: 190/470, loss: 2.6780664920806885 2023-01-22 16:55:19.305244: step: 192/470, loss: 0.3517438471317291 2023-01-22 16:55:20.040710: step: 194/470, loss: 0.46974340081214905 2023-01-22 16:55:20.850891: step: 196/470, loss: 0.29148203134536743 2023-01-22 16:55:21.571462: step: 198/470, loss: 1.264331579208374 2023-01-22 16:55:22.260355: step: 200/470, loss: 0.47580063343048096 2023-01-22 16:55:23.001212: step: 202/470, loss: 1.759827971458435 2023-01-22 16:55:23.714455: step: 204/470, loss: 1.8886005878448486 2023-01-22 16:55:24.440175: step: 206/470, loss: 1.2642725706100464 2023-01-22 16:55:25.206844: step: 208/470, loss: 1.3302885293960571 2023-01-22 16:55:26.003964: step: 210/470, loss: 0.262773334980011 2023-01-22 16:55:26.729162: step: 212/470, loss: 0.39958715438842773 2023-01-22 16:55:27.459638: step: 214/470, loss: 1.108762264251709 2023-01-22 16:55:28.310830: step: 216/470, loss: 0.5211945176124573 2023-01-22 16:55:29.092099: step: 218/470, loss: 0.5695528984069824 2023-01-22 16:55:29.898957: step: 220/470, loss: 0.9404783248901367 2023-01-22 16:55:30.635883: step: 222/470, loss: 5.394157409667969 2023-01-22 16:55:31.445008: step: 224/470, loss: 0.3269270658493042 2023-01-22 16:55:32.172699: step: 226/470, loss: 0.12120553106069565 2023-01-22 16:55:32.853566: step: 228/470, loss: 2.013479232788086 2023-01-22 16:55:33.609554: step: 230/470, loss: 0.7474368810653687 2023-01-22 16:55:34.328784: step: 232/470, loss: 0.8971037864685059 2023-01-22 16:55:35.042542: step: 234/470, loss: 0.8100451827049255 2023-01-22 16:55:35.821010: step: 236/470, loss: 0.3806197941303253 2023-01-22 16:55:36.589129: step: 238/470, loss: 0.3603185713291168 2023-01-22 16:55:37.415629: step: 240/470, loss: 8.618203163146973 2023-01-22 16:55:38.179447: step: 242/470, loss: 0.440914511680603 2023-01-22 16:55:39.024700: step: 244/470, loss: 0.538329005241394 2023-01-22 16:55:39.751318: step: 246/470, loss: 0.6860210299491882 2023-01-22 16:55:40.468459: step: 248/470, loss: 0.5581848621368408 2023-01-22 16:55:41.248147: step: 250/470, loss: 1.4989908933639526 2023-01-22 16:55:41.985802: step: 252/470, loss: 0.3475220799446106 2023-01-22 16:55:42.725558: step: 254/470, loss: 0.6883515119552612 2023-01-22 16:55:43.447521: step: 256/470, loss: 0.3571469783782959 2023-01-22 16:55:44.168973: step: 258/470, loss: 1.115850567817688 2023-01-22 16:55:44.959127: step: 260/470, loss: 0.4317207932472229 2023-01-22 16:55:45.715849: step: 262/470, loss: 0.49563005566596985 2023-01-22 16:55:46.447929: step: 264/470, loss: 0.5665435791015625 2023-01-22 16:55:47.177059: step: 266/470, loss: 0.4394031763076782 2023-01-22 16:55:47.932248: step: 268/470, loss: 1.2470353841781616 2023-01-22 16:55:48.685510: step: 270/470, loss: 0.5768090486526489 2023-01-22 16:55:49.396229: step: 272/470, loss: 1.6476483345031738 2023-01-22 16:55:50.142614: step: 274/470, loss: 1.6945419311523438 2023-01-22 16:55:50.868767: step: 276/470, loss: 0.6581287980079651 2023-01-22 16:55:51.664137: step: 278/470, loss: 0.7141035199165344 2023-01-22 16:55:52.438309: step: 280/470, loss: 1.2979902029037476 2023-01-22 16:55:53.205486: step: 282/470, loss: 2.7254929542541504 2023-01-22 16:55:54.048327: step: 284/470, loss: 3.011003017425537 2023-01-22 16:55:54.801334: step: 286/470, loss: 0.907600998878479 2023-01-22 16:55:55.700058: step: 288/470, loss: 1.4326318502426147 2023-01-22 16:55:56.566812: step: 290/470, loss: 1.6391098499298096 2023-01-22 16:55:57.259009: step: 292/470, loss: 0.27279168367385864 2023-01-22 16:55:57.951183: step: 294/470, loss: 1.117002248764038 2023-01-22 16:55:58.728741: step: 296/470, loss: 0.19795672595500946 2023-01-22 16:55:59.404024: step: 298/470, loss: 0.3689045011997223 2023-01-22 16:56:00.111705: step: 300/470, loss: 2.757559061050415 2023-01-22 16:56:00.878967: step: 302/470, loss: 0.2532775402069092 2023-01-22 16:56:01.658145: step: 304/470, loss: 0.5779852271080017 2023-01-22 16:56:02.329908: step: 306/470, loss: 0.2892742156982422 2023-01-22 16:56:03.084734: step: 308/470, loss: 6.725302696228027 2023-01-22 16:56:03.738756: step: 310/470, loss: 0.16188186407089233 2023-01-22 16:56:04.583886: step: 312/470, loss: 0.713376522064209 2023-01-22 16:56:05.359918: step: 314/470, loss: 0.9798615574836731 2023-01-22 16:56:06.042653: step: 316/470, loss: 0.2618459463119507 2023-01-22 16:56:06.898689: step: 318/470, loss: 0.3729875385761261 2023-01-22 16:56:07.592609: step: 320/470, loss: 1.94132399559021 2023-01-22 16:56:08.375291: step: 322/470, loss: 0.37356746196746826 2023-01-22 16:56:09.077437: step: 324/470, loss: 5.201150417327881 2023-01-22 16:56:09.820991: step: 326/470, loss: 0.7417845129966736 2023-01-22 16:56:10.546220: step: 328/470, loss: 1.832114815711975 2023-01-22 16:56:11.254941: step: 330/470, loss: 1.1253231763839722 2023-01-22 16:56:12.016117: step: 332/470, loss: 0.22344562411308289 2023-01-22 16:56:12.800774: step: 334/470, loss: 0.8852127194404602 2023-01-22 16:56:13.566480: step: 336/470, loss: 0.9841639399528503 2023-01-22 16:56:14.327634: step: 338/470, loss: 1.8902114629745483 2023-01-22 16:56:15.056433: step: 340/470, loss: 0.27074041962623596 2023-01-22 16:56:15.738450: step: 342/470, loss: 0.16591957211494446 2023-01-22 16:56:16.521990: step: 344/470, loss: 0.5446414947509766 2023-01-22 16:56:17.296753: step: 346/470, loss: 1.8010767698287964 2023-01-22 16:56:18.034429: step: 348/470, loss: 0.44357505440711975 2023-01-22 16:56:18.743174: step: 350/470, loss: 4.071453094482422 2023-01-22 16:56:19.735979: step: 352/470, loss: 1.5298190116882324 2023-01-22 16:56:20.578755: step: 354/470, loss: 0.6060335636138916 2023-01-22 16:56:21.273691: step: 356/470, loss: 0.4127192497253418 2023-01-22 16:56:22.013114: step: 358/470, loss: 1.532690167427063 2023-01-22 16:56:22.810677: step: 360/470, loss: 0.8012219667434692 2023-01-22 16:56:23.551809: step: 362/470, loss: 0.5713666677474976 2023-01-22 16:56:24.339806: step: 364/470, loss: 2.333641529083252 2023-01-22 16:56:25.076598: step: 366/470, loss: 1.080212116241455 2023-01-22 16:56:25.753919: step: 368/470, loss: 1.0552024841308594 2023-01-22 16:56:26.458596: step: 370/470, loss: 2.2088849544525146 2023-01-22 16:56:27.216819: step: 372/470, loss: 0.6469262838363647 2023-01-22 16:56:27.944037: step: 374/470, loss: 0.5959089994430542 2023-01-22 16:56:28.716003: step: 376/470, loss: 0.6726745963096619 2023-01-22 16:56:29.375454: step: 378/470, loss: 0.41327551007270813 2023-01-22 16:56:30.193815: step: 380/470, loss: 1.2673792839050293 2023-01-22 16:56:30.925284: step: 382/470, loss: 1.0747371912002563 2023-01-22 16:56:31.707177: step: 384/470, loss: 0.8380264043807983 2023-01-22 16:56:32.418721: step: 386/470, loss: 0.47480469942092896 2023-01-22 16:56:33.172584: step: 388/470, loss: 0.09107540547847748 2023-01-22 16:56:33.893375: step: 390/470, loss: 1.519758939743042 2023-01-22 16:56:34.548526: step: 392/470, loss: 0.8037975430488586 2023-01-22 16:56:35.238994: step: 394/470, loss: 0.9735026359558105 2023-01-22 16:56:36.251570: step: 396/470, loss: 0.7729388475418091 2023-01-22 16:56:36.989273: step: 398/470, loss: 0.43985089659690857 2023-01-22 16:56:37.741629: step: 400/470, loss: 1.1569021940231323 2023-01-22 16:56:38.531676: step: 402/470, loss: 0.90910404920578 2023-01-22 16:56:39.277388: step: 404/470, loss: 1.4025219678878784 2023-01-22 16:56:39.963858: step: 406/470, loss: 0.174116849899292 2023-01-22 16:56:40.729026: step: 408/470, loss: 0.9569568634033203 2023-01-22 16:56:41.521730: step: 410/470, loss: 0.889129638671875 2023-01-22 16:56:42.224580: step: 412/470, loss: 0.5545390844345093 2023-01-22 16:56:42.978803: step: 414/470, loss: 2.280895709991455 2023-01-22 16:56:43.669895: step: 416/470, loss: 0.6171635389328003 2023-01-22 16:56:44.498169: step: 418/470, loss: 2.749620199203491 2023-01-22 16:56:45.205463: step: 420/470, loss: 0.6688118577003479 2023-01-22 16:56:45.930573: step: 422/470, loss: 0.48191848397254944 2023-01-22 16:56:46.662723: step: 424/470, loss: 0.9727869629859924 2023-01-22 16:56:47.396221: step: 426/470, loss: 1.0109034776687622 2023-01-22 16:56:48.186320: step: 428/470, loss: 1.8949697017669678 2023-01-22 16:56:48.993791: step: 430/470, loss: 0.29387497901916504 2023-01-22 16:56:49.728898: step: 432/470, loss: 1.4768874645233154 2023-01-22 16:56:50.523515: step: 434/470, loss: 0.6072927117347717 2023-01-22 16:56:51.291889: step: 436/470, loss: 2.1941230297088623 2023-01-22 16:56:51.935726: step: 438/470, loss: 0.42255786061286926 2023-01-22 16:56:52.639386: step: 440/470, loss: 0.3252505660057068 2023-01-22 16:56:53.391375: step: 442/470, loss: 0.7017578482627869 2023-01-22 16:56:54.105601: step: 444/470, loss: 0.40087300539016724 2023-01-22 16:56:54.897431: step: 446/470, loss: 1.6672818660736084 2023-01-22 16:56:55.637225: step: 448/470, loss: 1.3076132535934448 2023-01-22 16:56:56.402866: step: 450/470, loss: 1.0871223211288452 2023-01-22 16:56:57.202819: step: 452/470, loss: 4.525178909301758 2023-01-22 16:56:58.010844: step: 454/470, loss: 0.15165036916732788 2023-01-22 16:56:58.798644: step: 456/470, loss: 0.9667521715164185 2023-01-22 16:56:59.651479: step: 458/470, loss: 0.9113951325416565 2023-01-22 16:57:00.406860: step: 460/470, loss: 0.8675466775894165 2023-01-22 16:57:01.152875: step: 462/470, loss: 1.114274263381958 2023-01-22 16:57:01.899119: step: 464/470, loss: 0.7635269165039062 2023-01-22 16:57:02.728822: step: 466/470, loss: 0.9710996747016907 2023-01-22 16:57:03.405980: step: 468/470, loss: 0.2852608561515808 2023-01-22 16:57:04.206712: step: 470/470, loss: 0.7204350233078003 2023-01-22 16:57:05.124435: step: 472/470, loss: 9.467118263244629 2023-01-22 16:57:05.870391: step: 474/470, loss: 0.6275573372840881 2023-01-22 16:57:06.637928: step: 476/470, loss: 1.6595731973648071 2023-01-22 16:57:07.502233: step: 478/470, loss: 1.0893810987472534 2023-01-22 16:57:08.306070: step: 480/470, loss: 0.4000731408596039 2023-01-22 16:57:09.036301: step: 482/470, loss: 11.773370742797852 2023-01-22 16:57:09.764914: step: 484/470, loss: 3.65254545211792 2023-01-22 16:57:10.514006: step: 486/470, loss: 1.3855931758880615 2023-01-22 16:57:11.360288: step: 488/470, loss: 0.3679276406764984 2023-01-22 16:57:12.042453: step: 490/470, loss: 0.6360776424407959 2023-01-22 16:57:12.768778: step: 492/470, loss: 3.5798442363739014 2023-01-22 16:57:13.481940: step: 494/470, loss: 0.6142995357513428 2023-01-22 16:57:14.251811: step: 496/470, loss: 2.273555278778076 2023-01-22 16:57:15.028791: step: 498/470, loss: 0.5966622233390808 2023-01-22 16:57:15.798829: step: 500/470, loss: 0.5236387848854065 2023-01-22 16:57:16.589892: step: 502/470, loss: 0.5399066209793091 2023-01-22 16:57:17.291369: step: 504/470, loss: 1.1629985570907593 2023-01-22 16:57:18.169524: step: 506/470, loss: 0.5488350987434387 2023-01-22 16:57:18.852751: step: 508/470, loss: 0.6530537009239197 2023-01-22 16:57:19.650151: step: 510/470, loss: 0.6670851707458496 2023-01-22 16:57:20.412702: step: 512/470, loss: 1.0367801189422607 2023-01-22 16:57:21.130305: step: 514/470, loss: 1.2847917079925537 2023-01-22 16:57:21.953869: step: 516/470, loss: 0.6694021821022034 2023-01-22 16:57:22.629659: step: 518/470, loss: 2.218747138977051 2023-01-22 16:57:23.378622: step: 520/470, loss: 0.42207273840904236 2023-01-22 16:57:24.179029: step: 522/470, loss: 1.1089502573013306 2023-01-22 16:57:24.798497: step: 524/470, loss: 1.3500819206237793 2023-01-22 16:57:25.573241: step: 526/470, loss: 0.6098719835281372 2023-01-22 16:57:26.345734: step: 528/470, loss: 1.7950224876403809 2023-01-22 16:57:27.121413: step: 530/470, loss: 0.34178969264030457 2023-01-22 16:57:27.925345: step: 532/470, loss: 4.002415657043457 2023-01-22 16:57:28.607590: step: 534/470, loss: 0.6252081394195557 2023-01-22 16:57:29.306395: step: 536/470, loss: 1.957798719406128 2023-01-22 16:57:30.086676: step: 538/470, loss: 0.6906619071960449 2023-01-22 16:57:30.892650: step: 540/470, loss: 0.5147668123245239 2023-01-22 16:57:31.650385: step: 542/470, loss: 1.0314600467681885 2023-01-22 16:57:32.414217: step: 544/470, loss: 0.37217873334884644 2023-01-22 16:57:33.177787: step: 546/470, loss: 1.1730372905731201 2023-01-22 16:57:33.882293: step: 548/470, loss: 0.7325170636177063 2023-01-22 16:57:34.636321: step: 550/470, loss: 0.49213165044784546 2023-01-22 16:57:35.421025: step: 552/470, loss: 0.47592616081237793 2023-01-22 16:57:36.295818: step: 554/470, loss: 0.8138816952705383 2023-01-22 16:57:37.091966: step: 556/470, loss: 0.5806734561920166 2023-01-22 16:57:37.865452: step: 558/470, loss: 1.703600525856018 2023-01-22 16:57:38.547678: step: 560/470, loss: 2.787195920944214 2023-01-22 16:57:39.272010: step: 562/470, loss: 0.3978123664855957 2023-01-22 16:57:40.027857: step: 564/470, loss: 1.3924106359481812 2023-01-22 16:57:40.791701: step: 566/470, loss: 0.9471191167831421 2023-01-22 16:57:41.520525: step: 568/470, loss: 0.41248780488967896 2023-01-22 16:57:42.346893: step: 570/470, loss: 1.1843392848968506 2023-01-22 16:57:43.054906: step: 572/470, loss: 1.386232852935791 2023-01-22 16:57:43.774313: step: 574/470, loss: 0.30617228150367737 2023-01-22 16:57:44.580376: step: 576/470, loss: 0.6840832233428955 2023-01-22 16:57:45.352536: step: 578/470, loss: 0.8363122940063477 2023-01-22 16:57:46.087781: step: 580/470, loss: 0.6774252653121948 2023-01-22 16:57:46.911773: step: 582/470, loss: 0.34732234477996826 2023-01-22 16:57:47.620238: step: 584/470, loss: 3.143740653991699 2023-01-22 16:57:48.336714: step: 586/470, loss: 1.0732841491699219 2023-01-22 16:57:49.074721: step: 588/470, loss: 0.6088477373123169 2023-01-22 16:57:49.850699: step: 590/470, loss: 1.6710162162780762 2023-01-22 16:57:50.515259: step: 592/470, loss: 0.8715441226959229 2023-01-22 16:57:51.252856: step: 594/470, loss: 1.7189347743988037 2023-01-22 16:57:51.985875: step: 596/470, loss: 7.218348979949951 2023-01-22 16:57:52.649698: step: 598/470, loss: 2.807617664337158 2023-01-22 16:57:53.394301: step: 600/470, loss: 1.4759058952331543 2023-01-22 16:57:54.195033: step: 602/470, loss: 7.785268306732178 2023-01-22 16:57:54.924764: step: 604/470, loss: 1.1287786960601807 2023-01-22 16:57:55.662666: step: 606/470, loss: 0.3461344242095947 2023-01-22 16:57:56.368064: step: 608/470, loss: 0.34514838457107544 2023-01-22 16:57:57.080780: step: 610/470, loss: 0.30250078439712524 2023-01-22 16:57:57.831067: step: 612/470, loss: 0.9699506163597107 2023-01-22 16:57:58.571155: step: 614/470, loss: 0.9597812294960022 2023-01-22 16:57:59.280701: step: 616/470, loss: 0.7912279963493347 2023-01-22 16:57:59.965542: step: 618/470, loss: 1.2856942415237427 2023-01-22 16:58:00.689350: step: 620/470, loss: 1.5808870792388916 2023-01-22 16:58:01.498708: step: 622/470, loss: 2.507807731628418 2023-01-22 16:58:02.263882: step: 624/470, loss: 0.46560168266296387 2023-01-22 16:58:03.016079: step: 626/470, loss: 0.5538115501403809 2023-01-22 16:58:03.831627: step: 628/470, loss: 0.9292363524436951 2023-01-22 16:58:04.660175: step: 630/470, loss: 0.7725551724433899 2023-01-22 16:58:05.447473: step: 632/470, loss: 1.3639918565750122 2023-01-22 16:58:06.177236: step: 634/470, loss: 1.2045042514801025 2023-01-22 16:58:07.029994: step: 636/470, loss: 1.0402116775512695 2023-01-22 16:58:07.737665: step: 638/470, loss: 1.0203620195388794 2023-01-22 16:58:08.515091: step: 640/470, loss: 1.0372285842895508 2023-01-22 16:58:09.217540: step: 642/470, loss: 4.547273635864258 2023-01-22 16:58:09.879456: step: 644/470, loss: 0.877484917640686 2023-01-22 16:58:10.594806: step: 646/470, loss: 1.7687321901321411 2023-01-22 16:58:11.367037: step: 648/470, loss: 1.9696149826049805 2023-01-22 16:58:12.041599: step: 650/470, loss: 1.9689509868621826 2023-01-22 16:58:12.753847: step: 652/470, loss: 0.36682435870170593 2023-01-22 16:58:13.562624: step: 654/470, loss: 1.2010406255722046 2023-01-22 16:58:14.276800: step: 656/470, loss: 0.7754824757575989 2023-01-22 16:58:15.061327: step: 658/470, loss: 0.16382497549057007 2023-01-22 16:58:15.784739: step: 660/470, loss: 0.6597549915313721 2023-01-22 16:58:16.574082: step: 662/470, loss: 1.6874454021453857 2023-01-22 16:58:17.382601: step: 664/470, loss: 1.7826333045959473 2023-01-22 16:58:18.073327: step: 666/470, loss: 1.3918657302856445 2023-01-22 16:58:18.822695: step: 668/470, loss: 0.9533292055130005 2023-01-22 16:58:19.489801: step: 670/470, loss: 0.17870013415813446 2023-01-22 16:58:20.216277: step: 672/470, loss: 0.4774816632270813 2023-01-22 16:58:20.943047: step: 674/470, loss: 1.4324686527252197 2023-01-22 16:58:21.685867: step: 676/470, loss: 1.8398833274841309 2023-01-22 16:58:22.437269: step: 678/470, loss: 0.42002177238464355 2023-01-22 16:58:23.154904: step: 680/470, loss: 0.953679621219635 2023-01-22 16:58:23.837366: step: 682/470, loss: 0.5611454248428345 2023-01-22 16:58:24.602872: step: 684/470, loss: 0.5755783319473267 2023-01-22 16:58:25.383801: step: 686/470, loss: 0.5575564503669739 2023-01-22 16:58:26.080056: step: 688/470, loss: 0.7334228157997131 2023-01-22 16:58:26.796114: step: 690/470, loss: 4.685910224914551 2023-01-22 16:58:27.520470: step: 692/470, loss: 0.306856632232666 2023-01-22 16:58:28.211856: step: 694/470, loss: 0.8744876980781555 2023-01-22 16:58:28.903335: step: 696/470, loss: 1.340862512588501 2023-01-22 16:58:29.673332: step: 698/470, loss: 2.6037561893463135 2023-01-22 16:58:30.402018: step: 700/470, loss: 0.3846321403980255 2023-01-22 16:58:31.263077: step: 702/470, loss: 1.2338802814483643 2023-01-22 16:58:32.050459: step: 704/470, loss: 0.6107156872749329 2023-01-22 16:58:32.801203: step: 706/470, loss: 1.1513054370880127 2023-01-22 16:58:33.580989: step: 708/470, loss: 1.4285832643508911 2023-01-22 16:58:34.402733: step: 710/470, loss: 1.7846858501434326 2023-01-22 16:58:35.117502: step: 712/470, loss: 0.536953330039978 2023-01-22 16:58:35.900540: step: 714/470, loss: 0.8060521483421326 2023-01-22 16:58:36.636608: step: 716/470, loss: 1.0133894681930542 2023-01-22 16:58:37.380156: step: 718/470, loss: 0.5618113875389099 2023-01-22 16:58:38.111566: step: 720/470, loss: 3.355659246444702 2023-01-22 16:58:38.876451: step: 722/470, loss: 0.5400373935699463 2023-01-22 16:58:39.578480: step: 724/470, loss: 9.46622085571289 2023-01-22 16:58:40.323704: step: 726/470, loss: 1.4518628120422363 2023-01-22 16:58:41.145522: step: 728/470, loss: 0.7577975988388062 2023-01-22 16:58:41.909788: step: 730/470, loss: 3.8897013664245605 2023-01-22 16:58:42.723775: step: 732/470, loss: 0.8022290468215942 2023-01-22 16:58:43.504425: step: 734/470, loss: 1.1684952974319458 2023-01-22 16:58:44.252346: step: 736/470, loss: 1.0844392776489258 2023-01-22 16:58:45.001101: step: 738/470, loss: 1.2642875909805298 2023-01-22 16:58:45.743077: step: 740/470, loss: 0.4668574333190918 2023-01-22 16:58:46.425706: step: 742/470, loss: 0.4412175118923187 2023-01-22 16:58:47.130673: step: 744/470, loss: 0.6135561466217041 2023-01-22 16:58:47.838842: step: 746/470, loss: 2.345003604888916 2023-01-22 16:58:48.591432: step: 748/470, loss: 0.6416370868682861 2023-01-22 16:58:49.371622: step: 750/470, loss: 0.5936519503593445 2023-01-22 16:58:50.112094: step: 752/470, loss: 1.1450408697128296 2023-01-22 16:58:50.805080: step: 754/470, loss: 1.5515800714492798 2023-01-22 16:58:51.575826: step: 756/470, loss: 3.4162683486938477 2023-01-22 16:58:52.251237: step: 758/470, loss: 0.24610131978988647 2023-01-22 16:58:52.962936: step: 760/470, loss: 1.518908977508545 2023-01-22 16:58:53.701607: step: 762/470, loss: 2.093191146850586 2023-01-22 16:58:54.407495: step: 764/470, loss: 0.7881523370742798 2023-01-22 16:58:55.118816: step: 766/470, loss: 1.1381288766860962 2023-01-22 16:58:55.841613: step: 768/470, loss: 0.9814718961715698 2023-01-22 16:58:56.554771: step: 770/470, loss: 1.3323256969451904 2023-01-22 16:58:57.296386: step: 772/470, loss: 2.680574655532837 2023-01-22 16:58:58.058597: step: 774/470, loss: 2.739765167236328 2023-01-22 16:58:58.804040: step: 776/470, loss: 1.2006040811538696 2023-01-22 16:58:59.560955: step: 778/470, loss: 0.9175070524215698 2023-01-22 16:59:00.292041: step: 780/470, loss: 1.995614767074585 2023-01-22 16:59:01.037557: step: 782/470, loss: 0.7214681506156921 2023-01-22 16:59:01.868245: step: 784/470, loss: 1.2614259719848633 2023-01-22 16:59:02.660445: step: 786/470, loss: 1.1801100969314575 2023-01-22 16:59:03.369570: step: 788/470, loss: 1.3768471479415894 2023-01-22 16:59:04.091036: step: 790/470, loss: 0.4173336625099182 2023-01-22 16:59:04.872725: step: 792/470, loss: 0.45784080028533936 2023-01-22 16:59:05.622508: step: 794/470, loss: 1.0468000173568726 2023-01-22 16:59:06.406806: step: 796/470, loss: 2.4224095344543457 2023-01-22 16:59:07.099835: step: 798/470, loss: 2.1978325843811035 2023-01-22 16:59:07.911710: step: 800/470, loss: 0.6861466765403748 2023-01-22 16:59:08.637022: step: 802/470, loss: 2.6235058307647705 2023-01-22 16:59:09.424610: step: 804/470, loss: 1.106905221939087 2023-01-22 16:59:10.150655: step: 806/470, loss: 3.2012083530426025 2023-01-22 16:59:10.908356: step: 808/470, loss: 1.7539875507354736 2023-01-22 16:59:11.633821: step: 810/470, loss: 0.43845510482788086 2023-01-22 16:59:12.377370: step: 812/470, loss: 1.7235209941864014 2023-01-22 16:59:13.191580: step: 814/470, loss: 1.9387578964233398 2023-01-22 16:59:13.930428: step: 816/470, loss: 0.919151782989502 2023-01-22 16:59:14.672913: step: 818/470, loss: 2.7217369079589844 2023-01-22 16:59:15.377530: step: 820/470, loss: 0.5926926732063293 2023-01-22 16:59:16.126616: step: 822/470, loss: 2.346853494644165 2023-01-22 16:59:16.887977: step: 824/470, loss: 0.5080801844596863 2023-01-22 16:59:17.573591: step: 826/470, loss: 1.3107140064239502 2023-01-22 16:59:18.357470: step: 828/470, loss: 0.6457303166389465 2023-01-22 16:59:19.148383: step: 830/470, loss: 0.620394229888916 2023-01-22 16:59:19.902783: step: 832/470, loss: 1.6627405881881714 2023-01-22 16:59:20.618899: step: 834/470, loss: 0.5543290376663208 2023-01-22 16:59:21.385806: step: 836/470, loss: 0.7170817852020264 2023-01-22 16:59:22.160908: step: 838/470, loss: 0.6219049096107483 2023-01-22 16:59:22.925845: step: 840/470, loss: 0.889132022857666 2023-01-22 16:59:23.726482: step: 842/470, loss: 0.6863826513290405 2023-01-22 16:59:24.454471: step: 844/470, loss: 3.6622161865234375 2023-01-22 16:59:25.223572: step: 846/470, loss: 0.48640185594558716 2023-01-22 16:59:25.970247: step: 848/470, loss: 0.3788396418094635 2023-01-22 16:59:26.710692: step: 850/470, loss: 0.2160799503326416 2023-01-22 16:59:27.406427: step: 852/470, loss: 0.4344361126422882 2023-01-22 16:59:28.248044: step: 854/470, loss: 2.5788564682006836 2023-01-22 16:59:29.033962: step: 856/470, loss: 0.9327910542488098 2023-01-22 16:59:29.773823: step: 858/470, loss: 0.6671287417411804 2023-01-22 16:59:30.399871: step: 860/470, loss: 0.8870024085044861 2023-01-22 16:59:31.054385: step: 862/470, loss: 1.443222999572754 2023-01-22 16:59:31.830543: step: 864/470, loss: 0.40049707889556885 2023-01-22 16:59:32.526983: step: 866/470, loss: 0.6292204260826111 2023-01-22 16:59:33.261744: step: 868/470, loss: 0.7831001877784729 2023-01-22 16:59:33.959512: step: 870/470, loss: 1.5764271020889282 2023-01-22 16:59:34.684537: step: 872/470, loss: 0.44661274552345276 2023-01-22 16:59:35.391196: step: 874/470, loss: 1.3428727388381958 2023-01-22 16:59:36.180161: step: 876/470, loss: 0.4933682084083557 2023-01-22 16:59:36.883199: step: 878/470, loss: 3.2525839805603027 2023-01-22 16:59:37.695969: step: 880/470, loss: 2.814858913421631 2023-01-22 16:59:38.578135: step: 882/470, loss: 0.4872850775718689 2023-01-22 16:59:39.392332: step: 884/470, loss: 0.6432516574859619 2023-01-22 16:59:40.163622: step: 886/470, loss: 3.085758924484253 2023-01-22 16:59:40.979752: step: 888/470, loss: 0.3077142834663391 2023-01-22 16:59:41.753826: step: 890/470, loss: 0.7868139743804932 2023-01-22 16:59:42.487169: step: 892/470, loss: 0.7199787497520447 2023-01-22 16:59:43.270896: step: 894/470, loss: 0.8233124017715454 2023-01-22 16:59:44.019177: step: 896/470, loss: 1.426706075668335 2023-01-22 16:59:44.733501: step: 898/470, loss: 3.785362720489502 2023-01-22 16:59:45.534300: step: 900/470, loss: 0.2815132439136505 2023-01-22 16:59:46.278913: step: 902/470, loss: 0.5339265465736389 2023-01-22 16:59:46.971602: step: 904/470, loss: 2.60254168510437 2023-01-22 16:59:47.715654: step: 906/470, loss: 1.4491829872131348 2023-01-22 16:59:48.556911: step: 908/470, loss: 2.853203058242798 2023-01-22 16:59:49.333801: step: 910/470, loss: 0.6764827966690063 2023-01-22 16:59:50.146638: step: 912/470, loss: 1.3102974891662598 2023-01-22 16:59:50.878278: step: 914/470, loss: 1.3560751676559448 2023-01-22 16:59:51.620684: step: 916/470, loss: 1.378153681755066 2023-01-22 16:59:52.349740: step: 918/470, loss: 1.9102532863616943 2023-01-22 16:59:53.037241: step: 920/470, loss: 0.6327913999557495 2023-01-22 16:59:53.738967: step: 922/470, loss: 1.337106466293335 2023-01-22 16:59:54.483384: step: 924/470, loss: 0.5834131836891174 2023-01-22 16:59:55.190173: step: 926/470, loss: 1.6707696914672852 2023-01-22 16:59:55.899248: step: 928/470, loss: 1.1640057563781738 2023-01-22 16:59:56.746363: step: 930/470, loss: 0.6929572224617004 2023-01-22 16:59:57.495300: step: 932/470, loss: 0.8510525226593018 2023-01-22 16:59:58.262274: step: 934/470, loss: 1.042264461517334 2023-01-22 16:59:59.080635: step: 936/470, loss: 2.107961654663086 2023-01-22 16:59:59.948138: step: 938/470, loss: 0.5670161247253418 2023-01-22 17:00:00.727018: step: 940/470, loss: 1.541463017463684 2023-01-22 17:00:01.383375: step: 942/470, loss: 2.885584592819214 ================================================== Loss: 1.284 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30298576732673266, 'r': 0.23139177693761814, 'f1': 0.26239281886387994}, 'combined': 0.19334207705759573, 'epoch': 3} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.38174946184187203, 'r': 0.24166464491238587, 'f1': 0.2959681121809337}, 'combined': 0.20614694380761553, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29876193317422434, 'r': 0.23663752362948962, 'f1': 0.2640954641350211}, 'combined': 0.19459665778369975, 'epoch': 3} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.39738219832697874, 'r': 0.2515608729082411, 'f1': 0.30808808082056355}, 'combined': 0.21458871300934776, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2790948275862069, 'r': 0.2295014177693762, 'f1': 0.2518801867219917}, 'combined': 0.18559592705830966, 'epoch': 3} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.38381273403571387, 'r': 0.24752606264107457, 'f1': 0.3009593433745621}, 'combined': 0.20962342324596367, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2099056603773585, 'r': 0.31785714285714284, 'f1': 0.2528409090909091}, 'combined': 0.16856060606060608, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4411764705882353, 'r': 0.25862068965517243, 'f1': 0.32608695652173914}, 'combined': 0.21739130434782608, 'epoch': 3} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27756423515352086, 'r': 0.27282404897442847, 'f1': 0.2751737297789929}, 'combined': 0.20275959036346844, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3282026888474822, 'r': 0.25190581017208286, 'f1': 0.28503690042297636}, 'combined': 0.19853316447371488, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24695121951219512, 'r': 0.2892857142857143, 'f1': 0.2664473684210527}, 'combined': 0.17763157894736847, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29876193317422434, 'r': 0.23663752362948962, 'f1': 0.2640954641350211}, 'combined': 0.19459665778369975, 'epoch': 3} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.39738219832697874, 'r': 0.2515608729082411, 'f1': 0.30808808082056355}, 'combined': 0.21458871300934776, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20295208490866637, 'r': 0.24783940803591395, 'f1': 0.22316093081021016}, 'combined': 0.16443437007068115, 'epoch': 1} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.279769607766213, 'r': 0.2325601629274273, 'f1': 0.25398979050608034}, 'combined': 0.17690831179527985, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 1} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:02:44.134532: step: 2/470, loss: 1.2434356212615967 2023-01-22 17:02:44.807854: step: 4/470, loss: 1.2220540046691895 2023-01-22 17:02:45.488020: step: 6/470, loss: 1.7187833786010742 2023-01-22 17:02:46.349722: step: 8/470, loss: 1.2707059383392334 2023-01-22 17:02:47.036983: step: 10/470, loss: 1.190471887588501 2023-01-22 17:02:47.782992: step: 12/470, loss: 0.3669613301753998 2023-01-22 17:02:48.510567: step: 14/470, loss: 2.916283130645752 2023-01-22 17:02:49.349019: step: 16/470, loss: 1.854135513305664 2023-01-22 17:02:50.126509: step: 18/470, loss: 0.7268097400665283 2023-01-22 17:02:50.890671: step: 20/470, loss: 0.864444375038147 2023-01-22 17:02:51.669050: step: 22/470, loss: 0.24146677553653717 2023-01-22 17:02:52.492058: step: 24/470, loss: 1.435685634613037 2023-01-22 17:02:53.223374: step: 26/470, loss: 0.8564450144767761 2023-01-22 17:02:53.904661: step: 28/470, loss: 1.2398236989974976 2023-01-22 17:02:54.655691: step: 30/470, loss: 0.5465511679649353 2023-01-22 17:02:55.324953: step: 32/470, loss: 1.0255184173583984 2023-01-22 17:02:56.059294: step: 34/470, loss: 1.3359484672546387 2023-01-22 17:02:56.730343: step: 36/470, loss: 0.2009323537349701 2023-01-22 17:02:57.550695: step: 38/470, loss: 0.4609377980232239 2023-01-22 17:02:58.285985: step: 40/470, loss: 0.290250301361084 2023-01-22 17:02:59.052800: step: 42/470, loss: 0.2391287386417389 2023-01-22 17:02:59.787130: step: 44/470, loss: 1.3748838901519775 2023-01-22 17:03:00.542605: step: 46/470, loss: 0.31518200039863586 2023-01-22 17:03:01.300102: step: 48/470, loss: 1.317157506942749 2023-01-22 17:03:02.051226: step: 50/470, loss: 0.3318483829498291 2023-01-22 17:03:02.788720: step: 52/470, loss: 2.242248296737671 2023-01-22 17:03:03.520923: step: 54/470, loss: 0.5057197213172913 2023-01-22 17:03:04.221738: step: 56/470, loss: 1.300000786781311 2023-01-22 17:03:04.922606: step: 58/470, loss: 0.9471038579940796 2023-01-22 17:03:05.686526: step: 60/470, loss: 0.8389618992805481 2023-01-22 17:03:06.383683: step: 62/470, loss: 0.5545322895050049 2023-01-22 17:03:07.128938: step: 64/470, loss: 0.9042381048202515 2023-01-22 17:03:07.848710: step: 66/470, loss: 1.5064542293548584 2023-01-22 17:03:08.551935: step: 68/470, loss: 0.651218056678772 2023-01-22 17:03:09.287344: step: 70/470, loss: 4.524369239807129 2023-01-22 17:03:10.015017: step: 72/470, loss: 0.3224298655986786 2023-01-22 17:03:10.662390: step: 74/470, loss: 0.5799638628959656 2023-01-22 17:03:11.411424: step: 76/470, loss: 1.0132986307144165 2023-01-22 17:03:12.165398: step: 78/470, loss: 0.2071341574192047 2023-01-22 17:03:12.959432: step: 80/470, loss: 0.13997377455234528 2023-01-22 17:03:13.707417: step: 82/470, loss: 0.8105186223983765 2023-01-22 17:03:14.319399: step: 84/470, loss: 0.37218907475471497 2023-01-22 17:03:15.004826: step: 86/470, loss: 0.5148276090621948 2023-01-22 17:03:15.743944: step: 88/470, loss: 0.8321148753166199 2023-01-22 17:03:16.466978: step: 90/470, loss: 0.889289379119873 2023-01-22 17:03:17.173624: step: 92/470, loss: 0.5049362182617188 2023-01-22 17:03:17.886834: step: 94/470, loss: 0.9081429243087769 2023-01-22 17:03:18.630390: step: 96/470, loss: 0.8203686475753784 2023-01-22 17:03:19.444464: step: 98/470, loss: 1.3147096633911133 2023-01-22 17:03:20.164473: step: 100/470, loss: 0.49650490283966064 2023-01-22 17:03:20.836768: step: 102/470, loss: 0.6795364022254944 2023-01-22 17:03:21.709986: step: 104/470, loss: 5.672989845275879 2023-01-22 17:03:22.514039: step: 106/470, loss: 0.5761081576347351 2023-01-22 17:03:23.214712: step: 108/470, loss: 0.8563786149024963 2023-01-22 17:03:23.923997: step: 110/470, loss: 1.6315613985061646 2023-01-22 17:03:24.732802: step: 112/470, loss: 0.5608785152435303 2023-01-22 17:03:25.502058: step: 114/470, loss: 0.5131690502166748 2023-01-22 17:03:26.232578: step: 116/470, loss: 0.20318368077278137 2023-01-22 17:03:26.917866: step: 118/470, loss: 0.45412278175354004 2023-01-22 17:03:27.675620: step: 120/470, loss: 0.656235933303833 2023-01-22 17:03:28.383170: step: 122/470, loss: 0.1925651729106903 2023-01-22 17:03:29.038845: step: 124/470, loss: 0.43493232131004333 2023-01-22 17:03:29.814084: step: 126/470, loss: 0.27077344059944153 2023-01-22 17:03:30.535944: step: 128/470, loss: 0.3672603666782379 2023-01-22 17:03:31.288980: step: 130/470, loss: 0.9226909875869751 2023-01-22 17:03:32.000527: step: 132/470, loss: 0.32075291872024536 2023-01-22 17:03:32.795208: step: 134/470, loss: 0.6723787784576416 2023-01-22 17:03:33.476403: step: 136/470, loss: 1.2128407955169678 2023-01-22 17:03:34.195376: step: 138/470, loss: 0.8287149667739868 2023-01-22 17:03:34.905214: step: 140/470, loss: 0.4041156768798828 2023-01-22 17:03:35.619891: step: 142/470, loss: 0.16680775582790375 2023-01-22 17:03:36.382910: step: 144/470, loss: 2.376657485961914 2023-01-22 17:03:37.099210: step: 146/470, loss: 0.47927552461624146 2023-01-22 17:03:37.787996: step: 148/470, loss: 1.4806530475616455 2023-01-22 17:03:38.548142: step: 150/470, loss: 0.7680869698524475 2023-01-22 17:03:39.225129: step: 152/470, loss: 0.3706393539905548 2023-01-22 17:03:39.892359: step: 154/470, loss: 2.1673104763031006 2023-01-22 17:03:40.617338: step: 156/470, loss: 0.905188798904419 2023-01-22 17:03:41.367012: step: 158/470, loss: 0.19686326384544373 2023-01-22 17:03:42.096475: step: 160/470, loss: 1.0826082229614258 2023-01-22 17:03:42.842060: step: 162/470, loss: 1.3279461860656738 2023-01-22 17:03:43.696669: step: 164/470, loss: 0.796668291091919 2023-01-22 17:03:44.611270: step: 166/470, loss: 0.6903431415557861 2023-01-22 17:03:45.345472: step: 168/470, loss: 0.4284389317035675 2023-01-22 17:03:46.065338: step: 170/470, loss: 1.0535764694213867 2023-01-22 17:03:46.823526: step: 172/470, loss: 0.5381697416305542 2023-01-22 17:03:47.525340: step: 174/470, loss: 0.5848957896232605 2023-01-22 17:03:48.196057: step: 176/470, loss: 1.5677191019058228 2023-01-22 17:03:48.970800: step: 178/470, loss: 1.2079250812530518 2023-01-22 17:03:49.803903: step: 180/470, loss: 0.6007267236709595 2023-01-22 17:03:50.598703: step: 182/470, loss: 1.3235710859298706 2023-01-22 17:03:51.513546: step: 184/470, loss: 1.0727261304855347 2023-01-22 17:03:52.262196: step: 186/470, loss: 3.2140517234802246 2023-01-22 17:03:52.962884: step: 188/470, loss: 2.628072500228882 2023-01-22 17:03:53.663096: step: 190/470, loss: 0.410220205783844 2023-01-22 17:03:54.408532: step: 192/470, loss: 1.1373114585876465 2023-01-22 17:03:55.096975: step: 194/470, loss: 1.4659881591796875 2023-01-22 17:03:55.783172: step: 196/470, loss: 0.6437451839447021 2023-01-22 17:03:56.478362: step: 198/470, loss: 0.30417031049728394 2023-01-22 17:03:57.178890: step: 200/470, loss: 1.0152573585510254 2023-01-22 17:03:57.846358: step: 202/470, loss: 0.33993643522262573 2023-01-22 17:03:58.662720: step: 204/470, loss: 1.0089237689971924 2023-01-22 17:03:59.395141: step: 206/470, loss: 0.37781351804733276 2023-01-22 17:04:00.075403: step: 208/470, loss: 0.689214825630188 2023-01-22 17:04:00.783935: step: 210/470, loss: 0.4352153539657593 2023-01-22 17:04:01.498132: step: 212/470, loss: 1.1648905277252197 2023-01-22 17:04:02.316168: step: 214/470, loss: 0.3460136651992798 2023-01-22 17:04:03.109929: step: 216/470, loss: 0.7107413411140442 2023-01-22 17:04:03.837262: step: 218/470, loss: 0.08092077076435089 2023-01-22 17:04:04.564458: step: 220/470, loss: 1.2060343027114868 2023-01-22 17:04:05.210053: step: 222/470, loss: 1.078926920890808 2023-01-22 17:04:05.896719: step: 224/470, loss: 0.8819116950035095 2023-01-22 17:04:06.691572: step: 226/470, loss: 0.7317195534706116 2023-01-22 17:04:07.401515: step: 228/470, loss: 0.30972084403038025 2023-01-22 17:04:08.212876: step: 230/470, loss: 1.6904860734939575 2023-01-22 17:04:08.936605: step: 232/470, loss: 1.5937964916229248 2023-01-22 17:04:09.627264: step: 234/470, loss: 0.4739353656768799 2023-01-22 17:04:10.453292: step: 236/470, loss: 0.939883828163147 2023-01-22 17:04:11.217865: step: 238/470, loss: 0.4551941752433777 2023-01-22 17:04:11.936805: step: 240/470, loss: 0.5418661832809448 2023-01-22 17:04:12.714769: step: 242/470, loss: 1.2252233028411865 2023-01-22 17:04:13.561887: step: 244/470, loss: 1.1485824584960938 2023-01-22 17:04:14.301179: step: 246/470, loss: 0.8404242992401123 2023-01-22 17:04:15.044286: step: 248/470, loss: 0.6991187334060669 2023-01-22 17:04:15.834186: step: 250/470, loss: 1.8893976211547852 2023-01-22 17:04:16.534813: step: 252/470, loss: 0.6644213795661926 2023-01-22 17:04:17.344000: step: 254/470, loss: 0.8386790752410889 2023-01-22 17:04:18.071533: step: 256/470, loss: 1.4650182723999023 2023-01-22 17:04:18.731158: step: 258/470, loss: 0.5462832450866699 2023-01-22 17:04:19.558279: step: 260/470, loss: 5.154643535614014 2023-01-22 17:04:20.294585: step: 262/470, loss: 0.7503474354743958 2023-01-22 17:04:20.978590: step: 264/470, loss: 0.34197211265563965 2023-01-22 17:04:21.700980: step: 266/470, loss: 0.46603813767433167 2023-01-22 17:04:22.499680: step: 268/470, loss: 0.20978543162345886 2023-01-22 17:04:23.218169: step: 270/470, loss: 0.8326939344406128 2023-01-22 17:04:23.949350: step: 272/470, loss: 1.0488128662109375 2023-01-22 17:04:24.696792: step: 274/470, loss: 1.5846171379089355 2023-01-22 17:04:25.450959: step: 276/470, loss: 0.6706929802894592 2023-01-22 17:04:26.135491: step: 278/470, loss: 2.744781732559204 2023-01-22 17:04:26.925391: step: 280/470, loss: 0.7086952328681946 2023-01-22 17:04:27.783118: step: 282/470, loss: 1.4308860301971436 2023-01-22 17:04:28.584909: step: 284/470, loss: 0.30105042457580566 2023-01-22 17:04:29.365919: step: 286/470, loss: 0.7560703754425049 2023-01-22 17:04:30.088629: step: 288/470, loss: 1.4897514581680298 2023-01-22 17:04:30.879557: step: 290/470, loss: 0.583710253238678 2023-01-22 17:04:31.523692: step: 292/470, loss: 0.9798109531402588 2023-01-22 17:04:32.283964: step: 294/470, loss: 1.5323822498321533 2023-01-22 17:04:33.032650: step: 296/470, loss: 2.428147792816162 2023-01-22 17:04:33.724179: step: 298/470, loss: 0.5572175979614258 2023-01-22 17:04:34.444891: step: 300/470, loss: 0.8898938894271851 2023-01-22 17:04:35.171532: step: 302/470, loss: 1.1512093544006348 2023-01-22 17:04:35.997712: step: 304/470, loss: 2.286986827850342 2023-01-22 17:04:36.775725: step: 306/470, loss: 1.5244067907333374 2023-01-22 17:04:37.582280: step: 308/470, loss: 0.6587584018707275 2023-01-22 17:04:38.352255: step: 310/470, loss: 0.5230644345283508 2023-01-22 17:04:39.049050: step: 312/470, loss: 0.8668720126152039 2023-01-22 17:04:39.789251: step: 314/470, loss: 0.6899608373641968 2023-01-22 17:04:40.526827: step: 316/470, loss: 0.9593360424041748 2023-01-22 17:04:41.199490: step: 318/470, loss: 0.6568881869316101 2023-01-22 17:04:41.967466: step: 320/470, loss: 0.8749952912330627 2023-01-22 17:04:42.702381: step: 322/470, loss: 0.46149203181266785 2023-01-22 17:04:43.396069: step: 324/470, loss: 1.287691354751587 2023-01-22 17:04:44.178511: step: 326/470, loss: 0.5143733620643616 2023-01-22 17:04:44.889712: step: 328/470, loss: 0.423993319272995 2023-01-22 17:04:45.590271: step: 330/470, loss: 1.9649615287780762 2023-01-22 17:04:46.316807: step: 332/470, loss: 1.341388463973999 2023-01-22 17:04:47.118226: step: 334/470, loss: 0.20511598885059357 2023-01-22 17:04:47.975616: step: 336/470, loss: 0.7076253890991211 2023-01-22 17:04:48.734522: step: 338/470, loss: 0.2398476004600525 2023-01-22 17:04:49.529914: step: 340/470, loss: 0.43527939915657043 2023-01-22 17:04:50.176083: step: 342/470, loss: 0.27098989486694336 2023-01-22 17:04:50.870906: step: 344/470, loss: 0.42461445927619934 2023-01-22 17:04:51.642995: step: 346/470, loss: 0.6119195222854614 2023-01-22 17:04:52.463403: step: 348/470, loss: 0.5161367058753967 2023-01-22 17:04:53.254095: step: 350/470, loss: 0.35241755843162537 2023-01-22 17:04:54.014922: step: 352/470, loss: 0.33312055468559265 2023-01-22 17:04:54.836946: step: 354/470, loss: 2.3488333225250244 2023-01-22 17:04:55.637537: step: 356/470, loss: 2.8952274322509766 2023-01-22 17:04:56.366231: step: 358/470, loss: 1.2893620729446411 2023-01-22 17:04:57.077494: step: 360/470, loss: 0.3711218535900116 2023-01-22 17:04:57.799743: step: 362/470, loss: 0.2599567174911499 2023-01-22 17:04:58.535449: step: 364/470, loss: 5.034658908843994 2023-01-22 17:04:59.345240: step: 366/470, loss: 0.35741162300109863 2023-01-22 17:05:00.078358: step: 368/470, loss: 0.5050439238548279 2023-01-22 17:05:00.938811: step: 370/470, loss: 0.46297574043273926 2023-01-22 17:05:01.720601: step: 372/470, loss: 1.4221333265304565 2023-01-22 17:05:02.463045: step: 374/470, loss: 0.3796398341655731 2023-01-22 17:05:03.173874: step: 376/470, loss: 0.4562537372112274 2023-01-22 17:05:03.964049: step: 378/470, loss: 0.6173561811447144 2023-01-22 17:05:04.735694: step: 380/470, loss: 1.5535647869110107 2023-01-22 17:05:05.518403: step: 382/470, loss: 0.6625960469245911 2023-01-22 17:05:06.246004: step: 384/470, loss: 2.1780447959899902 2023-01-22 17:05:06.938278: step: 386/470, loss: 1.2066243886947632 2023-01-22 17:05:07.810466: step: 388/470, loss: 0.383740097284317 2023-01-22 17:05:08.618922: step: 390/470, loss: 0.36354339122772217 2023-01-22 17:05:09.443862: step: 392/470, loss: 0.3574196994304657 2023-01-22 17:05:10.264874: step: 394/470, loss: 1.139248251914978 2023-01-22 17:05:10.992652: step: 396/470, loss: 0.5587860345840454 2023-01-22 17:05:11.797856: step: 398/470, loss: 1.277011513710022 2023-01-22 17:05:12.581314: step: 400/470, loss: 0.6317623257637024 2023-01-22 17:05:13.454041: step: 402/470, loss: 1.273547649383545 2023-01-22 17:05:14.143107: step: 404/470, loss: 5.302393913269043 2023-01-22 17:05:14.818103: step: 406/470, loss: 0.3220556676387787 2023-01-22 17:05:15.544135: step: 408/470, loss: 1.9869134426116943 2023-01-22 17:05:16.288323: step: 410/470, loss: 0.33363187313079834 2023-01-22 17:05:17.027726: step: 412/470, loss: 0.9794175624847412 2023-01-22 17:05:17.825594: step: 414/470, loss: 1.7475107908248901 2023-01-22 17:05:18.521734: step: 416/470, loss: 1.0983277559280396 2023-01-22 17:05:19.285497: step: 418/470, loss: 2.780597686767578 2023-01-22 17:05:20.086932: step: 420/470, loss: 0.28116825222969055 2023-01-22 17:05:20.820783: step: 422/470, loss: 0.4581286311149597 2023-01-22 17:05:21.550922: step: 424/470, loss: 0.6359933018684387 2023-01-22 17:05:22.214104: step: 426/470, loss: 0.4756195843219757 2023-01-22 17:05:22.928841: step: 428/470, loss: 0.43690595030784607 2023-01-22 17:05:23.645947: step: 430/470, loss: 0.886446475982666 2023-01-22 17:05:24.411029: step: 432/470, loss: 1.1900947093963623 2023-01-22 17:05:25.148308: step: 434/470, loss: 0.9924726486206055 2023-01-22 17:05:25.858875: step: 436/470, loss: 1.7097697257995605 2023-01-22 17:05:26.553204: step: 438/470, loss: 0.9539413452148438 2023-01-22 17:05:27.295179: step: 440/470, loss: 1.100608229637146 2023-01-22 17:05:28.042318: step: 442/470, loss: 1.6105233430862427 2023-01-22 17:05:28.750155: step: 444/470, loss: 0.42767927050590515 2023-01-22 17:05:29.490146: step: 446/470, loss: 0.4778148829936981 2023-01-22 17:05:30.288331: step: 448/470, loss: 1.9240278005599976 2023-01-22 17:05:31.036121: step: 450/470, loss: 0.6379396319389343 2023-01-22 17:05:31.864684: step: 452/470, loss: 0.8942621350288391 2023-01-22 17:05:32.608508: step: 454/470, loss: 2.3800547122955322 2023-01-22 17:05:33.428148: step: 456/470, loss: 1.0691368579864502 2023-01-22 17:05:34.180377: step: 458/470, loss: 0.7086570858955383 2023-01-22 17:05:34.994921: step: 460/470, loss: 0.39457079768180847 2023-01-22 17:05:35.719755: step: 462/470, loss: 0.27901706099510193 2023-01-22 17:05:36.408970: step: 464/470, loss: 0.8260778784751892 2023-01-22 17:05:37.150920: step: 466/470, loss: 1.605619192123413 2023-01-22 17:05:37.934120: step: 468/470, loss: 0.8436158299446106 2023-01-22 17:05:38.657334: step: 470/470, loss: 1.076684832572937 2023-01-22 17:05:39.344272: step: 472/470, loss: 1.1909767389297485 2023-01-22 17:05:40.123050: step: 474/470, loss: 0.6028439998626709 2023-01-22 17:05:40.796198: step: 476/470, loss: 2.0072274208068848 2023-01-22 17:05:41.476692: step: 478/470, loss: 2.010789155960083 2023-01-22 17:05:42.245530: step: 480/470, loss: 0.4975433945655823 2023-01-22 17:05:43.034782: step: 482/470, loss: 0.9296523928642273 2023-01-22 17:05:43.766186: step: 484/470, loss: 0.5860471725463867 2023-01-22 17:05:44.531405: step: 486/470, loss: 0.750137209892273 2023-01-22 17:05:45.266930: step: 488/470, loss: 1.2189258337020874 2023-01-22 17:05:46.072920: step: 490/470, loss: 3.423570156097412 2023-01-22 17:05:46.874643: step: 492/470, loss: 1.0367627143859863 2023-01-22 17:05:47.631062: step: 494/470, loss: 1.586470365524292 2023-01-22 17:05:48.349888: step: 496/470, loss: 0.3984842300415039 2023-01-22 17:05:49.107703: step: 498/470, loss: 1.551780104637146 2023-01-22 17:05:49.837321: step: 500/470, loss: 0.6801697015762329 2023-01-22 17:05:50.555863: step: 502/470, loss: 0.6109309792518616 2023-01-22 17:05:51.322631: step: 504/470, loss: 2.4782888889312744 2023-01-22 17:05:52.171712: step: 506/470, loss: 1.3563328981399536 2023-01-22 17:05:52.876040: step: 508/470, loss: 1.7630181312561035 2023-01-22 17:05:53.627578: step: 510/470, loss: 0.9129251837730408 2023-01-22 17:05:54.344247: step: 512/470, loss: 1.837816834449768 2023-01-22 17:05:55.264820: step: 514/470, loss: 1.0328463315963745 2023-01-22 17:05:56.057587: step: 516/470, loss: 0.43190184235572815 2023-01-22 17:05:56.809326: step: 518/470, loss: 0.677377462387085 2023-01-22 17:05:57.612360: step: 520/470, loss: 1.7360193729400635 2023-01-22 17:05:58.345513: step: 522/470, loss: 1.5820202827453613 2023-01-22 17:05:59.093695: step: 524/470, loss: 3.5445146560668945 2023-01-22 17:05:59.927326: step: 526/470, loss: 9.609672546386719 2023-01-22 17:06:00.778570: step: 528/470, loss: 2.3321354389190674 2023-01-22 17:06:01.580073: step: 530/470, loss: 0.533820629119873 2023-01-22 17:06:02.381726: step: 532/470, loss: 0.19819556176662445 2023-01-22 17:06:03.108212: step: 534/470, loss: 1.0940450429916382 2023-01-22 17:06:03.837956: step: 536/470, loss: 0.9480574727058411 2023-01-22 17:06:04.545106: step: 538/470, loss: 0.29331231117248535 2023-01-22 17:06:05.225592: step: 540/470, loss: 0.4723627269268036 2023-01-22 17:06:05.906596: step: 542/470, loss: 0.6444365978240967 2023-01-22 17:06:06.694768: step: 544/470, loss: 0.3107144236564636 2023-01-22 17:06:07.475576: step: 546/470, loss: 0.4428755044937134 2023-01-22 17:06:08.228660: step: 548/470, loss: 0.4551246762275696 2023-01-22 17:06:08.935707: step: 550/470, loss: 0.9456477165222168 2023-01-22 17:06:09.821542: step: 552/470, loss: 0.4225022792816162 2023-01-22 17:06:10.563885: step: 554/470, loss: 0.21875688433647156 2023-01-22 17:06:11.272214: step: 556/470, loss: 0.5006171464920044 2023-01-22 17:06:12.045777: step: 558/470, loss: 0.7604213953018188 2023-01-22 17:06:13.007819: step: 560/470, loss: 1.2393240928649902 2023-01-22 17:06:13.758655: step: 562/470, loss: 1.712594985961914 2023-01-22 17:06:14.554309: step: 564/470, loss: 0.17193198204040527 2023-01-22 17:06:15.374590: step: 566/470, loss: 0.7961640357971191 2023-01-22 17:06:16.162137: step: 568/470, loss: 0.5501372218132019 2023-01-22 17:06:16.917008: step: 570/470, loss: 0.4883258044719696 2023-01-22 17:06:17.645917: step: 572/470, loss: 0.6990291476249695 2023-01-22 17:06:18.404425: step: 574/470, loss: 1.1884771585464478 2023-01-22 17:06:19.177182: step: 576/470, loss: 1.0261270999908447 2023-01-22 17:06:19.862069: step: 578/470, loss: 0.6111153960227966 2023-01-22 17:06:20.598937: step: 580/470, loss: 0.7478018403053284 2023-01-22 17:06:21.293014: step: 582/470, loss: 1.9652400016784668 2023-01-22 17:06:22.076567: step: 584/470, loss: 0.6027696132659912 2023-01-22 17:06:22.827326: step: 586/470, loss: 0.38277050852775574 2023-01-22 17:06:23.663582: step: 588/470, loss: 0.5123499035835266 2023-01-22 17:06:24.402365: step: 590/470, loss: 0.3519931137561798 2023-01-22 17:06:25.197763: step: 592/470, loss: 0.7184064984321594 2023-01-22 17:06:25.885754: step: 594/470, loss: 1.8940832614898682 2023-01-22 17:06:26.600366: step: 596/470, loss: 0.23110346496105194 2023-01-22 17:06:27.361622: step: 598/470, loss: 0.5986997485160828 2023-01-22 17:06:28.150848: step: 600/470, loss: 0.29345154762268066 2023-01-22 17:06:28.910526: step: 602/470, loss: 0.6144067645072937 2023-01-22 17:06:29.678686: step: 604/470, loss: 0.31155356764793396 2023-01-22 17:06:30.408539: step: 606/470, loss: 0.4316118359565735 2023-01-22 17:06:31.192295: step: 608/470, loss: 1.4895285367965698 2023-01-22 17:06:31.917075: step: 610/470, loss: 0.3460380434989929 2023-01-22 17:06:32.616032: step: 612/470, loss: 4.381056308746338 2023-01-22 17:06:33.338242: step: 614/470, loss: 0.26738184690475464 2023-01-22 17:06:34.064454: step: 616/470, loss: 1.009708285331726 2023-01-22 17:06:34.802664: step: 618/470, loss: 1.4514302015304565 2023-01-22 17:06:35.637501: step: 620/470, loss: 0.7399471998214722 2023-01-22 17:06:36.368175: step: 622/470, loss: 2.295945644378662 2023-01-22 17:06:37.150142: step: 624/470, loss: 0.5105316638946533 2023-01-22 17:06:37.844100: step: 626/470, loss: 0.2828077971935272 2023-01-22 17:06:38.594655: step: 628/470, loss: 1.003699541091919 2023-01-22 17:06:39.315037: step: 630/470, loss: 0.38585180044174194 2023-01-22 17:06:40.033498: step: 632/470, loss: 1.3301163911819458 2023-01-22 17:06:40.804574: step: 634/470, loss: 1.180068016052246 2023-01-22 17:06:41.535727: step: 636/470, loss: 0.9667542576789856 2023-01-22 17:06:42.269382: step: 638/470, loss: 0.6113733649253845 2023-01-22 17:06:43.120957: step: 640/470, loss: 1.1039268970489502 2023-01-22 17:06:43.916143: step: 642/470, loss: 0.7926710844039917 2023-01-22 17:06:44.666740: step: 644/470, loss: 1.8398045301437378 2023-01-22 17:06:45.486606: step: 646/470, loss: 0.5182595252990723 2023-01-22 17:06:46.255341: step: 648/470, loss: 1.8043544292449951 2023-01-22 17:06:46.986281: step: 650/470, loss: 1.3427050113677979 2023-01-22 17:06:47.761931: step: 652/470, loss: 0.2207910120487213 2023-01-22 17:06:48.515521: step: 654/470, loss: 0.7075690031051636 2023-01-22 17:06:49.185054: step: 656/470, loss: 1.3858473300933838 2023-01-22 17:06:49.913997: step: 658/470, loss: 0.40335145592689514 2023-01-22 17:06:50.646823: step: 660/470, loss: 0.8443214893341064 2023-01-22 17:06:51.425322: step: 662/470, loss: 1.4021592140197754 2023-01-22 17:06:52.126143: step: 664/470, loss: 0.8110883235931396 2023-01-22 17:06:52.915696: step: 666/470, loss: 0.2092544585466385 2023-01-22 17:06:53.627285: step: 668/470, loss: 4.8981614112854 2023-01-22 17:06:54.354490: step: 670/470, loss: 0.4453684389591217 2023-01-22 17:06:55.128289: step: 672/470, loss: 0.47258445620536804 2023-01-22 17:06:55.825618: step: 674/470, loss: 0.7400813102722168 2023-01-22 17:06:56.548465: step: 676/470, loss: 0.281443327665329 2023-01-22 17:06:57.254919: step: 678/470, loss: 0.5487380027770996 2023-01-22 17:06:57.983919: step: 680/470, loss: 0.4541553854942322 2023-01-22 17:06:58.723043: step: 682/470, loss: 0.6198228597640991 2023-01-22 17:06:59.470017: step: 684/470, loss: 0.6936663389205933 2023-01-22 17:07:00.210545: step: 686/470, loss: 1.3587620258331299 2023-01-22 17:07:01.024479: step: 688/470, loss: 1.5153071880340576 2023-01-22 17:07:01.705962: step: 690/470, loss: 1.0358877182006836 2023-01-22 17:07:02.409498: step: 692/470, loss: 1.0552879571914673 2023-01-22 17:07:03.125063: step: 694/470, loss: 1.0266014337539673 2023-01-22 17:07:03.840530: step: 696/470, loss: 1.048282504081726 2023-01-22 17:07:04.607467: step: 698/470, loss: 1.082366943359375 2023-01-22 17:07:05.344688: step: 700/470, loss: 3.19528865814209 2023-01-22 17:07:06.074899: step: 702/470, loss: 0.5113957524299622 2023-01-22 17:07:06.777972: step: 704/470, loss: 0.6064223051071167 2023-01-22 17:07:07.538638: step: 706/470, loss: 0.29762303829193115 2023-01-22 17:07:08.277461: step: 708/470, loss: 1.149381399154663 2023-01-22 17:07:09.089454: step: 710/470, loss: 0.45705515146255493 2023-01-22 17:07:09.809098: step: 712/470, loss: 0.4075102210044861 2023-01-22 17:07:10.558508: step: 714/470, loss: 0.7546180486679077 2023-01-22 17:07:11.263633: step: 716/470, loss: 0.7100285291671753 2023-01-22 17:07:11.971438: step: 718/470, loss: 2.0336670875549316 2023-01-22 17:07:12.762108: step: 720/470, loss: 0.505089521408081 2023-01-22 17:07:13.528016: step: 722/470, loss: 0.5286163091659546 2023-01-22 17:07:14.244408: step: 724/470, loss: 3.978087902069092 2023-01-22 17:07:14.947543: step: 726/470, loss: 1.456428050994873 2023-01-22 17:07:15.737840: step: 728/470, loss: 6.835483551025391 2023-01-22 17:07:16.464183: step: 730/470, loss: 1.0272594690322876 2023-01-22 17:07:17.124302: step: 732/470, loss: 1.0365326404571533 2023-01-22 17:07:17.857370: step: 734/470, loss: 0.3310011327266693 2023-01-22 17:07:18.655474: step: 736/470, loss: 0.6663587689399719 2023-01-22 17:07:19.444252: step: 738/470, loss: 0.6486376523971558 2023-01-22 17:07:20.189833: step: 740/470, loss: 0.29191356897354126 2023-01-22 17:07:20.965292: step: 742/470, loss: 0.32745280861854553 2023-01-22 17:07:21.735366: step: 744/470, loss: 2.1090571880340576 2023-01-22 17:07:22.506530: step: 746/470, loss: 1.205168604850769 2023-01-22 17:07:23.220589: step: 748/470, loss: 0.9146398901939392 2023-01-22 17:07:23.958410: step: 750/470, loss: 8.422287940979004 2023-01-22 17:07:24.764658: step: 752/470, loss: 1.7295546531677246 2023-01-22 17:07:25.536419: step: 754/470, loss: 1.0920233726501465 2023-01-22 17:07:26.239460: step: 756/470, loss: 2.5528173446655273 2023-01-22 17:07:27.050044: step: 758/470, loss: 0.6946008801460266 2023-01-22 17:07:27.817307: step: 760/470, loss: 0.9669357538223267 2023-01-22 17:07:28.517832: step: 762/470, loss: 3.9733476638793945 2023-01-22 17:07:29.206122: step: 764/470, loss: 0.6126687526702881 2023-01-22 17:07:29.988753: step: 766/470, loss: 1.2648811340332031 2023-01-22 17:07:30.747925: step: 768/470, loss: 1.0761961936950684 2023-01-22 17:07:31.448575: step: 770/470, loss: 0.36128804087638855 2023-01-22 17:07:32.159831: step: 772/470, loss: 2.7535998821258545 2023-01-22 17:07:32.872279: step: 774/470, loss: 1.588283658027649 2023-01-22 17:07:33.718347: step: 776/470, loss: 1.3686546087265015 2023-01-22 17:07:34.563682: step: 778/470, loss: 0.6748623847961426 2023-01-22 17:07:35.332620: step: 780/470, loss: 0.4505867660045624 2023-01-22 17:07:36.082439: step: 782/470, loss: 0.2927594780921936 2023-01-22 17:07:36.780030: step: 784/470, loss: 0.8455650806427002 2023-01-22 17:07:37.469905: step: 786/470, loss: 0.3428768515586853 2023-01-22 17:07:38.217070: step: 788/470, loss: 1.1048076152801514 2023-01-22 17:07:38.948072: step: 790/470, loss: 3.1269044876098633 2023-01-22 17:07:39.701420: step: 792/470, loss: 1.0847244262695312 2023-01-22 17:07:40.603615: step: 794/470, loss: 5.910243511199951 2023-01-22 17:07:41.348018: step: 796/470, loss: 0.3136829137802124 2023-01-22 17:07:42.179812: step: 798/470, loss: 0.6312191486358643 2023-01-22 17:07:42.849656: step: 800/470, loss: 1.0586059093475342 2023-01-22 17:07:43.565533: step: 802/470, loss: 0.7780605554580688 2023-01-22 17:07:44.272734: step: 804/470, loss: 0.42219239473342896 2023-01-22 17:07:44.985854: step: 806/470, loss: 0.661794900894165 2023-01-22 17:07:45.683101: step: 808/470, loss: 0.5359216332435608 2023-01-22 17:07:46.401832: step: 810/470, loss: 0.20118995010852814 2023-01-22 17:07:47.155898: step: 812/470, loss: 2.887706995010376 2023-01-22 17:07:47.920219: step: 814/470, loss: 0.8478198051452637 2023-01-22 17:07:48.637742: step: 816/470, loss: 0.7324924468994141 2023-01-22 17:07:49.375433: step: 818/470, loss: 1.24992835521698 2023-01-22 17:07:50.133734: step: 820/470, loss: 0.47944867610931396 2023-01-22 17:07:50.886575: step: 822/470, loss: 3.812171459197998 2023-01-22 17:07:51.656953: step: 824/470, loss: 1.0839214324951172 2023-01-22 17:07:52.378140: step: 826/470, loss: 0.307800829410553 2023-01-22 17:07:53.117644: step: 828/470, loss: 0.6476705074310303 2023-01-22 17:07:53.977416: step: 830/470, loss: 0.3994646668434143 2023-01-22 17:07:54.750390: step: 832/470, loss: 1.0658934116363525 2023-01-22 17:07:55.493191: step: 834/470, loss: 0.22010746598243713 2023-01-22 17:07:56.226292: step: 836/470, loss: 0.7103077173233032 2023-01-22 17:07:57.100368: step: 838/470, loss: 0.9640560150146484 2023-01-22 17:07:57.811652: step: 840/470, loss: 1.359990119934082 2023-01-22 17:07:58.666883: step: 842/470, loss: 1.2544822692871094 2023-01-22 17:07:59.507089: step: 844/470, loss: 1.0046195983886719 2023-01-22 17:08:00.182523: step: 846/470, loss: 1.0893107652664185 2023-01-22 17:08:00.915090: step: 848/470, loss: 0.3106290102005005 2023-01-22 17:08:01.697034: step: 850/470, loss: 3.447446346282959 2023-01-22 17:08:02.394740: step: 852/470, loss: 1.2170885801315308 2023-01-22 17:08:03.087639: step: 854/470, loss: 0.2806965112686157 2023-01-22 17:08:03.898926: step: 856/470, loss: 4.124128341674805 2023-01-22 17:08:04.664428: step: 858/470, loss: 1.0237302780151367 2023-01-22 17:08:05.403057: step: 860/470, loss: 1.629802942276001 2023-01-22 17:08:06.150534: step: 862/470, loss: 6.174650192260742 2023-01-22 17:08:06.946483: step: 864/470, loss: 0.9307006001472473 2023-01-22 17:08:07.745510: step: 866/470, loss: 0.4283657371997833 2023-01-22 17:08:08.563485: step: 868/470, loss: 2.484179973602295 2023-01-22 17:08:09.234561: step: 870/470, loss: 0.7242858409881592 2023-01-22 17:08:09.942271: step: 872/470, loss: 0.5142600536346436 2023-01-22 17:08:10.679004: step: 874/470, loss: 0.43242359161376953 2023-01-22 17:08:11.365454: step: 876/470, loss: 1.1232759952545166 2023-01-22 17:08:12.066265: step: 878/470, loss: 0.9467688798904419 2023-01-22 17:08:12.789677: step: 880/470, loss: 0.1361820548772812 2023-01-22 17:08:13.605185: step: 882/470, loss: 2.624424934387207 2023-01-22 17:08:14.265785: step: 884/470, loss: 1.0939950942993164 2023-01-22 17:08:14.985040: step: 886/470, loss: 0.4590575098991394 2023-01-22 17:08:15.687311: step: 888/470, loss: 0.7806578874588013 2023-01-22 17:08:16.477615: step: 890/470, loss: 0.7640312910079956 2023-01-22 17:08:17.245214: step: 892/470, loss: 7.197998046875 2023-01-22 17:08:17.979361: step: 894/470, loss: 0.49443161487579346 2023-01-22 17:08:18.710758: step: 896/470, loss: 0.8511555790901184 2023-01-22 17:08:19.380568: step: 898/470, loss: 2.684095621109009 2023-01-22 17:08:20.113759: step: 900/470, loss: 0.8094006776809692 2023-01-22 17:08:20.776792: step: 902/470, loss: 0.8172462582588196 2023-01-22 17:08:21.566085: step: 904/470, loss: 3.9463624954223633 2023-01-22 17:08:22.280943: step: 906/470, loss: 2.681535243988037 2023-01-22 17:08:23.013259: step: 908/470, loss: 1.012972116470337 2023-01-22 17:08:23.767414: step: 910/470, loss: 2.3120675086975098 2023-01-22 17:08:24.535294: step: 912/470, loss: 1.2887799739837646 2023-01-22 17:08:25.322599: step: 914/470, loss: 0.8099074959754944 2023-01-22 17:08:26.099114: step: 916/470, loss: 0.5998104810714722 2023-01-22 17:08:26.778208: step: 918/470, loss: 1.7960407733917236 2023-01-22 17:08:27.517927: step: 920/470, loss: 0.6787109971046448 2023-01-22 17:08:28.244193: step: 922/470, loss: 1.4123334884643555 2023-01-22 17:08:28.904908: step: 924/470, loss: 0.24107448756694794 2023-01-22 17:08:29.605797: step: 926/470, loss: 0.34054338932037354 2023-01-22 17:08:30.321388: step: 928/470, loss: 0.6413167119026184 2023-01-22 17:08:31.084591: step: 930/470, loss: 0.6883243322372437 2023-01-22 17:08:31.780759: step: 932/470, loss: 0.3803039491176605 2023-01-22 17:08:32.502065: step: 934/470, loss: 0.22277651727199554 2023-01-22 17:08:33.258509: step: 936/470, loss: 1.7990632057189941 2023-01-22 17:08:34.139680: step: 938/470, loss: 0.1533496081829071 2023-01-22 17:08:34.860097: step: 940/470, loss: 0.6035867929458618 2023-01-22 17:08:35.580572: step: 942/470, loss: 0.27842485904693604 ================================================== Loss: 1.112 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3099720927445607, 'r': 0.24233112373958068, 'f1': 0.2720095893732887}, 'combined': 0.20042811848558112, 'epoch': 4} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3270647422029072, 'r': 0.26611319562522806, 'f1': 0.2934574540740068}, 'combined': 0.2043982267182137, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28632789500190714, 'r': 0.21624004214565282, 'f1': 0.24639676153677628}, 'combined': 0.1815555085007825, 'epoch': 4} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3345971660892728, 'r': 0.26806343565653556, 'f1': 0.2976576389530171}, 'combined': 0.20732372862399204, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2650408878504673, 'r': 0.2152514231499051, 'f1': 0.23756544502617802}, 'combined': 0.175048222650868, 'epoch': 4} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.31248242935600457, 'r': 0.26835666843829786, 'f1': 0.28874345410260266}, 'combined': 0.20111484365355412, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21875, 'r': 0.25, 'f1': 0.23333333333333334}, 'combined': 0.15555555555555556, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32407407407407407, 'r': 0.3804347826086957, 'f1': 0.35000000000000003}, 'combined': 0.17500000000000002, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3409090909090909, 'r': 0.12931034482758622, 'f1': 0.1875}, 'combined': 0.125, 'epoch': 4} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27756423515352086, 'r': 0.27282404897442847, 'f1': 0.2751737297789929}, 'combined': 0.20275959036346844, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3282026888474822, 'r': 0.25190581017208286, 'f1': 0.28503690042297636}, 'combined': 0.19853316447371488, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24695121951219512, 'r': 0.2892857142857143, 'f1': 0.2664473684210527}, 'combined': 0.17763157894736847, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28632789500190714, 'r': 0.21624004214565282, 'f1': 0.24639676153677628}, 'combined': 0.1815555085007825, 'epoch': 4} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3345971660892728, 'r': 0.26806343565653556, 'f1': 0.2976576389530171}, 'combined': 0.20732372862399204, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32407407407407407, 'r': 0.3804347826086957, 'f1': 0.35000000000000003}, 'combined': 0.17500000000000002, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20295208490866637, 'r': 0.24783940803591395, 'f1': 0.22316093081021016}, 'combined': 0.16443437007068115, 'epoch': 1} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.279769607766213, 'r': 0.2325601629274273, 'f1': 0.25398979050608034}, 'combined': 0.17690831179527985, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 1} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:11:17.752399: step: 2/470, loss: 0.22235730290412903 2023-01-22 17:11:18.464197: step: 4/470, loss: 0.21198546886444092 2023-01-22 17:11:19.251499: step: 6/470, loss: 0.49766987562179565 2023-01-22 17:11:19.971012: step: 8/470, loss: 0.3908613622188568 2023-01-22 17:11:20.755623: step: 10/470, loss: 0.16270841658115387 2023-01-22 17:11:21.579161: step: 12/470, loss: 0.20050539076328278 2023-01-22 17:11:22.288222: step: 14/470, loss: 0.7683029174804688 2023-01-22 17:11:23.074921: step: 16/470, loss: 0.3124810755252838 2023-01-22 17:11:23.845623: step: 18/470, loss: 1.0326603651046753 2023-01-22 17:11:24.532850: step: 20/470, loss: 0.4175031781196594 2023-01-22 17:11:25.274128: step: 22/470, loss: 0.8004642724990845 2023-01-22 17:11:26.087336: step: 24/470, loss: 0.31021180748939514 2023-01-22 17:11:26.796140: step: 26/470, loss: 0.43700122833251953 2023-01-22 17:11:27.630688: step: 28/470, loss: 0.9346105456352234 2023-01-22 17:11:28.366915: step: 30/470, loss: 0.3246211111545563 2023-01-22 17:11:29.069167: step: 32/470, loss: 0.7316885590553284 2023-01-22 17:11:29.848898: step: 34/470, loss: 0.9466471076011658 2023-01-22 17:11:30.616334: step: 36/470, loss: 0.294159471988678 2023-01-22 17:11:31.402790: step: 38/470, loss: 2.3637757301330566 2023-01-22 17:11:32.161300: step: 40/470, loss: 0.3901894688606262 2023-01-22 17:11:32.974667: step: 42/470, loss: 0.89076167345047 2023-01-22 17:11:33.705002: step: 44/470, loss: 1.0623388290405273 2023-01-22 17:11:34.530667: step: 46/470, loss: 0.1770210862159729 2023-01-22 17:11:35.327218: step: 48/470, loss: 0.3942860960960388 2023-01-22 17:11:36.050704: step: 50/470, loss: 0.8177869915962219 2023-01-22 17:11:36.766064: step: 52/470, loss: 1.5779149532318115 2023-01-22 17:11:37.600834: step: 54/470, loss: 1.26964271068573 2023-01-22 17:11:38.478384: step: 56/470, loss: 1.0765001773834229 2023-01-22 17:11:39.216749: step: 58/470, loss: 0.8852806091308594 2023-01-22 17:11:39.905741: step: 60/470, loss: 0.265352338552475 2023-01-22 17:11:40.584083: step: 62/470, loss: 0.4972139298915863 2023-01-22 17:11:41.283179: step: 64/470, loss: 0.9439274668693542 2023-01-22 17:11:42.040011: step: 66/470, loss: 1.225905179977417 2023-01-22 17:11:42.802111: step: 68/470, loss: 0.6210903525352478 2023-01-22 17:11:43.565623: step: 70/470, loss: 0.33965161442756653 2023-01-22 17:11:44.304070: step: 72/470, loss: 0.17134492099285126 2023-01-22 17:11:44.971884: step: 74/470, loss: 0.3916568458080292 2023-01-22 17:11:45.696338: step: 76/470, loss: 0.5033536553382874 2023-01-22 17:11:46.485862: step: 78/470, loss: 0.39052829146385193 2023-01-22 17:11:47.288349: step: 80/470, loss: 0.5496810078620911 2023-01-22 17:11:48.142697: step: 82/470, loss: 0.7038556337356567 2023-01-22 17:11:48.842925: step: 84/470, loss: 2.2712252140045166 2023-01-22 17:11:49.554911: step: 86/470, loss: 0.3298700749874115 2023-01-22 17:11:50.354623: step: 88/470, loss: 0.6470965147018433 2023-01-22 17:11:51.145017: step: 90/470, loss: 0.3787349760532379 2023-01-22 17:11:51.919496: step: 92/470, loss: 0.6228494644165039 2023-01-22 17:11:52.706205: step: 94/470, loss: 0.611712634563446 2023-01-22 17:11:53.387400: step: 96/470, loss: 0.8371176719665527 2023-01-22 17:11:54.090992: step: 98/470, loss: 0.6518052220344543 2023-01-22 17:11:54.804507: step: 100/470, loss: 0.47128623723983765 2023-01-22 17:11:55.506958: step: 102/470, loss: 0.8628720045089722 2023-01-22 17:11:56.300162: step: 104/470, loss: 0.4808705747127533 2023-01-22 17:11:57.082482: step: 106/470, loss: 0.4174078702926636 2023-01-22 17:11:57.894841: step: 108/470, loss: 0.3857043981552124 2023-01-22 17:11:58.602627: step: 110/470, loss: 0.21693837642669678 2023-01-22 17:11:59.398437: step: 112/470, loss: 0.6489458084106445 2023-01-22 17:12:00.102750: step: 114/470, loss: 0.5416321158409119 2023-01-22 17:12:00.942223: step: 116/470, loss: 0.19514138996601105 2023-01-22 17:12:01.663750: step: 118/470, loss: 0.3536219596862793 2023-01-22 17:12:02.473650: step: 120/470, loss: 1.3784006834030151 2023-01-22 17:12:03.216514: step: 122/470, loss: 2.3029212951660156 2023-01-22 17:12:03.974338: step: 124/470, loss: 1.3331398963928223 2023-01-22 17:12:04.728636: step: 126/470, loss: 0.3695890009403229 2023-01-22 17:12:05.410108: step: 128/470, loss: 1.1727420091629028 2023-01-22 17:12:06.158989: step: 130/470, loss: 1.360120415687561 2023-01-22 17:12:06.896492: step: 132/470, loss: 0.36498603224754333 2023-01-22 17:12:07.640538: step: 134/470, loss: 0.5842001438140869 2023-01-22 17:12:08.459065: step: 136/470, loss: 0.5986462235450745 2023-01-22 17:12:09.287388: step: 138/470, loss: 0.48415353894233704 2023-01-22 17:12:10.023464: step: 140/470, loss: 0.2749024033546448 2023-01-22 17:12:10.820365: step: 142/470, loss: 1.0702927112579346 2023-01-22 17:12:11.636841: step: 144/470, loss: 0.2774654030799866 2023-01-22 17:12:12.406803: step: 146/470, loss: 0.38094377517700195 2023-01-22 17:12:13.184198: step: 148/470, loss: 0.20257362723350525 2023-01-22 17:12:13.924061: step: 150/470, loss: 1.267014503479004 2023-01-22 17:12:14.647154: step: 152/470, loss: 1.0668094158172607 2023-01-22 17:12:15.419344: step: 154/470, loss: 1.20293128490448 2023-01-22 17:12:16.147744: step: 156/470, loss: 0.724846363067627 2023-01-22 17:12:16.834078: step: 158/470, loss: 0.979896068572998 2023-01-22 17:12:17.564290: step: 160/470, loss: 0.9010365605354309 2023-01-22 17:12:18.268463: step: 162/470, loss: 0.7949374914169312 2023-01-22 17:12:19.008447: step: 164/470, loss: 0.45920002460479736 2023-01-22 17:12:19.698600: step: 166/470, loss: 2.250549077987671 2023-01-22 17:12:20.454753: step: 168/470, loss: 0.19924406707286835 2023-01-22 17:12:21.252660: step: 170/470, loss: 0.7295480966567993 2023-01-22 17:12:22.017767: step: 172/470, loss: 1.2707566022872925 2023-01-22 17:12:22.698904: step: 174/470, loss: 0.5840062499046326 2023-01-22 17:12:23.482493: step: 176/470, loss: 0.5518929362297058 2023-01-22 17:12:24.340763: step: 178/470, loss: 1.6353731155395508 2023-01-22 17:12:25.171659: step: 180/470, loss: 0.9040023684501648 2023-01-22 17:12:25.925599: step: 182/470, loss: 1.9166462421417236 2023-01-22 17:12:26.651547: step: 184/470, loss: 0.33155763149261475 2023-01-22 17:12:27.402828: step: 186/470, loss: 0.1614404320716858 2023-01-22 17:12:28.144723: step: 188/470, loss: 0.4751662313938141 2023-01-22 17:12:28.838415: step: 190/470, loss: 0.3759790360927582 2023-01-22 17:12:29.579138: step: 192/470, loss: 0.6819899678230286 2023-01-22 17:12:30.349202: step: 194/470, loss: 2.1987533569335938 2023-01-22 17:12:31.109247: step: 196/470, loss: 0.6992949843406677 2023-01-22 17:12:31.880148: step: 198/470, loss: 1.0487316846847534 2023-01-22 17:12:32.586338: step: 200/470, loss: 0.5224685072898865 2023-01-22 17:12:33.285195: step: 202/470, loss: 0.9172898530960083 2023-01-22 17:12:34.041619: step: 204/470, loss: 0.6186913847923279 2023-01-22 17:12:34.786282: step: 206/470, loss: 3.2034780979156494 2023-01-22 17:12:35.527499: step: 208/470, loss: 0.720175564289093 2023-01-22 17:12:36.280322: step: 210/470, loss: 2.231804847717285 2023-01-22 17:12:36.976913: step: 212/470, loss: 0.12492244690656662 2023-01-22 17:12:37.826523: step: 214/470, loss: 0.5483446717262268 2023-01-22 17:12:38.596035: step: 216/470, loss: 1.2820593118667603 2023-01-22 17:12:39.310509: step: 218/470, loss: 0.47009074687957764 2023-01-22 17:12:40.039845: step: 220/470, loss: 0.3459213972091675 2023-01-22 17:12:40.686062: step: 222/470, loss: 0.20750129222869873 2023-01-22 17:12:41.428908: step: 224/470, loss: 0.8360021114349365 2023-01-22 17:12:42.208164: step: 226/470, loss: 0.3708805739879608 2023-01-22 17:12:42.980664: step: 228/470, loss: 0.7422099709510803 2023-01-22 17:12:43.718777: step: 230/470, loss: 0.4704272449016571 2023-01-22 17:12:44.402207: step: 232/470, loss: 0.7870251536369324 2023-01-22 17:12:45.227474: step: 234/470, loss: 0.8178002238273621 2023-01-22 17:12:45.869920: step: 236/470, loss: 0.5741672515869141 2023-01-22 17:12:46.596053: step: 238/470, loss: 0.3935246467590332 2023-01-22 17:12:47.263558: step: 240/470, loss: 0.7990236878395081 2023-01-22 17:12:48.033667: step: 242/470, loss: 1.1086221933364868 2023-01-22 17:12:48.682965: step: 244/470, loss: 1.578269124031067 2023-01-22 17:12:49.427441: step: 246/470, loss: 1.1762317419052124 2023-01-22 17:12:50.090017: step: 248/470, loss: 0.1974961757659912 2023-01-22 17:12:50.855695: step: 250/470, loss: 1.7104171514511108 2023-01-22 17:12:51.608950: step: 252/470, loss: 0.31335341930389404 2023-01-22 17:12:52.371192: step: 254/470, loss: 0.48065751791000366 2023-01-22 17:12:53.051270: step: 256/470, loss: 0.8966016173362732 2023-01-22 17:12:53.791512: step: 258/470, loss: 0.5578399896621704 2023-01-22 17:12:54.518901: step: 260/470, loss: 2.122208833694458 2023-01-22 17:12:55.248026: step: 262/470, loss: 1.1785131692886353 2023-01-22 17:12:56.017781: step: 264/470, loss: 0.5294274091720581 2023-01-22 17:12:56.787733: step: 266/470, loss: 0.9701927900314331 2023-01-22 17:12:57.520720: step: 268/470, loss: 0.23452767729759216 2023-01-22 17:12:58.230966: step: 270/470, loss: 0.6140098571777344 2023-01-22 17:12:58.961946: step: 272/470, loss: 0.4548776149749756 2023-01-22 17:12:59.747065: step: 274/470, loss: 0.9099631309509277 2023-01-22 17:13:00.552224: step: 276/470, loss: 0.5374931693077087 2023-01-22 17:13:01.336661: step: 278/470, loss: 1.1470648050308228 2023-01-22 17:13:02.097638: step: 280/470, loss: 0.719149649143219 2023-01-22 17:13:02.853234: step: 282/470, loss: 1.6352453231811523 2023-01-22 17:13:03.671090: step: 284/470, loss: 0.3871826231479645 2023-01-22 17:13:04.435558: step: 286/470, loss: 0.7986884117126465 2023-01-22 17:13:05.134282: step: 288/470, loss: 1.6879855394363403 2023-01-22 17:13:05.934797: step: 290/470, loss: 0.24217256903648376 2023-01-22 17:13:06.663197: step: 292/470, loss: 0.7460265755653381 2023-01-22 17:13:07.480906: step: 294/470, loss: 2.9903945922851562 2023-01-22 17:13:08.251465: step: 296/470, loss: 0.8292760848999023 2023-01-22 17:13:08.968630: step: 298/470, loss: 1.7127366065979004 2023-01-22 17:13:09.674200: step: 300/470, loss: 1.0309264659881592 2023-01-22 17:13:10.413497: step: 302/470, loss: 0.19080205261707306 2023-01-22 17:13:11.192784: step: 304/470, loss: 0.7009590864181519 2023-01-22 17:13:12.030861: step: 306/470, loss: 0.6426486968994141 2023-01-22 17:13:12.779233: step: 308/470, loss: 0.7753332853317261 2023-01-22 17:13:13.463890: step: 310/470, loss: 0.6057764887809753 2023-01-22 17:13:14.186171: step: 312/470, loss: 0.5348176956176758 2023-01-22 17:13:14.907068: step: 314/470, loss: 0.1700417399406433 2023-01-22 17:13:15.643338: step: 316/470, loss: 1.0512360334396362 2023-01-22 17:13:16.393020: step: 318/470, loss: 2.0664336681365967 2023-01-22 17:13:17.108263: step: 320/470, loss: 1.663642168045044 2023-01-22 17:13:17.883781: step: 322/470, loss: 0.18958774209022522 2023-01-22 17:13:18.644376: step: 324/470, loss: 0.4568879008293152 2023-01-22 17:13:19.279950: step: 326/470, loss: 1.1089987754821777 2023-01-22 17:13:20.075341: step: 328/470, loss: 0.6072306632995605 2023-01-22 17:13:20.805574: step: 330/470, loss: 0.9218146800994873 2023-01-22 17:13:21.562252: step: 332/470, loss: 0.6980925798416138 2023-01-22 17:13:22.400068: step: 334/470, loss: 0.33780360221862793 2023-01-22 17:13:23.122171: step: 336/470, loss: 0.2144942283630371 2023-01-22 17:13:23.748346: step: 338/470, loss: 0.5171912908554077 2023-01-22 17:13:24.482363: step: 340/470, loss: 0.2723813056945801 2023-01-22 17:13:25.141695: step: 342/470, loss: 1.158375859260559 2023-01-22 17:13:25.863402: step: 344/470, loss: 1.1009371280670166 2023-01-22 17:13:26.626913: step: 346/470, loss: 0.8941398859024048 2023-01-22 17:13:27.463912: step: 348/470, loss: 0.8668206334114075 2023-01-22 17:13:28.321786: step: 350/470, loss: 0.43403181433677673 2023-01-22 17:13:29.067220: step: 352/470, loss: 0.3821115791797638 2023-01-22 17:13:29.823374: step: 354/470, loss: 0.5914931893348694 2023-01-22 17:13:30.567766: step: 356/470, loss: 0.8853386640548706 2023-01-22 17:13:31.328623: step: 358/470, loss: 1.1184029579162598 2023-01-22 17:13:32.106172: step: 360/470, loss: 0.33806416392326355 2023-01-22 17:13:32.864277: step: 362/470, loss: 0.4669269323348999 2023-01-22 17:13:33.602317: step: 364/470, loss: 1.3949357271194458 2023-01-22 17:13:34.267889: step: 366/470, loss: 0.25456702709198 2023-01-22 17:13:35.026157: step: 368/470, loss: 0.23126432299613953 2023-01-22 17:13:35.683489: step: 370/470, loss: 0.48816195130348206 2023-01-22 17:13:36.489070: step: 372/470, loss: 0.9824779629707336 2023-01-22 17:13:37.414986: step: 374/470, loss: 0.9794360399246216 2023-01-22 17:13:38.181038: step: 376/470, loss: 0.8073421120643616 2023-01-22 17:13:38.853364: step: 378/470, loss: 1.1331653594970703 2023-01-22 17:13:39.530899: step: 380/470, loss: 0.23539990186691284 2023-01-22 17:13:40.251559: step: 382/470, loss: 1.2172232866287231 2023-01-22 17:13:40.928435: step: 384/470, loss: 0.4674474000930786 2023-01-22 17:13:41.677973: step: 386/470, loss: 1.1901929378509521 2023-01-22 17:13:42.426641: step: 388/470, loss: 0.27445924282073975 2023-01-22 17:13:43.188016: step: 390/470, loss: 0.4656749665737152 2023-01-22 17:13:43.958186: step: 392/470, loss: 0.29722970724105835 2023-01-22 17:13:44.677430: step: 394/470, loss: 0.16318340599536896 2023-01-22 17:13:45.338697: step: 396/470, loss: 0.7538102865219116 2023-01-22 17:13:46.060067: step: 398/470, loss: 0.34675395488739014 2023-01-22 17:13:46.823513: step: 400/470, loss: 1.1929813623428345 2023-01-22 17:13:47.548694: step: 402/470, loss: 0.44265463948249817 2023-01-22 17:13:48.333891: step: 404/470, loss: 8.768211364746094 2023-01-22 17:13:49.164060: step: 406/470, loss: 0.8950780034065247 2023-01-22 17:13:49.955440: step: 408/470, loss: 0.7870341539382935 2023-01-22 17:13:50.642029: step: 410/470, loss: 2.6422438621520996 2023-01-22 17:13:51.357138: step: 412/470, loss: 1.0197150707244873 2023-01-22 17:13:52.239926: step: 414/470, loss: 1.3697253465652466 2023-01-22 17:13:53.016496: step: 416/470, loss: 1.2400399446487427 2023-01-22 17:13:53.789704: step: 418/470, loss: 0.4381885826587677 2023-01-22 17:13:54.542451: step: 420/470, loss: 0.4691491425037384 2023-01-22 17:13:55.273708: step: 422/470, loss: 0.3527713418006897 2023-01-22 17:13:55.981719: step: 424/470, loss: 0.6151089072227478 2023-01-22 17:13:56.782403: step: 426/470, loss: 0.35179460048675537 2023-01-22 17:13:57.454472: step: 428/470, loss: 0.31551289558410645 2023-01-22 17:13:58.151498: step: 430/470, loss: 0.3387365937232971 2023-01-22 17:13:58.833813: step: 432/470, loss: 0.9518783092498779 2023-01-22 17:13:59.499678: step: 434/470, loss: 0.33223646879196167 2023-01-22 17:14:00.284792: step: 436/470, loss: 0.948758065700531 2023-01-22 17:14:01.147495: step: 438/470, loss: 0.7131083607673645 2023-01-22 17:14:01.857491: step: 440/470, loss: 0.22820569574832916 2023-01-22 17:14:02.635697: step: 442/470, loss: 1.1058170795440674 2023-01-22 17:14:03.366651: step: 444/470, loss: 0.5581318140029907 2023-01-22 17:14:04.118906: step: 446/470, loss: 0.977337658405304 2023-01-22 17:14:04.903655: step: 448/470, loss: 0.9387962222099304 2023-01-22 17:14:05.638337: step: 450/470, loss: 0.45336490869522095 2023-01-22 17:14:06.323404: step: 452/470, loss: 0.7090914249420166 2023-01-22 17:14:07.063462: step: 454/470, loss: 1.7675325870513916 2023-01-22 17:14:07.915196: step: 456/470, loss: 0.7944380044937134 2023-01-22 17:14:08.620197: step: 458/470, loss: 0.595599353313446 2023-01-22 17:14:09.369351: step: 460/470, loss: 0.7079035043716431 2023-01-22 17:14:10.090000: step: 462/470, loss: 1.0472602844238281 2023-01-22 17:14:10.844721: step: 464/470, loss: 0.6768998503684998 2023-01-22 17:14:11.624678: step: 466/470, loss: 0.6595719456672668 2023-01-22 17:14:12.430221: step: 468/470, loss: 1.1421606540679932 2023-01-22 17:14:13.237226: step: 470/470, loss: 0.4721830487251282 2023-01-22 17:14:13.945522: step: 472/470, loss: 0.27509674429893494 2023-01-22 17:14:14.711470: step: 474/470, loss: 2.248384714126587 2023-01-22 17:14:15.445333: step: 476/470, loss: 0.5151785016059875 2023-01-22 17:14:16.245591: step: 478/470, loss: 1.4433629512786865 2023-01-22 17:14:16.954742: step: 480/470, loss: 0.573886513710022 2023-01-22 17:14:17.687129: step: 482/470, loss: 0.4885188937187195 2023-01-22 17:14:18.382805: step: 484/470, loss: 0.9017962217330933 2023-01-22 17:14:19.132093: step: 486/470, loss: 0.47882330417633057 2023-01-22 17:14:19.869920: step: 488/470, loss: 0.43378081917762756 2023-01-22 17:14:20.712583: step: 490/470, loss: 1.083345890045166 2023-01-22 17:14:21.431938: step: 492/470, loss: 0.5740756392478943 2023-01-22 17:14:22.190560: step: 494/470, loss: 0.7037774324417114 2023-01-22 17:14:22.955036: step: 496/470, loss: 1.5782699584960938 2023-01-22 17:14:23.674377: step: 498/470, loss: 1.1951762437820435 2023-01-22 17:14:24.350229: step: 500/470, loss: 3.3578271865844727 2023-01-22 17:14:25.061486: step: 502/470, loss: 0.5678948760032654 2023-01-22 17:14:25.711938: step: 504/470, loss: 0.7719627022743225 2023-01-22 17:14:26.421887: step: 506/470, loss: 0.2801421284675598 2023-01-22 17:14:27.122585: step: 508/470, loss: 0.18048332631587982 2023-01-22 17:14:27.800071: step: 510/470, loss: 1.8545786142349243 2023-01-22 17:14:28.556701: step: 512/470, loss: 1.0798882246017456 2023-01-22 17:14:29.320111: step: 514/470, loss: 1.414536952972412 2023-01-22 17:14:30.075444: step: 516/470, loss: 0.5196714997291565 2023-01-22 17:14:30.836558: step: 518/470, loss: 0.41817137598991394 2023-01-22 17:14:31.594521: step: 520/470, loss: 0.5657111406326294 2023-01-22 17:14:32.347831: step: 522/470, loss: 0.34673720598220825 2023-01-22 17:14:33.067251: step: 524/470, loss: 0.7128427028656006 2023-01-22 17:14:33.777698: step: 526/470, loss: 0.45498931407928467 2023-01-22 17:14:34.504535: step: 528/470, loss: 0.45157119631767273 2023-01-22 17:14:35.189932: step: 530/470, loss: 0.4576375186443329 2023-01-22 17:14:35.979517: step: 532/470, loss: 0.33187219500541687 2023-01-22 17:14:36.764802: step: 534/470, loss: 0.35542771220207214 2023-01-22 17:14:37.519522: step: 536/470, loss: 0.6708710193634033 2023-01-22 17:14:38.304334: step: 538/470, loss: 0.8843319416046143 2023-01-22 17:14:38.954342: step: 540/470, loss: 0.8301752805709839 2023-01-22 17:14:39.714885: step: 542/470, loss: 0.2672118842601776 2023-01-22 17:14:40.537007: step: 544/470, loss: 0.3918260335922241 2023-01-22 17:14:41.371615: step: 546/470, loss: 0.4030534029006958 2023-01-22 17:14:42.207919: step: 548/470, loss: 3.20204496383667 2023-01-22 17:14:42.940567: step: 550/470, loss: 2.8569650650024414 2023-01-22 17:14:43.603315: step: 552/470, loss: 0.45698219537734985 2023-01-22 17:14:44.300649: step: 554/470, loss: 0.2088402956724167 2023-01-22 17:14:45.170829: step: 556/470, loss: 0.5278826951980591 2023-01-22 17:14:45.829681: step: 558/470, loss: 0.3262355327606201 2023-01-22 17:14:46.558185: step: 560/470, loss: 0.48174381256103516 2023-01-22 17:14:47.380486: step: 562/470, loss: 0.8578274250030518 2023-01-22 17:14:48.107292: step: 564/470, loss: 2.5235321521759033 2023-01-22 17:14:48.862959: step: 566/470, loss: 0.13515417277812958 2023-01-22 17:14:49.657251: step: 568/470, loss: 0.5105624794960022 2023-01-22 17:14:50.407449: step: 570/470, loss: 1.3545113801956177 2023-01-22 17:14:51.104315: step: 572/470, loss: 0.43925705552101135 2023-01-22 17:14:51.817353: step: 574/470, loss: 0.8375481367111206 2023-01-22 17:14:52.549940: step: 576/470, loss: 1.22329580783844 2023-01-22 17:14:53.277635: step: 578/470, loss: 1.2642005681991577 2023-01-22 17:14:54.018965: step: 580/470, loss: 0.7364957332611084 2023-01-22 17:14:54.796039: step: 582/470, loss: 12.388585090637207 2023-01-22 17:14:55.514752: step: 584/470, loss: 0.9263208508491516 2023-01-22 17:14:56.243210: step: 586/470, loss: 0.48008114099502563 2023-01-22 17:14:56.944512: step: 588/470, loss: 5.323960781097412 2023-01-22 17:14:57.644580: step: 590/470, loss: 0.3100872337818146 2023-01-22 17:14:58.471580: step: 592/470, loss: 0.39643803238868713 2023-01-22 17:14:59.153325: step: 594/470, loss: 0.5192726254463196 2023-01-22 17:14:59.869160: step: 596/470, loss: 0.08385767787694931 2023-01-22 17:15:00.646096: step: 598/470, loss: 0.23493848741054535 2023-01-22 17:15:01.407864: step: 600/470, loss: 0.8058767914772034 2023-01-22 17:15:02.154141: step: 602/470, loss: 0.7003133296966553 2023-01-22 17:15:02.897566: step: 604/470, loss: 0.3727822005748749 2023-01-22 17:15:03.644764: step: 606/470, loss: 1.2153240442276 2023-01-22 17:15:04.384436: step: 608/470, loss: 1.3041852712631226 2023-01-22 17:15:05.118681: step: 610/470, loss: 1.750708818435669 2023-01-22 17:15:05.856348: step: 612/470, loss: 2.3848016262054443 2023-01-22 17:15:06.704340: step: 614/470, loss: 0.21485082805156708 2023-01-22 17:15:07.453593: step: 616/470, loss: 1.353153109550476 2023-01-22 17:15:08.184476: step: 618/470, loss: 0.9749501347541809 2023-01-22 17:15:09.002580: step: 620/470, loss: 1.2679924964904785 2023-01-22 17:15:09.705047: step: 622/470, loss: 0.6863827705383301 2023-01-22 17:15:10.449431: step: 624/470, loss: 0.29089784622192383 2023-01-22 17:15:11.162170: step: 626/470, loss: 0.26793599128723145 2023-01-22 17:15:12.040568: step: 628/470, loss: 0.4520954489707947 2023-01-22 17:15:12.739284: step: 630/470, loss: 0.6169873476028442 2023-01-22 17:15:13.581583: step: 632/470, loss: 0.570949375629425 2023-01-22 17:15:14.286534: step: 634/470, loss: 0.278425008058548 2023-01-22 17:15:15.080131: step: 636/470, loss: 1.7957154512405396 2023-01-22 17:15:15.843588: step: 638/470, loss: 0.29361969232559204 2023-01-22 17:15:16.710050: step: 640/470, loss: 0.5023148059844971 2023-01-22 17:15:17.415395: step: 642/470, loss: 0.48273128271102905 2023-01-22 17:15:18.173336: step: 644/470, loss: 1.2374017238616943 2023-01-22 17:15:18.968965: step: 646/470, loss: 0.22361309826374054 2023-01-22 17:15:19.699188: step: 648/470, loss: 0.7312672734260559 2023-01-22 17:15:20.454449: step: 650/470, loss: 0.360087126493454 2023-01-22 17:15:21.138364: step: 652/470, loss: 1.0528044700622559 2023-01-22 17:15:21.867136: step: 654/470, loss: 1.5701942443847656 2023-01-22 17:15:22.572846: step: 656/470, loss: 0.09973641484975815 2023-01-22 17:15:23.248734: step: 658/470, loss: 0.39809560775756836 2023-01-22 17:15:23.962986: step: 660/470, loss: 0.2803775668144226 2023-01-22 17:15:24.839378: step: 662/470, loss: 0.5510927438735962 2023-01-22 17:15:25.519592: step: 664/470, loss: 0.28761452436447144 2023-01-22 17:15:26.190064: step: 666/470, loss: 1.0456538200378418 2023-01-22 17:15:26.938486: step: 668/470, loss: 1.3108751773834229 2023-01-22 17:15:27.658705: step: 670/470, loss: 0.3738193213939667 2023-01-22 17:15:28.421575: step: 672/470, loss: 0.9928417205810547 2023-01-22 17:15:29.137281: step: 674/470, loss: 1.9106264114379883 2023-01-22 17:15:29.855059: step: 676/470, loss: 1.6555376052856445 2023-01-22 17:15:30.509097: step: 678/470, loss: 0.9879150390625 2023-01-22 17:15:31.163194: step: 680/470, loss: 1.1278126239776611 2023-01-22 17:15:31.928919: step: 682/470, loss: 0.7462809085845947 2023-01-22 17:15:32.609083: step: 684/470, loss: 0.2949332594871521 2023-01-22 17:15:33.312422: step: 686/470, loss: 0.5867691040039062 2023-01-22 17:15:34.074554: step: 688/470, loss: 0.783571720123291 2023-01-22 17:15:34.783911: step: 690/470, loss: 0.28259211778640747 2023-01-22 17:15:35.622274: step: 692/470, loss: 2.536674976348877 2023-01-22 17:15:36.373491: step: 694/470, loss: 0.3623759150505066 2023-01-22 17:15:37.062156: step: 696/470, loss: 0.904730498790741 2023-01-22 17:15:37.777099: step: 698/470, loss: 0.15288175642490387 2023-01-22 17:15:38.519855: step: 700/470, loss: 0.5689459443092346 2023-01-22 17:15:39.242905: step: 702/470, loss: 0.19390466809272766 2023-01-22 17:15:39.988996: step: 704/470, loss: 5.31660270690918 2023-01-22 17:15:40.730401: step: 706/470, loss: 3.0160305500030518 2023-01-22 17:15:41.441012: step: 708/470, loss: 0.5570680499076843 2023-01-22 17:15:42.159638: step: 710/470, loss: 0.6179437637329102 2023-01-22 17:15:42.922891: step: 712/470, loss: 0.9332218170166016 2023-01-22 17:15:43.604189: step: 714/470, loss: 0.5039190649986267 2023-01-22 17:15:44.305324: step: 716/470, loss: 2.8622703552246094 2023-01-22 17:15:45.144030: step: 718/470, loss: 1.5712451934814453 2023-01-22 17:15:45.851882: step: 720/470, loss: 0.47472241520881653 2023-01-22 17:15:46.584166: step: 722/470, loss: 0.4161088764667511 2023-01-22 17:15:47.335412: step: 724/470, loss: 0.408611923456192 2023-01-22 17:15:48.164716: step: 726/470, loss: 0.4773179292678833 2023-01-22 17:15:49.004520: step: 728/470, loss: 0.8895754814147949 2023-01-22 17:15:49.772002: step: 730/470, loss: 0.5631923675537109 2023-01-22 17:15:50.502280: step: 732/470, loss: 2.1085853576660156 2023-01-22 17:15:51.277432: step: 734/470, loss: 0.3119111657142639 2023-01-22 17:15:52.046230: step: 736/470, loss: 1.9866414070129395 2023-01-22 17:15:52.811734: step: 738/470, loss: 4.040884494781494 2023-01-22 17:15:53.496958: step: 740/470, loss: 1.7604376077651978 2023-01-22 17:15:54.324365: step: 742/470, loss: 1.824992060661316 2023-01-22 17:15:55.060968: step: 744/470, loss: 1.3587254285812378 2023-01-22 17:15:55.775964: step: 746/470, loss: 0.6129835844039917 2023-01-22 17:15:56.496907: step: 748/470, loss: 0.756197988986969 2023-01-22 17:15:57.288121: step: 750/470, loss: 1.2754197120666504 2023-01-22 17:15:58.175794: step: 752/470, loss: 2.0039122104644775 2023-01-22 17:15:58.922788: step: 754/470, loss: 0.694054365158081 2023-01-22 17:15:59.739905: step: 756/470, loss: 1.3741402626037598 2023-01-22 17:16:00.469370: step: 758/470, loss: 1.2446845769882202 2023-01-22 17:16:01.176782: step: 760/470, loss: 1.1512802839279175 2023-01-22 17:16:01.957396: step: 762/470, loss: 0.31948322057724 2023-01-22 17:16:02.683544: step: 764/470, loss: 0.9004114866256714 2023-01-22 17:16:03.495289: step: 766/470, loss: 0.7966269254684448 2023-01-22 17:16:04.268967: step: 768/470, loss: 0.5935583114624023 2023-01-22 17:16:05.028993: step: 770/470, loss: 1.2309718132019043 2023-01-22 17:16:05.742185: step: 772/470, loss: 0.7194991707801819 2023-01-22 17:16:06.458008: step: 774/470, loss: 0.9128872156143188 2023-01-22 17:16:07.175901: step: 776/470, loss: 0.262151837348938 2023-01-22 17:16:07.913831: step: 778/470, loss: 0.8558886051177979 2023-01-22 17:16:08.709845: step: 780/470, loss: 0.5099863409996033 2023-01-22 17:16:09.555580: step: 782/470, loss: 0.5506338477134705 2023-01-22 17:16:10.248498: step: 784/470, loss: 1.1489386558532715 2023-01-22 17:16:10.916556: step: 786/470, loss: 0.35874584317207336 2023-01-22 17:16:11.842807: step: 788/470, loss: 0.16319824755191803 2023-01-22 17:16:12.492305: step: 790/470, loss: 2.4045443534851074 2023-01-22 17:16:13.230484: step: 792/470, loss: 0.33701398968696594 2023-01-22 17:16:13.956672: step: 794/470, loss: 0.24340465664863586 2023-01-22 17:16:14.693180: step: 796/470, loss: 0.34215545654296875 2023-01-22 17:16:15.461847: step: 798/470, loss: 0.5952900648117065 2023-01-22 17:16:16.223351: step: 800/470, loss: 0.30067580938339233 2023-01-22 17:16:16.970197: step: 802/470, loss: 0.27011197805404663 2023-01-22 17:16:17.678802: step: 804/470, loss: 0.5309256315231323 2023-01-22 17:16:18.399887: step: 806/470, loss: 0.8290297985076904 2023-01-22 17:16:19.137489: step: 808/470, loss: 0.8590260148048401 2023-01-22 17:16:20.079520: step: 810/470, loss: 0.4753422737121582 2023-01-22 17:16:20.842895: step: 812/470, loss: 1.0361976623535156 2023-01-22 17:16:21.532530: step: 814/470, loss: 0.4226698577404022 2023-01-22 17:16:22.336656: step: 816/470, loss: 0.7795215249061584 2023-01-22 17:16:23.129532: step: 818/470, loss: 0.30003947019577026 2023-01-22 17:16:23.845501: step: 820/470, loss: 0.40043067932128906 2023-01-22 17:16:24.676196: step: 822/470, loss: 1.1841524839401245 2023-01-22 17:16:25.388756: step: 824/470, loss: 0.3702228367328644 2023-01-22 17:16:26.109954: step: 826/470, loss: 0.14006973803043365 2023-01-22 17:16:26.795670: step: 828/470, loss: 2.2968199253082275 2023-01-22 17:16:27.516227: step: 830/470, loss: 0.25178012251853943 2023-01-22 17:16:28.243089: step: 832/470, loss: 3.981785774230957 2023-01-22 17:16:28.963419: step: 834/470, loss: 0.2811685800552368 2023-01-22 17:16:29.731988: step: 836/470, loss: 1.4167596101760864 2023-01-22 17:16:30.568511: step: 838/470, loss: 1.3315188884735107 2023-01-22 17:16:31.287005: step: 840/470, loss: 0.20409701764583588 2023-01-22 17:16:32.018683: step: 842/470, loss: 0.794757604598999 2023-01-22 17:16:32.824780: step: 844/470, loss: 0.4948848485946655 2023-01-22 17:16:33.538492: step: 846/470, loss: 2.682526111602783 2023-01-22 17:16:34.314993: step: 848/470, loss: 0.9555333852767944 2023-01-22 17:16:35.074816: step: 850/470, loss: 1.1651482582092285 2023-01-22 17:16:35.801703: step: 852/470, loss: 0.5566208362579346 2023-01-22 17:16:36.525765: step: 854/470, loss: 0.6673471927642822 2023-01-22 17:16:37.255731: step: 856/470, loss: 0.4028908610343933 2023-01-22 17:16:37.998522: step: 858/470, loss: 1.8048272132873535 2023-01-22 17:16:38.750153: step: 860/470, loss: 2.100435972213745 2023-01-22 17:16:39.445271: step: 862/470, loss: 0.656417965888977 2023-01-22 17:16:40.215283: step: 864/470, loss: 0.5497065186500549 2023-01-22 17:16:40.973452: step: 866/470, loss: 0.4357500970363617 2023-01-22 17:16:41.634060: step: 868/470, loss: 0.40799766778945923 2023-01-22 17:16:42.342560: step: 870/470, loss: 1.0405466556549072 2023-01-22 17:16:43.069920: step: 872/470, loss: 1.2654999494552612 2023-01-22 17:16:43.830258: step: 874/470, loss: 0.4379280209541321 2023-01-22 17:16:44.637857: step: 876/470, loss: 1.0722965002059937 2023-01-22 17:16:45.351109: step: 878/470, loss: 0.22580832242965698 2023-01-22 17:16:46.061435: step: 880/470, loss: 0.5495216846466064 2023-01-22 17:16:46.828260: step: 882/470, loss: 0.23686954379081726 2023-01-22 17:16:47.532254: step: 884/470, loss: 0.6185952425003052 2023-01-22 17:16:48.311161: step: 886/470, loss: 1.4941705465316772 2023-01-22 17:16:49.030121: step: 888/470, loss: 6.853786945343018 2023-01-22 17:16:49.762451: step: 890/470, loss: 0.35840287804603577 2023-01-22 17:16:50.553174: step: 892/470, loss: 0.45041191577911377 2023-01-22 17:16:51.262690: step: 894/470, loss: 0.6162185668945312 2023-01-22 17:16:51.990284: step: 896/470, loss: 0.2759486436843872 2023-01-22 17:16:52.732123: step: 898/470, loss: 0.5360721349716187 2023-01-22 17:16:53.531879: step: 900/470, loss: 0.7364007234573364 2023-01-22 17:16:54.257988: step: 902/470, loss: 1.1799520254135132 2023-01-22 17:16:55.046422: step: 904/470, loss: 0.29465073347091675 2023-01-22 17:16:55.802689: step: 906/470, loss: 0.6081302165985107 2023-01-22 17:16:56.620137: step: 908/470, loss: 1.4914206266403198 2023-01-22 17:16:57.303574: step: 910/470, loss: 0.7673165202140808 2023-01-22 17:16:58.253816: step: 912/470, loss: 0.6707650423049927 2023-01-22 17:16:59.085693: step: 914/470, loss: 0.4942966103553772 2023-01-22 17:16:59.823511: step: 916/470, loss: 1.598168969154358 2023-01-22 17:17:00.639722: step: 918/470, loss: 1.6423366069793701 2023-01-22 17:17:01.526867: step: 920/470, loss: 1.2543729543685913 2023-01-22 17:17:02.268016: step: 922/470, loss: 0.7959638237953186 2023-01-22 17:17:03.028492: step: 924/470, loss: 1.5553340911865234 2023-01-22 17:17:03.826275: step: 926/470, loss: 1.098331093788147 2023-01-22 17:17:04.522212: step: 928/470, loss: 0.5700544118881226 2023-01-22 17:17:05.301937: step: 930/470, loss: 0.5074666738510132 2023-01-22 17:17:05.989000: step: 932/470, loss: 0.383233904838562 2023-01-22 17:17:06.684657: step: 934/470, loss: 1.1787450313568115 2023-01-22 17:17:07.391287: step: 936/470, loss: 5.3494744300842285 2023-01-22 17:17:08.132298: step: 938/470, loss: 1.421501636505127 2023-01-22 17:17:08.837979: step: 940/470, loss: 1.8558794260025024 2023-01-22 17:17:09.543019: step: 942/470, loss: 0.21680720150470734 ================================================== Loss: 0.910 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2642533988207765, 'r': 0.3093820627560135, 'f1': 0.2850425648119215}, 'combined': 0.21003136354562635, 'epoch': 5} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32274736699326, 'r': 0.3286437131210215, 'f1': 0.3256688534024415}, 'combined': 0.22683402724548166, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25691603610707214, 'r': 0.3115167876136985, 'f1': 0.281594077311182}, 'combined': 0.2074903727556078, 'epoch': 5} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32750994948956674, 'r': 0.33097399703224484, 'f1': 0.32923286170591554}, 'combined': 0.2293164210886974, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24098832417994995, 'r': 0.31278180975158587, 'f1': 0.27223123656331255}, 'combined': 0.20059143746770397, 'epoch': 5} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3105387015499828, 'r': 0.33621786340892373, 'f1': 0.3228684930242665}, 'combined': 0.22488352747958862, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21323529411764705, 'r': 0.4142857142857143, 'f1': 0.28155339805825247}, 'combined': 0.1877022653721683, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22058823529411764, 'r': 0.32608695652173914, 'f1': 0.2631578947368421}, 'combined': 0.13157894736842105, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35526315789473684, 'r': 0.23275862068965517, 'f1': 0.28125}, 'combined': 0.1875, 'epoch': 5} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2642533988207765, 'r': 0.3093820627560135, 'f1': 0.2850425648119215}, 'combined': 0.21003136354562635, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32274736699326, 'r': 0.3286437131210215, 'f1': 0.3256688534024415}, 'combined': 0.22683402724548166, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21323529411764705, 'r': 0.4142857142857143, 'f1': 0.28155339805825247}, 'combined': 0.1877022653721683, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28632789500190714, 'r': 0.21624004214565282, 'f1': 0.24639676153677628}, 'combined': 0.1815555085007825, 'epoch': 4} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3345971660892728, 'r': 0.26806343565653556, 'f1': 0.2976576389530171}, 'combined': 0.20732372862399204, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32407407407407407, 'r': 0.3804347826086957, 'f1': 0.35000000000000003}, 'combined': 0.17500000000000002, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.20295208490866637, 'r': 0.24783940803591395, 'f1': 0.22316093081021016}, 'combined': 0.16443437007068115, 'epoch': 1} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.279769607766213, 'r': 0.2325601629274273, 'f1': 0.25398979050608034}, 'combined': 0.17690831179527985, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 1} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:19:54.065028: step: 2/470, loss: 0.46913114190101624 2023-01-22 17:19:54.807215: step: 4/470, loss: 0.3887912929058075 2023-01-22 17:19:55.497540: step: 6/470, loss: 0.4061891436576843 2023-01-22 17:19:56.153411: step: 8/470, loss: 0.3760465979576111 2023-01-22 17:19:56.839623: step: 10/470, loss: 0.7916153073310852 2023-01-22 17:19:57.598154: step: 12/470, loss: 0.27006056904792786 2023-01-22 17:19:58.381528: step: 14/470, loss: 0.5665428042411804 2023-01-22 17:19:59.071192: step: 16/470, loss: 1.7454071044921875 2023-01-22 17:19:59.813031: step: 18/470, loss: 0.7772339582443237 2023-01-22 17:20:00.673423: step: 20/470, loss: 0.10461916774511337 2023-01-22 17:20:01.460462: step: 22/470, loss: 0.2878537178039551 2023-01-22 17:20:02.226250: step: 24/470, loss: 0.647771418094635 2023-01-22 17:20:02.982165: step: 26/470, loss: 0.3383640646934509 2023-01-22 17:20:03.709914: step: 28/470, loss: 0.6665865778923035 2023-01-22 17:20:04.483671: step: 30/470, loss: 0.4555095434188843 2023-01-22 17:20:05.252725: step: 32/470, loss: 0.3503432273864746 2023-01-22 17:20:05.963417: step: 34/470, loss: 0.2352353185415268 2023-01-22 17:20:06.641018: step: 36/470, loss: 1.3757781982421875 2023-01-22 17:20:07.332191: step: 38/470, loss: 0.4522511959075928 2023-01-22 17:20:08.152690: step: 40/470, loss: 0.2714459002017975 2023-01-22 17:20:08.875770: step: 42/470, loss: 0.26817435026168823 2023-01-22 17:20:09.599995: step: 44/470, loss: 0.6385064125061035 2023-01-22 17:20:10.372612: step: 46/470, loss: 0.2124578356742859 2023-01-22 17:20:11.135092: step: 48/470, loss: 0.12701259553432465 2023-01-22 17:20:11.865758: step: 50/470, loss: 0.8603786826133728 2023-01-22 17:20:12.666904: step: 52/470, loss: 0.4419752061367035 2023-01-22 17:20:13.396184: step: 54/470, loss: 3.501265287399292 2023-01-22 17:20:14.050863: step: 56/470, loss: 1.0722558498382568 2023-01-22 17:20:14.895151: step: 58/470, loss: 0.18907511234283447 2023-01-22 17:20:15.587335: step: 60/470, loss: 3.298689842224121 2023-01-22 17:20:16.305228: step: 62/470, loss: 0.30768173933029175 2023-01-22 17:20:17.014368: step: 64/470, loss: 0.3563343286514282 2023-01-22 17:20:17.752709: step: 66/470, loss: 0.727003276348114 2023-01-22 17:20:18.473198: step: 68/470, loss: 0.4526892602443695 2023-01-22 17:20:19.265142: step: 70/470, loss: 0.3136780560016632 2023-01-22 17:20:20.018652: step: 72/470, loss: 0.8561743497848511 2023-01-22 17:20:20.810790: step: 74/470, loss: 0.3809729814529419 2023-01-22 17:20:21.615112: step: 76/470, loss: 0.3150908648967743 2023-01-22 17:20:22.310650: step: 78/470, loss: 0.3166169822216034 2023-01-22 17:20:23.076459: step: 80/470, loss: 1.1344679594039917 2023-01-22 17:20:23.824620: step: 82/470, loss: 0.2568296790122986 2023-01-22 17:20:24.573812: step: 84/470, loss: 0.19643135368824005 2023-01-22 17:20:25.215262: step: 86/470, loss: 0.7549906373023987 2023-01-22 17:20:25.997728: step: 88/470, loss: 1.3208808898925781 2023-01-22 17:20:26.743314: step: 90/470, loss: 0.7214917540550232 2023-01-22 17:20:27.496983: step: 92/470, loss: 0.5928862690925598 2023-01-22 17:20:28.263841: step: 94/470, loss: 1.5356335639953613 2023-01-22 17:20:28.961857: step: 96/470, loss: 0.336683452129364 2023-01-22 17:20:29.723008: step: 98/470, loss: 5.519505500793457 2023-01-22 17:20:30.455249: step: 100/470, loss: 0.6986427307128906 2023-01-22 17:20:31.178377: step: 102/470, loss: 1.0318654775619507 2023-01-22 17:20:31.960891: step: 104/470, loss: 0.5973689556121826 2023-01-22 17:20:32.701215: step: 106/470, loss: 0.46960803866386414 2023-01-22 17:20:33.407835: step: 108/470, loss: 0.45779475569725037 2023-01-22 17:20:34.166560: step: 110/470, loss: 1.0601186752319336 2023-01-22 17:20:34.887364: step: 112/470, loss: 0.5745185613632202 2023-01-22 17:20:35.636767: step: 114/470, loss: 0.34605327248573303 2023-01-22 17:20:36.354530: step: 116/470, loss: 0.9498859643936157 2023-01-22 17:20:37.083246: step: 118/470, loss: 0.25999417901039124 2023-01-22 17:20:37.908026: step: 120/470, loss: 0.32462361454963684 2023-01-22 17:20:38.654181: step: 122/470, loss: 0.1615123152732849 2023-01-22 17:20:39.328663: step: 124/470, loss: 0.34102755784988403 2023-01-22 17:20:40.116013: step: 126/470, loss: 0.18280981481075287 2023-01-22 17:20:40.909616: step: 128/470, loss: 0.6337746977806091 2023-01-22 17:20:41.669703: step: 130/470, loss: 0.35255953669548035 2023-01-22 17:20:42.418524: step: 132/470, loss: 0.36493754386901855 2023-01-22 17:20:43.140596: step: 134/470, loss: 0.4938736855983734 2023-01-22 17:20:43.913855: step: 136/470, loss: 0.8310595154762268 2023-01-22 17:20:44.757410: step: 138/470, loss: 0.36032968759536743 2023-01-22 17:20:45.518849: step: 140/470, loss: 0.6380537748336792 2023-01-22 17:20:46.205710: step: 142/470, loss: 0.634308397769928 2023-01-22 17:20:46.924444: step: 144/470, loss: 0.3668132722377777 2023-01-22 17:20:47.675257: step: 146/470, loss: 0.263011634349823 2023-01-22 17:20:48.457075: step: 148/470, loss: 0.5096966624259949 2023-01-22 17:20:49.253695: step: 150/470, loss: 0.811173141002655 2023-01-22 17:20:49.969926: step: 152/470, loss: 0.5178455114364624 2023-01-22 17:20:50.696528: step: 154/470, loss: 0.7749607563018799 2023-01-22 17:20:51.469032: step: 156/470, loss: 0.5281555652618408 2023-01-22 17:20:52.301719: step: 158/470, loss: 0.7538830637931824 2023-01-22 17:20:53.058417: step: 160/470, loss: 0.13230903446674347 2023-01-22 17:20:53.879530: step: 162/470, loss: 1.0788747072219849 2023-01-22 17:20:54.646704: step: 164/470, loss: 0.6544629335403442 2023-01-22 17:20:55.340509: step: 166/470, loss: 0.6774575114250183 2023-01-22 17:20:55.982659: step: 168/470, loss: 0.5532941818237305 2023-01-22 17:20:56.729725: step: 170/470, loss: 0.3752504289150238 2023-01-22 17:20:57.500511: step: 172/470, loss: 1.2700722217559814 2023-01-22 17:20:58.205350: step: 174/470, loss: 0.4598991572856903 2023-01-22 17:20:59.046370: step: 176/470, loss: 2.486417293548584 2023-01-22 17:20:59.750262: step: 178/470, loss: 1.1655266284942627 2023-01-22 17:21:00.475774: step: 180/470, loss: 0.5170928239822388 2023-01-22 17:21:01.245330: step: 182/470, loss: 0.3279862403869629 2023-01-22 17:21:02.005982: step: 184/470, loss: 0.6931787729263306 2023-01-22 17:21:02.674336: step: 186/470, loss: 3.434424877166748 2023-01-22 17:21:03.384651: step: 188/470, loss: 0.5698443055152893 2023-01-22 17:21:04.009004: step: 190/470, loss: 0.19923467934131622 2023-01-22 17:21:04.661563: step: 192/470, loss: 0.250698983669281 2023-01-22 17:21:05.329561: step: 194/470, loss: 0.8043193817138672 2023-01-22 17:21:06.004919: step: 196/470, loss: 0.6029735803604126 2023-01-22 17:21:06.776715: step: 198/470, loss: 0.30597418546676636 2023-01-22 17:21:07.432492: step: 200/470, loss: 0.4779759645462036 2023-01-22 17:21:08.172761: step: 202/470, loss: 0.23415221273899078 2023-01-22 17:21:08.878662: step: 204/470, loss: 0.5951058268547058 2023-01-22 17:21:09.598794: step: 206/470, loss: 1.3589835166931152 2023-01-22 17:21:10.298876: step: 208/470, loss: 0.26382386684417725 2023-01-22 17:21:11.055544: step: 210/470, loss: 0.6494569182395935 2023-01-22 17:21:11.734139: step: 212/470, loss: 0.566684365272522 2023-01-22 17:21:12.478295: step: 214/470, loss: 0.5304621458053589 2023-01-22 17:21:13.269013: step: 216/470, loss: 0.36520957946777344 2023-01-22 17:21:14.021257: step: 218/470, loss: 1.9913074970245361 2023-01-22 17:21:14.702170: step: 220/470, loss: 0.5152336955070496 2023-01-22 17:21:15.454195: step: 222/470, loss: 0.7162929773330688 2023-01-22 17:21:16.238200: step: 224/470, loss: 0.4547756612300873 2023-01-22 17:21:17.005154: step: 226/470, loss: 1.3032582998275757 2023-01-22 17:21:17.714194: step: 228/470, loss: 0.29936328530311584 2023-01-22 17:21:18.514480: step: 230/470, loss: 0.4857710599899292 2023-01-22 17:21:19.280735: step: 232/470, loss: 0.701897382736206 2023-01-22 17:21:20.002364: step: 234/470, loss: 1.8443883657455444 2023-01-22 17:21:20.745333: step: 236/470, loss: 0.3022279441356659 2023-01-22 17:21:21.430727: step: 238/470, loss: 0.4773560166358948 2023-01-22 17:21:22.115801: step: 240/470, loss: 0.23034065961837769 2023-01-22 17:21:22.823111: step: 242/470, loss: 0.5290822386741638 2023-01-22 17:21:23.652992: step: 244/470, loss: 0.9485624432563782 2023-01-22 17:21:24.366992: step: 246/470, loss: 0.16987621784210205 2023-01-22 17:21:25.151697: step: 248/470, loss: 0.24645563960075378 2023-01-22 17:21:25.905402: step: 250/470, loss: 0.35188791155815125 2023-01-22 17:21:26.758082: step: 252/470, loss: 0.45709526538848877 2023-01-22 17:21:27.443127: step: 254/470, loss: 0.9818328022956848 2023-01-22 17:21:28.217550: step: 256/470, loss: 0.13087573647499084 2023-01-22 17:21:28.982558: step: 258/470, loss: 0.2843538224697113 2023-01-22 17:21:29.643477: step: 260/470, loss: 0.8852912783622742 2023-01-22 17:21:30.344149: step: 262/470, loss: 0.7009601593017578 2023-01-22 17:21:31.048626: step: 264/470, loss: 0.623857855796814 2023-01-22 17:21:31.760826: step: 266/470, loss: 0.18668121099472046 2023-01-22 17:21:32.503528: step: 268/470, loss: 0.9859969615936279 2023-01-22 17:21:33.213322: step: 270/470, loss: 0.6130749583244324 2023-01-22 17:21:33.906700: step: 272/470, loss: 0.7168523669242859 2023-01-22 17:21:34.696981: step: 274/470, loss: 0.4143531620502472 2023-01-22 17:21:35.470342: step: 276/470, loss: 0.7964304089546204 2023-01-22 17:21:36.201588: step: 278/470, loss: 0.6704311370849609 2023-01-22 17:21:36.937129: step: 280/470, loss: 1.371323823928833 2023-01-22 17:21:37.762704: step: 282/470, loss: 0.8651912212371826 2023-01-22 17:21:38.539655: step: 284/470, loss: 0.3530345559120178 2023-01-22 17:21:39.285399: step: 286/470, loss: 0.3043641448020935 2023-01-22 17:21:39.991141: step: 288/470, loss: 0.1724073439836502 2023-01-22 17:21:40.786703: step: 290/470, loss: 0.2577505111694336 2023-01-22 17:21:41.585747: step: 292/470, loss: 0.3419148623943329 2023-01-22 17:21:42.383018: step: 294/470, loss: 0.2580012083053589 2023-01-22 17:21:43.163635: step: 296/470, loss: 0.7738077044487 2023-01-22 17:21:43.987334: step: 298/470, loss: 1.5503582954406738 2023-01-22 17:21:44.830928: step: 300/470, loss: 0.36178097128868103 2023-01-22 17:21:45.606529: step: 302/470, loss: 3.4303958415985107 2023-01-22 17:21:46.388295: step: 304/470, loss: 1.861863374710083 2023-01-22 17:21:47.199688: step: 306/470, loss: 0.3691105246543884 2023-01-22 17:21:48.054466: step: 308/470, loss: 0.2734450399875641 2023-01-22 17:21:48.889222: step: 310/470, loss: 1.402500033378601 2023-01-22 17:21:49.783114: step: 312/470, loss: 0.5831880569458008 2023-01-22 17:21:50.547366: step: 314/470, loss: 0.6897637844085693 2023-01-22 17:21:51.265524: step: 316/470, loss: 0.45082640647888184 2023-01-22 17:21:52.079461: step: 318/470, loss: 0.24482515454292297 2023-01-22 17:21:52.816826: step: 320/470, loss: 0.731791615486145 2023-01-22 17:21:53.563157: step: 322/470, loss: 0.4253387749195099 2023-01-22 17:21:54.269598: step: 324/470, loss: 0.7619374394416809 2023-01-22 17:21:54.981522: step: 326/470, loss: 0.3442683815956116 2023-01-22 17:21:55.847939: step: 328/470, loss: 0.7861752510070801 2023-01-22 17:21:56.597449: step: 330/470, loss: 0.6463299989700317 2023-01-22 17:21:57.321814: step: 332/470, loss: 0.7153461575508118 2023-01-22 17:21:58.076837: step: 334/470, loss: 0.2146746665239334 2023-01-22 17:21:58.805094: step: 336/470, loss: 0.26649871468544006 2023-01-22 17:21:59.500280: step: 338/470, loss: 0.24172751605510712 2023-01-22 17:22:00.232686: step: 340/470, loss: 0.26765137910842896 2023-01-22 17:22:00.987882: step: 342/470, loss: 0.253934770822525 2023-01-22 17:22:01.815921: step: 344/470, loss: 0.4995259940624237 2023-01-22 17:22:02.648610: step: 346/470, loss: 0.9960381388664246 2023-01-22 17:22:03.504486: step: 348/470, loss: 0.3678852617740631 2023-01-22 17:22:04.299564: step: 350/470, loss: 1.4511865377426147 2023-01-22 17:22:05.045189: step: 352/470, loss: 0.4098103940486908 2023-01-22 17:22:05.780214: step: 354/470, loss: 0.8686992526054382 2023-01-22 17:22:06.652529: step: 356/470, loss: 0.3895498514175415 2023-01-22 17:22:07.435225: step: 358/470, loss: 0.4124021828174591 2023-01-22 17:22:08.115125: step: 360/470, loss: 0.39607658982276917 2023-01-22 17:22:08.869088: step: 362/470, loss: 0.5164311528205872 2023-01-22 17:22:09.653736: step: 364/470, loss: 0.38875195384025574 2023-01-22 17:22:10.415555: step: 366/470, loss: 1.0487358570098877 2023-01-22 17:22:11.151972: step: 368/470, loss: 0.2708939015865326 2023-01-22 17:22:12.020427: step: 370/470, loss: 0.3877793848514557 2023-01-22 17:22:12.878893: step: 372/470, loss: 1.2002607583999634 2023-01-22 17:22:13.630474: step: 374/470, loss: 0.542573869228363 2023-01-22 17:22:14.347596: step: 376/470, loss: 0.11055511236190796 2023-01-22 17:22:15.176234: step: 378/470, loss: 0.9897867441177368 2023-01-22 17:22:15.916705: step: 380/470, loss: 1.5830128192901611 2023-01-22 17:22:16.667853: step: 382/470, loss: 0.3970726430416107 2023-01-22 17:22:17.457480: step: 384/470, loss: 0.5450624227523804 2023-01-22 17:22:18.167961: step: 386/470, loss: 0.47805434465408325 2023-01-22 17:22:18.960022: step: 388/470, loss: 0.5078467130661011 2023-01-22 17:22:19.714187: step: 390/470, loss: 0.8410879969596863 2023-01-22 17:22:20.404429: step: 392/470, loss: 0.564120352268219 2023-01-22 17:22:21.164233: step: 394/470, loss: 0.4679042398929596 2023-01-22 17:22:21.892081: step: 396/470, loss: 0.3612174689769745 2023-01-22 17:22:22.670058: step: 398/470, loss: 0.36533409357070923 2023-01-22 17:22:23.385975: step: 400/470, loss: 1.2119630575180054 2023-01-22 17:22:24.140842: step: 402/470, loss: 0.5056576132774353 2023-01-22 17:22:24.889654: step: 404/470, loss: 0.634218156337738 2023-01-22 17:22:25.558393: step: 406/470, loss: 3.289701223373413 2023-01-22 17:22:26.304846: step: 408/470, loss: 0.0705670490860939 2023-01-22 17:22:27.075343: step: 410/470, loss: 0.584409773349762 2023-01-22 17:22:27.829975: step: 412/470, loss: 0.5824598073959351 2023-01-22 17:22:28.563400: step: 414/470, loss: 1.616589069366455 2023-01-22 17:22:29.356184: step: 416/470, loss: 0.17866788804531097 2023-01-22 17:22:30.079197: step: 418/470, loss: 0.4966486692428589 2023-01-22 17:22:30.823225: step: 420/470, loss: 0.14873385429382324 2023-01-22 17:22:31.615669: step: 422/470, loss: 0.6694574952125549 2023-01-22 17:22:32.277152: step: 424/470, loss: 0.6328750848770142 2023-01-22 17:22:33.003072: step: 426/470, loss: 0.42485523223876953 2023-01-22 17:22:33.761451: step: 428/470, loss: 0.36120638251304626 2023-01-22 17:22:34.569803: step: 430/470, loss: 0.38846999406814575 2023-01-22 17:22:35.304499: step: 432/470, loss: 1.087398886680603 2023-01-22 17:22:36.090208: step: 434/470, loss: 0.24200265109539032 2023-01-22 17:22:36.812082: step: 436/470, loss: 0.44511035084724426 2023-01-22 17:22:37.583451: step: 438/470, loss: 0.578136146068573 2023-01-22 17:22:38.307929: step: 440/470, loss: 0.04400842636823654 2023-01-22 17:22:39.064769: step: 442/470, loss: 0.42095282673835754 2023-01-22 17:22:39.807591: step: 444/470, loss: 0.2756405174732208 2023-01-22 17:22:40.569469: step: 446/470, loss: 0.6643534302711487 2023-01-22 17:22:41.295235: step: 448/470, loss: 0.5027168989181519 2023-01-22 17:22:41.963637: step: 450/470, loss: 0.5610426664352417 2023-01-22 17:22:42.720144: step: 452/470, loss: 0.7037179470062256 2023-01-22 17:22:43.446650: step: 454/470, loss: 1.122239112854004 2023-01-22 17:22:44.339859: step: 456/470, loss: 0.4908750653266907 2023-01-22 17:22:45.080797: step: 458/470, loss: 0.4792795181274414 2023-01-22 17:22:45.826711: step: 460/470, loss: 0.3053261637687683 2023-01-22 17:22:46.541323: step: 462/470, loss: 0.5884460806846619 2023-01-22 17:22:47.243542: step: 464/470, loss: 2.739895820617676 2023-01-22 17:22:48.026603: step: 466/470, loss: 0.8006578683853149 2023-01-22 17:22:48.740566: step: 468/470, loss: 1.319366455078125 2023-01-22 17:22:49.540678: step: 470/470, loss: 2.922891139984131 2023-01-22 17:22:50.260965: step: 472/470, loss: 0.2505470812320709 2023-01-22 17:22:51.093657: step: 474/470, loss: 0.4484995901584625 2023-01-22 17:22:51.833367: step: 476/470, loss: 0.5939066410064697 2023-01-22 17:22:52.572258: step: 478/470, loss: 0.27172547578811646 2023-01-22 17:22:53.492072: step: 480/470, loss: 0.3544227182865143 2023-01-22 17:22:54.093928: step: 482/470, loss: 0.19915227591991425 2023-01-22 17:22:54.801866: step: 484/470, loss: 0.5538665056228638 2023-01-22 17:22:55.673976: step: 486/470, loss: 1.7567211389541626 2023-01-22 17:22:56.475212: step: 488/470, loss: 0.23877158761024475 2023-01-22 17:22:57.221673: step: 490/470, loss: 0.37857306003570557 2023-01-22 17:22:57.923065: step: 492/470, loss: 0.2932409346103668 2023-01-22 17:22:58.791767: step: 494/470, loss: 0.3827472925186157 2023-01-22 17:22:59.497216: step: 496/470, loss: 1.6333781480789185 2023-01-22 17:23:00.308303: step: 498/470, loss: 0.33791467547416687 2023-01-22 17:23:01.013620: step: 500/470, loss: 0.32591983675956726 2023-01-22 17:23:01.753459: step: 502/470, loss: 0.4133923649787903 2023-01-22 17:23:02.527722: step: 504/470, loss: 1.181382417678833 2023-01-22 17:23:03.320054: step: 506/470, loss: 5.112921237945557 2023-01-22 17:23:04.091980: step: 508/470, loss: 0.24143043160438538 2023-01-22 17:23:04.797933: step: 510/470, loss: 0.14529116451740265 2023-01-22 17:23:05.536901: step: 512/470, loss: 0.4227316975593567 2023-01-22 17:23:06.288917: step: 514/470, loss: 1.3871934413909912 2023-01-22 17:23:07.037489: step: 516/470, loss: 1.0044560432434082 2023-01-22 17:23:07.787690: step: 518/470, loss: 0.29355016350746155 2023-01-22 17:23:08.548763: step: 520/470, loss: 0.29415249824523926 2023-01-22 17:23:09.262895: step: 522/470, loss: 1.2313815355300903 2023-01-22 17:23:09.976071: step: 524/470, loss: 0.27359142899513245 2023-01-22 17:23:10.698707: step: 526/470, loss: 0.9681866765022278 2023-01-22 17:23:11.418886: step: 528/470, loss: 0.4213339388370514 2023-01-22 17:23:12.176798: step: 530/470, loss: 0.6697089672088623 2023-01-22 17:23:12.911504: step: 532/470, loss: 1.0336536169052124 2023-01-22 17:23:13.667364: step: 534/470, loss: 0.18524248898029327 2023-01-22 17:23:14.440020: step: 536/470, loss: 0.5117719173431396 2023-01-22 17:23:15.235170: step: 538/470, loss: 0.563843846321106 2023-01-22 17:23:16.060917: step: 540/470, loss: 0.42840808629989624 2023-01-22 17:23:16.881139: step: 542/470, loss: 1.106241226196289 2023-01-22 17:23:17.575202: step: 544/470, loss: 0.27418795228004456 2023-01-22 17:23:18.261156: step: 546/470, loss: 0.1472853273153305 2023-01-22 17:23:19.058473: step: 548/470, loss: 7.418033599853516 2023-01-22 17:23:19.782419: step: 550/470, loss: 0.27359166741371155 2023-01-22 17:23:20.649129: step: 552/470, loss: 0.23063817620277405 2023-01-22 17:23:21.417356: step: 554/470, loss: 1.2608001232147217 2023-01-22 17:23:22.136610: step: 556/470, loss: 0.40279632806777954 2023-01-22 17:23:22.903295: step: 558/470, loss: 1.5408741235733032 2023-01-22 17:23:23.650026: step: 560/470, loss: 0.8065625429153442 2023-01-22 17:23:24.402236: step: 562/470, loss: 0.20651859045028687 2023-01-22 17:23:25.141606: step: 564/470, loss: 1.4523169994354248 2023-01-22 17:23:25.843509: step: 566/470, loss: 0.589959442615509 2023-01-22 17:23:26.537061: step: 568/470, loss: 1.7879338264465332 2023-01-22 17:23:27.201451: step: 570/470, loss: 1.0214474201202393 2023-01-22 17:23:27.972703: step: 572/470, loss: 0.5045061707496643 2023-01-22 17:23:28.709994: step: 574/470, loss: 1.857149600982666 2023-01-22 17:23:29.459541: step: 576/470, loss: 0.49319571256637573 2023-01-22 17:23:30.156962: step: 578/470, loss: 1.0521998405456543 2023-01-22 17:23:30.849884: step: 580/470, loss: 0.8370808362960815 2023-01-22 17:23:31.707421: step: 582/470, loss: 0.752477765083313 2023-01-22 17:23:32.422736: step: 584/470, loss: 0.2725905478000641 2023-01-22 17:23:33.178164: step: 586/470, loss: 1.0730834007263184 2023-01-22 17:23:33.892742: step: 588/470, loss: 0.5087326765060425 2023-01-22 17:23:34.615112: step: 590/470, loss: 0.23478223383426666 2023-01-22 17:23:35.481990: step: 592/470, loss: 0.5487679243087769 2023-01-22 17:23:36.253347: step: 594/470, loss: 0.7396395206451416 2023-01-22 17:23:37.049703: step: 596/470, loss: 1.1245198249816895 2023-01-22 17:23:37.795382: step: 598/470, loss: 0.6135616302490234 2023-01-22 17:23:38.596068: step: 600/470, loss: 1.019770622253418 2023-01-22 17:23:39.337413: step: 602/470, loss: 0.26452648639678955 2023-01-22 17:23:40.042419: step: 604/470, loss: 1.3409970998764038 2023-01-22 17:23:40.739824: step: 606/470, loss: 0.41237398982048035 2023-01-22 17:23:41.482257: step: 608/470, loss: 0.1840699464082718 2023-01-22 17:23:42.318486: step: 610/470, loss: 1.9304275512695312 2023-01-22 17:23:43.107784: step: 612/470, loss: 0.3491050899028778 2023-01-22 17:23:43.886867: step: 614/470, loss: 0.7268772721290588 2023-01-22 17:23:44.875607: step: 616/470, loss: 0.4124700129032135 2023-01-22 17:23:45.602618: step: 618/470, loss: 0.2458016574382782 2023-01-22 17:23:46.350536: step: 620/470, loss: 0.25328996777534485 2023-01-22 17:23:47.101809: step: 622/470, loss: 0.3315480351448059 2023-01-22 17:23:47.803326: step: 624/470, loss: 0.4825563430786133 2023-01-22 17:23:48.495429: step: 626/470, loss: 0.6606676578521729 2023-01-22 17:23:49.323729: step: 628/470, loss: 0.5025643110275269 2023-01-22 17:23:50.093650: step: 630/470, loss: 0.3678370714187622 2023-01-22 17:23:50.936305: step: 632/470, loss: 0.4976312816143036 2023-01-22 17:23:51.650369: step: 634/470, loss: 0.22208793461322784 2023-01-22 17:23:52.392024: step: 636/470, loss: 1.0294278860092163 2023-01-22 17:23:53.127006: step: 638/470, loss: 1.9563071727752686 2023-01-22 17:23:53.937095: step: 640/470, loss: 2.2986676692962646 2023-01-22 17:23:54.655746: step: 642/470, loss: 1.1405041217803955 2023-01-22 17:23:55.433289: step: 644/470, loss: 0.3489605784416199 2023-01-22 17:23:56.210874: step: 646/470, loss: 0.9580307602882385 2023-01-22 17:23:56.933653: step: 648/470, loss: 0.27914896607398987 2023-01-22 17:23:57.616918: step: 650/470, loss: 0.983113169670105 2023-01-22 17:23:58.223145: step: 652/470, loss: 0.4380299746990204 2023-01-22 17:23:58.967747: step: 654/470, loss: 2.07478666305542 2023-01-22 17:23:59.639722: step: 656/470, loss: 0.8824518918991089 2023-01-22 17:24:00.328131: step: 658/470, loss: 0.3353792130947113 2023-01-22 17:24:01.115685: step: 660/470, loss: 0.4645380675792694 2023-01-22 17:24:01.912842: step: 662/470, loss: 0.8738151788711548 2023-01-22 17:24:02.616276: step: 664/470, loss: 0.2929144501686096 2023-01-22 17:24:03.324544: step: 666/470, loss: 0.2078137993812561 2023-01-22 17:24:04.122017: step: 668/470, loss: 0.3449633717536926 2023-01-22 17:24:04.934103: step: 670/470, loss: 4.962142467498779 2023-01-22 17:24:05.685480: step: 672/470, loss: 0.266704261302948 2023-01-22 17:24:06.447369: step: 674/470, loss: 0.7107980847358704 2023-01-22 17:24:07.235888: step: 676/470, loss: 0.7811194658279419 2023-01-22 17:24:08.047013: step: 678/470, loss: 0.6608456969261169 2023-01-22 17:24:08.738635: step: 680/470, loss: 0.36729899048805237 2023-01-22 17:24:09.557839: step: 682/470, loss: 1.2359304428100586 2023-01-22 17:24:10.310107: step: 684/470, loss: 0.6000874638557434 2023-01-22 17:24:11.004497: step: 686/470, loss: 0.30050334334373474 2023-01-22 17:24:11.676564: step: 688/470, loss: 1.0338342189788818 2023-01-22 17:24:12.374274: step: 690/470, loss: 0.6685682535171509 2023-01-22 17:24:13.140518: step: 692/470, loss: 2.2404136657714844 2023-01-22 17:24:13.932361: step: 694/470, loss: 0.9425269365310669 2023-01-22 17:24:14.700934: step: 696/470, loss: 0.9702392220497131 2023-01-22 17:24:15.459209: step: 698/470, loss: 0.9032459259033203 2023-01-22 17:24:16.191048: step: 700/470, loss: 0.46420717239379883 2023-01-22 17:24:16.942728: step: 702/470, loss: 0.16542571783065796 2023-01-22 17:24:17.636534: step: 704/470, loss: 0.4053605794906616 2023-01-22 17:24:18.380053: step: 706/470, loss: 0.5979849100112915 2023-01-22 17:24:19.061084: step: 708/470, loss: 0.4328975975513458 2023-01-22 17:24:19.804969: step: 710/470, loss: 0.603645920753479 2023-01-22 17:24:20.510145: step: 712/470, loss: 0.6759111881256104 2023-01-22 17:24:21.303053: step: 714/470, loss: 0.17363420128822327 2023-01-22 17:24:22.105587: step: 716/470, loss: 0.45107150077819824 2023-01-22 17:24:22.777783: step: 718/470, loss: 0.30570104718208313 2023-01-22 17:24:23.519665: step: 720/470, loss: 0.8583315014839172 2023-01-22 17:24:24.345883: step: 722/470, loss: 1.9609408378601074 2023-01-22 17:24:25.039844: step: 724/470, loss: 0.3080098032951355 2023-01-22 17:24:25.810593: step: 726/470, loss: 0.6534498333930969 2023-01-22 17:24:26.449393: step: 728/470, loss: 0.9306233525276184 2023-01-22 17:24:27.226453: step: 730/470, loss: 0.4405674934387207 2023-01-22 17:24:27.997367: step: 732/470, loss: 1.8319543600082397 2023-01-22 17:24:28.739228: step: 734/470, loss: 0.3661953806877136 2023-01-22 17:24:29.458385: step: 736/470, loss: 1.5648008584976196 2023-01-22 17:24:30.151863: step: 738/470, loss: 1.1446764469146729 2023-01-22 17:24:30.849475: step: 740/470, loss: 0.5488044619560242 2023-01-22 17:24:31.581024: step: 742/470, loss: 0.7896547913551331 2023-01-22 17:24:32.268866: step: 744/470, loss: 0.2640857994556427 2023-01-22 17:24:33.017843: step: 746/470, loss: 0.12843212485313416 2023-01-22 17:24:33.743687: step: 748/470, loss: 0.7997981905937195 2023-01-22 17:24:34.557285: step: 750/470, loss: 0.9078993797302246 2023-01-22 17:24:35.287587: step: 752/470, loss: 0.5914628505706787 2023-01-22 17:24:36.000532: step: 754/470, loss: 0.6052213311195374 2023-01-22 17:24:36.748805: step: 756/470, loss: 0.16759061813354492 2023-01-22 17:24:37.488241: step: 758/470, loss: 0.423061728477478 2023-01-22 17:24:38.309432: step: 760/470, loss: 0.9458290934562683 2023-01-22 17:24:39.102885: step: 762/470, loss: 0.44426000118255615 2023-01-22 17:24:39.842191: step: 764/470, loss: 0.48349910974502563 2023-01-22 17:24:40.623114: step: 766/470, loss: 1.0650396347045898 2023-01-22 17:24:41.452148: step: 768/470, loss: 0.9286936521530151 2023-01-22 17:24:42.236379: step: 770/470, loss: 0.5421870946884155 2023-01-22 17:24:42.932497: step: 772/470, loss: 0.6645587682723999 2023-01-22 17:24:43.678255: step: 774/470, loss: 0.3658778965473175 2023-01-22 17:24:44.402751: step: 776/470, loss: 0.4753032624721527 2023-01-22 17:24:45.109668: step: 778/470, loss: 3.5970163345336914 2023-01-22 17:24:46.029773: step: 780/470, loss: 0.8341478109359741 2023-01-22 17:24:46.935804: step: 782/470, loss: 0.875688910484314 2023-01-22 17:24:47.719058: step: 784/470, loss: 0.44907113909721375 2023-01-22 17:24:48.481112: step: 786/470, loss: 0.9514230489730835 2023-01-22 17:24:49.296285: step: 788/470, loss: 0.46913057565689087 2023-01-22 17:24:50.192807: step: 790/470, loss: 1.4260585308074951 2023-01-22 17:24:51.087926: step: 792/470, loss: 0.3309740424156189 2023-01-22 17:24:51.838975: step: 794/470, loss: 0.2095767855644226 2023-01-22 17:24:52.543769: step: 796/470, loss: 1.0972883701324463 2023-01-22 17:24:53.284847: step: 798/470, loss: 0.6162765026092529 2023-01-22 17:24:53.997923: step: 800/470, loss: 2.2116286754608154 2023-01-22 17:24:54.790282: step: 802/470, loss: 0.3036021888256073 2023-01-22 17:24:55.583553: step: 804/470, loss: 0.762950599193573 2023-01-22 17:24:56.340956: step: 806/470, loss: 0.6544211506843567 2023-01-22 17:24:57.033299: step: 808/470, loss: 0.37966063618659973 2023-01-22 17:24:57.818775: step: 810/470, loss: 1.9399421215057373 2023-01-22 17:24:58.592805: step: 812/470, loss: 1.2812671661376953 2023-01-22 17:24:59.410525: step: 814/470, loss: 0.187590554356575 2023-01-22 17:25:00.143329: step: 816/470, loss: 0.9980336427688599 2023-01-22 17:25:00.847350: step: 818/470, loss: 0.3564274311065674 2023-01-22 17:25:01.496130: step: 820/470, loss: 0.34761491417884827 2023-01-22 17:25:02.261651: step: 822/470, loss: 0.26210886240005493 2023-01-22 17:25:02.973881: step: 824/470, loss: 0.43427425622940063 2023-01-22 17:25:03.671629: step: 826/470, loss: 0.3225962817668915 2023-01-22 17:25:04.448907: step: 828/470, loss: 1.4615715742111206 2023-01-22 17:25:05.208974: step: 830/470, loss: 0.1873364895582199 2023-01-22 17:25:06.038612: step: 832/470, loss: 0.9489684104919434 2023-01-22 17:25:06.725217: step: 834/470, loss: 0.24692928791046143 2023-01-22 17:25:07.523191: step: 836/470, loss: 0.5066057443618774 2023-01-22 17:25:08.330149: step: 838/470, loss: 0.8843993544578552 2023-01-22 17:25:09.080880: step: 840/470, loss: 1.108005166053772 2023-01-22 17:25:09.832117: step: 842/470, loss: 0.8157863616943359 2023-01-22 17:25:10.590446: step: 844/470, loss: 0.5103515386581421 2023-01-22 17:25:11.375334: step: 846/470, loss: 0.9719839096069336 2023-01-22 17:25:12.057420: step: 848/470, loss: 0.6897994875907898 2023-01-22 17:25:12.787723: step: 850/470, loss: 0.6400911211967468 2023-01-22 17:25:13.565126: step: 852/470, loss: 0.7998232841491699 2023-01-22 17:25:14.388290: step: 854/470, loss: 1.2332285642623901 2023-01-22 17:25:15.185569: step: 856/470, loss: 0.7018003463745117 2023-01-22 17:25:15.948129: step: 858/470, loss: 0.46146655082702637 2023-01-22 17:25:16.787605: step: 860/470, loss: 0.7082826495170593 2023-01-22 17:25:17.514670: step: 862/470, loss: 1.376923680305481 2023-01-22 17:25:18.178930: step: 864/470, loss: 0.12437704205513 2023-01-22 17:25:18.855672: step: 866/470, loss: 1.9006187915802002 2023-01-22 17:25:19.619890: step: 868/470, loss: 0.4418231248855591 2023-01-22 17:25:20.384381: step: 870/470, loss: 0.7229202389717102 2023-01-22 17:25:21.184834: step: 872/470, loss: 0.44860169291496277 2023-01-22 17:25:21.944878: step: 874/470, loss: 0.46400347352027893 2023-01-22 17:25:22.665548: step: 876/470, loss: 0.865450918674469 2023-01-22 17:25:23.385176: step: 878/470, loss: 0.2543260157108307 2023-01-22 17:25:24.250662: step: 880/470, loss: 0.8604297041893005 2023-01-22 17:25:25.084131: step: 882/470, loss: 0.4274219274520874 2023-01-22 17:25:25.897098: step: 884/470, loss: 0.669735848903656 2023-01-22 17:25:26.610104: step: 886/470, loss: 0.3069272041320801 2023-01-22 17:25:27.421520: step: 888/470, loss: 1.567513346672058 2023-01-22 17:25:28.153415: step: 890/470, loss: 0.3045634925365448 2023-01-22 17:25:28.982041: step: 892/470, loss: 0.5335885286331177 2023-01-22 17:25:29.699168: step: 894/470, loss: 1.0912373065948486 2023-01-22 17:25:30.466721: step: 896/470, loss: 0.39587855339050293 2023-01-22 17:25:31.246590: step: 898/470, loss: 0.22928422689437866 2023-01-22 17:25:31.992648: step: 900/470, loss: 0.5208638906478882 2023-01-22 17:25:32.777648: step: 902/470, loss: 1.4234001636505127 2023-01-22 17:25:33.610728: step: 904/470, loss: 0.8621724247932434 2023-01-22 17:25:34.363478: step: 906/470, loss: 0.5629292726516724 2023-01-22 17:25:35.179672: step: 908/470, loss: 0.2364557683467865 2023-01-22 17:25:35.948612: step: 910/470, loss: 0.18407753109931946 2023-01-22 17:25:36.648939: step: 912/470, loss: 1.1533396244049072 2023-01-22 17:25:37.375172: step: 914/470, loss: 0.2399020940065384 2023-01-22 17:25:38.135697: step: 916/470, loss: 0.565222978591919 2023-01-22 17:25:38.944248: step: 918/470, loss: 0.31371474266052246 2023-01-22 17:25:39.600968: step: 920/470, loss: 0.29471805691719055 2023-01-22 17:25:40.309966: step: 922/470, loss: 0.6195104718208313 2023-01-22 17:25:41.106599: step: 924/470, loss: 0.24831870198249817 2023-01-22 17:25:41.844789: step: 926/470, loss: 0.25780385732650757 2023-01-22 17:25:42.707953: step: 928/470, loss: 0.13444189727306366 2023-01-22 17:25:43.389344: step: 930/470, loss: 0.08237230777740479 2023-01-22 17:25:44.118952: step: 932/470, loss: 1.3069645166397095 2023-01-22 17:25:44.887418: step: 934/470, loss: 0.43915003538131714 2023-01-22 17:25:45.630631: step: 936/470, loss: 0.34764325618743896 2023-01-22 17:25:46.438110: step: 938/470, loss: 0.43518635630607605 2023-01-22 17:25:47.192733: step: 940/470, loss: 0.7495778799057007 2023-01-22 17:25:47.830899: step: 942/470, loss: 2.3461673259735107 ================================================== Loss: 0.732 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2934608531199441, 'r': 0.2756415982815414, 'f1': 0.28427225497920217}, 'combined': 0.20946376682678053, 'epoch': 6} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32719529096896893, 'r': 0.3297097600638313, 'f1': 0.32844771313535726}, 'combined': 0.22876955143756228, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2955410500853728, 'r': 0.28152107617240446, 'f1': 0.28836075246425097}, 'combined': 0.21247634392102702, 'epoch': 6} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3323852737507508, 'r': 0.32759586058431345, 'f1': 0.32997318903557843}, 'combined': 0.22983207196507952, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2762745365321701, 'r': 0.2747018162103551, 'f1': 0.2754859317656653}, 'combined': 0.2029896339325955, 'epoch': 6} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32108673446573915, 'r': 0.32818086980936256, 'f1': 0.3245950455786664}, 'combined': 0.22608610139807614, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21816037735849056, 'r': 0.33035714285714285, 'f1': 0.26278409090909094}, 'combined': 0.17518939393939395, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25757575757575757, 'r': 0.3695652173913043, 'f1': 0.30357142857142855}, 'combined': 0.15178571428571427, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 6} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2642533988207765, 'r': 0.3093820627560135, 'f1': 0.2850425648119215}, 'combined': 0.21003136354562635, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32274736699326, 'r': 0.3286437131210215, 'f1': 0.3256688534024415}, 'combined': 0.22683402724548166, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21323529411764705, 'r': 0.4142857142857143, 'f1': 0.28155339805825247}, 'combined': 0.1877022653721683, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28632789500190714, 'r': 0.21624004214565282, 'f1': 0.24639676153677628}, 'combined': 0.1815555085007825, 'epoch': 4} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3345971660892728, 'r': 0.26806343565653556, 'f1': 0.2976576389530171}, 'combined': 0.20732372862399204, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32407407407407407, 'r': 0.3804347826086957, 'f1': 0.35000000000000003}, 'combined': 0.17500000000000002, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2762745365321701, 'r': 0.2747018162103551, 'f1': 0.2754859317656653}, 'combined': 0.2029896339325955, 'epoch': 6} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32108673446573915, 'r': 0.32818086980936256, 'f1': 0.3245950455786664}, 'combined': 0.22608610139807614, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 6} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:28:34.758786: step: 2/470, loss: 0.3670995235443115 2023-01-22 17:28:35.530162: step: 4/470, loss: 0.4737393856048584 2023-01-22 17:28:36.294163: step: 6/470, loss: 0.24018116295337677 2023-01-22 17:28:36.983301: step: 8/470, loss: 2.259197473526001 2023-01-22 17:28:37.759651: step: 10/470, loss: 0.07207592576742172 2023-01-22 17:28:38.493675: step: 12/470, loss: 0.2164432555437088 2023-01-22 17:28:39.273141: step: 14/470, loss: 0.33616185188293457 2023-01-22 17:28:39.981182: step: 16/470, loss: 0.23780158162117004 2023-01-22 17:28:40.778192: step: 18/470, loss: 0.8281857371330261 2023-01-22 17:28:41.580598: step: 20/470, loss: 0.6098978519439697 2023-01-22 17:28:42.337455: step: 22/470, loss: 0.8348177671432495 2023-01-22 17:28:43.198921: step: 24/470, loss: 0.13545599579811096 2023-01-22 17:28:43.958569: step: 26/470, loss: 1.4059125185012817 2023-01-22 17:28:44.809526: step: 28/470, loss: 0.25741472840309143 2023-01-22 17:28:45.508043: step: 30/470, loss: 0.3914801776409149 2023-01-22 17:28:46.274396: step: 32/470, loss: 0.2636181116104126 2023-01-22 17:28:47.047884: step: 34/470, loss: 0.3287275433540344 2023-01-22 17:28:47.837126: step: 36/470, loss: 1.114621877670288 2023-01-22 17:28:48.566455: step: 38/470, loss: 0.4752127230167389 2023-01-22 17:28:49.296482: step: 40/470, loss: 0.4046996235847473 2023-01-22 17:28:49.993197: step: 42/470, loss: 1.4045734405517578 2023-01-22 17:28:50.789554: step: 44/470, loss: 0.22467926144599915 2023-01-22 17:28:51.627032: step: 46/470, loss: 0.46707257628440857 2023-01-22 17:28:52.372608: step: 48/470, loss: 0.8245423436164856 2023-01-22 17:28:53.144804: step: 50/470, loss: 0.4884158968925476 2023-01-22 17:28:53.920370: step: 52/470, loss: 0.38864484429359436 2023-01-22 17:28:54.780018: step: 54/470, loss: 0.3698347210884094 2023-01-22 17:28:55.506858: step: 56/470, loss: 0.5932155847549438 2023-01-22 17:28:56.247352: step: 58/470, loss: 0.5183910131454468 2023-01-22 17:28:57.014097: step: 60/470, loss: 0.20385155081748962 2023-01-22 17:28:57.746496: step: 62/470, loss: 0.35150226950645447 2023-01-22 17:28:58.552604: step: 64/470, loss: 2.04789400100708 2023-01-22 17:28:59.297508: step: 66/470, loss: 0.38406845927238464 2023-01-22 17:29:00.012038: step: 68/470, loss: 0.18193836510181427 2023-01-22 17:29:00.826085: step: 70/470, loss: 0.5214804410934448 2023-01-22 17:29:01.559719: step: 72/470, loss: 0.5263029336929321 2023-01-22 17:29:02.350498: step: 74/470, loss: 0.3935713469982147 2023-01-22 17:29:03.152198: step: 76/470, loss: 0.43086206912994385 2023-01-22 17:29:03.979799: step: 78/470, loss: 0.2726958990097046 2023-01-22 17:29:04.683322: step: 80/470, loss: 0.23460204899311066 2023-01-22 17:29:05.452025: step: 82/470, loss: 0.3521403968334198 2023-01-22 17:29:06.155238: step: 84/470, loss: 0.3516944646835327 2023-01-22 17:29:06.826677: step: 86/470, loss: 1.4504263401031494 2023-01-22 17:29:07.581712: step: 88/470, loss: 0.3307155668735504 2023-01-22 17:29:08.351193: step: 90/470, loss: 0.9473370313644409 2023-01-22 17:29:09.136651: step: 92/470, loss: 0.19111379981040955 2023-01-22 17:29:09.914034: step: 94/470, loss: 0.723115861415863 2023-01-22 17:29:10.715370: step: 96/470, loss: 0.24940860271453857 2023-01-22 17:29:11.497962: step: 98/470, loss: 0.276034414768219 2023-01-22 17:29:12.382860: step: 100/470, loss: 0.7983378171920776 2023-01-22 17:29:13.146050: step: 102/470, loss: 0.6610528826713562 2023-01-22 17:29:13.908370: step: 104/470, loss: 0.3130510449409485 2023-01-22 17:29:14.685806: step: 106/470, loss: 1.4086874723434448 2023-01-22 17:29:15.345958: step: 108/470, loss: 0.41898658871650696 2023-01-22 17:29:16.087597: step: 110/470, loss: 0.28429749608039856 2023-01-22 17:29:16.851785: step: 112/470, loss: 0.3643468916416168 2023-01-22 17:29:17.595146: step: 114/470, loss: 0.07166903465986252 2023-01-22 17:29:18.413126: step: 116/470, loss: 0.2408754527568817 2023-01-22 17:29:19.205621: step: 118/470, loss: 0.5108166933059692 2023-01-22 17:29:19.985431: step: 120/470, loss: 1.3073979616165161 2023-01-22 17:29:20.767761: step: 122/470, loss: 0.13918791711330414 2023-01-22 17:29:21.530181: step: 124/470, loss: 0.22650255262851715 2023-01-22 17:29:22.314222: step: 126/470, loss: 0.3875696361064911 2023-01-22 17:29:23.082257: step: 128/470, loss: 0.9161421060562134 2023-01-22 17:29:23.850934: step: 130/470, loss: 0.273093581199646 2023-01-22 17:29:24.719689: step: 132/470, loss: 0.5050501227378845 2023-01-22 17:29:25.472053: step: 134/470, loss: 0.2323817014694214 2023-01-22 17:29:26.170434: step: 136/470, loss: 0.4136822521686554 2023-01-22 17:29:26.928802: step: 138/470, loss: 0.6526037454605103 2023-01-22 17:29:27.682295: step: 140/470, loss: 0.17813269793987274 2023-01-22 17:29:28.457139: step: 142/470, loss: 0.4769699275493622 2023-01-22 17:29:29.227590: step: 144/470, loss: 0.40157580375671387 2023-01-22 17:29:30.038376: step: 146/470, loss: 0.8085362911224365 2023-01-22 17:29:30.801348: step: 148/470, loss: 0.33716344833374023 2023-01-22 17:29:31.586217: step: 150/470, loss: 1.0862269401550293 2023-01-22 17:29:32.392046: step: 152/470, loss: 0.640563428401947 2023-01-22 17:29:33.170873: step: 154/470, loss: 0.4485277235507965 2023-01-22 17:29:34.019400: step: 156/470, loss: 0.5932475328445435 2023-01-22 17:29:34.794724: step: 158/470, loss: 0.26284921169281006 2023-01-22 17:29:35.610381: step: 160/470, loss: 0.22903652489185333 2023-01-22 17:29:36.407011: step: 162/470, loss: 1.9001516103744507 2023-01-22 17:29:37.147135: step: 164/470, loss: 1.4668172597885132 2023-01-22 17:29:37.953074: step: 166/470, loss: 0.4874824285507202 2023-01-22 17:29:38.677683: step: 168/470, loss: 0.8743273019790649 2023-01-22 17:29:39.425591: step: 170/470, loss: 0.4101831614971161 2023-01-22 17:29:40.080954: step: 172/470, loss: 0.362973153591156 2023-01-22 17:29:40.821769: step: 174/470, loss: 0.43648961186408997 2023-01-22 17:29:41.664005: step: 176/470, loss: 0.13790124654769897 2023-01-22 17:29:42.424827: step: 178/470, loss: 0.08411405980587006 2023-01-22 17:29:43.167215: step: 180/470, loss: 0.24036382138729095 2023-01-22 17:29:44.003655: step: 182/470, loss: 0.6687015891075134 2023-01-22 17:29:44.789149: step: 184/470, loss: 0.35721492767333984 2023-01-22 17:29:45.624154: step: 186/470, loss: 0.48564645648002625 2023-01-22 17:29:46.355625: step: 188/470, loss: 1.280880331993103 2023-01-22 17:29:47.078380: step: 190/470, loss: 0.5749147534370422 2023-01-22 17:29:47.898765: step: 192/470, loss: 3.5238230228424072 2023-01-22 17:29:48.747914: step: 194/470, loss: 0.6828189492225647 2023-01-22 17:29:49.601406: step: 196/470, loss: 1.2112079858779907 2023-01-22 17:29:50.337548: step: 198/470, loss: 0.612357497215271 2023-01-22 17:29:51.096072: step: 200/470, loss: 0.1848372370004654 2023-01-22 17:29:51.854803: step: 202/470, loss: 0.24752122163772583 2023-01-22 17:29:52.636790: step: 204/470, loss: 0.7347160577774048 2023-01-22 17:29:53.378555: step: 206/470, loss: 0.28709715604782104 2023-01-22 17:29:54.117016: step: 208/470, loss: 1.068163514137268 2023-01-22 17:29:54.886295: step: 210/470, loss: 0.9789504408836365 2023-01-22 17:29:55.690232: step: 212/470, loss: 0.8234558701515198 2023-01-22 17:29:56.474958: step: 214/470, loss: 1.2307389974594116 2023-01-22 17:29:57.215733: step: 216/470, loss: 0.13372446596622467 2023-01-22 17:29:57.902342: step: 218/470, loss: 0.11340329051017761 2023-01-22 17:29:58.661850: step: 220/470, loss: 0.5686858892440796 2023-01-22 17:29:59.392851: step: 222/470, loss: 0.3490334451198578 2023-01-22 17:30:00.302936: step: 224/470, loss: 0.43614357709884644 2023-01-22 17:30:01.059780: step: 226/470, loss: 0.44121402502059937 2023-01-22 17:30:01.894474: step: 228/470, loss: 0.9783852100372314 2023-01-22 17:30:02.703917: step: 230/470, loss: 0.8979207873344421 2023-01-22 17:30:03.435802: step: 232/470, loss: 0.3936818242073059 2023-01-22 17:30:04.117487: step: 234/470, loss: 0.7134736776351929 2023-01-22 17:30:04.862443: step: 236/470, loss: 0.35246771574020386 2023-01-22 17:30:05.633591: step: 238/470, loss: 1.3471710681915283 2023-01-22 17:30:06.407451: step: 240/470, loss: 0.5043021440505981 2023-01-22 17:30:07.123812: step: 242/470, loss: 1.0671354532241821 2023-01-22 17:30:07.875521: step: 244/470, loss: 0.628767728805542 2023-01-22 17:30:08.685041: step: 246/470, loss: 0.5917458534240723 2023-01-22 17:30:09.399022: step: 248/470, loss: 0.9961802363395691 2023-01-22 17:30:10.177933: step: 250/470, loss: 0.8598777651786804 2023-01-22 17:30:10.987026: step: 252/470, loss: 0.2611555755138397 2023-01-22 17:30:11.727509: step: 254/470, loss: 0.2617139518260956 2023-01-22 17:30:12.506633: step: 256/470, loss: 1.007016897201538 2023-01-22 17:30:13.247233: step: 258/470, loss: 0.6046254634857178 2023-01-22 17:30:14.046399: step: 260/470, loss: 0.4044310450553894 2023-01-22 17:30:14.753850: step: 262/470, loss: 1.1649810075759888 2023-01-22 17:30:15.500335: step: 264/470, loss: 0.6738048195838928 2023-01-22 17:30:16.204541: step: 266/470, loss: 1.02718186378479 2023-01-22 17:30:16.933813: step: 268/470, loss: 1.9333301782608032 2023-01-22 17:30:17.908004: step: 270/470, loss: 0.3444717824459076 2023-01-22 17:30:18.672673: step: 272/470, loss: 1.909464955329895 2023-01-22 17:30:19.484778: step: 274/470, loss: 0.5475513935089111 2023-01-22 17:30:20.244364: step: 276/470, loss: 0.24307137727737427 2023-01-22 17:30:21.016085: step: 278/470, loss: 0.22565793991088867 2023-01-22 17:30:21.707170: step: 280/470, loss: 0.11121651530265808 2023-01-22 17:30:22.585437: step: 282/470, loss: 0.6998484134674072 2023-01-22 17:30:23.321995: step: 284/470, loss: 0.8299077749252319 2023-01-22 17:30:24.017172: step: 286/470, loss: 0.19652137160301208 2023-01-22 17:30:24.806418: step: 288/470, loss: 0.29388874769210815 2023-01-22 17:30:25.558893: step: 290/470, loss: 0.4312860369682312 2023-01-22 17:30:26.297328: step: 292/470, loss: 0.6240105032920837 2023-01-22 17:30:27.042997: step: 294/470, loss: 0.16214123368263245 2023-01-22 17:30:27.798879: step: 296/470, loss: 0.5396556854248047 2023-01-22 17:30:28.522439: step: 298/470, loss: 0.5401109457015991 2023-01-22 17:30:29.258436: step: 300/470, loss: 0.9766262769699097 2023-01-22 17:30:29.980889: step: 302/470, loss: 0.8032498359680176 2023-01-22 17:30:30.836099: step: 304/470, loss: 0.5323628783226013 2023-01-22 17:30:31.602973: step: 306/470, loss: 0.35838931798934937 2023-01-22 17:30:32.450756: step: 308/470, loss: 0.15917706489562988 2023-01-22 17:30:33.288353: step: 310/470, loss: 0.825878381729126 2023-01-22 17:30:34.083721: step: 312/470, loss: 0.5406302213668823 2023-01-22 17:30:34.856515: step: 314/470, loss: 0.8202871680259705 2023-01-22 17:30:35.559146: step: 316/470, loss: 0.14145931601524353 2023-01-22 17:30:36.294022: step: 318/470, loss: 1.2496742010116577 2023-01-22 17:30:37.012914: step: 320/470, loss: 0.45300769805908203 2023-01-22 17:30:37.716602: step: 322/470, loss: 0.2567143440246582 2023-01-22 17:30:38.438574: step: 324/470, loss: 0.3849056363105774 2023-01-22 17:30:39.278738: step: 326/470, loss: 0.17785045504570007 2023-01-22 17:30:40.022594: step: 328/470, loss: 0.5878977179527283 2023-01-22 17:30:40.722546: step: 330/470, loss: 0.3370095491409302 2023-01-22 17:30:41.425132: step: 332/470, loss: 0.2186794877052307 2023-01-22 17:30:42.218069: step: 334/470, loss: 0.30272671580314636 2023-01-22 17:30:42.955914: step: 336/470, loss: 0.4330812394618988 2023-01-22 17:30:43.704181: step: 338/470, loss: 0.1457645744085312 2023-01-22 17:30:44.405437: step: 340/470, loss: 0.5812200307846069 2023-01-22 17:30:45.078636: step: 342/470, loss: 0.4354366958141327 2023-01-22 17:30:45.864735: step: 344/470, loss: 1.0845625400543213 2023-01-22 17:30:46.621058: step: 346/470, loss: 0.5721340775489807 2023-01-22 17:30:47.357077: step: 348/470, loss: 0.5678675174713135 2023-01-22 17:30:48.148526: step: 350/470, loss: 0.46091604232788086 2023-01-22 17:30:48.955057: step: 352/470, loss: 1.013948678970337 2023-01-22 17:30:49.714521: step: 354/470, loss: 0.4761132597923279 2023-01-22 17:30:50.459314: step: 356/470, loss: 0.34836894273757935 2023-01-22 17:30:51.187792: step: 358/470, loss: 0.24478018283843994 2023-01-22 17:30:51.919540: step: 360/470, loss: 2.375002384185791 2023-01-22 17:30:52.607521: step: 362/470, loss: 0.3373410105705261 2023-01-22 17:30:53.339820: step: 364/470, loss: 0.435524046421051 2023-01-22 17:30:54.136291: step: 366/470, loss: 0.9048151969909668 2023-01-22 17:30:54.898983: step: 368/470, loss: 0.6413469314575195 2023-01-22 17:30:55.679779: step: 370/470, loss: 0.27448025345802307 2023-01-22 17:30:56.507927: step: 372/470, loss: 1.3750803470611572 2023-01-22 17:30:57.306191: step: 374/470, loss: 0.48742660880088806 2023-01-22 17:30:58.078276: step: 376/470, loss: 0.8035762310028076 2023-01-22 17:30:58.856536: step: 378/470, loss: 0.5545987486839294 2023-01-22 17:30:59.603723: step: 380/470, loss: 0.28187888860702515 2023-01-22 17:31:00.309114: step: 382/470, loss: 0.30163049697875977 2023-01-22 17:31:01.087180: step: 384/470, loss: 0.9044409990310669 2023-01-22 17:31:01.825014: step: 386/470, loss: 1.142310619354248 2023-01-22 17:31:02.576397: step: 388/470, loss: 1.2953393459320068 2023-01-22 17:31:03.316028: step: 390/470, loss: 0.15360940992832184 2023-01-22 17:31:04.082431: step: 392/470, loss: 0.46768492460250854 2023-01-22 17:31:04.839253: step: 394/470, loss: 0.44785362482070923 2023-01-22 17:31:05.587517: step: 396/470, loss: 0.46685466170310974 2023-01-22 17:31:06.381468: step: 398/470, loss: 0.939845085144043 2023-01-22 17:31:07.086700: step: 400/470, loss: 0.7884602546691895 2023-01-22 17:31:07.851276: step: 402/470, loss: 1.0477254390716553 2023-01-22 17:31:08.630328: step: 404/470, loss: 0.7993929982185364 2023-01-22 17:31:09.410035: step: 406/470, loss: 0.8812851905822754 2023-01-22 17:31:10.133691: step: 408/470, loss: 0.2269768863916397 2023-01-22 17:31:10.822087: step: 410/470, loss: 1.8129310607910156 2023-01-22 17:31:11.568577: step: 412/470, loss: 0.3134598731994629 2023-01-22 17:31:12.299907: step: 414/470, loss: 0.1766653060913086 2023-01-22 17:31:13.067475: step: 416/470, loss: 0.9808623194694519 2023-01-22 17:31:13.910958: step: 418/470, loss: 0.9330800175666809 2023-01-22 17:31:14.660175: step: 420/470, loss: 0.4644604027271271 2023-01-22 17:31:15.454638: step: 422/470, loss: 0.5063079595565796 2023-01-22 17:31:16.217579: step: 424/470, loss: 0.5117875933647156 2023-01-22 17:31:16.928015: step: 426/470, loss: 0.3270796537399292 2023-01-22 17:31:17.637862: step: 428/470, loss: 0.7015947103500366 2023-01-22 17:31:18.406876: step: 430/470, loss: 0.3183510899543762 2023-01-22 17:31:19.128352: step: 432/470, loss: 0.8972214460372925 2023-01-22 17:31:19.905129: step: 434/470, loss: 0.9476366639137268 2023-01-22 17:31:20.608658: step: 436/470, loss: 0.8914029002189636 2023-01-22 17:31:21.340525: step: 438/470, loss: 0.702813446521759 2023-01-22 17:31:22.080673: step: 440/470, loss: 1.0427266359329224 2023-01-22 17:31:22.919600: step: 442/470, loss: 0.3731694221496582 2023-01-22 17:31:23.615420: step: 444/470, loss: 0.3855333924293518 2023-01-22 17:31:24.481871: step: 446/470, loss: 0.382285475730896 2023-01-22 17:31:25.169686: step: 448/470, loss: 0.3315264880657196 2023-01-22 17:31:26.028203: step: 450/470, loss: 0.6684935092926025 2023-01-22 17:31:26.927522: step: 452/470, loss: 0.30676472187042236 2023-01-22 17:31:27.722017: step: 454/470, loss: 0.5190876126289368 2023-01-22 17:31:28.550448: step: 456/470, loss: 0.26106128096580505 2023-01-22 17:31:29.316983: step: 458/470, loss: 1.2300512790679932 2023-01-22 17:31:30.192260: step: 460/470, loss: 0.28473010659217834 2023-01-22 17:31:30.935565: step: 462/470, loss: 0.5453563332557678 2023-01-22 17:31:31.704082: step: 464/470, loss: 0.6540701985359192 2023-01-22 17:31:32.396543: step: 466/470, loss: 0.8440691232681274 2023-01-22 17:31:33.264413: step: 468/470, loss: 0.5105758309364319 2023-01-22 17:31:34.092419: step: 470/470, loss: 0.20678004622459412 2023-01-22 17:31:34.866850: step: 472/470, loss: 0.45606935024261475 2023-01-22 17:31:35.590103: step: 474/470, loss: 1.512797236442566 2023-01-22 17:31:36.454555: step: 476/470, loss: 0.2980559766292572 2023-01-22 17:31:37.217573: step: 478/470, loss: 0.6002671718597412 2023-01-22 17:31:37.920692: step: 480/470, loss: 0.27965351939201355 2023-01-22 17:31:38.666998: step: 482/470, loss: 0.5044351816177368 2023-01-22 17:31:39.415828: step: 484/470, loss: 0.1443350464105606 2023-01-22 17:31:40.111567: step: 486/470, loss: 2.0403623580932617 2023-01-22 17:31:40.865230: step: 488/470, loss: 0.4997430443763733 2023-01-22 17:31:41.620043: step: 490/470, loss: 0.14253254234790802 2023-01-22 17:31:42.371104: step: 492/470, loss: 1.0414563417434692 2023-01-22 17:31:43.106788: step: 494/470, loss: 2.314917802810669 2023-01-22 17:31:43.887688: step: 496/470, loss: 0.8212421536445618 2023-01-22 17:31:44.610699: step: 498/470, loss: 0.17680610716342926 2023-01-22 17:31:45.321544: step: 500/470, loss: 0.41409242153167725 2023-01-22 17:31:46.077921: step: 502/470, loss: 0.43370938301086426 2023-01-22 17:31:46.840239: step: 504/470, loss: 0.8938826322555542 2023-01-22 17:31:47.595123: step: 506/470, loss: 0.12901009619235992 2023-01-22 17:31:48.363898: step: 508/470, loss: 0.2584211528301239 2023-01-22 17:31:49.179522: step: 510/470, loss: 0.46783918142318726 2023-01-22 17:31:49.972861: step: 512/470, loss: 0.5496209263801575 2023-01-22 17:31:50.732506: step: 514/470, loss: 0.3696674108505249 2023-01-22 17:31:51.532392: step: 516/470, loss: 0.31152698397636414 2023-01-22 17:31:52.305665: step: 518/470, loss: 0.3273497223854065 2023-01-22 17:31:53.033782: step: 520/470, loss: 1.6968348026275635 2023-01-22 17:31:53.743886: step: 522/470, loss: 0.9195625185966492 2023-01-22 17:31:54.442385: step: 524/470, loss: 0.1977846920490265 2023-01-22 17:31:55.258397: step: 526/470, loss: 0.2801767587661743 2023-01-22 17:31:55.966108: step: 528/470, loss: 0.9938501715660095 2023-01-22 17:31:56.700429: step: 530/470, loss: 0.26219820976257324 2023-01-22 17:31:57.539218: step: 532/470, loss: 0.6336964964866638 2023-01-22 17:31:58.289794: step: 534/470, loss: 2.7305359840393066 2023-01-22 17:31:59.092347: step: 536/470, loss: 0.3516845703125 2023-01-22 17:31:59.799642: step: 538/470, loss: 0.5453897714614868 2023-01-22 17:32:00.586868: step: 540/470, loss: 0.6060112118721008 2023-01-22 17:32:01.323849: step: 542/470, loss: 0.27010199427604675 2023-01-22 17:32:02.024166: step: 544/470, loss: 2.1664516925811768 2023-01-22 17:32:02.800964: step: 546/470, loss: 0.24300040304660797 2023-01-22 17:32:03.541635: step: 548/470, loss: 0.11754722148180008 2023-01-22 17:32:04.369457: step: 550/470, loss: 0.48181718587875366 2023-01-22 17:32:05.127107: step: 552/470, loss: 0.7739644050598145 2023-01-22 17:32:05.896213: step: 554/470, loss: 0.6491574048995972 2023-01-22 17:32:06.656186: step: 556/470, loss: 0.49489882588386536 2023-01-22 17:32:07.402145: step: 558/470, loss: 1.208168625831604 2023-01-22 17:32:08.156738: step: 560/470, loss: 0.798096239566803 2023-01-22 17:32:08.868497: step: 562/470, loss: 0.31834012269973755 2023-01-22 17:32:09.670145: step: 564/470, loss: 2.36625337600708 2023-01-22 17:32:10.456093: step: 566/470, loss: 0.34011802077293396 2023-01-22 17:32:11.214779: step: 568/470, loss: 0.841728687286377 2023-01-22 17:32:11.999841: step: 570/470, loss: 0.7265416383743286 2023-01-22 17:32:12.744430: step: 572/470, loss: 0.9253761768341064 2023-01-22 17:32:13.620574: step: 574/470, loss: 0.23297470808029175 2023-01-22 17:32:14.336818: step: 576/470, loss: 1.5338727235794067 2023-01-22 17:32:15.015035: step: 578/470, loss: 1.0285563468933105 2023-01-22 17:32:15.732913: step: 580/470, loss: 0.3040406107902527 2023-01-22 17:32:16.400307: step: 582/470, loss: 1.5267548561096191 2023-01-22 17:32:17.113934: step: 584/470, loss: 0.6100663542747498 2023-01-22 17:32:17.824939: step: 586/470, loss: 1.353019118309021 2023-01-22 17:32:18.589089: step: 588/470, loss: 0.6592656970024109 2023-01-22 17:32:19.328450: step: 590/470, loss: 0.1781897246837616 2023-01-22 17:32:20.136727: step: 592/470, loss: 0.27507659792900085 2023-01-22 17:32:20.848605: step: 594/470, loss: 0.3861004710197449 2023-01-22 17:32:21.690034: step: 596/470, loss: 0.4841284453868866 2023-01-22 17:32:22.439593: step: 598/470, loss: 3.2577338218688965 2023-01-22 17:32:23.207142: step: 600/470, loss: 0.2953993082046509 2023-01-22 17:32:24.015632: step: 602/470, loss: 0.5135447382926941 2023-01-22 17:32:24.769931: step: 604/470, loss: 0.3044143617153168 2023-01-22 17:32:25.621100: step: 606/470, loss: 0.5463592410087585 2023-01-22 17:32:26.378792: step: 608/470, loss: 0.32110247015953064 2023-01-22 17:32:27.137003: step: 610/470, loss: 0.3176352381706238 2023-01-22 17:32:27.919858: step: 612/470, loss: 0.2889813184738159 2023-01-22 17:32:28.692143: step: 614/470, loss: 0.11909591406583786 2023-01-22 17:32:29.450953: step: 616/470, loss: 0.6773265600204468 2023-01-22 17:32:30.357467: step: 618/470, loss: 0.5883302092552185 2023-01-22 17:32:31.091979: step: 620/470, loss: 0.2453279346227646 2023-01-22 17:32:31.795537: step: 622/470, loss: 0.7039670944213867 2023-01-22 17:32:32.564621: step: 624/470, loss: 0.38650938868522644 2023-01-22 17:32:33.308359: step: 626/470, loss: 0.34423843026161194 2023-01-22 17:32:34.034597: step: 628/470, loss: 0.939646303653717 2023-01-22 17:32:34.836163: step: 630/470, loss: 0.3436138927936554 2023-01-22 17:32:35.599073: step: 632/470, loss: 1.440124750137329 2023-01-22 17:32:36.417002: step: 634/470, loss: 0.5489599704742432 2023-01-22 17:32:37.106226: step: 636/470, loss: 1.5086581707000732 2023-01-22 17:32:37.877233: step: 638/470, loss: 0.39192065596580505 2023-01-22 17:32:38.621624: step: 640/470, loss: 1.1574674844741821 2023-01-22 17:32:39.365807: step: 642/470, loss: 1.1345126628875732 2023-01-22 17:32:40.257137: step: 644/470, loss: 0.14073410630226135 2023-01-22 17:32:41.061842: step: 646/470, loss: 0.8759070038795471 2023-01-22 17:32:41.815560: step: 648/470, loss: 0.3100970983505249 2023-01-22 17:32:42.498909: step: 650/470, loss: 0.4851211607456207 2023-01-22 17:32:43.252621: step: 652/470, loss: 0.8310330510139465 2023-01-22 17:32:43.964206: step: 654/470, loss: 0.20696614682674408 2023-01-22 17:32:44.863677: step: 656/470, loss: 0.4490260183811188 2023-01-22 17:32:45.573399: step: 658/470, loss: 0.7504600286483765 2023-01-22 17:32:46.378331: step: 660/470, loss: 9.698339462280273 2023-01-22 17:32:47.163438: step: 662/470, loss: 0.3065306544303894 2023-01-22 17:32:47.966730: step: 664/470, loss: 0.46533751487731934 2023-01-22 17:32:48.777234: step: 666/470, loss: 0.24716587364673615 2023-01-22 17:32:49.591335: step: 668/470, loss: 0.4091520309448242 2023-01-22 17:32:50.339119: step: 670/470, loss: 0.44841229915618896 2023-01-22 17:32:51.106046: step: 672/470, loss: 0.17277362942695618 2023-01-22 17:32:51.872398: step: 674/470, loss: 0.3857150375843048 2023-01-22 17:32:52.726895: step: 676/470, loss: 0.36999809741973877 2023-01-22 17:32:53.489651: step: 678/470, loss: 0.21060891449451447 2023-01-22 17:32:54.197756: step: 680/470, loss: 0.23017674684524536 2023-01-22 17:32:54.894250: step: 682/470, loss: 0.40644142031669617 2023-01-22 17:32:55.651259: step: 684/470, loss: 0.6531462073326111 2023-01-22 17:32:56.411436: step: 686/470, loss: 0.062062717974185944 2023-01-22 17:32:57.236378: step: 688/470, loss: 0.45378008484840393 2023-01-22 17:32:58.010259: step: 690/470, loss: 0.6536173224449158 2023-01-22 17:32:58.773602: step: 692/470, loss: 0.8225758075714111 2023-01-22 17:32:59.548151: step: 694/470, loss: 0.27153313159942627 2023-01-22 17:33:00.279338: step: 696/470, loss: 0.19550104439258575 2023-01-22 17:33:00.983982: step: 698/470, loss: 0.28255999088287354 2023-01-22 17:33:01.770904: step: 700/470, loss: 0.9339784979820251 2023-01-22 17:33:02.513500: step: 702/470, loss: 0.9983887672424316 2023-01-22 17:33:03.298502: step: 704/470, loss: 0.7024455070495605 2023-01-22 17:33:04.067298: step: 706/470, loss: 0.24352699518203735 2023-01-22 17:33:04.833185: step: 708/470, loss: 0.8246220350265503 2023-01-22 17:33:05.619200: step: 710/470, loss: 0.5286746621131897 2023-01-22 17:33:06.305166: step: 712/470, loss: 0.4412599802017212 2023-01-22 17:33:07.096055: step: 714/470, loss: 0.7559401988983154 2023-01-22 17:33:07.818684: step: 716/470, loss: 0.6777728796005249 2023-01-22 17:33:08.592169: step: 718/470, loss: 0.5251533389091492 2023-01-22 17:33:09.469653: step: 720/470, loss: 0.5137686729431152 2023-01-22 17:33:10.213300: step: 722/470, loss: 0.2733078598976135 2023-01-22 17:33:11.009210: step: 724/470, loss: 0.5248976349830627 2023-01-22 17:33:11.730299: step: 726/470, loss: 0.9532487988471985 2023-01-22 17:33:12.476168: step: 728/470, loss: 0.1258946657180786 2023-01-22 17:33:13.368453: step: 730/470, loss: 0.9458543658256531 2023-01-22 17:33:14.152992: step: 732/470, loss: 1.3671302795410156 2023-01-22 17:33:14.892107: step: 734/470, loss: 0.15608762204647064 2023-01-22 17:33:15.670709: step: 736/470, loss: 0.7429428100585938 2023-01-22 17:33:16.395406: step: 738/470, loss: 0.2898152768611908 2023-01-22 17:33:17.164086: step: 740/470, loss: 1.8488043546676636 2023-01-22 17:33:17.913271: step: 742/470, loss: 0.16220681369304657 2023-01-22 17:33:18.664673: step: 744/470, loss: 0.5282084345817566 2023-01-22 17:33:19.496453: step: 746/470, loss: 0.7009122371673584 2023-01-22 17:33:20.243599: step: 748/470, loss: 0.4774022698402405 2023-01-22 17:33:20.971345: step: 750/470, loss: 0.8804485201835632 2023-01-22 17:33:21.803343: step: 752/470, loss: 0.2241060435771942 2023-01-22 17:33:22.555940: step: 754/470, loss: 0.3125775158405304 2023-01-22 17:33:23.270345: step: 756/470, loss: 3.320235013961792 2023-01-22 17:33:24.053119: step: 758/470, loss: 0.3262557089328766 2023-01-22 17:33:24.807527: step: 760/470, loss: 0.7048736214637756 2023-01-22 17:33:25.544020: step: 762/470, loss: 0.39851272106170654 2023-01-22 17:33:26.351647: step: 764/470, loss: 0.2439843863248825 2023-01-22 17:33:27.103136: step: 766/470, loss: 0.309922993183136 2023-01-22 17:33:27.793193: step: 768/470, loss: 0.19634529948234558 2023-01-22 17:33:28.554003: step: 770/470, loss: 0.46125590801239014 2023-01-22 17:33:29.357719: step: 772/470, loss: 0.9932135343551636 2023-01-22 17:33:30.100410: step: 774/470, loss: 0.3272228240966797 2023-01-22 17:33:30.902881: step: 776/470, loss: 0.6332528591156006 2023-01-22 17:33:31.663301: step: 778/470, loss: 0.6290199160575867 2023-01-22 17:33:32.417574: step: 780/470, loss: 0.17212772369384766 2023-01-22 17:33:33.242167: step: 782/470, loss: 0.4054405689239502 2023-01-22 17:33:33.962500: step: 784/470, loss: 0.21502184867858887 2023-01-22 17:33:34.756630: step: 786/470, loss: 0.2917923927307129 2023-01-22 17:33:35.537541: step: 788/470, loss: 0.39834240078926086 2023-01-22 17:33:36.276334: step: 790/470, loss: 0.7714516520500183 2023-01-22 17:33:36.991298: step: 792/470, loss: 1.9584661722183228 2023-01-22 17:33:37.769071: step: 794/470, loss: 1.415565013885498 2023-01-22 17:33:38.516457: step: 796/470, loss: 0.8934961557388306 2023-01-22 17:33:39.488791: step: 798/470, loss: 0.19620083272457123 2023-01-22 17:33:40.204897: step: 800/470, loss: 1.1191173791885376 2023-01-22 17:33:41.001211: step: 802/470, loss: 0.15313296020030975 2023-01-22 17:33:41.770085: step: 804/470, loss: 0.5882033109664917 2023-01-22 17:33:42.511172: step: 806/470, loss: 0.44391781091690063 2023-01-22 17:33:43.295992: step: 808/470, loss: 0.5112501382827759 2023-01-22 17:33:44.064654: step: 810/470, loss: 0.6775764226913452 2023-01-22 17:33:44.807214: step: 812/470, loss: 0.6724539399147034 2023-01-22 17:33:45.613912: step: 814/470, loss: 0.7998963594436646 2023-01-22 17:33:46.309355: step: 816/470, loss: 0.40762490034103394 2023-01-22 17:33:47.068389: step: 818/470, loss: 0.32030996680259705 2023-01-22 17:33:47.802209: step: 820/470, loss: 0.15251174569129944 2023-01-22 17:33:48.534923: step: 822/470, loss: 0.2601527273654938 2023-01-22 17:33:49.366330: step: 824/470, loss: 0.21793058514595032 2023-01-22 17:33:50.194451: step: 826/470, loss: 0.7894784808158875 2023-01-22 17:33:50.947985: step: 828/470, loss: 0.30823183059692383 2023-01-22 17:33:51.696811: step: 830/470, loss: 0.25014978647232056 2023-01-22 17:33:52.487806: step: 832/470, loss: 0.3014823794364929 2023-01-22 17:33:53.273690: step: 834/470, loss: 0.16695991158485413 2023-01-22 17:33:54.017159: step: 836/470, loss: 0.4740227162837982 2023-01-22 17:33:54.800123: step: 838/470, loss: 0.6113675236701965 2023-01-22 17:33:55.506340: step: 840/470, loss: 0.5166838765144348 2023-01-22 17:33:56.284470: step: 842/470, loss: 0.8357441425323486 2023-01-22 17:33:57.035956: step: 844/470, loss: 0.07860533893108368 2023-01-22 17:33:57.838085: step: 846/470, loss: 0.90674889087677 2023-01-22 17:33:58.647169: step: 848/470, loss: 0.928524374961853 2023-01-22 17:33:59.434612: step: 850/470, loss: 0.3567863702774048 2023-01-22 17:34:00.182779: step: 852/470, loss: 0.1565251648426056 2023-01-22 17:34:00.928130: step: 854/470, loss: 0.9612409472465515 2023-01-22 17:34:01.697576: step: 856/470, loss: 0.7247118353843689 2023-01-22 17:34:02.441320: step: 858/470, loss: 0.47365325689315796 2023-01-22 17:34:03.175599: step: 860/470, loss: 0.716667652130127 2023-01-22 17:34:03.923682: step: 862/470, loss: 0.808570384979248 2023-01-22 17:34:04.707759: step: 864/470, loss: 0.3732510507106781 2023-01-22 17:34:05.545818: step: 866/470, loss: 0.23475761711597443 2023-01-22 17:34:06.290446: step: 868/470, loss: 0.642833948135376 2023-01-22 17:34:07.012235: step: 870/470, loss: 0.1018008217215538 2023-01-22 17:34:07.769990: step: 872/470, loss: 0.676296591758728 2023-01-22 17:34:08.507449: step: 874/470, loss: 1.0268975496292114 2023-01-22 17:34:09.258394: step: 876/470, loss: 0.5832228064537048 2023-01-22 17:34:09.941955: step: 878/470, loss: 0.17225563526153564 2023-01-22 17:34:10.689010: step: 880/470, loss: 0.3411220908164978 2023-01-22 17:34:11.517244: step: 882/470, loss: 0.9757616519927979 2023-01-22 17:34:12.213346: step: 884/470, loss: 0.26120713353157043 2023-01-22 17:34:12.955928: step: 886/470, loss: 1.29201340675354 2023-01-22 17:34:13.651924: step: 888/470, loss: 0.48865917325019836 2023-01-22 17:34:14.456383: step: 890/470, loss: 0.9293355345726013 2023-01-22 17:34:15.201836: step: 892/470, loss: 0.2673357129096985 2023-01-22 17:34:15.921039: step: 894/470, loss: 0.4621124565601349 2023-01-22 17:34:16.729967: step: 896/470, loss: 0.4767850935459137 2023-01-22 17:34:17.466702: step: 898/470, loss: 0.9976144433021545 2023-01-22 17:34:18.192144: step: 900/470, loss: 0.17807906866073608 2023-01-22 17:34:19.062215: step: 902/470, loss: 0.3325049877166748 2023-01-22 17:34:19.859914: step: 904/470, loss: 0.32338327169418335 2023-01-22 17:34:20.574828: step: 906/470, loss: 0.3096674680709839 2023-01-22 17:34:21.391160: step: 908/470, loss: 1.2738778591156006 2023-01-22 17:34:22.146806: step: 910/470, loss: 0.6537337899208069 2023-01-22 17:34:22.901515: step: 912/470, loss: 0.7893470525741577 2023-01-22 17:34:23.715500: step: 914/470, loss: 0.5990859270095825 2023-01-22 17:34:24.406572: step: 916/470, loss: 0.527148425579071 2023-01-22 17:34:25.205772: step: 918/470, loss: 0.2752528786659241 2023-01-22 17:34:25.884656: step: 920/470, loss: 0.40670332312583923 2023-01-22 17:34:26.644309: step: 922/470, loss: 0.9234625101089478 2023-01-22 17:34:27.421243: step: 924/470, loss: 0.8040624856948853 2023-01-22 17:34:28.168043: step: 926/470, loss: 0.4370003640651703 2023-01-22 17:34:28.877256: step: 928/470, loss: 0.2960697412490845 2023-01-22 17:34:29.609942: step: 930/470, loss: 1.2495319843292236 2023-01-22 17:34:30.380660: step: 932/470, loss: 0.8773002624511719 2023-01-22 17:34:31.179562: step: 934/470, loss: 0.43924665451049805 2023-01-22 17:34:31.944548: step: 936/470, loss: 0.423836350440979 2023-01-22 17:34:32.752086: step: 938/470, loss: 0.36395108699798584 2023-01-22 17:34:33.598323: step: 940/470, loss: 0.4329916536808014 2023-01-22 17:34:34.263372: step: 942/470, loss: 0.15708792209625244 ================================================== Loss: 0.632 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2656935345053647, 'r': 0.3241763618348188, 'f1': 0.29203579946487096}, 'combined': 0.21518427328990491, 'epoch': 7} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32047864894407607, 'r': 0.339584106861896, 'f1': 0.32975487501061795}, 'combined': 0.22968001244520656, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26474047961800956, 'r': 0.3240182340675829, 'f1': 0.29139523780480575}, 'combined': 0.2147122804877516, 'epoch': 7} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32541475117674507, 'r': 0.34573363573629207, 'f1': 0.33526661835256644}, 'combined': 0.23351903765850401, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24530443812582606, 'r': 0.33095152847715814, 'f1': 0.2817632560702138}, 'combined': 0.20761503078857857, 'epoch': 7} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.31375791500059286, 'r': 0.35297765437566697, 'f1': 0.33221426294180423}, 'combined': 0.23139301896444078, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21774193548387097, 'r': 0.38571428571428573, 'f1': 0.27835051546391754}, 'combined': 0.18556701030927836, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2722222222222222, 'r': 0.532608695652174, 'f1': 0.36029411764705876}, 'combined': 0.18014705882352938, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38333333333333336, 'r': 0.19827586206896552, 'f1': 0.2613636363636364}, 'combined': 0.17424242424242425, 'epoch': 7} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2656935345053647, 'r': 0.3241763618348188, 'f1': 0.29203579946487096}, 'combined': 0.21518427328990491, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32047864894407607, 'r': 0.339584106861896, 'f1': 0.32975487501061795}, 'combined': 0.22968001244520656, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21774193548387097, 'r': 0.38571428571428573, 'f1': 0.27835051546391754}, 'combined': 0.18556701030927836, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26474047961800956, 'r': 0.3240182340675829, 'f1': 0.29139523780480575}, 'combined': 0.2147122804877516, 'epoch': 7} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32541475117674507, 'r': 0.34573363573629207, 'f1': 0.33526661835256644}, 'combined': 0.23351903765850401, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2722222222222222, 'r': 0.532608695652174, 'f1': 0.36029411764705876}, 'combined': 0.18014705882352938, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2762745365321701, 'r': 0.2747018162103551, 'f1': 0.2754859317656653}, 'combined': 0.2029896339325955, 'epoch': 6} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32108673446573915, 'r': 0.32818086980936256, 'f1': 0.3245950455786664}, 'combined': 0.22608610139807614, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.3103448275862069, 'f1': 0.3673469387755102}, 'combined': 0.24489795918367346, 'epoch': 6} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:37:31.372698: step: 2/470, loss: 0.439403235912323 2023-01-22 17:37:32.183716: step: 4/470, loss: 0.4706476330757141 2023-01-22 17:37:32.960572: step: 6/470, loss: 0.16600431501865387 2023-01-22 17:37:33.706239: step: 8/470, loss: 0.3319374620914459 2023-01-22 17:37:34.398690: step: 10/470, loss: 0.3293440639972687 2023-01-22 17:37:35.169656: step: 12/470, loss: 0.5663127899169922 2023-01-22 17:37:35.932189: step: 14/470, loss: 0.45440736413002014 2023-01-22 17:37:36.746546: step: 16/470, loss: 0.5999109745025635 2023-01-22 17:37:37.506511: step: 18/470, loss: 0.3009253442287445 2023-01-22 17:37:38.190912: step: 20/470, loss: 0.1967972069978714 2023-01-22 17:37:38.947222: step: 22/470, loss: 0.26259857416152954 2023-01-22 17:37:39.684534: step: 24/470, loss: 0.492567241191864 2023-01-22 17:37:40.413991: step: 26/470, loss: 0.24835701286792755 2023-01-22 17:37:41.156542: step: 28/470, loss: 0.11328727006912231 2023-01-22 17:37:41.913062: step: 30/470, loss: 0.38536766171455383 2023-01-22 17:37:42.775330: step: 32/470, loss: 0.14614824950695038 2023-01-22 17:37:43.593310: step: 34/470, loss: 0.8179748058319092 2023-01-22 17:37:44.364839: step: 36/470, loss: 0.6151262521743774 2023-01-22 17:37:45.104004: step: 38/470, loss: 0.11764610558748245 2023-01-22 17:37:45.867816: step: 40/470, loss: 0.6587837338447571 2023-01-22 17:37:46.607274: step: 42/470, loss: 0.168352410197258 2023-01-22 17:37:47.319821: step: 44/470, loss: 0.2563874423503876 2023-01-22 17:37:48.071373: step: 46/470, loss: 0.6100096702575684 2023-01-22 17:37:48.853047: step: 48/470, loss: 1.589992880821228 2023-01-22 17:37:49.712471: step: 50/470, loss: 0.5913103222846985 2023-01-22 17:37:50.466106: step: 52/470, loss: 0.4870699942111969 2023-01-22 17:37:51.273590: step: 54/470, loss: 0.1689032018184662 2023-01-22 17:37:52.041673: step: 56/470, loss: 0.1719590425491333 2023-01-22 17:37:52.831809: step: 58/470, loss: 0.3236994743347168 2023-01-22 17:37:53.613728: step: 60/470, loss: 0.4069594740867615 2023-01-22 17:37:54.287566: step: 62/470, loss: 0.25249919295310974 2023-01-22 17:37:55.074143: step: 64/470, loss: 0.6021767854690552 2023-01-22 17:37:55.790918: step: 66/470, loss: 1.9965308904647827 2023-01-22 17:37:56.509526: step: 68/470, loss: 0.6415644884109497 2023-01-22 17:37:57.222228: step: 70/470, loss: 0.22782424092292786 2023-01-22 17:37:57.979297: step: 72/470, loss: 0.39690855145454407 2023-01-22 17:37:58.742757: step: 74/470, loss: 0.31533282995224 2023-01-22 17:37:59.530710: step: 76/470, loss: 0.1685294210910797 2023-01-22 17:38:00.380014: step: 78/470, loss: 0.2691434323787689 2023-01-22 17:38:01.186415: step: 80/470, loss: 0.16956226527690887 2023-01-22 17:38:01.966847: step: 82/470, loss: 1.0252076387405396 2023-01-22 17:38:02.701677: step: 84/470, loss: 0.39817947149276733 2023-01-22 17:38:03.529044: step: 86/470, loss: 0.23661364614963531 2023-01-22 17:38:04.229237: step: 88/470, loss: 0.4424164593219757 2023-01-22 17:38:05.029995: step: 90/470, loss: 0.2330237776041031 2023-01-22 17:38:05.923763: step: 92/470, loss: 0.5269856452941895 2023-01-22 17:38:06.711598: step: 94/470, loss: 0.7007234692573547 2023-01-22 17:38:07.412595: step: 96/470, loss: 0.29682648181915283 2023-01-22 17:38:08.141332: step: 98/470, loss: 0.23675212264060974 2023-01-22 17:38:08.839496: step: 100/470, loss: 0.3501730263233185 2023-01-22 17:38:09.546085: step: 102/470, loss: 0.1919550895690918 2023-01-22 17:38:10.338760: step: 104/470, loss: 1.0851826667785645 2023-01-22 17:38:11.160050: step: 106/470, loss: 0.4781433343887329 2023-01-22 17:38:11.878547: step: 108/470, loss: 0.1578303575515747 2023-01-22 17:38:12.660284: step: 110/470, loss: 0.9350889325141907 2023-01-22 17:38:13.403822: step: 112/470, loss: 0.3748646378517151 2023-01-22 17:38:14.097087: step: 114/470, loss: 0.16980983316898346 2023-01-22 17:38:14.916470: step: 116/470, loss: 0.30513840913772583 2023-01-22 17:38:15.696765: step: 118/470, loss: 0.15757113695144653 2023-01-22 17:38:16.404816: step: 120/470, loss: 0.23493137955665588 2023-01-22 17:38:17.134187: step: 122/470, loss: 0.06003459915518761 2023-01-22 17:38:17.944647: step: 124/470, loss: 0.7464461326599121 2023-01-22 17:38:18.683455: step: 126/470, loss: 0.4456807076931 2023-01-22 17:38:19.514851: step: 128/470, loss: 0.14154116809368134 2023-01-22 17:38:20.295820: step: 130/470, loss: 0.3404453992843628 2023-01-22 17:38:21.038914: step: 132/470, loss: 0.27857649326324463 2023-01-22 17:38:21.847109: step: 134/470, loss: 0.029560066759586334 2023-01-22 17:38:22.587871: step: 136/470, loss: 0.18978171050548553 2023-01-22 17:38:23.364812: step: 138/470, loss: 0.5333614349365234 2023-01-22 17:38:24.093397: step: 140/470, loss: 0.13952665030956268 2023-01-22 17:38:24.795167: step: 142/470, loss: 1.607400894165039 2023-01-22 17:38:25.559420: step: 144/470, loss: 0.4059104919433594 2023-01-22 17:38:26.282976: step: 146/470, loss: 0.7796083688735962 2023-01-22 17:38:27.049411: step: 148/470, loss: 0.24871180951595306 2023-01-22 17:38:27.947771: step: 150/470, loss: 0.38717377185821533 2023-01-22 17:38:28.685534: step: 152/470, loss: 0.8463844656944275 2023-01-22 17:38:29.512990: step: 154/470, loss: 0.4317239820957184 2023-01-22 17:38:30.236000: step: 156/470, loss: 0.2562013864517212 2023-01-22 17:38:30.993584: step: 158/470, loss: 0.2918889820575714 2023-01-22 17:38:31.770243: step: 160/470, loss: 0.16746020317077637 2023-01-22 17:38:32.513615: step: 162/470, loss: 0.8833320140838623 2023-01-22 17:38:33.253637: step: 164/470, loss: 0.19680127501487732 2023-01-22 17:38:33.965794: step: 166/470, loss: 0.21358007192611694 2023-01-22 17:38:34.779398: step: 168/470, loss: 1.5736429691314697 2023-01-22 17:38:35.617553: step: 170/470, loss: 0.2858242094516754 2023-01-22 17:38:36.310302: step: 172/470, loss: 0.436594158411026 2023-01-22 17:38:37.099744: step: 174/470, loss: 0.25975707173347473 2023-01-22 17:38:37.853521: step: 176/470, loss: 0.5039308071136475 2023-01-22 17:38:38.618002: step: 178/470, loss: 0.5124750137329102 2023-01-22 17:38:39.421964: step: 180/470, loss: 0.3290499746799469 2023-01-22 17:38:40.163342: step: 182/470, loss: 0.3273886740207672 2023-01-22 17:38:40.950933: step: 184/470, loss: 0.18152934312820435 2023-01-22 17:38:41.619732: step: 186/470, loss: 0.14718110859394073 2023-01-22 17:38:42.355585: step: 188/470, loss: 0.18126271665096283 2023-01-22 17:38:43.191040: step: 190/470, loss: 0.34319642186164856 2023-01-22 17:38:43.885906: step: 192/470, loss: 0.179406076669693 2023-01-22 17:38:44.617429: step: 194/470, loss: 0.38715943694114685 2023-01-22 17:38:45.380433: step: 196/470, loss: 0.42315950989723206 2023-01-22 17:38:46.138807: step: 198/470, loss: 0.4406783878803253 2023-01-22 17:38:46.917325: step: 200/470, loss: 1.8089121580123901 2023-01-22 17:38:47.685357: step: 202/470, loss: 0.11779771745204926 2023-01-22 17:38:48.390434: step: 204/470, loss: 0.19911624491214752 2023-01-22 17:38:49.166834: step: 206/470, loss: 0.8379590511322021 2023-01-22 17:38:49.981299: step: 208/470, loss: 0.1292008012533188 2023-01-22 17:38:50.722270: step: 210/470, loss: 1.0208288431167603 2023-01-22 17:38:51.587230: step: 212/470, loss: 0.29888206720352173 2023-01-22 17:38:52.301494: step: 214/470, loss: 0.5137273073196411 2023-01-22 17:38:53.129924: step: 216/470, loss: 0.16628697514533997 2023-01-22 17:38:53.871069: step: 218/470, loss: 1.1978864669799805 2023-01-22 17:38:54.574027: step: 220/470, loss: 0.34227266907691956 2023-01-22 17:38:55.307677: step: 222/470, loss: 0.4677780866622925 2023-01-22 17:38:56.074535: step: 224/470, loss: 0.7787224054336548 2023-01-22 17:38:56.875099: step: 226/470, loss: 0.5353028178215027 2023-01-22 17:38:57.671522: step: 228/470, loss: 0.8215941786766052 2023-01-22 17:38:58.363436: step: 230/470, loss: 0.46585798263549805 2023-01-22 17:38:59.173968: step: 232/470, loss: 0.2040032297372818 2023-01-22 17:38:59.924662: step: 234/470, loss: 0.4652753174304962 2023-01-22 17:39:00.630927: step: 236/470, loss: 0.4290299713611603 2023-01-22 17:39:01.380645: step: 238/470, loss: 0.7905390858650208 2023-01-22 17:39:02.139833: step: 240/470, loss: 0.8647803664207458 2023-01-22 17:39:02.952865: step: 242/470, loss: 0.4174932837486267 2023-01-22 17:39:03.678101: step: 244/470, loss: 0.27988308668136597 2023-01-22 17:39:04.437501: step: 246/470, loss: 0.5162237882614136 2023-01-22 17:39:05.199826: step: 248/470, loss: 0.5198226571083069 2023-01-22 17:39:05.968317: step: 250/470, loss: 0.9243934154510498 2023-01-22 17:39:06.744189: step: 252/470, loss: 0.9011183977127075 2023-01-22 17:39:07.490275: step: 254/470, loss: 0.1754419207572937 2023-01-22 17:39:08.249000: step: 256/470, loss: 0.5413885712623596 2023-01-22 17:39:08.966575: step: 258/470, loss: 0.28291255235671997 2023-01-22 17:39:09.725591: step: 260/470, loss: 0.3509974181652069 2023-01-22 17:39:10.591057: step: 262/470, loss: 0.22708922624588013 2023-01-22 17:39:11.308772: step: 264/470, loss: 0.22111210227012634 2023-01-22 17:39:12.087219: step: 266/470, loss: 0.19584240019321442 2023-01-22 17:39:12.878658: step: 268/470, loss: 0.3218596279621124 2023-01-22 17:39:13.588255: step: 270/470, loss: 0.5958482623100281 2023-01-22 17:39:14.326189: step: 272/470, loss: 0.6783241629600525 2023-01-22 17:39:15.010063: step: 274/470, loss: 0.40668368339538574 2023-01-22 17:39:15.844156: step: 276/470, loss: 0.3130112290382385 2023-01-22 17:39:16.621920: step: 278/470, loss: 0.26188164949417114 2023-01-22 17:39:17.517647: step: 280/470, loss: 0.21814662218093872 2023-01-22 17:39:18.345504: step: 282/470, loss: 0.5404313206672668 2023-01-22 17:39:19.087608: step: 284/470, loss: 0.9255490303039551 2023-01-22 17:39:19.840345: step: 286/470, loss: 1.064405918121338 2023-01-22 17:39:20.712650: step: 288/470, loss: 0.6848692893981934 2023-01-22 17:39:21.445266: step: 290/470, loss: 0.1509696990251541 2023-01-22 17:39:22.221291: step: 292/470, loss: 0.4635472297668457 2023-01-22 17:39:23.018959: step: 294/470, loss: 0.6151084899902344 2023-01-22 17:39:23.769308: step: 296/470, loss: 0.11453315615653992 2023-01-22 17:39:24.556588: step: 298/470, loss: 0.1999867707490921 2023-01-22 17:39:25.329555: step: 300/470, loss: 0.21419981122016907 2023-01-22 17:39:26.150892: step: 302/470, loss: 0.46333491802215576 2023-01-22 17:39:26.987631: step: 304/470, loss: 0.8135701417922974 2023-01-22 17:39:27.708675: step: 306/470, loss: 0.2585707902908325 2023-01-22 17:39:28.548832: step: 308/470, loss: 0.46079540252685547 2023-01-22 17:39:29.319570: step: 310/470, loss: 0.27376070618629456 2023-01-22 17:39:30.098818: step: 312/470, loss: 0.4137362837791443 2023-01-22 17:39:30.816418: step: 314/470, loss: 0.24535976350307465 2023-01-22 17:39:31.601072: step: 316/470, loss: 0.349854052066803 2023-01-22 17:39:32.407170: step: 318/470, loss: 0.4655457139015198 2023-01-22 17:39:33.142875: step: 320/470, loss: 0.2523977756500244 2023-01-22 17:39:33.988760: step: 322/470, loss: 0.4521579146385193 2023-01-22 17:39:34.738848: step: 324/470, loss: 0.4393160045146942 2023-01-22 17:39:35.526749: step: 326/470, loss: 0.8228650689125061 2023-01-22 17:39:36.314937: step: 328/470, loss: 0.4634270966053009 2023-01-22 17:39:37.098705: step: 330/470, loss: 0.6964781284332275 2023-01-22 17:39:37.809294: step: 332/470, loss: 0.45030978322029114 2023-01-22 17:39:38.591976: step: 334/470, loss: 0.3737616539001465 2023-01-22 17:39:39.444570: step: 336/470, loss: 0.8672752976417542 2023-01-22 17:39:40.209105: step: 338/470, loss: 0.14447632431983948 2023-01-22 17:39:40.944156: step: 340/470, loss: 0.5296969413757324 2023-01-22 17:39:41.686664: step: 342/470, loss: 0.7490493655204773 2023-01-22 17:39:42.523520: step: 344/470, loss: 0.28861531615257263 2023-01-22 17:39:43.337404: step: 346/470, loss: 0.5648653507232666 2023-01-22 17:39:44.142982: step: 348/470, loss: 0.964550256729126 2023-01-22 17:39:44.838853: step: 350/470, loss: 0.8278896808624268 2023-01-22 17:39:45.519843: step: 352/470, loss: 0.17448605597019196 2023-01-22 17:39:46.269242: step: 354/470, loss: 0.6799153089523315 2023-01-22 17:39:46.948972: step: 356/470, loss: 0.4022669792175293 2023-01-22 17:39:47.699433: step: 358/470, loss: 0.2065814882516861 2023-01-22 17:39:48.531981: step: 360/470, loss: 0.3100614845752716 2023-01-22 17:39:49.360878: step: 362/470, loss: 0.3368839919567108 2023-01-22 17:39:50.046023: step: 364/470, loss: 1.1598883867263794 2023-01-22 17:39:50.837956: step: 366/470, loss: 0.31141793727874756 2023-01-22 17:39:51.612097: step: 368/470, loss: 0.23780982196331024 2023-01-22 17:39:52.399121: step: 370/470, loss: 0.22582471370697021 2023-01-22 17:39:53.222487: step: 372/470, loss: 0.803047239780426 2023-01-22 17:39:54.027101: step: 374/470, loss: 0.3405497670173645 2023-01-22 17:39:54.800057: step: 376/470, loss: 0.7714990973472595 2023-01-22 17:39:55.563147: step: 378/470, loss: 0.1867769956588745 2023-01-22 17:39:56.326984: step: 380/470, loss: 0.3200242519378662 2023-01-22 17:39:57.075049: step: 382/470, loss: 0.16745364665985107 2023-01-22 17:39:57.746306: step: 384/470, loss: 0.09259352087974548 2023-01-22 17:39:58.496926: step: 386/470, loss: 0.4721934497356415 2023-01-22 17:39:59.224028: step: 388/470, loss: 0.26353833079338074 2023-01-22 17:40:00.081903: step: 390/470, loss: 0.2055242657661438 2023-01-22 17:40:00.955352: step: 392/470, loss: 0.9652345180511475 2023-01-22 17:40:01.738175: step: 394/470, loss: 0.3389044404029846 2023-01-22 17:40:02.439574: step: 396/470, loss: 0.24200768768787384 2023-01-22 17:40:03.188129: step: 398/470, loss: 0.17797183990478516 2023-01-22 17:40:03.951811: step: 400/470, loss: 0.15373848378658295 2023-01-22 17:40:04.697482: step: 402/470, loss: 0.7525474429130554 2023-01-22 17:40:05.450976: step: 404/470, loss: 0.8085270524024963 2023-01-22 17:40:06.172336: step: 406/470, loss: 0.13830354809761047 2023-01-22 17:40:06.991424: step: 408/470, loss: 0.14868831634521484 2023-01-22 17:40:07.758619: step: 410/470, loss: 0.14518840610980988 2023-01-22 17:40:08.703515: step: 412/470, loss: 0.4489766061306 2023-01-22 17:40:09.456812: step: 414/470, loss: 0.24416552484035492 2023-01-22 17:40:10.175202: step: 416/470, loss: 0.2164216786623001 2023-01-22 17:40:10.890632: step: 418/470, loss: 0.0824371725320816 2023-01-22 17:40:11.729047: step: 420/470, loss: 0.09185123443603516 2023-01-22 17:40:12.542840: step: 422/470, loss: 0.3424897789955139 2023-01-22 17:40:13.272741: step: 424/470, loss: 0.3741994798183441 2023-01-22 17:40:14.027338: step: 426/470, loss: 0.14311550557613373 2023-01-22 17:40:14.736701: step: 428/470, loss: 0.22634892165660858 2023-01-22 17:40:15.519957: step: 430/470, loss: 0.2907640337944031 2023-01-22 17:40:16.299619: step: 432/470, loss: 0.1375235915184021 2023-01-22 17:40:17.012406: step: 434/470, loss: 0.27444496750831604 2023-01-22 17:40:17.718421: step: 436/470, loss: 0.18194928765296936 2023-01-22 17:40:18.449346: step: 438/470, loss: 0.3027937412261963 2023-01-22 17:40:19.188972: step: 440/470, loss: 0.22203978896141052 2023-01-22 17:40:19.962078: step: 442/470, loss: 0.40211665630340576 2023-01-22 17:40:20.718619: step: 444/470, loss: 0.9534308910369873 2023-01-22 17:40:21.571106: step: 446/470, loss: 0.3148941993713379 2023-01-22 17:40:22.328756: step: 448/470, loss: 0.25301098823547363 2023-01-22 17:40:23.055731: step: 450/470, loss: 0.283833384513855 2023-01-22 17:40:23.868279: step: 452/470, loss: 0.33523184061050415 2023-01-22 17:40:24.674303: step: 454/470, loss: 0.15356293320655823 2023-01-22 17:40:25.341876: step: 456/470, loss: 0.5051017999649048 2023-01-22 17:40:26.140524: step: 458/470, loss: 0.4202573895454407 2023-01-22 17:40:26.879847: step: 460/470, loss: 0.12335009127855301 2023-01-22 17:40:27.635996: step: 462/470, loss: 0.4301653504371643 2023-01-22 17:40:28.327195: step: 464/470, loss: 1.2519079446792603 2023-01-22 17:40:29.073422: step: 466/470, loss: 0.2109188437461853 2023-01-22 17:40:29.845703: step: 468/470, loss: 0.3727503716945648 2023-01-22 17:40:30.619171: step: 470/470, loss: 0.46644580364227295 2023-01-22 17:40:31.472818: step: 472/470, loss: 0.8295896649360657 2023-01-22 17:40:32.171415: step: 474/470, loss: 0.5774760246276855 2023-01-22 17:40:32.905308: step: 476/470, loss: 0.9126483201980591 2023-01-22 17:40:33.637523: step: 478/470, loss: 0.31328561902046204 2023-01-22 17:40:34.364594: step: 480/470, loss: 0.22493775188922882 2023-01-22 17:40:35.129774: step: 482/470, loss: 0.36869755387306213 2023-01-22 17:40:35.937110: step: 484/470, loss: 0.2161940336227417 2023-01-22 17:40:36.634069: step: 486/470, loss: 0.6317044496536255 2023-01-22 17:40:37.378465: step: 488/470, loss: 0.5619537234306335 2023-01-22 17:40:38.149258: step: 490/470, loss: 1.1349985599517822 2023-01-22 17:40:38.970355: step: 492/470, loss: 0.09379585087299347 2023-01-22 17:40:39.803399: step: 494/470, loss: 0.9672868847846985 2023-01-22 17:40:40.557639: step: 496/470, loss: 1.1263362169265747 2023-01-22 17:40:41.330876: step: 498/470, loss: 0.2689302861690521 2023-01-22 17:40:42.132567: step: 500/470, loss: 0.3239392042160034 2023-01-22 17:40:42.909047: step: 502/470, loss: 0.2518550157546997 2023-01-22 17:40:43.620139: step: 504/470, loss: 0.6391338109970093 2023-01-22 17:40:44.370026: step: 506/470, loss: 0.3393190801143646 2023-01-22 17:40:45.081048: step: 508/470, loss: 0.6354091763496399 2023-01-22 17:40:45.823255: step: 510/470, loss: 0.26952916383743286 2023-01-22 17:40:46.568935: step: 512/470, loss: 0.839598536491394 2023-01-22 17:40:47.351713: step: 514/470, loss: 0.41435518860816956 2023-01-22 17:40:48.132758: step: 516/470, loss: 0.5042211413383484 2023-01-22 17:40:48.901858: step: 518/470, loss: 1.5641471147537231 2023-01-22 17:40:49.654884: step: 520/470, loss: 0.34855765104293823 2023-01-22 17:40:50.423055: step: 522/470, loss: 0.521365761756897 2023-01-22 17:40:51.272483: step: 524/470, loss: 0.33301544189453125 2023-01-22 17:40:51.953863: step: 526/470, loss: 0.5081035494804382 2023-01-22 17:40:52.701893: step: 528/470, loss: 0.20576249063014984 2023-01-22 17:40:53.410047: step: 530/470, loss: 0.10231142491102219 2023-01-22 17:40:54.180743: step: 532/470, loss: 0.46384578943252563 2023-01-22 17:40:54.930888: step: 534/470, loss: 4.53569221496582 2023-01-22 17:40:55.706703: step: 536/470, loss: 0.2767690420150757 2023-01-22 17:40:56.515670: step: 538/470, loss: 0.24536767601966858 2023-01-22 17:40:57.266285: step: 540/470, loss: 0.5284467935562134 2023-01-22 17:40:58.141260: step: 542/470, loss: 1.4372402429580688 2023-01-22 17:40:58.911200: step: 544/470, loss: 0.2401868849992752 2023-01-22 17:40:59.718931: step: 546/470, loss: 0.6595420837402344 2023-01-22 17:41:00.509061: step: 548/470, loss: 0.8081770539283752 2023-01-22 17:41:01.249685: step: 550/470, loss: 0.17885182797908783 2023-01-22 17:41:02.083147: step: 552/470, loss: 0.21516308188438416 2023-01-22 17:41:02.826012: step: 554/470, loss: 0.5545898675918579 2023-01-22 17:41:03.510128: step: 556/470, loss: 0.20706087350845337 2023-01-22 17:41:04.281878: step: 558/470, loss: 0.13563042879104614 2023-01-22 17:41:04.989901: step: 560/470, loss: 0.2453116625547409 2023-01-22 17:41:05.727295: step: 562/470, loss: 0.7842438817024231 2023-01-22 17:41:06.602318: step: 564/470, loss: 0.2593076229095459 2023-01-22 17:41:07.383418: step: 566/470, loss: 0.12692120671272278 2023-01-22 17:41:08.081145: step: 568/470, loss: 0.4532640874385834 2023-01-22 17:41:08.825638: step: 570/470, loss: 0.3847683072090149 2023-01-22 17:41:09.664000: step: 572/470, loss: 0.19840320944786072 2023-01-22 17:41:10.399337: step: 574/470, loss: 0.15931807458400726 2023-01-22 17:41:11.192050: step: 576/470, loss: 0.2908623516559601 2023-01-22 17:41:11.927584: step: 578/470, loss: 0.6550799608230591 2023-01-22 17:41:12.652264: step: 580/470, loss: 0.6560707688331604 2023-01-22 17:41:13.435596: step: 582/470, loss: 0.41067826747894287 2023-01-22 17:41:14.197078: step: 584/470, loss: 0.11992362141609192 2023-01-22 17:41:14.904418: step: 586/470, loss: 0.460469126701355 2023-01-22 17:41:15.723882: step: 588/470, loss: 0.8359988331794739 2023-01-22 17:41:16.466191: step: 590/470, loss: 0.36444351077079773 2023-01-22 17:41:17.251781: step: 592/470, loss: 0.2154173105955124 2023-01-22 17:41:17.999955: step: 594/470, loss: 0.5687950849533081 2023-01-22 17:41:18.725766: step: 596/470, loss: 0.9395169615745544 2023-01-22 17:41:19.468369: step: 598/470, loss: 0.49935394525527954 2023-01-22 17:41:20.226733: step: 600/470, loss: 0.49853333830833435 2023-01-22 17:41:20.955041: step: 602/470, loss: 1.7896212339401245 2023-01-22 17:41:21.676224: step: 604/470, loss: 0.1358480155467987 2023-01-22 17:41:22.432416: step: 606/470, loss: 0.7760083079338074 2023-01-22 17:41:23.148417: step: 608/470, loss: 0.3000049293041229 2023-01-22 17:41:23.891884: step: 610/470, loss: 0.4000217616558075 2023-01-22 17:41:24.624273: step: 612/470, loss: 0.3303145170211792 2023-01-22 17:41:25.414868: step: 614/470, loss: 0.517999529838562 2023-01-22 17:41:26.184225: step: 616/470, loss: 0.37297362089157104 2023-01-22 17:41:26.961310: step: 618/470, loss: 0.2246372401714325 2023-01-22 17:41:27.720044: step: 620/470, loss: 0.9546456336975098 2023-01-22 17:41:28.486582: step: 622/470, loss: 0.9173968434333801 2023-01-22 17:41:29.257960: step: 624/470, loss: 0.35250478982925415 2023-01-22 17:41:30.111491: step: 626/470, loss: 0.8878437280654907 2023-01-22 17:41:30.781157: step: 628/470, loss: 0.49247634410858154 2023-01-22 17:41:31.494816: step: 630/470, loss: 0.3329099416732788 2023-01-22 17:41:32.269383: step: 632/470, loss: 1.0935202836990356 2023-01-22 17:41:33.009428: step: 634/470, loss: 0.2755989730358124 2023-01-22 17:41:33.704923: step: 636/470, loss: 0.6668111085891724 2023-01-22 17:41:34.444196: step: 638/470, loss: 0.24469828605651855 2023-01-22 17:41:35.238549: step: 640/470, loss: 0.22104227542877197 2023-01-22 17:41:35.999224: step: 642/470, loss: 0.16912305355072021 2023-01-22 17:41:36.720623: step: 644/470, loss: 0.10374326258897781 2023-01-22 17:41:37.466803: step: 646/470, loss: 0.20086267590522766 2023-01-22 17:41:38.254549: step: 648/470, loss: 0.1727813482284546 2023-01-22 17:41:39.046068: step: 650/470, loss: 0.13764482736587524 2023-01-22 17:41:39.824412: step: 652/470, loss: 0.3168490529060364 2023-01-22 17:41:40.683388: step: 654/470, loss: 0.5362805128097534 2023-01-22 17:41:41.401204: step: 656/470, loss: 0.40005090832710266 2023-01-22 17:41:42.145028: step: 658/470, loss: 2.0221118927001953 2023-01-22 17:41:42.908958: step: 660/470, loss: 0.46189936995506287 2023-01-22 17:41:43.623474: step: 662/470, loss: 0.6272675395011902 2023-01-22 17:41:44.396473: step: 664/470, loss: 0.44351091980934143 2023-01-22 17:41:45.161393: step: 666/470, loss: 0.31063738465309143 2023-01-22 17:41:45.949708: step: 668/470, loss: 0.41334736347198486 2023-01-22 17:41:46.683837: step: 670/470, loss: 0.10286879539489746 2023-01-22 17:41:47.434605: step: 672/470, loss: 0.5979005098342896 2023-01-22 17:41:48.148364: step: 674/470, loss: 4.1260666847229 2023-01-22 17:41:48.955058: step: 676/470, loss: 0.8004294633865356 2023-01-22 17:41:49.682082: step: 678/470, loss: 0.24940814077854156 2023-01-22 17:41:50.429193: step: 680/470, loss: 0.16425098478794098 2023-01-22 17:41:51.159809: step: 682/470, loss: 0.671233594417572 2023-01-22 17:41:51.942098: step: 684/470, loss: 0.15050573647022247 2023-01-22 17:41:52.773532: step: 686/470, loss: 0.36023542284965515 2023-01-22 17:41:53.547425: step: 688/470, loss: 1.1040581464767456 2023-01-22 17:41:54.326992: step: 690/470, loss: 0.3097226321697235 2023-01-22 17:41:55.025295: step: 692/470, loss: 0.27629736065864563 2023-01-22 17:41:55.740296: step: 694/470, loss: 2.556936264038086 2023-01-22 17:41:56.535616: step: 696/470, loss: 0.2204158753156662 2023-01-22 17:41:57.297817: step: 698/470, loss: 0.7285110950469971 2023-01-22 17:41:58.135066: step: 700/470, loss: 4.453820705413818 2023-01-22 17:41:58.922061: step: 702/470, loss: 0.6035624742507935 2023-01-22 17:41:59.689483: step: 704/470, loss: 0.21447139978408813 2023-01-22 17:42:00.394362: step: 706/470, loss: 0.23186089098453522 2023-01-22 17:42:01.238705: step: 708/470, loss: 0.40758103132247925 2023-01-22 17:42:02.014059: step: 710/470, loss: 0.13845710456371307 2023-01-22 17:42:02.841953: step: 712/470, loss: 3.9527015686035156 2023-01-22 17:42:03.621122: step: 714/470, loss: 0.26543423533439636 2023-01-22 17:42:04.370356: step: 716/470, loss: 0.45242205262184143 2023-01-22 17:42:05.091377: step: 718/470, loss: 2.2168221473693848 2023-01-22 17:42:05.868496: step: 720/470, loss: 0.22453352808952332 2023-01-22 17:42:06.593811: step: 722/470, loss: 0.5967671871185303 2023-01-22 17:42:07.365301: step: 724/470, loss: 0.45324817299842834 2023-01-22 17:42:08.141521: step: 726/470, loss: 1.0313066244125366 2023-01-22 17:42:08.887148: step: 728/470, loss: 0.11269115656614304 2023-01-22 17:42:09.726001: step: 730/470, loss: 0.49560102820396423 2023-01-22 17:42:10.519821: step: 732/470, loss: 0.2794017791748047 2023-01-22 17:42:11.219145: step: 734/470, loss: 0.6522477865219116 2023-01-22 17:42:11.966469: step: 736/470, loss: 0.28647884726524353 2023-01-22 17:42:12.696725: step: 738/470, loss: 0.20137378573417664 2023-01-22 17:42:13.462971: step: 740/470, loss: 0.6079099178314209 2023-01-22 17:42:14.253444: step: 742/470, loss: 0.7093207240104675 2023-01-22 17:42:15.092319: step: 744/470, loss: 0.21907228231430054 2023-01-22 17:42:15.851958: step: 746/470, loss: 0.131643146276474 2023-01-22 17:42:16.649521: step: 748/470, loss: 0.4653341472148895 2023-01-22 17:42:17.380720: step: 750/470, loss: 5.116756916046143 2023-01-22 17:42:18.129832: step: 752/470, loss: 0.3567095100879669 2023-01-22 17:42:18.900533: step: 754/470, loss: 0.18065395951271057 2023-01-22 17:42:19.617720: step: 756/470, loss: 0.2523786723613739 2023-01-22 17:42:20.375832: step: 758/470, loss: 0.9525728225708008 2023-01-22 17:42:21.105536: step: 760/470, loss: 0.30923041701316833 2023-01-22 17:42:21.923071: step: 762/470, loss: 0.32964733242988586 2023-01-22 17:42:22.733234: step: 764/470, loss: 0.2381993532180786 2023-01-22 17:42:23.487178: step: 766/470, loss: 0.18978539109230042 2023-01-22 17:42:24.299888: step: 768/470, loss: 0.7485260963439941 2023-01-22 17:42:25.036307: step: 770/470, loss: 0.25674277544021606 2023-01-22 17:42:25.828358: step: 772/470, loss: 0.22901327908039093 2023-01-22 17:42:26.545711: step: 774/470, loss: 0.2624027729034424 2023-01-22 17:42:27.327555: step: 776/470, loss: 0.14591439068317413 2023-01-22 17:42:28.160491: step: 778/470, loss: 1.4718310832977295 2023-01-22 17:42:28.928676: step: 780/470, loss: 0.26930221915245056 2023-01-22 17:42:29.684904: step: 782/470, loss: 0.7424861192703247 2023-01-22 17:42:30.400481: step: 784/470, loss: 0.29697778820991516 2023-01-22 17:42:31.123463: step: 786/470, loss: 1.1480909585952759 2023-01-22 17:42:31.829029: step: 788/470, loss: 0.8097434639930725 2023-01-22 17:42:32.576741: step: 790/470, loss: 0.5420349836349487 2023-01-22 17:42:33.291654: step: 792/470, loss: 0.21315941214561462 2023-01-22 17:42:34.021510: step: 794/470, loss: 0.2593609094619751 2023-01-22 17:42:34.725117: step: 796/470, loss: 0.21246762573719025 2023-01-22 17:42:35.497225: step: 798/470, loss: 0.45392927527427673 2023-01-22 17:42:36.252213: step: 800/470, loss: 0.4097192883491516 2023-01-22 17:42:37.065490: step: 802/470, loss: 0.9928512573242188 2023-01-22 17:42:37.809340: step: 804/470, loss: 0.5397576689720154 2023-01-22 17:42:38.548008: step: 806/470, loss: 0.0932132676243782 2023-01-22 17:42:39.331525: step: 808/470, loss: 0.43611350655555725 2023-01-22 17:42:40.104431: step: 810/470, loss: 0.5918594598770142 2023-01-22 17:42:40.875905: step: 812/470, loss: 0.23620811104774475 2023-01-22 17:42:41.712776: step: 814/470, loss: 0.5560216307640076 2023-01-22 17:42:42.498671: step: 816/470, loss: 0.598693311214447 2023-01-22 17:42:43.284442: step: 818/470, loss: 0.40362799167633057 2023-01-22 17:42:44.089610: step: 820/470, loss: 0.23154190182685852 2023-01-22 17:42:44.809957: step: 822/470, loss: 0.5826197266578674 2023-01-22 17:42:45.518215: step: 824/470, loss: 0.2394208461046219 2023-01-22 17:42:46.176320: step: 826/470, loss: 0.17906901240348816 2023-01-22 17:42:46.982467: step: 828/470, loss: 0.372158408164978 2023-01-22 17:42:47.705077: step: 830/470, loss: 1.067903757095337 2023-01-22 17:42:48.463981: step: 832/470, loss: 0.1843709796667099 2023-01-22 17:42:49.267034: step: 834/470, loss: 0.2733551263809204 2023-01-22 17:42:50.171416: step: 836/470, loss: 1.0208349227905273 2023-01-22 17:42:50.906495: step: 838/470, loss: 0.6295246481895447 2023-01-22 17:42:51.642303: step: 840/470, loss: 0.357212096452713 2023-01-22 17:42:52.430736: step: 842/470, loss: 0.20953968167304993 2023-01-22 17:42:53.237568: step: 844/470, loss: 0.630314290523529 2023-01-22 17:42:54.109001: step: 846/470, loss: 1.2299977540969849 2023-01-22 17:42:54.880492: step: 848/470, loss: 0.599105179309845 2023-01-22 17:42:55.747249: step: 850/470, loss: 0.8288000226020813 2023-01-22 17:42:56.548523: step: 852/470, loss: 0.1812320500612259 2023-01-22 17:42:57.350599: step: 854/470, loss: 0.23691441118717194 2023-01-22 17:42:58.057135: step: 856/470, loss: 0.3210389316082001 2023-01-22 17:42:58.744638: step: 858/470, loss: 0.5429426431655884 2023-01-22 17:42:59.510538: step: 860/470, loss: 0.13180933892726898 2023-01-22 17:43:00.224554: step: 862/470, loss: 1.8928117752075195 2023-01-22 17:43:00.972928: step: 864/470, loss: 0.7599416971206665 2023-01-22 17:43:01.742981: step: 866/470, loss: 0.5055848360061646 2023-01-22 17:43:02.499256: step: 868/470, loss: 0.1821976751089096 2023-01-22 17:43:03.200340: step: 870/470, loss: 1.22907555103302 2023-01-22 17:43:04.022285: step: 872/470, loss: 0.3916356861591339 2023-01-22 17:43:04.696056: step: 874/470, loss: 0.26299530267715454 2023-01-22 17:43:05.410491: step: 876/470, loss: 0.25578999519348145 2023-01-22 17:43:06.172916: step: 878/470, loss: 0.37457263469696045 2023-01-22 17:43:06.847962: step: 880/470, loss: 0.3378101885318756 2023-01-22 17:43:07.502649: step: 882/470, loss: 1.6557979583740234 2023-01-22 17:43:08.279438: step: 884/470, loss: 0.39262452721595764 2023-01-22 17:43:09.052855: step: 886/470, loss: 0.4881884753704071 2023-01-22 17:43:09.749047: step: 888/470, loss: 0.19406718015670776 2023-01-22 17:43:10.577445: step: 890/470, loss: 0.2705124318599701 2023-01-22 17:43:11.290167: step: 892/470, loss: 0.34521085023880005 2023-01-22 17:43:12.052474: step: 894/470, loss: 0.45954829454421997 2023-01-22 17:43:12.807961: step: 896/470, loss: 0.14961890876293182 2023-01-22 17:43:13.558721: step: 898/470, loss: 1.0215060710906982 2023-01-22 17:43:14.325515: step: 900/470, loss: 1.122787594795227 2023-01-22 17:43:15.084171: step: 902/470, loss: 0.2572832703590393 2023-01-22 17:43:15.946650: step: 904/470, loss: 0.19345666468143463 2023-01-22 17:43:16.719611: step: 906/470, loss: 0.4549930989742279 2023-01-22 17:43:17.453464: step: 908/470, loss: 0.32068750262260437 2023-01-22 17:43:18.196037: step: 910/470, loss: 0.5561640858650208 2023-01-22 17:43:19.194105: step: 912/470, loss: 0.2232218235731125 2023-01-22 17:43:19.961314: step: 914/470, loss: 0.2716374099254608 2023-01-22 17:43:20.666436: step: 916/470, loss: 0.1163991242647171 2023-01-22 17:43:21.524329: step: 918/470, loss: 0.3325546085834503 2023-01-22 17:43:22.219645: step: 920/470, loss: 0.5324735641479492 2023-01-22 17:43:22.958733: step: 922/470, loss: 0.1445450484752655 2023-01-22 17:43:23.786584: step: 924/470, loss: 0.61869215965271 2023-01-22 17:43:24.540797: step: 926/470, loss: 0.2812950611114502 2023-01-22 17:43:25.363236: step: 928/470, loss: 0.8153495192527771 2023-01-22 17:43:26.095337: step: 930/470, loss: 0.4129032492637634 2023-01-22 17:43:26.875885: step: 932/470, loss: 0.3423391282558441 2023-01-22 17:43:27.714052: step: 934/470, loss: 1.508226752281189 2023-01-22 17:43:28.506167: step: 936/470, loss: 0.32150372862815857 2023-01-22 17:43:29.264434: step: 938/470, loss: 0.3547199070453644 2023-01-22 17:43:30.034330: step: 940/470, loss: 0.3670746386051178 2023-01-22 17:43:30.725561: step: 942/470, loss: 0.5506512522697449 ================================================== Loss: 0.508 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3049308236808237, 'r': 0.3211320818650041, 'f1': 0.3128218246633219}, 'combined': 0.23050029185718457, 'epoch': 8} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32065206847366756, 'r': 0.3490174437617227, 'f1': 0.3342340161254067}, 'combined': 0.23279981222665144, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29067732698296866, 'r': 0.3182558210041232, 'f1': 0.3038420609948785}, 'combined': 0.22388362389096308, 'epoch': 8} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3310886777617448, 'r': 0.3488994039429722, 'f1': 0.3397607853177119}, 'combined': 0.23664930320636654, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27751515331590204, 'r': 0.3164831255082678, 'f1': 0.29572093465045596}, 'combined': 0.2178996360582307, 'epoch': 8} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3162120582663088, 'r': 0.3614719974417939, 'f1': 0.3373306583029202}, 'combined': 0.23495667742491955, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24523809523809523, 'r': 0.31530612244897954, 'f1': 0.27589285714285716}, 'combined': 0.18392857142857144, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27941176470588236, 'r': 0.41304347826086957, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 8} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3049308236808237, 'r': 0.3211320818650041, 'f1': 0.3128218246633219}, 'combined': 0.23050029185718457, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32065206847366756, 'r': 0.3490174437617227, 'f1': 0.3342340161254067}, 'combined': 0.23279981222665144, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24523809523809523, 'r': 0.31530612244897954, 'f1': 0.27589285714285716}, 'combined': 0.18392857142857144, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26474047961800956, 'r': 0.3240182340675829, 'f1': 0.29139523780480575}, 'combined': 0.2147122804877516, 'epoch': 7} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32541475117674507, 'r': 0.34573363573629207, 'f1': 0.33526661835256644}, 'combined': 0.23351903765850401, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2722222222222222, 'r': 0.532608695652174, 'f1': 0.36029411764705876}, 'combined': 0.18014705882352938, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27751515331590204, 'r': 0.3164831255082678, 'f1': 0.29572093465045596}, 'combined': 0.2178996360582307, 'epoch': 8} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3162120582663088, 'r': 0.3614719974417939, 'f1': 0.3373306583029202}, 'combined': 0.23495667742491955, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 8} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:46:25.632716: step: 2/470, loss: 0.5447205901145935 2023-01-22 17:46:26.400413: step: 4/470, loss: 0.22318868339061737 2023-01-22 17:46:27.154100: step: 6/470, loss: 0.3136242926120758 2023-01-22 17:46:27.924777: step: 8/470, loss: 0.28907865285873413 2023-01-22 17:46:28.713580: step: 10/470, loss: 0.2637608051300049 2023-01-22 17:46:29.511978: step: 12/470, loss: 0.5738874077796936 2023-01-22 17:46:30.250794: step: 14/470, loss: 1.0422797203063965 2023-01-22 17:46:31.053478: step: 16/470, loss: 0.602383553981781 2023-01-22 17:46:31.822810: step: 18/470, loss: 0.08700596541166306 2023-01-22 17:46:32.631403: step: 20/470, loss: 0.6579717993736267 2023-01-22 17:46:33.476470: step: 22/470, loss: 0.17143812775611877 2023-01-22 17:46:34.263178: step: 24/470, loss: 0.21379876136779785 2023-01-22 17:46:35.090068: step: 26/470, loss: 0.6119391322135925 2023-01-22 17:46:35.872371: step: 28/470, loss: 0.23522713780403137 2023-01-22 17:46:36.614362: step: 30/470, loss: 0.5445173978805542 2023-01-22 17:46:37.389709: step: 32/470, loss: 3.121372938156128 2023-01-22 17:46:38.136405: step: 34/470, loss: 0.2587936520576477 2023-01-22 17:46:38.908979: step: 36/470, loss: 0.5228575468063354 2023-01-22 17:46:39.753192: step: 38/470, loss: 0.3780156075954437 2023-01-22 17:46:40.450468: step: 40/470, loss: 0.4384208023548126 2023-01-22 17:46:41.248395: step: 42/470, loss: 0.2887096107006073 2023-01-22 17:46:41.982801: step: 44/470, loss: 0.2585183382034302 2023-01-22 17:46:42.740925: step: 46/470, loss: 0.2520710825920105 2023-01-22 17:46:43.440282: step: 48/470, loss: 0.7488427758216858 2023-01-22 17:46:44.184320: step: 50/470, loss: 0.10620920360088348 2023-01-22 17:46:45.028050: step: 52/470, loss: 0.14906106889247894 2023-01-22 17:46:45.782572: step: 54/470, loss: 0.6837480068206787 2023-01-22 17:46:46.634025: step: 56/470, loss: 0.15488582849502563 2023-01-22 17:46:47.375070: step: 58/470, loss: 0.9800782203674316 2023-01-22 17:46:48.156814: step: 60/470, loss: 0.2453881949186325 2023-01-22 17:46:48.884363: step: 62/470, loss: 0.25628599524497986 2023-01-22 17:46:49.661954: step: 64/470, loss: 0.5019969940185547 2023-01-22 17:46:50.367498: step: 66/470, loss: 0.15023790299892426 2023-01-22 17:46:51.210724: step: 68/470, loss: 0.39702898263931274 2023-01-22 17:46:51.961601: step: 70/470, loss: 0.5472665429115295 2023-01-22 17:46:52.669055: step: 72/470, loss: 0.5863292217254639 2023-01-22 17:46:53.421958: step: 74/470, loss: 0.13345842063426971 2023-01-22 17:46:54.319071: step: 76/470, loss: 0.1856902539730072 2023-01-22 17:46:55.081290: step: 78/470, loss: 0.19066360592842102 2023-01-22 17:46:55.843903: step: 80/470, loss: 0.2122010886669159 2023-01-22 17:46:56.625935: step: 82/470, loss: 0.49828097224235535 2023-01-22 17:46:57.327161: step: 84/470, loss: 0.15491345524787903 2023-01-22 17:46:58.117211: step: 86/470, loss: 0.1964101791381836 2023-01-22 17:46:58.860608: step: 88/470, loss: 0.1925598680973053 2023-01-22 17:46:59.621447: step: 90/470, loss: 0.5155028700828552 2023-01-22 17:47:00.318947: step: 92/470, loss: 0.21076497435569763 2023-01-22 17:47:01.065732: step: 94/470, loss: 0.18973298370838165 2023-01-22 17:47:01.882321: step: 96/470, loss: 0.5644153356552124 2023-01-22 17:47:02.675054: step: 98/470, loss: 0.10288572311401367 2023-01-22 17:47:03.411847: step: 100/470, loss: 0.07585206627845764 2023-01-22 17:47:04.174021: step: 102/470, loss: 0.3527693748474121 2023-01-22 17:47:04.941167: step: 104/470, loss: 0.14347948133945465 2023-01-22 17:47:05.779890: step: 106/470, loss: 0.36973798274993896 2023-01-22 17:47:06.496208: step: 108/470, loss: 0.38219624757766724 2023-01-22 17:47:07.216139: step: 110/470, loss: 0.744696319103241 2023-01-22 17:47:07.996152: step: 112/470, loss: 0.36855944991111755 2023-01-22 17:47:08.856438: step: 114/470, loss: 0.1453740894794464 2023-01-22 17:47:09.625702: step: 116/470, loss: 1.321637511253357 2023-01-22 17:47:10.384790: step: 118/470, loss: 0.9926891922950745 2023-01-22 17:47:11.108880: step: 120/470, loss: 0.15449316799640656 2023-01-22 17:47:11.927135: step: 122/470, loss: 0.14840567111968994 2023-01-22 17:47:12.697705: step: 124/470, loss: 0.18444156646728516 2023-01-22 17:47:13.448586: step: 126/470, loss: 0.3966729938983917 2023-01-22 17:47:14.173189: step: 128/470, loss: 1.7696688175201416 2023-01-22 17:47:14.871800: step: 130/470, loss: 0.30441614985466003 2023-01-22 17:47:15.613842: step: 132/470, loss: 0.17320138216018677 2023-01-22 17:47:16.324708: step: 134/470, loss: 0.2448633313179016 2023-01-22 17:47:17.120704: step: 136/470, loss: 0.2567095458507538 2023-01-22 17:47:17.933169: step: 138/470, loss: 0.04755272716283798 2023-01-22 17:47:18.619290: step: 140/470, loss: 0.15079620480537415 2023-01-22 17:47:19.425073: step: 142/470, loss: 0.5248032212257385 2023-01-22 17:47:20.142785: step: 144/470, loss: 0.3448765277862549 2023-01-22 17:47:20.902224: step: 146/470, loss: 0.45058566331863403 2023-01-22 17:47:21.626654: step: 148/470, loss: 0.1982106864452362 2023-01-22 17:47:22.376893: step: 150/470, loss: 0.17295490205287933 2023-01-22 17:47:23.141535: step: 152/470, loss: 1.0078809261322021 2023-01-22 17:47:23.913043: step: 154/470, loss: 0.33238446712493896 2023-01-22 17:47:24.658531: step: 156/470, loss: 0.15200795233249664 2023-01-22 17:47:25.428858: step: 158/470, loss: 0.07408700883388519 2023-01-22 17:47:26.227874: step: 160/470, loss: 0.26883095502853394 2023-01-22 17:47:26.963333: step: 162/470, loss: 0.9753660559654236 2023-01-22 17:47:27.770636: step: 164/470, loss: 0.3113194704055786 2023-01-22 17:47:28.479886: step: 166/470, loss: 0.25497275590896606 2023-01-22 17:47:29.302480: step: 168/470, loss: 0.4309341311454773 2023-01-22 17:47:30.092685: step: 170/470, loss: 0.07410217821598053 2023-01-22 17:47:30.800642: step: 172/470, loss: 0.8484674096107483 2023-01-22 17:47:31.555398: step: 174/470, loss: 0.28762853145599365 2023-01-22 17:47:32.346686: step: 176/470, loss: 0.14900214970111847 2023-01-22 17:47:33.130139: step: 178/470, loss: 0.14563746750354767 2023-01-22 17:47:33.918197: step: 180/470, loss: 0.2309417426586151 2023-01-22 17:47:34.694399: step: 182/470, loss: 0.7100458145141602 2023-01-22 17:47:35.483583: step: 184/470, loss: 0.2716662287712097 2023-01-22 17:47:36.250902: step: 186/470, loss: 0.28548765182495117 2023-01-22 17:47:37.036562: step: 188/470, loss: 0.8044639229774475 2023-01-22 17:47:37.987294: step: 190/470, loss: 0.19010743498802185 2023-01-22 17:47:38.674321: step: 192/470, loss: 0.2690931558609009 2023-01-22 17:47:39.403786: step: 194/470, loss: 0.1267169564962387 2023-01-22 17:47:40.187113: step: 196/470, loss: 0.9669914245605469 2023-01-22 17:47:41.019038: step: 198/470, loss: 0.5740648508071899 2023-01-22 17:47:41.785300: step: 200/470, loss: 0.16114988923072815 2023-01-22 17:47:42.538199: step: 202/470, loss: 0.14519870281219482 2023-01-22 17:47:43.292245: step: 204/470, loss: 0.18535593152046204 2023-01-22 17:47:44.044836: step: 206/470, loss: 0.34165194630622864 2023-01-22 17:47:44.813652: step: 208/470, loss: 0.1012420654296875 2023-01-22 17:47:45.633003: step: 210/470, loss: 0.35992568731307983 2023-01-22 17:47:46.456834: step: 212/470, loss: 1.1616597175598145 2023-01-22 17:47:47.263850: step: 214/470, loss: 0.3503967523574829 2023-01-22 17:47:48.048793: step: 216/470, loss: 0.5987624526023865 2023-01-22 17:47:48.860452: step: 218/470, loss: 0.17567022144794464 2023-01-22 17:47:49.626360: step: 220/470, loss: 0.16215509176254272 2023-01-22 17:47:50.441062: step: 222/470, loss: 0.3935254216194153 2023-01-22 17:47:51.166071: step: 224/470, loss: 0.45323100686073303 2023-01-22 17:47:51.994924: step: 226/470, loss: 1.0208436250686646 2023-01-22 17:47:52.773879: step: 228/470, loss: 0.39432036876678467 2023-01-22 17:47:53.598035: step: 230/470, loss: 0.2786700129508972 2023-01-22 17:47:54.312507: step: 232/470, loss: 0.19462734460830688 2023-01-22 17:47:55.177646: step: 234/470, loss: 0.12101204693317413 2023-01-22 17:47:55.953094: step: 236/470, loss: 0.17866027355194092 2023-01-22 17:47:56.708557: step: 238/470, loss: 0.2924273610115051 2023-01-22 17:47:57.514247: step: 240/470, loss: 0.5252290368080139 2023-01-22 17:47:58.236284: step: 242/470, loss: 0.13447031378746033 2023-01-22 17:47:59.018161: step: 244/470, loss: 0.08729273080825806 2023-01-22 17:47:59.858533: step: 246/470, loss: 0.13195963203907013 2023-01-22 17:48:00.543905: step: 248/470, loss: 0.40723153948783875 2023-01-22 17:48:01.320884: step: 250/470, loss: 0.8148595094680786 2023-01-22 17:48:02.063285: step: 252/470, loss: 0.10905633121728897 2023-01-22 17:48:02.885189: step: 254/470, loss: 0.3877982497215271 2023-01-22 17:48:03.623082: step: 256/470, loss: 0.6170598864555359 2023-01-22 17:48:04.296344: step: 258/470, loss: 0.3111323416233063 2023-01-22 17:48:05.012655: step: 260/470, loss: 0.24980317056179047 2023-01-22 17:48:05.727688: step: 262/470, loss: 0.26733797788619995 2023-01-22 17:48:06.560474: step: 264/470, loss: 0.04581334814429283 2023-01-22 17:48:07.372865: step: 266/470, loss: 0.3556976616382599 2023-01-22 17:48:08.137375: step: 268/470, loss: 0.20936265587806702 2023-01-22 17:48:08.834312: step: 270/470, loss: 0.07905016839504242 2023-01-22 17:48:09.616085: step: 272/470, loss: 0.5653750896453857 2023-01-22 17:48:10.302406: step: 274/470, loss: 0.15013067424297333 2023-01-22 17:48:11.024658: step: 276/470, loss: 0.25626760721206665 2023-01-22 17:48:11.795604: step: 278/470, loss: 0.19960841536521912 2023-01-22 17:48:12.597530: step: 280/470, loss: 0.302801251411438 2023-01-22 17:48:13.312556: step: 282/470, loss: 0.2810971140861511 2023-01-22 17:48:14.068407: step: 284/470, loss: 0.22355952858924866 2023-01-22 17:48:14.825991: step: 286/470, loss: 0.7538304924964905 2023-01-22 17:48:15.571005: step: 288/470, loss: 0.9847416281700134 2023-01-22 17:48:16.396541: step: 290/470, loss: 0.12261106073856354 2023-01-22 17:48:17.134423: step: 292/470, loss: 0.3926984667778015 2023-01-22 17:48:17.897497: step: 294/470, loss: 0.6225259900093079 2023-01-22 17:48:18.698095: step: 296/470, loss: 0.5642341375350952 2023-01-22 17:48:19.493434: step: 298/470, loss: 0.3781619668006897 2023-01-22 17:48:20.229577: step: 300/470, loss: 0.6121640205383301 2023-01-22 17:48:20.995010: step: 302/470, loss: 0.4178549647331238 2023-01-22 17:48:21.651795: step: 304/470, loss: 0.3481173515319824 2023-01-22 17:48:22.425467: step: 306/470, loss: 0.48398879170417786 2023-01-22 17:48:23.192388: step: 308/470, loss: 1.0851445198059082 2023-01-22 17:48:23.948797: step: 310/470, loss: 0.42979589104652405 2023-01-22 17:48:24.678677: step: 312/470, loss: 0.3409073054790497 2023-01-22 17:48:25.562427: step: 314/470, loss: 0.1891537755727768 2023-01-22 17:48:26.299833: step: 316/470, loss: 0.08484365046024323 2023-01-22 17:48:27.055121: step: 318/470, loss: 0.08334700763225555 2023-01-22 17:48:27.778634: step: 320/470, loss: 0.13253387808799744 2023-01-22 17:48:28.531715: step: 322/470, loss: 0.18955287337303162 2023-01-22 17:48:29.279943: step: 324/470, loss: 0.4056585729122162 2023-01-22 17:48:30.092807: step: 326/470, loss: 0.398506224155426 2023-01-22 17:48:30.864209: step: 328/470, loss: 0.4494258165359497 2023-01-22 17:48:31.612996: step: 330/470, loss: 0.5383342504501343 2023-01-22 17:48:32.375965: step: 332/470, loss: 0.3049156665802002 2023-01-22 17:48:33.084510: step: 334/470, loss: 0.3931887745857239 2023-01-22 17:48:33.800728: step: 336/470, loss: 0.3626604676246643 2023-01-22 17:48:34.600482: step: 338/470, loss: 0.19436430931091309 2023-01-22 17:48:35.321370: step: 340/470, loss: 0.12340643256902695 2023-01-22 17:48:36.087977: step: 342/470, loss: 0.10149870067834854 2023-01-22 17:48:36.881993: step: 344/470, loss: 0.26956290006637573 2023-01-22 17:48:37.679264: step: 346/470, loss: 0.35809779167175293 2023-01-22 17:48:38.488875: step: 348/470, loss: 0.2530977129936218 2023-01-22 17:48:39.134584: step: 350/470, loss: 0.20895856618881226 2023-01-22 17:48:39.989885: step: 352/470, loss: 0.1304413229227066 2023-01-22 17:48:40.713877: step: 354/470, loss: 0.31440871953964233 2023-01-22 17:48:41.444842: step: 356/470, loss: 0.09652471542358398 2023-01-22 17:48:42.257910: step: 358/470, loss: 0.8130881786346436 2023-01-22 17:48:42.992985: step: 360/470, loss: 0.2621191740036011 2023-01-22 17:48:43.689433: step: 362/470, loss: 0.2604580819606781 2023-01-22 17:48:44.391344: step: 364/470, loss: 0.27661120891571045 2023-01-22 17:48:45.118125: step: 366/470, loss: 0.23585830628871918 2023-01-22 17:48:45.859503: step: 368/470, loss: 0.2118925154209137 2023-01-22 17:48:46.554158: step: 370/470, loss: 0.9197829365730286 2023-01-22 17:48:47.339612: step: 372/470, loss: 0.3997812867164612 2023-01-22 17:48:48.117705: step: 374/470, loss: 0.20830069482326508 2023-01-22 17:48:48.848871: step: 376/470, loss: 0.3739283084869385 2023-01-22 17:48:49.668169: step: 378/470, loss: 0.3728155195713043 2023-01-22 17:48:50.491833: step: 380/470, loss: 0.1496754288673401 2023-01-22 17:48:51.197750: step: 382/470, loss: 0.08371566236019135 2023-01-22 17:48:52.003078: step: 384/470, loss: 0.33781763911247253 2023-01-22 17:48:52.865879: step: 386/470, loss: 0.1618962287902832 2023-01-22 17:48:53.585512: step: 388/470, loss: 0.42494645714759827 2023-01-22 17:48:54.335911: step: 390/470, loss: 0.19731223583221436 2023-01-22 17:48:55.081143: step: 392/470, loss: 0.21920977532863617 2023-01-22 17:48:55.919819: step: 394/470, loss: 0.767740786075592 2023-01-22 17:48:56.716970: step: 396/470, loss: 0.29436159133911133 2023-01-22 17:48:57.492833: step: 398/470, loss: 0.1768275797367096 2023-01-22 17:48:58.179765: step: 400/470, loss: 0.27189314365386963 2023-01-22 17:48:58.929969: step: 402/470, loss: 0.6092924475669861 2023-01-22 17:48:59.681705: step: 404/470, loss: 0.31457844376564026 2023-01-22 17:49:00.540398: step: 406/470, loss: 0.18229728937149048 2023-01-22 17:49:01.282725: step: 408/470, loss: 0.24030300974845886 2023-01-22 17:49:02.067409: step: 410/470, loss: 0.2986292243003845 2023-01-22 17:49:02.872341: step: 412/470, loss: 0.3573097586631775 2023-01-22 17:49:03.591188: step: 414/470, loss: 0.25912851095199585 2023-01-22 17:49:04.374233: step: 416/470, loss: 0.42473936080932617 2023-01-22 17:49:05.158615: step: 418/470, loss: 0.1592417061328888 2023-01-22 17:49:05.912102: step: 420/470, loss: 0.18681542575359344 2023-01-22 17:49:06.611026: step: 422/470, loss: 0.7109906077384949 2023-01-22 17:49:07.365513: step: 424/470, loss: 0.18564318120479584 2023-01-22 17:49:08.163896: step: 426/470, loss: 0.2508256137371063 2023-01-22 17:49:08.922490: step: 428/470, loss: 0.8340566158294678 2023-01-22 17:49:09.713183: step: 430/470, loss: 0.2437361627817154 2023-01-22 17:49:10.467077: step: 432/470, loss: 0.5458521246910095 2023-01-22 17:49:11.327397: step: 434/470, loss: 0.4009840786457062 2023-01-22 17:49:12.070623: step: 436/470, loss: 0.2011309266090393 2023-01-22 17:49:12.820313: step: 438/470, loss: 0.20616890490055084 2023-01-22 17:49:13.539858: step: 440/470, loss: 0.3620125651359558 2023-01-22 17:49:14.324781: step: 442/470, loss: 0.2353145331144333 2023-01-22 17:49:15.080358: step: 444/470, loss: 0.6447851657867432 2023-01-22 17:49:15.812686: step: 446/470, loss: 1.2982354164123535 2023-01-22 17:49:16.633759: step: 448/470, loss: 0.16182303428649902 2023-01-22 17:49:17.417255: step: 450/470, loss: 0.9789178371429443 2023-01-22 17:49:18.185956: step: 452/470, loss: 0.23408180475234985 2023-01-22 17:49:18.943892: step: 454/470, loss: 0.44805386662483215 2023-01-22 17:49:19.669839: step: 456/470, loss: 0.24204997718334198 2023-01-22 17:49:20.428896: step: 458/470, loss: 2.4867639541625977 2023-01-22 17:49:21.212001: step: 460/470, loss: 0.21604380011558533 2023-01-22 17:49:22.013474: step: 462/470, loss: 0.11348915100097656 2023-01-22 17:49:22.680991: step: 464/470, loss: 0.6297299265861511 2023-01-22 17:49:23.473380: step: 466/470, loss: 1.2539268732070923 2023-01-22 17:49:24.214020: step: 468/470, loss: 0.3964780867099762 2023-01-22 17:49:24.999733: step: 470/470, loss: 0.3635796904563904 2023-01-22 17:49:25.844908: step: 472/470, loss: 0.15717843174934387 2023-01-22 17:49:26.581674: step: 474/470, loss: 0.17565882205963135 2023-01-22 17:49:27.421173: step: 476/470, loss: 0.3112245202064514 2023-01-22 17:49:28.204723: step: 478/470, loss: 0.18767109513282776 2023-01-22 17:49:28.940996: step: 480/470, loss: 0.5524680018424988 2023-01-22 17:49:29.678076: step: 482/470, loss: 0.2581130266189575 2023-01-22 17:49:30.374925: step: 484/470, loss: 0.27895861864089966 2023-01-22 17:49:31.121890: step: 486/470, loss: 0.42541971802711487 2023-01-22 17:49:31.773782: step: 488/470, loss: 0.23396803438663483 2023-01-22 17:49:32.475232: step: 490/470, loss: 0.8111799359321594 2023-01-22 17:49:33.177801: step: 492/470, loss: 0.5374805331230164 2023-01-22 17:49:33.951814: step: 494/470, loss: 0.1266305297613144 2023-01-22 17:49:34.671442: step: 496/470, loss: 0.5135464668273926 2023-01-22 17:49:35.457627: step: 498/470, loss: 0.19670523703098297 2023-01-22 17:49:36.098313: step: 500/470, loss: 0.3329498767852783 2023-01-22 17:49:36.861515: step: 502/470, loss: 0.09014225006103516 2023-01-22 17:49:37.563187: step: 504/470, loss: 0.15468470752239227 2023-01-22 17:49:38.274062: step: 506/470, loss: 0.9147515296936035 2023-01-22 17:49:39.003835: step: 508/470, loss: 0.2802393138408661 2023-01-22 17:49:39.877660: step: 510/470, loss: 0.30511119961738586 2023-01-22 17:49:40.590488: step: 512/470, loss: 0.20601776242256165 2023-01-22 17:49:41.304807: step: 514/470, loss: 0.6622411012649536 2023-01-22 17:49:42.101610: step: 516/470, loss: 0.30970463156700134 2023-01-22 17:49:42.781606: step: 518/470, loss: 0.18782645463943481 2023-01-22 17:49:43.481045: step: 520/470, loss: 0.3975909948348999 2023-01-22 17:49:44.243764: step: 522/470, loss: 0.2327815592288971 2023-01-22 17:49:44.998283: step: 524/470, loss: 0.09739214926958084 2023-01-22 17:49:45.759379: step: 526/470, loss: 0.8930928111076355 2023-01-22 17:49:46.559044: step: 528/470, loss: 0.365343302488327 2023-01-22 17:49:47.295445: step: 530/470, loss: 0.203633651137352 2023-01-22 17:49:48.090481: step: 532/470, loss: 1.6148757934570312 2023-01-22 17:49:48.837733: step: 534/470, loss: 0.30228936672210693 2023-01-22 17:49:49.784011: step: 536/470, loss: 0.23546025156974792 2023-01-22 17:49:50.478353: step: 538/470, loss: 0.1620427817106247 2023-01-22 17:49:51.140044: step: 540/470, loss: 0.16190826892852783 2023-01-22 17:49:51.891101: step: 542/470, loss: 0.0964878499507904 2023-01-22 17:49:52.701710: step: 544/470, loss: 0.12071750313043594 2023-01-22 17:49:53.537339: step: 546/470, loss: 0.43174058198928833 2023-01-22 17:49:54.360431: step: 548/470, loss: 1.1310662031173706 2023-01-22 17:49:55.127631: step: 550/470, loss: 0.5546995401382446 2023-01-22 17:49:56.011167: step: 552/470, loss: 0.7945947051048279 2023-01-22 17:49:56.756271: step: 554/470, loss: 0.5049540996551514 2023-01-22 17:49:57.507772: step: 556/470, loss: 0.09438646584749222 2023-01-22 17:49:58.236883: step: 558/470, loss: 0.9007434844970703 2023-01-22 17:49:58.923826: step: 560/470, loss: 0.10236416757106781 2023-01-22 17:49:59.647314: step: 562/470, loss: 0.957366406917572 2023-01-22 17:50:00.453776: step: 564/470, loss: 0.2416938692331314 2023-01-22 17:50:01.218885: step: 566/470, loss: 0.7880321145057678 2023-01-22 17:50:01.979921: step: 568/470, loss: 0.5304555296897888 2023-01-22 17:50:02.746372: step: 570/470, loss: 0.3842546045780182 2023-01-22 17:50:03.594743: step: 572/470, loss: 0.22081181406974792 2023-01-22 17:50:04.412172: step: 574/470, loss: 0.10166381299495697 2023-01-22 17:50:05.199495: step: 576/470, loss: 0.19878722727298737 2023-01-22 17:50:05.975251: step: 578/470, loss: 0.3669015169143677 2023-01-22 17:50:06.800684: step: 580/470, loss: 0.27071359753608704 2023-01-22 17:50:07.588427: step: 582/470, loss: 0.1869358867406845 2023-01-22 17:50:08.372138: step: 584/470, loss: 0.2697611451148987 2023-01-22 17:50:09.065632: step: 586/470, loss: 0.27031758427619934 2023-01-22 17:50:09.861343: step: 588/470, loss: 0.9854125380516052 2023-01-22 17:50:10.606550: step: 590/470, loss: 0.14197781682014465 2023-01-22 17:50:11.365857: step: 592/470, loss: 0.3427768051624298 2023-01-22 17:50:12.155460: step: 594/470, loss: 0.5351526141166687 2023-01-22 17:50:12.885903: step: 596/470, loss: 0.14945143461227417 2023-01-22 17:50:13.574115: step: 598/470, loss: 0.47398558259010315 2023-01-22 17:50:14.336543: step: 600/470, loss: 0.4863567650318146 2023-01-22 17:50:15.071013: step: 602/470, loss: 0.48081153631210327 2023-01-22 17:50:15.821322: step: 604/470, loss: 0.13413645327091217 2023-01-22 17:50:16.629520: step: 606/470, loss: 0.5402474403381348 2023-01-22 17:50:17.404249: step: 608/470, loss: 0.5544330477714539 2023-01-22 17:50:18.130567: step: 610/470, loss: 0.388392835855484 2023-01-22 17:50:18.864535: step: 612/470, loss: 1.0991339683532715 2023-01-22 17:50:19.679000: step: 614/470, loss: 0.35932278633117676 2023-01-22 17:50:20.454061: step: 616/470, loss: 0.26169684529304504 2023-01-22 17:50:21.231485: step: 618/470, loss: 0.15292510390281677 2023-01-22 17:50:22.111478: step: 620/470, loss: 0.503343939781189 2023-01-22 17:50:22.775340: step: 622/470, loss: 0.2210429310798645 2023-01-22 17:50:23.600437: step: 624/470, loss: 0.11323003470897675 2023-01-22 17:50:24.360336: step: 626/470, loss: 0.49351710081100464 2023-01-22 17:50:25.080739: step: 628/470, loss: 0.19296793639659882 2023-01-22 17:50:25.842007: step: 630/470, loss: 0.17127367854118347 2023-01-22 17:50:26.528798: step: 632/470, loss: 0.3556111454963684 2023-01-22 17:50:27.299219: step: 634/470, loss: 0.31057944893836975 2023-01-22 17:50:27.950351: step: 636/470, loss: 0.42370232939720154 2023-01-22 17:50:28.760805: step: 638/470, loss: 0.8841353058815002 2023-01-22 17:50:29.527948: step: 640/470, loss: 0.24589870870113373 2023-01-22 17:50:30.276092: step: 642/470, loss: 0.1649733930826187 2023-01-22 17:50:30.977507: step: 644/470, loss: 0.6155900955200195 2023-01-22 17:50:31.751765: step: 646/470, loss: 0.36966732144355774 2023-01-22 17:50:32.488882: step: 648/470, loss: 0.2079157680273056 2023-01-22 17:50:33.379342: step: 650/470, loss: 0.8073568344116211 2023-01-22 17:50:34.075852: step: 652/470, loss: 0.19634874165058136 2023-01-22 17:50:34.860919: step: 654/470, loss: 0.3249640166759491 2023-01-22 17:50:35.657694: step: 656/470, loss: 1.5049084424972534 2023-01-22 17:50:36.442487: step: 658/470, loss: 0.42330726981163025 2023-01-22 17:50:37.193362: step: 660/470, loss: 0.20834825932979584 2023-01-22 17:50:37.973571: step: 662/470, loss: 0.4067968428134918 2023-01-22 17:50:38.663535: step: 664/470, loss: 0.3112955093383789 2023-01-22 17:50:39.421490: step: 666/470, loss: 1.358984351158142 2023-01-22 17:50:40.197374: step: 668/470, loss: 0.25829464197158813 2023-01-22 17:50:40.979755: step: 670/470, loss: 0.4780522286891937 2023-01-22 17:50:41.707637: step: 672/470, loss: 0.18391302227973938 2023-01-22 17:50:42.523184: step: 674/470, loss: 0.32501521706581116 2023-01-22 17:50:43.431454: step: 676/470, loss: 0.11137870699167252 2023-01-22 17:50:44.159576: step: 678/470, loss: 1.4713650941848755 2023-01-22 17:50:45.024727: step: 680/470, loss: 1.0248942375183105 2023-01-22 17:50:45.801985: step: 682/470, loss: 0.17500053346157074 2023-01-22 17:50:46.556080: step: 684/470, loss: 0.5887848734855652 2023-01-22 17:50:47.291916: step: 686/470, loss: 0.3135591149330139 2023-01-22 17:50:48.054854: step: 688/470, loss: 0.47807666659355164 2023-01-22 17:50:48.835517: step: 690/470, loss: 0.15201377868652344 2023-01-22 17:50:49.686449: step: 692/470, loss: 0.1818476915359497 2023-01-22 17:50:50.414639: step: 694/470, loss: 0.15883833169937134 2023-01-22 17:50:51.219253: step: 696/470, loss: 0.2723800539970398 2023-01-22 17:50:52.091685: step: 698/470, loss: 0.9609336853027344 2023-01-22 17:50:52.774076: step: 700/470, loss: 0.14401142299175262 2023-01-22 17:50:53.492898: step: 702/470, loss: 0.3710044026374817 2023-01-22 17:50:54.292074: step: 704/470, loss: 0.49077916145324707 2023-01-22 17:50:55.061434: step: 706/470, loss: 0.19224616885185242 2023-01-22 17:50:55.757865: step: 708/470, loss: 0.09910887479782104 2023-01-22 17:50:56.633846: step: 710/470, loss: 2.2329654693603516 2023-01-22 17:50:57.389604: step: 712/470, loss: 0.3099704384803772 2023-01-22 17:50:58.061151: step: 714/470, loss: 0.07138052582740784 2023-01-22 17:50:58.862702: step: 716/470, loss: 0.21385283768177032 2023-01-22 17:50:59.578210: step: 718/470, loss: 0.6778049468994141 2023-01-22 17:51:00.359536: step: 720/470, loss: 0.43242332339286804 2023-01-22 17:51:01.160814: step: 722/470, loss: 0.4446261525154114 2023-01-22 17:51:01.917798: step: 724/470, loss: 0.2377616912126541 2023-01-22 17:51:02.670230: step: 726/470, loss: 0.1549036204814911 2023-01-22 17:51:03.425163: step: 728/470, loss: 0.25162452459335327 2023-01-22 17:51:04.134422: step: 730/470, loss: 0.6771296262741089 2023-01-22 17:51:04.926863: step: 732/470, loss: 0.7284178137779236 2023-01-22 17:51:05.683811: step: 734/470, loss: 0.8691809177398682 2023-01-22 17:51:06.461230: step: 736/470, loss: 0.19968397915363312 2023-01-22 17:51:07.203822: step: 738/470, loss: 0.1259831339120865 2023-01-22 17:51:08.006142: step: 740/470, loss: 0.10791204124689102 2023-01-22 17:51:08.761249: step: 742/470, loss: 0.31472277641296387 2023-01-22 17:51:09.521266: step: 744/470, loss: 0.19977986812591553 2023-01-22 17:51:10.244752: step: 746/470, loss: 0.15267623960971832 2023-01-22 17:51:11.010676: step: 748/470, loss: 2.1145780086517334 2023-01-22 17:51:11.723088: step: 750/470, loss: 0.2913758456707001 2023-01-22 17:51:12.506154: step: 752/470, loss: 0.16115057468414307 2023-01-22 17:51:13.259075: step: 754/470, loss: 0.1917055994272232 2023-01-22 17:51:14.039608: step: 756/470, loss: 0.13388597965240479 2023-01-22 17:51:14.778378: step: 758/470, loss: 0.8417869210243225 2023-01-22 17:51:15.597648: step: 760/470, loss: 0.259040892124176 2023-01-22 17:51:16.425809: step: 762/470, loss: 1.860278606414795 2023-01-22 17:51:17.249442: step: 764/470, loss: 0.15627700090408325 2023-01-22 17:51:17.966801: step: 766/470, loss: 0.2632465958595276 2023-01-22 17:51:18.659002: step: 768/470, loss: 0.5329182147979736 2023-01-22 17:51:19.401112: step: 770/470, loss: 0.17463798820972443 2023-01-22 17:51:20.137312: step: 772/470, loss: 0.3025292158126831 2023-01-22 17:51:20.935671: step: 774/470, loss: 0.0922599583864212 2023-01-22 17:51:21.769032: step: 776/470, loss: 0.17804045975208282 2023-01-22 17:51:22.466721: step: 778/470, loss: 0.5846086740493774 2023-01-22 17:51:23.239248: step: 780/470, loss: 1.0991511344909668 2023-01-22 17:51:23.940771: step: 782/470, loss: 0.20432426035404205 2023-01-22 17:51:24.588733: step: 784/470, loss: 0.1008855476975441 2023-01-22 17:51:25.355935: step: 786/470, loss: 1.1069185733795166 2023-01-22 17:51:26.140283: step: 788/470, loss: 0.32851848006248474 2023-01-22 17:51:26.895861: step: 790/470, loss: 0.3457392454147339 2023-01-22 17:51:27.633053: step: 792/470, loss: 0.14442162215709686 2023-01-22 17:51:28.396549: step: 794/470, loss: 0.12679028511047363 2023-01-22 17:51:29.144596: step: 796/470, loss: 0.12389878928661346 2023-01-22 17:51:29.853638: step: 798/470, loss: 0.4223572909832001 2023-01-22 17:51:30.632669: step: 800/470, loss: 0.9779374599456787 2023-01-22 17:51:31.368081: step: 802/470, loss: 0.849580705165863 2023-01-22 17:51:32.069589: step: 804/470, loss: 0.32323935627937317 2023-01-22 17:51:32.829698: step: 806/470, loss: 0.3388105034828186 2023-01-22 17:51:33.598529: step: 808/470, loss: 0.7310128211975098 2023-01-22 17:51:34.411305: step: 810/470, loss: 0.24826985597610474 2023-01-22 17:51:35.185352: step: 812/470, loss: 0.2552504539489746 2023-01-22 17:51:35.948388: step: 814/470, loss: 0.2834188938140869 2023-01-22 17:51:36.678371: step: 816/470, loss: 0.6483306884765625 2023-01-22 17:51:37.414677: step: 818/470, loss: 0.4713938236236572 2023-01-22 17:51:38.218524: step: 820/470, loss: 0.26083284616470337 2023-01-22 17:51:38.978849: step: 822/470, loss: 0.20542579889297485 2023-01-22 17:51:39.697123: step: 824/470, loss: 0.2176228165626526 2023-01-22 17:51:40.401030: step: 826/470, loss: 0.2208702117204666 2023-01-22 17:51:41.248932: step: 828/470, loss: 0.27593034505844116 2023-01-22 17:51:41.962741: step: 830/470, loss: 0.4739680588245392 2023-01-22 17:51:42.712755: step: 832/470, loss: 0.2514054477214813 2023-01-22 17:51:43.509262: step: 834/470, loss: 0.250079482793808 2023-01-22 17:51:44.263833: step: 836/470, loss: 0.17073297500610352 2023-01-22 17:51:45.078163: step: 838/470, loss: 1.1584669351577759 2023-01-22 17:51:45.822925: step: 840/470, loss: 0.38292911648750305 2023-01-22 17:51:46.563651: step: 842/470, loss: 0.2206546664237976 2023-01-22 17:51:47.352153: step: 844/470, loss: 0.49227169156074524 2023-01-22 17:51:48.174241: step: 846/470, loss: 0.2030370980501175 2023-01-22 17:51:48.961058: step: 848/470, loss: 0.5499109625816345 2023-01-22 17:51:49.805790: step: 850/470, loss: 0.3381698727607727 2023-01-22 17:51:50.500665: step: 852/470, loss: 0.6527577638626099 2023-01-22 17:51:51.311899: step: 854/470, loss: 0.22261139750480652 2023-01-22 17:51:52.180848: step: 856/470, loss: 0.6786369681358337 2023-01-22 17:51:52.932623: step: 858/470, loss: 0.3246194124221802 2023-01-22 17:51:53.788428: step: 860/470, loss: 0.17136085033416748 2023-01-22 17:51:54.543852: step: 862/470, loss: 0.7740350365638733 2023-01-22 17:51:55.432819: step: 864/470, loss: 6.283660888671875 2023-01-22 17:51:56.175355: step: 866/470, loss: 0.3030953109264374 2023-01-22 17:51:57.051984: step: 868/470, loss: 0.5647780299186707 2023-01-22 17:51:57.770355: step: 870/470, loss: 0.3866567313671112 2023-01-22 17:51:58.515975: step: 872/470, loss: 1.285661220550537 2023-01-22 17:51:59.242548: step: 874/470, loss: 0.18321466445922852 2023-01-22 17:51:59.981197: step: 876/470, loss: 0.2706562876701355 2023-01-22 17:52:00.788490: step: 878/470, loss: 0.30584192276000977 2023-01-22 17:52:01.570454: step: 880/470, loss: 0.143959641456604 2023-01-22 17:52:02.386762: step: 882/470, loss: 0.4257970452308655 2023-01-22 17:52:03.156167: step: 884/470, loss: 0.32551464438438416 2023-01-22 17:52:03.952494: step: 886/470, loss: 0.21082256734371185 2023-01-22 17:52:04.744754: step: 888/470, loss: 0.2381884753704071 2023-01-22 17:52:05.533954: step: 890/470, loss: 0.30021804571151733 2023-01-22 17:52:06.379066: step: 892/470, loss: 0.33029264211654663 2023-01-22 17:52:07.181141: step: 894/470, loss: 0.11910691857337952 2023-01-22 17:52:07.925896: step: 896/470, loss: 0.3467719554901123 2023-01-22 17:52:08.691778: step: 898/470, loss: 0.7879258394241333 2023-01-22 17:52:09.448883: step: 900/470, loss: 0.2893523871898651 2023-01-22 17:52:10.222544: step: 902/470, loss: 0.25279688835144043 2023-01-22 17:52:10.924456: step: 904/470, loss: 0.15351781249046326 2023-01-22 17:52:11.632051: step: 906/470, loss: 0.41283392906188965 2023-01-22 17:52:12.369232: step: 908/470, loss: 0.14755499362945557 2023-01-22 17:52:13.114484: step: 910/470, loss: 0.07284556329250336 2023-01-22 17:52:13.841885: step: 912/470, loss: 0.21138407289981842 2023-01-22 17:52:14.725849: step: 914/470, loss: 0.503731906414032 2023-01-22 17:52:15.499881: step: 916/470, loss: 0.367249995470047 2023-01-22 17:52:16.196991: step: 918/470, loss: 0.2869451344013214 2023-01-22 17:52:16.958991: step: 920/470, loss: 0.5139609575271606 2023-01-22 17:52:17.701856: step: 922/470, loss: 0.4172641932964325 2023-01-22 17:52:18.402189: step: 924/470, loss: 0.1905202567577362 2023-01-22 17:52:19.184358: step: 926/470, loss: 0.16458773612976074 2023-01-22 17:52:19.992005: step: 928/470, loss: 0.20133328437805176 2023-01-22 17:52:20.706502: step: 930/470, loss: 0.2865738272666931 2023-01-22 17:52:21.432735: step: 932/470, loss: 0.10317887365818024 2023-01-22 17:52:22.107898: step: 934/470, loss: 0.2089749574661255 2023-01-22 17:52:22.875884: step: 936/470, loss: 0.7139807939529419 2023-01-22 17:52:23.553306: step: 938/470, loss: 5.944061279296875 2023-01-22 17:52:24.340468: step: 940/470, loss: 0.49773871898651123 2023-01-22 17:52:25.001770: step: 942/470, loss: 0.29733988642692566 ================================================== Loss: 0.424 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2720285818408996, 'r': 0.3541017213336948, 'f1': 0.3076860793781651}, 'combined': 0.22671605848917428, 'epoch': 9} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.319785163193168, 'r': 0.3627918133824509, 'f1': 0.33993364332235054}, 'combined': 0.2367697018165626, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26970405278248755, 'r': 0.3546582705469903, 'f1': 0.30640148947256374}, 'combined': 0.22576951855873117, 'epoch': 9} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3172264650103097, 'r': 0.3602350530549767, 'f1': 0.337365560717853}, 'combined': 0.23498098756467375, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2503801155740811, 'r': 0.3582288560585525, 'f1': 0.2947488011598082}, 'combined': 0.217183327170385, 'epoch': 9} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3054672020186251, 'r': 0.36802923473974736, 'f1': 0.33384248070592}, 'combined': 0.23252710098919802, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19333333333333333, 'r': 0.4142857142857143, 'f1': 0.2636363636363636}, 'combined': 0.17575757575757572, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.5, 'f1': 0.3333333333333333}, 'combined': 0.16666666666666666, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.25862068965517243, 'f1': 0.25423728813559326}, 'combined': 0.16949152542372883, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3049308236808237, 'r': 0.3211320818650041, 'f1': 0.3128218246633219}, 'combined': 0.23050029185718457, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32065206847366756, 'r': 0.3490174437617227, 'f1': 0.3342340161254067}, 'combined': 0.23279981222665144, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24523809523809523, 'r': 0.31530612244897954, 'f1': 0.27589285714285716}, 'combined': 0.18392857142857144, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26474047961800956, 'r': 0.3240182340675829, 'f1': 0.29139523780480575}, 'combined': 0.2147122804877516, 'epoch': 7} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32541475117674507, 'r': 0.34573363573629207, 'f1': 0.33526661835256644}, 'combined': 0.23351903765850401, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2722222222222222, 'r': 0.532608695652174, 'f1': 0.36029411764705876}, 'combined': 0.18014705882352938, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27751515331590204, 'r': 0.3164831255082678, 'f1': 0.29572093465045596}, 'combined': 0.2178996360582307, 'epoch': 8} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3162120582663088, 'r': 0.3614719974417939, 'f1': 0.3373306583029202}, 'combined': 0.23495667742491955, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 8} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:55:08.797906: step: 2/470, loss: 0.056591589003801346 2023-01-22 17:55:09.557241: step: 4/470, loss: 0.13280236721038818 2023-01-22 17:55:10.291229: step: 6/470, loss: 0.4893929958343506 2023-01-22 17:55:10.979047: step: 8/470, loss: 0.2868712842464447 2023-01-22 17:55:11.708062: step: 10/470, loss: 0.157928004860878 2023-01-22 17:55:12.416697: step: 12/470, loss: 0.7920506000518799 2023-01-22 17:55:13.198954: step: 14/470, loss: 0.37678074836730957 2023-01-22 17:55:13.986077: step: 16/470, loss: 0.29826444387435913 2023-01-22 17:55:14.704516: step: 18/470, loss: 0.0674176812171936 2023-01-22 17:55:15.415572: step: 20/470, loss: 0.08113688230514526 2023-01-22 17:55:16.169027: step: 22/470, loss: 0.13426075875759125 2023-01-22 17:55:16.914706: step: 24/470, loss: 0.1328933835029602 2023-01-22 17:55:17.644401: step: 26/470, loss: 0.06381936371326447 2023-01-22 17:55:18.534787: step: 28/470, loss: 0.07034029066562653 2023-01-22 17:55:19.272578: step: 30/470, loss: 0.06103328615427017 2023-01-22 17:55:20.087451: step: 32/470, loss: 0.045456431806087494 2023-01-22 17:55:20.833478: step: 34/470, loss: 0.373674601316452 2023-01-22 17:55:21.630874: step: 36/470, loss: 0.04646005108952522 2023-01-22 17:55:22.451595: step: 38/470, loss: 0.5150260329246521 2023-01-22 17:55:23.237939: step: 40/470, loss: 0.3609604835510254 2023-01-22 17:55:23.981166: step: 42/470, loss: 0.34102901816368103 2023-01-22 17:55:24.787412: step: 44/470, loss: 0.293104887008667 2023-01-22 17:55:25.533571: step: 46/470, loss: 0.2512171268463135 2023-01-22 17:55:26.309410: step: 48/470, loss: 0.2921146750450134 2023-01-22 17:55:27.165006: step: 50/470, loss: 0.14088846743106842 2023-01-22 17:55:27.810625: step: 52/470, loss: 0.3724920451641083 2023-01-22 17:55:28.553463: step: 54/470, loss: 0.2027990221977234 2023-01-22 17:55:29.262488: step: 56/470, loss: 0.19799435138702393 2023-01-22 17:55:30.043297: step: 58/470, loss: 0.3948134779930115 2023-01-22 17:55:30.890925: step: 60/470, loss: 0.08492279052734375 2023-01-22 17:55:31.595656: step: 62/470, loss: 0.12444708496332169 2023-01-22 17:55:32.334027: step: 64/470, loss: 0.08406122028827667 2023-01-22 17:55:33.079093: step: 66/470, loss: 5.688138008117676 2023-01-22 17:55:33.814492: step: 68/470, loss: 0.10036779195070267 2023-01-22 17:55:34.537071: step: 70/470, loss: 0.14819800853729248 2023-01-22 17:55:35.318574: step: 72/470, loss: 0.4329559803009033 2023-01-22 17:55:36.057742: step: 74/470, loss: 0.3488907814025879 2023-01-22 17:55:36.762337: step: 76/470, loss: 0.3759381175041199 2023-01-22 17:55:37.527268: step: 78/470, loss: 0.49098092317581177 2023-01-22 17:55:38.237250: step: 80/470, loss: 0.5063804984092712 2023-01-22 17:55:39.037986: step: 82/470, loss: 0.14004996418952942 2023-01-22 17:55:39.826337: step: 84/470, loss: 0.09356193989515305 2023-01-22 17:55:40.584914: step: 86/470, loss: 0.17746534943580627 2023-01-22 17:55:41.388580: step: 88/470, loss: 0.09610351175069809 2023-01-22 17:55:42.157277: step: 90/470, loss: 0.14981889724731445 2023-01-22 17:55:42.889694: step: 92/470, loss: 1.6448625326156616 2023-01-22 17:55:43.668921: step: 94/470, loss: 0.2239532321691513 2023-01-22 17:55:44.371022: step: 96/470, loss: 0.1338728666305542 2023-01-22 17:55:45.168837: step: 98/470, loss: 0.2755134701728821 2023-01-22 17:55:45.943370: step: 100/470, loss: 0.18865349888801575 2023-01-22 17:55:46.711727: step: 102/470, loss: 0.06774507462978363 2023-01-22 17:55:47.503539: step: 104/470, loss: 0.8064208030700684 2023-01-22 17:55:48.273981: step: 106/470, loss: 0.08489122241735458 2023-01-22 17:55:49.029970: step: 108/470, loss: 0.08381538838148117 2023-01-22 17:55:49.773492: step: 110/470, loss: 0.3395574390888214 2023-01-22 17:55:50.641820: step: 112/470, loss: 0.15705344080924988 2023-01-22 17:55:51.353799: step: 114/470, loss: 0.4517096281051636 2023-01-22 17:55:52.064895: step: 116/470, loss: 0.1848120093345642 2023-01-22 17:55:52.773931: step: 118/470, loss: 0.2923411726951599 2023-01-22 17:55:53.476739: step: 120/470, loss: 0.24967914819717407 2023-01-22 17:55:54.206976: step: 122/470, loss: 0.8194922804832458 2023-01-22 17:55:55.010160: step: 124/470, loss: 0.44352638721466064 2023-01-22 17:55:55.804059: step: 126/470, loss: 0.319720059633255 2023-01-22 17:55:56.559647: step: 128/470, loss: 0.652948796749115 2023-01-22 17:55:57.346593: step: 130/470, loss: 0.6255174279212952 2023-01-22 17:55:58.114355: step: 132/470, loss: 0.5446348190307617 2023-01-22 17:55:58.832818: step: 134/470, loss: 0.3582460284233093 2023-01-22 17:55:59.658277: step: 136/470, loss: 0.655910074710846 2023-01-22 17:56:00.455272: step: 138/470, loss: 0.08800873160362244 2023-01-22 17:56:01.149811: step: 140/470, loss: 0.09775952994823456 2023-01-22 17:56:01.887133: step: 142/470, loss: 0.6266007423400879 2023-01-22 17:56:02.666519: step: 144/470, loss: 0.3636000454425812 2023-01-22 17:56:03.412626: step: 146/470, loss: 0.1658439189195633 2023-01-22 17:56:04.144517: step: 148/470, loss: 0.059976786375045776 2023-01-22 17:56:04.847531: step: 150/470, loss: 0.11647554486989975 2023-01-22 17:56:05.594996: step: 152/470, loss: 0.14295542240142822 2023-01-22 17:56:06.414680: step: 154/470, loss: 0.7250072956085205 2023-01-22 17:56:07.128437: step: 156/470, loss: 0.12420438975095749 2023-01-22 17:56:07.895341: step: 158/470, loss: 0.22176730632781982 2023-01-22 17:56:08.693220: step: 160/470, loss: 0.13311302661895752 2023-01-22 17:56:09.421169: step: 162/470, loss: 0.1882036328315735 2023-01-22 17:56:10.160641: step: 164/470, loss: 0.3161405920982361 2023-01-22 17:56:10.929015: step: 166/470, loss: 0.38165828585624695 2023-01-22 17:56:11.838727: step: 168/470, loss: 0.5611687302589417 2023-01-22 17:56:12.594303: step: 170/470, loss: 0.7891055941581726 2023-01-22 17:56:13.342255: step: 172/470, loss: 0.2864110469818115 2023-01-22 17:56:14.086861: step: 174/470, loss: 1.51108717918396 2023-01-22 17:56:14.819652: step: 176/470, loss: 0.3315257430076599 2023-01-22 17:56:15.593770: step: 178/470, loss: 0.8074526190757751 2023-01-22 17:56:16.311993: step: 180/470, loss: 0.13533882796764374 2023-01-22 17:56:16.998359: step: 182/470, loss: 0.6222794055938721 2023-01-22 17:56:17.733286: step: 184/470, loss: 1.3589999675750732 2023-01-22 17:56:18.534070: step: 186/470, loss: 0.250261515378952 2023-01-22 17:56:19.330091: step: 188/470, loss: 0.1996300369501114 2023-01-22 17:56:20.106070: step: 190/470, loss: 0.8840703368186951 2023-01-22 17:56:20.824492: step: 192/470, loss: 0.5001525282859802 2023-01-22 17:56:21.643845: step: 194/470, loss: 0.20161223411560059 2023-01-22 17:56:22.391940: step: 196/470, loss: 0.5042540431022644 2023-01-22 17:56:23.122376: step: 198/470, loss: 0.28795769810676575 2023-01-22 17:56:23.983964: step: 200/470, loss: 0.6410402655601501 2023-01-22 17:56:24.723306: step: 202/470, loss: 0.2731340527534485 2023-01-22 17:56:25.482916: step: 204/470, loss: 0.12334098666906357 2023-01-22 17:56:26.190705: step: 206/470, loss: 0.22625799477100372 2023-01-22 17:56:26.913254: step: 208/470, loss: 0.5543537735939026 2023-01-22 17:56:27.668553: step: 210/470, loss: 0.2280716747045517 2023-01-22 17:56:28.442520: step: 212/470, loss: 0.08557216078042984 2023-01-22 17:56:29.197084: step: 214/470, loss: 0.18337619304656982 2023-01-22 17:56:29.965983: step: 216/470, loss: 1.1990138292312622 2023-01-22 17:56:30.703535: step: 218/470, loss: 0.13386143743991852 2023-01-22 17:56:31.467164: step: 220/470, loss: 0.5193535685539246 2023-01-22 17:56:32.319026: step: 222/470, loss: 0.17291080951690674 2023-01-22 17:56:33.138230: step: 224/470, loss: 0.2032584249973297 2023-01-22 17:56:33.963538: step: 226/470, loss: 0.6475588083267212 2023-01-22 17:56:34.700099: step: 228/470, loss: 0.26753678917884827 2023-01-22 17:56:35.541383: step: 230/470, loss: 0.46687716245651245 2023-01-22 17:56:36.351254: step: 232/470, loss: 0.2332172989845276 2023-01-22 17:56:37.180744: step: 234/470, loss: 0.3625449240207672 2023-01-22 17:56:37.942378: step: 236/470, loss: 13.1072359085083 2023-01-22 17:56:38.651808: step: 238/470, loss: 0.16786223649978638 2023-01-22 17:56:39.397827: step: 240/470, loss: 0.1498054563999176 2023-01-22 17:56:40.155041: step: 242/470, loss: 0.7497914433479309 2023-01-22 17:56:40.819319: step: 244/470, loss: 0.7256143093109131 2023-01-22 17:56:41.623871: step: 246/470, loss: 0.9502539038658142 2023-01-22 17:56:42.417546: step: 248/470, loss: 0.6548596620559692 2023-01-22 17:56:43.197968: step: 250/470, loss: 0.21729148924350739 2023-01-22 17:56:43.901636: step: 252/470, loss: 0.17379331588745117 2023-01-22 17:56:44.665314: step: 254/470, loss: 0.10878202319145203 2023-01-22 17:56:45.386041: step: 256/470, loss: 0.342523455619812 2023-01-22 17:56:46.259829: step: 258/470, loss: 1.1544556617736816 2023-01-22 17:56:46.970995: step: 260/470, loss: 0.23245486617088318 2023-01-22 17:56:47.753887: step: 262/470, loss: 0.543807864189148 2023-01-22 17:56:48.537925: step: 264/470, loss: 0.154671311378479 2023-01-22 17:56:49.320544: step: 266/470, loss: 0.2436520904302597 2023-01-22 17:56:50.047165: step: 268/470, loss: 0.11845968663692474 2023-01-22 17:56:50.826769: step: 270/470, loss: 0.5449745655059814 2023-01-22 17:56:51.514284: step: 272/470, loss: 0.3452926278114319 2023-01-22 17:56:52.254386: step: 274/470, loss: 0.2843412756919861 2023-01-22 17:56:53.020165: step: 276/470, loss: 0.2615320682525635 2023-01-22 17:56:53.697726: step: 278/470, loss: 0.12062683701515198 2023-01-22 17:56:54.500154: step: 280/470, loss: 0.08838797360658646 2023-01-22 17:56:55.258240: step: 282/470, loss: 0.5510808229446411 2023-01-22 17:56:56.069708: step: 284/470, loss: 0.18408583104610443 2023-01-22 17:56:56.849733: step: 286/470, loss: 0.3631865084171295 2023-01-22 17:56:57.648008: step: 288/470, loss: 0.1815035343170166 2023-01-22 17:56:58.424886: step: 290/470, loss: 0.13083362579345703 2023-01-22 17:56:59.155441: step: 292/470, loss: 0.18467651307582855 2023-01-22 17:56:59.907842: step: 294/470, loss: 0.28844454884529114 2023-01-22 17:57:00.631187: step: 296/470, loss: 0.11280845105648041 2023-01-22 17:57:01.458030: step: 298/470, loss: 0.1065002903342247 2023-01-22 17:57:02.261883: step: 300/470, loss: 0.13871590793132782 2023-01-22 17:57:02.986970: step: 302/470, loss: 0.7692720890045166 2023-01-22 17:57:03.772731: step: 304/470, loss: 0.42822524905204773 2023-01-22 17:57:04.536767: step: 306/470, loss: 0.17703254520893097 2023-01-22 17:57:05.385529: step: 308/470, loss: 0.6865633130073547 2023-01-22 17:57:06.186517: step: 310/470, loss: 0.17033447325229645 2023-01-22 17:57:06.963384: step: 312/470, loss: 0.1849544495344162 2023-01-22 17:57:07.822723: step: 314/470, loss: 0.5009557604789734 2023-01-22 17:57:08.654666: step: 316/470, loss: 0.1985601931810379 2023-01-22 17:57:09.400663: step: 318/470, loss: 0.7720727920532227 2023-01-22 17:57:10.154181: step: 320/470, loss: 0.5260408520698547 2023-01-22 17:57:10.962327: step: 322/470, loss: 0.5189958810806274 2023-01-22 17:57:11.870261: step: 324/470, loss: 0.34944188594818115 2023-01-22 17:57:12.593624: step: 326/470, loss: 0.309198260307312 2023-01-22 17:57:13.355732: step: 328/470, loss: 1.9898673295974731 2023-01-22 17:57:14.084998: step: 330/470, loss: 0.07750724256038666 2023-01-22 17:57:14.831073: step: 332/470, loss: 0.26930928230285645 2023-01-22 17:57:15.598210: step: 334/470, loss: 0.3827114403247833 2023-01-22 17:57:16.298469: step: 336/470, loss: 0.18784800171852112 2023-01-22 17:57:17.097943: step: 338/470, loss: 0.5030959844589233 2023-01-22 17:57:17.774132: step: 340/470, loss: 0.17444992065429688 2023-01-22 17:57:18.705468: step: 342/470, loss: 0.16230079531669617 2023-01-22 17:57:19.518312: step: 344/470, loss: 0.7461163997650146 2023-01-22 17:57:20.266955: step: 346/470, loss: 0.3461681008338928 2023-01-22 17:57:21.091323: step: 348/470, loss: 0.9509586691856384 2023-01-22 17:57:21.854242: step: 350/470, loss: 0.23319120705127716 2023-01-22 17:57:22.599886: step: 352/470, loss: 0.2624581456184387 2023-01-22 17:57:23.363973: step: 354/470, loss: 0.09867286682128906 2023-01-22 17:57:24.083251: step: 356/470, loss: 0.3626404404640198 2023-01-22 17:57:24.900309: step: 358/470, loss: 0.1559712439775467 2023-01-22 17:57:25.570592: step: 360/470, loss: 0.2118682861328125 2023-01-22 17:57:26.309661: step: 362/470, loss: 0.1414262056350708 2023-01-22 17:57:27.022351: step: 364/470, loss: 0.21667654812335968 2023-01-22 17:57:27.808012: step: 366/470, loss: 0.36082273721694946 2023-01-22 17:57:28.558481: step: 368/470, loss: 0.12949353456497192 2023-01-22 17:57:29.321394: step: 370/470, loss: 1.4123237133026123 2023-01-22 17:57:30.010053: step: 372/470, loss: 0.3567815124988556 2023-01-22 17:57:30.817018: step: 374/470, loss: 0.958263635635376 2023-01-22 17:57:31.549392: step: 376/470, loss: 0.40245184302330017 2023-01-22 17:57:32.295130: step: 378/470, loss: 0.412080854177475 2023-01-22 17:57:33.058836: step: 380/470, loss: 0.02421180158853531 2023-01-22 17:57:33.861601: step: 382/470, loss: 0.1654946208000183 2023-01-22 17:57:34.740172: step: 384/470, loss: 0.12343335151672363 2023-01-22 17:57:35.487217: step: 386/470, loss: 0.1281142681837082 2023-01-22 17:57:36.201875: step: 388/470, loss: 0.36397942900657654 2023-01-22 17:57:37.104806: step: 390/470, loss: 0.5486847162246704 2023-01-22 17:57:37.819124: step: 392/470, loss: 0.3194306790828705 2023-01-22 17:57:38.581875: step: 394/470, loss: 0.061125390231609344 2023-01-22 17:57:39.349053: step: 396/470, loss: 0.21372345089912415 2023-01-22 17:57:40.074393: step: 398/470, loss: 0.38437792658805847 2023-01-22 17:57:40.868965: step: 400/470, loss: 0.1892595738172531 2023-01-22 17:57:41.578988: step: 402/470, loss: 0.3131447732448578 2023-01-22 17:57:42.458508: step: 404/470, loss: 0.12467707693576813 2023-01-22 17:57:43.211561: step: 406/470, loss: 0.18171878159046173 2023-01-22 17:57:43.949958: step: 408/470, loss: 0.41719669103622437 2023-01-22 17:57:44.730044: step: 410/470, loss: 1.214868426322937 2023-01-22 17:57:45.485544: step: 412/470, loss: 0.30766603350639343 2023-01-22 17:57:46.148356: step: 414/470, loss: 0.28935831785202026 2023-01-22 17:57:46.902781: step: 416/470, loss: 0.27082717418670654 2023-01-22 17:57:47.635115: step: 418/470, loss: 0.17194396257400513 2023-01-22 17:57:48.471547: step: 420/470, loss: 0.36183246970176697 2023-01-22 17:57:49.304977: step: 422/470, loss: 0.4918966591358185 2023-01-22 17:57:50.109452: step: 424/470, loss: 0.5111696720123291 2023-01-22 17:57:50.859510: step: 426/470, loss: 2.1021931171417236 2023-01-22 17:57:51.602954: step: 428/470, loss: 0.24027954041957855 2023-01-22 17:57:52.321371: step: 430/470, loss: 0.47709715366363525 2023-01-22 17:57:53.150398: step: 432/470, loss: 0.3914461135864258 2023-01-22 17:57:53.798839: step: 434/470, loss: 0.26286399364471436 2023-01-22 17:57:54.562478: step: 436/470, loss: 0.21852940320968628 2023-01-22 17:57:55.323188: step: 438/470, loss: 0.1935357004404068 2023-01-22 17:57:56.183566: step: 440/470, loss: 0.21522024273872375 2023-01-22 17:57:56.968143: step: 442/470, loss: 6.902156829833984 2023-01-22 17:57:57.828940: step: 444/470, loss: 0.3409741222858429 2023-01-22 17:57:58.569334: step: 446/470, loss: 0.24355652928352356 2023-01-22 17:57:59.336716: step: 448/470, loss: 0.6423745155334473 2023-01-22 17:58:00.137034: step: 450/470, loss: 0.24495574831962585 2023-01-22 17:58:00.949234: step: 452/470, loss: 0.22389310598373413 2023-01-22 17:58:01.732678: step: 454/470, loss: 0.3068379759788513 2023-01-22 17:58:02.456156: step: 456/470, loss: 0.11338848620653152 2023-01-22 17:58:03.200948: step: 458/470, loss: 0.29847270250320435 2023-01-22 17:58:03.957454: step: 460/470, loss: 0.42329713702201843 2023-01-22 17:58:04.659744: step: 462/470, loss: 0.046367011964321136 2023-01-22 17:58:05.390971: step: 464/470, loss: 0.3115167021751404 2023-01-22 17:58:06.197848: step: 466/470, loss: 0.22372061014175415 2023-01-22 17:58:06.884573: step: 468/470, loss: 0.15685810148715973 2023-01-22 17:58:07.733583: step: 470/470, loss: 0.340934157371521 2023-01-22 17:58:08.573040: step: 472/470, loss: 0.3756090998649597 2023-01-22 17:58:09.376325: step: 474/470, loss: 0.4003685712814331 2023-01-22 17:58:10.090529: step: 476/470, loss: 0.565597414970398 2023-01-22 17:58:10.829615: step: 478/470, loss: 0.35876619815826416 2023-01-22 17:58:11.589415: step: 480/470, loss: 0.10507578402757645 2023-01-22 17:58:12.272570: step: 482/470, loss: 0.12312311679124832 2023-01-22 17:58:13.023930: step: 484/470, loss: 0.07300717383623123 2023-01-22 17:58:13.772660: step: 486/470, loss: 0.5644948482513428 2023-01-22 17:58:14.526100: step: 488/470, loss: 0.20122499763965607 2023-01-22 17:58:15.344517: step: 490/470, loss: 0.25764524936676025 2023-01-22 17:58:16.114760: step: 492/470, loss: 0.843420684337616 2023-01-22 17:58:16.870323: step: 494/470, loss: 0.22662875056266785 2023-01-22 17:58:17.644650: step: 496/470, loss: 0.37807315587997437 2023-01-22 17:58:18.415906: step: 498/470, loss: 0.15613622963428497 2023-01-22 17:58:19.176045: step: 500/470, loss: 0.20546023547649384 2023-01-22 17:58:19.992906: step: 502/470, loss: 0.26250770688056946 2023-01-22 17:58:20.770427: step: 504/470, loss: 0.5560357570648193 2023-01-22 17:58:21.564303: step: 506/470, loss: 0.2005891352891922 2023-01-22 17:58:22.334318: step: 508/470, loss: 0.08451083302497864 2023-01-22 17:58:23.037258: step: 510/470, loss: 0.09619811177253723 2023-01-22 17:58:23.752496: step: 512/470, loss: 1.73138427734375 2023-01-22 17:58:24.482042: step: 514/470, loss: 0.9232888221740723 2023-01-22 17:58:25.246726: step: 516/470, loss: 0.4726581573486328 2023-01-22 17:58:25.960175: step: 518/470, loss: 0.1541184037923813 2023-01-22 17:58:26.739264: step: 520/470, loss: 0.1253805309534073 2023-01-22 17:58:27.608215: step: 522/470, loss: 0.6628063321113586 2023-01-22 17:58:28.375029: step: 524/470, loss: 1.1210182905197144 2023-01-22 17:58:29.174012: step: 526/470, loss: 0.4437292218208313 2023-01-22 17:58:29.964592: step: 528/470, loss: 1.0097692012786865 2023-01-22 17:58:30.697613: step: 530/470, loss: 0.398298054933548 2023-01-22 17:58:31.416241: step: 532/470, loss: 0.1273142546415329 2023-01-22 17:58:32.168900: step: 534/470, loss: 0.3876824378967285 2023-01-22 17:58:33.015708: step: 536/470, loss: 0.31943345069885254 2023-01-22 17:58:33.784118: step: 538/470, loss: 0.5478048920631409 2023-01-22 17:58:34.464777: step: 540/470, loss: 0.06618456542491913 2023-01-22 17:58:35.226231: step: 542/470, loss: 0.14831160008907318 2023-01-22 17:58:35.988634: step: 544/470, loss: 0.11674753576517105 2023-01-22 17:58:36.762164: step: 546/470, loss: 0.6683562994003296 2023-01-22 17:58:37.529258: step: 548/470, loss: 0.2430974245071411 2023-01-22 17:58:38.233995: step: 550/470, loss: 0.2425515353679657 2023-01-22 17:58:38.961658: step: 552/470, loss: 0.5050057768821716 2023-01-22 17:58:39.683111: step: 554/470, loss: 0.11187369376420975 2023-01-22 17:58:40.347116: step: 556/470, loss: 0.5013543963432312 2023-01-22 17:58:41.099264: step: 558/470, loss: 0.36751458048820496 2023-01-22 17:58:41.902175: step: 560/470, loss: 0.28177356719970703 2023-01-22 17:58:42.664322: step: 562/470, loss: 0.576210618019104 2023-01-22 17:58:43.437516: step: 564/470, loss: 0.3474891781806946 2023-01-22 17:58:44.200077: step: 566/470, loss: 0.30152004957199097 2023-01-22 17:58:44.859402: step: 568/470, loss: 0.3646096885204315 2023-01-22 17:58:45.605182: step: 570/470, loss: 0.35699066519737244 2023-01-22 17:58:46.324987: step: 572/470, loss: 0.13822859525680542 2023-01-22 17:58:47.062081: step: 574/470, loss: 0.3227802515029907 2023-01-22 17:58:47.766061: step: 576/470, loss: 0.10874330252408981 2023-01-22 17:58:48.505705: step: 578/470, loss: 0.08362071961164474 2023-01-22 17:58:49.239936: step: 580/470, loss: 0.2959776222705841 2023-01-22 17:58:50.002233: step: 582/470, loss: 0.09202943742275238 2023-01-22 17:58:50.815299: step: 584/470, loss: 0.12419924139976501 2023-01-22 17:58:51.487644: step: 586/470, loss: 0.4467095732688904 2023-01-22 17:58:52.202809: step: 588/470, loss: 0.17356723546981812 2023-01-22 17:58:53.177764: step: 590/470, loss: 0.18288274109363556 2023-01-22 17:58:53.972917: step: 592/470, loss: 0.2276880443096161 2023-01-22 17:58:54.783036: step: 594/470, loss: 0.10493405163288116 2023-01-22 17:58:55.510338: step: 596/470, loss: 0.13986705243587494 2023-01-22 17:58:56.250530: step: 598/470, loss: 0.18077510595321655 2023-01-22 17:58:56.953955: step: 600/470, loss: 0.12601624429225922 2023-01-22 17:58:57.620321: step: 602/470, loss: 0.3344309628009796 2023-01-22 17:58:58.434592: step: 604/470, loss: 0.3879287838935852 2023-01-22 17:58:59.273374: step: 606/470, loss: 0.16008992493152618 2023-01-22 17:59:00.205209: step: 608/470, loss: 0.19730323553085327 2023-01-22 17:59:00.858072: step: 610/470, loss: 1.7209839820861816 2023-01-22 17:59:01.521301: step: 612/470, loss: 0.17242218554019928 2023-01-22 17:59:02.279026: step: 614/470, loss: 0.42183101177215576 2023-01-22 17:59:03.005656: step: 616/470, loss: 0.1833951324224472 2023-01-22 17:59:03.731747: step: 618/470, loss: 0.1907496601343155 2023-01-22 17:59:04.472688: step: 620/470, loss: 0.21647794544696808 2023-01-22 17:59:05.259861: step: 622/470, loss: 0.39642030000686646 2023-01-22 17:59:06.022386: step: 624/470, loss: 0.20872657001018524 2023-01-22 17:59:06.736370: step: 626/470, loss: 0.16940362751483917 2023-01-22 17:59:07.457554: step: 628/470, loss: 0.29818278551101685 2023-01-22 17:59:08.133084: step: 630/470, loss: 0.12899573147296906 2023-01-22 17:59:08.831123: step: 632/470, loss: 0.08327148109674454 2023-01-22 17:59:09.538907: step: 634/470, loss: 0.2249470353126526 2023-01-22 17:59:10.456417: step: 636/470, loss: 0.1515919417142868 2023-01-22 17:59:11.240787: step: 638/470, loss: 0.8622671365737915 2023-01-22 17:59:12.038581: step: 640/470, loss: 0.37675124406814575 2023-01-22 17:59:12.743849: step: 642/470, loss: 0.2437460869550705 2023-01-22 17:59:13.466067: step: 644/470, loss: 0.4033161997795105 2023-01-22 17:59:14.256048: step: 646/470, loss: 0.24557210505008698 2023-01-22 17:59:15.059262: step: 648/470, loss: 0.25930410623550415 2023-01-22 17:59:15.712019: step: 650/470, loss: 0.49194440245628357 2023-01-22 17:59:16.516270: step: 652/470, loss: 0.4548197090625763 2023-01-22 17:59:17.264036: step: 654/470, loss: 0.2559857666492462 2023-01-22 17:59:17.939198: step: 656/470, loss: 0.3703431189060211 2023-01-22 17:59:18.692939: step: 658/470, loss: 0.0801117792725563 2023-01-22 17:59:19.454325: step: 660/470, loss: 0.23692208528518677 2023-01-22 17:59:20.196785: step: 662/470, loss: 0.8581908941268921 2023-01-22 17:59:20.930919: step: 664/470, loss: 0.09321422874927521 2023-01-22 17:59:21.617018: step: 666/470, loss: 0.5431147217750549 2023-01-22 17:59:22.350337: step: 668/470, loss: 0.17868438363075256 2023-01-22 17:59:23.013496: step: 670/470, loss: 0.2592407763004303 2023-01-22 17:59:23.750965: step: 672/470, loss: 0.22203195095062256 2023-01-22 17:59:24.501961: step: 674/470, loss: 0.2031756341457367 2023-01-22 17:59:25.246509: step: 676/470, loss: 1.166580319404602 2023-01-22 17:59:25.986573: step: 678/470, loss: 0.20502953231334686 2023-01-22 17:59:26.692475: step: 680/470, loss: 0.14193998277187347 2023-01-22 17:59:27.404768: step: 682/470, loss: 0.16833271086215973 2023-01-22 17:59:28.151145: step: 684/470, loss: 0.20455804467201233 2023-01-22 17:59:28.805111: step: 686/470, loss: 0.7733557224273682 2023-01-22 17:59:29.576236: step: 688/470, loss: 0.20254459977149963 2023-01-22 17:59:30.364737: step: 690/470, loss: 0.8873804211616516 2023-01-22 17:59:31.096394: step: 692/470, loss: 0.4637981057167053 2023-01-22 17:59:31.825596: step: 694/470, loss: 0.4916077256202698 2023-01-22 17:59:32.575463: step: 696/470, loss: 0.826230525970459 2023-01-22 17:59:33.351841: step: 698/470, loss: 0.3543017506599426 2023-01-22 17:59:34.062864: step: 700/470, loss: 0.10216841101646423 2023-01-22 17:59:34.797600: step: 702/470, loss: 0.21498167514801025 2023-01-22 17:59:35.539521: step: 704/470, loss: 0.10220889002084732 2023-01-22 17:59:36.335998: step: 706/470, loss: 0.08111105859279633 2023-01-22 17:59:37.049471: step: 708/470, loss: 0.24107980728149414 2023-01-22 17:59:37.818800: step: 710/470, loss: 0.21305520832538605 2023-01-22 17:59:38.570818: step: 712/470, loss: 0.14277955889701843 2023-01-22 17:59:39.303875: step: 714/470, loss: 0.9436085224151611 2023-01-22 17:59:40.047559: step: 716/470, loss: 0.6749270558357239 2023-01-22 17:59:40.740916: step: 718/470, loss: 0.14690832793712616 2023-01-22 17:59:41.500281: step: 720/470, loss: 0.1711893379688263 2023-01-22 17:59:42.236938: step: 722/470, loss: 0.22402094304561615 2023-01-22 17:59:42.997723: step: 724/470, loss: 0.3463267683982849 2023-01-22 17:59:43.701663: step: 726/470, loss: 0.34018969535827637 2023-01-22 17:59:44.464176: step: 728/470, loss: 0.5446484088897705 2023-01-22 17:59:45.185403: step: 730/470, loss: 0.11589177697896957 2023-01-22 17:59:45.920747: step: 732/470, loss: 1.1572738885879517 2023-01-22 17:59:46.735732: step: 734/470, loss: 0.20397549867630005 2023-01-22 17:59:47.567702: step: 736/470, loss: 0.6422154307365417 2023-01-22 17:59:48.254677: step: 738/470, loss: 0.19147703051567078 2023-01-22 17:59:48.890844: step: 740/470, loss: 0.520534336566925 2023-01-22 17:59:49.739870: step: 742/470, loss: 0.14674602448940277 2023-01-22 17:59:50.550511: step: 744/470, loss: 0.8777087926864624 2023-01-22 17:59:51.267383: step: 746/470, loss: 1.0014443397521973 2023-01-22 17:59:51.929844: step: 748/470, loss: 0.3216632306575775 2023-01-22 17:59:52.734080: step: 750/470, loss: 0.20832213759422302 2023-01-22 17:59:53.481083: step: 752/470, loss: 0.5051476359367371 2023-01-22 17:59:54.277928: step: 754/470, loss: 0.2241048663854599 2023-01-22 17:59:55.065136: step: 756/470, loss: 0.8101214170455933 2023-01-22 17:59:55.859912: step: 758/470, loss: 0.12697064876556396 2023-01-22 17:59:56.587130: step: 760/470, loss: 1.2407550811767578 2023-01-22 17:59:57.369322: step: 762/470, loss: 0.39389562606811523 2023-01-22 17:59:58.122964: step: 764/470, loss: 0.4802873730659485 2023-01-22 17:59:58.875842: step: 766/470, loss: 0.7842137813568115 2023-01-22 17:59:59.625495: step: 768/470, loss: 0.204922616481781 2023-01-22 18:00:00.402939: step: 770/470, loss: 0.25310850143432617 2023-01-22 18:00:01.102543: step: 772/470, loss: 0.07442860305309296 2023-01-22 18:00:01.839746: step: 774/470, loss: 0.16630667448043823 2023-01-22 18:00:02.651249: step: 776/470, loss: 0.4407159388065338 2023-01-22 18:00:03.440342: step: 778/470, loss: 0.2151498794555664 2023-01-22 18:00:04.165409: step: 780/470, loss: 0.4645977020263672 2023-01-22 18:00:04.916388: step: 782/470, loss: 0.4509057104587555 2023-01-22 18:00:05.587782: step: 784/470, loss: 0.4268851578235626 2023-01-22 18:00:06.282876: step: 786/470, loss: 0.2786135971546173 2023-01-22 18:00:07.124009: step: 788/470, loss: 0.42344367504119873 2023-01-22 18:00:07.828753: step: 790/470, loss: 0.22360637784004211 2023-01-22 18:00:08.541355: step: 792/470, loss: 0.2640346884727478 2023-01-22 18:00:09.299732: step: 794/470, loss: 1.436629056930542 2023-01-22 18:00:10.117675: step: 796/470, loss: 0.168908953666687 2023-01-22 18:00:10.789837: step: 798/470, loss: 0.2268424928188324 2023-01-22 18:00:11.527843: step: 800/470, loss: 0.16930358111858368 2023-01-22 18:00:12.280402: step: 802/470, loss: 0.19378112256526947 2023-01-22 18:00:13.116613: step: 804/470, loss: 0.4183207154273987 2023-01-22 18:00:13.828365: step: 806/470, loss: 0.39443352818489075 2023-01-22 18:00:14.558999: step: 808/470, loss: 0.22412896156311035 2023-01-22 18:00:15.282123: step: 810/470, loss: 0.23135864734649658 2023-01-22 18:00:16.036741: step: 812/470, loss: 0.3557632863521576 2023-01-22 18:00:16.852600: step: 814/470, loss: 0.23979082703590393 2023-01-22 18:00:17.603023: step: 816/470, loss: 0.4324354827404022 2023-01-22 18:00:18.312618: step: 818/470, loss: 0.20390821993350983 2023-01-22 18:00:18.991979: step: 820/470, loss: 0.11008242517709732 2023-01-22 18:00:19.764297: step: 822/470, loss: 0.7956212759017944 2023-01-22 18:00:20.575247: step: 824/470, loss: 0.09564602375030518 2023-01-22 18:00:21.360982: step: 826/470, loss: 0.10002394765615463 2023-01-22 18:00:22.170223: step: 828/470, loss: 0.12996633350849152 2023-01-22 18:00:22.985050: step: 830/470, loss: 0.16086608171463013 2023-01-22 18:00:23.703305: step: 832/470, loss: 0.32509753108024597 2023-01-22 18:00:24.475117: step: 834/470, loss: 0.6973584890365601 2023-01-22 18:00:25.200723: step: 836/470, loss: 0.5293657779693604 2023-01-22 18:00:25.967937: step: 838/470, loss: 0.4230278730392456 2023-01-22 18:00:26.778583: step: 840/470, loss: 0.26785650849342346 2023-01-22 18:00:27.505801: step: 842/470, loss: 0.10598357021808624 2023-01-22 18:00:28.314629: step: 844/470, loss: 0.08085938543081284 2023-01-22 18:00:29.036902: step: 846/470, loss: 0.0774892121553421 2023-01-22 18:00:29.757913: step: 848/470, loss: 0.3891444206237793 2023-01-22 18:00:30.519996: step: 850/470, loss: 0.20412513613700867 2023-01-22 18:00:31.263560: step: 852/470, loss: 0.7257307171821594 2023-01-22 18:00:32.038556: step: 854/470, loss: 0.19312706589698792 2023-01-22 18:00:32.809371: step: 856/470, loss: 0.2457079440355301 2023-01-22 18:00:33.546401: step: 858/470, loss: 0.22375904023647308 2023-01-22 18:00:34.229216: step: 860/470, loss: 0.07559472322463989 2023-01-22 18:00:35.037335: step: 862/470, loss: 5.461023330688477 2023-01-22 18:00:35.791424: step: 864/470, loss: 0.28360649943351746 2023-01-22 18:00:36.560172: step: 866/470, loss: 0.1746789664030075 2023-01-22 18:00:37.400176: step: 868/470, loss: 0.2900182604789734 2023-01-22 18:00:38.120827: step: 870/470, loss: 0.5256376266479492 2023-01-22 18:00:38.872967: step: 872/470, loss: 0.5666700601577759 2023-01-22 18:00:39.674376: step: 874/470, loss: 0.3807283341884613 2023-01-22 18:00:40.514023: step: 876/470, loss: 0.39703118801116943 2023-01-22 18:00:41.389350: step: 878/470, loss: 0.3907458186149597 2023-01-22 18:00:42.160171: step: 880/470, loss: 0.2166074514389038 2023-01-22 18:00:42.907509: step: 882/470, loss: 0.6513550281524658 2023-01-22 18:00:43.654494: step: 884/470, loss: 0.17348910868167877 2023-01-22 18:00:44.381907: step: 886/470, loss: 0.5899196863174438 2023-01-22 18:00:45.022787: step: 888/470, loss: 0.33359190821647644 2023-01-22 18:00:45.803640: step: 890/470, loss: 0.2644504904747009 2023-01-22 18:00:46.533355: step: 892/470, loss: 0.0992155447602272 2023-01-22 18:00:47.278897: step: 894/470, loss: 0.07050655782222748 2023-01-22 18:00:48.171791: step: 896/470, loss: 0.15530425310134888 2023-01-22 18:00:48.879027: step: 898/470, loss: 1.7606289386749268 2023-01-22 18:00:49.652973: step: 900/470, loss: 0.39144524931907654 2023-01-22 18:00:50.508988: step: 902/470, loss: 0.10359073430299759 2023-01-22 18:00:51.236707: step: 904/470, loss: 0.5953198075294495 2023-01-22 18:00:51.972857: step: 906/470, loss: 0.07822130620479584 2023-01-22 18:00:52.635805: step: 908/470, loss: 0.16700084507465363 2023-01-22 18:00:53.493773: step: 910/470, loss: 0.48216283321380615 2023-01-22 18:00:54.294884: step: 912/470, loss: 0.14438587427139282 2023-01-22 18:00:55.086667: step: 914/470, loss: 0.28868240118026733 2023-01-22 18:00:55.844704: step: 916/470, loss: 0.15756703913211823 2023-01-22 18:00:56.542784: step: 918/470, loss: 0.25201234221458435 2023-01-22 18:00:57.318677: step: 920/470, loss: 0.13608524203300476 2023-01-22 18:00:58.073940: step: 922/470, loss: 0.11261554062366486 2023-01-22 18:00:58.758477: step: 924/470, loss: 0.12380633503198624 2023-01-22 18:00:59.549838: step: 926/470, loss: 0.3758620023727417 2023-01-22 18:01:00.280146: step: 928/470, loss: 0.11808370053768158 2023-01-22 18:01:01.007273: step: 930/470, loss: 0.5944559574127197 2023-01-22 18:01:01.751513: step: 932/470, loss: 0.38902971148490906 2023-01-22 18:01:02.520827: step: 934/470, loss: 0.08703064173460007 2023-01-22 18:01:03.316508: step: 936/470, loss: 0.1778847724199295 2023-01-22 18:01:04.081308: step: 938/470, loss: 0.3809297978878021 2023-01-22 18:01:04.776706: step: 940/470, loss: 0.24091286957263947 2023-01-22 18:01:05.459972: step: 942/470, loss: 0.10962583124637604 ================================================== Loss: 0.416 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2976010690414319, 'r': 0.3218835092098979, 'f1': 0.3092663798607405}, 'combined': 0.22788049042370354, 'epoch': 10} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3549763417163531, 'r': 0.32667371312609633, 'f1': 0.34023745409131195}, 'combined': 0.23698131130738148, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29295514959330793, 'r': 0.32074975581658194, 'f1': 0.3062230458611208}, 'combined': 0.2256380337924048, 'epoch': 10} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35643838707889336, 'r': 0.31295358865524353, 'f1': 0.3332835660256865}, 'combined': 0.2321378071820702, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27696660863807243, 'r': 0.31953642894107787, 'f1': 0.2967325075805252}, 'combined': 0.21864500558565014, 'epoch': 10} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34185545824681857, 'r': 0.32870717139117167, 'f1': 0.3351524100459005}, 'combined': 0.23343948958420932, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21568627450980393, 'r': 0.3142857142857143, 'f1': 0.2558139534883721}, 'combined': 0.17054263565891473, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.234375, 'r': 0.32608695652173914, 'f1': 0.2727272727272727}, 'combined': 0.13636363636363635, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4318181818181818, 'r': 0.3275862068965517, 'f1': 0.3725490196078432}, 'combined': 0.24836601307189546, 'epoch': 10} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3049308236808237, 'r': 0.3211320818650041, 'f1': 0.3128218246633219}, 'combined': 0.23050029185718457, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32065206847366756, 'r': 0.3490174437617227, 'f1': 0.3342340161254067}, 'combined': 0.23279981222665144, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24523809523809523, 'r': 0.31530612244897954, 'f1': 0.27589285714285716}, 'combined': 0.18392857142857144, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26474047961800956, 'r': 0.3240182340675829, 'f1': 0.29139523780480575}, 'combined': 0.2147122804877516, 'epoch': 7} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32541475117674507, 'r': 0.34573363573629207, 'f1': 0.33526661835256644}, 'combined': 0.23351903765850401, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2722222222222222, 'r': 0.532608695652174, 'f1': 0.36029411764705876}, 'combined': 0.18014705882352938, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27696660863807243, 'r': 0.31953642894107787, 'f1': 0.2967325075805252}, 'combined': 0.21864500558565014, 'epoch': 10} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34185545824681857, 'r': 0.32870717139117167, 'f1': 0.3351524100459005}, 'combined': 0.23343948958420932, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4318181818181818, 'r': 0.3275862068965517, 'f1': 0.3725490196078432}, 'combined': 0.24836601307189546, 'epoch': 10} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:03:52.946054: step: 2/470, loss: 0.3994400203227997 2023-01-22 18:03:53.692797: step: 4/470, loss: 0.10796771198511124 2023-01-22 18:03:54.394307: step: 6/470, loss: 0.06982637196779251 2023-01-22 18:03:55.150299: step: 8/470, loss: 0.6267420649528503 2023-01-22 18:03:55.851131: step: 10/470, loss: 0.12536020576953888 2023-01-22 18:03:56.541728: step: 12/470, loss: 0.2619161605834961 2023-01-22 18:03:57.330625: step: 14/470, loss: 0.1946829855442047 2023-01-22 18:03:58.006514: step: 16/470, loss: 0.20657046139240265 2023-01-22 18:03:58.718662: step: 18/470, loss: 0.369625061750412 2023-01-22 18:03:59.445252: step: 20/470, loss: 0.24612738192081451 2023-01-22 18:04:00.228428: step: 22/470, loss: 0.17834819853305817 2023-01-22 18:04:00.994516: step: 24/470, loss: 0.18645793199539185 2023-01-22 18:04:01.789387: step: 26/470, loss: 0.22116416692733765 2023-01-22 18:04:02.604363: step: 28/470, loss: 0.12848031520843506 2023-01-22 18:04:03.326362: step: 30/470, loss: 0.2887888252735138 2023-01-22 18:04:04.064305: step: 32/470, loss: 0.09650327265262604 2023-01-22 18:04:04.889716: step: 34/470, loss: 0.1269495189189911 2023-01-22 18:04:05.653256: step: 36/470, loss: 0.05965163931250572 2023-01-22 18:04:06.340386: step: 38/470, loss: 0.09995022416114807 2023-01-22 18:04:07.154355: step: 40/470, loss: 0.1404799371957779 2023-01-22 18:04:07.933161: step: 42/470, loss: 0.07613828033208847 2023-01-22 18:04:08.728995: step: 44/470, loss: 0.21182122826576233 2023-01-22 18:04:09.561963: step: 46/470, loss: 0.5042648911476135 2023-01-22 18:04:10.324701: step: 48/470, loss: 0.5012888312339783 2023-01-22 18:04:11.003490: step: 50/470, loss: 0.4014410972595215 2023-01-22 18:04:11.742753: step: 52/470, loss: 1.0104151964187622 2023-01-22 18:04:12.589431: step: 54/470, loss: 0.5547516942024231 2023-01-22 18:04:13.405382: step: 56/470, loss: 1.227064847946167 2023-01-22 18:04:14.179356: step: 58/470, loss: 0.4300103783607483 2023-01-22 18:04:14.832143: step: 60/470, loss: 0.20219558477401733 2023-01-22 18:04:15.552527: step: 62/470, loss: 0.13512657582759857 2023-01-22 18:04:16.345837: step: 64/470, loss: 0.1292756050825119 2023-01-22 18:04:17.082513: step: 66/470, loss: 0.3424178957939148 2023-01-22 18:04:17.886578: step: 68/470, loss: 0.8694711327552795 2023-01-22 18:04:18.699085: step: 70/470, loss: 0.10702022910118103 2023-01-22 18:04:19.396131: step: 72/470, loss: 0.07732173800468445 2023-01-22 18:04:20.094203: step: 74/470, loss: 0.21523575484752655 2023-01-22 18:04:20.828859: step: 76/470, loss: 0.9190123081207275 2023-01-22 18:04:21.490691: step: 78/470, loss: 0.09142867475748062 2023-01-22 18:04:22.230823: step: 80/470, loss: 0.2203410565853119 2023-01-22 18:04:22.933198: step: 82/470, loss: 0.21649318933486938 2023-01-22 18:04:23.639185: step: 84/470, loss: 0.13063064217567444 2023-01-22 18:04:24.485661: step: 86/470, loss: 0.3328152596950531 2023-01-22 18:04:25.333734: step: 88/470, loss: 0.2513705790042877 2023-01-22 18:04:26.155377: step: 90/470, loss: 0.20509395003318787 2023-01-22 18:04:27.010977: step: 92/470, loss: 0.07670695334672928 2023-01-22 18:04:27.748168: step: 94/470, loss: 0.10654658079147339 2023-01-22 18:04:28.463050: step: 96/470, loss: 0.44697830080986023 2023-01-22 18:04:29.204324: step: 98/470, loss: 0.13903780281543732 2023-01-22 18:04:29.956764: step: 100/470, loss: 0.028267454355955124 2023-01-22 18:04:30.747810: step: 102/470, loss: 0.15167954564094543 2023-01-22 18:04:31.584343: step: 104/470, loss: 0.284242182970047 2023-01-22 18:04:32.255772: step: 106/470, loss: 0.3138585090637207 2023-01-22 18:04:33.150275: step: 108/470, loss: 0.07407403737306595 2023-01-22 18:04:33.966940: step: 110/470, loss: 0.6403460502624512 2023-01-22 18:04:34.676498: step: 112/470, loss: 0.11428175866603851 2023-01-22 18:04:35.430243: step: 114/470, loss: 0.26986491680145264 2023-01-22 18:04:36.246522: step: 116/470, loss: 0.1651705801486969 2023-01-22 18:04:37.023362: step: 118/470, loss: 0.07059428840875626 2023-01-22 18:04:37.777803: step: 120/470, loss: 0.0850813016295433 2023-01-22 18:04:38.520757: step: 122/470, loss: 0.23236671090126038 2023-01-22 18:04:39.302194: step: 124/470, loss: 1.0825673341751099 2023-01-22 18:04:40.023623: step: 126/470, loss: 0.18390889465808868 2023-01-22 18:04:40.748409: step: 128/470, loss: 0.12133748084306717 2023-01-22 18:04:41.443333: step: 130/470, loss: 0.26943790912628174 2023-01-22 18:04:42.183520: step: 132/470, loss: 0.059222038835287094 2023-01-22 18:04:42.924263: step: 134/470, loss: 0.4377175569534302 2023-01-22 18:04:43.629585: step: 136/470, loss: 0.1320364624261856 2023-01-22 18:04:44.407022: step: 138/470, loss: 0.2063646763563156 2023-01-22 18:04:45.106192: step: 140/470, loss: 0.17360788583755493 2023-01-22 18:04:45.821863: step: 142/470, loss: 0.17847619950771332 2023-01-22 18:04:46.593567: step: 144/470, loss: 0.4014095664024353 2023-01-22 18:04:47.328872: step: 146/470, loss: 0.26048025488853455 2023-01-22 18:04:48.120605: step: 148/470, loss: 0.4044667184352875 2023-01-22 18:04:48.874486: step: 150/470, loss: 0.6233318448066711 2023-01-22 18:04:49.541126: step: 152/470, loss: 0.10592886805534363 2023-01-22 18:04:50.345503: step: 154/470, loss: 0.6332147121429443 2023-01-22 18:04:51.148638: step: 156/470, loss: 0.4474610686302185 2023-01-22 18:04:51.792977: step: 158/470, loss: 0.22585195302963257 2023-01-22 18:04:52.509336: step: 160/470, loss: 0.10840116441249847 2023-01-22 18:04:53.255025: step: 162/470, loss: 0.17162242531776428 2023-01-22 18:04:54.032219: step: 164/470, loss: 0.1772492676973343 2023-01-22 18:04:54.704023: step: 166/470, loss: 0.3632153868675232 2023-01-22 18:04:55.463380: step: 168/470, loss: 0.14816507697105408 2023-01-22 18:04:56.175241: step: 170/470, loss: 0.9186021089553833 2023-01-22 18:04:56.934675: step: 172/470, loss: 0.22490334510803223 2023-01-22 18:04:57.635811: step: 174/470, loss: 0.20037584006786346 2023-01-22 18:04:58.318582: step: 176/470, loss: 0.13259491324424744 2023-01-22 18:04:58.993265: step: 178/470, loss: 0.42409461736679077 2023-01-22 18:04:59.739394: step: 180/470, loss: 0.12134432792663574 2023-01-22 18:05:00.462666: step: 182/470, loss: 0.10569081455469131 2023-01-22 18:05:01.160374: step: 184/470, loss: 0.07015454769134521 2023-01-22 18:05:01.927521: step: 186/470, loss: 0.18518798053264618 2023-01-22 18:05:02.714132: step: 188/470, loss: 0.1714620441198349 2023-01-22 18:05:03.399435: step: 190/470, loss: 0.5320131182670593 2023-01-22 18:05:04.097214: step: 192/470, loss: 0.09196509420871735 2023-01-22 18:05:04.771712: step: 194/470, loss: 0.24720802903175354 2023-01-22 18:05:05.564288: step: 196/470, loss: 0.381684273481369 2023-01-22 18:05:06.336649: step: 198/470, loss: 0.4392554759979248 2023-01-22 18:05:07.142121: step: 200/470, loss: 0.1657257229089737 2023-01-22 18:05:07.912143: step: 202/470, loss: 0.9423061013221741 2023-01-22 18:05:08.591910: step: 204/470, loss: 0.22993268072605133 2023-01-22 18:05:09.288981: step: 206/470, loss: 0.08702653646469116 2023-01-22 18:05:09.957391: step: 208/470, loss: 0.4365525543689728 2023-01-22 18:05:10.747386: step: 210/470, loss: 0.21688061952590942 2023-01-22 18:05:11.446156: step: 212/470, loss: 0.37000590562820435 2023-01-22 18:05:12.213766: step: 214/470, loss: 0.1840032935142517 2023-01-22 18:05:12.951836: step: 216/470, loss: 0.21828442811965942 2023-01-22 18:05:13.681959: step: 218/470, loss: 0.6175702214241028 2023-01-22 18:05:14.470187: step: 220/470, loss: 0.3277837038040161 2023-01-22 18:05:15.281408: step: 222/470, loss: 0.20514245331287384 2023-01-22 18:05:15.985402: step: 224/470, loss: 0.27700185775756836 2023-01-22 18:05:16.640829: step: 226/470, loss: 0.8384249210357666 2023-01-22 18:05:17.415422: step: 228/470, loss: 0.3489769697189331 2023-01-22 18:05:18.197720: step: 230/470, loss: 0.167975515127182 2023-01-22 18:05:18.974511: step: 232/470, loss: 0.12696748971939087 2023-01-22 18:05:19.737077: step: 234/470, loss: 0.36338022351264954 2023-01-22 18:05:20.537829: step: 236/470, loss: 0.17215953767299652 2023-01-22 18:05:21.257973: step: 238/470, loss: 0.14851003885269165 2023-01-22 18:05:21.950081: step: 240/470, loss: 0.5321226119995117 2023-01-22 18:05:22.709385: step: 242/470, loss: 0.37152212858200073 2023-01-22 18:05:23.521568: step: 244/470, loss: 0.11287014931440353 2023-01-22 18:05:24.217149: step: 246/470, loss: 0.1164015606045723 2023-01-22 18:05:25.042217: step: 248/470, loss: 0.3851315379142761 2023-01-22 18:05:25.856110: step: 250/470, loss: 0.5415903925895691 2023-01-22 18:05:26.575762: step: 252/470, loss: 0.07699915021657944 2023-01-22 18:05:27.348477: step: 254/470, loss: 0.10870891064405441 2023-01-22 18:05:28.115627: step: 256/470, loss: 0.18335126340389252 2023-01-22 18:05:28.874489: step: 258/470, loss: 0.1485547423362732 2023-01-22 18:05:29.595596: step: 260/470, loss: 0.440429151058197 2023-01-22 18:05:30.224489: step: 262/470, loss: 0.16356483101844788 2023-01-22 18:05:30.931123: step: 264/470, loss: 0.5098501443862915 2023-01-22 18:05:31.682959: step: 266/470, loss: 0.49232688546180725 2023-01-22 18:05:32.481013: step: 268/470, loss: 0.22366316616535187 2023-01-22 18:05:33.234573: step: 270/470, loss: 2.619814395904541 2023-01-22 18:05:33.915837: step: 272/470, loss: 0.2295650839805603 2023-01-22 18:05:34.646296: step: 274/470, loss: 0.11074075847864151 2023-01-22 18:05:35.375060: step: 276/470, loss: 0.3683517873287201 2023-01-22 18:05:36.146673: step: 278/470, loss: 0.13178575038909912 2023-01-22 18:05:36.836425: step: 280/470, loss: 0.16775688529014587 2023-01-22 18:05:37.530247: step: 282/470, loss: 0.0917547270655632 2023-01-22 18:05:38.254512: step: 284/470, loss: 0.24330279231071472 2023-01-22 18:05:38.964398: step: 286/470, loss: 0.21394579112529755 2023-01-22 18:05:39.665790: step: 288/470, loss: 0.16779950261116028 2023-01-22 18:05:40.418116: step: 290/470, loss: 0.21933045983314514 2023-01-22 18:05:41.192423: step: 292/470, loss: 0.16219143569469452 2023-01-22 18:05:41.955729: step: 294/470, loss: 0.11340250819921494 2023-01-22 18:05:42.686888: step: 296/470, loss: 0.26899465918540955 2023-01-22 18:05:43.422153: step: 298/470, loss: 0.15036383271217346 2023-01-22 18:05:44.174826: step: 300/470, loss: 0.20359119772911072 2023-01-22 18:05:44.933641: step: 302/470, loss: 0.08829786628484726 2023-01-22 18:05:45.718537: step: 304/470, loss: 0.32991862297058105 2023-01-22 18:05:46.464060: step: 306/470, loss: 0.09574191272258759 2023-01-22 18:05:47.152368: step: 308/470, loss: 0.1676369309425354 2023-01-22 18:05:47.868355: step: 310/470, loss: 0.07528150081634521 2023-01-22 18:05:48.593347: step: 312/470, loss: 0.2053246796131134 2023-01-22 18:05:49.348823: step: 314/470, loss: 0.5107622146606445 2023-01-22 18:05:50.119016: step: 316/470, loss: 0.2811824381351471 2023-01-22 18:05:50.972769: step: 318/470, loss: 1.0416152477264404 2023-01-22 18:05:51.763532: step: 320/470, loss: 0.24820706248283386 2023-01-22 18:05:52.496956: step: 322/470, loss: 0.1375453770160675 2023-01-22 18:05:53.170119: step: 324/470, loss: 0.2664700448513031 2023-01-22 18:05:53.968030: step: 326/470, loss: 0.9418017864227295 2023-01-22 18:05:54.685742: step: 328/470, loss: 1.100771427154541 2023-01-22 18:05:55.499500: step: 330/470, loss: 0.1819295734167099 2023-01-22 18:05:56.289828: step: 332/470, loss: 0.17129111289978027 2023-01-22 18:05:57.098294: step: 334/470, loss: 0.38183021545410156 2023-01-22 18:05:57.826919: step: 336/470, loss: 0.1744094341993332 2023-01-22 18:05:58.586433: step: 338/470, loss: 0.21806088089942932 2023-01-22 18:05:59.260163: step: 340/470, loss: 0.1362285017967224 2023-01-22 18:06:00.029363: step: 342/470, loss: 0.08650539070367813 2023-01-22 18:06:00.833797: step: 344/470, loss: 0.14947454631328583 2023-01-22 18:06:01.572867: step: 346/470, loss: 0.04758727550506592 2023-01-22 18:06:02.316408: step: 348/470, loss: 0.09763923287391663 2023-01-22 18:06:03.044078: step: 350/470, loss: 0.12080357223749161 2023-01-22 18:06:03.786412: step: 352/470, loss: 0.6871947050094604 2023-01-22 18:06:04.557940: step: 354/470, loss: 0.24329255521297455 2023-01-22 18:06:05.319276: step: 356/470, loss: 0.4151495099067688 2023-01-22 18:06:05.992473: step: 358/470, loss: 0.1159839779138565 2023-01-22 18:06:06.729867: step: 360/470, loss: 0.42312848567962646 2023-01-22 18:06:07.476088: step: 362/470, loss: 0.19960635900497437 2023-01-22 18:06:08.243422: step: 364/470, loss: 0.5251233577728271 2023-01-22 18:06:08.901118: step: 366/470, loss: 0.4900449514389038 2023-01-22 18:06:09.587194: step: 368/470, loss: 0.45938706398010254 2023-01-22 18:06:10.356439: step: 370/470, loss: 0.12873217463493347 2023-01-22 18:06:11.053242: step: 372/470, loss: 0.20823711156845093 2023-01-22 18:06:11.822316: step: 374/470, loss: 0.30151286721229553 2023-01-22 18:06:12.583801: step: 376/470, loss: 0.4461778700351715 2023-01-22 18:06:13.406902: step: 378/470, loss: 1.0548112392425537 2023-01-22 18:06:14.145060: step: 380/470, loss: 0.4324851632118225 2023-01-22 18:06:14.871087: step: 382/470, loss: 0.9602062702178955 2023-01-22 18:06:15.606330: step: 384/470, loss: 0.6398365497589111 2023-01-22 18:06:16.389263: step: 386/470, loss: 0.22366558015346527 2023-01-22 18:06:17.187985: step: 388/470, loss: 0.21046940982341766 2023-01-22 18:06:17.935069: step: 390/470, loss: 0.22315052151679993 2023-01-22 18:06:18.631085: step: 392/470, loss: 0.22419774532318115 2023-01-22 18:06:19.411739: step: 394/470, loss: 0.71689373254776 2023-01-22 18:06:20.161910: step: 396/470, loss: 0.16539393365383148 2023-01-22 18:06:20.942444: step: 398/470, loss: 0.37051236629486084 2023-01-22 18:06:21.723288: step: 400/470, loss: 0.17021459341049194 2023-01-22 18:06:22.430236: step: 402/470, loss: 0.23352442681789398 2023-01-22 18:06:23.177735: step: 404/470, loss: 0.13187989592552185 2023-01-22 18:06:23.946146: step: 406/470, loss: 0.6370197534561157 2023-01-22 18:06:24.752187: step: 408/470, loss: 0.1171882152557373 2023-01-22 18:06:25.498180: step: 410/470, loss: 0.2786184549331665 2023-01-22 18:06:26.246323: step: 412/470, loss: 0.6290634870529175 2023-01-22 18:06:26.937795: step: 414/470, loss: 0.1813952922821045 2023-01-22 18:06:27.662719: step: 416/470, loss: 0.3490357995033264 2023-01-22 18:06:28.412767: step: 418/470, loss: 0.23016490042209625 2023-01-22 18:06:29.126015: step: 420/470, loss: 1.1225723028182983 2023-01-22 18:06:29.888588: step: 422/470, loss: 0.7987148761749268 2023-01-22 18:06:30.728353: step: 424/470, loss: 0.1776483952999115 2023-01-22 18:06:31.423192: step: 426/470, loss: 0.2515849769115448 2023-01-22 18:06:32.182861: step: 428/470, loss: 0.22037197649478912 2023-01-22 18:06:32.973110: step: 430/470, loss: 0.1036430150270462 2023-01-22 18:06:33.738928: step: 432/470, loss: 0.2177654653787613 2023-01-22 18:06:34.500680: step: 434/470, loss: 0.1274358332157135 2023-01-22 18:06:35.240828: step: 436/470, loss: 0.33280283212661743 2023-01-22 18:06:36.018578: step: 438/470, loss: 0.26393696665763855 2023-01-22 18:06:36.733527: step: 440/470, loss: 0.13263751566410065 2023-01-22 18:06:37.468955: step: 442/470, loss: 0.13120940327644348 2023-01-22 18:06:38.174378: step: 444/470, loss: 0.4707326292991638 2023-01-22 18:06:39.024327: step: 446/470, loss: 0.46750926971435547 2023-01-22 18:06:39.693456: step: 448/470, loss: 0.06135157495737076 2023-01-22 18:06:40.398167: step: 450/470, loss: 0.09700169414281845 2023-01-22 18:06:41.175858: step: 452/470, loss: 0.23718973994255066 2023-01-22 18:06:41.861199: step: 454/470, loss: 0.3361484706401825 2023-01-22 18:06:42.655295: step: 456/470, loss: 0.13764187693595886 2023-01-22 18:06:43.423948: step: 458/470, loss: 0.14335286617279053 2023-01-22 18:06:44.202054: step: 460/470, loss: 0.12495262920856476 2023-01-22 18:06:44.945296: step: 462/470, loss: 0.881492018699646 2023-01-22 18:06:45.671122: step: 464/470, loss: 0.4301450848579407 2023-01-22 18:06:46.362498: step: 466/470, loss: 1.3912532329559326 2023-01-22 18:06:47.148511: step: 468/470, loss: 0.11058748513460159 2023-01-22 18:06:47.877288: step: 470/470, loss: 0.4207277297973633 2023-01-22 18:06:48.644361: step: 472/470, loss: 0.15675045549869537 2023-01-22 18:06:49.407242: step: 474/470, loss: 0.32435324788093567 2023-01-22 18:06:50.098507: step: 476/470, loss: 0.21787133812904358 2023-01-22 18:06:50.829958: step: 478/470, loss: 0.4728389382362366 2023-01-22 18:06:51.552939: step: 480/470, loss: 0.29139944911003113 2023-01-22 18:06:52.344002: step: 482/470, loss: 0.19343923032283783 2023-01-22 18:06:53.074389: step: 484/470, loss: 0.15642260015010834 2023-01-22 18:06:53.853026: step: 486/470, loss: 0.2686142027378082 2023-01-22 18:06:54.592667: step: 488/470, loss: 0.03354679420590401 2023-01-22 18:06:55.259292: step: 490/470, loss: 0.14455752074718475 2023-01-22 18:06:55.962215: step: 492/470, loss: 0.29530078172683716 2023-01-22 18:06:56.704331: step: 494/470, loss: 0.21259760856628418 2023-01-22 18:06:57.398573: step: 496/470, loss: 0.12661580741405487 2023-01-22 18:06:58.124889: step: 498/470, loss: 0.24475648999214172 2023-01-22 18:06:58.915600: step: 500/470, loss: 0.5884524583816528 2023-01-22 18:06:59.657439: step: 502/470, loss: 0.21985645592212677 2023-01-22 18:07:00.384622: step: 504/470, loss: 0.12764428555965424 2023-01-22 18:07:01.102057: step: 506/470, loss: 0.2050672471523285 2023-01-22 18:07:01.789911: step: 508/470, loss: 0.22950823605060577 2023-01-22 18:07:02.522586: step: 510/470, loss: 0.10886823385953903 2023-01-22 18:07:03.320571: step: 512/470, loss: 0.3298364281654358 2023-01-22 18:07:04.075421: step: 514/470, loss: 0.45132243633270264 2023-01-22 18:07:04.794595: step: 516/470, loss: 0.03829793632030487 2023-01-22 18:07:05.547129: step: 518/470, loss: 0.2860611081123352 2023-01-22 18:07:06.300458: step: 520/470, loss: 0.18205812573432922 2023-01-22 18:07:07.118350: step: 522/470, loss: 0.2330647110939026 2023-01-22 18:07:07.911338: step: 524/470, loss: 0.16605840623378754 2023-01-22 18:07:08.614313: step: 526/470, loss: 0.1584710031747818 2023-01-22 18:07:09.328016: step: 528/470, loss: 0.28219524025917053 2023-01-22 18:07:10.096175: step: 530/470, loss: 0.1415386199951172 2023-01-22 18:07:10.889747: step: 532/470, loss: 0.187623992562294 2023-01-22 18:07:11.618253: step: 534/470, loss: 0.20938031375408173 2023-01-22 18:07:12.384513: step: 536/470, loss: 0.3544868528842926 2023-01-22 18:07:13.252255: step: 538/470, loss: 0.11445646733045578 2023-01-22 18:07:14.062617: step: 540/470, loss: 0.4840231239795685 2023-01-22 18:07:14.759733: step: 542/470, loss: 0.32870981097221375 2023-01-22 18:07:15.586557: step: 544/470, loss: 0.2359415590763092 2023-01-22 18:07:16.319500: step: 546/470, loss: 0.14783021807670593 2023-01-22 18:07:17.041558: step: 548/470, loss: 0.16923844814300537 2023-01-22 18:07:17.740390: step: 550/470, loss: 0.14774803817272186 2023-01-22 18:07:18.484486: step: 552/470, loss: 0.20552322268486023 2023-01-22 18:07:19.207012: step: 554/470, loss: 0.12092557549476624 2023-01-22 18:07:19.936152: step: 556/470, loss: 0.5868374705314636 2023-01-22 18:07:20.635678: step: 558/470, loss: 0.2626172602176666 2023-01-22 18:07:21.417114: step: 560/470, loss: 0.27606257796287537 2023-01-22 18:07:22.111604: step: 562/470, loss: 0.08012336492538452 2023-01-22 18:07:22.928674: step: 564/470, loss: 1.1808509826660156 2023-01-22 18:07:23.696254: step: 566/470, loss: 1.3284801244735718 2023-01-22 18:07:24.460094: step: 568/470, loss: 2.19006609916687 2023-01-22 18:07:25.177082: step: 570/470, loss: 0.13095425069332123 2023-01-22 18:07:25.921163: step: 572/470, loss: 0.461041122674942 2023-01-22 18:07:26.629438: step: 574/470, loss: 0.2042723149061203 2023-01-22 18:07:27.409652: step: 576/470, loss: 0.32484281063079834 2023-01-22 18:07:28.366078: step: 578/470, loss: 0.2067028135061264 2023-01-22 18:07:29.032821: step: 580/470, loss: 0.32187509536743164 2023-01-22 18:07:29.795343: step: 582/470, loss: 0.21868886053562164 2023-01-22 18:07:30.633598: step: 584/470, loss: 0.2576506733894348 2023-01-22 18:07:31.426894: step: 586/470, loss: 0.7114197611808777 2023-01-22 18:07:32.158433: step: 588/470, loss: 0.1497022956609726 2023-01-22 18:07:32.931850: step: 590/470, loss: 0.10337451845407486 2023-01-22 18:07:33.668629: step: 592/470, loss: 0.30149146914482117 2023-01-22 18:07:34.429337: step: 594/470, loss: 0.11470700055360794 2023-01-22 18:07:35.140162: step: 596/470, loss: 0.1277574747800827 2023-01-22 18:07:35.877991: step: 598/470, loss: 0.09452803432941437 2023-01-22 18:07:36.709310: step: 600/470, loss: 0.25843483209609985 2023-01-22 18:07:37.398993: step: 602/470, loss: 0.8243882060050964 2023-01-22 18:07:38.260759: step: 604/470, loss: 0.27867597341537476 2023-01-22 18:07:38.974829: step: 606/470, loss: 0.2572006583213806 2023-01-22 18:07:39.733614: step: 608/470, loss: 0.08993202447891235 2023-01-22 18:07:40.566694: step: 610/470, loss: 0.23165597021579742 2023-01-22 18:07:41.308977: step: 612/470, loss: 0.4315301179885864 2023-01-22 18:07:42.132455: step: 614/470, loss: 0.17089776694774628 2023-01-22 18:07:42.879252: step: 616/470, loss: 0.1850140392780304 2023-01-22 18:07:43.594456: step: 618/470, loss: 0.37521472573280334 2023-01-22 18:07:44.309922: step: 620/470, loss: 0.1793273687362671 2023-01-22 18:07:45.057271: step: 622/470, loss: 0.2895492911338806 2023-01-22 18:07:45.767905: step: 624/470, loss: 0.1320488601922989 2023-01-22 18:07:46.537840: step: 626/470, loss: 0.15132221579551697 2023-01-22 18:07:47.334573: step: 628/470, loss: 0.15033775568008423 2023-01-22 18:07:48.051736: step: 630/470, loss: 0.0726805329322815 2023-01-22 18:07:48.794054: step: 632/470, loss: 0.225330650806427 2023-01-22 18:07:49.618952: step: 634/470, loss: 0.19526013731956482 2023-01-22 18:07:50.310332: step: 636/470, loss: 0.18398520350456238 2023-01-22 18:07:51.032793: step: 638/470, loss: 0.6068989634513855 2023-01-22 18:07:51.719036: step: 640/470, loss: 0.1774880290031433 2023-01-22 18:07:52.413908: step: 642/470, loss: 0.04690373316407204 2023-01-22 18:07:53.070011: step: 644/470, loss: 0.08224115520715714 2023-01-22 18:07:53.900409: step: 646/470, loss: 0.20977337658405304 2023-01-22 18:07:54.679608: step: 648/470, loss: 0.6167982220649719 2023-01-22 18:07:55.388535: step: 650/470, loss: 2.5424869060516357 2023-01-22 18:07:56.167631: step: 652/470, loss: 0.26920145750045776 2023-01-22 18:07:56.903178: step: 654/470, loss: 0.09911765903234482 2023-01-22 18:07:57.646186: step: 656/470, loss: 1.6140161752700806 2023-01-22 18:07:58.341420: step: 658/470, loss: 0.07306616753339767 2023-01-22 18:07:59.161188: step: 660/470, loss: 0.13788312673568726 2023-01-22 18:07:59.860587: step: 662/470, loss: 0.2710290551185608 2023-01-22 18:08:00.678869: step: 664/470, loss: 0.6698711514472961 2023-01-22 18:08:01.399083: step: 666/470, loss: 4.250461578369141 2023-01-22 18:08:02.084103: step: 668/470, loss: 0.44460374116897583 2023-01-22 18:08:02.761524: step: 670/470, loss: 0.23895730078220367 2023-01-22 18:08:03.488635: step: 672/470, loss: 0.13557671010494232 2023-01-22 18:08:04.202360: step: 674/470, loss: 0.0696479007601738 2023-01-22 18:08:04.882072: step: 676/470, loss: 0.061501167714595795 2023-01-22 18:08:05.647630: step: 678/470, loss: 0.18382152915000916 2023-01-22 18:08:06.395156: step: 680/470, loss: 0.1674911230802536 2023-01-22 18:08:07.124438: step: 682/470, loss: 0.5173250436782837 2023-01-22 18:08:07.820180: step: 684/470, loss: 0.1093844622373581 2023-01-22 18:08:08.691750: step: 686/470, loss: 0.5219208598136902 2023-01-22 18:08:09.464837: step: 688/470, loss: 0.16668257117271423 2023-01-22 18:08:10.174098: step: 690/470, loss: 0.5589529871940613 2023-01-22 18:08:10.921504: step: 692/470, loss: 0.16195274889469147 2023-01-22 18:08:11.630445: step: 694/470, loss: 0.17775781452655792 2023-01-22 18:08:12.354121: step: 696/470, loss: 14.064286231994629 2023-01-22 18:08:13.098448: step: 698/470, loss: 1.6122430562973022 2023-01-22 18:08:13.796657: step: 700/470, loss: 0.1364855021238327 2023-01-22 18:08:14.514475: step: 702/470, loss: 0.2651909291744232 2023-01-22 18:08:15.161718: step: 704/470, loss: 0.10258844494819641 2023-01-22 18:08:15.913320: step: 706/470, loss: 0.4878748059272766 2023-01-22 18:08:16.622032: step: 708/470, loss: 0.09945238381624222 2023-01-22 18:08:17.476723: step: 710/470, loss: 0.25955334305763245 2023-01-22 18:08:18.176304: step: 712/470, loss: 0.3970111906528473 2023-01-22 18:08:18.939326: step: 714/470, loss: 0.5209802985191345 2023-01-22 18:08:19.652572: step: 716/470, loss: 0.11759445071220398 2023-01-22 18:08:20.392456: step: 718/470, loss: 0.15108118951320648 2023-01-22 18:08:21.089699: step: 720/470, loss: 0.36267387866973877 2023-01-22 18:08:21.904618: step: 722/470, loss: 0.07263346761465073 2023-01-22 18:08:22.703572: step: 724/470, loss: 0.10593510419130325 2023-01-22 18:08:23.398773: step: 726/470, loss: 0.26219284534454346 2023-01-22 18:08:24.230609: step: 728/470, loss: 0.17348651587963104 2023-01-22 18:08:24.940659: step: 730/470, loss: 0.1823439598083496 2023-01-22 18:08:25.682655: step: 732/470, loss: 0.20073945820331573 2023-01-22 18:08:26.454327: step: 734/470, loss: 0.27448293566703796 2023-01-22 18:08:27.261559: step: 736/470, loss: 0.33971887826919556 2023-01-22 18:08:28.063085: step: 738/470, loss: 0.1402978003025055 2023-01-22 18:08:28.853846: step: 740/470, loss: 0.5981901288032532 2023-01-22 18:08:29.616542: step: 742/470, loss: 0.04211556911468506 2023-01-22 18:08:30.410175: step: 744/470, loss: 0.11189659684896469 2023-01-22 18:08:31.015574: step: 746/470, loss: 0.12144724279642105 2023-01-22 18:08:31.667416: step: 748/470, loss: 0.3307271897792816 2023-01-22 18:08:32.382920: step: 750/470, loss: 0.2954995632171631 2023-01-22 18:08:33.045625: step: 752/470, loss: 0.4646444022655487 2023-01-22 18:08:33.707321: step: 754/470, loss: 0.06638015806674957 2023-01-22 18:08:34.522440: step: 756/470, loss: 0.07224085181951523 2023-01-22 18:08:35.170840: step: 758/470, loss: 0.2975500822067261 2023-01-22 18:08:35.886399: step: 760/470, loss: 0.8255310654640198 2023-01-22 18:08:36.604014: step: 762/470, loss: 0.12929823994636536 2023-01-22 18:08:37.352945: step: 764/470, loss: 0.1628466248512268 2023-01-22 18:08:38.100309: step: 766/470, loss: 0.14177992939949036 2023-01-22 18:08:38.865854: step: 768/470, loss: 0.393669992685318 2023-01-22 18:08:39.613038: step: 770/470, loss: 0.04610329121351242 2023-01-22 18:08:40.424317: step: 772/470, loss: 0.45871657133102417 2023-01-22 18:08:41.173157: step: 774/470, loss: 0.3327120244503021 2023-01-22 18:08:41.897652: step: 776/470, loss: 0.43813204765319824 2023-01-22 18:08:42.796128: step: 778/470, loss: 1.1366257667541504 2023-01-22 18:08:43.524943: step: 780/470, loss: 0.23157668113708496 2023-01-22 18:08:44.267147: step: 782/470, loss: 0.3343833088874817 2023-01-22 18:08:44.995453: step: 784/470, loss: 0.06032833456993103 2023-01-22 18:08:45.872621: step: 786/470, loss: 0.18906527757644653 2023-01-22 18:08:46.545507: step: 788/470, loss: 0.2094613015651703 2023-01-22 18:08:47.284431: step: 790/470, loss: 0.26915594935417175 2023-01-22 18:08:48.061659: step: 792/470, loss: 0.21122537553310394 2023-01-22 18:08:48.807645: step: 794/470, loss: 0.07323165982961655 2023-01-22 18:08:49.581174: step: 796/470, loss: 0.21279887855052948 2023-01-22 18:08:50.338139: step: 798/470, loss: 0.561419665813446 2023-01-22 18:08:51.030442: step: 800/470, loss: 0.16097694635391235 2023-01-22 18:08:51.852560: step: 802/470, loss: 0.2642227113246918 2023-01-22 18:08:52.615322: step: 804/470, loss: 0.2735598385334015 2023-01-22 18:08:53.310770: step: 806/470, loss: 0.1263493448495865 2023-01-22 18:08:54.070796: step: 808/470, loss: 0.33118510246276855 2023-01-22 18:08:54.731753: step: 810/470, loss: 0.3240700960159302 2023-01-22 18:08:55.541905: step: 812/470, loss: 0.09242391586303711 2023-01-22 18:08:56.298799: step: 814/470, loss: 0.15188434720039368 2023-01-22 18:08:57.107203: step: 816/470, loss: 0.5113020539283752 2023-01-22 18:08:57.798521: step: 818/470, loss: 0.1256381869316101 2023-01-22 18:08:58.545393: step: 820/470, loss: 0.08072460442781448 2023-01-22 18:08:59.224228: step: 822/470, loss: 0.4400816559791565 2023-01-22 18:08:59.971000: step: 824/470, loss: 0.22420644760131836 2023-01-22 18:09:00.742679: step: 826/470, loss: 0.13756045699119568 2023-01-22 18:09:01.526944: step: 828/470, loss: 0.05152636021375656 2023-01-22 18:09:02.529922: step: 830/470, loss: 0.1861773133277893 2023-01-22 18:09:03.193632: step: 832/470, loss: 0.08842433243989944 2023-01-22 18:09:03.902736: step: 834/470, loss: 0.1939559429883957 2023-01-22 18:09:04.637006: step: 836/470, loss: 0.06441611051559448 2023-01-22 18:09:05.522285: step: 838/470, loss: 0.18977122008800507 2023-01-22 18:09:06.183382: step: 840/470, loss: 0.02858108840882778 2023-01-22 18:09:06.985737: step: 842/470, loss: 0.2794545888900757 2023-01-22 18:09:07.707208: step: 844/470, loss: 0.666828453540802 2023-01-22 18:09:08.465764: step: 846/470, loss: 0.5264043807983398 2023-01-22 18:09:09.196048: step: 848/470, loss: 0.2906803488731384 2023-01-22 18:09:09.953876: step: 850/470, loss: 0.4317677617073059 2023-01-22 18:09:10.734985: step: 852/470, loss: 0.05206689238548279 2023-01-22 18:09:11.439673: step: 854/470, loss: 0.3372666537761688 2023-01-22 18:09:12.242258: step: 856/470, loss: 0.22663447260856628 2023-01-22 18:09:12.977185: step: 858/470, loss: 0.35830050706863403 2023-01-22 18:09:13.731665: step: 860/470, loss: 0.28675583004951477 2023-01-22 18:09:14.468228: step: 862/470, loss: 0.1459394097328186 2023-01-22 18:09:15.186651: step: 864/470, loss: 0.2950734496116638 2023-01-22 18:09:15.919331: step: 866/470, loss: 0.18734970688819885 2023-01-22 18:09:16.597741: step: 868/470, loss: 0.08804669976234436 2023-01-22 18:09:17.302755: step: 870/470, loss: 0.18895027041435242 2023-01-22 18:09:18.113227: step: 872/470, loss: 0.2600648105144501 2023-01-22 18:09:18.893797: step: 874/470, loss: 0.44221991300582886 2023-01-22 18:09:19.633366: step: 876/470, loss: 0.17826533317565918 2023-01-22 18:09:20.352203: step: 878/470, loss: 0.17969872057437897 2023-01-22 18:09:21.081095: step: 880/470, loss: 0.2378416508436203 2023-01-22 18:09:21.913569: step: 882/470, loss: 0.12380614876747131 2023-01-22 18:09:22.674709: step: 884/470, loss: 0.35380885004997253 2023-01-22 18:09:23.494991: step: 886/470, loss: 0.18434365093708038 2023-01-22 18:09:24.289092: step: 888/470, loss: 0.09525241702795029 2023-01-22 18:09:24.941600: step: 890/470, loss: 0.05519452691078186 2023-01-22 18:09:25.667055: step: 892/470, loss: 0.12015210837125778 2023-01-22 18:09:26.362181: step: 894/470, loss: 0.15597781538963318 2023-01-22 18:09:27.086632: step: 896/470, loss: 0.052873898297548294 2023-01-22 18:09:27.839292: step: 898/470, loss: 0.587254524230957 2023-01-22 18:09:28.605687: step: 900/470, loss: 0.21591414511203766 2023-01-22 18:09:29.280642: step: 902/470, loss: 0.1420390009880066 2023-01-22 18:09:30.105786: step: 904/470, loss: 0.5451937317848206 2023-01-22 18:09:30.972999: step: 906/470, loss: 0.4762882590293884 2023-01-22 18:09:31.643588: step: 908/470, loss: 0.08040604740381241 2023-01-22 18:09:32.382350: step: 910/470, loss: 0.1304711103439331 2023-01-22 18:09:33.118956: step: 912/470, loss: 0.43863868713378906 2023-01-22 18:09:33.869449: step: 914/470, loss: 0.07513883709907532 2023-01-22 18:09:34.584811: step: 916/470, loss: 1.0359426736831665 2023-01-22 18:09:35.331547: step: 918/470, loss: 0.2730335593223572 2023-01-22 18:09:36.110004: step: 920/470, loss: 0.16464608907699585 2023-01-22 18:09:36.845537: step: 922/470, loss: 0.5824774503707886 2023-01-22 18:09:37.669792: step: 924/470, loss: 0.6275187134742737 2023-01-22 18:09:38.428237: step: 926/470, loss: 0.1604541540145874 2023-01-22 18:09:39.174794: step: 928/470, loss: 0.27200543880462646 2023-01-22 18:09:39.842197: step: 930/470, loss: 0.886191189289093 2023-01-22 18:09:40.598766: step: 932/470, loss: 0.22329403460025787 2023-01-22 18:09:41.325740: step: 934/470, loss: 0.12522681057453156 2023-01-22 18:09:41.986663: step: 936/470, loss: 0.1403811126947403 2023-01-22 18:09:42.707865: step: 938/470, loss: 0.22944138944149017 2023-01-22 18:09:43.421101: step: 940/470, loss: 0.12413433194160461 2023-01-22 18:09:44.102174: step: 942/470, loss: 0.30450427532196045 ================================================== Loss: 0.340 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3091249669923422, 'r': 0.3173370154513418, 'f1': 0.3131771669341894}, 'combined': 0.23076212300413954, 'epoch': 11} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3375444187463703, 'r': 0.3472812769794387, 'f1': 0.3423436284915794}, 'combined': 0.23844829845184637, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30646887258908145, 'r': 0.32100724415402837, 'f1': 0.3135696342338702}, 'combined': 0.23105130943548327, 'epoch': 11} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3411388045021567, 'r': 0.33851465985214013, 'f1': 0.33982166626083565}, 'combined': 0.23669170784336813, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2912099494097808, 'r': 0.327680265654649, 'f1': 0.3083705357142857}, 'combined': 0.22722039473684208, 'epoch': 11} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33296624392136376, 'r': 0.355377433416071, 'f1': 0.34380700535136166}, 'combined': 0.23946756591637133, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2288135593220339, 'r': 0.38571428571428573, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25757575757575757, 'r': 0.3695652173913043, 'f1': 0.30357142857142855}, 'combined': 0.15178571428571427, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.27586206896551724, 'f1': 0.3018867924528302}, 'combined': 0.2012578616352201, 'epoch': 11} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3091249669923422, 'r': 0.3173370154513418, 'f1': 0.3131771669341894}, 'combined': 0.23076212300413954, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3375444187463703, 'r': 0.3472812769794387, 'f1': 0.3423436284915794}, 'combined': 0.23844829845184637, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2288135593220339, 'r': 0.38571428571428573, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26474047961800956, 'r': 0.3240182340675829, 'f1': 0.29139523780480575}, 'combined': 0.2147122804877516, 'epoch': 7} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32541475117674507, 'r': 0.34573363573629207, 'f1': 0.33526661835256644}, 'combined': 0.23351903765850401, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2722222222222222, 'r': 0.532608695652174, 'f1': 0.36029411764705876}, 'combined': 0.18014705882352938, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27696660863807243, 'r': 0.31953642894107787, 'f1': 0.2967325075805252}, 'combined': 0.21864500558565014, 'epoch': 10} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34185545824681857, 'r': 0.32870717139117167, 'f1': 0.3351524100459005}, 'combined': 0.23343948958420932, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4318181818181818, 'r': 0.3275862068965517, 'f1': 0.3725490196078432}, 'combined': 0.24836601307189546, 'epoch': 10} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:12:31.537144: step: 2/470, loss: 1.6027824878692627 2023-01-22 18:12:32.288275: step: 4/470, loss: 0.513495922088623 2023-01-22 18:12:33.003171: step: 6/470, loss: 0.4840008020401001 2023-01-22 18:12:33.775028: step: 8/470, loss: 0.2687922716140747 2023-01-22 18:12:34.555335: step: 10/470, loss: 0.8560855388641357 2023-01-22 18:12:35.444248: step: 12/470, loss: 0.17021265625953674 2023-01-22 18:12:36.227771: step: 14/470, loss: 0.18634700775146484 2023-01-22 18:12:36.917051: step: 16/470, loss: 0.05369818955659866 2023-01-22 18:12:37.688471: step: 18/470, loss: 0.19231680035591125 2023-01-22 18:12:38.442025: step: 20/470, loss: 0.19430579245090485 2023-01-22 18:12:39.173962: step: 22/470, loss: 0.7491946220397949 2023-01-22 18:12:39.984393: step: 24/470, loss: 0.19225333631038666 2023-01-22 18:12:40.718683: step: 26/470, loss: 0.32709619402885437 2023-01-22 18:12:41.406870: step: 28/470, loss: 0.1636715531349182 2023-01-22 18:12:42.211823: step: 30/470, loss: 0.08695600926876068 2023-01-22 18:12:42.930852: step: 32/470, loss: 0.33848845958709717 2023-01-22 18:12:43.687991: step: 34/470, loss: 0.039047807455062866 2023-01-22 18:12:44.481509: step: 36/470, loss: 0.09349209070205688 2023-01-22 18:12:45.203818: step: 38/470, loss: 0.12132520228624344 2023-01-22 18:12:45.977897: step: 40/470, loss: 0.24875620007514954 2023-01-22 18:12:46.814774: step: 42/470, loss: 0.37038731575012207 2023-01-22 18:12:47.539647: step: 44/470, loss: 0.14126403629779816 2023-01-22 18:12:48.279711: step: 46/470, loss: 0.41059553623199463 2023-01-22 18:12:48.997436: step: 48/470, loss: 0.09567522257566452 2023-01-22 18:12:49.810987: step: 50/470, loss: 0.20272527635097504 2023-01-22 18:12:50.588528: step: 52/470, loss: 0.13555192947387695 2023-01-22 18:12:51.356294: step: 54/470, loss: 0.08126991242170334 2023-01-22 18:12:52.122844: step: 56/470, loss: 0.13052316009998322 2023-01-22 18:12:52.825028: step: 58/470, loss: 0.08066800236701965 2023-01-22 18:12:53.583323: step: 60/470, loss: 0.17245280742645264 2023-01-22 18:12:54.311518: step: 62/470, loss: 0.3052728772163391 2023-01-22 18:12:55.115318: step: 64/470, loss: 0.3907634913921356 2023-01-22 18:12:55.919770: step: 66/470, loss: 0.2599896192550659 2023-01-22 18:12:56.632461: step: 68/470, loss: 0.141568124294281 2023-01-22 18:12:57.359191: step: 70/470, loss: 0.10497156530618668 2023-01-22 18:12:58.147225: step: 72/470, loss: 0.2574126124382019 2023-01-22 18:12:58.869845: step: 74/470, loss: 0.18920814990997314 2023-01-22 18:12:59.606753: step: 76/470, loss: 0.20635807514190674 2023-01-22 18:13:00.277853: step: 78/470, loss: 0.11566061526536942 2023-01-22 18:13:01.088973: step: 80/470, loss: 0.11462550610303879 2023-01-22 18:13:01.847800: step: 82/470, loss: 0.3002080023288727 2023-01-22 18:13:02.552987: step: 84/470, loss: 0.13464710116386414 2023-01-22 18:13:03.291821: step: 86/470, loss: 0.044168468564748764 2023-01-22 18:13:04.128668: step: 88/470, loss: 0.29472196102142334 2023-01-22 18:13:04.914211: step: 90/470, loss: 0.20468667149543762 2023-01-22 18:13:05.686867: step: 92/470, loss: 0.2128215730190277 2023-01-22 18:13:06.428851: step: 94/470, loss: 0.03551870957016945 2023-01-22 18:13:07.091656: step: 96/470, loss: 0.050920166075229645 2023-01-22 18:13:07.870726: step: 98/470, loss: 0.13329026103019714 2023-01-22 18:13:08.610663: step: 100/470, loss: 0.04325414076447487 2023-01-22 18:13:09.332443: step: 102/470, loss: 0.5133774876594543 2023-01-22 18:13:10.052122: step: 104/470, loss: 0.19939365983009338 2023-01-22 18:13:10.769594: step: 106/470, loss: 0.14586398005485535 2023-01-22 18:13:11.408620: step: 108/470, loss: 0.49300411343574524 2023-01-22 18:13:12.119230: step: 110/470, loss: 0.22268635034561157 2023-01-22 18:13:12.872413: step: 112/470, loss: 0.07112699747085571 2023-01-22 18:13:13.662649: step: 114/470, loss: 0.8724595308303833 2023-01-22 18:13:14.416892: step: 116/470, loss: 0.040853098034858704 2023-01-22 18:13:15.177766: step: 118/470, loss: 0.11627252399921417 2023-01-22 18:13:15.859491: step: 120/470, loss: 0.44871455430984497 2023-01-22 18:13:16.591361: step: 122/470, loss: 0.10901898890733719 2023-01-22 18:13:17.296176: step: 124/470, loss: 0.1313031166791916 2023-01-22 18:13:18.022330: step: 126/470, loss: 0.35010117292404175 2023-01-22 18:13:18.775411: step: 128/470, loss: 0.20002852380275726 2023-01-22 18:13:19.507213: step: 130/470, loss: 0.21068109571933746 2023-01-22 18:13:20.238217: step: 132/470, loss: 0.07485038787126541 2023-01-22 18:13:20.998323: step: 134/470, loss: 0.20554983615875244 2023-01-22 18:13:21.775344: step: 136/470, loss: 0.09597218781709671 2023-01-22 18:13:22.549749: step: 138/470, loss: 0.1307460069656372 2023-01-22 18:13:23.310269: step: 140/470, loss: 0.0812094509601593 2023-01-22 18:13:24.027440: step: 142/470, loss: 0.2051864117383957 2023-01-22 18:13:24.751501: step: 144/470, loss: 0.22790312767028809 2023-01-22 18:13:25.410164: step: 146/470, loss: 0.07648234069347382 2023-01-22 18:13:26.138567: step: 148/470, loss: 0.45176663994789124 2023-01-22 18:13:26.905712: step: 150/470, loss: 0.17060501873493195 2023-01-22 18:13:27.609990: step: 152/470, loss: 0.1733158975839615 2023-01-22 18:13:28.453634: step: 154/470, loss: 0.10912138223648071 2023-01-22 18:13:29.213215: step: 156/470, loss: 0.15763217210769653 2023-01-22 18:13:29.955207: step: 158/470, loss: 0.40257176756858826 2023-01-22 18:13:30.706772: step: 160/470, loss: 0.30875569581985474 2023-01-22 18:13:31.467264: step: 162/470, loss: 0.8680994510650635 2023-01-22 18:13:32.242206: step: 164/470, loss: 0.2595478892326355 2023-01-22 18:13:32.994840: step: 166/470, loss: 0.06596395373344421 2023-01-22 18:13:33.712494: step: 168/470, loss: 0.12518943846225739 2023-01-22 18:13:34.432383: step: 170/470, loss: 0.24903565645217896 2023-01-22 18:13:35.218198: step: 172/470, loss: 0.1930832415819168 2023-01-22 18:13:35.957308: step: 174/470, loss: 0.021100223064422607 2023-01-22 18:13:36.725868: step: 176/470, loss: 0.23497158288955688 2023-01-22 18:13:37.579861: step: 178/470, loss: 0.1109008640050888 2023-01-22 18:13:38.330370: step: 180/470, loss: 0.35954418778419495 2023-01-22 18:13:39.062655: step: 182/470, loss: 0.11698159575462341 2023-01-22 18:13:39.742008: step: 184/470, loss: 0.10983223468065262 2023-01-22 18:13:40.496304: step: 186/470, loss: 0.12484107166528702 2023-01-22 18:13:41.262591: step: 188/470, loss: 0.19515368342399597 2023-01-22 18:13:41.994779: step: 190/470, loss: 0.26054152846336365 2023-01-22 18:13:42.691412: step: 192/470, loss: 0.14039883017539978 2023-01-22 18:13:43.352640: step: 194/470, loss: 0.16187353432178497 2023-01-22 18:13:44.025659: step: 196/470, loss: 0.14299781620502472 2023-01-22 18:13:44.741411: step: 198/470, loss: 0.21850766241550446 2023-01-22 18:13:45.505448: step: 200/470, loss: 0.18616288900375366 2023-01-22 18:13:46.222567: step: 202/470, loss: 0.08063409477472305 2023-01-22 18:13:46.896678: step: 204/470, loss: 0.061108168214559555 2023-01-22 18:13:47.605609: step: 206/470, loss: 0.10571593791246414 2023-01-22 18:13:48.383947: step: 208/470, loss: 0.5858561992645264 2023-01-22 18:13:49.070159: step: 210/470, loss: 0.0865621417760849 2023-01-22 18:13:49.768124: step: 212/470, loss: 0.3058476150035858 2023-01-22 18:13:50.434789: step: 214/470, loss: 0.30787721276283264 2023-01-22 18:13:51.168055: step: 216/470, loss: 0.05481060966849327 2023-01-22 18:13:51.837337: step: 218/470, loss: 0.1102779284119606 2023-01-22 18:13:52.592505: step: 220/470, loss: 0.21178603172302246 2023-01-22 18:13:53.376474: step: 222/470, loss: 0.10403713583946228 2023-01-22 18:13:54.241194: step: 224/470, loss: 0.6571042537689209 2023-01-22 18:13:54.971723: step: 226/470, loss: 0.10259111225605011 2023-01-22 18:13:55.650033: step: 228/470, loss: 0.1595088392496109 2023-01-22 18:13:56.422279: step: 230/470, loss: 0.17050521075725555 2023-01-22 18:13:57.181457: step: 232/470, loss: 0.5048624873161316 2023-01-22 18:13:57.989767: step: 234/470, loss: 0.20384322106838226 2023-01-22 18:13:58.732008: step: 236/470, loss: 0.11227284371852875 2023-01-22 18:13:59.496798: step: 238/470, loss: 0.07610318064689636 2023-01-22 18:14:00.221149: step: 240/470, loss: 0.4045836627483368 2023-01-22 18:14:00.965009: step: 242/470, loss: 0.068994902074337 2023-01-22 18:14:01.672204: step: 244/470, loss: 0.14514769613742828 2023-01-22 18:14:02.447251: step: 246/470, loss: 0.06832067668437958 2023-01-22 18:14:03.194508: step: 248/470, loss: 0.415750116109848 2023-01-22 18:14:03.932197: step: 250/470, loss: 0.2067742943763733 2023-01-22 18:14:04.747594: step: 252/470, loss: 0.3949142098426819 2023-01-22 18:14:05.492270: step: 254/470, loss: 0.09282752871513367 2023-01-22 18:14:06.261480: step: 256/470, loss: 0.2223929464817047 2023-01-22 18:14:07.007911: step: 258/470, loss: 0.252516508102417 2023-01-22 18:14:07.779311: step: 260/470, loss: 2.0077507495880127 2023-01-22 18:14:08.586139: step: 262/470, loss: 0.5033937096595764 2023-01-22 18:14:09.288165: step: 264/470, loss: 0.14328442513942719 2023-01-22 18:14:09.992658: step: 266/470, loss: 0.30592918395996094 2023-01-22 18:14:10.723949: step: 268/470, loss: 0.15917566418647766 2023-01-22 18:14:11.411566: step: 270/470, loss: 0.14353780448436737 2023-01-22 18:14:12.161821: step: 272/470, loss: 0.064031682908535 2023-01-22 18:14:13.052319: step: 274/470, loss: 0.19164782762527466 2023-01-22 18:14:13.740613: step: 276/470, loss: 0.1831219345331192 2023-01-22 18:14:14.565437: step: 278/470, loss: 0.10314060747623444 2023-01-22 18:14:15.312831: step: 280/470, loss: 0.11051566898822784 2023-01-22 18:14:16.095694: step: 282/470, loss: 0.12450094521045685 2023-01-22 18:14:17.005277: step: 284/470, loss: 0.19194933772087097 2023-01-22 18:14:17.742309: step: 286/470, loss: 0.3615543842315674 2023-01-22 18:14:18.432034: step: 288/470, loss: 0.07781562954187393 2023-01-22 18:14:19.195737: step: 290/470, loss: 0.12711076438426971 2023-01-22 18:14:19.921430: step: 292/470, loss: 0.5902228951454163 2023-01-22 18:14:20.631058: step: 294/470, loss: 0.4633359909057617 2023-01-22 18:14:21.462091: step: 296/470, loss: 0.14595328271389008 2023-01-22 18:14:22.187563: step: 298/470, loss: 0.18961495161056519 2023-01-22 18:14:22.879496: step: 300/470, loss: 1.7901432514190674 2023-01-22 18:14:23.656196: step: 302/470, loss: 0.1573238968849182 2023-01-22 18:14:24.428666: step: 304/470, loss: 2.244335651397705 2023-01-22 18:14:25.078698: step: 306/470, loss: 0.12172958999872208 2023-01-22 18:14:25.845123: step: 308/470, loss: 0.07545558363199234 2023-01-22 18:14:26.596498: step: 310/470, loss: 0.0413089245557785 2023-01-22 18:14:27.323657: step: 312/470, loss: 0.3559807538986206 2023-01-22 18:14:28.063896: step: 314/470, loss: 0.08101952075958252 2023-01-22 18:14:28.775759: step: 316/470, loss: 0.1672833114862442 2023-01-22 18:14:29.446201: step: 318/470, loss: 0.16862982511520386 2023-01-22 18:14:30.207740: step: 320/470, loss: 0.06150231882929802 2023-01-22 18:14:31.024306: step: 322/470, loss: 0.10682566463947296 2023-01-22 18:14:31.762402: step: 324/470, loss: 0.233676478266716 2023-01-22 18:14:32.627618: step: 326/470, loss: 0.17197014391422272 2023-01-22 18:14:33.375538: step: 328/470, loss: 0.3912794589996338 2023-01-22 18:14:34.140630: step: 330/470, loss: 0.4253474771976471 2023-01-22 18:14:34.845077: step: 332/470, loss: 0.15210644900798798 2023-01-22 18:14:35.657345: step: 334/470, loss: 0.2834761440753937 2023-01-22 18:14:36.420972: step: 336/470, loss: 0.09623689949512482 2023-01-22 18:14:37.248332: step: 338/470, loss: 0.32247433066368103 2023-01-22 18:14:37.933117: step: 340/470, loss: 0.059781987220048904 2023-01-22 18:14:38.646017: step: 342/470, loss: 0.2658904492855072 2023-01-22 18:14:39.428579: step: 344/470, loss: 0.3153829872608185 2023-01-22 18:14:40.148868: step: 346/470, loss: 0.08230673521757126 2023-01-22 18:14:40.934470: step: 348/470, loss: 0.17258965969085693 2023-01-22 18:14:41.650331: step: 350/470, loss: 0.28981152176856995 2023-01-22 18:14:42.255623: step: 352/470, loss: 0.1635764241218567 2023-01-22 18:14:42.994632: step: 354/470, loss: 0.21193452179431915 2023-01-22 18:14:43.669443: step: 356/470, loss: 0.07874863594770432 2023-01-22 18:14:44.409748: step: 358/470, loss: 0.09164706617593765 2023-01-22 18:14:45.168441: step: 360/470, loss: 5.99439811706543 2023-01-22 18:14:45.898108: step: 362/470, loss: 0.16293644905090332 2023-01-22 18:14:46.628516: step: 364/470, loss: 0.09890060126781464 2023-01-22 18:14:47.328034: step: 366/470, loss: 0.21786588430404663 2023-01-22 18:14:48.064312: step: 368/470, loss: 0.33316826820373535 2023-01-22 18:14:48.823254: step: 370/470, loss: 0.055760059505701065 2023-01-22 18:14:49.522529: step: 372/470, loss: 0.3406805098056793 2023-01-22 18:14:50.304430: step: 374/470, loss: 0.6691519021987915 2023-01-22 18:14:50.979094: step: 376/470, loss: 0.0685260146856308 2023-01-22 18:14:51.734401: step: 378/470, loss: 0.12233281135559082 2023-01-22 18:14:52.374373: step: 380/470, loss: 0.14288434386253357 2023-01-22 18:14:53.100473: step: 382/470, loss: 0.19023549556732178 2023-01-22 18:14:53.807078: step: 384/470, loss: 0.08775024116039276 2023-01-22 18:14:54.485537: step: 386/470, loss: 0.27361616492271423 2023-01-22 18:14:55.262078: step: 388/470, loss: 0.13204854726791382 2023-01-22 18:14:55.993164: step: 390/470, loss: 0.18165439367294312 2023-01-22 18:14:56.868985: step: 392/470, loss: 0.12773683667182922 2023-01-22 18:14:57.639613: step: 394/470, loss: 0.0818340927362442 2023-01-22 18:14:58.385165: step: 396/470, loss: 0.1057894304394722 2023-01-22 18:14:59.107510: step: 398/470, loss: 0.5085698366165161 2023-01-22 18:14:59.800485: step: 400/470, loss: 0.05038611590862274 2023-01-22 18:15:00.493810: step: 402/470, loss: 0.168458491563797 2023-01-22 18:15:01.269380: step: 404/470, loss: 0.18103928864002228 2023-01-22 18:15:01.950820: step: 406/470, loss: 0.18114416301250458 2023-01-22 18:15:02.702144: step: 408/470, loss: 0.13039417564868927 2023-01-22 18:15:03.439248: step: 410/470, loss: 0.12932468950748444 2023-01-22 18:15:04.172476: step: 412/470, loss: 0.30325838923454285 2023-01-22 18:15:04.939894: step: 414/470, loss: 0.3485577702522278 2023-01-22 18:15:05.626540: step: 416/470, loss: 0.2910950183868408 2023-01-22 18:15:06.373003: step: 418/470, loss: 0.12932966649532318 2023-01-22 18:15:07.081214: step: 420/470, loss: 0.08383800089359283 2023-01-22 18:15:07.767677: step: 422/470, loss: 0.17576591670513153 2023-01-22 18:15:08.451479: step: 424/470, loss: 0.1013016402721405 2023-01-22 18:15:09.122963: step: 426/470, loss: 0.2601459324359894 2023-01-22 18:15:10.029948: step: 428/470, loss: 0.09860192984342575 2023-01-22 18:15:10.731861: step: 430/470, loss: 0.12635904550552368 2023-01-22 18:15:11.495857: step: 432/470, loss: 0.08993472903966904 2023-01-22 18:15:12.160839: step: 434/470, loss: 0.2409133017063141 2023-01-22 18:15:12.963911: step: 436/470, loss: 0.7562994956970215 2023-01-22 18:15:13.684817: step: 438/470, loss: 0.14493654668331146 2023-01-22 18:15:14.426297: step: 440/470, loss: 0.2697089910507202 2023-01-22 18:15:15.133700: step: 442/470, loss: 0.17629806697368622 2023-01-22 18:15:15.879406: step: 444/470, loss: 0.3165607452392578 2023-01-22 18:15:16.632326: step: 446/470, loss: 0.24010397493839264 2023-01-22 18:15:17.367725: step: 448/470, loss: 0.08545230329036713 2023-01-22 18:15:18.111349: step: 450/470, loss: 0.24912571907043457 2023-01-22 18:15:18.953924: step: 452/470, loss: 0.42028677463531494 2023-01-22 18:15:19.664672: step: 454/470, loss: 0.12412894517183304 2023-01-22 18:15:20.397866: step: 456/470, loss: 0.07028322666883469 2023-01-22 18:15:21.060521: step: 458/470, loss: 0.045641541481018066 2023-01-22 18:15:21.770459: step: 460/470, loss: 0.2509128451347351 2023-01-22 18:15:22.497615: step: 462/470, loss: 0.7367977499961853 2023-01-22 18:15:23.183304: step: 464/470, loss: 0.09236481040716171 2023-01-22 18:15:23.977018: step: 466/470, loss: 0.1762775182723999 2023-01-22 18:15:24.673299: step: 468/470, loss: 0.12322688847780228 2023-01-22 18:15:25.420682: step: 470/470, loss: 0.1871875375509262 2023-01-22 18:15:26.197583: step: 472/470, loss: 0.17395652830600739 2023-01-22 18:15:26.936920: step: 474/470, loss: 0.5580199956893921 2023-01-22 18:15:27.676250: step: 476/470, loss: 0.11032622307538986 2023-01-22 18:15:28.380428: step: 478/470, loss: 0.0684078112244606 2023-01-22 18:15:29.107805: step: 480/470, loss: 0.12853725254535675 2023-01-22 18:15:29.835725: step: 482/470, loss: 0.11245977878570557 2023-01-22 18:15:30.586236: step: 484/470, loss: 0.44150134921073914 2023-01-22 18:15:31.300207: step: 486/470, loss: 0.10275861620903015 2023-01-22 18:15:31.997174: step: 488/470, loss: 1.2313703298568726 2023-01-22 18:15:32.777895: step: 490/470, loss: 0.16322949528694153 2023-01-22 18:15:33.461229: step: 492/470, loss: 0.23350676894187927 2023-01-22 18:15:34.171616: step: 494/470, loss: 0.16384433209896088 2023-01-22 18:15:34.922211: step: 496/470, loss: 0.22567422688007355 2023-01-22 18:15:35.680538: step: 498/470, loss: 0.568155825138092 2023-01-22 18:15:36.525394: step: 500/470, loss: 0.2018314152956009 2023-01-22 18:15:37.251959: step: 502/470, loss: 0.30602437257766724 2023-01-22 18:15:37.962522: step: 504/470, loss: 0.5151650905609131 2023-01-22 18:15:38.689860: step: 506/470, loss: 0.1632133275270462 2023-01-22 18:15:39.412186: step: 508/470, loss: 0.39310041069984436 2023-01-22 18:15:40.264002: step: 510/470, loss: 0.32889223098754883 2023-01-22 18:15:41.041029: step: 512/470, loss: 0.16092555224895477 2023-01-22 18:15:41.773260: step: 514/470, loss: 0.0802043080329895 2023-01-22 18:15:42.418937: step: 516/470, loss: 0.4701124131679535 2023-01-22 18:15:43.136626: step: 518/470, loss: 0.33139216899871826 2023-01-22 18:15:43.857401: step: 520/470, loss: 0.19706635177135468 2023-01-22 18:15:44.631286: step: 522/470, loss: 1.2813704013824463 2023-01-22 18:15:45.314722: step: 524/470, loss: 0.16043278574943542 2023-01-22 18:15:46.025942: step: 526/470, loss: 0.2318851202726364 2023-01-22 18:15:46.798311: step: 528/470, loss: 0.5282825827598572 2023-01-22 18:15:47.577621: step: 530/470, loss: 0.19871442019939423 2023-01-22 18:15:48.272845: step: 532/470, loss: 0.08099329471588135 2023-01-22 18:15:48.996894: step: 534/470, loss: 0.03250506892800331 2023-01-22 18:15:49.679968: step: 536/470, loss: 0.25197839736938477 2023-01-22 18:15:50.453160: step: 538/470, loss: 0.11561629921197891 2023-01-22 18:15:51.201066: step: 540/470, loss: 0.24496881663799286 2023-01-22 18:15:51.944403: step: 542/470, loss: 0.13660961389541626 2023-01-22 18:15:52.624144: step: 544/470, loss: 0.15981179475784302 2023-01-22 18:15:53.310572: step: 546/470, loss: 0.5604614019393921 2023-01-22 18:15:54.070571: step: 548/470, loss: 0.1567903310060501 2023-01-22 18:15:54.814510: step: 550/470, loss: 0.3064483404159546 2023-01-22 18:15:55.551226: step: 552/470, loss: 0.2242286205291748 2023-01-22 18:15:56.286138: step: 554/470, loss: 0.06868718564510345 2023-01-22 18:15:57.057402: step: 556/470, loss: 0.40813806653022766 2023-01-22 18:15:57.789376: step: 558/470, loss: 0.36897414922714233 2023-01-22 18:15:58.502525: step: 560/470, loss: 0.1006973534822464 2023-01-22 18:15:59.280921: step: 562/470, loss: 0.09448845684528351 2023-01-22 18:16:00.067186: step: 564/470, loss: 0.7840608954429626 2023-01-22 18:16:00.845821: step: 566/470, loss: 0.22917340695858002 2023-01-22 18:16:01.564170: step: 568/470, loss: 0.07258880138397217 2023-01-22 18:16:02.288409: step: 570/470, loss: 0.17850033938884735 2023-01-22 18:16:03.090745: step: 572/470, loss: 0.19725555181503296 2023-01-22 18:16:03.853776: step: 574/470, loss: 0.08748982846736908 2023-01-22 18:16:04.773270: step: 576/470, loss: 0.1885845810174942 2023-01-22 18:16:05.486898: step: 578/470, loss: 0.21629391610622406 2023-01-22 18:16:06.205758: step: 580/470, loss: 0.1099558100104332 2023-01-22 18:16:06.915773: step: 582/470, loss: 0.07752203196287155 2023-01-22 18:16:07.735798: step: 584/470, loss: 0.21718913316726685 2023-01-22 18:16:08.519213: step: 586/470, loss: 0.11684189736843109 2023-01-22 18:16:09.212615: step: 588/470, loss: 0.14402146637439728 2023-01-22 18:16:10.058843: step: 590/470, loss: 0.30400651693344116 2023-01-22 18:16:10.737128: step: 592/470, loss: 0.14186808466911316 2023-01-22 18:16:11.424156: step: 594/470, loss: 0.15555933117866516 2023-01-22 18:16:12.139768: step: 596/470, loss: 0.1906273365020752 2023-01-22 18:16:12.956461: step: 598/470, loss: 1.0738638639450073 2023-01-22 18:16:13.706108: step: 600/470, loss: 0.3519884943962097 2023-01-22 18:16:14.453470: step: 602/470, loss: 0.1826198250055313 2023-01-22 18:16:15.154584: step: 604/470, loss: 0.07761722803115845 2023-01-22 18:16:16.182286: step: 606/470, loss: 0.15749958157539368 2023-01-22 18:16:16.978171: step: 608/470, loss: 0.3068345785140991 2023-01-22 18:16:17.675652: step: 610/470, loss: 0.12547728419303894 2023-01-22 18:16:18.384908: step: 612/470, loss: 0.2842352092266083 2023-01-22 18:16:19.094394: step: 614/470, loss: 0.6424673795700073 2023-01-22 18:16:19.845829: step: 616/470, loss: 0.13101936876773834 2023-01-22 18:16:20.663450: step: 618/470, loss: 0.12014391273260117 2023-01-22 18:16:21.368453: step: 620/470, loss: 0.33476048707962036 2023-01-22 18:16:22.097685: step: 622/470, loss: 0.1459568589925766 2023-01-22 18:16:23.038847: step: 624/470, loss: 0.1385248303413391 2023-01-22 18:16:23.770611: step: 626/470, loss: 0.21259213984012604 2023-01-22 18:16:24.469772: step: 628/470, loss: 0.4322361946105957 2023-01-22 18:16:25.175134: step: 630/470, loss: 0.03826691210269928 2023-01-22 18:16:25.889459: step: 632/470, loss: 0.23468813300132751 2023-01-22 18:16:26.593299: step: 634/470, loss: 0.7665181756019592 2023-01-22 18:16:27.338876: step: 636/470, loss: 0.4738726317882538 2023-01-22 18:16:27.996710: step: 638/470, loss: 0.12215352058410645 2023-01-22 18:16:28.688387: step: 640/470, loss: 0.0886230543255806 2023-01-22 18:16:29.401387: step: 642/470, loss: 0.25113239884376526 2023-01-22 18:16:30.172655: step: 644/470, loss: 1.358510136604309 2023-01-22 18:16:30.915324: step: 646/470, loss: 0.02677135542035103 2023-01-22 18:16:31.616578: step: 648/470, loss: 0.0627710297703743 2023-01-22 18:16:32.365910: step: 650/470, loss: 0.17032356560230255 2023-01-22 18:16:33.074214: step: 652/470, loss: 0.14521194994449615 2023-01-22 18:16:33.802347: step: 654/470, loss: 0.10609611868858337 2023-01-22 18:16:34.589307: step: 656/470, loss: 0.18908576667308807 2023-01-22 18:16:35.353153: step: 658/470, loss: 0.2079189121723175 2023-01-22 18:16:36.106597: step: 660/470, loss: 1.0469714403152466 2023-01-22 18:16:36.865776: step: 662/470, loss: 0.32972848415374756 2023-01-22 18:16:37.655069: step: 664/470, loss: 0.28195086121559143 2023-01-22 18:16:38.426238: step: 666/470, loss: 0.16927146911621094 2023-01-22 18:16:39.225198: step: 668/470, loss: 0.0880625993013382 2023-01-22 18:16:40.021297: step: 670/470, loss: 0.3091566264629364 2023-01-22 18:16:40.759665: step: 672/470, loss: 0.13625238835811615 2023-01-22 18:16:41.524358: step: 674/470, loss: 0.16259387135505676 2023-01-22 18:16:42.311500: step: 676/470, loss: 0.10333267599344254 2023-01-22 18:16:43.089235: step: 678/470, loss: 0.08288844674825668 2023-01-22 18:16:43.743305: step: 680/470, loss: 0.1420072615146637 2023-01-22 18:16:44.468514: step: 682/470, loss: 0.23318932950496674 2023-01-22 18:16:45.289652: step: 684/470, loss: 0.16551189124584198 2023-01-22 18:16:46.128707: step: 686/470, loss: 0.1825304627418518 2023-01-22 18:16:46.816071: step: 688/470, loss: 0.3301374614238739 2023-01-22 18:16:47.555125: step: 690/470, loss: 0.24686174094676971 2023-01-22 18:16:48.268492: step: 692/470, loss: 0.1321008801460266 2023-01-22 18:16:48.902874: step: 694/470, loss: 0.163587749004364 2023-01-22 18:16:49.570862: step: 696/470, loss: 0.24472731351852417 2023-01-22 18:16:50.270812: step: 698/470, loss: 0.05031857639551163 2023-01-22 18:16:50.990322: step: 700/470, loss: 0.5361093282699585 2023-01-22 18:16:51.693665: step: 702/470, loss: 0.12264763563871384 2023-01-22 18:16:52.390914: step: 704/470, loss: 0.6803569197654724 2023-01-22 18:16:53.156410: step: 706/470, loss: 0.14722055196762085 2023-01-22 18:16:53.886055: step: 708/470, loss: 0.33901581168174744 2023-01-22 18:16:54.629174: step: 710/470, loss: 0.10217005759477615 2023-01-22 18:16:55.330462: step: 712/470, loss: 0.11541683226823807 2023-01-22 18:16:56.073633: step: 714/470, loss: 0.19055843353271484 2023-01-22 18:16:56.812101: step: 716/470, loss: 0.14943887293338776 2023-01-22 18:16:57.566925: step: 718/470, loss: 0.5411022901535034 2023-01-22 18:16:58.236991: step: 720/470, loss: 0.10385146737098694 2023-01-22 18:16:58.954074: step: 722/470, loss: 0.1555739790201187 2023-01-22 18:16:59.630608: step: 724/470, loss: 0.27904900908470154 2023-01-22 18:17:00.431265: step: 726/470, loss: 0.13543671369552612 2023-01-22 18:17:01.183844: step: 728/470, loss: 0.0534023754298687 2023-01-22 18:17:01.877712: step: 730/470, loss: 0.23771588504314423 2023-01-22 18:17:02.667842: step: 732/470, loss: 0.12300637364387512 2023-01-22 18:17:03.371460: step: 734/470, loss: 0.08528205752372742 2023-01-22 18:17:04.124417: step: 736/470, loss: 0.41384363174438477 2023-01-22 18:17:04.912027: step: 738/470, loss: 0.11085784435272217 2023-01-22 18:17:05.682591: step: 740/470, loss: 0.4100949168205261 2023-01-22 18:17:06.440847: step: 742/470, loss: 0.5684028267860413 2023-01-22 18:17:07.236230: step: 744/470, loss: 0.21597033739089966 2023-01-22 18:17:07.942319: step: 746/470, loss: 0.1797659546136856 2023-01-22 18:17:08.672825: step: 748/470, loss: 0.12278896570205688 2023-01-22 18:17:09.390562: step: 750/470, loss: 0.06112644821405411 2023-01-22 18:17:10.053407: step: 752/470, loss: 0.13294389843940735 2023-01-22 18:17:10.706487: step: 754/470, loss: 0.4018506109714508 2023-01-22 18:17:11.453433: step: 756/470, loss: 0.312796026468277 2023-01-22 18:17:12.172548: step: 758/470, loss: 0.17056278884410858 2023-01-22 18:17:12.871831: step: 760/470, loss: 0.12827859818935394 2023-01-22 18:17:13.674896: step: 762/470, loss: 0.3029170036315918 2023-01-22 18:17:14.314080: step: 764/470, loss: 0.08240678906440735 2023-01-22 18:17:14.988176: step: 766/470, loss: 0.18388421833515167 2023-01-22 18:17:15.745485: step: 768/470, loss: 0.05635695904493332 2023-01-22 18:17:16.462629: step: 770/470, loss: 0.3035624027252197 2023-01-22 18:17:17.271606: step: 772/470, loss: 0.0933908224105835 2023-01-22 18:17:18.015780: step: 774/470, loss: 0.15930312871932983 2023-01-22 18:17:18.747627: step: 776/470, loss: 0.5322938561439514 2023-01-22 18:17:19.560968: step: 778/470, loss: 0.34571999311447144 2023-01-22 18:17:20.330371: step: 780/470, loss: 0.14167526364326477 2023-01-22 18:17:21.071538: step: 782/470, loss: 0.08634297549724579 2023-01-22 18:17:21.769123: step: 784/470, loss: 3.5037174224853516 2023-01-22 18:17:22.576909: step: 786/470, loss: 0.16057580709457397 2023-01-22 18:17:23.264752: step: 788/470, loss: 0.10480234026908875 2023-01-22 18:17:23.963002: step: 790/470, loss: 0.40698543190956116 2023-01-22 18:17:24.700753: step: 792/470, loss: 0.2794446647167206 2023-01-22 18:17:25.405198: step: 794/470, loss: 0.14553791284561157 2023-01-22 18:17:26.086221: step: 796/470, loss: 0.05922282487154007 2023-01-22 18:17:26.816402: step: 798/470, loss: 0.25358933210372925 2023-01-22 18:17:27.544605: step: 800/470, loss: 0.07599953562021255 2023-01-22 18:17:28.217745: step: 802/470, loss: 0.11059369891881943 2023-01-22 18:17:28.882387: step: 804/470, loss: 0.22309422492980957 2023-01-22 18:17:29.599515: step: 806/470, loss: 0.07817824929952621 2023-01-22 18:17:30.337983: step: 808/470, loss: 0.24265766143798828 2023-01-22 18:17:31.109711: step: 810/470, loss: 0.02267645113170147 2023-01-22 18:17:31.900329: step: 812/470, loss: 0.16064167022705078 2023-01-22 18:17:32.571286: step: 814/470, loss: 0.18023733794689178 2023-01-22 18:17:33.340850: step: 816/470, loss: 0.13179604709148407 2023-01-22 18:17:34.104358: step: 818/470, loss: 0.28788211941719055 2023-01-22 18:17:34.846215: step: 820/470, loss: 0.11537092179059982 2023-01-22 18:17:35.619635: step: 822/470, loss: 0.12304264307022095 2023-01-22 18:17:36.381195: step: 824/470, loss: 0.20748859643936157 2023-01-22 18:17:37.190351: step: 826/470, loss: 0.19135302305221558 2023-01-22 18:17:37.930470: step: 828/470, loss: 0.1294027864933014 2023-01-22 18:17:38.614063: step: 830/470, loss: 0.15381257236003876 2023-01-22 18:17:39.352051: step: 832/470, loss: 0.09458699077367783 2023-01-22 18:17:40.066588: step: 834/470, loss: 0.10095732659101486 2023-01-22 18:17:40.799678: step: 836/470, loss: 0.12210088223218918 2023-01-22 18:17:41.494829: step: 838/470, loss: 0.12754324078559875 2023-01-22 18:17:42.243920: step: 840/470, loss: 0.12278389185667038 2023-01-22 18:17:43.044397: step: 842/470, loss: 0.10551811009645462 2023-01-22 18:17:43.806519: step: 844/470, loss: 0.13036848604679108 2023-01-22 18:17:44.532498: step: 846/470, loss: 0.598879873752594 2023-01-22 18:17:45.306108: step: 848/470, loss: 0.10476802289485931 2023-01-22 18:17:46.015731: step: 850/470, loss: 0.16438759863376617 2023-01-22 18:17:46.661588: step: 852/470, loss: 0.06739547103643417 2023-01-22 18:17:47.499781: step: 854/470, loss: 0.11674723029136658 2023-01-22 18:17:48.289873: step: 856/470, loss: 0.17071661353111267 2023-01-22 18:17:49.058220: step: 858/470, loss: 0.16365188360214233 2023-01-22 18:17:49.743456: step: 860/470, loss: 0.15071061253547668 2023-01-22 18:17:50.486952: step: 862/470, loss: 0.46078917384147644 2023-01-22 18:17:51.154986: step: 864/470, loss: 0.08874162286520004 2023-01-22 18:17:51.833720: step: 866/470, loss: 0.08609306067228317 2023-01-22 18:17:52.640883: step: 868/470, loss: 0.13420622050762177 2023-01-22 18:17:53.458510: step: 870/470, loss: 0.5047072172164917 2023-01-22 18:17:54.198792: step: 872/470, loss: 0.4354456961154938 2023-01-22 18:17:54.891878: step: 874/470, loss: 0.5482062101364136 2023-01-22 18:17:55.752493: step: 876/470, loss: 0.22582288086414337 2023-01-22 18:17:56.567684: step: 878/470, loss: 0.1643458604812622 2023-01-22 18:17:57.346093: step: 880/470, loss: 0.0947733074426651 2023-01-22 18:17:58.181075: step: 882/470, loss: 0.059879835695028305 2023-01-22 18:17:58.937035: step: 884/470, loss: 0.10321329534053802 2023-01-22 18:17:59.685653: step: 886/470, loss: 0.11775056272745132 2023-01-22 18:18:00.399657: step: 888/470, loss: 0.12950459122657776 2023-01-22 18:18:01.150930: step: 890/470, loss: 0.3330346941947937 2023-01-22 18:18:01.884974: step: 892/470, loss: 0.14579877257347107 2023-01-22 18:18:02.622518: step: 894/470, loss: 0.12271419167518616 2023-01-22 18:18:03.405429: step: 896/470, loss: 0.23777388036251068 2023-01-22 18:18:04.218686: step: 898/470, loss: 0.20127157866954803 2023-01-22 18:18:04.845684: step: 900/470, loss: 0.033625878393650055 2023-01-22 18:18:05.595608: step: 902/470, loss: 0.21832531690597534 2023-01-22 18:18:06.360316: step: 904/470, loss: 0.19224612414836884 2023-01-22 18:18:07.092715: step: 906/470, loss: 0.11556744575500488 2023-01-22 18:18:07.840113: step: 908/470, loss: 0.11261487007141113 2023-01-22 18:18:08.607739: step: 910/470, loss: 0.1271752119064331 2023-01-22 18:18:09.355858: step: 912/470, loss: 0.1272096335887909 2023-01-22 18:18:10.166765: step: 914/470, loss: 0.19481460750102997 2023-01-22 18:18:10.890187: step: 916/470, loss: 0.445446252822876 2023-01-22 18:18:11.698016: step: 918/470, loss: 0.1915096789598465 2023-01-22 18:18:12.383619: step: 920/470, loss: 0.11832750588655472 2023-01-22 18:18:13.112811: step: 922/470, loss: 0.43603986501693726 2023-01-22 18:18:13.945379: step: 924/470, loss: 0.20713625848293304 2023-01-22 18:18:14.745573: step: 926/470, loss: 0.3290116488933563 2023-01-22 18:18:15.561287: step: 928/470, loss: 1.042456865310669 2023-01-22 18:18:16.302494: step: 930/470, loss: 0.13364510238170624 2023-01-22 18:18:17.052147: step: 932/470, loss: 0.5061575770378113 2023-01-22 18:18:17.677840: step: 934/470, loss: 1.4723851680755615 2023-01-22 18:18:18.433273: step: 936/470, loss: 0.20052802562713623 2023-01-22 18:18:19.135735: step: 938/470, loss: 0.7012951970100403 2023-01-22 18:18:19.939977: step: 940/470, loss: 0.3045251667499542 2023-01-22 18:18:20.605626: step: 942/470, loss: 0.33923205733299255 ================================================== Loss: 0.263 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29459497821253666, 'r': 0.3281352034359754, 'f1': 0.31046185316114727}, 'combined': 0.22876136548716114, 'epoch': 12} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34264664713865695, 'r': 0.34363505092848, 'f1': 0.3431401372689575}, 'combined': 0.23900308068484602, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2866659190590112, 'r': 0.3263748604087414, 'f1': 0.3052343415002781}, 'combined': 0.2249095147896786, 'epoch': 12} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3434021966950355, 'r': 0.3351473361975587, 'f1': 0.33922455439947546}, 'combined': 0.23627580903446055, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27355769230769234, 'r': 0.3337715297036929, 'f1': 0.3006796515450362}, 'combined': 0.22155342745423717, 'epoch': 12} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33771028169806255, 'r': 0.35621940290651405, 'f1': 0.34671799627774885}, 'combined': 0.24149512178549673, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20218894009216587, 'r': 0.3581632653061224, 'f1': 0.25846833578792333}, 'combined': 0.17231222385861555, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2236842105263158, 'r': 0.3695652173913043, 'f1': 0.2786885245901639}, 'combined': 0.13934426229508196, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3091249669923422, 'r': 0.3173370154513418, 'f1': 0.3131771669341894}, 'combined': 0.23076212300413954, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3375444187463703, 'r': 0.3472812769794387, 'f1': 0.3423436284915794}, 'combined': 0.23844829845184637, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2288135593220339, 'r': 0.38571428571428573, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26474047961800956, 'r': 0.3240182340675829, 'f1': 0.29139523780480575}, 'combined': 0.2147122804877516, 'epoch': 7} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32541475117674507, 'r': 0.34573363573629207, 'f1': 0.33526661835256644}, 'combined': 0.23351903765850401, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2722222222222222, 'r': 0.532608695652174, 'f1': 0.36029411764705876}, 'combined': 0.18014705882352938, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27696660863807243, 'r': 0.31953642894107787, 'f1': 0.2967325075805252}, 'combined': 0.21864500558565014, 'epoch': 10} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34185545824681857, 'r': 0.32870717139117167, 'f1': 0.3351524100459005}, 'combined': 0.23343948958420932, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4318181818181818, 'r': 0.3275862068965517, 'f1': 0.3725490196078432}, 'combined': 0.24836601307189546, 'epoch': 10} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:20:58.084099: step: 2/470, loss: 0.11869052797555923 2023-01-22 18:20:58.789040: step: 4/470, loss: 0.087203748524189 2023-01-22 18:20:59.505557: step: 6/470, loss: 0.1547858864068985 2023-01-22 18:21:00.250981: step: 8/470, loss: 0.24847088754177094 2023-01-22 18:21:00.942086: step: 10/470, loss: 0.4256363809108734 2023-01-22 18:21:01.704265: step: 12/470, loss: 0.09074677526950836 2023-01-22 18:21:02.438246: step: 14/470, loss: 0.19883684813976288 2023-01-22 18:21:03.157646: step: 16/470, loss: 0.2916736602783203 2023-01-22 18:21:03.973560: step: 18/470, loss: 0.08487699925899506 2023-01-22 18:21:04.894049: step: 20/470, loss: 0.37382224202156067 2023-01-22 18:21:05.641174: step: 22/470, loss: 0.05458125099539757 2023-01-22 18:21:06.433214: step: 24/470, loss: 0.07370239496231079 2023-01-22 18:21:07.195379: step: 26/470, loss: 0.19649724662303925 2023-01-22 18:21:07.949299: step: 28/470, loss: 0.14036303758621216 2023-01-22 18:21:08.679343: step: 30/470, loss: 0.12299536913633347 2023-01-22 18:21:09.425251: step: 32/470, loss: 0.13496029376983643 2023-01-22 18:21:10.182954: step: 34/470, loss: 0.07654550671577454 2023-01-22 18:21:10.918479: step: 36/470, loss: 0.12526889145374298 2023-01-22 18:21:11.697734: step: 38/470, loss: 0.08467797935009003 2023-01-22 18:21:12.447082: step: 40/470, loss: 0.10701703280210495 2023-01-22 18:21:13.116038: step: 42/470, loss: 0.30219635367393494 2023-01-22 18:21:13.847899: step: 44/470, loss: 0.09338068217039108 2023-01-22 18:21:14.539996: step: 46/470, loss: 0.5995178818702698 2023-01-22 18:21:15.332370: step: 48/470, loss: 0.10941080749034882 2023-01-22 18:21:16.087422: step: 50/470, loss: 0.20678047835826874 2023-01-22 18:21:16.794744: step: 52/470, loss: 0.05293460562825203 2023-01-22 18:21:17.469756: step: 54/470, loss: 0.7169974446296692 2023-01-22 18:21:18.181229: step: 56/470, loss: 0.12911155819892883 2023-01-22 18:21:18.942454: step: 58/470, loss: 0.10857953131198883 2023-01-22 18:21:19.706189: step: 60/470, loss: 0.11933538317680359 2023-01-22 18:21:20.449847: step: 62/470, loss: 0.09998290985822678 2023-01-22 18:21:21.203429: step: 64/470, loss: 0.14585649967193604 2023-01-22 18:21:21.986276: step: 66/470, loss: 0.1499679684638977 2023-01-22 18:21:22.839947: step: 68/470, loss: 0.05815058946609497 2023-01-22 18:21:23.568485: step: 70/470, loss: 0.060805968940258026 2023-01-22 18:21:24.310206: step: 72/470, loss: 0.6332342624664307 2023-01-22 18:21:24.986196: step: 74/470, loss: 0.03274387866258621 2023-01-22 18:21:25.754631: step: 76/470, loss: 0.15731528401374817 2023-01-22 18:21:26.429162: step: 78/470, loss: 0.12228990346193314 2023-01-22 18:21:27.226691: step: 80/470, loss: 0.2630844712257385 2023-01-22 18:21:27.933386: step: 82/470, loss: 0.285457044839859 2023-01-22 18:21:28.668001: step: 84/470, loss: 0.06887723505496979 2023-01-22 18:21:29.417345: step: 86/470, loss: 0.17145711183547974 2023-01-22 18:21:30.139881: step: 88/470, loss: 0.16493162512779236 2023-01-22 18:21:30.834721: step: 90/470, loss: 0.9345531463623047 2023-01-22 18:21:31.582185: step: 92/470, loss: 0.08099890500307083 2023-01-22 18:21:32.302272: step: 94/470, loss: 0.13539457321166992 2023-01-22 18:21:33.031983: step: 96/470, loss: 0.2658799886703491 2023-01-22 18:21:33.714715: step: 98/470, loss: 0.4507644772529602 2023-01-22 18:21:34.443698: step: 100/470, loss: 0.06587552279233932 2023-01-22 18:21:35.180753: step: 102/470, loss: 0.9483935832977295 2023-01-22 18:21:35.849696: step: 104/470, loss: 0.15638364851474762 2023-01-22 18:21:36.664033: step: 106/470, loss: 0.23561960458755493 2023-01-22 18:21:37.383283: step: 108/470, loss: 0.08107713609933853 2023-01-22 18:21:38.131622: step: 110/470, loss: 0.4484912157058716 2023-01-22 18:21:38.822746: step: 112/470, loss: 0.17864054441452026 2023-01-22 18:21:39.577615: step: 114/470, loss: 0.11197991669178009 2023-01-22 18:21:40.265497: step: 116/470, loss: 1.0874292850494385 2023-01-22 18:21:41.004306: step: 118/470, loss: 0.19661211967468262 2023-01-22 18:21:41.798210: step: 120/470, loss: 0.09106841683387756 2023-01-22 18:21:42.559778: step: 122/470, loss: 0.6079081296920776 2023-01-22 18:21:43.286312: step: 124/470, loss: 0.14605315029621124 2023-01-22 18:21:44.048889: step: 126/470, loss: 0.9814774990081787 2023-01-22 18:21:44.915733: step: 128/470, loss: 0.16904780268669128 2023-01-22 18:21:45.716713: step: 130/470, loss: 0.1599675416946411 2023-01-22 18:21:46.436702: step: 132/470, loss: 0.2830541431903839 2023-01-22 18:21:47.166225: step: 134/470, loss: 0.031339630484580994 2023-01-22 18:21:47.847048: step: 136/470, loss: 0.15232618153095245 2023-01-22 18:21:48.575849: step: 138/470, loss: 0.17885567247867584 2023-01-22 18:21:49.325724: step: 140/470, loss: 0.13249598443508148 2023-01-22 18:21:50.042698: step: 142/470, loss: 0.07661613076925278 2023-01-22 18:21:50.804076: step: 144/470, loss: 0.13011841475963593 2023-01-22 18:21:51.481645: step: 146/470, loss: 0.10363959521055222 2023-01-22 18:21:52.307286: step: 148/470, loss: 0.04361295700073242 2023-01-22 18:21:53.023177: step: 150/470, loss: 0.12805719673633575 2023-01-22 18:21:53.759868: step: 152/470, loss: 0.09809063374996185 2023-01-22 18:21:54.446544: step: 154/470, loss: 0.09070705622434616 2023-01-22 18:21:55.206321: step: 156/470, loss: 0.44800418615341187 2023-01-22 18:21:55.978120: step: 158/470, loss: 0.15312308073043823 2023-01-22 18:21:56.713739: step: 160/470, loss: 0.06737309694290161 2023-01-22 18:21:57.441148: step: 162/470, loss: 0.3796755075454712 2023-01-22 18:21:58.185714: step: 164/470, loss: 0.18876059353351593 2023-01-22 18:21:58.910364: step: 166/470, loss: 0.25417694449424744 2023-01-22 18:21:59.672748: step: 168/470, loss: 0.17342378199100494 2023-01-22 18:22:00.447650: step: 170/470, loss: 0.10127972066402435 2023-01-22 18:22:01.158205: step: 172/470, loss: 0.16987819969654083 2023-01-22 18:22:01.821488: step: 174/470, loss: 0.3513706624507904 2023-01-22 18:22:02.700722: step: 176/470, loss: 0.10086065530776978 2023-01-22 18:22:03.373339: step: 178/470, loss: 0.09717612713575363 2023-01-22 18:22:04.114576: step: 180/470, loss: 0.02228609286248684 2023-01-22 18:22:04.867812: step: 182/470, loss: 0.08578619360923767 2023-01-22 18:22:05.605440: step: 184/470, loss: 0.6068621277809143 2023-01-22 18:22:06.324544: step: 186/470, loss: 0.026224324479699135 2023-01-22 18:22:07.059829: step: 188/470, loss: 0.34837964177131653 2023-01-22 18:22:07.872590: step: 190/470, loss: 0.28307831287384033 2023-01-22 18:22:08.592788: step: 192/470, loss: 0.21804864704608917 2023-01-22 18:22:09.471389: step: 194/470, loss: 0.10385201871395111 2023-01-22 18:22:10.183242: step: 196/470, loss: 0.06404811888933182 2023-01-22 18:22:10.886132: step: 198/470, loss: 0.13678865134716034 2023-01-22 18:22:11.598732: step: 200/470, loss: 0.2867162823677063 2023-01-22 18:22:12.344535: step: 202/470, loss: 0.12509754300117493 2023-01-22 18:22:13.087114: step: 204/470, loss: 0.21526752412319183 2023-01-22 18:22:13.828082: step: 206/470, loss: 0.410299688577652 2023-01-22 18:22:14.518973: step: 208/470, loss: 0.1994316130876541 2023-01-22 18:22:15.317844: step: 210/470, loss: 0.21224021911621094 2023-01-22 18:22:16.146388: step: 212/470, loss: 1.471451759338379 2023-01-22 18:22:16.844241: step: 214/470, loss: 0.040065914392471313 2023-01-22 18:22:17.605118: step: 216/470, loss: 0.1329803615808487 2023-01-22 18:22:18.425700: step: 218/470, loss: 0.05075236037373543 2023-01-22 18:22:19.266281: step: 220/470, loss: 0.25361740589141846 2023-01-22 18:22:20.041901: step: 222/470, loss: 1.064577341079712 2023-01-22 18:22:20.799497: step: 224/470, loss: 0.13827519118785858 2023-01-22 18:22:21.526315: step: 226/470, loss: 0.13590987026691437 2023-01-22 18:22:22.214971: step: 228/470, loss: 0.20364883542060852 2023-01-22 18:22:22.903675: step: 230/470, loss: 0.2405257523059845 2023-01-22 18:22:23.663562: step: 232/470, loss: 0.11050628870725632 2023-01-22 18:22:24.473873: step: 234/470, loss: 0.18924807012081146 2023-01-22 18:22:25.216595: step: 236/470, loss: 0.18803924322128296 2023-01-22 18:22:25.990821: step: 238/470, loss: 0.1766262650489807 2023-01-22 18:22:26.935022: step: 240/470, loss: 0.24651962518692017 2023-01-22 18:22:27.618446: step: 242/470, loss: 0.5090709924697876 2023-01-22 18:22:28.443674: step: 244/470, loss: 0.07496100664138794 2023-01-22 18:22:29.157388: step: 246/470, loss: 0.39603158831596375 2023-01-22 18:22:29.832734: step: 248/470, loss: 0.09065035730600357 2023-01-22 18:22:30.605471: step: 250/470, loss: 0.662716269493103 2023-01-22 18:22:31.347855: step: 252/470, loss: 0.11193042248487473 2023-01-22 18:22:32.113320: step: 254/470, loss: 0.24775205552577972 2023-01-22 18:22:32.986037: step: 256/470, loss: 20.37836456298828 2023-01-22 18:22:33.696114: step: 258/470, loss: 0.34449484944343567 2023-01-22 18:22:34.456758: step: 260/470, loss: 0.07279616594314575 2023-01-22 18:22:35.262048: step: 262/470, loss: 0.18098340928554535 2023-01-22 18:22:36.007250: step: 264/470, loss: 0.16679392755031586 2023-01-22 18:22:36.756240: step: 266/470, loss: 0.4016364812850952 2023-01-22 18:22:37.510023: step: 268/470, loss: 0.22494986653327942 2023-01-22 18:22:38.184515: step: 270/470, loss: 0.05715346336364746 2023-01-22 18:22:38.926462: step: 272/470, loss: 0.2364903837442398 2023-01-22 18:22:39.637834: step: 274/470, loss: 0.09979073703289032 2023-01-22 18:22:40.353333: step: 276/470, loss: 0.08214342594146729 2023-01-22 18:22:41.175362: step: 278/470, loss: 0.1216912716627121 2023-01-22 18:22:41.823686: step: 280/470, loss: 0.07261183857917786 2023-01-22 18:22:42.587041: step: 282/470, loss: 0.1756569743156433 2023-01-22 18:22:43.350256: step: 284/470, loss: 0.6266811490058899 2023-01-22 18:22:44.036429: step: 286/470, loss: 0.1553003340959549 2023-01-22 18:22:44.746648: step: 288/470, loss: 0.33585941791534424 2023-01-22 18:22:45.415875: step: 290/470, loss: 0.05042559280991554 2023-01-22 18:22:46.135373: step: 292/470, loss: 0.2024879902601242 2023-01-22 18:22:46.793632: step: 294/470, loss: 0.026582282036542892 2023-01-22 18:22:47.496644: step: 296/470, loss: 0.11875536292791367 2023-01-22 18:22:48.317296: step: 298/470, loss: 0.16971439123153687 2023-01-22 18:22:49.083235: step: 300/470, loss: 0.34994009137153625 2023-01-22 18:22:49.839826: step: 302/470, loss: 0.1329270601272583 2023-01-22 18:22:50.661687: step: 304/470, loss: 0.30022162199020386 2023-01-22 18:22:51.433754: step: 306/470, loss: 0.16325759887695312 2023-01-22 18:22:52.198382: step: 308/470, loss: 0.15821513533592224 2023-01-22 18:22:52.921859: step: 310/470, loss: 0.11898909509181976 2023-01-22 18:22:53.665773: step: 312/470, loss: 0.11442035436630249 2023-01-22 18:22:54.345968: step: 314/470, loss: 0.2317693531513214 2023-01-22 18:22:55.067025: step: 316/470, loss: 0.210124209523201 2023-01-22 18:22:55.780952: step: 318/470, loss: 0.22964359819889069 2023-01-22 18:22:56.533053: step: 320/470, loss: 0.06219105422496796 2023-01-22 18:22:57.246850: step: 322/470, loss: 0.12114651501178741 2023-01-22 18:22:58.033454: step: 324/470, loss: 0.36422327160835266 2023-01-22 18:22:58.773446: step: 326/470, loss: 0.10115069150924683 2023-01-22 18:22:59.486475: step: 328/470, loss: 0.9817755818367004 2023-01-22 18:23:00.184006: step: 330/470, loss: 0.02880793623626232 2023-01-22 18:23:00.978600: step: 332/470, loss: 0.1212698295712471 2023-01-22 18:23:01.790853: step: 334/470, loss: 0.10181504487991333 2023-01-22 18:23:02.555639: step: 336/470, loss: 0.049668557941913605 2023-01-22 18:23:03.331924: step: 338/470, loss: 8.260157585144043 2023-01-22 18:23:04.119143: step: 340/470, loss: 0.1510852873325348 2023-01-22 18:23:04.905728: step: 342/470, loss: 0.2651934027671814 2023-01-22 18:23:05.644943: step: 344/470, loss: 0.2363731414079666 2023-01-22 18:23:06.393753: step: 346/470, loss: 0.28845059871673584 2023-01-22 18:23:07.251001: step: 348/470, loss: 0.17618459463119507 2023-01-22 18:23:07.981548: step: 350/470, loss: 0.3283766210079193 2023-01-22 18:23:08.682265: step: 352/470, loss: 0.3739703595638275 2023-01-22 18:23:09.484147: step: 354/470, loss: 0.06648049503564835 2023-01-22 18:23:10.213797: step: 356/470, loss: 0.1445521116256714 2023-01-22 18:23:10.903402: step: 358/470, loss: 0.06190051883459091 2023-01-22 18:23:11.645014: step: 360/470, loss: 0.12461217492818832 2023-01-22 18:23:12.310086: step: 362/470, loss: 0.033573780208826065 2023-01-22 18:23:13.022309: step: 364/470, loss: 0.13312368094921112 2023-01-22 18:23:13.756013: step: 366/470, loss: 0.1520005315542221 2023-01-22 18:23:14.531624: step: 368/470, loss: 0.9422814249992371 2023-01-22 18:23:15.319713: step: 370/470, loss: 0.1411898136138916 2023-01-22 18:23:16.053779: step: 372/470, loss: 0.051140353083610535 2023-01-22 18:23:16.779280: step: 374/470, loss: 0.3242073357105255 2023-01-22 18:23:17.483350: step: 376/470, loss: 0.16489548981189728 2023-01-22 18:23:18.270730: step: 378/470, loss: 1.8228318691253662 2023-01-22 18:23:19.125443: step: 380/470, loss: 1.1325253248214722 2023-01-22 18:23:19.881156: step: 382/470, loss: 0.36451154947280884 2023-01-22 18:23:20.567012: step: 384/470, loss: 0.0645284503698349 2023-01-22 18:23:21.300513: step: 386/470, loss: 0.10189077258110046 2023-01-22 18:23:21.967021: step: 388/470, loss: 0.18132157623767853 2023-01-22 18:23:22.717393: step: 390/470, loss: 0.24271626770496368 2023-01-22 18:23:23.398215: step: 392/470, loss: 0.11971033364534378 2023-01-22 18:23:24.097881: step: 394/470, loss: 0.1440243124961853 2023-01-22 18:23:24.887348: step: 396/470, loss: 0.12291662395000458 2023-01-22 18:23:25.594203: step: 398/470, loss: 0.07444258779287338 2023-01-22 18:23:26.373140: step: 400/470, loss: 0.7031933069229126 2023-01-22 18:23:27.157069: step: 402/470, loss: 0.16021253168582916 2023-01-22 18:23:27.829001: step: 404/470, loss: 0.2858032286167145 2023-01-22 18:23:28.590243: step: 406/470, loss: 0.15188010036945343 2023-01-22 18:23:29.355190: step: 408/470, loss: 0.5649017691612244 2023-01-22 18:23:30.075444: step: 410/470, loss: 0.06190142035484314 2023-01-22 18:23:30.809114: step: 412/470, loss: 0.18122579157352448 2023-01-22 18:23:31.501642: step: 414/470, loss: 0.11591540277004242 2023-01-22 18:23:32.149208: step: 416/470, loss: 0.07641121000051498 2023-01-22 18:23:32.866511: step: 418/470, loss: 0.07922433316707611 2023-01-22 18:23:33.562116: step: 420/470, loss: 0.04138394817709923 2023-01-22 18:23:34.353581: step: 422/470, loss: 0.30484187602996826 2023-01-22 18:23:35.110854: step: 424/470, loss: 0.11592315137386322 2023-01-22 18:23:35.963751: step: 426/470, loss: 0.47379979491233826 2023-01-22 18:23:36.700780: step: 428/470, loss: 0.05659574270248413 2023-01-22 18:23:37.395372: step: 430/470, loss: 0.10657653957605362 2023-01-22 18:23:38.073392: step: 432/470, loss: 0.14987456798553467 2023-01-22 18:23:38.761459: step: 434/470, loss: 0.526384174823761 2023-01-22 18:23:39.534052: step: 436/470, loss: 0.1093791201710701 2023-01-22 18:23:40.205673: step: 438/470, loss: 0.16932667791843414 2023-01-22 18:23:40.978906: step: 440/470, loss: 0.2574578523635864 2023-01-22 18:23:41.718180: step: 442/470, loss: 0.06636017560958862 2023-01-22 18:23:42.452602: step: 444/470, loss: 0.7201718091964722 2023-01-22 18:23:43.207984: step: 446/470, loss: 0.11673801392316818 2023-01-22 18:23:43.911991: step: 448/470, loss: 0.1540815830230713 2023-01-22 18:23:44.619799: step: 450/470, loss: 0.2399187535047531 2023-01-22 18:23:45.347439: step: 452/470, loss: 0.1115301102399826 2023-01-22 18:23:46.185900: step: 454/470, loss: 0.3593323230743408 2023-01-22 18:23:46.956267: step: 456/470, loss: 1.4198927879333496 2023-01-22 18:23:47.689879: step: 458/470, loss: 0.6755783557891846 2023-01-22 18:23:48.361258: step: 460/470, loss: 0.050497036427259445 2023-01-22 18:23:49.064817: step: 462/470, loss: 0.10053464025259018 2023-01-22 18:23:49.797039: step: 464/470, loss: 0.03807546943426132 2023-01-22 18:23:50.508053: step: 466/470, loss: 0.3850559592247009 2023-01-22 18:23:51.260122: step: 468/470, loss: 0.06416615843772888 2023-01-22 18:23:52.053028: step: 470/470, loss: 0.06735183298587799 2023-01-22 18:23:52.733743: step: 472/470, loss: 0.0959152951836586 2023-01-22 18:23:53.417400: step: 474/470, loss: 0.49368205666542053 2023-01-22 18:23:54.176414: step: 476/470, loss: 0.13260005414485931 2023-01-22 18:23:54.883427: step: 478/470, loss: 0.17261973023414612 2023-01-22 18:23:55.586915: step: 480/470, loss: 0.08892907947301865 2023-01-22 18:23:56.361449: step: 482/470, loss: 0.15367530286312103 2023-01-22 18:23:57.111532: step: 484/470, loss: 0.11208932101726532 2023-01-22 18:23:57.756274: step: 486/470, loss: 0.15923534333705902 2023-01-22 18:23:58.505356: step: 488/470, loss: 0.09813371300697327 2023-01-22 18:23:59.155976: step: 490/470, loss: 0.46980464458465576 2023-01-22 18:23:59.876527: step: 492/470, loss: 0.14123369753360748 2023-01-22 18:24:00.565931: step: 494/470, loss: 0.105765201151371 2023-01-22 18:24:01.242633: step: 496/470, loss: 0.21727946400642395 2023-01-22 18:24:01.952457: step: 498/470, loss: 0.13507087528705597 2023-01-22 18:24:02.714239: step: 500/470, loss: 0.12536272406578064 2023-01-22 18:24:03.476625: step: 502/470, loss: 0.06248565390706062 2023-01-22 18:24:04.248406: step: 504/470, loss: 0.29726463556289673 2023-01-22 18:24:04.941310: step: 506/470, loss: 0.10283642262220383 2023-01-22 18:24:05.691984: step: 508/470, loss: 0.88539719581604 2023-01-22 18:24:06.425893: step: 510/470, loss: 0.14810267090797424 2023-01-22 18:24:07.200594: step: 512/470, loss: 0.18709732592105865 2023-01-22 18:24:07.936300: step: 514/470, loss: 0.043946363031864166 2023-01-22 18:24:08.664837: step: 516/470, loss: 0.20995289087295532 2023-01-22 18:24:09.441892: step: 518/470, loss: 0.19275762140750885 2023-01-22 18:24:10.174293: step: 520/470, loss: 0.3477177619934082 2023-01-22 18:24:11.009712: step: 522/470, loss: 0.3986015319824219 2023-01-22 18:24:11.723296: step: 524/470, loss: 0.1369524747133255 2023-01-22 18:24:12.465973: step: 526/470, loss: 0.07624433934688568 2023-01-22 18:24:13.179358: step: 528/470, loss: 0.29792025685310364 2023-01-22 18:24:13.888712: step: 530/470, loss: 0.17045418918132782 2023-01-22 18:24:14.639791: step: 532/470, loss: 0.3308311998844147 2023-01-22 18:24:15.337861: step: 534/470, loss: 0.11802157014608383 2023-01-22 18:24:16.061269: step: 536/470, loss: 0.510388195514679 2023-01-22 18:24:16.709098: step: 538/470, loss: 0.22368597984313965 2023-01-22 18:24:17.408478: step: 540/470, loss: 0.13283610343933105 2023-01-22 18:24:18.227948: step: 542/470, loss: 0.6673287153244019 2023-01-22 18:24:18.914901: step: 544/470, loss: 0.25879979133605957 2023-01-22 18:24:19.630051: step: 546/470, loss: 0.13402698934078217 2023-01-22 18:24:20.357410: step: 548/470, loss: 0.28232210874557495 2023-01-22 18:24:21.112878: step: 550/470, loss: 0.1593974232673645 2023-01-22 18:24:21.836376: step: 552/470, loss: 0.15679427981376648 2023-01-22 18:24:22.487304: step: 554/470, loss: 0.5649197101593018 2023-01-22 18:24:23.133556: step: 556/470, loss: 0.1094643771648407 2023-01-22 18:24:23.862726: step: 558/470, loss: 0.21649640798568726 2023-01-22 18:24:24.591257: step: 560/470, loss: 0.4756641089916229 2023-01-22 18:24:25.286334: step: 562/470, loss: 0.17450223863124847 2023-01-22 18:24:25.989976: step: 564/470, loss: 0.15516166388988495 2023-01-22 18:24:26.707447: step: 566/470, loss: 0.15304596722126007 2023-01-22 18:24:27.460976: step: 568/470, loss: 0.15756924450397491 2023-01-22 18:24:28.245938: step: 570/470, loss: 0.17908306419849396 2023-01-22 18:24:28.919133: step: 572/470, loss: 0.1460331678390503 2023-01-22 18:24:29.680860: step: 574/470, loss: 0.5658903121948242 2023-01-22 18:24:30.431424: step: 576/470, loss: 0.12265262007713318 2023-01-22 18:24:31.186070: step: 578/470, loss: 0.22960016131401062 2023-01-22 18:24:31.936463: step: 580/470, loss: 0.10772555321455002 2023-01-22 18:24:32.638658: step: 582/470, loss: 0.36096444725990295 2023-01-22 18:24:33.365527: step: 584/470, loss: 0.19996917247772217 2023-01-22 18:24:34.156497: step: 586/470, loss: 0.09180688112974167 2023-01-22 18:24:34.797920: step: 588/470, loss: 0.18549667298793793 2023-01-22 18:24:35.594727: step: 590/470, loss: 0.09706774353981018 2023-01-22 18:24:36.358372: step: 592/470, loss: 0.128683939576149 2023-01-22 18:24:37.164821: step: 594/470, loss: 0.11436925083398819 2023-01-22 18:24:37.976131: step: 596/470, loss: 0.1896413415670395 2023-01-22 18:24:38.695384: step: 598/470, loss: 0.06010688096284866 2023-01-22 18:24:39.440004: step: 600/470, loss: 0.2670471966266632 2023-01-22 18:24:40.167583: step: 602/470, loss: 0.11307162046432495 2023-01-22 18:24:40.973357: step: 604/470, loss: 0.11850042641162872 2023-01-22 18:24:41.695759: step: 606/470, loss: 0.444345623254776 2023-01-22 18:24:42.433481: step: 608/470, loss: 0.15307031571865082 2023-01-22 18:24:43.221218: step: 610/470, loss: 0.20294825732707977 2023-01-22 18:24:43.955071: step: 612/470, loss: 0.1727193295955658 2023-01-22 18:24:44.792661: step: 614/470, loss: 0.1558392196893692 2023-01-22 18:24:45.500425: step: 616/470, loss: 0.11232329159975052 2023-01-22 18:24:46.271464: step: 618/470, loss: 0.4051525294780731 2023-01-22 18:24:46.953436: step: 620/470, loss: 0.18653224408626556 2023-01-22 18:24:47.678248: step: 622/470, loss: 0.07225558906793594 2023-01-22 18:24:48.459276: step: 624/470, loss: 0.21247851848602295 2023-01-22 18:24:49.149970: step: 626/470, loss: 0.1073872521519661 2023-01-22 18:24:49.911132: step: 628/470, loss: 0.1231706514954567 2023-01-22 18:24:50.679799: step: 630/470, loss: 0.03070944733917713 2023-01-22 18:24:51.378119: step: 632/470, loss: 0.3459760546684265 2023-01-22 18:24:52.153427: step: 634/470, loss: 0.14295950531959534 2023-01-22 18:24:52.887516: step: 636/470, loss: 0.17095544934272766 2023-01-22 18:24:53.563580: step: 638/470, loss: 0.08782177418470383 2023-01-22 18:24:54.255270: step: 640/470, loss: 2.7348151206970215 2023-01-22 18:24:55.040130: step: 642/470, loss: 0.18300358951091766 2023-01-22 18:24:55.728337: step: 644/470, loss: 0.610358715057373 2023-01-22 18:24:56.573117: step: 646/470, loss: 0.07318799197673798 2023-01-22 18:24:57.305373: step: 648/470, loss: 0.2117634117603302 2023-01-22 18:24:58.106935: step: 650/470, loss: 1.6853128671646118 2023-01-22 18:24:58.878022: step: 652/470, loss: 0.3203961253166199 2023-01-22 18:24:59.621646: step: 654/470, loss: 0.10503191500902176 2023-01-22 18:25:00.348015: step: 656/470, loss: 0.12326573580503464 2023-01-22 18:25:01.055514: step: 658/470, loss: 0.0717514380812645 2023-01-22 18:25:01.797479: step: 660/470, loss: 0.04833994433283806 2023-01-22 18:25:02.561079: step: 662/470, loss: 0.025357216596603394 2023-01-22 18:25:03.329415: step: 664/470, loss: 0.0652332752943039 2023-01-22 18:25:04.101370: step: 666/470, loss: 0.1199808269739151 2023-01-22 18:25:04.802581: step: 668/470, loss: 0.09009598940610886 2023-01-22 18:25:05.545543: step: 670/470, loss: 0.5139247179031372 2023-01-22 18:25:06.227302: step: 672/470, loss: 0.05846283957362175 2023-01-22 18:25:06.940262: step: 674/470, loss: 0.1617492139339447 2023-01-22 18:25:07.715691: step: 676/470, loss: 0.06745065003633499 2023-01-22 18:25:08.496741: step: 678/470, loss: 0.14190346002578735 2023-01-22 18:25:09.253742: step: 680/470, loss: 0.04410451278090477 2023-01-22 18:25:10.024677: step: 682/470, loss: 0.35122886300086975 2023-01-22 18:25:10.803422: step: 684/470, loss: 0.1424378901720047 2023-01-22 18:25:11.558960: step: 686/470, loss: 0.13077247142791748 2023-01-22 18:25:12.346517: step: 688/470, loss: 0.067043736577034 2023-01-22 18:25:13.079823: step: 690/470, loss: 0.22250400483608246 2023-01-22 18:25:13.814006: step: 692/470, loss: 0.07035396248102188 2023-01-22 18:25:14.520263: step: 694/470, loss: 0.18889664113521576 2023-01-22 18:25:15.206570: step: 696/470, loss: 0.15015734732151031 2023-01-22 18:25:16.038067: step: 698/470, loss: 0.16164840757846832 2023-01-22 18:25:16.832790: step: 700/470, loss: 0.0613911971449852 2023-01-22 18:25:17.601962: step: 702/470, loss: 0.2514897584915161 2023-01-22 18:25:18.473384: step: 704/470, loss: 0.09639281034469604 2023-01-22 18:25:19.202917: step: 706/470, loss: 0.4936193525791168 2023-01-22 18:25:19.956306: step: 708/470, loss: 0.23112571239471436 2023-01-22 18:25:20.665558: step: 710/470, loss: 0.21785999834537506 2023-01-22 18:25:21.365364: step: 712/470, loss: 0.26574963331222534 2023-01-22 18:25:22.121861: step: 714/470, loss: 0.1496441513299942 2023-01-22 18:25:22.921467: step: 716/470, loss: 0.13528425991535187 2023-01-22 18:25:23.793187: step: 718/470, loss: 0.4600887894630432 2023-01-22 18:25:24.531574: step: 720/470, loss: 0.3019505739212036 2023-01-22 18:25:25.213423: step: 722/470, loss: 0.2099757045507431 2023-01-22 18:25:25.970065: step: 724/470, loss: 0.22414270043373108 2023-01-22 18:25:26.788185: step: 726/470, loss: 0.10880865156650543 2023-01-22 18:25:27.562965: step: 728/470, loss: 0.2567862272262573 2023-01-22 18:25:28.276084: step: 730/470, loss: 0.12142646312713623 2023-01-22 18:25:29.042110: step: 732/470, loss: 0.1998579055070877 2023-01-22 18:25:29.766003: step: 734/470, loss: 0.13268613815307617 2023-01-22 18:25:30.481109: step: 736/470, loss: 0.14437507092952728 2023-01-22 18:25:31.289921: step: 738/470, loss: 0.16617853939533234 2023-01-22 18:25:32.064677: step: 740/470, loss: 0.10675939172506332 2023-01-22 18:25:32.904974: step: 742/470, loss: 0.20530328154563904 2023-01-22 18:25:33.718941: step: 744/470, loss: 0.23302339017391205 2023-01-22 18:25:34.482819: step: 746/470, loss: 0.13051211833953857 2023-01-22 18:25:35.189996: step: 748/470, loss: 0.04546245560050011 2023-01-22 18:25:35.914692: step: 750/470, loss: 0.16170257329940796 2023-01-22 18:25:36.646768: step: 752/470, loss: 0.3879983723163605 2023-01-22 18:25:37.492896: step: 754/470, loss: 0.17441397905349731 2023-01-22 18:25:38.196890: step: 756/470, loss: 0.12725909054279327 2023-01-22 18:25:38.940186: step: 758/470, loss: 0.11163493990898132 2023-01-22 18:25:39.674804: step: 760/470, loss: 0.2151859700679779 2023-01-22 18:25:40.432711: step: 762/470, loss: 0.3291402757167816 2023-01-22 18:25:41.176822: step: 764/470, loss: 0.10096342861652374 2023-01-22 18:25:42.036808: step: 766/470, loss: 0.10235702246427536 2023-01-22 18:25:42.737319: step: 768/470, loss: 0.03586709871888161 2023-01-22 18:25:43.563276: step: 770/470, loss: 0.5148991346359253 2023-01-22 18:25:44.265740: step: 772/470, loss: 0.2516341507434845 2023-01-22 18:25:45.058472: step: 774/470, loss: 0.33739376068115234 2023-01-22 18:25:45.727928: step: 776/470, loss: 0.09920867532491684 2023-01-22 18:25:46.484891: step: 778/470, loss: 0.15994364023208618 2023-01-22 18:25:47.274925: step: 780/470, loss: 0.08426479995250702 2023-01-22 18:25:48.142562: step: 782/470, loss: 0.18891899287700653 2023-01-22 18:25:48.862251: step: 784/470, loss: 0.07184939086437225 2023-01-22 18:25:49.688146: step: 786/470, loss: 0.16742153465747833 2023-01-22 18:25:50.418893: step: 788/470, loss: 0.42684534192085266 2023-01-22 18:25:51.178130: step: 790/470, loss: 0.12125355750322342 2023-01-22 18:25:51.910135: step: 792/470, loss: 0.08575651794672012 2023-01-22 18:25:52.659392: step: 794/470, loss: 0.11674223095178604 2023-01-22 18:25:53.336143: step: 796/470, loss: 0.10641640424728394 2023-01-22 18:25:54.006164: step: 798/470, loss: 0.4802986681461334 2023-01-22 18:25:54.757452: step: 800/470, loss: 0.08506816625595093 2023-01-22 18:25:55.506851: step: 802/470, loss: 0.057946957647800446 2023-01-22 18:25:56.235230: step: 804/470, loss: 0.16969476640224457 2023-01-22 18:25:56.918664: step: 806/470, loss: 0.1266757696866989 2023-01-22 18:25:57.651238: step: 808/470, loss: 0.22330895066261292 2023-01-22 18:25:58.427963: step: 810/470, loss: 0.14940103888511658 2023-01-22 18:25:59.172918: step: 812/470, loss: 0.10136136412620544 2023-01-22 18:25:59.894256: step: 814/470, loss: 0.3054467439651489 2023-01-22 18:26:00.662741: step: 816/470, loss: 0.32518401741981506 2023-01-22 18:26:01.380014: step: 818/470, loss: 0.16174927353858948 2023-01-22 18:26:02.162545: step: 820/470, loss: 0.0795656368136406 2023-01-22 18:26:02.905212: step: 822/470, loss: 0.45578598976135254 2023-01-22 18:26:03.610199: step: 824/470, loss: 0.13583436608314514 2023-01-22 18:26:04.301254: step: 826/470, loss: 0.22682815790176392 2023-01-22 18:26:05.018806: step: 828/470, loss: 0.06597944349050522 2023-01-22 18:26:05.746437: step: 830/470, loss: 0.06516607105731964 2023-01-22 18:26:06.447192: step: 832/470, loss: 0.18027722835540771 2023-01-22 18:26:07.163965: step: 834/470, loss: 0.11800383776426315 2023-01-22 18:26:07.878444: step: 836/470, loss: 0.12889358401298523 2023-01-22 18:26:08.647605: step: 838/470, loss: 0.04844401776790619 2023-01-22 18:26:09.317964: step: 840/470, loss: 0.12319555133581161 2023-01-22 18:26:10.055495: step: 842/470, loss: 0.06649138778448105 2023-01-22 18:26:10.815534: step: 844/470, loss: 0.11697138100862503 2023-01-22 18:26:11.575154: step: 846/470, loss: 0.4858449399471283 2023-01-22 18:26:12.337785: step: 848/470, loss: 0.18316267430782318 2023-01-22 18:26:13.133682: step: 850/470, loss: 0.2892132103443146 2023-01-22 18:26:13.904819: step: 852/470, loss: 0.1974104344844818 2023-01-22 18:26:14.618538: step: 854/470, loss: 0.1520778387784958 2023-01-22 18:26:15.353151: step: 856/470, loss: 0.2704646587371826 2023-01-22 18:26:16.154921: step: 858/470, loss: 0.2902047634124756 2023-01-22 18:26:16.791683: step: 860/470, loss: 0.08880189806222916 2023-01-22 18:26:17.627587: step: 862/470, loss: 0.11974621564149857 2023-01-22 18:26:18.382128: step: 864/470, loss: 0.09045007079839706 2023-01-22 18:26:19.109642: step: 866/470, loss: 0.22416849434375763 2023-01-22 18:26:19.842917: step: 868/470, loss: 0.19987420737743378 2023-01-22 18:26:20.589348: step: 870/470, loss: 0.17100857198238373 2023-01-22 18:26:21.372192: step: 872/470, loss: 0.3392637073993683 2023-01-22 18:26:22.131950: step: 874/470, loss: 0.1819869726896286 2023-01-22 18:26:22.814409: step: 876/470, loss: 0.16775751113891602 2023-01-22 18:26:23.588386: step: 878/470, loss: 0.5377464890480042 2023-01-22 18:26:24.314167: step: 880/470, loss: 1.2753757238388062 2023-01-22 18:26:25.109659: step: 882/470, loss: 0.5215312838554382 2023-01-22 18:26:25.838239: step: 884/470, loss: 0.18393318355083466 2023-01-22 18:26:26.550566: step: 886/470, loss: 0.10737390071153641 2023-01-22 18:26:27.272639: step: 888/470, loss: 0.15539099276065826 2023-01-22 18:26:28.104499: step: 890/470, loss: 0.9249545335769653 2023-01-22 18:26:28.781704: step: 892/470, loss: 0.29865944385528564 2023-01-22 18:26:29.528772: step: 894/470, loss: 0.31313595175743103 2023-01-22 18:26:30.257697: step: 896/470, loss: 0.09606197476387024 2023-01-22 18:26:31.009939: step: 898/470, loss: 0.308594286441803 2023-01-22 18:26:31.837007: step: 900/470, loss: 0.11458901315927505 2023-01-22 18:26:32.654352: step: 902/470, loss: 0.07904741168022156 2023-01-22 18:26:33.437104: step: 904/470, loss: 0.1473604291677475 2023-01-22 18:26:34.160746: step: 906/470, loss: 0.22174973785877228 2023-01-22 18:26:34.908590: step: 908/470, loss: 1.0358433723449707 2023-01-22 18:26:35.663639: step: 910/470, loss: 0.49611371755599976 2023-01-22 18:26:36.407391: step: 912/470, loss: 0.15608665347099304 2023-01-22 18:26:37.138804: step: 914/470, loss: 0.15466263890266418 2023-01-22 18:26:37.906993: step: 916/470, loss: 0.20168907940387726 2023-01-22 18:26:38.640995: step: 918/470, loss: 0.4854433536529541 2023-01-22 18:26:39.370507: step: 920/470, loss: 0.2656620144844055 2023-01-22 18:26:40.108380: step: 922/470, loss: 0.17299237847328186 2023-01-22 18:26:40.853680: step: 924/470, loss: 0.41194406151771545 2023-01-22 18:26:41.569577: step: 926/470, loss: 0.11019831150770187 2023-01-22 18:26:42.221313: step: 928/470, loss: 0.17341381311416626 2023-01-22 18:26:43.068980: step: 930/470, loss: 0.11898980289697647 2023-01-22 18:26:43.881869: step: 932/470, loss: 0.0407705120742321 2023-01-22 18:26:44.695919: step: 934/470, loss: 0.16358143091201782 2023-01-22 18:26:45.414004: step: 936/470, loss: 0.148806631565094 2023-01-22 18:26:46.138418: step: 938/470, loss: 0.08406595140695572 2023-01-22 18:26:46.829644: step: 940/470, loss: 0.17969819903373718 2023-01-22 18:26:47.534639: step: 942/470, loss: 0.07237522304058075 ================================================== Loss: 0.291 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3059714988425926, 'r': 0.3344204617330803, 'f1': 0.31956406769416745}, 'combined': 0.23546826040622865, 'epoch': 13} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33907906154280193, 'r': 0.3579710745778476, 'f1': 0.34826905479956943}, 'combined': 0.24257546105442648, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3023461521070718, 'r': 0.33390030460401476, 'f1': 0.31734077642257125}, 'combined': 0.23383004578505248, 'epoch': 13} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34556379475902327, 'r': 0.3591869828216386, 'f1': 0.35224371724139947}, 'combined': 0.24534388265570115, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28835685483870965, 'r': 0.3392433586337761, 'f1': 0.3117371403661726}, 'combined': 0.22970105079612718, 'epoch': 13} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3398584075592588, 'r': 0.3712299528724211, 'f1': 0.35485216083393195}, 'combined': 0.24716070903855958, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23039215686274508, 'r': 0.3357142857142857, 'f1': 0.27325581395348836}, 'combined': 0.1821705426356589, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2692307692307692, 'r': 0.45652173913043476, 'f1': 0.3387096774193548}, 'combined': 0.1693548387096774, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34782608695652173, 'r': 0.27586206896551724, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 13} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3091249669923422, 'r': 0.3173370154513418, 'f1': 0.3131771669341894}, 'combined': 0.23076212300413954, 'epoch': 11} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3375444187463703, 'r': 0.3472812769794387, 'f1': 0.3423436284915794}, 'combined': 0.23844829845184637, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2288135593220339, 'r': 0.38571428571428573, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3023461521070718, 'r': 0.33390030460401476, 'f1': 0.31734077642257125}, 'combined': 0.23383004578505248, 'epoch': 13} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34556379475902327, 'r': 0.3591869828216386, 'f1': 0.35224371724139947}, 'combined': 0.24534388265570115, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2692307692307692, 'r': 0.45652173913043476, 'f1': 0.3387096774193548}, 'combined': 0.1693548387096774, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27696660863807243, 'r': 0.31953642894107787, 'f1': 0.2967325075805252}, 'combined': 0.21864500558565014, 'epoch': 10} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34185545824681857, 'r': 0.32870717139117167, 'f1': 0.3351524100459005}, 'combined': 0.23343948958420932, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4318181818181818, 'r': 0.3275862068965517, 'f1': 0.3725490196078432}, 'combined': 0.24836601307189546, 'epoch': 10} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:29:34.180021: step: 2/470, loss: 0.11083290725946426 2023-01-22 18:29:34.901119: step: 4/470, loss: 0.11919131875038147 2023-01-22 18:29:35.596675: step: 6/470, loss: 0.22162587940692902 2023-01-22 18:29:36.376635: step: 8/470, loss: 0.1282801777124405 2023-01-22 18:29:37.160923: step: 10/470, loss: 0.05915132537484169 2023-01-22 18:29:37.874340: step: 12/470, loss: 0.03638844937086105 2023-01-22 18:29:38.772651: step: 14/470, loss: 0.12631765007972717 2023-01-22 18:29:39.593733: step: 16/470, loss: 0.30836811661720276 2023-01-22 18:29:40.301761: step: 18/470, loss: 0.08455128967761993 2023-01-22 18:29:41.034064: step: 20/470, loss: 0.4171124994754791 2023-01-22 18:29:41.756988: step: 22/470, loss: 0.12701795995235443 2023-01-22 18:29:42.582140: step: 24/470, loss: 0.06594716012477875 2023-01-22 18:29:43.271412: step: 26/470, loss: 0.15928548574447632 2023-01-22 18:29:44.069533: step: 28/470, loss: 0.0775287002325058 2023-01-22 18:29:44.862156: step: 30/470, loss: 0.11700832098722458 2023-01-22 18:29:45.612986: step: 32/470, loss: 0.11270631104707718 2023-01-22 18:29:46.463251: step: 34/470, loss: 0.14612014591693878 2023-01-22 18:29:47.178812: step: 36/470, loss: 0.12699899077415466 2023-01-22 18:29:47.960788: step: 38/470, loss: 0.20811907947063446 2023-01-22 18:29:48.730393: step: 40/470, loss: 0.12297671288251877 2023-01-22 18:29:49.684927: step: 42/470, loss: 0.11870261281728745 2023-01-22 18:29:50.371118: step: 44/470, loss: 0.20325523614883423 2023-01-22 18:29:51.068833: step: 46/470, loss: 0.07447163760662079 2023-01-22 18:29:51.785299: step: 48/470, loss: 0.10491825640201569 2023-01-22 18:29:52.577667: step: 50/470, loss: 0.09760572761297226 2023-01-22 18:29:53.312614: step: 52/470, loss: 0.3343038856983185 2023-01-22 18:29:54.041498: step: 54/470, loss: 0.19771350920200348 2023-01-22 18:29:54.767797: step: 56/470, loss: 0.09543336182832718 2023-01-22 18:29:55.450957: step: 58/470, loss: 0.4951615333557129 2023-01-22 18:29:56.173806: step: 60/470, loss: 0.08876185119152069 2023-01-22 18:29:56.908734: step: 62/470, loss: 0.11803992092609406 2023-01-22 18:29:57.695496: step: 64/470, loss: 0.14623303711414337 2023-01-22 18:29:58.428784: step: 66/470, loss: 0.24651095271110535 2023-01-22 18:29:59.214927: step: 68/470, loss: 0.119205541908741 2023-01-22 18:29:59.946209: step: 70/470, loss: 0.10121873021125793 2023-01-22 18:30:00.663668: step: 72/470, loss: 0.2187107503414154 2023-01-22 18:30:01.382482: step: 74/470, loss: 0.07482891529798508 2023-01-22 18:30:02.108976: step: 76/470, loss: 0.8483836650848389 2023-01-22 18:30:02.911817: step: 78/470, loss: 0.04838128015398979 2023-01-22 18:30:03.669765: step: 80/470, loss: 0.1129889115691185 2023-01-22 18:30:04.406267: step: 82/470, loss: 0.16097694635391235 2023-01-22 18:30:05.183456: step: 84/470, loss: 0.08617453277111053 2023-01-22 18:30:05.951195: step: 86/470, loss: 0.12346760928630829 2023-01-22 18:30:06.782208: step: 88/470, loss: 0.763508677482605 2023-01-22 18:30:07.531192: step: 90/470, loss: 0.09003492444753647 2023-01-22 18:30:08.254752: step: 92/470, loss: 0.11231306940317154 2023-01-22 18:30:08.964933: step: 94/470, loss: 0.3491142988204956 2023-01-22 18:30:09.686092: step: 96/470, loss: 0.4276227653026581 2023-01-22 18:30:10.546391: step: 98/470, loss: 1.9698024988174438 2023-01-22 18:30:11.318482: step: 100/470, loss: 0.16232357919216156 2023-01-22 18:30:12.093404: step: 102/470, loss: 0.07967112213373184 2023-01-22 18:30:12.799351: step: 104/470, loss: 0.04950232803821564 2023-01-22 18:30:13.552747: step: 106/470, loss: 0.10787200182676315 2023-01-22 18:30:14.278117: step: 108/470, loss: 0.10786844789981842 2023-01-22 18:30:14.992201: step: 110/470, loss: 0.10731559991836548 2023-01-22 18:30:15.791655: step: 112/470, loss: 0.07661902159452438 2023-01-22 18:30:16.539648: step: 114/470, loss: 0.030627790838479996 2023-01-22 18:30:17.234189: step: 116/470, loss: 0.13857971131801605 2023-01-22 18:30:18.038969: step: 118/470, loss: 0.10013086348772049 2023-01-22 18:30:18.771495: step: 120/470, loss: 0.15490609407424927 2023-01-22 18:30:19.537994: step: 122/470, loss: 1.2364153861999512 2023-01-22 18:30:20.229275: step: 124/470, loss: 0.12906284630298615 2023-01-22 18:30:20.975979: step: 126/470, loss: 0.1098984032869339 2023-01-22 18:30:21.698318: step: 128/470, loss: 0.11757684499025345 2023-01-22 18:30:22.429567: step: 130/470, loss: 0.07572782039642334 2023-01-22 18:30:23.133225: step: 132/470, loss: 0.1737520694732666 2023-01-22 18:30:23.857860: step: 134/470, loss: 0.13601481914520264 2023-01-22 18:30:24.611311: step: 136/470, loss: 0.09237069636583328 2023-01-22 18:30:25.299122: step: 138/470, loss: 0.3090798258781433 2023-01-22 18:30:26.032908: step: 140/470, loss: 0.14041438698768616 2023-01-22 18:30:26.790636: step: 142/470, loss: 0.17475241422653198 2023-01-22 18:30:27.525389: step: 144/470, loss: 0.11330439150333405 2023-01-22 18:30:28.295183: step: 146/470, loss: 0.17179451882839203 2023-01-22 18:30:29.070534: step: 148/470, loss: 0.1465025544166565 2023-01-22 18:30:29.778203: step: 150/470, loss: 0.013929449953138828 2023-01-22 18:30:30.539919: step: 152/470, loss: 0.08549786359071732 2023-01-22 18:30:31.291791: step: 154/470, loss: 0.03197602927684784 2023-01-22 18:30:31.988875: step: 156/470, loss: 0.5165415406227112 2023-01-22 18:30:32.732642: step: 158/470, loss: 0.05465986579656601 2023-01-22 18:30:33.543260: step: 160/470, loss: 0.08515684306621552 2023-01-22 18:30:34.308529: step: 162/470, loss: 0.12424996495246887 2023-01-22 18:30:35.001008: step: 164/470, loss: 0.12669268250465393 2023-01-22 18:30:35.675611: step: 166/470, loss: 0.03578896448016167 2023-01-22 18:30:36.389251: step: 168/470, loss: 0.07446733117103577 2023-01-22 18:30:37.184829: step: 170/470, loss: 0.14528201520442963 2023-01-22 18:30:37.837362: step: 172/470, loss: 0.06307399272918701 2023-01-22 18:30:38.644509: step: 174/470, loss: 0.08100112527608871 2023-01-22 18:30:39.330892: step: 176/470, loss: 0.06932196021080017 2023-01-22 18:30:40.052336: step: 178/470, loss: 0.10252606868743896 2023-01-22 18:30:40.747207: step: 180/470, loss: 0.05974116176366806 2023-01-22 18:30:41.449518: step: 182/470, loss: 0.0428239107131958 2023-01-22 18:30:42.284685: step: 184/470, loss: 0.1692391037940979 2023-01-22 18:30:42.994415: step: 186/470, loss: 0.06524728983640671 2023-01-22 18:30:43.749345: step: 188/470, loss: 0.1310448795557022 2023-01-22 18:30:44.476680: step: 190/470, loss: 0.07662883400917053 2023-01-22 18:30:45.160382: step: 192/470, loss: 0.08161720633506775 2023-01-22 18:30:45.889098: step: 194/470, loss: 0.20154725015163422 2023-01-22 18:30:46.563252: step: 196/470, loss: 0.17144368588924408 2023-01-22 18:30:47.239189: step: 198/470, loss: 0.05600402504205704 2023-01-22 18:30:47.934561: step: 200/470, loss: 0.1903025358915329 2023-01-22 18:30:48.768089: step: 202/470, loss: 0.10436660796403885 2023-01-22 18:30:49.539608: step: 204/470, loss: 0.23059800267219543 2023-01-22 18:30:50.290510: step: 206/470, loss: 0.17381146550178528 2023-01-22 18:30:50.981935: step: 208/470, loss: 0.3720693588256836 2023-01-22 18:30:51.679573: step: 210/470, loss: 0.0502849817276001 2023-01-22 18:30:52.400958: step: 212/470, loss: 0.13139395415782928 2023-01-22 18:30:53.137579: step: 214/470, loss: 0.038059886544942856 2023-01-22 18:30:53.879654: step: 216/470, loss: 0.09123457223176956 2023-01-22 18:30:54.627685: step: 218/470, loss: 0.2840889096260071 2023-01-22 18:30:55.322870: step: 220/470, loss: 0.022772392258048058 2023-01-22 18:30:56.001128: step: 222/470, loss: 0.10895684361457825 2023-01-22 18:30:56.713604: step: 224/470, loss: 0.1379581242799759 2023-01-22 18:30:57.421349: step: 226/470, loss: 0.08646092563867569 2023-01-22 18:30:58.162484: step: 228/470, loss: 0.06485362350940704 2023-01-22 18:30:58.865200: step: 230/470, loss: 0.10785852372646332 2023-01-22 18:30:59.575028: step: 232/470, loss: 0.149741530418396 2023-01-22 18:31:00.296246: step: 234/470, loss: 0.2812889516353607 2023-01-22 18:31:01.057153: step: 236/470, loss: 0.11035189032554626 2023-01-22 18:31:01.823379: step: 238/470, loss: 0.42968517541885376 2023-01-22 18:31:02.541104: step: 240/470, loss: 0.014646702446043491 2023-01-22 18:31:03.245872: step: 242/470, loss: 0.09640643745660782 2023-01-22 18:31:04.010331: step: 244/470, loss: 0.02477741241455078 2023-01-22 18:31:04.803141: step: 246/470, loss: 0.17379818856716156 2023-01-22 18:31:05.541125: step: 248/470, loss: 0.27178406715393066 2023-01-22 18:31:06.232992: step: 250/470, loss: 0.12189687788486481 2023-01-22 18:31:06.975604: step: 252/470, loss: 0.020987318828701973 2023-01-22 18:31:07.695824: step: 254/470, loss: 0.09624991565942764 2023-01-22 18:31:08.391985: step: 256/470, loss: 0.15449093282222748 2023-01-22 18:31:09.197122: step: 258/470, loss: 0.14626288414001465 2023-01-22 18:31:09.923535: step: 260/470, loss: 0.28058895468711853 2023-01-22 18:31:10.677892: step: 262/470, loss: 0.06803245097398758 2023-01-22 18:31:11.387369: step: 264/470, loss: 0.19811493158340454 2023-01-22 18:31:12.128009: step: 266/470, loss: 0.04378509148955345 2023-01-22 18:31:12.870575: step: 268/470, loss: 0.43500596284866333 2023-01-22 18:31:13.630602: step: 270/470, loss: 0.5609925985336304 2023-01-22 18:31:14.361805: step: 272/470, loss: 0.4385612905025482 2023-01-22 18:31:15.061176: step: 274/470, loss: 0.14124025404453278 2023-01-22 18:31:15.776259: step: 276/470, loss: 0.08243583142757416 2023-01-22 18:31:16.491392: step: 278/470, loss: 0.2784011960029602 2023-01-22 18:31:17.233405: step: 280/470, loss: 0.3031136393547058 2023-01-22 18:31:17.935821: step: 282/470, loss: 0.09605658054351807 2023-01-22 18:31:18.667505: step: 284/470, loss: 0.07707042992115021 2023-01-22 18:31:19.427196: step: 286/470, loss: 0.03834238275885582 2023-01-22 18:31:20.209818: step: 288/470, loss: 0.13241738080978394 2023-01-22 18:31:20.888532: step: 290/470, loss: 0.06490999460220337 2023-01-22 18:31:21.572249: step: 292/470, loss: 0.07722263038158417 2023-01-22 18:31:22.307202: step: 294/470, loss: 0.1114049032330513 2023-01-22 18:31:23.038986: step: 296/470, loss: 0.25798553228378296 2023-01-22 18:31:23.731517: step: 298/470, loss: 0.09492284804582596 2023-01-22 18:31:24.422326: step: 300/470, loss: 0.06170547008514404 2023-01-22 18:31:25.198819: step: 302/470, loss: 0.0761180892586708 2023-01-22 18:31:25.963419: step: 304/470, loss: 0.17359501123428345 2023-01-22 18:31:26.695794: step: 306/470, loss: 0.08720303326845169 2023-01-22 18:31:27.462178: step: 308/470, loss: 0.037650175392627716 2023-01-22 18:31:28.154463: step: 310/470, loss: 0.042883120477199554 2023-01-22 18:31:28.882979: step: 312/470, loss: 0.14848671853542328 2023-01-22 18:31:29.701122: step: 314/470, loss: 0.5215410590171814 2023-01-22 18:31:30.424706: step: 316/470, loss: 0.016007540747523308 2023-01-22 18:31:31.115181: step: 318/470, loss: 0.11587570607662201 2023-01-22 18:31:31.856459: step: 320/470, loss: 0.32297030091285706 2023-01-22 18:31:32.574735: step: 322/470, loss: 0.1690657138824463 2023-01-22 18:31:33.395605: step: 324/470, loss: 0.030179066583514214 2023-01-22 18:31:34.110026: step: 326/470, loss: 0.0764576867222786 2023-01-22 18:31:34.819096: step: 328/470, loss: 0.11027715355157852 2023-01-22 18:31:35.566587: step: 330/470, loss: 0.09635484218597412 2023-01-22 18:31:36.212839: step: 332/470, loss: 0.26524147391319275 2023-01-22 18:31:36.914679: step: 334/470, loss: 0.48851656913757324 2023-01-22 18:31:37.615199: step: 336/470, loss: 0.12017401307821274 2023-01-22 18:31:38.434495: step: 338/470, loss: 0.3046230971813202 2023-01-22 18:31:39.310167: step: 340/470, loss: 0.0651002898812294 2023-01-22 18:31:40.058352: step: 342/470, loss: 0.5883811712265015 2023-01-22 18:31:40.748478: step: 344/470, loss: 0.20933616161346436 2023-01-22 18:31:41.541230: step: 346/470, loss: 0.08957860618829727 2023-01-22 18:31:42.292039: step: 348/470, loss: 0.13973873853683472 2023-01-22 18:31:42.975566: step: 350/470, loss: 0.4042279124259949 2023-01-22 18:31:43.664249: step: 352/470, loss: 0.09614955633878708 2023-01-22 18:31:44.329285: step: 354/470, loss: 0.023717915639281273 2023-01-22 18:31:45.068687: step: 356/470, loss: 0.2741442024707794 2023-01-22 18:31:45.824506: step: 358/470, loss: 0.13002316653728485 2023-01-22 18:31:46.477149: step: 360/470, loss: 0.30730727314949036 2023-01-22 18:31:47.196283: step: 362/470, loss: 0.23137405514717102 2023-01-22 18:31:47.862334: step: 364/470, loss: 0.283146470785141 2023-01-22 18:31:48.612734: step: 366/470, loss: 0.34386488795280457 2023-01-22 18:31:49.408933: step: 368/470, loss: 0.09826954454183578 2023-01-22 18:31:50.122520: step: 370/470, loss: 0.1936502754688263 2023-01-22 18:31:50.875032: step: 372/470, loss: 0.08318967372179031 2023-01-22 18:31:51.607582: step: 374/470, loss: 0.16155041754245758 2023-01-22 18:31:52.275326: step: 376/470, loss: 0.3982257843017578 2023-01-22 18:31:53.010438: step: 378/470, loss: 0.0868930071592331 2023-01-22 18:31:53.770477: step: 380/470, loss: 0.5517705082893372 2023-01-22 18:31:54.485235: step: 382/470, loss: 0.12113271653652191 2023-01-22 18:31:55.187846: step: 384/470, loss: 0.15076488256454468 2023-01-22 18:31:55.877617: step: 386/470, loss: 0.17827637493610382 2023-01-22 18:31:56.636246: step: 388/470, loss: 0.056175053119659424 2023-01-22 18:31:57.369830: step: 390/470, loss: 0.28909632563591003 2023-01-22 18:31:58.144983: step: 392/470, loss: 0.16936790943145752 2023-01-22 18:31:58.825544: step: 394/470, loss: 0.10315996408462524 2023-01-22 18:31:59.562269: step: 396/470, loss: 0.08916748315095901 2023-01-22 18:32:00.270007: step: 398/470, loss: 0.08588486909866333 2023-01-22 18:32:00.970818: step: 400/470, loss: 0.7330968976020813 2023-01-22 18:32:01.714787: step: 402/470, loss: 0.8898875117301941 2023-01-22 18:32:02.486281: step: 404/470, loss: 0.22537262737751007 2023-01-22 18:32:03.158811: step: 406/470, loss: 0.09088273346424103 2023-01-22 18:32:03.887812: step: 408/470, loss: 0.2716708183288574 2023-01-22 18:32:04.617046: step: 410/470, loss: 0.10589412599802017 2023-01-22 18:32:05.321505: step: 412/470, loss: 0.12420433014631271 2023-01-22 18:32:06.052352: step: 414/470, loss: 0.25589585304260254 2023-01-22 18:32:06.714452: step: 416/470, loss: 0.15810632705688477 2023-01-22 18:32:07.464891: step: 418/470, loss: 0.11597256362438202 2023-01-22 18:32:08.163028: step: 420/470, loss: 0.28290224075317383 2023-01-22 18:32:08.966858: step: 422/470, loss: 0.07624372839927673 2023-01-22 18:32:09.702057: step: 424/470, loss: 6.502451419830322 2023-01-22 18:32:10.395797: step: 426/470, loss: 0.054799798876047134 2023-01-22 18:32:11.141089: step: 428/470, loss: 0.29792070388793945 2023-01-22 18:32:11.978195: step: 430/470, loss: 0.20813298225402832 2023-01-22 18:32:12.757503: step: 432/470, loss: 0.04179874062538147 2023-01-22 18:32:13.497255: step: 434/470, loss: 0.0941062718629837 2023-01-22 18:32:14.276962: step: 436/470, loss: 0.21649214625358582 2023-01-22 18:32:14.983534: step: 438/470, loss: 0.12335845082998276 2023-01-22 18:32:15.711870: step: 440/470, loss: 0.0966825857758522 2023-01-22 18:32:16.528388: step: 442/470, loss: 0.07399679720401764 2023-01-22 18:32:17.354458: step: 444/470, loss: 0.12531259655952454 2023-01-22 18:32:18.097378: step: 446/470, loss: 0.08484052866697311 2023-01-22 18:32:18.848730: step: 448/470, loss: 0.13579043745994568 2023-01-22 18:32:19.632848: step: 450/470, loss: 0.1644921749830246 2023-01-22 18:32:20.320225: step: 452/470, loss: 0.11840783804655075 2023-01-22 18:32:21.001050: step: 454/470, loss: 0.05891914665699005 2023-01-22 18:32:21.723152: step: 456/470, loss: 0.23505060374736786 2023-01-22 18:32:22.426517: step: 458/470, loss: 0.15571844577789307 2023-01-22 18:32:23.135195: step: 460/470, loss: 0.19243013858795166 2023-01-22 18:32:24.009890: step: 462/470, loss: 0.14108626544475555 2023-01-22 18:32:24.769380: step: 464/470, loss: 0.15079638361930847 2023-01-22 18:32:25.549639: step: 466/470, loss: 0.25679388642311096 2023-01-22 18:32:26.368884: step: 468/470, loss: 0.110379159450531 2023-01-22 18:32:27.064665: step: 470/470, loss: 0.233668714761734 2023-01-22 18:32:27.815966: step: 472/470, loss: 0.1186995655298233 2023-01-22 18:32:28.500427: step: 474/470, loss: 0.4312833249568939 2023-01-22 18:32:29.233357: step: 476/470, loss: 0.1033293753862381 2023-01-22 18:32:29.970179: step: 478/470, loss: 0.32570046186447144 2023-01-22 18:32:30.759018: step: 480/470, loss: 0.09971433132886887 2023-01-22 18:32:31.529929: step: 482/470, loss: 1.5249170064926147 2023-01-22 18:32:32.264412: step: 484/470, loss: 0.4827004671096802 2023-01-22 18:32:32.939636: step: 486/470, loss: 0.06607785820960999 2023-01-22 18:32:33.711407: step: 488/470, loss: 0.10160824656486511 2023-01-22 18:32:34.386771: step: 490/470, loss: 0.04615769162774086 2023-01-22 18:32:35.104298: step: 492/470, loss: 0.1232377365231514 2023-01-22 18:32:35.886305: step: 494/470, loss: 0.15672165155410767 2023-01-22 18:32:36.621714: step: 496/470, loss: 0.1081511378288269 2023-01-22 18:32:37.434003: step: 498/470, loss: 0.2734154164791107 2023-01-22 18:32:38.246181: step: 500/470, loss: 0.2586720883846283 2023-01-22 18:32:38.934077: step: 502/470, loss: 0.48053133487701416 2023-01-22 18:32:39.675376: step: 504/470, loss: 0.2579320967197418 2023-01-22 18:32:40.406855: step: 506/470, loss: 0.3125365376472473 2023-01-22 18:32:41.104934: step: 508/470, loss: 0.04690413922071457 2023-01-22 18:32:41.874358: step: 510/470, loss: 0.184226855635643 2023-01-22 18:32:42.593325: step: 512/470, loss: 0.046648863703012466 2023-01-22 18:32:43.327011: step: 514/470, loss: 0.7336933016777039 2023-01-22 18:32:44.126530: step: 516/470, loss: 0.287691205739975 2023-01-22 18:32:44.860770: step: 518/470, loss: 0.2230347841978073 2023-01-22 18:32:45.586391: step: 520/470, loss: 0.15412744879722595 2023-01-22 18:32:46.284252: step: 522/470, loss: 0.2500941753387451 2023-01-22 18:32:46.914259: step: 524/470, loss: 0.19288751482963562 2023-01-22 18:32:47.824845: step: 526/470, loss: 0.216958686709404 2023-01-22 18:32:48.633096: step: 528/470, loss: 0.298077791929245 2023-01-22 18:32:49.426275: step: 530/470, loss: 0.25814521312713623 2023-01-22 18:32:50.128654: step: 532/470, loss: 0.07196559756994247 2023-01-22 18:32:50.844805: step: 534/470, loss: 0.05458388477563858 2023-01-22 18:32:51.558143: step: 536/470, loss: 0.355390727519989 2023-01-22 18:32:52.268499: step: 538/470, loss: 0.14116257429122925 2023-01-22 18:32:53.063373: step: 540/470, loss: 0.027166053652763367 2023-01-22 18:32:53.741978: step: 542/470, loss: 0.04634636268019676 2023-01-22 18:32:54.556150: step: 544/470, loss: 0.14489805698394775 2023-01-22 18:32:55.289626: step: 546/470, loss: 0.03581508621573448 2023-01-22 18:32:56.026353: step: 548/470, loss: 0.14004434645175934 2023-01-22 18:32:56.753332: step: 550/470, loss: 0.22724568843841553 2023-01-22 18:32:57.440609: step: 552/470, loss: 0.0776071846485138 2023-01-22 18:32:58.234404: step: 554/470, loss: 0.09695343673229218 2023-01-22 18:32:59.020396: step: 556/470, loss: 0.7682523131370544 2023-01-22 18:32:59.897894: step: 558/470, loss: 0.20350007712841034 2023-01-22 18:33:00.609663: step: 560/470, loss: 0.6482030749320984 2023-01-22 18:33:01.363787: step: 562/470, loss: 0.07920963317155838 2023-01-22 18:33:02.138378: step: 564/470, loss: 0.06108212471008301 2023-01-22 18:33:02.832252: step: 566/470, loss: 0.3057776391506195 2023-01-22 18:33:03.544254: step: 568/470, loss: 0.10524436086416245 2023-01-22 18:33:04.233242: step: 570/470, loss: 0.0752614364027977 2023-01-22 18:33:05.026611: step: 572/470, loss: 0.10594554245471954 2023-01-22 18:33:05.754764: step: 574/470, loss: 0.11854086071252823 2023-01-22 18:33:06.590933: step: 576/470, loss: 0.10533951222896576 2023-01-22 18:33:07.399732: step: 578/470, loss: 0.1088142916560173 2023-01-22 18:33:08.207300: step: 580/470, loss: 0.17378634214401245 2023-01-22 18:33:09.132336: step: 582/470, loss: 0.19175052642822266 2023-01-22 18:33:09.913567: step: 584/470, loss: 0.11666764318943024 2023-01-22 18:33:10.610409: step: 586/470, loss: 0.06512768566608429 2023-01-22 18:33:11.433998: step: 588/470, loss: 0.0760718509554863 2023-01-22 18:33:12.207743: step: 590/470, loss: 0.028491565957665443 2023-01-22 18:33:12.971503: step: 592/470, loss: 0.07426527142524719 2023-01-22 18:33:13.700637: step: 594/470, loss: 0.056701093912124634 2023-01-22 18:33:14.463054: step: 596/470, loss: 0.1391458362340927 2023-01-22 18:33:15.181300: step: 598/470, loss: 0.11566898971796036 2023-01-22 18:33:15.879689: step: 600/470, loss: 0.14809054136276245 2023-01-22 18:33:16.681097: step: 602/470, loss: 0.13627216219902039 2023-01-22 18:33:17.417936: step: 604/470, loss: 0.11067622900009155 2023-01-22 18:33:18.127546: step: 606/470, loss: 0.3479231595993042 2023-01-22 18:33:18.792657: step: 608/470, loss: 0.6823694109916687 2023-01-22 18:33:19.505217: step: 610/470, loss: 0.10552807152271271 2023-01-22 18:33:20.237330: step: 612/470, loss: 0.5463363528251648 2023-01-22 18:33:20.977414: step: 614/470, loss: 0.10402689129114151 2023-01-22 18:33:21.647420: step: 616/470, loss: 0.34647348523139954 2023-01-22 18:33:22.374799: step: 618/470, loss: 0.07819667458534241 2023-01-22 18:33:23.063708: step: 620/470, loss: 0.03418436273932457 2023-01-22 18:33:23.705914: step: 622/470, loss: 0.0040921662002801895 2023-01-22 18:33:24.442703: step: 624/470, loss: 0.06056486442685127 2023-01-22 18:33:25.200314: step: 626/470, loss: 0.142343208193779 2023-01-22 18:33:25.904083: step: 628/470, loss: 0.3498281240463257 2023-01-22 18:33:26.696827: step: 630/470, loss: 0.1718958616256714 2023-01-22 18:33:27.386145: step: 632/470, loss: 0.14138327538967133 2023-01-22 18:33:28.126239: step: 634/470, loss: 0.1276867389678955 2023-01-22 18:33:28.840214: step: 636/470, loss: 0.0756273865699768 2023-01-22 18:33:29.563166: step: 638/470, loss: 0.03293231502175331 2023-01-22 18:33:30.306459: step: 640/470, loss: 0.16658903658390045 2023-01-22 18:33:31.172043: step: 642/470, loss: 1.328713297843933 2023-01-22 18:33:31.907815: step: 644/470, loss: 0.14836126565933228 2023-01-22 18:33:32.587379: step: 646/470, loss: 0.12975986301898956 2023-01-22 18:33:33.420302: step: 648/470, loss: 0.2378319948911667 2023-01-22 18:33:34.146588: step: 650/470, loss: 0.1449371576309204 2023-01-22 18:33:34.854227: step: 652/470, loss: 0.0579066202044487 2023-01-22 18:33:35.572899: step: 654/470, loss: 0.17443352937698364 2023-01-22 18:33:36.242926: step: 656/470, loss: 0.08816853910684586 2023-01-22 18:33:36.989009: step: 658/470, loss: 0.19708070158958435 2023-01-22 18:33:37.753804: step: 660/470, loss: 0.3211943507194519 2023-01-22 18:33:38.453193: step: 662/470, loss: 0.08856945484876633 2023-01-22 18:33:39.276349: step: 664/470, loss: 0.14771507680416107 2023-01-22 18:33:40.018399: step: 666/470, loss: 0.6534758806228638 2023-01-22 18:33:40.807568: step: 668/470, loss: 0.5298561453819275 2023-01-22 18:33:41.524214: step: 670/470, loss: 0.3639879822731018 2023-01-22 18:33:42.273829: step: 672/470, loss: 0.06922122836112976 2023-01-22 18:33:43.031112: step: 674/470, loss: 0.44452109932899475 2023-01-22 18:33:43.727293: step: 676/470, loss: 0.10079913586378098 2023-01-22 18:33:44.454315: step: 678/470, loss: 0.12544503808021545 2023-01-22 18:33:45.183739: step: 680/470, loss: 0.14979170262813568 2023-01-22 18:33:45.932856: step: 682/470, loss: 0.10007375478744507 2023-01-22 18:33:46.721411: step: 684/470, loss: 0.015384846366941929 2023-01-22 18:33:47.474537: step: 686/470, loss: 0.04832969605922699 2023-01-22 18:33:48.119319: step: 688/470, loss: 0.1544710397720337 2023-01-22 18:33:48.960837: step: 690/470, loss: 0.3322787284851074 2023-01-22 18:33:49.703056: step: 692/470, loss: 0.3732565939426422 2023-01-22 18:33:50.477824: step: 694/470, loss: 0.13223111629486084 2023-01-22 18:33:51.223979: step: 696/470, loss: 0.301310271024704 2023-01-22 18:33:51.954556: step: 698/470, loss: 0.09329129755496979 2023-01-22 18:33:52.649738: step: 700/470, loss: 0.07543506473302841 2023-01-22 18:33:53.413790: step: 702/470, loss: 3.3870034217834473 2023-01-22 18:33:54.106315: step: 704/470, loss: 0.4438471496105194 2023-01-22 18:33:54.872783: step: 706/470, loss: 0.10665325820446014 2023-01-22 18:33:55.522274: step: 708/470, loss: 0.1168651431798935 2023-01-22 18:33:56.239036: step: 710/470, loss: 0.1690247803926468 2023-01-22 18:33:57.008445: step: 712/470, loss: 0.09398052096366882 2023-01-22 18:33:57.731056: step: 714/470, loss: 0.10398267954587936 2023-01-22 18:33:58.496580: step: 716/470, loss: 0.08926042914390564 2023-01-22 18:33:59.163621: step: 718/470, loss: 0.2242121696472168 2023-01-22 18:33:59.874594: step: 720/470, loss: 0.09120961278676987 2023-01-22 18:34:00.595854: step: 722/470, loss: 0.13982771337032318 2023-01-22 18:34:01.318260: step: 724/470, loss: 0.07675092667341232 2023-01-22 18:34:02.099148: step: 726/470, loss: 0.1070447787642479 2023-01-22 18:34:02.843769: step: 728/470, loss: 0.10396528244018555 2023-01-22 18:34:03.607391: step: 730/470, loss: 0.13090196251869202 2023-01-22 18:34:04.318222: step: 732/470, loss: 0.10720758885145187 2023-01-22 18:34:05.029911: step: 734/470, loss: 0.31446948647499084 2023-01-22 18:34:05.769144: step: 736/470, loss: 0.2971380054950714 2023-01-22 18:34:06.474777: step: 738/470, loss: 0.11717523634433746 2023-01-22 18:34:07.288639: step: 740/470, loss: 0.3187268376350403 2023-01-22 18:34:08.067475: step: 742/470, loss: 0.1835777312517166 2023-01-22 18:34:08.911798: step: 744/470, loss: 0.10451960563659668 2023-01-22 18:34:09.623700: step: 746/470, loss: 0.381958931684494 2023-01-22 18:34:10.399672: step: 748/470, loss: 0.14256399869918823 2023-01-22 18:34:11.161838: step: 750/470, loss: 0.1951764076948166 2023-01-22 18:34:11.865348: step: 752/470, loss: 0.08754151314496994 2023-01-22 18:34:12.639744: step: 754/470, loss: 0.11652766168117523 2023-01-22 18:34:13.342038: step: 756/470, loss: 0.029814664274454117 2023-01-22 18:34:14.126057: step: 758/470, loss: 0.14490105211734772 2023-01-22 18:34:14.736793: step: 760/470, loss: 0.08405127376317978 2023-01-22 18:34:15.538871: step: 762/470, loss: 0.10964828729629517 2023-01-22 18:34:16.274382: step: 764/470, loss: 0.08635842055082321 2023-01-22 18:34:17.036869: step: 766/470, loss: 0.18066643178462982 2023-01-22 18:34:17.796567: step: 768/470, loss: 0.3340843915939331 2023-01-22 18:34:18.522077: step: 770/470, loss: 0.20563696324825287 2023-01-22 18:34:19.309291: step: 772/470, loss: 0.4167340397834778 2023-01-22 18:34:20.083894: step: 774/470, loss: 0.07869897782802582 2023-01-22 18:34:20.843624: step: 776/470, loss: 0.12975147366523743 2023-01-22 18:34:21.633785: step: 778/470, loss: 0.06549026817083359 2023-01-22 18:34:22.373313: step: 780/470, loss: 0.6230465769767761 2023-01-22 18:34:23.123356: step: 782/470, loss: 0.09621866047382355 2023-01-22 18:34:23.883775: step: 784/470, loss: 0.38288676738739014 2023-01-22 18:34:24.674266: step: 786/470, loss: 0.13935059309005737 2023-01-22 18:34:25.393304: step: 788/470, loss: 0.13502168655395508 2023-01-22 18:34:26.161442: step: 790/470, loss: 0.10943473875522614 2023-01-22 18:34:26.890138: step: 792/470, loss: 0.11306705325841904 2023-01-22 18:34:27.620650: step: 794/470, loss: 0.15334700047969818 2023-01-22 18:34:28.415604: step: 796/470, loss: 0.19283847510814667 2023-01-22 18:34:29.120082: step: 798/470, loss: 0.2030116617679596 2023-01-22 18:34:29.874346: step: 800/470, loss: 0.20117712020874023 2023-01-22 18:34:30.615244: step: 802/470, loss: 0.021305864676833153 2023-01-22 18:34:31.373992: step: 804/470, loss: 0.5952919721603394 2023-01-22 18:34:32.115731: step: 806/470, loss: 0.11923728138208389 2023-01-22 18:34:32.778002: step: 808/470, loss: 0.12033215165138245 2023-01-22 18:34:33.536137: step: 810/470, loss: 0.1041664257645607 2023-01-22 18:34:34.378669: step: 812/470, loss: 0.07111985236406326 2023-01-22 18:34:35.230658: step: 814/470, loss: 0.25388404726982117 2023-01-22 18:34:36.093996: step: 816/470, loss: 0.3828502297401428 2023-01-22 18:34:36.836752: step: 818/470, loss: 0.4347081780433655 2023-01-22 18:34:37.621550: step: 820/470, loss: 0.041309136897325516 2023-01-22 18:34:38.349481: step: 822/470, loss: 0.08783011138439178 2023-01-22 18:34:39.124935: step: 824/470, loss: 0.49217742681503296 2023-01-22 18:34:39.848909: step: 826/470, loss: 0.24008692800998688 2023-01-22 18:34:40.628910: step: 828/470, loss: 0.11003921926021576 2023-01-22 18:34:41.343031: step: 830/470, loss: 0.15111075341701508 2023-01-22 18:34:42.079971: step: 832/470, loss: 0.14422738552093506 2023-01-22 18:34:42.777893: step: 834/470, loss: 0.31494441628456116 2023-01-22 18:34:43.454741: step: 836/470, loss: 0.09576459974050522 2023-01-22 18:34:44.149505: step: 838/470, loss: 0.07254704087972641 2023-01-22 18:34:44.901016: step: 840/470, loss: 0.1577906757593155 2023-01-22 18:34:45.642361: step: 842/470, loss: 1.3641383647918701 2023-01-22 18:34:46.379821: step: 844/470, loss: 0.16647478938102722 2023-01-22 18:34:47.129873: step: 846/470, loss: 0.06729204952716827 2023-01-22 18:34:47.924968: step: 848/470, loss: 0.6149294972419739 2023-01-22 18:34:48.629355: step: 850/470, loss: 0.058741990476846695 2023-01-22 18:34:49.382115: step: 852/470, loss: 0.13534869253635406 2023-01-22 18:34:50.174354: step: 854/470, loss: 0.11562196910381317 2023-01-22 18:34:50.890639: step: 856/470, loss: 0.14991667866706848 2023-01-22 18:34:51.614010: step: 858/470, loss: 0.12905238568782806 2023-01-22 18:34:52.392525: step: 860/470, loss: 0.0312718003988266 2023-01-22 18:34:53.171421: step: 862/470, loss: 0.1413513869047165 2023-01-22 18:34:53.865391: step: 864/470, loss: 0.2965477705001831 2023-01-22 18:34:54.601618: step: 866/470, loss: 0.1007094457745552 2023-01-22 18:34:55.395910: step: 868/470, loss: 0.1321275383234024 2023-01-22 18:34:56.239500: step: 870/470, loss: 0.1397213190793991 2023-01-22 18:34:56.986710: step: 872/470, loss: 0.5523156523704529 2023-01-22 18:34:57.724158: step: 874/470, loss: 0.03732849657535553 2023-01-22 18:34:58.581161: step: 876/470, loss: 0.17241162061691284 2023-01-22 18:34:59.356894: step: 878/470, loss: 0.5599587559700012 2023-01-22 18:35:00.083212: step: 880/470, loss: 0.1906861662864685 2023-01-22 18:35:00.857606: step: 882/470, loss: 0.07153639942407608 2023-01-22 18:35:01.740629: step: 884/470, loss: 0.17692595720291138 2023-01-22 18:35:02.441592: step: 886/470, loss: 0.08000165224075317 2023-01-22 18:35:03.251054: step: 888/470, loss: 7.325255870819092 2023-01-22 18:35:04.012136: step: 890/470, loss: 0.07783879339694977 2023-01-22 18:35:04.748414: step: 892/470, loss: 0.06207854673266411 2023-01-22 18:35:05.410236: step: 894/470, loss: 0.1321595311164856 2023-01-22 18:35:06.213646: step: 896/470, loss: 0.19343137741088867 2023-01-22 18:35:06.988115: step: 898/470, loss: 0.10653921216726303 2023-01-22 18:35:07.696910: step: 900/470, loss: 0.12337514758110046 2023-01-22 18:35:08.389942: step: 902/470, loss: 0.13530763983726501 2023-01-22 18:35:09.171163: step: 904/470, loss: 0.05055097118020058 2023-01-22 18:35:09.838165: step: 906/470, loss: 0.05119376629590988 2023-01-22 18:35:10.544770: step: 908/470, loss: 0.05396522581577301 2023-01-22 18:35:11.275889: step: 910/470, loss: 0.16279923915863037 2023-01-22 18:35:12.053764: step: 912/470, loss: 0.10443281382322311 2023-01-22 18:35:12.883434: step: 914/470, loss: 0.08299147337675095 2023-01-22 18:35:13.622024: step: 916/470, loss: 0.11369689553976059 2023-01-22 18:35:14.351793: step: 918/470, loss: 0.9923412799835205 2023-01-22 18:35:15.130875: step: 920/470, loss: 0.12374808639287949 2023-01-22 18:35:15.949770: step: 922/470, loss: 0.29581671953201294 2023-01-22 18:35:16.704783: step: 924/470, loss: 0.20488214492797852 2023-01-22 18:35:17.471704: step: 926/470, loss: 0.2639607787132263 2023-01-22 18:35:18.156248: step: 928/470, loss: 0.11370810866355896 2023-01-22 18:35:18.901149: step: 930/470, loss: 0.3266356289386749 2023-01-22 18:35:19.610237: step: 932/470, loss: 0.0699765533208847 2023-01-22 18:35:20.352927: step: 934/470, loss: 0.05949941277503967 2023-01-22 18:35:21.045420: step: 936/470, loss: 0.09237470477819443 2023-01-22 18:35:21.790969: step: 938/470, loss: 0.20467214286327362 2023-01-22 18:35:22.473434: step: 940/470, loss: 0.23523172736167908 2023-01-22 18:35:23.199115: step: 942/470, loss: 0.17894554138183594 ================================================== Loss: 0.225 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28994319242254574, 'r': 0.3102997353440527, 'f1': 0.2997762796082783}, 'combined': 0.22088778497452083, 'epoch': 14} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3507474812318518, 'r': 0.36187696861709323, 'f1': 0.3562253169538826}, 'combined': 0.2481171361867839, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2747616086235489, 'r': 0.31438567362428843, 'f1': 0.29324115044247784}, 'combined': 0.21607242664182577, 'epoch': 14} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3377919490575152, 'r': 0.368618303678518, 'f1': 0.3525325256126204}, 'combined': 0.24554504271525798, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28846153846153844, 'r': 0.4891304347826087, 'f1': 0.3629032258064516}, 'combined': 0.1814516129032258, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.33620689655172414, 'f1': 0.41489361702127664}, 'combined': 0.2765957446808511, 'epoch': 14} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28994319242254574, 'r': 0.3102997353440527, 'f1': 0.2997762796082783}, 'combined': 0.22088778497452083, 'epoch': 14} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3507474812318518, 'r': 0.36187696861709323, 'f1': 0.3562253169538826}, 'combined': 0.2481171361867839, 'epoch': 14} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28846153846153844, 'r': 0.4891304347826087, 'f1': 0.3629032258064516}, 'combined': 0.1814516129032258, 'epoch': 14} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2747616086235489, 'r': 0.31438567362428843, 'f1': 0.29324115044247784}, 'combined': 0.21607242664182577, 'epoch': 14} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3377919490575152, 'r': 0.368618303678518, 'f1': 0.3525325256126204}, 'combined': 0.24554504271525798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.33620689655172414, 'f1': 0.41489361702127664}, 'combined': 0.2765957446808511, 'epoch': 14} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:38:17.732373: step: 2/470, loss: 0.11473778635263443 2023-01-22 18:38:18.447360: step: 4/470, loss: 0.14706557989120483 2023-01-22 18:38:19.216206: step: 6/470, loss: 0.1552312821149826 2023-01-22 18:38:19.973172: step: 8/470, loss: 0.23627066612243652 2023-01-22 18:38:20.828198: step: 10/470, loss: 0.0939834862947464 2023-01-22 18:38:21.504151: step: 12/470, loss: 0.12837252020835876 2023-01-22 18:38:22.245559: step: 14/470, loss: 0.15545101463794708 2023-01-22 18:38:23.006293: step: 16/470, loss: 0.23269550502300262 2023-01-22 18:38:23.799190: step: 18/470, loss: 0.2941681742668152 2023-01-22 18:38:24.510845: step: 20/470, loss: 0.05719856545329094 2023-01-22 18:38:25.239891: step: 22/470, loss: 0.1427365392446518 2023-01-22 18:38:26.016893: step: 24/470, loss: 0.12609969079494476 2023-01-22 18:38:26.705118: step: 26/470, loss: 0.03879668563604355 2023-01-22 18:38:27.536119: step: 28/470, loss: 0.09652974456548691 2023-01-22 18:38:28.328946: step: 30/470, loss: 0.08992653340101242 2023-01-22 18:38:29.044397: step: 32/470, loss: 0.18987002968788147 2023-01-22 18:38:29.729254: step: 34/470, loss: 0.021707141771912575 2023-01-22 18:38:30.526383: step: 36/470, loss: 0.1298495978116989 2023-01-22 18:38:31.255065: step: 38/470, loss: 0.19483181834220886 2023-01-22 18:38:31.984781: step: 40/470, loss: 0.0935438945889473 2023-01-22 18:38:32.766689: step: 42/470, loss: 0.038643594831228256 2023-01-22 18:38:33.511509: step: 44/470, loss: 0.08340389281511307 2023-01-22 18:38:34.204484: step: 46/470, loss: 0.02248873934149742 2023-01-22 18:38:34.970063: step: 48/470, loss: 0.08780361711978912 2023-01-22 18:38:35.673223: step: 50/470, loss: 0.9724255204200745 2023-01-22 18:38:36.416557: step: 52/470, loss: 0.19970646500587463 2023-01-22 18:38:37.086923: step: 54/470, loss: 0.1309264898300171 2023-01-22 18:38:37.808575: step: 56/470, loss: 0.050499871373176575 2023-01-22 18:38:38.529689: step: 58/470, loss: 0.07953242212533951 2023-01-22 18:38:39.245755: step: 60/470, loss: 0.04383343458175659 2023-01-22 18:38:39.980134: step: 62/470, loss: 0.08836504071950912 2023-01-22 18:38:40.762393: step: 64/470, loss: 2.471203565597534 2023-01-22 18:38:41.482594: step: 66/470, loss: 0.03604745864868164 2023-01-22 18:38:42.232509: step: 68/470, loss: 0.0975373387336731 2023-01-22 18:38:42.954666: step: 70/470, loss: 0.06281895935535431 2023-01-22 18:38:43.803491: step: 72/470, loss: 0.1086447536945343 2023-01-22 18:38:44.573912: step: 74/470, loss: 0.058805786073207855 2023-01-22 18:38:45.338879: step: 76/470, loss: 0.14832088351249695 2023-01-22 18:38:46.165288: step: 78/470, loss: 0.30360713601112366 2023-01-22 18:38:47.002904: step: 80/470, loss: 2.0807321071624756 2023-01-22 18:38:47.712981: step: 82/470, loss: 0.15625305473804474 2023-01-22 18:38:48.468346: step: 84/470, loss: 0.3062744140625 2023-01-22 18:38:49.319833: step: 86/470, loss: 0.05567606911063194 2023-01-22 18:38:50.012851: step: 88/470, loss: 0.09600371867418289 2023-01-22 18:38:50.692834: step: 90/470, loss: 0.024420877918601036 2023-01-22 18:38:51.489625: step: 92/470, loss: 0.2148466855287552 2023-01-22 18:38:52.188701: step: 94/470, loss: 0.10316638648509979 2023-01-22 18:38:52.870665: step: 96/470, loss: 0.06793716549873352 2023-01-22 18:38:53.582575: step: 98/470, loss: 0.11314757913351059 2023-01-22 18:38:54.358545: step: 100/470, loss: 0.10866573452949524 2023-01-22 18:38:55.095533: step: 102/470, loss: 0.06647631525993347 2023-01-22 18:38:55.798299: step: 104/470, loss: 0.053145021200180054 2023-01-22 18:38:56.630469: step: 106/470, loss: 0.0989309549331665 2023-01-22 18:38:57.336426: step: 108/470, loss: 0.07991360872983932 2023-01-22 18:38:58.079186: step: 110/470, loss: 0.28862452507019043 2023-01-22 18:38:58.857546: step: 112/470, loss: 0.05426064506173134 2023-01-22 18:38:59.563460: step: 114/470, loss: 0.090945765376091 2023-01-22 18:39:00.292389: step: 116/470, loss: 0.05981391668319702 2023-01-22 18:39:00.967092: step: 118/470, loss: 0.04086308926343918 2023-01-22 18:39:01.699119: step: 120/470, loss: 0.279904305934906 2023-01-22 18:39:02.387573: step: 122/470, loss: 0.08591174334287643 2023-01-22 18:39:03.089497: step: 124/470, loss: 0.025589006021618843 2023-01-22 18:39:03.773570: step: 126/470, loss: 0.2623073160648346 2023-01-22 18:39:04.493596: step: 128/470, loss: 0.03692317008972168 2023-01-22 18:39:05.193010: step: 130/470, loss: 0.16277796030044556 2023-01-22 18:39:05.935760: step: 132/470, loss: 0.11714790016412735 2023-01-22 18:39:06.595108: step: 134/470, loss: 0.16155806183815002 2023-01-22 18:39:07.294032: step: 136/470, loss: 0.034791506826877594 2023-01-22 18:39:07.994975: step: 138/470, loss: 0.15954738855361938 2023-01-22 18:39:08.842417: step: 140/470, loss: 0.100949726998806 2023-01-22 18:39:09.685810: step: 142/470, loss: 0.08963809162378311 2023-01-22 18:39:10.456769: step: 144/470, loss: 0.597632884979248 2023-01-22 18:39:11.158178: step: 146/470, loss: 0.07044071704149246 2023-01-22 18:39:11.933503: step: 148/470, loss: 0.09197443723678589 2023-01-22 18:39:12.716673: step: 150/470, loss: 0.09861261397600174 2023-01-22 18:39:13.447623: step: 152/470, loss: 0.054308950901031494 2023-01-22 18:39:14.140829: step: 154/470, loss: 0.224558487534523 2023-01-22 18:39:14.842023: step: 156/470, loss: 1.2589021921157837 2023-01-22 18:39:15.595926: step: 158/470, loss: 0.6944888234138489 2023-01-22 18:39:16.277903: step: 160/470, loss: 0.2508222460746765 2023-01-22 18:39:17.071591: step: 162/470, loss: 0.16872183978557587 2023-01-22 18:39:17.776433: step: 164/470, loss: 0.13078132271766663 2023-01-22 18:39:18.407292: step: 166/470, loss: 0.09100139886140823 2023-01-22 18:39:19.160556: step: 168/470, loss: 0.05044722184538841 2023-01-22 18:39:19.877524: step: 170/470, loss: 0.13399793207645416 2023-01-22 18:39:20.555444: step: 172/470, loss: 0.6419817209243774 2023-01-22 18:39:21.327622: step: 174/470, loss: 0.01634124107658863 2023-01-22 18:39:22.000573: step: 176/470, loss: 0.025893285870552063 2023-01-22 18:39:22.704282: step: 178/470, loss: 0.11607389897108078 2023-01-22 18:39:23.438878: step: 180/470, loss: 0.07055290043354034 2023-01-22 18:39:24.176750: step: 182/470, loss: 0.11892832070589066 2023-01-22 18:39:24.855104: step: 184/470, loss: 0.04048566147685051 2023-01-22 18:39:25.563855: step: 186/470, loss: 0.06207974627614021 2023-01-22 18:39:26.313006: step: 188/470, loss: 0.026901859790086746 2023-01-22 18:39:27.060332: step: 190/470, loss: 0.15540508925914764 2023-01-22 18:39:27.875189: step: 192/470, loss: 0.05698360875248909 2023-01-22 18:39:28.609738: step: 194/470, loss: 0.9431207180023193 2023-01-22 18:39:29.370667: step: 196/470, loss: 0.5588711500167847 2023-01-22 18:39:30.107456: step: 198/470, loss: 0.6502451300621033 2023-01-22 18:39:30.949331: step: 200/470, loss: 0.08674420416355133 2023-01-22 18:39:31.692494: step: 202/470, loss: 0.04215487837791443 2023-01-22 18:39:32.466513: step: 204/470, loss: 0.11216975003480911 2023-01-22 18:39:33.116707: step: 206/470, loss: 0.016669681295752525 2023-01-22 18:39:33.964284: step: 208/470, loss: 0.11073028296232224 2023-01-22 18:39:34.726513: step: 210/470, loss: 0.26849380135536194 2023-01-22 18:39:35.508827: step: 212/470, loss: 0.13260619342327118 2023-01-22 18:39:36.289699: step: 214/470, loss: 0.0956636592745781 2023-01-22 18:39:36.997836: step: 216/470, loss: 0.04913400486111641 2023-01-22 18:39:37.798191: step: 218/470, loss: 0.11870007961988449 2023-01-22 18:39:38.530783: step: 220/470, loss: 0.051276322454214096 2023-01-22 18:39:39.262726: step: 222/470, loss: 0.15064826607704163 2023-01-22 18:39:39.968292: step: 224/470, loss: 0.0929044634103775 2023-01-22 18:39:40.829882: step: 226/470, loss: 0.09530377388000488 2023-01-22 18:39:41.684390: step: 228/470, loss: 0.5640994906425476 2023-01-22 18:39:42.370830: step: 230/470, loss: 0.0663192942738533 2023-01-22 18:39:43.134368: step: 232/470, loss: 0.10693991929292679 2023-01-22 18:39:43.861938: step: 234/470, loss: 0.05415727570652962 2023-01-22 18:39:44.745777: step: 236/470, loss: 0.048255015164613724 2023-01-22 18:39:45.519424: step: 238/470, loss: 0.12781493365764618 2023-01-22 18:39:46.238211: step: 240/470, loss: 0.06164423003792763 2023-01-22 18:39:46.972355: step: 242/470, loss: 1.3429557085037231 2023-01-22 18:39:47.685065: step: 244/470, loss: 0.14472408592700958 2023-01-22 18:39:48.644008: step: 246/470, loss: 0.2433803528547287 2023-01-22 18:39:49.353043: step: 248/470, loss: 0.17759829759597778 2023-01-22 18:39:50.099901: step: 250/470, loss: 0.15550030767917633 2023-01-22 18:39:50.813531: step: 252/470, loss: 0.1389748603105545 2023-01-22 18:39:51.533343: step: 254/470, loss: 0.11234139651060104 2023-01-22 18:39:52.237407: step: 256/470, loss: 0.0669410452246666 2023-01-22 18:39:53.080674: step: 258/470, loss: 0.1081426739692688 2023-01-22 18:39:53.837983: step: 260/470, loss: 0.048209790140390396 2023-01-22 18:39:54.567834: step: 262/470, loss: 0.09756072610616684 2023-01-22 18:39:55.350778: step: 264/470, loss: 0.07871279865503311 2023-01-22 18:39:56.088512: step: 266/470, loss: 0.04785219579935074 2023-01-22 18:39:56.848230: step: 268/470, loss: 0.1019335687160492 2023-01-22 18:39:57.551105: step: 270/470, loss: 0.16639164090156555 2023-01-22 18:39:58.361838: step: 272/470, loss: 0.18241894245147705 2023-01-22 18:39:59.139831: step: 274/470, loss: 0.024366773664951324 2023-01-22 18:39:59.916304: step: 276/470, loss: 0.5418151021003723 2023-01-22 18:40:00.631538: step: 278/470, loss: 0.031031997874379158 2023-01-22 18:40:01.354998: step: 280/470, loss: 0.06488221883773804 2023-01-22 18:40:02.123678: step: 282/470, loss: 0.07829637080430984 2023-01-22 18:40:02.915091: step: 284/470, loss: 0.11502385139465332 2023-01-22 18:40:03.672356: step: 286/470, loss: 0.036991775035858154 2023-01-22 18:40:04.474023: step: 288/470, loss: 0.04660653695464134 2023-01-22 18:40:05.153159: step: 290/470, loss: 0.06741064786911011 2023-01-22 18:40:05.799719: step: 292/470, loss: 0.054885994642972946 2023-01-22 18:40:06.555420: step: 294/470, loss: 0.03092462196946144 2023-01-22 18:40:07.250856: step: 296/470, loss: 0.022188784554600716 2023-01-22 18:40:08.057599: step: 298/470, loss: 0.30903947353363037 2023-01-22 18:40:08.833008: step: 300/470, loss: 0.07914907485246658 2023-01-22 18:40:09.562276: step: 302/470, loss: 0.19007016718387604 2023-01-22 18:40:10.315468: step: 304/470, loss: 0.12614548206329346 2023-01-22 18:40:11.108871: step: 306/470, loss: 0.05936804041266441 2023-01-22 18:40:11.775439: step: 308/470, loss: 0.08794538676738739 2023-01-22 18:40:12.629987: step: 310/470, loss: 0.014910301193594933 2023-01-22 18:40:13.370399: step: 312/470, loss: 0.44476574659347534 2023-01-22 18:40:14.226702: step: 314/470, loss: 0.2262786328792572 2023-01-22 18:40:14.965028: step: 316/470, loss: 0.04540727660059929 2023-01-22 18:40:15.729861: step: 318/470, loss: 0.09350919723510742 2023-01-22 18:40:16.482734: step: 320/470, loss: 0.3775199353694916 2023-01-22 18:40:17.237092: step: 322/470, loss: 0.09253061562776566 2023-01-22 18:40:17.943038: step: 324/470, loss: 0.040534548461437225 2023-01-22 18:40:18.655621: step: 326/470, loss: 0.5241174697875977 2023-01-22 18:40:19.434592: step: 328/470, loss: 0.24729689955711365 2023-01-22 18:40:20.182652: step: 330/470, loss: 0.09145756810903549 2023-01-22 18:40:20.951389: step: 332/470, loss: 0.08529587835073471 2023-01-22 18:40:21.681369: step: 334/470, loss: 0.07263697683811188 2023-01-22 18:40:22.351687: step: 336/470, loss: 0.04024118930101395 2023-01-22 18:40:23.117413: step: 338/470, loss: 0.06803514063358307 2023-01-22 18:40:23.911902: step: 340/470, loss: 0.4728766679763794 2023-01-22 18:40:24.628416: step: 342/470, loss: 0.10786987096071243 2023-01-22 18:40:25.420973: step: 344/470, loss: 0.05304402485489845 2023-01-22 18:40:26.102541: step: 346/470, loss: 0.13090533018112183 2023-01-22 18:40:26.770861: step: 348/470, loss: 0.02692641317844391 2023-01-22 18:40:27.493236: step: 350/470, loss: 0.292754203081131 2023-01-22 18:40:28.226626: step: 352/470, loss: 0.10036718100309372 2023-01-22 18:40:28.891093: step: 354/470, loss: 0.08418302237987518 2023-01-22 18:40:29.689821: step: 356/470, loss: 0.8037522435188293 2023-01-22 18:40:30.514564: step: 358/470, loss: 0.10909231752157211 2023-01-22 18:40:31.251171: step: 360/470, loss: 0.15878289937973022 2023-01-22 18:40:31.958875: step: 362/470, loss: 0.41693437099456787 2023-01-22 18:40:32.620051: step: 364/470, loss: 0.526682436466217 2023-01-22 18:40:33.406351: step: 366/470, loss: 1.07395601272583 2023-01-22 18:40:34.083012: step: 368/470, loss: 0.10555024445056915 2023-01-22 18:40:34.863828: step: 370/470, loss: 0.06377702951431274 2023-01-22 18:40:35.671019: step: 372/470, loss: 0.06496112048625946 2023-01-22 18:40:36.385603: step: 374/470, loss: 0.28721484541893005 2023-01-22 18:40:37.111121: step: 376/470, loss: 0.06607744842767715 2023-01-22 18:40:37.817700: step: 378/470, loss: 0.10380702465772629 2023-01-22 18:40:38.549993: step: 380/470, loss: 0.545688807964325 2023-01-22 18:40:39.366609: step: 382/470, loss: 0.08570117503404617 2023-01-22 18:40:40.163882: step: 384/470, loss: 0.09269419312477112 2023-01-22 18:40:40.828262: step: 386/470, loss: 0.13103975355625153 2023-01-22 18:40:41.567449: step: 388/470, loss: 0.16990436613559723 2023-01-22 18:40:42.306256: step: 390/470, loss: 0.07417949289083481 2023-01-22 18:40:43.159876: step: 392/470, loss: 0.30628538131713867 2023-01-22 18:40:43.913043: step: 394/470, loss: 0.037749405950307846 2023-01-22 18:40:44.733922: step: 396/470, loss: 0.17938406765460968 2023-01-22 18:40:45.402848: step: 398/470, loss: 0.1570398360490799 2023-01-22 18:40:46.105092: step: 400/470, loss: 0.07525985687971115 2023-01-22 18:40:46.761461: step: 402/470, loss: 0.3523021340370178 2023-01-22 18:40:47.452049: step: 404/470, loss: 0.7962073087692261 2023-01-22 18:40:48.113801: step: 406/470, loss: 0.620932936668396 2023-01-22 18:40:48.836679: step: 408/470, loss: 0.1963292360305786 2023-01-22 18:40:49.526743: step: 410/470, loss: 0.29872724413871765 2023-01-22 18:40:50.235244: step: 412/470, loss: 0.20817507803440094 2023-01-22 18:40:51.000626: step: 414/470, loss: 0.12282727658748627 2023-01-22 18:40:51.610545: step: 416/470, loss: 0.10243619978427887 2023-01-22 18:40:52.369499: step: 418/470, loss: 0.17507648468017578 2023-01-22 18:40:53.142418: step: 420/470, loss: 0.17442834377288818 2023-01-22 18:40:53.931724: step: 422/470, loss: 0.25709930062294006 2023-01-22 18:40:54.744194: step: 424/470, loss: 0.08250849694013596 2023-01-22 18:40:55.451320: step: 426/470, loss: 0.09402203559875488 2023-01-22 18:40:56.148678: step: 428/470, loss: 0.11240358650684357 2023-01-22 18:40:56.976236: step: 430/470, loss: 0.1616356074810028 2023-01-22 18:40:57.711697: step: 432/470, loss: 0.048348795622587204 2023-01-22 18:40:58.417039: step: 434/470, loss: 0.06427058577537537 2023-01-22 18:40:59.131358: step: 436/470, loss: 0.10640700906515121 2023-01-22 18:40:59.847526: step: 438/470, loss: 0.23929402232170105 2023-01-22 18:41:00.674589: step: 440/470, loss: 0.12179520726203918 2023-01-22 18:41:01.452849: step: 442/470, loss: 0.10428435355424881 2023-01-22 18:41:02.219000: step: 444/470, loss: 0.06364937126636505 2023-01-22 18:41:02.872143: step: 446/470, loss: 0.18099617958068848 2023-01-22 18:41:03.558901: step: 448/470, loss: 0.19195039570331573 2023-01-22 18:41:04.264804: step: 450/470, loss: 0.11918045580387115 2023-01-22 18:41:05.027364: step: 452/470, loss: 0.24603021144866943 2023-01-22 18:41:05.722338: step: 454/470, loss: 0.09445057809352875 2023-01-22 18:41:06.486494: step: 456/470, loss: 0.0714806392788887 2023-01-22 18:41:07.239503: step: 458/470, loss: 0.06616587191820145 2023-01-22 18:41:07.990134: step: 460/470, loss: 0.23478007316589355 2023-01-22 18:41:08.812910: step: 462/470, loss: 0.10817045718431473 2023-01-22 18:41:09.447736: step: 464/470, loss: 0.12772588431835175 2023-01-22 18:41:10.132043: step: 466/470, loss: 0.11464595049619675 2023-01-22 18:41:10.882333: step: 468/470, loss: 0.4346560835838318 2023-01-22 18:41:11.591102: step: 470/470, loss: 0.055237311869859695 2023-01-22 18:41:12.315742: step: 472/470, loss: 0.13824693858623505 2023-01-22 18:41:13.055550: step: 474/470, loss: 0.1086035966873169 2023-01-22 18:41:13.778326: step: 476/470, loss: 0.26932814717292786 2023-01-22 18:41:14.533547: step: 478/470, loss: 3.1218864917755127 2023-01-22 18:41:15.305751: step: 480/470, loss: 0.06301096081733704 2023-01-22 18:41:16.002031: step: 482/470, loss: 0.21947330236434937 2023-01-22 18:41:16.744386: step: 484/470, loss: 0.0712946206331253 2023-01-22 18:41:17.549383: step: 486/470, loss: 0.35837650299072266 2023-01-22 18:41:18.276856: step: 488/470, loss: 0.05311376228928566 2023-01-22 18:41:19.063472: step: 490/470, loss: 0.07864044606685638 2023-01-22 18:41:19.914001: step: 492/470, loss: 0.15913601219654083 2023-01-22 18:41:20.606152: step: 494/470, loss: 0.49420005083084106 2023-01-22 18:41:21.294649: step: 496/470, loss: 0.07326009124517441 2023-01-22 18:41:21.978948: step: 498/470, loss: 0.09878735989332199 2023-01-22 18:41:22.750833: step: 500/470, loss: 0.18507295846939087 2023-01-22 18:41:23.598642: step: 502/470, loss: 0.23065824806690216 2023-01-22 18:41:24.363829: step: 504/470, loss: 0.056721873581409454 2023-01-22 18:41:25.062999: step: 506/470, loss: 0.04180415719747543 2023-01-22 18:41:25.789142: step: 508/470, loss: 0.4849397838115692 2023-01-22 18:41:26.535260: step: 510/470, loss: 0.12329412996768951 2023-01-22 18:41:27.242742: step: 512/470, loss: 0.4908522963523865 2023-01-22 18:41:28.064601: step: 514/470, loss: 0.42387697100639343 2023-01-22 18:41:28.803680: step: 516/470, loss: 0.07153800129890442 2023-01-22 18:41:29.548963: step: 518/470, loss: 0.0721924901008606 2023-01-22 18:41:30.280772: step: 520/470, loss: 0.6146575212478638 2023-01-22 18:41:31.023474: step: 522/470, loss: 0.10892462730407715 2023-01-22 18:41:31.762613: step: 524/470, loss: 0.06572824716567993 2023-01-22 18:41:32.518664: step: 526/470, loss: 0.15284620225429535 2023-01-22 18:41:33.280844: step: 528/470, loss: 0.18958349525928497 2023-01-22 18:41:33.968282: step: 530/470, loss: 0.03336469456553459 2023-01-22 18:41:34.705497: step: 532/470, loss: 0.1496352255344391 2023-01-22 18:41:35.429953: step: 534/470, loss: 0.13351905345916748 2023-01-22 18:41:36.120620: step: 536/470, loss: 0.08257067203521729 2023-01-22 18:41:36.856374: step: 538/470, loss: 0.7537316679954529 2023-01-22 18:41:37.571641: step: 540/470, loss: 0.3301614224910736 2023-01-22 18:41:38.252688: step: 542/470, loss: 0.08029845356941223 2023-01-22 18:41:38.996143: step: 544/470, loss: 0.08368469774723053 2023-01-22 18:41:39.756209: step: 546/470, loss: 0.054839249700307846 2023-01-22 18:41:40.475187: step: 548/470, loss: 0.0818573608994484 2023-01-22 18:41:41.219075: step: 550/470, loss: 0.09326575696468353 2023-01-22 18:41:42.009626: step: 552/470, loss: 0.08490006625652313 2023-01-22 18:41:42.675805: step: 554/470, loss: 0.2637978792190552 2023-01-22 18:41:43.402293: step: 556/470, loss: 0.02072441391646862 2023-01-22 18:41:44.121008: step: 558/470, loss: 0.07177267968654633 2023-01-22 18:41:44.921734: step: 560/470, loss: 0.17377981543540955 2023-01-22 18:41:45.630850: step: 562/470, loss: 0.10091854631900787 2023-01-22 18:41:46.342223: step: 564/470, loss: 0.07952025532722473 2023-01-22 18:41:47.037558: step: 566/470, loss: 0.021815728396177292 2023-01-22 18:41:47.819449: step: 568/470, loss: 0.1319160908460617 2023-01-22 18:41:48.444929: step: 570/470, loss: 0.03820590302348137 2023-01-22 18:41:49.189378: step: 572/470, loss: 0.05425729230046272 2023-01-22 18:41:49.880851: step: 574/470, loss: 0.07900302857160568 2023-01-22 18:41:50.653645: step: 576/470, loss: 0.10675648599863052 2023-01-22 18:41:51.380607: step: 578/470, loss: 0.13442978262901306 2023-01-22 18:41:52.123071: step: 580/470, loss: 0.16306370496749878 2023-01-22 18:41:52.834833: step: 582/470, loss: 0.22952473163604736 2023-01-22 18:41:53.630641: step: 584/470, loss: 0.2049272507429123 2023-01-22 18:41:54.403324: step: 586/470, loss: 0.01995740458369255 2023-01-22 18:41:55.155763: step: 588/470, loss: 0.10787932574748993 2023-01-22 18:41:55.905802: step: 590/470, loss: 0.06048334389925003 2023-01-22 18:41:56.698074: step: 592/470, loss: 0.23972097039222717 2023-01-22 18:41:57.373582: step: 594/470, loss: 0.3398410975933075 2023-01-22 18:41:58.062229: step: 596/470, loss: 0.23750761151313782 2023-01-22 18:41:58.805803: step: 598/470, loss: 0.07478002458810806 2023-01-22 18:41:59.601117: step: 600/470, loss: 0.046232108026742935 2023-01-22 18:42:00.393516: step: 602/470, loss: 0.18411901593208313 2023-01-22 18:42:01.215647: step: 604/470, loss: 0.249111145734787 2023-01-22 18:42:01.992301: step: 606/470, loss: 0.10080849379301071 2023-01-22 18:42:02.756650: step: 608/470, loss: 0.34081441164016724 2023-01-22 18:42:03.456719: step: 610/470, loss: 0.10482829064130783 2023-01-22 18:42:04.291651: step: 612/470, loss: 0.1037624180316925 2023-01-22 18:42:05.081597: step: 614/470, loss: 0.14347702264785767 2023-01-22 18:42:05.903051: step: 616/470, loss: 0.6947146654129028 2023-01-22 18:42:06.631841: step: 618/470, loss: 0.10346982628107071 2023-01-22 18:42:07.343505: step: 620/470, loss: 0.0636051818728447 2023-01-22 18:42:08.163053: step: 622/470, loss: 0.5542206168174744 2023-01-22 18:42:08.977529: step: 624/470, loss: 0.1027393713593483 2023-01-22 18:42:09.716339: step: 626/470, loss: 0.08677075058221817 2023-01-22 18:42:10.433024: step: 628/470, loss: 0.09158849716186523 2023-01-22 18:42:11.119487: step: 630/470, loss: 0.05454597622156143 2023-01-22 18:42:11.796740: step: 632/470, loss: 0.2582951486110687 2023-01-22 18:42:12.634984: step: 634/470, loss: 0.06388426572084427 2023-01-22 18:42:13.375616: step: 636/470, loss: 0.08858684450387955 2023-01-22 18:42:14.118112: step: 638/470, loss: 0.08609853684902191 2023-01-22 18:42:14.868775: step: 640/470, loss: 0.06940389424562454 2023-01-22 18:42:15.611105: step: 642/470, loss: 0.42918092012405396 2023-01-22 18:42:16.353469: step: 644/470, loss: 0.10172919929027557 2023-01-22 18:42:17.043298: step: 646/470, loss: 0.09180402010679245 2023-01-22 18:42:17.752447: step: 648/470, loss: 0.08429131656885147 2023-01-22 18:42:18.474477: step: 650/470, loss: 0.1133129671216011 2023-01-22 18:42:19.173446: step: 652/470, loss: 0.3817265033721924 2023-01-22 18:42:19.908040: step: 654/470, loss: 0.17819011211395264 2023-01-22 18:42:20.607393: step: 656/470, loss: 0.060797855257987976 2023-01-22 18:42:21.388675: step: 658/470, loss: 0.24461068212985992 2023-01-22 18:42:22.204163: step: 660/470, loss: 5.418313980102539 2023-01-22 18:42:22.933919: step: 662/470, loss: 0.05842447280883789 2023-01-22 18:42:23.636923: step: 664/470, loss: 1.0201159715652466 2023-01-22 18:42:24.461377: step: 666/470, loss: 0.44793573021888733 2023-01-22 18:42:25.177135: step: 668/470, loss: 0.16222833096981049 2023-01-22 18:42:25.869211: step: 670/470, loss: 0.13106600940227509 2023-01-22 18:42:26.622651: step: 672/470, loss: 0.07552053779363632 2023-01-22 18:42:27.338745: step: 674/470, loss: 0.043479401618242264 2023-01-22 18:42:28.131288: step: 676/470, loss: 0.16281522810459137 2023-01-22 18:42:28.919327: step: 678/470, loss: 0.07090484350919724 2023-01-22 18:42:29.644160: step: 680/470, loss: 0.8017717003822327 2023-01-22 18:42:30.382596: step: 682/470, loss: 0.2855534553527832 2023-01-22 18:42:31.119058: step: 684/470, loss: 0.12028669565916061 2023-01-22 18:42:31.854251: step: 686/470, loss: 0.20114941895008087 2023-01-22 18:42:32.607350: step: 688/470, loss: 0.08315178006887436 2023-01-22 18:42:33.313903: step: 690/470, loss: 0.06788298487663269 2023-01-22 18:42:34.110338: step: 692/470, loss: 0.2848212420940399 2023-01-22 18:42:34.866968: step: 694/470, loss: 0.21830137073993683 2023-01-22 18:42:35.583137: step: 696/470, loss: 0.04804154112935066 2023-01-22 18:42:36.246101: step: 698/470, loss: 0.1450975388288498 2023-01-22 18:42:36.903087: step: 700/470, loss: 0.12967953085899353 2023-01-22 18:42:37.664494: step: 702/470, loss: 0.09858199208974838 2023-01-22 18:42:38.388150: step: 704/470, loss: 0.06423236429691315 2023-01-22 18:42:39.128854: step: 706/470, loss: 0.08795824646949768 2023-01-22 18:42:39.862588: step: 708/470, loss: 0.08408377319574356 2023-01-22 18:42:40.591010: step: 710/470, loss: 0.6152773499488831 2023-01-22 18:42:41.255817: step: 712/470, loss: 0.08272521942853928 2023-01-22 18:42:42.045705: step: 714/470, loss: 0.07895615696907043 2023-01-22 18:42:42.827924: step: 716/470, loss: 0.06702841818332672 2023-01-22 18:42:43.633932: step: 718/470, loss: 0.1409570723772049 2023-01-22 18:42:44.294144: step: 720/470, loss: 0.7910665273666382 2023-01-22 18:42:45.006825: step: 722/470, loss: 0.0968073159456253 2023-01-22 18:42:45.855961: step: 724/470, loss: 0.12350492179393768 2023-01-22 18:42:46.555119: step: 726/470, loss: 0.18203213810920715 2023-01-22 18:42:47.270124: step: 728/470, loss: 0.12851355969905853 2023-01-22 18:42:48.029664: step: 730/470, loss: 0.22939978539943695 2023-01-22 18:42:48.731344: step: 732/470, loss: 0.05603186413645744 2023-01-22 18:42:49.482916: step: 734/470, loss: 0.16891199350357056 2023-01-22 18:42:50.177715: step: 736/470, loss: 0.14518122375011444 2023-01-22 18:42:50.902632: step: 738/470, loss: 0.12637437880039215 2023-01-22 18:42:51.601123: step: 740/470, loss: 0.9105393290519714 2023-01-22 18:42:52.358632: step: 742/470, loss: 0.7299985289573669 2023-01-22 18:42:53.119373: step: 744/470, loss: 0.11732316762208939 2023-01-22 18:42:53.835199: step: 746/470, loss: 0.1654384881258011 2023-01-22 18:42:54.581903: step: 748/470, loss: 0.16303178668022156 2023-01-22 18:42:55.269853: step: 750/470, loss: 0.03869093954563141 2023-01-22 18:42:56.030901: step: 752/470, loss: 0.33318397402763367 2023-01-22 18:42:56.741876: step: 754/470, loss: 0.2498822659254074 2023-01-22 18:42:57.459809: step: 756/470, loss: 0.19036591053009033 2023-01-22 18:42:58.178320: step: 758/470, loss: 0.24502049386501312 2023-01-22 18:42:58.890671: step: 760/470, loss: 0.05735207721590996 2023-01-22 18:42:59.571211: step: 762/470, loss: 0.1237659901380539 2023-01-22 18:43:00.282074: step: 764/470, loss: 0.05063727870583534 2023-01-22 18:43:01.074753: step: 766/470, loss: 0.09125498682260513 2023-01-22 18:43:01.743531: step: 768/470, loss: 0.09228543937206268 2023-01-22 18:43:02.563323: step: 770/470, loss: 0.09300341457128525 2023-01-22 18:43:03.303155: step: 772/470, loss: 0.12723006308078766 2023-01-22 18:43:04.017263: step: 774/470, loss: 0.19253137707710266 2023-01-22 18:43:04.760571: step: 776/470, loss: 0.16553926467895508 2023-01-22 18:43:05.530038: step: 778/470, loss: 0.08979497104883194 2023-01-22 18:43:06.254474: step: 780/470, loss: 0.29222023487091064 2023-01-22 18:43:06.966142: step: 782/470, loss: 0.1268426924943924 2023-01-22 18:43:07.712652: step: 784/470, loss: 0.10541335493326187 2023-01-22 18:43:08.495263: step: 786/470, loss: 0.14692994952201843 2023-01-22 18:43:09.206951: step: 788/470, loss: 0.06541899591684341 2023-01-22 18:43:09.932018: step: 790/470, loss: 0.07346995174884796 2023-01-22 18:43:10.695487: step: 792/470, loss: 0.13709881901741028 2023-01-22 18:43:11.377935: step: 794/470, loss: 0.09347955882549286 2023-01-22 18:43:12.120158: step: 796/470, loss: 0.08111049234867096 2023-01-22 18:43:12.932903: step: 798/470, loss: 0.07415740191936493 2023-01-22 18:43:13.658170: step: 800/470, loss: 0.05813341215252876 2023-01-22 18:43:14.400230: step: 802/470, loss: 0.09454671293497086 2023-01-22 18:43:15.052640: step: 804/470, loss: 0.037501685321331024 2023-01-22 18:43:15.815335: step: 806/470, loss: 0.1539684534072876 2023-01-22 18:43:16.656344: step: 808/470, loss: 0.11793018132448196 2023-01-22 18:43:17.370036: step: 810/470, loss: 0.1428481787443161 2023-01-22 18:43:18.082197: step: 812/470, loss: 0.1079527959227562 2023-01-22 18:43:18.781361: step: 814/470, loss: 0.1281379610300064 2023-01-22 18:43:19.553878: step: 816/470, loss: 0.1891109198331833 2023-01-22 18:43:20.335378: step: 818/470, loss: 0.34778857231140137 2023-01-22 18:43:21.093525: step: 820/470, loss: 0.07001124322414398 2023-01-22 18:43:21.830482: step: 822/470, loss: 0.04569976404309273 2023-01-22 18:43:22.585119: step: 824/470, loss: 0.04330173507332802 2023-01-22 18:43:23.303387: step: 826/470, loss: 0.09614764899015427 2023-01-22 18:43:24.097422: step: 828/470, loss: 0.35434603691101074 2023-01-22 18:43:24.832118: step: 830/470, loss: 0.35880136489868164 2023-01-22 18:43:25.698165: step: 832/470, loss: 0.15067602694034576 2023-01-22 18:43:26.438081: step: 834/470, loss: 0.0773734450340271 2023-01-22 18:43:27.195766: step: 836/470, loss: 0.14914895594120026 2023-01-22 18:43:27.969613: step: 838/470, loss: 0.21950659155845642 2023-01-22 18:43:28.742404: step: 840/470, loss: 0.03406314551830292 2023-01-22 18:43:29.490999: step: 842/470, loss: 0.07147298008203506 2023-01-22 18:43:30.203942: step: 844/470, loss: 0.07960271090269089 2023-01-22 18:43:30.863815: step: 846/470, loss: 0.04376105219125748 2023-01-22 18:43:31.586137: step: 848/470, loss: 0.13630259037017822 2023-01-22 18:43:32.287169: step: 850/470, loss: 0.09853252023458481 2023-01-22 18:43:33.026650: step: 852/470, loss: 0.05566703900694847 2023-01-22 18:43:33.811767: step: 854/470, loss: 0.09490513801574707 2023-01-22 18:43:34.551833: step: 856/470, loss: 0.11718861013650894 2023-01-22 18:43:35.383822: step: 858/470, loss: 0.05593058466911316 2023-01-22 18:43:36.117868: step: 860/470, loss: 0.5031771063804626 2023-01-22 18:43:36.856562: step: 862/470, loss: 0.1421380341053009 2023-01-22 18:43:37.696857: step: 864/470, loss: 0.20217017829418182 2023-01-22 18:43:38.369921: step: 866/470, loss: 0.08146615326404572 2023-01-22 18:43:39.115679: step: 868/470, loss: 0.07637915015220642 2023-01-22 18:43:39.851939: step: 870/470, loss: 0.6401387453079224 2023-01-22 18:43:40.646999: step: 872/470, loss: 0.1042008101940155 2023-01-22 18:43:41.371050: step: 874/470, loss: 0.18115811049938202 2023-01-22 18:43:42.044162: step: 876/470, loss: 0.1381768435239792 2023-01-22 18:43:42.745495: step: 878/470, loss: 0.1072993129491806 2023-01-22 18:43:43.481249: step: 880/470, loss: 0.055918753147125244 2023-01-22 18:43:44.247023: step: 882/470, loss: 0.12597472965717316 2023-01-22 18:43:44.950838: step: 884/470, loss: 2.0582730770111084 2023-01-22 18:43:45.721012: step: 886/470, loss: 0.03130277991294861 2023-01-22 18:43:46.496819: step: 888/470, loss: 0.11425133794546127 2023-01-22 18:43:47.269216: step: 890/470, loss: 0.1338731199502945 2023-01-22 18:43:48.036283: step: 892/470, loss: 0.29117199778556824 2023-01-22 18:43:48.770858: step: 894/470, loss: 0.40173524618148804 2023-01-22 18:43:49.556341: step: 896/470, loss: 0.24469105899333954 2023-01-22 18:43:50.226802: step: 898/470, loss: 0.07817135006189346 2023-01-22 18:43:50.964218: step: 900/470, loss: 0.1031189039349556 2023-01-22 18:43:51.782387: step: 902/470, loss: 0.5347837805747986 2023-01-22 18:43:52.530543: step: 904/470, loss: 0.223730206489563 2023-01-22 18:43:53.319027: step: 906/470, loss: 0.16207782924175262 2023-01-22 18:43:54.023167: step: 908/470, loss: 0.062290169298648834 2023-01-22 18:43:54.766198: step: 910/470, loss: 0.1052016094326973 2023-01-22 18:43:55.500598: step: 912/470, loss: 0.16469532251358032 2023-01-22 18:43:56.262217: step: 914/470, loss: 0.08949004858732224 2023-01-22 18:43:57.038226: step: 916/470, loss: 0.23239319026470184 2023-01-22 18:43:57.840145: step: 918/470, loss: 0.29859524965286255 2023-01-22 18:43:58.614148: step: 920/470, loss: 0.1092582494020462 2023-01-22 18:43:59.421809: step: 922/470, loss: 0.4721441864967346 2023-01-22 18:44:00.107042: step: 924/470, loss: 0.02895715832710266 2023-01-22 18:44:00.877795: step: 926/470, loss: 0.6657435894012451 2023-01-22 18:44:01.533718: step: 928/470, loss: 0.07168980687856674 2023-01-22 18:44:02.177877: step: 930/470, loss: 0.07691118121147156 2023-01-22 18:44:02.978181: step: 932/470, loss: 0.5273089408874512 2023-01-22 18:44:03.654631: step: 934/470, loss: 0.1843612641096115 2023-01-22 18:44:04.387826: step: 936/470, loss: 0.0656123235821724 2023-01-22 18:44:05.238977: step: 938/470, loss: 0.1719179004430771 2023-01-22 18:44:06.019110: step: 940/470, loss: 0.14247168600559235 2023-01-22 18:44:06.701475: step: 942/470, loss: 0.2547386884689331 ================================================== Loss: 0.204 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2855591748099892, 'r': 0.3327008222643897, 'f1': 0.3073327490505405}, 'combined': 0.22645570982671404, 'epoch': 15} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3358431714808179, 'r': 0.35877092645691216, 'f1': 0.3469286504092875}, 'combined': 0.24164184605622016, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28976966759680817, 'r': 0.3403556437237652, 'f1': 0.3130321540007404}, 'combined': 0.2306552713689666, 'epoch': 15} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.336597090231335, 'r': 0.3537505957912011, 'f1': 0.34496073101064145}, 'combined': 0.24027115592780998, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2791196741854637, 'r': 0.3522098355471221, 'f1': 0.3114338646532439}, 'combined': 0.2294775844813376, 'epoch': 15} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32485481802107175, 'r': 0.36577403067564906, 'f1': 0.3441022088671868}, 'combined': 0.23967318030550325, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21774193548387097, 'r': 0.38571428571428573, 'f1': 0.27835051546391754}, 'combined': 0.18556701030927836, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29545454545454547, 'r': 0.42391304347826086, 'f1': 0.3482142857142857}, 'combined': 0.17410714285714285, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42391304347826086, 'r': 0.33620689655172414, 'f1': 0.375}, 'combined': 0.25, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28994319242254574, 'r': 0.3102997353440527, 'f1': 0.2997762796082783}, 'combined': 0.22088778497452083, 'epoch': 14} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3507474812318518, 'r': 0.36187696861709323, 'f1': 0.3562253169538826}, 'combined': 0.2481171361867839, 'epoch': 14} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28846153846153844, 'r': 0.4891304347826087, 'f1': 0.3629032258064516}, 'combined': 0.1814516129032258, 'epoch': 14} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2747616086235489, 'r': 0.31438567362428843, 'f1': 0.29324115044247784}, 'combined': 0.21607242664182577, 'epoch': 14} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3377919490575152, 'r': 0.368618303678518, 'f1': 0.3525325256126204}, 'combined': 0.24554504271525798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.33620689655172414, 'f1': 0.41489361702127664}, 'combined': 0.2765957446808511, 'epoch': 14} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:46:46.713321: step: 2/470, loss: 0.08884326368570328 2023-01-22 18:46:47.484479: step: 4/470, loss: 0.061807192862033844 2023-01-22 18:46:48.223594: step: 6/470, loss: 0.08429668098688126 2023-01-22 18:46:48.981201: step: 8/470, loss: 1.1761099100112915 2023-01-22 18:46:49.705240: step: 10/470, loss: 0.28416168689727783 2023-01-22 18:46:50.438386: step: 12/470, loss: 0.08847323060035706 2023-01-22 18:46:51.191846: step: 14/470, loss: 0.06219949573278427 2023-01-22 18:46:52.004093: step: 16/470, loss: 0.04178072139620781 2023-01-22 18:46:52.720854: step: 18/470, loss: 0.40100765228271484 2023-01-22 18:46:53.433854: step: 20/470, loss: 0.1000329926609993 2023-01-22 18:46:54.185332: step: 22/470, loss: 0.5311740040779114 2023-01-22 18:46:54.885944: step: 24/470, loss: 0.13259932398796082 2023-01-22 18:46:55.662090: step: 26/470, loss: 0.09883047640323639 2023-01-22 18:46:56.357386: step: 28/470, loss: 0.034732915461063385 2023-01-22 18:46:57.120700: step: 30/470, loss: 0.07482585310935974 2023-01-22 18:46:57.822479: step: 32/470, loss: 0.22222690284252167 2023-01-22 18:46:58.508937: step: 34/470, loss: 0.08215279877185822 2023-01-22 18:46:59.237415: step: 36/470, loss: 0.4559054970741272 2023-01-22 18:46:59.948824: step: 38/470, loss: 0.10464425384998322 2023-01-22 18:47:00.698832: step: 40/470, loss: 0.15875568985939026 2023-01-22 18:47:01.373688: step: 42/470, loss: 0.048225291073322296 2023-01-22 18:47:02.170572: step: 44/470, loss: 0.020997516810894012 2023-01-22 18:47:02.939248: step: 46/470, loss: 0.10919099301099777 2023-01-22 18:47:03.657389: step: 48/470, loss: 0.04074873775243759 2023-01-22 18:47:04.463064: step: 50/470, loss: 0.03056240826845169 2023-01-22 18:47:05.197096: step: 52/470, loss: 0.12954410910606384 2023-01-22 18:47:05.868588: step: 54/470, loss: 0.22133603692054749 2023-01-22 18:47:06.613524: step: 56/470, loss: 0.20818258821964264 2023-01-22 18:47:07.314040: step: 58/470, loss: 0.029186418280005455 2023-01-22 18:47:08.036357: step: 60/470, loss: 0.07272211462259293 2023-01-22 18:47:08.820110: step: 62/470, loss: 0.09152626991271973 2023-01-22 18:47:09.609103: step: 64/470, loss: 0.015041274018585682 2023-01-22 18:47:10.387037: step: 66/470, loss: 1.1043593883514404 2023-01-22 18:47:11.115161: step: 68/470, loss: 0.34245747327804565 2023-01-22 18:47:11.906769: step: 70/470, loss: 0.15388578176498413 2023-01-22 18:47:12.673781: step: 72/470, loss: 0.11390519142150879 2023-01-22 18:47:13.448899: step: 74/470, loss: 0.05043567717075348 2023-01-22 18:47:14.228223: step: 76/470, loss: 0.11875592917203903 2023-01-22 18:47:15.050666: step: 78/470, loss: 0.0501096136868 2023-01-22 18:47:15.767396: step: 80/470, loss: 0.3267119526863098 2023-01-22 18:47:16.583206: step: 82/470, loss: 0.04021922126412392 2023-01-22 18:47:17.310110: step: 84/470, loss: 0.14459112286567688 2023-01-22 18:47:18.035684: step: 86/470, loss: 0.09926001727581024 2023-01-22 18:47:18.785279: step: 88/470, loss: 0.8933921456336975 2023-01-22 18:47:19.567657: step: 90/470, loss: 0.06707798689603806 2023-01-22 18:47:20.308466: step: 92/470, loss: 0.41027313470840454 2023-01-22 18:47:21.040794: step: 94/470, loss: 0.0322352796792984 2023-01-22 18:47:21.722095: step: 96/470, loss: 0.029441189020872116 2023-01-22 18:47:22.418428: step: 98/470, loss: 0.21683290600776672 2023-01-22 18:47:23.059936: step: 100/470, loss: 0.06455568969249725 2023-01-22 18:47:23.884560: step: 102/470, loss: 0.050209976732730865 2023-01-22 18:47:24.627224: step: 104/470, loss: 0.036682043224573135 2023-01-22 18:47:25.332471: step: 106/470, loss: 0.14355768263339996 2023-01-22 18:47:26.027365: step: 108/470, loss: 0.023732803761959076 2023-01-22 18:47:26.747504: step: 110/470, loss: 0.02847733162343502 2023-01-22 18:47:27.465466: step: 112/470, loss: 0.1768191009759903 2023-01-22 18:47:28.178338: step: 114/470, loss: 0.06269264221191406 2023-01-22 18:47:28.928251: step: 116/470, loss: 0.008748441934585571 2023-01-22 18:47:29.674038: step: 118/470, loss: 0.10878868401050568 2023-01-22 18:47:30.455000: step: 120/470, loss: 0.09285591542720795 2023-01-22 18:47:31.231841: step: 122/470, loss: 0.6085570454597473 2023-01-22 18:47:31.965583: step: 124/470, loss: 0.06355506926774979 2023-01-22 18:47:32.691057: step: 126/470, loss: 0.11871222406625748 2023-01-22 18:47:33.448720: step: 128/470, loss: 0.08303084224462509 2023-01-22 18:47:34.234021: step: 130/470, loss: 0.11246610432863235 2023-01-22 18:47:34.999600: step: 132/470, loss: 0.25137367844581604 2023-01-22 18:47:35.780315: step: 134/470, loss: 0.24548359215259552 2023-01-22 18:47:36.472845: step: 136/470, loss: 0.12862925231456757 2023-01-22 18:47:37.129024: step: 138/470, loss: 0.059389956295490265 2023-01-22 18:47:37.818913: step: 140/470, loss: 0.05305398255586624 2023-01-22 18:47:38.554771: step: 142/470, loss: 0.12227759510278702 2023-01-22 18:47:39.346810: step: 144/470, loss: 0.12327904254198074 2023-01-22 18:47:40.053366: step: 146/470, loss: 0.05995366722345352 2023-01-22 18:47:40.771461: step: 148/470, loss: 0.13276717066764832 2023-01-22 18:47:41.480554: step: 150/470, loss: 0.050435952842235565 2023-01-22 18:47:42.176502: step: 152/470, loss: 0.32247716188430786 2023-01-22 18:47:42.905496: step: 154/470, loss: 0.06091221049427986 2023-01-22 18:47:43.604984: step: 156/470, loss: 0.2275611311197281 2023-01-22 18:47:44.274815: step: 158/470, loss: 0.28586113452911377 2023-01-22 18:47:44.931729: step: 160/470, loss: 0.0427815206348896 2023-01-22 18:47:45.729832: step: 162/470, loss: 0.4265275001525879 2023-01-22 18:47:46.389620: step: 164/470, loss: 0.06603558361530304 2023-01-22 18:47:47.205319: step: 166/470, loss: 0.08618386089801788 2023-01-22 18:47:47.914975: step: 168/470, loss: 0.13042916357517242 2023-01-22 18:47:48.646335: step: 170/470, loss: 0.10537727922201157 2023-01-22 18:47:49.342842: step: 172/470, loss: 0.025995755568146706 2023-01-22 18:47:50.175208: step: 174/470, loss: 0.07933039963245392 2023-01-22 18:47:50.824277: step: 176/470, loss: 0.026341214776039124 2023-01-22 18:47:51.531888: step: 178/470, loss: 0.07335160672664642 2023-01-22 18:47:52.234514: step: 180/470, loss: 0.012121165171265602 2023-01-22 18:47:52.917356: step: 182/470, loss: 0.11823894083499908 2023-01-22 18:47:53.627214: step: 184/470, loss: 0.1767769157886505 2023-01-22 18:47:54.362041: step: 186/470, loss: 2.089386224746704 2023-01-22 18:47:55.146159: step: 188/470, loss: 0.08466898649930954 2023-01-22 18:47:55.942927: step: 190/470, loss: 0.48831504583358765 2023-01-22 18:47:56.676447: step: 192/470, loss: 0.328980952501297 2023-01-22 18:47:57.333252: step: 194/470, loss: 0.18098825216293335 2023-01-22 18:47:58.038968: step: 196/470, loss: 0.10612848401069641 2023-01-22 18:47:58.754454: step: 198/470, loss: 0.09899935126304626 2023-01-22 18:47:59.449387: step: 200/470, loss: 0.06739851832389832 2023-01-22 18:48:00.487863: step: 202/470, loss: 0.6201255321502686 2023-01-22 18:48:01.198952: step: 204/470, loss: 0.031738992780447006 2023-01-22 18:48:01.953321: step: 206/470, loss: 0.03578285127878189 2023-01-22 18:48:02.611507: step: 208/470, loss: 0.024340057745575905 2023-01-22 18:48:03.374258: step: 210/470, loss: 0.7221086025238037 2023-01-22 18:48:04.154460: step: 212/470, loss: 0.06913825124502182 2023-01-22 18:48:05.013783: step: 214/470, loss: 0.11934089660644531 2023-01-22 18:48:05.759348: step: 216/470, loss: 0.31613805890083313 2023-01-22 18:48:06.434219: step: 218/470, loss: 0.08025716245174408 2023-01-22 18:48:07.196229: step: 220/470, loss: 0.07177558541297913 2023-01-22 18:48:07.921471: step: 222/470, loss: 0.38496649265289307 2023-01-22 18:48:08.636066: step: 224/470, loss: 0.20351849496364594 2023-01-22 18:48:09.331218: step: 226/470, loss: 0.1382501870393753 2023-01-22 18:48:10.107904: step: 228/470, loss: 0.08902169018983841 2023-01-22 18:48:10.947859: step: 230/470, loss: 0.1085016131401062 2023-01-22 18:48:11.643094: step: 232/470, loss: 0.084829181432724 2023-01-22 18:48:12.365895: step: 234/470, loss: 0.2627657353878021 2023-01-22 18:48:13.119766: step: 236/470, loss: 0.08875922858715057 2023-01-22 18:48:13.841411: step: 238/470, loss: 0.11912406980991364 2023-01-22 18:48:14.535683: step: 240/470, loss: 0.061584122478961945 2023-01-22 18:48:15.204836: step: 242/470, loss: 0.026997093111276627 2023-01-22 18:48:15.936758: step: 244/470, loss: 0.09223021566867828 2023-01-22 18:48:16.709151: step: 246/470, loss: 0.4238168001174927 2023-01-22 18:48:17.439557: step: 248/470, loss: 0.034468550235033035 2023-01-22 18:48:18.137675: step: 250/470, loss: 0.06773322820663452 2023-01-22 18:48:18.956386: step: 252/470, loss: 0.3193717300891876 2023-01-22 18:48:19.770514: step: 254/470, loss: 0.2410053014755249 2023-01-22 18:48:20.631754: step: 256/470, loss: 0.07392468303442001 2023-01-22 18:48:21.398645: step: 258/470, loss: 0.05369978025555611 2023-01-22 18:48:22.156130: step: 260/470, loss: 0.11789437383413315 2023-01-22 18:48:22.866109: step: 262/470, loss: 0.06956873089075089 2023-01-22 18:48:23.586284: step: 264/470, loss: 0.0506395660340786 2023-01-22 18:48:24.305856: step: 266/470, loss: 0.05074666440486908 2023-01-22 18:48:25.003628: step: 268/470, loss: 0.37575647234916687 2023-01-22 18:48:25.714185: step: 270/470, loss: 0.05577372387051582 2023-01-22 18:48:26.458471: step: 272/470, loss: 0.3809775114059448 2023-01-22 18:48:27.133883: step: 274/470, loss: 0.1822841912508011 2023-01-22 18:48:27.894743: step: 276/470, loss: 0.12010300159454346 2023-01-22 18:48:28.636328: step: 278/470, loss: 0.2520848214626312 2023-01-22 18:48:29.429368: step: 280/470, loss: 0.057388413697481155 2023-01-22 18:48:30.225333: step: 282/470, loss: 0.0458437018096447 2023-01-22 18:48:30.958791: step: 284/470, loss: 0.025038981810212135 2023-01-22 18:48:31.653529: step: 286/470, loss: 0.025928793475031853 2023-01-22 18:48:32.421252: step: 288/470, loss: 0.12468525022268295 2023-01-22 18:48:33.166598: step: 290/470, loss: 0.1737351417541504 2023-01-22 18:48:33.869473: step: 292/470, loss: 0.07722808420658112 2023-01-22 18:48:34.614013: step: 294/470, loss: 0.12904956936836243 2023-01-22 18:48:35.303223: step: 296/470, loss: 0.011522680521011353 2023-01-22 18:48:36.010304: step: 298/470, loss: 0.06235891208052635 2023-01-22 18:48:36.727238: step: 300/470, loss: 0.09630980342626572 2023-01-22 18:48:37.478045: step: 302/470, loss: 0.07960200309753418 2023-01-22 18:48:38.233321: step: 304/470, loss: 0.11289746314287186 2023-01-22 18:48:39.009857: step: 306/470, loss: 0.03673465922474861 2023-01-22 18:48:39.694601: step: 308/470, loss: 0.047929637134075165 2023-01-22 18:48:40.379777: step: 310/470, loss: 0.18434062600135803 2023-01-22 18:48:41.185281: step: 312/470, loss: 0.10890356451272964 2023-01-22 18:48:41.847545: step: 314/470, loss: 0.09170568734407425 2023-01-22 18:48:42.583789: step: 316/470, loss: 0.07127133011817932 2023-01-22 18:48:43.320556: step: 318/470, loss: 0.09422150999307632 2023-01-22 18:48:44.006419: step: 320/470, loss: 0.0380314365029335 2023-01-22 18:48:44.736195: step: 322/470, loss: 0.047910191118717194 2023-01-22 18:48:45.535579: step: 324/470, loss: 0.18401391804218292 2023-01-22 18:48:46.297389: step: 326/470, loss: 0.12445008754730225 2023-01-22 18:48:47.019821: step: 328/470, loss: 0.21150392293930054 2023-01-22 18:48:47.785230: step: 330/470, loss: 0.053453728556632996 2023-01-22 18:48:48.471638: step: 332/470, loss: 0.13986550271511078 2023-01-22 18:48:49.149571: step: 334/470, loss: 0.09363771975040436 2023-01-22 18:48:49.879891: step: 336/470, loss: 0.0591520257294178 2023-01-22 18:48:50.639722: step: 338/470, loss: 0.05675804242491722 2023-01-22 18:48:51.392063: step: 340/470, loss: 0.5691471695899963 2023-01-22 18:48:52.141034: step: 342/470, loss: 0.19100329279899597 2023-01-22 18:48:52.947533: step: 344/470, loss: 0.34803155064582825 2023-01-22 18:48:53.605316: step: 346/470, loss: 0.404619425535202 2023-01-22 18:48:54.346261: step: 348/470, loss: 0.04862171784043312 2023-01-22 18:48:55.097673: step: 350/470, loss: 0.15870419144630432 2023-01-22 18:48:55.799889: step: 352/470, loss: 0.0585881844162941 2023-01-22 18:48:56.545429: step: 354/470, loss: 0.09602773189544678 2023-01-22 18:48:57.290091: step: 356/470, loss: 0.0472840778529644 2023-01-22 18:48:58.049182: step: 358/470, loss: 0.05619442090392113 2023-01-22 18:48:58.901672: step: 360/470, loss: 0.11173807084560394 2023-01-22 18:48:59.594828: step: 362/470, loss: 0.26064473390579224 2023-01-22 18:49:00.318635: step: 364/470, loss: 0.17030306160449982 2023-01-22 18:49:01.198530: step: 366/470, loss: 0.06355582922697067 2023-01-22 18:49:01.897442: step: 368/470, loss: 0.6005545258522034 2023-01-22 18:49:02.643516: step: 370/470, loss: 0.24275268614292145 2023-01-22 18:49:03.441383: step: 372/470, loss: 0.09774607419967651 2023-01-22 18:49:04.151906: step: 374/470, loss: 0.2669150233268738 2023-01-22 18:49:04.864196: step: 376/470, loss: 0.10955331474542618 2023-01-22 18:49:05.549011: step: 378/470, loss: 0.28141841292381287 2023-01-22 18:49:06.316365: step: 380/470, loss: 0.05212775245308876 2023-01-22 18:49:06.994186: step: 382/470, loss: 0.1570778340101242 2023-01-22 18:49:07.700736: step: 384/470, loss: 0.09866324067115784 2023-01-22 18:49:08.378316: step: 386/470, loss: 0.17579655349254608 2023-01-22 18:49:09.091627: step: 388/470, loss: 0.17191143333911896 2023-01-22 18:49:09.774547: step: 390/470, loss: 0.10095331817865372 2023-01-22 18:49:10.492844: step: 392/470, loss: 0.07796035706996918 2023-01-22 18:49:11.295989: step: 394/470, loss: 0.10861441493034363 2023-01-22 18:49:12.025261: step: 396/470, loss: 0.15292930603027344 2023-01-22 18:49:12.757223: step: 398/470, loss: 0.058260828256607056 2023-01-22 18:49:13.551573: step: 400/470, loss: 0.058411069214344025 2023-01-22 18:49:14.288820: step: 402/470, loss: 0.0554041787981987 2023-01-22 18:49:14.987369: step: 404/470, loss: 1.0163735151290894 2023-01-22 18:49:15.805524: step: 406/470, loss: 0.3198876678943634 2023-01-22 18:49:16.510698: step: 408/470, loss: 0.0814136192202568 2023-01-22 18:49:17.254192: step: 410/470, loss: 0.7785913944244385 2023-01-22 18:49:18.022047: step: 412/470, loss: 0.06416447460651398 2023-01-22 18:49:18.864251: step: 414/470, loss: 0.26347091794013977 2023-01-22 18:49:19.608972: step: 416/470, loss: 0.08544527739286423 2023-01-22 18:49:20.370612: step: 418/470, loss: 0.45779088139533997 2023-01-22 18:49:21.105823: step: 420/470, loss: 0.9352731108665466 2023-01-22 18:49:21.922503: step: 422/470, loss: 0.1798955500125885 2023-01-22 18:49:22.723448: step: 424/470, loss: 0.04317692294716835 2023-01-22 18:49:23.533916: step: 426/470, loss: 0.152594193816185 2023-01-22 18:49:24.313082: step: 428/470, loss: 0.45149144530296326 2023-01-22 18:49:25.065368: step: 430/470, loss: 0.06854032725095749 2023-01-22 18:49:25.773188: step: 432/470, loss: 0.15352515876293182 2023-01-22 18:49:26.468949: step: 434/470, loss: 0.24940556287765503 2023-01-22 18:49:27.192756: step: 436/470, loss: 0.04720636084675789 2023-01-22 18:49:27.869636: step: 438/470, loss: 0.09284143149852753 2023-01-22 18:49:28.638739: step: 440/470, loss: 0.05546830594539642 2023-01-22 18:49:29.322648: step: 442/470, loss: 0.04827970266342163 2023-01-22 18:49:30.043807: step: 444/470, loss: 0.0933823436498642 2023-01-22 18:49:30.691488: step: 446/470, loss: 0.2462373822927475 2023-01-22 18:49:31.410046: step: 448/470, loss: 0.14782100915908813 2023-01-22 18:49:32.145174: step: 450/470, loss: 0.04853854700922966 2023-01-22 18:49:32.875492: step: 452/470, loss: 0.4875885844230652 2023-01-22 18:49:33.676933: step: 454/470, loss: 0.29292890429496765 2023-01-22 18:49:34.434920: step: 456/470, loss: 0.07768307626247406 2023-01-22 18:49:35.187537: step: 458/470, loss: 0.29037296772003174 2023-01-22 18:49:35.922122: step: 460/470, loss: 0.05874329432845116 2023-01-22 18:49:36.640632: step: 462/470, loss: 0.1875893473625183 2023-01-22 18:49:37.432340: step: 464/470, loss: 0.04672089219093323 2023-01-22 18:49:38.163031: step: 466/470, loss: 0.3265398144721985 2023-01-22 18:49:38.859735: step: 468/470, loss: 0.11972032487392426 2023-01-22 18:49:39.478353: step: 470/470, loss: 0.11014973372220993 2023-01-22 18:49:40.240175: step: 472/470, loss: 0.06904499232769012 2023-01-22 18:49:41.035081: step: 474/470, loss: 0.17454054951667786 2023-01-22 18:49:41.803172: step: 476/470, loss: 0.07249860465526581 2023-01-22 18:49:42.517590: step: 478/470, loss: 0.08408724516630173 2023-01-22 18:49:43.302703: step: 480/470, loss: 0.15881037712097168 2023-01-22 18:49:44.058900: step: 482/470, loss: 0.08231707662343979 2023-01-22 18:49:44.789105: step: 484/470, loss: 0.09459856152534485 2023-01-22 18:49:45.529196: step: 486/470, loss: 0.8342221975326538 2023-01-22 18:49:46.270525: step: 488/470, loss: 0.08584868907928467 2023-01-22 18:49:46.935901: step: 490/470, loss: 0.131908118724823 2023-01-22 18:49:47.664878: step: 492/470, loss: 0.06489690393209457 2023-01-22 18:49:48.448145: step: 494/470, loss: 0.1734696328639984 2023-01-22 18:49:49.174225: step: 496/470, loss: 0.17412090301513672 2023-01-22 18:49:49.995546: step: 498/470, loss: 0.01628013141453266 2023-01-22 18:49:50.671889: step: 500/470, loss: 0.02044498547911644 2023-01-22 18:49:51.356754: step: 502/470, loss: 0.1580786108970642 2023-01-22 18:49:52.045784: step: 504/470, loss: 0.0427110381424427 2023-01-22 18:49:52.800880: step: 506/470, loss: 0.06857065856456757 2023-01-22 18:49:53.474000: step: 508/470, loss: 0.9077308177947998 2023-01-22 18:49:54.136413: step: 510/470, loss: 0.12271424382925034 2023-01-22 18:49:54.866982: step: 512/470, loss: 0.08618942648172379 2023-01-22 18:49:55.585292: step: 514/470, loss: 0.09517448395490646 2023-01-22 18:49:56.310623: step: 516/470, loss: 0.0746607780456543 2023-01-22 18:49:56.990029: step: 518/470, loss: 0.06032832711935043 2023-01-22 18:49:57.770893: step: 520/470, loss: 0.07779128104448318 2023-01-22 18:49:58.573728: step: 522/470, loss: 0.2717607021331787 2023-01-22 18:49:59.228482: step: 524/470, loss: 0.1430988907814026 2023-01-22 18:49:59.944901: step: 526/470, loss: 0.0658825933933258 2023-01-22 18:50:00.706012: step: 528/470, loss: 0.1088942140340805 2023-01-22 18:50:01.480763: step: 530/470, loss: 0.12835966050624847 2023-01-22 18:50:02.206256: step: 532/470, loss: 0.055738162249326706 2023-01-22 18:50:02.987708: step: 534/470, loss: 0.0650675892829895 2023-01-22 18:50:03.814679: step: 536/470, loss: 0.03095190040767193 2023-01-22 18:50:04.661111: step: 538/470, loss: 0.21852679550647736 2023-01-22 18:50:05.443607: step: 540/470, loss: 0.050854191184043884 2023-01-22 18:50:06.179706: step: 542/470, loss: 0.07948761433362961 2023-01-22 18:50:06.914500: step: 544/470, loss: 0.7797904014587402 2023-01-22 18:50:07.609531: step: 546/470, loss: 0.07549011707305908 2023-01-22 18:50:08.373577: step: 548/470, loss: 0.07842864841222763 2023-01-22 18:50:09.129819: step: 550/470, loss: 0.0789763554930687 2023-01-22 18:50:09.799114: step: 552/470, loss: 0.07106094062328339 2023-01-22 18:50:10.492417: step: 554/470, loss: 0.397691935300827 2023-01-22 18:50:11.342628: step: 556/470, loss: 0.378380686044693 2023-01-22 18:50:12.015396: step: 558/470, loss: 0.07716850936412811 2023-01-22 18:50:12.691164: step: 560/470, loss: 0.08261480927467346 2023-01-22 18:50:13.448538: step: 562/470, loss: 0.1947333961725235 2023-01-22 18:50:14.250174: step: 564/470, loss: 0.07653369754552841 2023-01-22 18:50:15.008343: step: 566/470, loss: 0.13547946512699127 2023-01-22 18:50:15.750453: step: 568/470, loss: 0.03947483003139496 2023-01-22 18:50:16.543606: step: 570/470, loss: 0.543188750743866 2023-01-22 18:50:17.266652: step: 572/470, loss: 0.1665872037410736 2023-01-22 18:50:17.955417: step: 574/470, loss: 5.638265609741211 2023-01-22 18:50:18.629363: step: 576/470, loss: 0.11097951233386993 2023-01-22 18:50:19.358589: step: 578/470, loss: 0.2536165714263916 2023-01-22 18:50:20.059368: step: 580/470, loss: 0.029628895223140717 2023-01-22 18:50:20.762608: step: 582/470, loss: 0.19913624227046967 2023-01-22 18:50:21.526628: step: 584/470, loss: 0.2235163003206253 2023-01-22 18:50:22.306827: step: 586/470, loss: 0.10956007242202759 2023-01-22 18:50:23.021138: step: 588/470, loss: 0.039578016847372055 2023-01-22 18:50:23.728295: step: 590/470, loss: 0.12435401976108551 2023-01-22 18:50:24.445634: step: 592/470, loss: 0.21014420688152313 2023-01-22 18:50:25.087273: step: 594/470, loss: 0.012091286480426788 2023-01-22 18:50:25.883915: step: 596/470, loss: 0.08850495517253876 2023-01-22 18:50:26.656918: step: 598/470, loss: 0.05613786727190018 2023-01-22 18:50:27.442200: step: 600/470, loss: 0.3040921986103058 2023-01-22 18:50:28.160006: step: 602/470, loss: 0.24259580671787262 2023-01-22 18:50:28.974840: step: 604/470, loss: 0.006539918482303619 2023-01-22 18:50:29.681395: step: 606/470, loss: 0.029432980343699455 2023-01-22 18:50:30.443599: step: 608/470, loss: 0.3112735450267792 2023-01-22 18:50:31.247253: step: 610/470, loss: 0.18472446501255035 2023-01-22 18:50:32.063663: step: 612/470, loss: 0.03998330980539322 2023-01-22 18:50:32.806861: step: 614/470, loss: 0.028782224282622337 2023-01-22 18:50:33.546961: step: 616/470, loss: 0.10626031458377838 2023-01-22 18:50:34.348138: step: 618/470, loss: 0.03874929994344711 2023-01-22 18:50:35.160555: step: 620/470, loss: 0.05607333034276962 2023-01-22 18:50:35.869474: step: 622/470, loss: 0.03576376661658287 2023-01-22 18:50:36.615933: step: 624/470, loss: 0.10499916970729828 2023-01-22 18:50:37.383075: step: 626/470, loss: 0.09289852529764175 2023-01-22 18:50:38.122619: step: 628/470, loss: 0.1215863972902298 2023-01-22 18:50:38.837597: step: 630/470, loss: 0.02862360328435898 2023-01-22 18:50:39.617592: step: 632/470, loss: 0.05546386539936066 2023-01-22 18:50:40.307336: step: 634/470, loss: 0.0644495040178299 2023-01-22 18:50:41.097563: step: 636/470, loss: 0.3706085681915283 2023-01-22 18:50:41.834877: step: 638/470, loss: 0.057276401668787 2023-01-22 18:50:42.638840: step: 640/470, loss: 0.04506406560540199 2023-01-22 18:50:43.371949: step: 642/470, loss: 0.15678298473358154 2023-01-22 18:50:44.147555: step: 644/470, loss: 0.11337969452142715 2023-01-22 18:50:44.947793: step: 646/470, loss: 0.5968871116638184 2023-01-22 18:50:45.783763: step: 648/470, loss: 0.04819050803780556 2023-01-22 18:50:46.498556: step: 650/470, loss: 0.09025636315345764 2023-01-22 18:50:47.199774: step: 652/470, loss: 1.1916296482086182 2023-01-22 18:50:47.992206: step: 654/470, loss: 0.03702091798186302 2023-01-22 18:50:48.787008: step: 656/470, loss: 0.12011308968067169 2023-01-22 18:50:49.534009: step: 658/470, loss: 0.4539858102798462 2023-01-22 18:50:50.353936: step: 660/470, loss: 0.09682758152484894 2023-01-22 18:50:51.083882: step: 662/470, loss: 0.05873757600784302 2023-01-22 18:50:51.835863: step: 664/470, loss: 0.10081055015325546 2023-01-22 18:50:52.606911: step: 666/470, loss: 0.06736086308956146 2023-01-22 18:50:53.369513: step: 668/470, loss: 0.22437730431556702 2023-01-22 18:50:54.164444: step: 670/470, loss: 0.11580760031938553 2023-01-22 18:50:54.936760: step: 672/470, loss: 0.02462395839393139 2023-01-22 18:50:55.626521: step: 674/470, loss: 0.1718244105577469 2023-01-22 18:50:56.389588: step: 676/470, loss: 0.02999183163046837 2023-01-22 18:50:57.151629: step: 678/470, loss: 0.06906621158123016 2023-01-22 18:50:57.909281: step: 680/470, loss: 0.4289069175720215 2023-01-22 18:50:58.688129: step: 682/470, loss: 0.26691025495529175 2023-01-22 18:50:59.500173: step: 684/470, loss: 0.11220283061265945 2023-01-22 18:51:00.279536: step: 686/470, loss: 0.0480489581823349 2023-01-22 18:51:01.127825: step: 688/470, loss: 0.7784223556518555 2023-01-22 18:51:01.902138: step: 690/470, loss: 0.012960987165570259 2023-01-22 18:51:02.800985: step: 692/470, loss: 0.5317075252532959 2023-01-22 18:51:03.534592: step: 694/470, loss: 0.17809821665287018 2023-01-22 18:51:04.330582: step: 696/470, loss: 0.07087529450654984 2023-01-22 18:51:05.072611: step: 698/470, loss: 0.19641615450382233 2023-01-22 18:51:05.760672: step: 700/470, loss: 0.08749356865882874 2023-01-22 18:51:06.558586: step: 702/470, loss: 0.09865260124206543 2023-01-22 18:51:07.327831: step: 704/470, loss: 0.11657115072011948 2023-01-22 18:51:08.065417: step: 706/470, loss: 0.15794336795806885 2023-01-22 18:51:08.770053: step: 708/470, loss: 0.10114340484142303 2023-01-22 18:51:09.489025: step: 710/470, loss: 0.214201882481575 2023-01-22 18:51:10.296459: step: 712/470, loss: 0.04397993162274361 2023-01-22 18:51:10.976944: step: 714/470, loss: 0.1239086389541626 2023-01-22 18:51:11.679686: step: 716/470, loss: 0.11514618992805481 2023-01-22 18:51:12.411959: step: 718/470, loss: 0.09257897734642029 2023-01-22 18:51:13.127682: step: 720/470, loss: 0.25863686203956604 2023-01-22 18:51:13.876872: step: 722/470, loss: 0.03553589805960655 2023-01-22 18:51:14.660984: step: 724/470, loss: 0.16370436549186707 2023-01-22 18:51:15.383681: step: 726/470, loss: 0.06427986919879913 2023-01-22 18:51:16.152771: step: 728/470, loss: 0.08685880899429321 2023-01-22 18:51:16.937590: step: 730/470, loss: 0.08242062479257584 2023-01-22 18:51:17.769163: step: 732/470, loss: 0.03324902057647705 2023-01-22 18:51:18.525135: step: 734/470, loss: 0.044015150517225266 2023-01-22 18:51:19.299180: step: 736/470, loss: 0.06782012432813644 2023-01-22 18:51:19.978371: step: 738/470, loss: 0.06624935567378998 2023-01-22 18:51:20.691067: step: 740/470, loss: 0.09224491566419601 2023-01-22 18:51:21.439675: step: 742/470, loss: 0.10784140229225159 2023-01-22 18:51:22.115155: step: 744/470, loss: 0.0007243788568302989 2023-01-22 18:51:22.802035: step: 746/470, loss: 0.0974595695734024 2023-01-22 18:51:23.482817: step: 748/470, loss: 0.2744196653366089 2023-01-22 18:51:24.198244: step: 750/470, loss: 0.013780027627944946 2023-01-22 18:51:24.873903: step: 752/470, loss: 0.11913798004388809 2023-01-22 18:51:25.662685: step: 754/470, loss: 0.21007420122623444 2023-01-22 18:51:26.426459: step: 756/470, loss: 0.15533891320228577 2023-01-22 18:51:27.204777: step: 758/470, loss: 3.9849534034729004 2023-01-22 18:51:27.840403: step: 760/470, loss: 3.1590588092803955 2023-01-22 18:51:28.570587: step: 762/470, loss: 0.1593295782804489 2023-01-22 18:51:29.214447: step: 764/470, loss: 0.06664041429758072 2023-01-22 18:51:29.962885: step: 766/470, loss: 0.26761144399642944 2023-01-22 18:51:30.731385: step: 768/470, loss: 0.09391117095947266 2023-01-22 18:51:31.462511: step: 770/470, loss: 0.043674781918525696 2023-01-22 18:51:32.309089: step: 772/470, loss: 0.14461608231067657 2023-01-22 18:51:33.006785: step: 774/470, loss: 0.23033620417118073 2023-01-22 18:51:33.756469: step: 776/470, loss: 0.05211387947201729 2023-01-22 18:51:34.472642: step: 778/470, loss: 0.09210966527462006 2023-01-22 18:51:35.198906: step: 780/470, loss: 0.0924573689699173 2023-01-22 18:51:35.958445: step: 782/470, loss: 0.10759110748767853 2023-01-22 18:51:36.686607: step: 784/470, loss: 0.1636630892753601 2023-01-22 18:51:37.406952: step: 786/470, loss: 0.08606382459402084 2023-01-22 18:51:38.161173: step: 788/470, loss: 0.10852282494306564 2023-01-22 18:51:38.901861: step: 790/470, loss: 0.7059446573257446 2023-01-22 18:51:39.641639: step: 792/470, loss: 0.03020882047712803 2023-01-22 18:51:40.381164: step: 794/470, loss: 0.05972389131784439 2023-01-22 18:51:41.141583: step: 796/470, loss: 0.0445023775100708 2023-01-22 18:51:41.903868: step: 798/470, loss: 0.07505831122398376 2023-01-22 18:51:42.703317: step: 800/470, loss: 0.07195542752742767 2023-01-22 18:51:43.353328: step: 802/470, loss: 0.23525984585285187 2023-01-22 18:51:44.138811: step: 804/470, loss: 0.0642390102148056 2023-01-22 18:51:44.871265: step: 806/470, loss: 0.048134271055459976 2023-01-22 18:51:45.581533: step: 808/470, loss: 0.02630416862666607 2023-01-22 18:51:46.308905: step: 810/470, loss: 0.03595459833741188 2023-01-22 18:51:47.038144: step: 812/470, loss: 0.5879958868026733 2023-01-22 18:51:47.761974: step: 814/470, loss: 0.025413349270820618 2023-01-22 18:51:48.457428: step: 816/470, loss: 0.1703702062368393 2023-01-22 18:51:49.191062: step: 818/470, loss: 0.14653141796588898 2023-01-22 18:51:49.852308: step: 820/470, loss: 0.026079224422574043 2023-01-22 18:51:50.621382: step: 822/470, loss: 0.06979137659072876 2023-01-22 18:51:51.500906: step: 824/470, loss: 0.15548716485500336 2023-01-22 18:51:52.295834: step: 826/470, loss: 0.10300223529338837 2023-01-22 18:51:52.990263: step: 828/470, loss: 0.10399255156517029 2023-01-22 18:51:53.787474: step: 830/470, loss: 0.06566653400659561 2023-01-22 18:51:54.508207: step: 832/470, loss: 0.15286491811275482 2023-01-22 18:51:55.275739: step: 834/470, loss: 0.0035868631675839424 2023-01-22 18:51:56.108599: step: 836/470, loss: 0.10651691257953644 2023-01-22 18:51:56.834185: step: 838/470, loss: 0.08586788922548294 2023-01-22 18:51:57.577758: step: 840/470, loss: 0.09690944850444794 2023-01-22 18:51:58.349896: step: 842/470, loss: 0.08737190812826157 2023-01-22 18:51:59.120913: step: 844/470, loss: 0.5851885676383972 2023-01-22 18:51:59.761029: step: 846/470, loss: 0.05255848914384842 2023-01-22 18:52:00.548068: step: 848/470, loss: 0.055961720645427704 2023-01-22 18:52:01.292803: step: 850/470, loss: 0.1446704864501953 2023-01-22 18:52:02.044379: step: 852/470, loss: 0.19530963897705078 2023-01-22 18:52:02.730360: step: 854/470, loss: 0.10102560371160507 2023-01-22 18:52:03.446418: step: 856/470, loss: 0.047898419201374054 2023-01-22 18:52:04.130211: step: 858/470, loss: 0.2797347903251648 2023-01-22 18:52:04.858245: step: 860/470, loss: 0.021785497665405273 2023-01-22 18:52:05.550866: step: 862/470, loss: 0.08698924630880356 2023-01-22 18:52:06.260137: step: 864/470, loss: 0.06238389387726784 2023-01-22 18:52:06.969808: step: 866/470, loss: 0.03106139786541462 2023-01-22 18:52:07.784680: step: 868/470, loss: 0.40585944056510925 2023-01-22 18:52:08.565886: step: 870/470, loss: 0.40325307846069336 2023-01-22 18:52:09.274388: step: 872/470, loss: 0.04273195564746857 2023-01-22 18:52:10.030720: step: 874/470, loss: 0.04423359036445618 2023-01-22 18:52:10.809207: step: 876/470, loss: 0.052610598504543304 2023-01-22 18:52:11.570418: step: 878/470, loss: 0.06774067133665085 2023-01-22 18:52:12.347455: step: 880/470, loss: 0.14847953617572784 2023-01-22 18:52:13.040671: step: 882/470, loss: 0.24171356856822968 2023-01-22 18:52:13.732695: step: 884/470, loss: 0.03664424270391464 2023-01-22 18:52:14.483530: step: 886/470, loss: 0.32642123103141785 2023-01-22 18:52:15.229202: step: 888/470, loss: 0.09348136186599731 2023-01-22 18:52:15.964222: step: 890/470, loss: 0.07769238203763962 2023-01-22 18:52:16.722818: step: 892/470, loss: 0.02832121029496193 2023-01-22 18:52:17.527188: step: 894/470, loss: 0.07402808964252472 2023-01-22 18:52:18.186525: step: 896/470, loss: 0.04894760623574257 2023-01-22 18:52:18.897117: step: 898/470, loss: 0.04703076183795929 2023-01-22 18:52:19.637775: step: 900/470, loss: 0.037153225392103195 2023-01-22 18:52:20.371317: step: 902/470, loss: 0.21902887523174286 2023-01-22 18:52:21.080044: step: 904/470, loss: 0.0130607383325696 2023-01-22 18:52:21.786224: step: 906/470, loss: 0.08418157696723938 2023-01-22 18:52:22.601201: step: 908/470, loss: 0.036279212683439255 2023-01-22 18:52:23.348059: step: 910/470, loss: 0.05508041754364967 2023-01-22 18:52:24.087879: step: 912/470, loss: 0.07078361511230469 2023-01-22 18:52:24.758052: step: 914/470, loss: 0.050121258944272995 2023-01-22 18:52:25.460039: step: 916/470, loss: 0.05768989026546478 2023-01-22 18:52:26.246754: step: 918/470, loss: 0.2717893123626709 2023-01-22 18:52:26.952069: step: 920/470, loss: 0.1305137723684311 2023-01-22 18:52:27.752107: step: 922/470, loss: 0.10596335679292679 2023-01-22 18:52:28.521494: step: 924/470, loss: 0.16074761748313904 2023-01-22 18:52:29.203520: step: 926/470, loss: 0.34018227458000183 2023-01-22 18:52:29.946382: step: 928/470, loss: 0.05126188322901726 2023-01-22 18:52:30.625718: step: 930/470, loss: 0.15903814136981964 2023-01-22 18:52:31.420700: step: 932/470, loss: 0.0827934592962265 2023-01-22 18:52:32.178818: step: 934/470, loss: 0.19107471406459808 2023-01-22 18:52:32.903786: step: 936/470, loss: 0.11879337579011917 2023-01-22 18:52:33.622849: step: 938/470, loss: 0.07795053720474243 2023-01-22 18:52:34.356870: step: 940/470, loss: 0.026459183543920517 2023-01-22 18:52:35.164958: step: 942/470, loss: 0.029973674565553665 ================================================== Loss: 0.184 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29003706543967284, 'r': 0.35883143580012655, 'f1': 0.32078739044387905}, 'combined': 0.23636965611654245, 'epoch': 16} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33212946596293325, 'r': 0.35892953814438033, 'f1': 0.34500983306398886}, 'combined': 0.24030535636297734, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2848654123876765, 'r': 0.35459527614101666, 'f1': 0.3159285046936869}, 'combined': 0.23278942451113768, 'epoch': 16} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34234948544882304, 'r': 0.36208144426431715, 'f1': 0.3519391068899666}, 'combined': 0.2451317162417678, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26665851803563667, 'r': 0.3582433221427529, 'f1': 0.3057396449704142}, 'combined': 0.22528184366241047, 'epoch': 16} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32380480189265964, 'r': 0.36890729591229043, 'f1': 0.34488773690587726}, 'combined': 0.2402203142628001, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20149253731343283, 'r': 0.38571428571428573, 'f1': 0.2647058823529412}, 'combined': 0.1764705882352941, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2804878048780488, 'r': 0.5, 'f1': 0.359375}, 'combined': 0.1796875, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40625, 'r': 0.33620689655172414, 'f1': 0.36792452830188677}, 'combined': 0.2452830188679245, 'epoch': 16} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2848654123876765, 'r': 0.35459527614101666, 'f1': 0.3159285046936869}, 'combined': 0.23278942451113768, 'epoch': 16} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34234948544882304, 'r': 0.36208144426431715, 'f1': 0.3519391068899666}, 'combined': 0.2451317162417678, 'epoch': 16} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2804878048780488, 'r': 0.5, 'f1': 0.359375}, 'combined': 0.1796875, 'epoch': 16} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2747616086235489, 'r': 0.31438567362428843, 'f1': 0.29324115044247784}, 'combined': 0.21607242664182577, 'epoch': 14} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3377919490575152, 'r': 0.368618303678518, 'f1': 0.3525325256126204}, 'combined': 0.24554504271525798, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5416666666666666, 'r': 0.33620689655172414, 'f1': 0.41489361702127664}, 'combined': 0.2765957446808511, 'epoch': 14} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:55:20.100406: step: 2/470, loss: 0.4924387037754059 2023-01-22 18:55:20.812458: step: 4/470, loss: 0.0939476415514946 2023-01-22 18:55:21.576743: step: 6/470, loss: 0.11822464317083359 2023-01-22 18:55:22.219545: step: 8/470, loss: 3.426934242248535 2023-01-22 18:55:22.932570: step: 10/470, loss: 0.07467096298933029 2023-01-22 18:55:23.643430: step: 12/470, loss: 0.107358917593956 2023-01-22 18:55:24.379236: step: 14/470, loss: 0.07628848403692245 2023-01-22 18:55:25.150204: step: 16/470, loss: 0.06268688291311264 2023-01-22 18:55:25.909141: step: 18/470, loss: 0.10565035790205002 2023-01-22 18:55:26.703069: step: 20/470, loss: 0.4389980733394623 2023-01-22 18:55:27.393748: step: 22/470, loss: 0.050272297114133835 2023-01-22 18:55:28.105423: step: 24/470, loss: 0.02019389532506466 2023-01-22 18:55:28.807774: step: 26/470, loss: 0.1535024642944336 2023-01-22 18:55:29.458710: step: 28/470, loss: 0.09952471405267715 2023-01-22 18:55:30.207236: step: 30/470, loss: 0.0704805925488472 2023-01-22 18:55:30.878778: step: 32/470, loss: 0.28921636939048767 2023-01-22 18:55:31.644058: step: 34/470, loss: 0.090544693171978 2023-01-22 18:55:32.425297: step: 36/470, loss: 0.16174203157424927 2023-01-22 18:55:33.127417: step: 38/470, loss: 0.01790589466691017 2023-01-22 18:55:33.988472: step: 40/470, loss: 0.06238782778382301 2023-01-22 18:55:34.763133: step: 42/470, loss: 0.017772074788808823 2023-01-22 18:55:35.481836: step: 44/470, loss: 0.17414626479148865 2023-01-22 18:55:36.207644: step: 46/470, loss: 0.08931662142276764 2023-01-22 18:55:36.949206: step: 48/470, loss: 0.1393406093120575 2023-01-22 18:55:37.783372: step: 50/470, loss: 0.0651363730430603 2023-01-22 18:55:38.511769: step: 52/470, loss: 0.0816269963979721 2023-01-22 18:55:39.215880: step: 54/470, loss: 0.057508260011672974 2023-01-22 18:55:39.967739: step: 56/470, loss: 0.04575724899768829 2023-01-22 18:55:40.635125: step: 58/470, loss: 0.059317465871572495 2023-01-22 18:55:41.379576: step: 60/470, loss: 0.016667962074279785 2023-01-22 18:55:42.081759: step: 62/470, loss: 0.04501299932599068 2023-01-22 18:55:43.035763: step: 64/470, loss: 0.04957332834601402 2023-01-22 18:55:43.754673: step: 66/470, loss: 0.06242368370294571 2023-01-22 18:55:44.434328: step: 68/470, loss: 0.3788084387779236 2023-01-22 18:55:45.132405: step: 70/470, loss: 0.01375067513436079 2023-01-22 18:55:45.877222: step: 72/470, loss: 0.010087433271110058 2023-01-22 18:55:46.643601: step: 74/470, loss: 0.04814882203936577 2023-01-22 18:55:47.290609: step: 76/470, loss: 0.025498254224658012 2023-01-22 18:55:48.032356: step: 78/470, loss: 0.03883810341358185 2023-01-22 18:55:48.713310: step: 80/470, loss: 0.020485306158661842 2023-01-22 18:55:49.443693: step: 82/470, loss: 0.09962893277406693 2023-01-22 18:55:50.219507: step: 84/470, loss: 0.08178018778562546 2023-01-22 18:55:50.898798: step: 86/470, loss: 0.058422110974788666 2023-01-22 18:55:51.626265: step: 88/470, loss: 0.073599673807621 2023-01-22 18:55:52.407382: step: 90/470, loss: 0.06374432146549225 2023-01-22 18:55:53.141745: step: 92/470, loss: 0.16310732066631317 2023-01-22 18:55:53.896947: step: 94/470, loss: 0.6309743523597717 2023-01-22 18:55:54.787260: step: 96/470, loss: 0.043083060532808304 2023-01-22 18:55:55.523786: step: 98/470, loss: 0.07802923768758774 2023-01-22 18:55:56.242817: step: 100/470, loss: 0.06232817471027374 2023-01-22 18:55:56.949942: step: 102/470, loss: 0.152653768658638 2023-01-22 18:55:57.688828: step: 104/470, loss: 0.054097384214401245 2023-01-22 18:55:58.484129: step: 106/470, loss: 0.04027635604143143 2023-01-22 18:55:59.206080: step: 108/470, loss: 0.03251076117157936 2023-01-22 18:55:59.906642: step: 110/470, loss: 0.0775684118270874 2023-01-22 18:56:00.718375: step: 112/470, loss: 0.04833029955625534 2023-01-22 18:56:01.423090: step: 114/470, loss: 0.07459627836942673 2023-01-22 18:56:02.141161: step: 116/470, loss: 0.01013647299259901 2023-01-22 18:56:02.877159: step: 118/470, loss: 0.07257718592882156 2023-01-22 18:56:03.639773: step: 120/470, loss: 0.22532156109809875 2023-01-22 18:56:04.328840: step: 122/470, loss: 0.5136581659317017 2023-01-22 18:56:04.991351: step: 124/470, loss: 0.02729104831814766 2023-01-22 18:56:05.801392: step: 126/470, loss: 0.07071585953235626 2023-01-22 18:56:06.581992: step: 128/470, loss: 0.06299882382154465 2023-01-22 18:56:07.260470: step: 130/470, loss: 0.07367923855781555 2023-01-22 18:56:07.977896: step: 132/470, loss: 0.10907070338726044 2023-01-22 18:56:08.753141: step: 134/470, loss: 0.10193345695734024 2023-01-22 18:56:09.449718: step: 136/470, loss: 0.4187318980693817 2023-01-22 18:56:10.157253: step: 138/470, loss: 0.07536610960960388 2023-01-22 18:56:10.925450: step: 140/470, loss: 0.14740565419197083 2023-01-22 18:56:11.637094: step: 142/470, loss: 0.13611826300621033 2023-01-22 18:56:12.415235: step: 144/470, loss: 0.5455102324485779 2023-01-22 18:56:13.132983: step: 146/470, loss: 0.11672738194465637 2023-01-22 18:56:13.821245: step: 148/470, loss: 0.07301642745733261 2023-01-22 18:56:14.624835: step: 150/470, loss: 0.07370822876691818 2023-01-22 18:56:15.342132: step: 152/470, loss: 0.24967250227928162 2023-01-22 18:56:16.150503: step: 154/470, loss: 0.012790908105671406 2023-01-22 18:56:16.844889: step: 156/470, loss: 0.1556771844625473 2023-01-22 18:56:17.557390: step: 158/470, loss: 0.06868446618318558 2023-01-22 18:56:18.314980: step: 160/470, loss: 0.06958885490894318 2023-01-22 18:56:19.093257: step: 162/470, loss: 0.03733561187982559 2023-01-22 18:56:19.791340: step: 164/470, loss: 0.1148039773106575 2023-01-22 18:56:20.458504: step: 166/470, loss: 0.5623202323913574 2023-01-22 18:56:21.219066: step: 168/470, loss: 0.6287685632705688 2023-01-22 18:56:21.953667: step: 170/470, loss: 0.11442598700523376 2023-01-22 18:56:22.694265: step: 172/470, loss: 0.06008310243487358 2023-01-22 18:56:23.474062: step: 174/470, loss: 0.06911762058734894 2023-01-22 18:56:24.210514: step: 176/470, loss: 0.005540335550904274 2023-01-22 18:56:24.905172: step: 178/470, loss: 0.07414489984512329 2023-01-22 18:56:25.633101: step: 180/470, loss: 0.07211507856845856 2023-01-22 18:56:26.512635: step: 182/470, loss: 0.050867147743701935 2023-01-22 18:56:27.194772: step: 184/470, loss: 0.013534078374505043 2023-01-22 18:56:27.976130: step: 186/470, loss: 0.05755781754851341 2023-01-22 18:56:28.740580: step: 188/470, loss: 0.7376323938369751 2023-01-22 18:56:29.429885: step: 190/470, loss: 0.023348214104771614 2023-01-22 18:56:30.218172: step: 192/470, loss: 0.1323942095041275 2023-01-22 18:56:30.934410: step: 194/470, loss: 0.5270007848739624 2023-01-22 18:56:31.661230: step: 196/470, loss: 0.01694687083363533 2023-01-22 18:56:32.462360: step: 198/470, loss: 0.04331256076693535 2023-01-22 18:56:33.297219: step: 200/470, loss: 0.1521039754152298 2023-01-22 18:56:33.973410: step: 202/470, loss: 0.05080660060048103 2023-01-22 18:56:34.627068: step: 204/470, loss: 0.23060721158981323 2023-01-22 18:56:35.272565: step: 206/470, loss: 0.08327927440404892 2023-01-22 18:56:35.977254: step: 208/470, loss: 0.1002572625875473 2023-01-22 18:56:36.676059: step: 210/470, loss: 0.051106516271829605 2023-01-22 18:56:37.439272: step: 212/470, loss: 0.05127272009849548 2023-01-22 18:56:38.163301: step: 214/470, loss: 0.0413568951189518 2023-01-22 18:56:38.955672: step: 216/470, loss: 0.04580258950591087 2023-01-22 18:56:39.708151: step: 218/470, loss: 0.10093220323324203 2023-01-22 18:56:40.467012: step: 220/470, loss: 0.12144112586975098 2023-01-22 18:56:41.211328: step: 222/470, loss: 0.06039506569504738 2023-01-22 18:56:41.905064: step: 224/470, loss: 0.028329022228717804 2023-01-22 18:56:42.584166: step: 226/470, loss: 0.16305898129940033 2023-01-22 18:56:43.309072: step: 228/470, loss: 0.10858103632926941 2023-01-22 18:56:44.003364: step: 230/470, loss: 0.05416190251708031 2023-01-22 18:56:44.803804: step: 232/470, loss: 0.03306606039404869 2023-01-22 18:56:45.601679: step: 234/470, loss: 0.08176732063293457 2023-01-22 18:56:46.325555: step: 236/470, loss: 0.4104897975921631 2023-01-22 18:56:47.069803: step: 238/470, loss: 0.257541686296463 2023-01-22 18:56:47.799706: step: 240/470, loss: 0.045913856476545334 2023-01-22 18:56:48.476606: step: 242/470, loss: 0.030991556122899055 2023-01-22 18:56:49.211744: step: 244/470, loss: 0.056961365044116974 2023-01-22 18:56:50.080207: step: 246/470, loss: 0.1344350129365921 2023-01-22 18:56:50.853876: step: 248/470, loss: 0.10876922309398651 2023-01-22 18:56:51.619350: step: 250/470, loss: 0.05064590647816658 2023-01-22 18:56:52.432740: step: 252/470, loss: 0.05507608503103256 2023-01-22 18:56:53.136738: step: 254/470, loss: 0.1291094422340393 2023-01-22 18:56:53.831397: step: 256/470, loss: 0.1127728670835495 2023-01-22 18:56:54.517429: step: 258/470, loss: 0.2618001699447632 2023-01-22 18:56:55.258533: step: 260/470, loss: 0.30301615595817566 2023-01-22 18:56:56.042768: step: 262/470, loss: 0.08459173142910004 2023-01-22 18:56:56.808970: step: 264/470, loss: 0.036896541714668274 2023-01-22 18:56:57.516320: step: 266/470, loss: 0.03432435914874077 2023-01-22 18:56:58.304883: step: 268/470, loss: 0.04738718271255493 2023-01-22 18:56:59.047915: step: 270/470, loss: 0.2793467342853546 2023-01-22 18:56:59.750582: step: 272/470, loss: 0.07192108035087585 2023-01-22 18:57:00.471379: step: 274/470, loss: 0.12388882040977478 2023-01-22 18:57:01.144894: step: 276/470, loss: 0.05751514807343483 2023-01-22 18:57:01.942637: step: 278/470, loss: 0.12290464341640472 2023-01-22 18:57:02.701755: step: 280/470, loss: 0.0717790424823761 2023-01-22 18:57:03.468256: step: 282/470, loss: 0.07550595700740814 2023-01-22 18:57:04.230012: step: 284/470, loss: 0.133597269654274 2023-01-22 18:57:04.966913: step: 286/470, loss: 0.14142487943172455 2023-01-22 18:57:05.618776: step: 288/470, loss: 0.17519471049308777 2023-01-22 18:57:06.426404: step: 290/470, loss: 0.04353666305541992 2023-01-22 18:57:07.215287: step: 292/470, loss: 1.747153639793396 2023-01-22 18:57:08.012949: step: 294/470, loss: 0.499929815530777 2023-01-22 18:57:08.838991: step: 296/470, loss: 0.07138823717832565 2023-01-22 18:57:09.645647: step: 298/470, loss: 0.03818880766630173 2023-01-22 18:57:10.357900: step: 300/470, loss: 0.29137733578681946 2023-01-22 18:57:11.096090: step: 302/470, loss: 0.43531379103660583 2023-01-22 18:57:11.932178: step: 304/470, loss: 0.06073610112071037 2023-01-22 18:57:12.646074: step: 306/470, loss: 0.12262983620166779 2023-01-22 18:57:13.385636: step: 308/470, loss: 0.2984396815299988 2023-01-22 18:57:14.125686: step: 310/470, loss: 0.5396106243133545 2023-01-22 18:57:14.947898: step: 312/470, loss: 0.14061273634433746 2023-01-22 18:57:15.687751: step: 314/470, loss: 0.28197863698005676 2023-01-22 18:57:16.391590: step: 316/470, loss: 0.04360943287611008 2023-01-22 18:57:17.065243: step: 318/470, loss: 0.0115542346611619 2023-01-22 18:57:17.775757: step: 320/470, loss: 0.1039966344833374 2023-01-22 18:57:18.527705: step: 322/470, loss: 0.08922134339809418 2023-01-22 18:57:19.234110: step: 324/470, loss: 0.08642390370368958 2023-01-22 18:57:20.011235: step: 326/470, loss: 0.05966321378946304 2023-01-22 18:57:20.882587: step: 328/470, loss: 0.0547308623790741 2023-01-22 18:57:21.624454: step: 330/470, loss: 0.08806410431861877 2023-01-22 18:57:22.369096: step: 332/470, loss: 0.07780762016773224 2023-01-22 18:57:23.040154: step: 334/470, loss: 0.08751654624938965 2023-01-22 18:57:23.800358: step: 336/470, loss: 0.07845862209796906 2023-01-22 18:57:24.565418: step: 338/470, loss: 0.034335438162088394 2023-01-22 18:57:25.387287: step: 340/470, loss: 0.028561709448695183 2023-01-22 18:57:26.145147: step: 342/470, loss: 0.10376818478107452 2023-01-22 18:57:26.877960: step: 344/470, loss: 0.05915750935673714 2023-01-22 18:57:27.643238: step: 346/470, loss: 0.25353285670280457 2023-01-22 18:57:28.401835: step: 348/470, loss: 0.13213659822940826 2023-01-22 18:57:29.121355: step: 350/470, loss: 0.31304770708084106 2023-01-22 18:57:29.831015: step: 352/470, loss: 0.1196952536702156 2023-01-22 18:57:30.691608: step: 354/470, loss: 0.11991266906261444 2023-01-22 18:57:31.408629: step: 356/470, loss: 0.03160081058740616 2023-01-22 18:57:32.121628: step: 358/470, loss: 0.11861895769834518 2023-01-22 18:57:32.900057: step: 360/470, loss: 0.06153372302651405 2023-01-22 18:57:33.641089: step: 362/470, loss: 0.08058642596006393 2023-01-22 18:57:34.387538: step: 364/470, loss: 0.13972775638103485 2023-01-22 18:57:35.150216: step: 366/470, loss: 0.12595215439796448 2023-01-22 18:57:35.891190: step: 368/470, loss: 0.05694999545812607 2023-01-22 18:57:36.635816: step: 370/470, loss: 0.08451858162879944 2023-01-22 18:57:37.359473: step: 372/470, loss: 0.09174233675003052 2023-01-22 18:57:38.073498: step: 374/470, loss: 0.00898781232535839 2023-01-22 18:57:38.825288: step: 376/470, loss: 0.025392355397343636 2023-01-22 18:57:39.591510: step: 378/470, loss: 0.04026934504508972 2023-01-22 18:57:40.338395: step: 380/470, loss: 0.01192416436970234 2023-01-22 18:57:41.108180: step: 382/470, loss: 0.0662471279501915 2023-01-22 18:57:41.862093: step: 384/470, loss: 0.08708631247282028 2023-01-22 18:57:42.580010: step: 386/470, loss: 0.01594570279121399 2023-01-22 18:57:43.340273: step: 388/470, loss: 0.15144769847393036 2023-01-22 18:57:44.133147: step: 390/470, loss: 0.03741728886961937 2023-01-22 18:57:44.857965: step: 392/470, loss: 1.6541204452514648 2023-01-22 18:57:45.518826: step: 394/470, loss: 0.07524050027132034 2023-01-22 18:57:46.284819: step: 396/470, loss: 0.004373232834041119 2023-01-22 18:57:47.030477: step: 398/470, loss: 0.12484033405780792 2023-01-22 18:57:47.830863: step: 400/470, loss: 0.1493765264749527 2023-01-22 18:57:48.478802: step: 402/470, loss: 0.029209822416305542 2023-01-22 18:57:49.174650: step: 404/470, loss: 0.07852840423583984 2023-01-22 18:57:49.881099: step: 406/470, loss: 0.03971928730607033 2023-01-22 18:57:50.511335: step: 408/470, loss: 0.027434857562184334 2023-01-22 18:57:51.247282: step: 410/470, loss: 0.149210125207901 2023-01-22 18:57:51.952787: step: 412/470, loss: 0.06711407005786896 2023-01-22 18:57:52.730136: step: 414/470, loss: 0.041621264070272446 2023-01-22 18:57:53.393634: step: 416/470, loss: 0.06607449799776077 2023-01-22 18:57:54.194928: step: 418/470, loss: 0.03028223291039467 2023-01-22 18:57:54.900602: step: 420/470, loss: 0.08194995671510696 2023-01-22 18:57:55.567822: step: 422/470, loss: 1.202500820159912 2023-01-22 18:57:56.194802: step: 424/470, loss: 0.03522910550236702 2023-01-22 18:57:56.874906: step: 426/470, loss: 0.11419682204723358 2023-01-22 18:57:57.636922: step: 428/470, loss: 0.027502695098519325 2023-01-22 18:57:58.294884: step: 430/470, loss: 0.1228659600019455 2023-01-22 18:57:58.991758: step: 432/470, loss: 0.013959257863461971 2023-01-22 18:57:59.800840: step: 434/470, loss: 0.12251380831003189 2023-01-22 18:58:00.603879: step: 436/470, loss: 0.03891543298959732 2023-01-22 18:58:01.396537: step: 438/470, loss: 0.058498311787843704 2023-01-22 18:58:02.131713: step: 440/470, loss: 0.02747928909957409 2023-01-22 18:58:02.863310: step: 442/470, loss: 0.10275592654943466 2023-01-22 18:58:03.607210: step: 444/470, loss: 0.5213198065757751 2023-01-22 18:58:04.291547: step: 446/470, loss: 0.19728517532348633 2023-01-22 18:58:05.041314: step: 448/470, loss: 0.4907250702381134 2023-01-22 18:58:05.789007: step: 450/470, loss: 0.10592763125896454 2023-01-22 18:58:06.509692: step: 452/470, loss: 0.0349123552441597 2023-01-22 18:58:07.308081: step: 454/470, loss: 0.05272674188017845 2023-01-22 18:58:08.031812: step: 456/470, loss: 0.0323164276778698 2023-01-22 18:58:08.772393: step: 458/470, loss: 0.08098090440034866 2023-01-22 18:58:09.506540: step: 460/470, loss: 0.07947229593992233 2023-01-22 18:58:10.282008: step: 462/470, loss: 0.194414883852005 2023-01-22 18:58:11.022793: step: 464/470, loss: 0.12346761673688889 2023-01-22 18:58:11.804641: step: 466/470, loss: 0.05517350882291794 2023-01-22 18:58:12.693053: step: 468/470, loss: 0.4561017155647278 2023-01-22 18:58:13.347507: step: 470/470, loss: 0.08983838558197021 2023-01-22 18:58:14.074529: step: 472/470, loss: 0.4218137264251709 2023-01-22 18:58:14.871289: step: 474/470, loss: 0.05566902086138725 2023-01-22 18:58:15.614238: step: 476/470, loss: 0.05252564325928688 2023-01-22 18:58:16.311933: step: 478/470, loss: 0.031113555654883385 2023-01-22 18:58:17.037928: step: 480/470, loss: 0.6434183716773987 2023-01-22 18:58:17.787282: step: 482/470, loss: 1.0721107721328735 2023-01-22 18:58:18.509911: step: 484/470, loss: 0.01686706766486168 2023-01-22 18:58:19.280923: step: 486/470, loss: 0.016208596527576447 2023-01-22 18:58:19.979494: step: 488/470, loss: 0.07484219968318939 2023-01-22 18:58:20.691664: step: 490/470, loss: 0.03733556717634201 2023-01-22 18:58:21.393384: step: 492/470, loss: 0.107573002576828 2023-01-22 18:58:22.042720: step: 494/470, loss: 0.06075252965092659 2023-01-22 18:58:22.830751: step: 496/470, loss: 0.058279551565647125 2023-01-22 18:58:23.638001: step: 498/470, loss: 0.05781916528940201 2023-01-22 18:58:24.434287: step: 500/470, loss: 0.13210691511631012 2023-01-22 18:58:25.173618: step: 502/470, loss: 1.0277457237243652 2023-01-22 18:58:25.889446: step: 504/470, loss: 0.04871448129415512 2023-01-22 18:58:26.608745: step: 506/470, loss: 0.5733938813209534 2023-01-22 18:58:27.353620: step: 508/470, loss: 0.016267672181129456 2023-01-22 18:58:28.088105: step: 510/470, loss: 0.05442097783088684 2023-01-22 18:58:28.856219: step: 512/470, loss: 1.0532729625701904 2023-01-22 18:58:29.643713: step: 514/470, loss: 0.0817883089184761 2023-01-22 18:58:30.453540: step: 516/470, loss: 0.08425731956958771 2023-01-22 18:58:31.217174: step: 518/470, loss: 0.1442107856273651 2023-01-22 18:58:32.052752: step: 520/470, loss: 0.5000361204147339 2023-01-22 18:58:32.796014: step: 522/470, loss: 0.05786876752972603 2023-01-22 18:58:33.464569: step: 524/470, loss: 0.1195589229464531 2023-01-22 18:58:34.225431: step: 526/470, loss: 0.06997283548116684 2023-01-22 18:58:34.964467: step: 528/470, loss: 0.020335132256150246 2023-01-22 18:58:35.693231: step: 530/470, loss: 0.08447545021772385 2023-01-22 18:58:36.445293: step: 532/470, loss: 0.04109674319624901 2023-01-22 18:58:37.188818: step: 534/470, loss: 0.11775404959917068 2023-01-22 18:58:37.917638: step: 536/470, loss: 0.1142866313457489 2023-01-22 18:58:38.676916: step: 538/470, loss: 0.3399909436702728 2023-01-22 18:58:39.363619: step: 540/470, loss: 0.04983226954936981 2023-01-22 18:58:40.082745: step: 542/470, loss: 0.038683708757162094 2023-01-22 18:58:40.792110: step: 544/470, loss: 0.00999883096665144 2023-01-22 18:58:41.519251: step: 546/470, loss: 0.03342743217945099 2023-01-22 18:58:42.292417: step: 548/470, loss: 0.11033465713262558 2023-01-22 18:58:43.107395: step: 550/470, loss: 0.04878013953566551 2023-01-22 18:58:43.870673: step: 552/470, loss: 0.09103644639253616 2023-01-22 18:58:44.544355: step: 554/470, loss: 0.015820780768990517 2023-01-22 18:58:45.308813: step: 556/470, loss: 0.07098133116960526 2023-01-22 18:58:46.059326: step: 558/470, loss: 0.5348267555236816 2023-01-22 18:58:46.824644: step: 560/470, loss: 0.06724416464567184 2023-01-22 18:58:47.622669: step: 562/470, loss: 0.05711890384554863 2023-01-22 18:58:48.378815: step: 564/470, loss: 0.025366008281707764 2023-01-22 18:58:49.085805: step: 566/470, loss: 0.030365778133273125 2023-01-22 18:58:49.817015: step: 568/470, loss: 0.08409339189529419 2023-01-22 18:58:50.532251: step: 570/470, loss: 1.9741016626358032 2023-01-22 18:58:51.180436: step: 572/470, loss: 0.0606938898563385 2023-01-22 18:58:51.991278: step: 574/470, loss: 0.08465118706226349 2023-01-22 18:58:52.707723: step: 576/470, loss: 0.03369426727294922 2023-01-22 18:58:53.503793: step: 578/470, loss: 0.042784154415130615 2023-01-22 18:58:54.152967: step: 580/470, loss: 0.11120610684156418 2023-01-22 18:58:54.865218: step: 582/470, loss: 0.11828941851854324 2023-01-22 18:58:55.534018: step: 584/470, loss: 0.08425047993659973 2023-01-22 18:58:56.274105: step: 586/470, loss: 0.03659561276435852 2023-01-22 18:58:56.982334: step: 588/470, loss: 0.050091128796339035 2023-01-22 18:58:57.643507: step: 590/470, loss: 0.022585172206163406 2023-01-22 18:58:58.450379: step: 592/470, loss: 0.1083977222442627 2023-01-22 18:58:59.138176: step: 594/470, loss: 0.008220894262194633 2023-01-22 18:58:59.813303: step: 596/470, loss: 0.12439019232988358 2023-01-22 18:59:00.644672: step: 598/470, loss: 0.027765508741140366 2023-01-22 18:59:01.347029: step: 600/470, loss: 0.036968786269426346 2023-01-22 18:59:02.161250: step: 602/470, loss: 0.07465256750583649 2023-01-22 18:59:02.830358: step: 604/470, loss: 0.01624050922691822 2023-01-22 18:59:03.575080: step: 606/470, loss: 0.012987270019948483 2023-01-22 18:59:04.448617: step: 608/470, loss: 0.05419148504734039 2023-01-22 18:59:05.214273: step: 610/470, loss: 0.1707671582698822 2023-01-22 18:59:05.945003: step: 612/470, loss: 0.08779844641685486 2023-01-22 18:59:06.783919: step: 614/470, loss: 0.12106174230575562 2023-01-22 18:59:07.512003: step: 616/470, loss: 0.5510255694389343 2023-01-22 18:59:08.329984: step: 618/470, loss: 0.07842077314853668 2023-01-22 18:59:09.006818: step: 620/470, loss: 0.08350328356027603 2023-01-22 18:59:09.691343: step: 622/470, loss: 0.1282799243927002 2023-01-22 18:59:10.429455: step: 624/470, loss: 0.08514758944511414 2023-01-22 18:59:11.174423: step: 626/470, loss: 0.08214154094457626 2023-01-22 18:59:11.853944: step: 628/470, loss: 0.02037135139107704 2023-01-22 18:59:12.601058: step: 630/470, loss: 0.0328313373029232 2023-01-22 18:59:13.354497: step: 632/470, loss: 0.12586846947669983 2023-01-22 18:59:14.156479: step: 634/470, loss: 1.1721528768539429 2023-01-22 18:59:14.952851: step: 636/470, loss: 0.06453651934862137 2023-01-22 18:59:15.708947: step: 638/470, loss: 0.12365590780973434 2023-01-22 18:59:16.430889: step: 640/470, loss: 0.23456744849681854 2023-01-22 18:59:17.166600: step: 642/470, loss: 0.06679598242044449 2023-01-22 18:59:18.070214: step: 644/470, loss: 0.30910104513168335 2023-01-22 18:59:18.786811: step: 646/470, loss: 0.06718228757381439 2023-01-22 18:59:19.579025: step: 648/470, loss: 0.07001607865095139 2023-01-22 18:59:20.225869: step: 650/470, loss: 0.05453366041183472 2023-01-22 18:59:21.013500: step: 652/470, loss: 0.08327239006757736 2023-01-22 18:59:21.723478: step: 654/470, loss: 0.4447418451309204 2023-01-22 18:59:22.432364: step: 656/470, loss: 0.08664055168628693 2023-01-22 18:59:23.184070: step: 658/470, loss: 0.038746707141399384 2023-01-22 18:59:24.063540: step: 660/470, loss: 0.8028762340545654 2023-01-22 18:59:24.768480: step: 662/470, loss: 0.03993918374180794 2023-01-22 18:59:25.499714: step: 664/470, loss: 0.029848841950297356 2023-01-22 18:59:26.159598: step: 666/470, loss: 0.09658187627792358 2023-01-22 18:59:26.879324: step: 668/470, loss: 0.06456496566534042 2023-01-22 18:59:27.642582: step: 670/470, loss: 0.04641230031847954 2023-01-22 18:59:28.381092: step: 672/470, loss: 0.09238805621862411 2023-01-22 18:59:29.079249: step: 674/470, loss: 0.056428153067827225 2023-01-22 18:59:29.777369: step: 676/470, loss: 0.16270509362220764 2023-01-22 18:59:30.493930: step: 678/470, loss: 0.07294663041830063 2023-01-22 18:59:31.271533: step: 680/470, loss: 0.1018824577331543 2023-01-22 18:59:32.023070: step: 682/470, loss: 0.3484339118003845 2023-01-22 18:59:32.798368: step: 684/470, loss: 0.10737051069736481 2023-01-22 18:59:33.570916: step: 686/470, loss: 0.05558640882372856 2023-01-22 18:59:34.266790: step: 688/470, loss: 0.13277633488178253 2023-01-22 18:59:35.020419: step: 690/470, loss: 0.08180245012044907 2023-01-22 18:59:35.754379: step: 692/470, loss: 0.06329744309186935 2023-01-22 18:59:36.485577: step: 694/470, loss: 0.14542846381664276 2023-01-22 18:59:37.194337: step: 696/470, loss: 0.0733979269862175 2023-01-22 18:59:37.936276: step: 698/470, loss: 0.08746114373207092 2023-01-22 18:59:38.740140: step: 700/470, loss: 0.0837959498167038 2023-01-22 18:59:39.447339: step: 702/470, loss: 0.20224621891975403 2023-01-22 18:59:40.143120: step: 704/470, loss: 0.35301804542541504 2023-01-22 18:59:40.923876: step: 706/470, loss: 0.06424695998430252 2023-01-22 18:59:41.683466: step: 708/470, loss: 0.08036937564611435 2023-01-22 18:59:42.385178: step: 710/470, loss: 0.022141428664326668 2023-01-22 18:59:43.124600: step: 712/470, loss: 0.16465261578559875 2023-01-22 18:59:43.868394: step: 714/470, loss: 0.03278358653187752 2023-01-22 18:59:44.593556: step: 716/470, loss: 0.1571766436100006 2023-01-22 18:59:45.295846: step: 718/470, loss: 0.24385596811771393 2023-01-22 18:59:45.997592: step: 720/470, loss: 0.11035379767417908 2023-01-22 18:59:46.716121: step: 722/470, loss: 0.1532362550497055 2023-01-22 18:59:47.528765: step: 724/470, loss: 0.11130030453205109 2023-01-22 18:59:48.290258: step: 726/470, loss: 0.3597228527069092 2023-01-22 18:59:49.159318: step: 728/470, loss: 0.02049565315246582 2023-01-22 18:59:49.993697: step: 730/470, loss: 0.11515626311302185 2023-01-22 18:59:50.750397: step: 732/470, loss: 0.020818457007408142 2023-01-22 18:59:51.489024: step: 734/470, loss: 0.06096053496003151 2023-01-22 18:59:52.200407: step: 736/470, loss: 0.061641495674848557 2023-01-22 18:59:52.888730: step: 738/470, loss: 0.09029534459114075 2023-01-22 18:59:53.543510: step: 740/470, loss: 0.07005026191473007 2023-01-22 18:59:54.270130: step: 742/470, loss: 0.07654117792844772 2023-01-22 18:59:54.987858: step: 744/470, loss: 0.06442206352949142 2023-01-22 18:59:55.714522: step: 746/470, loss: 0.6992735862731934 2023-01-22 18:59:56.459443: step: 748/470, loss: 0.060101427137851715 2023-01-22 18:59:57.179706: step: 750/470, loss: 0.25749671459198 2023-01-22 18:59:57.878544: step: 752/470, loss: 0.1867312639951706 2023-01-22 18:59:58.609130: step: 754/470, loss: 0.045231420546770096 2023-01-22 18:59:59.326573: step: 756/470, loss: 0.04085368663072586 2023-01-22 19:00:00.096943: step: 758/470, loss: 0.6277973055839539 2023-01-22 19:00:00.855046: step: 760/470, loss: 0.1876017451286316 2023-01-22 19:00:01.689343: step: 762/470, loss: 0.23550686240196228 2023-01-22 19:00:02.427205: step: 764/470, loss: 0.06056235358119011 2023-01-22 19:00:03.175541: step: 766/470, loss: 0.11358806490898132 2023-01-22 19:00:03.933788: step: 768/470, loss: 0.16502447426319122 2023-01-22 19:00:04.604264: step: 770/470, loss: 0.11180621385574341 2023-01-22 19:00:05.361241: step: 772/470, loss: 0.05992017686367035 2023-01-22 19:00:06.167582: step: 774/470, loss: 0.09332225471735 2023-01-22 19:00:06.892512: step: 776/470, loss: 0.1730080544948578 2023-01-22 19:00:07.638745: step: 778/470, loss: 0.9607817530632019 2023-01-22 19:00:08.402731: step: 780/470, loss: 0.11541511863470078 2023-01-22 19:00:09.198319: step: 782/470, loss: 0.028602128848433495 2023-01-22 19:00:09.931763: step: 784/470, loss: 0.060707904398441315 2023-01-22 19:00:10.643718: step: 786/470, loss: 0.01682162657380104 2023-01-22 19:00:11.401871: step: 788/470, loss: 0.05692780017852783 2023-01-22 19:00:12.101917: step: 790/470, loss: 0.23850642144680023 2023-01-22 19:00:12.767743: step: 792/470, loss: 0.0492679700255394 2023-01-22 19:00:13.477746: step: 794/470, loss: 0.058511458337306976 2023-01-22 19:00:14.214603: step: 796/470, loss: 0.06941919773817062 2023-01-22 19:00:14.945502: step: 798/470, loss: 0.07419852167367935 2023-01-22 19:00:15.633278: step: 800/470, loss: 0.0205177403986454 2023-01-22 19:00:16.378354: step: 802/470, loss: 0.08371607959270477 2023-01-22 19:00:17.123312: step: 804/470, loss: 0.03052728809416294 2023-01-22 19:00:17.815470: step: 806/470, loss: 0.040770161896944046 2023-01-22 19:00:18.575287: step: 808/470, loss: 0.09312587976455688 2023-01-22 19:00:19.330333: step: 810/470, loss: 0.33695071935653687 2023-01-22 19:00:20.088242: step: 812/470, loss: 0.1501253843307495 2023-01-22 19:00:20.848477: step: 814/470, loss: 0.05207722634077072 2023-01-22 19:00:21.532356: step: 816/470, loss: 0.026118360459804535 2023-01-22 19:00:22.317447: step: 818/470, loss: 0.029398392885923386 2023-01-22 19:00:23.099615: step: 820/470, loss: 0.10104161500930786 2023-01-22 19:00:23.922959: step: 822/470, loss: 0.05879708379507065 2023-01-22 19:00:24.590591: step: 824/470, loss: 0.07474382221698761 2023-01-22 19:00:25.288473: step: 826/470, loss: 0.038804251700639725 2023-01-22 19:00:26.013344: step: 828/470, loss: 0.09933701902627945 2023-01-22 19:00:26.748253: step: 830/470, loss: 0.03747892752289772 2023-01-22 19:00:27.623645: step: 832/470, loss: 0.21376895904541016 2023-01-22 19:00:28.303559: step: 834/470, loss: 0.11364637315273285 2023-01-22 19:00:29.016973: step: 836/470, loss: 0.22915977239608765 2023-01-22 19:00:29.757182: step: 838/470, loss: 0.10806091874837875 2023-01-22 19:00:30.562347: step: 840/470, loss: 0.15018446743488312 2023-01-22 19:00:31.448049: step: 842/470, loss: 0.11992557346820831 2023-01-22 19:00:32.182541: step: 844/470, loss: 0.08834873884916306 2023-01-22 19:00:32.945424: step: 846/470, loss: 0.08360497653484344 2023-01-22 19:00:33.678586: step: 848/470, loss: 0.04521460458636284 2023-01-22 19:00:34.415896: step: 850/470, loss: 0.07406667619943619 2023-01-22 19:00:35.116389: step: 852/470, loss: 0.1984773576259613 2023-01-22 19:00:35.867931: step: 854/470, loss: 0.07086524367332458 2023-01-22 19:00:36.550560: step: 856/470, loss: 2.956345319747925 2023-01-22 19:00:37.272063: step: 858/470, loss: 0.28626927733421326 2023-01-22 19:00:37.958746: step: 860/470, loss: 0.02902461588382721 2023-01-22 19:00:38.734549: step: 862/470, loss: 0.2611776292324066 2023-01-22 19:00:39.487926: step: 864/470, loss: 0.09869827330112457 2023-01-22 19:00:40.244116: step: 866/470, loss: 0.08181178569793701 2023-01-22 19:00:40.971349: step: 868/470, loss: 0.16328763961791992 2023-01-22 19:00:41.714731: step: 870/470, loss: 0.01159185916185379 2023-01-22 19:00:42.563900: step: 872/470, loss: 0.29373496770858765 2023-01-22 19:00:43.311501: step: 874/470, loss: 0.027659697458148003 2023-01-22 19:00:44.110199: step: 876/470, loss: 0.05898299440741539 2023-01-22 19:00:44.782710: step: 878/470, loss: 0.11096679419279099 2023-01-22 19:00:45.484563: step: 880/470, loss: 0.3364698886871338 2023-01-22 19:00:46.266141: step: 882/470, loss: 0.04068123549222946 2023-01-22 19:00:47.112897: step: 884/470, loss: 0.08637043088674545 2023-01-22 19:00:47.892770: step: 886/470, loss: 0.10517950356006622 2023-01-22 19:00:48.604014: step: 888/470, loss: 0.1388533115386963 2023-01-22 19:00:49.454606: step: 890/470, loss: 0.10692066699266434 2023-01-22 19:00:50.151328: step: 892/470, loss: 0.4482387900352478 2023-01-22 19:00:50.927613: step: 894/470, loss: 0.04289408028125763 2023-01-22 19:00:51.610011: step: 896/470, loss: 0.1445654332637787 2023-01-22 19:00:52.348543: step: 898/470, loss: 0.3577795922756195 2023-01-22 19:00:53.072319: step: 900/470, loss: 0.03457728773355484 2023-01-22 19:00:53.864870: step: 902/470, loss: 0.07262220978736877 2023-01-22 19:00:54.585435: step: 904/470, loss: 0.0956735610961914 2023-01-22 19:00:55.316507: step: 906/470, loss: 0.2117413431406021 2023-01-22 19:00:56.160722: step: 908/470, loss: 0.08425169438123703 2023-01-22 19:00:56.872379: step: 910/470, loss: 0.23610138893127441 2023-01-22 19:00:57.653227: step: 912/470, loss: 0.6577050685882568 2023-01-22 19:00:58.428210: step: 914/470, loss: 0.06360519677400589 2023-01-22 19:00:59.077379: step: 916/470, loss: 0.06351134926080704 2023-01-22 19:00:59.839727: step: 918/470, loss: 0.042916182428598404 2023-01-22 19:01:00.696886: step: 920/470, loss: 0.07389490306377411 2023-01-22 19:01:01.473602: step: 922/470, loss: 0.1284465193748474 2023-01-22 19:01:02.239070: step: 924/470, loss: 0.07059228420257568 2023-01-22 19:01:02.986814: step: 926/470, loss: 0.1461246758699417 2023-01-22 19:01:03.713630: step: 928/470, loss: 0.24688246846199036 2023-01-22 19:01:04.417993: step: 930/470, loss: 0.016211118549108505 2023-01-22 19:01:05.075980: step: 932/470, loss: 0.05001834034919739 2023-01-22 19:01:05.851619: step: 934/470, loss: 0.043632302433252335 2023-01-22 19:01:06.585183: step: 936/470, loss: 0.10320833325386047 2023-01-22 19:01:07.325269: step: 938/470, loss: 0.03066897578537464 2023-01-22 19:01:07.961770: step: 940/470, loss: 0.06406714022159576 2023-01-22 19:01:08.619388: step: 942/470, loss: 0.11159685254096985 ================================================== Loss: 0.157 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3107669293636895, 'r': 0.3367133143580013, 'f1': 0.3232202489374621}, 'combined': 0.23816228869076153, 'epoch': 17} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.339832635684455, 'r': 0.33264386839112997, 'f1': 0.33619982811154053}, 'combined': 0.23416903450555063, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29978370937139087, 'r': 0.32993273517154975, 'f1': 0.3141364976249444}, 'combined': 0.231468998249959, 'epoch': 17} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3494487917344357, 'r': 0.33600845359080356, 'f1': 0.34259685464160367}, 'combined': 0.23862467487474884, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22131147540983606, 'r': 0.38571428571428573, 'f1': 0.28125}, 'combined': 0.1875, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29978370937139087, 'r': 0.32993273517154975, 'f1': 0.3141364976249444}, 'combined': 0.231468998249959, 'epoch': 17} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3494487917344357, 'r': 0.33600845359080356, 'f1': 0.34259685464160367}, 'combined': 0.23862467487474884, 'epoch': 17} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:03:55.515436: step: 2/470, loss: 0.06617684662342072 2023-01-22 19:03:56.225143: step: 4/470, loss: 0.017266876995563507 2023-01-22 19:03:56.940202: step: 6/470, loss: 0.04035760834813118 2023-01-22 19:03:57.656841: step: 8/470, loss: 0.07709954679012299 2023-01-22 19:03:58.417589: step: 10/470, loss: 0.160027876496315 2023-01-22 19:03:59.149869: step: 12/470, loss: 0.06833804398775101 2023-01-22 19:03:59.872975: step: 14/470, loss: 0.4198529124259949 2023-01-22 19:04:00.729216: step: 16/470, loss: 0.1530308574438095 2023-01-22 19:04:01.499718: step: 18/470, loss: 0.3755422532558441 2023-01-22 19:04:02.300235: step: 20/470, loss: 0.14509013295173645 2023-01-22 19:04:03.064305: step: 22/470, loss: 0.10982964187860489 2023-01-22 19:04:03.751791: step: 24/470, loss: 0.038370661437511444 2023-01-22 19:04:04.453585: step: 26/470, loss: 0.0846431627869606 2023-01-22 19:04:05.224102: step: 28/470, loss: 0.11229250580072403 2023-01-22 19:04:06.001872: step: 30/470, loss: 0.7171079516410828 2023-01-22 19:04:06.709912: step: 32/470, loss: 0.08063667267560959 2023-01-22 19:04:07.370795: step: 34/470, loss: 0.026313627138733864 2023-01-22 19:04:08.103987: step: 36/470, loss: 0.07363957166671753 2023-01-22 19:04:08.834024: step: 38/470, loss: 1.1096469163894653 2023-01-22 19:04:09.562033: step: 40/470, loss: 0.07688609510660172 2023-01-22 19:04:10.280409: step: 42/470, loss: 0.18271422386169434 2023-01-22 19:04:11.038795: step: 44/470, loss: 0.07778210192918777 2023-01-22 19:04:11.795080: step: 46/470, loss: 0.04570697247982025 2023-01-22 19:04:12.680824: step: 48/470, loss: 0.19306838512420654 2023-01-22 19:04:13.429661: step: 50/470, loss: 0.01399903278797865 2023-01-22 19:04:14.135174: step: 52/470, loss: 0.12448009103536606 2023-01-22 19:04:14.992317: step: 54/470, loss: 0.08125516772270203 2023-01-22 19:04:15.786911: step: 56/470, loss: 0.0601322315633297 2023-01-22 19:04:16.605457: step: 58/470, loss: 0.5188689231872559 2023-01-22 19:04:17.299716: step: 60/470, loss: 0.03932400047779083 2023-01-22 19:04:17.994544: step: 62/470, loss: 0.027753813192248344 2023-01-22 19:04:18.741040: step: 64/470, loss: 0.019212109968066216 2023-01-22 19:04:19.406290: step: 66/470, loss: 0.003584251506254077 2023-01-22 19:04:20.241385: step: 68/470, loss: 0.09412702918052673 2023-01-22 19:04:20.919141: step: 70/470, loss: 0.038881633430719376 2023-01-22 19:04:21.655744: step: 72/470, loss: 0.0578327551484108 2023-01-22 19:04:22.364359: step: 74/470, loss: 0.23435544967651367 2023-01-22 19:04:23.157497: step: 76/470, loss: 0.034016791731119156 2023-01-22 19:04:23.794797: step: 78/470, loss: 0.011392496526241302 2023-01-22 19:04:24.648135: step: 80/470, loss: 0.07721582800149918 2023-01-22 19:04:25.345484: step: 82/470, loss: 0.025755221024155617 2023-01-22 19:04:26.031629: step: 84/470, loss: 0.03272271901369095 2023-01-22 19:04:26.630485: step: 86/470, loss: 0.013092340901494026 2023-01-22 19:04:27.448411: step: 88/470, loss: 0.057706430554389954 2023-01-22 19:04:28.179697: step: 90/470, loss: 0.05745483562350273 2023-01-22 19:04:28.945745: step: 92/470, loss: 0.04043126851320267 2023-01-22 19:04:29.658524: step: 94/470, loss: 0.09307193011045456 2023-01-22 19:04:30.371508: step: 96/470, loss: 0.10384157299995422 2023-01-22 19:04:31.144746: step: 98/470, loss: 0.058362387120723724 2023-01-22 19:04:31.871580: step: 100/470, loss: 0.08502303808927536 2023-01-22 19:04:32.701859: step: 102/470, loss: 0.1189383789896965 2023-01-22 19:04:33.371465: step: 104/470, loss: 0.005357891321182251 2023-01-22 19:04:34.144222: step: 106/470, loss: 0.07590699940919876 2023-01-22 19:04:34.860227: step: 108/470, loss: 0.010280979797244072 2023-01-22 19:04:35.594696: step: 110/470, loss: 0.033743537962436676 2023-01-22 19:04:36.286407: step: 112/470, loss: 0.053621165454387665 2023-01-22 19:04:36.993507: step: 114/470, loss: 0.2918620705604553 2023-01-22 19:04:37.772868: step: 116/470, loss: 0.08658783882856369 2023-01-22 19:04:38.492075: step: 118/470, loss: 0.0276590958237648 2023-01-22 19:04:39.189812: step: 120/470, loss: 0.08560670167207718 2023-01-22 19:04:39.917498: step: 122/470, loss: 0.3002013862133026 2023-01-22 19:04:40.625993: step: 124/470, loss: 0.2824411690235138 2023-01-22 19:04:41.361390: step: 126/470, loss: 0.03612075001001358 2023-01-22 19:04:42.034625: step: 128/470, loss: 0.06621766090393066 2023-01-22 19:04:42.725585: step: 130/470, loss: 0.039325080811977386 2023-01-22 19:04:43.423485: step: 132/470, loss: 0.042571116238832474 2023-01-22 19:04:44.212314: step: 134/470, loss: 0.09064992517232895 2023-01-22 19:04:44.917102: step: 136/470, loss: 0.08911306411027908 2023-01-22 19:04:45.733595: step: 138/470, loss: 0.241469144821167 2023-01-22 19:04:46.415562: step: 140/470, loss: 0.028448637574911118 2023-01-22 19:04:47.133195: step: 142/470, loss: 0.0016861435724422336 2023-01-22 19:04:47.909921: step: 144/470, loss: 0.04221523180603981 2023-01-22 19:04:48.542483: step: 146/470, loss: 0.18134230375289917 2023-01-22 19:04:49.310979: step: 148/470, loss: 0.030191145837306976 2023-01-22 19:04:50.114315: step: 150/470, loss: 0.16584885120391846 2023-01-22 19:04:50.923289: step: 152/470, loss: 0.08478162437677383 2023-01-22 19:04:51.689200: step: 154/470, loss: 0.08762053400278091 2023-01-22 19:04:52.409511: step: 156/470, loss: 0.11175291985273361 2023-01-22 19:04:53.092814: step: 158/470, loss: 0.3621843159198761 2023-01-22 19:04:53.789568: step: 160/470, loss: 0.1218324527144432 2023-01-22 19:04:54.471528: step: 162/470, loss: 0.043930042535066605 2023-01-22 19:04:55.180538: step: 164/470, loss: 0.07743663340806961 2023-01-22 19:04:55.832682: step: 166/470, loss: 0.015336059965193272 2023-01-22 19:04:56.599847: step: 168/470, loss: 0.04017908126115799 2023-01-22 19:04:57.343914: step: 170/470, loss: 0.21953128278255463 2023-01-22 19:04:57.964347: step: 172/470, loss: 0.04235304147005081 2023-01-22 19:04:58.773826: step: 174/470, loss: 0.12579070031642914 2023-01-22 19:04:59.561479: step: 176/470, loss: 0.07374074310064316 2023-01-22 19:05:00.303112: step: 178/470, loss: 0.08439971506595612 2023-01-22 19:05:00.971734: step: 180/470, loss: 0.15861716866493225 2023-01-22 19:05:01.688965: step: 182/470, loss: 0.01040814071893692 2023-01-22 19:05:02.376527: step: 184/470, loss: 0.027954377233982086 2023-01-22 19:05:03.121141: step: 186/470, loss: 0.04491977393627167 2023-01-22 19:05:03.826700: step: 188/470, loss: 0.05019805580377579 2023-01-22 19:05:04.516758: step: 190/470, loss: 0.0625576600432396 2023-01-22 19:05:05.216483: step: 192/470, loss: 0.06504880636930466 2023-01-22 19:05:06.040983: step: 194/470, loss: 0.1290799230337143 2023-01-22 19:05:06.996514: step: 196/470, loss: 0.08973461389541626 2023-01-22 19:05:07.716848: step: 198/470, loss: 0.09824441373348236 2023-01-22 19:05:08.548082: step: 200/470, loss: 0.08071193844079971 2023-01-22 19:05:09.300868: step: 202/470, loss: 0.1525980830192566 2023-01-22 19:05:10.071757: step: 204/470, loss: 0.0688774362206459 2023-01-22 19:05:10.753243: step: 206/470, loss: 0.034748729318380356 2023-01-22 19:05:11.406813: step: 208/470, loss: 0.05070429667830467 2023-01-22 19:05:12.126067: step: 210/470, loss: 0.07284100353717804 2023-01-22 19:05:12.959136: step: 212/470, loss: 0.18640294671058655 2023-01-22 19:05:13.710517: step: 214/470, loss: 0.04599619284272194 2023-01-22 19:05:14.417291: step: 216/470, loss: 0.045409638434648514 2023-01-22 19:05:15.173620: step: 218/470, loss: 0.1961684674024582 2023-01-22 19:05:15.835449: step: 220/470, loss: 0.040671225637197495 2023-01-22 19:05:16.778640: step: 222/470, loss: 0.1723230630159378 2023-01-22 19:05:17.474232: step: 224/470, loss: 0.011676449328660965 2023-01-22 19:05:18.231741: step: 226/470, loss: 0.04717397689819336 2023-01-22 19:05:19.029563: step: 228/470, loss: 0.08629091829061508 2023-01-22 19:05:19.809823: step: 230/470, loss: 0.06894738972187042 2023-01-22 19:05:20.457571: step: 232/470, loss: 0.747312605381012 2023-01-22 19:05:21.185801: step: 234/470, loss: 0.09399952739477158 2023-01-22 19:05:21.872685: step: 236/470, loss: 0.055615007877349854 2023-01-22 19:05:22.601906: step: 238/470, loss: 0.04675089567899704 2023-01-22 19:05:23.413529: step: 240/470, loss: 0.04450520500540733 2023-01-22 19:05:24.079454: step: 242/470, loss: 0.12249691784381866 2023-01-22 19:05:24.833674: step: 244/470, loss: 0.020723912864923477 2023-01-22 19:05:25.624008: step: 246/470, loss: 1.1003464460372925 2023-01-22 19:05:26.382019: step: 248/470, loss: 0.18243949115276337 2023-01-22 19:05:27.078262: step: 250/470, loss: 0.06836390495300293 2023-01-22 19:05:27.803392: step: 252/470, loss: 0.10224248468875885 2023-01-22 19:05:28.504118: step: 254/470, loss: 0.03016090951859951 2023-01-22 19:05:29.259652: step: 256/470, loss: 0.7845423221588135 2023-01-22 19:05:29.965357: step: 258/470, loss: 0.12435699254274368 2023-01-22 19:05:30.702792: step: 260/470, loss: 0.02161235734820366 2023-01-22 19:05:31.417284: step: 262/470, loss: 0.06350536644458771 2023-01-22 19:05:32.249031: step: 264/470, loss: 0.16371527314186096 2023-01-22 19:05:32.967628: step: 266/470, loss: 0.03186386078596115 2023-01-22 19:05:33.740325: step: 268/470, loss: 0.0583181194961071 2023-01-22 19:05:34.405227: step: 270/470, loss: 0.03716019168496132 2023-01-22 19:05:35.205305: step: 272/470, loss: 0.054880399256944656 2023-01-22 19:05:35.991333: step: 274/470, loss: 0.08905167132616043 2023-01-22 19:05:36.750810: step: 276/470, loss: 0.20153184235095978 2023-01-22 19:05:37.588608: step: 278/470, loss: 0.10790564119815826 2023-01-22 19:05:38.401880: step: 280/470, loss: 0.07056804746389389 2023-01-22 19:05:39.089929: step: 282/470, loss: 0.05816777050495148 2023-01-22 19:05:39.908515: step: 284/470, loss: 0.4308560788631439 2023-01-22 19:05:40.651840: step: 286/470, loss: 0.07634017616510391 2023-01-22 19:05:41.397753: step: 288/470, loss: 0.03175393491983414 2023-01-22 19:05:42.068443: step: 290/470, loss: 0.13919062912464142 2023-01-22 19:05:42.783329: step: 292/470, loss: 0.032145872712135315 2023-01-22 19:05:43.513315: step: 294/470, loss: 0.1720714420080185 2023-01-22 19:05:44.318561: step: 296/470, loss: 1.0381907224655151 2023-01-22 19:05:45.073004: step: 298/470, loss: 0.08752277493476868 2023-01-22 19:05:45.792619: step: 300/470, loss: 5.016946792602539 2023-01-22 19:05:46.577962: step: 302/470, loss: 0.048376813530921936 2023-01-22 19:05:47.395855: step: 304/470, loss: 0.020166104659438133 2023-01-22 19:05:48.137054: step: 306/470, loss: 0.3207864463329315 2023-01-22 19:05:48.895322: step: 308/470, loss: 0.42874202132225037 2023-01-22 19:05:49.656848: step: 310/470, loss: 0.04274129867553711 2023-01-22 19:05:50.371513: step: 312/470, loss: 0.012306938879191875 2023-01-22 19:05:51.140655: step: 314/470, loss: 0.03849383816123009 2023-01-22 19:05:51.920986: step: 316/470, loss: 0.6824967265129089 2023-01-22 19:05:52.622148: step: 318/470, loss: 0.055570270866155624 2023-01-22 19:05:53.330904: step: 320/470, loss: 0.06937985122203827 2023-01-22 19:05:54.095548: step: 322/470, loss: 0.044349320232868195 2023-01-22 19:05:54.849596: step: 324/470, loss: 0.09120985120534897 2023-01-22 19:05:55.529570: step: 326/470, loss: 0.025129251182079315 2023-01-22 19:05:56.235249: step: 328/470, loss: 0.06615206599235535 2023-01-22 19:05:56.948940: step: 330/470, loss: 0.13011448085308075 2023-01-22 19:05:57.705903: step: 332/470, loss: 1.3872214555740356 2023-01-22 19:05:58.432980: step: 334/470, loss: 0.03803999722003937 2023-01-22 19:05:59.202530: step: 336/470, loss: 0.0787554532289505 2023-01-22 19:05:59.925010: step: 338/470, loss: 0.05206383764743805 2023-01-22 19:06:00.645227: step: 340/470, loss: 0.08769537508487701 2023-01-22 19:06:01.453449: step: 342/470, loss: 0.28150492906570435 2023-01-22 19:06:02.221819: step: 344/470, loss: 0.3160443902015686 2023-01-22 19:06:03.032008: step: 346/470, loss: 0.01590467244386673 2023-01-22 19:06:03.797909: step: 348/470, loss: 0.08347859978675842 2023-01-22 19:06:04.467103: step: 350/470, loss: 0.18649937212467194 2023-01-22 19:06:05.159595: step: 352/470, loss: 0.06714587658643723 2023-01-22 19:06:05.910542: step: 354/470, loss: 0.2771860361099243 2023-01-22 19:06:06.652262: step: 356/470, loss: 0.10782112181186676 2023-01-22 19:06:07.381720: step: 358/470, loss: 0.04154413565993309 2023-01-22 19:06:08.184419: step: 360/470, loss: 0.16717395186424255 2023-01-22 19:06:08.859805: step: 362/470, loss: 0.06858609616756439 2023-01-22 19:06:09.616215: step: 364/470, loss: 0.09976839274168015 2023-01-22 19:06:10.275705: step: 366/470, loss: 0.04712813347578049 2023-01-22 19:06:10.982888: step: 368/470, loss: 0.004059855360537767 2023-01-22 19:06:11.715520: step: 370/470, loss: 0.04966858774423599 2023-01-22 19:06:12.466114: step: 372/470, loss: 0.05597155913710594 2023-01-22 19:06:13.212287: step: 374/470, loss: 0.03586982935667038 2023-01-22 19:06:13.916445: step: 376/470, loss: 0.07770383358001709 2023-01-22 19:06:14.696604: step: 378/470, loss: 2.023169994354248 2023-01-22 19:06:15.488836: step: 380/470, loss: 0.025740159675478935 2023-01-22 19:06:16.221422: step: 382/470, loss: 0.09446519613265991 2023-01-22 19:06:17.014021: step: 384/470, loss: 0.12673798203468323 2023-01-22 19:06:17.808021: step: 386/470, loss: 0.1375817209482193 2023-01-22 19:06:18.484295: step: 388/470, loss: 0.16045433282852173 2023-01-22 19:06:19.237270: step: 390/470, loss: 0.06736897677183151 2023-01-22 19:06:19.933443: step: 392/470, loss: 0.02480211667716503 2023-01-22 19:06:20.649135: step: 394/470, loss: 0.030097251757979393 2023-01-22 19:06:21.444454: step: 396/470, loss: 0.13856565952301025 2023-01-22 19:06:22.177879: step: 398/470, loss: 0.10427145659923553 2023-01-22 19:06:22.941286: step: 400/470, loss: 0.10263396799564362 2023-01-22 19:06:23.695675: step: 402/470, loss: 0.06050116941332817 2023-01-22 19:06:24.458633: step: 404/470, loss: 0.13885028660297394 2023-01-22 19:06:25.229784: step: 406/470, loss: 0.07277980446815491 2023-01-22 19:06:25.947545: step: 408/470, loss: 3.110565662384033 2023-01-22 19:06:26.639680: step: 410/470, loss: 0.10733214020729065 2023-01-22 19:06:27.293723: step: 412/470, loss: 0.12563331425189972 2023-01-22 19:06:28.000730: step: 414/470, loss: 0.04333885759115219 2023-01-22 19:06:28.848990: step: 416/470, loss: 0.0657099261879921 2023-01-22 19:06:29.675410: step: 418/470, loss: 0.07604394853115082 2023-01-22 19:06:30.401390: step: 420/470, loss: 0.06309117376804352 2023-01-22 19:06:31.149521: step: 422/470, loss: 0.07451029866933823 2023-01-22 19:06:31.929847: step: 424/470, loss: 0.2557286024093628 2023-01-22 19:06:32.639672: step: 426/470, loss: 0.021574387326836586 2023-01-22 19:06:33.362139: step: 428/470, loss: 0.16285766661167145 2023-01-22 19:06:34.135509: step: 430/470, loss: 0.13354262709617615 2023-01-22 19:06:34.783344: step: 432/470, loss: 0.01653607189655304 2023-01-22 19:06:35.460648: step: 434/470, loss: 0.052297040820121765 2023-01-22 19:06:36.203474: step: 436/470, loss: 0.0829712525010109 2023-01-22 19:06:37.026703: step: 438/470, loss: 0.06701302528381348 2023-01-22 19:06:37.691044: step: 440/470, loss: 0.08787357807159424 2023-01-22 19:06:38.343410: step: 442/470, loss: 0.013717160560190678 2023-01-22 19:06:39.055271: step: 444/470, loss: 0.028695937246084213 2023-01-22 19:06:39.808181: step: 446/470, loss: 0.16002628207206726 2023-01-22 19:06:40.520682: step: 448/470, loss: 0.08444713056087494 2023-01-22 19:06:41.205738: step: 450/470, loss: 0.02779926173388958 2023-01-22 19:06:41.888386: step: 452/470, loss: 0.07570282369852066 2023-01-22 19:06:42.623841: step: 454/470, loss: 0.053751636296510696 2023-01-22 19:06:43.350810: step: 456/470, loss: 0.20872299373149872 2023-01-22 19:06:44.113273: step: 458/470, loss: 0.02030782587826252 2023-01-22 19:06:44.726424: step: 460/470, loss: 0.10540497303009033 2023-01-22 19:06:45.449892: step: 462/470, loss: 0.07553648203611374 2023-01-22 19:06:46.237498: step: 464/470, loss: 0.03500121459364891 2023-01-22 19:06:47.000165: step: 466/470, loss: 1.096193790435791 2023-01-22 19:06:47.697969: step: 468/470, loss: 0.020026426762342453 2023-01-22 19:06:48.486785: step: 470/470, loss: 0.012369709089398384 2023-01-22 19:06:49.186195: step: 472/470, loss: 0.17361843585968018 2023-01-22 19:06:49.953340: step: 474/470, loss: 0.07756824046373367 2023-01-22 19:06:50.711591: step: 476/470, loss: 0.5463083386421204 2023-01-22 19:06:51.447171: step: 478/470, loss: 0.46463775634765625 2023-01-22 19:06:52.154487: step: 480/470, loss: 0.05993659421801567 2023-01-22 19:06:52.812976: step: 482/470, loss: 0.469533234834671 2023-01-22 19:06:53.532750: step: 484/470, loss: 0.0658133402466774 2023-01-22 19:06:54.278722: step: 486/470, loss: 0.03351491689682007 2023-01-22 19:06:55.093811: step: 488/470, loss: 1.313239336013794 2023-01-22 19:06:55.763734: step: 490/470, loss: 0.03160230442881584 2023-01-22 19:06:56.496400: step: 492/470, loss: 0.008728746324777603 2023-01-22 19:06:57.221005: step: 494/470, loss: 0.05184914916753769 2023-01-22 19:06:57.967761: step: 496/470, loss: 0.12218936532735825 2023-01-22 19:06:58.674406: step: 498/470, loss: 0.014416754245758057 2023-01-22 19:06:59.379342: step: 500/470, loss: 0.0723162293434143 2023-01-22 19:07:00.019659: step: 502/470, loss: 0.09827473759651184 2023-01-22 19:07:00.779721: step: 504/470, loss: 0.018960921093821526 2023-01-22 19:07:01.567615: step: 506/470, loss: 0.11824572086334229 2023-01-22 19:07:02.299971: step: 508/470, loss: 0.22170805931091309 2023-01-22 19:07:03.083520: step: 510/470, loss: 0.02900109253823757 2023-01-22 19:07:03.888107: step: 512/470, loss: 0.044659458100795746 2023-01-22 19:07:04.680215: step: 514/470, loss: 0.0988159105181694 2023-01-22 19:07:05.384235: step: 516/470, loss: 0.08702954649925232 2023-01-22 19:07:06.114092: step: 518/470, loss: 0.07674989104270935 2023-01-22 19:07:06.852481: step: 520/470, loss: 0.9716522693634033 2023-01-22 19:07:07.559142: step: 522/470, loss: 0.036884721368551254 2023-01-22 19:07:08.309488: step: 524/470, loss: 0.03706406056880951 2023-01-22 19:07:09.070478: step: 526/470, loss: 0.10544019937515259 2023-01-22 19:07:09.868265: step: 528/470, loss: 0.32073140144348145 2023-01-22 19:07:10.628726: step: 530/470, loss: 0.06953152269124985 2023-01-22 19:07:11.406346: step: 532/470, loss: 0.03154062107205391 2023-01-22 19:07:12.171433: step: 534/470, loss: 0.05240052565932274 2023-01-22 19:07:12.966667: step: 536/470, loss: 0.07844150811433792 2023-01-22 19:07:13.789283: step: 538/470, loss: 0.040399134159088135 2023-01-22 19:07:14.501669: step: 540/470, loss: 0.0194169282913208 2023-01-22 19:07:15.256430: step: 542/470, loss: 0.05020745098590851 2023-01-22 19:07:15.999727: step: 544/470, loss: 0.07957773655653 2023-01-22 19:07:16.756400: step: 546/470, loss: 0.08744139224290848 2023-01-22 19:07:17.513358: step: 548/470, loss: 0.026095112785696983 2023-01-22 19:07:18.251343: step: 550/470, loss: 0.2702293395996094 2023-01-22 19:07:18.981359: step: 552/470, loss: 0.10846442729234695 2023-01-22 19:07:19.748205: step: 554/470, loss: 0.3181234300136566 2023-01-22 19:07:20.485157: step: 556/470, loss: 0.0786203145980835 2023-01-22 19:07:21.206613: step: 558/470, loss: 0.0033358277287334204 2023-01-22 19:07:21.941326: step: 560/470, loss: 0.07501276582479477 2023-01-22 19:07:22.748031: step: 562/470, loss: 0.05452758073806763 2023-01-22 19:07:23.455203: step: 564/470, loss: 0.09366501122713089 2023-01-22 19:07:24.148395: step: 566/470, loss: 0.014939019456505775 2023-01-22 19:07:24.798024: step: 568/470, loss: 0.06117936596274376 2023-01-22 19:07:25.564547: step: 570/470, loss: 0.21456541121006012 2023-01-22 19:07:26.323650: step: 572/470, loss: 0.06580285727977753 2023-01-22 19:07:27.092935: step: 574/470, loss: 0.16548193991184235 2023-01-22 19:07:27.826145: step: 576/470, loss: 0.06566378474235535 2023-01-22 19:07:28.525779: step: 578/470, loss: 0.0672176256775856 2023-01-22 19:07:29.318793: step: 580/470, loss: 0.1079394519329071 2023-01-22 19:07:30.078757: step: 582/470, loss: 0.08780059963464737 2023-01-22 19:07:30.860589: step: 584/470, loss: 0.10914134234189987 2023-01-22 19:07:31.508178: step: 586/470, loss: 0.02274099551141262 2023-01-22 19:07:32.170904: step: 588/470, loss: 0.0968480110168457 2023-01-22 19:07:32.867776: step: 590/470, loss: 0.10516564548015594 2023-01-22 19:07:33.631183: step: 592/470, loss: 0.03458961844444275 2023-01-22 19:07:34.410615: step: 594/470, loss: 0.0399974063038826 2023-01-22 19:07:35.103992: step: 596/470, loss: 0.10185714811086655 2023-01-22 19:07:35.875696: step: 598/470, loss: 0.0844685435295105 2023-01-22 19:07:36.645940: step: 600/470, loss: 0.24795423448085785 2023-01-22 19:07:37.480918: step: 602/470, loss: 0.0773269459605217 2023-01-22 19:07:38.314612: step: 604/470, loss: 0.534523606300354 2023-01-22 19:07:39.161028: step: 606/470, loss: 0.09742405265569687 2023-01-22 19:07:39.961789: step: 608/470, loss: 0.06567207723855972 2023-01-22 19:07:40.772580: step: 610/470, loss: 0.14598585665225983 2023-01-22 19:07:41.544598: step: 612/470, loss: 0.1488225907087326 2023-01-22 19:07:42.285905: step: 614/470, loss: 0.038161151111125946 2023-01-22 19:07:42.996190: step: 616/470, loss: 0.05915544927120209 2023-01-22 19:07:43.756538: step: 618/470, loss: 0.1564996987581253 2023-01-22 19:07:44.515739: step: 620/470, loss: 0.05826148763298988 2023-01-22 19:07:45.272363: step: 622/470, loss: 0.011290734633803368 2023-01-22 19:07:46.029702: step: 624/470, loss: 0.09952137619256973 2023-01-22 19:07:46.693843: step: 626/470, loss: 0.030536210164427757 2023-01-22 19:07:47.429867: step: 628/470, loss: 0.10485729575157166 2023-01-22 19:07:48.249840: step: 630/470, loss: 0.05025089532136917 2023-01-22 19:07:49.043414: step: 632/470, loss: 0.022319285199046135 2023-01-22 19:07:49.836509: step: 634/470, loss: 0.24370166659355164 2023-01-22 19:07:50.578873: step: 636/470, loss: 0.060697052627801895 2023-01-22 19:07:51.312929: step: 638/470, loss: 0.11710238456726074 2023-01-22 19:07:52.032803: step: 640/470, loss: 0.028134386986494064 2023-01-22 19:07:52.790706: step: 642/470, loss: 0.1157449260354042 2023-01-22 19:07:53.554281: step: 644/470, loss: 0.5257167816162109 2023-01-22 19:07:54.253157: step: 646/470, loss: 0.09944950044155121 2023-01-22 19:07:55.039011: step: 648/470, loss: 0.019325170665979385 2023-01-22 19:07:55.715894: step: 650/470, loss: 0.2056947946548462 2023-01-22 19:07:56.465589: step: 652/470, loss: 0.09543462097644806 2023-01-22 19:07:57.282836: step: 654/470, loss: 0.06548590958118439 2023-01-22 19:07:57.982939: step: 656/470, loss: 0.06484925746917725 2023-01-22 19:07:58.701078: step: 658/470, loss: 0.3009560704231262 2023-01-22 19:07:59.449957: step: 660/470, loss: 0.08352717012166977 2023-01-22 19:08:00.179511: step: 662/470, loss: 0.0790558010339737 2023-01-22 19:08:00.950468: step: 664/470, loss: 0.18998511135578156 2023-01-22 19:08:01.824797: step: 666/470, loss: 0.15023095905780792 2023-01-22 19:08:02.526702: step: 668/470, loss: 0.14798638224601746 2023-01-22 19:08:03.393704: step: 670/470, loss: 0.03329675644636154 2023-01-22 19:08:04.129336: step: 672/470, loss: 0.0386032834649086 2023-01-22 19:08:04.831245: step: 674/470, loss: 0.373938649892807 2023-01-22 19:08:05.548064: step: 676/470, loss: 0.03455100581049919 2023-01-22 19:08:06.306668: step: 678/470, loss: 0.05225971341133118 2023-01-22 19:08:07.045314: step: 680/470, loss: 0.9394486546516418 2023-01-22 19:08:07.753272: step: 682/470, loss: 0.07498309016227722 2023-01-22 19:08:08.459471: step: 684/470, loss: 0.04044778645038605 2023-01-22 19:08:09.375310: step: 686/470, loss: 0.04468849301338196 2023-01-22 19:08:10.059632: step: 688/470, loss: 0.038324546068906784 2023-01-22 19:08:10.730048: step: 690/470, loss: 0.05637683719396591 2023-01-22 19:08:11.404373: step: 692/470, loss: 0.02472817339003086 2023-01-22 19:08:12.070507: step: 694/470, loss: 0.096453458070755 2023-01-22 19:08:12.822497: step: 696/470, loss: 0.03920508921146393 2023-01-22 19:08:13.464671: step: 698/470, loss: 0.007813246920704842 2023-01-22 19:08:14.201472: step: 700/470, loss: 0.04346537962555885 2023-01-22 19:08:14.973017: step: 702/470, loss: 0.038816504180431366 2023-01-22 19:08:15.637868: step: 704/470, loss: 0.030292358249425888 2023-01-22 19:08:16.347375: step: 706/470, loss: 0.016416076570749283 2023-01-22 19:08:17.057420: step: 708/470, loss: 0.2907312214374542 2023-01-22 19:08:17.761272: step: 710/470, loss: 0.06063879653811455 2023-01-22 19:08:18.568904: step: 712/470, loss: 0.041221536695957184 2023-01-22 19:08:19.323360: step: 714/470, loss: 0.07422397285699844 2023-01-22 19:08:20.022246: step: 716/470, loss: 0.040925491601228714 2023-01-22 19:08:20.702766: step: 718/470, loss: 0.806887149810791 2023-01-22 19:08:21.473154: step: 720/470, loss: 0.036033619195222855 2023-01-22 19:08:22.148998: step: 722/470, loss: 0.024567672982811928 2023-01-22 19:08:22.965480: step: 724/470, loss: 0.12131427973508835 2023-01-22 19:08:23.656598: step: 726/470, loss: 0.10178028047084808 2023-01-22 19:08:24.378051: step: 728/470, loss: 0.055488470941782 2023-01-22 19:08:25.044679: step: 730/470, loss: 0.0426790677011013 2023-01-22 19:08:25.823919: step: 732/470, loss: 0.13151328265666962 2023-01-22 19:08:26.513277: step: 734/470, loss: 0.1700894981622696 2023-01-22 19:08:27.264534: step: 736/470, loss: 0.0167455542832613 2023-01-22 19:08:28.012903: step: 738/470, loss: 0.041223183274269104 2023-01-22 19:08:28.624866: step: 740/470, loss: 1.210181713104248 2023-01-22 19:08:29.306604: step: 742/470, loss: 0.019830353558063507 2023-01-22 19:08:30.016490: step: 744/470, loss: 0.0577569454908371 2023-01-22 19:08:30.886048: step: 746/470, loss: 0.15902186930179596 2023-01-22 19:08:31.658943: step: 748/470, loss: 0.06879295408725739 2023-01-22 19:08:32.369615: step: 750/470, loss: 0.2863670587539673 2023-01-22 19:08:33.155089: step: 752/470, loss: 0.0369873046875 2023-01-22 19:08:33.939661: step: 754/470, loss: 0.06788759678602219 2023-01-22 19:08:34.725368: step: 756/470, loss: 0.06014617532491684 2023-01-22 19:08:35.510540: step: 758/470, loss: 0.12267798185348511 2023-01-22 19:08:36.293872: step: 760/470, loss: 2.288891553878784 2023-01-22 19:08:37.064152: step: 762/470, loss: 0.09062454104423523 2023-01-22 19:08:37.774315: step: 764/470, loss: 0.09816782921552658 2023-01-22 19:08:38.522388: step: 766/470, loss: 0.028900889679789543 2023-01-22 19:08:39.206902: step: 768/470, loss: 0.1546131819486618 2023-01-22 19:08:40.061512: step: 770/470, loss: 0.06838797777891159 2023-01-22 19:08:40.865874: step: 772/470, loss: 0.1268923431634903 2023-01-22 19:08:41.665679: step: 774/470, loss: 0.038736216723918915 2023-01-22 19:08:42.407040: step: 776/470, loss: 0.09154653549194336 2023-01-22 19:08:43.179909: step: 778/470, loss: 0.17325304448604584 2023-01-22 19:08:43.869201: step: 780/470, loss: 0.04542065039277077 2023-01-22 19:08:44.542421: step: 782/470, loss: 0.045644138008356094 2023-01-22 19:08:45.252116: step: 784/470, loss: 0.19901789724826813 2023-01-22 19:08:46.070231: step: 786/470, loss: 0.07332056015729904 2023-01-22 19:08:46.707055: step: 788/470, loss: 0.017355095595121384 2023-01-22 19:08:47.401390: step: 790/470, loss: 0.047633036971092224 2023-01-22 19:08:48.100134: step: 792/470, loss: 0.09777167439460754 2023-01-22 19:08:48.895276: step: 794/470, loss: 0.14233334362506866 2023-01-22 19:08:49.636366: step: 796/470, loss: 0.094623863697052 2023-01-22 19:08:50.378238: step: 798/470, loss: 0.04363381117582321 2023-01-22 19:08:51.042221: step: 800/470, loss: 0.03856759890913963 2023-01-22 19:08:51.828415: step: 802/470, loss: 0.047487128525972366 2023-01-22 19:08:52.611339: step: 804/470, loss: 0.08930141478776932 2023-01-22 19:08:53.493459: step: 806/470, loss: 0.05340230092406273 2023-01-22 19:08:54.184931: step: 808/470, loss: 0.02979603409767151 2023-01-22 19:08:54.912911: step: 810/470, loss: 0.07141705602407455 2023-01-22 19:08:55.614982: step: 812/470, loss: 0.05251197889447212 2023-01-22 19:08:56.313686: step: 814/470, loss: 0.02951197512447834 2023-01-22 19:08:57.051577: step: 816/470, loss: 0.016900768503546715 2023-01-22 19:08:57.779693: step: 818/470, loss: 0.028845131397247314 2023-01-22 19:08:58.598208: step: 820/470, loss: 0.10073421150445938 2023-01-22 19:08:59.334840: step: 822/470, loss: 0.2575160264968872 2023-01-22 19:09:00.103293: step: 824/470, loss: 2.035039186477661 2023-01-22 19:09:00.893444: step: 826/470, loss: 0.03579817712306976 2023-01-22 19:09:01.637838: step: 828/470, loss: 0.04633788764476776 2023-01-22 19:09:02.400988: step: 830/470, loss: 0.4512938857078552 2023-01-22 19:09:03.167212: step: 832/470, loss: 0.06470350176095963 2023-01-22 19:09:03.861469: step: 834/470, loss: 0.07506409287452698 2023-01-22 19:09:04.543150: step: 836/470, loss: 0.026313841342926025 2023-01-22 19:09:05.304371: step: 838/470, loss: 0.020502228289842606 2023-01-22 19:09:06.046597: step: 840/470, loss: 0.7991170883178711 2023-01-22 19:09:06.770055: step: 842/470, loss: 0.1815629005432129 2023-01-22 19:09:07.531451: step: 844/470, loss: 0.08853790909051895 2023-01-22 19:09:08.283476: step: 846/470, loss: 0.05072927847504616 2023-01-22 19:09:09.023131: step: 848/470, loss: 0.20891301333904266 2023-01-22 19:09:09.759023: step: 850/470, loss: 0.09082869440317154 2023-01-22 19:09:10.538530: step: 852/470, loss: 0.032474786043167114 2023-01-22 19:09:11.319501: step: 854/470, loss: 0.18315306305885315 2023-01-22 19:09:12.084884: step: 856/470, loss: 0.16794231534004211 2023-01-22 19:09:12.853077: step: 858/470, loss: 0.03778474032878876 2023-01-22 19:09:13.556912: step: 860/470, loss: 0.07641467452049255 2023-01-22 19:09:14.356981: step: 862/470, loss: 0.11089635640382767 2023-01-22 19:09:15.057241: step: 864/470, loss: 0.2194468230009079 2023-01-22 19:09:15.855921: step: 866/470, loss: 0.028484074398875237 2023-01-22 19:09:16.633220: step: 868/470, loss: 0.19141916930675507 2023-01-22 19:09:17.341543: step: 870/470, loss: 0.23752793669700623 2023-01-22 19:09:18.072870: step: 872/470, loss: 0.08412695676088333 2023-01-22 19:09:18.807234: step: 874/470, loss: 0.055731259286403656 2023-01-22 19:09:19.565926: step: 876/470, loss: 0.05968417599797249 2023-01-22 19:09:20.275428: step: 878/470, loss: 0.14127624034881592 2023-01-22 19:09:21.054171: step: 880/470, loss: 0.0697111114859581 2023-01-22 19:09:21.783220: step: 882/470, loss: 0.21765391528606415 2023-01-22 19:09:22.476151: step: 884/470, loss: 0.02833917923271656 2023-01-22 19:09:23.196299: step: 886/470, loss: 0.27807044982910156 2023-01-22 19:09:23.920884: step: 888/470, loss: 0.25375717878341675 2023-01-22 19:09:24.735764: step: 890/470, loss: 0.08540809154510498 2023-01-22 19:09:25.500382: step: 892/470, loss: 0.04102737829089165 2023-01-22 19:09:26.176778: step: 894/470, loss: 0.2897949516773224 2023-01-22 19:09:26.890883: step: 896/470, loss: 0.04384997487068176 2023-01-22 19:09:27.644793: step: 898/470, loss: 0.06272339075803757 2023-01-22 19:09:28.418865: step: 900/470, loss: 0.13991448283195496 2023-01-22 19:09:29.161226: step: 902/470, loss: 0.03492581099271774 2023-01-22 19:09:29.827744: step: 904/470, loss: 4.457045078277588 2023-01-22 19:09:30.620353: step: 906/470, loss: 0.05300872027873993 2023-01-22 19:09:31.464454: step: 908/470, loss: 0.17280398309230804 2023-01-22 19:09:32.168853: step: 910/470, loss: 0.12466652691364288 2023-01-22 19:09:32.852085: step: 912/470, loss: 0.05572166293859482 2023-01-22 19:09:33.619979: step: 914/470, loss: 0.11022337526082993 2023-01-22 19:09:34.394600: step: 916/470, loss: 0.15707486867904663 2023-01-22 19:09:35.112869: step: 918/470, loss: 0.03694531321525574 2023-01-22 19:09:35.829238: step: 920/470, loss: 0.05348490923643112 2023-01-22 19:09:36.605179: step: 922/470, loss: 0.04882257059216499 2023-01-22 19:09:37.306532: step: 924/470, loss: 0.08925341814756393 2023-01-22 19:09:38.005130: step: 926/470, loss: 0.06599713861942291 2023-01-22 19:09:38.720954: step: 928/470, loss: 0.024995747953653336 2023-01-22 19:09:39.449474: step: 930/470, loss: 0.1284753382205963 2023-01-22 19:09:40.212663: step: 932/470, loss: 0.5296043157577515 2023-01-22 19:09:41.002814: step: 934/470, loss: 0.07279351353645325 2023-01-22 19:09:41.685320: step: 936/470, loss: 0.23871983587741852 2023-01-22 19:09:42.419514: step: 938/470, loss: 0.14109857380390167 2023-01-22 19:09:43.147632: step: 940/470, loss: 0.015602878294885159 2023-01-22 19:09:43.964516: step: 942/470, loss: 0.18439671397209167 ================================================== Loss: 0.165 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3114274465080917, 'r': 0.3297467080673912, 'f1': 0.32032537355118}, 'combined': 0.23602922261665896, 'epoch': 18} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3446986847487509, 'r': 0.3675461480029909, 'f1': 0.35575596473371784}, 'combined': 0.24779022419263932, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2966384955078422, 'r': 0.3360401932033811, 'f1': 0.3151124231640246}, 'combined': 0.23218810127875494, 'epoch': 18} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33645058944485723, 'r': 0.372002524928944, 'f1': 0.3533345150100645}, 'combined': 0.24610364229556733, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20599489795918366, 'r': 0.32959183673469383, 'f1': 0.25353218210361067}, 'combined': 0.16902145473574043, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.3017241379310345, 'f1': 0.3571428571428571}, 'combined': 0.23809523809523805, 'epoch': 18} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:12:30.893821: step: 2/470, loss: 0.0112921092659235 2023-01-22 19:12:31.675931: step: 4/470, loss: 0.07280120998620987 2023-01-22 19:12:32.476235: step: 6/470, loss: 0.003984284121543169 2023-01-22 19:12:33.348567: step: 8/470, loss: 0.07252970337867737 2023-01-22 19:12:34.135689: step: 10/470, loss: 0.3424142301082611 2023-01-22 19:12:34.920654: step: 12/470, loss: 0.0439504012465477 2023-01-22 19:12:35.664424: step: 14/470, loss: 0.053510237485170364 2023-01-22 19:12:36.419977: step: 16/470, loss: 0.11842609941959381 2023-01-22 19:12:37.158320: step: 18/470, loss: 0.04970640316605568 2023-01-22 19:12:37.919374: step: 20/470, loss: 0.026285069063305855 2023-01-22 19:12:38.647001: step: 22/470, loss: 0.7065211534500122 2023-01-22 19:12:39.417989: step: 24/470, loss: 0.010196846909821033 2023-01-22 19:12:40.165212: step: 26/470, loss: 0.260796457529068 2023-01-22 19:12:40.882193: step: 28/470, loss: 0.067171610891819 2023-01-22 19:12:41.651769: step: 30/470, loss: 0.030061058700084686 2023-01-22 19:12:42.329689: step: 32/470, loss: 0.06347762048244476 2023-01-22 19:12:43.078023: step: 34/470, loss: 0.029726915061473846 2023-01-22 19:12:43.811345: step: 36/470, loss: 0.03759774938225746 2023-01-22 19:12:44.616354: step: 38/470, loss: 0.029163209721446037 2023-01-22 19:12:45.312509: step: 40/470, loss: 0.3536049723625183 2023-01-22 19:12:46.068544: step: 42/470, loss: 0.01684482768177986 2023-01-22 19:12:46.784397: step: 44/470, loss: 0.0455920547246933 2023-01-22 19:12:47.456523: step: 46/470, loss: 0.1695699840784073 2023-01-22 19:12:48.195768: step: 48/470, loss: 0.03408074751496315 2023-01-22 19:12:48.913095: step: 50/470, loss: 0.07696592807769775 2023-01-22 19:12:49.673392: step: 52/470, loss: 0.02807328663766384 2023-01-22 19:12:50.344801: step: 54/470, loss: 0.017425425350666046 2023-01-22 19:12:51.127744: step: 56/470, loss: 0.09263166040182114 2023-01-22 19:12:51.922949: step: 58/470, loss: 0.0654522031545639 2023-01-22 19:12:52.606907: step: 60/470, loss: 0.04144338145852089 2023-01-22 19:12:53.485795: step: 62/470, loss: 0.06384022533893585 2023-01-22 19:12:54.294509: step: 64/470, loss: 0.034149911254644394 2023-01-22 19:12:55.014853: step: 66/470, loss: 0.05268741771578789 2023-01-22 19:12:55.788785: step: 68/470, loss: 0.07051324099302292 2023-01-22 19:12:56.540377: step: 70/470, loss: 0.046795789152383804 2023-01-22 19:12:57.311847: step: 72/470, loss: 0.0369303859770298 2023-01-22 19:12:57.993332: step: 74/470, loss: 0.1018744483590126 2023-01-22 19:12:58.666147: step: 76/470, loss: 0.012132197618484497 2023-01-22 19:12:59.372996: step: 78/470, loss: 0.023422876372933388 2023-01-22 19:13:00.141664: step: 80/470, loss: 0.04017108678817749 2023-01-22 19:13:00.963622: step: 82/470, loss: 0.1267279088497162 2023-01-22 19:13:01.699571: step: 84/470, loss: 0.09292332828044891 2023-01-22 19:13:02.422211: step: 86/470, loss: 0.013976332731544971 2023-01-22 19:13:03.158829: step: 88/470, loss: 0.060683172196149826 2023-01-22 19:13:04.002265: step: 90/470, loss: 0.025250034406781197 2023-01-22 19:13:04.730751: step: 92/470, loss: 0.7996619343757629 2023-01-22 19:13:05.452795: step: 94/470, loss: 0.47412997484207153 2023-01-22 19:13:06.118976: step: 96/470, loss: 0.048665400594472885 2023-01-22 19:13:06.910661: step: 98/470, loss: 0.1110273227095604 2023-01-22 19:13:07.589940: step: 100/470, loss: 0.00958691630512476 2023-01-22 19:13:08.399654: step: 102/470, loss: 0.08475369215011597 2023-01-22 19:13:09.160473: step: 104/470, loss: 0.1207355335354805 2023-01-22 19:13:09.951456: step: 106/470, loss: 0.15649910271167755 2023-01-22 19:13:10.719027: step: 108/470, loss: 0.05611814185976982 2023-01-22 19:13:11.474032: step: 110/470, loss: 0.05005284771323204 2023-01-22 19:13:12.222379: step: 112/470, loss: 0.060722410678863525 2023-01-22 19:13:12.951564: step: 114/470, loss: 0.3491228520870209 2023-01-22 19:13:13.682148: step: 116/470, loss: 0.03365219384431839 2023-01-22 19:13:14.452560: step: 118/470, loss: 0.18201521039009094 2023-01-22 19:13:15.217413: step: 120/470, loss: 0.09609755873680115 2023-01-22 19:13:15.997453: step: 122/470, loss: 0.0444912314414978 2023-01-22 19:13:16.826399: step: 124/470, loss: 0.02517714537680149 2023-01-22 19:13:17.586517: step: 126/470, loss: 0.07190493494272232 2023-01-22 19:13:18.320897: step: 128/470, loss: 0.06636962294578552 2023-01-22 19:13:19.082858: step: 130/470, loss: 0.06659968942403793 2023-01-22 19:13:19.834637: step: 132/470, loss: 0.36475393176078796 2023-01-22 19:13:20.506171: step: 134/470, loss: 0.15585525333881378 2023-01-22 19:13:21.195759: step: 136/470, loss: 0.02720065414905548 2023-01-22 19:13:21.904542: step: 138/470, loss: 0.0021766903810203075 2023-01-22 19:13:22.782585: step: 140/470, loss: 0.023098669946193695 2023-01-22 19:13:23.524983: step: 142/470, loss: 0.09725446254014969 2023-01-22 19:13:24.228743: step: 144/470, loss: 0.0761142149567604 2023-01-22 19:13:24.984217: step: 146/470, loss: 0.03910772502422333 2023-01-22 19:13:25.677670: step: 148/470, loss: 0.045949943363666534 2023-01-22 19:13:26.371002: step: 150/470, loss: 0.18881461024284363 2023-01-22 19:13:27.127582: step: 152/470, loss: 0.08599704504013062 2023-01-22 19:13:27.843321: step: 154/470, loss: 0.09139792621135712 2023-01-22 19:13:28.568619: step: 156/470, loss: 0.023786649107933044 2023-01-22 19:13:29.299185: step: 158/470, loss: 0.0356992706656456 2023-01-22 19:13:30.051100: step: 160/470, loss: 0.20965898036956787 2023-01-22 19:13:30.763749: step: 162/470, loss: 0.05315234512090683 2023-01-22 19:13:31.560525: step: 164/470, loss: 0.09431155771017075 2023-01-22 19:13:32.257056: step: 166/470, loss: 0.08751388639211655 2023-01-22 19:13:33.041936: step: 168/470, loss: 0.035697486251592636 2023-01-22 19:13:33.858495: step: 170/470, loss: 0.08968447893857956 2023-01-22 19:13:34.672867: step: 172/470, loss: 0.04234302416443825 2023-01-22 19:13:35.453973: step: 174/470, loss: 0.03975873067975044 2023-01-22 19:13:36.369268: step: 176/470, loss: 0.03170318529009819 2023-01-22 19:13:37.080014: step: 178/470, loss: 0.032391466200351715 2023-01-22 19:13:37.828408: step: 180/470, loss: 0.0803714469075203 2023-01-22 19:13:38.537406: step: 182/470, loss: 0.06298526376485825 2023-01-22 19:13:39.271926: step: 184/470, loss: 0.10829924792051315 2023-01-22 19:13:40.133905: step: 186/470, loss: 0.046199098229408264 2023-01-22 19:13:40.979174: step: 188/470, loss: 8.178955078125 2023-01-22 19:13:41.706679: step: 190/470, loss: 0.04769464582204819 2023-01-22 19:13:42.443156: step: 192/470, loss: 0.0582704059779644 2023-01-22 19:13:43.230410: step: 194/470, loss: 0.08901997655630112 2023-01-22 19:13:43.927487: step: 196/470, loss: 0.0373103991150856 2023-01-22 19:13:44.628952: step: 198/470, loss: 0.028127502650022507 2023-01-22 19:13:45.404852: step: 200/470, loss: 0.07751333713531494 2023-01-22 19:13:46.160375: step: 202/470, loss: 0.04906482994556427 2023-01-22 19:13:46.829792: step: 204/470, loss: 0.04419788345694542 2023-01-22 19:13:47.609277: step: 206/470, loss: 0.044183794409036636 2023-01-22 19:13:48.368459: step: 208/470, loss: 0.026250679045915604 2023-01-22 19:13:49.125379: step: 210/470, loss: 0.03679383546113968 2023-01-22 19:13:49.871537: step: 212/470, loss: 0.04583572596311569 2023-01-22 19:13:50.744389: step: 214/470, loss: 0.07375669479370117 2023-01-22 19:13:51.523738: step: 216/470, loss: 0.2075904905796051 2023-01-22 19:13:52.302026: step: 218/470, loss: 0.050344791263341904 2023-01-22 19:13:53.012125: step: 220/470, loss: 0.7687535881996155 2023-01-22 19:13:53.772873: step: 222/470, loss: 0.01730995625257492 2023-01-22 19:13:54.567411: step: 224/470, loss: 0.0984438806772232 2023-01-22 19:13:55.300151: step: 226/470, loss: 0.014674522913992405 2023-01-22 19:13:56.027527: step: 228/470, loss: 0.07806924730539322 2023-01-22 19:13:56.836642: step: 230/470, loss: 0.10137531161308289 2023-01-22 19:13:57.591908: step: 232/470, loss: 0.021633736789226532 2023-01-22 19:13:58.350185: step: 234/470, loss: 0.02728520892560482 2023-01-22 19:13:59.008434: step: 236/470, loss: 0.023789340630173683 2023-01-22 19:13:59.698968: step: 238/470, loss: 0.08473862707614899 2023-01-22 19:14:00.426443: step: 240/470, loss: 0.5015853047370911 2023-01-22 19:14:01.157151: step: 242/470, loss: 0.3454964756965637 2023-01-22 19:14:01.980759: step: 244/470, loss: 0.104658342897892 2023-01-22 19:14:02.704238: step: 246/470, loss: 0.0792723074555397 2023-01-22 19:14:03.549026: step: 248/470, loss: 0.22262783348560333 2023-01-22 19:14:04.343929: step: 250/470, loss: 0.04709459841251373 2023-01-22 19:14:05.120278: step: 252/470, loss: 0.11567848920822144 2023-01-22 19:14:05.951645: step: 254/470, loss: 0.022635197266936302 2023-01-22 19:14:06.676793: step: 256/470, loss: 0.032210566103458405 2023-01-22 19:14:07.379407: step: 258/470, loss: 0.09060268104076385 2023-01-22 19:14:08.109850: step: 260/470, loss: 0.06221858039498329 2023-01-22 19:14:08.892258: step: 262/470, loss: 0.04487275332212448 2023-01-22 19:14:09.636498: step: 264/470, loss: 0.145310178399086 2023-01-22 19:14:10.356147: step: 266/470, loss: 0.025644149631261826 2023-01-22 19:14:11.246272: step: 268/470, loss: 0.03474476560950279 2023-01-22 19:14:11.956846: step: 270/470, loss: 0.06833195686340332 2023-01-22 19:14:12.665329: step: 272/470, loss: 0.1358962208032608 2023-01-22 19:14:13.422953: step: 274/470, loss: 0.08055564761161804 2023-01-22 19:14:14.146174: step: 276/470, loss: 0.060504548251628876 2023-01-22 19:14:14.937709: step: 278/470, loss: 0.1443237066268921 2023-01-22 19:14:15.671769: step: 280/470, loss: 0.0553126223385334 2023-01-22 19:14:16.417043: step: 282/470, loss: 0.047990117222070694 2023-01-22 19:14:17.132331: step: 284/470, loss: 0.053962647914886475 2023-01-22 19:14:17.849094: step: 286/470, loss: 0.06690418720245361 2023-01-22 19:14:18.560802: step: 288/470, loss: 0.02675960212945938 2023-01-22 19:14:19.275707: step: 290/470, loss: 0.10729622840881348 2023-01-22 19:14:19.980275: step: 292/470, loss: 0.009620921686291695 2023-01-22 19:14:20.627298: step: 294/470, loss: 0.017848452553153038 2023-01-22 19:14:21.422617: step: 296/470, loss: 0.0679577887058258 2023-01-22 19:14:22.181728: step: 298/470, loss: 0.009503382258117199 2023-01-22 19:14:22.926152: step: 300/470, loss: 0.008858383633196354 2023-01-22 19:14:23.629670: step: 302/470, loss: 6.043306827545166 2023-01-22 19:14:24.375091: step: 304/470, loss: 0.07553528249263763 2023-01-22 19:14:25.061270: step: 306/470, loss: 0.016572916880249977 2023-01-22 19:14:25.792363: step: 308/470, loss: 0.10772523283958435 2023-01-22 19:14:26.536975: step: 310/470, loss: 0.08887186646461487 2023-01-22 19:14:27.242463: step: 312/470, loss: 0.06254518777132034 2023-01-22 19:14:27.978847: step: 314/470, loss: 0.07799230515956879 2023-01-22 19:14:28.691054: step: 316/470, loss: 0.1513335406780243 2023-01-22 19:14:29.431674: step: 318/470, loss: 0.03515966981649399 2023-01-22 19:14:30.221013: step: 320/470, loss: 0.028799178078770638 2023-01-22 19:14:30.946391: step: 322/470, loss: 0.02126074954867363 2023-01-22 19:14:31.715557: step: 324/470, loss: 0.06063699349761009 2023-01-22 19:14:32.461624: step: 326/470, loss: 0.05479899421334267 2023-01-22 19:14:33.454445: step: 328/470, loss: 0.042343273758888245 2023-01-22 19:14:34.183525: step: 330/470, loss: 0.04537075757980347 2023-01-22 19:14:34.890809: step: 332/470, loss: 0.02867543324828148 2023-01-22 19:14:35.614886: step: 334/470, loss: 0.03638402745127678 2023-01-22 19:14:36.412086: step: 336/470, loss: 0.09336365759372711 2023-01-22 19:14:37.139595: step: 338/470, loss: 0.07571189105510712 2023-01-22 19:14:37.831242: step: 340/470, loss: 0.07789936661720276 2023-01-22 19:14:38.599100: step: 342/470, loss: 0.22282201051712036 2023-01-22 19:14:39.331719: step: 344/470, loss: 0.06214971840381622 2023-01-22 19:14:40.090410: step: 346/470, loss: 0.1050262525677681 2023-01-22 19:14:40.828607: step: 348/470, loss: 0.12013573199510574 2023-01-22 19:14:41.530674: step: 350/470, loss: 0.021225430071353912 2023-01-22 19:14:42.237537: step: 352/470, loss: 0.06574123352766037 2023-01-22 19:14:43.040281: step: 354/470, loss: 0.01238292921334505 2023-01-22 19:14:43.820889: step: 356/470, loss: 0.05769104138016701 2023-01-22 19:14:44.535606: step: 358/470, loss: 0.036605287343263626 2023-01-22 19:14:45.387385: step: 360/470, loss: 0.04926653951406479 2023-01-22 19:14:46.077439: step: 362/470, loss: 0.05047943815588951 2023-01-22 19:14:46.879068: step: 364/470, loss: 0.2702573835849762 2023-01-22 19:14:47.551182: step: 366/470, loss: 0.09223837405443192 2023-01-22 19:14:48.289012: step: 368/470, loss: 0.10138967633247375 2023-01-22 19:14:49.020236: step: 370/470, loss: 0.04373805224895477 2023-01-22 19:14:49.722342: step: 372/470, loss: 0.03066779114305973 2023-01-22 19:14:50.434242: step: 374/470, loss: 0.077591672539711 2023-01-22 19:14:51.093411: step: 376/470, loss: 0.196303129196167 2023-01-22 19:14:51.820023: step: 378/470, loss: 0.06719297915697098 2023-01-22 19:14:52.584214: step: 380/470, loss: 0.44437164068222046 2023-01-22 19:14:53.350299: step: 382/470, loss: 0.042737286537885666 2023-01-22 19:14:54.058765: step: 384/470, loss: 0.0069000255316495895 2023-01-22 19:14:54.822773: step: 386/470, loss: 0.05957155302166939 2023-01-22 19:14:55.510841: step: 388/470, loss: 0.08688811212778091 2023-01-22 19:14:56.305272: step: 390/470, loss: 0.05815081670880318 2023-01-22 19:14:57.019322: step: 392/470, loss: 0.016378343105316162 2023-01-22 19:14:57.754037: step: 394/470, loss: 0.035406243056058884 2023-01-22 19:14:58.505246: step: 396/470, loss: 0.008739456534385681 2023-01-22 19:14:59.253603: step: 398/470, loss: 0.18853512406349182 2023-01-22 19:14:59.995368: step: 400/470, loss: 0.03277993202209473 2023-01-22 19:15:00.771998: step: 402/470, loss: 0.13732224702835083 2023-01-22 19:15:01.424758: step: 404/470, loss: 0.008723941631615162 2023-01-22 19:15:02.102741: step: 406/470, loss: 0.06874186545610428 2023-01-22 19:15:02.854188: step: 408/470, loss: 0.7236286401748657 2023-01-22 19:15:03.557384: step: 410/470, loss: 0.28471216559410095 2023-01-22 19:15:04.318847: step: 412/470, loss: 0.08407147228717804 2023-01-22 19:15:05.065771: step: 414/470, loss: 0.038917530328035355 2023-01-22 19:15:05.766286: step: 416/470, loss: 0.08968863636255264 2023-01-22 19:15:06.477839: step: 418/470, loss: 0.1135501116514206 2023-01-22 19:15:07.204960: step: 420/470, loss: 0.051190122961997986 2023-01-22 19:15:07.901514: step: 422/470, loss: 0.06608197093009949 2023-01-22 19:15:08.616829: step: 424/470, loss: 0.005093181971460581 2023-01-22 19:15:09.351738: step: 426/470, loss: 0.04653076454997063 2023-01-22 19:15:10.136277: step: 428/470, loss: 0.01797359250485897 2023-01-22 19:15:10.842079: step: 430/470, loss: 0.12045972049236298 2023-01-22 19:15:11.550442: step: 432/470, loss: 0.04225793853402138 2023-01-22 19:15:12.274351: step: 434/470, loss: 0.3560810685157776 2023-01-22 19:15:12.940852: step: 436/470, loss: 0.03933987766504288 2023-01-22 19:15:13.634162: step: 438/470, loss: 0.10735952109098434 2023-01-22 19:15:14.322706: step: 440/470, loss: 0.04517611861228943 2023-01-22 19:15:15.098883: step: 442/470, loss: 0.04805009067058563 2023-01-22 19:15:15.815797: step: 444/470, loss: 0.034436922520399094 2023-01-22 19:15:16.606819: step: 446/470, loss: 0.0980946347117424 2023-01-22 19:15:17.328541: step: 448/470, loss: 0.02357635833323002 2023-01-22 19:15:18.077602: step: 450/470, loss: 0.40950459241867065 2023-01-22 19:15:18.778972: step: 452/470, loss: 0.09088637679815292 2023-01-22 19:15:19.531226: step: 454/470, loss: 0.06179659068584442 2023-01-22 19:15:20.192371: step: 456/470, loss: 0.024704836308956146 2023-01-22 19:15:20.947138: step: 458/470, loss: 0.013954793103039265 2023-01-22 19:15:21.671725: step: 460/470, loss: 0.012341751717031002 2023-01-22 19:15:22.336371: step: 462/470, loss: 0.4283565282821655 2023-01-22 19:15:23.055113: step: 464/470, loss: 0.12819719314575195 2023-01-22 19:15:23.861386: step: 466/470, loss: 0.10749958455562592 2023-01-22 19:15:24.570578: step: 468/470, loss: 0.0625910684466362 2023-01-22 19:15:25.325357: step: 470/470, loss: 0.0827048048377037 2023-01-22 19:15:25.978366: step: 472/470, loss: 0.05317022651433945 2023-01-22 19:15:26.696784: step: 474/470, loss: 0.020091721788048744 2023-01-22 19:15:27.399913: step: 476/470, loss: 0.06902855634689331 2023-01-22 19:15:28.102194: step: 478/470, loss: 0.044647008180618286 2023-01-22 19:15:28.823719: step: 480/470, loss: 0.061124030500650406 2023-01-22 19:15:29.576571: step: 482/470, loss: 0.09614740312099457 2023-01-22 19:15:30.316236: step: 484/470, loss: 0.09537113457918167 2023-01-22 19:15:31.181724: step: 486/470, loss: 0.1970382034778595 2023-01-22 19:15:31.933582: step: 488/470, loss: 0.08817904442548752 2023-01-22 19:15:32.718394: step: 490/470, loss: 0.19899365305900574 2023-01-22 19:15:33.451816: step: 492/470, loss: 0.010458926670253277 2023-01-22 19:15:34.171625: step: 494/470, loss: 0.09982287138700485 2023-01-22 19:15:34.876623: step: 496/470, loss: 0.02451414242386818 2023-01-22 19:15:35.541055: step: 498/470, loss: 0.05906842648983002 2023-01-22 19:15:36.322970: step: 500/470, loss: 0.02308048866689205 2023-01-22 19:15:37.171702: step: 502/470, loss: 0.4586753845214844 2023-01-22 19:15:37.952193: step: 504/470, loss: 0.001469065435230732 2023-01-22 19:15:38.644047: step: 506/470, loss: 0.39444416761398315 2023-01-22 19:15:39.342806: step: 508/470, loss: 0.07880131155252457 2023-01-22 19:15:40.099404: step: 510/470, loss: 0.34225597977638245 2023-01-22 19:15:40.834381: step: 512/470, loss: 0.07212910801172256 2023-01-22 19:15:41.665494: step: 514/470, loss: 0.0419406034052372 2023-01-22 19:15:42.343836: step: 516/470, loss: 0.24012605845928192 2023-01-22 19:15:43.064777: step: 518/470, loss: 0.02023383416235447 2023-01-22 19:15:43.777756: step: 520/470, loss: 0.2502630650997162 2023-01-22 19:15:44.532201: step: 522/470, loss: 0.2373182326555252 2023-01-22 19:15:45.262507: step: 524/470, loss: 0.07686462253332138 2023-01-22 19:15:46.018439: step: 526/470, loss: 0.11860328167676926 2023-01-22 19:15:46.765417: step: 528/470, loss: 0.07658058404922485 2023-01-22 19:15:47.544053: step: 530/470, loss: 0.033899374306201935 2023-01-22 19:15:48.239071: step: 532/470, loss: 0.010917163453996181 2023-01-22 19:15:48.972037: step: 534/470, loss: 0.017348704859614372 2023-01-22 19:15:49.716771: step: 536/470, loss: 0.32592514157295227 2023-01-22 19:15:50.427008: step: 538/470, loss: 0.12130807340145111 2023-01-22 19:15:51.081273: step: 540/470, loss: 0.06882882863283157 2023-01-22 19:15:51.932802: step: 542/470, loss: 0.009247006848454475 2023-01-22 19:15:52.721936: step: 544/470, loss: 0.05684985592961311 2023-01-22 19:15:53.529275: step: 546/470, loss: 0.08473062515258789 2023-01-22 19:15:54.274981: step: 548/470, loss: 0.0109866289421916 2023-01-22 19:15:54.970829: step: 550/470, loss: 0.04165481775999069 2023-01-22 19:15:55.738404: step: 552/470, loss: 0.01383355911821127 2023-01-22 19:15:56.424584: step: 554/470, loss: 0.08635249733924866 2023-01-22 19:15:57.168354: step: 556/470, loss: 0.05461049824953079 2023-01-22 19:15:57.830124: step: 558/470, loss: 0.015371817164123058 2023-01-22 19:15:58.486018: step: 560/470, loss: 0.6815204620361328 2023-01-22 19:15:59.230685: step: 562/470, loss: 0.09630201011896133 2023-01-22 19:15:59.891867: step: 564/470, loss: 0.05369073897600174 2023-01-22 19:16:00.581785: step: 566/470, loss: 0.058216556906700134 2023-01-22 19:16:01.344411: step: 568/470, loss: 0.02524617128074169 2023-01-22 19:16:02.021380: step: 570/470, loss: 0.04199972376227379 2023-01-22 19:16:02.668839: step: 572/470, loss: 0.015193572267889977 2023-01-22 19:16:03.411672: step: 574/470, loss: 0.039925094693899155 2023-01-22 19:16:04.207772: step: 576/470, loss: 0.13338324427604675 2023-01-22 19:16:04.916010: step: 578/470, loss: 0.062471386045217514 2023-01-22 19:16:05.668659: step: 580/470, loss: 0.09208384156227112 2023-01-22 19:16:06.397465: step: 582/470, loss: 0.030092593282461166 2023-01-22 19:16:07.108703: step: 584/470, loss: 0.06586410105228424 2023-01-22 19:16:07.836972: step: 586/470, loss: 0.02899825945496559 2023-01-22 19:16:08.559122: step: 588/470, loss: 0.10743267834186554 2023-01-22 19:16:09.258500: step: 590/470, loss: 0.10501951724290848 2023-01-22 19:16:09.979704: step: 592/470, loss: 1.813976526260376 2023-01-22 19:16:10.724427: step: 594/470, loss: 0.06850782036781311 2023-01-22 19:16:11.485753: step: 596/470, loss: 0.08768956363201141 2023-01-22 19:16:12.234853: step: 598/470, loss: 0.0878264382481575 2023-01-22 19:16:12.960736: step: 600/470, loss: 0.022833187133073807 2023-01-22 19:16:13.688286: step: 602/470, loss: 0.07362283021211624 2023-01-22 19:16:14.479629: step: 604/470, loss: 0.02391095831990242 2023-01-22 19:16:15.296942: step: 606/470, loss: 0.0804729014635086 2023-01-22 19:16:15.989518: step: 608/470, loss: 0.012506152503192425 2023-01-22 19:16:16.711700: step: 610/470, loss: 0.134328231215477 2023-01-22 19:16:17.410361: step: 612/470, loss: 0.10547507554292679 2023-01-22 19:16:18.173947: step: 614/470, loss: 0.12143544852733612 2023-01-22 19:16:18.973449: step: 616/470, loss: 0.08066742867231369 2023-01-22 19:16:19.726951: step: 618/470, loss: 0.03654317930340767 2023-01-22 19:16:20.502816: step: 620/470, loss: 0.03914722800254822 2023-01-22 19:16:21.262407: step: 622/470, loss: 0.05489708110690117 2023-01-22 19:16:22.082222: step: 624/470, loss: 0.06426960974931717 2023-01-22 19:16:22.807570: step: 626/470, loss: 0.06736937165260315 2023-01-22 19:16:23.537765: step: 628/470, loss: 0.11102497577667236 2023-01-22 19:16:24.289690: step: 630/470, loss: 0.012651097029447556 2023-01-22 19:16:25.013146: step: 632/470, loss: 2.867856502532959 2023-01-22 19:16:25.693945: step: 634/470, loss: 0.15367330610752106 2023-01-22 19:16:26.448546: step: 636/470, loss: 0.0965505912899971 2023-01-22 19:16:27.217108: step: 638/470, loss: 0.015126102603971958 2023-01-22 19:16:27.933854: step: 640/470, loss: 0.11571274697780609 2023-01-22 19:16:28.623615: step: 642/470, loss: 0.057073745876550674 2023-01-22 19:16:29.426185: step: 644/470, loss: 0.13571985065937042 2023-01-22 19:16:30.255348: step: 646/470, loss: 0.14553725719451904 2023-01-22 19:16:31.025859: step: 648/470, loss: 0.17457713186740875 2023-01-22 19:16:31.753179: step: 650/470, loss: 0.045833222568035126 2023-01-22 19:16:32.483774: step: 652/470, loss: 0.08109059184789658 2023-01-22 19:16:33.192824: step: 654/470, loss: 0.06932274252176285 2023-01-22 19:16:33.958008: step: 656/470, loss: 0.020560914650559425 2023-01-22 19:16:34.684502: step: 658/470, loss: 0.08866900950670242 2023-01-22 19:16:35.451616: step: 660/470, loss: 0.14196772873401642 2023-01-22 19:16:36.162638: step: 662/470, loss: 0.025075623765587807 2023-01-22 19:16:36.842088: step: 664/470, loss: 0.05653095617890358 2023-01-22 19:16:37.625660: step: 666/470, loss: 0.0553109236061573 2023-01-22 19:16:38.395956: step: 668/470, loss: 0.014820709824562073 2023-01-22 19:16:39.151044: step: 670/470, loss: 0.1057378500699997 2023-01-22 19:16:39.933059: step: 672/470, loss: 0.04209692403674126 2023-01-22 19:16:40.684481: step: 674/470, loss: 0.009847085922956467 2023-01-22 19:16:41.468044: step: 676/470, loss: 0.0047174664214253426 2023-01-22 19:16:42.140761: step: 678/470, loss: 0.04511541128158569 2023-01-22 19:16:42.929820: step: 680/470, loss: 0.04158242791891098 2023-01-22 19:16:43.640527: step: 682/470, loss: 0.02903454191982746 2023-01-22 19:16:44.404935: step: 684/470, loss: 0.015415815636515617 2023-01-22 19:16:45.072301: step: 686/470, loss: 0.050201885402202606 2023-01-22 19:16:45.953720: step: 688/470, loss: 0.05017707124352455 2023-01-22 19:16:46.632265: step: 690/470, loss: 0.09871582686901093 2023-01-22 19:16:47.356871: step: 692/470, loss: 0.12438881397247314 2023-01-22 19:16:48.061056: step: 694/470, loss: 1.1262582540512085 2023-01-22 19:16:48.798211: step: 696/470, loss: 0.15150487422943115 2023-01-22 19:16:49.530562: step: 698/470, loss: 0.8048862814903259 2023-01-22 19:16:50.221331: step: 700/470, loss: 0.04326051101088524 2023-01-22 19:16:50.965154: step: 702/470, loss: 0.05587514489889145 2023-01-22 19:16:51.720828: step: 704/470, loss: 0.07721059769392014 2023-01-22 19:16:52.479547: step: 706/470, loss: 0.06396961212158203 2023-01-22 19:16:53.261575: step: 708/470, loss: 0.057133980095386505 2023-01-22 19:16:54.142137: step: 710/470, loss: 0.14436408877372742 2023-01-22 19:16:54.888224: step: 712/470, loss: 0.058494627475738525 2023-01-22 19:16:55.657952: step: 714/470, loss: 0.6884530782699585 2023-01-22 19:16:56.432336: step: 716/470, loss: 0.07548058032989502 2023-01-22 19:16:57.155236: step: 718/470, loss: 0.020441729575395584 2023-01-22 19:16:57.874193: step: 720/470, loss: 0.23590579628944397 2023-01-22 19:16:58.538438: step: 722/470, loss: 0.05578876659274101 2023-01-22 19:16:59.267200: step: 724/470, loss: 0.12205921858549118 2023-01-22 19:16:59.958918: step: 726/470, loss: 0.08680590242147446 2023-01-22 19:17:00.712922: step: 728/470, loss: 0.07038237154483795 2023-01-22 19:17:01.468370: step: 730/470, loss: 0.0950479656457901 2023-01-22 19:17:02.254432: step: 732/470, loss: 0.019861508160829544 2023-01-22 19:17:02.988502: step: 734/470, loss: 0.014996448531746864 2023-01-22 19:17:03.708885: step: 736/470, loss: 0.10738176852464676 2023-01-22 19:17:04.471510: step: 738/470, loss: 0.07451841235160828 2023-01-22 19:17:05.253872: step: 740/470, loss: 0.1438053697347641 2023-01-22 19:17:05.970339: step: 742/470, loss: 0.11218611896038055 2023-01-22 19:17:06.710572: step: 744/470, loss: 0.06557218730449677 2023-01-22 19:17:07.434214: step: 746/470, loss: 0.04441092908382416 2023-01-22 19:17:08.234433: step: 748/470, loss: 0.09895999729633331 2023-01-22 19:17:09.035494: step: 750/470, loss: 0.22363828122615814 2023-01-22 19:17:09.866576: step: 752/470, loss: 0.029841436073184013 2023-01-22 19:17:10.592415: step: 754/470, loss: 0.14877954125404358 2023-01-22 19:17:11.342909: step: 756/470, loss: 0.025677978992462158 2023-01-22 19:17:11.968719: step: 758/470, loss: 0.05344592407345772 2023-01-22 19:17:12.670546: step: 760/470, loss: 0.029401803389191628 2023-01-22 19:17:13.352751: step: 762/470, loss: 0.03349433094263077 2023-01-22 19:17:14.082212: step: 764/470, loss: 0.08564404398202896 2023-01-22 19:17:14.814981: step: 766/470, loss: 0.04291162267327309 2023-01-22 19:17:15.500776: step: 768/470, loss: 0.02198793552815914 2023-01-22 19:17:16.231919: step: 770/470, loss: 0.04124632105231285 2023-01-22 19:17:16.997775: step: 772/470, loss: 0.07800045609474182 2023-01-22 19:17:17.786018: step: 774/470, loss: 0.008934520184993744 2023-01-22 19:17:18.468489: step: 776/470, loss: 0.006552144419401884 2023-01-22 19:17:19.359288: step: 778/470, loss: 0.008206459693610668 2023-01-22 19:17:20.007710: step: 780/470, loss: 0.07419215887784958 2023-01-22 19:17:20.743263: step: 782/470, loss: 0.07244870811700821 2023-01-22 19:17:21.522467: step: 784/470, loss: 0.010627111420035362 2023-01-22 19:17:22.337042: step: 786/470, loss: 0.02431710809469223 2023-01-22 19:17:23.089917: step: 788/470, loss: 0.053509872406721115 2023-01-22 19:17:23.851453: step: 790/470, loss: 0.030945895239710808 2023-01-22 19:17:24.602596: step: 792/470, loss: 0.03115873783826828 2023-01-22 19:17:25.359591: step: 794/470, loss: 0.07354782521724701 2023-01-22 19:17:26.084463: step: 796/470, loss: 0.04989173263311386 2023-01-22 19:17:26.836038: step: 798/470, loss: 0.047723811119794846 2023-01-22 19:17:27.610028: step: 800/470, loss: 0.15342743694782257 2023-01-22 19:17:28.371436: step: 802/470, loss: 0.018950236961245537 2023-01-22 19:17:29.069760: step: 804/470, loss: 0.12344154715538025 2023-01-22 19:17:29.825810: step: 806/470, loss: 0.04489344358444214 2023-01-22 19:17:30.528930: step: 808/470, loss: 0.013071177527308464 2023-01-22 19:17:31.272572: step: 810/470, loss: 0.1920340359210968 2023-01-22 19:17:31.976938: step: 812/470, loss: 0.041845425963401794 2023-01-22 19:17:32.749510: step: 814/470, loss: 0.031873006373643875 2023-01-22 19:17:33.439539: step: 816/470, loss: 0.008344985544681549 2023-01-22 19:17:34.220997: step: 818/470, loss: 0.4394215941429138 2023-01-22 19:17:34.978757: step: 820/470, loss: 0.046475548297166824 2023-01-22 19:17:35.869519: step: 822/470, loss: 0.023420801386237144 2023-01-22 19:17:36.540305: step: 824/470, loss: 0.046169888228178024 2023-01-22 19:17:37.296766: step: 826/470, loss: 0.0012928505893796682 2023-01-22 19:17:38.066800: step: 828/470, loss: 0.06589809060096741 2023-01-22 19:17:38.767875: step: 830/470, loss: 0.08819597214460373 2023-01-22 19:17:39.554641: step: 832/470, loss: 0.04559420421719551 2023-01-22 19:17:40.261647: step: 834/470, loss: 0.01821593940258026 2023-01-22 19:17:40.950821: step: 836/470, loss: 0.1270139217376709 2023-01-22 19:17:41.721348: step: 838/470, loss: 0.0652085393667221 2023-01-22 19:17:42.434508: step: 840/470, loss: 0.190468892455101 2023-01-22 19:17:43.218951: step: 842/470, loss: 0.4353700280189514 2023-01-22 19:17:44.008287: step: 844/470, loss: 0.08427219092845917 2023-01-22 19:17:44.790972: step: 846/470, loss: 0.044107187539339066 2023-01-22 19:17:45.486487: step: 848/470, loss: 0.025654705241322517 2023-01-22 19:17:46.271730: step: 850/470, loss: 0.06636069715023041 2023-01-22 19:17:47.030032: step: 852/470, loss: 0.10997164994478226 2023-01-22 19:17:47.722169: step: 854/470, loss: 0.006697851698845625 2023-01-22 19:17:48.494395: step: 856/470, loss: 0.07333116978406906 2023-01-22 19:17:49.164113: step: 858/470, loss: 0.05577976629137993 2023-01-22 19:17:49.843620: step: 860/470, loss: 0.5426846742630005 2023-01-22 19:17:50.665106: step: 862/470, loss: 0.11723057180643082 2023-01-22 19:17:51.467534: step: 864/470, loss: 0.2411772608757019 2023-01-22 19:17:52.153915: step: 866/470, loss: 0.05249849334359169 2023-01-22 19:17:52.845324: step: 868/470, loss: 0.03625642880797386 2023-01-22 19:17:53.628406: step: 870/470, loss: 0.04509961977601051 2023-01-22 19:17:54.388840: step: 872/470, loss: 0.013741428032517433 2023-01-22 19:17:55.118568: step: 874/470, loss: 0.04218994453549385 2023-01-22 19:17:55.882817: step: 876/470, loss: 0.055986031889915466 2023-01-22 19:17:56.680125: step: 878/470, loss: 0.048953864723443985 2023-01-22 19:17:57.349470: step: 880/470, loss: 0.09331956505775452 2023-01-22 19:17:58.120302: step: 882/470, loss: 0.6611769199371338 2023-01-22 19:17:58.819875: step: 884/470, loss: 0.07350049912929535 2023-01-22 19:17:59.628621: step: 886/470, loss: 0.18695667386054993 2023-01-22 19:18:00.446332: step: 888/470, loss: 0.07724187523126602 2023-01-22 19:18:01.179407: step: 890/470, loss: 0.06299719214439392 2023-01-22 19:18:01.969573: step: 892/470, loss: 0.10604791343212128 2023-01-22 19:18:02.688223: step: 894/470, loss: 0.04927706718444824 2023-01-22 19:18:03.405000: step: 896/470, loss: 0.1778610497713089 2023-01-22 19:18:04.091596: step: 898/470, loss: 0.10980663448572159 2023-01-22 19:18:04.838495: step: 900/470, loss: 0.05071067810058594 2023-01-22 19:18:05.571894: step: 902/470, loss: 0.14896638691425323 2023-01-22 19:18:06.308125: step: 904/470, loss: 0.09147992730140686 2023-01-22 19:18:07.065587: step: 906/470, loss: 0.054453279823064804 2023-01-22 19:18:07.809038: step: 908/470, loss: 0.11247234046459198 2023-01-22 19:18:08.621942: step: 910/470, loss: 0.05727868527173996 2023-01-22 19:18:09.499561: step: 912/470, loss: 0.050508055835962296 2023-01-22 19:18:10.211599: step: 914/470, loss: 0.03650575131177902 2023-01-22 19:18:11.054209: step: 916/470, loss: 0.08102092146873474 2023-01-22 19:18:11.808865: step: 918/470, loss: 0.06759768724441528 2023-01-22 19:18:12.568709: step: 920/470, loss: 0.1721310019493103 2023-01-22 19:18:13.366631: step: 922/470, loss: 0.03154829144477844 2023-01-22 19:18:14.181389: step: 924/470, loss: 0.21797621250152588 2023-01-22 19:18:14.842210: step: 926/470, loss: 0.06564341485500336 2023-01-22 19:18:15.734029: step: 928/470, loss: 0.09147854149341583 2023-01-22 19:18:16.489544: step: 930/470, loss: 0.014387188479304314 2023-01-22 19:18:17.173581: step: 932/470, loss: 0.08677081763744354 2023-01-22 19:18:17.886145: step: 934/470, loss: 0.003573360852897167 2023-01-22 19:18:18.644112: step: 936/470, loss: 0.19656676054000854 2023-01-22 19:18:19.412790: step: 938/470, loss: 0.19232219457626343 2023-01-22 19:18:20.180630: step: 940/470, loss: 0.13135120272636414 2023-01-22 19:18:20.779614: step: 942/470, loss: 0.04300255328416824 ================================================== Loss: 0.135 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2896543560606061, 'r': 0.3385713156230234, 'f1': 0.31220836978711}, 'combined': 0.2300482724747126, 'epoch': 19} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34115630463621305, 'r': 0.3657589227590169, 'f1': 0.3530294938926938}, 'combined': 0.24589118977600563, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28973168685247336, 'r': 0.3425101345523547, 'f1': 0.31391798418972333}, 'combined': 0.23130798835032243, 'epoch': 19} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34471087975247255, 'r': 0.36592385696800933, 'f1': 0.35500075676000903}, 'combined': 0.24726420868856352, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27054796847397744, 'r': 0.34344704157322753, 'f1': 0.30266988446336274}, 'combined': 0.22301991486774095, 'epoch': 19} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3327365613721349, 'r': 0.376248265551568, 'f1': 0.3531572167632046}, 'combined': 0.2459801509793465, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23684210526315788, 'r': 0.38571428571428573, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23837209302325582, 'r': 0.44565217391304346, 'f1': 0.3106060606060606}, 'combined': 0.1553030303030303, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3522727272727273, 'r': 0.2672413793103448, 'f1': 0.303921568627451}, 'combined': 0.20261437908496732, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:20:58.707184: step: 2/470, loss: 0.0692053884267807 2023-01-22 19:20:59.418216: step: 4/470, loss: 0.011433595791459084 2023-01-22 19:21:00.122098: step: 6/470, loss: 0.01590590551495552 2023-01-22 19:21:00.888888: step: 8/470, loss: 0.33525508642196655 2023-01-22 19:21:01.591339: step: 10/470, loss: 0.005369840655475855 2023-01-22 19:21:02.314967: step: 12/470, loss: 0.0471285842359066 2023-01-22 19:21:03.023435: step: 14/470, loss: 0.06779136508703232 2023-01-22 19:21:03.809403: step: 16/470, loss: 0.08163978904485703 2023-01-22 19:21:04.557517: step: 18/470, loss: 0.03811454027891159 2023-01-22 19:21:05.278699: step: 20/470, loss: 0.0674220472574234 2023-01-22 19:21:06.010267: step: 22/470, loss: 0.08177307993173599 2023-01-22 19:21:06.676541: step: 24/470, loss: 0.18022318184375763 2023-01-22 19:21:07.416887: step: 26/470, loss: 0.00783989205956459 2023-01-22 19:21:08.158253: step: 28/470, loss: 0.10600445419549942 2023-01-22 19:21:08.836756: step: 30/470, loss: 0.026321159675717354 2023-01-22 19:21:09.585922: step: 32/470, loss: 0.022986600175499916 2023-01-22 19:21:10.342098: step: 34/470, loss: 0.11820484697818756 2023-01-22 19:21:11.099983: step: 36/470, loss: 0.011436332017183304 2023-01-22 19:21:11.770266: step: 38/470, loss: 0.020493147894740105 2023-01-22 19:21:12.513259: step: 40/470, loss: 0.05821879953145981 2023-01-22 19:21:13.242647: step: 42/470, loss: 0.0022761637810617685 2023-01-22 19:21:13.882987: step: 44/470, loss: 0.036995094269514084 2023-01-22 19:21:14.652886: step: 46/470, loss: 0.03528263792395592 2023-01-22 19:21:15.406991: step: 48/470, loss: 0.09343452751636505 2023-01-22 19:21:16.114311: step: 50/470, loss: 0.06097423657774925 2023-01-22 19:21:16.874370: step: 52/470, loss: 0.10285371541976929 2023-01-22 19:21:17.484197: step: 54/470, loss: 0.00979323498904705 2023-01-22 19:21:18.212498: step: 56/470, loss: 0.062471531331539154 2023-01-22 19:21:18.960710: step: 58/470, loss: 0.02455725520849228 2023-01-22 19:21:19.670548: step: 60/470, loss: 0.0015147414524108171 2023-01-22 19:21:20.399563: step: 62/470, loss: 0.01592283509671688 2023-01-22 19:21:21.118960: step: 64/470, loss: 0.09599973261356354 2023-01-22 19:21:21.842819: step: 66/470, loss: 0.033546753227710724 2023-01-22 19:21:22.596284: step: 68/470, loss: 0.04527419060468674 2023-01-22 19:21:23.254008: step: 70/470, loss: 0.01420632191002369 2023-01-22 19:21:24.046475: step: 72/470, loss: 0.03015095181763172 2023-01-22 19:21:24.850251: step: 74/470, loss: 0.021656258031725883 2023-01-22 19:21:25.678253: step: 76/470, loss: 0.05661938339471817 2023-01-22 19:21:26.453438: step: 78/470, loss: 0.08553628623485565 2023-01-22 19:21:27.193424: step: 80/470, loss: 0.017559433355927467 2023-01-22 19:21:27.917188: step: 82/470, loss: 0.17091146111488342 2023-01-22 19:21:28.625114: step: 84/470, loss: 0.022843779996037483 2023-01-22 19:21:29.318992: step: 86/470, loss: 0.04321271553635597 2023-01-22 19:21:30.005253: step: 88/470, loss: 0.01985297165811062 2023-01-22 19:21:30.718990: step: 90/470, loss: 0.05657010152935982 2023-01-22 19:21:31.482850: step: 92/470, loss: 0.039768945425748825 2023-01-22 19:21:32.298437: step: 94/470, loss: 0.03442995622754097 2023-01-22 19:21:33.070699: step: 96/470, loss: 0.06425513327121735 2023-01-22 19:21:33.780925: step: 98/470, loss: 0.061561282724142075 2023-01-22 19:21:34.460038: step: 100/470, loss: 0.04602031782269478 2023-01-22 19:21:35.123889: step: 102/470, loss: 0.09504874795675278 2023-01-22 19:21:35.813244: step: 104/470, loss: 0.11075009405612946 2023-01-22 19:21:36.532243: step: 106/470, loss: 0.3181428015232086 2023-01-22 19:21:37.245643: step: 108/470, loss: 0.006885759066790342 2023-01-22 19:21:37.984793: step: 110/470, loss: 0.012126323767006397 2023-01-22 19:21:38.692990: step: 112/470, loss: 0.04041305184364319 2023-01-22 19:21:39.404271: step: 114/470, loss: 0.053244318813085556 2023-01-22 19:21:40.117600: step: 116/470, loss: 0.06406726688146591 2023-01-22 19:21:40.856560: step: 118/470, loss: 0.276273250579834 2023-01-22 19:21:41.604082: step: 120/470, loss: 0.017418205738067627 2023-01-22 19:21:42.339246: step: 122/470, loss: 0.0642717108130455 2023-01-22 19:21:43.108765: step: 124/470, loss: 0.06656879186630249 2023-01-22 19:21:43.901260: step: 126/470, loss: 0.038813501596450806 2023-01-22 19:21:44.602613: step: 128/470, loss: 0.037797726690769196 2023-01-22 19:21:45.441408: step: 130/470, loss: 0.012955233454704285 2023-01-22 19:21:46.163233: step: 132/470, loss: 0.14164546132087708 2023-01-22 19:21:46.937604: step: 134/470, loss: 0.026768483221530914 2023-01-22 19:21:47.679495: step: 136/470, loss: 0.518687903881073 2023-01-22 19:21:48.443330: step: 138/470, loss: 0.08869636803865433 2023-01-22 19:21:49.159757: step: 140/470, loss: 0.4429130554199219 2023-01-22 19:21:49.908325: step: 142/470, loss: 0.03325126692652702 2023-01-22 19:21:50.639400: step: 144/470, loss: 0.046417348086833954 2023-01-22 19:21:51.385944: step: 146/470, loss: 0.022128436714410782 2023-01-22 19:21:52.055316: step: 148/470, loss: 0.08988092094659805 2023-01-22 19:21:52.803145: step: 150/470, loss: 0.05364342778921127 2023-01-22 19:21:53.515825: step: 152/470, loss: 0.017953716218471527 2023-01-22 19:21:54.304967: step: 154/470, loss: 0.09780565649271011 2023-01-22 19:21:55.007645: step: 156/470, loss: 0.009147114120423794 2023-01-22 19:21:55.684019: step: 158/470, loss: 0.09362566471099854 2023-01-22 19:21:56.371372: step: 160/470, loss: 0.013519582338631153 2023-01-22 19:21:57.045461: step: 162/470, loss: 0.45257019996643066 2023-01-22 19:21:57.703983: step: 164/470, loss: 0.08413586020469666 2023-01-22 19:21:58.484775: step: 166/470, loss: 0.04121706634759903 2023-01-22 19:21:59.237667: step: 168/470, loss: 0.026533417403697968 2023-01-22 19:22:00.017764: step: 170/470, loss: 0.21483860909938812 2023-01-22 19:22:00.828401: step: 172/470, loss: 0.024038920179009438 2023-01-22 19:22:01.545665: step: 174/470, loss: 0.2483181357383728 2023-01-22 19:22:02.242755: step: 176/470, loss: 0.030309997498989105 2023-01-22 19:22:03.014131: step: 178/470, loss: 0.02682347223162651 2023-01-22 19:22:03.801595: step: 180/470, loss: 0.17442865669727325 2023-01-22 19:22:04.509260: step: 182/470, loss: 0.27949750423431396 2023-01-22 19:22:05.277534: step: 184/470, loss: 0.035176832228899 2023-01-22 19:22:06.028869: step: 186/470, loss: 0.06619660556316376 2023-01-22 19:22:06.743419: step: 188/470, loss: 1.9417052268981934 2023-01-22 19:22:07.466248: step: 190/470, loss: 0.1208546981215477 2023-01-22 19:22:08.193593: step: 192/470, loss: 0.12052920460700989 2023-01-22 19:22:08.888187: step: 194/470, loss: 0.06708040833473206 2023-01-22 19:22:09.509185: step: 196/470, loss: 0.0005991773214191198 2023-01-22 19:22:10.237525: step: 198/470, loss: 0.18721455335617065 2023-01-22 19:22:10.936592: step: 200/470, loss: 0.031240878626704216 2023-01-22 19:22:11.664318: step: 202/470, loss: 0.2110924869775772 2023-01-22 19:22:12.397701: step: 204/470, loss: 0.11531258374452591 2023-01-22 19:22:13.165479: step: 206/470, loss: 0.04128406569361687 2023-01-22 19:22:13.939562: step: 208/470, loss: 0.07297002524137497 2023-01-22 19:22:14.639829: step: 210/470, loss: 0.04553265497088432 2023-01-22 19:22:15.423868: step: 212/470, loss: 0.02252204902470112 2023-01-22 19:22:16.267002: step: 214/470, loss: 0.25439098477363586 2023-01-22 19:22:17.034700: step: 216/470, loss: 0.7509600520133972 2023-01-22 19:22:17.699004: step: 218/470, loss: 0.034447189420461655 2023-01-22 19:22:18.383855: step: 220/470, loss: 0.01638934761285782 2023-01-22 19:22:19.099260: step: 222/470, loss: 0.04577914625406265 2023-01-22 19:22:19.902233: step: 224/470, loss: 0.04711466655135155 2023-01-22 19:22:20.638785: step: 226/470, loss: 0.30996254086494446 2023-01-22 19:22:21.387905: step: 228/470, loss: 0.00331405783072114 2023-01-22 19:22:22.114380: step: 230/470, loss: 0.07725219428539276 2023-01-22 19:22:22.759373: step: 232/470, loss: 0.5706613659858704 2023-01-22 19:22:23.522848: step: 234/470, loss: 0.03361904248595238 2023-01-22 19:22:24.280965: step: 236/470, loss: 0.04464048147201538 2023-01-22 19:22:25.018439: step: 238/470, loss: 0.024454206228256226 2023-01-22 19:22:25.723010: step: 240/470, loss: 0.012058422900736332 2023-01-22 19:22:26.495202: step: 242/470, loss: 0.07473550736904144 2023-01-22 19:22:27.229583: step: 244/470, loss: 0.06981978565454483 2023-01-22 19:22:27.951021: step: 246/470, loss: 0.06838370859622955 2023-01-22 19:22:28.653617: step: 248/470, loss: 1.1867821216583252 2023-01-22 19:22:29.361748: step: 250/470, loss: 0.1255021095275879 2023-01-22 19:22:30.113793: step: 252/470, loss: 0.009189283475279808 2023-01-22 19:22:30.887478: step: 254/470, loss: 0.060588739812374115 2023-01-22 19:22:31.658794: step: 256/470, loss: 0.08720239251852036 2023-01-22 19:22:32.436940: step: 258/470, loss: 0.04909271374344826 2023-01-22 19:22:33.275325: step: 260/470, loss: 0.018197335302829742 2023-01-22 19:22:33.969586: step: 262/470, loss: 0.034091971814632416 2023-01-22 19:22:34.694112: step: 264/470, loss: 0.036441948264837265 2023-01-22 19:22:35.407508: step: 266/470, loss: 0.00993148423731327 2023-01-22 19:22:36.171794: step: 268/470, loss: 0.053492508828639984 2023-01-22 19:22:37.002455: step: 270/470, loss: 0.04624278470873833 2023-01-22 19:22:37.688756: step: 272/470, loss: 0.17606455087661743 2023-01-22 19:22:38.402182: step: 274/470, loss: 0.018186697736382484 2023-01-22 19:22:39.137841: step: 276/470, loss: 0.059042301028966904 2023-01-22 19:22:39.926922: step: 278/470, loss: 0.05925830453634262 2023-01-22 19:22:40.650603: step: 280/470, loss: 0.017211155965924263 2023-01-22 19:22:41.445830: step: 282/470, loss: 1.1372332572937012 2023-01-22 19:22:42.183590: step: 284/470, loss: 0.011981363408267498 2023-01-22 19:22:42.947744: step: 286/470, loss: 0.0610564649105072 2023-01-22 19:22:43.767243: step: 288/470, loss: 0.016656432300806046 2023-01-22 19:22:44.605001: step: 290/470, loss: 0.04388560354709625 2023-01-22 19:22:45.296104: step: 292/470, loss: 0.03743298724293709 2023-01-22 19:22:46.027816: step: 294/470, loss: 0.021319955587387085 2023-01-22 19:22:46.798257: step: 296/470, loss: 0.08074367791414261 2023-01-22 19:22:47.485984: step: 298/470, loss: 0.14202333986759186 2023-01-22 19:22:48.232411: step: 300/470, loss: 0.09190955758094788 2023-01-22 19:22:49.011038: step: 302/470, loss: 0.053686317056417465 2023-01-22 19:22:49.710394: step: 304/470, loss: 0.05814488232135773 2023-01-22 19:22:50.443605: step: 306/470, loss: 0.028976615518331528 2023-01-22 19:22:51.187582: step: 308/470, loss: 0.03165995329618454 2023-01-22 19:22:51.958796: step: 310/470, loss: 0.18481889367103577 2023-01-22 19:22:52.691119: step: 312/470, loss: 0.06986112892627716 2023-01-22 19:22:53.401273: step: 314/470, loss: 0.08050384372472763 2023-01-22 19:22:54.090276: step: 316/470, loss: 0.03150481358170509 2023-01-22 19:22:54.794698: step: 318/470, loss: 0.027586523443460464 2023-01-22 19:22:55.531962: step: 320/470, loss: 0.00899940449744463 2023-01-22 19:22:56.247863: step: 322/470, loss: 0.02793573960661888 2023-01-22 19:22:57.051736: step: 324/470, loss: 0.04435381293296814 2023-01-22 19:22:57.723682: step: 326/470, loss: 0.037694673985242844 2023-01-22 19:22:58.493012: step: 328/470, loss: 0.8200516700744629 2023-01-22 19:22:59.237806: step: 330/470, loss: 0.02281474508345127 2023-01-22 19:23:00.003086: step: 332/470, loss: 0.09635286033153534 2023-01-22 19:23:00.784905: step: 334/470, loss: 0.15880021452903748 2023-01-22 19:23:01.503427: step: 336/470, loss: 0.00870030838996172 2023-01-22 19:23:02.225178: step: 338/470, loss: 0.0723002627491951 2023-01-22 19:23:03.054096: step: 340/470, loss: 0.07791668176651001 2023-01-22 19:23:03.807530: step: 342/470, loss: 0.041542887687683105 2023-01-22 19:23:04.544401: step: 344/470, loss: 0.07663816213607788 2023-01-22 19:23:05.247814: step: 346/470, loss: 0.02207820490002632 2023-01-22 19:23:05.981158: step: 348/470, loss: 0.11398249864578247 2023-01-22 19:23:06.765153: step: 350/470, loss: 0.06222032010555267 2023-01-22 19:23:07.457969: step: 352/470, loss: 0.7580427527427673 2023-01-22 19:23:08.132136: step: 354/470, loss: 0.31359562277793884 2023-01-22 19:23:08.856713: step: 356/470, loss: 0.09936029464006424 2023-01-22 19:23:09.538731: step: 358/470, loss: 0.10575620830059052 2023-01-22 19:23:10.213620: step: 360/470, loss: 0.08771451562643051 2023-01-22 19:23:10.993458: step: 362/470, loss: 0.045165304094552994 2023-01-22 19:23:11.736430: step: 364/470, loss: 0.08250702917575836 2023-01-22 19:23:12.486359: step: 366/470, loss: 0.036362942308187485 2023-01-22 19:23:13.259469: step: 368/470, loss: 0.035159844905138016 2023-01-22 19:23:13.936694: step: 370/470, loss: 0.06006016209721565 2023-01-22 19:23:14.694024: step: 372/470, loss: 0.11637677252292633 2023-01-22 19:23:15.474595: step: 374/470, loss: 0.012416702695190907 2023-01-22 19:23:16.233723: step: 376/470, loss: 0.1937997043132782 2023-01-22 19:23:16.937275: step: 378/470, loss: 0.0882231742143631 2023-01-22 19:23:17.655984: step: 380/470, loss: 0.015106822364032269 2023-01-22 19:23:18.356817: step: 382/470, loss: 0.1471942663192749 2023-01-22 19:23:19.079151: step: 384/470, loss: 0.024438517168164253 2023-01-22 19:23:19.822131: step: 386/470, loss: 0.015194579027593136 2023-01-22 19:23:20.536166: step: 388/470, loss: 0.18839724361896515 2023-01-22 19:23:21.380251: step: 390/470, loss: 0.05323400720953941 2023-01-22 19:23:22.069502: step: 392/470, loss: 0.06262640655040741 2023-01-22 19:23:22.774626: step: 394/470, loss: 0.052381135523319244 2023-01-22 19:23:23.472595: step: 396/470, loss: 0.1799657940864563 2023-01-22 19:23:24.182306: step: 398/470, loss: 0.004220017232000828 2023-01-22 19:23:24.939874: step: 400/470, loss: 0.4785889685153961 2023-01-22 19:23:25.626890: step: 402/470, loss: 0.03493707254528999 2023-01-22 19:23:26.328670: step: 404/470, loss: 0.07729945331811905 2023-01-22 19:23:27.141827: step: 406/470, loss: 0.1558285653591156 2023-01-22 19:23:28.059616: step: 408/470, loss: 0.14127235114574432 2023-01-22 19:23:28.828024: step: 410/470, loss: 0.24950149655342102 2023-01-22 19:23:29.589954: step: 412/470, loss: 0.041239332407712936 2023-01-22 19:23:30.272262: step: 414/470, loss: 0.2635876536369324 2023-01-22 19:23:31.040628: step: 416/470, loss: 0.10579922795295715 2023-01-22 19:23:31.763657: step: 418/470, loss: 0.03441992029547691 2023-01-22 19:23:32.599285: step: 420/470, loss: 0.032621387392282486 2023-01-22 19:23:33.422928: step: 422/470, loss: 0.05008916184306145 2023-01-22 19:23:34.156681: step: 424/470, loss: 0.13900715112686157 2023-01-22 19:23:34.878977: step: 426/470, loss: 0.023830300197005272 2023-01-22 19:23:35.647493: step: 428/470, loss: 0.03306613117456436 2023-01-22 19:23:36.339882: step: 430/470, loss: 0.014829058200120926 2023-01-22 19:23:37.136646: step: 432/470, loss: 0.042442865669727325 2023-01-22 19:23:37.899929: step: 434/470, loss: 0.32640278339385986 2023-01-22 19:23:38.639286: step: 436/470, loss: 0.010043848305940628 2023-01-22 19:23:39.377640: step: 438/470, loss: 0.017869826406240463 2023-01-22 19:23:40.088560: step: 440/470, loss: 0.1027931347489357 2023-01-22 19:23:40.807937: step: 442/470, loss: 0.0788130909204483 2023-01-22 19:23:41.506477: step: 444/470, loss: 0.12138024717569351 2023-01-22 19:23:42.244356: step: 446/470, loss: 0.01406558882445097 2023-01-22 19:23:42.976507: step: 448/470, loss: 0.004238845780491829 2023-01-22 19:23:43.812107: step: 450/470, loss: 0.0067049539647996426 2023-01-22 19:23:44.539949: step: 452/470, loss: 0.04448673129081726 2023-01-22 19:23:45.333396: step: 454/470, loss: 0.3222072422504425 2023-01-22 19:23:46.005063: step: 456/470, loss: 0.4006147086620331 2023-01-22 19:23:46.778755: step: 458/470, loss: 0.1256273090839386 2023-01-22 19:23:47.465175: step: 460/470, loss: 0.014067552983760834 2023-01-22 19:23:48.214471: step: 462/470, loss: 0.030554937198758125 2023-01-22 19:23:48.911074: step: 464/470, loss: 0.05165807902812958 2023-01-22 19:23:49.627001: step: 466/470, loss: 0.0559585839509964 2023-01-22 19:23:50.371735: step: 468/470, loss: 0.751693844795227 2023-01-22 19:23:51.105770: step: 470/470, loss: 0.042564861476421356 2023-01-22 19:23:51.828637: step: 472/470, loss: 0.04895629733800888 2023-01-22 19:23:52.636709: step: 474/470, loss: 0.037162844091653824 2023-01-22 19:23:53.397657: step: 476/470, loss: 0.025382233783602715 2023-01-22 19:23:54.145907: step: 478/470, loss: 0.06292865425348282 2023-01-22 19:23:54.926520: step: 480/470, loss: 0.014180587604641914 2023-01-22 19:23:55.782702: step: 482/470, loss: 0.027322018519043922 2023-01-22 19:23:56.553850: step: 484/470, loss: 0.1358848214149475 2023-01-22 19:23:57.235051: step: 486/470, loss: 0.050086334347724915 2023-01-22 19:23:57.959494: step: 488/470, loss: 0.01673760637640953 2023-01-22 19:23:58.660760: step: 490/470, loss: 0.021020902320742607 2023-01-22 19:23:59.356798: step: 492/470, loss: 0.015545294620096684 2023-01-22 19:24:00.092202: step: 494/470, loss: 0.3143145740032196 2023-01-22 19:24:00.844180: step: 496/470, loss: 0.0633402094244957 2023-01-22 19:24:01.582114: step: 498/470, loss: 0.018096817657351494 2023-01-22 19:24:02.327989: step: 500/470, loss: 0.0012216436443850398 2023-01-22 19:24:03.050670: step: 502/470, loss: 0.1902422159910202 2023-01-22 19:24:03.823157: step: 504/470, loss: 0.07036527991294861 2023-01-22 19:24:04.625708: step: 506/470, loss: 0.06424633413553238 2023-01-22 19:24:05.362567: step: 508/470, loss: 0.0811762586236 2023-01-22 19:24:06.094200: step: 510/470, loss: 0.06267711520195007 2023-01-22 19:24:06.960309: step: 512/470, loss: 0.023827198892831802 2023-01-22 19:24:07.696632: step: 514/470, loss: 0.028254536911845207 2023-01-22 19:24:08.393815: step: 516/470, loss: 0.012615739367902279 2023-01-22 19:24:09.048379: step: 518/470, loss: 0.12324491143226624 2023-01-22 19:24:09.754972: step: 520/470, loss: 0.08821484446525574 2023-01-22 19:24:10.477950: step: 522/470, loss: 0.01370615977793932 2023-01-22 19:24:11.219725: step: 524/470, loss: 0.02321157045662403 2023-01-22 19:24:11.889587: step: 526/470, loss: 0.02410917542874813 2023-01-22 19:24:12.636406: step: 528/470, loss: 0.052108101546764374 2023-01-22 19:24:13.356301: step: 530/470, loss: 0.0737091451883316 2023-01-22 19:24:14.070779: step: 532/470, loss: 0.05240051448345184 2023-01-22 19:24:14.750411: step: 534/470, loss: 0.14461572468280792 2023-01-22 19:24:15.443093: step: 536/470, loss: 0.06819018721580505 2023-01-22 19:24:16.200799: step: 538/470, loss: 0.08415396511554718 2023-01-22 19:24:16.874576: step: 540/470, loss: 0.039721451699733734 2023-01-22 19:24:17.623755: step: 542/470, loss: 0.24157670140266418 2023-01-22 19:24:18.323053: step: 544/470, loss: 0.05411506071686745 2023-01-22 19:24:19.228137: step: 546/470, loss: 0.07562316954135895 2023-01-22 19:24:19.961701: step: 548/470, loss: 0.1613171249628067 2023-01-22 19:24:20.634580: step: 550/470, loss: 0.04178428277373314 2023-01-22 19:24:21.364654: step: 552/470, loss: 0.014160760678350925 2023-01-22 19:24:22.114479: step: 554/470, loss: 0.04917175695300102 2023-01-22 19:24:22.823697: step: 556/470, loss: 0.01794840767979622 2023-01-22 19:24:23.595418: step: 558/470, loss: 0.07323423773050308 2023-01-22 19:24:24.342951: step: 560/470, loss: 0.014605486765503883 2023-01-22 19:24:25.008139: step: 562/470, loss: 0.07594714313745499 2023-01-22 19:24:25.704587: step: 564/470, loss: 0.029397977516055107 2023-01-22 19:24:26.455916: step: 566/470, loss: 0.01664639636874199 2023-01-22 19:24:27.315643: step: 568/470, loss: 0.02191932313144207 2023-01-22 19:24:27.986309: step: 570/470, loss: 0.06313026696443558 2023-01-22 19:24:28.741002: step: 572/470, loss: 0.022390831261873245 2023-01-22 19:24:29.472324: step: 574/470, loss: 0.16852179169654846 2023-01-22 19:24:30.185697: step: 576/470, loss: 0.041340142488479614 2023-01-22 19:24:30.963406: step: 578/470, loss: 0.0978948175907135 2023-01-22 19:24:31.868249: step: 580/470, loss: 0.10298915952444077 2023-01-22 19:24:32.654203: step: 582/470, loss: 0.051485490053892136 2023-01-22 19:24:33.526026: step: 584/470, loss: 0.03344123065471649 2023-01-22 19:24:34.325303: step: 586/470, loss: 0.03898704797029495 2023-01-22 19:24:35.022885: step: 588/470, loss: 0.15321972966194153 2023-01-22 19:24:35.712909: step: 590/470, loss: 0.3100159466266632 2023-01-22 19:24:36.490436: step: 592/470, loss: 0.03277548775076866 2023-01-22 19:24:37.175190: step: 594/470, loss: 0.06358427554368973 2023-01-22 19:24:38.056103: step: 596/470, loss: 0.21886910498142242 2023-01-22 19:24:38.766804: step: 598/470, loss: 0.06400435417890549 2023-01-22 19:24:39.580191: step: 600/470, loss: 0.021453171968460083 2023-01-22 19:24:40.287321: step: 602/470, loss: 0.029457837343215942 2023-01-22 19:24:41.078703: step: 604/470, loss: 0.06845896691083908 2023-01-22 19:24:41.828807: step: 606/470, loss: 0.33153408765792847 2023-01-22 19:24:42.587636: step: 608/470, loss: 0.40867775678634644 2023-01-22 19:24:43.364159: step: 610/470, loss: 0.8713477849960327 2023-01-22 19:24:44.069051: step: 612/470, loss: 0.05822436138987541 2023-01-22 19:24:44.797396: step: 614/470, loss: 0.01446563471108675 2023-01-22 19:24:45.569005: step: 616/470, loss: 0.05371719226241112 2023-01-22 19:24:46.396025: step: 618/470, loss: 0.22751787304878235 2023-01-22 19:24:47.151667: step: 620/470, loss: 0.20397146046161652 2023-01-22 19:24:47.901919: step: 622/470, loss: 0.041325464844703674 2023-01-22 19:24:48.628850: step: 624/470, loss: 0.0995861068367958 2023-01-22 19:24:49.318558: step: 626/470, loss: 0.4585106074810028 2023-01-22 19:24:50.028984: step: 628/470, loss: 0.11708439886569977 2023-01-22 19:24:50.756652: step: 630/470, loss: 0.07522755861282349 2023-01-22 19:24:51.498027: step: 632/470, loss: 0.9740254878997803 2023-01-22 19:24:52.224520: step: 634/470, loss: 0.32297220826148987 2023-01-22 19:24:53.091530: step: 636/470, loss: 0.03178303316235542 2023-01-22 19:24:53.876604: step: 638/470, loss: 0.07319549471139908 2023-01-22 19:24:54.611177: step: 640/470, loss: 0.02889355830848217 2023-01-22 19:24:55.352013: step: 642/470, loss: 0.10229209810495377 2023-01-22 19:24:56.051614: step: 644/470, loss: 0.039184339344501495 2023-01-22 19:24:56.795497: step: 646/470, loss: 0.07694227993488312 2023-01-22 19:24:57.511291: step: 648/470, loss: 0.10057219117879868 2023-01-22 19:24:58.221588: step: 650/470, loss: 0.05900505557656288 2023-01-22 19:24:58.974136: step: 652/470, loss: 0.03375502675771713 2023-01-22 19:24:59.673402: step: 654/470, loss: 0.011107545346021652 2023-01-22 19:25:00.453336: step: 656/470, loss: 0.04302780702710152 2023-01-22 19:25:01.153844: step: 658/470, loss: 0.05545317381620407 2023-01-22 19:25:01.850795: step: 660/470, loss: 0.1119319424033165 2023-01-22 19:25:02.637076: step: 662/470, loss: 0.043362777680158615 2023-01-22 19:25:03.380577: step: 664/470, loss: 0.1941765397787094 2023-01-22 19:25:04.106647: step: 666/470, loss: 0.06740397959947586 2023-01-22 19:25:04.777399: step: 668/470, loss: 0.04577159881591797 2023-01-22 19:25:05.490071: step: 670/470, loss: 0.11848827451467514 2023-01-22 19:25:06.237748: step: 672/470, loss: 0.03170297294855118 2023-01-22 19:25:06.927019: step: 674/470, loss: 0.14291562139987946 2023-01-22 19:25:07.604685: step: 676/470, loss: 0.07052916288375854 2023-01-22 19:25:08.336869: step: 678/470, loss: 0.08889731764793396 2023-01-22 19:25:09.057863: step: 680/470, loss: 0.18591055274009705 2023-01-22 19:25:09.798789: step: 682/470, loss: 0.058656156063079834 2023-01-22 19:25:10.552769: step: 684/470, loss: 0.06513772159814835 2023-01-22 19:25:11.282517: step: 686/470, loss: 0.10984183102846146 2023-01-22 19:25:12.000373: step: 688/470, loss: 0.07132207602262497 2023-01-22 19:25:12.727972: step: 690/470, loss: 0.020248308777809143 2023-01-22 19:25:13.368225: step: 692/470, loss: 0.07261063158512115 2023-01-22 19:25:14.176736: step: 694/470, loss: 0.03563803806900978 2023-01-22 19:25:15.000786: step: 696/470, loss: 0.0738886147737503 2023-01-22 19:25:15.807954: step: 698/470, loss: 0.06751029938459396 2023-01-22 19:25:16.578859: step: 700/470, loss: 0.23834992945194244 2023-01-22 19:25:17.262020: step: 702/470, loss: 0.013831811025738716 2023-01-22 19:25:18.001714: step: 704/470, loss: 0.3347160816192627 2023-01-22 19:25:18.762991: step: 706/470, loss: 0.05514927953481674 2023-01-22 19:25:19.516843: step: 708/470, loss: 0.16086360812187195 2023-01-22 19:25:20.239281: step: 710/470, loss: 0.011491565965116024 2023-01-22 19:25:21.066616: step: 712/470, loss: 0.09873014688491821 2023-01-22 19:25:21.812592: step: 714/470, loss: 0.10902773588895798 2023-01-22 19:25:22.503661: step: 716/470, loss: 0.018024688586592674 2023-01-22 19:25:23.218998: step: 718/470, loss: 0.046969544142484665 2023-01-22 19:25:23.889410: step: 720/470, loss: 0.4311079680919647 2023-01-22 19:25:24.613146: step: 722/470, loss: 0.0782650038599968 2023-01-22 19:25:25.247505: step: 724/470, loss: 0.35217511653900146 2023-01-22 19:25:25.989395: step: 726/470, loss: 0.07346347719430923 2023-01-22 19:25:26.743591: step: 728/470, loss: 0.09162180125713348 2023-01-22 19:25:27.579429: step: 730/470, loss: 0.10204845666885376 2023-01-22 19:25:28.323196: step: 732/470, loss: 0.009644899517297745 2023-01-22 19:25:29.158414: step: 734/470, loss: 0.0478099063038826 2023-01-22 19:25:29.854635: step: 736/470, loss: 0.07511983066797256 2023-01-22 19:25:30.503683: step: 738/470, loss: 0.06089290976524353 2023-01-22 19:25:31.235281: step: 740/470, loss: 0.05333712324500084 2023-01-22 19:25:32.000536: step: 742/470, loss: 0.04015492647886276 2023-01-22 19:25:32.721305: step: 744/470, loss: 0.04333452507853508 2023-01-22 19:25:33.487827: step: 746/470, loss: 0.11922835558652878 2023-01-22 19:25:34.217857: step: 748/470, loss: 0.03594861179590225 2023-01-22 19:25:34.884313: step: 750/470, loss: 0.06654554605484009 2023-01-22 19:25:35.680558: step: 752/470, loss: 0.07238250225782394 2023-01-22 19:25:36.505843: step: 754/470, loss: 0.08213133364915848 2023-01-22 19:25:37.272944: step: 756/470, loss: 0.08642800152301788 2023-01-22 19:25:37.986174: step: 758/470, loss: 0.06127552315592766 2023-01-22 19:25:38.762573: step: 760/470, loss: 0.05956351384520531 2023-01-22 19:25:39.451235: step: 762/470, loss: 0.03790678456425667 2023-01-22 19:25:40.125595: step: 764/470, loss: 0.00936797820031643 2023-01-22 19:25:40.967501: step: 766/470, loss: 0.07397977262735367 2023-01-22 19:25:41.703526: step: 768/470, loss: 0.049599699676036835 2023-01-22 19:25:42.449141: step: 770/470, loss: 0.04407091066241264 2023-01-22 19:25:43.308989: step: 772/470, loss: 0.02440464124083519 2023-01-22 19:25:44.085496: step: 774/470, loss: 0.015560795553028584 2023-01-22 19:25:44.826980: step: 776/470, loss: 0.03959346562623978 2023-01-22 19:25:45.614542: step: 778/470, loss: 0.014441375620663166 2023-01-22 19:25:46.371698: step: 780/470, loss: 0.02576235495507717 2023-01-22 19:25:47.030848: step: 782/470, loss: 0.06063159555196762 2023-01-22 19:25:47.733488: step: 784/470, loss: 0.005881750024855137 2023-01-22 19:25:48.550789: step: 786/470, loss: 0.019624939188361168 2023-01-22 19:25:49.366869: step: 788/470, loss: 0.016726138070225716 2023-01-22 19:25:50.200662: step: 790/470, loss: 0.044762153178453445 2023-01-22 19:25:50.978750: step: 792/470, loss: 0.017699047923088074 2023-01-22 19:25:51.719409: step: 794/470, loss: 0.03728308528661728 2023-01-22 19:25:52.436993: step: 796/470, loss: 0.01777748577296734 2023-01-22 19:25:53.166070: step: 798/470, loss: 0.20916269719600677 2023-01-22 19:25:53.924386: step: 800/470, loss: 0.13329799473285675 2023-01-22 19:25:54.676672: step: 802/470, loss: 0.01543135941028595 2023-01-22 19:25:55.385634: step: 804/470, loss: 0.0656021237373352 2023-01-22 19:25:56.127476: step: 806/470, loss: 0.027505187317728996 2023-01-22 19:25:56.885223: step: 808/470, loss: 0.10071372240781784 2023-01-22 19:25:57.638773: step: 810/470, loss: 0.10161048173904419 2023-01-22 19:25:58.395684: step: 812/470, loss: 0.006511330138891935 2023-01-22 19:25:59.227934: step: 814/470, loss: 0.05376344919204712 2023-01-22 19:25:59.941956: step: 816/470, loss: 0.07200208306312561 2023-01-22 19:26:00.639760: step: 818/470, loss: 0.05889580398797989 2023-01-22 19:26:01.357005: step: 820/470, loss: 0.009265481494367123 2023-01-22 19:26:02.118498: step: 822/470, loss: 0.12977077066898346 2023-01-22 19:26:02.820893: step: 824/470, loss: 0.041659578680992126 2023-01-22 19:26:03.698665: step: 826/470, loss: 0.29780128598213196 2023-01-22 19:26:04.436006: step: 828/470, loss: 0.027976591140031815 2023-01-22 19:26:05.219766: step: 830/470, loss: 0.1818331480026245 2023-01-22 19:26:06.024063: step: 832/470, loss: 0.04622675105929375 2023-01-22 19:26:06.713292: step: 834/470, loss: 0.061723433434963226 2023-01-22 19:26:07.411587: step: 836/470, loss: 0.1804792582988739 2023-01-22 19:26:08.219146: step: 838/470, loss: 0.08750531077384949 2023-01-22 19:26:09.018525: step: 840/470, loss: 0.12658429145812988 2023-01-22 19:26:09.804297: step: 842/470, loss: 0.029569925740361214 2023-01-22 19:26:10.528423: step: 844/470, loss: 0.01851717382669449 2023-01-22 19:26:11.339495: step: 846/470, loss: 0.0617799237370491 2023-01-22 19:26:12.100105: step: 848/470, loss: 0.016349801793694496 2023-01-22 19:26:12.799633: step: 850/470, loss: 0.11626914143562317 2023-01-22 19:26:13.467146: step: 852/470, loss: 0.24209186434745789 2023-01-22 19:26:14.170058: step: 854/470, loss: 0.056616343557834625 2023-01-22 19:26:14.955600: step: 856/470, loss: 0.18732884526252747 2023-01-22 19:26:15.696329: step: 858/470, loss: 0.05298414081335068 2023-01-22 19:26:16.450679: step: 860/470, loss: 0.42847129702568054 2023-01-22 19:26:17.238470: step: 862/470, loss: 0.033945754170417786 2023-01-22 19:26:18.018095: step: 864/470, loss: 0.026322832331061363 2023-01-22 19:26:18.740843: step: 866/470, loss: 0.02989846095442772 2023-01-22 19:26:19.464544: step: 868/470, loss: 0.3587307929992676 2023-01-22 19:26:20.242980: step: 870/470, loss: 0.05438103526830673 2023-01-22 19:26:20.971928: step: 872/470, loss: 0.04368476942181587 2023-01-22 19:26:21.751252: step: 874/470, loss: 0.03072342276573181 2023-01-22 19:26:22.527408: step: 876/470, loss: 0.21938635408878326 2023-01-22 19:26:23.189313: step: 878/470, loss: 0.08039675652980804 2023-01-22 19:26:23.881985: step: 880/470, loss: 0.010841970331966877 2023-01-22 19:26:24.613259: step: 882/470, loss: 0.043295301496982574 2023-01-22 19:26:25.338566: step: 884/470, loss: 0.013651460409164429 2023-01-22 19:26:26.045822: step: 886/470, loss: 0.2560485005378723 2023-01-22 19:26:26.736924: step: 888/470, loss: 0.021218927577137947 2023-01-22 19:26:27.425264: step: 890/470, loss: 0.054894767701625824 2023-01-22 19:26:28.145881: step: 892/470, loss: 0.0300539992749691 2023-01-22 19:26:28.869388: step: 894/470, loss: 0.0708957314491272 2023-01-22 19:26:29.565805: step: 896/470, loss: 0.09575964510440826 2023-01-22 19:26:30.272118: step: 898/470, loss: 0.031115295365452766 2023-01-22 19:26:31.008150: step: 900/470, loss: 0.06746406108140945 2023-01-22 19:26:31.788680: step: 902/470, loss: 0.06835021823644638 2023-01-22 19:26:32.511563: step: 904/470, loss: 0.021396541967988014 2023-01-22 19:26:33.252245: step: 906/470, loss: 0.02145826816558838 2023-01-22 19:26:33.961505: step: 908/470, loss: 0.1311969757080078 2023-01-22 19:26:34.773935: step: 910/470, loss: 0.13202637434005737 2023-01-22 19:26:35.462704: step: 912/470, loss: 0.04499669000506401 2023-01-22 19:26:36.242471: step: 914/470, loss: 0.11342813074588776 2023-01-22 19:26:36.911200: step: 916/470, loss: 0.898378849029541 2023-01-22 19:26:37.753117: step: 918/470, loss: 0.10744353383779526 2023-01-22 19:26:38.568255: step: 920/470, loss: 0.026462199166417122 2023-01-22 19:26:39.286728: step: 922/470, loss: 0.5927501916885376 2023-01-22 19:26:39.943236: step: 924/470, loss: 0.03971520811319351 2023-01-22 19:26:40.647310: step: 926/470, loss: 0.052058689296245575 2023-01-22 19:26:41.406869: step: 928/470, loss: 0.06348146498203278 2023-01-22 19:26:42.068594: step: 930/470, loss: 0.05130045861005783 2023-01-22 19:26:42.717494: step: 932/470, loss: 0.01657716929912567 2023-01-22 19:26:43.390175: step: 934/470, loss: 3.5439648628234863 2023-01-22 19:26:44.164325: step: 936/470, loss: 0.11058296263217926 2023-01-22 19:26:44.932370: step: 938/470, loss: 0.07302563637495041 2023-01-22 19:26:45.624156: step: 940/470, loss: 0.0987381637096405 2023-01-22 19:26:46.284137: step: 942/470, loss: 0.04440511763095856 ================================================== Loss: 0.110 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28893526058631924, 'r': 0.3366342504743833, 'f1': 0.31096625766871167}, 'combined': 0.2291330319664191, 'epoch': 20} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3276979133967761, 'r': 0.35322053934402503, 'f1': 0.3399808985819398}, 'combined': 0.23680261592771928, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28683624356823917, 'r': 0.33691012290083505, 'f1': 0.30986323694369994}, 'combined': 0.22832027985325257, 'epoch': 20} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33326832341093904, 'r': 0.3589043482887036, 'f1': 0.34561159464838126}, 'combined': 0.24072449378494218, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26346369760479044, 'r': 0.3339539848197344, 'f1': 0.29455020920502095}, 'combined': 0.21703699625633122, 'epoch': 20} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32420125946602074, 'r': 0.36815546868208704, 'f1': 0.3447831494186137}, 'combined': 0.24014746725674588, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19285714285714284, 'r': 0.3581632653061224, 'f1': 0.25071428571428567}, 'combined': 0.1671428571428571, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2948717948717949, 'r': 0.5, 'f1': 0.3709677419354839}, 'combined': 0.18548387096774194, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4642857142857143, 'r': 0.33620689655172414, 'f1': 0.39}, 'combined': 0.26, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:29:24.828754: step: 2/470, loss: 0.030697286128997803 2023-01-22 19:29:25.660760: step: 4/470, loss: 0.8990464210510254 2023-01-22 19:29:26.364584: step: 6/470, loss: 0.012093383818864822 2023-01-22 19:29:27.052579: step: 8/470, loss: 0.04054703935980797 2023-01-22 19:29:27.805467: step: 10/470, loss: 0.0492924265563488 2023-01-22 19:29:28.563801: step: 12/470, loss: 0.043698910623788834 2023-01-22 19:29:29.361869: step: 14/470, loss: 0.052403904497623444 2023-01-22 19:29:30.036385: step: 16/470, loss: 0.04030047729611397 2023-01-22 19:29:30.884191: step: 18/470, loss: 0.04345276579260826 2023-01-22 19:29:31.630786: step: 20/470, loss: 0.07873324304819107 2023-01-22 19:29:32.389365: step: 22/470, loss: 0.020181206986308098 2023-01-22 19:29:33.151588: step: 24/470, loss: 0.013497297652065754 2023-01-22 19:29:33.937687: step: 26/470, loss: 0.017897669225931168 2023-01-22 19:29:34.857220: step: 28/470, loss: 0.0618402361869812 2023-01-22 19:29:35.578584: step: 30/470, loss: 0.07274410128593445 2023-01-22 19:29:36.270042: step: 32/470, loss: 0.020234020426869392 2023-01-22 19:29:37.096449: step: 34/470, loss: 0.06494118273258209 2023-01-22 19:29:37.825333: step: 36/470, loss: 0.021747954189777374 2023-01-22 19:29:38.595629: step: 38/470, loss: 0.05529887601733208 2023-01-22 19:29:39.361953: step: 40/470, loss: 1.0049673318862915 2023-01-22 19:29:40.148224: step: 42/470, loss: 0.11773818731307983 2023-01-22 19:29:40.869593: step: 44/470, loss: 0.04687252268195152 2023-01-22 19:29:41.553815: step: 46/470, loss: 0.09744272381067276 2023-01-22 19:29:42.270009: step: 48/470, loss: 0.039838120341300964 2023-01-22 19:29:43.026035: step: 50/470, loss: 0.06667645275592804 2023-01-22 19:29:43.864454: step: 52/470, loss: 0.023879552260041237 2023-01-22 19:29:44.586481: step: 54/470, loss: 0.008113712072372437 2023-01-22 19:29:45.313847: step: 56/470, loss: 0.03958437219262123 2023-01-22 19:29:46.099345: step: 58/470, loss: 0.1176476776599884 2023-01-22 19:29:46.895163: step: 60/470, loss: 0.07839195430278778 2023-01-22 19:29:47.564077: step: 62/470, loss: 0.02542118728160858 2023-01-22 19:29:48.292175: step: 64/470, loss: 0.680618166923523 2023-01-22 19:29:49.006436: step: 66/470, loss: 0.017299430444836617 2023-01-22 19:29:49.695845: step: 68/470, loss: 0.02113557793200016 2023-01-22 19:29:50.433614: step: 70/470, loss: 0.16099169850349426 2023-01-22 19:29:51.180853: step: 72/470, loss: 0.04751966521143913 2023-01-22 19:29:51.920061: step: 74/470, loss: 0.04941952973604202 2023-01-22 19:29:52.642857: step: 76/470, loss: 1.7027865648269653 2023-01-22 19:29:53.352004: step: 78/470, loss: 0.025158904492855072 2023-01-22 19:29:54.133981: step: 80/470, loss: 0.05669613182544708 2023-01-22 19:29:54.893899: step: 82/470, loss: 0.11963464319705963 2023-01-22 19:29:55.646415: step: 84/470, loss: 0.047465141862630844 2023-01-22 19:29:56.413729: step: 86/470, loss: 0.0787685364484787 2023-01-22 19:29:57.245225: step: 88/470, loss: 0.044626928865909576 2023-01-22 19:29:57.945829: step: 90/470, loss: 0.0552486777305603 2023-01-22 19:29:58.636617: step: 92/470, loss: 0.006156580988317728 2023-01-22 19:29:59.305495: step: 94/470, loss: 0.11764095723628998 2023-01-22 19:30:00.051506: step: 96/470, loss: 0.03031170181930065 2023-01-22 19:30:00.817572: step: 98/470, loss: 0.029158277437090874 2023-01-22 19:30:01.466201: step: 100/470, loss: 0.03246721625328064 2023-01-22 19:30:02.302918: step: 102/470, loss: 0.016130754724144936 2023-01-22 19:30:03.162916: step: 104/470, loss: 0.05944613739848137 2023-01-22 19:30:03.976766: step: 106/470, loss: 0.034837506711483 2023-01-22 19:30:04.717706: step: 108/470, loss: 0.19040045142173767 2023-01-22 19:30:05.419688: step: 110/470, loss: 0.04526451230049133 2023-01-22 19:30:06.138937: step: 112/470, loss: 0.02529776096343994 2023-01-22 19:30:06.767639: step: 114/470, loss: 0.030075442045927048 2023-01-22 19:30:07.500813: step: 116/470, loss: 0.2894465923309326 2023-01-22 19:30:08.216586: step: 118/470, loss: 0.02525232918560505 2023-01-22 19:30:09.026654: step: 120/470, loss: 1.0722471475601196 2023-01-22 19:30:09.727629: step: 122/470, loss: 0.020707713440060616 2023-01-22 19:30:10.412024: step: 124/470, loss: 0.015486765652894974 2023-01-22 19:30:11.089030: step: 126/470, loss: 0.031173061579465866 2023-01-22 19:30:11.877263: step: 128/470, loss: 0.02271159552037716 2023-01-22 19:30:12.619707: step: 130/470, loss: 0.035077158361673355 2023-01-22 19:30:13.394357: step: 132/470, loss: 0.024227894842624664 2023-01-22 19:30:14.265265: step: 134/470, loss: 0.03320540860295296 2023-01-22 19:30:15.019653: step: 136/470, loss: 0.03366602584719658 2023-01-22 19:30:15.673853: step: 138/470, loss: 0.28107237815856934 2023-01-22 19:30:16.385762: step: 140/470, loss: 0.06899859011173248 2023-01-22 19:30:17.048384: step: 142/470, loss: 0.008150143548846245 2023-01-22 19:30:17.790891: step: 144/470, loss: 0.03204032778739929 2023-01-22 19:30:18.407103: step: 146/470, loss: 0.0016799191944301128 2023-01-22 19:30:19.234610: step: 148/470, loss: 0.021955549716949463 2023-01-22 19:30:19.947374: step: 150/470, loss: 0.03780049830675125 2023-01-22 19:30:20.650690: step: 152/470, loss: 0.007639870513230562 2023-01-22 19:30:21.437746: step: 154/470, loss: 0.06269722431898117 2023-01-22 19:30:22.194983: step: 156/470, loss: 0.025132397189736366 2023-01-22 19:30:22.936233: step: 158/470, loss: 0.07948961853981018 2023-01-22 19:30:23.585679: step: 160/470, loss: 0.005530932452529669 2023-01-22 19:30:24.358812: step: 162/470, loss: 0.10777927935123444 2023-01-22 19:30:25.059373: step: 164/470, loss: 0.07513689249753952 2023-01-22 19:30:26.034196: step: 166/470, loss: 0.013257588259875774 2023-01-22 19:30:26.774295: step: 168/470, loss: 0.1652069240808487 2023-01-22 19:30:27.542879: step: 170/470, loss: 0.007435483392328024 2023-01-22 19:30:28.316841: step: 172/470, loss: 0.07258084416389465 2023-01-22 19:30:29.230700: step: 174/470, loss: 0.043368544429540634 2023-01-22 19:30:30.087181: step: 176/470, loss: 0.0565069355070591 2023-01-22 19:30:30.779742: step: 178/470, loss: 0.04345664754509926 2023-01-22 19:30:31.426331: step: 180/470, loss: 0.008021237328648567 2023-01-22 19:30:32.232487: step: 182/470, loss: 0.0683949664235115 2023-01-22 19:30:33.026399: step: 184/470, loss: 0.04733900725841522 2023-01-22 19:30:33.706471: step: 186/470, loss: 0.14194054901599884 2023-01-22 19:30:34.454037: step: 188/470, loss: 0.06841456145048141 2023-01-22 19:30:35.252236: step: 190/470, loss: 0.01492251455783844 2023-01-22 19:30:35.914837: step: 192/470, loss: 0.07086261361837387 2023-01-22 19:30:36.638908: step: 194/470, loss: 0.16320353746414185 2023-01-22 19:30:37.410915: step: 196/470, loss: 0.08280780166387558 2023-01-22 19:30:38.102373: step: 198/470, loss: 0.017911670729517937 2023-01-22 19:30:38.875257: step: 200/470, loss: 0.08260060846805573 2023-01-22 19:30:39.629995: step: 202/470, loss: 0.044123224914073944 2023-01-22 19:30:40.315590: step: 204/470, loss: 0.010030948556959629 2023-01-22 19:30:41.002820: step: 206/470, loss: 0.0387822724878788 2023-01-22 19:30:41.720360: step: 208/470, loss: 0.034351300448179245 2023-01-22 19:30:42.556713: step: 210/470, loss: 0.010738598182797432 2023-01-22 19:30:43.290100: step: 212/470, loss: 0.06208763271570206 2023-01-22 19:30:44.081214: step: 214/470, loss: 0.08142954856157303 2023-01-22 19:30:44.800270: step: 216/470, loss: 0.0489891953766346 2023-01-22 19:30:45.566872: step: 218/470, loss: 0.05691111460328102 2023-01-22 19:30:46.285085: step: 220/470, loss: 0.10647915303707123 2023-01-22 19:30:46.999998: step: 222/470, loss: 0.029838304966688156 2023-01-22 19:30:47.860497: step: 224/470, loss: 0.043678972870111465 2023-01-22 19:30:48.592560: step: 226/470, loss: 0.039795830845832825 2023-01-22 19:30:49.292865: step: 228/470, loss: 0.0034092552959918976 2023-01-22 19:30:50.027415: step: 230/470, loss: 0.05187808722257614 2023-01-22 19:30:50.829509: step: 232/470, loss: 0.022801529616117477 2023-01-22 19:30:51.562542: step: 234/470, loss: 0.0022835489362478256 2023-01-22 19:30:52.262073: step: 236/470, loss: 0.060452841222286224 2023-01-22 19:30:52.961652: step: 238/470, loss: 0.17915183305740356 2023-01-22 19:30:53.755491: step: 240/470, loss: 0.04132472351193428 2023-01-22 19:30:54.414850: step: 242/470, loss: 0.027403315529227257 2023-01-22 19:30:55.185576: step: 244/470, loss: 0.034430038183927536 2023-01-22 19:30:55.957383: step: 246/470, loss: 0.31158602237701416 2023-01-22 19:30:56.731659: step: 248/470, loss: 0.007013510912656784 2023-01-22 19:30:57.425983: step: 250/470, loss: 0.0664106085896492 2023-01-22 19:30:58.214148: step: 252/470, loss: 0.026947399601340294 2023-01-22 19:30:58.995864: step: 254/470, loss: 0.08892068266868591 2023-01-22 19:30:59.729279: step: 256/470, loss: 0.07869922369718552 2023-01-22 19:31:00.536501: step: 258/470, loss: 0.050374485552310944 2023-01-22 19:31:01.257676: step: 260/470, loss: 0.10828451067209244 2023-01-22 19:31:02.015980: step: 262/470, loss: 0.032886065542697906 2023-01-22 19:31:02.684887: step: 264/470, loss: 0.008889845572412014 2023-01-22 19:31:03.398374: step: 266/470, loss: 0.02972853183746338 2023-01-22 19:31:04.180462: step: 268/470, loss: 0.00936366431415081 2023-01-22 19:31:04.907825: step: 270/470, loss: 0.018172763288021088 2023-01-22 19:31:05.790092: step: 272/470, loss: 0.03780972212553024 2023-01-22 19:31:06.475118: step: 274/470, loss: 0.03203846141695976 2023-01-22 19:31:07.244360: step: 276/470, loss: 0.059636037796735764 2023-01-22 19:31:07.939034: step: 278/470, loss: 0.2443484216928482 2023-01-22 19:31:08.729305: step: 280/470, loss: 0.08666800707578659 2023-01-22 19:31:09.447126: step: 282/470, loss: 0.061435677111148834 2023-01-22 19:31:10.217448: step: 284/470, loss: 0.15919671952724457 2023-01-22 19:31:10.927000: step: 286/470, loss: 0.028493307530879974 2023-01-22 19:31:11.612713: step: 288/470, loss: 0.0469590425491333 2023-01-22 19:31:12.315622: step: 290/470, loss: 0.057702258229255676 2023-01-22 19:31:13.029862: step: 292/470, loss: 0.02364461123943329 2023-01-22 19:31:13.807909: step: 294/470, loss: 0.09106270968914032 2023-01-22 19:31:14.504530: step: 296/470, loss: 0.009492744691669941 2023-01-22 19:31:15.225992: step: 298/470, loss: 0.08927375078201294 2023-01-22 19:31:15.961187: step: 300/470, loss: 0.2396526336669922 2023-01-22 19:31:16.661315: step: 302/470, loss: 0.021701229736208916 2023-01-22 19:31:17.391783: step: 304/470, loss: 0.04955710098147392 2023-01-22 19:31:18.095024: step: 306/470, loss: 0.013163585215806961 2023-01-22 19:31:18.852037: step: 308/470, loss: 2.5453379154205322 2023-01-22 19:31:19.577148: step: 310/470, loss: 0.2919505834579468 2023-01-22 19:31:20.228307: step: 312/470, loss: 0.004827272146940231 2023-01-22 19:31:20.975387: step: 314/470, loss: 0.23596075177192688 2023-01-22 19:31:21.621917: step: 316/470, loss: 0.06390093266963959 2023-01-22 19:31:22.380436: step: 318/470, loss: 0.08488807827234268 2023-01-22 19:31:23.055523: step: 320/470, loss: 0.381671279668808 2023-01-22 19:31:23.734628: step: 322/470, loss: 1.292970061302185 2023-01-22 19:31:24.517263: step: 324/470, loss: 0.03118908405303955 2023-01-22 19:31:25.327255: step: 326/470, loss: 0.4090331196784973 2023-01-22 19:31:26.060229: step: 328/470, loss: 0.01948448270559311 2023-01-22 19:31:26.807996: step: 330/470, loss: 0.03427987918257713 2023-01-22 19:31:27.530490: step: 332/470, loss: 0.11818772554397583 2023-01-22 19:31:28.247129: step: 334/470, loss: 0.019033944234251976 2023-01-22 19:31:29.043235: step: 336/470, loss: 0.05994606763124466 2023-01-22 19:31:29.810516: step: 338/470, loss: 0.11957002431154251 2023-01-22 19:31:30.582292: step: 340/470, loss: 0.013482224196195602 2023-01-22 19:31:31.377410: step: 342/470, loss: 1.4503638744354248 2023-01-22 19:31:32.052046: step: 344/470, loss: 0.02695303224027157 2023-01-22 19:31:32.780342: step: 346/470, loss: 0.050801824778318405 2023-01-22 19:31:33.520662: step: 348/470, loss: 0.07081786543130875 2023-01-22 19:31:34.279858: step: 350/470, loss: 0.15673120319843292 2023-01-22 19:31:34.981022: step: 352/470, loss: 0.033411867916584015 2023-01-22 19:31:35.805083: step: 354/470, loss: 0.012247402220964432 2023-01-22 19:31:36.553458: step: 356/470, loss: 0.04690024256706238 2023-01-22 19:31:37.361854: step: 358/470, loss: 0.012932350859045982 2023-01-22 19:31:38.079475: step: 360/470, loss: 0.03173913061618805 2023-01-22 19:31:38.854761: step: 362/470, loss: 0.07606765627861023 2023-01-22 19:31:39.620363: step: 364/470, loss: 0.04719306901097298 2023-01-22 19:31:40.350437: step: 366/470, loss: 0.04025932401418686 2023-01-22 19:31:41.069717: step: 368/470, loss: 0.040751710534095764 2023-01-22 19:31:41.915585: step: 370/470, loss: 0.10012707859277725 2023-01-22 19:31:42.665471: step: 372/470, loss: 0.1372625231742859 2023-01-22 19:31:43.403394: step: 374/470, loss: 0.29762670397758484 2023-01-22 19:31:44.192636: step: 376/470, loss: 0.01679973676800728 2023-01-22 19:31:44.998318: step: 378/470, loss: 0.1596241146326065 2023-01-22 19:31:45.747056: step: 380/470, loss: 0.01258562970906496 2023-01-22 19:31:46.491618: step: 382/470, loss: 0.012428507208824158 2023-01-22 19:31:47.232746: step: 384/470, loss: 0.06731784343719482 2023-01-22 19:31:47.975446: step: 386/470, loss: 0.039965856820344925 2023-01-22 19:31:48.748450: step: 388/470, loss: 0.022317346185445786 2023-01-22 19:31:49.514135: step: 390/470, loss: 0.07760674506425858 2023-01-22 19:31:50.191476: step: 392/470, loss: 0.04869608208537102 2023-01-22 19:31:50.967875: step: 394/470, loss: 0.12491077929735184 2023-01-22 19:31:51.737536: step: 396/470, loss: 0.017211737111210823 2023-01-22 19:31:52.478117: step: 398/470, loss: 0.03493155539035797 2023-01-22 19:31:53.183138: step: 400/470, loss: 0.008576873689889908 2023-01-22 19:31:53.845192: step: 402/470, loss: 0.004653942305594683 2023-01-22 19:31:54.590101: step: 404/470, loss: 0.03935122489929199 2023-01-22 19:31:55.298953: step: 406/470, loss: 0.019616080448031425 2023-01-22 19:31:56.095182: step: 408/470, loss: 0.016680454835295677 2023-01-22 19:31:56.857615: step: 410/470, loss: 1.8669378757476807 2023-01-22 19:31:57.593526: step: 412/470, loss: 0.050840165466070175 2023-01-22 19:31:58.288331: step: 414/470, loss: 0.017026670277118683 2023-01-22 19:31:59.032584: step: 416/470, loss: 0.02777211181819439 2023-01-22 19:31:59.797834: step: 418/470, loss: 0.047070860862731934 2023-01-22 19:32:00.573423: step: 420/470, loss: 0.018415415659546852 2023-01-22 19:32:01.254217: step: 422/470, loss: 0.11197975277900696 2023-01-22 19:32:01.951598: step: 424/470, loss: 0.022071341052651405 2023-01-22 19:32:02.673251: step: 426/470, loss: 0.016471032053232193 2023-01-22 19:32:03.492653: step: 428/470, loss: 0.0054807537235319614 2023-01-22 19:32:04.238133: step: 430/470, loss: 0.025621192529797554 2023-01-22 19:32:04.991299: step: 432/470, loss: 0.029046742245554924 2023-01-22 19:32:05.703429: step: 434/470, loss: 0.014325280673801899 2023-01-22 19:32:06.420049: step: 436/470, loss: 0.0053360736928880215 2023-01-22 19:32:07.075437: step: 438/470, loss: 0.049321822822093964 2023-01-22 19:32:07.808857: step: 440/470, loss: 0.012010014615952969 2023-01-22 19:32:08.566554: step: 442/470, loss: 0.022885087877511978 2023-01-22 19:32:09.288601: step: 444/470, loss: 0.030774349346756935 2023-01-22 19:32:09.932268: step: 446/470, loss: 0.07810332626104355 2023-01-22 19:32:10.583708: step: 448/470, loss: 0.018929030746221542 2023-01-22 19:32:11.310060: step: 450/470, loss: 0.008509436622262001 2023-01-22 19:32:11.959311: step: 452/470, loss: 0.020978335291147232 2023-01-22 19:32:12.666637: step: 454/470, loss: 0.013563503511250019 2023-01-22 19:32:13.375772: step: 456/470, loss: 0.10911507159471512 2023-01-22 19:32:14.139175: step: 458/470, loss: 0.03588716313242912 2023-01-22 19:32:14.822646: step: 460/470, loss: 0.035442594438791275 2023-01-22 19:32:15.567158: step: 462/470, loss: 0.01282803900539875 2023-01-22 19:32:16.364558: step: 464/470, loss: 0.04582873731851578 2023-01-22 19:32:17.160529: step: 466/470, loss: 0.0383700355887413 2023-01-22 19:32:17.919276: step: 468/470, loss: 0.025660475715994835 2023-01-22 19:32:18.619134: step: 470/470, loss: 0.14962579309940338 2023-01-22 19:32:19.332863: step: 472/470, loss: 0.1222333088517189 2023-01-22 19:32:20.027065: step: 474/470, loss: 0.004696396645158529 2023-01-22 19:32:20.724923: step: 476/470, loss: 0.10672306269407272 2023-01-22 19:32:21.473585: step: 478/470, loss: 0.03075164370238781 2023-01-22 19:32:22.191261: step: 480/470, loss: 0.011191125959157944 2023-01-22 19:32:22.908155: step: 482/470, loss: 0.020779795944690704 2023-01-22 19:32:23.587709: step: 484/470, loss: 0.023522723466157913 2023-01-22 19:32:24.337809: step: 486/470, loss: 0.11427683383226395 2023-01-22 19:32:25.141634: step: 488/470, loss: 0.04714156314730644 2023-01-22 19:32:25.896939: step: 490/470, loss: 0.21134763956069946 2023-01-22 19:32:26.636678: step: 492/470, loss: 0.38170838356018066 2023-01-22 19:32:27.474347: step: 494/470, loss: 0.041793301701545715 2023-01-22 19:32:28.152482: step: 496/470, loss: 0.03228547051548958 2023-01-22 19:32:28.924673: step: 498/470, loss: 0.05064351484179497 2023-01-22 19:32:29.655249: step: 500/470, loss: 0.10985661298036575 2023-01-22 19:32:30.293019: step: 502/470, loss: 0.4626348614692688 2023-01-22 19:32:31.043170: step: 504/470, loss: 0.06298696249723434 2023-01-22 19:32:31.782977: step: 506/470, loss: 0.08736024796962738 2023-01-22 19:32:32.466858: step: 508/470, loss: 0.05532427132129669 2023-01-22 19:32:33.187740: step: 510/470, loss: 0.0031663402915000916 2023-01-22 19:32:33.860888: step: 512/470, loss: 0.1809593141078949 2023-01-22 19:32:34.658900: step: 514/470, loss: 0.031655311584472656 2023-01-22 19:32:35.461089: step: 516/470, loss: 0.0275458712130785 2023-01-22 19:32:36.252593: step: 518/470, loss: 0.046752817928791046 2023-01-22 19:32:37.011822: step: 520/470, loss: 0.024380570277571678 2023-01-22 19:32:37.763732: step: 522/470, loss: 0.03911636769771576 2023-01-22 19:32:38.501362: step: 524/470, loss: 0.011473782360553741 2023-01-22 19:32:39.292610: step: 526/470, loss: 0.03254931420087814 2023-01-22 19:32:40.027566: step: 528/470, loss: 0.04068319499492645 2023-01-22 19:32:40.790528: step: 530/470, loss: 0.07182002812623978 2023-01-22 19:32:41.551740: step: 532/470, loss: 0.18645454943180084 2023-01-22 19:32:42.242110: step: 534/470, loss: 0.005089792422950268 2023-01-22 19:32:42.965559: step: 536/470, loss: 0.02244439162313938 2023-01-22 19:32:43.709238: step: 538/470, loss: 0.02713472954928875 2023-01-22 19:32:44.487490: step: 540/470, loss: 0.09527122229337692 2023-01-22 19:32:45.298903: step: 542/470, loss: 0.04841366410255432 2023-01-22 19:32:46.050507: step: 544/470, loss: 0.03840105980634689 2023-01-22 19:32:46.755664: step: 546/470, loss: 0.003627850441262126 2023-01-22 19:32:47.575390: step: 548/470, loss: 0.06749998778104782 2023-01-22 19:32:48.324787: step: 550/470, loss: 0.058877523988485336 2023-01-22 19:32:49.023257: step: 552/470, loss: 0.018091080710291862 2023-01-22 19:32:49.768920: step: 554/470, loss: 0.12940962612628937 2023-01-22 19:32:50.536419: step: 556/470, loss: 0.1229933425784111 2023-01-22 19:32:51.242608: step: 558/470, loss: 0.024270979687571526 2023-01-22 19:32:51.927082: step: 560/470, loss: 0.10469833761453629 2023-01-22 19:32:52.724985: step: 562/470, loss: 0.05231137201189995 2023-01-22 19:32:53.404186: step: 564/470, loss: 0.04341939464211464 2023-01-22 19:32:54.132076: step: 566/470, loss: 0.06202490255236626 2023-01-22 19:32:54.817434: step: 568/470, loss: 0.2968774437904358 2023-01-22 19:32:55.562563: step: 570/470, loss: 0.22153881192207336 2023-01-22 19:32:56.373093: step: 572/470, loss: 0.07482737302780151 2023-01-22 19:32:57.159570: step: 574/470, loss: 0.049079857766628265 2023-01-22 19:32:57.894574: step: 576/470, loss: 0.015658581629395485 2023-01-22 19:32:58.593227: step: 578/470, loss: 0.016094373539090157 2023-01-22 19:32:59.308895: step: 580/470, loss: 0.05446375906467438 2023-01-22 19:33:00.017565: step: 582/470, loss: 0.08582345396280289 2023-01-22 19:33:00.681264: step: 584/470, loss: 0.055172644555568695 2023-01-22 19:33:01.384520: step: 586/470, loss: 0.03194602578878403 2023-01-22 19:33:02.244421: step: 588/470, loss: 0.06124107167124748 2023-01-22 19:33:03.005115: step: 590/470, loss: 0.06561607867479324 2023-01-22 19:33:03.738451: step: 592/470, loss: 0.387003630399704 2023-01-22 19:33:04.437203: step: 594/470, loss: 0.11961881816387177 2023-01-22 19:33:05.146408: step: 596/470, loss: 0.03727561980485916 2023-01-22 19:33:05.859027: step: 598/470, loss: 0.02133580483496189 2023-01-22 19:33:06.607710: step: 600/470, loss: 0.020184461027383804 2023-01-22 19:33:07.389890: step: 602/470, loss: 0.06725658476352692 2023-01-22 19:33:08.118339: step: 604/470, loss: 0.017879968509078026 2023-01-22 19:33:08.879209: step: 606/470, loss: 0.029784638434648514 2023-01-22 19:33:09.730434: step: 608/470, loss: 0.021767864003777504 2023-01-22 19:33:10.507481: step: 610/470, loss: 0.057398948818445206 2023-01-22 19:33:11.217310: step: 612/470, loss: 0.6706560254096985 2023-01-22 19:33:12.008448: step: 614/470, loss: 0.03931858763098717 2023-01-22 19:33:12.758022: step: 616/470, loss: 0.5221364498138428 2023-01-22 19:33:13.535158: step: 618/470, loss: 0.01334367971867323 2023-01-22 19:33:14.292210: step: 620/470, loss: 0.06150234863162041 2023-01-22 19:33:15.054327: step: 622/470, loss: 0.029126210138201714 2023-01-22 19:33:15.818333: step: 624/470, loss: 0.13291211426258087 2023-01-22 19:33:16.601946: step: 626/470, loss: 0.5435463786125183 2023-01-22 19:33:17.338563: step: 628/470, loss: 0.016903935000300407 2023-01-22 19:33:18.006311: step: 630/470, loss: 0.012967291288077831 2023-01-22 19:33:18.719587: step: 632/470, loss: 0.04698651283979416 2023-01-22 19:33:19.489757: step: 634/470, loss: 0.03846525028347969 2023-01-22 19:33:20.254277: step: 636/470, loss: 0.024630436673760414 2023-01-22 19:33:20.923258: step: 638/470, loss: 0.062395889312028885 2023-01-22 19:33:21.627974: step: 640/470, loss: 0.06373529881238937 2023-01-22 19:33:22.427138: step: 642/470, loss: 0.08350540697574615 2023-01-22 19:33:23.107211: step: 644/470, loss: 0.0395352728664875 2023-01-22 19:33:23.786212: step: 646/470, loss: 0.006685740314424038 2023-01-22 19:33:24.533730: step: 648/470, loss: 0.06376517564058304 2023-01-22 19:33:25.271132: step: 650/470, loss: 0.06809066981077194 2023-01-22 19:33:25.969083: step: 652/470, loss: 0.03920336440205574 2023-01-22 19:33:26.749847: step: 654/470, loss: 0.18704262375831604 2023-01-22 19:33:27.432962: step: 656/470, loss: 0.0032950390595942736 2023-01-22 19:33:28.233324: step: 658/470, loss: 0.06400889903306961 2023-01-22 19:33:29.007015: step: 660/470, loss: 0.026591304689645767 2023-01-22 19:33:29.723190: step: 662/470, loss: 0.006560401059687138 2023-01-22 19:33:30.463355: step: 664/470, loss: 0.08201814442873001 2023-01-22 19:33:31.181116: step: 666/470, loss: 0.059747278690338135 2023-01-22 19:33:31.944331: step: 668/470, loss: 0.11375654488801956 2023-01-22 19:33:32.673063: step: 670/470, loss: 0.028703976422548294 2023-01-22 19:33:33.414082: step: 672/470, loss: 0.19319429993629456 2023-01-22 19:33:34.173743: step: 674/470, loss: 0.07025258243083954 2023-01-22 19:33:34.939413: step: 676/470, loss: 0.032008834183216095 2023-01-22 19:33:35.636401: step: 678/470, loss: 0.13485921919345856 2023-01-22 19:33:36.317906: step: 680/470, loss: 0.050178226083517075 2023-01-22 19:33:37.051448: step: 682/470, loss: 0.04921692609786987 2023-01-22 19:33:37.833682: step: 684/470, loss: 0.022719813510775566 2023-01-22 19:33:38.495174: step: 686/470, loss: 0.04771514609456062 2023-01-22 19:33:39.280960: step: 688/470, loss: 0.033646490424871445 2023-01-22 19:33:40.036080: step: 690/470, loss: 0.07247103005647659 2023-01-22 19:33:40.835154: step: 692/470, loss: 0.05885102227330208 2023-01-22 19:33:41.558877: step: 694/470, loss: 0.005150055047124624 2023-01-22 19:33:42.428379: step: 696/470, loss: 0.0049107871018350124 2023-01-22 19:33:43.115946: step: 698/470, loss: 0.011220758780837059 2023-01-22 19:33:43.847987: step: 700/470, loss: 0.06259648501873016 2023-01-22 19:33:44.569855: step: 702/470, loss: 0.023064645007252693 2023-01-22 19:33:45.333592: step: 704/470, loss: 0.026079723611474037 2023-01-22 19:33:46.093461: step: 706/470, loss: 0.011188274249434471 2023-01-22 19:33:46.818784: step: 708/470, loss: 0.015496410429477692 2023-01-22 19:33:47.618935: step: 710/470, loss: 0.7063004374504089 2023-01-22 19:33:48.411119: step: 712/470, loss: 0.0391618087887764 2023-01-22 19:33:49.173246: step: 714/470, loss: 0.07354892045259476 2023-01-22 19:33:50.000612: step: 716/470, loss: 0.02870473451912403 2023-01-22 19:33:50.725238: step: 718/470, loss: 0.06522411108016968 2023-01-22 19:33:51.587654: step: 720/470, loss: 0.23585094511508942 2023-01-22 19:33:52.324196: step: 722/470, loss: 0.11705224961042404 2023-01-22 19:33:53.040028: step: 724/470, loss: 0.5201276540756226 2023-01-22 19:33:53.729199: step: 726/470, loss: 0.033718567341566086 2023-01-22 19:33:54.436307: step: 728/470, loss: 0.11925947666168213 2023-01-22 19:33:55.189797: step: 730/470, loss: 0.01283105555921793 2023-01-22 19:33:55.916450: step: 732/470, loss: 0.03227861598134041 2023-01-22 19:33:56.718651: step: 734/470, loss: 0.20628242194652557 2023-01-22 19:33:57.422397: step: 736/470, loss: 0.0496375747025013 2023-01-22 19:33:58.277221: step: 738/470, loss: 0.12754222750663757 2023-01-22 19:33:59.029225: step: 740/470, loss: 0.27654582262039185 2023-01-22 19:33:59.755741: step: 742/470, loss: 0.007181598339229822 2023-01-22 19:34:00.532062: step: 744/470, loss: 0.03585813194513321 2023-01-22 19:34:01.236536: step: 746/470, loss: 0.5074892044067383 2023-01-22 19:34:01.959251: step: 748/470, loss: 0.08907847851514816 2023-01-22 19:34:02.623022: step: 750/470, loss: 0.024945350363850594 2023-01-22 19:34:03.392360: step: 752/470, loss: 0.029513496905565262 2023-01-22 19:34:04.152857: step: 754/470, loss: 0.0634850412607193 2023-01-22 19:34:04.900121: step: 756/470, loss: 0.27008649706840515 2023-01-22 19:34:05.703864: step: 758/470, loss: 0.07133635878562927 2023-01-22 19:34:06.412953: step: 760/470, loss: 0.04460354521870613 2023-01-22 19:34:07.215756: step: 762/470, loss: 0.156412735581398 2023-01-22 19:34:07.926297: step: 764/470, loss: 0.020505452528595924 2023-01-22 19:34:08.651866: step: 766/470, loss: 0.020364558324217796 2023-01-22 19:34:09.338394: step: 768/470, loss: 0.12095355987548828 2023-01-22 19:34:10.112831: step: 770/470, loss: 0.05226239189505577 2023-01-22 19:34:10.899349: step: 772/470, loss: 0.04754621163010597 2023-01-22 19:34:11.609542: step: 774/470, loss: 0.056481197476387024 2023-01-22 19:34:12.362604: step: 776/470, loss: 0.02067723497748375 2023-01-22 19:34:13.154315: step: 778/470, loss: 0.02264983393251896 2023-01-22 19:34:13.933200: step: 780/470, loss: 0.01082298532128334 2023-01-22 19:34:14.663389: step: 782/470, loss: 0.01869276538491249 2023-01-22 19:34:15.370839: step: 784/470, loss: 0.030841980129480362 2023-01-22 19:34:16.084572: step: 786/470, loss: 0.01216953992843628 2023-01-22 19:34:16.825292: step: 788/470, loss: 0.32434332370758057 2023-01-22 19:34:17.584488: step: 790/470, loss: 0.07466316968202591 2023-01-22 19:34:18.317206: step: 792/470, loss: 0.020550915971398354 2023-01-22 19:34:19.059473: step: 794/470, loss: 0.012714998796582222 2023-01-22 19:34:19.820366: step: 796/470, loss: 0.07571426033973694 2023-01-22 19:34:20.607432: step: 798/470, loss: 0.05830766260623932 2023-01-22 19:34:21.346791: step: 800/470, loss: 0.06974931806325912 2023-01-22 19:34:22.077993: step: 802/470, loss: 0.042594559490680695 2023-01-22 19:34:22.811467: step: 804/470, loss: 0.0458582304418087 2023-01-22 19:34:23.511179: step: 806/470, loss: 0.024310944601893425 2023-01-22 19:34:24.242219: step: 808/470, loss: 0.06325914710760117 2023-01-22 19:34:24.957252: step: 810/470, loss: 0.2123449146747589 2023-01-22 19:34:25.770228: step: 812/470, loss: 0.08683294802904129 2023-01-22 19:34:26.538593: step: 814/470, loss: 0.0271016675978899 2023-01-22 19:34:27.302390: step: 816/470, loss: 0.06353487074375153 2023-01-22 19:34:28.106434: step: 818/470, loss: 0.028592655435204506 2023-01-22 19:34:28.796915: step: 820/470, loss: 0.013006173074245453 2023-01-22 19:34:29.537253: step: 822/470, loss: 0.030378835275769234 2023-01-22 19:34:30.256812: step: 824/470, loss: 0.001919006579555571 2023-01-22 19:34:30.996456: step: 826/470, loss: 0.03218744695186615 2023-01-22 19:34:31.706589: step: 828/470, loss: 0.06677714735269547 2023-01-22 19:34:32.398865: step: 830/470, loss: 0.016359543427824974 2023-01-22 19:34:33.143795: step: 832/470, loss: 0.013201995752751827 2023-01-22 19:34:33.838154: step: 834/470, loss: 0.023714501410722733 2023-01-22 19:34:34.538277: step: 836/470, loss: 0.06850353628396988 2023-01-22 19:34:35.325843: step: 838/470, loss: 0.05281051993370056 2023-01-22 19:34:36.068772: step: 840/470, loss: 0.08620928972959518 2023-01-22 19:34:36.783225: step: 842/470, loss: 0.04048202931880951 2023-01-22 19:34:37.641346: step: 844/470, loss: 0.11093959212303162 2023-01-22 19:34:38.352037: step: 846/470, loss: 0.11896642297506332 2023-01-22 19:34:39.047311: step: 848/470, loss: 0.8897960186004639 2023-01-22 19:34:39.695098: step: 850/470, loss: 0.0081399567425251 2023-01-22 19:34:40.404314: step: 852/470, loss: 0.013954234309494495 2023-01-22 19:34:41.095970: step: 854/470, loss: 0.05100409686565399 2023-01-22 19:34:41.839252: step: 856/470, loss: 0.2577970325946808 2023-01-22 19:34:42.511952: step: 858/470, loss: 0.07873797416687012 2023-01-22 19:34:43.194278: step: 860/470, loss: 0.06876885890960693 2023-01-22 19:34:43.973254: step: 862/470, loss: 0.03414501994848251 2023-01-22 19:34:44.834416: step: 864/470, loss: 0.05697346851229668 2023-01-22 19:34:45.524665: step: 866/470, loss: 0.07026387751102448 2023-01-22 19:34:46.279058: step: 868/470, loss: 0.06572747230529785 2023-01-22 19:34:47.014340: step: 870/470, loss: 0.4360022246837616 2023-01-22 19:34:47.764833: step: 872/470, loss: 0.03277615085244179 2023-01-22 19:34:48.457067: step: 874/470, loss: 0.04744017496705055 2023-01-22 19:34:49.214899: step: 876/470, loss: 0.03529006615281105 2023-01-22 19:34:49.915998: step: 878/470, loss: 0.04175432026386261 2023-01-22 19:34:50.680445: step: 880/470, loss: 0.035176780074834824 2023-01-22 19:34:51.425994: step: 882/470, loss: 0.037990227341651917 2023-01-22 19:34:52.181937: step: 884/470, loss: 0.0412430465221405 2023-01-22 19:34:52.995355: step: 886/470, loss: 0.059082094579935074 2023-01-22 19:34:53.805976: step: 888/470, loss: 0.0283492561429739 2023-01-22 19:34:54.577791: step: 890/470, loss: 0.08661621809005737 2023-01-22 19:34:55.241548: step: 892/470, loss: 0.04014264792203903 2023-01-22 19:34:56.026122: step: 894/470, loss: 0.016945162788033485 2023-01-22 19:34:56.739403: step: 896/470, loss: 0.020986400544643402 2023-01-22 19:34:57.409025: step: 898/470, loss: 0.06758707016706467 2023-01-22 19:34:58.116688: step: 900/470, loss: 0.10314557701349258 2023-01-22 19:34:58.811898: step: 902/470, loss: 0.12326005101203918 2023-01-22 19:34:59.592316: step: 904/470, loss: 0.047155629843473434 2023-01-22 19:35:00.369224: step: 906/470, loss: 0.025163166224956512 2023-01-22 19:35:01.078201: step: 908/470, loss: 0.032101456075906754 2023-01-22 19:35:01.784992: step: 910/470, loss: 0.0255853533744812 2023-01-22 19:35:02.589065: step: 912/470, loss: 0.031632889062166214 2023-01-22 19:35:03.347003: step: 914/470, loss: 0.07384685426950455 2023-01-22 19:35:04.162035: step: 916/470, loss: 0.09033031016588211 2023-01-22 19:35:04.875680: step: 918/470, loss: 0.061557587236166 2023-01-22 19:35:05.601337: step: 920/470, loss: 0.1455271691083908 2023-01-22 19:35:06.349529: step: 922/470, loss: 0.017164742574095726 2023-01-22 19:35:07.050730: step: 924/470, loss: 0.03933778777718544 2023-01-22 19:35:07.797111: step: 926/470, loss: 0.040244653820991516 2023-01-22 19:35:08.547065: step: 928/470, loss: 0.051455993205308914 2023-01-22 19:35:09.264607: step: 930/470, loss: 0.07561231404542923 2023-01-22 19:35:09.928505: step: 932/470, loss: 0.02452336624264717 2023-01-22 19:35:10.696153: step: 934/470, loss: 0.03766307979822159 2023-01-22 19:35:11.422752: step: 936/470, loss: 0.041747868061065674 2023-01-22 19:35:12.207486: step: 938/470, loss: 0.02425321564078331 2023-01-22 19:35:13.003081: step: 940/470, loss: 0.27364808320999146 2023-01-22 19:35:13.654420: step: 942/470, loss: 0.016857439652085304 ================================================== Loss: 0.097 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2877562566560171, 'r': 0.34181293485136, 'f1': 0.3124638623879735}, 'combined': 0.23023653018061205, 'epoch': 21} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3439033402557875, 'r': 0.3716801485072165, 'f1': 0.357252638121539}, 'combined': 0.24883268326873365, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2824927887011563, 'r': 0.34306524624049345, 'f1': 0.3098464177699058}, 'combined': 0.22830788677782532, 'epoch': 21} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3453082771863508, 'r': 0.3662259901312932, 'f1': 0.3554596637765235}, 'combined': 0.24758384541648404, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2699193305470839, 'r': 0.3487951880504063, 'f1': 0.3043295763287485}, 'combined': 0.22424284571591996, 'epoch': 21} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33252897285164096, 'r': 0.3747345732520415, 'f1': 0.3523724739440535}, 'combined': 0.24543356394113183, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24305555555555555, 'r': 0.375, 'f1': 0.2949438202247191}, 'combined': 0.19662921348314605, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2638888888888889, 'r': 0.41304347826086957, 'f1': 0.3220338983050847}, 'combined': 0.16101694915254236, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3522727272727273, 'r': 0.2672413793103448, 'f1': 0.303921568627451}, 'combined': 0.20261437908496732, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:37:52.188591: step: 2/470, loss: 0.07132654637098312 2023-01-22 19:37:52.891337: step: 4/470, loss: 0.04968281090259552 2023-01-22 19:37:53.642156: step: 6/470, loss: 0.025600193068385124 2023-01-22 19:37:54.418329: step: 8/470, loss: 0.11954417079687119 2023-01-22 19:37:55.158680: step: 10/470, loss: 0.004893624223768711 2023-01-22 19:37:55.937171: step: 12/470, loss: 0.032928865402936935 2023-01-22 19:37:56.714109: step: 14/470, loss: 0.01784546673297882 2023-01-22 19:37:57.467983: step: 16/470, loss: 0.02579541504383087 2023-01-22 19:37:58.184709: step: 18/470, loss: 0.0351317934691906 2023-01-22 19:37:58.860561: step: 20/470, loss: 0.009095302782952785 2023-01-22 19:37:59.539732: step: 22/470, loss: 0.004136822652071714 2023-01-22 19:38:00.166461: step: 24/470, loss: 0.014530248008668423 2023-01-22 19:38:00.842353: step: 26/470, loss: 0.03549989312887192 2023-01-22 19:38:01.659490: step: 28/470, loss: 0.04445575922727585 2023-01-22 19:38:02.377780: step: 30/470, loss: 0.01351911574602127 2023-01-22 19:38:03.232356: step: 32/470, loss: 0.12070487439632416 2023-01-22 19:38:04.001353: step: 34/470, loss: 0.007829630747437477 2023-01-22 19:38:04.666776: step: 36/470, loss: 0.06734476238489151 2023-01-22 19:38:05.363047: step: 38/470, loss: 0.016050485894083977 2023-01-22 19:38:06.082745: step: 40/470, loss: 0.025376321747899055 2023-01-22 19:38:06.799503: step: 42/470, loss: 0.005891892127692699 2023-01-22 19:38:07.446201: step: 44/470, loss: 0.1300346851348877 2023-01-22 19:38:08.160982: step: 46/470, loss: 0.03699648007750511 2023-01-22 19:38:08.868484: step: 48/470, loss: 0.014293630607426167 2023-01-22 19:38:09.610110: step: 50/470, loss: 0.02472846582531929 2023-01-22 19:38:10.329070: step: 52/470, loss: 0.020292188972234726 2023-01-22 19:38:11.098941: step: 54/470, loss: 0.0226434413343668 2023-01-22 19:38:11.786945: step: 56/470, loss: 0.09266740083694458 2023-01-22 19:38:12.516790: step: 58/470, loss: 0.01962917111814022 2023-01-22 19:38:13.311720: step: 60/470, loss: 0.03125033900141716 2023-01-22 19:38:14.103266: step: 62/470, loss: 0.02047717012465 2023-01-22 19:38:14.869433: step: 64/470, loss: 0.018749892711639404 2023-01-22 19:38:15.623108: step: 66/470, loss: 0.033666905015707016 2023-01-22 19:38:16.467887: step: 68/470, loss: 0.03922034800052643 2023-01-22 19:38:17.221711: step: 70/470, loss: 0.019506221637129784 2023-01-22 19:38:17.904767: step: 72/470, loss: 0.0295857060700655 2023-01-22 19:38:18.632448: step: 74/470, loss: 0.05708824843168259 2023-01-22 19:38:19.361190: step: 76/470, loss: 0.035686276853084564 2023-01-22 19:38:20.070575: step: 78/470, loss: 0.0031681691762059927 2023-01-22 19:38:20.795162: step: 80/470, loss: 0.06565077602863312 2023-01-22 19:38:21.543013: step: 82/470, loss: 0.013326709158718586 2023-01-22 19:38:22.223877: step: 84/470, loss: 0.059001680463552475 2023-01-22 19:38:22.937391: step: 86/470, loss: 0.04502973333001137 2023-01-22 19:38:23.643839: step: 88/470, loss: 0.05877036228775978 2023-01-22 19:38:24.482682: step: 90/470, loss: 0.03713955730199814 2023-01-22 19:38:25.204752: step: 92/470, loss: 0.4271875321865082 2023-01-22 19:38:25.951738: step: 94/470, loss: 0.0117900799959898 2023-01-22 19:38:26.769523: step: 96/470, loss: 0.023388944566249847 2023-01-22 19:38:27.594602: step: 98/470, loss: 0.042075708508491516 2023-01-22 19:38:28.269249: step: 100/470, loss: 0.023363204672932625 2023-01-22 19:38:29.006191: step: 102/470, loss: 0.04883814603090286 2023-01-22 19:38:29.679925: step: 104/470, loss: 0.02259974554181099 2023-01-22 19:38:30.377958: step: 106/470, loss: 0.06939293444156647 2023-01-22 19:38:31.089776: step: 108/470, loss: 0.07869086414575577 2023-01-22 19:38:31.836380: step: 110/470, loss: 0.01648254692554474 2023-01-22 19:38:32.516998: step: 112/470, loss: 2.4709521312615834e-05 2023-01-22 19:38:33.296696: step: 114/470, loss: 0.027162201702594757 2023-01-22 19:38:34.033629: step: 116/470, loss: 0.042458001524209976 2023-01-22 19:38:34.808933: step: 118/470, loss: 0.008122868835926056 2023-01-22 19:38:35.708895: step: 120/470, loss: 0.08594401180744171 2023-01-22 19:38:36.472979: step: 122/470, loss: 0.06479133665561676 2023-01-22 19:38:37.207757: step: 124/470, loss: 0.05414796248078346 2023-01-22 19:38:37.908159: step: 126/470, loss: 0.013639035634696484 2023-01-22 19:38:38.590647: step: 128/470, loss: 0.014860237948596478 2023-01-22 19:38:39.282198: step: 130/470, loss: 0.009585319086909294 2023-01-22 19:38:40.009801: step: 132/470, loss: 0.0008866112912073731 2023-01-22 19:38:40.742709: step: 134/470, loss: 0.07709459215402603 2023-01-22 19:38:41.602749: step: 136/470, loss: 0.08505282551050186 2023-01-22 19:38:42.294309: step: 138/470, loss: 0.03271591290831566 2023-01-22 19:38:43.056012: step: 140/470, loss: 0.36453869938850403 2023-01-22 19:38:43.788679: step: 142/470, loss: 0.06906229257583618 2023-01-22 19:38:44.560891: step: 144/470, loss: 0.09906693547964096 2023-01-22 19:38:45.343876: step: 146/470, loss: 0.15732789039611816 2023-01-22 19:38:46.088441: step: 148/470, loss: 0.0644831508398056 2023-01-22 19:38:46.818615: step: 150/470, loss: 0.0066782585345208645 2023-01-22 19:38:47.644310: step: 152/470, loss: 0.034233175218105316 2023-01-22 19:38:48.401033: step: 154/470, loss: 0.061225440353155136 2023-01-22 19:38:49.120463: step: 156/470, loss: 0.05808292329311371 2023-01-22 19:38:49.841687: step: 158/470, loss: 0.039374712854623795 2023-01-22 19:38:50.578903: step: 160/470, loss: 0.029254967346787453 2023-01-22 19:38:51.376707: step: 162/470, loss: 0.17803142964839935 2023-01-22 19:38:52.079942: step: 164/470, loss: 0.023453183472156525 2023-01-22 19:38:52.830176: step: 166/470, loss: 0.11502149701118469 2023-01-22 19:38:53.616087: step: 168/470, loss: 0.18725618720054626 2023-01-22 19:38:54.421468: step: 170/470, loss: 0.07862892746925354 2023-01-22 19:38:55.116308: step: 172/470, loss: 0.02760915271937847 2023-01-22 19:38:55.821417: step: 174/470, loss: 0.009849579073488712 2023-01-22 19:38:56.612711: step: 176/470, loss: 0.03337179869413376 2023-01-22 19:38:57.314815: step: 178/470, loss: 0.02945728227496147 2023-01-22 19:38:58.033804: step: 180/470, loss: 1.613263726234436 2023-01-22 19:38:58.720322: step: 182/470, loss: 0.03204035386443138 2023-01-22 19:38:59.398479: step: 184/470, loss: 0.04056650400161743 2023-01-22 19:39:00.135479: step: 186/470, loss: 0.06198835000395775 2023-01-22 19:39:00.849413: step: 188/470, loss: 0.013698305003345013 2023-01-22 19:39:01.570930: step: 190/470, loss: 0.26306137442588806 2023-01-22 19:39:02.438942: step: 192/470, loss: 0.007418145891278982 2023-01-22 19:39:03.204411: step: 194/470, loss: 0.45867234468460083 2023-01-22 19:39:03.969974: step: 196/470, loss: 0.014525515027344227 2023-01-22 19:39:04.703960: step: 198/470, loss: 0.06874248385429382 2023-01-22 19:39:05.426508: step: 200/470, loss: 1.0923312902450562 2023-01-22 19:39:06.114254: step: 202/470, loss: 0.012392209842801094 2023-01-22 19:39:06.843610: step: 204/470, loss: 0.10479408502578735 2023-01-22 19:39:07.590847: step: 206/470, loss: 0.013904137536883354 2023-01-22 19:39:08.285598: step: 208/470, loss: 0.03619790077209473 2023-01-22 19:39:09.021726: step: 210/470, loss: 0.36961981654167175 2023-01-22 19:39:09.724314: step: 212/470, loss: 0.08961759507656097 2023-01-22 19:39:10.527535: step: 214/470, loss: 0.14479438960552216 2023-01-22 19:39:11.187784: step: 216/470, loss: 0.027769066393375397 2023-01-22 19:39:11.876044: step: 218/470, loss: 0.11556971818208694 2023-01-22 19:39:12.600908: step: 220/470, loss: 0.03934004157781601 2023-01-22 19:39:13.379240: step: 222/470, loss: 0.08625157177448273 2023-01-22 19:39:14.130011: step: 224/470, loss: 2.8860392570495605 2023-01-22 19:39:14.895021: step: 226/470, loss: 0.03975597769021988 2023-01-22 19:39:15.663807: step: 228/470, loss: 0.013374090194702148 2023-01-22 19:39:16.427068: step: 230/470, loss: 0.035754621028900146 2023-01-22 19:39:17.189002: step: 232/470, loss: 0.19659562408924103 2023-01-22 19:39:17.909541: step: 234/470, loss: 0.23096928000450134 2023-01-22 19:39:18.583325: step: 236/470, loss: 0.04870816692709923 2023-01-22 19:39:19.409635: step: 238/470, loss: 0.05176544934511185 2023-01-22 19:39:20.153387: step: 240/470, loss: 0.034658364951610565 2023-01-22 19:39:20.907926: step: 242/470, loss: 0.06927529722452164 2023-01-22 19:39:21.590716: step: 244/470, loss: 0.010075340047478676 2023-01-22 19:39:22.401059: step: 246/470, loss: 0.03195039927959442 2023-01-22 19:39:23.199402: step: 248/470, loss: 0.02179635688662529 2023-01-22 19:39:23.958609: step: 250/470, loss: 0.13300231099128723 2023-01-22 19:39:24.691956: step: 252/470, loss: 0.04315396025776863 2023-01-22 19:39:25.466334: step: 254/470, loss: 0.02336571365594864 2023-01-22 19:39:26.222256: step: 256/470, loss: 0.04286831617355347 2023-01-22 19:39:26.822002: step: 258/470, loss: 0.17317862808704376 2023-01-22 19:39:27.591620: step: 260/470, loss: 0.15021666884422302 2023-01-22 19:39:28.225937: step: 262/470, loss: 0.039372485131025314 2023-01-22 19:39:29.072095: step: 264/470, loss: 0.014304211363196373 2023-01-22 19:39:29.800763: step: 266/470, loss: 0.04224678874015808 2023-01-22 19:39:30.521148: step: 268/470, loss: 0.0519757904112339 2023-01-22 19:39:31.259623: step: 270/470, loss: 0.018106307834386826 2023-01-22 19:39:31.967196: step: 272/470, loss: 0.22459372878074646 2023-01-22 19:39:32.635812: step: 274/470, loss: 0.00824404414743185 2023-01-22 19:39:33.398409: step: 276/470, loss: 0.020545797422528267 2023-01-22 19:39:34.077285: step: 278/470, loss: 0.013951159082353115 2023-01-22 19:39:34.889481: step: 280/470, loss: 0.0626802146434784 2023-01-22 19:39:35.651201: step: 282/470, loss: 0.0456269308924675 2023-01-22 19:39:36.362676: step: 284/470, loss: 1.7039402723312378 2023-01-22 19:39:37.081042: step: 286/470, loss: 0.024264952167868614 2023-01-22 19:39:37.818980: step: 288/470, loss: 0.060633957386016846 2023-01-22 19:39:38.563507: step: 290/470, loss: 0.01541169360280037 2023-01-22 19:39:39.295763: step: 292/470, loss: 0.02074408531188965 2023-01-22 19:39:40.014879: step: 294/470, loss: 0.008654659613966942 2023-01-22 19:39:40.740802: step: 296/470, loss: 0.03434770554304123 2023-01-22 19:39:41.466155: step: 298/470, loss: 0.1805637627840042 2023-01-22 19:39:42.219787: step: 300/470, loss: 0.1149728000164032 2023-01-22 19:39:42.950187: step: 302/470, loss: 0.015428435057401657 2023-01-22 19:39:43.670157: step: 304/470, loss: 0.018192537128925323 2023-01-22 19:39:44.442305: step: 306/470, loss: 0.0014132431242614985 2023-01-22 19:39:45.134393: step: 308/470, loss: 1.010900616645813 2023-01-22 19:39:45.844748: step: 310/470, loss: 0.025461159646511078 2023-01-22 19:39:46.624743: step: 312/470, loss: 0.031049776822328568 2023-01-22 19:39:47.433479: step: 314/470, loss: 0.04371613636612892 2023-01-22 19:39:48.146601: step: 316/470, loss: 0.11215726286172867 2023-01-22 19:39:48.932574: step: 318/470, loss: 0.002360484329983592 2023-01-22 19:39:49.656782: step: 320/470, loss: 0.04522380232810974 2023-01-22 19:39:50.322570: step: 322/470, loss: 0.048406392335891724 2023-01-22 19:39:50.999907: step: 324/470, loss: 0.002590734278783202 2023-01-22 19:39:51.723430: step: 326/470, loss: 0.17612895369529724 2023-01-22 19:39:52.457223: step: 328/470, loss: 0.04783718287944794 2023-01-22 19:39:53.235843: step: 330/470, loss: 0.08684522658586502 2023-01-22 19:39:53.994663: step: 332/470, loss: 0.0222398079931736 2023-01-22 19:39:54.683962: step: 334/470, loss: 0.09281022101640701 2023-01-22 19:39:55.437108: step: 336/470, loss: 0.018266357481479645 2023-01-22 19:39:56.231565: step: 338/470, loss: 0.05038531869649887 2023-01-22 19:39:56.915226: step: 340/470, loss: 0.12421422451734543 2023-01-22 19:39:57.591931: step: 342/470, loss: 0.030940696597099304 2023-01-22 19:39:58.385031: step: 344/470, loss: 0.0634685754776001 2023-01-22 19:39:59.133362: step: 346/470, loss: 0.06550871580839157 2023-01-22 19:39:59.940408: step: 348/470, loss: 0.02845531329512596 2023-01-22 19:40:00.774424: step: 350/470, loss: 0.03609459847211838 2023-01-22 19:40:01.568955: step: 352/470, loss: 0.0488816574215889 2023-01-22 19:40:02.355843: step: 354/470, loss: 0.013090868480503559 2023-01-22 19:40:02.991618: step: 356/470, loss: 0.009363566525280476 2023-01-22 19:40:03.687445: step: 358/470, loss: 0.06547439098358154 2023-01-22 19:40:04.418304: step: 360/470, loss: 0.11378369480371475 2023-01-22 19:40:05.170815: step: 362/470, loss: 0.10389399528503418 2023-01-22 19:40:05.877651: step: 364/470, loss: 0.1084824800491333 2023-01-22 19:40:06.665237: step: 366/470, loss: 0.03515470772981644 2023-01-22 19:40:07.374109: step: 368/470, loss: 0.03233461454510689 2023-01-22 19:40:08.068092: step: 370/470, loss: 0.010006394237279892 2023-01-22 19:40:08.795060: step: 372/470, loss: 0.0295244287699461 2023-01-22 19:40:09.532673: step: 374/470, loss: 0.006042733788490295 2023-01-22 19:40:10.250017: step: 376/470, loss: 0.06146341562271118 2023-01-22 19:40:11.168266: step: 378/470, loss: 0.03354865685105324 2023-01-22 19:40:11.930974: step: 380/470, loss: 0.024119818583130836 2023-01-22 19:40:12.684329: step: 382/470, loss: 0.12077596783638 2023-01-22 19:40:13.361785: step: 384/470, loss: 0.1539594531059265 2023-01-22 19:40:14.107412: step: 386/470, loss: 0.03286004438996315 2023-01-22 19:40:14.830780: step: 388/470, loss: 0.12388432770967484 2023-01-22 19:40:15.552646: step: 390/470, loss: 0.02824668027460575 2023-01-22 19:40:16.352473: step: 392/470, loss: 0.054188963025808334 2023-01-22 19:40:17.148983: step: 394/470, loss: 0.025531675666570663 2023-01-22 19:40:17.880453: step: 396/470, loss: 0.0763605609536171 2023-01-22 19:40:18.600677: step: 398/470, loss: 0.010550078935921192 2023-01-22 19:40:19.301784: step: 400/470, loss: 0.01099458895623684 2023-01-22 19:40:20.155437: step: 402/470, loss: 0.04127969220280647 2023-01-22 19:40:20.936684: step: 404/470, loss: 0.018147412687540054 2023-01-22 19:40:21.628037: step: 406/470, loss: 0.007162865251302719 2023-01-22 19:40:22.319361: step: 408/470, loss: 0.04229717701673508 2023-01-22 19:40:22.981365: step: 410/470, loss: 0.03728770837187767 2023-01-22 19:40:23.735522: step: 412/470, loss: 0.014461766928434372 2023-01-22 19:40:24.555387: step: 414/470, loss: 0.09930559992790222 2023-01-22 19:40:25.345084: step: 416/470, loss: 0.08320431411266327 2023-01-22 19:40:26.083723: step: 418/470, loss: 0.08579551428556442 2023-01-22 19:40:26.885994: step: 420/470, loss: 0.05773615464568138 2023-01-22 19:40:27.654124: step: 422/470, loss: 0.007528889924287796 2023-01-22 19:40:28.398178: step: 424/470, loss: 0.04420556128025055 2023-01-22 19:40:29.211797: step: 426/470, loss: 0.015755586326122284 2023-01-22 19:40:29.914112: step: 428/470, loss: 0.015585158951580524 2023-01-22 19:40:30.665335: step: 430/470, loss: 0.01235332153737545 2023-01-22 19:40:31.405414: step: 432/470, loss: 0.09456421434879303 2023-01-22 19:40:32.143530: step: 434/470, loss: 0.22690565884113312 2023-01-22 19:40:32.936471: step: 436/470, loss: 0.4971714913845062 2023-01-22 19:40:33.707624: step: 438/470, loss: 0.02801249548792839 2023-01-22 19:40:34.457721: step: 440/470, loss: 0.007650639396160841 2023-01-22 19:40:35.190803: step: 442/470, loss: 0.054860420525074005 2023-01-22 19:40:35.952228: step: 444/470, loss: 0.029611477628350258 2023-01-22 19:40:36.689439: step: 446/470, loss: 0.07205002754926682 2023-01-22 19:40:37.459443: step: 448/470, loss: 0.02133980020880699 2023-01-22 19:40:38.259920: step: 450/470, loss: 0.4700184762477875 2023-01-22 19:40:38.954775: step: 452/470, loss: 0.20984295010566711 2023-01-22 19:40:39.650346: step: 454/470, loss: 0.038992129266262054 2023-01-22 19:40:40.419792: step: 456/470, loss: 0.04633091017603874 2023-01-22 19:40:41.132670: step: 458/470, loss: 0.05490873381495476 2023-01-22 19:40:41.821579: step: 460/470, loss: 0.0526411309838295 2023-01-22 19:40:42.578709: step: 462/470, loss: 0.091351717710495 2023-01-22 19:40:43.295830: step: 464/470, loss: 0.008627299219369888 2023-01-22 19:40:43.967003: step: 466/470, loss: 0.00773763470351696 2023-01-22 19:40:44.707565: step: 468/470, loss: 0.004108825232833624 2023-01-22 19:40:45.455529: step: 470/470, loss: 0.06798605620861053 2023-01-22 19:40:46.231156: step: 472/470, loss: 0.04032795503735542 2023-01-22 19:40:46.975867: step: 474/470, loss: 0.03978598490357399 2023-01-22 19:40:47.728259: step: 476/470, loss: 0.022391952574253082 2023-01-22 19:40:48.489885: step: 478/470, loss: 0.08442055433988571 2023-01-22 19:40:49.366109: step: 480/470, loss: 0.041006091982126236 2023-01-22 19:40:50.109838: step: 482/470, loss: 0.054082486778497696 2023-01-22 19:40:50.899511: step: 484/470, loss: 0.056996893137693405 2023-01-22 19:40:51.671030: step: 486/470, loss: 0.21792449057102203 2023-01-22 19:40:52.462796: step: 488/470, loss: 0.09255671501159668 2023-01-22 19:40:53.182450: step: 490/470, loss: 0.0614955797791481 2023-01-22 19:40:53.974787: step: 492/470, loss: 0.034090857952833176 2023-01-22 19:40:54.645182: step: 494/470, loss: 0.27086207270622253 2023-01-22 19:40:55.453085: step: 496/470, loss: 0.3944252133369446 2023-01-22 19:40:56.126977: step: 498/470, loss: 0.06420669704675674 2023-01-22 19:40:56.899857: step: 500/470, loss: 2.5734214782714844 2023-01-22 19:40:57.594796: step: 502/470, loss: 0.16732986271381378 2023-01-22 19:40:58.331162: step: 504/470, loss: 0.0660572499036789 2023-01-22 19:40:59.095639: step: 506/470, loss: 0.03720968961715698 2023-01-22 19:40:59.868834: step: 508/470, loss: 0.10983237624168396 2023-01-22 19:41:00.590594: step: 510/470, loss: 0.09013104438781738 2023-01-22 19:41:01.292467: step: 512/470, loss: 0.020754126831889153 2023-01-22 19:41:01.978797: step: 514/470, loss: 0.017007894814014435 2023-01-22 19:41:02.678756: step: 516/470, loss: 0.035360775887966156 2023-01-22 19:41:03.445961: step: 518/470, loss: 0.04036063700914383 2023-01-22 19:41:04.172879: step: 520/470, loss: 0.08036433905363083 2023-01-22 19:41:04.934485: step: 522/470, loss: 0.06309843808412552 2023-01-22 19:41:05.765289: step: 524/470, loss: 0.03624216467142105 2023-01-22 19:41:06.444256: step: 526/470, loss: 0.016651665791869164 2023-01-22 19:41:07.247905: step: 528/470, loss: 0.03483118116855621 2023-01-22 19:41:07.967637: step: 530/470, loss: 0.0016926492098718882 2023-01-22 19:41:08.681282: step: 532/470, loss: 0.04679853096604347 2023-01-22 19:41:09.414301: step: 534/470, loss: 0.08570143580436707 2023-01-22 19:41:10.174681: step: 536/470, loss: 0.01729772239923477 2023-01-22 19:41:10.908419: step: 538/470, loss: 0.026970678940415382 2023-01-22 19:41:11.670816: step: 540/470, loss: 0.13230566680431366 2023-01-22 19:41:12.435870: step: 542/470, loss: 0.06809458136558533 2023-01-22 19:41:13.166208: step: 544/470, loss: 0.0313749760389328 2023-01-22 19:41:13.865821: step: 546/470, loss: 0.0077603161334991455 2023-01-22 19:41:14.673017: step: 548/470, loss: 0.031418073922395706 2023-01-22 19:41:15.482400: step: 550/470, loss: 0.0281534343957901 2023-01-22 19:41:16.265624: step: 552/470, loss: 1.547700047492981 2023-01-22 19:41:16.961961: step: 554/470, loss: 0.041558846831321716 2023-01-22 19:41:17.700759: step: 556/470, loss: 0.05451665446162224 2023-01-22 19:41:18.415619: step: 558/470, loss: 0.03179001063108444 2023-01-22 19:41:19.101140: step: 560/470, loss: 0.04917387664318085 2023-01-22 19:41:19.753936: step: 562/470, loss: 0.019586345180869102 2023-01-22 19:41:20.535654: step: 564/470, loss: 0.03305808827280998 2023-01-22 19:41:21.265790: step: 566/470, loss: 0.05808739736676216 2023-01-22 19:41:22.068039: step: 568/470, loss: 0.08200092613697052 2023-01-22 19:41:22.787662: step: 570/470, loss: 0.12195821106433868 2023-01-22 19:41:23.538055: step: 572/470, loss: 0.0011931839399039745 2023-01-22 19:41:24.265165: step: 574/470, loss: 0.06672245264053345 2023-01-22 19:41:25.035179: step: 576/470, loss: 0.10479816049337387 2023-01-22 19:41:25.705571: step: 578/470, loss: 0.09342262893915176 2023-01-22 19:41:26.498702: step: 580/470, loss: 0.04020530730485916 2023-01-22 19:41:27.227916: step: 582/470, loss: 0.058462418615818024 2023-01-22 19:41:27.989486: step: 584/470, loss: 0.03125924617052078 2023-01-22 19:41:28.726949: step: 586/470, loss: 0.12108433991670609 2023-01-22 19:41:29.422779: step: 588/470, loss: 0.008420255035161972 2023-01-22 19:41:30.209176: step: 590/470, loss: 0.03752874210476875 2023-01-22 19:41:31.049757: step: 592/470, loss: 0.30069804191589355 2023-01-22 19:41:31.795482: step: 594/470, loss: 0.050650861114263535 2023-01-22 19:41:32.511954: step: 596/470, loss: 0.06926306337118149 2023-01-22 19:41:33.221970: step: 598/470, loss: 0.052621450275182724 2023-01-22 19:41:34.021913: step: 600/470, loss: 0.05443857982754707 2023-01-22 19:41:34.718095: step: 602/470, loss: 0.0711992159485817 2023-01-22 19:41:35.445829: step: 604/470, loss: 0.005964161362498999 2023-01-22 19:41:36.300949: step: 606/470, loss: 0.18475210666656494 2023-01-22 19:41:37.001658: step: 608/470, loss: 0.09569357335567474 2023-01-22 19:41:37.781454: step: 610/470, loss: 0.07499527186155319 2023-01-22 19:41:38.493190: step: 612/470, loss: 0.13202105462551117 2023-01-22 19:41:39.208676: step: 614/470, loss: 0.04779544472694397 2023-01-22 19:41:40.190756: step: 616/470, loss: 0.04207386076450348 2023-01-22 19:41:40.928405: step: 618/470, loss: 0.08963283151388168 2023-01-22 19:41:41.677768: step: 620/470, loss: 0.02025291509926319 2023-01-22 19:41:42.393042: step: 622/470, loss: 0.22763288021087646 2023-01-22 19:41:43.189018: step: 624/470, loss: 0.0565686859190464 2023-01-22 19:41:43.811976: step: 626/470, loss: 0.07253819704055786 2023-01-22 19:41:44.474176: step: 628/470, loss: 0.1353847086429596 2023-01-22 19:41:45.169397: step: 630/470, loss: 0.1034807413816452 2023-01-22 19:41:45.954160: step: 632/470, loss: 0.04602696746587753 2023-01-22 19:41:46.674532: step: 634/470, loss: 0.0005324460798874497 2023-01-22 19:41:47.408938: step: 636/470, loss: 0.04955475404858589 2023-01-22 19:41:48.152749: step: 638/470, loss: 0.04272598400712013 2023-01-22 19:41:48.847078: step: 640/470, loss: 0.08605597168207169 2023-01-22 19:41:49.586741: step: 642/470, loss: 0.013445237651467323 2023-01-22 19:41:50.328938: step: 644/470, loss: 0.018981290981173515 2023-01-22 19:41:51.049024: step: 646/470, loss: 0.039564017206430435 2023-01-22 19:41:51.778289: step: 648/470, loss: 0.03769481182098389 2023-01-22 19:41:52.546969: step: 650/470, loss: 0.02641088329255581 2023-01-22 19:41:53.300367: step: 652/470, loss: 0.00315751601010561 2023-01-22 19:41:54.037284: step: 654/470, loss: 0.030088361352682114 2023-01-22 19:41:54.753773: step: 656/470, loss: 0.0600021593272686 2023-01-22 19:41:55.479282: step: 658/470, loss: 0.05731518939137459 2023-01-22 19:41:56.240477: step: 660/470, loss: 0.02242978662252426 2023-01-22 19:41:56.967747: step: 662/470, loss: 0.11539184302091599 2023-01-22 19:41:57.732620: step: 664/470, loss: 0.08133013546466827 2023-01-22 19:41:58.489729: step: 666/470, loss: 0.0326472632586956 2023-01-22 19:41:59.247626: step: 668/470, loss: 0.04640135541558266 2023-01-22 19:41:59.915185: step: 670/470, loss: 0.05912771821022034 2023-01-22 19:42:00.686535: step: 672/470, loss: 0.08168984949588776 2023-01-22 19:42:01.383948: step: 674/470, loss: 0.021860754117369652 2023-01-22 19:42:02.080570: step: 676/470, loss: 0.00997862871736288 2023-01-22 19:42:02.795414: step: 678/470, loss: 0.015150370076298714 2023-01-22 19:42:03.652389: step: 680/470, loss: 0.23191246390342712 2023-01-22 19:42:04.359419: step: 682/470, loss: 0.005268333945423365 2023-01-22 19:42:05.108412: step: 684/470, loss: 0.050369635224342346 2023-01-22 19:42:05.794924: step: 686/470, loss: 0.19161257147789001 2023-01-22 19:42:06.577321: step: 688/470, loss: 0.01200926210731268 2023-01-22 19:42:07.290037: step: 690/470, loss: 0.10803577303886414 2023-01-22 19:42:08.014181: step: 692/470, loss: 0.13654294610023499 2023-01-22 19:42:08.734563: step: 694/470, loss: 0.06747213006019592 2023-01-22 19:42:09.472206: step: 696/470, loss: 0.05152638256549835 2023-01-22 19:42:10.213845: step: 698/470, loss: 0.06810185313224792 2023-01-22 19:42:10.940649: step: 700/470, loss: 0.004062777850776911 2023-01-22 19:42:11.595625: step: 702/470, loss: 0.021000513806939125 2023-01-22 19:42:12.357541: step: 704/470, loss: 0.0744672566652298 2023-01-22 19:42:13.192106: step: 706/470, loss: 0.06481971591711044 2023-01-22 19:42:13.988629: step: 708/470, loss: 0.040443241596221924 2023-01-22 19:42:14.708193: step: 710/470, loss: 0.03952976316213608 2023-01-22 19:42:15.447415: step: 712/470, loss: 0.05129685252904892 2023-01-22 19:42:16.272666: step: 714/470, loss: 0.08018472790718079 2023-01-22 19:42:16.983620: step: 716/470, loss: 0.0011982826981693506 2023-01-22 19:42:17.672084: step: 718/470, loss: 0.01237798947840929 2023-01-22 19:42:18.346696: step: 720/470, loss: 0.030002785846590996 2023-01-22 19:42:19.012644: step: 722/470, loss: 0.029811648651957512 2023-01-22 19:42:19.759644: step: 724/470, loss: 0.028446856886148453 2023-01-22 19:42:20.537556: step: 726/470, loss: 0.03428805246949196 2023-01-22 19:42:21.229541: step: 728/470, loss: 0.04685278609395027 2023-01-22 19:42:22.021072: step: 730/470, loss: 0.011925329454243183 2023-01-22 19:42:22.839646: step: 732/470, loss: 0.10092224925756454 2023-01-22 19:42:23.512661: step: 734/470, loss: 0.014230447821319103 2023-01-22 19:42:24.259845: step: 736/470, loss: 0.06267601996660233 2023-01-22 19:42:24.991752: step: 738/470, loss: 0.0850699171423912 2023-01-22 19:42:25.794438: step: 740/470, loss: 0.014023186638951302 2023-01-22 19:42:26.494130: step: 742/470, loss: 0.11571689695119858 2023-01-22 19:42:27.168562: step: 744/470, loss: 0.05851830914616585 2023-01-22 19:42:27.885039: step: 746/470, loss: 0.7251389622688293 2023-01-22 19:42:28.685321: step: 748/470, loss: 0.21461817622184753 2023-01-22 19:42:29.429439: step: 750/470, loss: 0.056537434458732605 2023-01-22 19:42:30.146756: step: 752/470, loss: 0.06353659927845001 2023-01-22 19:42:30.999566: step: 754/470, loss: 0.14348191022872925 2023-01-22 19:42:31.704503: step: 756/470, loss: 0.021871326491236687 2023-01-22 19:42:32.508184: step: 758/470, loss: 0.059082724153995514 2023-01-22 19:42:33.276212: step: 760/470, loss: 0.0077812704257667065 2023-01-22 19:42:34.078844: step: 762/470, loss: 0.1153213232755661 2023-01-22 19:42:34.744283: step: 764/470, loss: 0.16919955611228943 2023-01-22 19:42:35.424507: step: 766/470, loss: 0.02236533723771572 2023-01-22 19:42:36.189153: step: 768/470, loss: 0.010637041181325912 2023-01-22 19:42:36.913213: step: 770/470, loss: 0.09718048572540283 2023-01-22 19:42:37.553805: step: 772/470, loss: 0.02509610913693905 2023-01-22 19:42:38.245345: step: 774/470, loss: 0.41436123847961426 2023-01-22 19:42:39.083301: step: 776/470, loss: 0.046184662729501724 2023-01-22 19:42:39.792362: step: 778/470, loss: 0.043335553258657455 2023-01-22 19:42:40.512214: step: 780/470, loss: 0.011558422818779945 2023-01-22 19:42:41.335890: step: 782/470, loss: 0.008647819980978966 2023-01-22 19:42:42.056075: step: 784/470, loss: 0.010468276217579842 2023-01-22 19:42:42.796339: step: 786/470, loss: 0.012684951536357403 2023-01-22 19:42:43.615946: step: 788/470, loss: 0.060763075947761536 2023-01-22 19:42:44.343336: step: 790/470, loss: 0.019749129191040993 2023-01-22 19:42:45.045616: step: 792/470, loss: 0.01861565373837948 2023-01-22 19:42:45.818670: step: 794/470, loss: 0.023979458957910538 2023-01-22 19:42:46.606614: step: 796/470, loss: 0.07430318742990494 2023-01-22 19:42:47.406483: step: 798/470, loss: 0.00071112992009148 2023-01-22 19:42:48.179008: step: 800/470, loss: 0.16077010333538055 2023-01-22 19:42:48.957690: step: 802/470, loss: 0.04622608423233032 2023-01-22 19:42:49.710815: step: 804/470, loss: 0.03751537576317787 2023-01-22 19:42:50.463727: step: 806/470, loss: 0.02206636592745781 2023-01-22 19:42:51.247229: step: 808/470, loss: 0.037656575441360474 2023-01-22 19:42:51.982613: step: 810/470, loss: 0.0038295581471174955 2023-01-22 19:42:52.693471: step: 812/470, loss: 0.007838837802410126 2023-01-22 19:42:53.348702: step: 814/470, loss: 0.0843760147690773 2023-01-22 19:42:54.121560: step: 816/470, loss: 0.12722979485988617 2023-01-22 19:42:54.827965: step: 818/470, loss: 0.004428504034876823 2023-01-22 19:42:55.576799: step: 820/470, loss: 0.017400939017534256 2023-01-22 19:42:56.306193: step: 822/470, loss: 0.11112985014915466 2023-01-22 19:42:57.112349: step: 824/470, loss: 0.06626398861408234 2023-01-22 19:42:57.864030: step: 826/470, loss: 0.006494198925793171 2023-01-22 19:42:58.564805: step: 828/470, loss: 0.003452116623520851 2023-01-22 19:42:59.311156: step: 830/470, loss: 0.04936700686812401 2023-01-22 19:43:00.035109: step: 832/470, loss: 0.06773626059293747 2023-01-22 19:43:00.784125: step: 834/470, loss: 0.25022315979003906 2023-01-22 19:43:01.499673: step: 836/470, loss: 0.09486892819404602 2023-01-22 19:43:02.263600: step: 838/470, loss: 0.06972448527812958 2023-01-22 19:43:03.110407: step: 840/470, loss: 0.05532016232609749 2023-01-22 19:43:03.793891: step: 842/470, loss: 0.06207747757434845 2023-01-22 19:43:04.473330: step: 844/470, loss: 0.07339915633201599 2023-01-22 19:43:05.256310: step: 846/470, loss: 0.04446965828537941 2023-01-22 19:43:06.007576: step: 848/470, loss: 0.1462089866399765 2023-01-22 19:43:06.719819: step: 850/470, loss: 0.06136621907353401 2023-01-22 19:43:07.450302: step: 852/470, loss: 0.08713188767433167 2023-01-22 19:43:08.168954: step: 854/470, loss: 0.06073979288339615 2023-01-22 19:43:08.795893: step: 856/470, loss: 0.024500612169504166 2023-01-22 19:43:09.508315: step: 858/470, loss: 0.0761837363243103 2023-01-22 19:43:10.264539: step: 860/470, loss: 0.037365589290857315 2023-01-22 19:43:11.033880: step: 862/470, loss: 0.018834102898836136 2023-01-22 19:43:11.692473: step: 864/470, loss: 0.009648923762142658 2023-01-22 19:43:12.445392: step: 866/470, loss: 0.006375704426318407 2023-01-22 19:43:13.237072: step: 868/470, loss: 0.047611527144908905 2023-01-22 19:43:13.961905: step: 870/470, loss: 0.009436516091227531 2023-01-22 19:43:14.739377: step: 872/470, loss: 0.1485144942998886 2023-01-22 19:43:15.463312: step: 874/470, loss: 0.030754825100302696 2023-01-22 19:43:16.188866: step: 876/470, loss: 0.03466818481683731 2023-01-22 19:43:16.962708: step: 878/470, loss: 0.05955606326460838 2023-01-22 19:43:17.703829: step: 880/470, loss: 0.036698900163173676 2023-01-22 19:43:18.449669: step: 882/470, loss: 0.03446055203676224 2023-01-22 19:43:19.283707: step: 884/470, loss: 0.04526906460523605 2023-01-22 19:43:19.955703: step: 886/470, loss: 0.015627926215529442 2023-01-22 19:43:20.739474: step: 888/470, loss: 0.0814151018857956 2023-01-22 19:43:21.462229: step: 890/470, loss: 0.25850528478622437 2023-01-22 19:43:22.287019: step: 892/470, loss: 0.18925324082374573 2023-01-22 19:43:23.016833: step: 894/470, loss: 0.04655158147215843 2023-01-22 19:43:23.747261: step: 896/470, loss: 0.08437757939100266 2023-01-22 19:43:24.480874: step: 898/470, loss: 0.04652692750096321 2023-01-22 19:43:25.273143: step: 900/470, loss: 3.10775089263916 2023-01-22 19:43:26.051170: step: 902/470, loss: 0.059189215302467346 2023-01-22 19:43:26.746343: step: 904/470, loss: 0.00953720137476921 2023-01-22 19:43:27.451598: step: 906/470, loss: 0.06070271134376526 2023-01-22 19:43:28.249949: step: 908/470, loss: 0.008491786196827888 2023-01-22 19:43:28.958060: step: 910/470, loss: 0.11616123467683792 2023-01-22 19:43:29.629287: step: 912/470, loss: 0.004306926857680082 2023-01-22 19:43:30.377258: step: 914/470, loss: 0.03736087679862976 2023-01-22 19:43:31.151740: step: 916/470, loss: 0.06064944341778755 2023-01-22 19:43:31.919245: step: 918/470, loss: 0.02283492125570774 2023-01-22 19:43:32.620864: step: 920/470, loss: 0.010427909903228283 2023-01-22 19:43:33.449531: step: 922/470, loss: 0.034641899168491364 2023-01-22 19:43:34.164034: step: 924/470, loss: 0.03633886203169823 2023-01-22 19:43:34.884569: step: 926/470, loss: 0.31999385356903076 2023-01-22 19:43:35.588567: step: 928/470, loss: 0.06611615419387817 2023-01-22 19:43:36.234468: step: 930/470, loss: 0.058460332453250885 2023-01-22 19:43:36.908288: step: 932/470, loss: 0.08226493000984192 2023-01-22 19:43:37.654060: step: 934/470, loss: 0.05760214105248451 2023-01-22 19:43:38.375980: step: 936/470, loss: 0.018661608919501305 2023-01-22 19:43:39.021879: step: 938/470, loss: 0.03469004109501839 2023-01-22 19:43:39.771635: step: 940/470, loss: 0.05704415589570999 2023-01-22 19:43:40.430838: step: 942/470, loss: 0.05449334532022476 ================================================== Loss: 0.096 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3092939210567948, 'r': 0.35331108249751514, 'f1': 0.32984046142814966}, 'combined': 0.24304033999968921, 'epoch': 22} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3275139861122523, 'r': 0.35869079825178396, 'f1': 0.3423941534482381}, 'combined': 0.23848348996394694, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30467303045542077, 'r': 0.3543919690117134, 'f1': 0.3276571362617069}, 'combined': 0.2414315740875735, 'epoch': 22} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3318889213155725, 'r': 0.3532496876250282, 'f1': 0.3422363190485383}, 'combined': 0.23837355555619585, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29258975351274114, 'r': 0.3586584075317472, 'f1': 0.3222727719850482}, 'combined': 0.23746414777845656, 'epoch': 22} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3165754369455721, 'r': 0.3649749508632124, 'f1': 0.3390566761033863}, 'combined': 0.23615887887798054, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19827586206896552, 'r': 0.32857142857142857, 'f1': 0.24731182795698925}, 'combined': 0.16487455197132617, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26282051282051283, 'r': 0.44565217391304346, 'f1': 0.3306451612903226}, 'combined': 0.1653225806451613, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2980769230769231, 'r': 0.2672413793103448, 'f1': 0.2818181818181818}, 'combined': 0.18787878787878787, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:46:16.807902: step: 2/470, loss: 0.020145274698734283 2023-01-22 19:46:17.562321: step: 4/470, loss: 0.012752152979373932 2023-01-22 19:46:18.239854: step: 6/470, loss: 0.018607724457979202 2023-01-22 19:46:19.006521: step: 8/470, loss: 0.04674272611737251 2023-01-22 19:46:19.734098: step: 10/470, loss: 0.08120296895503998 2023-01-22 19:46:20.510665: step: 12/470, loss: 0.04648889973759651 2023-01-22 19:46:21.314670: step: 14/470, loss: 0.041562411934137344 2023-01-22 19:46:22.071135: step: 16/470, loss: 0.24655379354953766 2023-01-22 19:46:22.835889: step: 18/470, loss: 0.02498440630733967 2023-01-22 19:46:23.516957: step: 20/470, loss: 0.05179232358932495 2023-01-22 19:46:24.243234: step: 22/470, loss: 0.03058280237019062 2023-01-22 19:46:25.044101: step: 24/470, loss: 0.11315041780471802 2023-01-22 19:46:25.723821: step: 26/470, loss: 0.018697861582040787 2023-01-22 19:46:26.407226: step: 28/470, loss: 0.006492226850241423 2023-01-22 19:46:27.144811: step: 30/470, loss: 0.09545272588729858 2023-01-22 19:46:27.873719: step: 32/470, loss: 0.024350939318537712 2023-01-22 19:46:28.592232: step: 34/470, loss: 0.5242292881011963 2023-01-22 19:46:29.286774: step: 36/470, loss: 0.017941568046808243 2023-01-22 19:46:30.015848: step: 38/470, loss: 0.08385952562093735 2023-01-22 19:46:30.696085: step: 40/470, loss: 0.03444410860538483 2023-01-22 19:46:31.509403: step: 42/470, loss: 0.007343780715018511 2023-01-22 19:46:32.198649: step: 44/470, loss: 0.08139993250370026 2023-01-22 19:46:33.049699: step: 46/470, loss: 0.029229672625660896 2023-01-22 19:46:33.774933: step: 48/470, loss: 0.031573113054037094 2023-01-22 19:46:34.637110: step: 50/470, loss: 0.01538156345486641 2023-01-22 19:46:35.371991: step: 52/470, loss: 0.01159243006259203 2023-01-22 19:46:36.102544: step: 54/470, loss: 0.08330456912517548 2023-01-22 19:46:36.833225: step: 56/470, loss: 0.004855206701904535 2023-01-22 19:46:37.548040: step: 58/470, loss: 0.12275192141532898 2023-01-22 19:46:38.375413: step: 60/470, loss: 0.07747679203748703 2023-01-22 19:46:39.081237: step: 62/470, loss: 0.009875631891191006 2023-01-22 19:46:39.854354: step: 64/470, loss: 0.016967376694083214 2023-01-22 19:46:40.526947: step: 66/470, loss: 0.003874419489875436 2023-01-22 19:46:41.298024: step: 68/470, loss: 0.018619472160935402 2023-01-22 19:46:42.069959: step: 70/470, loss: 0.08524934202432632 2023-01-22 19:46:42.783965: step: 72/470, loss: 0.027295870706439018 2023-01-22 19:46:43.530182: step: 74/470, loss: 0.0906219631433487 2023-01-22 19:46:44.306810: step: 76/470, loss: 0.04819449409842491 2023-01-22 19:46:45.058151: step: 78/470, loss: 0.0014960899716243148 2023-01-22 19:46:45.787761: step: 80/470, loss: 0.05570714548230171 2023-01-22 19:46:46.565310: step: 82/470, loss: 0.0980103462934494 2023-01-22 19:46:47.336618: step: 84/470, loss: 0.014589948579668999 2023-01-22 19:46:48.118907: step: 86/470, loss: 0.05184318125247955 2023-01-22 19:46:48.988653: step: 88/470, loss: 0.11208540946245193 2023-01-22 19:46:49.819905: step: 90/470, loss: 0.5855203866958618 2023-01-22 19:46:50.523063: step: 92/470, loss: 0.028581831604242325 2023-01-22 19:46:51.212159: step: 94/470, loss: 0.11515840142965317 2023-01-22 19:46:51.877369: step: 96/470, loss: 0.013613392598927021 2023-01-22 19:46:52.662454: step: 98/470, loss: 0.030206475406885147 2023-01-22 19:46:53.386049: step: 100/470, loss: 0.028599973767995834 2023-01-22 19:46:54.185794: step: 102/470, loss: 0.03127244487404823 2023-01-22 19:46:54.878848: step: 104/470, loss: 0.06208731234073639 2023-01-22 19:46:55.594862: step: 106/470, loss: 0.03612957522273064 2023-01-22 19:46:56.341099: step: 108/470, loss: 0.010129529982805252 2023-01-22 19:46:57.066577: step: 110/470, loss: 0.007978275418281555 2023-01-22 19:46:57.835663: step: 112/470, loss: 0.153214231133461 2023-01-22 19:46:58.614559: step: 114/470, loss: 0.022448312491178513 2023-01-22 19:46:59.389976: step: 116/470, loss: 0.035694267600774765 2023-01-22 19:47:00.157768: step: 118/470, loss: 0.10163560509681702 2023-01-22 19:47:00.933158: step: 120/470, loss: 0.015968233346939087 2023-01-22 19:47:01.651762: step: 122/470, loss: 0.025551877915859222 2023-01-22 19:47:02.307112: step: 124/470, loss: 0.019368741661310196 2023-01-22 19:47:03.056347: step: 126/470, loss: 0.027455579489469528 2023-01-22 19:47:03.757125: step: 128/470, loss: 0.052771057933568954 2023-01-22 19:47:04.665082: step: 130/470, loss: 0.007331357337534428 2023-01-22 19:47:05.446451: step: 132/470, loss: 0.05879795923829079 2023-01-22 19:47:06.265561: step: 134/470, loss: 0.013023512437939644 2023-01-22 19:47:06.971147: step: 136/470, loss: 0.009905382990837097 2023-01-22 19:47:07.655578: step: 138/470, loss: 0.0017207978526130319 2023-01-22 19:47:08.381113: step: 140/470, loss: 0.04617958515882492 2023-01-22 19:47:09.059245: step: 142/470, loss: 0.009676694869995117 2023-01-22 19:47:09.850377: step: 144/470, loss: 0.10442635416984558 2023-01-22 19:47:10.542682: step: 146/470, loss: 0.03223101794719696 2023-01-22 19:47:11.214719: step: 148/470, loss: 0.17558103799819946 2023-01-22 19:47:11.880671: step: 150/470, loss: 0.007184363901615143 2023-01-22 19:47:12.569852: step: 152/470, loss: 0.016059819608926773 2023-01-22 19:47:13.347419: step: 154/470, loss: 0.017226694151759148 2023-01-22 19:47:14.201322: step: 156/470, loss: 0.054372578859329224 2023-01-22 19:47:14.933392: step: 158/470, loss: 0.09041217714548111 2023-01-22 19:47:15.608609: step: 160/470, loss: 0.010833369567990303 2023-01-22 19:47:16.324026: step: 162/470, loss: 0.007517059799283743 2023-01-22 19:47:17.026197: step: 164/470, loss: 0.029931971803307533 2023-01-22 19:47:17.766191: step: 166/470, loss: 0.029189355671405792 2023-01-22 19:47:18.521749: step: 168/470, loss: 0.03147708997130394 2023-01-22 19:47:19.293119: step: 170/470, loss: 0.023568544536828995 2023-01-22 19:47:20.110565: step: 172/470, loss: 0.011535699479281902 2023-01-22 19:47:20.879882: step: 174/470, loss: 0.05071718618273735 2023-01-22 19:47:21.656568: step: 176/470, loss: 0.043926727026700974 2023-01-22 19:47:22.435036: step: 178/470, loss: 0.019493265077471733 2023-01-22 19:47:23.095903: step: 180/470, loss: 0.09586971253156662 2023-01-22 19:47:23.766626: step: 182/470, loss: 0.0498097725212574 2023-01-22 19:47:24.596760: step: 184/470, loss: 0.07411568611860275 2023-01-22 19:47:25.369315: step: 186/470, loss: 0.014985108748078346 2023-01-22 19:47:26.106169: step: 188/470, loss: 0.02051725424826145 2023-01-22 19:47:26.833736: step: 190/470, loss: 0.022670986130833626 2023-01-22 19:47:27.574121: step: 192/470, loss: 0.033521879464387894 2023-01-22 19:47:28.286518: step: 194/470, loss: 0.000296061800327152 2023-01-22 19:47:29.158047: step: 196/470, loss: 0.025557244196534157 2023-01-22 19:47:29.901914: step: 198/470, loss: 0.00772892776876688 2023-01-22 19:47:30.737533: step: 200/470, loss: 0.035560574382543564 2023-01-22 19:47:31.454194: step: 202/470, loss: 0.007423603907227516 2023-01-22 19:47:32.289383: step: 204/470, loss: 0.017200924456119537 2023-01-22 19:47:33.041396: step: 206/470, loss: 0.44262224435806274 2023-01-22 19:47:33.856279: step: 208/470, loss: 0.03299145773053169 2023-01-22 19:47:34.549640: step: 210/470, loss: 0.0001655942905927077 2023-01-22 19:47:35.313769: step: 212/470, loss: 0.01341554056853056 2023-01-22 19:47:36.054516: step: 214/470, loss: 0.3039006292819977 2023-01-22 19:47:36.826758: step: 216/470, loss: 0.0034072119742631912 2023-01-22 19:47:37.567171: step: 218/470, loss: 0.25233790278434753 2023-01-22 19:47:38.358791: step: 220/470, loss: 0.0021255554165691137 2023-01-22 19:47:39.073866: step: 222/470, loss: 0.7128084301948547 2023-01-22 19:47:39.837181: step: 224/470, loss: 0.0008448681910522282 2023-01-22 19:47:40.590057: step: 226/470, loss: 0.01992366835474968 2023-01-22 19:47:41.330851: step: 228/470, loss: 0.04137638583779335 2023-01-22 19:47:42.152764: step: 230/470, loss: 0.024264074862003326 2023-01-22 19:47:42.820195: step: 232/470, loss: 0.010322164744138718 2023-01-22 19:47:43.525345: step: 234/470, loss: 0.00993278156965971 2023-01-22 19:47:44.196324: step: 236/470, loss: 0.0286688432097435 2023-01-22 19:47:45.078676: step: 238/470, loss: 0.039216842502355576 2023-01-22 19:47:45.788080: step: 240/470, loss: 0.12787392735481262 2023-01-22 19:47:46.497193: step: 242/470, loss: 0.1834522932767868 2023-01-22 19:47:47.208678: step: 244/470, loss: 0.12740002572536469 2023-01-22 19:47:48.035206: step: 246/470, loss: 0.03960498049855232 2023-01-22 19:47:48.760890: step: 248/470, loss: 0.03761105611920357 2023-01-22 19:47:49.547074: step: 250/470, loss: 0.004436446353793144 2023-01-22 19:47:50.254354: step: 252/470, loss: 0.009433421306312084 2023-01-22 19:47:50.975438: step: 254/470, loss: 0.04656631872057915 2023-01-22 19:47:51.768380: step: 256/470, loss: 0.0027728108689188957 2023-01-22 19:47:52.562650: step: 258/470, loss: 0.9103434085845947 2023-01-22 19:47:53.220284: step: 260/470, loss: 0.04946238175034523 2023-01-22 19:47:53.932763: step: 262/470, loss: 0.0284775011241436 2023-01-22 19:47:54.660078: step: 264/470, loss: 0.12191655486822128 2023-01-22 19:47:55.389993: step: 266/470, loss: 0.07705827057361603 2023-01-22 19:47:56.099519: step: 268/470, loss: 0.05367019772529602 2023-01-22 19:47:56.841282: step: 270/470, loss: 0.03151824325323105 2023-01-22 19:47:57.586091: step: 272/470, loss: 0.0288032628595829 2023-01-22 19:47:58.333223: step: 274/470, loss: 0.02198009565472603 2023-01-22 19:47:59.015080: step: 276/470, loss: 0.3013882637023926 2023-01-22 19:47:59.847165: step: 278/470, loss: 0.00810268521308899 2023-01-22 19:48:00.554123: step: 280/470, loss: 0.3207792043685913 2023-01-22 19:48:01.314817: step: 282/470, loss: 0.03247154504060745 2023-01-22 19:48:02.087481: step: 284/470, loss: 0.11384246498346329 2023-01-22 19:48:02.856346: step: 286/470, loss: 0.06521860510110855 2023-01-22 19:48:03.562304: step: 288/470, loss: 0.08328651636838913 2023-01-22 19:48:04.183031: step: 290/470, loss: 0.004215493332594633 2023-01-22 19:48:04.946712: step: 292/470, loss: 0.08901931345462799 2023-01-22 19:48:05.725064: step: 294/470, loss: 0.00959330890327692 2023-01-22 19:48:06.446141: step: 296/470, loss: 0.6423669457435608 2023-01-22 19:48:07.182041: step: 298/470, loss: 0.032605089247226715 2023-01-22 19:48:07.956976: step: 300/470, loss: 0.02665814571082592 2023-01-22 19:48:08.604922: step: 302/470, loss: 0.04625895991921425 2023-01-22 19:48:09.290028: step: 304/470, loss: 0.19270682334899902 2023-01-22 19:48:09.985934: step: 306/470, loss: 0.005112417042255402 2023-01-22 19:48:10.684058: step: 308/470, loss: 0.01898195967078209 2023-01-22 19:48:11.451107: step: 310/470, loss: 0.007997587323188782 2023-01-22 19:48:12.239389: step: 312/470, loss: 0.03887288644909859 2023-01-22 19:48:12.949178: step: 314/470, loss: 0.03566152974963188 2023-01-22 19:48:13.675159: step: 316/470, loss: 0.017126863822340965 2023-01-22 19:48:14.494300: step: 318/470, loss: 0.16232366859912872 2023-01-22 19:48:15.284522: step: 320/470, loss: 0.005544004961848259 2023-01-22 19:48:16.008642: step: 322/470, loss: 0.02310621552169323 2023-01-22 19:48:16.765980: step: 324/470, loss: 0.0022491663694381714 2023-01-22 19:48:17.436073: step: 326/470, loss: 0.02474634349346161 2023-01-22 19:48:18.203166: step: 328/470, loss: 0.04117470234632492 2023-01-22 19:48:18.870398: step: 330/470, loss: 0.41619208455085754 2023-01-22 19:48:19.608893: step: 332/470, loss: 0.006768751889467239 2023-01-22 19:48:20.281204: step: 334/470, loss: 0.5537669062614441 2023-01-22 19:48:21.007540: step: 336/470, loss: 0.023046918213367462 2023-01-22 19:48:21.758696: step: 338/470, loss: 0.03325394541025162 2023-01-22 19:48:22.513727: step: 340/470, loss: 0.04114500433206558 2023-01-22 19:48:23.316142: step: 342/470, loss: 0.028026726096868515 2023-01-22 19:48:23.998970: step: 344/470, loss: 0.09893575310707092 2023-01-22 19:48:24.738128: step: 346/470, loss: 0.018749456852674484 2023-01-22 19:48:25.463398: step: 348/470, loss: 0.05088183283805847 2023-01-22 19:48:26.180328: step: 350/470, loss: 0.0032982854172587395 2023-01-22 19:48:26.899064: step: 352/470, loss: 0.054463837295770645 2023-01-22 19:48:27.650856: step: 354/470, loss: 0.11088722199201584 2023-01-22 19:48:28.406794: step: 356/470, loss: 0.13968579471111298 2023-01-22 19:48:29.090293: step: 358/470, loss: 0.030351005494594574 2023-01-22 19:48:29.776000: step: 360/470, loss: 0.03071235679090023 2023-01-22 19:48:30.472805: step: 362/470, loss: 0.0850130170583725 2023-01-22 19:48:31.127392: step: 364/470, loss: 0.759006679058075 2023-01-22 19:48:31.788921: step: 366/470, loss: 0.06871728599071503 2023-01-22 19:48:32.530376: step: 368/470, loss: 0.077363520860672 2023-01-22 19:48:33.194618: step: 370/470, loss: 0.01663314364850521 2023-01-22 19:48:33.886343: step: 372/470, loss: 0.03362368047237396 2023-01-22 19:48:34.642631: step: 374/470, loss: 0.01191516499966383 2023-01-22 19:48:35.324185: step: 376/470, loss: 0.029850145801901817 2023-01-22 19:48:36.001128: step: 378/470, loss: 0.007099964190274477 2023-01-22 19:48:36.708404: step: 380/470, loss: 0.9981684684753418 2023-01-22 19:48:37.563047: step: 382/470, loss: 0.018105922266840935 2023-01-22 19:48:38.220007: step: 384/470, loss: 0.07014259696006775 2023-01-22 19:48:38.990081: step: 386/470, loss: 0.04365800693631172 2023-01-22 19:48:39.775289: step: 388/470, loss: 0.10529167205095291 2023-01-22 19:48:40.460887: step: 390/470, loss: 0.07534227520227432 2023-01-22 19:48:41.180545: step: 392/470, loss: 0.07272927463054657 2023-01-22 19:48:41.885209: step: 394/470, loss: 0.06634674966335297 2023-01-22 19:48:42.636892: step: 396/470, loss: 0.04313298314809799 2023-01-22 19:48:43.338824: step: 398/470, loss: 0.0057480488903820515 2023-01-22 19:48:44.069417: step: 400/470, loss: 0.04004475101828575 2023-01-22 19:48:44.812311: step: 402/470, loss: 0.11582747101783752 2023-01-22 19:48:45.530347: step: 404/470, loss: 0.028773313388228416 2023-01-22 19:48:46.292295: step: 406/470, loss: 0.0024830615147948265 2023-01-22 19:48:47.048904: step: 408/470, loss: 0.07367391884326935 2023-01-22 19:48:47.846624: step: 410/470, loss: 0.08376302570104599 2023-01-22 19:48:48.570204: step: 412/470, loss: 0.021925557404756546 2023-01-22 19:48:49.267829: step: 414/470, loss: 0.0081571564078331 2023-01-22 19:48:49.959487: step: 416/470, loss: 0.01600341498851776 2023-01-22 19:48:50.678321: step: 418/470, loss: 0.07978179305791855 2023-01-22 19:48:51.396289: step: 420/470, loss: 0.03541692718863487 2023-01-22 19:48:52.109080: step: 422/470, loss: 0.0004405932268127799 2023-01-22 19:48:52.850537: step: 424/470, loss: 0.04251012206077576 2023-01-22 19:48:53.688158: step: 426/470, loss: 0.026230430230498314 2023-01-22 19:48:54.382388: step: 428/470, loss: 0.034719884395599365 2023-01-22 19:48:55.086639: step: 430/470, loss: 0.7030810117721558 2023-01-22 19:48:55.783311: step: 432/470, loss: 0.03227429836988449 2023-01-22 19:48:56.501073: step: 434/470, loss: 0.013126318342983723 2023-01-22 19:48:57.194323: step: 436/470, loss: 0.024919021874666214 2023-01-22 19:48:57.981023: step: 438/470, loss: 0.26918357610702515 2023-01-22 19:48:58.694923: step: 440/470, loss: 0.215628519654274 2023-01-22 19:48:59.395899: step: 442/470, loss: 0.03114582598209381 2023-01-22 19:49:00.073675: step: 444/470, loss: 0.0005924082943238318 2023-01-22 19:49:00.828103: step: 446/470, loss: 0.30109506845474243 2023-01-22 19:49:01.552737: step: 448/470, loss: 0.34661105275154114 2023-01-22 19:49:02.234800: step: 450/470, loss: 0.016731295734643936 2023-01-22 19:49:02.935705: step: 452/470, loss: 0.048470642417669296 2023-01-22 19:49:03.696704: step: 454/470, loss: 0.05453205108642578 2023-01-22 19:49:04.457528: step: 456/470, loss: 0.05783369764685631 2023-01-22 19:49:05.148369: step: 458/470, loss: 0.03494717925786972 2023-01-22 19:49:05.832229: step: 460/470, loss: 0.032794974744319916 2023-01-22 19:49:06.524850: step: 462/470, loss: 0.0035079510416835546 2023-01-22 19:49:07.217773: step: 464/470, loss: 0.06600449979305267 2023-01-22 19:49:07.943557: step: 466/470, loss: 0.0054876902140676975 2023-01-22 19:49:08.686532: step: 468/470, loss: 0.14131823182106018 2023-01-22 19:49:09.475560: step: 470/470, loss: 0.027415527030825615 2023-01-22 19:49:10.230943: step: 472/470, loss: 0.014964505098760128 2023-01-22 19:49:10.943671: step: 474/470, loss: 0.06728798151016235 2023-01-22 19:49:11.656499: step: 476/470, loss: 0.007798313163220882 2023-01-22 19:49:12.481500: step: 478/470, loss: 0.0030685949604958296 2023-01-22 19:49:13.199477: step: 480/470, loss: 0.07129280269145966 2023-01-22 19:49:13.922156: step: 482/470, loss: 0.03215346857905388 2023-01-22 19:49:14.652360: step: 484/470, loss: 0.0043903798796236515 2023-01-22 19:49:15.370528: step: 486/470, loss: 0.10907711833715439 2023-01-22 19:49:16.079402: step: 488/470, loss: 0.030243542045354843 2023-01-22 19:49:16.795818: step: 490/470, loss: 0.721255362033844 2023-01-22 19:49:17.621065: step: 492/470, loss: 0.030622025951743126 2023-01-22 19:49:18.325198: step: 494/470, loss: 0.026004338636994362 2023-01-22 19:49:19.105691: step: 496/470, loss: 0.03940839692950249 2023-01-22 19:49:19.822761: step: 498/470, loss: 0.08969125896692276 2023-01-22 19:49:20.544460: step: 500/470, loss: 0.016034258529543877 2023-01-22 19:49:21.327406: step: 502/470, loss: 0.054923348128795624 2023-01-22 19:49:22.076765: step: 504/470, loss: 0.03527640178799629 2023-01-22 19:49:22.848353: step: 506/470, loss: 0.05074672773480415 2023-01-22 19:49:23.570321: step: 508/470, loss: 0.019439974799752235 2023-01-22 19:49:24.442137: step: 510/470, loss: 0.029996780678629875 2023-01-22 19:49:25.163735: step: 512/470, loss: 0.0256869625300169 2023-01-22 19:49:25.977480: step: 514/470, loss: 0.03297635540366173 2023-01-22 19:49:26.738129: step: 516/470, loss: 0.01902947574853897 2023-01-22 19:49:27.415133: step: 518/470, loss: 0.1115870252251625 2023-01-22 19:49:28.091389: step: 520/470, loss: 0.013269971124827862 2023-01-22 19:49:28.875328: step: 522/470, loss: 0.030053507536649704 2023-01-22 19:49:29.575186: step: 524/470, loss: 0.4772378206253052 2023-01-22 19:49:30.373455: step: 526/470, loss: 0.12607523798942566 2023-01-22 19:49:31.108853: step: 528/470, loss: 0.007882537320256233 2023-01-22 19:49:31.897646: step: 530/470, loss: 0.041834231466054916 2023-01-22 19:49:32.686998: step: 532/470, loss: 0.13057224452495575 2023-01-22 19:49:33.496732: step: 534/470, loss: 0.14410966634750366 2023-01-22 19:49:34.235200: step: 536/470, loss: 0.03726746886968613 2023-01-22 19:49:34.981072: step: 538/470, loss: 0.023791544139385223 2023-01-22 19:49:35.757489: step: 540/470, loss: 0.01803162507712841 2023-01-22 19:49:36.562249: step: 542/470, loss: 0.1559169441461563 2023-01-22 19:49:37.363799: step: 544/470, loss: 0.024006202816963196 2023-01-22 19:49:38.098454: step: 546/470, loss: 0.018065424636006355 2023-01-22 19:49:38.758327: step: 548/470, loss: 0.01143345795571804 2023-01-22 19:49:39.449523: step: 550/470, loss: 0.13216164708137512 2023-01-22 19:49:40.171369: step: 552/470, loss: 0.09048359096050262 2023-01-22 19:49:40.943549: step: 554/470, loss: 0.016017138957977295 2023-01-22 19:49:41.667429: step: 556/470, loss: 0.028317036107182503 2023-01-22 19:49:42.397878: step: 558/470, loss: 0.01729518733918667 2023-01-22 19:49:43.075971: step: 560/470, loss: 0.01721752993762493 2023-01-22 19:49:43.818894: step: 562/470, loss: 0.0444251224398613 2023-01-22 19:49:44.628145: step: 564/470, loss: 0.005927725229412317 2023-01-22 19:49:45.398314: step: 566/470, loss: 0.027946837246418 2023-01-22 19:49:46.127919: step: 568/470, loss: 0.020798763260245323 2023-01-22 19:49:46.881985: step: 570/470, loss: 0.020342588424682617 2023-01-22 19:49:47.644455: step: 572/470, loss: 0.015030240640044212 2023-01-22 19:49:48.367499: step: 574/470, loss: 0.03206343948841095 2023-01-22 19:49:49.156164: step: 576/470, loss: 0.022603966295719147 2023-01-22 19:49:49.856826: step: 578/470, loss: 0.015919381752610207 2023-01-22 19:49:50.697944: step: 580/470, loss: 0.04253147542476654 2023-01-22 19:49:51.411158: step: 582/470, loss: 0.009290401823818684 2023-01-22 19:49:52.274774: step: 584/470, loss: 0.12215080112218857 2023-01-22 19:49:52.949156: step: 586/470, loss: 0.07029620558023453 2023-01-22 19:49:53.675075: step: 588/470, loss: 0.09041906893253326 2023-01-22 19:49:54.446719: step: 590/470, loss: 0.029489658772945404 2023-01-22 19:49:55.160695: step: 592/470, loss: 0.06253940612077713 2023-01-22 19:49:55.919148: step: 594/470, loss: 0.009752373211085796 2023-01-22 19:49:56.606326: step: 596/470, loss: 0.04571554809808731 2023-01-22 19:49:57.421382: step: 598/470, loss: 0.038326047360897064 2023-01-22 19:49:58.105106: step: 600/470, loss: 0.014072294346988201 2023-01-22 19:49:58.918590: step: 602/470, loss: 0.12891465425491333 2023-01-22 19:49:59.617089: step: 604/470, loss: 0.08130002021789551 2023-01-22 19:50:00.369843: step: 606/470, loss: 0.015089713968336582 2023-01-22 19:50:01.167443: step: 608/470, loss: 0.04352075606584549 2023-01-22 19:50:01.917879: step: 610/470, loss: 0.13570909202098846 2023-01-22 19:50:02.625588: step: 612/470, loss: 0.04086529463529587 2023-01-22 19:50:03.436138: step: 614/470, loss: 1.2370469570159912 2023-01-22 19:50:04.166986: step: 616/470, loss: 0.02251041680574417 2023-01-22 19:50:04.952159: step: 618/470, loss: 0.04271961376070976 2023-01-22 19:50:05.711835: step: 620/470, loss: 0.03870357945561409 2023-01-22 19:50:06.436938: step: 622/470, loss: 0.0048131453804671764 2023-01-22 19:50:07.149664: step: 624/470, loss: 0.05446334183216095 2023-01-22 19:50:08.086126: step: 626/470, loss: 0.025662001222372055 2023-01-22 19:50:08.796457: step: 628/470, loss: 0.036178234964609146 2023-01-22 19:50:09.522597: step: 630/470, loss: 0.05717697739601135 2023-01-22 19:50:10.332636: step: 632/470, loss: 0.030181117355823517 2023-01-22 19:50:11.089353: step: 634/470, loss: 0.013264654204249382 2023-01-22 19:50:11.816468: step: 636/470, loss: 0.26037362217903137 2023-01-22 19:50:12.598981: step: 638/470, loss: 0.10497309267520905 2023-01-22 19:50:13.282154: step: 640/470, loss: 0.0047654202207922935 2023-01-22 19:50:13.985834: step: 642/470, loss: 0.04302401840686798 2023-01-22 19:50:14.672927: step: 644/470, loss: 0.06489899009466171 2023-01-22 19:50:15.449954: step: 646/470, loss: 0.18413802981376648 2023-01-22 19:50:16.169271: step: 648/470, loss: 0.8595938682556152 2023-01-22 19:50:16.925370: step: 650/470, loss: 0.005972123239189386 2023-01-22 19:50:17.590608: step: 652/470, loss: 0.0459543913602829 2023-01-22 19:50:18.364784: step: 654/470, loss: 0.0675494596362114 2023-01-22 19:50:19.194407: step: 656/470, loss: 0.053448911756277084 2023-01-22 19:50:19.905799: step: 658/470, loss: 0.009192129597067833 2023-01-22 19:50:20.642914: step: 660/470, loss: 0.02981734089553356 2023-01-22 19:50:21.373599: step: 662/470, loss: 0.1048010066151619 2023-01-22 19:50:22.117407: step: 664/470, loss: 0.09085634350776672 2023-01-22 19:50:22.903070: step: 666/470, loss: 0.026635007932782173 2023-01-22 19:50:23.596770: step: 668/470, loss: 0.0010363566689193249 2023-01-22 19:50:24.230908: step: 670/470, loss: 0.009339814074337482 2023-01-22 19:50:24.920099: step: 672/470, loss: 0.0018867823528125882 2023-01-22 19:50:25.631979: step: 674/470, loss: 0.2046578824520111 2023-01-22 19:50:26.393879: step: 676/470, loss: 0.06945720314979553 2023-01-22 19:50:27.209084: step: 678/470, loss: 0.003970560152083635 2023-01-22 19:50:27.915853: step: 680/470, loss: 0.01167396642267704 2023-01-22 19:50:28.597210: step: 682/470, loss: 0.035459429025650024 2023-01-22 19:50:29.299195: step: 684/470, loss: 0.024924641475081444 2023-01-22 19:50:29.939194: step: 686/470, loss: 0.013809597119688988 2023-01-22 19:50:30.739608: step: 688/470, loss: 0.1835506558418274 2023-01-22 19:50:31.492177: step: 690/470, loss: 0.022687483578920364 2023-01-22 19:50:32.232338: step: 692/470, loss: 0.008208868093788624 2023-01-22 19:50:32.899732: step: 694/470, loss: 0.010190270841121674 2023-01-22 19:50:33.678596: step: 696/470, loss: 0.035994742065668106 2023-01-22 19:50:34.443104: step: 698/470, loss: 0.037245918065309525 2023-01-22 19:50:35.213764: step: 700/470, loss: 0.10593031346797943 2023-01-22 19:50:35.937574: step: 702/470, loss: 0.0002846399147529155 2023-01-22 19:50:36.629613: step: 704/470, loss: 0.01668049953877926 2023-01-22 19:50:37.368319: step: 706/470, loss: 0.15217755734920502 2023-01-22 19:50:38.053602: step: 708/470, loss: 0.02269909158349037 2023-01-22 19:50:38.875929: step: 710/470, loss: 0.02121802605688572 2023-01-22 19:50:39.584156: step: 712/470, loss: 0.06413817405700684 2023-01-22 19:50:40.251343: step: 714/470, loss: 0.026240617036819458 2023-01-22 19:50:40.948659: step: 716/470, loss: 0.04450815171003342 2023-01-22 19:50:41.691060: step: 718/470, loss: 0.012417121790349483 2023-01-22 19:50:42.440852: step: 720/470, loss: 0.053188763558864594 2023-01-22 19:50:43.178130: step: 722/470, loss: 0.006177571602165699 2023-01-22 19:50:43.869826: step: 724/470, loss: 0.007825598120689392 2023-01-22 19:50:44.632819: step: 726/470, loss: 0.808793306350708 2023-01-22 19:50:45.314639: step: 728/470, loss: 0.021689899265766144 2023-01-22 19:50:46.063570: step: 730/470, loss: 0.015059007331728935 2023-01-22 19:50:46.873533: step: 732/470, loss: 0.14715488255023956 2023-01-22 19:50:47.578516: step: 734/470, loss: 0.008766661398112774 2023-01-22 19:50:48.351529: step: 736/470, loss: 0.07060243934392929 2023-01-22 19:50:49.174137: step: 738/470, loss: 0.05194990336894989 2023-01-22 19:50:49.895490: step: 740/470, loss: 0.1445123255252838 2023-01-22 19:50:50.623112: step: 742/470, loss: 0.020801223814487457 2023-01-22 19:50:51.327007: step: 744/470, loss: 0.010486302897334099 2023-01-22 19:50:52.008168: step: 746/470, loss: 0.037710774689912796 2023-01-22 19:50:52.783198: step: 748/470, loss: 0.20508471131324768 2023-01-22 19:50:53.493512: step: 750/470, loss: 0.047555048018693924 2023-01-22 19:50:54.249142: step: 752/470, loss: 0.020251981914043427 2023-01-22 19:50:55.039390: step: 754/470, loss: 0.09589096158742905 2023-01-22 19:50:55.840133: step: 756/470, loss: 0.030110789462924004 2023-01-22 19:50:56.496892: step: 758/470, loss: 0.012403149157762527 2023-01-22 19:50:57.280279: step: 760/470, loss: 0.017901351675391197 2023-01-22 19:50:58.035306: step: 762/470, loss: 0.049641214311122894 2023-01-22 19:50:58.804259: step: 764/470, loss: 0.09870140254497528 2023-01-22 19:50:59.546252: step: 766/470, loss: 0.03105722926557064 2023-01-22 19:51:00.311824: step: 768/470, loss: 0.026463506743311882 2023-01-22 19:51:00.999906: step: 770/470, loss: 0.010589334182441235 2023-01-22 19:51:01.799338: step: 772/470, loss: 0.033002715557813644 2023-01-22 19:51:02.567562: step: 774/470, loss: 0.25736182928085327 2023-01-22 19:51:03.349957: step: 776/470, loss: 0.025068465620279312 2023-01-22 19:51:04.043686: step: 778/470, loss: 0.334775447845459 2023-01-22 19:51:04.725461: step: 780/470, loss: 0.042881835252046585 2023-01-22 19:51:05.432215: step: 782/470, loss: 0.04640914872288704 2023-01-22 19:51:06.108430: step: 784/470, loss: 0.009364593774080276 2023-01-22 19:51:06.751464: step: 786/470, loss: 0.04730172082781792 2023-01-22 19:51:07.481152: step: 788/470, loss: 0.1467842310667038 2023-01-22 19:51:08.281332: step: 790/470, loss: 0.09448494762182236 2023-01-22 19:51:09.049184: step: 792/470, loss: 0.03505510836839676 2023-01-22 19:51:09.826742: step: 794/470, loss: 0.1019258052110672 2023-01-22 19:51:10.621615: step: 796/470, loss: 0.2154480367898941 2023-01-22 19:51:11.258697: step: 798/470, loss: 0.04859409108757973 2023-01-22 19:51:11.986230: step: 800/470, loss: 0.005880818236619234 2023-01-22 19:51:12.667472: step: 802/470, loss: 0.0315140001475811 2023-01-22 19:51:13.383238: step: 804/470, loss: 0.023350920528173447 2023-01-22 19:51:14.097503: step: 806/470, loss: 0.0514444075524807 2023-01-22 19:51:14.836306: step: 808/470, loss: 0.004267614334821701 2023-01-22 19:51:15.583946: step: 810/470, loss: 0.12531037628650665 2023-01-22 19:51:16.313449: step: 812/470, loss: 0.15931852161884308 2023-01-22 19:51:17.043529: step: 814/470, loss: 0.028155192732810974 2023-01-22 19:51:17.750166: step: 816/470, loss: 0.013806957751512527 2023-01-22 19:51:18.439565: step: 818/470, loss: 0.014839094132184982 2023-01-22 19:51:19.136092: step: 820/470, loss: 0.01098904013633728 2023-01-22 19:51:19.861353: step: 822/470, loss: 0.06523977220058441 2023-01-22 19:51:20.555695: step: 824/470, loss: 0.022953085601329803 2023-01-22 19:51:21.309027: step: 826/470, loss: 0.019507238641381264 2023-01-22 19:51:22.128005: step: 828/470, loss: 0.036449965089559555 2023-01-22 19:51:22.837009: step: 830/470, loss: 0.368724524974823 2023-01-22 19:51:23.650325: step: 832/470, loss: 0.007018078118562698 2023-01-22 19:51:24.374036: step: 834/470, loss: 0.023230871185660362 2023-01-22 19:51:25.045088: step: 836/470, loss: 0.0515056848526001 2023-01-22 19:51:25.798154: step: 838/470, loss: 0.12788495421409607 2023-01-22 19:51:26.578506: step: 840/470, loss: 0.0066448175348341465 2023-01-22 19:51:27.265833: step: 842/470, loss: 0.01157106552273035 2023-01-22 19:51:28.005598: step: 844/470, loss: 0.002934497781097889 2023-01-22 19:51:28.726949: step: 846/470, loss: 0.023657750338315964 2023-01-22 19:51:29.430272: step: 848/470, loss: 0.050684839487075806 2023-01-22 19:51:30.217300: step: 850/470, loss: 0.016548197716474533 2023-01-22 19:51:30.994838: step: 852/470, loss: 0.042522866278886795 2023-01-22 19:51:31.685947: step: 854/470, loss: 0.41254308819770813 2023-01-22 19:51:32.395627: step: 856/470, loss: 0.0507955327630043 2023-01-22 19:51:33.177054: step: 858/470, loss: 0.023885458707809448 2023-01-22 19:51:33.926295: step: 860/470, loss: 0.02228032425045967 2023-01-22 19:51:34.656913: step: 862/470, loss: 0.034615080803632736 2023-01-22 19:51:35.370873: step: 864/470, loss: 0.09413474798202515 2023-01-22 19:51:36.204589: step: 866/470, loss: 0.03352166339755058 2023-01-22 19:51:36.929254: step: 868/470, loss: 0.05351152643561363 2023-01-22 19:51:37.642027: step: 870/470, loss: 0.024592425674200058 2023-01-22 19:51:38.311288: step: 872/470, loss: 0.06424624472856522 2023-01-22 19:51:39.074079: step: 874/470, loss: 0.0035424302332103252 2023-01-22 19:51:39.784233: step: 876/470, loss: 0.003755184356123209 2023-01-22 19:51:40.524411: step: 878/470, loss: 0.08013807982206345 2023-01-22 19:51:41.289061: step: 880/470, loss: 0.03286373242735863 2023-01-22 19:51:42.012599: step: 882/470, loss: 0.04773545265197754 2023-01-22 19:51:42.731882: step: 884/470, loss: 0.053945161402225494 2023-01-22 19:51:43.439924: step: 886/470, loss: 0.1546299159526825 2023-01-22 19:51:44.186491: step: 888/470, loss: 0.01916525699198246 2023-01-22 19:51:44.873968: step: 890/470, loss: 0.06273111701011658 2023-01-22 19:51:45.557700: step: 892/470, loss: 0.08657549321651459 2023-01-22 19:51:46.299089: step: 894/470, loss: 0.10691189765930176 2023-01-22 19:51:47.010565: step: 896/470, loss: 0.0036090798676013947 2023-01-22 19:51:47.744114: step: 898/470, loss: 0.05727309733629227 2023-01-22 19:51:48.584673: step: 900/470, loss: 0.04444019868969917 2023-01-22 19:51:49.319198: step: 902/470, loss: 0.06358946114778519 2023-01-22 19:51:50.046801: step: 904/470, loss: 0.0498536042869091 2023-01-22 19:51:50.791931: step: 906/470, loss: 0.03799587860703468 2023-01-22 19:51:51.540456: step: 908/470, loss: 0.0830104649066925 2023-01-22 19:51:52.386222: step: 910/470, loss: 0.0941431000828743 2023-01-22 19:51:53.078670: step: 912/470, loss: 0.01036920491605997 2023-01-22 19:51:53.803801: step: 914/470, loss: 0.05386245623230934 2023-01-22 19:51:54.566184: step: 916/470, loss: 0.02375110797584057 2023-01-22 19:51:55.283065: step: 918/470, loss: 0.033866845071315765 2023-01-22 19:51:55.968090: step: 920/470, loss: 0.02378959022462368 2023-01-22 19:51:56.674941: step: 922/470, loss: 0.07435478270053864 2023-01-22 19:51:57.391220: step: 924/470, loss: 0.004783568903803825 2023-01-22 19:51:58.112151: step: 926/470, loss: 0.015685396268963814 2023-01-22 19:51:58.813234: step: 928/470, loss: 0.056146442890167236 2023-01-22 19:51:59.548529: step: 930/470, loss: 0.05725179240107536 2023-01-22 19:52:00.439709: step: 932/470, loss: 0.05904132127761841 2023-01-22 19:52:01.173584: step: 934/470, loss: 0.015635449439287186 2023-01-22 19:52:01.979774: step: 936/470, loss: 0.04005289822816849 2023-01-22 19:52:02.684942: step: 938/470, loss: 0.018923653289675713 2023-01-22 19:52:03.322610: step: 940/470, loss: 0.011358820833265781 2023-01-22 19:52:04.071638: step: 942/470, loss: 0.037756726145744324 ================================================== Loss: 0.075 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3127608825283244, 'r': 0.3317520556609741, 'f1': 0.32197667280540215}, 'combined': 0.23724596943555948, 'epoch': 23} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3318102481602698, 'r': 0.36977699770937766, 'f1': 0.34976632798340407}, 'combined': 0.24361833789888843, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29553775743707095, 'r': 0.32245580745031455, 'f1': 0.30841054541981083}, 'combined': 0.22724987557249218, 'epoch': 23} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3428697959566185, 'r': 0.3712224906222619, 'f1': 0.3564832781598822}, 'combined': 0.2482968106586244, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2902272727272727, 'r': 0.3331831119544592, 'f1': 0.31022526501766784}, 'combined': 0.22858703738143946, 'epoch': 23} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32936868566543354, 'r': 0.38352449840465386, 'f1': 0.3543895853761351}, 'combined': 0.24683851717740754, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23317307692307693, 'r': 0.3464285714285714, 'f1': 0.278735632183908}, 'combined': 0.185823754789272, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2948717948717949, 'r': 0.5, 'f1': 0.3709677419354839}, 'combined': 0.18548387096774194, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:54:40.229341: step: 2/470, loss: 0.09071078896522522 2023-01-22 19:54:41.008997: step: 4/470, loss: 0.025325173512101173 2023-01-22 19:54:41.805530: step: 6/470, loss: 0.08036006987094879 2023-01-22 19:54:42.542003: step: 8/470, loss: 0.021646693348884583 2023-01-22 19:54:43.304232: step: 10/470, loss: 0.049812790006399155 2023-01-22 19:54:44.059712: step: 12/470, loss: 0.01448730006814003 2023-01-22 19:54:44.755314: step: 14/470, loss: 0.030710160732269287 2023-01-22 19:54:45.519827: step: 16/470, loss: 0.019698522984981537 2023-01-22 19:54:46.244285: step: 18/470, loss: 0.007313847541809082 2023-01-22 19:54:47.094986: step: 20/470, loss: 0.058628715574741364 2023-01-22 19:54:47.755199: step: 22/470, loss: 0.008414885960519314 2023-01-22 19:54:48.500594: step: 24/470, loss: 0.04240531101822853 2023-01-22 19:54:49.221190: step: 26/470, loss: 0.011772439815104008 2023-01-22 19:54:49.907291: step: 28/470, loss: 0.018150269985198975 2023-01-22 19:54:50.516711: step: 30/470, loss: 0.1334639936685562 2023-01-22 19:54:51.247882: step: 32/470, loss: 0.006515135522931814 2023-01-22 19:54:51.919098: step: 34/470, loss: 0.0021217716857790947 2023-01-22 19:54:52.654395: step: 36/470, loss: 0.10562937706708908 2023-01-22 19:54:53.477053: step: 38/470, loss: 0.012553170323371887 2023-01-22 19:54:54.159041: step: 40/470, loss: 0.020525362342596054 2023-01-22 19:54:55.025488: step: 42/470, loss: 0.10595296323299408 2023-01-22 19:54:55.702936: step: 44/470, loss: 0.08366550505161285 2023-01-22 19:54:56.374935: step: 46/470, loss: 0.020429229363799095 2023-01-22 19:54:57.071190: step: 48/470, loss: 0.024408716708421707 2023-01-22 19:54:57.732127: step: 50/470, loss: 0.03457806631922722 2023-01-22 19:54:58.461058: step: 52/470, loss: 0.06466998159885406 2023-01-22 19:54:59.193850: step: 54/470, loss: 0.009562093764543533 2023-01-22 19:54:59.932287: step: 56/470, loss: 1.4924793243408203 2023-01-22 19:55:00.667329: step: 58/470, loss: 0.04375853016972542 2023-01-22 19:55:01.460620: step: 60/470, loss: 0.0036672528367489576 2023-01-22 19:55:02.154944: step: 62/470, loss: 0.0689840093255043 2023-01-22 19:55:02.970698: step: 64/470, loss: 0.342257022857666 2023-01-22 19:55:03.677246: step: 66/470, loss: 0.10635102540254593 2023-01-22 19:55:04.317241: step: 68/470, loss: 0.026408610865473747 2023-01-22 19:55:05.022738: step: 70/470, loss: 0.051238052546978 2023-01-22 19:55:05.813972: step: 72/470, loss: 0.036443691700696945 2023-01-22 19:55:06.543456: step: 74/470, loss: 0.06277167797088623 2023-01-22 19:55:07.280709: step: 76/470, loss: 0.01571185700595379 2023-01-22 19:55:08.044151: step: 78/470, loss: 0.0491529181599617 2023-01-22 19:55:08.836999: step: 80/470, loss: 0.04565410315990448 2023-01-22 19:55:09.543885: step: 82/470, loss: 0.00350984581746161 2023-01-22 19:55:10.239625: step: 84/470, loss: 0.011095493100583553 2023-01-22 19:55:10.902728: step: 86/470, loss: 0.09822019934654236 2023-01-22 19:55:11.692413: step: 88/470, loss: 0.007752100471407175 2023-01-22 19:55:12.404292: step: 90/470, loss: 0.031544268131256104 2023-01-22 19:55:13.076106: step: 92/470, loss: 0.017505712807178497 2023-01-22 19:55:13.796308: step: 94/470, loss: 0.008974706754088402 2023-01-22 19:55:14.483314: step: 96/470, loss: 0.004836901556700468 2023-01-22 19:55:15.143585: step: 98/470, loss: 0.026484636589884758 2023-01-22 19:55:15.910534: step: 100/470, loss: 0.3591688275337219 2023-01-22 19:55:16.579849: step: 102/470, loss: 0.030483385547995567 2023-01-22 19:55:17.313710: step: 104/470, loss: 0.0007766528287902474 2023-01-22 19:55:17.997075: step: 106/470, loss: 0.015138084068894386 2023-01-22 19:55:18.786032: step: 108/470, loss: 0.02235487848520279 2023-01-22 19:55:19.617685: step: 110/470, loss: 0.20309732854366302 2023-01-22 19:55:20.323160: step: 112/470, loss: 0.17128710448741913 2023-01-22 19:55:21.114067: step: 114/470, loss: 0.032276567071676254 2023-01-22 19:55:21.798281: step: 116/470, loss: 0.02128906175494194 2023-01-22 19:55:22.505736: step: 118/470, loss: 0.002846565330401063 2023-01-22 19:55:23.266667: step: 120/470, loss: 0.5752863883972168 2023-01-22 19:55:23.951490: step: 122/470, loss: 0.0021211388520896435 2023-01-22 19:55:24.662839: step: 124/470, loss: 0.01313631609082222 2023-01-22 19:55:25.339181: step: 126/470, loss: 0.009652921929955482 2023-01-22 19:55:26.089851: step: 128/470, loss: 0.07296048104763031 2023-01-22 19:55:26.791851: step: 130/470, loss: 0.01581062190234661 2023-01-22 19:55:27.670132: step: 132/470, loss: 0.032663170248270035 2023-01-22 19:55:28.401964: step: 134/470, loss: 0.19434767961502075 2023-01-22 19:55:29.147076: step: 136/470, loss: 0.03558581322431564 2023-01-22 19:55:29.951971: step: 138/470, loss: 0.02339303307235241 2023-01-22 19:55:30.675223: step: 140/470, loss: 0.015170947648584843 2023-01-22 19:55:31.331376: step: 142/470, loss: 0.1154085025191307 2023-01-22 19:55:32.117211: step: 144/470, loss: 0.007798160891979933 2023-01-22 19:55:32.898101: step: 146/470, loss: 0.01072743721306324 2023-01-22 19:55:33.641468: step: 148/470, loss: 0.018375081941485405 2023-01-22 19:55:34.407597: step: 150/470, loss: 0.06826124340295792 2023-01-22 19:55:35.066806: step: 152/470, loss: 0.020997071638703346 2023-01-22 19:55:35.743470: step: 154/470, loss: 0.20793934166431427 2023-01-22 19:55:36.528835: step: 156/470, loss: 0.015171626582741737 2023-01-22 19:55:37.354938: step: 158/470, loss: 0.026798272505402565 2023-01-22 19:55:38.087133: step: 160/470, loss: 0.027822552248835564 2023-01-22 19:55:38.822957: step: 162/470, loss: 0.0003175846068188548 2023-01-22 19:55:39.605717: step: 164/470, loss: 0.007902979850769043 2023-01-22 19:55:40.405012: step: 166/470, loss: 0.034732285887002945 2023-01-22 19:55:41.119097: step: 168/470, loss: 0.18924480676651 2023-01-22 19:55:41.897668: step: 170/470, loss: 0.08738310635089874 2023-01-22 19:55:42.719287: step: 172/470, loss: 0.018750211223959923 2023-01-22 19:55:43.449150: step: 174/470, loss: 0.05455208197236061 2023-01-22 19:55:44.117321: step: 176/470, loss: 0.014304988086223602 2023-01-22 19:55:44.914749: step: 178/470, loss: 0.015335150994360447 2023-01-22 19:55:45.610193: step: 180/470, loss: 0.0569082647562027 2023-01-22 19:55:46.445183: step: 182/470, loss: 0.07909173518419266 2023-01-22 19:55:47.117336: step: 184/470, loss: 0.014027910307049751 2023-01-22 19:55:47.851270: step: 186/470, loss: 0.018430842086672783 2023-01-22 19:55:48.605402: step: 188/470, loss: 0.004810623824596405 2023-01-22 19:55:49.357853: step: 190/470, loss: 0.03506871312856674 2023-01-22 19:55:50.097556: step: 192/470, loss: 0.05087616667151451 2023-01-22 19:55:50.759918: step: 194/470, loss: 0.025519607588648796 2023-01-22 19:55:51.603248: step: 196/470, loss: 0.010457420721650124 2023-01-22 19:55:52.257971: step: 198/470, loss: 0.010845007374882698 2023-01-22 19:55:52.909456: step: 200/470, loss: 0.015168682672083378 2023-01-22 19:55:53.684294: step: 202/470, loss: 0.026464076712727547 2023-01-22 19:55:54.441763: step: 204/470, loss: 0.015966270118951797 2023-01-22 19:55:55.230093: step: 206/470, loss: 0.011198129504919052 2023-01-22 19:55:55.943674: step: 208/470, loss: 0.0012512399116531014 2023-01-22 19:55:56.656882: step: 210/470, loss: 0.024326816201210022 2023-01-22 19:55:57.358533: step: 212/470, loss: 0.03241521865129471 2023-01-22 19:55:58.096948: step: 214/470, loss: 0.08937463164329529 2023-01-22 19:55:58.862754: step: 216/470, loss: 7.647789607290179e-05 2023-01-22 19:55:59.604969: step: 218/470, loss: 0.09640821069478989 2023-01-22 19:56:00.314557: step: 220/470, loss: 0.051727913320064545 2023-01-22 19:56:01.011796: step: 222/470, loss: 0.25078266859054565 2023-01-22 19:56:01.760143: step: 224/470, loss: 0.001110507408156991 2023-01-22 19:56:02.575851: step: 226/470, loss: 0.023081980645656586 2023-01-22 19:56:03.275104: step: 228/470, loss: 0.2709239721298218 2023-01-22 19:56:03.940003: step: 230/470, loss: 0.012674129568040371 2023-01-22 19:56:04.795075: step: 232/470, loss: 0.010699287056922913 2023-01-22 19:56:05.572806: step: 234/470, loss: 0.007047413848340511 2023-01-22 19:56:06.316289: step: 236/470, loss: 0.011672616936266422 2023-01-22 19:56:07.093094: step: 238/470, loss: 0.013916079886257648 2023-01-22 19:56:07.802819: step: 240/470, loss: 0.18569129705429077 2023-01-22 19:56:08.483274: step: 242/470, loss: 0.011190307326614857 2023-01-22 19:56:09.253647: step: 244/470, loss: 0.09461580961942673 2023-01-22 19:56:10.006129: step: 246/470, loss: 0.03384866937994957 2023-01-22 19:56:10.675548: step: 248/470, loss: 0.004926465917378664 2023-01-22 19:56:11.422591: step: 250/470, loss: 0.017905734479427338 2023-01-22 19:56:12.188837: step: 252/470, loss: 2.1755669116973877 2023-01-22 19:56:12.906747: step: 254/470, loss: 0.05439945310354233 2023-01-22 19:56:13.641926: step: 256/470, loss: 0.12149098515510559 2023-01-22 19:56:14.374883: step: 258/470, loss: 0.012873425148427486 2023-01-22 19:56:15.201521: step: 260/470, loss: 0.08463416993618011 2023-01-22 19:56:15.940709: step: 262/470, loss: 0.06308569759130478 2023-01-22 19:56:16.729415: step: 264/470, loss: 0.0345967635512352 2023-01-22 19:56:17.410896: step: 266/470, loss: 0.03556538745760918 2023-01-22 19:56:18.206766: step: 268/470, loss: 0.036040298640728 2023-01-22 19:56:18.945329: step: 270/470, loss: 0.02199576050043106 2023-01-22 19:56:19.631398: step: 272/470, loss: 0.04857220500707626 2023-01-22 19:56:20.360781: step: 274/470, loss: 0.009337391704320908 2023-01-22 19:56:21.181624: step: 276/470, loss: 0.017865784466266632 2023-01-22 19:56:21.892917: step: 278/470, loss: 0.05819854885339737 2023-01-22 19:56:22.686729: step: 280/470, loss: 0.03172709047794342 2023-01-22 19:56:23.446107: step: 282/470, loss: 0.017821263521909714 2023-01-22 19:56:24.279026: step: 284/470, loss: 0.12068352103233337 2023-01-22 19:56:24.994759: step: 286/470, loss: 0.010906575247645378 2023-01-22 19:56:25.721720: step: 288/470, loss: 0.017595164477825165 2023-01-22 19:56:26.410535: step: 290/470, loss: 0.01801401562988758 2023-01-22 19:56:27.208862: step: 292/470, loss: 0.012894317507743835 2023-01-22 19:56:28.017970: step: 294/470, loss: 2.5075581073760986 2023-01-22 19:56:28.739997: step: 296/470, loss: 0.053497977554798126 2023-01-22 19:56:29.478686: step: 298/470, loss: 0.007422546856105328 2023-01-22 19:56:30.204484: step: 300/470, loss: 0.30072087049484253 2023-01-22 19:56:30.914684: step: 302/470, loss: 0.08959358930587769 2023-01-22 19:56:31.674153: step: 304/470, loss: 0.009266148321330547 2023-01-22 19:56:32.392886: step: 306/470, loss: 0.014265595935285091 2023-01-22 19:56:33.102451: step: 308/470, loss: 0.010140521451830864 2023-01-22 19:56:33.766861: step: 310/470, loss: 0.07897411286830902 2023-01-22 19:56:34.556415: step: 312/470, loss: 0.025504056364297867 2023-01-22 19:56:35.326081: step: 314/470, loss: 0.02470008283853531 2023-01-22 19:56:36.268828: step: 316/470, loss: 0.07380572706460953 2023-01-22 19:56:36.975607: step: 318/470, loss: 0.013543383218348026 2023-01-22 19:56:37.755777: step: 320/470, loss: 0.05348219349980354 2023-01-22 19:56:38.440233: step: 322/470, loss: 0.03541599214076996 2023-01-22 19:56:39.131443: step: 324/470, loss: 0.01773747242987156 2023-01-22 19:56:39.873740: step: 326/470, loss: 0.7491196990013123 2023-01-22 19:56:40.554594: step: 328/470, loss: 0.03136305883526802 2023-01-22 19:56:41.288410: step: 330/470, loss: 0.042486634105443954 2023-01-22 19:56:42.138844: step: 332/470, loss: 0.025926154106855392 2023-01-22 19:56:42.828122: step: 334/470, loss: 0.02339969575405121 2023-01-22 19:56:43.568383: step: 336/470, loss: 0.04502348601818085 2023-01-22 19:56:44.314655: step: 338/470, loss: 0.014835294336080551 2023-01-22 19:56:45.005666: step: 340/470, loss: 0.04183325543999672 2023-01-22 19:56:45.753347: step: 342/470, loss: 0.0881599634885788 2023-01-22 19:56:46.473044: step: 344/470, loss: 0.0676642656326294 2023-01-22 19:56:47.247662: step: 346/470, loss: 0.05381879583001137 2023-01-22 19:56:47.936444: step: 348/470, loss: 0.056068144738674164 2023-01-22 19:56:48.682412: step: 350/470, loss: 0.03975946456193924 2023-01-22 19:56:49.427964: step: 352/470, loss: 0.004915451630949974 2023-01-22 19:56:50.120820: step: 354/470, loss: 0.037539318203926086 2023-01-22 19:56:50.835214: step: 356/470, loss: 0.017015933990478516 2023-01-22 19:56:51.625012: step: 358/470, loss: 0.01815337873995304 2023-01-22 19:56:52.276292: step: 360/470, loss: 0.011578904464840889 2023-01-22 19:56:52.976244: step: 362/470, loss: 0.0952455922961235 2023-01-22 19:56:53.715386: step: 364/470, loss: 0.029356520622968674 2023-01-22 19:56:54.383576: step: 366/470, loss: 0.03291197121143341 2023-01-22 19:56:55.085741: step: 368/470, loss: 0.044890958815813065 2023-01-22 19:56:55.880581: step: 370/470, loss: 0.014463989064097404 2023-01-22 19:56:56.580064: step: 372/470, loss: 0.029671261087059975 2023-01-22 19:56:57.382976: step: 374/470, loss: 0.1202852725982666 2023-01-22 19:56:58.106776: step: 376/470, loss: 0.02309921756386757 2023-01-22 19:56:58.811731: step: 378/470, loss: 0.0008545005111955106 2023-01-22 19:56:59.581743: step: 380/470, loss: 0.02537860907614231 2023-01-22 19:57:00.310236: step: 382/470, loss: 0.01265999861061573 2023-01-22 19:57:01.058959: step: 384/470, loss: 0.06934566795825958 2023-01-22 19:57:01.704201: step: 386/470, loss: 0.04263785108923912 2023-01-22 19:57:02.355502: step: 388/470, loss: 0.055505167692899704 2023-01-22 19:57:03.092026: step: 390/470, loss: 0.05536120757460594 2023-01-22 19:57:03.885370: step: 392/470, loss: 0.01172667182981968 2023-01-22 19:57:04.638407: step: 394/470, loss: 0.13547594845294952 2023-01-22 19:57:05.388414: step: 396/470, loss: 0.0048871287144720554 2023-01-22 19:57:06.140946: step: 398/470, loss: 0.22317151725292206 2023-01-22 19:57:06.865291: step: 400/470, loss: 0.02091968059539795 2023-01-22 19:57:07.591176: step: 402/470, loss: 0.0756245031952858 2023-01-22 19:57:08.265985: step: 404/470, loss: 0.08365000039339066 2023-01-22 19:57:09.000391: step: 406/470, loss: 0.021144846454262733 2023-01-22 19:57:09.700663: step: 408/470, loss: 0.02719028852880001 2023-01-22 19:57:10.373223: step: 410/470, loss: 0.002817704575136304 2023-01-22 19:57:11.093439: step: 412/470, loss: 0.020920658484101295 2023-01-22 19:57:11.923855: step: 414/470, loss: 0.037627965211868286 2023-01-22 19:57:12.591560: step: 416/470, loss: 0.0015627248212695122 2023-01-22 19:57:13.291207: step: 418/470, loss: 0.027174735441803932 2023-01-22 19:57:14.001537: step: 420/470, loss: 0.016970563679933548 2023-01-22 19:57:14.754881: step: 422/470, loss: 0.006052455864846706 2023-01-22 19:57:15.424135: step: 424/470, loss: 0.14455130696296692 2023-01-22 19:57:16.132196: step: 426/470, loss: 0.05896090343594551 2023-01-22 19:57:16.871270: step: 428/470, loss: 0.016825811937451363 2023-01-22 19:57:17.666002: step: 430/470, loss: 0.009072363376617432 2023-01-22 19:57:18.436040: step: 432/470, loss: 0.004168605897575617 2023-01-22 19:57:19.205023: step: 434/470, loss: 0.05164012685418129 2023-01-22 19:57:19.953853: step: 436/470, loss: 0.1026514321565628 2023-01-22 19:57:20.636125: step: 438/470, loss: 0.0008245318895205855 2023-01-22 19:57:21.330762: step: 440/470, loss: 0.001791208516806364 2023-01-22 19:57:22.061744: step: 442/470, loss: 0.042904458940029144 2023-01-22 19:57:22.828320: step: 444/470, loss: 0.007724056486040354 2023-01-22 19:57:23.556504: step: 446/470, loss: 0.043044883757829666 2023-01-22 19:57:24.265978: step: 448/470, loss: 0.011818325147032738 2023-01-22 19:57:24.981607: step: 450/470, loss: 0.061016157269477844 2023-01-22 19:57:25.755303: step: 452/470, loss: 0.06787383556365967 2023-01-22 19:57:26.505534: step: 454/470, loss: 0.03366108238697052 2023-01-22 19:57:27.324450: step: 456/470, loss: 0.042890019714832306 2023-01-22 19:57:28.070846: step: 458/470, loss: 0.0524488128721714 2023-01-22 19:57:28.853456: step: 460/470, loss: 0.013294907286763191 2023-01-22 19:57:29.579245: step: 462/470, loss: 0.05481591448187828 2023-01-22 19:57:30.266846: step: 464/470, loss: 0.01968459226191044 2023-01-22 19:57:31.041947: step: 466/470, loss: 0.011313038878142834 2023-01-22 19:57:31.919974: step: 468/470, loss: 0.18432138860225677 2023-01-22 19:57:32.632017: step: 470/470, loss: 0.04200728237628937 2023-01-22 19:57:33.437219: step: 472/470, loss: 0.03416682779788971 2023-01-22 19:57:34.153720: step: 474/470, loss: 0.013105719350278378 2023-01-22 19:57:34.882619: step: 476/470, loss: 0.008894217200577259 2023-01-22 19:57:35.598689: step: 478/470, loss: 0.03804539516568184 2023-01-22 19:57:36.239777: step: 480/470, loss: 0.0294265728443861 2023-01-22 19:57:36.961312: step: 482/470, loss: 0.021114999428391457 2023-01-22 19:57:37.681293: step: 484/470, loss: 0.049864597618579865 2023-01-22 19:57:38.448701: step: 486/470, loss: 0.021185453981161118 2023-01-22 19:57:39.175508: step: 488/470, loss: 0.0015564959030598402 2023-01-22 19:57:39.807089: step: 490/470, loss: 0.02223014645278454 2023-01-22 19:57:40.556948: step: 492/470, loss: 0.02108120173215866 2023-01-22 19:57:41.266127: step: 494/470, loss: 0.10416891425848007 2023-01-22 19:57:41.964884: step: 496/470, loss: 0.04583205655217171 2023-01-22 19:57:42.608338: step: 498/470, loss: 0.02937087044119835 2023-01-22 19:57:43.398565: step: 500/470, loss: 0.00716931140050292 2023-01-22 19:57:44.217453: step: 502/470, loss: 0.01689540222287178 2023-01-22 19:57:44.919855: step: 504/470, loss: 0.029801655560731888 2023-01-22 19:57:45.603431: step: 506/470, loss: 0.05905457213521004 2023-01-22 19:57:46.310792: step: 508/470, loss: 0.11677312850952148 2023-01-22 19:57:47.151944: step: 510/470, loss: 0.018942374736070633 2023-01-22 19:57:47.860378: step: 512/470, loss: 0.00495618861168623 2023-01-22 19:57:48.648251: step: 514/470, loss: 0.0023889688309282064 2023-01-22 19:57:49.383704: step: 516/470, loss: 0.07100418955087662 2023-01-22 19:57:50.128410: step: 518/470, loss: 0.021969538182020187 2023-01-22 19:57:50.905192: step: 520/470, loss: 0.05502269044518471 2023-01-22 19:57:51.604963: step: 522/470, loss: 0.15007732808589935 2023-01-22 19:57:52.318586: step: 524/470, loss: 0.11543755233287811 2023-01-22 19:57:53.066906: step: 526/470, loss: 0.051974616944789886 2023-01-22 19:57:53.791348: step: 528/470, loss: 0.04661441594362259 2023-01-22 19:57:54.519135: step: 530/470, loss: 0.010496355593204498 2023-01-22 19:57:55.266332: step: 532/470, loss: 0.022741319611668587 2023-01-22 19:57:55.997789: step: 534/470, loss: 0.0360884964466095 2023-01-22 19:57:56.614845: step: 536/470, loss: 0.28176605701446533 2023-01-22 19:57:57.357890: step: 538/470, loss: 0.000371862348401919 2023-01-22 19:57:58.129467: step: 540/470, loss: 0.016311364248394966 2023-01-22 19:57:58.817171: step: 542/470, loss: 0.6123561859130859 2023-01-22 19:57:59.529420: step: 544/470, loss: 0.06656290590763092 2023-01-22 19:58:00.329052: step: 546/470, loss: 0.00262643164023757 2023-01-22 19:58:01.166756: step: 548/470, loss: 0.07897511124610901 2023-01-22 19:58:01.906940: step: 550/470, loss: 0.06647037714719772 2023-01-22 19:58:02.581419: step: 552/470, loss: 0.06914768368005753 2023-01-22 19:58:03.309819: step: 554/470, loss: 0.023323729634284973 2023-01-22 19:58:04.034256: step: 556/470, loss: 0.05517176166176796 2023-01-22 19:58:04.807193: step: 558/470, loss: 0.01975119113922119 2023-01-22 19:58:05.514720: step: 560/470, loss: 0.010775907896459103 2023-01-22 19:58:06.250827: step: 562/470, loss: 0.06182064861059189 2023-01-22 19:58:06.888628: step: 564/470, loss: 0.0013573778560385108 2023-01-22 19:58:07.606092: step: 566/470, loss: 0.026441359892487526 2023-01-22 19:58:08.320684: step: 568/470, loss: 0.03428468853235245 2023-01-22 19:58:09.123829: step: 570/470, loss: 0.015539498068392277 2023-01-22 19:58:09.936214: step: 572/470, loss: 0.038939252495765686 2023-01-22 19:58:10.624730: step: 574/470, loss: 0.02000570483505726 2023-01-22 19:58:11.302225: step: 576/470, loss: 0.01621418446302414 2023-01-22 19:58:12.051973: step: 578/470, loss: 0.08785783499479294 2023-01-22 19:58:12.817029: step: 580/470, loss: 0.10585880279541016 2023-01-22 19:58:13.557509: step: 582/470, loss: 0.037938281893730164 2023-01-22 19:58:14.262846: step: 584/470, loss: 0.0022830679081380367 2023-01-22 19:58:15.007104: step: 586/470, loss: 0.007178565952926874 2023-01-22 19:58:15.812236: step: 588/470, loss: 0.009578043594956398 2023-01-22 19:58:16.586558: step: 590/470, loss: 0.06791942566633224 2023-01-22 19:58:17.427036: step: 592/470, loss: 0.3170134127140045 2023-01-22 19:58:18.048049: step: 594/470, loss: 0.00753396563231945 2023-01-22 19:58:18.781324: step: 596/470, loss: 0.04484763741493225 2023-01-22 19:58:19.510879: step: 598/470, loss: 0.013004643842577934 2023-01-22 19:58:20.226314: step: 600/470, loss: 0.02657913789153099 2023-01-22 19:58:20.982042: step: 602/470, loss: 0.0408782996237278 2023-01-22 19:58:21.732527: step: 604/470, loss: 0.012685599736869335 2023-01-22 19:58:22.478124: step: 606/470, loss: 0.003009774489328265 2023-01-22 19:58:23.200359: step: 608/470, loss: 0.2504468560218811 2023-01-22 19:58:23.856091: step: 610/470, loss: 0.0925365537405014 2023-01-22 19:58:24.584755: step: 612/470, loss: 0.018450234085321426 2023-01-22 19:58:25.333411: step: 614/470, loss: 0.04527146741747856 2023-01-22 19:58:26.039717: step: 616/470, loss: 0.03232667222619057 2023-01-22 19:58:26.727847: step: 618/470, loss: 0.037647586315870285 2023-01-22 19:58:27.501020: step: 620/470, loss: 0.02571326307952404 2023-01-22 19:58:28.285780: step: 622/470, loss: 0.054269880056381226 2023-01-22 19:58:29.029544: step: 624/470, loss: 0.028405921533703804 2023-01-22 19:58:29.731059: step: 626/470, loss: 0.020112626254558563 2023-01-22 19:58:30.500944: step: 628/470, loss: 0.005061836447566748 2023-01-22 19:58:31.216152: step: 630/470, loss: 0.14112667739391327 2023-01-22 19:58:31.935517: step: 632/470, loss: 0.006791027262806892 2023-01-22 19:58:32.641640: step: 634/470, loss: 0.009881870821118355 2023-01-22 19:58:33.375798: step: 636/470, loss: 0.06305323541164398 2023-01-22 19:58:34.088715: step: 638/470, loss: 0.05328913778066635 2023-01-22 19:58:34.765981: step: 640/470, loss: 0.002296677092090249 2023-01-22 19:58:35.507978: step: 642/470, loss: 0.007883894257247448 2023-01-22 19:58:36.334863: step: 644/470, loss: 0.05610523000359535 2023-01-22 19:58:37.065237: step: 646/470, loss: 0.022641412913799286 2023-01-22 19:58:37.736262: step: 648/470, loss: 0.025629589334130287 2023-01-22 19:58:38.489564: step: 650/470, loss: 0.047313909977674484 2023-01-22 19:58:39.158345: step: 652/470, loss: 0.02791016362607479 2023-01-22 19:58:39.847583: step: 654/470, loss: 0.01351831667125225 2023-01-22 19:58:40.682736: step: 656/470, loss: 0.01588682271540165 2023-01-22 19:58:41.369918: step: 658/470, loss: 0.04910393804311752 2023-01-22 19:58:42.190264: step: 660/470, loss: 0.11366312205791473 2023-01-22 19:58:42.924350: step: 662/470, loss: 0.12863023579120636 2023-01-22 19:58:43.569071: step: 664/470, loss: 0.09127775579690933 2023-01-22 19:58:44.334207: step: 666/470, loss: 0.21341535449028015 2023-01-22 19:58:45.066958: step: 668/470, loss: 0.015620485879480839 2023-01-22 19:58:45.763726: step: 670/470, loss: 0.03588823601603508 2023-01-22 19:58:46.475714: step: 672/470, loss: 0.025998149067163467 2023-01-22 19:58:47.283605: step: 674/470, loss: 0.06447438150644302 2023-01-22 19:58:47.934834: step: 676/470, loss: 0.06617400795221329 2023-01-22 19:58:48.647140: step: 678/470, loss: 0.2020503729581833 2023-01-22 19:58:49.441578: step: 680/470, loss: 0.03483056649565697 2023-01-22 19:58:50.171673: step: 682/470, loss: 0.017954112961888313 2023-01-22 19:58:50.897237: step: 684/470, loss: 0.01776493713259697 2023-01-22 19:58:51.601007: step: 686/470, loss: 0.018190190196037292 2023-01-22 19:58:52.358894: step: 688/470, loss: 0.0707760602235794 2023-01-22 19:58:53.117469: step: 690/470, loss: 0.06905476748943329 2023-01-22 19:58:53.970056: step: 692/470, loss: 0.08601033687591553 2023-01-22 19:58:54.737304: step: 694/470, loss: 0.007111764047294855 2023-01-22 19:58:55.463558: step: 696/470, loss: 0.026374198496341705 2023-01-22 19:58:56.162025: step: 698/470, loss: 0.03967350348830223 2023-01-22 19:58:56.915761: step: 700/470, loss: 0.03607969358563423 2023-01-22 19:58:57.626334: step: 702/470, loss: 0.017128845676779747 2023-01-22 19:58:58.363601: step: 704/470, loss: 0.04204748570919037 2023-01-22 19:58:59.191487: step: 706/470, loss: 0.17484718561172485 2023-01-22 19:58:59.917371: step: 708/470, loss: 0.0665166974067688 2023-01-22 19:59:00.549197: step: 710/470, loss: 0.03982820361852646 2023-01-22 19:59:01.384937: step: 712/470, loss: 0.015020914375782013 2023-01-22 19:59:02.079404: step: 714/470, loss: 0.014696701429784298 2023-01-22 19:59:02.842580: step: 716/470, loss: 0.2812211513519287 2023-01-22 19:59:03.574246: step: 718/470, loss: 2.2691924571990967 2023-01-22 19:59:04.368338: step: 720/470, loss: 0.06255054473876953 2023-01-22 19:59:05.151238: step: 722/470, loss: 0.025868145748972893 2023-01-22 19:59:05.904240: step: 724/470, loss: 0.0933215320110321 2023-01-22 19:59:06.629418: step: 726/470, loss: 0.156637504696846 2023-01-22 19:59:07.332374: step: 728/470, loss: 0.11742421239614487 2023-01-22 19:59:08.059243: step: 730/470, loss: 0.09174972772598267 2023-01-22 19:59:08.844087: step: 732/470, loss: 0.04268868640065193 2023-01-22 19:59:09.663391: step: 734/470, loss: 0.07636651396751404 2023-01-22 19:59:10.385503: step: 736/470, loss: 0.013568645343184471 2023-01-22 19:59:11.099377: step: 738/470, loss: 0.03813782334327698 2023-01-22 19:59:11.806486: step: 740/470, loss: 0.019819870591163635 2023-01-22 19:59:12.602217: step: 742/470, loss: 0.04321083426475525 2023-01-22 19:59:13.325795: step: 744/470, loss: 0.006098398473113775 2023-01-22 19:59:14.031538: step: 746/470, loss: 0.022449664771556854 2023-01-22 19:59:14.792787: step: 748/470, loss: 0.05610019341111183 2023-01-22 19:59:15.462287: step: 750/470, loss: 0.05538138747215271 2023-01-22 19:59:16.186513: step: 752/470, loss: 0.03515056148171425 2023-01-22 19:59:16.900483: step: 754/470, loss: 0.06996199488639832 2023-01-22 19:59:17.647898: step: 756/470, loss: 0.010419724509119987 2023-01-22 19:59:18.419828: step: 758/470, loss: 0.02060377597808838 2023-01-22 19:59:19.197476: step: 760/470, loss: 0.021778682246804237 2023-01-22 19:59:19.930111: step: 762/470, loss: 0.14664672315120697 2023-01-22 19:59:20.630653: step: 764/470, loss: 0.04197612777352333 2023-01-22 19:59:21.258375: step: 766/470, loss: 0.00862900447100401 2023-01-22 19:59:22.023815: step: 768/470, loss: 0.01935235597193241 2023-01-22 19:59:22.717485: step: 770/470, loss: 0.0040095048025250435 2023-01-22 19:59:23.421947: step: 772/470, loss: 0.03624457120895386 2023-01-22 19:59:24.144026: step: 774/470, loss: 0.10813373327255249 2023-01-22 19:59:24.871586: step: 776/470, loss: 0.019472643733024597 2023-01-22 19:59:25.645234: step: 778/470, loss: 0.03459063544869423 2023-01-22 19:59:26.390464: step: 780/470, loss: 0.055417634546756744 2023-01-22 19:59:27.153033: step: 782/470, loss: 0.0014409045688807964 2023-01-22 19:59:27.846036: step: 784/470, loss: 0.029017573222517967 2023-01-22 19:59:28.612616: step: 786/470, loss: 0.04178899899125099 2023-01-22 19:59:29.349514: step: 788/470, loss: 0.18817931413650513 2023-01-22 19:59:30.050041: step: 790/470, loss: 1.237821102142334 2023-01-22 19:59:30.890276: step: 792/470, loss: 0.05888526514172554 2023-01-22 19:59:31.595571: step: 794/470, loss: 0.04464132338762283 2023-01-22 19:59:32.330488: step: 796/470, loss: 0.03926714509725571 2023-01-22 19:59:33.045390: step: 798/470, loss: 0.003827402601018548 2023-01-22 19:59:33.724793: step: 800/470, loss: 0.02599688246846199 2023-01-22 19:59:34.451395: step: 802/470, loss: 0.03956460580229759 2023-01-22 19:59:35.179761: step: 804/470, loss: 0.025580542162060738 2023-01-22 19:59:35.973495: step: 806/470, loss: 0.07505716383457184 2023-01-22 19:59:36.678671: step: 808/470, loss: 0.019929545000195503 2023-01-22 19:59:37.382989: step: 810/470, loss: 0.04150541499257088 2023-01-22 19:59:38.248326: step: 812/470, loss: 0.2298429161310196 2023-01-22 19:59:39.009000: step: 814/470, loss: 0.14862318336963654 2023-01-22 19:59:39.737248: step: 816/470, loss: 0.4220031499862671 2023-01-22 19:59:40.448213: step: 818/470, loss: 0.018468687310814857 2023-01-22 19:59:41.100148: step: 820/470, loss: 0.08981668949127197 2023-01-22 19:59:41.913467: step: 822/470, loss: 0.08471503853797913 2023-01-22 19:59:42.654875: step: 824/470, loss: 0.021649127826094627 2023-01-22 19:59:43.477629: step: 826/470, loss: 0.2502903342247009 2023-01-22 19:59:44.273044: step: 828/470, loss: 0.01829441450536251 2023-01-22 19:59:44.933804: step: 830/470, loss: 0.011802778579294682 2023-01-22 19:59:45.655682: step: 832/470, loss: 0.14194156229496002 2023-01-22 19:59:46.361886: step: 834/470, loss: 0.007301392499357462 2023-01-22 19:59:47.079288: step: 836/470, loss: 0.012469622306525707 2023-01-22 19:59:47.749385: step: 838/470, loss: 0.07566139101982117 2023-01-22 19:59:48.448816: step: 840/470, loss: 0.033544205129146576 2023-01-22 19:59:49.228554: step: 842/470, loss: 0.6390538215637207 2023-01-22 19:59:50.044164: step: 844/470, loss: 0.0382160022854805 2023-01-22 19:59:50.704385: step: 846/470, loss: 0.1456291675567627 2023-01-22 19:59:51.460631: step: 848/470, loss: 0.019455960020422935 2023-01-22 19:59:52.210850: step: 850/470, loss: 0.05021538957953453 2023-01-22 19:59:52.983666: step: 852/470, loss: 0.03165812790393829 2023-01-22 19:59:53.671877: step: 854/470, loss: 0.03875069320201874 2023-01-22 19:59:54.354935: step: 856/470, loss: 0.07675700634717941 2023-01-22 19:59:55.145083: step: 858/470, loss: 0.0549500472843647 2023-01-22 19:59:55.862628: step: 860/470, loss: 0.07575095444917679 2023-01-22 19:59:56.571108: step: 862/470, loss: 0.01323747355490923 2023-01-22 19:59:57.369008: step: 864/470, loss: 0.19846421480178833 2023-01-22 19:59:58.156884: step: 866/470, loss: 0.0069488100707530975 2023-01-22 19:59:58.844205: step: 868/470, loss: 0.09136679023504257 2023-01-22 19:59:59.589516: step: 870/470, loss: 0.027809590101242065 2023-01-22 20:00:00.316936: step: 872/470, loss: 0.003995862323790789 2023-01-22 20:00:00.988186: step: 874/470, loss: 0.010341154411435127 2023-01-22 20:00:01.761407: step: 876/470, loss: 0.014673410914838314 2023-01-22 20:00:02.491782: step: 878/470, loss: 0.035946592688560486 2023-01-22 20:00:03.240151: step: 880/470, loss: 0.034462690353393555 2023-01-22 20:00:03.943592: step: 882/470, loss: 0.0298713780939579 2023-01-22 20:00:04.699758: step: 884/470, loss: 0.01902618259191513 2023-01-22 20:00:05.429454: step: 886/470, loss: 0.007162892259657383 2023-01-22 20:00:06.178598: step: 888/470, loss: 0.024051036685705185 2023-01-22 20:00:06.938687: step: 890/470, loss: 0.014205212704837322 2023-01-22 20:00:07.761297: step: 892/470, loss: 0.0005725105293095112 2023-01-22 20:00:08.547118: step: 894/470, loss: 0.04063863679766655 2023-01-22 20:00:09.302603: step: 896/470, loss: 0.11876030266284943 2023-01-22 20:00:10.104592: step: 898/470, loss: 0.012916138395667076 2023-01-22 20:00:10.901258: step: 900/470, loss: 0.013642225414514542 2023-01-22 20:00:11.703699: step: 902/470, loss: 0.022717280313372612 2023-01-22 20:00:12.437887: step: 904/470, loss: 0.4222767651081085 2023-01-22 20:00:13.133068: step: 906/470, loss: 0.013618819415569305 2023-01-22 20:00:13.886215: step: 908/470, loss: 0.061890263110399246 2023-01-22 20:00:14.644579: step: 910/470, loss: 0.04235636442899704 2023-01-22 20:00:15.403890: step: 912/470, loss: 0.0523671992123127 2023-01-22 20:00:16.237381: step: 914/470, loss: 0.0566101111471653 2023-01-22 20:00:16.952370: step: 916/470, loss: 0.07533188164234161 2023-01-22 20:00:17.711256: step: 918/470, loss: 0.019266333431005478 2023-01-22 20:00:18.449499: step: 920/470, loss: 0.008472178131341934 2023-01-22 20:00:19.143528: step: 922/470, loss: 0.03950265422463417 2023-01-22 20:00:19.829329: step: 924/470, loss: 0.02345268428325653 2023-01-22 20:00:20.625896: step: 926/470, loss: 0.015790196135640144 2023-01-22 20:00:21.321952: step: 928/470, loss: 0.027424369007349014 2023-01-22 20:00:22.126605: step: 930/470, loss: 0.1694997251033783 2023-01-22 20:00:22.854895: step: 932/470, loss: 0.04345071688294411 2023-01-22 20:00:23.623580: step: 934/470, loss: 0.029627729207277298 2023-01-22 20:00:24.328161: step: 936/470, loss: 1.4033803939819336 2023-01-22 20:00:25.043331: step: 938/470, loss: 0.07272469252347946 2023-01-22 20:00:25.781309: step: 940/470, loss: 0.07386843860149384 2023-01-22 20:00:26.432530: step: 942/470, loss: 0.00953019130975008 ================================================== Loss: 0.078 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2951851081530782, 'r': 0.3366342504743833, 'f1': 0.3145500886524823}, 'combined': 0.23177374953340799, 'epoch': 24} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33448411009531387, 'r': 0.36857575977810547, 'f1': 0.35070337618410763}, 'combined': 0.24427100828743817, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29017783486930493, 'r': 0.3353288452284757, 'f1': 0.3111237701327583}, 'combined': 0.22924909378203243, 'epoch': 24} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3305422989570629, 'r': 0.36232521231831893, 'f1': 0.3457047897349098}, 'combined': 0.24078940578550934, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2818378552971576, 'r': 0.3449438646426312, 'f1': 0.3102140216154721}, 'combined': 0.22857875276929523, 'epoch': 24} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3271542618069103, 'r': 0.3778002581058647, 'f1': 0.3506579816422127}, 'combined': 0.24423939019855612, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20227272727272727, 'r': 0.31785714285714284, 'f1': 0.24722222222222223}, 'combined': 0.1648148148148148, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23333333333333334, 'r': 0.45652173913043476, 'f1': 0.3088235294117647}, 'combined': 0.15441176470588236, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 20:03:02.208414: step: 2/470, loss: 0.02162099815905094 2023-01-22 20:03:02.960449: step: 4/470, loss: 0.012446287088096142 2023-01-22 20:03:03.682543: step: 6/470, loss: 0.034468457102775574 2023-01-22 20:03:04.446766: step: 8/470, loss: 0.016727212816476822 2023-01-22 20:03:05.214627: step: 10/470, loss: 0.03275219723582268 2023-01-22 20:03:05.910108: step: 12/470, loss: 0.012032398022711277 2023-01-22 20:03:06.729149: step: 14/470, loss: 0.04827987775206566 2023-01-22 20:03:07.463381: step: 16/470, loss: 0.039694756269454956 2023-01-22 20:03:08.197966: step: 18/470, loss: 0.018378468230366707 2023-01-22 20:03:08.786230: step: 20/470, loss: 0.010981571860611439 2023-01-22 20:03:09.634501: step: 22/470, loss: 1.3256505727767944 2023-01-22 20:03:10.352731: step: 24/470, loss: 0.007463351357728243 2023-01-22 20:03:11.127357: step: 26/470, loss: 0.008972594514489174 2023-01-22 20:03:11.819350: step: 28/470, loss: 0.005426797084510326 2023-01-22 20:03:12.582333: step: 30/470, loss: 0.005324822850525379 2023-01-22 20:03:13.277984: step: 32/470, loss: 0.07613864541053772 2023-01-22 20:03:13.987213: step: 34/470, loss: 0.03191568702459335 2023-01-22 20:03:14.709648: step: 36/470, loss: 0.05562712624669075 2023-01-22 20:03:15.453824: step: 38/470, loss: 0.01600096933543682 2023-01-22 20:03:16.188829: step: 40/470, loss: 0.044862739741802216 2023-01-22 20:03:16.947253: step: 42/470, loss: 0.3098965287208557 2023-01-22 20:03:17.682120: step: 44/470, loss: 0.027755551040172577 2023-01-22 20:03:18.407908: step: 46/470, loss: 0.09233322739601135 2023-01-22 20:03:19.113272: step: 48/470, loss: 0.39350318908691406 2023-01-22 20:03:19.804796: step: 50/470, loss: 0.04757951945066452 2023-01-22 20:03:20.543417: step: 52/470, loss: 0.004094702657312155 2023-01-22 20:03:21.291164: step: 54/470, loss: 0.037220463156700134 2023-01-22 20:03:22.005010: step: 56/470, loss: 0.0336788184940815 2023-01-22 20:03:22.814500: step: 58/470, loss: 0.010243861936032772 2023-01-22 20:03:23.516706: step: 60/470, loss: 0.08532825112342834 2023-01-22 20:03:24.199642: step: 62/470, loss: 0.031235769391059875 2023-01-22 20:03:24.811600: step: 64/470, loss: 0.012863215059041977 2023-01-22 20:03:25.606214: step: 66/470, loss: 1.0012062788009644 2023-01-22 20:03:26.336741: step: 68/470, loss: 0.02800358459353447 2023-01-22 20:03:27.107016: step: 70/470, loss: 0.021341489627957344 2023-01-22 20:03:27.816905: step: 72/470, loss: 0.013578714802861214 2023-01-22 20:03:28.606450: step: 74/470, loss: 0.01855326071381569 2023-01-22 20:03:29.357341: step: 76/470, loss: 0.04243265837430954 2023-01-22 20:03:30.041180: step: 78/470, loss: 0.03518841415643692 2023-01-22 20:03:30.723632: step: 80/470, loss: 0.08064284920692444 2023-01-22 20:03:31.437155: step: 82/470, loss: 0.017204048112034798 2023-01-22 20:03:32.205440: step: 84/470, loss: 0.014039833098649979 2023-01-22 20:03:32.916980: step: 86/470, loss: 0.023023655638098717 2023-01-22 20:03:33.615644: step: 88/470, loss: 0.021660305559635162 2023-01-22 20:03:34.346502: step: 90/470, loss: 0.10536940395832062 2023-01-22 20:03:35.078132: step: 92/470, loss: 0.024169238284230232 2023-01-22 20:03:35.736614: step: 94/470, loss: 0.029674746096134186 2023-01-22 20:03:36.510310: step: 96/470, loss: 0.028852693736553192 2023-01-22 20:03:37.218231: step: 98/470, loss: 0.00507341930642724 2023-01-22 20:03:37.994384: step: 100/470, loss: 0.017849357798695564 2023-01-22 20:03:38.779957: step: 102/470, loss: 0.0505632720887661 2023-01-22 20:03:39.608571: step: 104/470, loss: 0.009880991652607918 2023-01-22 20:03:40.392854: step: 106/470, loss: 0.02503761276602745 2023-01-22 20:03:41.075328: step: 108/470, loss: 0.0026527876034379005 2023-01-22 20:03:41.811982: step: 110/470, loss: 0.017480017617344856 2023-01-22 20:03:42.477181: step: 112/470, loss: 0.011955924332141876 2023-01-22 20:03:43.144897: step: 114/470, loss: 0.035263460129499435 2023-01-22 20:03:43.930379: step: 116/470, loss: 0.0948866754770279 2023-01-22 20:03:44.684859: step: 118/470, loss: 0.01817036233842373 2023-01-22 20:03:45.334197: step: 120/470, loss: 0.005630629137158394 2023-01-22 20:03:45.961903: step: 122/470, loss: 0.05774114280939102 2023-01-22 20:03:46.686546: step: 124/470, loss: 0.0010737971169874072 2023-01-22 20:03:47.601412: step: 126/470, loss: 0.01047587115317583 2023-01-22 20:03:48.290127: step: 128/470, loss: 0.0022798320278525352 2023-01-22 20:03:49.009082: step: 130/470, loss: 0.03199382126331329 2023-01-22 20:03:49.728867: step: 132/470, loss: 0.10577887296676636 2023-01-22 20:03:50.422620: step: 134/470, loss: 0.020609887316823006 2023-01-22 20:03:51.133612: step: 136/470, loss: 0.0020259118173271418 2023-01-22 20:03:51.908935: step: 138/470, loss: 0.020044716075062752 2023-01-22 20:03:52.630186: step: 140/470, loss: 0.004093657713383436 2023-01-22 20:03:53.371622: step: 142/470, loss: 0.062444720417261124 2023-01-22 20:03:54.060750: step: 144/470, loss: 0.03266696259379387 2023-01-22 20:03:54.847317: step: 146/470, loss: 0.05523476377129555 2023-01-22 20:03:55.590847: step: 148/470, loss: 0.0045193941332399845 2023-01-22 20:03:56.286072: step: 150/470, loss: 0.009763001464307308 2023-01-22 20:03:56.988296: step: 152/470, loss: 0.042540084570646286 2023-01-22 20:03:57.755469: step: 154/470, loss: 0.016827277839183807 2023-01-22 20:03:58.528118: step: 156/470, loss: 0.17072248458862305 2023-01-22 20:03:59.303968: step: 158/470, loss: 0.005717588122934103 2023-01-22 20:04:00.050285: step: 160/470, loss: 0.009820440784096718 2023-01-22 20:04:00.792957: step: 162/470, loss: 0.010699333623051643 2023-01-22 20:04:01.583265: step: 164/470, loss: 0.08145033568143845 2023-01-22 20:04:02.315582: step: 166/470, loss: 0.0005983722512610257 2023-01-22 20:04:03.125046: step: 168/470, loss: 0.028514329344034195 2023-01-22 20:04:03.849529: step: 170/470, loss: 0.006405293010175228 2023-01-22 20:04:04.558463: step: 172/470, loss: 0.00240780645981431 2023-01-22 20:04:05.288475: step: 174/470, loss: 0.026528997346758842 2023-01-22 20:04:06.104816: step: 176/470, loss: 0.02798590436577797 2023-01-22 20:04:06.839345: step: 178/470, loss: 0.014988838694989681 2023-01-22 20:04:07.568783: step: 180/470, loss: 0.13888667523860931 2023-01-22 20:04:08.286842: step: 182/470, loss: 0.020751958712935448 2023-01-22 20:04:08.922280: step: 184/470, loss: 0.0074022323824465275 2023-01-22 20:04:09.622459: step: 186/470, loss: 0.16846947371959686 2023-01-22 20:04:10.340121: step: 188/470, loss: 0.02288518100976944 2023-01-22 20:04:11.110314: step: 190/470, loss: 0.025580741465091705 2023-01-22 20:04:11.870415: step: 192/470, loss: 0.01590382121503353 2023-01-22 20:04:12.657068: step: 194/470, loss: 0.018982110545039177 2023-01-22 20:04:13.489373: step: 196/470, loss: 0.028010079637169838 2023-01-22 20:04:14.167193: step: 198/470, loss: 0.009743987582623959 2023-01-22 20:04:14.896654: step: 200/470, loss: 0.0009152348502539098 2023-01-22 20:04:15.732317: step: 202/470, loss: 0.0641738623380661 2023-01-22 20:04:16.543270: step: 204/470, loss: 0.006123876199126244 2023-01-22 20:04:17.291111: step: 206/470, loss: 0.03843360021710396 2023-01-22 20:04:18.087166: step: 208/470, loss: 0.0037462252657860518 2023-01-22 20:04:18.814117: step: 210/470, loss: 0.002261190675199032 2023-01-22 20:04:19.651658: step: 212/470, loss: 0.16585534811019897 2023-01-22 20:04:20.372141: step: 214/470, loss: 0.2700032889842987 2023-01-22 20:04:21.038341: step: 216/470, loss: 0.024171005934476852 2023-01-22 20:04:21.751961: step: 218/470, loss: 0.004075937904417515 2023-01-22 20:04:22.461687: step: 220/470, loss: 0.004899140447378159 2023-01-22 20:04:23.152966: step: 222/470, loss: 0.04851776361465454 2023-01-22 20:04:23.918240: step: 224/470, loss: 0.01797030307352543 2023-01-22 20:04:24.666156: step: 226/470, loss: 0.0035274296533316374 2023-01-22 20:04:25.422124: step: 228/470, loss: 0.024435440078377724 2023-01-22 20:04:26.198641: step: 230/470, loss: 0.14094187319278717 2023-01-22 20:04:27.024325: step: 232/470, loss: 0.009624892845749855 2023-01-22 20:04:27.788402: step: 234/470, loss: 0.03491733595728874 2023-01-22 20:04:28.540714: step: 236/470, loss: 0.028152016922831535 2023-01-22 20:04:29.268604: step: 238/470, loss: 0.02921573631465435 2023-01-22 20:04:30.074569: step: 240/470, loss: 0.018016021698713303 2023-01-22 20:04:30.830186: step: 242/470, loss: 0.21584904193878174 2023-01-22 20:04:31.498166: step: 244/470, loss: 0.0007679513073526323 2023-01-22 20:04:32.181116: step: 246/470, loss: 0.007389437407255173 2023-01-22 20:04:32.851499: step: 248/470, loss: 0.019924050197005272 2023-01-22 20:04:33.633529: step: 250/470, loss: 0.0013338790740817785 2023-01-22 20:04:34.366868: step: 252/470, loss: 0.09131523221731186 2023-01-22 20:04:35.092103: step: 254/470, loss: 0.04288085922598839 2023-01-22 20:04:35.878842: step: 256/470, loss: 0.008014186285436153 2023-01-22 20:04:36.596732: step: 258/470, loss: 0.002048267750069499 2023-01-22 20:04:37.347394: step: 260/470, loss: 0.007588651031255722 2023-01-22 20:04:38.047615: step: 262/470, loss: 0.010957635007798672 2023-01-22 20:04:38.796140: step: 264/470, loss: 0.011484313756227493 2023-01-22 20:04:39.516430: step: 266/470, loss: 0.04040658846497536 2023-01-22 20:04:40.252904: step: 268/470, loss: 0.015184608288109303 2023-01-22 20:04:40.989432: step: 270/470, loss: 0.010376902297139168 2023-01-22 20:04:41.637422: step: 272/470, loss: 0.04080960154533386 2023-01-22 20:04:42.332798: step: 274/470, loss: 1.0133119821548462 2023-01-22 20:04:43.070894: step: 276/470, loss: 0.005866447929292917 2023-01-22 20:04:43.827345: step: 278/470, loss: 0.11790863424539566 2023-01-22 20:04:44.546094: step: 280/470, loss: 0.00046028837095946074 2023-01-22 20:04:45.273684: step: 282/470, loss: 0.03480033576488495 2023-01-22 20:04:46.003497: step: 284/470, loss: 0.012186521664261818 2023-01-22 20:04:46.739427: step: 286/470, loss: 0.03706849738955498 2023-01-22 20:04:47.503067: step: 288/470, loss: 0.02106441929936409 2023-01-22 20:04:48.261632: step: 290/470, loss: 0.004235308617353439 2023-01-22 20:04:49.024119: step: 292/470, loss: 0.0014547484461218119 2023-01-22 20:04:49.791868: step: 294/470, loss: 0.029318470507860184 2023-01-22 20:04:50.500913: step: 296/470, loss: 0.05262761935591698 2023-01-22 20:04:51.207876: step: 298/470, loss: 0.004162387456744909 2023-01-22 20:04:51.944145: step: 300/470, loss: 0.08064429461956024 2023-01-22 20:04:52.582975: step: 302/470, loss: 0.011829620227217674 2023-01-22 20:04:53.336351: step: 304/470, loss: 0.020030930638313293 2023-01-22 20:04:54.010308: step: 306/470, loss: 0.006781530566513538 2023-01-22 20:04:54.699125: step: 308/470, loss: 0.10114534944295883 2023-01-22 20:04:55.415622: step: 310/470, loss: 0.03585520759224892 2023-01-22 20:04:56.179184: step: 312/470, loss: 0.02825266122817993 2023-01-22 20:04:56.872336: step: 314/470, loss: 0.043976690620183945 2023-01-22 20:04:57.653149: step: 316/470, loss: 0.045031096786260605 2023-01-22 20:04:58.415967: step: 318/470, loss: 0.04603327438235283 2023-01-22 20:04:59.198368: step: 320/470, loss: 0.04247571900486946 2023-01-22 20:04:59.859167: step: 322/470, loss: 0.0015748925507068634 2023-01-22 20:05:00.653775: step: 324/470, loss: 0.06393693387508392 2023-01-22 20:05:01.399144: step: 326/470, loss: 0.035000111907720566 2023-01-22 20:05:02.125877: step: 328/470, loss: 0.014710234478116035 2023-01-22 20:05:02.791851: step: 330/470, loss: 0.028020773082971573 2023-01-22 20:05:03.554905: step: 332/470, loss: 0.06262163072824478 2023-01-22 20:05:04.202511: step: 334/470, loss: 0.0007441341294907033 2023-01-22 20:05:04.865998: step: 336/470, loss: 0.08218454569578171 2023-01-22 20:05:05.726926: step: 338/470, loss: 0.4393693506717682 2023-01-22 20:05:06.478271: step: 340/470, loss: 0.014411939308047295 2023-01-22 20:05:07.188852: step: 342/470, loss: 0.060469575226306915 2023-01-22 20:05:07.944112: step: 344/470, loss: 0.036549992859363556 2023-01-22 20:05:08.743126: step: 346/470, loss: 0.031213141977787018 2023-01-22 20:05:09.477734: step: 348/470, loss: 0.0008316990570165217 2023-01-22 20:05:10.244727: step: 350/470, loss: 1.0978364944458008 2023-01-22 20:05:10.923325: step: 352/470, loss: 0.014219812117516994 2023-01-22 20:05:11.640805: step: 354/470, loss: 0.06110917776823044 2023-01-22 20:05:12.543865: step: 356/470, loss: 0.06231218948960304 2023-01-22 20:05:13.209120: step: 358/470, loss: 0.024384677410125732 2023-01-22 20:05:14.061173: step: 360/470, loss: 0.06801487505435944 2023-01-22 20:05:14.780066: step: 362/470, loss: 0.043319717049598694 2023-01-22 20:05:15.518784: step: 364/470, loss: 0.006488930433988571 2023-01-22 20:05:16.208030: step: 366/470, loss: 1.2596720457077026 2023-01-22 20:05:16.889468: step: 368/470, loss: 0.009231701493263245 2023-01-22 20:05:17.620880: step: 370/470, loss: 0.07412904500961304 2023-01-22 20:05:18.433078: step: 372/470, loss: 0.054003313183784485 2023-01-22 20:05:19.226956: step: 374/470, loss: 0.04891440272331238 2023-01-22 20:05:20.037881: step: 376/470, loss: 0.03218501806259155 2023-01-22 20:05:20.726489: step: 378/470, loss: 0.010670957155525684 2023-01-22 20:05:21.494683: step: 380/470, loss: 0.3814714848995209 2023-01-22 20:05:22.252737: step: 382/470, loss: 0.029765913262963295 2023-01-22 20:05:22.981506: step: 384/470, loss: 0.03948010876774788 2023-01-22 20:05:23.648254: step: 386/470, loss: 0.008174107410013676 2023-01-22 20:05:24.397001: step: 388/470, loss: 0.0791185200214386 2023-01-22 20:05:25.131254: step: 390/470, loss: 0.0023983244318515062 2023-01-22 20:05:25.880447: step: 392/470, loss: 0.0599571131169796 2023-01-22 20:05:26.551113: step: 394/470, loss: 0.05517046898603439 2023-01-22 20:05:27.299104: step: 396/470, loss: 0.044240474700927734 2023-01-22 20:05:28.021780: step: 398/470, loss: 0.017728475853800774 2023-01-22 20:05:28.783202: step: 400/470, loss: 0.06311778724193573 2023-01-22 20:05:29.516610: step: 402/470, loss: 0.00163270381744951 2023-01-22 20:05:30.319427: step: 404/470, loss: 0.014112650416791439 2023-01-22 20:05:31.004706: step: 406/470, loss: 0.0612499974668026 2023-01-22 20:05:31.782258: step: 408/470, loss: 0.01821378618478775 2023-01-22 20:05:32.485323: step: 410/470, loss: 0.20836558938026428 2023-01-22 20:05:33.280619: step: 412/470, loss: 0.03567926958203316 2023-01-22 20:05:34.006616: step: 414/470, loss: 0.07208713889122009 2023-01-22 20:05:34.761899: step: 416/470, loss: 0.13654865324497223 2023-01-22 20:05:35.482211: step: 418/470, loss: 0.03147239238023758 2023-01-22 20:05:36.184822: step: 420/470, loss: 0.017493341118097305 2023-01-22 20:05:36.934112: step: 422/470, loss: 0.09981317818164825 2023-01-22 20:05:37.646881: step: 424/470, loss: 0.032189272344112396 2023-01-22 20:05:38.390154: step: 426/470, loss: 0.014166963286697865 2023-01-22 20:05:39.278299: step: 428/470, loss: 0.09840293973684311 2023-01-22 20:05:40.047111: step: 430/470, loss: 0.08024189621210098 2023-01-22 20:05:40.785284: step: 432/470, loss: 0.04482298344373703 2023-01-22 20:05:41.544758: step: 434/470, loss: 0.038904860615730286 2023-01-22 20:05:42.335348: step: 436/470, loss: 0.2726028561592102 2023-01-22 20:05:43.044339: step: 438/470, loss: 0.02278323471546173 2023-01-22 20:05:43.807322: step: 440/470, loss: 0.01581525057554245 2023-01-22 20:05:44.538973: step: 442/470, loss: 0.04916553571820259 2023-01-22 20:05:45.336670: step: 444/470, loss: 0.018240492790937424 2023-01-22 20:05:46.041842: step: 446/470, loss: 0.05501729995012283 2023-01-22 20:05:46.826633: step: 448/470, loss: 0.014724891632795334 2023-01-22 20:05:47.689444: step: 450/470, loss: 0.09602244943380356 2023-01-22 20:05:48.376661: step: 452/470, loss: 0.004648915026336908 2023-01-22 20:05:49.152360: step: 454/470, loss: 0.01640310324728489 2023-01-22 20:05:49.896208: step: 456/470, loss: 0.03248788043856621 2023-01-22 20:05:50.624489: step: 458/470, loss: 1.1334494352340698 2023-01-22 20:05:51.384806: step: 460/470, loss: 0.14382703602313995 2023-01-22 20:05:52.124610: step: 462/470, loss: 0.06437462568283081 2023-01-22 20:05:52.962967: step: 464/470, loss: 0.020893843844532967 2023-01-22 20:05:53.615584: step: 466/470, loss: 0.020247068256139755 2023-01-22 20:05:54.379413: step: 468/470, loss: 0.06369510293006897 2023-01-22 20:05:55.130861: step: 470/470, loss: 0.002879982814192772 2023-01-22 20:05:55.822467: step: 472/470, loss: 0.01400853507220745 2023-01-22 20:05:56.569159: step: 474/470, loss: 0.03950931504368782 2023-01-22 20:05:57.288189: step: 476/470, loss: 0.02229795604944229 2023-01-22 20:05:58.112162: step: 478/470, loss: 0.017860140651464462 2023-01-22 20:05:58.867750: step: 480/470, loss: 0.00995658803731203 2023-01-22 20:05:59.589926: step: 482/470, loss: 0.0026193808298557997 2023-01-22 20:06:00.336822: step: 484/470, loss: 0.022055814042687416 2023-01-22 20:06:01.065376: step: 486/470, loss: 0.2256946861743927 2023-01-22 20:06:01.851662: step: 488/470, loss: 0.01226732973009348 2023-01-22 20:06:02.562505: step: 490/470, loss: 0.017396673560142517 2023-01-22 20:06:03.327255: step: 492/470, loss: 0.0489540733397007 2023-01-22 20:06:04.082725: step: 494/470, loss: 0.03391120210289955 2023-01-22 20:06:04.789239: step: 496/470, loss: 0.01827331632375717 2023-01-22 20:06:05.622999: step: 498/470, loss: 0.012197432108223438 2023-01-22 20:06:06.363409: step: 500/470, loss: 0.02815680392086506 2023-01-22 20:06:07.081598: step: 502/470, loss: 0.07767844945192337 2023-01-22 20:06:07.904240: step: 504/470, loss: 0.01819152943789959 2023-01-22 20:06:08.691957: step: 506/470, loss: 0.008796506561338902 2023-01-22 20:06:09.400582: step: 508/470, loss: 0.26540952920913696 2023-01-22 20:06:10.161307: step: 510/470, loss: 0.03128129988908768 2023-01-22 20:06:10.882884: step: 512/470, loss: 0.021161213517189026 2023-01-22 20:06:11.663917: step: 514/470, loss: 0.031146377325057983 2023-01-22 20:06:12.377298: step: 516/470, loss: 0.0022348014172166586 2023-01-22 20:06:13.206675: step: 518/470, loss: 0.0887453556060791 2023-01-22 20:06:13.874606: step: 520/470, loss: 0.023102333769202232 2023-01-22 20:06:14.705703: step: 522/470, loss: 0.035375066101551056 2023-01-22 20:06:15.417801: step: 524/470, loss: 0.006511027924716473 2023-01-22 20:06:16.157779: step: 526/470, loss: 0.0845172256231308 2023-01-22 20:06:16.884956: step: 528/470, loss: 0.03893345594406128 2023-01-22 20:06:17.646530: step: 530/470, loss: 0.22155825793743134 2023-01-22 20:06:18.376188: step: 532/470, loss: 0.06408680230379105 2023-01-22 20:06:19.126890: step: 534/470, loss: 0.08290005475282669 2023-01-22 20:06:19.839234: step: 536/470, loss: 0.018902825191617012 2023-01-22 20:06:20.564035: step: 538/470, loss: 0.05063078925013542 2023-01-22 20:06:21.286179: step: 540/470, loss: 0.06731826812028885 2023-01-22 20:06:21.986242: step: 542/470, loss: 0.04933793842792511 2023-01-22 20:06:22.687477: step: 544/470, loss: 0.12002909183502197 2023-01-22 20:06:23.392428: step: 546/470, loss: 0.042481400072574615 2023-01-22 20:06:24.073123: step: 548/470, loss: 0.11045798659324646 2023-01-22 20:06:24.741546: step: 550/470, loss: 0.003042022930458188 2023-01-22 20:06:25.524304: step: 552/470, loss: 0.03922036290168762 2023-01-22 20:06:26.245046: step: 554/470, loss: 0.03836175426840782 2023-01-22 20:06:27.002509: step: 556/470, loss: 0.03422430902719498 2023-01-22 20:06:27.740561: step: 558/470, loss: 0.01736604794859886 2023-01-22 20:06:28.484904: step: 560/470, loss: 0.029811743646860123 2023-01-22 20:06:29.195258: step: 562/470, loss: 0.010475804097950459 2023-01-22 20:06:29.915185: step: 564/470, loss: 0.0307230856269598 2023-01-22 20:06:30.647787: step: 566/470, loss: 0.020388364791870117 2023-01-22 20:06:31.423076: step: 568/470, loss: 0.18014267086982727 2023-01-22 20:06:32.083078: step: 570/470, loss: 0.03197851777076721 2023-01-22 20:06:32.843810: step: 572/470, loss: 0.6306474804878235 2023-01-22 20:06:33.472804: step: 574/470, loss: 0.21226122975349426 2023-01-22 20:06:34.175297: step: 576/470, loss: 0.012852661311626434 2023-01-22 20:06:34.902748: step: 578/470, loss: 0.07441854476928711 2023-01-22 20:06:35.577980: step: 580/470, loss: 0.016917813569307327 2023-01-22 20:06:36.280041: step: 582/470, loss: 0.08907926082611084 2023-01-22 20:06:36.965132: step: 584/470, loss: 0.003389795310795307 2023-01-22 20:06:37.730587: step: 586/470, loss: 0.040450599044561386 2023-01-22 20:06:38.527951: step: 588/470, loss: 0.054036617279052734 2023-01-22 20:06:39.240819: step: 590/470, loss: 0.05018281936645508 2023-01-22 20:06:39.948272: step: 592/470, loss: 0.01974686235189438 2023-01-22 20:06:40.642839: step: 594/470, loss: 0.10064013302326202 2023-01-22 20:06:41.373373: step: 596/470, loss: 0.008306887932121754 2023-01-22 20:06:42.195393: step: 598/470, loss: 0.04838766157627106 2023-01-22 20:06:42.990417: step: 600/470, loss: 0.001629787846468389 2023-01-22 20:06:43.772312: step: 602/470, loss: 0.028213849291205406 2023-01-22 20:06:44.473561: step: 604/470, loss: 0.024601630866527557 2023-01-22 20:06:45.196328: step: 606/470, loss: 0.028212811797857285 2023-01-22 20:06:45.970836: step: 608/470, loss: 0.030642293393611908 2023-01-22 20:06:46.718194: step: 610/470, loss: 0.011128339916467667 2023-01-22 20:06:47.408331: step: 612/470, loss: 0.000944534142035991 2023-01-22 20:06:48.126252: step: 614/470, loss: 0.029674546793103218 2023-01-22 20:06:48.807643: step: 616/470, loss: 0.007636074908077717 2023-01-22 20:06:49.621490: step: 618/470, loss: 0.014691839925944805 2023-01-22 20:06:50.491124: step: 620/470, loss: 0.05527832731604576 2023-01-22 20:06:51.230716: step: 622/470, loss: 0.015919934958219528 2023-01-22 20:06:51.945620: step: 624/470, loss: 0.02508891187608242 2023-01-22 20:06:52.757562: step: 626/470, loss: 0.01099168136715889 2023-01-22 20:06:53.446669: step: 628/470, loss: 0.035270821303129196 2023-01-22 20:06:54.075911: step: 630/470, loss: 0.03347640857100487 2023-01-22 20:06:54.726744: step: 632/470, loss: 0.05645357072353363 2023-01-22 20:06:55.408565: step: 634/470, loss: 0.011641307733952999 2023-01-22 20:06:56.230577: step: 636/470, loss: 0.06422200053930283 2023-01-22 20:06:56.929644: step: 638/470, loss: 0.01144405733793974 2023-01-22 20:06:57.714995: step: 640/470, loss: 0.029585037380456924 2023-01-22 20:06:58.413192: step: 642/470, loss: 0.04478341341018677 2023-01-22 20:06:59.144996: step: 644/470, loss: 0.14443320035934448 2023-01-22 20:06:59.848423: step: 646/470, loss: 0.000602882297243923 2023-01-22 20:07:00.579937: step: 648/470, loss: 0.01970474235713482 2023-01-22 20:07:01.326727: step: 650/470, loss: 0.05201108008623123 2023-01-22 20:07:02.050882: step: 652/470, loss: 0.022257506847381592 2023-01-22 20:07:02.739334: step: 654/470, loss: 0.08728256821632385 2023-01-22 20:07:03.454597: step: 656/470, loss: 0.015211334452033043 2023-01-22 20:07:04.197468: step: 658/470, loss: 0.2973870038986206 2023-01-22 20:07:04.926983: step: 660/470, loss: 0.035559751093387604 2023-01-22 20:07:05.602116: step: 662/470, loss: 0.026579704135656357 2023-01-22 20:07:06.355035: step: 664/470, loss: 0.02378200925886631 2023-01-22 20:07:07.123608: step: 666/470, loss: 0.08443901687860489 2023-01-22 20:07:07.834509: step: 668/470, loss: 0.006817949004471302 2023-01-22 20:07:08.560784: step: 670/470, loss: 0.08191236853599548 2023-01-22 20:07:09.364914: step: 672/470, loss: 0.00972555298358202 2023-01-22 20:07:10.051152: step: 674/470, loss: 0.002309043426066637 2023-01-22 20:07:10.763226: step: 676/470, loss: 0.03206325322389603 2023-01-22 20:07:11.496514: step: 678/470, loss: 0.023099826648831367 2023-01-22 20:07:12.184123: step: 680/470, loss: 0.03047085553407669 2023-01-22 20:07:12.939646: step: 682/470, loss: 0.26479578018188477 2023-01-22 20:07:13.665782: step: 684/470, loss: 0.04502062872052193 2023-01-22 20:07:14.404564: step: 686/470, loss: 0.017808128148317337 2023-01-22 20:07:15.082562: step: 688/470, loss: 0.14120163023471832 2023-01-22 20:07:15.888608: step: 690/470, loss: 0.006991311442106962 2023-01-22 20:07:16.600724: step: 692/470, loss: 0.04135780781507492 2023-01-22 20:07:17.307262: step: 694/470, loss: 0.024161092936992645 2023-01-22 20:07:18.019424: step: 696/470, loss: 0.02334967628121376 2023-01-22 20:07:18.733895: step: 698/470, loss: 0.04725329205393791 2023-01-22 20:07:19.495542: step: 700/470, loss: 0.13663579523563385 2023-01-22 20:07:20.179882: step: 702/470, loss: 0.029831871390342712 2023-01-22 20:07:20.899703: step: 704/470, loss: 0.06156457960605621 2023-01-22 20:07:21.641195: step: 706/470, loss: 0.08219487965106964 2023-01-22 20:07:22.448205: step: 708/470, loss: 0.23499932885169983 2023-01-22 20:07:23.316647: step: 710/470, loss: 0.05763453617691994 2023-01-22 20:07:24.079970: step: 712/470, loss: 0.6921198964118958 2023-01-22 20:07:24.882300: step: 714/470, loss: 0.025135423988103867 2023-01-22 20:07:25.670489: step: 716/470, loss: 0.06057261303067207 2023-01-22 20:07:26.378205: step: 718/470, loss: 0.0005019385716877878 2023-01-22 20:07:27.147194: step: 720/470, loss: 0.03366491198539734 2023-01-22 20:07:27.916347: step: 722/470, loss: 0.0844867005944252 2023-01-22 20:07:28.638354: step: 724/470, loss: 0.001989529235288501 2023-01-22 20:07:29.635989: step: 726/470, loss: 0.05526869744062424 2023-01-22 20:07:30.362134: step: 728/470, loss: 0.003986681811511517 2023-01-22 20:07:31.061652: step: 730/470, loss: 0.06542991101741791 2023-01-22 20:07:31.743068: step: 732/470, loss: 0.024848083034157753 2023-01-22 20:07:32.499075: step: 734/470, loss: 0.017168505117297173 2023-01-22 20:07:33.191421: step: 736/470, loss: 0.00605942215770483 2023-01-22 20:07:33.837874: step: 738/470, loss: 0.013854175806045532 2023-01-22 20:07:34.593511: step: 740/470, loss: 0.02051333710551262 2023-01-22 20:07:35.366645: step: 742/470, loss: 0.01970795914530754 2023-01-22 20:07:36.137488: step: 744/470, loss: 0.052142977714538574 2023-01-22 20:07:36.929266: step: 746/470, loss: 0.03665049001574516 2023-01-22 20:07:37.616879: step: 748/470, loss: 0.014172352850437164 2023-01-22 20:07:38.335738: step: 750/470, loss: 0.0662955716252327 2023-01-22 20:07:39.049223: step: 752/470, loss: 0.04770228639245033 2023-01-22 20:07:39.829147: step: 754/470, loss: 0.024025220423936844 2023-01-22 20:07:40.520290: step: 756/470, loss: 0.02347307652235031 2023-01-22 20:07:41.387784: step: 758/470, loss: 0.03415545076131821 2023-01-22 20:07:42.208756: step: 760/470, loss: 0.08358060568571091 2023-01-22 20:07:42.895872: step: 762/470, loss: 0.07076893001794815 2023-01-22 20:07:43.681029: step: 764/470, loss: 0.9924178719520569 2023-01-22 20:07:44.465123: step: 766/470, loss: 0.02128605917096138 2023-01-22 20:07:45.154329: step: 768/470, loss: 0.060910508036613464 2023-01-22 20:07:45.839663: step: 770/470, loss: 0.010893095284700394 2023-01-22 20:07:46.553425: step: 772/470, loss: 0.004468762315809727 2023-01-22 20:07:47.240100: step: 774/470, loss: 0.0033063499722629786 2023-01-22 20:07:48.005341: step: 776/470, loss: 0.16144682466983795 2023-01-22 20:07:48.727026: step: 778/470, loss: 0.01916014961898327 2023-01-22 20:07:49.420324: step: 780/470, loss: 0.007597747258841991 2023-01-22 20:07:50.101094: step: 782/470, loss: 0.013567719608545303 2023-01-22 20:07:50.774657: step: 784/470, loss: 0.0008880794048309326 2023-01-22 20:07:51.510464: step: 786/470, loss: 0.015842584893107414 2023-01-22 20:07:52.215616: step: 788/470, loss: 0.0290867630392313 2023-01-22 20:07:52.951042: step: 790/470, loss: 0.5169112682342529 2023-01-22 20:07:53.707023: step: 792/470, loss: 0.012393763288855553 2023-01-22 20:07:54.377940: step: 794/470, loss: 0.010337642394006252 2023-01-22 20:07:55.096951: step: 796/470, loss: 0.20648834109306335 2023-01-22 20:07:55.839438: step: 798/470, loss: 0.07127867639064789 2023-01-22 20:07:56.535634: step: 800/470, loss: 0.022180991247296333 2023-01-22 20:07:57.322235: step: 802/470, loss: 0.17413491010665894 2023-01-22 20:07:58.057933: step: 804/470, loss: 0.056421320885419846 2023-01-22 20:07:58.735426: step: 806/470, loss: 0.01221885159611702 2023-01-22 20:07:59.410025: step: 808/470, loss: 0.03740542382001877 2023-01-22 20:08:00.159555: step: 810/470, loss: 0.10675573348999023 2023-01-22 20:08:00.893580: step: 812/470, loss: 0.0472906269133091 2023-01-22 20:08:01.627389: step: 814/470, loss: 0.005474620033055544 2023-01-22 20:08:02.377557: step: 816/470, loss: 0.0021934225223958492 2023-01-22 20:08:03.130727: step: 818/470, loss: 0.028478167951107025 2023-01-22 20:08:03.872915: step: 820/470, loss: 0.07020837813615799 2023-01-22 20:08:04.550268: step: 822/470, loss: 0.004713045433163643 2023-01-22 20:08:05.325841: step: 824/470, loss: 0.034453753381967545 2023-01-22 20:08:06.125092: step: 826/470, loss: 0.08387379348278046 2023-01-22 20:08:06.776054: step: 828/470, loss: 0.008352800272405148 2023-01-22 20:08:07.541620: step: 830/470, loss: 0.06293504685163498 2023-01-22 20:08:08.212541: step: 832/470, loss: 0.05639214813709259 2023-01-22 20:08:08.951816: step: 834/470, loss: 0.016982192173600197 2023-01-22 20:08:09.682996: step: 836/470, loss: 0.08415093272924423 2023-01-22 20:08:10.369030: step: 838/470, loss: 0.0067184302024543285 2023-01-22 20:08:11.129995: step: 840/470, loss: 0.01616278663277626 2023-01-22 20:08:11.873966: step: 842/470, loss: 0.035632479935884476 2023-01-22 20:08:12.649505: step: 844/470, loss: 0.10122037678956985 2023-01-22 20:08:13.440321: step: 846/470, loss: 0.06999140232801437 2023-01-22 20:08:14.159473: step: 848/470, loss: 0.0007854777504689991 2023-01-22 20:08:14.905650: step: 850/470, loss: 0.0418260395526886 2023-01-22 20:08:15.676087: step: 852/470, loss: 0.15163059532642365 2023-01-22 20:08:16.476542: step: 854/470, loss: 0.05955205485224724 2023-01-22 20:08:17.300809: step: 856/470, loss: 0.005851359572261572 2023-01-22 20:08:18.054913: step: 858/470, loss: 0.025483133271336555 2023-01-22 20:08:18.765059: step: 860/470, loss: 0.03567005693912506 2023-01-22 20:08:19.477332: step: 862/470, loss: 0.007423198316246271 2023-01-22 20:08:20.179482: step: 864/470, loss: 0.00906333327293396 2023-01-22 20:08:20.981316: step: 866/470, loss: 0.02991068735718727 2023-01-22 20:08:21.697392: step: 868/470, loss: 0.03796318545937538 2023-01-22 20:08:22.320724: step: 870/470, loss: 0.03030499443411827 2023-01-22 20:08:23.109398: step: 872/470, loss: 0.003593269968405366 2023-01-22 20:08:23.798020: step: 874/470, loss: 0.0007432375568896532 2023-01-22 20:08:24.466293: step: 876/470, loss: 0.06043066084384918 2023-01-22 20:08:25.158005: step: 878/470, loss: 0.04188834875822067 2023-01-22 20:08:25.847848: step: 880/470, loss: 0.004259438719600439 2023-01-22 20:08:26.594654: step: 882/470, loss: 0.2212732881307602 2023-01-22 20:08:27.334323: step: 884/470, loss: 0.01849282905459404 2023-01-22 20:08:28.116385: step: 886/470, loss: 0.087897889316082 2023-01-22 20:08:28.925780: step: 888/470, loss: 0.028527600690722466 2023-01-22 20:08:29.683385: step: 890/470, loss: 0.009783981367945671 2023-01-22 20:08:30.331731: step: 892/470, loss: 0.010732408612966537 2023-01-22 20:08:31.007661: step: 894/470, loss: 0.005476772785186768 2023-01-22 20:08:31.723331: step: 896/470, loss: 0.11816710978746414 2023-01-22 20:08:32.434567: step: 898/470, loss: 0.03139311075210571 2023-01-22 20:08:33.115892: step: 900/470, loss: 0.05651181936264038 2023-01-22 20:08:33.756011: step: 902/470, loss: 0.011831426993012428 2023-01-22 20:08:34.455983: step: 904/470, loss: 0.05420336499810219 2023-01-22 20:08:35.133476: step: 906/470, loss: 0.06597546488046646 2023-01-22 20:08:35.899091: step: 908/470, loss: 0.6028205156326294 2023-01-22 20:08:36.627098: step: 910/470, loss: 0.036814119666814804 2023-01-22 20:08:37.572131: step: 912/470, loss: 0.022296173498034477 2023-01-22 20:08:38.351923: step: 914/470, loss: 0.02219575271010399 2023-01-22 20:08:39.114264: step: 916/470, loss: 0.041941218078136444 2023-01-22 20:08:39.855996: step: 918/470, loss: 0.010028064250946045 2023-01-22 20:08:40.509020: step: 920/470, loss: 0.04588298127055168 2023-01-22 20:08:41.235070: step: 922/470, loss: 0.054243478924036026 2023-01-22 20:08:42.079703: step: 924/470, loss: 0.1619579941034317 2023-01-22 20:08:42.859870: step: 926/470, loss: 0.04308824613690376 2023-01-22 20:08:43.546463: step: 928/470, loss: 0.0242212675511837 2023-01-22 20:08:44.239947: step: 930/470, loss: 0.0006321282708086073 2023-01-22 20:08:44.915914: step: 932/470, loss: 0.01612095721065998 2023-01-22 20:08:45.729711: step: 934/470, loss: 0.0768963098526001 2023-01-22 20:08:46.413995: step: 936/470, loss: 0.009668453596532345 2023-01-22 20:08:47.131582: step: 938/470, loss: 0.06522442400455475 2023-01-22 20:08:47.871374: step: 940/470, loss: 0.04865960404276848 2023-01-22 20:08:48.558020: step: 942/470, loss: 0.011983740143477917 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30469626823793494, 'r': 0.3434337444655282, 'f1': 0.3229073743681238}, 'combined': 0.237931749534407, 'epoch': 25} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3353308535299347, 'r': 0.34726089351128814, 'f1': 0.3411916195103823}, 'combined': 0.2376459041365847, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3033368551760195, 'r': 0.34765742035354036, 'f1': 0.32398843594397125}, 'combined': 0.23872832122187354, 'epoch': 25} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33509607951121756, 'r': 0.33992919604262933, 'f1': 0.3374953354504387}, 'combined': 0.2350713779256787, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28511661211129297, 'r': 0.35599000145964094, 'f1': 0.31663583252190847}, 'combined': 0.2333106134371957, 'epoch': 25} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3309281577809326, 'r': 0.35574776961450255, 'f1': 0.3428894164959061}, 'combined': 0.23882844930063113, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2175925925925926, 'r': 0.3357142857142857, 'f1': 0.2640449438202247}, 'combined': 0.17602996254681647, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24285714285714285, 'r': 0.3695652173913043, 'f1': 0.29310344827586204}, 'combined': 0.14655172413793102, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3017241379310345, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 20:11:24.004028: step: 2/470, loss: 0.019485026597976685 2023-01-22 20:11:24.755677: step: 4/470, loss: 0.04196685925126076 2023-01-22 20:11:25.490792: step: 6/470, loss: 0.048435427248477936 2023-01-22 20:11:26.241418: step: 8/470, loss: 0.08164027333259583 2023-01-22 20:11:27.033149: step: 10/470, loss: 0.031989555805921555 2023-01-22 20:11:27.837511: step: 12/470, loss: 0.03375351428985596 2023-01-22 20:11:28.592680: step: 14/470, loss: 0.04762651026248932 2023-01-22 20:11:29.367929: step: 16/470, loss: 0.030239004641771317 2023-01-22 20:11:30.044111: step: 18/470, loss: 0.00040057190926745534 2023-01-22 20:11:30.798831: step: 20/470, loss: 0.010928238742053509 2023-01-22 20:11:31.569931: step: 22/470, loss: 0.013470311649143696 2023-01-22 20:11:32.352222: step: 24/470, loss: 0.04967685043811798 2023-01-22 20:11:33.052818: step: 26/470, loss: 0.023781154304742813 2023-01-22 20:11:33.781586: step: 28/470, loss: 0.015070714987814426 2023-01-22 20:11:34.507461: step: 30/470, loss: 0.00012959256127942353 2023-01-22 20:11:35.248845: step: 32/470, loss: 0.020162206143140793 2023-01-22 20:11:35.988454: step: 34/470, loss: 0.05546073988080025 2023-01-22 20:11:36.638508: step: 36/470, loss: 0.06939268857240677 2023-01-22 20:11:37.431320: step: 38/470, loss: 0.0007489689160138369 2023-01-22 20:11:38.189226: step: 40/470, loss: 0.014805969782173634 2023-01-22 20:11:38.851738: step: 42/470, loss: 0.02614147588610649 2023-01-22 20:11:39.585667: step: 44/470, loss: 0.11476456373929977 2023-01-22 20:11:40.265499: step: 46/470, loss: 0.000534221762791276 2023-01-22 20:11:41.021175: step: 48/470, loss: 0.005874789785593748 2023-01-22 20:11:41.770813: step: 50/470, loss: 0.0771356076002121 2023-01-22 20:11:42.472777: step: 52/470, loss: 0.016250325366854668 2023-01-22 20:11:43.265032: step: 54/470, loss: 0.06606414169073105 2023-01-22 20:11:43.947072: step: 56/470, loss: 0.16989466547966003 2023-01-22 20:11:44.627855: step: 58/470, loss: 0.025766367092728615 2023-01-22 20:11:45.313408: step: 60/470, loss: 0.004818424582481384 2023-01-22 20:11:46.090717: step: 62/470, loss: 0.20682062208652496 2023-01-22 20:11:46.790240: step: 64/470, loss: 0.04396335780620575 2023-01-22 20:11:47.508884: step: 66/470, loss: 0.12180660665035248 2023-01-22 20:11:48.242307: step: 68/470, loss: 0.03703419491648674 2023-01-22 20:11:49.057080: step: 70/470, loss: 0.0014291710685938597 2023-01-22 20:11:49.772895: step: 72/470, loss: 0.046893779188394547 2023-01-22 20:11:50.447514: step: 74/470, loss: 0.012694248929619789 2023-01-22 20:11:51.229595: step: 76/470, loss: 0.019860833883285522 2023-01-22 20:11:51.953119: step: 78/470, loss: 1.4195009469985962 2023-01-22 20:11:52.676191: step: 80/470, loss: 0.23771710693836212 2023-01-22 20:11:53.519880: step: 82/470, loss: 0.24715445935726166 2023-01-22 20:11:54.273577: step: 84/470, loss: 0.01750795915722847 2023-01-22 20:11:55.018056: step: 86/470, loss: 0.20475627481937408 2023-01-22 20:11:55.842618: step: 88/470, loss: 0.0445147342979908 2023-01-22 20:11:56.555997: step: 90/470, loss: 0.0016261684941127896 2023-01-22 20:11:57.388470: step: 92/470, loss: 0.011423871852457523 2023-01-22 20:11:58.186268: step: 94/470, loss: 0.21087023615837097 2023-01-22 20:11:58.916430: step: 96/470, loss: 0.006003097631037235 2023-01-22 20:11:59.593164: step: 98/470, loss: 0.04363858327269554 2023-01-22 20:12:00.211709: step: 100/470, loss: 0.04111883044242859 2023-01-22 20:12:00.945506: step: 102/470, loss: 0.00875331461429596 2023-01-22 20:12:01.685933: step: 104/470, loss: 0.01105851773172617 2023-01-22 20:12:02.433442: step: 106/470, loss: 0.05842787027359009 2023-01-22 20:12:03.223867: step: 108/470, loss: 0.02871653437614441 2023-01-22 20:12:03.932461: step: 110/470, loss: 0.6604738235473633 2023-01-22 20:12:04.632441: step: 112/470, loss: 0.02996966242790222 2023-01-22 20:12:05.372264: step: 114/470, loss: 0.040596798062324524 2023-01-22 20:12:06.118800: step: 116/470, loss: 2.317312717437744 2023-01-22 20:12:06.875719: step: 118/470, loss: 0.00040206126868724823 2023-01-22 20:12:07.595697: step: 120/470, loss: 0.0035905339755117893 2023-01-22 20:12:08.355244: step: 122/470, loss: 0.006906208582222462 2023-01-22 20:12:09.093077: step: 124/470, loss: 0.08038660883903503 2023-01-22 20:12:09.799005: step: 126/470, loss: 0.003461079904809594 2023-01-22 20:12:10.575638: step: 128/470, loss: 0.012931152246892452 2023-01-22 20:12:11.301823: step: 130/470, loss: 0.04952579736709595 2023-01-22 20:12:12.052466: step: 132/470, loss: 0.5545482635498047 2023-01-22 20:12:12.776186: step: 134/470, loss: 0.009975926019251347 2023-01-22 20:12:13.523944: step: 136/470, loss: 0.0052613853476941586 2023-01-22 20:12:14.403931: step: 138/470, loss: 0.005113921128213406 2023-01-22 20:12:15.108115: step: 140/470, loss: 0.062149327248334885 2023-01-22 20:12:15.885529: step: 142/470, loss: 0.18093203008174896 2023-01-22 20:12:16.627866: step: 144/470, loss: 0.021489135921001434 2023-01-22 20:12:17.307432: step: 146/470, loss: 0.07839653640985489 2023-01-22 20:12:18.120315: step: 148/470, loss: 0.04690365120768547 2023-01-22 20:12:18.907421: step: 150/470, loss: 0.03063216619193554 2023-01-22 20:12:19.616536: step: 152/470, loss: 0.03267190232872963 2023-01-22 20:12:20.315744: step: 154/470, loss: 0.035660747438669205 2023-01-22 20:12:21.017820: step: 156/470, loss: 0.09285139292478561 2023-01-22 20:12:21.706403: step: 158/470, loss: 0.0034859278239309788 2023-01-22 20:12:22.391174: step: 160/470, loss: 0.018955281004309654 2023-01-22 20:12:23.229359: step: 162/470, loss: 0.0037439637817442417 2023-01-22 20:12:23.916778: step: 164/470, loss: 0.0017843381501734257 2023-01-22 20:12:24.687437: step: 166/470, loss: 0.04432709515094757 2023-01-22 20:12:25.480901: step: 168/470, loss: 0.04893715679645538 2023-01-22 20:12:26.163674: step: 170/470, loss: 0.0067788949236273766 2023-01-22 20:12:26.893920: step: 172/470, loss: 0.03473373129963875 2023-01-22 20:12:27.610369: step: 174/470, loss: 0.018431130796670914 2023-01-22 20:12:28.297758: step: 176/470, loss: 0.36892861127853394 2023-01-22 20:12:29.013164: step: 178/470, loss: 0.0027816283982247114 2023-01-22 20:12:29.810061: step: 180/470, loss: 0.15821048617362976 2023-01-22 20:12:30.567992: step: 182/470, loss: 0.6537167429924011 2023-01-22 20:12:31.268194: step: 184/470, loss: 0.0012202489888295531 2023-01-22 20:12:31.986104: step: 186/470, loss: 0.011547365225851536 2023-01-22 20:12:32.712064: step: 188/470, loss: 0.03449565917253494 2023-01-22 20:12:33.389835: step: 190/470, loss: 0.007175063248723745 2023-01-22 20:12:34.123944: step: 192/470, loss: 0.05448294058442116 2023-01-22 20:12:34.862353: step: 194/470, loss: 0.013299376703798771 2023-01-22 20:12:35.670317: step: 196/470, loss: 0.02153635025024414 2023-01-22 20:12:36.521238: step: 198/470, loss: 0.06576363742351532 2023-01-22 20:12:37.303987: step: 200/470, loss: 0.028407655656337738 2023-01-22 20:12:38.047769: step: 202/470, loss: 0.021715879440307617 2023-01-22 20:12:38.840583: step: 204/470, loss: 0.009564381092786789 2023-01-22 20:12:39.710777: step: 206/470, loss: 0.010834124870598316 2023-01-22 20:12:40.420121: step: 208/470, loss: 0.002131436485797167 2023-01-22 20:12:41.188964: step: 210/470, loss: 0.04402882605791092 2023-01-22 20:12:41.958534: step: 212/470, loss: 0.045613422989845276 2023-01-22 20:12:42.654138: step: 214/470, loss: 0.12864279747009277 2023-01-22 20:12:43.342507: step: 216/470, loss: 0.056581586599349976 2023-01-22 20:12:44.083947: step: 218/470, loss: 0.022101709619164467 2023-01-22 20:12:44.867628: step: 220/470, loss: 0.009549708105623722 2023-01-22 20:12:45.581456: step: 222/470, loss: 0.01309411134570837 2023-01-22 20:12:46.302634: step: 224/470, loss: 0.0033089620992541313 2023-01-22 20:12:46.991932: step: 226/470, loss: 0.023617535829544067 2023-01-22 20:12:47.704041: step: 228/470, loss: 0.00651435274630785 2023-01-22 20:12:48.449801: step: 230/470, loss: 0.004386777523905039 2023-01-22 20:12:49.129186: step: 232/470, loss: 0.7032762169837952 2023-01-22 20:12:49.981780: step: 234/470, loss: 0.04321502149105072 2023-01-22 20:12:50.797901: step: 236/470, loss: 0.008020198903977871 2023-01-22 20:12:51.482836: step: 238/470, loss: 0.016117895022034645 2023-01-22 20:12:52.160137: step: 240/470, loss: 0.0072062062099576 2023-01-22 20:12:52.901145: step: 242/470, loss: 0.040230728685855865 2023-01-22 20:12:53.632126: step: 244/470, loss: 0.021644841879606247 2023-01-22 20:12:54.388020: step: 246/470, loss: 0.013207136653363705 2023-01-22 20:12:55.149336: step: 248/470, loss: 0.0457160547375679 2023-01-22 20:12:55.855981: step: 250/470, loss: 0.03336277976632118 2023-01-22 20:12:56.558834: step: 252/470, loss: 0.0007846613880246878 2023-01-22 20:12:57.255352: step: 254/470, loss: 0.016548141837120056 2023-01-22 20:12:58.016252: step: 256/470, loss: 0.029072560369968414 2023-01-22 20:12:58.726588: step: 258/470, loss: 0.023857641965150833 2023-01-22 20:12:59.413085: step: 260/470, loss: 0.016008907929062843 2023-01-22 20:13:00.125067: step: 262/470, loss: 0.005333620123565197 2023-01-22 20:13:00.927453: step: 264/470, loss: 0.025424938648939133 2023-01-22 20:13:01.620612: step: 266/470, loss: 0.008555452339351177 2023-01-22 20:13:02.313859: step: 268/470, loss: 0.13639460504055023 2023-01-22 20:13:03.099654: step: 270/470, loss: 0.06864644587039948 2023-01-22 20:13:03.783477: step: 272/470, loss: 0.00018786423606798053 2023-01-22 20:13:04.547932: step: 274/470, loss: 0.13025711476802826 2023-01-22 20:13:05.252888: step: 276/470, loss: 0.1863914430141449 2023-01-22 20:13:05.973215: step: 278/470, loss: 0.15763676166534424 2023-01-22 20:13:06.673577: step: 280/470, loss: 0.017980456352233887 2023-01-22 20:13:07.426599: step: 282/470, loss: 0.04068325459957123 2023-01-22 20:13:08.178287: step: 284/470, loss: 0.008834258653223515 2023-01-22 20:13:08.925897: step: 286/470, loss: 0.09366513043642044 2023-01-22 20:13:09.608076: step: 288/470, loss: 0.0012397804530337453 2023-01-22 20:13:10.441078: step: 290/470, loss: 0.09330645203590393 2023-01-22 20:13:11.119268: step: 292/470, loss: 0.0060478076338768005 2023-01-22 20:13:11.850341: step: 294/470, loss: 0.01826561614871025 2023-01-22 20:13:12.671361: step: 296/470, loss: 0.016506120562553406 2023-01-22 20:13:13.329689: step: 298/470, loss: 0.018143299967050552 2023-01-22 20:13:14.051587: step: 300/470, loss: 0.002731665037572384 2023-01-22 20:13:14.803284: step: 302/470, loss: 0.042497992515563965 2023-01-22 20:13:15.495280: step: 304/470, loss: 0.0025194089394062757 2023-01-22 20:13:16.207206: step: 306/470, loss: 0.035188719630241394 2023-01-22 20:13:16.915723: step: 308/470, loss: 0.004296632017940283 2023-01-22 20:13:17.639036: step: 310/470, loss: 0.001335528795607388 2023-01-22 20:13:18.411102: step: 312/470, loss: 0.039289023727178574 2023-01-22 20:13:19.183761: step: 314/470, loss: 0.01676071807742119 2023-01-22 20:13:19.972111: step: 316/470, loss: 0.005507394205778837 2023-01-22 20:13:20.639123: step: 318/470, loss: 0.0006347045418806374 2023-01-22 20:13:21.272914: step: 320/470, loss: 0.0023768669925630093 2023-01-22 20:13:21.959300: step: 322/470, loss: 0.02550424449145794 2023-01-22 20:13:22.579730: step: 324/470, loss: 0.013288677670061588 2023-01-22 20:13:23.243931: step: 326/470, loss: 0.00595318665727973 2023-01-22 20:13:24.041973: step: 328/470, loss: 0.09272654354572296 2023-01-22 20:13:24.795486: step: 330/470, loss: 0.007605270016938448 2023-01-22 20:13:25.508165: step: 332/470, loss: 0.010236957110464573 2023-01-22 20:13:26.199949: step: 334/470, loss: 0.007872911170125008 2023-01-22 20:13:26.910476: step: 336/470, loss: 0.0018507775384932756 2023-01-22 20:13:27.666893: step: 338/470, loss: 0.028767062351107597 2023-01-22 20:13:28.391655: step: 340/470, loss: 0.01310647651553154 2023-01-22 20:13:29.165122: step: 342/470, loss: 0.03576286509633064 2023-01-22 20:13:29.894778: step: 344/470, loss: 0.01042783074080944 2023-01-22 20:13:30.605416: step: 346/470, loss: 0.4090971052646637 2023-01-22 20:13:31.552642: step: 348/470, loss: 0.006796136498451233 2023-01-22 20:13:32.282265: step: 350/470, loss: 0.039724674075841904 2023-01-22 20:13:33.042130: step: 352/470, loss: 0.010841785930097103 2023-01-22 20:13:33.816854: step: 354/470, loss: 0.03900299593806267 2023-01-22 20:13:34.634997: step: 356/470, loss: 0.048096682876348495 2023-01-22 20:13:35.343962: step: 358/470, loss: 0.0033374207559973 2023-01-22 20:13:36.068010: step: 360/470, loss: 0.08358647674322128 2023-01-22 20:13:36.831820: step: 362/470, loss: 0.2687551975250244 2023-01-22 20:13:37.563349: step: 364/470, loss: 0.01693040318787098 2023-01-22 20:13:38.270734: step: 366/470, loss: 0.005727311596274376 2023-01-22 20:13:38.969107: step: 368/470, loss: 0.0020413741003721952 2023-01-22 20:13:39.737131: step: 370/470, loss: 0.021475672721862793 2023-01-22 20:13:40.513297: step: 372/470, loss: 0.19835321605205536 2023-01-22 20:13:41.203799: step: 374/470, loss: 0.01151078287512064 2023-01-22 20:13:42.081792: step: 376/470, loss: 0.015609940513968468 2023-01-22 20:13:42.812461: step: 378/470, loss: 0.04101203382015228 2023-01-22 20:13:43.536886: step: 380/470, loss: 0.006876502186059952 2023-01-22 20:13:44.368200: step: 382/470, loss: 1.5188071727752686 2023-01-22 20:13:45.095524: step: 384/470, loss: 0.03681845963001251 2023-01-22 20:13:45.800482: step: 386/470, loss: 0.05127471312880516 2023-01-22 20:13:46.520747: step: 388/470, loss: 0.018120506778359413 2023-01-22 20:13:47.241003: step: 390/470, loss: 0.01568225957453251 2023-01-22 20:13:47.898515: step: 392/470, loss: 0.058441437780857086 2023-01-22 20:13:48.729918: step: 394/470, loss: 0.013389245606958866 2023-01-22 20:13:49.498531: step: 396/470, loss: 0.0006397226825356483 2023-01-22 20:13:50.259094: step: 398/470, loss: 0.026342155411839485 2023-01-22 20:13:51.072317: step: 400/470, loss: 0.044728025794029236 2023-01-22 20:13:51.779924: step: 402/470, loss: 0.008601291105151176 2023-01-22 20:13:52.511881: step: 404/470, loss: 0.04410114884376526 2023-01-22 20:13:53.315164: step: 406/470, loss: 0.006642171647399664 2023-01-22 20:13:54.129057: step: 408/470, loss: 0.0413130447268486 2023-01-22 20:13:54.894328: step: 410/470, loss: 0.04338083043694496 2023-01-22 20:13:55.559679: step: 412/470, loss: 0.006818010471761227 2023-01-22 20:13:56.386692: step: 414/470, loss: 0.03733073174953461 2023-01-22 20:13:57.153521: step: 416/470, loss: 0.1958683431148529 2023-01-22 20:13:57.935962: step: 418/470, loss: 0.0009550434770062566 2023-01-22 20:13:58.694433: step: 420/470, loss: 0.020624913275241852 2023-01-22 20:13:59.333018: step: 422/470, loss: 0.004369684495031834 2023-01-22 20:14:00.042846: step: 424/470, loss: 0.029023706912994385 2023-01-22 20:14:00.819095: step: 426/470, loss: 0.03372396156191826 2023-01-22 20:14:01.558559: step: 428/470, loss: 0.1394890546798706 2023-01-22 20:14:02.254985: step: 430/470, loss: 0.03233359009027481 2023-01-22 20:14:03.128324: step: 432/470, loss: 0.019605809822678566 2023-01-22 20:14:03.936758: step: 434/470, loss: 0.02610873058438301 2023-01-22 20:14:04.726614: step: 436/470, loss: 0.04683273285627365 2023-01-22 20:14:05.535354: step: 438/470, loss: 0.049369730055332184 2023-01-22 20:14:06.226760: step: 440/470, loss: 0.0025972574949264526 2023-01-22 20:14:06.954903: step: 442/470, loss: 0.011458461172878742 2023-01-22 20:14:07.721596: step: 444/470, loss: 0.008207599632441998 2023-01-22 20:14:08.556688: step: 446/470, loss: 0.0014893775805830956 2023-01-22 20:14:09.253716: step: 448/470, loss: 0.0214060191065073 2023-01-22 20:14:09.951760: step: 450/470, loss: 0.03423811122775078 2023-01-22 20:14:10.672622: step: 452/470, loss: 0.023825276643037796 2023-01-22 20:14:11.324349: step: 454/470, loss: 0.00232740119099617 2023-01-22 20:14:12.079832: step: 456/470, loss: 0.06115736439824104 2023-01-22 20:14:12.927536: step: 458/470, loss: 0.3774428963661194 2023-01-22 20:14:13.660164: step: 460/470, loss: 0.02544984221458435 2023-01-22 20:14:14.373126: step: 462/470, loss: 0.0458686426281929 2023-01-22 20:14:15.085319: step: 464/470, loss: 0.0019502779468894005 2023-01-22 20:14:15.681428: step: 466/470, loss: 0.0001484197418903932 2023-01-22 20:14:16.363737: step: 468/470, loss: 0.019259247928857803 2023-01-22 20:14:17.160072: step: 470/470, loss: 0.04199659079313278 2023-01-22 20:14:17.932992: step: 472/470, loss: 0.0764729380607605 2023-01-22 20:14:18.656725: step: 474/470, loss: 0.012650109827518463 2023-01-22 20:14:19.399165: step: 476/470, loss: 0.19659210741519928 2023-01-22 20:14:20.081828: step: 478/470, loss: 0.016243983060121536 2023-01-22 20:14:20.838756: step: 480/470, loss: 0.7099974751472473 2023-01-22 20:14:21.567497: step: 482/470, loss: 0.008293403312563896 2023-01-22 20:14:22.296865: step: 484/470, loss: 0.024716457352042198 2023-01-22 20:14:23.052426: step: 486/470, loss: 0.005962827242910862 2023-01-22 20:14:23.755885: step: 488/470, loss: 0.0011397538473829627 2023-01-22 20:14:24.560665: step: 490/470, loss: 0.007134813815355301 2023-01-22 20:14:25.320665: step: 492/470, loss: 0.1319524198770523 2023-01-22 20:14:26.037792: step: 494/470, loss: 0.5560120344161987 2023-01-22 20:14:26.714125: step: 496/470, loss: 0.058050792664289474 2023-01-22 20:14:27.413801: step: 498/470, loss: 0.025943944230675697 2023-01-22 20:14:28.149540: step: 500/470, loss: 0.031523432582616806 2023-01-22 20:14:28.886193: step: 502/470, loss: 0.008412609808146954 2023-01-22 20:14:29.577507: step: 504/470, loss: 0.009613175876438618 2023-01-22 20:14:30.336318: step: 506/470, loss: 0.01078837364912033 2023-01-22 20:14:31.108596: step: 508/470, loss: 0.0254935584962368 2023-01-22 20:14:31.785382: step: 510/470, loss: 0.009062767960131168 2023-01-22 20:14:32.482385: step: 512/470, loss: 0.048958227038383484 2023-01-22 20:14:33.201503: step: 514/470, loss: 0.015435706824064255 2023-01-22 20:14:33.956369: step: 516/470, loss: 0.017597481608390808 2023-01-22 20:14:34.644057: step: 518/470, loss: 0.01942635513842106 2023-01-22 20:14:35.324531: step: 520/470, loss: 0.007770019117742777 2023-01-22 20:14:36.026365: step: 522/470, loss: 0.0021495139226317406 2023-01-22 20:14:36.823628: step: 524/470, loss: 0.034324824810028076 2023-01-22 20:14:37.604251: step: 526/470, loss: 0.4315849840641022 2023-01-22 20:14:38.322166: step: 528/470, loss: 0.007081069517880678 2023-01-22 20:14:38.986919: step: 530/470, loss: 0.02178391069173813 2023-01-22 20:14:39.783625: step: 532/470, loss: 0.01920427940785885 2023-01-22 20:14:40.552837: step: 534/470, loss: 0.005591890309005976 2023-01-22 20:14:41.282500: step: 536/470, loss: 0.011183743365108967 2023-01-22 20:14:41.981000: step: 538/470, loss: 0.016883065924048424 2023-01-22 20:14:42.673167: step: 540/470, loss: 0.002876394661143422 2023-01-22 20:14:43.532837: step: 542/470, loss: 0.007916656322777271 2023-01-22 20:14:44.300588: step: 544/470, loss: 0.023198112845420837 2023-01-22 20:14:45.053076: step: 546/470, loss: 0.004471739754080772 2023-01-22 20:14:45.740020: step: 548/470, loss: 0.14783816039562225 2023-01-22 20:14:46.400180: step: 550/470, loss: 0.004815774969756603 2023-01-22 20:14:47.097185: step: 552/470, loss: 0.01722968928515911 2023-01-22 20:14:47.811776: step: 554/470, loss: 0.30158334970474243 2023-01-22 20:14:48.545659: step: 556/470, loss: 0.055766280740499496 2023-01-22 20:14:49.241031: step: 558/470, loss: 0.033450160175561905 2023-01-22 20:14:49.948533: step: 560/470, loss: 0.03725216165184975 2023-01-22 20:14:50.784170: step: 562/470, loss: 0.0021749266888946295 2023-01-22 20:14:51.574674: step: 564/470, loss: 0.006228404585272074 2023-01-22 20:14:52.366340: step: 566/470, loss: 0.003034188412129879 2023-01-22 20:14:53.084869: step: 568/470, loss: 0.0010111165465787053 2023-01-22 20:14:53.849018: step: 570/470, loss: 0.7480594515800476 2023-01-22 20:14:54.777294: step: 572/470, loss: 0.15064027905464172 2023-01-22 20:14:55.470627: step: 574/470, loss: 0.0038994362112134695 2023-01-22 20:14:56.150442: step: 576/470, loss: 0.00042104304884560406 2023-01-22 20:14:56.878089: step: 578/470, loss: 0.1475483477115631 2023-01-22 20:14:57.592221: step: 580/470, loss: 0.21218250691890717 2023-01-22 20:14:58.272323: step: 582/470, loss: 0.009204575791954994 2023-01-22 20:14:59.047607: step: 584/470, loss: 0.22064003348350525 2023-01-22 20:14:59.747675: step: 586/470, loss: 0.017769113183021545 2023-01-22 20:15:00.479247: step: 588/470, loss: 0.006627894006669521 2023-01-22 20:15:01.151779: step: 590/470, loss: 0.13599713146686554 2023-01-22 20:15:01.898786: step: 592/470, loss: 0.042819686233997345 2023-01-22 20:15:02.622052: step: 594/470, loss: 0.011788510717451572 2023-01-22 20:15:03.412077: step: 596/470, loss: 0.01742238737642765 2023-01-22 20:15:04.212176: step: 598/470, loss: 0.6136803030967712 2023-01-22 20:15:04.864413: step: 600/470, loss: 0.01876203715801239 2023-01-22 20:15:05.592829: step: 602/470, loss: 0.03775479272007942 2023-01-22 20:15:06.279870: step: 604/470, loss: 0.02878272905945778 2023-01-22 20:15:07.048067: step: 606/470, loss: 0.0009686170378699899 2023-01-22 20:15:07.852630: step: 608/470, loss: 0.065780408680439 2023-01-22 20:15:08.654897: step: 610/470, loss: 0.3254455327987671 2023-01-22 20:15:09.412413: step: 612/470, loss: 0.011034596711397171 2023-01-22 20:15:10.180439: step: 614/470, loss: 0.032889679074287415 2023-01-22 20:15:10.993911: step: 616/470, loss: 0.025389349088072777 2023-01-22 20:15:11.701803: step: 618/470, loss: 0.06543834507465363 2023-01-22 20:15:12.496173: step: 620/470, loss: 0.0328560434281826 2023-01-22 20:15:13.218572: step: 622/470, loss: 0.030454453080892563 2023-01-22 20:15:13.924729: step: 624/470, loss: 0.0002651447430253029 2023-01-22 20:15:14.704063: step: 626/470, loss: 0.008108319714665413 2023-01-22 20:15:15.403041: step: 628/470, loss: 0.028416186571121216 2023-01-22 20:15:16.122693: step: 630/470, loss: 0.014787995256483555 2023-01-22 20:15:16.880125: step: 632/470, loss: 0.4447817802429199 2023-01-22 20:15:17.616578: step: 634/470, loss: 0.00016679373220540583 2023-01-22 20:15:18.371698: step: 636/470, loss: 0.008493703790009022 2023-01-22 20:15:19.105052: step: 638/470, loss: 0.051174718886613846 2023-01-22 20:15:19.915268: step: 640/470, loss: 0.02636360004544258 2023-01-22 20:15:20.628207: step: 642/470, loss: 0.012426851317286491 2023-01-22 20:15:21.419908: step: 644/470, loss: 0.004039624240249395 2023-01-22 20:15:22.121450: step: 646/470, loss: 0.09385068714618683 2023-01-22 20:15:22.902583: step: 648/470, loss: 0.02949446067214012 2023-01-22 20:15:23.680142: step: 650/470, loss: 0.04192233458161354 2023-01-22 20:15:24.387453: step: 652/470, loss: 0.020343631505966187 2023-01-22 20:15:25.170259: step: 654/470, loss: 0.05575917288661003 2023-01-22 20:15:25.987800: step: 656/470, loss: 0.01872909814119339 2023-01-22 20:15:26.736796: step: 658/470, loss: 0.00723220594227314 2023-01-22 20:15:27.518398: step: 660/470, loss: 0.014894779771566391 2023-01-22 20:15:28.330707: step: 662/470, loss: 0.00329477246850729 2023-01-22 20:15:29.017556: step: 664/470, loss: 0.030976993963122368 2023-01-22 20:15:29.734725: step: 666/470, loss: 0.016939232125878334 2023-01-22 20:15:30.434963: step: 668/470, loss: 0.005721811670809984 2023-01-22 20:15:31.203490: step: 670/470, loss: 0.06608849763870239 2023-01-22 20:15:31.959762: step: 672/470, loss: 0.05654697120189667 2023-01-22 20:15:32.694195: step: 674/470, loss: 0.028054917231202126 2023-01-22 20:15:33.410060: step: 676/470, loss: 0.007750812452286482 2023-01-22 20:15:34.116938: step: 678/470, loss: 0.02219163253903389 2023-01-22 20:15:34.882378: step: 680/470, loss: 0.037425290793180466 2023-01-22 20:15:35.542982: step: 682/470, loss: 0.03459089994430542 2023-01-22 20:15:36.310711: step: 684/470, loss: 0.009903617203235626 2023-01-22 20:15:37.015772: step: 686/470, loss: 0.008926295675337315 2023-01-22 20:15:37.683209: step: 688/470, loss: 0.014270035549998283 2023-01-22 20:15:38.414326: step: 690/470, loss: 0.08540667593479156 2023-01-22 20:15:39.125673: step: 692/470, loss: 0.04163281247019768 2023-01-22 20:15:39.838315: step: 694/470, loss: 0.01574213244020939 2023-01-22 20:15:40.713724: step: 696/470, loss: 0.0054738158360123634 2023-01-22 20:15:41.469887: step: 698/470, loss: 0.20161749422550201 2023-01-22 20:15:42.181811: step: 700/470, loss: 0.4112975597381592 2023-01-22 20:15:42.884787: step: 702/470, loss: 0.00508745014667511 2023-01-22 20:15:43.549662: step: 704/470, loss: 0.00045277041499502957 2023-01-22 20:15:44.292047: step: 706/470, loss: 0.14759576320648193 2023-01-22 20:15:45.085034: step: 708/470, loss: 0.041470929980278015 2023-01-22 20:15:45.881819: step: 710/470, loss: 0.0423739068210125 2023-01-22 20:15:46.606698: step: 712/470, loss: 0.6495215892791748 2023-01-22 20:15:47.408944: step: 714/470, loss: 0.03245260939002037 2023-01-22 20:15:48.028062: step: 716/470, loss: 0.027776306495070457 2023-01-22 20:15:48.798719: step: 718/470, loss: 0.01630890928208828 2023-01-22 20:15:49.556484: step: 720/470, loss: 0.04993465170264244 2023-01-22 20:15:50.344309: step: 722/470, loss: 0.030327895656228065 2023-01-22 20:15:51.016708: step: 724/470, loss: 0.028972510248422623 2023-01-22 20:15:51.755366: step: 726/470, loss: 0.05604696646332741 2023-01-22 20:15:52.526652: step: 728/470, loss: 0.010281789116561413 2023-01-22 20:15:53.267410: step: 730/470, loss: 0.03586762771010399 2023-01-22 20:15:53.925200: step: 732/470, loss: 0.0005163901951164007 2023-01-22 20:15:54.630485: step: 734/470, loss: 0.01637759618461132 2023-01-22 20:15:55.277601: step: 736/470, loss: 0.022752603515982628 2023-01-22 20:15:56.057720: step: 738/470, loss: 0.015957066789269447 2023-01-22 20:15:56.724153: step: 740/470, loss: 0.013974886387586594 2023-01-22 20:15:57.408374: step: 742/470, loss: 0.007855596952140331 2023-01-22 20:15:58.140348: step: 744/470, loss: 0.01974407769739628 2023-01-22 20:15:58.888220: step: 746/470, loss: 0.019117096439003944 2023-01-22 20:15:59.628413: step: 748/470, loss: 0.03735620900988579 2023-01-22 20:16:00.342387: step: 750/470, loss: 0.08970566838979721 2023-01-22 20:16:01.034508: step: 752/470, loss: 0.018881360068917274 2023-01-22 20:16:01.746490: step: 754/470, loss: 0.001953059108927846 2023-01-22 20:16:02.562757: step: 756/470, loss: 0.691685140132904 2023-01-22 20:16:03.266392: step: 758/470, loss: 0.06533520668745041 2023-01-22 20:16:03.980813: step: 760/470, loss: 0.0035757829900830984 2023-01-22 20:16:04.668650: step: 762/470, loss: 0.00113035854883492 2023-01-22 20:16:05.384385: step: 764/470, loss: 0.02485186792910099 2023-01-22 20:16:06.098737: step: 766/470, loss: 0.004313065204769373 2023-01-22 20:16:06.789680: step: 768/470, loss: 0.016619572415947914 2023-01-22 20:16:07.598116: step: 770/470, loss: 0.004702294245362282 2023-01-22 20:16:08.391618: step: 772/470, loss: 0.054384443908929825 2023-01-22 20:16:09.124931: step: 774/470, loss: 0.00823363196104765 2023-01-22 20:16:09.905068: step: 776/470, loss: 0.0492500476539135 2023-01-22 20:16:10.637994: step: 778/470, loss: 0.0389709398150444 2023-01-22 20:16:11.384561: step: 780/470, loss: 0.03600706160068512 2023-01-22 20:16:12.044577: step: 782/470, loss: 0.007133893668651581 2023-01-22 20:16:12.747913: step: 784/470, loss: 0.045114368200302124 2023-01-22 20:16:13.583418: step: 786/470, loss: 0.016657711938023567 2023-01-22 20:16:14.363939: step: 788/470, loss: 0.025096865370869637 2023-01-22 20:16:15.103257: step: 790/470, loss: 1.0770021677017212 2023-01-22 20:16:15.875391: step: 792/470, loss: 0.9711056351661682 2023-01-22 20:16:16.547158: step: 794/470, loss: 0.014067924581468105 2023-01-22 20:16:17.251042: step: 796/470, loss: 0.04596575349569321 2023-01-22 20:16:18.093162: step: 798/470, loss: 0.0024457420222461224 2023-01-22 20:16:18.909971: step: 800/470, loss: 0.05553967505693436 2023-01-22 20:16:19.651065: step: 802/470, loss: 0.02744687721133232 2023-01-22 20:16:20.313267: step: 804/470, loss: 0.006461080629378557 2023-01-22 20:16:21.019403: step: 806/470, loss: 0.016188278794288635 2023-01-22 20:16:21.777056: step: 808/470, loss: 0.044517967849969864 2023-01-22 20:16:22.522383: step: 810/470, loss: 0.00267368508502841 2023-01-22 20:16:23.235287: step: 812/470, loss: 0.031701844185590744 2023-01-22 20:16:23.970538: step: 814/470, loss: 0.0011221464956179261 2023-01-22 20:16:24.712389: step: 816/470, loss: 0.013465669006109238 2023-01-22 20:16:25.444113: step: 818/470, loss: 0.2107423096895218 2023-01-22 20:16:26.204157: step: 820/470, loss: 0.028793897479772568 2023-01-22 20:16:27.028299: step: 822/470, loss: 0.01618114300072193 2023-01-22 20:16:27.750421: step: 824/470, loss: 0.09623509645462036 2023-01-22 20:16:28.484832: step: 826/470, loss: 0.041487544775009155 2023-01-22 20:16:29.192298: step: 828/470, loss: 0.011326837353408337 2023-01-22 20:16:29.879739: step: 830/470, loss: 0.045046452432870865 2023-01-22 20:16:30.686381: step: 832/470, loss: 0.10204900801181793 2023-01-22 20:16:31.511477: step: 834/470, loss: 0.07618521898984909 2023-01-22 20:16:32.313933: step: 836/470, loss: 0.08233476430177689 2023-01-22 20:16:33.039761: step: 838/470, loss: 0.06587542593479156 2023-01-22 20:16:33.714902: step: 840/470, loss: 0.01595766469836235 2023-01-22 20:16:34.410897: step: 842/470, loss: 0.0016488569090142846 2023-01-22 20:16:35.119151: step: 844/470, loss: 0.006250299047678709 2023-01-22 20:16:35.910476: step: 846/470, loss: 0.10261400043964386 2023-01-22 20:16:36.595836: step: 848/470, loss: 0.06171036139130592 2023-01-22 20:16:37.283913: step: 850/470, loss: 0.029668550938367844 2023-01-22 20:16:38.106672: step: 852/470, loss: 0.003090712008997798 2023-01-22 20:16:38.837773: step: 854/470, loss: 0.00039391950122080743 2023-01-22 20:16:39.540667: step: 856/470, loss: 0.0029829915147274733 2023-01-22 20:16:40.208784: step: 858/470, loss: 0.06782427430152893 2023-01-22 20:16:40.854344: step: 860/470, loss: 0.037175048142671585 2023-01-22 20:16:41.549364: step: 862/470, loss: 0.012290716171264648 2023-01-22 20:16:42.233396: step: 864/470, loss: 0.004582709167152643 2023-01-22 20:16:42.973445: step: 866/470, loss: 0.06253193318843842 2023-01-22 20:16:43.710263: step: 868/470, loss: 0.26506075263023376 2023-01-22 20:16:44.388441: step: 870/470, loss: 0.036856453865766525 2023-01-22 20:16:45.050600: step: 872/470, loss: 0.06115124374628067 2023-01-22 20:16:45.764528: step: 874/470, loss: 0.0035287141799926758 2023-01-22 20:16:46.417983: step: 876/470, loss: 0.010850594379007816 2023-01-22 20:16:47.177219: step: 878/470, loss: 0.022742489352822304 2023-01-22 20:16:47.923383: step: 880/470, loss: 0.1650201976299286 2023-01-22 20:16:48.652139: step: 882/470, loss: 0.021147647872567177 2023-01-22 20:16:49.373567: step: 884/470, loss: 0.02876165509223938 2023-01-22 20:16:50.073547: step: 886/470, loss: 0.031704407185316086 2023-01-22 20:16:50.747228: step: 888/470, loss: 0.020638834685087204 2023-01-22 20:16:51.425200: step: 890/470, loss: 0.020739523693919182 2023-01-22 20:16:52.101556: step: 892/470, loss: 0.02357344888150692 2023-01-22 20:16:52.803310: step: 894/470, loss: 0.009717467240989208 2023-01-22 20:16:53.468659: step: 896/470, loss: 0.033283960074186325 2023-01-22 20:16:54.259208: step: 898/470, loss: 0.04801145941019058 2023-01-22 20:16:55.002487: step: 900/470, loss: 0.0027546961791813374 2023-01-22 20:16:55.748151: step: 902/470, loss: 0.0054783690720796585 2023-01-22 20:16:56.548852: step: 904/470, loss: 0.14740252494812012 2023-01-22 20:16:57.263359: step: 906/470, loss: 0.047735873609781265 2023-01-22 20:16:57.977381: step: 908/470, loss: 0.01004134863615036 2023-01-22 20:16:58.689772: step: 910/470, loss: 0.048433274030685425 2023-01-22 20:16:59.458724: step: 912/470, loss: 0.018374241888523102 2023-01-22 20:17:00.184557: step: 914/470, loss: 0.015799539163708687 2023-01-22 20:17:00.891309: step: 916/470, loss: 0.01117359846830368 2023-01-22 20:17:01.651752: step: 918/470, loss: 0.012997347861528397 2023-01-22 20:17:02.342046: step: 920/470, loss: 0.02212097868323326 2023-01-22 20:17:03.011255: step: 922/470, loss: 0.08863328397274017 2023-01-22 20:17:03.730893: step: 924/470, loss: 0.008121066726744175 2023-01-22 20:17:04.448959: step: 926/470, loss: 0.2342129349708557 2023-01-22 20:17:05.183895: step: 928/470, loss: 0.011628974229097366 2023-01-22 20:17:05.902692: step: 930/470, loss: 0.019393447786569595 2023-01-22 20:17:06.702596: step: 932/470, loss: 0.013421921990811825 2023-01-22 20:17:07.455388: step: 934/470, loss: 0.3163841962814331 2023-01-22 20:17:08.166801: step: 936/470, loss: 0.003866309067234397 2023-01-22 20:17:08.898109: step: 938/470, loss: 0.04852140694856644 2023-01-22 20:17:09.613644: step: 940/470, loss: 0.008729308843612671 2023-01-22 20:17:10.268601: step: 942/470, loss: 0.04352358356118202 ================================================== Loss: 0.072 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29753125662216573, 'r': 0.34156814090400434, 'f1': 0.31803252695478845}, 'combined': 0.23433975670352833, 'epoch': 26} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364348350561144, 'r': 0.3597264774830762, 'f1': 0.3476910191286238}, 'combined': 0.2421728491443151, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29081003289473684, 'r': 0.3443367372415859, 'f1': 0.31531791577118295}, 'combined': 0.23233951688402954, 'epoch': 26} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34448135984675676, 'r': 0.35640571461068293, 'f1': 0.35034210131862975}, 'combined': 0.24401937405277696, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2828576864535769, 'r': 0.3526328273244782, 'f1': 0.31391469594594595}, 'combined': 0.2313055654338549, 'epoch': 26} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33027861948824616, 'r': 0.3683876909676592, 'f1': 0.34829381691487776}, 'combined': 0.24259270829891985, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20601851851851852, 'r': 0.31785714285714284, 'f1': 0.25}, 'combined': 0.16666666666666666, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3, 'r': 0.45652173913043476, 'f1': 0.3620689655172414}, 'combined': 0.1810344827586207, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3017241379310345, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 20:19:45.793782: step: 2/470, loss: 0.01683763414621353 2023-01-22 20:19:46.472094: step: 4/470, loss: 0.00944485329091549 2023-01-22 20:19:47.110081: step: 6/470, loss: 0.06548219174146652 2023-01-22 20:19:47.903506: step: 8/470, loss: 0.07034937292337418 2023-01-22 20:19:48.651936: step: 10/470, loss: 0.00018511965754441917 2023-01-22 20:19:49.378070: step: 12/470, loss: 0.007175577338784933 2023-01-22 20:19:50.030903: step: 14/470, loss: 0.0017143557779490948 2023-01-22 20:19:50.752193: step: 16/470, loss: 0.038774389773607254 2023-01-22 20:19:51.491799: step: 18/470, loss: 0.067210853099823 2023-01-22 20:19:52.289697: step: 20/470, loss: 0.01777871884405613 2023-01-22 20:19:52.957358: step: 22/470, loss: 0.00815779808908701 2023-01-22 20:19:53.563720: step: 24/470, loss: 0.00567712401971221 2023-01-22 20:19:54.274201: step: 26/470, loss: 0.022142581641674042 2023-01-22 20:19:55.026510: step: 28/470, loss: 0.009569667279720306 2023-01-22 20:19:55.705205: step: 30/470, loss: 0.0004666670865844935 2023-01-22 20:19:56.445032: step: 32/470, loss: 0.011350602842867374 2023-01-22 20:19:57.125304: step: 34/470, loss: 0.029375847429037094 2023-01-22 20:19:57.823287: step: 36/470, loss: 0.006464695557951927 2023-01-22 20:19:58.543394: step: 38/470, loss: 0.0230893325060606 2023-01-22 20:19:59.207913: step: 40/470, loss: 0.015446262434124947 2023-01-22 20:19:59.896127: step: 42/470, loss: 0.026244178414344788 2023-01-22 20:20:00.570572: step: 44/470, loss: 0.019167501479387283 2023-01-22 20:20:01.398300: step: 46/470, loss: 0.025599118322134018 2023-01-22 20:20:02.200440: step: 48/470, loss: 0.06443053483963013 2023-01-22 20:20:02.941821: step: 50/470, loss: 0.016018331050872803 2023-01-22 20:20:03.771583: step: 52/470, loss: 0.003692210651934147 2023-01-22 20:20:04.496918: step: 54/470, loss: 0.02155778743326664 2023-01-22 20:20:05.208037: step: 56/470, loss: 0.1240386813879013 2023-01-22 20:20:05.880351: step: 58/470, loss: 0.013327186927199364 2023-01-22 20:20:06.605688: step: 60/470, loss: 0.009797775186598301 2023-01-22 20:20:07.303259: step: 62/470, loss: 0.02393781952559948 2023-01-22 20:20:08.056210: step: 64/470, loss: 0.00024999401648528874 2023-01-22 20:20:08.765714: step: 66/470, loss: 0.014125204645097256 2023-01-22 20:20:09.561594: step: 68/470, loss: 0.012959994375705719 2023-01-22 20:20:10.290107: step: 70/470, loss: 0.012555788271129131 2023-01-22 20:20:11.016609: step: 72/470, loss: 0.010418666526675224 2023-01-22 20:20:11.699796: step: 74/470, loss: 0.025468451902270317 2023-01-22 20:20:12.414934: step: 76/470, loss: 0.005443628877401352 2023-01-22 20:20:13.141691: step: 78/470, loss: 0.017685700207948685 2023-01-22 20:20:13.899768: step: 80/470, loss: 0.04424380883574486 2023-01-22 20:20:14.644920: step: 82/470, loss: 0.037951018661260605 2023-01-22 20:20:15.368285: step: 84/470, loss: 0.009052631445229053 2023-01-22 20:20:16.114115: step: 86/470, loss: 0.0344005785882473 2023-01-22 20:20:16.812965: step: 88/470, loss: 0.007054035551846027 2023-01-22 20:20:17.507092: step: 90/470, loss: 0.019252652302384377 2023-01-22 20:20:18.378508: step: 92/470, loss: 0.008761339820921421 2023-01-22 20:20:19.248464: step: 94/470, loss: 0.02975519932806492 2023-01-22 20:20:20.024989: step: 96/470, loss: 0.006972640287131071 2023-01-22 20:20:20.774499: step: 98/470, loss: 0.03771801292896271 2023-01-22 20:20:21.461673: step: 100/470, loss: 0.017774201929569244 2023-01-22 20:20:22.200159: step: 102/470, loss: 0.03576158732175827 2023-01-22 20:20:23.010008: step: 104/470, loss: 0.056759800761938095 2023-01-22 20:20:23.764882: step: 106/470, loss: 0.061209507286548615 2023-01-22 20:20:24.413198: step: 108/470, loss: 0.0349019393324852 2023-01-22 20:20:25.135701: step: 110/470, loss: 0.014727793633937836 2023-01-22 20:20:25.892081: step: 112/470, loss: 0.003246006788685918 2023-01-22 20:20:26.712387: step: 114/470, loss: 0.08207906782627106 2023-01-22 20:20:27.497146: step: 116/470, loss: 0.016705762594938278 2023-01-22 20:20:28.269486: step: 118/470, loss: 0.012947305105626583 2023-01-22 20:20:28.992294: step: 120/470, loss: 0.034973785281181335 2023-01-22 20:20:29.715819: step: 122/470, loss: 0.007712268736213446 2023-01-22 20:20:30.431937: step: 124/470, loss: 0.020008675754070282 2023-01-22 20:20:31.238967: step: 126/470, loss: 0.023369409143924713 2023-01-22 20:20:31.970216: step: 128/470, loss: 0.014888121746480465 2023-01-22 20:20:32.679323: step: 130/470, loss: 0.05595388263463974 2023-01-22 20:20:33.438012: step: 132/470, loss: 0.01942775398492813 2023-01-22 20:20:34.180108: step: 134/470, loss: 0.02803257293999195 2023-01-22 20:20:34.856595: step: 136/470, loss: 0.023012829944491386 2023-01-22 20:20:35.518792: step: 138/470, loss: 0.0042619104497134686 2023-01-22 20:20:36.404122: step: 140/470, loss: 0.007180250249803066 2023-01-22 20:20:37.091505: step: 142/470, loss: 0.019708437845110893 2023-01-22 20:20:37.805773: step: 144/470, loss: 0.0061777918599545956 2023-01-22 20:20:38.570502: step: 146/470, loss: 0.0012747780419886112 2023-01-22 20:20:39.338173: step: 148/470, loss: 0.022044524550437927 2023-01-22 20:20:40.024116: step: 150/470, loss: 0.020499933511018753 2023-01-22 20:20:40.758542: step: 152/470, loss: 0.0026768154930323362 2023-01-22 20:20:41.660782: step: 154/470, loss: 0.010907319374382496 2023-01-22 20:20:42.367482: step: 156/470, loss: 0.005641602911055088 2023-01-22 20:20:43.088107: step: 158/470, loss: 0.013856947422027588 2023-01-22 20:20:43.844930: step: 160/470, loss: 0.010804500430822372 2023-01-22 20:20:44.540340: step: 162/470, loss: 0.015558190643787384 2023-01-22 20:20:45.295181: step: 164/470, loss: 0.3554113507270813 2023-01-22 20:20:45.989897: step: 166/470, loss: 0.12705135345458984 2023-01-22 20:20:46.693091: step: 168/470, loss: 0.02279657870531082 2023-01-22 20:20:47.402898: step: 170/470, loss: 0.07756506651639938 2023-01-22 20:20:48.090264: step: 172/470, loss: 0.012663285247981548 2023-01-22 20:20:48.803341: step: 174/470, loss: 8.404543768847361e-05 2023-01-22 20:20:49.571814: step: 176/470, loss: 0.02814382128417492 2023-01-22 20:20:50.278029: step: 178/470, loss: 0.02191387489438057 2023-01-22 20:20:51.006249: step: 180/470, loss: 0.011857813224196434 2023-01-22 20:20:51.730531: step: 182/470, loss: 0.12916842103004456 2023-01-22 20:20:52.483375: step: 184/470, loss: 0.07631305605173111 2023-01-22 20:20:53.191584: step: 186/470, loss: 0.028871390968561172 2023-01-22 20:20:53.901025: step: 188/470, loss: 0.033751778304576874 2023-01-22 20:20:54.717882: step: 190/470, loss: 0.13906638324260712 2023-01-22 20:20:55.455264: step: 192/470, loss: 0.03259306401014328 2023-01-22 20:20:56.189474: step: 194/470, loss: 0.0015212270664051175 2023-01-22 20:20:56.915933: step: 196/470, loss: 0.02239634282886982 2023-01-22 20:20:57.667470: step: 198/470, loss: 0.06503739953041077 2023-01-22 20:20:58.358218: step: 200/470, loss: 0.4716545343399048 2023-01-22 20:20:59.067109: step: 202/470, loss: 0.02160857617855072 2023-01-22 20:20:59.769296: step: 204/470, loss: 0.004003697074949741 2023-01-22 20:21:00.515374: step: 206/470, loss: 0.007205252069979906 2023-01-22 20:21:01.209337: step: 208/470, loss: 0.01853962056338787 2023-01-22 20:21:01.893053: step: 210/470, loss: 0.004126167390495539 2023-01-22 20:21:02.633431: step: 212/470, loss: 0.026516355574131012 2023-01-22 20:21:03.389289: step: 214/470, loss: 0.013195443898439407 2023-01-22 20:21:04.051924: step: 216/470, loss: 0.02729533053934574 2023-01-22 20:21:04.750192: step: 218/470, loss: 0.016501324251294136 2023-01-22 20:21:05.507739: step: 220/470, loss: 0.011663485318422318 2023-01-22 20:21:06.269085: step: 222/470, loss: 0.16120895743370056 2023-01-22 20:21:06.969196: step: 224/470, loss: 0.00041892650187946856 2023-01-22 20:21:07.710016: step: 226/470, loss: 0.032136738300323486 2023-01-22 20:21:08.467759: step: 228/470, loss: 0.023074127733707428 2023-01-22 20:21:09.178989: step: 230/470, loss: 0.02281912975013256 2023-01-22 20:21:09.934619: step: 232/470, loss: 0.012881905771791935 2023-01-22 20:21:10.679106: step: 234/470, loss: 0.01737522892653942 2023-01-22 20:21:11.442028: step: 236/470, loss: 0.32629257440567017 2023-01-22 20:21:12.183680: step: 238/470, loss: 0.015904588624835014 2023-01-22 20:21:12.985014: step: 240/470, loss: 0.007637233939021826 2023-01-22 20:21:13.636121: step: 242/470, loss: 0.030431710183620453 2023-01-22 20:21:14.352915: step: 244/470, loss: 0.028963802382349968 2023-01-22 20:21:15.109114: step: 246/470, loss: 0.0031187073327600956 2023-01-22 20:21:15.881655: step: 248/470, loss: 0.15134938061237335 2023-01-22 20:21:16.628470: step: 250/470, loss: 0.026609888300299644 2023-01-22 20:21:17.438182: step: 252/470, loss: 0.018683232367038727 2023-01-22 20:21:18.158199: step: 254/470, loss: 0.01902608573436737 2023-01-22 20:21:18.899268: step: 256/470, loss: 0.23490701615810394 2023-01-22 20:21:19.530275: step: 258/470, loss: 0.005870967172086239 2023-01-22 20:21:20.205449: step: 260/470, loss: 0.037582509219646454 2023-01-22 20:21:20.924242: step: 262/470, loss: 0.056500356644392014 2023-01-22 20:21:21.675691: step: 264/470, loss: 0.023657605051994324 2023-01-22 20:21:22.361074: step: 266/470, loss: 0.0007725696195848286 2023-01-22 20:21:23.041911: step: 268/470, loss: 0.08393879979848862 2023-01-22 20:21:23.755712: step: 270/470, loss: 0.02035318873822689 2023-01-22 20:21:24.403421: step: 272/470, loss: 0.003998577129095793 2023-01-22 20:21:25.146908: step: 274/470, loss: 0.008322400040924549 2023-01-22 20:21:25.827268: step: 276/470, loss: 0.0008807040867395699 2023-01-22 20:21:26.592270: step: 278/470, loss: 0.0010697413235902786 2023-01-22 20:21:27.274791: step: 280/470, loss: 0.00225759856402874 2023-01-22 20:21:28.018924: step: 282/470, loss: 0.014795668423175812 2023-01-22 20:21:28.805352: step: 284/470, loss: 0.029663510620594025 2023-01-22 20:21:29.534406: step: 286/470, loss: 0.05489639937877655 2023-01-22 20:21:30.323180: step: 288/470, loss: 0.025471646338701248 2023-01-22 20:21:31.113576: step: 290/470, loss: 0.015530110336840153 2023-01-22 20:21:32.022783: step: 292/470, loss: 0.011148087680339813 2023-01-22 20:21:32.817816: step: 294/470, loss: 0.023910433053970337 2023-01-22 20:21:33.568962: step: 296/470, loss: 0.013895057141780853 2023-01-22 20:21:34.200192: step: 298/470, loss: 0.0005625097546726465 2023-01-22 20:21:34.909710: step: 300/470, loss: 0.05426546931266785 2023-01-22 20:21:35.565319: step: 302/470, loss: 0.00862344354391098 2023-01-22 20:21:36.313401: step: 304/470, loss: 0.060839202255010605 2023-01-22 20:21:37.043557: step: 306/470, loss: 0.003655111650004983 2023-01-22 20:21:37.790511: step: 308/470, loss: 0.039200231432914734 2023-01-22 20:21:38.644234: step: 310/470, loss: 0.04813004285097122 2023-01-22 20:21:39.283319: step: 312/470, loss: 0.0009427457116544247 2023-01-22 20:21:40.030847: step: 314/470, loss: 0.004311168100684881 2023-01-22 20:21:40.798672: step: 316/470, loss: 0.06394602358341217 2023-01-22 20:21:41.475685: step: 318/470, loss: 0.003433618927374482 2023-01-22 20:21:42.219217: step: 320/470, loss: 0.021177219226956367 2023-01-22 20:21:42.942659: step: 322/470, loss: 0.001753106014803052 2023-01-22 20:21:43.677134: step: 324/470, loss: 0.08285431563854218 2023-01-22 20:21:44.470281: step: 326/470, loss: 0.09834257513284683 2023-01-22 20:21:45.233622: step: 328/470, loss: 0.07702438533306122 2023-01-22 20:21:45.941729: step: 330/470, loss: 0.01453570369631052 2023-01-22 20:21:46.679533: step: 332/470, loss: 0.03407316654920578 2023-01-22 20:21:47.390387: step: 334/470, loss: 0.0027093144599348307 2023-01-22 20:21:48.061664: step: 336/470, loss: 0.017348872497677803 2023-01-22 20:21:48.888380: step: 338/470, loss: 0.10025624930858612 2023-01-22 20:21:49.642943: step: 340/470, loss: 0.023405015468597412 2023-01-22 20:21:50.404947: step: 342/470, loss: 0.1289200335741043 2023-01-22 20:21:51.165429: step: 344/470, loss: 0.001318649505265057 2023-01-22 20:21:51.868923: step: 346/470, loss: 0.3252336084842682 2023-01-22 20:21:52.580228: step: 348/470, loss: 0.00015436287503689528 2023-01-22 20:21:53.326750: step: 350/470, loss: 0.12891341745853424 2023-01-22 20:21:54.114109: step: 352/470, loss: 0.012002014555037022 2023-01-22 20:21:54.804668: step: 354/470, loss: 0.008551257662475109 2023-01-22 20:21:55.525278: step: 356/470, loss: 0.007327394559979439 2023-01-22 20:21:56.281738: step: 358/470, loss: 0.007789163384586573 2023-01-22 20:21:57.038944: step: 360/470, loss: 0.4248470962047577 2023-01-22 20:21:57.901642: step: 362/470, loss: 0.011140398681163788 2023-01-22 20:21:58.621477: step: 364/470, loss: 0.0026315178256481886 2023-01-22 20:21:59.409483: step: 366/470, loss: 0.016022127121686935 2023-01-22 20:22:00.150831: step: 368/470, loss: 0.05837252363562584 2023-01-22 20:22:00.844395: step: 370/470, loss: 0.0032597638200968504 2023-01-22 20:22:01.614953: step: 372/470, loss: 0.056031063199043274 2023-01-22 20:22:02.375338: step: 374/470, loss: 0.006611979100853205 2023-01-22 20:22:03.150687: step: 376/470, loss: 0.05569504201412201 2023-01-22 20:22:03.850783: step: 378/470, loss: 0.08907879143953323 2023-01-22 20:22:04.671258: step: 380/470, loss: 0.014300604350864887 2023-01-22 20:22:05.300317: step: 382/470, loss: 0.006313091143965721 2023-01-22 20:22:06.174053: step: 384/470, loss: 0.027734091505408287 2023-01-22 20:22:06.998699: step: 386/470, loss: 0.041512493044137955 2023-01-22 20:22:07.693622: step: 388/470, loss: 0.02768852189183235 2023-01-22 20:22:08.482845: step: 390/470, loss: 0.035742416977882385 2023-01-22 20:22:09.263099: step: 392/470, loss: 0.08417049050331116 2023-01-22 20:22:10.000447: step: 394/470, loss: 0.056899912655353546 2023-01-22 20:22:10.730684: step: 396/470, loss: 0.01473038736730814 2023-01-22 20:22:11.534893: step: 398/470, loss: 0.04913134500384331 2023-01-22 20:22:12.210215: step: 400/470, loss: 0.026948675513267517 2023-01-22 20:22:12.997543: step: 402/470, loss: 0.06986348330974579 2023-01-22 20:22:13.757930: step: 404/470, loss: 0.004675567615777254 2023-01-22 20:22:14.389756: step: 406/470, loss: 0.0008290672558359802 2023-01-22 20:22:15.092594: step: 408/470, loss: 0.008351105265319347 2023-01-22 20:22:15.864909: step: 410/470, loss: 0.03763250261545181 2023-01-22 20:22:16.574887: step: 412/470, loss: 0.020851025357842445 2023-01-22 20:22:17.316004: step: 414/470, loss: 0.06433157622814178 2023-01-22 20:22:18.017333: step: 416/470, loss: 0.005936459172517061 2023-01-22 20:22:18.765428: step: 418/470, loss: 0.046613890677690506 2023-01-22 20:22:19.456904: step: 420/470, loss: 0.019648538902401924 2023-01-22 20:22:20.185958: step: 422/470, loss: 0.010377058759331703 2023-01-22 20:22:20.871046: step: 424/470, loss: 0.03797370567917824 2023-01-22 20:22:21.703918: step: 426/470, loss: 0.10179644823074341 2023-01-22 20:22:22.437846: step: 428/470, loss: 0.042833250015974045 2023-01-22 20:22:23.224822: step: 430/470, loss: 0.13208261132240295 2023-01-22 20:22:23.967476: step: 432/470, loss: 0.09129922091960907 2023-01-22 20:22:24.666473: step: 434/470, loss: 0.0105623509734869 2023-01-22 20:22:25.370860: step: 436/470, loss: 0.006014485843479633 2023-01-22 20:22:26.103533: step: 438/470, loss: 0.014641005545854568 2023-01-22 20:22:26.814743: step: 440/470, loss: 0.012232620269060135 2023-01-22 20:22:27.490555: step: 442/470, loss: 2.0588850020430982e-05 2023-01-22 20:22:28.179555: step: 444/470, loss: 0.01798243820667267 2023-01-22 20:22:28.942174: step: 446/470, loss: 0.010261360555887222 2023-01-22 20:22:29.628988: step: 448/470, loss: 0.032064393162727356 2023-01-22 20:22:30.309434: step: 450/470, loss: 0.002512154169380665 2023-01-22 20:22:31.000536: step: 452/470, loss: 0.016261639073491096 2023-01-22 20:22:31.768223: step: 454/470, loss: 0.05349350348114967 2023-01-22 20:22:32.455633: step: 456/470, loss: 0.07273565232753754 2023-01-22 20:22:33.142893: step: 458/470, loss: 0.014662419445812702 2023-01-22 20:22:33.871383: step: 460/470, loss: 0.04515858367085457 2023-01-22 20:22:34.576517: step: 462/470, loss: 0.0028082530479878187 2023-01-22 20:22:35.253722: step: 464/470, loss: 0.06509249657392502 2023-01-22 20:22:35.979731: step: 466/470, loss: 0.015094724483788013 2023-01-22 20:22:36.690705: step: 468/470, loss: 0.0066069429740309715 2023-01-22 20:22:37.453523: step: 470/470, loss: 0.09069467335939407 2023-01-22 20:22:38.131829: step: 472/470, loss: 0.001958508975803852 2023-01-22 20:22:38.872689: step: 474/470, loss: 0.004850469995290041 2023-01-22 20:22:39.598288: step: 476/470, loss: 0.0013880267506465316 2023-01-22 20:22:40.338317: step: 478/470, loss: 0.07323987036943436 2023-01-22 20:22:40.999745: step: 480/470, loss: 0.18070386350154877 2023-01-22 20:22:41.799083: step: 482/470, loss: 0.0033805707935243845 2023-01-22 20:22:42.510832: step: 484/470, loss: 0.024858374148607254 2023-01-22 20:22:43.139991: step: 486/470, loss: 0.0003526340296957642 2023-01-22 20:22:43.870194: step: 488/470, loss: 0.007729541510343552 2023-01-22 20:22:44.628185: step: 490/470, loss: 0.039696142077445984 2023-01-22 20:22:45.335380: step: 492/470, loss: 0.011434253305196762 2023-01-22 20:22:46.076169: step: 494/470, loss: 0.0433819405734539 2023-01-22 20:22:46.803879: step: 496/470, loss: 0.030899059027433395 2023-01-22 20:22:47.511288: step: 498/470, loss: 0.04821249470114708 2023-01-22 20:22:48.256505: step: 500/470, loss: 0.02131025679409504 2023-01-22 20:22:49.115221: step: 502/470, loss: 0.05077216029167175 2023-01-22 20:22:49.851781: step: 504/470, loss: 0.0022677837405353785 2023-01-22 20:22:50.711386: step: 506/470, loss: 0.026514790952205658 2023-01-22 20:22:51.607817: step: 508/470, loss: 0.043541330844163895 2023-01-22 20:22:52.332603: step: 510/470, loss: 0.010781260207295418 2023-01-22 20:22:53.126135: step: 512/470, loss: 0.02016189508140087 2023-01-22 20:22:53.859178: step: 514/470, loss: 0.043015431612730026 2023-01-22 20:22:54.627493: step: 516/470, loss: 0.018223173916339874 2023-01-22 20:22:55.384138: step: 518/470, loss: 0.01400044560432434 2023-01-22 20:22:56.116430: step: 520/470, loss: 0.11256732046604156 2023-01-22 20:22:56.907549: step: 522/470, loss: 0.06279245764017105 2023-01-22 20:22:57.662934: step: 524/470, loss: 0.03156261146068573 2023-01-22 20:22:58.396060: step: 526/470, loss: 0.47835269570350647 2023-01-22 20:22:59.255845: step: 528/470, loss: 0.00015236996114253998 2023-01-22 20:23:00.008067: step: 530/470, loss: 0.12638600170612335 2023-01-22 20:23:00.792175: step: 532/470, loss: 0.012023607268929482 2023-01-22 20:23:01.530185: step: 534/470, loss: 0.015657739713788033 2023-01-22 20:23:02.189231: step: 536/470, loss: 0.007505686488002539 2023-01-22 20:23:02.895309: step: 538/470, loss: 0.0051700943149626255 2023-01-22 20:23:03.697958: step: 540/470, loss: 0.009726532734930515 2023-01-22 20:23:04.492448: step: 542/470, loss: 0.04598125070333481 2023-01-22 20:23:05.220958: step: 544/470, loss: 0.006266491021960974 2023-01-22 20:23:05.901756: step: 546/470, loss: 0.008492393419146538 2023-01-22 20:23:06.547088: step: 548/470, loss: 0.001625984674319625 2023-01-22 20:23:07.322391: step: 550/470, loss: 0.044714074581861496 2023-01-22 20:23:08.016629: step: 552/470, loss: 0.13521620631217957 2023-01-22 20:23:08.673345: step: 554/470, loss: 0.0015927805798128247 2023-01-22 20:23:09.497643: step: 556/470, loss: 0.04356268420815468 2023-01-22 20:23:10.302205: step: 558/470, loss: 0.051665596663951874 2023-01-22 20:23:11.038768: step: 560/470, loss: 0.028444265946745872 2023-01-22 20:23:11.779037: step: 562/470, loss: 0.041954174637794495 2023-01-22 20:23:12.507502: step: 564/470, loss: 0.022699544206261635 2023-01-22 20:23:13.157242: step: 566/470, loss: 0.02368500828742981 2023-01-22 20:23:13.923937: step: 568/470, loss: 0.04992617666721344 2023-01-22 20:23:14.659315: step: 570/470, loss: 0.0033017806708812714 2023-01-22 20:23:15.316756: step: 572/470, loss: 0.005991379264742136 2023-01-22 20:23:16.030577: step: 574/470, loss: 0.3665614724159241 2023-01-22 20:23:16.778143: step: 576/470, loss: 0.01564880833029747 2023-01-22 20:23:17.577365: step: 578/470, loss: 0.04914989322423935 2023-01-22 20:23:18.302123: step: 580/470, loss: 0.007808396127074957 2023-01-22 20:23:19.067298: step: 582/470, loss: 0.13653407990932465 2023-01-22 20:23:19.770380: step: 584/470, loss: 0.0009268509456887841 2023-01-22 20:23:20.523646: step: 586/470, loss: 0.033205803483724594 2023-01-22 20:23:21.216585: step: 588/470, loss: 0.05496671423316002 2023-01-22 20:23:21.951905: step: 590/470, loss: 0.04230504482984543 2023-01-22 20:23:22.708974: step: 592/470, loss: 0.03090505301952362 2023-01-22 20:23:23.378888: step: 594/470, loss: 0.007094330154359341 2023-01-22 20:23:24.114461: step: 596/470, loss: 0.013848799280822277 2023-01-22 20:23:24.868350: step: 598/470, loss: 0.003194592660292983 2023-01-22 20:23:25.662332: step: 600/470, loss: 0.01055984664708376 2023-01-22 20:23:26.491808: step: 602/470, loss: 0.014651630073785782 2023-01-22 20:23:27.262037: step: 604/470, loss: 0.0857321172952652 2023-01-22 20:23:27.994465: step: 606/470, loss: 0.001349782571196556 2023-01-22 20:23:28.710812: step: 608/470, loss: 0.14607299864292145 2023-01-22 20:23:29.472218: step: 610/470, loss: 0.007140479516237974 2023-01-22 20:23:30.198208: step: 612/470, loss: 0.0029003897216171026 2023-01-22 20:23:30.869030: step: 614/470, loss: 0.008922108449041843 2023-01-22 20:23:31.634931: step: 616/470, loss: 0.009059912525117397 2023-01-22 20:23:32.359430: step: 618/470, loss: 0.03438716009259224 2023-01-22 20:23:33.077052: step: 620/470, loss: 0.039528436958789825 2023-01-22 20:23:33.744219: step: 622/470, loss: 0.1352343112230301 2023-01-22 20:23:34.496861: step: 624/470, loss: 0.06945229321718216 2023-01-22 20:23:35.269628: step: 626/470, loss: 0.001418177504092455 2023-01-22 20:23:35.951060: step: 628/470, loss: 0.030531620606780052 2023-01-22 20:23:36.692831: step: 630/470, loss: 0.00512252002954483 2023-01-22 20:23:37.438598: step: 632/470, loss: 0.07616572082042694 2023-01-22 20:23:38.130586: step: 634/470, loss: 0.004690864589065313 2023-01-22 20:23:38.781575: step: 636/470, loss: 0.6737900376319885 2023-01-22 20:23:39.559935: step: 638/470, loss: 0.02247127704322338 2023-01-22 20:23:40.357695: step: 640/470, loss: 0.04330654442310333 2023-01-22 20:23:41.096675: step: 642/470, loss: 0.02554977498948574 2023-01-22 20:23:41.833870: step: 644/470, loss: 0.05359509587287903 2023-01-22 20:23:42.552090: step: 646/470, loss: 0.04412570595741272 2023-01-22 20:23:43.299236: step: 648/470, loss: 0.03207087889313698 2023-01-22 20:23:44.052034: step: 650/470, loss: 0.03689567372202873 2023-01-22 20:23:44.726386: step: 652/470, loss: 0.01859690062701702 2023-01-22 20:23:45.459079: step: 654/470, loss: 0.026259824633598328 2023-01-22 20:23:46.170012: step: 656/470, loss: 0.0039988174103200436 2023-01-22 20:23:46.961975: step: 658/470, loss: 0.02299419976770878 2023-01-22 20:23:47.666275: step: 660/470, loss: 0.020622316747903824 2023-01-22 20:23:48.380311: step: 662/470, loss: 0.029058068990707397 2023-01-22 20:23:49.187943: step: 664/470, loss: 0.002780361333861947 2023-01-22 20:23:49.945584: step: 666/470, loss: 0.0022796066477894783 2023-01-22 20:23:50.655886: step: 668/470, loss: 0.03231862932443619 2023-01-22 20:23:51.423956: step: 670/470, loss: 0.05333375930786133 2023-01-22 20:23:52.223185: step: 672/470, loss: 0.006049145944416523 2023-01-22 20:23:52.934929: step: 674/470, loss: 0.011256158351898193 2023-01-22 20:23:53.608676: step: 676/470, loss: 0.008002914488315582 2023-01-22 20:23:54.367333: step: 678/470, loss: 0.022282328456640244 2023-01-22 20:23:55.014009: step: 680/470, loss: 0.001529976725578308 2023-01-22 20:23:55.731971: step: 682/470, loss: 0.4798831641674042 2023-01-22 20:23:56.505640: step: 684/470, loss: 0.012694069184362888 2023-01-22 20:23:57.313120: step: 686/470, loss: 0.2012556791305542 2023-01-22 20:23:58.001963: step: 688/470, loss: 0.3873967230319977 2023-01-22 20:23:58.825925: step: 690/470, loss: 0.0851685032248497 2023-01-22 20:23:59.596629: step: 692/470, loss: 0.11975271999835968 2023-01-22 20:24:00.320695: step: 694/470, loss: 0.009530964307487011 2023-01-22 20:24:01.011447: step: 696/470, loss: 0.046175580471754074 2023-01-22 20:24:01.723050: step: 698/470, loss: 1.4199903011322021 2023-01-22 20:24:02.447381: step: 700/470, loss: 0.004222056828439236 2023-01-22 20:24:03.123148: step: 702/470, loss: 0.004082622472196817 2023-01-22 20:24:03.894587: step: 704/470, loss: 0.04654672369360924 2023-01-22 20:24:04.627701: step: 706/470, loss: 0.11172907054424286 2023-01-22 20:24:05.387892: step: 708/470, loss: 0.0367024689912796 2023-01-22 20:24:06.094472: step: 710/470, loss: 8.773482841206715e-05 2023-01-22 20:24:06.849623: step: 712/470, loss: 0.01656370982527733 2023-01-22 20:24:07.547223: step: 714/470, loss: 0.012438789010047913 2023-01-22 20:24:08.254781: step: 716/470, loss: 0.009644479490816593 2023-01-22 20:24:08.937497: step: 718/470, loss: 0.013749388046562672 2023-01-22 20:24:09.652873: step: 720/470, loss: 0.023235702887177467 2023-01-22 20:24:10.412560: step: 722/470, loss: 0.07161347568035126 2023-01-22 20:24:11.228557: step: 724/470, loss: 0.043580781668424606 2023-01-22 20:24:11.981081: step: 726/470, loss: 0.05737774074077606 2023-01-22 20:24:12.701955: step: 728/470, loss: 0.0016026493394747376 2023-01-22 20:24:13.467466: step: 730/470, loss: 0.057738371193408966 2023-01-22 20:24:14.216982: step: 732/470, loss: 0.00018781011749524623 2023-01-22 20:24:15.075104: step: 734/470, loss: 0.03806695714592934 2023-01-22 20:24:15.775750: step: 736/470, loss: 0.06477882713079453 2023-01-22 20:24:16.499548: step: 738/470, loss: 0.0006179798510856926 2023-01-22 20:24:17.266783: step: 740/470, loss: 0.047551143914461136 2023-01-22 20:24:18.002333: step: 742/470, loss: 0.47022610902786255 2023-01-22 20:24:18.769769: step: 744/470, loss: 0.02439289726316929 2023-01-22 20:24:19.493750: step: 746/470, loss: 0.023850999772548676 2023-01-22 20:24:20.258935: step: 748/470, loss: 0.13051940500736237 2023-01-22 20:24:20.928864: step: 750/470, loss: 0.011319885030388832 2023-01-22 20:24:21.685591: step: 752/470, loss: 0.05051903426647186 2023-01-22 20:24:22.338863: step: 754/470, loss: 0.05477369949221611 2023-01-22 20:24:22.959733: step: 756/470, loss: 0.02628343366086483 2023-01-22 20:24:23.694850: step: 758/470, loss: 0.006637097802013159 2023-01-22 20:24:24.421167: step: 760/470, loss: 0.07080532610416412 2023-01-22 20:24:25.094262: step: 762/470, loss: 0.03669466823339462 2023-01-22 20:24:25.750973: step: 764/470, loss: 0.0033256318420171738 2023-01-22 20:24:26.410403: step: 766/470, loss: 0.11121101677417755 2023-01-22 20:24:27.114197: step: 768/470, loss: 0.006534420885145664 2023-01-22 20:24:27.844377: step: 770/470, loss: 0.03840123862028122 2023-01-22 20:24:28.578205: step: 772/470, loss: 0.015428757295012474 2023-01-22 20:24:29.303220: step: 774/470, loss: 0.30759334564208984 2023-01-22 20:24:30.118279: step: 776/470, loss: 0.013859348371624947 2023-01-22 20:24:30.757945: step: 778/470, loss: 0.02122689038515091 2023-01-22 20:24:31.528477: step: 780/470, loss: 0.03834621235728264 2023-01-22 20:24:32.229594: step: 782/470, loss: 0.5577226281166077 2023-01-22 20:24:32.918548: step: 784/470, loss: 0.01965031400322914 2023-01-22 20:24:33.698501: step: 786/470, loss: 0.016025938093662262 2023-01-22 20:24:34.455501: step: 788/470, loss: 0.017744455486536026 2023-01-22 20:24:35.185420: step: 790/470, loss: 0.006757447961717844 2023-01-22 20:24:35.933431: step: 792/470, loss: 0.07866999506950378 2023-01-22 20:24:36.798531: step: 794/470, loss: 0.006022907793521881 2023-01-22 20:24:37.545277: step: 796/470, loss: 0.0842958465218544 2023-01-22 20:24:38.235893: step: 798/470, loss: 0.00013147601566743106 2023-01-22 20:24:38.940900: step: 800/470, loss: 0.002207849407568574 2023-01-22 20:24:39.724620: step: 802/470, loss: 0.011025402694940567 2023-01-22 20:24:40.449216: step: 804/470, loss: 0.07100294530391693 2023-01-22 20:24:41.166428: step: 806/470, loss: 0.12755174934864044 2023-01-22 20:24:41.881602: step: 808/470, loss: 0.0014298518653959036 2023-01-22 20:24:42.611424: step: 810/470, loss: 0.07408328354358673 2023-01-22 20:24:43.267805: step: 812/470, loss: 0.0061090909875929356 2023-01-22 20:24:44.025227: step: 814/470, loss: 0.018763698637485504 2023-01-22 20:24:44.701071: step: 816/470, loss: 0.004722012206912041 2023-01-22 20:24:45.505039: step: 818/470, loss: 0.10819140076637268 2023-01-22 20:24:46.249236: step: 820/470, loss: 0.013299317099153996 2023-01-22 20:24:46.978352: step: 822/470, loss: 0.23392438888549805 2023-01-22 20:24:47.642403: step: 824/470, loss: 0.018688471987843513 2023-01-22 20:24:48.367431: step: 826/470, loss: 0.05231665074825287 2023-01-22 20:24:49.131329: step: 828/470, loss: 0.05416341871023178 2023-01-22 20:24:49.888154: step: 830/470, loss: 0.0048498413525521755 2023-01-22 20:24:50.577622: step: 832/470, loss: 0.056482456624507904 2023-01-22 20:24:51.344805: step: 834/470, loss: 0.06690236181020737 2023-01-22 20:24:52.003745: step: 836/470, loss: 0.04062666743993759 2023-01-22 20:24:52.681896: step: 838/470, loss: 0.04290313646197319 2023-01-22 20:24:53.403916: step: 840/470, loss: 0.06305599957704544 2023-01-22 20:24:54.148990: step: 842/470, loss: 9.941688537597656 2023-01-22 20:24:54.867237: step: 844/470, loss: 0.060064323246479034 2023-01-22 20:24:55.641935: step: 846/470, loss: 0.054962724447250366 2023-01-22 20:24:56.387158: step: 848/470, loss: 0.0005881677498109639 2023-01-22 20:24:57.169950: step: 850/470, loss: 0.016850154846906662 2023-01-22 20:24:57.883374: step: 852/470, loss: 0.04970991238951683 2023-01-22 20:24:58.726330: step: 854/470, loss: 0.22735270857810974 2023-01-22 20:24:59.476148: step: 856/470, loss: 0.021856531500816345 2023-01-22 20:25:00.133857: step: 858/470, loss: 0.06958413124084473 2023-01-22 20:25:00.834664: step: 860/470, loss: 0.010652045719325542 2023-01-22 20:25:01.612727: step: 862/470, loss: 0.0281230416148901 2023-01-22 20:25:02.345075: step: 864/470, loss: 0.0192283783107996 2023-01-22 20:25:03.009424: step: 866/470, loss: 0.014173178002238274 2023-01-22 20:25:03.744940: step: 868/470, loss: 0.02912173420190811 2023-01-22 20:25:04.465802: step: 870/470, loss: 0.011722569353878498 2023-01-22 20:25:05.228256: step: 872/470, loss: 0.04161560535430908 2023-01-22 20:25:05.965764: step: 874/470, loss: 0.19505290687084198 2023-01-22 20:25:06.635887: step: 876/470, loss: 0.03534523397684097 2023-01-22 20:25:07.315073: step: 878/470, loss: 0.05336989462375641 2023-01-22 20:25:07.966688: step: 880/470, loss: 0.0025530781131237745 2023-01-22 20:25:08.693322: step: 882/470, loss: 0.016439758241176605 2023-01-22 20:25:09.475403: step: 884/470, loss: 0.006466133054345846 2023-01-22 20:25:10.229454: step: 886/470, loss: 0.01774168759584427 2023-01-22 20:25:10.905388: step: 888/470, loss: 0.04091699793934822 2023-01-22 20:25:11.599308: step: 890/470, loss: 0.1776171773672104 2023-01-22 20:25:12.419971: step: 892/470, loss: 0.11639781296253204 2023-01-22 20:25:13.088543: step: 894/470, loss: 0.01566343568265438 2023-01-22 20:25:13.837595: step: 896/470, loss: 0.024013573303818703 2023-01-22 20:25:14.602303: step: 898/470, loss: 0.004490656778216362 2023-01-22 20:25:15.448700: step: 900/470, loss: 0.0408441387116909 2023-01-22 20:25:16.103492: step: 902/470, loss: 0.08740266412496567 2023-01-22 20:25:16.811506: step: 904/470, loss: 0.026669248938560486 2023-01-22 20:25:17.642141: step: 906/470, loss: 0.037839457392692566 2023-01-22 20:25:18.375881: step: 908/470, loss: 0.19800342619419098 2023-01-22 20:25:19.069251: step: 910/470, loss: 0.007794266100972891 2023-01-22 20:25:19.758622: step: 912/470, loss: 0.36751270294189453 2023-01-22 20:25:20.466120: step: 914/470, loss: 0.5664834976196289 2023-01-22 20:25:21.200288: step: 916/470, loss: 0.021370526403188705 2023-01-22 20:25:21.945171: step: 918/470, loss: 0.010546115227043629 2023-01-22 20:25:22.717826: step: 920/470, loss: 0.019952479749917984 2023-01-22 20:25:23.465213: step: 922/470, loss: 0.03363412991166115 2023-01-22 20:25:24.237938: step: 924/470, loss: 0.018996473401784897 2023-01-22 20:25:25.070387: step: 926/470, loss: 0.052708521485328674 2023-01-22 20:25:25.699486: step: 928/470, loss: 0.3829970955848694 2023-01-22 20:25:26.456818: step: 930/470, loss: 0.05672343447804451 2023-01-22 20:25:27.233190: step: 932/470, loss: 0.19621804356575012 2023-01-22 20:25:27.966386: step: 934/470, loss: 0.040584880858659744 2023-01-22 20:25:28.653592: step: 936/470, loss: 0.04768889769911766 2023-01-22 20:25:29.367666: step: 938/470, loss: 0.30375567078590393 2023-01-22 20:25:30.137845: step: 940/470, loss: 0.2606930136680603 2023-01-22 20:25:30.776231: step: 942/470, loss: 0.0006679189973510802 ================================================== Loss: 0.073 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3085748792270532, 'r': 0.32321315623023406, 'f1': 0.3157244362063639}, 'combined': 0.23263905825732079, 'epoch': 27} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33988454281387587, 'r': 0.35099615286740643, 'f1': 0.3453509924144775}, 'combined': 0.24054297979117836, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30335597567927036, 'r': 0.3188979326875062, 'f1': 0.31093285943814203}, 'combined': 0.22910842274389412, 'epoch': 27} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34509521403698895, 'r': 0.3567089952786184, 'f1': 0.3508060095411471}, 'combined': 0.24434249420776416, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28896710610520926, 'r': 0.3295431323894322, 'f1': 0.30792416803055106}, 'combined': 0.22689149223303762, 'epoch': 27} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3362186543844472, 'r': 0.3594953304572166, 'f1': 0.3474676056463804}, 'combined': 0.24201723776364806, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24744897959183673, 'r': 0.3464285714285714, 'f1': 0.28869047619047616}, 'combined': 0.19246031746031744, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4642857142857143, 'r': 0.33620689655172414, 'f1': 0.39}, 'combined': 0.26, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 20:28:06.611413: step: 2/470, loss: 0.0020869718864560127 2023-01-22 20:28:07.490109: step: 4/470, loss: 0.0639810636639595 2023-01-22 20:28:08.299660: step: 6/470, loss: 0.009995676577091217 2023-01-22 20:28:08.982630: step: 8/470, loss: 0.009512209333479404 2023-01-22 20:28:09.766167: step: 10/470, loss: 0.006158136297017336 2023-01-22 20:28:10.451989: step: 12/470, loss: 0.007754956372082233 2023-01-22 20:28:11.104249: step: 14/470, loss: 0.0035998544190078974 2023-01-22 20:28:11.815648: step: 16/470, loss: 0.015439898706972599 2023-01-22 20:28:12.552140: step: 18/470, loss: 0.014050621539354324 2023-01-22 20:28:13.240051: step: 20/470, loss: 0.04366869106888771 2023-01-22 20:28:14.059365: step: 22/470, loss: 0.014802333898842335 2023-01-22 20:28:14.817268: step: 24/470, loss: 0.03355008736252785 2023-01-22 20:28:15.531564: step: 26/470, loss: 0.0021390863694250584 2023-01-22 20:28:16.196050: step: 28/470, loss: 0.017872659489512444 2023-01-22 20:28:16.956656: step: 30/470, loss: 0.0009868554770946503 2023-01-22 20:28:17.715524: step: 32/470, loss: 0.007105813827365637 2023-01-22 20:28:18.457805: step: 34/470, loss: 0.020172271877527237 2023-01-22 20:28:19.224982: step: 36/470, loss: 0.058939579874277115 2023-01-22 20:28:19.959632: step: 38/470, loss: 0.08055589348077774 2023-01-22 20:28:20.684204: step: 40/470, loss: 0.053452666848897934 2023-01-22 20:28:21.417122: step: 42/470, loss: 0.018411824479699135 2023-01-22 20:28:22.188658: step: 44/470, loss: 0.018836546689271927 2023-01-22 20:28:22.885769: step: 46/470, loss: 0.001567376428283751 2023-01-22 20:28:23.590656: step: 48/470, loss: 0.002337083453312516 2023-01-22 20:28:24.307959: step: 50/470, loss: 0.2951555848121643 2023-01-22 20:28:24.960043: step: 52/470, loss: 0.01612759754061699 2023-01-22 20:28:25.808388: step: 54/470, loss: 0.011461992748081684 2023-01-22 20:28:26.540268: step: 56/470, loss: 0.0009162522037513554 2023-01-22 20:28:27.217405: step: 58/470, loss: 0.017956262454390526 2023-01-22 20:28:27.977413: step: 60/470, loss: 0.167605459690094 2023-01-22 20:28:28.714682: step: 62/470, loss: 0.0007975992048159242 2023-01-22 20:28:29.432970: step: 64/470, loss: 0.00465493556112051 2023-01-22 20:28:30.111796: step: 66/470, loss: 0.0031678981613367796 2023-01-22 20:28:30.809046: step: 68/470, loss: 0.011101577430963516 2023-01-22 20:28:31.572554: step: 70/470, loss: 0.04563984274864197 2023-01-22 20:28:32.286491: step: 72/470, loss: 0.014584038406610489 2023-01-22 20:28:33.031976: step: 74/470, loss: 0.000692845496814698 2023-01-22 20:28:33.859669: step: 76/470, loss: 0.003819751553237438 2023-01-22 20:28:34.539466: step: 78/470, loss: 0.007177893538028002 2023-01-22 20:28:35.332133: step: 80/470, loss: 0.013194219209253788 2023-01-22 20:28:36.067160: step: 82/470, loss: 0.005768152419477701 2023-01-22 20:28:36.825986: step: 84/470, loss: 0.008911560289561749 2023-01-22 20:28:37.632435: step: 86/470, loss: 0.029109152033925056 2023-01-22 20:28:38.347048: step: 88/470, loss: 0.0036423057317733765 2023-01-22 20:28:39.068296: step: 90/470, loss: 0.01244268286973238 2023-01-22 20:28:39.755155: step: 92/470, loss: 0.03755531460046768 2023-01-22 20:28:40.467910: step: 94/470, loss: 0.007035805378109217 2023-01-22 20:28:41.209438: step: 96/470, loss: 0.0064859106205403805 2023-01-22 20:28:41.889390: step: 98/470, loss: 0.000647165987174958 2023-01-22 20:28:42.570539: step: 100/470, loss: 0.014123012311756611 2023-01-22 20:28:43.299480: step: 102/470, loss: 0.0009315513307228684 2023-01-22 20:28:43.961593: step: 104/470, loss: 0.03605140745639801 2023-01-22 20:28:44.702482: step: 106/470, loss: 0.08731332421302795 2023-01-22 20:28:45.430112: step: 108/470, loss: 0.008800004608929157 2023-01-22 20:28:46.139365: step: 110/470, loss: 0.029222659766674042 2023-01-22 20:28:46.908584: step: 112/470, loss: 0.03977571055293083 2023-01-22 20:28:47.654206: step: 114/470, loss: 0.00808743480592966 2023-01-22 20:28:48.424263: step: 116/470, loss: 0.030444975942373276 2023-01-22 20:28:49.178460: step: 118/470, loss: 0.0036042286083102226 2023-01-22 20:28:49.834223: step: 120/470, loss: 0.001594691420905292 2023-01-22 20:28:50.565241: step: 122/470, loss: 0.034792497754096985 2023-01-22 20:28:51.311232: step: 124/470, loss: 0.003342484124004841 2023-01-22 20:28:52.000580: step: 126/470, loss: 0.003620708826929331 2023-01-22 20:28:52.705854: step: 128/470, loss: 0.058778248727321625 2023-01-22 20:28:53.371784: step: 130/470, loss: 0.09761997312307358 2023-01-22 20:28:54.100874: step: 132/470, loss: 0.042754001915454865 2023-01-22 20:28:54.763690: step: 134/470, loss: 0.018591521307826042 2023-01-22 20:28:55.507482: step: 136/470, loss: 0.02833203226327896 2023-01-22 20:28:56.267172: step: 138/470, loss: 0.06880441308021545 2023-01-22 20:28:56.979496: step: 140/470, loss: 0.03585517033934593 2023-01-22 20:28:57.773961: step: 142/470, loss: 0.004842748399823904 2023-01-22 20:28:58.460218: step: 144/470, loss: 0.012239599600434303 2023-01-22 20:28:59.256441: step: 146/470, loss: 0.0006771074840798974 2023-01-22 20:29:00.007971: step: 148/470, loss: 0.005776724312454462 2023-01-22 20:29:00.741874: step: 150/470, loss: 0.0004973204340785742 2023-01-22 20:29:01.405157: step: 152/470, loss: 0.06911028921604156 2023-01-22 20:29:02.211122: step: 154/470, loss: 0.010073547251522541 2023-01-22 20:29:02.936884: step: 156/470, loss: 0.0016500890487805009 2023-01-22 20:29:03.622606: step: 158/470, loss: 0.07199298590421677 2023-01-22 20:29:04.323324: step: 160/470, loss: 0.0059176781214773655 2023-01-22 20:29:05.029923: step: 162/470, loss: 0.006079540122300386 2023-01-22 20:29:05.752861: step: 164/470, loss: 0.16469572484493256 2023-01-22 20:29:06.519751: step: 166/470, loss: 0.08042943477630615 2023-01-22 20:29:07.199838: step: 168/470, loss: 0.07466889917850494 2023-01-22 20:29:07.928427: step: 170/470, loss: 0.02825554646551609 2023-01-22 20:29:08.633024: step: 172/470, loss: 0.07861734926700592 2023-01-22 20:29:09.379604: step: 174/470, loss: 0.012655568309128284 2023-01-22 20:29:10.099091: step: 176/470, loss: 0.4837439954280853 2023-01-22 20:29:10.851749: step: 178/470, loss: 0.015461144037544727 2023-01-22 20:29:11.596888: step: 180/470, loss: 0.04108460992574692 2023-01-22 20:29:12.343742: step: 182/470, loss: 0.0002883071720134467 2023-01-22 20:29:12.983275: step: 184/470, loss: 0.0014073270140215755 2023-01-22 20:29:13.753014: step: 186/470, loss: 0.0024875374510884285 2023-01-22 20:29:14.505310: step: 188/470, loss: 0.08386242389678955 2023-01-22 20:29:15.184995: step: 190/470, loss: 0.004048591013997793 2023-01-22 20:29:15.826217: step: 192/470, loss: 0.0006271903403103352 2023-01-22 20:29:16.627895: step: 194/470, loss: 0.003816502168774605 2023-01-22 20:29:17.395203: step: 196/470, loss: 0.04496389627456665 2023-01-22 20:29:18.148255: step: 198/470, loss: 0.06660241633653641 2023-01-22 20:29:18.899516: step: 200/470, loss: 0.011314080096781254 2023-01-22 20:29:19.712814: step: 202/470, loss: 0.010811764746904373 2023-01-22 20:29:20.408147: step: 204/470, loss: 0.001625021337531507 2023-01-22 20:29:21.107840: step: 206/470, loss: 0.07973671704530716 2023-01-22 20:29:21.902803: step: 208/470, loss: 0.016757629811763763 2023-01-22 20:29:22.609886: step: 210/470, loss: 0.014848452992737293 2023-01-22 20:29:23.368883: step: 212/470, loss: 0.02553386054933071 2023-01-22 20:29:24.051486: step: 214/470, loss: 0.06657985597848892 2023-01-22 20:29:24.839258: step: 216/470, loss: 0.00882027018815279 2023-01-22 20:29:25.592598: step: 218/470, loss: 0.015855491161346436 2023-01-22 20:29:26.330659: step: 220/470, loss: 0.023395488038659096 2023-01-22 20:29:27.020683: step: 222/470, loss: 0.08313391357660294 2023-01-22 20:29:27.729660: step: 224/470, loss: 0.002644822234287858 2023-01-22 20:29:28.416418: step: 226/470, loss: 0.4776703119277954 2023-01-22 20:29:29.162956: step: 228/470, loss: 0.033827830106019974 2023-01-22 20:29:29.907852: step: 230/470, loss: 0.002036722842603922 2023-01-22 20:29:30.658130: step: 232/470, loss: 0.07246463000774384 2023-01-22 20:29:31.478371: step: 234/470, loss: 2.729046583175659 2023-01-22 20:29:32.242267: step: 236/470, loss: 0.0005667444784194231 2023-01-22 20:29:32.980095: step: 238/470, loss: 0.010163289494812489 2023-01-22 20:29:33.703902: step: 240/470, loss: 0.040548261255025864 2023-01-22 20:29:34.453833: step: 242/470, loss: 0.07027406245470047 2023-01-22 20:29:35.260987: step: 244/470, loss: 0.045413631945848465 2023-01-22 20:29:35.971063: step: 246/470, loss: 0.018411103636026382 2023-01-22 20:29:36.621455: step: 248/470, loss: 0.0008637277642264962 2023-01-22 20:29:37.364229: step: 250/470, loss: 0.02072082832455635 2023-01-22 20:29:38.122295: step: 252/470, loss: 0.03055809810757637 2023-01-22 20:29:38.847659: step: 254/470, loss: 0.015467851422727108 2023-01-22 20:29:39.576497: step: 256/470, loss: 0.003622818971052766 2023-01-22 20:29:40.350225: step: 258/470, loss: 0.007900088094174862 2023-01-22 20:29:41.030811: step: 260/470, loss: 0.0008739789482206106 2023-01-22 20:29:41.704367: step: 262/470, loss: 0.07829133421182632 2023-01-22 20:29:42.440514: step: 264/470, loss: 0.013694636523723602 2023-01-22 20:29:43.120908: step: 266/470, loss: 0.15331532061100006 2023-01-22 20:29:43.824457: step: 268/470, loss: 0.008623996749520302 2023-01-22 20:29:44.502813: step: 270/470, loss: 0.014343681745231152 2023-01-22 20:29:45.273283: step: 272/470, loss: 0.00040280522080138326 2023-01-22 20:29:45.968448: step: 274/470, loss: 0.09008922427892685 2023-01-22 20:29:46.705414: step: 276/470, loss: 0.016768047586083412 2023-01-22 20:29:47.488602: step: 278/470, loss: 0.00032845677924342453 2023-01-22 20:29:48.293653: step: 280/470, loss: 0.02407873421907425 2023-01-22 20:29:49.068500: step: 282/470, loss: 0.07986540347337723 2023-01-22 20:29:49.783783: step: 284/470, loss: 0.03831029310822487 2023-01-22 20:29:50.465367: step: 286/470, loss: 0.00768911000341177 2023-01-22 20:29:51.164387: step: 288/470, loss: 0.14820675551891327 2023-01-22 20:29:51.866506: step: 290/470, loss: 0.06754887849092484 2023-01-22 20:29:52.640347: step: 292/470, loss: 0.03733934462070465 2023-01-22 20:29:53.357747: step: 294/470, loss: 0.03799036517739296 2023-01-22 20:29:54.057629: step: 296/470, loss: 0.002790941623970866 2023-01-22 20:29:54.871684: step: 298/470, loss: 0.010808521881699562 2023-01-22 20:29:55.617533: step: 300/470, loss: 0.010266797617077827 2023-01-22 20:29:56.299459: step: 302/470, loss: 0.0021426889579743147 2023-01-22 20:29:57.052496: step: 304/470, loss: 0.006693427916616201 2023-01-22 20:29:57.807743: step: 306/470, loss: 0.012375026941299438 2023-01-22 20:29:58.596520: step: 308/470, loss: 0.0827789232134819 2023-01-22 20:29:59.274591: step: 310/470, loss: 0.07285638898611069 2023-01-22 20:30:00.055574: step: 312/470, loss: 0.0009760952088981867 2023-01-22 20:30:00.770394: step: 314/470, loss: 0.006536416243761778 2023-01-22 20:30:01.625433: step: 316/470, loss: 0.2650093734264374 2023-01-22 20:30:02.339138: step: 318/470, loss: 0.02324749529361725 2023-01-22 20:30:03.124407: step: 320/470, loss: 0.007267648819833994 2023-01-22 20:30:03.836253: step: 322/470, loss: 0.02628829888999462 2023-01-22 20:30:04.539418: step: 324/470, loss: 0.023430757224559784 2023-01-22 20:30:05.261061: step: 326/470, loss: 0.019561942666769028 2023-01-22 20:30:05.968560: step: 328/470, loss: 0.0054289596155285835 2023-01-22 20:30:06.684758: step: 330/470, loss: 0.015765221789479256 2023-01-22 20:30:07.422953: step: 332/470, loss: 0.18133984506130219 2023-01-22 20:30:08.108457: step: 334/470, loss: 0.018263421952724457 2023-01-22 20:30:08.802624: step: 336/470, loss: 0.060671113431453705 2023-01-22 20:30:09.563208: step: 338/470, loss: 0.07180456817150116 2023-01-22 20:30:10.327997: step: 340/470, loss: 0.005747984629124403 2023-01-22 20:30:11.095582: step: 342/470, loss: 0.29718539118766785 2023-01-22 20:30:11.855430: step: 344/470, loss: 0.0482538603246212 2023-01-22 20:30:12.625670: step: 346/470, loss: 0.05792737379670143 2023-01-22 20:30:13.384709: step: 348/470, loss: 0.12915441393852234 2023-01-22 20:30:14.054213: step: 350/470, loss: 0.017134780064225197 2023-01-22 20:30:14.729021: step: 352/470, loss: 0.037275586277246475 2023-01-22 20:30:15.516826: step: 354/470, loss: 0.010397748090326786 2023-01-22 20:30:16.232237: step: 356/470, loss: 0.008629174903035164 2023-01-22 20:30:16.871026: step: 358/470, loss: 0.001150502823293209 2023-01-22 20:30:17.708573: step: 360/470, loss: 0.03983455151319504 2023-01-22 20:30:18.523051: step: 362/470, loss: 0.18434298038482666 2023-01-22 20:30:19.244700: step: 364/470, loss: 0.009704058058559895 2023-01-22 20:30:19.988502: step: 366/470, loss: 0.04657233878970146 2023-01-22 20:30:20.688436: step: 368/470, loss: 14.60478687286377 2023-01-22 20:30:21.424550: step: 370/470, loss: 0.02620551362633705 2023-01-22 20:30:22.125342: step: 372/470, loss: 0.0038664399180561304 2023-01-22 20:30:22.855705: step: 374/470, loss: 0.005592767149209976 2023-01-22 20:30:23.670546: step: 376/470, loss: 0.0027753994800150394 2023-01-22 20:30:24.364691: step: 378/470, loss: 0.017724450677633286 2023-01-22 20:30:25.154257: step: 380/470, loss: 0.015447490848600864 2023-01-22 20:30:26.014086: step: 382/470, loss: 0.03897276520729065 2023-01-22 20:30:26.722501: step: 384/470, loss: 0.0031961435452103615 2023-01-22 20:30:27.430884: step: 386/470, loss: 0.022479455918073654 2023-01-22 20:30:28.148698: step: 388/470, loss: 0.03246815502643585 2023-01-22 20:30:28.885669: step: 390/470, loss: 0.011416290886700153 2023-01-22 20:30:29.625894: step: 392/470, loss: 0.0019335534889250994 2023-01-22 20:30:30.282469: step: 394/470, loss: 0.016625449061393738 2023-01-22 20:30:30.924189: step: 396/470, loss: 0.0012744449777528644 2023-01-22 20:30:31.623736: step: 398/470, loss: 0.02477888949215412 2023-01-22 20:30:32.407282: step: 400/470, loss: 0.2593715786933899 2023-01-22 20:30:33.177355: step: 402/470, loss: 0.019976742565631866 2023-01-22 20:30:33.895755: step: 404/470, loss: 0.07440569996833801 2023-01-22 20:30:34.647382: step: 406/470, loss: 0.06923633068799973 2023-01-22 20:30:35.381981: step: 408/470, loss: 0.0011120573617517948 2023-01-22 20:30:36.077272: step: 410/470, loss: 0.003188740462064743 2023-01-22 20:30:36.808418: step: 412/470, loss: 0.4376097023487091 2023-01-22 20:30:37.506923: step: 414/470, loss: 0.0028341033030301332 2023-01-22 20:30:38.228757: step: 416/470, loss: 0.0008696450968272984 2023-01-22 20:30:38.932492: step: 418/470, loss: 0.008818319998681545 2023-01-22 20:30:39.706616: step: 420/470, loss: 0.0027629744727164507 2023-01-22 20:30:40.401020: step: 422/470, loss: 0.0039850943721830845 2023-01-22 20:30:41.190684: step: 424/470, loss: 0.035751424729824066 2023-01-22 20:30:41.916779: step: 426/470, loss: 0.020366976037621498 2023-01-22 20:30:42.720687: step: 428/470, loss: 0.2410111129283905 2023-01-22 20:30:43.466135: step: 430/470, loss: 0.005729255266487598 2023-01-22 20:30:44.207248: step: 432/470, loss: 0.010410521179437637 2023-01-22 20:30:44.939045: step: 434/470, loss: 0.06469309329986572 2023-01-22 20:30:45.789859: step: 436/470, loss: 0.04472040757536888 2023-01-22 20:30:46.492456: step: 438/470, loss: 0.023935789242386818 2023-01-22 20:30:47.480751: step: 440/470, loss: 0.041207972913980484 2023-01-22 20:30:48.166502: step: 442/470, loss: 0.018505996093153954 2023-01-22 20:30:48.955198: step: 444/470, loss: 0.025313975289463997 2023-01-22 20:30:49.639829: step: 446/470, loss: 0.013848811388015747 2023-01-22 20:30:50.361556: step: 448/470, loss: 0.00033676475868560374 2023-01-22 20:30:51.109923: step: 450/470, loss: 0.012692051008343697 2023-01-22 20:30:51.833971: step: 452/470, loss: 0.01313791237771511 2023-01-22 20:30:52.482806: step: 454/470, loss: 0.0016963921952992678 2023-01-22 20:30:53.196243: step: 456/470, loss: 0.5339106917381287 2023-01-22 20:30:53.955605: step: 458/470, loss: 0.055918145924806595 2023-01-22 20:30:54.603879: step: 460/470, loss: 0.00320792431011796 2023-01-22 20:30:55.394727: step: 462/470, loss: 0.0457182303071022 2023-01-22 20:30:56.208092: step: 464/470, loss: 0.01659577526152134 2023-01-22 20:30:57.038944: step: 466/470, loss: 0.03423323854804039 2023-01-22 20:30:57.879438: step: 468/470, loss: 0.009822395630180836 2023-01-22 20:30:58.556794: step: 470/470, loss: 0.0007459279731847346 2023-01-22 20:30:59.315115: step: 472/470, loss: 0.009417381137609482 2023-01-22 20:31:00.023923: step: 474/470, loss: 0.015447549521923065 2023-01-22 20:31:00.806056: step: 476/470, loss: 0.004266915377229452 2023-01-22 20:31:01.577226: step: 478/470, loss: 0.0016163645777851343 2023-01-22 20:31:02.320558: step: 480/470, loss: 0.007870226167142391 2023-01-22 20:31:03.015138: step: 482/470, loss: 0.04453890770673752 2023-01-22 20:31:03.782010: step: 484/470, loss: 0.08574463427066803 2023-01-22 20:31:04.511608: step: 486/470, loss: 0.0023265713825821877 2023-01-22 20:31:05.292270: step: 488/470, loss: 0.0027136588469147682 2023-01-22 20:31:06.006043: step: 490/470, loss: 0.056579723954200745 2023-01-22 20:31:06.764461: step: 492/470, loss: 0.049596644937992096 2023-01-22 20:31:07.372569: step: 494/470, loss: 0.009921560995280743 2023-01-22 20:31:08.060104: step: 496/470, loss: 0.011929440312087536 2023-01-22 20:31:08.895713: step: 498/470, loss: 0.05545575171709061 2023-01-22 20:31:09.700590: step: 500/470, loss: 0.041103947907686234 2023-01-22 20:31:10.593457: step: 502/470, loss: 0.05899772793054581 2023-01-22 20:31:11.330243: step: 504/470, loss: 0.02625754103064537 2023-01-22 20:31:12.044463: step: 506/470, loss: 1.3242918252944946 2023-01-22 20:31:12.738168: step: 508/470, loss: 0.011036560870707035 2023-01-22 20:31:13.471132: step: 510/470, loss: 0.002730847103521228 2023-01-22 20:31:14.125407: step: 512/470, loss: 0.04828563705086708 2023-01-22 20:31:14.886722: step: 514/470, loss: 0.011057223193347454 2023-01-22 20:31:15.631331: step: 516/470, loss: 0.024130506440997124 2023-01-22 20:31:16.319439: step: 518/470, loss: 0.009844631887972355 2023-01-22 20:31:17.073936: step: 520/470, loss: 0.09269572049379349 2023-01-22 20:31:17.804388: step: 522/470, loss: 0.010625121183693409 2023-01-22 20:31:18.626225: step: 524/470, loss: 0.09826123714447021 2023-01-22 20:31:19.376680: step: 526/470, loss: 0.07059445977210999 2023-01-22 20:31:20.197393: step: 528/470, loss: 0.21039122343063354 2023-01-22 20:31:20.900121: step: 530/470, loss: 0.004249433521181345 2023-01-22 20:31:21.703501: step: 532/470, loss: 0.6244630813598633 2023-01-22 20:31:22.412072: step: 534/470, loss: 0.10188359767198563 2023-01-22 20:31:23.160374: step: 536/470, loss: 0.0008839786169119179 2023-01-22 20:31:23.936310: step: 538/470, loss: 0.009506146423518658 2023-01-22 20:31:24.603041: step: 540/470, loss: 0.026201093569397926 2023-01-22 20:31:25.377239: step: 542/470, loss: 0.02899881824851036 2023-01-22 20:31:26.075370: step: 544/470, loss: 0.01946968585252762 2023-01-22 20:31:26.712686: step: 546/470, loss: 0.002837974112480879 2023-01-22 20:31:27.379952: step: 548/470, loss: 0.006309479475021362 2023-01-22 20:31:28.129345: step: 550/470, loss: 0.025487856939435005 2023-01-22 20:31:28.859813: step: 552/470, loss: 0.044974181801080704 2023-01-22 20:31:29.709151: step: 554/470, loss: 0.08220124244689941 2023-01-22 20:31:30.548525: step: 556/470, loss: 0.023944033309817314 2023-01-22 20:31:31.294903: step: 558/470, loss: 0.06157348304986954 2023-01-22 20:31:31.999834: step: 560/470, loss: 0.03174411877989769 2023-01-22 20:31:32.724684: step: 562/470, loss: 0.001561825512908399 2023-01-22 20:31:33.472454: step: 564/470, loss: 0.004232198931276798 2023-01-22 20:31:34.316716: step: 566/470, loss: 0.012148827314376831 2023-01-22 20:31:35.012571: step: 568/470, loss: 0.012420494109392166 2023-01-22 20:31:35.752079: step: 570/470, loss: 0.03385348618030548 2023-01-22 20:31:36.451521: step: 572/470, loss: 0.11622752249240875 2023-01-22 20:31:37.206000: step: 574/470, loss: 0.023387907072901726 2023-01-22 20:31:37.907022: step: 576/470, loss: 0.011087493039667606 2023-01-22 20:31:38.663929: step: 578/470, loss: 0.026949353516101837 2023-01-22 20:31:39.364447: step: 580/470, loss: 0.007216866593807936 2023-01-22 20:31:40.080812: step: 582/470, loss: 0.5527914762496948 2023-01-22 20:31:40.810015: step: 584/470, loss: 0.5917420387268066 2023-01-22 20:31:41.574495: step: 586/470, loss: 0.08542931079864502 2023-01-22 20:31:42.347220: step: 588/470, loss: 0.10411461442708969 2023-01-22 20:31:43.098509: step: 590/470, loss: 0.003107214579358697 2023-01-22 20:31:43.888564: step: 592/470, loss: 0.034524258226156235 2023-01-22 20:31:44.697236: step: 594/470, loss: 0.028853056952357292 2023-01-22 20:31:45.380471: step: 596/470, loss: 0.05348816141486168 2023-01-22 20:31:46.107274: step: 598/470, loss: 0.041593506932258606 2023-01-22 20:31:46.877218: step: 600/470, loss: 0.0063796937465667725 2023-01-22 20:31:47.630950: step: 602/470, loss: 0.016017813235521317 2023-01-22 20:31:48.360476: step: 604/470, loss: 0.062454525381326675 2023-01-22 20:31:49.093821: step: 606/470, loss: 0.19269520044326782 2023-01-22 20:31:49.773003: step: 608/470, loss: 0.014841437339782715 2023-01-22 20:31:50.606326: step: 610/470, loss: 2.196732759475708 2023-01-22 20:31:51.351487: step: 612/470, loss: 0.011269205249845982 2023-01-22 20:31:52.049976: step: 614/470, loss: 0.000654394447337836 2023-01-22 20:31:52.756797: step: 616/470, loss: 0.0068985833786427975 2023-01-22 20:31:53.500699: step: 618/470, loss: 0.009661390446126461 2023-01-22 20:31:54.168150: step: 620/470, loss: 0.009938615374267101 2023-01-22 20:31:54.941332: step: 622/470, loss: 0.28914228081703186 2023-01-22 20:31:55.727748: step: 624/470, loss: 0.057103097438812256 2023-01-22 20:31:56.614312: step: 626/470, loss: 0.0777493342757225 2023-01-22 20:31:57.331355: step: 628/470, loss: 0.0037567424587905407 2023-01-22 20:31:58.037797: step: 630/470, loss: 0.044795405119657516 2023-01-22 20:31:58.806894: step: 632/470, loss: 0.05516333505511284 2023-01-22 20:31:59.476479: step: 634/470, loss: 0.0026756152510643005 2023-01-22 20:32:00.195203: step: 636/470, loss: 0.049536339938640594 2023-01-22 20:32:00.940509: step: 638/470, loss: 0.020610058680176735 2023-01-22 20:32:01.708812: step: 640/470, loss: 0.0036903752479702234 2023-01-22 20:32:02.442931: step: 642/470, loss: 0.019514787942171097 2023-01-22 20:32:03.180453: step: 644/470, loss: 0.0012103342451155186 2023-01-22 20:32:03.933798: step: 646/470, loss: 0.043970245867967606 2023-01-22 20:32:04.647138: step: 648/470, loss: 0.3032195270061493 2023-01-22 20:32:05.362725: step: 650/470, loss: 0.05885202810168266 2023-01-22 20:32:06.129784: step: 652/470, loss: 0.06101761758327484 2023-01-22 20:32:06.814507: step: 654/470, loss: 0.017636749893426895 2023-01-22 20:32:07.501654: step: 656/470, loss: 0.09666401892900467 2023-01-22 20:32:08.247977: step: 658/470, loss: 0.006696424447000027 2023-01-22 20:32:08.992748: step: 660/470, loss: 0.012530512176454067 2023-01-22 20:32:09.758786: step: 662/470, loss: 0.03560841828584671 2023-01-22 20:32:10.517608: step: 664/470, loss: 0.12582863867282867 2023-01-22 20:32:11.150526: step: 666/470, loss: 0.039586570113897324 2023-01-22 20:32:11.734555: step: 668/470, loss: 0.0001087912532966584 2023-01-22 20:32:12.595062: step: 670/470, loss: 0.027073320001363754 2023-01-22 20:32:13.335930: step: 672/470, loss: 0.004335601814091206 2023-01-22 20:32:14.016334: step: 674/470, loss: 0.03368784487247467 2023-01-22 20:32:14.727874: step: 676/470, loss: 0.03928745165467262 2023-01-22 20:32:15.539850: step: 678/470, loss: 0.0066179316490888596 2023-01-22 20:32:16.295418: step: 680/470, loss: 0.03221479430794716 2023-01-22 20:32:17.099417: step: 682/470, loss: 0.08291277289390564 2023-01-22 20:32:17.829591: step: 684/470, loss: 0.023358231410384178 2023-01-22 20:32:18.586468: step: 686/470, loss: 0.052179984748363495 2023-01-22 20:32:19.326678: step: 688/470, loss: 0.012996486388146877 2023-01-22 20:32:20.079460: step: 690/470, loss: 0.2901443541049957 2023-01-22 20:32:20.836145: step: 692/470, loss: 0.15641692280769348 2023-01-22 20:32:21.595946: step: 694/470, loss: 0.03846118599176407 2023-01-22 20:32:22.386270: step: 696/470, loss: 0.008250097744166851 2023-01-22 20:32:23.143507: step: 698/470, loss: 0.1495591700077057 2023-01-22 20:32:23.932289: step: 700/470, loss: 0.05644164979457855 2023-01-22 20:32:24.594490: step: 702/470, loss: 0.0023439424112439156 2023-01-22 20:32:25.311625: step: 704/470, loss: 0.03498091921210289 2023-01-22 20:32:25.984413: step: 706/470, loss: 0.00011742630158551037 2023-01-22 20:32:26.685214: step: 708/470, loss: 0.011936147697269917 2023-01-22 20:32:27.389271: step: 710/470, loss: 0.013959010131657124 2023-01-22 20:32:28.078675: step: 712/470, loss: 0.003490228671580553 2023-01-22 20:32:28.818169: step: 714/470, loss: 0.03471314162015915 2023-01-22 20:32:29.616150: step: 716/470, loss: 0.04512022063136101 2023-01-22 20:32:30.291998: step: 718/470, loss: 0.013724502176046371 2023-01-22 20:32:30.960732: step: 720/470, loss: 0.04511544108390808 2023-01-22 20:32:31.670769: step: 722/470, loss: 0.04014601930975914 2023-01-22 20:32:32.466699: step: 724/470, loss: 0.0754375234246254 2023-01-22 20:32:33.364162: step: 726/470, loss: 0.06476394087076187 2023-01-22 20:32:34.029151: step: 728/470, loss: 0.027749724686145782 2023-01-22 20:32:34.759115: step: 730/470, loss: 0.259750634431839 2023-01-22 20:32:35.459832: step: 732/470, loss: 0.00022448692470788956 2023-01-22 20:32:36.193385: step: 734/470, loss: 0.057673707604408264 2023-01-22 20:32:36.899665: step: 736/470, loss: 0.012090279720723629 2023-01-22 20:32:37.652931: step: 738/470, loss: 0.0011684439377859235 2023-01-22 20:32:38.369212: step: 740/470, loss: 0.013645489700138569 2023-01-22 20:32:39.100652: step: 742/470, loss: 0.02157679945230484 2023-01-22 20:32:39.828362: step: 744/470, loss: 0.016018304973840714 2023-01-22 20:32:40.506136: step: 746/470, loss: 0.007724686060100794 2023-01-22 20:32:41.203610: step: 748/470, loss: 0.038802552968263626 2023-01-22 20:32:41.967443: step: 750/470, loss: 0.005301786120980978 2023-01-22 20:32:42.687506: step: 752/470, loss: 0.02522115781903267 2023-01-22 20:32:43.441698: step: 754/470, loss: 0.020645877346396446 2023-01-22 20:32:44.114569: step: 756/470, loss: 0.00777442567050457 2023-01-22 20:32:44.928098: step: 758/470, loss: 0.001848795684054494 2023-01-22 20:32:45.725216: step: 760/470, loss: 0.03325757756829262 2023-01-22 20:32:46.395827: step: 762/470, loss: 0.029930531978607178 2023-01-22 20:32:47.074295: step: 764/470, loss: 0.06273278594017029 2023-01-22 20:32:47.855196: step: 766/470, loss: 0.05899015814065933 2023-01-22 20:32:48.686105: step: 768/470, loss: 0.2313128113746643 2023-01-22 20:32:49.512763: step: 770/470, loss: 0.043592121452093124 2023-01-22 20:32:50.184990: step: 772/470, loss: 0.0039254468865692616 2023-01-22 20:32:50.968018: step: 774/470, loss: 1.0532337427139282 2023-01-22 20:32:51.727519: step: 776/470, loss: 0.0696612298488617 2023-01-22 20:32:52.446000: step: 778/470, loss: 0.05012049153447151 2023-01-22 20:32:53.212450: step: 780/470, loss: 0.029199251905083656 2023-01-22 20:32:53.946600: step: 782/470, loss: 0.6384598016738892 2023-01-22 20:32:54.637072: step: 784/470, loss: 0.05911829322576523 2023-01-22 20:32:55.365680: step: 786/470, loss: 0.010864563286304474 2023-01-22 20:32:56.017115: step: 788/470, loss: 0.005859819240868092 2023-01-22 20:32:56.811304: step: 790/470, loss: 0.020966263487935066 2023-01-22 20:32:57.585933: step: 792/470, loss: 0.33560967445373535 2023-01-22 20:32:58.325471: step: 794/470, loss: 0.040173858404159546 2023-01-22 20:32:59.146638: step: 796/470, loss: 0.025398118421435356 2023-01-22 20:32:59.880687: step: 798/470, loss: 0.06342984735965729 2023-01-22 20:33:00.596637: step: 800/470, loss: 0.06393054127693176 2023-01-22 20:33:01.349101: step: 802/470, loss: 0.04026153311133385 2023-01-22 20:33:02.068500: step: 804/470, loss: 0.0187918022274971 2023-01-22 20:33:02.740159: step: 806/470, loss: 0.3132553696632385 2023-01-22 20:33:03.507555: step: 808/470, loss: 0.007414802443236113 2023-01-22 20:33:04.215579: step: 810/470, loss: 0.005097710061818361 2023-01-22 20:33:04.969773: step: 812/470, loss: 0.011615417897701263 2023-01-22 20:33:05.705998: step: 814/470, loss: 0.12536713480949402 2023-01-22 20:33:06.508129: step: 816/470, loss: 0.029747048392891884 2023-01-22 20:33:07.243041: step: 818/470, loss: 0.050025250762701035 2023-01-22 20:33:07.989303: step: 820/470, loss: 0.01923954486846924 2023-01-22 20:33:08.723415: step: 822/470, loss: 0.039691563695669174 2023-01-22 20:33:09.464960: step: 824/470, loss: 0.03583470359444618 2023-01-22 20:33:10.153100: step: 826/470, loss: 0.06700265407562256 2023-01-22 20:33:10.899995: step: 828/470, loss: 0.02268780767917633 2023-01-22 20:33:11.570115: step: 830/470, loss: 0.05540559068322182 2023-01-22 20:33:12.304231: step: 832/470, loss: 0.03642084449529648 2023-01-22 20:33:12.967164: step: 834/470, loss: 0.007380470167845488 2023-01-22 20:33:13.692238: step: 836/470, loss: 0.04133666679263115 2023-01-22 20:33:14.368152: step: 838/470, loss: 0.09914152324199677 2023-01-22 20:33:15.123345: step: 840/470, loss: 0.0013483419315889478 2023-01-22 20:33:15.879759: step: 842/470, loss: 0.03963959217071533 2023-01-22 20:33:16.534495: step: 844/470, loss: 0.041381705552339554 2023-01-22 20:33:17.188246: step: 846/470, loss: 0.016487672924995422 2023-01-22 20:33:17.821573: step: 848/470, loss: 0.0062914639711380005 2023-01-22 20:33:18.496781: step: 850/470, loss: 0.011747756041586399 2023-01-22 20:33:19.258545: step: 852/470, loss: 0.0007929243729449809 2023-01-22 20:33:19.973108: step: 854/470, loss: 0.0006810800405219197 2023-01-22 20:33:20.669589: step: 856/470, loss: 0.002209881553426385 2023-01-22 20:33:21.411319: step: 858/470, loss: 0.028073936700820923 2023-01-22 20:33:22.081391: step: 860/470, loss: 0.16893041133880615 2023-01-22 20:33:22.853985: step: 862/470, loss: 0.0011155976681038737 2023-01-22 20:33:23.567337: step: 864/470, loss: 0.00859520398080349 2023-01-22 20:33:24.319532: step: 866/470, loss: 7.151709723984823e-05 2023-01-22 20:33:25.129313: step: 868/470, loss: 0.01832861453294754 2023-01-22 20:33:25.842383: step: 870/470, loss: 0.047499969601631165 2023-01-22 20:33:26.658046: step: 872/470, loss: 0.003909479361027479 2023-01-22 20:33:27.395148: step: 874/470, loss: 0.25040119886398315 2023-01-22 20:33:28.157148: step: 876/470, loss: 0.06178472191095352 2023-01-22 20:33:28.853453: step: 878/470, loss: 0.0017721912590786815 2023-01-22 20:33:29.590628: step: 880/470, loss: 0.007689218968153 2023-01-22 20:33:30.404651: step: 882/470, loss: 0.033220697194337845 2023-01-22 20:33:31.223558: step: 884/470, loss: 0.008743722923099995 2023-01-22 20:33:31.945733: step: 886/470, loss: 0.0026598607655614614 2023-01-22 20:33:32.701621: step: 888/470, loss: 0.006200912408530712 2023-01-22 20:33:33.375664: step: 890/470, loss: 0.033201370388269424 2023-01-22 20:33:34.083571: step: 892/470, loss: 0.005671496037393808 2023-01-22 20:33:34.823140: step: 894/470, loss: 0.0014364663511514664 2023-01-22 20:33:35.528960: step: 896/470, loss: 0.12019861489534378 2023-01-22 20:33:36.410148: step: 898/470, loss: 0.0630432739853859 2023-01-22 20:33:37.138592: step: 900/470, loss: 0.009634872898459435 2023-01-22 20:33:37.828054: step: 902/470, loss: 0.056970443576574326 2023-01-22 20:33:38.541210: step: 904/470, loss: 0.04602930322289467 2023-01-22 20:33:39.278641: step: 906/470, loss: 0.03439059853553772 2023-01-22 20:33:39.895159: step: 908/470, loss: 0.14804024994373322 2023-01-22 20:33:40.628991: step: 910/470, loss: 0.005356145091354847 2023-01-22 20:33:41.406445: step: 912/470, loss: 0.02320312149822712 2023-01-22 20:33:42.190689: step: 914/470, loss: 0.12080468982458115 2023-01-22 20:33:42.905353: step: 916/470, loss: 0.01958886720240116 2023-01-22 20:33:43.621773: step: 918/470, loss: 0.011003172025084496 2023-01-22 20:33:44.274991: step: 920/470, loss: 0.0002306982351001352 2023-01-22 20:33:45.065971: step: 922/470, loss: 0.06692475080490112 2023-01-22 20:33:45.830862: step: 924/470, loss: 0.030821437016129494 2023-01-22 20:33:46.515323: step: 926/470, loss: 0.03762578219175339 2023-01-22 20:33:47.160706: step: 928/470, loss: 0.0017827172996476293 2023-01-22 20:33:47.935888: step: 930/470, loss: 0.04079880192875862 2023-01-22 20:33:48.756535: step: 932/470, loss: 0.003436343977227807 2023-01-22 20:33:49.536146: step: 934/470, loss: 0.021370133385062218 2023-01-22 20:33:50.262158: step: 936/470, loss: 0.029703887179493904 2023-01-22 20:33:50.968299: step: 938/470, loss: 0.06702406704425812 2023-01-22 20:33:51.615929: step: 940/470, loss: 0.015583197586238384 2023-01-22 20:33:52.246601: step: 942/470, loss: 0.001184674329124391 ================================================== Loss: 0.093 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28582109842305603, 'r': 0.33246363061353573, 'f1': 0.3073830409356725}, 'combined': 0.22649276700523235, 'epoch': 28} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.324120752408383, 'r': 0.36681742844679505, 'f1': 0.34414986520944235}, 'combined': 0.23970637377772105, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27722653286563065, 'r': 0.33509165357762183, 'f1': 0.30342491655568166}, 'combined': 0.22357625430418648, 'epoch': 28} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32527617462539155, 'r': 0.36124421316569927, 'f1': 0.34231797876294057}, 'combined': 0.23843043296921235, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27060735512630013, 'r': 0.34557637571157496, 'f1': 0.30353125000000003}, 'combined': 0.22365460526315792, 'epoch': 28} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.31329216537409327, 'r': 0.3690220217146772, 'f1': 0.3388811501838978}, 'combined': 0.23603662201863532, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22629310344827586, 'r': 0.375, 'f1': 0.28225806451612906}, 'combined': 0.1881720430107527, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28378378378378377, 'r': 0.45652173913043476, 'f1': 0.3499999999999999}, 'combined': 0.17499999999999996, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4431818181818182, 'r': 0.33620689655172414, 'f1': 0.38235294117647056}, 'combined': 0.2549019607843137, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 20:36:27.608956: step: 2/470, loss: 0.2647108733654022 2023-01-22 20:36:28.445352: step: 4/470, loss: 0.02322511561214924 2023-01-22 20:36:29.111700: step: 6/470, loss: 0.02740596979856491 2023-01-22 20:36:29.823356: step: 8/470, loss: 0.013857753947377205 2023-01-22 20:36:30.618779: step: 10/470, loss: 0.005119292065501213 2023-01-22 20:36:31.361882: step: 12/470, loss: 0.00834457017481327 2023-01-22 20:36:31.999105: step: 14/470, loss: 0.0004951293813064694 2023-01-22 20:36:32.757013: step: 16/470, loss: 0.027414359152317047 2023-01-22 20:36:33.677129: step: 18/470, loss: 0.03084324114024639 2023-01-22 20:36:34.262209: step: 20/470, loss: 0.006820783019065857 2023-01-22 20:36:35.087880: step: 22/470, loss: 0.00931725837290287 2023-01-22 20:36:35.816353: step: 24/470, loss: 0.0211730245500803 2023-01-22 20:36:36.594875: step: 26/470, loss: 0.0012839719420298934 2023-01-22 20:36:37.280282: step: 28/470, loss: 0.04594583436846733 2023-01-22 20:36:37.989540: step: 30/470, loss: 0.012252802029252052 2023-01-22 20:36:38.696098: step: 32/470, loss: 0.004927062429487705 2023-01-22 20:36:39.493525: step: 34/470, loss: 0.006308556068688631 2023-01-22 20:36:40.301880: step: 36/470, loss: 0.015124126337468624 2023-01-22 20:36:41.067837: step: 38/470, loss: 0.007813768461346626 2023-01-22 20:36:41.747715: step: 40/470, loss: 0.0029113588389009237 2023-01-22 20:36:42.493345: step: 42/470, loss: 0.011285551823675632 2023-01-22 20:36:43.242984: step: 44/470, loss: 0.3646211326122284 2023-01-22 20:36:44.012230: step: 46/470, loss: 0.006847033277153969 2023-01-22 20:36:44.734114: step: 48/470, loss: 0.06652196496725082 2023-01-22 20:36:45.504990: step: 50/470, loss: 0.2817710041999817 2023-01-22 20:36:46.272210: step: 52/470, loss: 0.0034084455110132694 2023-01-22 20:36:47.024804: step: 54/470, loss: 0.04189238324761391 2023-01-22 20:36:47.679586: step: 56/470, loss: 0.002203726675361395 2023-01-22 20:36:48.499757: step: 58/470, loss: 0.011194828897714615 2023-01-22 20:36:49.284676: step: 60/470, loss: 0.19877992570400238 2023-01-22 20:36:50.013245: step: 62/470, loss: 0.007485152687877417 2023-01-22 20:36:50.705064: step: 64/470, loss: 0.020456980913877487 2023-01-22 20:36:51.411567: step: 66/470, loss: 0.005883743055164814 2023-01-22 20:36:52.141466: step: 68/470, loss: 0.012196771800518036 2023-01-22 20:36:52.824451: step: 70/470, loss: 0.00925607979297638 2023-01-22 20:36:53.461890: step: 72/470, loss: 0.014364490285515785 2023-01-22 20:36:54.192563: step: 74/470, loss: 0.37649011611938477 2023-01-22 20:36:54.946755: step: 76/470, loss: 0.010440860874950886 2023-01-22 20:36:55.754206: step: 78/470, loss: 0.02263338305056095 2023-01-22 20:36:56.503508: step: 80/470, loss: 0.2164899855852127 2023-01-22 20:36:57.240408: step: 82/470, loss: 0.0030306533444672823 2023-01-22 20:36:57.965748: step: 84/470, loss: 0.003299787174910307 2023-01-22 20:36:58.692931: step: 86/470, loss: 0.0003010545624420047 2023-01-22 20:36:59.408849: step: 88/470, loss: 0.3850425183773041 2023-01-22 20:37:00.218016: step: 90/470, loss: 0.01980891264975071 2023-01-22 20:37:00.902012: step: 92/470, loss: 0.021628176793456078 2023-01-22 20:37:01.707698: step: 94/470, loss: 0.0020503345876932144 2023-01-22 20:37:02.456240: step: 96/470, loss: 0.008198212832212448 2023-01-22 20:37:03.135512: step: 98/470, loss: 0.03375517949461937 2023-01-22 20:37:03.800281: step: 100/470, loss: 0.0032718630973249674 2023-01-22 20:37:04.442801: step: 102/470, loss: 0.0006153931026346982 2023-01-22 20:37:05.119721: step: 104/470, loss: 0.006163314450532198 2023-01-22 20:37:05.930701: step: 106/470, loss: 0.055401623249053955 2023-01-22 20:37:06.710487: step: 108/470, loss: 0.012150800786912441 2023-01-22 20:37:07.499916: step: 110/470, loss: 0.13651181757450104 2023-01-22 20:37:08.221142: step: 112/470, loss: 0.02135992981493473 2023-01-22 20:37:08.931225: step: 114/470, loss: 0.4167734980583191 2023-01-22 20:37:09.781497: step: 116/470, loss: 0.013348422944545746 2023-01-22 20:37:10.516349: step: 118/470, loss: 0.003862533252686262 2023-01-22 20:37:11.166664: step: 120/470, loss: 0.0047416831366717815 2023-01-22 20:37:11.847887: step: 122/470, loss: 0.0004611566837411374 2023-01-22 20:37:12.701271: step: 124/470, loss: 0.0077300965785980225 2023-01-22 20:37:13.468263: step: 126/470, loss: 0.005382179748266935 2023-01-22 20:37:14.122827: step: 128/470, loss: 0.004562853369861841 2023-01-22 20:37:14.811694: step: 130/470, loss: 0.04411611706018448 2023-01-22 20:37:15.556789: step: 132/470, loss: 0.013166757300496101 2023-01-22 20:37:16.246570: step: 134/470, loss: 0.005444676149636507 2023-01-22 20:37:17.017496: step: 136/470, loss: 9.606832463759929e-05 2023-01-22 20:37:17.810542: step: 138/470, loss: 0.003493986092507839 2023-01-22 20:37:18.553931: step: 140/470, loss: 0.0017787780379876494 2023-01-22 20:37:19.336022: step: 142/470, loss: 0.040565188974142075 2023-01-22 20:37:20.143512: step: 144/470, loss: 0.01262040063738823 2023-01-22 20:37:20.933983: step: 146/470, loss: 0.01510031707584858 2023-01-22 20:37:21.680699: step: 148/470, loss: 0.0011344999074935913 2023-01-22 20:37:22.477079: step: 150/470, loss: 0.049181923270225525 2023-01-22 20:37:23.203395: step: 152/470, loss: 0.04775718227028847 2023-01-22 20:37:23.907226: step: 154/470, loss: 4.9370075430488214e-05 2023-01-22 20:37:24.612455: step: 156/470, loss: 0.4461440443992615 2023-01-22 20:37:25.317419: step: 158/470, loss: 0.15681709349155426 2023-01-22 20:37:26.007896: step: 160/470, loss: 0.009236715734004974 2023-01-22 20:37:26.808454: step: 162/470, loss: 0.032232243567705154 2023-01-22 20:37:27.625958: step: 164/470, loss: 0.002545413561165333 2023-01-22 20:37:28.345679: step: 166/470, loss: 0.009975194931030273 2023-01-22 20:37:29.091825: step: 168/470, loss: 0.015161970630288124 2023-01-22 20:37:29.820329: step: 170/470, loss: 0.005908914841711521 2023-01-22 20:37:30.552056: step: 172/470, loss: 0.021803176030516624 2023-01-22 20:37:31.322435: step: 174/470, loss: 0.1319359987974167 2023-01-22 20:37:32.021720: step: 176/470, loss: 0.047474365681409836 2023-01-22 20:37:32.792494: step: 178/470, loss: 0.02708449400961399 2023-01-22 20:37:33.501844: step: 180/470, loss: 0.07310396432876587 2023-01-22 20:37:34.208855: step: 182/470, loss: 0.04242819547653198 2023-01-22 20:37:34.908128: step: 184/470, loss: 0.005080712027847767 2023-01-22 20:37:35.637878: step: 186/470, loss: 0.011022936552762985 2023-01-22 20:37:36.283488: step: 188/470, loss: 0.004775905515998602 2023-01-22 20:37:37.030015: step: 190/470, loss: 0.014512178488075733 2023-01-22 20:37:37.712899: step: 192/470, loss: 0.018934812396764755 2023-01-22 20:37:38.457649: step: 194/470, loss: 0.00309011316858232 2023-01-22 20:37:39.188679: step: 196/470, loss: 0.004391052294522524 2023-01-22 20:37:39.938842: step: 198/470, loss: 0.00033165913191623986 2023-01-22 20:37:40.599590: step: 200/470, loss: 0.00046572668361477554 2023-01-22 20:37:41.336227: step: 202/470, loss: 0.013232901692390442 2023-01-22 20:37:42.170243: step: 204/470, loss: 0.1024237647652626 2023-01-22 20:37:42.871632: step: 206/470, loss: 0.01619911752641201 2023-01-22 20:37:43.580043: step: 208/470, loss: 0.0010863811476156116 2023-01-22 20:37:44.297969: step: 210/470, loss: 0.0015397859970107675 2023-01-22 20:37:45.021975: step: 212/470, loss: 0.07132253795862198 2023-01-22 20:37:45.685557: step: 214/470, loss: 0.004997688811272383 2023-01-22 20:37:46.418503: step: 216/470, loss: 0.00376240280456841 2023-01-22 20:37:47.203504: step: 218/470, loss: 0.03204397112131119 2023-01-22 20:37:47.893685: step: 220/470, loss: 0.008753920905292034 2023-01-22 20:37:48.615242: step: 222/470, loss: 0.005575764458626509 2023-01-22 20:37:49.424684: step: 224/470, loss: 0.0023701766040176153 2023-01-22 20:37:50.123942: step: 226/470, loss: 0.005872036796063185 2023-01-22 20:37:50.889559: step: 228/470, loss: 0.023437704890966415 2023-01-22 20:37:51.642881: step: 230/470, loss: 0.010248042643070221 2023-01-22 20:37:52.399415: step: 232/470, loss: 0.02714042365550995 2023-01-22 20:37:53.196757: step: 234/470, loss: 0.01943190023303032 2023-01-22 20:37:53.886112: step: 236/470, loss: 0.00399386091157794 2023-01-22 20:37:54.566130: step: 238/470, loss: 0.0016181677347049117 2023-01-22 20:37:55.403372: step: 240/470, loss: 0.049513738602399826 2023-01-22 20:37:56.151090: step: 242/470, loss: 0.07210097461938858 2023-01-22 20:37:56.851427: step: 244/470, loss: 0.00037181295920163393 2023-01-22 20:37:57.591048: step: 246/470, loss: 0.19889435172080994 2023-01-22 20:37:58.284350: step: 248/470, loss: 0.006400907877832651 2023-01-22 20:37:59.167326: step: 250/470, loss: 0.028619172051548958 2023-01-22 20:37:59.887560: step: 252/470, loss: 0.021511005237698555 2023-01-22 20:38:00.600162: step: 254/470, loss: 0.04519416764378548 2023-01-22 20:38:01.324973: step: 256/470, loss: 0.13093401491641998 2023-01-22 20:38:02.078024: step: 258/470, loss: 0.09740595519542694 2023-01-22 20:38:02.831296: step: 260/470, loss: 0.13155671954154968 2023-01-22 20:38:03.628731: step: 262/470, loss: 0.07691262662410736 2023-01-22 20:38:04.388002: step: 264/470, loss: 0.004110024776309729 2023-01-22 20:38:05.065951: step: 266/470, loss: 0.1348675787448883 2023-01-22 20:38:05.759508: step: 268/470, loss: 0.10196854174137115 2023-01-22 20:38:06.513079: step: 270/470, loss: 0.009700944647192955 2023-01-22 20:38:07.232958: step: 272/470, loss: 0.0025991289876401424 2023-01-22 20:38:07.970858: step: 274/470, loss: 0.0007453685393556952 2023-01-22 20:38:08.620966: step: 276/470, loss: 0.0021520478185266256 2023-01-22 20:38:09.336916: step: 278/470, loss: 0.052726294845342636 2023-01-22 20:38:10.065618: step: 280/470, loss: 0.003794698743149638 2023-01-22 20:38:10.871767: step: 282/470, loss: 0.01943463459610939 2023-01-22 20:38:11.555695: step: 284/470, loss: 0.023348847404122353 2023-01-22 20:38:12.271012: step: 286/470, loss: 0.019955376163125038 2023-01-22 20:38:12.986152: step: 288/470, loss: 0.003484874265268445 2023-01-22 20:38:13.724351: step: 290/470, loss: 0.028693674132227898 2023-01-22 20:38:14.440891: step: 292/470, loss: 0.0033248071558773518 2023-01-22 20:38:15.161579: step: 294/470, loss: 0.03285335376858711 2023-01-22 20:38:15.934766: step: 296/470, loss: 0.01780891977250576 2023-01-22 20:38:16.718023: step: 298/470, loss: 0.05349568650126457 2023-01-22 20:38:17.438631: step: 300/470, loss: 0.5065484642982483 2023-01-22 20:38:18.270128: step: 302/470, loss: 0.03260203078389168 2023-01-22 20:38:19.024874: step: 304/470, loss: 0.03035620041191578 2023-01-22 20:38:19.726944: step: 306/470, loss: 0.006546898279339075 2023-01-22 20:38:20.519129: step: 308/470, loss: 0.002254707971587777 2023-01-22 20:38:21.260058: step: 310/470, loss: 0.00036036467645317316 2023-01-22 20:38:21.936594: step: 312/470, loss: 0.003247553249821067 2023-01-22 20:38:22.602780: step: 314/470, loss: 0.06962236016988754 2023-01-22 20:38:23.331690: step: 316/470, loss: 0.02976413629949093 2023-01-22 20:38:24.010010: step: 318/470, loss: 0.004767751786857843 2023-01-22 20:38:24.751625: step: 320/470, loss: 0.10309252142906189 2023-01-22 20:38:25.543014: step: 322/470, loss: 0.10256746411323547 2023-01-22 20:38:26.359726: step: 324/470, loss: 0.05060182884335518 2023-01-22 20:38:27.102103: step: 326/470, loss: 0.006237236317247152 2023-01-22 20:38:27.829615: step: 328/470, loss: 0.05871773138642311 2023-01-22 20:38:28.551366: step: 330/470, loss: 0.24029061198234558 2023-01-22 20:38:29.250486: step: 332/470, loss: 0.19288605451583862 2023-01-22 20:38:29.983125: step: 334/470, loss: 0.007970509119331837 2023-01-22 20:38:30.749054: step: 336/470, loss: 0.04301189258694649 2023-01-22 20:38:31.460288: step: 338/470, loss: 0.01984056457877159 2023-01-22 20:38:32.182914: step: 340/470, loss: 0.03184615448117256 2023-01-22 20:38:32.869369: step: 342/470, loss: 0.002288240008056164 2023-01-22 20:38:33.600433: step: 344/470, loss: 0.00532893929630518 2023-01-22 20:38:34.337318: step: 346/470, loss: 0.02240307815372944 2023-01-22 20:38:35.109616: step: 348/470, loss: 0.005755066871643066 2023-01-22 20:38:35.824437: step: 350/470, loss: 0.028193417936563492 2023-01-22 20:38:36.488497: step: 352/470, loss: 0.0016564959660172462 2023-01-22 20:38:37.215158: step: 354/470, loss: 0.1647537648677826 2023-01-22 20:38:37.990698: step: 356/470, loss: 0.02808982878923416 2023-01-22 20:38:38.722422: step: 358/470, loss: 0.02180049754679203 2023-01-22 20:38:39.469286: step: 360/470, loss: 0.009425032883882523 2023-01-22 20:38:40.192646: step: 362/470, loss: 0.002624044893309474 2023-01-22 20:38:40.977757: step: 364/470, loss: 0.03871319815516472 2023-01-22 20:38:41.743356: step: 366/470, loss: 0.0012272871099412441 2023-01-22 20:38:42.524056: step: 368/470, loss: 0.03193151205778122 2023-01-22 20:38:43.348946: step: 370/470, loss: 0.005393751431256533 2023-01-22 20:38:43.994136: step: 372/470, loss: 0.007179801352322102 2023-01-22 20:38:44.689456: step: 374/470, loss: 0.02233264036476612 2023-01-22 20:38:45.423147: step: 376/470, loss: 0.019881457090377808 2023-01-22 20:38:46.201537: step: 378/470, loss: 0.004716763272881508 2023-01-22 20:38:46.971590: step: 380/470, loss: 0.010665499605238438 2023-01-22 20:38:47.683262: step: 382/470, loss: 0.021654922515153885 2023-01-22 20:38:48.446742: step: 384/470, loss: 0.017089908942580223 2023-01-22 20:38:49.128842: step: 386/470, loss: 0.004330948460847139 2023-01-22 20:38:49.931210: step: 388/470, loss: 0.004959953483194113 2023-01-22 20:38:50.592840: step: 390/470, loss: 0.0001449349510949105 2023-01-22 20:38:51.268523: step: 392/470, loss: 0.020653393119573593 2023-01-22 20:38:52.041796: step: 394/470, loss: 0.029949171468615532 2023-01-22 20:38:52.732253: step: 396/470, loss: 0.002220702590420842 2023-01-22 20:38:53.495710: step: 398/470, loss: 0.004065732005983591 2023-01-22 20:38:54.209657: step: 400/470, loss: 0.06798814982175827 2023-01-22 20:38:55.010086: step: 402/470, loss: 0.011189811863005161 2023-01-22 20:38:55.689051: step: 404/470, loss: 0.00252006808295846 2023-01-22 20:38:56.443786: step: 406/470, loss: 0.008702918887138367 2023-01-22 20:38:57.138128: step: 408/470, loss: 0.17309211194515228 2023-01-22 20:38:57.876443: step: 410/470, loss: 0.05191047117114067 2023-01-22 20:38:58.635459: step: 412/470, loss: 0.09695277363061905 2023-01-22 20:38:59.318262: step: 414/470, loss: 0.006256232038140297 2023-01-22 20:39:00.042065: step: 416/470, loss: 0.05564850568771362 2023-01-22 20:39:00.816868: step: 418/470, loss: 0.0020740872714668512 2023-01-22 20:39:01.510571: step: 420/470, loss: 0.005247580353170633 2023-01-22 20:39:02.404755: step: 422/470, loss: 0.003924751654267311 2023-01-22 20:39:03.112116: step: 424/470, loss: 0.010796195827424526 2023-01-22 20:39:03.782979: step: 426/470, loss: 0.02733398787677288 2023-01-22 20:39:04.586107: step: 428/470, loss: 0.020288608968257904 2023-01-22 20:39:05.296051: step: 430/470, loss: 0.006797471083700657 2023-01-22 20:39:06.005939: step: 432/470, loss: 0.05538531392812729 2023-01-22 20:39:06.707845: step: 434/470, loss: 0.011707188561558723 2023-01-22 20:39:07.402682: step: 436/470, loss: 0.025928908959031105 2023-01-22 20:39:08.041183: step: 438/470, loss: 0.011687841266393661 2023-01-22 20:39:08.782729: step: 440/470, loss: 0.02929680421948433 2023-01-22 20:39:09.480466: step: 442/470, loss: 0.00014456742792390287 2023-01-22 20:39:10.231278: step: 444/470, loss: 0.018118448555469513 2023-01-22 20:39:10.947901: step: 446/470, loss: 1.0357182025909424 2023-01-22 20:39:11.767188: step: 448/470, loss: 0.035767797380685806 2023-01-22 20:39:12.570415: step: 450/470, loss: 0.012241028249263763 2023-01-22 20:39:13.289436: step: 452/470, loss: 0.02669922076165676 2023-01-22 20:39:13.992794: step: 454/470, loss: 0.0076654767617583275 2023-01-22 20:39:14.764071: step: 456/470, loss: 0.005637957714498043 2023-01-22 20:39:15.497378: step: 458/470, loss: 0.056888647377491 2023-01-22 20:39:16.229040: step: 460/470, loss: 0.020066574215888977 2023-01-22 20:39:16.996864: step: 462/470, loss: 0.36348721385002136 2023-01-22 20:39:17.671437: step: 464/470, loss: 0.029186828061938286 2023-01-22 20:39:18.430153: step: 466/470, loss: 0.013277608901262283 2023-01-22 20:39:19.115542: step: 468/470, loss: 0.19909025728702545 2023-01-22 20:39:19.842378: step: 470/470, loss: 0.004801159258931875 2023-01-22 20:39:20.679472: step: 472/470, loss: 0.012583213858306408 2023-01-22 20:39:21.439802: step: 474/470, loss: 0.07196840643882751 2023-01-22 20:39:22.161428: step: 476/470, loss: 0.024347444996237755 2023-01-22 20:39:22.915286: step: 478/470, loss: 0.020326513797044754 2023-01-22 20:39:23.636758: step: 480/470, loss: 0.011932437308132648 2023-01-22 20:39:24.416640: step: 482/470, loss: 0.02215908281505108 2023-01-22 20:39:25.160776: step: 484/470, loss: 0.0020135114900767803 2023-01-22 20:39:25.858639: step: 486/470, loss: 0.0020898778457194567 2023-01-22 20:39:26.592462: step: 488/470, loss: 0.014602022245526314 2023-01-22 20:39:27.399484: step: 490/470, loss: 0.017223408445715904 2023-01-22 20:39:28.069322: step: 492/470, loss: 0.0020636683329939842 2023-01-22 20:39:28.824353: step: 494/470, loss: 0.01937001757323742 2023-01-22 20:39:29.505213: step: 496/470, loss: 0.0012707647401839495 2023-01-22 20:39:30.207636: step: 498/470, loss: 0.17490433156490326 2023-01-22 20:39:30.946781: step: 500/470, loss: 0.013577910140156746 2023-01-22 20:39:31.658764: step: 502/470, loss: 0.007008845917880535 2023-01-22 20:39:32.369578: step: 504/470, loss: 0.002107697306200862 2023-01-22 20:39:33.121184: step: 506/470, loss: 0.005590823013335466 2023-01-22 20:39:33.872367: step: 508/470, loss: 0.019169259816408157 2023-01-22 20:39:34.541089: step: 510/470, loss: 0.0015802793204784393 2023-01-22 20:39:35.257909: step: 512/470, loss: 0.013873277232050896 2023-01-22 20:39:36.062000: step: 514/470, loss: 0.007271386217325926 2023-01-22 20:39:36.882358: step: 516/470, loss: 0.0057012708857655525 2023-01-22 20:39:37.643010: step: 518/470, loss: 0.0004432882706169039 2023-01-22 20:39:38.318401: step: 520/470, loss: 0.0004786129866261035 2023-01-22 20:39:39.005305: step: 522/470, loss: 0.1716701239347458 2023-01-22 20:39:39.645277: step: 524/470, loss: 0.018754906952381134 2023-01-22 20:39:40.421106: step: 526/470, loss: 0.0010185715509578586 2023-01-22 20:39:41.305310: step: 528/470, loss: 0.012098163366317749 2023-01-22 20:39:42.149555: step: 530/470, loss: 0.04463057965040207 2023-01-22 20:39:42.838368: step: 532/470, loss: 0.001784288208000362 2023-01-22 20:39:43.572295: step: 534/470, loss: 0.040405258536338806 2023-01-22 20:39:44.227217: step: 536/470, loss: 0.016458706930279732 2023-01-22 20:39:44.962359: step: 538/470, loss: 0.037848204374313354 2023-01-22 20:39:45.606643: step: 540/470, loss: 0.13946174085140228 2023-01-22 20:39:46.273679: step: 542/470, loss: 0.15389235317707062 2023-01-22 20:39:46.975309: step: 544/470, loss: 0.0027951474767178297 2023-01-22 20:39:47.667793: step: 546/470, loss: 0.0260777585208416 2023-01-22 20:39:48.347369: step: 548/470, loss: 0.0007106401026248932 2023-01-22 20:39:49.076574: step: 550/470, loss: 0.011294242925941944 2023-01-22 20:39:49.797035: step: 552/470, loss: 0.0024706493131816387 2023-01-22 20:39:50.500536: step: 554/470, loss: 0.0029934581834822893 2023-01-22 20:39:51.267926: step: 556/470, loss: 0.03564335033297539 2023-01-22 20:39:51.881114: step: 558/470, loss: 0.0014540080446749926 2023-01-22 20:39:52.584493: step: 560/470, loss: 0.0077100833877921104 2023-01-22 20:39:53.349845: step: 562/470, loss: 0.0005074184155091643 2023-01-22 20:39:54.161634: step: 564/470, loss: 0.13422176241874695 2023-01-22 20:39:54.938270: step: 566/470, loss: 0.011121930554509163 2023-01-22 20:39:55.681260: step: 568/470, loss: 0.0019553580787032843 2023-01-22 20:39:56.363308: step: 570/470, loss: 0.12414314597845078 2023-01-22 20:39:57.074694: step: 572/470, loss: 0.22418749332427979 2023-01-22 20:39:57.821691: step: 574/470, loss: 0.02621358633041382 2023-01-22 20:39:58.569023: step: 576/470, loss: 0.033430345356464386 2023-01-22 20:39:59.362701: step: 578/470, loss: 0.025795383378863335 2023-01-22 20:40:00.128966: step: 580/470, loss: 0.04664403572678566 2023-01-22 20:40:00.834723: step: 582/470, loss: 0.04867984354496002 2023-01-22 20:40:01.530724: step: 584/470, loss: 0.022736769169569016 2023-01-22 20:40:02.233929: step: 586/470, loss: 0.0002613358956295997 2023-01-22 20:40:03.017019: step: 588/470, loss: 0.016873285174369812 2023-01-22 20:40:03.720954: step: 590/470, loss: 0.013911283574998379 2023-01-22 20:40:04.467939: step: 592/470, loss: 0.0659724697470665 2023-01-22 20:40:05.163337: step: 594/470, loss: 0.008108820766210556 2023-01-22 20:40:05.953209: step: 596/470, loss: 0.02690688520669937 2023-01-22 20:40:06.711402: step: 598/470, loss: 0.05805189535021782 2023-01-22 20:40:07.502070: step: 600/470, loss: 0.08737591654062271 2023-01-22 20:40:08.249192: step: 602/470, loss: 0.02579532191157341 2023-01-22 20:40:08.987398: step: 604/470, loss: 0.009756003506481647 2023-01-22 20:40:09.597344: step: 606/470, loss: 0.0011332191061228514 2023-01-22 20:40:10.371036: step: 608/470, loss: 0.014959428459405899 2023-01-22 20:40:11.146442: step: 610/470, loss: 0.014169977977871895 2023-01-22 20:40:11.835466: step: 612/470, loss: 0.007126861251890659 2023-01-22 20:40:12.519788: step: 614/470, loss: 0.03526424989104271 2023-01-22 20:40:13.269550: step: 616/470, loss: 0.0036206073127686977 2023-01-22 20:40:14.037000: step: 618/470, loss: 0.005215490702539682 2023-01-22 20:40:14.825728: step: 620/470, loss: 0.011563556268811226 2023-01-22 20:40:15.580391: step: 622/470, loss: 0.03009108267724514 2023-01-22 20:40:16.327758: step: 624/470, loss: 0.47227317094802856 2023-01-22 20:40:17.063081: step: 626/470, loss: 0.000962880440056324 2023-01-22 20:40:17.771219: step: 628/470, loss: 0.004227377008646727 2023-01-22 20:40:18.444942: step: 630/470, loss: 0.05415330082178116 2023-01-22 20:40:19.146814: step: 632/470, loss: 0.02446996420621872 2023-01-22 20:40:19.901675: step: 634/470, loss: 0.10332430154085159 2023-01-22 20:40:20.580506: step: 636/470, loss: 0.0059739393182098866 2023-01-22 20:40:21.362738: step: 638/470, loss: 0.0033245279919356108 2023-01-22 20:40:22.088954: step: 640/470, loss: 0.044418323785066605 2023-01-22 20:40:22.754027: step: 642/470, loss: 0.00199850439094007 2023-01-22 20:40:23.530954: step: 644/470, loss: 0.05675657093524933 2023-01-22 20:40:24.288523: step: 646/470, loss: 0.0008988552144728601 2023-01-22 20:40:25.018221: step: 648/470, loss: 0.013591834343969822 2023-01-22 20:40:25.728084: step: 650/470, loss: 0.003277568379417062 2023-01-22 20:40:26.458930: step: 652/470, loss: 0.002477661008015275 2023-01-22 20:40:27.197950: step: 654/470, loss: 0.011621094308793545 2023-01-22 20:40:27.885970: step: 656/470, loss: 0.008883575908839703 2023-01-22 20:40:28.565125: step: 658/470, loss: 0.016700388863682747 2023-01-22 20:40:29.294374: step: 660/470, loss: 0.013254842720925808 2023-01-22 20:40:30.042209: step: 662/470, loss: 0.04380848631262779 2023-01-22 20:40:30.792919: step: 664/470, loss: 0.2254922240972519 2023-01-22 20:40:31.556278: step: 666/470, loss: 0.00549095356836915 2023-01-22 20:40:32.242722: step: 668/470, loss: 0.041242629289627075 2023-01-22 20:40:32.959565: step: 670/470, loss: 0.002172822365537286 2023-01-22 20:40:33.630380: step: 672/470, loss: 0.0004444690130185336 2023-01-22 20:40:34.503699: step: 674/470, loss: 0.21776027977466583 2023-01-22 20:40:35.341508: step: 676/470, loss: 0.04360530152916908 2023-01-22 20:40:36.126415: step: 678/470, loss: 0.01206052303314209 2023-01-22 20:40:36.927525: step: 680/470, loss: 0.1170274093747139 2023-01-22 20:40:37.684828: step: 682/470, loss: 0.05427645891904831 2023-01-22 20:40:38.400244: step: 684/470, loss: 0.025601759552955627 2023-01-22 20:40:39.188600: step: 686/470, loss: 0.027646034955978394 2023-01-22 20:40:39.875977: step: 688/470, loss: 0.18431520462036133 2023-01-22 20:40:40.632507: step: 690/470, loss: 0.0012170057743787766 2023-01-22 20:40:41.372432: step: 692/470, loss: 0.007006289437413216 2023-01-22 20:40:42.077569: step: 694/470, loss: 0.0009528612717986107 2023-01-22 20:40:42.801151: step: 696/470, loss: 0.11783985793590546 2023-01-22 20:40:43.478886: step: 698/470, loss: 0.038966577500104904 2023-01-22 20:40:44.193257: step: 700/470, loss: 0.008607025258243084 2023-01-22 20:40:44.918514: step: 702/470, loss: 0.024364376440644264 2023-01-22 20:40:45.713479: step: 704/470, loss: 0.030996840447187424 2023-01-22 20:40:46.464310: step: 706/470, loss: 0.03350379317998886 2023-01-22 20:40:47.080928: step: 708/470, loss: 0.0056647504679858685 2023-01-22 20:40:47.784382: step: 710/470, loss: 0.011958747170865536 2023-01-22 20:40:48.562623: step: 712/470, loss: 0.07865112274885178 2023-01-22 20:40:49.346462: step: 714/470, loss: 0.03213072568178177 2023-01-22 20:40:50.129443: step: 716/470, loss: 0.04514642804861069 2023-01-22 20:40:50.865107: step: 718/470, loss: 0.004229575861245394 2023-01-22 20:40:51.661209: step: 720/470, loss: 0.025142908096313477 2023-01-22 20:40:52.415424: step: 722/470, loss: 0.16131198406219482 2023-01-22 20:40:53.052102: step: 724/470, loss: 0.0029195209499448538 2023-01-22 20:40:53.794419: step: 726/470, loss: 0.06855201721191406 2023-01-22 20:40:54.479818: step: 728/470, loss: 0.006867049727588892 2023-01-22 20:40:55.235549: step: 730/470, loss: 0.004258542787283659 2023-01-22 20:40:55.903814: step: 732/470, loss: 0.0028415187261998653 2023-01-22 20:40:56.621502: step: 734/470, loss: 0.0037412350066006184 2023-01-22 20:40:57.399961: step: 736/470, loss: 0.0005636032437905669 2023-01-22 20:40:58.098729: step: 738/470, loss: 0.03270193934440613 2023-01-22 20:40:58.810039: step: 740/470, loss: 0.023219116032123566 2023-01-22 20:40:59.533219: step: 742/470, loss: 0.01399591937661171 2023-01-22 20:41:00.223308: step: 744/470, loss: 0.12446845322847366 2023-01-22 20:41:00.906887: step: 746/470, loss: 0.0018124807393178344 2023-01-22 20:41:01.708465: step: 748/470, loss: 0.025428785011172295 2023-01-22 20:41:02.384321: step: 750/470, loss: 0.005672887898981571 2023-01-22 20:41:03.090911: step: 752/470, loss: 0.002514556283131242 2023-01-22 20:41:03.922155: step: 754/470, loss: 0.016503768041729927 2023-01-22 20:41:04.809953: step: 756/470, loss: 0.001229889108799398 2023-01-22 20:41:05.433043: step: 758/470, loss: 0.008788308128714561 2023-01-22 20:41:06.225132: step: 760/470, loss: 0.030882153660058975 2023-01-22 20:41:06.926501: step: 762/470, loss: 0.027840539813041687 2023-01-22 20:41:07.703456: step: 764/470, loss: 0.0054314760491251945 2023-01-22 20:41:08.468642: step: 766/470, loss: 0.021803250536322594 2023-01-22 20:41:09.222582: step: 768/470, loss: 0.030251774936914444 2023-01-22 20:41:09.914955: step: 770/470, loss: 0.10658083856105804 2023-01-22 20:41:10.603435: step: 772/470, loss: 0.08436030149459839 2023-01-22 20:41:11.331992: step: 774/470, loss: 0.0032615482341498137 2023-01-22 20:41:11.991390: step: 776/470, loss: 0.01066649705171585 2023-01-22 20:41:12.743036: step: 778/470, loss: 0.009199859574437141 2023-01-22 20:41:13.495370: step: 780/470, loss: 0.018781311810016632 2023-01-22 20:41:14.204349: step: 782/470, loss: 0.020045407116413116 2023-01-22 20:41:14.953809: step: 784/470, loss: 0.02236953005194664 2023-01-22 20:41:15.717375: step: 786/470, loss: 0.01765470579266548 2023-01-22 20:41:16.459155: step: 788/470, loss: 0.18303386867046356 2023-01-22 20:41:17.286812: step: 790/470, loss: 0.004157851915806532 2023-01-22 20:41:17.971952: step: 792/470, loss: 0.017420660704374313 2023-01-22 20:41:18.744735: step: 794/470, loss: 0.026377053931355476 2023-01-22 20:41:19.453197: step: 796/470, loss: 0.057920631021261215 2023-01-22 20:41:20.210181: step: 798/470, loss: 0.015336936339735985 2023-01-22 20:41:20.945814: step: 800/470, loss: 0.19589649140834808 2023-01-22 20:41:21.586634: step: 802/470, loss: 0.08331488817930222 2023-01-22 20:41:22.358102: step: 804/470, loss: 0.1330551952123642 2023-01-22 20:41:23.079908: step: 806/470, loss: 0.007211843505501747 2023-01-22 20:41:23.810001: step: 808/470, loss: 0.024340027943253517 2023-01-22 20:41:24.605575: step: 810/470, loss: 0.04757057875394821 2023-01-22 20:41:25.310069: step: 812/470, loss: 0.01020081341266632 2023-01-22 20:41:26.027497: step: 814/470, loss: 0.041293397545814514 2023-01-22 20:41:26.758531: step: 816/470, loss: 0.02104293741285801 2023-01-22 20:41:27.570706: step: 818/470, loss: 0.08056701719760895 2023-01-22 20:41:28.397096: step: 820/470, loss: 0.013175521977245808 2023-01-22 20:41:29.119731: step: 822/470, loss: 0.04553241282701492 2023-01-22 20:41:29.872215: step: 824/470, loss: 0.07833508402109146 2023-01-22 20:41:30.640055: step: 826/470, loss: 0.027197131887078285 2023-01-22 20:41:31.361715: step: 828/470, loss: 0.007502416614443064 2023-01-22 20:41:32.101944: step: 830/470, loss: 0.04629238322377205 2023-01-22 20:41:32.792345: step: 832/470, loss: 0.0031321379356086254 2023-01-22 20:41:33.474807: step: 834/470, loss: 0.007129800971597433 2023-01-22 20:41:34.172901: step: 836/470, loss: 0.019213836640119553 2023-01-22 20:41:34.866521: step: 838/470, loss: 0.003881721990182996 2023-01-22 20:41:35.750475: step: 840/470, loss: 0.02087746188044548 2023-01-22 20:41:36.446506: step: 842/470, loss: 0.001440670806914568 2023-01-22 20:41:37.150504: step: 844/470, loss: 0.011802955530583858 2023-01-22 20:41:37.860192: step: 846/470, loss: 0.0013450667029246688 2023-01-22 20:41:38.559484: step: 848/470, loss: 0.03148926421999931 2023-01-22 20:41:39.316664: step: 850/470, loss: 0.08651398122310638 2023-01-22 20:41:40.173644: step: 852/470, loss: 0.03913048282265663 2023-01-22 20:41:40.872667: step: 854/470, loss: 0.04345300793647766 2023-01-22 20:41:41.586625: step: 856/470, loss: 0.0018893154338002205 2023-01-22 20:41:42.235740: step: 858/470, loss: 8.755026647122577e-05 2023-01-22 20:41:42.987367: step: 860/470, loss: 0.02182687819004059 2023-01-22 20:41:43.717046: step: 862/470, loss: 0.013897470198571682 2023-01-22 20:41:44.439308: step: 864/470, loss: 0.00483159814029932 2023-01-22 20:41:45.186610: step: 866/470, loss: 0.009607330895960331 2023-01-22 20:41:45.886974: step: 868/470, loss: 0.015445969067513943 2023-01-22 20:41:46.649619: step: 870/470, loss: 0.032280661165714264 2023-01-22 20:41:47.378711: step: 872/470, loss: 0.09819857031106949 2023-01-22 20:41:48.140486: step: 874/470, loss: 0.022332623600959778 2023-01-22 20:41:48.871953: step: 876/470, loss: 0.05316994711756706 2023-01-22 20:41:49.598705: step: 878/470, loss: 0.09301023185253143 2023-01-22 20:41:50.317124: step: 880/470, loss: 0.014414233155548573 2023-01-22 20:41:51.018804: step: 882/470, loss: 0.7137559056282043 2023-01-22 20:41:51.790709: step: 884/470, loss: 0.020797649398446083 2023-01-22 20:41:52.553567: step: 886/470, loss: 0.05245582386851311 2023-01-22 20:41:53.340186: step: 888/470, loss: 0.09029704332351685 2023-01-22 20:41:53.970646: step: 890/470, loss: 0.007554202806204557 2023-01-22 20:41:54.697488: step: 892/470, loss: 0.03437653183937073 2023-01-22 20:41:55.396609: step: 894/470, loss: 0.0020255944691598415 2023-01-22 20:41:56.150835: step: 896/470, loss: 0.11641070246696472 2023-01-22 20:41:56.905817: step: 898/470, loss: 0.11411511898040771 2023-01-22 20:41:57.614084: step: 900/470, loss: 0.028752509504556656 2023-01-22 20:41:58.320294: step: 902/470, loss: 0.020708488300442696 2023-01-22 20:41:59.069449: step: 904/470, loss: 0.06170295178890228 2023-01-22 20:42:00.055692: step: 906/470, loss: 0.030286438763141632 2023-01-22 20:42:00.848525: step: 908/470, loss: 0.011742115952074528 2023-01-22 20:42:01.557945: step: 910/470, loss: 0.008268856443464756 2023-01-22 20:42:02.280046: step: 912/470, loss: 0.07056379318237305 2023-01-22 20:42:02.922464: step: 914/470, loss: 0.0315357968211174 2023-01-22 20:42:03.685751: step: 916/470, loss: 0.032394226640462875 2023-01-22 20:42:04.450501: step: 918/470, loss: 0.012661329470574856 2023-01-22 20:42:05.141393: step: 920/470, loss: 0.06156188249588013 2023-01-22 20:42:05.909339: step: 922/470, loss: 0.0035831385757774115 2023-01-22 20:42:06.667105: step: 924/470, loss: 0.00764728058129549 2023-01-22 20:42:07.462860: step: 926/470, loss: 0.026822997257113457 2023-01-22 20:42:08.211212: step: 928/470, loss: 0.01422158908098936 2023-01-22 20:42:08.996720: step: 930/470, loss: 0.012633465230464935 2023-01-22 20:42:09.739066: step: 932/470, loss: 0.0021819123066961765 2023-01-22 20:42:10.446534: step: 934/470, loss: 0.09248220920562744 2023-01-22 20:42:11.134435: step: 936/470, loss: 0.019571691751480103 2023-01-22 20:42:11.847682: step: 938/470, loss: 0.37316635251045227 2023-01-22 20:42:12.674491: step: 940/470, loss: 0.016227828338742256 2023-01-22 20:42:13.349375: step: 942/470, loss: 0.004429019521921873 ================================================== Loss: 0.044 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2982536367953035, 'r': 0.3361720308470783, 'f1': 0.31607967931562936}, 'combined': 0.23290081633783213, 'epoch': 29} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.36339379413674205, 'r': 0.34242876755193, 'f1': 0.3525999190633735}, 'combined': 0.24559198342722535, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29809959887155074, 'r': 0.337695371017677, 'f1': 0.31666452050945865}, 'combined': 0.23333175195433795, 'epoch': 29} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3720185925866563, 'r': 0.34483261851301605, 'f1': 0.35791010304744186}, 'combined': 0.24929061903801922, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28678933021806857, 'r': 0.3493714421252372, 'f1': 0.3150021385799829}, 'combined': 0.2321068389536716, 'epoch': 29} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35292353630625145, 'r': 0.3444397974527358, 'f1': 0.34863006262855983}, 'combined': 0.24282690929352427, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23863636363636365, 'r': 0.375, 'f1': 0.2916666666666667}, 'combined': 0.19444444444444445, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28125, 'r': 0.4891304347826087, 'f1': 0.3571428571428572}, 'combined': 0.1785714285714286, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40625, 'r': 0.33620689655172414, 'f1': 0.36792452830188677}, 'combined': 0.2452830188679245, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 20:44:47.661825: step: 2/470, loss: 0.0015534983249381185 2023-01-22 20:44:48.355499: step: 4/470, loss: 0.0009303026017732918 2023-01-22 20:44:49.067052: step: 6/470, loss: 0.07198537886142731 2023-01-22 20:44:49.803204: step: 8/470, loss: 0.0074896845035254955 2023-01-22 20:44:50.581579: step: 10/470, loss: 0.0020844112150371075 2023-01-22 20:44:51.362441: step: 12/470, loss: 0.3400344252586365 2023-01-22 20:44:52.070892: step: 14/470, loss: 0.019580742344260216 2023-01-22 20:44:52.800474: step: 16/470, loss: 0.005592713598161936 2023-01-22 20:44:53.505049: step: 18/470, loss: 0.004001240245997906 2023-01-22 20:44:54.271968: step: 20/470, loss: 0.04294969141483307 2023-01-22 20:44:55.000824: step: 22/470, loss: 0.030762318521738052 2023-01-22 20:44:55.730100: step: 24/470, loss: 0.07296521216630936 2023-01-22 20:44:56.407008: step: 26/470, loss: 0.019644813612103462 2023-01-22 20:44:57.170784: step: 28/470, loss: 0.0019523368682712317 2023-01-22 20:44:57.871869: step: 30/470, loss: 0.004232059698551893 2023-01-22 20:44:58.605159: step: 32/470, loss: 0.00470116687938571 2023-01-22 20:44:59.342501: step: 34/470, loss: 3.0470375349977985e-05 2023-01-22 20:45:00.140991: step: 36/470, loss: 0.014174572192132473 2023-01-22 20:45:00.926519: step: 38/470, loss: 0.049307264387607574 2023-01-22 20:45:01.667213: step: 40/470, loss: 0.0007710535428486764 2023-01-22 20:45:02.387084: step: 42/470, loss: 0.004445178434252739 2023-01-22 20:45:03.128753: step: 44/470, loss: 0.006911000236868858 2023-01-22 20:45:03.793861: step: 46/470, loss: 0.007463586516678333 2023-01-22 20:45:04.572121: step: 48/470, loss: 0.00076903315493837 2023-01-22 20:45:05.394731: step: 50/470, loss: 0.03810786083340645 2023-01-22 20:45:06.135279: step: 52/470, loss: 0.029291216284036636 2023-01-22 20:45:06.805275: step: 54/470, loss: 0.00221418053843081 2023-01-22 20:45:07.591242: step: 56/470, loss: 0.0012072960380464792 2023-01-22 20:45:08.371498: step: 58/470, loss: 0.015327895060181618 2023-01-22 20:45:09.096380: step: 60/470, loss: 0.0036242054775357246 2023-01-22 20:45:09.851304: step: 62/470, loss: 0.039196036756038666 2023-01-22 20:45:10.542637: step: 64/470, loss: 0.004683348350226879 2023-01-22 20:45:11.251190: step: 66/470, loss: 0.005313752684742212 2023-01-22 20:45:12.002100: step: 68/470, loss: 0.003183474065735936 2023-01-22 20:45:12.679299: step: 70/470, loss: 0.042856525629758835 2023-01-22 20:45:13.304761: step: 72/470, loss: 0.015468944795429707 2023-01-22 20:45:13.955509: step: 74/470, loss: 0.003673919942229986 2023-01-22 20:45:14.621676: step: 76/470, loss: 0.0050133909098804 2023-01-22 20:45:15.310416: step: 78/470, loss: 0.0003503368643578142 2023-01-22 20:45:16.004789: step: 80/470, loss: 0.00029192844522185624 2023-01-22 20:45:16.756129: step: 82/470, loss: 0.002908646361902356 2023-01-22 20:45:17.534570: step: 84/470, loss: 0.041369758546352386 2023-01-22 20:45:18.312161: step: 86/470, loss: 0.011434613727033138 2023-01-22 20:45:19.049982: step: 88/470, loss: 0.014062466099858284 2023-01-22 20:45:19.798359: step: 90/470, loss: 0.012946287170052528 2023-01-22 20:45:20.533506: step: 92/470, loss: 0.0006630457355640829 2023-01-22 20:45:21.227837: step: 94/470, loss: 0.02848971076309681 2023-01-22 20:45:21.913872: step: 96/470, loss: 0.0033628770615905523 2023-01-22 20:45:22.655508: step: 98/470, loss: 0.02601010911166668 2023-01-22 20:45:23.376951: step: 100/470, loss: 0.13308343291282654 2023-01-22 20:45:24.142190: step: 102/470, loss: 0.02286795899271965 2023-01-22 20:45:24.976789: step: 104/470, loss: 0.006091211922466755 2023-01-22 20:45:25.669383: step: 106/470, loss: 0.0013653002679347992 2023-01-22 20:45:26.437125: step: 108/470, loss: 0.008479597978293896 2023-01-22 20:45:27.176479: step: 110/470, loss: 0.035104185342788696 2023-01-22 20:45:27.855116: step: 112/470, loss: 0.01217788178473711 2023-01-22 20:45:28.530420: step: 114/470, loss: 0.009542414918541908 2023-01-22 20:45:29.245998: step: 116/470, loss: 0.0510413832962513 2023-01-22 20:45:29.882435: step: 118/470, loss: 0.010977623052895069 2023-01-22 20:45:30.578323: step: 120/470, loss: 0.008249633945524693 2023-01-22 20:45:31.324208: step: 122/470, loss: 0.013754217885434628 2023-01-22 20:45:32.098874: step: 124/470, loss: 0.000642959144897759 2023-01-22 20:45:32.839060: step: 126/470, loss: 0.01749761775135994 2023-01-22 20:45:33.509737: step: 128/470, loss: 0.033281996846199036 2023-01-22 20:45:34.260971: step: 130/470, loss: 0.0049248565919697285 2023-01-22 20:45:34.956581: step: 132/470, loss: 0.080209881067276 2023-01-22 20:45:35.633898: step: 134/470, loss: 0.0143387196585536 2023-01-22 20:45:36.361031: step: 136/470, loss: 0.04557420313358307 2023-01-22 20:45:37.089683: step: 138/470, loss: 0.005927293561398983 2023-01-22 20:45:37.738272: step: 140/470, loss: 0.004758467432111502 2023-01-22 20:45:38.541297: step: 142/470, loss: 0.01647905260324478 2023-01-22 20:45:39.252114: step: 144/470, loss: 0.0004897841135971248 2023-01-22 20:45:39.988120: step: 146/470, loss: 0.2584097981452942 2023-01-22 20:45:40.715477: step: 148/470, loss: 0.01904984377324581 2023-01-22 20:45:41.408009: step: 150/470, loss: 0.000754874141421169 2023-01-22 20:45:42.088476: step: 152/470, loss: 0.0361163467168808 2023-01-22 20:45:42.911963: step: 154/470, loss: 0.046868957579135895 2023-01-22 20:45:43.585101: step: 156/470, loss: 0.006643644534051418 2023-01-22 20:45:44.355359: step: 158/470, loss: 0.03360613062977791 2023-01-22 20:45:45.052620: step: 160/470, loss: 0.0265851728618145 2023-01-22 20:45:45.743429: step: 162/470, loss: 0.002698094118386507 2023-01-22 20:45:46.436081: step: 164/470, loss: 0.0029194336384534836 2023-01-22 20:45:47.135798: step: 166/470, loss: 0.00015936991258058697 2023-01-22 20:45:47.876267: step: 168/470, loss: 0.011427337303757668 2023-01-22 20:45:48.591180: step: 170/470, loss: 0.0039392574690282345 2023-01-22 20:45:49.397781: step: 172/470, loss: 0.002686795312911272 2023-01-22 20:45:50.131010: step: 174/470, loss: 0.0013270019553601742 2023-01-22 20:45:50.869367: step: 176/470, loss: 0.0032820103224366903 2023-01-22 20:45:51.563144: step: 178/470, loss: 0.008707202039659023 2023-01-22 20:45:52.284792: step: 180/470, loss: 0.07832004129886627 2023-01-22 20:45:52.951226: step: 182/470, loss: 0.00024435168597847223 2023-01-22 20:45:53.618364: step: 184/470, loss: 0.019537262618541718 2023-01-22 20:45:54.414749: step: 186/470, loss: 0.024349162355065346 2023-01-22 20:45:55.133581: step: 188/470, loss: 0.026107704266905785 2023-01-22 20:45:55.832462: step: 190/470, loss: 0.2534019351005554 2023-01-22 20:45:56.526296: step: 192/470, loss: 0.022054988890886307 2023-01-22 20:45:57.165438: step: 194/470, loss: 0.00017441021918784827 2023-01-22 20:45:57.825309: step: 196/470, loss: 0.0004912808071821928 2023-01-22 20:45:58.533013: step: 198/470, loss: 0.006434681825339794 2023-01-22 20:45:59.293754: step: 200/470, loss: 0.041142772883176804 2023-01-22 20:46:00.032549: step: 202/470, loss: 0.010733246803283691 2023-01-22 20:46:00.702982: step: 204/470, loss: 0.00284022931009531 2023-01-22 20:46:01.450498: step: 206/470, loss: 0.04028492793440819 2023-01-22 20:46:02.295791: step: 208/470, loss: 0.04366816207766533 2023-01-22 20:46:02.944340: step: 210/470, loss: 0.002009680727496743 2023-01-22 20:46:03.637073: step: 212/470, loss: 0.04488565772771835 2023-01-22 20:46:04.353762: step: 214/470, loss: 0.004889700096100569 2023-01-22 20:46:05.064867: step: 216/470, loss: 0.029589461162686348 2023-01-22 20:46:05.823836: step: 218/470, loss: 0.08454018831253052 2023-01-22 20:46:06.564183: step: 220/470, loss: 0.015738148242235184 2023-01-22 20:46:07.296765: step: 222/470, loss: 3.354827404022217 2023-01-22 20:46:08.027491: step: 224/470, loss: 0.0028114912565797567 2023-01-22 20:46:08.722156: step: 226/470, loss: 0.03550800308585167 2023-01-22 20:46:09.435183: step: 228/470, loss: 0.044054701924324036 2023-01-22 20:46:10.150305: step: 230/470, loss: 0.09976823627948761 2023-01-22 20:46:10.914555: step: 232/470, loss: 0.008051842451095581 2023-01-22 20:46:11.689709: step: 234/470, loss: 0.08605451136827469 2023-01-22 20:46:12.455805: step: 236/470, loss: 0.0013634329661726952 2023-01-22 20:46:13.169370: step: 238/470, loss: 0.0263601616024971 2023-01-22 20:46:13.841178: step: 240/470, loss: 0.05547104775905609 2023-01-22 20:46:14.589103: step: 242/470, loss: 0.03622487559914589 2023-01-22 20:46:15.259899: step: 244/470, loss: 0.0007956080371513963 2023-01-22 20:46:15.991477: step: 246/470, loss: 0.0422469861805439 2023-01-22 20:46:16.783412: step: 248/470, loss: 0.019485946744680405 2023-01-22 20:46:17.474721: step: 250/470, loss: 4.62792013422586e-05 2023-01-22 20:46:18.311880: step: 252/470, loss: 0.011375891044735909 2023-01-22 20:46:19.024136: step: 254/470, loss: 0.027469327673316002 2023-01-22 20:46:19.738651: step: 256/470, loss: 0.0730309933423996 2023-01-22 20:46:20.575812: step: 258/470, loss: 0.1251501590013504 2023-01-22 20:46:21.615998: step: 260/470, loss: 0.0181076992303133 2023-01-22 20:46:22.369170: step: 262/470, loss: 0.7937953472137451 2023-01-22 20:46:23.115106: step: 264/470, loss: 0.004769394174218178 2023-01-22 20:46:23.868724: step: 266/470, loss: 0.03229885548353195 2023-01-22 20:46:24.549347: step: 268/470, loss: 0.007303445599973202 2023-01-22 20:46:25.335645: step: 270/470, loss: 0.058055225759744644 2023-01-22 20:46:26.090814: step: 272/470, loss: 0.34732547402381897 2023-01-22 20:46:26.721615: step: 274/470, loss: 0.015425494872033596 2023-01-22 20:46:27.406499: step: 276/470, loss: 0.012530090287327766 2023-01-22 20:46:28.212877: step: 278/470, loss: 0.015414786525070667 2023-01-22 20:46:29.045453: step: 280/470, loss: 0.031069021672010422 2023-01-22 20:46:29.712441: step: 282/470, loss: 0.026498055085539818 2023-01-22 20:46:30.497457: step: 284/470, loss: 0.004659529775381088 2023-01-22 20:46:31.322238: step: 286/470, loss: 0.02883639559149742 2023-01-22 20:46:32.103038: step: 288/470, loss: 0.0019614913035184145 2023-01-22 20:46:33.501559: step: 290/470, loss: 0.021054117009043694 2023-01-22 20:46:34.219821: step: 292/470, loss: 0.0282586757093668 2023-01-22 20:46:34.953145: step: 294/470, loss: 0.05488167703151703 2023-01-22 20:46:35.815543: step: 296/470, loss: 0.0710349828004837 2023-01-22 20:46:36.510015: step: 298/470, loss: 0.028987431898713112 2023-01-22 20:46:37.216404: step: 300/470, loss: 0.017589028924703598 2023-01-22 20:46:38.005538: step: 302/470, loss: 0.03203602880239487 2023-01-22 20:46:38.720889: step: 304/470, loss: 0.024194780737161636 2023-01-22 20:46:39.456805: step: 306/470, loss: 0.0027428928297013044 2023-01-22 20:46:40.099745: step: 308/470, loss: 0.08572112768888474 2023-01-22 20:46:40.969056: step: 310/470, loss: 0.06660182774066925 2023-01-22 20:46:41.722470: step: 312/470, loss: 0.09423809498548508 2023-01-22 20:46:42.515868: step: 314/470, loss: 0.03433975949883461 2023-01-22 20:46:43.370597: step: 316/470, loss: 0.04957527294754982 2023-01-22 20:46:44.081701: step: 318/470, loss: 0.0038216973189264536 2023-01-22 20:46:44.850976: step: 320/470, loss: 0.0018495884723961353 2023-01-22 20:46:45.607231: step: 322/470, loss: 0.14368680119514465 2023-01-22 20:46:46.302704: step: 324/470, loss: 0.002256029052659869 2023-01-22 20:46:47.038855: step: 326/470, loss: 0.017123982310295105 2023-01-22 20:46:47.704962: step: 328/470, loss: 0.18741732835769653 2023-01-22 20:46:48.424522: step: 330/470, loss: 0.014922713860869408 2023-01-22 20:46:49.147669: step: 332/470, loss: 0.03767160698771477 2023-01-22 20:46:49.871092: step: 334/470, loss: 0.05126776546239853 2023-01-22 20:46:50.520654: step: 336/470, loss: 0.01508275605738163 2023-01-22 20:46:51.224086: step: 338/470, loss: 0.00034520160988904536 2023-01-22 20:46:51.957508: step: 340/470, loss: 0.04148537665605545 2023-01-22 20:46:52.605053: step: 342/470, loss: 5.96605495957192e-05 2023-01-22 20:46:53.347545: step: 344/470, loss: 0.005885576829314232 2023-01-22 20:46:54.038121: step: 346/470, loss: 0.008368241600692272 2023-01-22 20:46:54.698149: step: 348/470, loss: 0.015697935596108437 2023-01-22 20:46:55.532463: step: 350/470, loss: 0.019847042858600616 2023-01-22 20:46:56.281820: step: 352/470, loss: 0.02182493358850479 2023-01-22 20:46:57.012362: step: 354/470, loss: 0.01692495495080948 2023-01-22 20:46:57.765699: step: 356/470, loss: 0.011305560357868671 2023-01-22 20:46:58.455365: step: 358/470, loss: 0.0026786348316818476 2023-01-22 20:46:59.177438: step: 360/470, loss: 0.0002164768666261807 2023-01-22 20:46:59.850537: step: 362/470, loss: 0.6910086870193481 2023-01-22 20:47:00.592094: step: 364/470, loss: 0.00040226319106295705 2023-01-22 20:47:01.295548: step: 366/470, loss: 0.016148222610354424 2023-01-22 20:47:01.981458: step: 368/470, loss: 0.027793431654572487 2023-01-22 20:47:02.716871: step: 370/470, loss: 0.006164975464344025 2023-01-22 20:47:03.404705: step: 372/470, loss: 0.01071165595203638 2023-01-22 20:47:04.093419: step: 374/470, loss: 0.0014239969896152616 2023-01-22 20:47:04.802829: step: 376/470, loss: 0.0013824844500049949 2023-01-22 20:47:05.560564: step: 378/470, loss: 0.004417424090206623 2023-01-22 20:47:06.300972: step: 380/470, loss: 0.02893391251564026 2023-01-22 20:47:06.999956: step: 382/470, loss: 0.030614320188760757 2023-01-22 20:47:07.687765: step: 384/470, loss: 0.001032169908285141 2023-01-22 20:47:08.393060: step: 386/470, loss: 0.007282666862010956 2023-01-22 20:47:09.090288: step: 388/470, loss: 0.01349354162812233 2023-01-22 20:47:09.815777: step: 390/470, loss: 0.015124008990824223 2023-01-22 20:47:10.511770: step: 392/470, loss: 0.013127674348652363 2023-01-22 20:47:11.160859: step: 394/470, loss: 0.032045748084783554 2023-01-22 20:47:11.900064: step: 396/470, loss: 0.2856377959251404 2023-01-22 20:47:12.628678: step: 398/470, loss: 0.020067989826202393 2023-01-22 20:47:13.398377: step: 400/470, loss: 0.008606786839663982 2023-01-22 20:47:14.150353: step: 402/470, loss: 0.018849369138479233 2023-01-22 20:47:14.847754: step: 404/470, loss: 0.002814099658280611 2023-01-22 20:47:15.507539: step: 406/470, loss: 0.0020936380606144667 2023-01-22 20:47:16.382521: step: 408/470, loss: 0.011628488078713417 2023-01-22 20:47:17.122558: step: 410/470, loss: 0.007815473712980747 2023-01-22 20:47:17.808244: step: 412/470, loss: 0.002352564362809062 2023-01-22 20:47:18.521671: step: 414/470, loss: 0.03523825854063034 2023-01-22 20:47:19.230383: step: 416/470, loss: 0.006999637931585312 2023-01-22 20:47:19.985245: step: 418/470, loss: 0.024556517601013184 2023-01-22 20:47:20.754710: step: 420/470, loss: 0.0016526913968846202 2023-01-22 20:47:21.486517: step: 422/470, loss: 0.013159074820578098 2023-01-22 20:47:22.144096: step: 424/470, loss: 0.023464815691113472 2023-01-22 20:47:22.912560: step: 426/470, loss: 0.0028531746938824654 2023-01-22 20:47:23.588840: step: 428/470, loss: 0.036497414112091064 2023-01-22 20:47:24.362934: step: 430/470, loss: 0.004592817276716232 2023-01-22 20:47:25.078784: step: 432/470, loss: 0.0013164383126422763 2023-01-22 20:47:25.754106: step: 434/470, loss: 0.00658793468028307 2023-01-22 20:47:26.529145: step: 436/470, loss: 0.514223575592041 2023-01-22 20:47:27.222954: step: 438/470, loss: 0.003137575928121805 2023-01-22 20:47:28.006668: step: 440/470, loss: 0.04794734716415405 2023-01-22 20:47:28.698045: step: 442/470, loss: 0.007328708656132221 2023-01-22 20:47:29.458615: step: 444/470, loss: 0.005198894999921322 2023-01-22 20:47:30.229576: step: 446/470, loss: 0.006771470420062542 2023-01-22 20:47:30.941573: step: 448/470, loss: 0.017400279641151428 2023-01-22 20:47:31.602541: step: 450/470, loss: 0.025920528918504715 2023-01-22 20:47:32.262941: step: 452/470, loss: 0.0040763262659311295 2023-01-22 20:47:32.981512: step: 454/470, loss: 0.02771720290184021 2023-01-22 20:47:33.774230: step: 456/470, loss: 0.007550915703177452 2023-01-22 20:47:34.508670: step: 458/470, loss: 0.021621685475111008 2023-01-22 20:47:35.229054: step: 460/470, loss: 0.022162331268191338 2023-01-22 20:47:35.968726: step: 462/470, loss: 0.0016908899415284395 2023-01-22 20:47:36.666130: step: 464/470, loss: 0.008422368206083775 2023-01-22 20:47:37.399586: step: 466/470, loss: 0.04641232267022133 2023-01-22 20:47:38.111515: step: 468/470, loss: 0.004679789766669273 2023-01-22 20:47:38.866675: step: 470/470, loss: 0.01792057789862156 2023-01-22 20:47:39.643019: step: 472/470, loss: 0.0020182339940220118 2023-01-22 20:47:40.437586: step: 474/470, loss: 0.011343245394527912 2023-01-22 20:47:41.215020: step: 476/470, loss: 0.08873055130243301 2023-01-22 20:47:41.997815: step: 478/470, loss: 0.007021929137408733 2023-01-22 20:47:42.682791: step: 480/470, loss: 0.0010284853633493185 2023-01-22 20:47:43.430322: step: 482/470, loss: 0.002863482804968953 2023-01-22 20:47:44.217794: step: 484/470, loss: 0.00471901660785079 2023-01-22 20:47:44.945322: step: 486/470, loss: 0.1730116307735443 2023-01-22 20:47:45.746614: step: 488/470, loss: 0.005156666971743107 2023-01-22 20:47:46.438969: step: 490/470, loss: 0.0037299960386008024 2023-01-22 20:47:47.090241: step: 492/470, loss: 0.003205210203304887 2023-01-22 20:47:47.899580: step: 494/470, loss: 0.008830721490085125 2023-01-22 20:47:48.623992: step: 496/470, loss: 0.06417898088693619 2023-01-22 20:47:49.401900: step: 498/470, loss: 0.04083579033613205 2023-01-22 20:47:50.188814: step: 500/470, loss: 3.602713108062744 2023-01-22 20:47:50.987243: step: 502/470, loss: 0.004617447033524513 2023-01-22 20:47:51.690368: step: 504/470, loss: 0.028277534991502762 2023-01-22 20:47:52.433486: step: 506/470, loss: 0.03145125135779381 2023-01-22 20:47:53.168478: step: 508/470, loss: 0.014155956916511059 2023-01-22 20:47:53.896219: step: 510/470, loss: 0.022612107917666435 2023-01-22 20:47:54.647610: step: 512/470, loss: 0.009106074459850788 2023-01-22 20:47:55.420447: step: 514/470, loss: 0.027399810031056404 2023-01-22 20:47:56.253327: step: 516/470, loss: 0.014880353584885597 2023-01-22 20:47:57.085202: step: 518/470, loss: 0.002752532484009862 2023-01-22 20:47:57.900658: step: 520/470, loss: 0.01837068982422352 2023-01-22 20:47:58.640258: step: 522/470, loss: 0.0025046353694051504 2023-01-22 20:47:59.386945: step: 524/470, loss: 0.02363811433315277 2023-01-22 20:48:00.180265: step: 526/470, loss: 0.02377389930188656 2023-01-22 20:48:00.918346: step: 528/470, loss: 0.0021252078004181385 2023-01-22 20:48:01.652068: step: 530/470, loss: 0.011902587488293648 2023-01-22 20:48:02.412345: step: 532/470, loss: 0.0007203198038041592 2023-01-22 20:48:03.117603: step: 534/470, loss: 0.015076599083840847 2023-01-22 20:48:03.912582: step: 536/470, loss: 0.021700425073504448 2023-01-22 20:48:04.621902: step: 538/470, loss: 0.08858056366443634 2023-01-22 20:48:05.366047: step: 540/470, loss: 0.02474750392138958 2023-01-22 20:48:06.041814: step: 542/470, loss: 0.009075379930436611 2023-01-22 20:48:06.737832: step: 544/470, loss: 0.014052581042051315 2023-01-22 20:48:07.507136: step: 546/470, loss: 0.005053224507719278 2023-01-22 20:48:08.264732: step: 548/470, loss: 0.00576430419459939 2023-01-22 20:48:09.040053: step: 550/470, loss: 0.005547594279050827 2023-01-22 20:48:09.826776: step: 552/470, loss: 0.007961531169712543 2023-01-22 20:48:10.575981: step: 554/470, loss: 0.0057564605958759785 2023-01-22 20:48:11.292133: step: 556/470, loss: 0.007082703057676554 2023-01-22 20:48:12.103148: step: 558/470, loss: 0.03066232055425644 2023-01-22 20:48:12.828302: step: 560/470, loss: 0.006466844584792852 2023-01-22 20:48:13.651335: step: 562/470, loss: 0.018887002021074295 2023-01-22 20:48:14.427002: step: 564/470, loss: 0.04409300163388252 2023-01-22 20:48:15.142284: step: 566/470, loss: 0.01083281822502613 2023-01-22 20:48:15.810049: step: 568/470, loss: 0.001143494970165193 2023-01-22 20:48:16.606694: step: 570/470, loss: 0.032482851296663284 2023-01-22 20:48:17.296090: step: 572/470, loss: 0.016237609088420868 2023-01-22 20:48:17.963689: step: 574/470, loss: 0.011964457109570503 2023-01-22 20:48:18.877575: step: 576/470, loss: 0.045262232422828674 2023-01-22 20:48:19.700138: step: 578/470, loss: 0.010416206903755665 2023-01-22 20:48:20.441689: step: 580/470, loss: 0.01974216289818287 2023-01-22 20:48:21.238453: step: 582/470, loss: 0.004339354112744331 2023-01-22 20:48:21.927111: step: 584/470, loss: 0.011843382380902767 2023-01-22 20:48:22.605583: step: 586/470, loss: 0.0004325766349211335 2023-01-22 20:48:23.388904: step: 588/470, loss: 0.011639975011348724 2023-01-22 20:48:24.100224: step: 590/470, loss: 0.01775071956217289 2023-01-22 20:48:24.806365: step: 592/470, loss: 0.028062820434570312 2023-01-22 20:48:25.554360: step: 594/470, loss: 0.01377648115158081 2023-01-22 20:48:26.251332: step: 596/470, loss: 0.0007976947817951441 2023-01-22 20:48:27.032937: step: 598/470, loss: 0.03537141531705856 2023-01-22 20:48:27.742453: step: 600/470, loss: 0.035397402942180634 2023-01-22 20:48:28.532735: step: 602/470, loss: 0.002034268341958523 2023-01-22 20:48:29.248047: step: 604/470, loss: 0.01001753006130457 2023-01-22 20:48:29.932991: step: 606/470, loss: 1.0455894470214844 2023-01-22 20:48:30.777162: step: 608/470, loss: 0.027688954025506973 2023-01-22 20:48:31.604946: step: 610/470, loss: 0.40734100341796875 2023-01-22 20:48:32.258715: step: 612/470, loss: 0.041962411254644394 2023-01-22 20:48:33.046766: step: 614/470, loss: 0.02194126509130001 2023-01-22 20:48:33.759808: step: 616/470, loss: 0.03260621055960655 2023-01-22 20:48:34.495211: step: 618/470, loss: 0.005211520008742809 2023-01-22 20:48:35.263223: step: 620/470, loss: 0.018071891739964485 2023-01-22 20:48:36.013872: step: 622/470, loss: 0.012008518911898136 2023-01-22 20:48:36.620583: step: 624/470, loss: 0.03403551131486893 2023-01-22 20:48:37.390797: step: 626/470, loss: 0.07104015350341797 2023-01-22 20:48:38.094768: step: 628/470, loss: 0.046591900289058685 2023-01-22 20:48:38.825462: step: 630/470, loss: 0.012893665581941605 2023-01-22 20:48:39.550544: step: 632/470, loss: 0.045894332230091095 2023-01-22 20:48:40.290057: step: 634/470, loss: 0.05242696404457092 2023-01-22 20:48:41.078386: step: 636/470, loss: 0.007960853166878223 2023-01-22 20:48:41.812541: step: 638/470, loss: 0.42619752883911133 2023-01-22 20:48:42.623191: step: 640/470, loss: 0.029724212363362312 2023-01-22 20:48:43.365544: step: 642/470, loss: 0.04799468442797661 2023-01-22 20:48:44.030601: step: 644/470, loss: 0.02126733399927616 2023-01-22 20:48:44.767188: step: 646/470, loss: 0.0056076874025166035 2023-01-22 20:48:45.501773: step: 648/470, loss: 9.23870102269575e-05 2023-01-22 20:48:46.245874: step: 650/470, loss: 0.009057758376002312 2023-01-22 20:48:46.923565: step: 652/470, loss: 0.02294195629656315 2023-01-22 20:48:47.712058: step: 654/470, loss: 0.06484882533550262 2023-01-22 20:48:48.443110: step: 656/470, loss: 0.03436193987727165 2023-01-22 20:48:49.194388: step: 658/470, loss: 0.051201097667217255 2023-01-22 20:48:49.907143: step: 660/470, loss: 0.03534897416830063 2023-01-22 20:48:50.631652: step: 662/470, loss: 0.0066528706811368465 2023-01-22 20:48:51.418600: step: 664/470, loss: 0.0037193207535892725 2023-01-22 20:48:52.173810: step: 666/470, loss: 0.0020945665892213583 2023-01-22 20:48:52.890289: step: 668/470, loss: 0.000837851723190397 2023-01-22 20:48:53.588212: step: 670/470, loss: 0.03179018944501877 2023-01-22 20:48:54.395580: step: 672/470, loss: 0.009496787562966347 2023-01-22 20:48:55.118464: step: 674/470, loss: 0.3629489243030548 2023-01-22 20:48:55.818305: step: 676/470, loss: 0.025496546179056168 2023-01-22 20:48:56.548997: step: 678/470, loss: 0.03851151093840599 2023-01-22 20:48:57.322097: step: 680/470, loss: 0.0084996921941638 2023-01-22 20:48:57.963700: step: 682/470, loss: 0.007409407291561365 2023-01-22 20:48:58.679789: step: 684/470, loss: 0.03160090744495392 2023-01-22 20:48:59.425774: step: 686/470, loss: 0.05882648006081581 2023-01-22 20:49:00.204932: step: 688/470, loss: 0.08515594899654388 2023-01-22 20:49:00.931224: step: 690/470, loss: 0.0018482680898159742 2023-01-22 20:49:01.635016: step: 692/470, loss: 0.04133886843919754 2023-01-22 20:49:02.381892: step: 694/470, loss: 0.0130581334233284 2023-01-22 20:49:03.106108: step: 696/470, loss: 0.9888176321983337 2023-01-22 20:49:03.948761: step: 698/470, loss: 0.06412041187286377 2023-01-22 20:49:04.661311: step: 700/470, loss: 0.03894779458642006 2023-01-22 20:49:05.291783: step: 702/470, loss: 0.020089661702513695 2023-01-22 20:49:05.951431: step: 704/470, loss: 0.017173096537590027 2023-01-22 20:49:06.694998: step: 706/470, loss: 0.005728584248572588 2023-01-22 20:49:07.423756: step: 708/470, loss: 0.04380139708518982 2023-01-22 20:49:08.137484: step: 710/470, loss: 0.0069374158047139645 2023-01-22 20:49:08.756823: step: 712/470, loss: 0.023490410298109055 2023-01-22 20:49:09.511372: step: 714/470, loss: 0.058307942003011703 2023-01-22 20:49:10.172438: step: 716/470, loss: 0.005838301964104176 2023-01-22 20:49:10.915286: step: 718/470, loss: 0.025616277009248734 2023-01-22 20:49:11.656754: step: 720/470, loss: 0.0021177027374505997 2023-01-22 20:49:12.366869: step: 722/470, loss: 0.07428384572267532 2023-01-22 20:49:13.123926: step: 724/470, loss: 0.14311569929122925 2023-01-22 20:49:13.901308: step: 726/470, loss: 0.007042250130325556 2023-01-22 20:49:14.635285: step: 728/470, loss: 0.0038853702135384083 2023-01-22 20:49:15.380285: step: 730/470, loss: 0.044650763273239136 2023-01-22 20:49:16.029284: step: 732/470, loss: 0.0028803348541259766 2023-01-22 20:49:16.730765: step: 734/470, loss: 0.002466881647706032 2023-01-22 20:49:17.436678: step: 736/470, loss: 0.02306767739355564 2023-01-22 20:49:18.178069: step: 738/470, loss: 0.1385963410139084 2023-01-22 20:49:18.885714: step: 740/470, loss: 0.024905715137720108 2023-01-22 20:49:19.668094: step: 742/470, loss: 0.010221011936664581 2023-01-22 20:49:20.351397: step: 744/470, loss: 0.002917324658483267 2023-01-22 20:49:21.042854: step: 746/470, loss: 0.010492140427231789 2023-01-22 20:49:21.738155: step: 748/470, loss: 0.036397598683834076 2023-01-22 20:49:22.499601: step: 750/470, loss: 0.020658204331994057 2023-01-22 20:49:23.304067: step: 752/470, loss: 0.004349207505583763 2023-01-22 20:49:24.000943: step: 754/470, loss: 0.04205322265625 2023-01-22 20:49:24.699219: step: 756/470, loss: 0.015505963005125523 2023-01-22 20:49:25.342653: step: 758/470, loss: 0.002480144612491131 2023-01-22 20:49:26.062493: step: 760/470, loss: 0.054736074060201645 2023-01-22 20:49:26.810274: step: 762/470, loss: 0.012036191299557686 2023-01-22 20:49:27.481973: step: 764/470, loss: 0.01874772645533085 2023-01-22 20:49:28.203744: step: 766/470, loss: 0.03384104743599892 2023-01-22 20:49:28.922447: step: 768/470, loss: 0.010049944743514061 2023-01-22 20:49:29.594926: step: 770/470, loss: 0.03430037200450897 2023-01-22 20:49:30.298239: step: 772/470, loss: 0.0023346322122961283 2023-01-22 20:49:30.970698: step: 774/470, loss: 0.29529932141304016 2023-01-22 20:49:31.726150: step: 776/470, loss: 0.019421333447098732 2023-01-22 20:49:32.450054: step: 778/470, loss: 0.003084076102823019 2023-01-22 20:49:33.167352: step: 780/470, loss: 0.10002864897251129 2023-01-22 20:49:33.955800: step: 782/470, loss: 0.002307226648554206 2023-01-22 20:49:34.643197: step: 784/470, loss: 0.017811257392168045 2023-01-22 20:49:35.345186: step: 786/470, loss: 0.015748564153909683 2023-01-22 20:49:36.118704: step: 788/470, loss: 0.03216710314154625 2023-01-22 20:49:36.867089: step: 790/470, loss: 0.0009349191677756608 2023-01-22 20:49:37.668398: step: 792/470, loss: 0.004412441980093718 2023-01-22 20:49:38.414846: step: 794/470, loss: 0.006835710722953081 2023-01-22 20:49:39.166142: step: 796/470, loss: 0.008032741025090218 2023-01-22 20:49:39.838418: step: 798/470, loss: 0.0019889273680746555 2023-01-22 20:49:40.562116: step: 800/470, loss: 0.008295338600873947 2023-01-22 20:49:41.336332: step: 802/470, loss: 0.01602669060230255 2023-01-22 20:49:42.123478: step: 804/470, loss: 0.03637155890464783 2023-01-22 20:49:42.819486: step: 806/470, loss: 0.040701597929000854 2023-01-22 20:49:43.644031: step: 808/470, loss: 0.005965395364910364 2023-01-22 20:49:44.355301: step: 810/470, loss: 0.010988143272697926 2023-01-22 20:49:45.165892: step: 812/470, loss: 0.09173979610204697 2023-01-22 20:49:45.827460: step: 814/470, loss: 0.004171342588961124 2023-01-22 20:49:46.539119: step: 816/470, loss: 0.00160531559959054 2023-01-22 20:49:47.319777: step: 818/470, loss: 0.11780795454978943 2023-01-22 20:49:48.061814: step: 820/470, loss: 0.005452371668070555 2023-01-22 20:49:48.742664: step: 822/470, loss: 0.005390825215727091 2023-01-22 20:49:49.462313: step: 824/470, loss: 0.0016363279428333044 2023-01-22 20:49:50.210042: step: 826/470, loss: 0.007646666374057531 2023-01-22 20:49:50.928539: step: 828/470, loss: 0.017727140337228775 2023-01-22 20:49:51.729535: step: 830/470, loss: 0.010663860477507114 2023-01-22 20:49:52.479079: step: 832/470, loss: 0.023380601778626442 2023-01-22 20:49:53.184159: step: 834/470, loss: 0.041295647621154785 2023-01-22 20:49:53.895427: step: 836/470, loss: 0.0017049266025424004 2023-01-22 20:49:54.688225: step: 838/470, loss: 0.006582122761756182 2023-01-22 20:49:55.390047: step: 840/470, loss: 0.021049687638878822 2023-01-22 20:49:56.103157: step: 842/470, loss: 0.01726703532040119 2023-01-22 20:49:56.820669: step: 844/470, loss: 0.012101834639906883 2023-01-22 20:49:57.643061: step: 846/470, loss: 0.030885327607393265 2023-01-22 20:49:58.362209: step: 848/470, loss: 0.030672363936901093 2023-01-22 20:49:59.041691: step: 850/470, loss: 0.6598572731018066 2023-01-22 20:49:59.711415: step: 852/470, loss: 0.0076820398680865765 2023-01-22 20:50:00.403200: step: 854/470, loss: 0.0031191399320960045 2023-01-22 20:50:01.188291: step: 856/470, loss: 0.004457424394786358 2023-01-22 20:50:01.880240: step: 858/470, loss: 0.15405337512493134 2023-01-22 20:50:02.565458: step: 860/470, loss: 0.010198515839874744 2023-01-22 20:50:03.289703: step: 862/470, loss: 0.12781405448913574 2023-01-22 20:50:04.009861: step: 864/470, loss: 0.006552521139383316 2023-01-22 20:50:04.710207: step: 866/470, loss: 0.005595530848950148 2023-01-22 20:50:05.404223: step: 868/470, loss: 0.008519892580807209 2023-01-22 20:50:06.037262: step: 870/470, loss: 0.0002687643573153764 2023-01-22 20:50:06.853592: step: 872/470, loss: 5.414352893829346 2023-01-22 20:50:07.575847: step: 874/470, loss: 0.03722013160586357 2023-01-22 20:50:08.205930: step: 876/470, loss: 0.05762125179171562 2023-01-22 20:50:08.878808: step: 878/470, loss: 0.006857017055153847 2023-01-22 20:50:09.670180: step: 880/470, loss: 0.009326043538749218 2023-01-22 20:50:10.373583: step: 882/470, loss: 0.033087752759456635 2023-01-22 20:50:11.106633: step: 884/470, loss: 0.052600014954805374 2023-01-22 20:50:11.762891: step: 886/470, loss: 0.024000994861125946 2023-01-22 20:50:12.473297: step: 888/470, loss: 0.031389061361551285 2023-01-22 20:50:13.113948: step: 890/470, loss: 0.018143486231565475 2023-01-22 20:50:13.919436: step: 892/470, loss: 0.0009539459133520722 2023-01-22 20:50:14.662302: step: 894/470, loss: 0.01012116763740778 2023-01-22 20:50:15.377669: step: 896/470, loss: 0.0203192550688982 2023-01-22 20:50:16.110849: step: 898/470, loss: 0.021785369142889977 2023-01-22 20:50:16.915937: step: 900/470, loss: 0.0018964793998748064 2023-01-22 20:50:17.691452: step: 902/470, loss: 0.012782180681824684 2023-01-22 20:50:18.404730: step: 904/470, loss: 0.01066429540514946 2023-01-22 20:50:19.230705: step: 906/470, loss: 0.017899200320243835 2023-01-22 20:50:19.944904: step: 908/470, loss: 0.0004175813519395888 2023-01-22 20:50:20.688888: step: 910/470, loss: 0.20423611998558044 2023-01-22 20:50:21.533029: step: 912/470, loss: 0.029853759333491325 2023-01-22 20:50:22.300961: step: 914/470, loss: 0.02101089619100094 2023-01-22 20:50:23.123038: step: 916/470, loss: 0.0083807073533535 2023-01-22 20:50:23.926690: step: 918/470, loss: 0.0004685977182816714 2023-01-22 20:50:24.642054: step: 920/470, loss: 0.006272825412452221 2023-01-22 20:50:25.422570: step: 922/470, loss: 0.007433359045535326 2023-01-22 20:50:26.180870: step: 924/470, loss: 0.02900080941617489 2023-01-22 20:50:26.851859: step: 926/470, loss: 0.0006584687507711351 2023-01-22 20:50:27.573901: step: 928/470, loss: 0.029323289170861244 2023-01-22 20:50:28.283774: step: 930/470, loss: 0.020921828225255013 2023-01-22 20:50:28.935609: step: 932/470, loss: 0.0083458935841918 2023-01-22 20:50:29.769539: step: 934/470, loss: 0.0011282124323770404 2023-01-22 20:50:30.481256: step: 936/470, loss: 0.017477432265877724 2023-01-22 20:50:31.163034: step: 938/470, loss: 0.001543686375953257 2023-01-22 20:50:31.834897: step: 940/470, loss: 0.00018178651225753129 2023-01-22 20:50:32.508465: step: 942/470, loss: 0.00588207645341754 ================================================== Loss: 0.064 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29721765350877194, 'r': 0.3429000632511069, 'f1': 0.3184287812041116}, 'combined': 0.23463173351881908, 'epoch': 30} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34420320254771597, 'r': 0.3600742435873801, 'f1': 0.35195989443611525}, 'combined': 0.24514619512963254, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2908762926377528, 'r': 0.34220740310323855, 'f1': 0.31446085690567865}, 'combined': 0.23170799982523688, 'epoch': 30} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34478795454095695, 'r': 0.35803629093061906, 'f1': 0.3512872562288166}, 'combined': 0.24467769090564342, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2811459909228442, 'r': 0.35196496212121214, 'f1': 0.3125946173254836}, 'combined': 0.23033287592404053, 'epoch': 30} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3303579410113354, 'r': 0.3598711864619158, 'f1': 0.3444835909028546}, 'combined': 0.23993881953432658, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2125, 'r': 0.36428571428571427, 'f1': 0.26842105263157895}, 'combined': 0.17894736842105263, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21794871794871795, 'r': 0.3695652173913043, 'f1': 0.27419354838709675}, 'combined': 0.13709677419354838, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42391304347826086, 'r': 0.33620689655172414, 'f1': 0.375}, 'combined': 0.25, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 20:53:07.961338: step: 2/470, loss: 0.0004153551999479532 2023-01-22 20:53:08.758416: step: 4/470, loss: 1.0925959348678589 2023-01-22 20:53:09.442999: step: 6/470, loss: 0.0014063325943425298 2023-01-22 20:53:10.082681: step: 8/470, loss: 0.0018835861701518297 2023-01-22 20:53:10.901994: step: 10/470, loss: 0.010510102845728397 2023-01-22 20:53:11.585429: step: 12/470, loss: 0.004608213901519775 2023-01-22 20:53:12.312010: step: 14/470, loss: 0.000766645884141326 2023-01-22 20:53:13.002573: step: 16/470, loss: 0.02919374406337738 2023-01-22 20:53:13.636200: step: 18/470, loss: 0.004164369311183691 2023-01-22 20:53:14.260971: step: 20/470, loss: 0.022466279566287994 2023-01-22 20:53:14.989378: step: 22/470, loss: 0.032227374613285065 2023-01-22 20:53:15.711846: step: 24/470, loss: 0.00730488495901227 2023-01-22 20:53:16.395807: step: 26/470, loss: 0.0010027121752500534 2023-01-22 20:53:17.165694: step: 28/470, loss: 0.002904940862208605 2023-01-22 20:53:17.859281: step: 30/470, loss: 0.07680755108594894 2023-01-22 20:53:18.581304: step: 32/470, loss: 0.007039623335003853 2023-01-22 20:53:19.442856: step: 34/470, loss: 0.25433337688446045 2023-01-22 20:53:20.102832: step: 36/470, loss: 0.017493372783064842 2023-01-22 20:53:20.840157: step: 38/470, loss: 0.5216097235679626 2023-01-22 20:53:21.641684: step: 40/470, loss: 0.017964819446206093 2023-01-22 20:53:22.345765: step: 42/470, loss: 0.009211939759552479 2023-01-22 20:53:23.066714: step: 44/470, loss: 0.045933887362480164 2023-01-22 20:53:23.770932: step: 46/470, loss: 0.08470865339040756 2023-01-22 20:53:24.524781: step: 48/470, loss: 0.023487238213419914 2023-01-22 20:53:25.253317: step: 50/470, loss: 0.009432869963347912 2023-01-22 20:53:26.020519: step: 52/470, loss: 0.008719773031771183 2023-01-22 20:53:26.718604: step: 54/470, loss: 0.003286207327619195 2023-01-22 20:53:27.382052: step: 56/470, loss: 0.005947456229478121 2023-01-22 20:53:28.165070: step: 58/470, loss: 0.0014269596431404352 2023-01-22 20:53:28.985245: step: 60/470, loss: 0.0015790915349498391 2023-01-22 20:53:29.720131: step: 62/470, loss: 0.018821055069565773 2023-01-22 20:53:30.409639: step: 64/470, loss: 3.195769022568129e-05 2023-01-22 20:53:31.120128: step: 66/470, loss: 0.013032572343945503 2023-01-22 20:53:31.866804: step: 68/470, loss: 0.012471756897866726 2023-01-22 20:53:32.589321: step: 70/470, loss: 0.009083814918994904 2023-01-22 20:53:33.252875: step: 72/470, loss: 0.0110135143622756 2023-01-22 20:53:33.989402: step: 74/470, loss: 0.009082856588065624 2023-01-22 20:53:34.687238: step: 76/470, loss: 0.0004449693369679153 2023-01-22 20:53:35.436685: step: 78/470, loss: 0.05842125788331032 2023-01-22 20:53:36.162771: step: 80/470, loss: 0.008214079774916172 2023-01-22 20:53:36.948385: step: 82/470, loss: 0.06217104196548462 2023-01-22 20:53:37.791052: step: 84/470, loss: 0.006593683734536171 2023-01-22 20:53:38.464760: step: 86/470, loss: 0.019678259268403053 2023-01-22 20:53:39.191952: step: 88/470, loss: 0.005078400019556284 2023-01-22 20:53:39.935802: step: 90/470, loss: 0.03903871402144432 2023-01-22 20:53:40.637757: step: 92/470, loss: 0.4988504946231842 2023-01-22 20:53:41.415655: step: 94/470, loss: 0.018317870795726776 2023-01-22 20:53:42.177610: step: 96/470, loss: 0.0024577791336923838 2023-01-22 20:53:42.820143: step: 98/470, loss: 0.0027793124318122864 2023-01-22 20:53:43.474498: step: 100/470, loss: 0.00482197804376483 2023-01-22 20:53:44.142854: step: 102/470, loss: 0.019959578290581703 2023-01-22 20:53:44.871169: step: 104/470, loss: 0.0028080667834728956 2023-01-22 20:53:45.542013: step: 106/470, loss: 0.0010208826279267669 2023-01-22 20:53:46.238419: step: 108/470, loss: 0.030457837507128716 2023-01-22 20:53:46.908224: step: 110/470, loss: 0.0025482738856226206 2023-01-22 20:53:47.629155: step: 112/470, loss: 0.0258177500218153 2023-01-22 20:53:48.359629: step: 114/470, loss: 0.0043304734863340855 2023-01-22 20:53:49.117712: step: 116/470, loss: 0.002764065284281969 2023-01-22 20:53:49.913142: step: 118/470, loss: 0.03494582325220108 2023-01-22 20:53:50.747204: step: 120/470, loss: 0.04635448753833771 2023-01-22 20:53:51.532384: step: 122/470, loss: 0.04545215889811516 2023-01-22 20:53:52.279399: step: 124/470, loss: 0.019159091636538506 2023-01-22 20:53:52.959650: step: 126/470, loss: 0.0013390732929110527 2023-01-22 20:53:53.671133: step: 128/470, loss: 0.03846125304698944 2023-01-22 20:53:54.349015: step: 130/470, loss: 0.008521667681634426 2023-01-22 20:53:55.069372: step: 132/470, loss: 0.01758314110338688 2023-01-22 20:53:55.846186: step: 134/470, loss: 0.028450246900320053 2023-01-22 20:53:56.537855: step: 136/470, loss: 0.03396439552307129 2023-01-22 20:53:57.280586: step: 138/470, loss: 0.004837017506361008 2023-01-22 20:53:57.969763: step: 140/470, loss: 0.022973332554101944 2023-01-22 20:53:58.777289: step: 142/470, loss: 0.019509391859173775 2023-01-22 20:53:59.503563: step: 144/470, loss: 0.05560195818543434 2023-01-22 20:54:00.219411: step: 146/470, loss: 0.04433672875165939 2023-01-22 20:54:00.912979: step: 148/470, loss: 0.8889219164848328 2023-01-22 20:54:01.628700: step: 150/470, loss: 0.006953278090804815 2023-01-22 20:54:02.391751: step: 152/470, loss: 0.0006373568321578205 2023-01-22 20:54:03.078901: step: 154/470, loss: 0.005718933418393135 2023-01-22 20:54:03.857195: step: 156/470, loss: 0.007636924274265766 2023-01-22 20:54:04.570839: step: 158/470, loss: 0.04812711477279663 2023-01-22 20:54:05.325544: step: 160/470, loss: 0.0677870586514473 2023-01-22 20:54:06.039562: step: 162/470, loss: 0.009034083224833012 2023-01-22 20:54:06.938666: step: 164/470, loss: 0.010354172438383102 2023-01-22 20:54:07.619324: step: 166/470, loss: 0.004512510262429714 2023-01-22 20:54:08.378252: step: 168/470, loss: 0.01185314916074276 2023-01-22 20:54:09.148576: step: 170/470, loss: 0.00713575491681695 2023-01-22 20:54:09.791686: step: 172/470, loss: 0.000432536966400221 2023-01-22 20:54:10.484016: step: 174/470, loss: 0.0017447196878492832 2023-01-22 20:54:11.169438: step: 176/470, loss: 0.0017614453099668026 2023-01-22 20:54:11.896172: step: 178/470, loss: 0.12065732479095459 2023-01-22 20:54:12.650926: step: 180/470, loss: 0.00911496952176094 2023-01-22 20:54:13.407668: step: 182/470, loss: 0.050489142537117004 2023-01-22 20:54:14.141155: step: 184/470, loss: 0.0030859841499477625 2023-01-22 20:54:14.882901: step: 186/470, loss: 0.02064261958003044 2023-01-22 20:54:15.634556: step: 188/470, loss: 0.025169501081109047 2023-01-22 20:54:16.514036: step: 190/470, loss: 0.23960715532302856 2023-01-22 20:54:17.266477: step: 192/470, loss: 0.04839571937918663 2023-01-22 20:54:18.084429: step: 194/470, loss: 0.0479017049074173 2023-01-22 20:54:18.744626: step: 196/470, loss: 0.0008035176433622837 2023-01-22 20:54:19.445497: step: 198/470, loss: 0.0024430553894490004 2023-01-22 20:54:20.198217: step: 200/470, loss: 0.0055528427474200726 2023-01-22 20:54:20.953351: step: 202/470, loss: 0.014087006449699402 2023-01-22 20:54:21.656458: step: 204/470, loss: 0.007186644244939089 2023-01-22 20:54:22.400605: step: 206/470, loss: 0.017961587756872177 2023-01-22 20:54:23.081640: step: 208/470, loss: 0.027364250272512436 2023-01-22 20:54:23.737772: step: 210/470, loss: 0.004035270307213068 2023-01-22 20:54:24.424986: step: 212/470, loss: 0.00933856051415205 2023-01-22 20:54:25.217735: step: 214/470, loss: 0.006368427537381649 2023-01-22 20:54:25.926745: step: 216/470, loss: 0.04675932601094246 2023-01-22 20:54:26.657421: step: 218/470, loss: 0.008699115365743637 2023-01-22 20:54:27.424116: step: 220/470, loss: 0.005315710324794054 2023-01-22 20:54:28.080476: step: 222/470, loss: 0.0007220212719403207 2023-01-22 20:54:28.822868: step: 224/470, loss: 4.652983079722617e-06 2023-01-22 20:54:29.623330: step: 226/470, loss: 0.020241660997271538 2023-01-22 20:54:30.258963: step: 228/470, loss: 0.0002864209236577153 2023-01-22 20:54:30.936204: step: 230/470, loss: 0.0019567871931940317 2023-01-22 20:54:31.769629: step: 232/470, loss: 0.011841587722301483 2023-01-22 20:54:32.497691: step: 234/470, loss: 0.09722713381052017 2023-01-22 20:54:33.229602: step: 236/470, loss: 0.02826106734573841 2023-01-22 20:54:33.981016: step: 238/470, loss: 0.04371390491724014 2023-01-22 20:54:34.673866: step: 240/470, loss: 0.02666950784623623 2023-01-22 20:54:35.446510: step: 242/470, loss: 0.02603962831199169 2023-01-22 20:54:36.168446: step: 244/470, loss: 0.0011542732827365398 2023-01-22 20:54:36.916981: step: 246/470, loss: 0.04242149740457535 2023-01-22 20:54:37.702734: step: 248/470, loss: 0.005671241320669651 2023-01-22 20:54:38.466173: step: 250/470, loss: 0.06051589921116829 2023-01-22 20:54:39.095069: step: 252/470, loss: 0.022702636197209358 2023-01-22 20:54:39.809580: step: 254/470, loss: 0.00014661716704722494 2023-01-22 20:54:40.549806: step: 256/470, loss: 0.024693626910448074 2023-01-22 20:54:41.263406: step: 258/470, loss: 0.006267967633903027 2023-01-22 20:54:42.007329: step: 260/470, loss: 0.010373245924711227 2023-01-22 20:54:42.744396: step: 262/470, loss: 0.013863403350114822 2023-01-22 20:54:43.459010: step: 264/470, loss: 0.011223852634429932 2023-01-22 20:54:44.191633: step: 266/470, loss: 0.13165761530399323 2023-01-22 20:54:44.984075: step: 268/470, loss: 0.010476339608430862 2023-01-22 20:54:45.691066: step: 270/470, loss: 0.015369415283203125 2023-01-22 20:54:46.430469: step: 272/470, loss: 0.0017211531521752477 2023-01-22 20:54:47.141775: step: 274/470, loss: 5.57665407541208e-05 2023-01-22 20:54:47.828196: step: 276/470, loss: 0.0009223352535627782 2023-01-22 20:54:48.546803: step: 278/470, loss: 1.025863821269013e-05 2023-01-22 20:54:49.304528: step: 280/470, loss: 2.5038185119628906 2023-01-22 20:54:50.050017: step: 282/470, loss: 0.014256109483540058 2023-01-22 20:54:50.760130: step: 284/470, loss: 0.00037505527143366635 2023-01-22 20:54:51.440980: step: 286/470, loss: 0.002377528930082917 2023-01-22 20:54:52.120803: step: 288/470, loss: 0.025805901736021042 2023-01-22 20:54:52.864858: step: 290/470, loss: 0.1646108329296112 2023-01-22 20:54:53.600988: step: 292/470, loss: 0.004207650665193796 2023-01-22 20:54:54.433069: step: 294/470, loss: 0.006044092588126659 2023-01-22 20:54:55.186842: step: 296/470, loss: 0.0017826639814302325 2023-01-22 20:54:55.864716: step: 298/470, loss: 0.0007332692039199173 2023-01-22 20:54:56.618179: step: 300/470, loss: 0.01916944980621338 2023-01-22 20:54:57.281007: step: 302/470, loss: 0.015175165608525276 2023-01-22 20:54:57.927731: step: 304/470, loss: 0.0002173801331082359 2023-01-22 20:54:58.613958: step: 306/470, loss: 0.010625113733112812 2023-01-22 20:54:59.284561: step: 308/470, loss: 0.00954343844205141 2023-01-22 20:55:00.008297: step: 310/470, loss: 0.08960520476102829 2023-01-22 20:55:00.721730: step: 312/470, loss: 0.008206892758607864 2023-01-22 20:55:01.519649: step: 314/470, loss: 0.0023078385274857283 2023-01-22 20:55:02.207642: step: 316/470, loss: 0.02238607406616211 2023-01-22 20:55:02.952306: step: 318/470, loss: 0.0010710656642913818 2023-01-22 20:55:03.609014: step: 320/470, loss: 5.481565312948078e-05 2023-01-22 20:55:04.365999: step: 322/470, loss: 0.0008377675549127162 2023-01-22 20:55:05.108537: step: 324/470, loss: 0.016996121034026146 2023-01-22 20:55:05.836850: step: 326/470, loss: 0.03880900889635086 2023-01-22 20:55:06.550719: step: 328/470, loss: 0.019622113555669785 2023-01-22 20:55:07.283232: step: 330/470, loss: 0.017126567661762238 2023-01-22 20:55:08.023781: step: 332/470, loss: 0.004321925342082977 2023-01-22 20:55:08.686829: step: 334/470, loss: 0.00889088585972786 2023-01-22 20:55:09.427528: step: 336/470, loss: 0.014416373334825039 2023-01-22 20:55:10.233198: step: 338/470, loss: 0.05695127323269844 2023-01-22 20:55:10.974807: step: 340/470, loss: 0.017850443720817566 2023-01-22 20:55:11.674006: step: 342/470, loss: 0.028859199956059456 2023-01-22 20:55:12.306594: step: 344/470, loss: 0.03185954689979553 2023-01-22 20:55:12.978711: step: 346/470, loss: 0.004228611942380667 2023-01-22 20:55:13.818318: step: 348/470, loss: 0.052207764238119125 2023-01-22 20:55:14.490535: step: 350/470, loss: 0.2492973804473877 2023-01-22 20:55:15.313132: step: 352/470, loss: 0.002187067177146673 2023-01-22 20:55:16.004893: step: 354/470, loss: 0.004828231874853373 2023-01-22 20:55:16.753885: step: 356/470, loss: 0.030843263491988182 2023-01-22 20:55:17.465199: step: 358/470, loss: 0.004479340277612209 2023-01-22 20:55:18.258961: step: 360/470, loss: 0.010255182161927223 2023-01-22 20:55:18.913939: step: 362/470, loss: 0.0042448281310498714 2023-01-22 20:55:19.583120: step: 364/470, loss: 0.0041470276191830635 2023-01-22 20:55:20.243830: step: 366/470, loss: 0.004188673570752144 2023-01-22 20:55:21.019368: step: 368/470, loss: 0.004912644159048796 2023-01-22 20:55:21.689892: step: 370/470, loss: 0.010912226513028145 2023-01-22 20:55:22.542687: step: 372/470, loss: 0.017633339390158653 2023-01-22 20:55:23.305757: step: 374/470, loss: 0.07342100143432617 2023-01-22 20:55:24.056373: step: 376/470, loss: 0.003536728210747242 2023-01-22 20:55:24.749862: step: 378/470, loss: 0.0003867686027660966 2023-01-22 20:55:25.446965: step: 380/470, loss: 0.017503436654806137 2023-01-22 20:55:26.313661: step: 382/470, loss: 0.0033323050010949373 2023-01-22 20:55:27.011418: step: 384/470, loss: 0.0019946997053921223 2023-01-22 20:55:27.749144: step: 386/470, loss: 0.680500328540802 2023-01-22 20:55:28.481635: step: 388/470, loss: 0.01015088427811861 2023-01-22 20:55:29.185147: step: 390/470, loss: 0.0255475752055645 2023-01-22 20:55:29.876782: step: 392/470, loss: 0.011009021662175655 2023-01-22 20:55:30.579432: step: 394/470, loss: 0.0023041116073727608 2023-01-22 20:55:31.307782: step: 396/470, loss: 0.012929693795740604 2023-01-22 20:55:32.007834: step: 398/470, loss: 0.0013820825843140483 2023-01-22 20:55:32.794081: step: 400/470, loss: 0.03162797540426254 2023-01-22 20:55:33.528323: step: 402/470, loss: 0.23524107038974762 2023-01-22 20:55:34.279047: step: 404/470, loss: 0.008160697296261787 2023-01-22 20:55:34.983147: step: 406/470, loss: 0.024923594668507576 2023-01-22 20:55:35.739051: step: 408/470, loss: 0.0012302573304623365 2023-01-22 20:55:36.459846: step: 410/470, loss: 0.039357952773571014 2023-01-22 20:55:37.189064: step: 412/470, loss: 0.02063934877514839 2023-01-22 20:55:37.897760: step: 414/470, loss: 0.019131643697619438 2023-01-22 20:55:38.644040: step: 416/470, loss: 0.029757630079984665 2023-01-22 20:55:39.376316: step: 418/470, loss: 0.03168037533760071 2023-01-22 20:55:40.163625: step: 420/470, loss: 0.007195422891527414 2023-01-22 20:55:40.897805: step: 422/470, loss: 0.04357065260410309 2023-01-22 20:55:41.664902: step: 424/470, loss: 0.032375071197748184 2023-01-22 20:55:42.361445: step: 426/470, loss: 0.021866897121071815 2023-01-22 20:55:43.045650: step: 428/470, loss: 0.021189482882618904 2023-01-22 20:55:43.938110: step: 430/470, loss: 0.05813028663396835 2023-01-22 20:55:44.700457: step: 432/470, loss: 0.059212926775217056 2023-01-22 20:55:45.430482: step: 434/470, loss: 0.012862597592175007 2023-01-22 20:55:46.254968: step: 436/470, loss: 0.0023115493822842836 2023-01-22 20:55:46.933461: step: 438/470, loss: 0.08521554619073868 2023-01-22 20:55:47.618804: step: 440/470, loss: 0.6096815466880798 2023-01-22 20:55:48.355175: step: 442/470, loss: 0.012424707412719727 2023-01-22 20:55:49.078333: step: 444/470, loss: 0.00018685971735976636 2023-01-22 20:55:49.842317: step: 446/470, loss: 0.0018285795813426375 2023-01-22 20:55:50.593779: step: 448/470, loss: 0.15367041528224945 2023-01-22 20:55:51.280763: step: 450/470, loss: 0.15135274827480316 2023-01-22 20:55:51.956091: step: 452/470, loss: 0.0034649712033569813 2023-01-22 20:55:52.743103: step: 454/470, loss: 0.1291857212781906 2023-01-22 20:55:53.523387: step: 456/470, loss: 0.010504554957151413 2023-01-22 20:55:54.217210: step: 458/470, loss: 0.005182147957384586 2023-01-22 20:55:55.001384: step: 460/470, loss: 0.012676097452640533 2023-01-22 20:55:55.672494: step: 462/470, loss: 0.013613465242087841 2023-01-22 20:55:56.407093: step: 464/470, loss: 0.07927101105451584 2023-01-22 20:55:57.198299: step: 466/470, loss: 0.0009735323255881667 2023-01-22 20:55:57.949218: step: 468/470, loss: 0.02965150959789753 2023-01-22 20:55:58.736384: step: 470/470, loss: 0.000922163191717118 2023-01-22 20:55:59.480506: step: 472/470, loss: 0.04083675891160965 2023-01-22 20:56:00.176151: step: 474/470, loss: 0.0027813774067908525 2023-01-22 20:56:00.928254: step: 476/470, loss: 0.021299488842487335 2023-01-22 20:56:01.665862: step: 478/470, loss: 0.006957915611565113 2023-01-22 20:56:02.702110: step: 480/470, loss: 0.01972772739827633 2023-01-22 20:56:03.385524: step: 482/470, loss: 0.006241174414753914 2023-01-22 20:56:04.162127: step: 484/470, loss: 0.014599893242120743 2023-01-22 20:56:04.860122: step: 486/470, loss: 0.005494068842381239 2023-01-22 20:56:05.645444: step: 488/470, loss: 0.015743980184197426 2023-01-22 20:56:06.395587: step: 490/470, loss: 0.009499004110693932 2023-01-22 20:56:07.152186: step: 492/470, loss: 0.00025713659124448895 2023-01-22 20:56:07.995795: step: 494/470, loss: 0.024208668619394302 2023-01-22 20:56:08.800633: step: 496/470, loss: 0.008801139891147614 2023-01-22 20:56:09.547744: step: 498/470, loss: 0.007869623601436615 2023-01-22 20:56:10.233194: step: 500/470, loss: 0.0006736805662512779 2023-01-22 20:56:10.949295: step: 502/470, loss: 0.0008966495515778661 2023-01-22 20:56:11.718680: step: 504/470, loss: 0.03283290937542915 2023-01-22 20:56:12.438465: step: 506/470, loss: 0.013244382105767727 2023-01-22 20:56:13.159028: step: 508/470, loss: 0.09672006964683533 2023-01-22 20:56:13.875676: step: 510/470, loss: 0.03322713449597359 2023-01-22 20:56:14.620498: step: 512/470, loss: 0.004942721221596003 2023-01-22 20:56:15.377912: step: 514/470, loss: 0.02580172009766102 2023-01-22 20:56:16.119690: step: 516/470, loss: 0.042521312832832336 2023-01-22 20:56:16.817938: step: 518/470, loss: 0.050515204668045044 2023-01-22 20:56:17.637822: step: 520/470, loss: 0.15628911554813385 2023-01-22 20:56:18.407350: step: 522/470, loss: 0.0003710964519996196 2023-01-22 20:56:19.100302: step: 524/470, loss: 0.005137452390044928 2023-01-22 20:56:19.822841: step: 526/470, loss: 0.03880662843585014 2023-01-22 20:56:20.591837: step: 528/470, loss: 0.0028388279024511576 2023-01-22 20:56:21.300787: step: 530/470, loss: 0.00452554551884532 2023-01-22 20:56:22.066718: step: 532/470, loss: 0.07652177661657333 2023-01-22 20:56:22.806103: step: 534/470, loss: 0.017907770350575447 2023-01-22 20:56:23.532027: step: 536/470, loss: 0.017264176160097122 2023-01-22 20:56:24.281005: step: 538/470, loss: 0.017725540325045586 2023-01-22 20:56:24.989429: step: 540/470, loss: 0.0008670427487231791 2023-01-22 20:56:25.733288: step: 542/470, loss: 0.001019967021420598 2023-01-22 20:56:26.496873: step: 544/470, loss: 0.0039613074623048306 2023-01-22 20:56:27.282494: step: 546/470, loss: 0.01247483305633068 2023-01-22 20:56:27.956394: step: 548/470, loss: 0.002028008922934532 2023-01-22 20:56:28.685103: step: 550/470, loss: 0.004361606668680906 2023-01-22 20:56:29.378392: step: 552/470, loss: 0.011579647660255432 2023-01-22 20:56:30.082537: step: 554/470, loss: 0.049814943224191666 2023-01-22 20:56:30.763185: step: 556/470, loss: 0.0027671053539961576 2023-01-22 20:56:31.493117: step: 558/470, loss: 0.017059145495295525 2023-01-22 20:56:32.236919: step: 560/470, loss: 0.0030047514010220766 2023-01-22 20:56:32.959571: step: 562/470, loss: 0.011335933580994606 2023-01-22 20:56:33.733634: step: 564/470, loss: 0.03579147905111313 2023-01-22 20:56:34.472093: step: 566/470, loss: 0.021534211933612823 2023-01-22 20:56:35.253349: step: 568/470, loss: 0.03905900940299034 2023-01-22 20:56:35.950248: step: 570/470, loss: 0.0442798025906086 2023-01-22 20:56:36.715284: step: 572/470, loss: 0.001077734399586916 2023-01-22 20:56:37.439181: step: 574/470, loss: 0.030320877209305763 2023-01-22 20:56:38.068149: step: 576/470, loss: 0.00816918071359396 2023-01-22 20:56:38.793670: step: 578/470, loss: 0.013305963017046452 2023-01-22 20:56:39.499231: step: 580/470, loss: 0.009787117131054401 2023-01-22 20:56:40.124125: step: 582/470, loss: 0.007002311293035746 2023-01-22 20:56:40.913573: step: 584/470, loss: 0.035149309784173965 2023-01-22 20:56:41.614629: step: 586/470, loss: 0.00833014864474535 2023-01-22 20:56:42.406483: step: 588/470, loss: 0.020918749272823334 2023-01-22 20:56:43.114366: step: 590/470, loss: 0.14545173943042755 2023-01-22 20:56:43.805539: step: 592/470, loss: 0.03052116557955742 2023-01-22 20:56:44.580267: step: 594/470, loss: 0.012451532296836376 2023-01-22 20:56:45.314495: step: 596/470, loss: 0.15833701193332672 2023-01-22 20:56:46.091836: step: 598/470, loss: 0.0005307383253239095 2023-01-22 20:56:46.804146: step: 600/470, loss: 0.0017066209111362696 2023-01-22 20:56:47.541290: step: 602/470, loss: 0.03470804542303085 2023-01-22 20:56:48.273902: step: 604/470, loss: 0.0007092714658938348 2023-01-22 20:56:49.017257: step: 606/470, loss: 0.001324977376498282 2023-01-22 20:56:49.686820: step: 608/470, loss: 0.024211524054408073 2023-01-22 20:56:50.462651: step: 610/470, loss: 0.00011712490959325805 2023-01-22 20:56:51.211222: step: 612/470, loss: 7.496851139876526e-06 2023-01-22 20:56:51.984197: step: 614/470, loss: 0.07053697854280472 2023-01-22 20:56:52.696981: step: 616/470, loss: 0.05512640252709389 2023-01-22 20:56:53.444667: step: 618/470, loss: 0.03331885486841202 2023-01-22 20:56:54.215904: step: 620/470, loss: 0.0004636533558368683 2023-01-22 20:56:54.971564: step: 622/470, loss: 3.8023954402888194e-06 2023-01-22 20:56:55.663703: step: 624/470, loss: 0.01048552617430687 2023-01-22 20:56:56.330401: step: 626/470, loss: 0.017999017611145973 2023-01-22 20:56:56.988598: step: 628/470, loss: 0.044697657227516174 2023-01-22 20:56:57.851008: step: 630/470, loss: 0.017182713374495506 2023-01-22 20:56:58.540074: step: 632/470, loss: 0.03972567990422249 2023-01-22 20:56:59.271773: step: 634/470, loss: 0.018354123458266258 2023-01-22 20:56:59.998706: step: 636/470, loss: 0.0014592665247619152 2023-01-22 20:57:00.708992: step: 638/470, loss: 0.003360056085512042 2023-01-22 20:57:01.487233: step: 640/470, loss: 0.054131921380758286 2023-01-22 20:57:02.195241: step: 642/470, loss: 0.00018125410133507103 2023-01-22 20:57:02.837839: step: 644/470, loss: 0.00046586460666731 2023-01-22 20:57:03.492152: step: 646/470, loss: 0.003081733826547861 2023-01-22 20:57:04.241083: step: 648/470, loss: 0.0019446099177002907 2023-01-22 20:57:05.007660: step: 650/470, loss: 0.036725327372550964 2023-01-22 20:57:05.788370: step: 652/470, loss: 0.011674618348479271 2023-01-22 20:57:06.617747: step: 654/470, loss: 0.023591268807649612 2023-01-22 20:57:07.409514: step: 656/470, loss: 0.13545000553131104 2023-01-22 20:57:08.045716: step: 658/470, loss: 0.02520488202571869 2023-01-22 20:57:08.771208: step: 660/470, loss: 0.017133207991719246 2023-01-22 20:57:09.443660: step: 662/470, loss: 0.01911136694252491 2023-01-22 20:57:10.114923: step: 664/470, loss: 0.007725501898676157 2023-01-22 20:57:10.829511: step: 666/470, loss: 0.009709802456200123 2023-01-22 20:57:11.614122: step: 668/470, loss: 0.0017183064483106136 2023-01-22 20:57:12.377857: step: 670/470, loss: 0.026015179231762886 2023-01-22 20:57:13.154529: step: 672/470, loss: 0.008718994446098804 2023-01-22 20:57:13.844886: step: 674/470, loss: 0.0018770707538351417 2023-01-22 20:57:14.690792: step: 676/470, loss: 0.007993071340024471 2023-01-22 20:57:15.387538: step: 678/470, loss: 0.0003845719911623746 2023-01-22 20:57:16.206382: step: 680/470, loss: 0.0029241188894957304 2023-01-22 20:57:16.973933: step: 682/470, loss: 0.2095903605222702 2023-01-22 20:57:17.669128: step: 684/470, loss: 0.04601133614778519 2023-01-22 20:57:18.497280: step: 686/470, loss: 0.04851691052317619 2023-01-22 20:57:19.130384: step: 688/470, loss: 0.0033728540875017643 2023-01-22 20:57:19.829583: step: 690/470, loss: 0.0015807858435437083 2023-01-22 20:57:20.679338: step: 692/470, loss: 0.010904110968112946 2023-01-22 20:57:21.391599: step: 694/470, loss: 0.07372532784938812 2023-01-22 20:57:22.123715: step: 696/470, loss: 0.0032748605590313673 2023-01-22 20:57:22.829933: step: 698/470, loss: 0.0914636179804802 2023-01-22 20:57:23.516419: step: 700/470, loss: 0.0038345667999237776 2023-01-22 20:57:24.254322: step: 702/470, loss: 0.09439164400100708 2023-01-22 20:57:25.033291: step: 704/470, loss: 0.12708623707294464 2023-01-22 20:57:25.723303: step: 706/470, loss: 0.02249508537352085 2023-01-22 20:57:26.427117: step: 708/470, loss: 0.024676885455846786 2023-01-22 20:57:27.056492: step: 710/470, loss: 0.00023603100271429867 2023-01-22 20:57:27.781924: step: 712/470, loss: 0.07618135958909988 2023-01-22 20:57:28.506825: step: 714/470, loss: 0.0014070416800677776 2023-01-22 20:57:29.345699: step: 716/470, loss: 0.038883935660123825 2023-01-22 20:57:30.068577: step: 718/470, loss: 0.0010557807981967926 2023-01-22 20:57:30.843449: step: 720/470, loss: 0.00078931002644822 2023-01-22 20:57:31.525084: step: 722/470, loss: 0.0017666048370301723 2023-01-22 20:57:32.308135: step: 724/470, loss: 0.0047446005046367645 2023-01-22 20:57:33.027596: step: 726/470, loss: 0.03437475860118866 2023-01-22 20:57:33.801169: step: 728/470, loss: 0.008103154599666595 2023-01-22 20:57:34.552281: step: 730/470, loss: 0.15290933847427368 2023-01-22 20:57:35.272966: step: 732/470, loss: 0.005006398539990187 2023-01-22 20:57:36.002501: step: 734/470, loss: 0.03129494935274124 2023-01-22 20:57:36.813473: step: 736/470, loss: 0.1397247016429901 2023-01-22 20:57:37.547292: step: 738/470, loss: 0.018076607957482338 2023-01-22 20:57:38.263341: step: 740/470, loss: 0.012959785759449005 2023-01-22 20:57:38.974776: step: 742/470, loss: 0.0037444073241204023 2023-01-22 20:57:39.674237: step: 744/470, loss: 0.05442821606993675 2023-01-22 20:57:40.398857: step: 746/470, loss: 0.012656944803893566 2023-01-22 20:57:41.128975: step: 748/470, loss: 0.002542155794799328 2023-01-22 20:57:41.794912: step: 750/470, loss: 0.20244351029396057 2023-01-22 20:57:42.431937: step: 752/470, loss: 0.006245005410164595 2023-01-22 20:57:43.143857: step: 754/470, loss: 0.06883977353572845 2023-01-22 20:57:43.844710: step: 756/470, loss: 0.3660793900489807 2023-01-22 20:57:44.588713: step: 758/470, loss: 0.01697465591132641 2023-01-22 20:57:45.433049: step: 760/470, loss: 0.0022616013884544373 2023-01-22 20:57:46.143992: step: 762/470, loss: 0.013554520905017853 2023-01-22 20:57:46.901052: step: 764/470, loss: 0.0018667317926883698 2023-01-22 20:57:47.676077: step: 766/470, loss: 0.02620398811995983 2023-01-22 20:57:48.330694: step: 768/470, loss: 0.01824253797531128 2023-01-22 20:57:49.069874: step: 770/470, loss: 0.029288796707987785 2023-01-22 20:57:49.741411: step: 772/470, loss: 0.0019241668051108718 2023-01-22 20:57:50.432199: step: 774/470, loss: 0.009944219142198563 2023-01-22 20:57:51.170911: step: 776/470, loss: 0.02224019728600979 2023-01-22 20:57:51.799617: step: 778/470, loss: 0.0010763780446723104 2023-01-22 20:57:52.639181: step: 780/470, loss: 0.026158837601542473 2023-01-22 20:57:53.391886: step: 782/470, loss: 0.053973760455846786 2023-01-22 20:57:54.147387: step: 784/470, loss: 0.02082950621843338 2023-01-22 20:57:54.899895: step: 786/470, loss: 0.1821128875017166 2023-01-22 20:57:55.570428: step: 788/470, loss: 0.0018857375252991915 2023-01-22 20:57:56.291691: step: 790/470, loss: 0.01713024079799652 2023-01-22 20:57:56.935307: step: 792/470, loss: 0.012649615295231342 2023-01-22 20:57:57.656575: step: 794/470, loss: 0.15321439504623413 2023-01-22 20:57:58.541237: step: 796/470, loss: 0.03709512576460838 2023-01-22 20:57:59.348394: step: 798/470, loss: 0.00017801785725168884 2023-01-22 20:58:00.113205: step: 800/470, loss: 0.02271411009132862 2023-01-22 20:58:00.827191: step: 802/470, loss: 0.005088508129119873 2023-01-22 20:58:01.510160: step: 804/470, loss: 0.03457785025238991 2023-01-22 20:58:02.224602: step: 806/470, loss: 0.007744433358311653 2023-01-22 20:58:02.962082: step: 808/470, loss: 0.0011210455559194088 2023-01-22 20:58:03.773881: step: 810/470, loss: 0.004407666157931089 2023-01-22 20:58:04.583797: step: 812/470, loss: 0.024257738143205643 2023-01-22 20:58:05.361063: step: 814/470, loss: 0.008528418838977814 2023-01-22 20:58:06.062593: step: 816/470, loss: 0.002460848307237029 2023-01-22 20:58:06.841327: step: 818/470, loss: 0.0038715063128620386 2023-01-22 20:58:07.596426: step: 820/470, loss: 0.0031947391107678413 2023-01-22 20:58:08.361795: step: 822/470, loss: 0.021919481456279755 2023-01-22 20:58:09.140505: step: 824/470, loss: 0.00419453764334321 2023-01-22 20:58:09.864239: step: 826/470, loss: 0.18099209666252136 2023-01-22 20:58:10.513831: step: 828/470, loss: 0.0055846464820206165 2023-01-22 20:58:11.315456: step: 830/470, loss: 0.019906627014279366 2023-01-22 20:58:11.998600: step: 832/470, loss: 0.011373650282621384 2023-01-22 20:58:12.715467: step: 834/470, loss: 0.05863653123378754 2023-01-22 20:58:13.398875: step: 836/470, loss: 0.008630829863250256 2023-01-22 20:58:14.159600: step: 838/470, loss: 0.05342869088053703 2023-01-22 20:58:15.008798: step: 840/470, loss: 0.008590944111347198 2023-01-22 20:58:15.809508: step: 842/470, loss: 0.019462179392576218 2023-01-22 20:58:16.568288: step: 844/470, loss: 0.0030498160049319267 2023-01-22 20:58:17.342332: step: 846/470, loss: 0.02540343627333641 2023-01-22 20:58:18.161974: step: 848/470, loss: 0.015893662348389626 2023-01-22 20:58:18.811790: step: 850/470, loss: 0.01733972690999508 2023-01-22 20:58:19.676488: step: 852/470, loss: 0.006161740515381098 2023-01-22 20:58:20.439151: step: 854/470, loss: 0.0038143827114254236 2023-01-22 20:58:21.248644: step: 856/470, loss: 0.08798175305128098 2023-01-22 20:58:22.044295: step: 858/470, loss: 0.008321262896060944 2023-01-22 20:58:22.767264: step: 860/470, loss: 0.0049756853841245174 2023-01-22 20:58:23.547625: step: 862/470, loss: 0.03980034962296486 2023-01-22 20:58:24.270677: step: 864/470, loss: 0.008659124374389648 2023-01-22 20:58:25.045514: step: 866/470, loss: 0.06904779374599457 2023-01-22 20:58:25.717410: step: 868/470, loss: 0.0019073209259659052 2023-01-22 20:58:26.434445: step: 870/470, loss: 0.004029394127428532 2023-01-22 20:58:27.141531: step: 872/470, loss: 0.004503278061747551 2023-01-22 20:58:27.853515: step: 874/470, loss: 0.001992929959669709 2023-01-22 20:58:28.641338: step: 876/470, loss: 0.006583628244698048 2023-01-22 20:58:29.337511: step: 878/470, loss: 0.0009295732015743852 2023-01-22 20:58:30.147679: step: 880/470, loss: 0.0053373072296381 2023-01-22 20:58:30.881850: step: 882/470, loss: 0.009604030288755894 2023-01-22 20:58:31.620376: step: 884/470, loss: 0.002130313077941537 2023-01-22 20:58:32.309982: step: 886/470, loss: 0.04465380311012268 2023-01-22 20:58:33.145648: step: 888/470, loss: 0.10829721391201019 2023-01-22 20:58:33.899716: step: 890/470, loss: 0.03182058036327362 2023-01-22 20:58:34.611399: step: 892/470, loss: 0.013102928176522255 2023-01-22 20:58:35.395002: step: 894/470, loss: 0.02757185697555542 2023-01-22 20:58:36.155033: step: 896/470, loss: 0.013785184361040592 2023-01-22 20:58:36.962987: step: 898/470, loss: 0.013047303073108196 2023-01-22 20:58:37.688901: step: 900/470, loss: 3.3479478588560596e-05 2023-01-22 20:58:38.369881: step: 902/470, loss: 0.015448780730366707 2023-01-22 20:58:39.183878: step: 904/470, loss: 0.017426110804080963 2023-01-22 20:58:39.927174: step: 906/470, loss: 0.011090615764260292 2023-01-22 20:58:40.575543: step: 908/470, loss: 0.0027628943789750338 2023-01-22 20:58:41.345959: step: 910/470, loss: 0.07512981444597244 2023-01-22 20:58:42.030888: step: 912/470, loss: 0.003129625925794244 2023-01-22 20:58:42.629508: step: 914/470, loss: 0.0007694661035202444 2023-01-22 20:58:43.264289: step: 916/470, loss: 0.0655151829123497 2023-01-22 20:58:43.977982: step: 918/470, loss: 0.01531730592250824 2023-01-22 20:58:44.725017: step: 920/470, loss: 0.003190776566043496 2023-01-22 20:58:45.429017: step: 922/470, loss: 0.02201441302895546 2023-01-22 20:58:46.288643: step: 924/470, loss: 0.23225174844264984 2023-01-22 20:58:47.037398: step: 926/470, loss: 0.0298309326171875 2023-01-22 20:58:47.705623: step: 928/470, loss: 0.024859227240085602 2023-01-22 20:58:48.441315: step: 930/470, loss: 0.15923137962818146 2023-01-22 20:58:49.367901: step: 932/470, loss: 0.004126227926462889 2023-01-22 20:58:50.054548: step: 934/470, loss: 0.005694595165550709 2023-01-22 20:58:50.851982: step: 936/470, loss: 0.1984158605337143 2023-01-22 20:58:51.584600: step: 938/470, loss: 0.01643310859799385 2023-01-22 20:58:52.283469: step: 940/470, loss: 0.04566141590476036 2023-01-22 20:58:52.931412: step: 942/470, loss: 0.022141339257359505 ================================================== Loss: 0.041 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29751898604269295, 'r': 0.32744025029366586, 'f1': 0.3117633458080613}, 'combined': 0.2297203600690978, 'epoch': 31} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3371869177457899, 'r': 0.358585318295042, 'f1': 0.3475570652626689}, 'combined': 0.2420795479441475, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2906982529854047, 'r': 0.3282077049835214, 'f1': 0.3083163289239141}, 'combined': 0.2271804528913051, 'epoch': 31} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33885016089940706, 'r': 0.36002829595562, 'f1': 0.34911834759332855}, 'combined': 0.2431670082739602, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2830428685897436, 'r': 0.3351399430740038, 'f1': 0.3068961772371851}, 'combined': 0.2261340253326627, 'epoch': 31} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.32938994992287796, 'r': 0.3648627137607264, 'f1': 0.3462200933495943}, 'combined': 0.2411483237260856, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21610169491525424, 'r': 0.36428571428571427, 'f1': 0.2712765957446808}, 'combined': 0.18085106382978722, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2692307692307692, 'r': 0.45652173913043476, 'f1': 0.3387096774193548}, 'combined': 0.1693548387096774, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4431818181818182, 'r': 0.33620689655172414, 'f1': 0.38235294117647056}, 'combined': 0.2549019607843137, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 21:01:28.517667: step: 2/470, loss: 0.9720427393913269 2023-01-22 21:01:29.202543: step: 4/470, loss: 0.013476014137268066 2023-01-22 21:01:29.953635: step: 6/470, loss: 0.00015409085608553141 2023-01-22 21:01:30.704840: step: 8/470, loss: 0.005482874345034361 2023-01-22 21:01:31.405919: step: 10/470, loss: 0.02253660000860691 2023-01-22 21:01:32.096924: step: 12/470, loss: 0.0016350456280633807 2023-01-22 21:01:32.866442: step: 14/470, loss: 0.0006504120538011193 2023-01-22 21:01:33.588751: step: 16/470, loss: 0.0015973305562511086 2023-01-22 21:01:34.322070: step: 18/470, loss: 0.25637224316596985 2023-01-22 21:01:35.076035: step: 20/470, loss: 0.02262234315276146 2023-01-22 21:01:35.884305: step: 22/470, loss: 0.001908066333271563 2023-01-22 21:01:36.525308: step: 24/470, loss: 0.0029853819869458675 2023-01-22 21:01:37.297070: step: 26/470, loss: 0.009884890168905258 2023-01-22 21:01:38.068256: step: 28/470, loss: 0.0006165321101434529 2023-01-22 21:01:38.828769: step: 30/470, loss: 0.0001714162644930184 2023-01-22 21:01:39.569013: step: 32/470, loss: 0.0308544784784317 2023-01-22 21:01:40.334723: step: 34/470, loss: 0.027413802221417427 2023-01-22 21:01:41.119560: step: 36/470, loss: 0.1625737100839615 2023-01-22 21:01:41.805661: step: 38/470, loss: 0.006317344959825277 2023-01-22 21:01:42.646363: step: 40/470, loss: 0.003615674562752247 2023-01-22 21:01:43.367172: step: 42/470, loss: 0.08496110886335373 2023-01-22 21:01:44.143867: step: 44/470, loss: 0.0003270620945841074 2023-01-22 21:01:44.870686: step: 46/470, loss: 0.013454585336148739 2023-01-22 21:01:45.540468: step: 48/470, loss: 0.022000696510076523 2023-01-22 21:01:46.188850: step: 50/470, loss: 0.004566211719065905 2023-01-22 21:01:46.974330: step: 52/470, loss: 0.03704002499580383 2023-01-22 21:01:47.626727: step: 54/470, loss: 0.004721718840301037 2023-01-22 21:01:48.380932: step: 56/470, loss: 0.02784247137606144 2023-01-22 21:01:49.081561: step: 58/470, loss: 0.2554512321949005 2023-01-22 21:01:49.802899: step: 60/470, loss: 0.006956116762012243 2023-01-22 21:01:50.510837: step: 62/470, loss: 0.0004784489865414798 2023-01-22 21:01:51.181236: step: 64/470, loss: 0.0018533782567828894 2023-01-22 21:01:52.002686: step: 66/470, loss: 0.14442533254623413 2023-01-22 21:01:52.679691: step: 68/470, loss: 0.0010345384944230318 2023-01-22 21:01:53.436488: step: 70/470, loss: 0.02987985871732235 2023-01-22 21:01:54.201187: step: 72/470, loss: 0.010891645215451717 2023-01-22 21:01:54.979329: step: 74/470, loss: 0.03483196720480919 2023-01-22 21:01:55.800379: step: 76/470, loss: 0.09911884367465973 2023-01-22 21:01:56.595877: step: 78/470, loss: 0.007575498428195715 2023-01-22 21:01:57.407614: step: 80/470, loss: 0.007171786390244961 2023-01-22 21:01:58.163525: step: 82/470, loss: 0.002897205762565136 2023-01-22 21:01:58.867475: step: 84/470, loss: 0.0030799158848822117 2023-01-22 21:01:59.556058: step: 86/470, loss: 0.009569648653268814 2023-01-22 21:02:00.178583: step: 88/470, loss: 0.002280237153172493 2023-01-22 21:02:00.904020: step: 90/470, loss: 0.017339464277029037 2023-01-22 21:02:01.626475: step: 92/470, loss: 0.01658935844898224 2023-01-22 21:02:02.345535: step: 94/470, loss: 0.013809598982334137 2023-01-22 21:02:03.025060: step: 96/470, loss: 0.001008601044304669 2023-01-22 21:02:03.776830: step: 98/470, loss: 0.1749447137117386 2023-01-22 21:02:04.528544: step: 100/470, loss: 0.046566035598516464 2023-01-22 21:02:05.246776: step: 102/470, loss: 0.02105867676436901 2023-01-22 21:02:05.962397: step: 104/470, loss: 0.14165711402893066 2023-01-22 21:02:06.673179: step: 106/470, loss: 0.0024683044757694006 2023-01-22 21:02:07.392641: step: 108/470, loss: 0.00013477614265866578 2023-01-22 21:02:08.117447: step: 110/470, loss: 0.01837342046201229 2023-01-22 21:02:08.891187: step: 112/470, loss: 0.007353122346103191 2023-01-22 21:02:09.652107: step: 114/470, loss: 0.009461408481001854 2023-01-22 21:02:10.374704: step: 116/470, loss: 0.002974409842863679 2023-01-22 21:02:11.103286: step: 118/470, loss: 0.05226290225982666 2023-01-22 21:02:11.836615: step: 120/470, loss: 0.043346941471099854 2023-01-22 21:02:12.593558: step: 122/470, loss: 0.10949388891458511 2023-01-22 21:02:13.322013: step: 124/470, loss: 0.023482829332351685 2023-01-22 21:02:14.077418: step: 126/470, loss: 0.006389224901795387 2023-01-22 21:02:14.785120: step: 128/470, loss: 0.009503074921667576 2023-01-22 21:02:15.572275: step: 130/470, loss: 0.014946824871003628 2023-01-22 21:02:16.289265: step: 132/470, loss: 0.016744105145335197 2023-01-22 21:02:16.979356: step: 134/470, loss: 0.0011202177265658975 2023-01-22 21:02:17.726830: step: 136/470, loss: 0.0005344194360077381 2023-01-22 21:02:18.528377: step: 138/470, loss: 0.053323112428188324 2023-01-22 21:02:19.298794: step: 140/470, loss: 0.7324318289756775 2023-01-22 21:02:20.111225: step: 142/470, loss: 0.04300731047987938 2023-01-22 21:02:20.840794: step: 144/470, loss: 0.01699782721698284 2023-01-22 21:02:21.570923: step: 146/470, loss: 0.0031976874452084303 2023-01-22 21:02:22.215499: step: 148/470, loss: 0.010052609257400036 2023-01-22 21:02:22.937705: step: 150/470, loss: 0.0200212299823761 2023-01-22 21:02:23.661139: step: 152/470, loss: 0.012295265682041645 2023-01-22 21:02:24.439287: step: 154/470, loss: 0.00786919891834259 2023-01-22 21:02:25.207885: step: 156/470, loss: 0.019932325929403305 2023-01-22 21:02:26.121090: step: 158/470, loss: 0.0012513573747128248 2023-01-22 21:02:26.908776: step: 160/470, loss: 0.0011142558651044965 2023-01-22 21:02:27.673289: step: 162/470, loss: 0.021040918305516243 2023-01-22 21:02:28.436716: step: 164/470, loss: 0.002821336267516017 2023-01-22 21:02:29.205631: step: 166/470, loss: 0.006630197633057833 2023-01-22 21:02:29.876025: step: 168/470, loss: 0.004258208908140659 2023-01-22 21:02:30.559057: step: 170/470, loss: 0.03937026113271713 2023-01-22 21:02:31.313402: step: 172/470, loss: 0.0024332425091415644 2023-01-22 21:02:32.018014: step: 174/470, loss: 0.0002705961815081537 2023-01-22 21:02:32.679245: step: 176/470, loss: 0.0043231286108493805 2023-01-22 21:02:33.427348: step: 178/470, loss: 0.10010123252868652 2023-01-22 21:02:34.171855: step: 180/470, loss: 0.003182998625561595 2023-01-22 21:02:34.891428: step: 182/470, loss: 4.15868271375075e-05 2023-01-22 21:02:35.529308: step: 184/470, loss: 0.030571192502975464 2023-01-22 21:02:36.289490: step: 186/470, loss: 0.0007512365118600428 2023-01-22 21:02:37.034863: step: 188/470, loss: 0.0026000277139246464 2023-01-22 21:02:37.730735: step: 190/470, loss: 0.010836091823875904 2023-01-22 21:02:38.496542: step: 192/470, loss: 0.02573336847126484 2023-01-22 21:02:39.282833: step: 194/470, loss: 0.03251959756016731 2023-01-22 21:02:39.947253: step: 196/470, loss: 0.00026101371622644365 2023-01-22 21:02:40.617595: step: 198/470, loss: 0.09524267166852951 2023-01-22 21:02:41.392139: step: 200/470, loss: 0.11690568178892136 2023-01-22 21:02:42.053935: step: 202/470, loss: 0.007767211180180311 2023-01-22 21:02:42.806503: step: 204/470, loss: 0.18378157913684845 2023-01-22 21:02:43.584405: step: 206/470, loss: 0.13678520917892456 2023-01-22 21:02:44.447885: step: 208/470, loss: 0.0026962130796164274 2023-01-22 21:02:45.201489: step: 210/470, loss: 0.0014513169880956411 2023-01-22 21:02:45.871200: step: 212/470, loss: 0.010849079117178917 2023-01-22 21:02:46.621516: step: 214/470, loss: 0.008346461690962315 2023-01-22 21:02:47.390668: step: 216/470, loss: 0.07517098635435104 2023-01-22 21:02:48.119049: step: 218/470, loss: 0.27010560035705566 2023-01-22 21:02:48.891824: step: 220/470, loss: 0.02572491392493248 2023-01-22 21:02:49.596127: step: 222/470, loss: 0.0008853495819494128 2023-01-22 21:02:50.319710: step: 224/470, loss: 0.01680896058678627 2023-01-22 21:02:51.096013: step: 226/470, loss: 0.024884656071662903 2023-01-22 21:02:51.828613: step: 228/470, loss: 0.0003141724446322769 2023-01-22 21:02:52.478642: step: 230/470, loss: 0.06602133810520172 2023-01-22 21:02:53.107349: step: 232/470, loss: 0.014157207682728767 2023-01-22 21:02:53.815280: step: 234/470, loss: 0.00014316238230094314 2023-01-22 21:02:54.554567: step: 236/470, loss: 0.010098121128976345 2023-01-22 21:02:55.316775: step: 238/470, loss: 0.006802602671086788 2023-01-22 21:02:56.100455: step: 240/470, loss: 0.026168648153543472 2023-01-22 21:02:56.740772: step: 242/470, loss: 0.020882943645119667 2023-01-22 21:02:57.371233: step: 244/470, loss: 0.0026823831722140312 2023-01-22 21:02:58.053594: step: 246/470, loss: 0.002364139771088958 2023-01-22 21:02:58.846313: step: 248/470, loss: 0.04574478790163994 2023-01-22 21:02:59.607334: step: 250/470, loss: 0.0015829337062314153 2023-01-22 21:03:00.323527: step: 252/470, loss: 0.018606998026371002 2023-01-22 21:03:01.022322: step: 254/470, loss: 0.00044129794696345925 2023-01-22 21:03:01.739070: step: 256/470, loss: 0.03767343983054161 2023-01-22 21:03:02.484291: step: 258/470, loss: 0.00863537099212408 2023-01-22 21:03:03.200784: step: 260/470, loss: 0.01770273968577385 2023-01-22 21:03:03.913431: step: 262/470, loss: 0.015628566965460777 2023-01-22 21:03:04.608068: step: 264/470, loss: 0.0023462101817131042 2023-01-22 21:03:05.289985: step: 266/470, loss: 0.0092113446444273 2023-01-22 21:03:05.987830: step: 268/470, loss: 0.06474064290523529 2023-01-22 21:03:06.706889: step: 270/470, loss: 8.660142157168593e-06 2023-01-22 21:03:07.529607: step: 272/470, loss: 0.3112047612667084 2023-01-22 21:03:08.360688: step: 274/470, loss: 0.24923990666866302 2023-01-22 21:03:09.073546: step: 276/470, loss: 0.0041430373676121235 2023-01-22 21:03:09.821824: step: 278/470, loss: 0.008526407182216644 2023-01-22 21:03:10.430119: step: 280/470, loss: 0.004656743258237839 2023-01-22 21:03:11.183071: step: 282/470, loss: 0.016585668548941612 2023-01-22 21:03:11.867500: step: 284/470, loss: 3.0181516194716096e-05 2023-01-22 21:03:12.687741: step: 286/470, loss: 0.0011372804874554276 2023-01-22 21:03:13.485533: step: 288/470, loss: 0.022914016619324684 2023-01-22 21:03:14.251126: step: 290/470, loss: 0.06423134356737137 2023-01-22 21:03:15.030015: step: 292/470, loss: 0.002632809802889824 2023-01-22 21:03:15.823507: step: 294/470, loss: 0.04266877844929695 2023-01-22 21:03:16.603793: step: 296/470, loss: 0.03284723311662674 2023-01-22 21:03:17.354754: step: 298/470, loss: 0.007151145022362471 2023-01-22 21:03:18.172276: step: 300/470, loss: 0.1206933856010437 2023-01-22 21:03:18.981795: step: 302/470, loss: 0.010511813685297966 2023-01-22 21:03:19.664557: step: 304/470, loss: 0.011880377307534218 2023-01-22 21:03:20.381647: step: 306/470, loss: 0.28889280557632446 2023-01-22 21:03:21.064352: step: 308/470, loss: 0.008291719481348991 2023-01-22 21:03:21.694249: step: 310/470, loss: 0.0001515456533525139 2023-01-22 21:03:22.412029: step: 312/470, loss: 0.5845152139663696 2023-01-22 21:03:23.080961: step: 314/470, loss: 0.05446061119437218 2023-01-22 21:03:23.778026: step: 316/470, loss: 0.00031370227225124836 2023-01-22 21:03:24.548085: step: 318/470, loss: 0.0016886562807485461 2023-01-22 21:03:25.242720: step: 320/470, loss: 0.007711860351264477 2023-01-22 21:03:26.023219: step: 322/470, loss: 0.0003481293679215014 2023-01-22 21:03:26.712098: step: 324/470, loss: 0.00423666276037693 2023-01-22 21:03:27.482811: step: 326/470, loss: 0.21974371373653412 2023-01-22 21:03:28.222914: step: 328/470, loss: 0.02752552181482315 2023-01-22 21:03:28.964720: step: 330/470, loss: 0.001168736140243709 2023-01-22 21:03:29.717222: step: 332/470, loss: 0.18791626393795013 2023-01-22 21:03:30.483063: step: 334/470, loss: 0.016098229214549065 2023-01-22 21:03:31.099114: step: 336/470, loss: 0.005329936742782593 2023-01-22 21:03:31.848975: step: 338/470, loss: 0.05876723304390907 2023-01-22 21:03:32.526819: step: 340/470, loss: 0.06268248707056046 2023-01-22 21:03:33.234743: step: 342/470, loss: 0.006979916710406542 2023-01-22 21:03:33.939662: step: 344/470, loss: 0.0026592700742185116 2023-01-22 21:03:34.596908: step: 346/470, loss: 0.0027532707899808884 2023-01-22 21:03:35.386474: step: 348/470, loss: 0.05007663369178772 2023-01-22 21:03:36.098240: step: 350/470, loss: 0.04327116161584854 2023-01-22 21:03:36.896297: step: 352/470, loss: 0.0002385459520155564 2023-01-22 21:03:37.654473: step: 354/470, loss: 0.08725601434707642 2023-01-22 21:03:38.316489: step: 356/470, loss: 0.007848616689443588 2023-01-22 21:03:39.000989: step: 358/470, loss: 0.010468382388353348 2023-01-22 21:03:39.701676: step: 360/470, loss: 0.0008415202610194683 2023-01-22 21:03:40.506498: step: 362/470, loss: 0.005082852207124233 2023-01-22 21:03:41.289268: step: 364/470, loss: 0.04120028764009476 2023-01-22 21:03:41.979163: step: 366/470, loss: 0.052009016275405884 2023-01-22 21:03:42.699667: step: 368/470, loss: 8.392710878979415e-05 2023-01-22 21:03:43.433080: step: 370/470, loss: 0.005474635865539312 2023-01-22 21:03:44.111776: step: 372/470, loss: 0.015097817406058311 2023-01-22 21:03:44.896917: step: 374/470, loss: 0.016206717118620872 2023-01-22 21:03:45.649868: step: 376/470, loss: 0.0031522957142442465 2023-01-22 21:03:46.351846: step: 378/470, loss: 0.00015461869770660996 2023-01-22 21:03:47.095829: step: 380/470, loss: 0.0001224875304615125 2023-01-22 21:03:47.799884: step: 382/470, loss: 0.006665257271379232 2023-01-22 21:03:48.615017: step: 384/470, loss: 0.039671264588832855 2023-01-22 21:03:49.346630: step: 386/470, loss: 0.037569403648376465 2023-01-22 21:03:50.094833: step: 388/470, loss: 0.005145507864654064 2023-01-22 21:03:50.894507: step: 390/470, loss: 0.0063809738494455814 2023-01-22 21:03:51.676009: step: 392/470, loss: 0.01172274723649025 2023-01-22 21:03:52.409896: step: 394/470, loss: 0.0012702624080702662 2023-01-22 21:03:53.129595: step: 396/470, loss: 0.007107668090611696 2023-01-22 21:03:53.795088: step: 398/470, loss: 0.014695264399051666 2023-01-22 21:03:54.546648: step: 400/470, loss: 0.018078316003084183 2023-01-22 21:03:55.305037: step: 402/470, loss: 0.027752364054322243 2023-01-22 21:03:56.056362: step: 404/470, loss: 0.028842059895396233 2023-01-22 21:03:56.766268: step: 406/470, loss: 0.03767353668808937 2023-01-22 21:03:57.460085: step: 408/470, loss: 0.030763795599341393 2023-01-22 21:03:58.238000: step: 410/470, loss: 0.05021560564637184 2023-01-22 21:03:59.003708: step: 412/470, loss: 0.040465906262397766 2023-01-22 21:03:59.766924: step: 414/470, loss: 0.04318307340145111 2023-01-22 21:04:00.494021: step: 416/470, loss: 0.003911891486495733 2023-01-22 21:04:01.173215: step: 418/470, loss: 0.005350908264517784 2023-01-22 21:04:01.901941: step: 420/470, loss: 0.00030204097856767476 2023-01-22 21:04:02.560988: step: 422/470, loss: 0.015325166285037994 2023-01-22 21:04:03.300876: step: 424/470, loss: 0.41761523485183716 2023-01-22 21:04:03.997875: step: 426/470, loss: 0.011177667416632175 2023-01-22 21:04:04.784598: step: 428/470, loss: 0.0751306414604187 2023-01-22 21:04:05.564189: step: 430/470, loss: 0.05438210442662239 2023-01-22 21:04:06.340498: step: 432/470, loss: 0.05413680523633957 2023-01-22 21:04:07.016638: step: 434/470, loss: 0.04908444359898567 2023-01-22 21:04:07.806739: step: 436/470, loss: 0.0007037912728264928 2023-01-22 21:04:08.475191: step: 438/470, loss: 0.0084762591868639 2023-01-22 21:04:09.120115: step: 440/470, loss: 0.007872858084738255 2023-01-22 21:04:09.824146: step: 442/470, loss: 0.004519272595643997 2023-01-22 21:04:10.601376: step: 444/470, loss: 0.0006240661023184657 2023-01-22 21:04:11.311121: step: 446/470, loss: 0.25435835123062134 2023-01-22 21:04:12.017344: step: 448/470, loss: 0.009995101019740105 2023-01-22 21:04:12.722507: step: 450/470, loss: 0.0034806779585778713 2023-01-22 21:04:13.434428: step: 452/470, loss: 0.25618600845336914 2023-01-22 21:04:14.130173: step: 454/470, loss: 0.0810975506901741 2023-01-22 21:04:14.922008: step: 456/470, loss: 0.007433571387082338 2023-01-22 21:04:15.621140: step: 458/470, loss: 0.16480611264705658 2023-01-22 21:04:16.354855: step: 460/470, loss: 0.0011335683520883322 2023-01-22 21:04:17.158805: step: 462/470, loss: 0.005519744008779526 2023-01-22 21:04:17.827259: step: 464/470, loss: 0.006193581037223339 2023-01-22 21:04:18.498544: step: 466/470, loss: 0.08350327610969543 2023-01-22 21:04:19.211876: step: 468/470, loss: 0.035261370241642 2023-01-22 21:04:19.972331: step: 470/470, loss: 0.04092838987708092 2023-01-22 21:04:20.632728: step: 472/470, loss: 0.014473150484263897 2023-01-22 21:04:21.344112: step: 474/470, loss: 0.23851221799850464 2023-01-22 21:04:22.047896: step: 476/470, loss: 0.019818585366010666 2023-01-22 21:04:22.770632: step: 478/470, loss: 0.01893843151628971 2023-01-22 21:04:23.485030: step: 480/470, loss: 0.042298175394535065 2023-01-22 21:04:24.234763: step: 482/470, loss: 0.007379506714642048 2023-01-22 21:04:25.012831: step: 484/470, loss: 0.02085047774016857 2023-01-22 21:04:25.711410: step: 486/470, loss: 0.02521711029112339 2023-01-22 21:04:26.472780: step: 488/470, loss: 0.02616407722234726 2023-01-22 21:04:27.196194: step: 490/470, loss: 0.0031149170827120543 2023-01-22 21:04:27.885294: step: 492/470, loss: 0.0020052469335496426 2023-01-22 21:04:28.668399: step: 494/470, loss: 0.0112814512103796 2023-01-22 21:04:29.381895: step: 496/470, loss: 0.00834614410996437 2023-01-22 21:04:30.103113: step: 498/470, loss: 0.0071093300357460976 2023-01-22 21:04:30.840505: step: 500/470, loss: 0.008331052958965302 2023-01-22 21:04:31.557449: step: 502/470, loss: 0.02857259288430214 2023-01-22 21:04:32.205596: step: 504/470, loss: 0.014069000259041786 2023-01-22 21:04:32.882545: step: 506/470, loss: 0.13017906248569489 2023-01-22 21:04:33.759061: step: 508/470, loss: 0.0029538320377469063 2023-01-22 21:04:34.498040: step: 510/470, loss: 0.0012912432430312037 2023-01-22 21:04:35.302739: step: 512/470, loss: 0.0015609815018251538 2023-01-22 21:04:36.081929: step: 514/470, loss: 0.02052444778382778 2023-01-22 21:04:36.747567: step: 516/470, loss: 0.1478673666715622 2023-01-22 21:04:37.461991: step: 518/470, loss: 0.0018792763585224748 2023-01-22 21:04:38.170382: step: 520/470, loss: 0.006700445432215929 2023-01-22 21:04:38.874759: step: 522/470, loss: 0.016489727422595024 2023-01-22 21:04:39.614649: step: 524/470, loss: 0.09012142568826675 2023-01-22 21:04:40.408303: step: 526/470, loss: 0.019330738112330437 2023-01-22 21:04:41.112982: step: 528/470, loss: 0.013795935548841953 2023-01-22 21:04:41.972946: step: 530/470, loss: 0.014351630583405495 2023-01-22 21:04:42.678750: step: 532/470, loss: 0.08632518351078033 2023-01-22 21:04:43.431305: step: 534/470, loss: 0.023600619286298752 2023-01-22 21:04:44.147148: step: 536/470, loss: 0.009278071112930775 2023-01-22 21:04:44.847493: step: 538/470, loss: 0.007990618236362934 2023-01-22 21:04:45.506956: step: 540/470, loss: 8.854016778059304e-05 2023-01-22 21:04:46.174583: step: 542/470, loss: 0.11269865930080414 2023-01-22 21:04:46.864274: step: 544/470, loss: 0.01807483844459057 2023-01-22 21:04:47.709132: step: 546/470, loss: 0.02807825244963169 2023-01-22 21:04:48.380207: step: 548/470, loss: 0.02302536927163601 2023-01-22 21:04:49.117635: step: 550/470, loss: 0.03050990402698517 2023-01-22 21:04:49.880492: step: 552/470, loss: 0.03772333264350891 2023-01-22 21:04:50.681525: step: 554/470, loss: 0.00996860396116972 2023-01-22 21:04:51.399866: step: 556/470, loss: 0.006364746019244194 2023-01-22 21:04:52.133650: step: 558/470, loss: 0.011979727074503899 2023-01-22 21:04:52.877623: step: 560/470, loss: 0.05357194319367409 2023-01-22 21:04:53.660264: step: 562/470, loss: 0.0625770092010498 2023-01-22 21:04:54.346502: step: 564/470, loss: 0.00023006857372820377 2023-01-22 21:04:55.109483: step: 566/470, loss: 0.019672883674502373 2023-01-22 21:04:55.874040: step: 568/470, loss: 0.04599352553486824 2023-01-22 21:04:56.616786: step: 570/470, loss: 0.010658334009349346 2023-01-22 21:04:57.457454: step: 572/470, loss: 0.015365660190582275 2023-01-22 21:04:58.279090: step: 574/470, loss: 0.004618105012923479 2023-01-22 21:04:58.989111: step: 576/470, loss: 0.0076096258126199245 2023-01-22 21:04:59.694847: step: 578/470, loss: 0.020669929683208466 2023-01-22 21:05:00.364416: step: 580/470, loss: 0.001826974330469966 2023-01-22 21:05:01.020571: step: 582/470, loss: 0.1444631665945053 2023-01-22 21:05:01.752775: step: 584/470, loss: 0.0031216128263622522 2023-01-22 21:05:02.500632: step: 586/470, loss: 0.016765842214226723 2023-01-22 21:05:03.301131: step: 588/470, loss: 0.007116939872503281 2023-01-22 21:05:04.009672: step: 590/470, loss: 0.13170292973518372 2023-01-22 21:05:04.770570: step: 592/470, loss: 0.016895027831196785 2023-01-22 21:05:05.464406: step: 594/470, loss: 0.00024278272758238018 2023-01-22 21:05:06.183774: step: 596/470, loss: 0.0030464413575828075 2023-01-22 21:05:06.806014: step: 598/470, loss: 0.0016490904381498694 2023-01-22 21:05:07.605091: step: 600/470, loss: 0.03087281621992588 2023-01-22 21:05:08.365717: step: 602/470, loss: 0.003503436455503106 2023-01-22 21:05:09.033591: step: 604/470, loss: 0.18342113494873047 2023-01-22 21:05:09.743693: step: 606/470, loss: 0.018698526546359062 2023-01-22 21:05:10.491795: step: 608/470, loss: 0.05845620110630989 2023-01-22 21:05:11.231382: step: 610/470, loss: 0.001678596599958837 2023-01-22 21:05:11.945883: step: 612/470, loss: 0.014549157582223415 2023-01-22 21:05:12.647051: step: 614/470, loss: 0.0009063933975994587 2023-01-22 21:05:13.317693: step: 616/470, loss: 0.0022073141299188137 2023-01-22 21:05:14.001475: step: 618/470, loss: 0.012333320453763008 2023-01-22 21:05:14.746659: step: 620/470, loss: 0.09075351059436798 2023-01-22 21:05:15.481233: step: 622/470, loss: 0.029867831617593765 2023-01-22 21:05:16.224549: step: 624/470, loss: 0.09833616763353348 2023-01-22 21:05:16.902278: step: 626/470, loss: 0.0060694171115756035 2023-01-22 21:05:17.523665: step: 628/470, loss: 0.008746202103793621 2023-01-22 21:05:18.172417: step: 630/470, loss: 0.0012253581080585718 2023-01-22 21:05:18.888368: step: 632/470, loss: 0.01028769463300705 2023-01-22 21:05:19.685666: step: 634/470, loss: 0.011649365536868572 2023-01-22 21:05:20.488346: step: 636/470, loss: 0.02281986176967621 2023-01-22 21:05:21.224676: step: 638/470, loss: 0.21459834277629852 2023-01-22 21:05:21.875051: step: 640/470, loss: 0.004545276518911123 2023-01-22 21:05:22.521362: step: 642/470, loss: 3.121816189377569e-05 2023-01-22 21:05:23.171098: step: 644/470, loss: 0.005426404532045126 2023-01-22 21:05:24.030001: step: 646/470, loss: 0.040757469832897186 2023-01-22 21:05:24.701036: step: 648/470, loss: 0.011101129464805126 2023-01-22 21:05:25.487500: step: 650/470, loss: 2.5025525093078613 2023-01-22 21:05:26.166308: step: 652/470, loss: 0.012538554146885872 2023-01-22 21:05:26.894207: step: 654/470, loss: 0.03285971283912659 2023-01-22 21:05:27.739522: step: 656/470, loss: 0.0021789884194731712 2023-01-22 21:05:28.469785: step: 658/470, loss: 0.001402953639626503 2023-01-22 21:05:29.221058: step: 660/470, loss: 0.0017720997566357255 2023-01-22 21:05:29.968619: step: 662/470, loss: 0.039381928741931915 2023-01-22 21:05:30.735291: step: 664/470, loss: 0.01845177263021469 2023-01-22 21:05:31.362527: step: 666/470, loss: 0.029854735359549522 2023-01-22 21:05:32.048140: step: 668/470, loss: 0.003992500249296427 2023-01-22 21:05:32.745423: step: 670/470, loss: 0.004251103848218918 2023-01-22 21:05:33.457461: step: 672/470, loss: 0.010103264823555946 2023-01-22 21:05:34.200948: step: 674/470, loss: 0.0004654536605812609 2023-01-22 21:05:34.924255: step: 676/470, loss: 0.4272131025791168 2023-01-22 21:05:35.617207: step: 678/470, loss: 0.0190906822681427 2023-01-22 21:05:36.423153: step: 680/470, loss: 0.005119283217936754 2023-01-22 21:05:37.149651: step: 682/470, loss: 0.0027894098311662674 2023-01-22 21:05:37.959825: step: 684/470, loss: 0.01846400462090969 2023-01-22 21:05:38.716873: step: 686/470, loss: 0.1110844612121582 2023-01-22 21:05:39.405772: step: 688/470, loss: 0.0023693658877164125 2023-01-22 21:05:40.111756: step: 690/470, loss: 0.003205197863280773 2023-01-22 21:05:40.849908: step: 692/470, loss: 0.017842544242739677 2023-01-22 21:05:41.603347: step: 694/470, loss: 0.018687259405851364 2023-01-22 21:05:42.226747: step: 696/470, loss: 8.786357648205012e-05 2023-01-22 21:05:42.953031: step: 698/470, loss: 0.0007583802798762918 2023-01-22 21:05:43.754099: step: 700/470, loss: 0.20183929800987244 2023-01-22 21:05:44.491768: step: 702/470, loss: 0.00020361962378956378 2023-01-22 21:05:45.281655: step: 704/470, loss: 0.027718501165509224 2023-01-22 21:05:45.970578: step: 706/470, loss: 0.00023348911781795323 2023-01-22 21:05:46.601436: step: 708/470, loss: 0.032941270619630814 2023-01-22 21:05:47.360391: step: 710/470, loss: 0.04147607460618019 2023-01-22 21:05:48.074074: step: 712/470, loss: 6.337544618872926e-05 2023-01-22 21:05:48.801853: step: 714/470, loss: 0.014632035978138447 2023-01-22 21:05:49.501610: step: 716/470, loss: 0.020782971754670143 2023-01-22 21:05:50.175045: step: 718/470, loss: 0.006766524165868759 2023-01-22 21:05:50.948522: step: 720/470, loss: 0.014275794848799706 2023-01-22 21:05:51.718957: step: 722/470, loss: 0.017139313742518425 2023-01-22 21:05:52.440233: step: 724/470, loss: 0.014928764663636684 2023-01-22 21:05:53.124845: step: 726/470, loss: 0.028308287262916565 2023-01-22 21:05:53.862711: step: 728/470, loss: 0.05225411802530289 2023-01-22 21:05:54.558711: step: 730/470, loss: 0.045965153723955154 2023-01-22 21:05:55.333892: step: 732/470, loss: 0.001099904766306281 2023-01-22 21:05:56.131525: step: 734/470, loss: 0.018097948282957077 2023-01-22 21:05:56.856106: step: 736/470, loss: 0.06644519418478012 2023-01-22 21:05:57.531578: step: 738/470, loss: 0.017522266134619713 2023-01-22 21:05:58.308701: step: 740/470, loss: 0.006565961986780167 2023-01-22 21:05:59.006263: step: 742/470, loss: 0.0008447995060123503 2023-01-22 21:05:59.731060: step: 744/470, loss: 0.008136849850416183 2023-01-22 21:06:00.541649: step: 746/470, loss: 0.02021070197224617 2023-01-22 21:06:01.218427: step: 748/470, loss: 0.0010262180585414171 2023-01-22 21:06:01.924369: step: 750/470, loss: 0.028441239148378372 2023-01-22 21:06:02.633023: step: 752/470, loss: 1.7200602087541483e-05 2023-01-22 21:06:03.388466: step: 754/470, loss: 0.005179987754672766 2023-01-22 21:06:04.162967: step: 756/470, loss: 0.2640395760536194 2023-01-22 21:06:04.821157: step: 758/470, loss: 0.001883813296444714 2023-01-22 21:06:05.533913: step: 760/470, loss: 0.024232909083366394 2023-01-22 21:06:06.226500: step: 762/470, loss: 0.031181665137410164 2023-01-22 21:06:07.001275: step: 764/470, loss: 0.13508857786655426 2023-01-22 21:06:07.701147: step: 766/470, loss: 0.02267894335091114 2023-01-22 21:06:08.476651: step: 768/470, loss: 0.01819641701877117 2023-01-22 21:06:09.262476: step: 770/470, loss: 0.03003586456179619 2023-01-22 21:06:09.996596: step: 772/470, loss: 0.02495230734348297 2023-01-22 21:06:10.761504: step: 774/470, loss: 0.016687629744410515 2023-01-22 21:06:11.459788: step: 776/470, loss: 0.001529327011667192 2023-01-22 21:06:12.207697: step: 778/470, loss: 0.05840422213077545 2023-01-22 21:06:12.942796: step: 780/470, loss: 0.003544104751199484 2023-01-22 21:06:13.685458: step: 782/470, loss: 0.00039587877108715475 2023-01-22 21:06:14.560543: step: 784/470, loss: 0.6694744825363159 2023-01-22 21:06:15.329370: step: 786/470, loss: 0.03220684081315994 2023-01-22 21:06:16.061048: step: 788/470, loss: 0.014541038312017918 2023-01-22 21:06:16.731859: step: 790/470, loss: 0.004369522910565138 2023-01-22 21:06:17.419724: step: 792/470, loss: 0.013264582492411137 2023-01-22 21:06:18.156572: step: 794/470, loss: 0.0005037263035774231 2023-01-22 21:06:18.936269: step: 796/470, loss: 0.002712902380153537 2023-01-22 21:06:19.763920: step: 798/470, loss: 0.3494986593723297 2023-01-22 21:06:20.489126: step: 800/470, loss: 0.0015915961703285575 2023-01-22 21:06:21.228912: step: 802/470, loss: 0.013079517520964146 2023-01-22 21:06:22.017199: step: 804/470, loss: 0.004297505598515272 2023-01-22 21:06:22.758609: step: 806/470, loss: 0.40553227066993713 2023-01-22 21:06:23.615673: step: 808/470, loss: 0.034644801169633865 2023-01-22 21:06:24.294123: step: 810/470, loss: 0.008511470630764961 2023-01-22 21:06:25.044629: step: 812/470, loss: 0.09465232491493225 2023-01-22 21:06:25.732672: step: 814/470, loss: 0.001864009303972125 2023-01-22 21:06:26.418223: step: 816/470, loss: 0.03610503673553467 2023-01-22 21:06:27.171770: step: 818/470, loss: 0.03397827968001366 2023-01-22 21:06:27.893578: step: 820/470, loss: 0.003417443251237273 2023-01-22 21:06:28.573106: step: 822/470, loss: 0.020306063815951347 2023-01-22 21:06:29.317549: step: 824/470, loss: 0.06476482003927231 2023-01-22 21:06:30.025068: step: 826/470, loss: 0.002904894994571805 2023-01-22 21:06:30.793667: step: 828/470, loss: 0.004498578608036041 2023-01-22 21:06:31.503329: step: 830/470, loss: 0.03605084493756294 2023-01-22 21:06:32.224249: step: 832/470, loss: 0.011630040593445301 2023-01-22 21:06:32.901728: step: 834/470, loss: 0.01983511820435524 2023-01-22 21:06:33.622632: step: 836/470, loss: 0.002731953514739871 2023-01-22 21:06:34.422946: step: 838/470, loss: 0.009025120176374912 2023-01-22 21:06:35.151393: step: 840/470, loss: 0.19690948724746704 2023-01-22 21:06:35.938893: step: 842/470, loss: 0.0033933157101273537 2023-01-22 21:06:36.636213: step: 844/470, loss: 0.054548464715480804 2023-01-22 21:06:37.377290: step: 846/470, loss: 0.003134024329483509 2023-01-22 21:06:38.162098: step: 848/470, loss: 0.09228435903787613 2023-01-22 21:06:38.859081: step: 850/470, loss: 0.0018799483077600598 2023-01-22 21:06:39.722901: step: 852/470, loss: 0.13064418733119965 2023-01-22 21:06:40.434866: step: 854/470, loss: 0.0057640597224235535 2023-01-22 21:06:41.133815: step: 856/470, loss: 0.00749384006485343 2023-01-22 21:06:41.908274: step: 858/470, loss: 0.0018819028045982122 2023-01-22 21:06:42.644653: step: 860/470, loss: 0.012828037142753601 2023-01-22 21:06:43.335956: step: 862/470, loss: 0.00204385444521904 2023-01-22 21:06:44.109457: step: 864/470, loss: 0.03246442228555679 2023-01-22 21:06:44.825570: step: 866/470, loss: 0.042275648564100266 2023-01-22 21:06:45.437763: step: 868/470, loss: 0.020331397652626038 2023-01-22 21:06:46.099169: step: 870/470, loss: 0.019861869513988495 2023-01-22 21:06:46.752057: step: 872/470, loss: 0.0008119989070110023 2023-01-22 21:06:47.490000: step: 874/470, loss: 0.00033393464400433004 2023-01-22 21:06:48.201337: step: 876/470, loss: 0.001284466008655727 2023-01-22 21:06:48.875643: step: 878/470, loss: 0.0018008254701271653 2023-01-22 21:06:49.631096: step: 880/470, loss: 0.0011693740962073207 2023-01-22 21:06:50.339334: step: 882/470, loss: 0.002523294650018215 2023-01-22 21:06:51.058577: step: 884/470, loss: 0.022206205874681473 2023-01-22 21:06:51.753891: step: 886/470, loss: 0.0008540766430087388 2023-01-22 21:06:52.480672: step: 888/470, loss: 0.0023554093204438686 2023-01-22 21:06:53.191695: step: 890/470, loss: 0.013779774308204651 2023-01-22 21:06:53.970306: step: 892/470, loss: 0.010381845757365227 2023-01-22 21:06:54.708761: step: 894/470, loss: 0.02899681217968464 2023-01-22 21:06:55.492434: step: 896/470, loss: 0.04453825205564499 2023-01-22 21:06:56.191046: step: 898/470, loss: 0.05921311303973198 2023-01-22 21:06:57.074700: step: 900/470, loss: 0.006646065041422844 2023-01-22 21:06:57.785133: step: 902/470, loss: 0.023994415998458862 2023-01-22 21:06:58.545601: step: 904/470, loss: 0.002556569641456008 2023-01-22 21:06:59.278814: step: 906/470, loss: 0.0003309166640974581 2023-01-22 21:07:00.033984: step: 908/470, loss: 0.002325868234038353 2023-01-22 21:07:00.740369: step: 910/470, loss: 0.042233262211084366 2023-01-22 21:07:01.550037: step: 912/470, loss: 6.879373540868983e-05 2023-01-22 21:07:02.313036: step: 914/470, loss: 0.01983904279768467 2023-01-22 21:07:03.099098: step: 916/470, loss: 0.18573109805583954 2023-01-22 21:07:03.796951: step: 918/470, loss: 0.032089248299598694 2023-01-22 21:07:04.599283: step: 920/470, loss: 0.0024486505426466465 2023-01-22 21:07:05.339164: step: 922/470, loss: 0.013104692101478577 2023-01-22 21:07:06.042293: step: 924/470, loss: 0.9032142162322998 2023-01-22 21:07:06.865511: step: 926/470, loss: 0.13171081244945526 2023-01-22 21:07:07.584910: step: 928/470, loss: 0.026930810883641243 2023-01-22 21:07:08.338253: step: 930/470, loss: 0.5041398406028748 2023-01-22 21:07:09.086381: step: 932/470, loss: 0.002014985540881753 2023-01-22 21:07:09.712045: step: 934/470, loss: 0.0011477648513391614 2023-01-22 21:07:10.394230: step: 936/470, loss: 0.01803838089108467 2023-01-22 21:07:11.119225: step: 938/470, loss: 0.0009580638143233955 2023-01-22 21:07:11.882280: step: 940/470, loss: 0.0020920049864798784 2023-01-22 21:07:12.542414: step: 942/470, loss: 0.011814834550023079 ================================================== Loss: 0.048 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28490410052910053, 'r': 0.3405874446552815, 'f1': 0.3102672140593489}, 'combined': 0.22861794720162548, 'epoch': 32} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34558926877396756, 'r': 0.3721730586796574, 'f1': 0.3583888713211516}, 'combined': 0.24962408947741904, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2784876766440414, 'r': 0.3408435511108287, 'f1': 0.306526538285677}, 'combined': 0.2258616597894462, 'epoch': 32} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35217647494606535, 'r': 0.3745261358561041, 'f1': 0.3630076246881158}, 'combined': 0.25284113162356325, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2693469609261939, 'r': 0.3531665085388994, 'f1': 0.30561371100164203}, 'combined': 0.2251890502117362, 'epoch': 32} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33691353726061807, 'r': 0.3809714613639296, 'f1': 0.35759054135242496}, 'combined': 0.24906803875293282, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23026315789473684, 'r': 0.375, 'f1': 0.28532608695652173}, 'combined': 0.1902173913043478, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20833333333333334, 'r': 0.32608695652173914, 'f1': 0.25423728813559326}, 'combined': 0.12711864406779663, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4642857142857143, 'r': 0.33620689655172414, 'f1': 0.39}, 'combined': 0.26, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 21:09:48.685151: step: 2/470, loss: 0.0353984571993351 2023-01-22 21:09:49.333277: step: 4/470, loss: 0.0006110117537900805 2023-01-22 21:09:50.013957: step: 6/470, loss: 0.003695101011544466 2023-01-22 21:09:50.786096: step: 8/470, loss: 0.01956905983388424 2023-01-22 21:09:51.518926: step: 10/470, loss: 0.010541500523686409 2023-01-22 21:09:52.198732: step: 12/470, loss: 0.025140857324004173 2023-01-22 21:09:52.977495: step: 14/470, loss: 0.00231540622189641 2023-01-22 21:09:53.736601: step: 16/470, loss: 0.0011222070315852761 2023-01-22 21:09:54.419989: step: 18/470, loss: 0.003358704037964344 2023-01-22 21:09:55.174065: step: 20/470, loss: 0.001413871650584042 2023-01-22 21:09:56.007546: step: 22/470, loss: 0.001081290072761476 2023-01-22 21:09:56.735280: step: 24/470, loss: 0.011456046253442764 2023-01-22 21:09:57.493733: step: 26/470, loss: 0.004079823382198811 2023-01-22 21:09:58.219395: step: 28/470, loss: 0.0013503070222213864 2023-01-22 21:09:58.877782: step: 30/470, loss: 0.005739379674196243 2023-01-22 21:09:59.597538: step: 32/470, loss: 0.016481278464198112 2023-01-22 21:10:00.335707: step: 34/470, loss: 0.0638788491487503 2023-01-22 21:10:01.052437: step: 36/470, loss: 0.003887937404215336 2023-01-22 21:10:01.871580: step: 38/470, loss: 2.0889205932617188 2023-01-22 21:10:02.595046: step: 40/470, loss: 0.005325763486325741 2023-01-22 21:10:03.273635: step: 42/470, loss: 0.004101912025362253 2023-01-22 21:10:03.997062: step: 44/470, loss: 0.0046531264670193195 2023-01-22 21:10:04.672208: step: 46/470, loss: 0.01230566669255495 2023-01-22 21:10:05.437872: step: 48/470, loss: 3.5447123082121834e-05 2023-01-22 21:10:06.291063: step: 50/470, loss: 0.012923387810587883 2023-01-22 21:10:06.989354: step: 52/470, loss: 0.016211075708270073 2023-01-22 21:10:07.768429: step: 54/470, loss: 0.09570623189210892 2023-01-22 21:10:08.563206: step: 56/470, loss: 0.01940716803073883 2023-01-22 21:10:09.280888: step: 58/470, loss: 0.010799039155244827 2023-01-22 21:10:10.030990: step: 60/470, loss: 0.0031259532552212477 2023-01-22 21:10:10.788733: step: 62/470, loss: 0.0015492573147639632 2023-01-22 21:10:11.551507: step: 64/470, loss: 0.03166002035140991 2023-01-22 21:10:12.247325: step: 66/470, loss: 0.0006916436832398176 2023-01-22 21:10:13.068667: step: 68/470, loss: 0.026572024449706078 2023-01-22 21:10:13.829819: step: 70/470, loss: 0.0100321089848876 2023-01-22 21:10:14.444011: step: 72/470, loss: 0.0022878365125507116 2023-01-22 21:10:15.154355: step: 74/470, loss: 0.0371539369225502 2023-01-22 21:10:15.917480: step: 76/470, loss: 0.05440312996506691 2023-01-22 21:10:16.686478: step: 78/470, loss: 0.007360953837633133 2023-01-22 21:10:17.379303: step: 80/470, loss: 0.012081542983651161 2023-01-22 21:10:18.086318: step: 82/470, loss: 0.0012202973011881113 2023-01-22 21:10:18.830743: step: 84/470, loss: 0.00959607120603323 2023-01-22 21:10:19.646254: step: 86/470, loss: 0.0013847972732037306 2023-01-22 21:10:20.343825: step: 88/470, loss: 0.0005331527790986001 2023-01-22 21:10:21.025982: step: 90/470, loss: 0.0019345465116202831 2023-01-22 21:10:21.753765: step: 92/470, loss: 0.005651640705764294 2023-01-22 21:10:22.594637: step: 94/470, loss: 0.47246649861335754 2023-01-22 21:10:23.303690: step: 96/470, loss: 0.008398247882723808 2023-01-22 21:10:23.987290: step: 98/470, loss: 0.002498921239748597 2023-01-22 21:10:24.695205: step: 100/470, loss: 0.0005510192713700235 2023-01-22 21:10:25.339054: step: 102/470, loss: 0.0007535783806815743 2023-01-22 21:10:26.091108: step: 104/470, loss: 0.00044582068221643567 2023-01-22 21:10:26.844026: step: 106/470, loss: 0.015184991993010044 2023-01-22 21:10:27.485513: step: 108/470, loss: 0.09019918739795685 2023-01-22 21:10:28.241831: step: 110/470, loss: 0.00018215616000816226 2023-01-22 21:10:29.006485: step: 112/470, loss: 0.0013423897325992584 2023-01-22 21:10:29.685031: step: 114/470, loss: 0.004689566791057587 2023-01-22 21:10:30.470560: step: 116/470, loss: 0.001340361312031746 2023-01-22 21:10:31.189234: step: 118/470, loss: 0.007616905961185694 2023-01-22 21:10:31.869953: step: 120/470, loss: 0.03174249827861786 2023-01-22 21:10:32.557077: step: 122/470, loss: 0.0005641809548251331 2023-01-22 21:10:33.265238: step: 124/470, loss: 0.0015309054870158434 2023-01-22 21:10:33.981755: step: 126/470, loss: 0.024390079081058502 2023-01-22 21:10:34.695989: step: 128/470, loss: 0.023455774411559105 2023-01-22 21:10:35.428077: step: 130/470, loss: 0.10771681368350983 2023-01-22 21:10:36.198567: step: 132/470, loss: 9.4806935521774e-05 2023-01-22 21:10:36.878105: step: 134/470, loss: 0.007546218577772379 2023-01-22 21:10:37.529777: step: 136/470, loss: 0.006308150477707386 2023-01-22 21:10:38.288967: step: 138/470, loss: 0.3277882933616638 2023-01-22 21:10:39.084755: step: 140/470, loss: 0.016741199418902397 2023-01-22 21:10:39.815680: step: 142/470, loss: 0.02945149876177311 2023-01-22 21:10:40.505588: step: 144/470, loss: 0.0745350643992424 2023-01-22 21:10:41.188546: step: 146/470, loss: 0.0008599523571319878 2023-01-22 21:10:41.978471: step: 148/470, loss: 0.01641632243990898 2023-01-22 21:10:42.734184: step: 150/470, loss: 0.007939077913761139 2023-01-22 21:10:43.423530: step: 152/470, loss: 0.0034217501524835825 2023-01-22 21:10:44.141100: step: 154/470, loss: 0.0031094097066670656 2023-01-22 21:10:44.800983: step: 156/470, loss: 0.020805522799491882 2023-01-22 21:10:45.556237: step: 158/470, loss: 0.0015209285775199533 2023-01-22 21:10:46.264081: step: 160/470, loss: 0.003026079386472702 2023-01-22 21:10:46.975900: step: 162/470, loss: 0.03428162634372711 2023-01-22 21:10:47.717037: step: 164/470, loss: 0.0027114320546388626 2023-01-22 21:10:48.500150: step: 166/470, loss: 0.027286209166049957 2023-01-22 21:10:49.218079: step: 168/470, loss: 0.00016404094640165567 2023-01-22 21:10:49.978430: step: 170/470, loss: 0.031211018562316895 2023-01-22 21:10:50.664629: step: 172/470, loss: 0.003412870690226555 2023-01-22 21:10:51.408606: step: 174/470, loss: 0.043129902333021164 2023-01-22 21:10:52.096448: step: 176/470, loss: 0.07710176706314087 2023-01-22 21:10:52.874204: step: 178/470, loss: 0.017537254840135574 2023-01-22 21:10:53.522939: step: 180/470, loss: 0.009631001390516758 2023-01-22 21:10:54.166077: step: 182/470, loss: 0.001838182215578854 2023-01-22 21:10:54.846012: step: 184/470, loss: 0.013489598408341408 2023-01-22 21:10:55.621618: step: 186/470, loss: 0.021947944536805153 2023-01-22 21:10:56.377416: step: 188/470, loss: 0.008992105722427368 2023-01-22 21:10:57.141286: step: 190/470, loss: 0.17299090325832367 2023-01-22 21:10:57.813693: step: 192/470, loss: 0.0001017658578348346 2023-01-22 21:10:58.516615: step: 194/470, loss: 0.00856022723019123 2023-01-22 21:10:59.206856: step: 196/470, loss: 0.0008848054567351937 2023-01-22 21:10:59.891447: step: 198/470, loss: 0.004278761800378561 2023-01-22 21:11:00.690103: step: 200/470, loss: 0.0017920746468007565 2023-01-22 21:11:01.399708: step: 202/470, loss: 0.00201309728436172 2023-01-22 21:11:02.111805: step: 204/470, loss: 0.01744202710688114 2023-01-22 21:11:02.836125: step: 206/470, loss: 0.0012796318624168634 2023-01-22 21:11:03.492859: step: 208/470, loss: 4.2182771721854806e-05 2023-01-22 21:11:04.209528: step: 210/470, loss: 0.0001089559227693826 2023-01-22 21:11:05.072714: step: 212/470, loss: 0.019380543380975723 2023-01-22 21:11:05.781763: step: 214/470, loss: 0.020527342334389687 2023-01-22 21:11:06.474254: step: 216/470, loss: 0.0021591780241578817 2023-01-22 21:11:07.192216: step: 218/470, loss: 0.00999562069773674 2023-01-22 21:11:08.026348: step: 220/470, loss: 0.02325505018234253 2023-01-22 21:11:08.723563: step: 222/470, loss: 0.022767867892980576 2023-01-22 21:11:09.449790: step: 224/470, loss: 0.0006900187581777573 2023-01-22 21:11:10.158599: step: 226/470, loss: 0.008171236142516136 2023-01-22 21:11:10.973381: step: 228/470, loss: 0.0029809472616761923 2023-01-22 21:11:11.676376: step: 230/470, loss: 0.002115656156092882 2023-01-22 21:11:12.473853: step: 232/470, loss: 0.003473465796560049 2023-01-22 21:11:13.323660: step: 234/470, loss: 0.040313441306352615 2023-01-22 21:11:14.260238: step: 236/470, loss: 0.027963347733020782 2023-01-22 21:11:15.047957: step: 238/470, loss: 0.011281131766736507 2023-01-22 21:11:15.771631: step: 240/470, loss: 0.00025161568191833794 2023-01-22 21:11:16.436014: step: 242/470, loss: 0.027061283588409424 2023-01-22 21:11:17.226731: step: 244/470, loss: 0.026121623814105988 2023-01-22 21:11:17.944831: step: 246/470, loss: 0.009257947094738483 2023-01-22 21:11:18.628075: step: 248/470, loss: 0.010101567022502422 2023-01-22 21:11:19.391864: step: 250/470, loss: 1.0979067087173462 2023-01-22 21:11:20.076933: step: 252/470, loss: 0.006160234101116657 2023-01-22 21:11:20.824960: step: 254/470, loss: 0.004955607000738382 2023-01-22 21:11:21.538396: step: 256/470, loss: 0.009282803162932396 2023-01-22 21:11:22.193846: step: 258/470, loss: 0.004642096348106861 2023-01-22 21:11:22.917178: step: 260/470, loss: 0.029528679326176643 2023-01-22 21:11:23.608543: step: 262/470, loss: 0.01058510597795248 2023-01-22 21:11:24.272874: step: 264/470, loss: 0.007069876883178949 2023-01-22 21:11:25.007748: step: 266/470, loss: 0.010932988487184048 2023-01-22 21:11:25.698237: step: 268/470, loss: 0.0012139062164351344 2023-01-22 21:11:26.478215: step: 270/470, loss: 0.0031020056921988726 2023-01-22 21:11:27.217213: step: 272/470, loss: 0.019556628540158272 2023-01-22 21:11:27.939623: step: 274/470, loss: 0.003774407086893916 2023-01-22 21:11:28.600096: step: 276/470, loss: 0.002119203330948949 2023-01-22 21:11:29.312166: step: 278/470, loss: 0.006396686192601919 2023-01-22 21:11:29.944286: step: 280/470, loss: 0.0008285631192848086 2023-01-22 21:11:30.628281: step: 282/470, loss: 0.030592042952775955 2023-01-22 21:11:31.300671: step: 284/470, loss: 0.02650538459420204 2023-01-22 21:11:32.029331: step: 286/470, loss: 0.0008941978449001908 2023-01-22 21:11:32.750074: step: 288/470, loss: 0.0010146403219550848 2023-01-22 21:11:33.511508: step: 290/470, loss: 0.002193046733736992 2023-01-22 21:11:34.249401: step: 292/470, loss: 0.013625368475914001 2023-01-22 21:11:34.957340: step: 294/470, loss: 0.00888641644269228 2023-01-22 21:11:35.682257: step: 296/470, loss: 0.0019641213584691286 2023-01-22 21:11:36.440328: step: 298/470, loss: 0.4439093768596649 2023-01-22 21:11:37.122693: step: 300/470, loss: 0.0006960076279938221 2023-01-22 21:11:37.887536: step: 302/470, loss: 0.0003841613361146301 2023-01-22 21:11:38.628530: step: 304/470, loss: 0.11751359701156616 2023-01-22 21:11:39.404988: step: 306/470, loss: 0.0003117234446108341 2023-01-22 21:11:40.121236: step: 308/470, loss: 0.012473184615373611 2023-01-22 21:11:40.924840: step: 310/470, loss: 0.003341148141771555 2023-01-22 21:11:41.716578: step: 312/470, loss: 0.016889702528715134 2023-01-22 21:11:42.505462: step: 314/470, loss: 0.03929717838764191 2023-01-22 21:11:43.247598: step: 316/470, loss: 4.825163341592997e-05 2023-01-22 21:11:43.969512: step: 318/470, loss: 0.48037540912628174 2023-01-22 21:11:44.691889: step: 320/470, loss: 0.010676422156393528 2023-01-22 21:11:45.436426: step: 322/470, loss: 0.00287465937435627 2023-01-22 21:11:46.141122: step: 324/470, loss: 0.0012692613527178764 2023-01-22 21:11:46.814975: step: 326/470, loss: 0.021290870383381844 2023-01-22 21:11:47.534679: step: 328/470, loss: 0.021226149052381516 2023-01-22 21:11:48.437373: step: 330/470, loss: 0.037113256752491 2023-01-22 21:11:49.170154: step: 332/470, loss: 0.0005753267323598266 2023-01-22 21:11:49.934621: step: 334/470, loss: 0.009010824374854565 2023-01-22 21:11:50.713252: step: 336/470, loss: 0.0034445514902472496 2023-01-22 21:11:51.408331: step: 338/470, loss: 0.0003466394846327603 2023-01-22 21:11:52.096255: step: 340/470, loss: 0.0053068287670612335 2023-01-22 21:11:52.742236: step: 342/470, loss: 0.017965000122785568 2023-01-22 21:11:53.526100: step: 344/470, loss: 0.027688566595315933 2023-01-22 21:11:54.253361: step: 346/470, loss: 0.0012403011787682772 2023-01-22 21:11:54.919201: step: 348/470, loss: 2.2492525577545166 2023-01-22 21:11:55.735845: step: 350/470, loss: 0.009851688519120216 2023-01-22 21:11:56.435432: step: 352/470, loss: 0.06665859371423721 2023-01-22 21:11:57.337968: step: 354/470, loss: 0.014958545565605164 2023-01-22 21:11:58.040575: step: 356/470, loss: 0.0006210833671502769 2023-01-22 21:11:58.759472: step: 358/470, loss: 0.001347895129583776 2023-01-22 21:11:59.492414: step: 360/470, loss: 0.0017309411196038127 2023-01-22 21:12:00.260239: step: 362/470, loss: 0.10375341773033142 2023-01-22 21:12:00.964722: step: 364/470, loss: 0.00011790274584200233 2023-01-22 21:12:01.733458: step: 366/470, loss: 0.024094436317682266 2023-01-22 21:12:02.417155: step: 368/470, loss: 0.007840093225240707 2023-01-22 21:12:03.192197: step: 370/470, loss: 0.005822331178933382 2023-01-22 21:12:04.047205: step: 372/470, loss: 0.015622143633663654 2023-01-22 21:12:04.687078: step: 374/470, loss: 0.0032653925009071827 2023-01-22 21:12:05.403710: step: 376/470, loss: 0.0022503521759063005 2023-01-22 21:12:06.166382: step: 378/470, loss: 0.01404476910829544 2023-01-22 21:12:06.933634: step: 380/470, loss: 0.007879259996116161 2023-01-22 21:12:07.638419: step: 382/470, loss: 0.5576677918434143 2023-01-22 21:12:08.331364: step: 384/470, loss: 0.00043255838681943715 2023-01-22 21:12:09.123970: step: 386/470, loss: 0.00029005203396081924 2023-01-22 21:12:09.895843: step: 388/470, loss: 0.0023896731436252594 2023-01-22 21:12:10.558769: step: 390/470, loss: 0.013917365111410618 2023-01-22 21:12:11.300417: step: 392/470, loss: 0.000244982453295961 2023-01-22 21:12:12.086907: step: 394/470, loss: 0.22541584074497223 2023-01-22 21:12:12.839240: step: 396/470, loss: 0.009782961569726467 2023-01-22 21:12:13.589913: step: 398/470, loss: 0.0009558402234688401 2023-01-22 21:12:14.403571: step: 400/470, loss: 0.04024609923362732 2023-01-22 21:12:15.085598: step: 402/470, loss: 0.030957777053117752 2023-01-22 21:12:15.869903: step: 404/470, loss: 0.0017546005547046661 2023-01-22 21:12:16.551577: step: 406/470, loss: 0.0013082153163850307 2023-01-22 21:12:17.332447: step: 408/470, loss: 0.11711828410625458 2023-01-22 21:12:18.043864: step: 410/470, loss: 0.020939351990818977 2023-01-22 21:12:18.726561: step: 412/470, loss: 0.04790695011615753 2023-01-22 21:12:19.423595: step: 414/470, loss: 0.0010895759332925081 2023-01-22 21:12:20.170545: step: 416/470, loss: 0.017710620537400246 2023-01-22 21:12:20.899962: step: 418/470, loss: 0.01992659457027912 2023-01-22 21:12:21.625187: step: 420/470, loss: 0.03188169375061989 2023-01-22 21:12:22.385843: step: 422/470, loss: 0.04130684584379196 2023-01-22 21:12:23.106283: step: 424/470, loss: 0.07215629518032074 2023-01-22 21:12:23.861926: step: 426/470, loss: 0.00011767345131374896 2023-01-22 21:12:24.682130: step: 428/470, loss: 0.000228075819904916 2023-01-22 21:12:25.453363: step: 430/470, loss: 0.0013060378842055798 2023-01-22 21:12:26.161632: step: 432/470, loss: 0.0043433355167508125 2023-01-22 21:12:26.878634: step: 434/470, loss: 0.259304016828537 2023-01-22 21:12:27.635415: step: 436/470, loss: 0.017298951745033264 2023-01-22 21:12:28.425996: step: 438/470, loss: 0.0009541614563204348 2023-01-22 21:12:29.176862: step: 440/470, loss: 0.0033586365170776844 2023-01-22 21:12:29.888750: step: 442/470, loss: 0.005000745877623558 2023-01-22 21:12:30.611271: step: 444/470, loss: 0.02338077686727047 2023-01-22 21:12:31.400230: step: 446/470, loss: 0.10848845541477203 2023-01-22 21:12:32.130285: step: 448/470, loss: 0.07149527221918106 2023-01-22 21:12:32.908513: step: 450/470, loss: 0.005873269401490688 2023-01-22 21:12:33.589637: step: 452/470, loss: 0.018467245623469353 2023-01-22 21:12:34.366153: step: 454/470, loss: 0.004103237763047218 2023-01-22 21:12:35.104760: step: 456/470, loss: 0.00016769995272625238 2023-01-22 21:12:35.894110: step: 458/470, loss: 0.05550146475434303 2023-01-22 21:12:36.570505: step: 460/470, loss: 0.002988182008266449 2023-01-22 21:12:37.299709: step: 462/470, loss: 0.019142037257552147 2023-01-22 21:12:38.013808: step: 464/470, loss: 0.0012444063322618604 2023-01-22 21:12:38.731067: step: 466/470, loss: 0.00499011529609561 2023-01-22 21:12:39.469305: step: 468/470, loss: 0.029499473050236702 2023-01-22 21:12:40.159649: step: 470/470, loss: 0.009594298899173737 2023-01-22 21:12:40.917968: step: 472/470, loss: 0.0007405190262943506 2023-01-22 21:12:41.631663: step: 474/470, loss: 0.006137060932815075 2023-01-22 21:12:42.355803: step: 476/470, loss: 4.0265680581796914e-05 2023-01-22 21:12:43.041513: step: 478/470, loss: 1.632533167139627e-05 2023-01-22 21:12:43.756811: step: 480/470, loss: 0.0258161760866642 2023-01-22 21:12:44.538232: step: 482/470, loss: 0.4235173165798187 2023-01-22 21:12:45.221503: step: 484/470, loss: 0.008250389248132706 2023-01-22 21:12:45.935102: step: 486/470, loss: 0.02013562060892582 2023-01-22 21:12:46.676805: step: 488/470, loss: 0.02626902237534523 2023-01-22 21:12:47.430393: step: 490/470, loss: 0.004141016863286495 2023-01-22 21:12:48.134939: step: 492/470, loss: 0.0033669506665319204 2023-01-22 21:12:48.849279: step: 494/470, loss: 0.7086845636367798 2023-01-22 21:12:49.532974: step: 496/470, loss: 0.0009194430313073099 2023-01-22 21:12:50.268944: step: 498/470, loss: 0.003966304939240217 2023-01-22 21:12:50.968562: step: 500/470, loss: 0.029114434495568275 2023-01-22 21:12:51.654857: step: 502/470, loss: 0.00011125182209070772 2023-01-22 21:12:52.399822: step: 504/470, loss: 0.014639819972217083 2023-01-22 21:12:53.099599: step: 506/470, loss: 0.008477681316435337 2023-01-22 21:12:53.842127: step: 508/470, loss: 0.011175676248967648 2023-01-22 21:12:54.609688: step: 510/470, loss: 0.0035168358590453863 2023-01-22 21:12:55.370024: step: 512/470, loss: 0.013511012308299541 2023-01-22 21:12:56.090966: step: 514/470, loss: 4.397913653519936e-05 2023-01-22 21:12:56.904727: step: 516/470, loss: 0.01750068925321102 2023-01-22 21:12:57.633829: step: 518/470, loss: 0.000392085436033085 2023-01-22 21:12:58.302866: step: 520/470, loss: 0.0018893035594373941 2023-01-22 21:12:58.986089: step: 522/470, loss: 0.006660451181232929 2023-01-22 21:12:59.750467: step: 524/470, loss: 0.02844826504588127 2023-01-22 21:13:00.573373: step: 526/470, loss: 0.009441560134291649 2023-01-22 21:13:01.319639: step: 528/470, loss: 0.006550501566380262 2023-01-22 21:13:02.163234: step: 530/470, loss: 0.06545102596282959 2023-01-22 21:13:02.895382: step: 532/470, loss: 0.017406875267624855 2023-01-22 21:13:03.658391: step: 534/470, loss: 0.016543585807085037 2023-01-22 21:13:04.419077: step: 536/470, loss: 0.01982169970870018 2023-01-22 21:13:05.185315: step: 538/470, loss: 0.004087928682565689 2023-01-22 21:13:06.042022: step: 540/470, loss: 0.017809614539146423 2023-01-22 21:13:06.740668: step: 542/470, loss: 0.016991348937153816 2023-01-22 21:13:07.563514: step: 544/470, loss: 0.002067740773782134 2023-01-22 21:13:08.254759: step: 546/470, loss: 0.07715543359518051 2023-01-22 21:13:08.889982: step: 548/470, loss: 3.090485552093014e-05 2023-01-22 21:13:09.616845: step: 550/470, loss: 0.015211367048323154 2023-01-22 21:13:10.279769: step: 552/470, loss: 0.11407588422298431 2023-01-22 21:13:11.081563: step: 554/470, loss: 0.04695338010787964 2023-01-22 21:13:11.854682: step: 556/470, loss: 0.007329762447625399 2023-01-22 21:13:12.625001: step: 558/470, loss: 0.007562029641121626 2023-01-22 21:13:13.481497: step: 560/470, loss: 0.028891831636428833 2023-01-22 21:13:14.215173: step: 562/470, loss: 0.004206001292914152 2023-01-22 21:13:14.854187: step: 564/470, loss: 0.0011216033017262816 2023-01-22 21:13:15.573807: step: 566/470, loss: 0.03309805318713188 2023-01-22 21:13:16.283519: step: 568/470, loss: 0.0024206622038036585 2023-01-22 21:13:17.037521: step: 570/470, loss: 0.03466884419322014 2023-01-22 21:13:17.713112: step: 572/470, loss: 0.028366921469569206 2023-01-22 21:13:18.413344: step: 574/470, loss: 0.014122523367404938 2023-01-22 21:13:19.105082: step: 576/470, loss: 0.03809332102537155 2023-01-22 21:13:19.874029: step: 578/470, loss: 0.032920315861701965 2023-01-22 21:13:20.600811: step: 580/470, loss: 0.45645061135292053 2023-01-22 21:13:21.339420: step: 582/470, loss: 0.007378171198070049 2023-01-22 21:13:22.124661: step: 584/470, loss: 0.016221599653363228 2023-01-22 21:13:22.782531: step: 586/470, loss: 0.0018637162866070867 2023-01-22 21:13:23.655809: step: 588/470, loss: 0.00019457412417978048 2023-01-22 21:13:24.435607: step: 590/470, loss: 0.0033394538331776857 2023-01-22 21:13:25.172386: step: 592/470, loss: 0.017871206626296043 2023-01-22 21:13:25.875652: step: 594/470, loss: 0.018105637282133102 2023-01-22 21:13:26.619910: step: 596/470, loss: 0.02217262051999569 2023-01-22 21:13:27.412624: step: 598/470, loss: 0.03489815443754196 2023-01-22 21:13:28.074572: step: 600/470, loss: 0.004941632971167564 2023-01-22 21:13:28.745300: step: 602/470, loss: 0.009684709832072258 2023-01-22 21:13:29.459640: step: 604/470, loss: 0.010802370496094227 2023-01-22 21:13:30.228081: step: 606/470, loss: 0.0353492833673954 2023-01-22 21:13:31.118922: step: 608/470, loss: 0.009187396615743637 2023-01-22 21:13:31.812580: step: 610/470, loss: 0.00303685013204813 2023-01-22 21:13:32.552944: step: 612/470, loss: 0.017729606479406357 2023-01-22 21:13:33.307941: step: 614/470, loss: 0.009220817126333714 2023-01-22 21:13:34.049734: step: 616/470, loss: 0.02744259312748909 2023-01-22 21:13:34.739890: step: 618/470, loss: 0.026758696883916855 2023-01-22 21:13:35.533918: step: 620/470, loss: 0.0033123791217803955 2023-01-22 21:13:36.301609: step: 622/470, loss: 0.00663144001737237 2023-01-22 21:13:37.051026: step: 624/470, loss: 0.0016671409830451012 2023-01-22 21:13:37.760889: step: 626/470, loss: 0.0074158660136163235 2023-01-22 21:13:38.442562: step: 628/470, loss: 0.0008439401281066239 2023-01-22 21:13:39.205794: step: 630/470, loss: 5.01374488521833e-05 2023-01-22 21:13:39.891100: step: 632/470, loss: 0.008946064859628677 2023-01-22 21:13:40.593576: step: 634/470, loss: 0.06714008748531342 2023-01-22 21:13:41.290502: step: 636/470, loss: 0.0008762570796534419 2023-01-22 21:13:42.128203: step: 638/470, loss: 0.001907090307213366 2023-01-22 21:13:42.854000: step: 640/470, loss: 0.03064919076859951 2023-01-22 21:13:43.692430: step: 642/470, loss: 0.00023972737835720181 2023-01-22 21:13:44.430603: step: 644/470, loss: 0.00011663758778013289 2023-01-22 21:13:45.147031: step: 646/470, loss: 0.010350205935537815 2023-01-22 21:13:45.919516: step: 648/470, loss: 0.015240548178553581 2023-01-22 21:13:46.652001: step: 650/470, loss: 0.017129601910710335 2023-01-22 21:13:47.389590: step: 652/470, loss: 0.03198548033833504 2023-01-22 21:13:48.028927: step: 654/470, loss: 0.0030342754907906055 2023-01-22 21:13:48.744293: step: 656/470, loss: 7.692570943618193e-05 2023-01-22 21:13:49.484518: step: 658/470, loss: 0.014984884299337864 2023-01-22 21:13:50.260951: step: 660/470, loss: 0.007573677692562342 2023-01-22 21:13:50.972623: step: 662/470, loss: 0.14432427287101746 2023-01-22 21:13:51.693915: step: 664/470, loss: 0.003277366515249014 2023-01-22 21:13:52.334957: step: 666/470, loss: 0.04675723984837532 2023-01-22 21:13:53.104475: step: 668/470, loss: 0.02985813282430172 2023-01-22 21:13:53.869878: step: 670/470, loss: 0.010331113822758198 2023-01-22 21:13:54.570086: step: 672/470, loss: 0.004046509508043528 2023-01-22 21:13:55.326306: step: 674/470, loss: 0.017745012417435646 2023-01-22 21:13:56.126293: step: 676/470, loss: 0.0010939788771793246 2023-01-22 21:13:56.827006: step: 678/470, loss: 0.0016873609274625778 2023-01-22 21:13:57.561908: step: 680/470, loss: 0.015853749588131905 2023-01-22 21:13:58.388891: step: 682/470, loss: 0.0017006437992677093 2023-01-22 21:13:59.069832: step: 684/470, loss: 0.002765461103990674 2023-01-22 21:13:59.789435: step: 686/470, loss: 0.7379614114761353 2023-01-22 21:14:00.512493: step: 688/470, loss: 0.005514780059456825 2023-01-22 21:14:01.171239: step: 690/470, loss: 0.002757574198767543 2023-01-22 21:14:01.914756: step: 692/470, loss: 0.46669384837150574 2023-01-22 21:14:02.581840: step: 694/470, loss: 0.005543508101254702 2023-01-22 21:14:03.267691: step: 696/470, loss: 0.0004790358943864703 2023-01-22 21:14:03.997850: step: 698/470, loss: 0.00039144910988397896 2023-01-22 21:14:04.726885: step: 700/470, loss: 0.01933109760284424 2023-01-22 21:14:05.483022: step: 702/470, loss: 0.0024042355362325907 2023-01-22 21:14:06.120015: step: 704/470, loss: 0.0020206007175147533 2023-01-22 21:14:06.853849: step: 706/470, loss: 0.00844528991729021 2023-01-22 21:14:07.554476: step: 708/470, loss: 0.0017009805887937546 2023-01-22 21:14:08.248146: step: 710/470, loss: 0.008159015327692032 2023-01-22 21:14:09.001680: step: 712/470, loss: 0.001408104319125414 2023-01-22 21:14:09.722287: step: 714/470, loss: 0.008478800766170025 2023-01-22 21:14:10.379780: step: 716/470, loss: 0.021621203050017357 2023-01-22 21:14:11.133973: step: 718/470, loss: 6.314300844678655e-05 2023-01-22 21:14:11.892746: step: 720/470, loss: 0.027302606031298637 2023-01-22 21:14:12.599924: step: 722/470, loss: 0.020021893084049225 2023-01-22 21:14:13.383827: step: 724/470, loss: 0.028122197836637497 2023-01-22 21:14:14.097335: step: 726/470, loss: 0.004987229593098164 2023-01-22 21:14:14.757747: step: 728/470, loss: 0.023552965372800827 2023-01-22 21:14:15.496074: step: 730/470, loss: 0.0023100976832211018 2023-01-22 21:14:16.233736: step: 732/470, loss: 0.014617957174777985 2023-01-22 21:14:16.960161: step: 734/470, loss: 0.012580832466483116 2023-01-22 21:14:17.703565: step: 736/470, loss: 0.0018753198673948646 2023-01-22 21:14:18.386665: step: 738/470, loss: 0.03843626379966736 2023-01-22 21:14:19.223689: step: 740/470, loss: 0.05392017588019371 2023-01-22 21:14:19.966150: step: 742/470, loss: 0.1939995437860489 2023-01-22 21:14:20.666344: step: 744/470, loss: 0.0006773190689273179 2023-01-22 21:14:21.461292: step: 746/470, loss: 0.04806168004870415 2023-01-22 21:14:22.197411: step: 748/470, loss: 0.03759084269404411 2023-01-22 21:14:22.970902: step: 750/470, loss: 0.002831129590049386 2023-01-22 21:14:23.711600: step: 752/470, loss: 0.006249632220715284 2023-01-22 21:14:24.438748: step: 754/470, loss: 0.004659464117139578 2023-01-22 21:14:25.122586: step: 756/470, loss: 0.000623860105406493 2023-01-22 21:14:25.785490: step: 758/470, loss: 0.0027934126555919647 2023-01-22 21:14:26.515618: step: 760/470, loss: 0.0031853329855948687 2023-01-22 21:14:27.212225: step: 762/470, loss: 0.02544858306646347 2023-01-22 21:14:27.938068: step: 764/470, loss: 0.010291761718690395 2023-01-22 21:14:28.680098: step: 766/470, loss: 0.011330293491482735 2023-01-22 21:14:29.529405: step: 768/470, loss: 0.006938802544027567 2023-01-22 21:14:30.236912: step: 770/470, loss: 0.029576266184449196 2023-01-22 21:14:30.970708: step: 772/470, loss: 0.02871057577431202 2023-01-22 21:14:31.712695: step: 774/470, loss: 0.006623897235840559 2023-01-22 21:14:32.344091: step: 776/470, loss: 0.01030951552093029 2023-01-22 21:14:33.040542: step: 778/470, loss: 0.0003881768207065761 2023-01-22 21:14:33.789250: step: 780/470, loss: 0.011595960706472397 2023-01-22 21:14:34.530848: step: 782/470, loss: 0.03303779661655426 2023-01-22 21:14:35.299051: step: 784/470, loss: 0.009521235711872578 2023-01-22 21:14:36.020714: step: 786/470, loss: 0.010765934363007545 2023-01-22 21:14:36.711034: step: 788/470, loss: 0.006151827983558178 2023-01-22 21:14:37.458127: step: 790/470, loss: 0.14049938321113586 2023-01-22 21:14:38.192798: step: 792/470, loss: 0.00779561884701252 2023-01-22 21:14:38.940721: step: 794/470, loss: 0.020738592371344566 2023-01-22 21:14:39.835214: step: 796/470, loss: 0.1423885077238083 2023-01-22 21:14:40.716330: step: 798/470, loss: 0.0193779356777668 2023-01-22 21:14:41.440727: step: 800/470, loss: 0.0032281007152050734 2023-01-22 21:14:42.072656: step: 802/470, loss: 0.00042432613554410636 2023-01-22 21:14:42.837544: step: 804/470, loss: 0.004177960567176342 2023-01-22 21:14:43.562400: step: 806/470, loss: 1.2552907466888428 2023-01-22 21:14:44.189144: step: 808/470, loss: 6.404746090993285e-05 2023-01-22 21:14:44.955668: step: 810/470, loss: 0.031237877905368805 2023-01-22 21:14:45.684426: step: 812/470, loss: 0.0003258582728449255 2023-01-22 21:14:46.325842: step: 814/470, loss: 0.007463258691132069 2023-01-22 21:14:47.066510: step: 816/470, loss: 0.01626862958073616 2023-01-22 21:14:47.857932: step: 818/470, loss: 0.02373459003865719 2023-01-22 21:14:48.585848: step: 820/470, loss: 0.0005793635500594974 2023-01-22 21:14:49.298317: step: 822/470, loss: 0.0021050209179520607 2023-01-22 21:14:50.039749: step: 824/470, loss: 0.17716780304908752 2023-01-22 21:14:50.840405: step: 826/470, loss: 0.036166831851005554 2023-01-22 21:14:51.657027: step: 828/470, loss: 0.024912940338253975 2023-01-22 21:14:52.382303: step: 830/470, loss: 0.007581733167171478 2023-01-22 21:14:53.095736: step: 832/470, loss: 0.007379438728094101 2023-01-22 21:14:53.788097: step: 834/470, loss: 0.08958467096090317 2023-01-22 21:14:54.605212: step: 836/470, loss: 0.0027318422216922045 2023-01-22 21:14:55.338015: step: 838/470, loss: 0.06193634122610092 2023-01-22 21:14:56.079860: step: 840/470, loss: 0.01766875572502613 2023-01-22 21:14:56.863044: step: 842/470, loss: 0.009170843288302422 2023-01-22 21:14:57.606824: step: 844/470, loss: 0.00840882770717144 2023-01-22 21:14:58.270057: step: 846/470, loss: 0.0007808993104845285 2023-01-22 21:14:58.971849: step: 848/470, loss: 0.06595759838819504 2023-01-22 21:14:59.730926: step: 850/470, loss: 0.017137767747044563 2023-01-22 21:15:00.420681: step: 852/470, loss: 0.0002639777958393097 2023-01-22 21:15:01.083929: step: 854/470, loss: 0.04219938814640045 2023-01-22 21:15:01.770308: step: 856/470, loss: 0.022198636084794998 2023-01-22 21:15:02.529426: step: 858/470, loss: 0.005382399074733257 2023-01-22 21:15:03.300994: step: 860/470, loss: 0.034826841205358505 2023-01-22 21:15:04.027187: step: 862/470, loss: 0.02772991545498371 2023-01-22 21:15:04.804443: step: 864/470, loss: 0.007337241433560848 2023-01-22 21:15:05.643818: step: 866/470, loss: 0.004963582381606102 2023-01-22 21:15:06.382679: step: 868/470, loss: 0.000665048137307167 2023-01-22 21:15:07.140861: step: 870/470, loss: 0.01284511387348175 2023-01-22 21:15:07.790054: step: 872/470, loss: 0.00016538244381081313 2023-01-22 21:15:08.483473: step: 874/470, loss: 0.023755772039294243 2023-01-22 21:15:09.129781: step: 876/470, loss: 0.0020838105119764805 2023-01-22 21:15:09.807474: step: 878/470, loss: 0.0033732212614268064 2023-01-22 21:15:10.612490: step: 880/470, loss: 0.08116719126701355 2023-01-22 21:15:11.240449: step: 882/470, loss: 0.0009367589373141527 2023-01-22 21:15:12.033461: step: 884/470, loss: 0.05737480893731117 2023-01-22 21:15:12.754057: step: 886/470, loss: 0.00760510703548789 2023-01-22 21:15:13.505955: step: 888/470, loss: 0.0016693559009581804 2023-01-22 21:15:14.232234: step: 890/470, loss: 0.010982673615217209 2023-01-22 21:15:15.021368: step: 892/470, loss: 0.01113244891166687 2023-01-22 21:15:15.752998: step: 894/470, loss: 0.0017129809129983187 2023-01-22 21:15:16.445834: step: 896/470, loss: 0.5727943181991577 2023-01-22 21:15:17.142854: step: 898/470, loss: 0.002152232686057687 2023-01-22 21:15:17.846914: step: 900/470, loss: 0.5070058107376099 2023-01-22 21:15:18.580916: step: 902/470, loss: 0.04603942856192589 2023-01-22 21:15:19.306546: step: 904/470, loss: 0.06946877390146255 2023-01-22 21:15:19.972829: step: 906/470, loss: 0.0038659495767205954 2023-01-22 21:15:20.691914: step: 908/470, loss: 0.0038466486148536205 2023-01-22 21:15:21.420453: step: 910/470, loss: 0.010113026015460491 2023-01-22 21:15:22.158460: step: 912/470, loss: 0.003725921269506216 2023-01-22 21:15:22.935606: step: 914/470, loss: 0.0018076320411637425 2023-01-22 21:15:23.743519: step: 916/470, loss: 0.043223872780799866 2023-01-22 21:15:24.424390: step: 918/470, loss: 0.0003173082077410072 2023-01-22 21:15:25.127745: step: 920/470, loss: 0.0005944594158791006 2023-01-22 21:15:25.899088: step: 922/470, loss: 0.05763893947005272 2023-01-22 21:15:26.651663: step: 924/470, loss: 0.0007061202195473015 2023-01-22 21:15:27.391042: step: 926/470, loss: 0.004733996000140905 2023-01-22 21:15:27.985455: step: 928/470, loss: 0.001815860508941114 2023-01-22 21:15:28.638636: step: 930/470, loss: 0.0008208305225707591 2023-01-22 21:15:29.306076: step: 932/470, loss: 0.01949726790189743 2023-01-22 21:15:30.015149: step: 934/470, loss: 0.010274535976350307 2023-01-22 21:15:30.703433: step: 936/470, loss: 0.013152135536074638 2023-01-22 21:15:31.470717: step: 938/470, loss: 0.023069579154253006 2023-01-22 21:15:32.105083: step: 940/470, loss: 0.006963435094803572 2023-01-22 21:15:32.831565: step: 942/470, loss: 1.931123188114725e-05 ================================================== Loss: 0.045 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29903591778591776, 'r': 0.3319468916598898, 'f1': 0.31463311493662216}, 'combined': 0.2318349267954058, 'epoch': 33} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3428089813247433, 'r': 0.362586422555017, 'f1': 0.3524204480908576}, 'combined': 0.24546697876975157, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2891282818320387, 'r': 0.3220461127806579, 'f1': 0.3047007207098864}, 'combined': 0.2245163205230742, 'epoch': 33} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34980374117622254, 'r': 0.3699847262440815, 'f1': 0.3596113227045278}, 'combined': 0.2504755481524074, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28349719101123594, 'r': 0.3351399430740038, 'f1': 0.3071630434782609}, 'combined': 0.22633066361556065, 'epoch': 33} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3373705612853303, 'r': 0.3818126448392632, 'f1': 0.3582184489245229}, 'combined': 0.249505387310613, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22453703703703703, 'r': 0.3464285714285714, 'f1': 0.2724719101123595}, 'combined': 0.1816479400749063, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26136363636363635, 'r': 0.5, 'f1': 0.34328358208955223}, 'combined': 0.17164179104477612, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42391304347826086, 'r': 0.33620689655172414, 'f1': 0.375}, 'combined': 0.25, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 21:18:07.769982: step: 2/470, loss: 0.0033803251571953297 2023-01-22 21:18:08.432480: step: 4/470, loss: 0.0006748238811269403 2023-01-22 21:18:09.186847: step: 6/470, loss: 0.005246671847999096 2023-01-22 21:18:09.941529: step: 8/470, loss: 0.051448754966259 2023-01-22 21:18:10.693072: step: 10/470, loss: 0.010346678085625172 2023-01-22 21:18:11.485189: step: 12/470, loss: 0.006094653159379959 2023-01-22 21:18:12.170946: step: 14/470, loss: 5.761585998698138e-05 2023-01-22 21:18:12.890768: step: 16/470, loss: 0.4367545545101166 2023-01-22 21:18:13.668284: step: 18/470, loss: 0.026379667222499847 2023-01-22 21:18:14.486168: step: 20/470, loss: 0.0011629628716036677 2023-01-22 21:18:15.348890: step: 22/470, loss: 0.009967586025595665 2023-01-22 21:18:16.093470: step: 24/470, loss: 0.5176779627799988 2023-01-22 21:18:16.815416: step: 26/470, loss: 0.06595677137374878 2023-01-22 21:18:17.579924: step: 28/470, loss: 0.010950235649943352 2023-01-22 21:18:18.280314: step: 30/470, loss: 0.0005241360631771386 2023-01-22 21:18:19.000884: step: 32/470, loss: 0.007944729179143906 2023-01-22 21:18:19.667307: step: 34/470, loss: 0.0030253706499934196 2023-01-22 21:18:20.351378: step: 36/470, loss: 0.0016459508333355188 2023-01-22 21:18:21.098273: step: 38/470, loss: 0.017528338357806206 2023-01-22 21:18:21.899893: step: 40/470, loss: 0.005726838018745184 2023-01-22 21:18:22.676547: step: 42/470, loss: 0.0029343708883970976 2023-01-22 21:18:23.430761: step: 44/470, loss: 0.0019074846059083939 2023-01-22 21:18:24.221001: step: 46/470, loss: 0.04953285679221153 2023-01-22 21:18:24.949358: step: 48/470, loss: 0.013332260772585869 2023-01-22 21:18:25.669379: step: 50/470, loss: 0.00047797494335100055 2023-01-22 21:18:26.394844: step: 52/470, loss: 0.004910385701805353 2023-01-22 21:18:27.106833: step: 54/470, loss: 0.001362175797112286 2023-01-22 21:18:27.859103: step: 56/470, loss: 0.00200113607570529 2023-01-22 21:18:28.561399: step: 58/470, loss: 0.009997074492275715 2023-01-22 21:18:29.194806: step: 60/470, loss: 9.74019494606182e-05 2023-01-22 21:18:29.909309: step: 62/470, loss: 0.02216336689889431 2023-01-22 21:18:30.674818: step: 64/470, loss: 0.003930054139345884 2023-01-22 21:18:31.455210: step: 66/470, loss: 0.00021434202790260315 2023-01-22 21:18:32.185891: step: 68/470, loss: 0.00011843130050692707 2023-01-22 21:18:32.926242: step: 70/470, loss: 0.044022075831890106 2023-01-22 21:18:33.688408: step: 72/470, loss: 0.060967884957790375 2023-01-22 21:18:34.455152: step: 74/470, loss: 0.0004368863010313362 2023-01-22 21:18:35.165435: step: 76/470, loss: 0.017974242568016052 2023-01-22 21:18:35.847166: step: 78/470, loss: 0.004248203709721565 2023-01-22 21:18:36.532812: step: 80/470, loss: 0.00158304488286376 2023-01-22 21:18:37.226076: step: 82/470, loss: 0.007524224929511547 2023-01-22 21:18:37.881636: step: 84/470, loss: 0.005340985022485256 2023-01-22 21:18:38.556606: step: 86/470, loss: 0.029072092846035957 2023-01-22 21:18:39.298469: step: 88/470, loss: 0.023193301633000374 2023-01-22 21:18:40.026201: step: 90/470, loss: 9.126600343734026e-05 2023-01-22 21:18:40.794824: step: 92/470, loss: 0.0002512194332666695 2023-01-22 21:18:41.495613: step: 94/470, loss: 0.4563930332660675 2023-01-22 21:18:42.246189: step: 96/470, loss: 0.16592247784137726 2023-01-22 21:18:43.042392: step: 98/470, loss: 0.013607371598482132 2023-01-22 21:18:43.715414: step: 100/470, loss: 0.006993814371526241 2023-01-22 21:18:44.393920: step: 102/470, loss: 0.005480475723743439 2023-01-22 21:18:45.132289: step: 104/470, loss: 0.0007254070951603353 2023-01-22 21:18:45.844538: step: 106/470, loss: 0.0031888075172901154 2023-01-22 21:18:46.567482: step: 108/470, loss: 0.0031884340569376945 2023-01-22 21:18:47.232072: step: 110/470, loss: 0.0006328716408461332 2023-01-22 21:18:48.020018: step: 112/470, loss: 0.012160047888755798 2023-01-22 21:18:48.749042: step: 114/470, loss: 0.004218654707074165 2023-01-22 21:18:49.468662: step: 116/470, loss: 0.001394663704559207 2023-01-22 21:18:50.244581: step: 118/470, loss: 0.002457190537825227 2023-01-22 21:18:50.960266: step: 120/470, loss: 0.14293168485164642 2023-01-22 21:18:51.655153: step: 122/470, loss: 0.047695934772491455 2023-01-22 21:18:52.402492: step: 124/470, loss: 0.003226999891921878 2023-01-22 21:18:53.076460: step: 126/470, loss: 0.001519688288681209 2023-01-22 21:18:53.808297: step: 128/470, loss: 0.004431163426488638 2023-01-22 21:18:54.556428: step: 130/470, loss: 0.004296420607715845 2023-01-22 21:18:55.206519: step: 132/470, loss: 0.0012974428245797753 2023-01-22 21:18:55.923069: step: 134/470, loss: 0.027212653309106827 2023-01-22 21:18:56.607689: step: 136/470, loss: 0.02067440375685692 2023-01-22 21:18:57.302789: step: 138/470, loss: 0.0018030147766694427 2023-01-22 21:18:58.074618: step: 140/470, loss: 0.5232383608818054 2023-01-22 21:18:58.825169: step: 142/470, loss: 0.002090280409902334 2023-01-22 21:18:59.660478: step: 144/470, loss: 0.03019622713327408 2023-01-22 21:19:00.441926: step: 146/470, loss: 0.06532511860132217 2023-01-22 21:19:01.169448: step: 148/470, loss: 0.0835709348320961 2023-01-22 21:19:01.881924: step: 150/470, loss: 0.002398706041276455 2023-01-22 21:19:02.554318: step: 152/470, loss: 0.0007716089021414518 2023-01-22 21:19:03.258119: step: 154/470, loss: 0.04274653270840645 2023-01-22 21:19:03.973362: step: 156/470, loss: 0.027724282816052437 2023-01-22 21:19:04.705297: step: 158/470, loss: 0.0108801806345582 2023-01-22 21:19:05.365645: step: 160/470, loss: 0.0027062701992690563 2023-01-22 21:19:06.083749: step: 162/470, loss: 0.001326837227679789 2023-01-22 21:19:06.881155: step: 164/470, loss: 0.000980255426838994 2023-01-22 21:19:07.556673: step: 166/470, loss: 0.004368356894701719 2023-01-22 21:19:08.308214: step: 168/470, loss: 0.0013895826414227486 2023-01-22 21:19:09.012185: step: 170/470, loss: 0.002898984821513295 2023-01-22 21:19:09.762928: step: 172/470, loss: 0.06322634965181351 2023-01-22 21:19:10.423620: step: 174/470, loss: 0.00011356957111274824 2023-01-22 21:19:11.165174: step: 176/470, loss: 0.018166067078709602 2023-01-22 21:19:11.875356: step: 178/470, loss: 0.011182754300534725 2023-01-22 21:19:12.550191: step: 180/470, loss: 0.01945425756275654 2023-01-22 21:19:13.317590: step: 182/470, loss: 0.004951994400471449 2023-01-22 21:19:14.082159: step: 184/470, loss: 0.27803167700767517 2023-01-22 21:19:14.799292: step: 186/470, loss: 0.014578554779291153 2023-01-22 21:19:15.557007: step: 188/470, loss: 0.026163099333643913 2023-01-22 21:19:16.300816: step: 190/470, loss: 0.016671251505613327 2023-01-22 21:19:17.001401: step: 192/470, loss: 0.0024631675332784653 2023-01-22 21:19:17.678741: step: 194/470, loss: 0.017945973202586174 2023-01-22 21:19:18.447391: step: 196/470, loss: 0.019402913749217987 2023-01-22 21:19:19.208996: step: 198/470, loss: 0.009785197675228119 2023-01-22 21:19:19.991583: step: 200/470, loss: 0.011927827261388302 2023-01-22 21:19:20.736165: step: 202/470, loss: 0.015688994899392128 2023-01-22 21:19:21.427724: step: 204/470, loss: 0.5554424524307251 2023-01-22 21:19:22.106990: step: 206/470, loss: 0.004341977182775736 2023-01-22 21:19:22.796720: step: 208/470, loss: 2.5874245693557896e-05 2023-01-22 21:19:23.586853: step: 210/470, loss: 0.03483826667070389 2023-01-22 21:19:24.298062: step: 212/470, loss: 0.004074705298990011 2023-01-22 21:19:25.012951: step: 214/470, loss: 0.003476059529930353 2023-01-22 21:19:25.755005: step: 216/470, loss: 0.00816492922604084 2023-01-22 21:19:26.443897: step: 218/470, loss: 0.022160356864333153 2023-01-22 21:19:27.186735: step: 220/470, loss: 0.008341852575540543 2023-01-22 21:19:28.011558: step: 222/470, loss: 0.0019108172273263335 2023-01-22 21:19:28.716456: step: 224/470, loss: 1.008707046508789 2023-01-22 21:19:29.417185: step: 226/470, loss: 0.00015085958875715733 2023-01-22 21:19:30.163668: step: 228/470, loss: 0.004038154147565365 2023-01-22 21:19:30.965638: step: 230/470, loss: 0.005438428372144699 2023-01-22 21:19:31.711688: step: 232/470, loss: 0.0011504045687615871 2023-01-22 21:19:32.424165: step: 234/470, loss: 0.02474108338356018 2023-01-22 21:19:33.189157: step: 236/470, loss: 0.04079394042491913 2023-01-22 21:19:33.944246: step: 238/470, loss: 0.008551133796572685 2023-01-22 21:19:34.620701: step: 240/470, loss: 0.00603491673246026 2023-01-22 21:19:35.335489: step: 242/470, loss: 0.004793131723999977 2023-01-22 21:19:36.163242: step: 244/470, loss: 0.0072413296438753605 2023-01-22 21:19:36.834667: step: 246/470, loss: 0.023858316242694855 2023-01-22 21:19:37.546370: step: 248/470, loss: 0.008859733119606972 2023-01-22 21:19:38.210597: step: 250/470, loss: 0.006357423961162567 2023-01-22 21:19:38.899801: step: 252/470, loss: 0.024346143007278442 2023-01-22 21:19:39.589196: step: 254/470, loss: 0.018336040899157524 2023-01-22 21:19:40.345709: step: 256/470, loss: 0.010750774294137955 2023-01-22 21:19:41.040766: step: 258/470, loss: 0.0013539177598431706 2023-01-22 21:19:41.848985: step: 260/470, loss: 0.06712424010038376 2023-01-22 21:19:42.564612: step: 262/470, loss: 0.015392746776342392 2023-01-22 21:19:43.389769: step: 264/470, loss: 0.011326344683766365 2023-01-22 21:19:44.111437: step: 266/470, loss: 0.03263202682137489 2023-01-22 21:19:44.848821: step: 268/470, loss: 0.019025059416890144 2023-01-22 21:19:45.585227: step: 270/470, loss: 0.004486290272325277 2023-01-22 21:19:46.352692: step: 272/470, loss: 0.028263265267014503 2023-01-22 21:19:47.040884: step: 274/470, loss: 0.06688546389341354 2023-01-22 21:19:47.815614: step: 276/470, loss: 0.0005725919036194682 2023-01-22 21:19:48.500408: step: 278/470, loss: 0.0008113943040370941 2023-01-22 21:19:49.196397: step: 280/470, loss: 0.00557939475402236 2023-01-22 21:19:50.009157: step: 282/470, loss: 0.026322869583964348 2023-01-22 21:19:50.732247: step: 284/470, loss: 0.0053300210274755955 2023-01-22 21:19:51.513653: step: 286/470, loss: 0.006512013729661703 2023-01-22 21:19:52.182113: step: 288/470, loss: 0.006800213363021612 2023-01-22 21:19:52.871972: step: 290/470, loss: 0.006016398314386606 2023-01-22 21:19:53.649297: step: 292/470, loss: 0.009520920924842358 2023-01-22 21:19:54.439457: step: 294/470, loss: 0.0036107206251472235 2023-01-22 21:19:55.198911: step: 296/470, loss: 0.020766550675034523 2023-01-22 21:19:55.943314: step: 298/470, loss: 0.005723374895751476 2023-01-22 21:19:56.696762: step: 300/470, loss: 0.03173263370990753 2023-01-22 21:19:57.462770: step: 302/470, loss: 0.029550369828939438 2023-01-22 21:19:58.169946: step: 304/470, loss: 0.0009951787069439888 2023-01-22 21:19:58.938869: step: 306/470, loss: 0.04182833433151245 2023-01-22 21:19:59.916306: step: 308/470, loss: 0.03832540661096573 2023-01-22 21:20:00.686850: step: 310/470, loss: 0.11555290967226028 2023-01-22 21:20:01.467941: step: 312/470, loss: 0.02390245907008648 2023-01-22 21:20:02.181818: step: 314/470, loss: 0.0017948574386537075 2023-01-22 21:20:02.949468: step: 316/470, loss: 0.0034852821845561266 2023-01-22 21:20:03.650260: step: 318/470, loss: 0.25961652398109436 2023-01-22 21:20:04.418048: step: 320/470, loss: 0.052189331501722336 2023-01-22 21:20:05.192621: step: 322/470, loss: 0.01036821585148573 2023-01-22 21:20:05.900040: step: 324/470, loss: 0.016944773495197296 2023-01-22 21:20:06.568004: step: 326/470, loss: 1.836652882047929e-05 2023-01-22 21:20:07.299357: step: 328/470, loss: 0.0037974936421960592 2023-01-22 21:20:07.947476: step: 330/470, loss: 0.00011987396283075213 2023-01-22 21:20:08.628225: step: 332/470, loss: 0.007104361429810524 2023-01-22 21:20:09.298692: step: 334/470, loss: 0.009579218924045563 2023-01-22 21:20:10.040739: step: 336/470, loss: 0.0007889914559200406 2023-01-22 21:20:10.724686: step: 338/470, loss: 0.003870560321956873 2023-01-22 21:20:11.481484: step: 340/470, loss: 0.0053961933590471745 2023-01-22 21:20:12.166935: step: 342/470, loss: 0.0044841766357421875 2023-01-22 21:20:12.840708: step: 344/470, loss: 4.063077722094022e-05 2023-01-22 21:20:13.553320: step: 346/470, loss: 0.001006243284791708 2023-01-22 21:20:14.216202: step: 348/470, loss: 0.007106869947165251 2023-01-22 21:20:14.861308: step: 350/470, loss: 0.0015916344709694386 2023-01-22 21:20:15.637486: step: 352/470, loss: 0.027905944734811783 2023-01-22 21:20:16.371644: step: 354/470, loss: 0.003524521365761757 2023-01-22 21:20:17.135687: step: 356/470, loss: 0.0009450623765587807 2023-01-22 21:20:17.832002: step: 358/470, loss: 0.00697721540927887 2023-01-22 21:20:18.540972: step: 360/470, loss: 0.0005012876354157925 2023-01-22 21:20:19.189892: step: 362/470, loss: 4.1443989175604656e-05 2023-01-22 21:20:19.884526: step: 364/470, loss: 0.04660176858305931 2023-01-22 21:20:20.556476: step: 366/470, loss: 2.2212430849322118e-05 2023-01-22 21:20:21.277871: step: 368/470, loss: 0.0106153329834342 2023-01-22 21:20:21.953436: step: 370/470, loss: 0.0111685274168849 2023-01-22 21:20:22.714226: step: 372/470, loss: 0.0011537930695340037 2023-01-22 21:20:23.386476: step: 374/470, loss: 0.06961429864168167 2023-01-22 21:20:24.165944: step: 376/470, loss: 0.0024143445771187544 2023-01-22 21:20:24.892268: step: 378/470, loss: 0.009551659226417542 2023-01-22 21:20:25.607956: step: 380/470, loss: 1.0005992650985718 2023-01-22 21:20:26.419729: step: 382/470, loss: 0.002678003627806902 2023-01-22 21:20:27.227643: step: 384/470, loss: 0.049086350947618484 2023-01-22 21:20:28.048425: step: 386/470, loss: 0.010009855031967163 2023-01-22 21:20:28.820716: step: 388/470, loss: 0.0012464200844988227 2023-01-22 21:20:29.532636: step: 390/470, loss: 4.9415495595894754e-05 2023-01-22 21:20:30.192415: step: 392/470, loss: 0.003669227007776499 2023-01-22 21:20:30.900034: step: 394/470, loss: 0.003987874835729599 2023-01-22 21:20:31.607919: step: 396/470, loss: 0.0017396406037732959 2023-01-22 21:20:32.305446: step: 398/470, loss: 0.025890368968248367 2023-01-22 21:20:33.018572: step: 400/470, loss: 0.01730758510529995 2023-01-22 21:20:33.802827: step: 402/470, loss: 0.004256324376910925 2023-01-22 21:20:34.529775: step: 404/470, loss: 0.24290981888771057 2023-01-22 21:20:35.184952: step: 406/470, loss: 3.430567085160874e-05 2023-01-22 21:20:35.910609: step: 408/470, loss: 0.01224263571202755 2023-01-22 21:20:36.657207: step: 410/470, loss: 0.0011447453871369362 2023-01-22 21:20:37.376969: step: 412/470, loss: 0.024092786014080048 2023-01-22 21:20:38.125141: step: 414/470, loss: 0.0011776899918913841 2023-01-22 21:20:38.868282: step: 416/470, loss: 0.006173381116241217 2023-01-22 21:20:39.589793: step: 418/470, loss: 0.007338505703955889 2023-01-22 21:20:40.291244: step: 420/470, loss: 0.002101232297718525 2023-01-22 21:20:40.998766: step: 422/470, loss: 0.0022618654184043407 2023-01-22 21:20:41.781010: step: 424/470, loss: 0.0012441343860700727 2023-01-22 21:20:42.676642: step: 426/470, loss: 0.0695013627409935 2023-01-22 21:20:43.403206: step: 428/470, loss: 0.23386645317077637 2023-01-22 21:20:44.172506: step: 430/470, loss: 0.0643640011548996 2023-01-22 21:20:44.874948: step: 432/470, loss: 0.00014748272951692343 2023-01-22 21:20:45.590945: step: 434/470, loss: 0.055859826505184174 2023-01-22 21:20:46.317776: step: 436/470, loss: 0.000146715174196288 2023-01-22 21:20:47.026767: step: 438/470, loss: 0.005512524861842394 2023-01-22 21:20:47.730535: step: 440/470, loss: 0.04210149496793747 2023-01-22 21:20:48.468662: step: 442/470, loss: 0.0007326016202569008 2023-01-22 21:20:49.302965: step: 444/470, loss: 0.017042549327015877 2023-01-22 21:20:50.048955: step: 446/470, loss: 0.0027442320715636015 2023-01-22 21:20:50.757887: step: 448/470, loss: 0.000428998377174139 2023-01-22 21:20:51.476427: step: 450/470, loss: 0.009314804337918758 2023-01-22 21:20:52.193746: step: 452/470, loss: 0.0005936333909630775 2023-01-22 21:20:52.954579: step: 454/470, loss: 0.018571410328149796 2023-01-22 21:20:53.648653: step: 456/470, loss: 0.0180160254240036 2023-01-22 21:20:54.344172: step: 458/470, loss: 0.02957836724817753 2023-01-22 21:20:55.027954: step: 460/470, loss: 0.002465310040861368 2023-01-22 21:20:55.742745: step: 462/470, loss: 0.0018986280774697661 2023-01-22 21:20:56.493541: step: 464/470, loss: 0.06108655408024788 2023-01-22 21:20:57.200637: step: 466/470, loss: 0.0003646984987426549 2023-01-22 21:20:57.924148: step: 468/470, loss: 0.017292622476816177 2023-01-22 21:20:58.628514: step: 470/470, loss: 0.01416701264679432 2023-01-22 21:20:59.352341: step: 472/470, loss: 0.00011536870442796499 2023-01-22 21:21:00.082677: step: 474/470, loss: 0.002867184579372406 2023-01-22 21:21:00.930160: step: 476/470, loss: 0.03824557363986969 2023-01-22 21:21:01.637441: step: 478/470, loss: 0.001631454681046307 2023-01-22 21:21:02.355617: step: 480/470, loss: 0.4666403532028198 2023-01-22 21:21:03.114169: step: 482/470, loss: 0.0006429323111660779 2023-01-22 21:21:03.822262: step: 484/470, loss: 0.07074841111898422 2023-01-22 21:21:04.492751: step: 486/470, loss: 0.00920712761580944 2023-01-22 21:21:05.210467: step: 488/470, loss: 0.027392836287617683 2023-01-22 21:21:05.926468: step: 490/470, loss: 0.0031878724694252014 2023-01-22 21:21:06.600298: step: 492/470, loss: 0.009520125575363636 2023-01-22 21:21:07.328202: step: 494/470, loss: 0.014723233878612518 2023-01-22 21:21:08.135638: step: 496/470, loss: 0.01308779139071703 2023-01-22 21:21:08.860579: step: 498/470, loss: 0.017647597938776016 2023-01-22 21:21:09.618887: step: 500/470, loss: 0.0010442689526826143 2023-01-22 21:21:10.354862: step: 502/470, loss: 0.0007168474257923663 2023-01-22 21:21:11.114013: step: 504/470, loss: 0.0017317109741270542 2023-01-22 21:21:11.828215: step: 506/470, loss: 0.0012164206709712744 2023-01-22 21:21:12.590934: step: 508/470, loss: 0.0004420246696099639 2023-01-22 21:21:13.276428: step: 510/470, loss: 0.0015642930520698428 2023-01-22 21:21:13.983043: step: 512/470, loss: 0.020066358149051666 2023-01-22 21:21:14.831371: step: 514/470, loss: 0.3007548451423645 2023-01-22 21:21:15.585120: step: 516/470, loss: 0.4788682460784912 2023-01-22 21:21:16.331543: step: 518/470, loss: 0.011519278399646282 2023-01-22 21:21:17.044322: step: 520/470, loss: 0.01719200238585472 2023-01-22 21:21:17.695323: step: 522/470, loss: 0.01048339158296585 2023-01-22 21:21:18.409790: step: 524/470, loss: 0.020729506388306618 2023-01-22 21:21:19.069724: step: 526/470, loss: 0.020677419379353523 2023-01-22 21:21:19.801489: step: 528/470, loss: 0.18149332702159882 2023-01-22 21:21:20.537940: step: 530/470, loss: 0.013467920944094658 2023-01-22 21:21:21.274826: step: 532/470, loss: 0.02106170728802681 2023-01-22 21:21:21.979909: step: 534/470, loss: 0.0007394176791422069 2023-01-22 21:21:22.713007: step: 536/470, loss: 0.04561655968427658 2023-01-22 21:21:23.526712: step: 538/470, loss: 0.01662643626332283 2023-01-22 21:21:24.255918: step: 540/470, loss: 0.001894438057206571 2023-01-22 21:21:24.921680: step: 542/470, loss: 0.00037416958366520703 2023-01-22 21:21:25.680114: step: 544/470, loss: 0.0077200643718242645 2023-01-22 21:21:26.409714: step: 546/470, loss: 0.010255703702569008 2023-01-22 21:21:27.148904: step: 548/470, loss: 0.053163353353738785 2023-01-22 21:21:27.894238: step: 550/470, loss: 0.039663802832365036 2023-01-22 21:21:28.699261: step: 552/470, loss: 0.00014281453331932425 2023-01-22 21:21:29.388737: step: 554/470, loss: 0.12468361854553223 2023-01-22 21:21:30.178234: step: 556/470, loss: 0.0031782027799636126 2023-01-22 21:21:30.960487: step: 558/470, loss: 0.25668269395828247 2023-01-22 21:21:31.776833: step: 560/470, loss: 0.005150767974555492 2023-01-22 21:21:32.449307: step: 562/470, loss: 0.0024627491366118193 2023-01-22 21:21:33.158205: step: 564/470, loss: 0.005648725666105747 2023-01-22 21:21:33.849248: step: 566/470, loss: 0.014787226915359497 2023-01-22 21:21:34.664150: step: 568/470, loss: 0.18614830076694489 2023-01-22 21:21:35.345199: step: 570/470, loss: 0.0038616762030869722 2023-01-22 21:21:36.054849: step: 572/470, loss: 0.003033594461157918 2023-01-22 21:21:36.744089: step: 574/470, loss: 0.03422601893544197 2023-01-22 21:21:37.559474: step: 576/470, loss: 0.008382627740502357 2023-01-22 21:21:38.292211: step: 578/470, loss: 0.00314133008942008 2023-01-22 21:21:38.979224: step: 580/470, loss: 0.0034324736334383488 2023-01-22 21:21:39.663727: step: 582/470, loss: 0.002011174103245139 2023-01-22 21:21:40.363571: step: 584/470, loss: 0.07423926889896393 2023-01-22 21:21:41.065365: step: 586/470, loss: 0.00035503433900885284 2023-01-22 21:21:41.792870: step: 588/470, loss: 0.015882398933172226 2023-01-22 21:21:42.548899: step: 590/470, loss: 0.03521262854337692 2023-01-22 21:21:43.245862: step: 592/470, loss: 0.06739047169685364 2023-01-22 21:21:43.932025: step: 594/470, loss: 0.001243374776095152 2023-01-22 21:21:44.702963: step: 596/470, loss: 0.0024422684218734503 2023-01-22 21:21:45.471381: step: 598/470, loss: 0.009328119456768036 2023-01-22 21:21:46.183542: step: 600/470, loss: 6.515645509352908e-05 2023-01-22 21:21:46.796080: step: 602/470, loss: 0.009061809629201889 2023-01-22 21:21:47.420267: step: 604/470, loss: 0.0029441039077937603 2023-01-22 21:21:48.164299: step: 606/470, loss: 0.030784372240304947 2023-01-22 21:21:48.880793: step: 608/470, loss: 0.009940296411514282 2023-01-22 21:21:49.599077: step: 610/470, loss: 0.010526066645979881 2023-01-22 21:21:50.421505: step: 612/470, loss: 0.0019004541682079434 2023-01-22 21:21:51.131995: step: 614/470, loss: 0.0026740499306470156 2023-01-22 21:21:51.893684: step: 616/470, loss: 0.0034406818449497223 2023-01-22 21:21:52.653367: step: 618/470, loss: 0.00043804876622743905 2023-01-22 21:21:53.373904: step: 620/470, loss: 0.017875712364912033 2023-01-22 21:21:54.055584: step: 622/470, loss: 0.0026596703100949526 2023-01-22 21:21:54.721445: step: 624/470, loss: 0.0004920915816910565 2023-01-22 21:21:55.437981: step: 626/470, loss: 0.016695676371455193 2023-01-22 21:21:56.209112: step: 628/470, loss: 0.0026145961601287127 2023-01-22 21:21:57.082331: step: 630/470, loss: 0.026421984657645226 2023-01-22 21:21:57.773458: step: 632/470, loss: 0.0005286968080326915 2023-01-22 21:21:58.581520: step: 634/470, loss: 0.06427496671676636 2023-01-22 21:21:59.276337: step: 636/470, loss: 0.0009096733992919326 2023-01-22 21:22:00.039335: step: 638/470, loss: 0.021214094012975693 2023-01-22 21:22:00.704935: step: 640/470, loss: 0.004757567774504423 2023-01-22 21:22:01.467324: step: 642/470, loss: 0.028353553265333176 2023-01-22 21:22:02.355597: step: 644/470, loss: 0.024054067209362984 2023-01-22 21:22:03.171990: step: 646/470, loss: 2.8277341698412783e-05 2023-01-22 21:22:03.906527: step: 648/470, loss: 0.01686178334057331 2023-01-22 21:22:04.576632: step: 650/470, loss: 3.088486846536398e-05 2023-01-22 21:22:05.382758: step: 652/470, loss: 0.0076260752975940704 2023-01-22 21:22:06.107669: step: 654/470, loss: 0.0021921610459685326 2023-01-22 21:22:06.878601: step: 656/470, loss: 0.015146718360483646 2023-01-22 21:22:07.649675: step: 658/470, loss: 1.6248441934585571 2023-01-22 21:22:08.321442: step: 660/470, loss: 0.0009182182257063687 2023-01-22 21:22:09.023505: step: 662/470, loss: 0.022168146446347237 2023-01-22 21:22:09.789750: step: 664/470, loss: 0.009737711399793625 2023-01-22 21:22:10.480248: step: 666/470, loss: 0.05892786756157875 2023-01-22 21:22:11.201558: step: 668/470, loss: 0.04346398264169693 2023-01-22 21:22:11.974401: step: 670/470, loss: 0.028981253504753113 2023-01-22 21:22:12.699239: step: 672/470, loss: 0.00941953994333744 2023-01-22 21:22:13.463826: step: 674/470, loss: 0.07243026047945023 2023-01-22 21:22:14.203749: step: 676/470, loss: 0.02073572389781475 2023-01-22 21:22:14.964439: step: 678/470, loss: 0.0022810224909335375 2023-01-22 21:22:15.659330: step: 680/470, loss: 0.09601109474897385 2023-01-22 21:22:16.354899: step: 682/470, loss: 0.003557687159627676 2023-01-22 21:22:17.051032: step: 684/470, loss: 0.0015952467219904065 2023-01-22 21:22:17.790405: step: 686/470, loss: 0.0002099236153298989 2023-01-22 21:22:18.515992: step: 688/470, loss: 0.376717746257782 2023-01-22 21:22:19.315004: step: 690/470, loss: 0.004120633937418461 2023-01-22 21:22:20.100340: step: 692/470, loss: 0.0018780836835503578 2023-01-22 21:22:20.968884: step: 694/470, loss: 0.5812046527862549 2023-01-22 21:22:21.692281: step: 696/470, loss: 0.002286061178892851 2023-01-22 21:22:22.450974: step: 698/470, loss: 0.013838349841535091 2023-01-22 21:22:23.188253: step: 700/470, loss: 0.0184122733771801 2023-01-22 21:22:23.861708: step: 702/470, loss: 0.0008420947706326842 2023-01-22 21:22:24.528513: step: 704/470, loss: 0.08517462015151978 2023-01-22 21:22:25.226723: step: 706/470, loss: 0.0022694526705890894 2023-01-22 21:22:26.081632: step: 708/470, loss: 0.029612381011247635 2023-01-22 21:22:26.752848: step: 710/470, loss: 0.0010812204563990235 2023-01-22 21:22:27.547808: step: 712/470, loss: 0.0535753071308136 2023-01-22 21:22:28.290003: step: 714/470, loss: 0.008847307413816452 2023-01-22 21:22:29.058505: step: 716/470, loss: 0.03432611748576164 2023-01-22 21:22:29.767667: step: 718/470, loss: 0.006384172476828098 2023-01-22 21:22:30.528659: step: 720/470, loss: 0.004302928224205971 2023-01-22 21:22:31.257054: step: 722/470, loss: 0.01254085823893547 2023-01-22 21:22:32.044353: step: 724/470, loss: 0.007098275702446699 2023-01-22 21:22:32.759634: step: 726/470, loss: 0.018687281757593155 2023-01-22 21:22:33.491143: step: 728/470, loss: 0.08355188369750977 2023-01-22 21:22:34.204068: step: 730/470, loss: 0.00029057872598059475 2023-01-22 21:22:34.962337: step: 732/470, loss: 0.06628356873989105 2023-01-22 21:22:35.644561: step: 734/470, loss: 0.04160122945904732 2023-01-22 21:22:36.440070: step: 736/470, loss: 0.005398217123001814 2023-01-22 21:22:37.229078: step: 738/470, loss: 0.00326385535299778 2023-01-22 21:22:38.032477: step: 740/470, loss: 0.10454016923904419 2023-01-22 21:22:38.768669: step: 742/470, loss: 0.00869603082537651 2023-01-22 21:22:39.533402: step: 744/470, loss: 0.01859329640865326 2023-01-22 21:22:40.273455: step: 746/470, loss: 0.0023927935399115086 2023-01-22 21:22:41.034758: step: 748/470, loss: 0.01185494102537632 2023-01-22 21:22:41.686041: step: 750/470, loss: 0.0039005994331091642 2023-01-22 21:22:42.327691: step: 752/470, loss: 0.001562373130582273 2023-01-22 21:22:43.023696: step: 754/470, loss: 0.14783407747745514 2023-01-22 21:22:43.832151: step: 756/470, loss: 0.0047768522053956985 2023-01-22 21:22:44.598747: step: 758/470, loss: 0.005835649557411671 2023-01-22 21:22:45.369592: step: 760/470, loss: 0.06568383425474167 2023-01-22 21:22:46.148650: step: 762/470, loss: 0.17401570081710815 2023-01-22 21:22:46.873203: step: 764/470, loss: 0.0028607286512851715 2023-01-22 21:22:47.547591: step: 766/470, loss: 0.03020673617720604 2023-01-22 21:22:48.296900: step: 768/470, loss: 0.2988532483577728 2023-01-22 21:22:49.103436: step: 770/470, loss: 0.2755778729915619 2023-01-22 21:22:49.839953: step: 772/470, loss: 0.07244252413511276 2023-01-22 21:22:50.614021: step: 774/470, loss: 0.030048388987779617 2023-01-22 21:22:51.313269: step: 776/470, loss: 0.18164348602294922 2023-01-22 21:22:52.064807: step: 778/470, loss: 0.001993674086406827 2023-01-22 21:22:52.847667: step: 780/470, loss: 0.003424519905820489 2023-01-22 21:22:53.759067: step: 782/470, loss: 0.01581845059990883 2023-01-22 21:22:54.527659: step: 784/470, loss: 0.03147071599960327 2023-01-22 21:22:55.242498: step: 786/470, loss: 0.016147736459970474 2023-01-22 21:22:55.963630: step: 788/470, loss: 0.0013450286351144314 2023-01-22 21:22:56.669536: step: 790/470, loss: 0.013478526845574379 2023-01-22 21:22:57.394300: step: 792/470, loss: 0.003012130269780755 2023-01-22 21:22:58.143786: step: 794/470, loss: 0.010243113152682781 2023-01-22 21:22:58.881587: step: 796/470, loss: 0.030851509422063828 2023-01-22 21:22:59.648733: step: 798/470, loss: 0.015330376103520393 2023-01-22 21:23:00.417336: step: 800/470, loss: 0.0013428201200440526 2023-01-22 21:23:01.149235: step: 802/470, loss: 0.0035743422340601683 2023-01-22 21:23:01.887886: step: 804/470, loss: 0.05729028955101967 2023-01-22 21:23:02.663988: step: 806/470, loss: 0.002276189159601927 2023-01-22 21:23:03.367668: step: 808/470, loss: 0.002368086948990822 2023-01-22 21:23:04.145591: step: 810/470, loss: 0.06536681950092316 2023-01-22 21:23:04.857761: step: 812/470, loss: 0.0029183574952185154 2023-01-22 21:23:05.606112: step: 814/470, loss: 0.0018473371164873242 2023-01-22 21:23:06.370508: step: 816/470, loss: 0.013988809660077095 2023-01-22 21:23:07.234461: step: 818/470, loss: 0.021747639402747154 2023-01-22 21:23:07.918409: step: 820/470, loss: 0.0005320486379787326 2023-01-22 21:23:08.644257: step: 822/470, loss: 0.026045726612210274 2023-01-22 21:23:09.313576: step: 824/470, loss: 0.009219018742442131 2023-01-22 21:23:10.070469: step: 826/470, loss: 0.0010474611772224307 2023-01-22 21:23:10.811986: step: 828/470, loss: 0.00877501629292965 2023-01-22 21:23:11.515825: step: 830/470, loss: 0.0060637411661446095 2023-01-22 21:23:12.280124: step: 832/470, loss: 0.02464357018470764 2023-01-22 21:23:13.067408: step: 834/470, loss: 0.00776535551995039 2023-01-22 21:23:13.751571: step: 836/470, loss: 0.053323835134506226 2023-01-22 21:23:14.461196: step: 838/470, loss: 0.0020373347215354443 2023-01-22 21:23:15.235365: step: 840/470, loss: 0.3750033974647522 2023-01-22 21:23:15.949634: step: 842/470, loss: 0.042009808123111725 2023-01-22 21:23:16.653868: step: 844/470, loss: 0.02685542218387127 2023-01-22 21:23:17.334301: step: 846/470, loss: 0.0003366958990227431 2023-01-22 21:23:18.033927: step: 848/470, loss: 0.036231983453035355 2023-01-22 21:23:18.737785: step: 850/470, loss: 0.01380055584013462 2023-01-22 21:23:19.549352: step: 852/470, loss: 0.04030189290642738 2023-01-22 21:23:20.272407: step: 854/470, loss: 0.007406915538012981 2023-01-22 21:23:20.941524: step: 856/470, loss: 0.0028321263380348682 2023-01-22 21:23:21.660736: step: 858/470, loss: 0.0024023745208978653 2023-01-22 21:23:22.350270: step: 860/470, loss: 0.045990705490112305 2023-01-22 21:23:23.081351: step: 862/470, loss: 0.0008657873258925974 2023-01-22 21:23:23.875306: step: 864/470, loss: 0.03380803018808365 2023-01-22 21:23:24.653011: step: 866/470, loss: 0.016098463907837868 2023-01-22 21:23:25.330324: step: 868/470, loss: 0.41227632761001587 2023-01-22 21:23:26.013170: step: 870/470, loss: 0.0017065646825358272 2023-01-22 21:23:26.795738: step: 872/470, loss: 0.002425319282338023 2023-01-22 21:23:27.548136: step: 874/470, loss: 0.003872190834954381 2023-01-22 21:23:28.266670: step: 876/470, loss: 0.0007468942785635591 2023-01-22 21:23:29.139183: step: 878/470, loss: 0.008748779073357582 2023-01-22 21:23:29.838311: step: 880/470, loss: 0.18740367889404297 2023-01-22 21:23:30.521827: step: 882/470, loss: 0.006937297526746988 2023-01-22 21:23:31.225819: step: 884/470, loss: 0.001644572359509766 2023-01-22 21:23:31.979059: step: 886/470, loss: 0.03953949362039566 2023-01-22 21:23:32.654734: step: 888/470, loss: 0.0010495680617168546 2023-01-22 21:23:33.384030: step: 890/470, loss: 0.004453408066183329 2023-01-22 21:23:34.057986: step: 892/470, loss: 0.03644545376300812 2023-01-22 21:23:34.733510: step: 894/470, loss: 0.0026314761489629745 2023-01-22 21:23:35.474802: step: 896/470, loss: 0.011988071724772453 2023-01-22 21:23:36.246974: step: 898/470, loss: 1.0937950611114502 2023-01-22 21:23:36.972698: step: 900/470, loss: 0.01913582533597946 2023-01-22 21:23:37.740320: step: 902/470, loss: 0.002004404319450259 2023-01-22 21:23:38.490347: step: 904/470, loss: 0.02826942503452301 2023-01-22 21:23:39.171406: step: 906/470, loss: 0.005874123424291611 2023-01-22 21:23:39.872984: step: 908/470, loss: 0.00016057485481724143 2023-01-22 21:23:40.623691: step: 910/470, loss: 0.02106778882443905 2023-01-22 21:23:41.348566: step: 912/470, loss: 0.011593667790293694 2023-01-22 21:23:42.056358: step: 914/470, loss: 0.006512309890240431 2023-01-22 21:23:42.883085: step: 916/470, loss: 0.022902552038431168 2023-01-22 21:23:43.676171: step: 918/470, loss: 0.00646227365359664 2023-01-22 21:23:44.361955: step: 920/470, loss: 0.022378139197826385 2023-01-22 21:23:45.055219: step: 922/470, loss: 0.027927104383707047 2023-01-22 21:23:45.814766: step: 924/470, loss: 0.0021522322203963995 2023-01-22 21:23:46.548094: step: 926/470, loss: 0.013384867459535599 2023-01-22 21:23:47.335812: step: 928/470, loss: 0.001673889346420765 2023-01-22 21:23:48.130248: step: 930/470, loss: 0.3148545026779175 2023-01-22 21:23:48.854052: step: 932/470, loss: 0.03243735432624817 2023-01-22 21:23:49.533152: step: 934/470, loss: 0.008292516693472862 2023-01-22 21:23:50.322369: step: 936/470, loss: 0.00035324000054970384 2023-01-22 21:23:51.067953: step: 938/470, loss: 0.003821393707767129 2023-01-22 21:23:51.816042: step: 940/470, loss: 0.008569333702325821 2023-01-22 21:23:52.525876: step: 942/470, loss: 0.003781524719670415 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30695768341998847, 'r': 0.3360808032890576, 'f1': 0.320859752415459}, 'combined': 0.2364229754640224, 'epoch': 34} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3418588143777791, 'r': 0.3763734062139972, 'f1': 0.3582868123227067}, 'combined': 0.24955300360785543, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30097216824941914, 'r': 0.3375228679988742, 'f1': 0.3182013442493859}, 'combined': 0.23446414839428434, 'epoch': 34} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34812443322241643, 'r': 0.37590744087382083, 'f1': 0.36148288350325813}, 'combined': 0.2517791228380903, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2869398730276134, 'r': 0.33975423295869217, 'f1': 0.31112159994653477}, 'combined': 0.22924749469744665, 'epoch': 34} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3366420050582426, 'r': 0.3877856942882449, 'f1': 0.3604085094367959}, 'combined': 0.2510308025927932, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21875, 'r': 0.375, 'f1': 0.27631578947368424}, 'combined': 0.1842105263157895, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22093023255813954, 'r': 0.41304347826086957, 'f1': 0.2878787878787879}, 'combined': 0.14393939393939395, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 21:26:29.718226: step: 2/470, loss: 0.02259201370179653 2023-01-22 21:26:30.444294: step: 4/470, loss: 0.002634732285514474 2023-01-22 21:26:31.133801: step: 6/470, loss: 0.01444519218057394 2023-01-22 21:26:31.919350: step: 8/470, loss: 0.007645417936146259 2023-01-22 21:26:32.704717: step: 10/470, loss: 6.119604950072244e-05 2023-01-22 21:26:33.411599: step: 12/470, loss: 0.006398671306669712 2023-01-22 21:26:34.174903: step: 14/470, loss: 1.4735318422317505 2023-01-22 21:26:34.939664: step: 16/470, loss: 0.0003182763757649809 2023-01-22 21:26:35.861413: step: 18/470, loss: 0.014096281491219997 2023-01-22 21:26:36.560386: step: 20/470, loss: 0.14310337603092194 2023-01-22 21:26:37.425143: step: 22/470, loss: 0.00020834009046666324 2023-01-22 21:26:38.137287: step: 24/470, loss: 0.0019535699393600225 2023-01-22 21:26:38.909101: step: 26/470, loss: 0.020988579839468002 2023-01-22 21:26:39.663199: step: 28/470, loss: 0.02080502361059189 2023-01-22 21:26:40.364547: step: 30/470, loss: 0.006822787690907717 2023-01-22 21:26:41.133056: step: 32/470, loss: 0.006888087373226881 2023-01-22 21:26:41.821200: step: 34/470, loss: 0.0055128647945821285 2023-01-22 21:26:42.564954: step: 36/470, loss: 0.0021674029994755983 2023-01-22 21:26:43.265792: step: 38/470, loss: 0.03273850306868553 2023-01-22 21:26:44.032729: step: 40/470, loss: 0.03793059661984444 2023-01-22 21:26:45.011484: step: 42/470, loss: 0.0005464658606797457 2023-01-22 21:26:45.889052: step: 44/470, loss: 7.778897997923195e-05 2023-01-22 21:26:46.588751: step: 46/470, loss: 0.006451576016843319 2023-01-22 21:26:47.342873: step: 48/470, loss: 0.040247056633234024 2023-01-22 21:26:48.041077: step: 50/470, loss: 0.0015183590585365891 2023-01-22 21:26:48.736377: step: 52/470, loss: 0.005788735579699278 2023-01-22 21:26:49.463382: step: 54/470, loss: 0.0011430204613134265 2023-01-22 21:26:50.205012: step: 56/470, loss: 0.0012191730784252286 2023-01-22 21:26:50.932729: step: 58/470, loss: 5.7664259657030925e-05 2023-01-22 21:26:51.843392: step: 60/470, loss: 0.010701543651521206 2023-01-22 21:26:52.533025: step: 62/470, loss: 0.00015700850053690374 2023-01-22 21:26:53.266146: step: 64/470, loss: 0.00028906611260026693 2023-01-22 21:26:53.956406: step: 66/470, loss: 0.00823915097862482 2023-01-22 21:26:54.642041: step: 68/470, loss: 0.028155675157904625 2023-01-22 21:26:55.368348: step: 70/470, loss: 0.0030553205870091915 2023-01-22 21:26:56.006028: step: 72/470, loss: 0.003039710223674774 2023-01-22 21:26:56.721520: step: 74/470, loss: 0.001571728615090251 2023-01-22 21:26:57.387127: step: 76/470, loss: 0.5274283289909363 2023-01-22 21:26:58.110381: step: 78/470, loss: 0.04768010228872299 2023-01-22 21:26:58.836578: step: 80/470, loss: 0.00412197969853878 2023-01-22 21:26:59.561995: step: 82/470, loss: 0.1300504207611084 2023-01-22 21:27:00.250119: step: 84/470, loss: 0.0008046081056818366 2023-01-22 21:27:00.975485: step: 86/470, loss: 0.04525003209710121 2023-01-22 21:27:01.704618: step: 88/470, loss: 0.003112942911684513 2023-01-22 21:27:02.432985: step: 90/470, loss: 0.008025175891816616 2023-01-22 21:27:03.087804: step: 92/470, loss: 0.011606319807469845 2023-01-22 21:27:03.785846: step: 94/470, loss: 0.005697671324014664 2023-01-22 21:27:04.512712: step: 96/470, loss: 0.00157053186558187 2023-01-22 21:27:05.202158: step: 98/470, loss: 0.0029143900610506535 2023-01-22 21:27:05.929278: step: 100/470, loss: 0.0015309115406125784 2023-01-22 21:27:06.661835: step: 102/470, loss: 0.0018614137079566717 2023-01-22 21:27:07.399880: step: 104/470, loss: 0.007174816448241472 2023-01-22 21:27:08.136544: step: 106/470, loss: 0.037225011736154556 2023-01-22 21:27:08.885988: step: 108/470, loss: 0.013661464676260948 2023-01-22 21:27:09.586645: step: 110/470, loss: 0.015359270386397839 2023-01-22 21:27:10.301632: step: 112/470, loss: 0.05280671268701553 2023-01-22 21:27:11.141478: step: 114/470, loss: 0.0423421710729599 2023-01-22 21:27:11.880574: step: 116/470, loss: 0.6965582370758057 2023-01-22 21:27:12.560640: step: 118/470, loss: 0.016391227021813393 2023-01-22 21:27:13.317192: step: 120/470, loss: 0.002424615202471614 2023-01-22 21:27:14.038589: step: 122/470, loss: 0.0439317412674427 2023-01-22 21:27:14.774614: step: 124/470, loss: 0.041528187692165375 2023-01-22 21:27:15.511046: step: 126/470, loss: 0.022909237071871758 2023-01-22 21:27:16.199347: step: 128/470, loss: 0.005902472883462906 2023-01-22 21:27:17.006981: step: 130/470, loss: 0.012232090346515179 2023-01-22 21:27:17.683237: step: 132/470, loss: 0.006282647140324116 2023-01-22 21:27:18.362270: step: 134/470, loss: 0.009130376391112804 2023-01-22 21:27:19.059401: step: 136/470, loss: 0.0018813696224242449 2023-01-22 21:27:19.862245: step: 138/470, loss: 0.20341543853282928 2023-01-22 21:27:20.639797: step: 140/470, loss: 0.0039907037280499935 2023-01-22 21:27:21.395402: step: 142/470, loss: 0.00871391873806715 2023-01-22 21:27:22.191687: step: 144/470, loss: 0.004598352592438459 2023-01-22 21:27:22.820390: step: 146/470, loss: 0.02266603522002697 2023-01-22 21:27:23.565812: step: 148/470, loss: 0.014866764657199383 2023-01-22 21:27:24.264766: step: 150/470, loss: 0.023495275527238846 2023-01-22 21:27:24.961868: step: 152/470, loss: 0.015716491267085075 2023-01-22 21:27:25.721091: step: 154/470, loss: 0.021563276648521423 2023-01-22 21:27:26.399630: step: 156/470, loss: 0.08640297502279282 2023-01-22 21:27:27.074289: step: 158/470, loss: 0.01481255330145359 2023-01-22 21:27:27.770609: step: 160/470, loss: 0.005702580790966749 2023-01-22 21:27:28.574699: step: 162/470, loss: 0.0012152111157774925 2023-01-22 21:27:29.311536: step: 164/470, loss: 0.0035164314322173595 2023-01-22 21:27:29.956296: step: 166/470, loss: 4.3240583181614056e-05 2023-01-22 21:27:30.675178: step: 168/470, loss: 0.049790188670158386 2023-01-22 21:27:31.395321: step: 170/470, loss: 0.0005157435080036521 2023-01-22 21:27:32.140480: step: 172/470, loss: 0.0012744866544380784 2023-01-22 21:27:32.882032: step: 174/470, loss: 0.003838537260890007 2023-01-22 21:27:33.628809: step: 176/470, loss: 0.006663764826953411 2023-01-22 21:27:34.400822: step: 178/470, loss: 0.013853196054697037 2023-01-22 21:27:35.116582: step: 180/470, loss: 0.004837124142795801 2023-01-22 21:27:35.868021: step: 182/470, loss: 0.0024396313820034266 2023-01-22 21:27:36.625754: step: 184/470, loss: 0.009030799381434917 2023-01-22 21:27:37.331482: step: 186/470, loss: 0.010197999887168407 2023-01-22 21:27:38.100923: step: 188/470, loss: 0.1191810742020607 2023-01-22 21:27:38.780713: step: 190/470, loss: 7.335062400670722e-05 2023-01-22 21:27:39.505935: step: 192/470, loss: 0.022936223074793816 2023-01-22 21:27:40.274376: step: 194/470, loss: 0.002525016665458679 2023-01-22 21:27:40.991493: step: 196/470, loss: 0.15407709777355194 2023-01-22 21:27:41.689463: step: 198/470, loss: 0.02182081900537014 2023-01-22 21:27:42.402285: step: 200/470, loss: 0.02167477458715439 2023-01-22 21:27:43.148255: step: 202/470, loss: 0.017393987625837326 2023-01-22 21:27:43.925053: step: 204/470, loss: 0.04078075289726257 2023-01-22 21:27:44.732232: step: 206/470, loss: 0.0022740724962204695 2023-01-22 21:27:45.522756: step: 208/470, loss: 0.011434967629611492 2023-01-22 21:27:46.181674: step: 210/470, loss: 0.010663002729415894 2023-01-22 21:27:46.938214: step: 212/470, loss: 0.03014855459332466 2023-01-22 21:27:47.609671: step: 214/470, loss: 0.018498685210943222 2023-01-22 21:27:48.455305: step: 216/470, loss: 0.03520440682768822 2023-01-22 21:27:49.231736: step: 218/470, loss: 0.008998945355415344 2023-01-22 21:27:50.083419: step: 220/470, loss: 0.038609180599451065 2023-01-22 21:27:50.837210: step: 222/470, loss: 0.0071504549123346806 2023-01-22 21:27:51.502851: step: 224/470, loss: 0.0034373498056083918 2023-01-22 21:27:52.243446: step: 226/470, loss: 0.010733299888670444 2023-01-22 21:27:52.979464: step: 228/470, loss: 0.0056127398274838924 2023-01-22 21:27:53.705665: step: 230/470, loss: 0.006185805890709162 2023-01-22 21:27:54.419763: step: 232/470, loss: 0.007041038013994694 2023-01-22 21:27:55.132719: step: 234/470, loss: 0.12337449938058853 2023-01-22 21:27:55.862216: step: 236/470, loss: 0.005509430076926947 2023-01-22 21:27:56.617799: step: 238/470, loss: 0.46690887212753296 2023-01-22 21:27:57.377199: step: 240/470, loss: 0.004861161578446627 2023-01-22 21:27:58.102346: step: 242/470, loss: 0.025522425770759583 2023-01-22 21:27:58.836015: step: 244/470, loss: 0.00966788548976183 2023-01-22 21:27:59.570797: step: 246/470, loss: 0.003947984892874956 2023-01-22 21:28:00.297993: step: 248/470, loss: 0.00835223589092493 2023-01-22 21:28:01.060759: step: 250/470, loss: 0.0010288195917382836 2023-01-22 21:28:01.806178: step: 252/470, loss: 0.0008441155659966171 2023-01-22 21:28:02.506943: step: 254/470, loss: 0.0017779265763238072 2023-01-22 21:28:03.213311: step: 256/470, loss: 0.0010619647800922394 2023-01-22 21:28:03.915683: step: 258/470, loss: 0.0002834839397110045 2023-01-22 21:28:04.616977: step: 260/470, loss: 0.0008969651535153389 2023-01-22 21:28:05.386933: step: 262/470, loss: 0.0008425424457527697 2023-01-22 21:28:06.068349: step: 264/470, loss: 0.02052401751279831 2023-01-22 21:28:06.819617: step: 266/470, loss: 0.001179807586595416 2023-01-22 21:28:07.600572: step: 268/470, loss: 1.5121781826019287 2023-01-22 21:28:08.361061: step: 270/470, loss: 0.004608070477843285 2023-01-22 21:28:09.124229: step: 272/470, loss: 0.0143381766974926 2023-01-22 21:28:09.834762: step: 274/470, loss: 0.0026956668589264154 2023-01-22 21:28:10.558579: step: 276/470, loss: 0.009074504487216473 2023-01-22 21:28:11.227607: step: 278/470, loss: 0.012543557211756706 2023-01-22 21:28:11.877359: step: 280/470, loss: 0.020108414813876152 2023-01-22 21:28:12.570763: step: 282/470, loss: 0.004175996873527765 2023-01-22 21:28:13.207042: step: 284/470, loss: 0.0059522595256567 2023-01-22 21:28:13.872855: step: 286/470, loss: 0.00022827064094599336 2023-01-22 21:28:14.656546: step: 288/470, loss: 0.007600300945341587 2023-01-22 21:28:15.413413: step: 290/470, loss: 0.00705496221780777 2023-01-22 21:28:16.181058: step: 292/470, loss: 0.06734557449817657 2023-01-22 21:28:16.935085: step: 294/470, loss: 8.682074258103967e-05 2023-01-22 21:28:17.816492: step: 296/470, loss: 0.004848700948059559 2023-01-22 21:28:18.558093: step: 298/470, loss: 0.0014980545965954661 2023-01-22 21:28:19.248074: step: 300/470, loss: 0.0006909299991093576 2023-01-22 21:28:20.006865: step: 302/470, loss: 0.0281376875936985 2023-01-22 21:28:20.706789: step: 304/470, loss: 0.010767661035060883 2023-01-22 21:28:21.446126: step: 306/470, loss: 0.18951472640037537 2023-01-22 21:28:22.163275: step: 308/470, loss: 0.0026056517381221056 2023-01-22 21:28:22.834558: step: 310/470, loss: 0.0158822201192379 2023-01-22 21:28:23.540290: step: 312/470, loss: 0.03624594956636429 2023-01-22 21:28:24.287243: step: 314/470, loss: 0.026492631062865257 2023-01-22 21:28:24.942749: step: 316/470, loss: 0.0021404740400612354 2023-01-22 21:28:25.718670: step: 318/470, loss: 0.0041463132947683334 2023-01-22 21:28:26.493460: step: 320/470, loss: 0.02275344356894493 2023-01-22 21:28:27.283716: step: 322/470, loss: 0.03233027830719948 2023-01-22 21:28:27.987026: step: 324/470, loss: 0.0033383751288056374 2023-01-22 21:28:28.761656: step: 326/470, loss: 0.010370167903602123 2023-01-22 21:28:29.421220: step: 328/470, loss: 0.20740625262260437 2023-01-22 21:28:30.170470: step: 330/470, loss: 0.031531982123851776 2023-01-22 21:28:30.918103: step: 332/470, loss: 0.0020223986357450485 2023-01-22 21:28:31.657142: step: 334/470, loss: 0.0020372618455439806 2023-01-22 21:28:32.343383: step: 336/470, loss: 0.0007067452534101903 2023-01-22 21:28:32.978338: step: 338/470, loss: 0.0010336972773075104 2023-01-22 21:28:33.750807: step: 340/470, loss: 0.00013325613690540195 2023-01-22 21:28:34.502504: step: 342/470, loss: 0.009005560539662838 2023-01-22 21:28:35.233921: step: 344/470, loss: 0.0064828358590602875 2023-01-22 21:28:35.895708: step: 346/470, loss: 0.0001396966545144096 2023-01-22 21:28:36.769511: step: 348/470, loss: 0.16062486171722412 2023-01-22 21:28:37.492191: step: 350/470, loss: 0.2716470956802368 2023-01-22 21:28:38.237142: step: 352/470, loss: 0.05235571414232254 2023-01-22 21:28:38.977609: step: 354/470, loss: 0.029102042317390442 2023-01-22 21:28:39.705914: step: 356/470, loss: 0.01925988309085369 2023-01-22 21:28:40.375289: step: 358/470, loss: 0.0007001806516200304 2023-01-22 21:28:41.093656: step: 360/470, loss: 0.0038126695435494184 2023-01-22 21:28:41.802302: step: 362/470, loss: 0.009832276962697506 2023-01-22 21:28:42.497262: step: 364/470, loss: 0.010709324851632118 2023-01-22 21:28:43.351614: step: 366/470, loss: 0.0702999159693718 2023-01-22 21:28:44.004625: step: 368/470, loss: 0.006159055978059769 2023-01-22 21:28:44.731424: step: 370/470, loss: 6.533000123454258e-05 2023-01-22 21:28:45.496904: step: 372/470, loss: 0.022670604288578033 2023-01-22 21:28:46.315334: step: 374/470, loss: 0.005668703466653824 2023-01-22 21:28:47.011750: step: 376/470, loss: 0.020378923043608665 2023-01-22 21:28:47.745751: step: 378/470, loss: 0.00037373710074461997 2023-01-22 21:28:48.540016: step: 380/470, loss: 0.03276490792632103 2023-01-22 21:28:49.314720: step: 382/470, loss: 0.04409230127930641 2023-01-22 21:28:50.100527: step: 384/470, loss: 0.003761183237656951 2023-01-22 21:28:50.829424: step: 386/470, loss: 0.03813392296433449 2023-01-22 21:28:51.566270: step: 388/470, loss: 0.0055965944193303585 2023-01-22 21:28:52.313937: step: 390/470, loss: 0.003348251339048147 2023-01-22 21:28:53.037354: step: 392/470, loss: 0.015311792492866516 2023-01-22 21:28:53.752759: step: 394/470, loss: 0.00032469138386659324 2023-01-22 21:28:54.574267: step: 396/470, loss: 0.006082088686525822 2023-01-22 21:28:55.415402: step: 398/470, loss: 0.02094237320125103 2023-01-22 21:28:56.153772: step: 400/470, loss: 0.01110068242996931 2023-01-22 21:28:56.836459: step: 402/470, loss: 0.20870746672153473 2023-01-22 21:28:57.584051: step: 404/470, loss: 0.0033058554399758577 2023-01-22 21:28:58.279317: step: 406/470, loss: 0.00017056135402526706 2023-01-22 21:28:59.042228: step: 408/470, loss: 0.0010944005334749818 2023-01-22 21:28:59.776608: step: 410/470, loss: 0.03575807437300682 2023-01-22 21:29:00.483650: step: 412/470, loss: 0.007012884132564068 2023-01-22 21:29:01.289483: step: 414/470, loss: 4.490640276344493e-05 2023-01-22 21:29:02.024569: step: 416/470, loss: 0.05343223735690117 2023-01-22 21:29:02.748103: step: 418/470, loss: 0.007675068452954292 2023-01-22 21:29:03.426560: step: 420/470, loss: 0.0003359982802066952 2023-01-22 21:29:04.265911: step: 422/470, loss: 0.00039737403858453035 2023-01-22 21:29:04.946866: step: 424/470, loss: 0.002490453887730837 2023-01-22 21:29:05.702800: step: 426/470, loss: 0.0012507832143455744 2023-01-22 21:29:06.440581: step: 428/470, loss: 0.003989855293184519 2023-01-22 21:29:07.184819: step: 430/470, loss: 0.0036130514927208424 2023-01-22 21:29:07.890554: step: 432/470, loss: 0.0005295684677548707 2023-01-22 21:29:08.681480: step: 434/470, loss: 0.019468065351247787 2023-01-22 21:29:09.374797: step: 436/470, loss: 0.01111428253352642 2023-01-22 21:29:10.096041: step: 438/470, loss: 0.08234895020723343 2023-01-22 21:29:10.814434: step: 440/470, loss: 0.016692696139216423 2023-01-22 21:29:11.552108: step: 442/470, loss: 0.03166506811976433 2023-01-22 21:29:12.304626: step: 444/470, loss: 0.03169902786612511 2023-01-22 21:29:13.046584: step: 446/470, loss: 0.010146571323275566 2023-01-22 21:29:13.771990: step: 448/470, loss: 0.0064794747158885 2023-01-22 21:29:14.421262: step: 450/470, loss: 0.009995583444833755 2023-01-22 21:29:15.102695: step: 452/470, loss: 0.004090282134711742 2023-01-22 21:29:15.836775: step: 454/470, loss: 0.011358017101883888 2023-01-22 21:29:16.501594: step: 456/470, loss: 0.023835793137550354 2023-01-22 21:29:17.223966: step: 458/470, loss: 0.040863286703825 2023-01-22 21:29:18.033874: step: 460/470, loss: 0.025173354893922806 2023-01-22 21:29:18.731225: step: 462/470, loss: 0.0024334678892046213 2023-01-22 21:29:19.487607: step: 464/470, loss: 0.011209073476493359 2023-01-22 21:29:20.262140: step: 466/470, loss: 0.015448382124304771 2023-01-22 21:29:20.884036: step: 468/470, loss: 0.027771124616265297 2023-01-22 21:29:21.701079: step: 470/470, loss: 0.006685018073767424 2023-01-22 21:29:22.433020: step: 472/470, loss: 0.005859097465872765 2023-01-22 21:29:23.164675: step: 474/470, loss: 0.009597435593605042 2023-01-22 21:29:23.908720: step: 476/470, loss: 0.006935752462595701 2023-01-22 21:29:24.620711: step: 478/470, loss: 0.0011011871974915266 2023-01-22 21:29:25.366982: step: 480/470, loss: 0.008520321920514107 2023-01-22 21:29:26.103536: step: 482/470, loss: 0.031099451705813408 2023-01-22 21:29:26.805732: step: 484/470, loss: 0.001046741963364184 2023-01-22 21:29:27.456669: step: 486/470, loss: 0.001939225709065795 2023-01-22 21:29:28.250491: step: 488/470, loss: 0.027191482484340668 2023-01-22 21:29:29.033582: step: 490/470, loss: 0.004440982360392809 2023-01-22 21:29:29.729558: step: 492/470, loss: 0.00052472302922979 2023-01-22 21:29:30.501629: step: 494/470, loss: 0.005477104801684618 2023-01-22 21:29:31.329158: step: 496/470, loss: 0.006036494392901659 2023-01-22 21:29:32.042014: step: 498/470, loss: 0.006640726700425148 2023-01-22 21:29:32.796030: step: 500/470, loss: 0.024400828406214714 2023-01-22 21:29:33.611887: step: 502/470, loss: 0.002363094361498952 2023-01-22 21:29:34.346976: step: 504/470, loss: 0.007462051697075367 2023-01-22 21:29:35.100648: step: 506/470, loss: 0.005165347829461098 2023-01-22 21:29:35.844080: step: 508/470, loss: 0.0008528852486051619 2023-01-22 21:29:36.581188: step: 510/470, loss: 0.002549446653574705 2023-01-22 21:29:37.372406: step: 512/470, loss: 0.003729384858161211 2023-01-22 21:29:38.159066: step: 514/470, loss: 0.008527263067662716 2023-01-22 21:29:38.884560: step: 516/470, loss: 0.0031432053074240685 2023-01-22 21:29:39.637637: step: 518/470, loss: 0.018408743664622307 2023-01-22 21:29:40.334049: step: 520/470, loss: 0.008144121617078781 2023-01-22 21:29:41.132093: step: 522/470, loss: 0.020345963537693024 2023-01-22 21:29:41.818126: step: 524/470, loss: 0.0073492685332894325 2023-01-22 21:29:42.525081: step: 526/470, loss: 0.009888478554785252 2023-01-22 21:29:43.249031: step: 528/470, loss: 0.002217804081737995 2023-01-22 21:29:44.008535: step: 530/470, loss: 0.6544013619422913 2023-01-22 21:29:44.751894: step: 532/470, loss: 0.007728029508143663 2023-01-22 21:29:45.526689: step: 534/470, loss: 0.0011883730767294765 2023-01-22 21:29:46.228499: step: 536/470, loss: 0.0005980039713904262 2023-01-22 21:29:46.919303: step: 538/470, loss: 0.0516948327422142 2023-01-22 21:29:47.702856: step: 540/470, loss: 0.029013799503445625 2023-01-22 21:29:48.435956: step: 542/470, loss: 0.0037074440624564886 2023-01-22 21:29:49.219163: step: 544/470, loss: 0.00036978485877625644 2023-01-22 21:29:49.965776: step: 546/470, loss: 0.0021586138755083084 2023-01-22 21:29:50.761011: step: 548/470, loss: 0.014409597963094711 2023-01-22 21:29:51.454027: step: 550/470, loss: 0.0010483302175998688 2023-01-22 21:29:52.203770: step: 552/470, loss: 0.026682112365961075 2023-01-22 21:29:52.877863: step: 554/470, loss: 0.00963746290653944 2023-01-22 21:29:53.579114: step: 556/470, loss: 0.03591204062104225 2023-01-22 21:29:54.380016: step: 558/470, loss: 0.013330896385014057 2023-01-22 21:29:55.075116: step: 560/470, loss: 0.008176986128091812 2023-01-22 21:29:55.846914: step: 562/470, loss: 0.00987264420837164 2023-01-22 21:29:56.670916: step: 564/470, loss: 0.018903588876128197 2023-01-22 21:29:57.362733: step: 566/470, loss: 0.00032135986839421093 2023-01-22 21:29:58.102706: step: 568/470, loss: 0.00034836053964681923 2023-01-22 21:29:58.757715: step: 570/470, loss: 0.006845478434115648 2023-01-22 21:29:59.473161: step: 572/470, loss: 0.01886317878961563 2023-01-22 21:30:00.118377: step: 574/470, loss: 0.0015140185132622719 2023-01-22 21:30:00.893745: step: 576/470, loss: 0.0014535411028191447 2023-01-22 21:30:01.645669: step: 578/470, loss: 0.00014838328934274614 2023-01-22 21:30:02.282771: step: 580/470, loss: 0.005177702754735947 2023-01-22 21:30:02.971807: step: 582/470, loss: 0.0012136365985497832 2023-01-22 21:30:03.749254: step: 584/470, loss: 0.001734656747430563 2023-01-22 21:30:04.497356: step: 586/470, loss: 0.0007220085244625807 2023-01-22 21:30:05.111678: step: 588/470, loss: 0.0014062359696254134 2023-01-22 21:30:05.793735: step: 590/470, loss: 5.9928792325081304e-05 2023-01-22 21:30:06.498661: step: 592/470, loss: 0.03473540395498276 2023-01-22 21:30:07.183376: step: 594/470, loss: 0.01883302815258503 2023-01-22 21:30:07.878068: step: 596/470, loss: 0.008976499550044537 2023-01-22 21:30:08.632535: step: 598/470, loss: 8.038515079533681e-05 2023-01-22 21:30:09.489899: step: 600/470, loss: 0.008133098483085632 2023-01-22 21:30:10.284395: step: 602/470, loss: 0.009850953705608845 2023-01-22 21:30:11.084531: step: 604/470, loss: 0.016613174229860306 2023-01-22 21:30:11.757269: step: 606/470, loss: 0.008490724489092827 2023-01-22 21:30:12.444402: step: 608/470, loss: 0.00034214864717796445 2023-01-22 21:30:13.178505: step: 610/470, loss: 0.010509525425732136 2023-01-22 21:30:13.842852: step: 612/470, loss: 0.0069589437916874886 2023-01-22 21:30:14.698513: step: 614/470, loss: 0.11274606734514236 2023-01-22 21:30:15.415756: step: 616/470, loss: 0.009286433458328247 2023-01-22 21:30:16.255428: step: 618/470, loss: 0.11397167295217514 2023-01-22 21:30:16.976746: step: 620/470, loss: 0.04145457595586777 2023-01-22 21:30:17.671651: step: 622/470, loss: 0.03650517016649246 2023-01-22 21:30:18.399357: step: 624/470, loss: 0.004478362388908863 2023-01-22 21:30:19.180358: step: 626/470, loss: 0.06043071672320366 2023-01-22 21:30:19.872215: step: 628/470, loss: 0.055728744715452194 2023-01-22 21:30:20.641532: step: 630/470, loss: 0.09390320628881454 2023-01-22 21:30:21.266408: step: 632/470, loss: 0.0012103930348530412 2023-01-22 21:30:21.965711: step: 634/470, loss: 0.00010801952157635242 2023-01-22 21:30:22.725057: step: 636/470, loss: 0.0020870454609394073 2023-01-22 21:30:23.420183: step: 638/470, loss: 0.0008572909864597023 2023-01-22 21:30:24.198309: step: 640/470, loss: 0.00878854189068079 2023-01-22 21:30:24.885799: step: 642/470, loss: 0.006001758389174938 2023-01-22 21:30:25.594138: step: 644/470, loss: 0.003095379564911127 2023-01-22 21:30:26.391438: step: 646/470, loss: 0.005022485740482807 2023-01-22 21:30:27.153678: step: 648/470, loss: 0.01622042804956436 2023-01-22 21:30:27.879181: step: 650/470, loss: 0.03620656952261925 2023-01-22 21:30:28.623273: step: 652/470, loss: 0.0006067268550395966 2023-01-22 21:30:29.344829: step: 654/470, loss: 0.0015004700981080532 2023-01-22 21:30:30.169467: step: 656/470, loss: 0.003583112731575966 2023-01-22 21:30:30.831023: step: 658/470, loss: 0.0014826322440057993 2023-01-22 21:30:31.556733: step: 660/470, loss: 0.004717591218650341 2023-01-22 21:30:32.304366: step: 662/470, loss: 0.00042355526238679886 2023-01-22 21:30:33.049445: step: 664/470, loss: 0.005252582021057606 2023-01-22 21:30:33.819086: step: 666/470, loss: 0.011925187893211842 2023-01-22 21:30:34.599959: step: 668/470, loss: 0.005225887056440115 2023-01-22 21:30:35.382793: step: 670/470, loss: 0.04948273301124573 2023-01-22 21:30:36.119248: step: 672/470, loss: 0.0006488541257567704 2023-01-22 21:30:36.802167: step: 674/470, loss: 0.004141774959862232 2023-01-22 21:30:37.546974: step: 676/470, loss: 0.10701893270015717 2023-01-22 21:30:38.266631: step: 678/470, loss: 0.0004678604891523719 2023-01-22 21:30:38.922091: step: 680/470, loss: 0.0048322975635528564 2023-01-22 21:30:39.664067: step: 682/470, loss: 0.023693975061178207 2023-01-22 21:30:40.382125: step: 684/470, loss: 0.00018593238200992346 2023-01-22 21:30:41.156838: step: 686/470, loss: 8.329687989316881e-05 2023-01-22 21:30:41.854694: step: 688/470, loss: 0.0013428764650598168 2023-01-22 21:30:42.538118: step: 690/470, loss: 0.01941107213497162 2023-01-22 21:30:43.339676: step: 692/470, loss: 0.007984393276274204 2023-01-22 21:30:44.101034: step: 694/470, loss: 0.4107176661491394 2023-01-22 21:30:44.868080: step: 696/470, loss: 0.16444319486618042 2023-01-22 21:30:45.615023: step: 698/470, loss: 0.010815066285431385 2023-01-22 21:30:46.445741: step: 700/470, loss: 0.04077745974063873 2023-01-22 21:30:47.216108: step: 702/470, loss: 0.01998460479080677 2023-01-22 21:30:48.055410: step: 704/470, loss: 0.0018847326282411814 2023-01-22 21:30:48.801108: step: 706/470, loss: 0.12863512337207794 2023-01-22 21:30:49.533245: step: 708/470, loss: 0.00011070125765400007 2023-01-22 21:30:50.200478: step: 710/470, loss: 0.0005842326791025698 2023-01-22 21:30:50.922569: step: 712/470, loss: 0.000646944681648165 2023-01-22 21:30:51.709504: step: 714/470, loss: 0.005940971430391073 2023-01-22 21:30:52.499410: step: 716/470, loss: 0.00032181438291445374 2023-01-22 21:30:53.193107: step: 718/470, loss: 0.001542411744594574 2023-01-22 21:30:53.948542: step: 720/470, loss: 0.005175524391233921 2023-01-22 21:30:54.663964: step: 722/470, loss: 0.009312089532613754 2023-01-22 21:30:55.422502: step: 724/470, loss: 0.0014529626350849867 2023-01-22 21:30:56.166132: step: 726/470, loss: 0.012921427376568317 2023-01-22 21:30:56.901515: step: 728/470, loss: 0.010379222221672535 2023-01-22 21:30:57.598110: step: 730/470, loss: 0.02943400666117668 2023-01-22 21:30:58.399582: step: 732/470, loss: 0.004278372973203659 2023-01-22 21:30:59.128557: step: 734/470, loss: 0.002038972917944193 2023-01-22 21:30:59.837605: step: 736/470, loss: 0.008259556256234646 2023-01-22 21:31:00.598535: step: 738/470, loss: 0.005812020972371101 2023-01-22 21:31:01.434016: step: 740/470, loss: 0.0011574298841878772 2023-01-22 21:31:02.203170: step: 742/470, loss: 0.038374532014131546 2023-01-22 21:31:02.967598: step: 744/470, loss: 0.002495008986443281 2023-01-22 21:31:03.720963: step: 746/470, loss: 0.009755531325936317 2023-01-22 21:31:04.457810: step: 748/470, loss: 0.12439113855361938 2023-01-22 21:31:05.105683: step: 750/470, loss: 0.0035697193816304207 2023-01-22 21:31:05.805293: step: 752/470, loss: 0.0054044960997998714 2023-01-22 21:31:06.543615: step: 754/470, loss: 0.012278404086828232 2023-01-22 21:31:07.183633: step: 756/470, loss: 0.009794940240681171 2023-01-22 21:31:07.924535: step: 758/470, loss: 0.002220005262643099 2023-01-22 21:31:08.658484: step: 760/470, loss: 0.012325046584010124 2023-01-22 21:31:09.418624: step: 762/470, loss: 0.018408851698040962 2023-01-22 21:31:10.147926: step: 764/470, loss: 0.01817646436393261 2023-01-22 21:31:10.859792: step: 766/470, loss: 0.02506135031580925 2023-01-22 21:31:11.638607: step: 768/470, loss: 0.014540938660502434 2023-01-22 21:31:12.406673: step: 770/470, loss: 0.004760831594467163 2023-01-22 21:31:13.136703: step: 772/470, loss: 0.07413551956415176 2023-01-22 21:31:13.895880: step: 774/470, loss: 0.03643839806318283 2023-01-22 21:31:14.630847: step: 776/470, loss: 0.0018023299053311348 2023-01-22 21:31:15.412585: step: 778/470, loss: 0.020913278684020042 2023-01-22 21:31:16.181255: step: 780/470, loss: 0.02016664668917656 2023-01-22 21:31:16.879952: step: 782/470, loss: 0.01156390830874443 2023-01-22 21:31:17.709328: step: 784/470, loss: 0.013799360953271389 2023-01-22 21:31:18.410069: step: 786/470, loss: 0.0006355499499477446 2023-01-22 21:31:19.128948: step: 788/470, loss: 0.00016411063552368432 2023-01-22 21:31:19.849583: step: 790/470, loss: 0.0022397139109671116 2023-01-22 21:31:20.569813: step: 792/470, loss: 0.028047997504472733 2023-01-22 21:31:21.382185: step: 794/470, loss: 0.0009197811014018953 2023-01-22 21:31:22.150761: step: 796/470, loss: 0.5110117793083191 2023-01-22 21:31:22.885470: step: 798/470, loss: 0.0024509942159056664 2023-01-22 21:31:23.570298: step: 800/470, loss: 0.3866247832775116 2023-01-22 21:31:24.295848: step: 802/470, loss: 0.027848878875374794 2023-01-22 21:31:25.056698: step: 804/470, loss: 0.004700549878180027 2023-01-22 21:31:25.931630: step: 806/470, loss: 0.0064826603047549725 2023-01-22 21:31:26.610222: step: 808/470, loss: 0.000695836846716702 2023-01-22 21:31:27.375093: step: 810/470, loss: 0.0028286667075008154 2023-01-22 21:31:28.130537: step: 812/470, loss: 0.00968991406261921 2023-01-22 21:31:28.863255: step: 814/470, loss: 0.001139800762757659 2023-01-22 21:31:29.553319: step: 816/470, loss: 0.004776866175234318 2023-01-22 21:31:30.293446: step: 818/470, loss: 0.015945924445986748 2023-01-22 21:31:30.949936: step: 820/470, loss: 0.0016348791541531682 2023-01-22 21:31:31.723930: step: 822/470, loss: 0.02290150709450245 2023-01-22 21:31:32.476683: step: 824/470, loss: 0.046208322048187256 2023-01-22 21:31:33.200657: step: 826/470, loss: 0.0024622592609375715 2023-01-22 21:31:33.979807: step: 828/470, loss: 0.011288406327366829 2023-01-22 21:31:34.735627: step: 830/470, loss: 0.042418159544467926 2023-01-22 21:31:35.434415: step: 832/470, loss: 0.0909082442522049 2023-01-22 21:31:36.059896: step: 834/470, loss: 0.000317727419314906 2023-01-22 21:31:36.668780: step: 836/470, loss: 0.0007268586196005344 2023-01-22 21:31:37.457580: step: 838/470, loss: 0.012510606087744236 2023-01-22 21:31:38.204806: step: 840/470, loss: 0.007081144023686647 2023-01-22 21:31:38.871169: step: 842/470, loss: 0.0002773547312244773 2023-01-22 21:31:39.632816: step: 844/470, loss: 0.002572752069681883 2023-01-22 21:31:40.298600: step: 846/470, loss: 0.024069497361779213 2023-01-22 21:31:41.129645: step: 848/470, loss: 0.0006529639358632267 2023-01-22 21:31:41.789672: step: 850/470, loss: 0.008918379433453083 2023-01-22 21:31:42.493434: step: 852/470, loss: 0.005846343468874693 2023-01-22 21:31:43.167136: step: 854/470, loss: 0.011811641044914722 2023-01-22 21:31:43.853743: step: 856/470, loss: 0.002782166237011552 2023-01-22 21:31:44.625286: step: 858/470, loss: 0.0026911317836493254 2023-01-22 21:31:45.379858: step: 860/470, loss: 0.07831226289272308 2023-01-22 21:31:46.125991: step: 862/470, loss: 0.009529894217848778 2023-01-22 21:31:46.801586: step: 864/470, loss: 0.0002590777294244617 2023-01-22 21:31:47.525590: step: 866/470, loss: 0.14221465587615967 2023-01-22 21:31:48.248324: step: 868/470, loss: 0.03858411684632301 2023-01-22 21:31:49.019343: step: 870/470, loss: 0.020872116088867188 2023-01-22 21:31:49.808387: step: 872/470, loss: 0.0001517470518592745 2023-01-22 21:31:50.497568: step: 874/470, loss: 0.0035211762879043818 2023-01-22 21:31:51.239419: step: 876/470, loss: 0.04481646046042442 2023-01-22 21:31:51.953513: step: 878/470, loss: 0.013270605355501175 2023-01-22 21:31:52.672286: step: 880/470, loss: 0.020657042041420937 2023-01-22 21:31:53.425422: step: 882/470, loss: 0.036236584186553955 2023-01-22 21:31:54.128081: step: 884/470, loss: 0.012062986381351948 2023-01-22 21:31:54.836237: step: 886/470, loss: 0.0015210265992209315 2023-01-22 21:31:55.568998: step: 888/470, loss: 0.004456940107047558 2023-01-22 21:31:56.309916: step: 890/470, loss: 0.01179597806185484 2023-01-22 21:31:57.065781: step: 892/470, loss: 0.0002963356382679194 2023-01-22 21:31:57.931241: step: 894/470, loss: 0.001176392543129623 2023-01-22 21:31:58.729603: step: 896/470, loss: 0.0065690902993083 2023-01-22 21:31:59.497027: step: 898/470, loss: 0.008019453845918179 2023-01-22 21:32:00.254207: step: 900/470, loss: 0.002323372755199671 2023-01-22 21:32:00.967914: step: 902/470, loss: 0.036690160632133484 2023-01-22 21:32:01.661252: step: 904/470, loss: 0.01081377174705267 2023-01-22 21:32:02.368799: step: 906/470, loss: 0.014264862053096294 2023-01-22 21:32:03.102129: step: 908/470, loss: 0.014920435845851898 2023-01-22 21:32:03.805798: step: 910/470, loss: 0.030699947848916054 2023-01-22 21:32:04.507764: step: 912/470, loss: 0.0012479553697630763 2023-01-22 21:32:05.309712: step: 914/470, loss: 0.3528791069984436 2023-01-22 21:32:06.029645: step: 916/470, loss: 0.02547892928123474 2023-01-22 21:32:06.788982: step: 918/470, loss: 0.0035649023484438658 2023-01-22 21:32:07.478521: step: 920/470, loss: 0.0018619990441948175 2023-01-22 21:32:08.139447: step: 922/470, loss: 0.0008721183985471725 2023-01-22 21:32:08.848160: step: 924/470, loss: 0.04525361210107803 2023-01-22 21:32:09.519458: step: 926/470, loss: 0.17319992184638977 2023-01-22 21:32:10.192594: step: 928/470, loss: 0.00043553844443522394 2023-01-22 21:32:11.093941: step: 930/470, loss: 0.007656523957848549 2023-01-22 21:32:11.799631: step: 932/470, loss: 0.003974429797381163 2023-01-22 21:32:12.528003: step: 934/470, loss: 0.028335638344287872 2023-01-22 21:32:13.184297: step: 936/470, loss: 0.016558095812797546 2023-01-22 21:32:13.923021: step: 938/470, loss: 0.008354702033102512 2023-01-22 21:32:14.666121: step: 940/470, loss: 0.023873982951045036 2023-01-22 21:32:15.309271: step: 942/470, loss: 0.25000032782554626 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3006525683975433, 'r': 0.3405874446552815, 'f1': 0.31937648279952546}, 'combined': 0.23533003995754506, 'epoch': 35} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3540031340467799, 'r': 0.3625046502342626, 'f1': 0.35820345599797565}, 'combined': 0.24949494447620196, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2930894524617996, 'r': 0.3448111205432937, 'f1': 0.3168534621208645}, 'combined': 0.23347097208905804, 'epoch': 35} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3545024543162406, 'r': 0.3609727200530404, 'f1': 0.3577083308664589}, 'combined': 0.24915008120051868, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2789093365094121, 'r': 0.35035669975186107, 'f1': 0.3105769230769231}, 'combined': 0.22884615384615384, 'epoch': 35} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34280056080802684, 'r': 0.3681566061319635, 'f1': 0.35502642610780366}, 'combined': 0.24728208783628117, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21929824561403508, 'r': 0.35714285714285715, 'f1': 0.2717391304347826}, 'combined': 0.18115942028985504, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2361111111111111, 'r': 0.3695652173913043, 'f1': 0.28813559322033905}, 'combined': 0.14406779661016952, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 21:34:52.407274: step: 2/470, loss: 0.00923900119960308 2023-01-22 21:34:53.145347: step: 4/470, loss: 0.012077202089130878 2023-01-22 21:34:53.969023: step: 6/470, loss: 0.017157871276140213 2023-01-22 21:34:54.702844: step: 8/470, loss: 0.0011838817736133933 2023-01-22 21:34:55.407262: step: 10/470, loss: 0.031294677406549454 2023-01-22 21:34:56.106004: step: 12/470, loss: 0.0029603552538901567 2023-01-22 21:34:56.851295: step: 14/470, loss: 0.0066464850679039955 2023-01-22 21:34:57.583973: step: 16/470, loss: 0.00202410900965333 2023-01-22 21:34:58.344602: step: 18/470, loss: 0.007574394345283508 2023-01-22 21:34:59.088818: step: 20/470, loss: 0.39814719557762146 2023-01-22 21:34:59.829593: step: 22/470, loss: 0.0019493503496050835 2023-01-22 21:35:00.646073: step: 24/470, loss: 0.035010937601327896 2023-01-22 21:35:01.424190: step: 26/470, loss: 0.000734560308046639 2023-01-22 21:35:02.195919: step: 28/470, loss: 0.008018039166927338 2023-01-22 21:35:02.921603: step: 30/470, loss: 0.0227291788905859 2023-01-22 21:35:03.615690: step: 32/470, loss: 0.003483888227492571 2023-01-22 21:35:04.271956: step: 34/470, loss: 0.0005610057269223034 2023-01-22 21:35:04.982633: step: 36/470, loss: 0.0045489352196455 2023-01-22 21:35:05.883248: step: 38/470, loss: 0.04132385924458504 2023-01-22 21:35:06.652101: step: 40/470, loss: 0.002870872151106596 2023-01-22 21:35:07.416605: step: 42/470, loss: 0.005469549912959337 2023-01-22 21:35:08.073040: step: 44/470, loss: 0.004644967149943113 2023-01-22 21:35:08.770894: step: 46/470, loss: 0.0010548782302066684 2023-01-22 21:35:09.480780: step: 48/470, loss: 0.008158699609339237 2023-01-22 21:35:10.194572: step: 50/470, loss: 0.0006612870492972434 2023-01-22 21:35:10.877243: step: 52/470, loss: 0.008797750808298588 2023-01-22 21:35:11.613820: step: 54/470, loss: 0.04867735132575035 2023-01-22 21:35:12.379480: step: 56/470, loss: 0.005971251055598259 2023-01-22 21:35:13.107308: step: 58/470, loss: 0.003688129596412182 2023-01-22 21:35:13.795040: step: 60/470, loss: 0.0009264365653507411 2023-01-22 21:35:14.598648: step: 62/470, loss: 0.010077004320919514 2023-01-22 21:35:15.218058: step: 64/470, loss: 0.0002823990071192384 2023-01-22 21:35:15.921226: step: 66/470, loss: 0.007472657132893801 2023-01-22 21:35:16.658136: step: 68/470, loss: 0.12757711112499237 2023-01-22 21:35:17.403295: step: 70/470, loss: 0.0001368361699860543 2023-01-22 21:35:18.098037: step: 72/470, loss: 0.021803027018904686 2023-01-22 21:35:18.822636: step: 74/470, loss: 0.00901093054562807 2023-01-22 21:35:19.611239: step: 76/470, loss: 0.013547614216804504 2023-01-22 21:35:20.363289: step: 78/470, loss: 0.0005593632231466472 2023-01-22 21:35:21.056485: step: 80/470, loss: 0.0004783176409546286 2023-01-22 21:35:21.797640: step: 82/470, loss: 7.210922194644809e-05 2023-01-22 21:35:22.560480: step: 84/470, loss: 0.0192702803760767 2023-01-22 21:35:23.347114: step: 86/470, loss: 0.005496703088283539 2023-01-22 21:35:24.135710: step: 88/470, loss: 0.023109078407287598 2023-01-22 21:35:24.864630: step: 90/470, loss: 0.005734420381486416 2023-01-22 21:35:25.633644: step: 92/470, loss: 0.24637438356876373 2023-01-22 21:35:26.381247: step: 94/470, loss: 0.02152753807604313 2023-01-22 21:35:27.106827: step: 96/470, loss: 0.005265772808343172 2023-01-22 21:35:27.847194: step: 98/470, loss: 0.012245593592524529 2023-01-22 21:35:28.637680: step: 100/470, loss: 0.0001247525360668078 2023-01-22 21:35:29.465113: step: 102/470, loss: 0.0013826676877215505 2023-01-22 21:35:30.201177: step: 104/470, loss: 0.0027163000777363777 2023-01-22 21:35:30.928880: step: 106/470, loss: 0.005290861241519451 2023-01-22 21:35:31.686665: step: 108/470, loss: 0.009316950105130672 2023-01-22 21:35:32.497480: step: 110/470, loss: 0.1678803563117981 2023-01-22 21:35:33.180211: step: 112/470, loss: 0.09275523573160172 2023-01-22 21:35:33.850795: step: 114/470, loss: 0.0027781319804489613 2023-01-22 21:35:34.481356: step: 116/470, loss: 4.896317113889381e-05 2023-01-22 21:35:35.228583: step: 118/470, loss: 2.4704264433239587e-05 2023-01-22 21:35:35.958128: step: 120/470, loss: 0.004806291311979294 2023-01-22 21:35:36.746947: step: 122/470, loss: 0.09023859351873398 2023-01-22 21:35:37.429445: step: 124/470, loss: 0.0018310233717784286 2023-01-22 21:35:38.140758: step: 126/470, loss: 0.0012032542144879699 2023-01-22 21:35:38.866513: step: 128/470, loss: 0.0027267164550721645 2023-01-22 21:35:39.556385: step: 130/470, loss: 0.0002811734448187053 2023-01-22 21:35:40.327600: step: 132/470, loss: 0.00040184182580560446 2023-01-22 21:35:41.060509: step: 134/470, loss: 0.0015739360824227333 2023-01-22 21:35:41.787025: step: 136/470, loss: 0.00714887585490942 2023-01-22 21:35:42.496421: step: 138/470, loss: 0.022787457332015038 2023-01-22 21:35:43.251826: step: 140/470, loss: 0.00430362019687891 2023-01-22 21:35:43.895393: step: 142/470, loss: 3.319919778732583e-05 2023-01-22 21:35:44.563249: step: 144/470, loss: 0.015644496306777 2023-01-22 21:35:45.285452: step: 146/470, loss: 0.019434552639722824 2023-01-22 21:35:46.047011: step: 148/470, loss: 0.001240216544829309 2023-01-22 21:35:46.819133: step: 150/470, loss: 0.001468593254685402 2023-01-22 21:35:47.537689: step: 152/470, loss: 0.005688677076250315 2023-01-22 21:35:48.280666: step: 154/470, loss: 0.03621711581945419 2023-01-22 21:35:49.080209: step: 156/470, loss: 0.002134978072717786 2023-01-22 21:35:49.879732: step: 158/470, loss: 0.0038371176924556494 2023-01-22 21:35:50.630401: step: 160/470, loss: 0.025045614689588547 2023-01-22 21:35:51.383835: step: 162/470, loss: 0.002029221039265394 2023-01-22 21:35:52.359317: step: 164/470, loss: 0.0003728036826942116 2023-01-22 21:35:53.068573: step: 166/470, loss: 0.00826074555516243 2023-01-22 21:35:53.890066: step: 168/470, loss: 0.003485210472717881 2023-01-22 21:35:54.668353: step: 170/470, loss: 0.002386857522651553 2023-01-22 21:35:55.406979: step: 172/470, loss: 0.010757505893707275 2023-01-22 21:35:56.170478: step: 174/470, loss: 0.00275437138043344 2023-01-22 21:35:56.806632: step: 176/470, loss: 0.007767565548419952 2023-01-22 21:35:57.593749: step: 178/470, loss: 0.9939495921134949 2023-01-22 21:35:58.300733: step: 180/470, loss: 0.0013092899462208152 2023-01-22 21:35:59.056306: step: 182/470, loss: 0.003127588424831629 2023-01-22 21:35:59.796065: step: 184/470, loss: 0.0006091459654271603 2023-01-22 21:36:00.502591: step: 186/470, loss: 0.03590903431177139 2023-01-22 21:36:01.243587: step: 188/470, loss: 0.024302396923303604 2023-01-22 21:36:02.033392: step: 190/470, loss: 0.0021148929372429848 2023-01-22 21:36:02.719261: step: 192/470, loss: 0.011187209747731686 2023-01-22 21:36:03.379576: step: 194/470, loss: 0.004178525879979134 2023-01-22 21:36:04.129929: step: 196/470, loss: 0.033812280744314194 2023-01-22 21:36:04.798338: step: 198/470, loss: 0.009949339553713799 2023-01-22 21:36:05.493657: step: 200/470, loss: 0.016883907839655876 2023-01-22 21:36:06.225984: step: 202/470, loss: 0.000815562263596803 2023-01-22 21:36:06.983489: step: 204/470, loss: 0.010811416432261467 2023-01-22 21:36:07.702258: step: 206/470, loss: 0.029568077996373177 2023-01-22 21:36:08.472335: step: 208/470, loss: 0.07659181952476501 2023-01-22 21:36:09.170607: step: 210/470, loss: 0.0033108582720160484 2023-01-22 21:36:09.926240: step: 212/470, loss: 0.011148279532790184 2023-01-22 21:36:10.702838: step: 214/470, loss: 0.07719606906175613 2023-01-22 21:36:11.393919: step: 216/470, loss: 0.00038251784280873835 2023-01-22 21:36:12.059617: step: 218/470, loss: 0.002361322520300746 2023-01-22 21:36:12.789483: step: 220/470, loss: 0.005027166102081537 2023-01-22 21:36:13.617548: step: 222/470, loss: 0.005157764069736004 2023-01-22 21:36:14.283454: step: 224/470, loss: 0.001214994816109538 2023-01-22 21:36:15.158360: step: 226/470, loss: 0.016031332314014435 2023-01-22 21:36:15.813564: step: 228/470, loss: 0.0022524199448525906 2023-01-22 21:36:16.604482: step: 230/470, loss: 0.011084591038525105 2023-01-22 21:36:17.235331: step: 232/470, loss: 0.0007433760329149663 2023-01-22 21:36:17.922813: step: 234/470, loss: 0.000476872461149469 2023-01-22 21:36:18.608116: step: 236/470, loss: 0.007138427346944809 2023-01-22 21:36:19.310086: step: 238/470, loss: 0.010517815127968788 2023-01-22 21:36:20.054837: step: 240/470, loss: 0.007859362289309502 2023-01-22 21:36:20.766511: step: 242/470, loss: 0.002640694146975875 2023-01-22 21:36:21.537969: step: 244/470, loss: 0.0009882624726742506 2023-01-22 21:36:22.298975: step: 246/470, loss: 0.41087213158607483 2023-01-22 21:36:22.988886: step: 248/470, loss: 0.003996912389993668 2023-01-22 21:36:23.737309: step: 250/470, loss: 0.0006755517679266632 2023-01-22 21:36:24.465656: step: 252/470, loss: 0.7485092878341675 2023-01-22 21:36:25.195760: step: 254/470, loss: 0.014115111902356148 2023-01-22 21:36:25.972731: step: 256/470, loss: 0.0011114904191344976 2023-01-22 21:36:26.718221: step: 258/470, loss: 0.003493404248729348 2023-01-22 21:36:27.410575: step: 260/470, loss: 0.005939795169979334 2023-01-22 21:36:28.131244: step: 262/470, loss: 0.002627335721626878 2023-01-22 21:36:28.886177: step: 264/470, loss: 0.005035010632127523 2023-01-22 21:36:29.584177: step: 266/470, loss: 0.0006927988724783063 2023-01-22 21:36:30.351266: step: 268/470, loss: 0.0007211231859400868 2023-01-22 21:36:31.088378: step: 270/470, loss: 0.011166412383317947 2023-01-22 21:36:31.713975: step: 272/470, loss: 0.001943384064361453 2023-01-22 21:36:32.553033: step: 274/470, loss: 0.0034223003312945366 2023-01-22 21:36:33.244052: step: 276/470, loss: 0.0008902418776415288 2023-01-22 21:36:33.913650: step: 278/470, loss: 1.600859104655683e-05 2023-01-22 21:36:34.660793: step: 280/470, loss: 0.009566079825162888 2023-01-22 21:36:35.366563: step: 282/470, loss: 0.014527286402881145 2023-01-22 21:36:36.046657: step: 284/470, loss: 0.0004398828314151615 2023-01-22 21:36:36.798262: step: 286/470, loss: 0.0003213112649973482 2023-01-22 21:36:37.498835: step: 288/470, loss: 0.015395899303257465 2023-01-22 21:36:38.238179: step: 290/470, loss: 0.0060600400902330875 2023-01-22 21:36:38.878747: step: 292/470, loss: 0.043014369904994965 2023-01-22 21:36:39.668189: step: 294/470, loss: 0.0011471402831375599 2023-01-22 21:36:40.393582: step: 296/470, loss: 0.005423126742243767 2023-01-22 21:36:41.067475: step: 298/470, loss: 0.002312835305929184 2023-01-22 21:36:41.880536: step: 300/470, loss: 3.738845043699257e-05 2023-01-22 21:36:42.635567: step: 302/470, loss: 0.0018065288895741105 2023-01-22 21:36:43.381123: step: 304/470, loss: 0.0027574519626796246 2023-01-22 21:36:44.112015: step: 306/470, loss: 0.01440048310905695 2023-01-22 21:36:44.756498: step: 308/470, loss: 0.0008997022523544729 2023-01-22 21:36:45.403225: step: 310/470, loss: 6.702099199173972e-05 2023-01-22 21:36:46.202360: step: 312/470, loss: 0.0026232399977743626 2023-01-22 21:36:46.973763: step: 314/470, loss: 0.024350032210350037 2023-01-22 21:36:47.722797: step: 316/470, loss: 0.001658335910178721 2023-01-22 21:36:48.593389: step: 318/470, loss: 0.17800642549991608 2023-01-22 21:36:49.447000: step: 320/470, loss: 0.007545904256403446 2023-01-22 21:36:50.152143: step: 322/470, loss: 0.005511886440217495 2023-01-22 21:36:50.914434: step: 324/470, loss: 0.00106968788895756 2023-01-22 21:36:51.685494: step: 326/470, loss: 0.009400931186974049 2023-01-22 21:36:52.477661: step: 328/470, loss: 0.09563762694597244 2023-01-22 21:36:53.313132: step: 330/470, loss: 0.032428767532110214 2023-01-22 21:36:54.009232: step: 332/470, loss: 0.0007735613035038114 2023-01-22 21:36:54.735077: step: 334/470, loss: 0.0012304666452109814 2023-01-22 21:36:55.461185: step: 336/470, loss: 0.009064443409442902 2023-01-22 21:36:56.179994: step: 338/470, loss: 0.013549595139920712 2023-01-22 21:36:56.822340: step: 340/470, loss: 0.005491095595061779 2023-01-22 21:36:57.567850: step: 342/470, loss: 0.05642802268266678 2023-01-22 21:36:58.332883: step: 344/470, loss: 0.0018173677381128073 2023-01-22 21:36:59.109543: step: 346/470, loss: 5.139792442321777 2023-01-22 21:36:59.834588: step: 348/470, loss: 0.02769005112349987 2023-01-22 21:37:00.534792: step: 350/470, loss: 0.000412216002587229 2023-01-22 21:37:01.245872: step: 352/470, loss: 0.003857748582959175 2023-01-22 21:37:01.942296: step: 354/470, loss: 2.7695212338585407e-05 2023-01-22 21:37:02.654367: step: 356/470, loss: 0.018569406121969223 2023-01-22 21:37:03.305438: step: 358/470, loss: 0.012955770827829838 2023-01-22 21:37:04.086923: step: 360/470, loss: 0.010661961510777473 2023-01-22 21:37:04.785937: step: 362/470, loss: 0.00027638301253318787 2023-01-22 21:37:05.529080: step: 364/470, loss: 0.007698203437030315 2023-01-22 21:37:06.313176: step: 366/470, loss: 0.04173200950026512 2023-01-22 21:37:07.051787: step: 368/470, loss: 0.024436986073851585 2023-01-22 21:37:07.812409: step: 370/470, loss: 0.01597761921584606 2023-01-22 21:37:08.541433: step: 372/470, loss: 0.0023119584657251835 2023-01-22 21:37:09.277278: step: 374/470, loss: 0.014497130177915096 2023-01-22 21:37:10.018964: step: 376/470, loss: 0.050365082919597626 2023-01-22 21:37:10.793394: step: 378/470, loss: 0.0003797081299126148 2023-01-22 21:37:11.463686: step: 380/470, loss: 0.008779437281191349 2023-01-22 21:37:12.248435: step: 382/470, loss: 0.0028053114656358957 2023-01-22 21:37:12.937074: step: 384/470, loss: 0.03160949423909187 2023-01-22 21:37:13.611209: step: 386/470, loss: 0.0006880006403662264 2023-01-22 21:37:14.373507: step: 388/470, loss: 0.04208315163850784 2023-01-22 21:37:15.069635: step: 390/470, loss: 0.048570651561021805 2023-01-22 21:37:15.747189: step: 392/470, loss: 0.00653655594214797 2023-01-22 21:37:16.532827: step: 394/470, loss: 0.007898114621639252 2023-01-22 21:37:17.185844: step: 396/470, loss: 0.004265481140464544 2023-01-22 21:37:17.892069: step: 398/470, loss: 0.01859181746840477 2023-01-22 21:37:18.628850: step: 400/470, loss: 0.08174117654561996 2023-01-22 21:37:19.387180: step: 402/470, loss: 0.0019423263147473335 2023-01-22 21:37:20.196874: step: 404/470, loss: 0.8433279991149902 2023-01-22 21:37:20.908798: step: 406/470, loss: 0.01536853052675724 2023-01-22 21:37:21.665645: step: 408/470, loss: 0.0003358535177540034 2023-01-22 21:37:22.520146: step: 410/470, loss: 0.0031478151213377714 2023-01-22 21:37:23.246231: step: 412/470, loss: 0.013405255042016506 2023-01-22 21:37:24.043347: step: 414/470, loss: 0.016643131151795387 2023-01-22 21:37:24.900540: step: 416/470, loss: 0.02678093872964382 2023-01-22 21:37:25.589453: step: 418/470, loss: 0.020490285009145737 2023-01-22 21:37:26.325602: step: 420/470, loss: 0.0006113100098446012 2023-01-22 21:37:27.055857: step: 422/470, loss: 0.002850792370736599 2023-01-22 21:37:27.724469: step: 424/470, loss: 0.006809963844716549 2023-01-22 21:37:28.417447: step: 426/470, loss: 0.004680205602198839 2023-01-22 21:37:29.181016: step: 428/470, loss: 0.05235651135444641 2023-01-22 21:37:29.799218: step: 430/470, loss: 0.0019032071577385068 2023-01-22 21:37:30.557514: step: 432/470, loss: 0.002714117057621479 2023-01-22 21:37:31.241940: step: 434/470, loss: 4.397636803332716e-05 2023-01-22 21:37:31.977426: step: 436/470, loss: 0.004602306988090277 2023-01-22 21:37:32.686255: step: 438/470, loss: 0.002536438638344407 2023-01-22 21:37:33.313804: step: 440/470, loss: 0.0079796202480793 2023-01-22 21:37:34.014420: step: 442/470, loss: 0.0008243197808042169 2023-01-22 21:37:34.764074: step: 444/470, loss: 0.5589166283607483 2023-01-22 21:37:35.488149: step: 446/470, loss: 0.0018179480684921145 2023-01-22 21:37:36.139569: step: 448/470, loss: 0.0001185851579066366 2023-01-22 21:37:36.893761: step: 450/470, loss: 0.0011897934600710869 2023-01-22 21:37:37.655699: step: 452/470, loss: 0.00027267372934147716 2023-01-22 21:37:38.407612: step: 454/470, loss: 0.0025439695455133915 2023-01-22 21:37:39.081655: step: 456/470, loss: 0.002703068545088172 2023-01-22 21:37:39.829190: step: 458/470, loss: 0.005375206470489502 2023-01-22 21:37:40.501233: step: 460/470, loss: 0.19524531066417694 2023-01-22 21:37:41.183204: step: 462/470, loss: 0.010033880360424519 2023-01-22 21:37:41.946892: step: 464/470, loss: 0.012586308643221855 2023-01-22 21:37:42.686517: step: 466/470, loss: 0.0472489595413208 2023-01-22 21:37:43.364502: step: 468/470, loss: 0.0008478930103592575 2023-01-22 21:37:44.103913: step: 470/470, loss: 0.11491312831640244 2023-01-22 21:37:44.862418: step: 472/470, loss: 0.009833801537752151 2023-01-22 21:37:45.604464: step: 474/470, loss: 0.0006098474841564894 2023-01-22 21:37:46.301147: step: 476/470, loss: 0.008477217517793179 2023-01-22 21:37:47.086850: step: 478/470, loss: 0.01668260060250759 2023-01-22 21:37:47.769513: step: 480/470, loss: 0.0023414173629134893 2023-01-22 21:37:48.497214: step: 482/470, loss: 0.00018246278341393918 2023-01-22 21:37:49.183044: step: 484/470, loss: 0.12619908154010773 2023-01-22 21:37:49.929833: step: 486/470, loss: 0.2414586991071701 2023-01-22 21:37:50.549266: step: 488/470, loss: 0.010544035583734512 2023-01-22 21:37:51.225960: step: 490/470, loss: 0.005296720191836357 2023-01-22 21:37:51.915997: step: 492/470, loss: 0.00037122564390301704 2023-01-22 21:37:52.658963: step: 494/470, loss: 0.03093797340989113 2023-01-22 21:37:53.418611: step: 496/470, loss: 0.008792520500719547 2023-01-22 21:37:54.166240: step: 498/470, loss: 0.00578495254740119 2023-01-22 21:37:54.881097: step: 500/470, loss: 0.001827099360525608 2023-01-22 21:37:55.642261: step: 502/470, loss: 0.0032651459332555532 2023-01-22 21:37:56.329963: step: 504/470, loss: 0.006231415551155806 2023-01-22 21:37:57.070819: step: 506/470, loss: 0.007668264210224152 2023-01-22 21:37:57.809932: step: 508/470, loss: 0.008598407730460167 2023-01-22 21:37:58.525919: step: 510/470, loss: 0.00026979786343872547 2023-01-22 21:37:59.235392: step: 512/470, loss: 0.0024315055925399065 2023-01-22 21:37:59.921697: step: 514/470, loss: 0.004124908242374659 2023-01-22 21:38:00.629427: step: 516/470, loss: 0.016314754262566566 2023-01-22 21:38:01.331220: step: 518/470, loss: 0.0018118376610800624 2023-01-22 21:38:02.154690: step: 520/470, loss: 0.015621883794665337 2023-01-22 21:38:02.910984: step: 522/470, loss: 0.9467869400978088 2023-01-22 21:38:03.721201: step: 524/470, loss: 0.0005159341963008046 2023-01-22 21:38:04.393742: step: 526/470, loss: 0.0463469959795475 2023-01-22 21:38:05.085688: step: 528/470, loss: 0.006032614037394524 2023-01-22 21:38:05.839826: step: 530/470, loss: 0.0176707673817873 2023-01-22 21:38:06.559420: step: 532/470, loss: 0.0011732151033356786 2023-01-22 21:38:07.280076: step: 534/470, loss: 0.00020226027118042111 2023-01-22 21:38:08.024870: step: 536/470, loss: 0.021612750366330147 2023-01-22 21:38:08.741545: step: 538/470, loss: 0.00013169506564736366 2023-01-22 21:38:09.475228: step: 540/470, loss: 0.003677097614854574 2023-01-22 21:38:10.170761: step: 542/470, loss: 0.0020855306647717953 2023-01-22 21:38:10.874184: step: 544/470, loss: 0.0180249884724617 2023-01-22 21:38:11.570442: step: 546/470, loss: 0.40944904088974 2023-01-22 21:38:12.290017: step: 548/470, loss: 0.03017675317823887 2023-01-22 21:38:13.013487: step: 550/470, loss: 0.0034764918964356184 2023-01-22 21:38:13.760350: step: 552/470, loss: 0.034397684037685394 2023-01-22 21:38:14.533602: step: 554/470, loss: 0.03786651790142059 2023-01-22 21:38:15.235283: step: 556/470, loss: 5.748906914959662e-05 2023-01-22 21:38:15.955653: step: 558/470, loss: 0.010671430267393589 2023-01-22 21:38:16.564128: step: 560/470, loss: 0.004713733680546284 2023-01-22 21:38:17.362470: step: 562/470, loss: 0.009778480976819992 2023-01-22 21:38:18.134484: step: 564/470, loss: 0.030256683006882668 2023-01-22 21:38:18.904941: step: 566/470, loss: 0.16448631882667542 2023-01-22 21:38:19.654026: step: 568/470, loss: 0.031042061746120453 2023-01-22 21:38:20.305437: step: 570/470, loss: 0.0012754781637340784 2023-01-22 21:38:21.026835: step: 572/470, loss: 4.709012864623219e-05 2023-01-22 21:38:21.786075: step: 574/470, loss: 0.0009633488371036947 2023-01-22 21:38:22.486477: step: 576/470, loss: 0.004429661203175783 2023-01-22 21:38:23.241292: step: 578/470, loss: 0.002976109506562352 2023-01-22 21:38:23.947234: step: 580/470, loss: 7.336642738664523e-05 2023-01-22 21:38:24.707855: step: 582/470, loss: 0.002431001979857683 2023-01-22 21:38:25.466462: step: 584/470, loss: 0.0037843480240553617 2023-01-22 21:38:26.137712: step: 586/470, loss: 0.010521448217332363 2023-01-22 21:38:26.899637: step: 588/470, loss: 0.025577588006854057 2023-01-22 21:38:27.692211: step: 590/470, loss: 1.7032889445545152e-05 2023-01-22 21:38:28.371751: step: 592/470, loss: 0.009300955571234226 2023-01-22 21:38:29.039809: step: 594/470, loss: 0.0038660001009702682 2023-01-22 21:38:29.752869: step: 596/470, loss: 0.015712929889559746 2023-01-22 21:38:30.475972: step: 598/470, loss: 0.0012648508418351412 2023-01-22 21:38:31.107607: step: 600/470, loss: 1.5697081835241988e-05 2023-01-22 21:38:31.900882: step: 602/470, loss: 0.004670980852097273 2023-01-22 21:38:32.646653: step: 604/470, loss: 0.0012620283523574471 2023-01-22 21:38:33.526027: step: 606/470, loss: 0.03995297849178314 2023-01-22 21:38:34.263423: step: 608/470, loss: 0.0038986399304121733 2023-01-22 21:38:34.923594: step: 610/470, loss: 0.00033180887112393975 2023-01-22 21:38:35.676374: step: 612/470, loss: 0.2208339273929596 2023-01-22 21:38:36.377909: step: 614/470, loss: 0.005182696972042322 2023-01-22 21:38:37.063298: step: 616/470, loss: 0.05351833254098892 2023-01-22 21:38:37.741871: step: 618/470, loss: 0.002840483095496893 2023-01-22 21:38:38.496703: step: 620/470, loss: 0.02025892585515976 2023-01-22 21:38:39.206000: step: 622/470, loss: 0.0013204108690842986 2023-01-22 21:38:39.969579: step: 624/470, loss: 0.013018092140555382 2023-01-22 21:38:40.687683: step: 626/470, loss: 0.012406433932483196 2023-01-22 21:38:41.445304: step: 628/470, loss: 0.05976390466094017 2023-01-22 21:38:42.179946: step: 630/470, loss: 0.026684027165174484 2023-01-22 21:38:42.884024: step: 632/470, loss: 0.02948051132261753 2023-01-22 21:38:43.609558: step: 634/470, loss: 0.007042787969112396 2023-01-22 21:38:44.290845: step: 636/470, loss: 0.0019455266883596778 2023-01-22 21:38:45.063134: step: 638/470, loss: 0.0003431853256188333 2023-01-22 21:38:45.805781: step: 640/470, loss: 7.0864763983991e-05 2023-01-22 21:38:46.559425: step: 642/470, loss: 0.0003391270583961159 2023-01-22 21:38:47.297566: step: 644/470, loss: 4.537831409834325e-05 2023-01-22 21:38:47.970027: step: 646/470, loss: 0.0003427540068514645 2023-01-22 21:38:48.771884: step: 648/470, loss: 0.0021758859511464834 2023-01-22 21:38:49.500035: step: 650/470, loss: 0.005326189566403627 2023-01-22 21:38:50.285044: step: 652/470, loss: 0.0031783096492290497 2023-01-22 21:38:51.011565: step: 654/470, loss: 0.008024906739592552 2023-01-22 21:38:51.676481: step: 656/470, loss: 0.02158765122294426 2023-01-22 21:38:52.376203: step: 658/470, loss: 0.002388355555012822 2023-01-22 21:38:53.069087: step: 660/470, loss: 0.06495095044374466 2023-01-22 21:38:53.796373: step: 662/470, loss: 8.486651495331898e-05 2023-01-22 21:38:54.542975: step: 664/470, loss: 0.0028081880882382393 2023-01-22 21:38:55.262139: step: 666/470, loss: 0.0018558679148554802 2023-01-22 21:38:56.094423: step: 668/470, loss: 0.06942654401063919 2023-01-22 21:38:56.844508: step: 670/470, loss: 0.07627329230308533 2023-01-22 21:38:57.548750: step: 672/470, loss: 0.001862462260760367 2023-01-22 21:38:58.272865: step: 674/470, loss: 0.05258966609835625 2023-01-22 21:38:59.001582: step: 676/470, loss: 0.0007247717585414648 2023-01-22 21:38:59.672326: step: 678/470, loss: 0.018573222681879997 2023-01-22 21:39:00.453808: step: 680/470, loss: 0.0053161317482590675 2023-01-22 21:39:01.162935: step: 682/470, loss: 0.00019386372878216207 2023-01-22 21:39:01.835433: step: 684/470, loss: 0.13742390275001526 2023-01-22 21:39:02.610175: step: 686/470, loss: 0.0006456512492150068 2023-01-22 21:39:03.389367: step: 688/470, loss: 0.18807780742645264 2023-01-22 21:39:04.169085: step: 690/470, loss: 0.028313491493463516 2023-01-22 21:39:04.880983: step: 692/470, loss: 0.0024669389240443707 2023-01-22 21:39:05.648651: step: 694/470, loss: 0.03522627428174019 2023-01-22 21:39:06.384840: step: 696/470, loss: 0.010675939731299877 2023-01-22 21:39:07.050084: step: 698/470, loss: 0.0003027912462130189 2023-01-22 21:39:07.754183: step: 700/470, loss: 0.002517101587727666 2023-01-22 21:39:08.377939: step: 702/470, loss: 0.014464635401964188 2023-01-22 21:39:09.027735: step: 704/470, loss: 0.0031239360105246305 2023-01-22 21:39:09.734586: step: 706/470, loss: 0.00918420311063528 2023-01-22 21:39:10.445865: step: 708/470, loss: 0.001296902191825211 2023-01-22 21:39:11.176380: step: 710/470, loss: 0.022592980414628983 2023-01-22 21:39:11.932844: step: 712/470, loss: 0.00858447514474392 2023-01-22 21:39:12.642065: step: 714/470, loss: 0.0009968002559617162 2023-01-22 21:39:13.483077: step: 716/470, loss: 0.01551523432135582 2023-01-22 21:39:14.191809: step: 718/470, loss: 0.0007260640268214047 2023-01-22 21:39:14.937381: step: 720/470, loss: 0.35326698422431946 2023-01-22 21:39:15.677421: step: 722/470, loss: 0.01254792045801878 2023-01-22 21:39:16.352819: step: 724/470, loss: 0.0011204167967662215 2023-01-22 21:39:17.141440: step: 726/470, loss: 0.009652595967054367 2023-01-22 21:39:17.920284: step: 728/470, loss: 0.0007612319895997643 2023-01-22 21:39:18.659462: step: 730/470, loss: 0.08440537005662918 2023-01-22 21:39:19.367760: step: 732/470, loss: 0.0013828004011884332 2023-01-22 21:39:20.075188: step: 734/470, loss: 0.03739370405673981 2023-01-22 21:39:20.878620: step: 736/470, loss: 0.009630827233195305 2023-01-22 21:39:21.584969: step: 738/470, loss: 0.002095034345984459 2023-01-22 21:39:22.409020: step: 740/470, loss: 0.010538318194448948 2023-01-22 21:39:23.154147: step: 742/470, loss: 0.014126413501799107 2023-01-22 21:39:23.776620: step: 744/470, loss: 4.5405220589600503e-05 2023-01-22 21:39:24.562200: step: 746/470, loss: 0.9865442514419556 2023-01-22 21:39:25.338171: step: 748/470, loss: 0.7587553262710571 2023-01-22 21:39:26.009955: step: 750/470, loss: 0.004690216854214668 2023-01-22 21:39:26.751303: step: 752/470, loss: 0.016793884336948395 2023-01-22 21:39:27.468679: step: 754/470, loss: 0.003269862150773406 2023-01-22 21:39:28.190603: step: 756/470, loss: 0.018169786781072617 2023-01-22 21:39:28.913530: step: 758/470, loss: 0.02258247695863247 2023-01-22 21:39:29.595044: step: 760/470, loss: 5.565112587646581e-05 2023-01-22 21:39:30.264545: step: 762/470, loss: 0.009303715080022812 2023-01-22 21:39:31.049821: step: 764/470, loss: 0.017047366127371788 2023-01-22 21:39:31.795574: step: 766/470, loss: 0.015494248829782009 2023-01-22 21:39:32.526141: step: 768/470, loss: 0.026896020397543907 2023-01-22 21:39:33.241673: step: 770/470, loss: 0.005921604577451944 2023-01-22 21:39:34.013723: step: 772/470, loss: 0.001984368311241269 2023-01-22 21:39:34.698529: step: 774/470, loss: 0.002783828182145953 2023-01-22 21:39:35.439049: step: 776/470, loss: 0.019098268821835518 2023-01-22 21:39:36.273645: step: 778/470, loss: 0.005505918525159359 2023-01-22 21:39:37.018975: step: 780/470, loss: 0.24571584165096283 2023-01-22 21:39:37.700013: step: 782/470, loss: 0.0006462166784331203 2023-01-22 21:39:38.380858: step: 784/470, loss: 0.0012267071288079023 2023-01-22 21:39:39.062019: step: 786/470, loss: 0.004911855328828096 2023-01-22 21:39:39.781193: step: 788/470, loss: 0.011736730113625526 2023-01-22 21:39:40.497709: step: 790/470, loss: 0.00543177267536521 2023-01-22 21:39:41.417266: step: 792/470, loss: 0.024822987616062164 2023-01-22 21:39:42.194972: step: 794/470, loss: 0.011290385387837887 2023-01-22 21:39:42.947727: step: 796/470, loss: 0.0001863948127720505 2023-01-22 21:39:43.762423: step: 798/470, loss: 0.0004333317338023335 2023-01-22 21:39:44.442948: step: 800/470, loss: 0.0003069574595429003 2023-01-22 21:39:45.177864: step: 802/470, loss: 0.001023111748509109 2023-01-22 21:39:45.944885: step: 804/470, loss: 0.002765122102573514 2023-01-22 21:39:46.642760: step: 806/470, loss: 0.0706728845834732 2023-01-22 21:39:47.474559: step: 808/470, loss: 0.001774181961081922 2023-01-22 21:39:48.223958: step: 810/470, loss: 0.0005504356813617051 2023-01-22 21:39:49.109592: step: 812/470, loss: 0.024189863353967667 2023-01-22 21:39:49.861157: step: 814/470, loss: 0.019002093002200127 2023-01-22 21:39:50.608611: step: 816/470, loss: 0.0006984842475503683 2023-01-22 21:39:51.299432: step: 818/470, loss: 0.0062248483300209045 2023-01-22 21:39:52.059018: step: 820/470, loss: 0.0006082578329369426 2023-01-22 21:39:52.859233: step: 822/470, loss: 0.0046651409938931465 2023-01-22 21:39:53.555248: step: 824/470, loss: 0.0008501000702381134 2023-01-22 21:39:54.215366: step: 826/470, loss: 0.008006863296031952 2023-01-22 21:39:54.943918: step: 828/470, loss: 0.0064245061948895454 2023-01-22 21:39:55.588693: step: 830/470, loss: 0.0001547907741041854 2023-01-22 21:39:56.252875: step: 832/470, loss: 0.0005969268968328834 2023-01-22 21:39:56.995557: step: 834/470, loss: 0.04945269599556923 2023-01-22 21:39:57.811674: step: 836/470, loss: 0.0009366283193230629 2023-01-22 21:39:58.581419: step: 838/470, loss: 0.0020159182604402304 2023-01-22 21:39:59.299594: step: 840/470, loss: 0.14137539267539978 2023-01-22 21:40:00.073648: step: 842/470, loss: 0.012791264802217484 2023-01-22 21:40:00.740371: step: 844/470, loss: 0.0020705685019493103 2023-01-22 21:40:01.529517: step: 846/470, loss: 0.006297953426837921 2023-01-22 21:40:02.317339: step: 848/470, loss: 0.01519366167485714 2023-01-22 21:40:03.117949: step: 850/470, loss: 0.001254754257388413 2023-01-22 21:40:03.945346: step: 852/470, loss: 0.00023478205548599362 2023-01-22 21:40:04.672736: step: 854/470, loss: 0.004564228001981974 2023-01-22 21:40:05.359614: step: 856/470, loss: 0.0048842052929103374 2023-01-22 21:40:06.045064: step: 858/470, loss: 0.020877385511994362 2023-01-22 21:40:06.801618: step: 860/470, loss: 0.18510141968727112 2023-01-22 21:40:07.573213: step: 862/470, loss: 0.011671909131109715 2023-01-22 21:40:08.318217: step: 864/470, loss: 0.02993558533489704 2023-01-22 21:40:08.985802: step: 866/470, loss: 0.0056413402780890465 2023-01-22 21:40:09.707602: step: 868/470, loss: 0.009789801202714443 2023-01-22 21:40:10.483198: step: 870/470, loss: 0.029731469228863716 2023-01-22 21:40:11.227259: step: 872/470, loss: 0.01826680265367031 2023-01-22 21:40:11.934067: step: 874/470, loss: 0.0003056666173506528 2023-01-22 21:40:12.714999: step: 876/470, loss: 0.007411897648125887 2023-01-22 21:40:13.586675: step: 878/470, loss: 0.00305022019892931 2023-01-22 21:40:14.272579: step: 880/470, loss: 5.6819328165147454e-05 2023-01-22 21:40:15.022273: step: 882/470, loss: 0.0015559961320832372 2023-01-22 21:40:15.796239: step: 884/470, loss: 0.03191646188497543 2023-01-22 21:40:16.631008: step: 886/470, loss: 0.00044945150148123503 2023-01-22 21:40:17.328045: step: 888/470, loss: 0.00564240338280797 2023-01-22 21:40:18.056013: step: 890/470, loss: 0.0024683803785592318 2023-01-22 21:40:18.794839: step: 892/470, loss: 0.025083282962441444 2023-01-22 21:40:19.524205: step: 894/470, loss: 0.007105534430593252 2023-01-22 21:40:20.200725: step: 896/470, loss: 0.0048616016283631325 2023-01-22 21:40:20.968086: step: 898/470, loss: 0.03480706736445427 2023-01-22 21:40:21.755136: step: 900/470, loss: 0.0010672089410945773 2023-01-22 21:40:22.553712: step: 902/470, loss: 0.006357176695019007 2023-01-22 21:40:23.296674: step: 904/470, loss: 0.002066312823444605 2023-01-22 21:40:23.997007: step: 906/470, loss: 9.631262946641073e-05 2023-01-22 21:40:24.712498: step: 908/470, loss: 0.001976320054382086 2023-01-22 21:40:25.641564: step: 910/470, loss: 0.00038530846359208226 2023-01-22 21:40:26.395724: step: 912/470, loss: 0.0007559002260677516 2023-01-22 21:40:27.128768: step: 914/470, loss: 0.004526065196841955 2023-01-22 21:40:27.898717: step: 916/470, loss: 0.0027980429586023092 2023-01-22 21:40:28.663583: step: 918/470, loss: 0.001787687069736421 2023-01-22 21:40:29.389382: step: 920/470, loss: 0.0017783924704417586 2023-01-22 21:40:30.119951: step: 922/470, loss: 0.010765299201011658 2023-01-22 21:40:30.996712: step: 924/470, loss: 0.005860594101250172 2023-01-22 21:40:31.753640: step: 926/470, loss: 0.009120491333305836 2023-01-22 21:40:32.576263: step: 928/470, loss: 0.051197804510593414 2023-01-22 21:40:33.486117: step: 930/470, loss: 0.1064954325556755 2023-01-22 21:40:34.208495: step: 932/470, loss: 0.008499711751937866 2023-01-22 21:40:34.922346: step: 934/470, loss: 0.03420386090874672 2023-01-22 21:40:35.697536: step: 936/470, loss: 0.001930433209054172 2023-01-22 21:40:36.434320: step: 938/470, loss: 0.06531298905611038 2023-01-22 21:40:37.106314: step: 940/470, loss: 0.048529766499996185 2023-01-22 21:40:37.696701: step: 942/470, loss: 0.0032194419763982296 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28993341570565623, 'r': 0.3339460784313726, 'f1': 0.31038727219282775}, 'combined': 0.228706411089452, 'epoch': 36} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3398487305486143, 'r': 0.36958549447161804, 'f1': 0.35409388691891547}, 'combined': 0.24663255805297596, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2865501638698008, 'r': 0.3365741013954587, 'f1': 0.30955419098674813}, 'combined': 0.22809256177970913, 'epoch': 36} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3422391101402614, 'r': 0.3705396519403215, 'f1': 0.35582755126309723}, 'combined': 0.24784008545688366, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26583015267175575, 'r': 0.330396110056926, 'f1': 0.2946171742808799}, 'combined': 0.21708633894380625, 'epoch': 36} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33340920520038964, 'r': 0.3850235148516038, 'f1': 0.35736229847895395}, 'combined': 0.24890906361718187, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20550351288056204, 'r': 0.3581632653061224, 'f1': 0.2611607142857143}, 'combined': 0.17410714285714285, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23469387755102042, 'r': 0.5, 'f1': 0.3194444444444444}, 'combined': 0.1597222222222222, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40625, 'r': 0.33620689655172414, 'f1': 0.36792452830188677}, 'combined': 0.2452830188679245, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 21:43:15.133072: step: 2/470, loss: 0.0016411297256127 2023-01-22 21:43:15.978913: step: 4/470, loss: 0.016578450798988342 2023-01-22 21:43:16.725884: step: 6/470, loss: 0.00037532756687141955 2023-01-22 21:43:17.430641: step: 8/470, loss: 0.0027909998316317797 2023-01-22 21:43:18.137218: step: 10/470, loss: 0.08526574820280075 2023-01-22 21:43:18.849801: step: 12/470, loss: 0.00015779869863763452 2023-01-22 21:43:19.565718: step: 14/470, loss: 0.0017313596326857805 2023-01-22 21:43:20.309257: step: 16/470, loss: 0.00044223369332030416 2023-01-22 21:43:21.060148: step: 18/470, loss: 0.06428509205579758 2023-01-22 21:43:21.849069: step: 20/470, loss: 0.00421540392562747 2023-01-22 21:43:22.568219: step: 22/470, loss: 2.9487378924386576e-05 2023-01-22 21:43:23.389818: step: 24/470, loss: 0.0027779489755630493 2023-01-22 21:43:24.063962: step: 26/470, loss: 0.00016546357073821127 2023-01-22 21:43:24.724674: step: 28/470, loss: 0.006498188711702824 2023-01-22 21:43:25.440365: step: 30/470, loss: 0.0014709843089804053 2023-01-22 21:43:26.213039: step: 32/470, loss: 0.0002651438699103892 2023-01-22 21:43:26.913838: step: 34/470, loss: 0.7554433941841125 2023-01-22 21:43:27.588387: step: 36/470, loss: 0.00971017125993967 2023-01-22 21:43:28.320670: step: 38/470, loss: 0.002934138523414731 2023-01-22 21:43:28.992011: step: 40/470, loss: 0.00081524538109079 2023-01-22 21:43:29.636114: step: 42/470, loss: 0.003965584561228752 2023-01-22 21:43:30.378601: step: 44/470, loss: 0.26607316732406616 2023-01-22 21:43:31.110733: step: 46/470, loss: 0.37619858980178833 2023-01-22 21:43:31.923375: step: 48/470, loss: 0.20665885508060455 2023-01-22 21:43:32.688608: step: 50/470, loss: 0.0007290860521607101 2023-01-22 21:43:33.440657: step: 52/470, loss: 0.032187577337026596 2023-01-22 21:43:34.141051: step: 54/470, loss: 0.006359893828630447 2023-01-22 21:43:34.889511: step: 56/470, loss: 0.000407334475312382 2023-01-22 21:43:35.658860: step: 58/470, loss: 0.004570923279970884 2023-01-22 21:43:36.364690: step: 60/470, loss: 0.02728239819407463 2023-01-22 21:43:37.204456: step: 62/470, loss: 0.17052745819091797 2023-01-22 21:43:37.935717: step: 64/470, loss: 0.007626243866980076 2023-01-22 21:43:38.646567: step: 66/470, loss: 0.0001989319862332195 2023-01-22 21:43:39.397143: step: 68/470, loss: 0.011886470019817352 2023-01-22 21:43:40.065409: step: 70/470, loss: 0.0021775467321276665 2023-01-22 21:43:40.814799: step: 72/470, loss: 0.0016382178291678429 2023-01-22 21:43:41.601456: step: 74/470, loss: 0.005465012509375811 2023-01-22 21:43:42.395121: step: 76/470, loss: 0.004183304961770773 2023-01-22 21:43:43.114759: step: 78/470, loss: 0.029515385627746582 2023-01-22 21:43:43.873895: step: 80/470, loss: 0.0003540659963618964 2023-01-22 21:43:44.579268: step: 82/470, loss: 0.00043878774158656597 2023-01-22 21:43:45.348493: step: 84/470, loss: 0.0017511650221422315 2023-01-22 21:43:46.068595: step: 86/470, loss: 0.0043872627429664135 2023-01-22 21:43:46.838712: step: 88/470, loss: 0.4596310257911682 2023-01-22 21:43:47.542455: step: 90/470, loss: 0.006851941347122192 2023-01-22 21:43:48.258141: step: 92/470, loss: 0.015803921967744827 2023-01-22 21:43:49.015410: step: 94/470, loss: 0.014629622921347618 2023-01-22 21:43:49.861854: step: 96/470, loss: 0.01233682967722416 2023-01-22 21:43:50.654022: step: 98/470, loss: 0.0026958484668284655 2023-01-22 21:43:51.366803: step: 100/470, loss: 0.005636794026941061 2023-01-22 21:43:52.081117: step: 102/470, loss: 0.00012584813521243632 2023-01-22 21:43:52.858836: step: 104/470, loss: 0.006042279303073883 2023-01-22 21:43:53.514666: step: 106/470, loss: 5.00088935950771e-05 2023-01-22 21:43:54.221418: step: 108/470, loss: 0.0024630522821098566 2023-01-22 21:43:54.982515: step: 110/470, loss: 0.045400720089673996 2023-01-22 21:43:55.701905: step: 112/470, loss: 0.006315591745078564 2023-01-22 21:43:56.459744: step: 114/470, loss: 3.898659269907512e-05 2023-01-22 21:43:57.170533: step: 116/470, loss: 0.0008913876954466105 2023-01-22 21:43:57.919173: step: 118/470, loss: 0.007831503637135029 2023-01-22 21:43:58.713220: step: 120/470, loss: 0.0005501203122548759 2023-01-22 21:43:59.392128: step: 122/470, loss: 0.06952209770679474 2023-01-22 21:44:00.168535: step: 124/470, loss: 0.002455966779962182 2023-01-22 21:44:00.907309: step: 126/470, loss: 0.018089069053530693 2023-01-22 21:44:01.649207: step: 128/470, loss: 0.0002797857450786978 2023-01-22 21:44:02.376294: step: 130/470, loss: 0.0014697941951453686 2023-01-22 21:44:03.150900: step: 132/470, loss: 0.027464300394058228 2023-01-22 21:44:03.908207: step: 134/470, loss: 0.0019714029040187597 2023-01-22 21:44:04.644689: step: 136/470, loss: 0.00593600096181035 2023-01-22 21:44:05.361757: step: 138/470, loss: 0.008636260405182838 2023-01-22 21:44:06.104468: step: 140/470, loss: 0.021064892411231995 2023-01-22 21:44:06.834812: step: 142/470, loss: 2.5779121642699465e-05 2023-01-22 21:44:07.592381: step: 144/470, loss: 1.2797374438378029e-05 2023-01-22 21:44:08.370005: step: 146/470, loss: 0.05897778272628784 2023-01-22 21:44:09.208449: step: 148/470, loss: 0.016995619982481003 2023-01-22 21:44:09.927718: step: 150/470, loss: 0.0013439225731417537 2023-01-22 21:44:10.630963: step: 152/470, loss: 0.056216444820165634 2023-01-22 21:44:11.310297: step: 154/470, loss: 0.0074424357153475285 2023-01-22 21:44:12.053721: step: 156/470, loss: 0.2076435685157776 2023-01-22 21:44:12.772789: step: 158/470, loss: 0.0012701174709945917 2023-01-22 21:44:13.504585: step: 160/470, loss: 0.00368306296877563 2023-01-22 21:44:14.343809: step: 162/470, loss: 0.05626552179455757 2023-01-22 21:44:15.068624: step: 164/470, loss: 0.01470974925905466 2023-01-22 21:44:15.739367: step: 166/470, loss: 0.1756785660982132 2023-01-22 21:44:16.424957: step: 168/470, loss: 0.0030123700853437185 2023-01-22 21:44:17.141012: step: 170/470, loss: 0.002067750785499811 2023-01-22 21:44:17.897785: step: 172/470, loss: 3.085620846832171e-05 2023-01-22 21:44:18.707095: step: 174/470, loss: 0.014414145611226559 2023-01-22 21:44:19.468182: step: 176/470, loss: 0.03165549039840698 2023-01-22 21:44:20.227190: step: 178/470, loss: 0.014181282371282578 2023-01-22 21:44:20.928340: step: 180/470, loss: 1.0798722505569458 2023-01-22 21:44:21.585433: step: 182/470, loss: 0.0001561841054353863 2023-01-22 21:44:22.256834: step: 184/470, loss: 0.015310071408748627 2023-01-22 21:44:23.245332: step: 186/470, loss: 0.022418607026338577 2023-01-22 21:44:23.987651: step: 188/470, loss: 0.07125722616910934 2023-01-22 21:44:24.758757: step: 190/470, loss: 0.013494039885699749 2023-01-22 21:44:25.493599: step: 192/470, loss: 0.015244124457240105 2023-01-22 21:44:26.190996: step: 194/470, loss: 0.01948845013976097 2023-01-22 21:44:26.888865: step: 196/470, loss: 0.00848582573235035 2023-01-22 21:44:27.617819: step: 198/470, loss: 0.049408018589019775 2023-01-22 21:44:28.347977: step: 200/470, loss: 0.003815028350800276 2023-01-22 21:44:29.089449: step: 202/470, loss: 0.05077585205435753 2023-01-22 21:44:29.911654: step: 204/470, loss: 0.0009153272258117795 2023-01-22 21:44:30.683667: step: 206/470, loss: 0.00020322480122558773 2023-01-22 21:44:31.463363: step: 208/470, loss: 0.00020252318063285202 2023-01-22 21:44:32.235365: step: 210/470, loss: 0.0242539644241333 2023-01-22 21:44:33.008172: step: 212/470, loss: 0.029064660891890526 2023-01-22 21:44:33.683146: step: 214/470, loss: 0.0016236408846452832 2023-01-22 21:44:34.404324: step: 216/470, loss: 0.12475449591875076 2023-01-22 21:44:35.040527: step: 218/470, loss: 0.0002371451264480129 2023-01-22 21:44:35.762168: step: 220/470, loss: 0.011168582364916801 2023-01-22 21:44:36.477697: step: 222/470, loss: 0.005518828984349966 2023-01-22 21:44:37.157890: step: 224/470, loss: 0.010224402882158756 2023-01-22 21:44:37.950632: step: 226/470, loss: 0.020215503871440887 2023-01-22 21:44:38.726418: step: 228/470, loss: 0.0007321978337131441 2023-01-22 21:44:39.434707: step: 230/470, loss: 2.0057850633747876e-05 2023-01-22 21:44:40.226801: step: 232/470, loss: 0.04708104953169823 2023-01-22 21:44:40.881253: step: 234/470, loss: 0.0007134419283829629 2023-01-22 21:44:41.593250: step: 236/470, loss: 0.7298058271408081 2023-01-22 21:44:42.254081: step: 238/470, loss: 0.007942304015159607 2023-01-22 21:44:42.979588: step: 240/470, loss: 0.00015898249694146216 2023-01-22 21:44:43.721521: step: 242/470, loss: 0.023740533739328384 2023-01-22 21:44:44.581737: step: 244/470, loss: 0.004855903331190348 2023-01-22 21:44:45.260756: step: 246/470, loss: 0.0001292641827603802 2023-01-22 21:44:46.033303: step: 248/470, loss: 0.0025803775060921907 2023-01-22 21:44:46.751403: step: 250/470, loss: 0.5248620510101318 2023-01-22 21:44:47.497437: step: 252/470, loss: 0.010610873810946941 2023-01-22 21:44:48.167270: step: 254/470, loss: 0.02230040729045868 2023-01-22 21:44:48.883634: step: 256/470, loss: 0.023776421323418617 2023-01-22 21:44:49.599887: step: 258/470, loss: 0.0019985968247056007 2023-01-22 21:44:50.385514: step: 260/470, loss: 0.07004007697105408 2023-01-22 21:44:51.182517: step: 262/470, loss: 0.0001779235026333481 2023-01-22 21:44:51.921352: step: 264/470, loss: 0.00566717516630888 2023-01-22 21:44:52.575603: step: 266/470, loss: 0.00029370139236561954 2023-01-22 21:44:53.314186: step: 268/470, loss: 0.0012920513981953263 2023-01-22 21:44:54.058712: step: 270/470, loss: 0.001267312210984528 2023-01-22 21:44:54.740756: step: 272/470, loss: 0.011984679847955704 2023-01-22 21:44:55.395434: step: 274/470, loss: 0.018051162362098694 2023-01-22 21:44:56.110867: step: 276/470, loss: 0.00023922794207464904 2023-01-22 21:44:56.859749: step: 278/470, loss: 0.016359608620405197 2023-01-22 21:44:57.547787: step: 280/470, loss: 0.011056328192353249 2023-01-22 21:44:58.268195: step: 282/470, loss: 0.01704174093902111 2023-01-22 21:44:58.975345: step: 284/470, loss: 0.0023452085442841053 2023-01-22 21:44:59.636022: step: 286/470, loss: 0.0006594705628231168 2023-01-22 21:45:00.411807: step: 288/470, loss: 0.00027242524083703756 2023-01-22 21:45:01.181366: step: 290/470, loss: 0.006094738841056824 2023-01-22 21:45:01.888713: step: 292/470, loss: 0.0011509230826050043 2023-01-22 21:45:02.665263: step: 294/470, loss: 0.008679484948515892 2023-01-22 21:45:03.434155: step: 296/470, loss: 0.027196824550628662 2023-01-22 21:45:04.091101: step: 298/470, loss: 3.1080591725185513e-05 2023-01-22 21:45:04.853779: step: 300/470, loss: 0.001202079583890736 2023-01-22 21:45:05.571462: step: 302/470, loss: 0.013225025497376919 2023-01-22 21:45:06.353997: step: 304/470, loss: 0.015826869755983353 2023-01-22 21:45:07.015597: step: 306/470, loss: 0.0002728473045863211 2023-01-22 21:45:07.811844: step: 308/470, loss: 0.0064653316512703896 2023-01-22 21:45:08.585512: step: 310/470, loss: 0.005568230524659157 2023-01-22 21:45:09.373621: step: 312/470, loss: 8.150991925504059e-05 2023-01-22 21:45:10.143556: step: 314/470, loss: 0.005682247690856457 2023-01-22 21:45:10.830759: step: 316/470, loss: 0.0011145909084007144 2023-01-22 21:45:11.537127: step: 318/470, loss: 0.0007300268625840545 2023-01-22 21:45:12.253318: step: 320/470, loss: 0.00024394701176788658 2023-01-22 21:45:12.977197: step: 322/470, loss: 0.0013543710811063647 2023-01-22 21:45:13.746465: step: 324/470, loss: 0.015326937660574913 2023-01-22 21:45:14.444294: step: 326/470, loss: 0.004613461904227734 2023-01-22 21:45:15.132980: step: 328/470, loss: 1.1873652510985266e-05 2023-01-22 21:45:15.866951: step: 330/470, loss: 0.0037322877906262875 2023-01-22 21:45:16.549502: step: 332/470, loss: 0.001422520843334496 2023-01-22 21:45:17.249302: step: 334/470, loss: 0.003986767493188381 2023-01-22 21:45:17.966943: step: 336/470, loss: 0.0021966679487377405 2023-01-22 21:45:18.735268: step: 338/470, loss: 0.03222150355577469 2023-01-22 21:45:19.414365: step: 340/470, loss: 0.0010147334542125463 2023-01-22 21:45:20.166319: step: 342/470, loss: 0.010336131788790226 2023-01-22 21:45:20.870212: step: 344/470, loss: 0.006794884335249662 2023-01-22 21:45:21.617983: step: 346/470, loss: 0.009388666599988937 2023-01-22 21:45:22.324582: step: 348/470, loss: 0.004102275241166353 2023-01-22 21:45:23.103302: step: 350/470, loss: 0.00270704529248178 2023-01-22 21:45:23.787184: step: 352/470, loss: 2.3283802875084803e-05 2023-01-22 21:45:24.464303: step: 354/470, loss: 0.000149372877785936 2023-01-22 21:45:25.232191: step: 356/470, loss: 0.015503468923270702 2023-01-22 21:45:25.894437: step: 358/470, loss: 0.013661712408065796 2023-01-22 21:45:26.639607: step: 360/470, loss: 0.029292738065123558 2023-01-22 21:45:27.384801: step: 362/470, loss: 0.07505685836076736 2023-01-22 21:45:28.157527: step: 364/470, loss: 0.005268405191600323 2023-01-22 21:45:28.873845: step: 366/470, loss: 4.039578925585374e-05 2023-01-22 21:45:29.795023: step: 368/470, loss: 0.040998730808496475 2023-01-22 21:45:30.556791: step: 370/470, loss: 0.010451802052557468 2023-01-22 21:45:31.392099: step: 372/470, loss: 0.2076258659362793 2023-01-22 21:45:32.129352: step: 374/470, loss: 0.010212671011686325 2023-01-22 21:45:32.778406: step: 376/470, loss: 0.00308050075545907 2023-01-22 21:45:33.481534: step: 378/470, loss: 2.2757983207702637 2023-01-22 21:45:34.218004: step: 380/470, loss: 0.08818937093019485 2023-01-22 21:45:34.973949: step: 382/470, loss: 0.2878647744655609 2023-01-22 21:45:35.676691: step: 384/470, loss: 0.0031312655191868544 2023-01-22 21:45:36.386748: step: 386/470, loss: 0.002897688653320074 2023-01-22 21:45:37.143840: step: 388/470, loss: 0.02370820753276348 2023-01-22 21:45:37.910404: step: 390/470, loss: 0.005385119933634996 2023-01-22 21:45:38.662461: step: 392/470, loss: 0.002497171750292182 2023-01-22 21:45:39.342901: step: 394/470, loss: 0.006736780051141977 2023-01-22 21:45:40.021334: step: 396/470, loss: 0.0072611235082149506 2023-01-22 21:45:40.730582: step: 398/470, loss: 0.0004087547422386706 2023-01-22 21:45:41.443792: step: 400/470, loss: 0.00022048353275749832 2023-01-22 21:45:42.142307: step: 402/470, loss: 0.0018256985349580646 2023-01-22 21:45:42.809599: step: 404/470, loss: 0.0001288076746277511 2023-01-22 21:45:43.496006: step: 406/470, loss: 0.004111488815397024 2023-01-22 21:45:44.235341: step: 408/470, loss: 0.0005288118845783174 2023-01-22 21:45:44.988722: step: 410/470, loss: 0.0015056623378768563 2023-01-22 21:45:45.855560: step: 412/470, loss: 0.10391915589570999 2023-01-22 21:45:46.567906: step: 414/470, loss: 0.00016978340863715857 2023-01-22 21:45:47.373526: step: 416/470, loss: 0.006940767168998718 2023-01-22 21:45:48.023716: step: 418/470, loss: 0.4740176498889923 2023-01-22 21:45:48.713490: step: 420/470, loss: 0.00903422199189663 2023-01-22 21:45:49.456394: step: 422/470, loss: 1.154094934463501 2023-01-22 21:45:50.237894: step: 424/470, loss: 0.0007773156394250691 2023-01-22 21:45:51.008407: step: 426/470, loss: 0.00015792468911968172 2023-01-22 21:45:51.758305: step: 428/470, loss: 0.0006084730848670006 2023-01-22 21:45:52.487639: step: 430/470, loss: 0.003304409794509411 2023-01-22 21:45:53.283023: step: 432/470, loss: 0.0006498509901575744 2023-01-22 21:45:53.941010: step: 434/470, loss: 0.014365673996508121 2023-01-22 21:45:54.707361: step: 436/470, loss: 0.0002987508487422019 2023-01-22 21:45:55.557117: step: 438/470, loss: 0.03105132095515728 2023-01-22 21:45:56.199843: step: 440/470, loss: 5.6159387895604596e-05 2023-01-22 21:45:57.044607: step: 442/470, loss: 0.0020124197471886873 2023-01-22 21:45:57.711751: step: 444/470, loss: 0.002125130034983158 2023-01-22 21:45:58.420214: step: 446/470, loss: 0.0010889448458328843 2023-01-22 21:45:59.124344: step: 448/470, loss: 3.070883030886762e-05 2023-01-22 21:45:59.795978: step: 450/470, loss: 0.005335522815585136 2023-01-22 21:46:00.583975: step: 452/470, loss: 0.01190522313117981 2023-01-22 21:46:01.289756: step: 454/470, loss: 0.00037295298534445465 2023-01-22 21:46:01.988671: step: 456/470, loss: 0.005229064263403416 2023-01-22 21:46:02.737699: step: 458/470, loss: 0.0013408676022663713 2023-01-22 21:46:03.527667: step: 460/470, loss: 0.0023357283789664507 2023-01-22 21:46:04.252630: step: 462/470, loss: 0.0028918476309627295 2023-01-22 21:46:05.027216: step: 464/470, loss: 0.0005529711488634348 2023-01-22 21:46:05.738825: step: 466/470, loss: 0.00048034434439614415 2023-01-22 21:46:06.515284: step: 468/470, loss: 1.2877992048743181e-05 2023-01-22 21:46:07.286651: step: 470/470, loss: 0.09141544997692108 2023-01-22 21:46:08.048819: step: 472/470, loss: 0.15473003685474396 2023-01-22 21:46:08.850272: step: 474/470, loss: 0.0005995671381242573 2023-01-22 21:46:09.538000: step: 476/470, loss: 0.00010133895557373762 2023-01-22 21:46:10.297674: step: 478/470, loss: 0.006271105259656906 2023-01-22 21:46:11.054285: step: 480/470, loss: 0.02504108101129532 2023-01-22 21:46:11.831177: step: 482/470, loss: 0.0013932195724919438 2023-01-22 21:46:12.625355: step: 484/470, loss: 0.0003886119229719043 2023-01-22 21:46:13.342950: step: 486/470, loss: 0.00025422993348911405 2023-01-22 21:46:14.097226: step: 488/470, loss: 0.0017747258534654975 2023-01-22 21:46:14.774768: step: 490/470, loss: 0.016103271394968033 2023-01-22 21:46:15.439728: step: 492/470, loss: 0.003346665995195508 2023-01-22 21:46:16.202954: step: 494/470, loss: 0.00019028606766369194 2023-01-22 21:46:16.868772: step: 496/470, loss: 6.881119043100625e-05 2023-01-22 21:46:17.645314: step: 498/470, loss: 0.000268188159679994 2023-01-22 21:46:18.407201: step: 500/470, loss: 0.0006737832445651293 2023-01-22 21:46:19.271940: step: 502/470, loss: 0.007682368624955416 2023-01-22 21:46:19.953089: step: 504/470, loss: 1.9424080164753832e-05 2023-01-22 21:46:20.694469: step: 506/470, loss: 7.247896428452805e-05 2023-01-22 21:46:21.379479: step: 508/470, loss: 0.006508438847959042 2023-01-22 21:46:22.132220: step: 510/470, loss: 0.008590683341026306 2023-01-22 21:46:22.863760: step: 512/470, loss: 0.019543835893273354 2023-01-22 21:46:23.631355: step: 514/470, loss: 0.30058515071868896 2023-01-22 21:46:24.338730: step: 516/470, loss: 2.5567267584847286e-05 2023-01-22 21:46:25.052767: step: 518/470, loss: 0.022811856120824814 2023-01-22 21:46:25.851333: step: 520/470, loss: 0.00019056579913012683 2023-01-22 21:46:26.566002: step: 522/470, loss: 0.006042586639523506 2023-01-22 21:46:27.217778: step: 524/470, loss: 0.009766626171767712 2023-01-22 21:46:27.952087: step: 526/470, loss: 0.0004236418753862381 2023-01-22 21:46:28.628271: step: 528/470, loss: 0.0460224524140358 2023-01-22 21:46:29.374620: step: 530/470, loss: 0.03648354113101959 2023-01-22 21:46:30.172882: step: 532/470, loss: 0.026953857392072678 2023-01-22 21:46:31.002017: step: 534/470, loss: 0.001078708446584642 2023-01-22 21:46:31.696901: step: 536/470, loss: 0.0006425505271181464 2023-01-22 21:46:32.371660: step: 538/470, loss: 0.013254445046186447 2023-01-22 21:46:33.113448: step: 540/470, loss: 0.005868022330105305 2023-01-22 21:46:33.845451: step: 542/470, loss: 0.000866345944814384 2023-01-22 21:46:34.533639: step: 544/470, loss: 0.019351843744516373 2023-01-22 21:46:35.271111: step: 546/470, loss: 0.0006408991175703704 2023-01-22 21:46:35.973525: step: 548/470, loss: 0.007142396178096533 2023-01-22 21:46:36.653191: step: 550/470, loss: 0.0009534017299301922 2023-01-22 21:46:37.400377: step: 552/470, loss: 0.0083243353292346 2023-01-22 21:46:38.084361: step: 554/470, loss: 0.007734335493296385 2023-01-22 21:46:38.837044: step: 556/470, loss: 0.0021097231656312943 2023-01-22 21:46:39.632593: step: 558/470, loss: 0.10651249438524246 2023-01-22 21:46:40.428787: step: 560/470, loss: 0.06708988547325134 2023-01-22 21:46:41.133602: step: 562/470, loss: 0.012138977646827698 2023-01-22 21:46:41.837721: step: 564/470, loss: 0.0041181351989507675 2023-01-22 21:46:42.568257: step: 566/470, loss: 0.004253920167684555 2023-01-22 21:46:43.296467: step: 568/470, loss: 0.009514007717370987 2023-01-22 21:46:44.090934: step: 570/470, loss: 1.9646420696517453e-05 2023-01-22 21:46:44.837812: step: 572/470, loss: 0.0004975633346475661 2023-01-22 21:46:45.508686: step: 574/470, loss: 1.6465276075905422e-06 2023-01-22 21:46:46.172475: step: 576/470, loss: 0.03554369881749153 2023-01-22 21:46:46.818098: step: 578/470, loss: 0.0003723879635799676 2023-01-22 21:46:47.613720: step: 580/470, loss: 0.0021262504160404205 2023-01-22 21:46:48.391360: step: 582/470, loss: 0.006091665010899305 2023-01-22 21:46:49.161718: step: 584/470, loss: 0.07040276378393173 2023-01-22 21:46:49.912857: step: 586/470, loss: 0.007872075773775578 2023-01-22 21:46:50.601480: step: 588/470, loss: 0.010333622805774212 2023-01-22 21:46:51.314466: step: 590/470, loss: 0.003944714087992907 2023-01-22 21:46:52.087786: step: 592/470, loss: 0.08302487432956696 2023-01-22 21:46:52.870942: step: 594/470, loss: 0.002667823573574424 2023-01-22 21:46:53.659394: step: 596/470, loss: 0.0011115572415292263 2023-01-22 21:46:54.438685: step: 598/470, loss: 0.0013725311728194356 2023-01-22 21:46:55.128935: step: 600/470, loss: 0.01085622701793909 2023-01-22 21:46:55.852858: step: 602/470, loss: 0.011929565109312534 2023-01-22 21:46:56.617030: step: 604/470, loss: 0.5936955809593201 2023-01-22 21:46:57.347255: step: 606/470, loss: 0.0015288189752027392 2023-01-22 21:46:58.064052: step: 608/470, loss: 0.004490552004426718 2023-01-22 21:46:58.830010: step: 610/470, loss: 0.01219299528747797 2023-01-22 21:46:59.565925: step: 612/470, loss: 0.004125348757952452 2023-01-22 21:47:00.297154: step: 614/470, loss: 0.0011770040728151798 2023-01-22 21:47:01.038175: step: 616/470, loss: 0.0035097640939056873 2023-01-22 21:47:01.825543: step: 618/470, loss: 0.04082076996564865 2023-01-22 21:47:02.486919: step: 620/470, loss: 0.04343542456626892 2023-01-22 21:47:03.250312: step: 622/470, loss: 0.0005687833181582391 2023-01-22 21:47:04.044103: step: 624/470, loss: 0.02319457195699215 2023-01-22 21:47:04.819449: step: 626/470, loss: 0.020851243287324905 2023-01-22 21:47:05.609389: step: 628/470, loss: 0.0018202860374003649 2023-01-22 21:47:06.321043: step: 630/470, loss: 0.006153845693916082 2023-01-22 21:47:07.031215: step: 632/470, loss: 0.004557922948151827 2023-01-22 21:47:07.816636: step: 634/470, loss: 0.018611159175634384 2023-01-22 21:47:08.539384: step: 636/470, loss: 0.0369689054787159 2023-01-22 21:47:09.307985: step: 638/470, loss: 0.002355735981836915 2023-01-22 21:47:10.109840: step: 640/470, loss: 0.0005931655177846551 2023-01-22 21:47:10.815626: step: 642/470, loss: 0.03275495022535324 2023-01-22 21:47:11.503882: step: 644/470, loss: 0.0019692745991051197 2023-01-22 21:47:12.244616: step: 646/470, loss: 0.0005796861951239407 2023-01-22 21:47:13.042056: step: 648/470, loss: 0.0003035140107385814 2023-01-22 21:47:13.894533: step: 650/470, loss: 0.00021470840147230774 2023-01-22 21:47:14.600897: step: 652/470, loss: 0.010880122892558575 2023-01-22 21:47:15.337569: step: 654/470, loss: 2.0693125406978652e-05 2023-01-22 21:47:16.042264: step: 656/470, loss: 0.0021833537612110376 2023-01-22 21:47:16.777862: step: 658/470, loss: 0.010350065305829048 2023-01-22 21:47:17.514943: step: 660/470, loss: 0.00021858404215890914 2023-01-22 21:47:18.197408: step: 662/470, loss: 0.000333769858116284 2023-01-22 21:47:18.943228: step: 664/470, loss: 0.004476209171116352 2023-01-22 21:47:19.737498: step: 666/470, loss: 0.01990194246172905 2023-01-22 21:47:20.492384: step: 668/470, loss: 0.09436644613742828 2023-01-22 21:47:21.199508: step: 670/470, loss: 0.00025006639771163464 2023-01-22 21:47:21.953326: step: 672/470, loss: 0.050850946456193924 2023-01-22 21:47:22.625561: step: 674/470, loss: 0.0038154199719429016 2023-01-22 21:47:23.371283: step: 676/470, loss: 0.0026396666653454304 2023-01-22 21:47:24.136011: step: 678/470, loss: 0.02135513350367546 2023-01-22 21:47:24.854973: step: 680/470, loss: 0.001105017145164311 2023-01-22 21:47:25.693899: step: 682/470, loss: 0.009239214472472668 2023-01-22 21:47:26.468820: step: 684/470, loss: 0.11062158644199371 2023-01-22 21:47:27.198944: step: 686/470, loss: 7.758984429528937e-05 2023-01-22 21:47:28.007110: step: 688/470, loss: 0.007189847994595766 2023-01-22 21:47:28.781117: step: 690/470, loss: 0.004308292642235756 2023-01-22 21:47:29.430512: step: 692/470, loss: 0.0004358371370472014 2023-01-22 21:47:30.112113: step: 694/470, loss: 0.2612048089504242 2023-01-22 21:47:30.855325: step: 696/470, loss: 9.558172314427793e-05 2023-01-22 21:47:31.533498: step: 698/470, loss: 0.00466720899567008 2023-01-22 21:47:32.313664: step: 700/470, loss: 0.0009645888931117952 2023-01-22 21:47:33.127251: step: 702/470, loss: 0.009687711484730244 2023-01-22 21:47:33.902002: step: 704/470, loss: 0.03733493387699127 2023-01-22 21:47:34.629776: step: 706/470, loss: 0.0338343009352684 2023-01-22 21:47:35.439833: step: 708/470, loss: 0.0013095543254166842 2023-01-22 21:47:36.173390: step: 710/470, loss: 0.0019843606278300285 2023-01-22 21:47:36.930762: step: 712/470, loss: 0.0003942087641917169 2023-01-22 21:47:37.693289: step: 714/470, loss: 0.004477455280721188 2023-01-22 21:47:38.466381: step: 716/470, loss: 0.00019188599253538996 2023-01-22 21:47:39.177800: step: 718/470, loss: 0.012036411091685295 2023-01-22 21:47:39.824335: step: 720/470, loss: 0.0012441710568964481 2023-01-22 21:47:40.518102: step: 722/470, loss: 0.02268451265990734 2023-01-22 21:47:41.271422: step: 724/470, loss: 0.04499243572354317 2023-01-22 21:47:42.012016: step: 726/470, loss: 0.000995938084088266 2023-01-22 21:47:42.815959: step: 728/470, loss: 0.002610167022794485 2023-01-22 21:47:43.548117: step: 730/470, loss: 0.001554196118377149 2023-01-22 21:47:44.260087: step: 732/470, loss: 0.0007086883997544646 2023-01-22 21:47:44.961438: step: 734/470, loss: 0.00042572562233544886 2023-01-22 21:47:45.683877: step: 736/470, loss: 0.004255416337400675 2023-01-22 21:47:46.402620: step: 738/470, loss: 0.006742789875715971 2023-01-22 21:47:47.058222: step: 740/470, loss: 0.0009496554266661406 2023-01-22 21:47:47.753251: step: 742/470, loss: 0.001538438955321908 2023-01-22 21:47:48.456568: step: 744/470, loss: 0.002146479906514287 2023-01-22 21:47:49.161877: step: 746/470, loss: 0.00030127508216537535 2023-01-22 21:47:49.809818: step: 748/470, loss: 4.851120593230007e-06 2023-01-22 21:47:50.531558: step: 750/470, loss: 0.22810763120651245 2023-01-22 21:47:51.276623: step: 752/470, loss: 0.0003023869649041444 2023-01-22 21:47:52.045323: step: 754/470, loss: 0.0018333548214286566 2023-01-22 21:47:52.896914: step: 756/470, loss: 0.012261569499969482 2023-01-22 21:47:53.592570: step: 758/470, loss: 3.2106316211866215e-05 2023-01-22 21:47:54.281508: step: 760/470, loss: 0.0017151250503957272 2023-01-22 21:47:55.002851: step: 762/470, loss: 0.08843082189559937 2023-01-22 21:47:55.758560: step: 764/470, loss: 0.0009025583858601749 2023-01-22 21:47:56.508742: step: 766/470, loss: 0.03692952170968056 2023-01-22 21:47:57.259696: step: 768/470, loss: 0.0004031884600408375 2023-01-22 21:47:58.011212: step: 770/470, loss: 0.06433983892202377 2023-01-22 21:47:58.837521: step: 772/470, loss: 0.013428415171802044 2023-01-22 21:47:59.665466: step: 774/470, loss: 0.00031575208413414657 2023-01-22 21:48:00.389616: step: 776/470, loss: 0.00019189363229088485 2023-01-22 21:48:01.086369: step: 778/470, loss: 0.0002521543647162616 2023-01-22 21:48:01.880927: step: 780/470, loss: 0.012361546978354454 2023-01-22 21:48:02.537410: step: 782/470, loss: 0.0014749522088095546 2023-01-22 21:48:03.387625: step: 784/470, loss: 0.004103075712919235 2023-01-22 21:48:04.161790: step: 786/470, loss: 0.015317887999117374 2023-01-22 21:48:04.891712: step: 788/470, loss: 0.007157180458307266 2023-01-22 21:48:05.608599: step: 790/470, loss: 0.03482293710112572 2023-01-22 21:48:06.328824: step: 792/470, loss: 0.0006050001247785985 2023-01-22 21:48:07.065260: step: 794/470, loss: 0.006788891274482012 2023-01-22 21:48:07.876976: step: 796/470, loss: 0.12752631306648254 2023-01-22 21:48:08.589658: step: 798/470, loss: 0.5755633115768433 2023-01-22 21:48:09.309571: step: 800/470, loss: 0.004557878710329533 2023-01-22 21:48:10.126688: step: 802/470, loss: 0.0018064269097521901 2023-01-22 21:48:10.860993: step: 804/470, loss: 0.09993268549442291 2023-01-22 21:48:11.547066: step: 806/470, loss: 0.0003193170123267919 2023-01-22 21:48:12.322005: step: 808/470, loss: 0.031694598495960236 2023-01-22 21:48:13.035133: step: 810/470, loss: 0.0002737323520705104 2023-01-22 21:48:13.752851: step: 812/470, loss: 0.00031162946834228933 2023-01-22 21:48:14.446141: step: 814/470, loss: 0.055558472871780396 2023-01-22 21:48:15.174423: step: 816/470, loss: 0.0006891106604598463 2023-01-22 21:48:15.886598: step: 818/470, loss: 0.012265880592167377 2023-01-22 21:48:16.585059: step: 820/470, loss: 0.005278497468680143 2023-01-22 21:48:17.339882: step: 822/470, loss: 0.0007367177749983966 2023-01-22 21:48:18.120867: step: 824/470, loss: 0.00032776681473478675 2023-01-22 21:48:18.777509: step: 826/470, loss: 0.004107217770069838 2023-01-22 21:48:19.600293: step: 828/470, loss: 0.008525116369128227 2023-01-22 21:48:20.343640: step: 830/470, loss: 0.00033007533056661487 2023-01-22 21:48:21.071789: step: 832/470, loss: 0.0008853072067722678 2023-01-22 21:48:21.774521: step: 834/470, loss: 0.0006735201459378004 2023-01-22 21:48:22.445029: step: 836/470, loss: 0.0024453336372971535 2023-01-22 21:48:23.154129: step: 838/470, loss: 1.662024988036137e-05 2023-01-22 21:48:23.886238: step: 840/470, loss: 0.007163457106798887 2023-01-22 21:48:24.520586: step: 842/470, loss: 8.924589928938076e-05 2023-01-22 21:48:25.252975: step: 844/470, loss: 0.012785697355866432 2023-01-22 21:48:25.992342: step: 846/470, loss: 0.004503779578953981 2023-01-22 21:48:26.681159: step: 848/470, loss: 0.04323597252368927 2023-01-22 21:48:27.493459: step: 850/470, loss: 0.09764706343412399 2023-01-22 21:48:28.301135: step: 852/470, loss: 0.11818161606788635 2023-01-22 21:48:29.022166: step: 854/470, loss: 0.00011157716653542593 2023-01-22 21:48:29.805975: step: 856/470, loss: 0.003554239170625806 2023-01-22 21:48:30.527302: step: 858/470, loss: 0.009540732949972153 2023-01-22 21:48:31.237004: step: 860/470, loss: 0.0023567378520965576 2023-01-22 21:48:31.993569: step: 862/470, loss: 0.0008384129614569247 2023-01-22 21:48:32.721474: step: 864/470, loss: 0.0015519903972744942 2023-01-22 21:48:33.439876: step: 866/470, loss: 0.0001553489564685151 2023-01-22 21:48:34.366486: step: 868/470, loss: 0.018373709172010422 2023-01-22 21:48:35.149576: step: 870/470, loss: 0.0008085042354650795 2023-01-22 21:48:35.849385: step: 872/470, loss: 0.0006269579171203077 2023-01-22 21:48:36.538175: step: 874/470, loss: 0.00702142296358943 2023-01-22 21:48:37.321055: step: 876/470, loss: 0.0382898710668087 2023-01-22 21:48:37.962142: step: 878/470, loss: 0.004564675502479076 2023-01-22 21:48:38.776766: step: 880/470, loss: 0.0015754105988889933 2023-01-22 21:48:39.519359: step: 882/470, loss: 0.00046436249976977706 2023-01-22 21:48:40.277578: step: 884/470, loss: 0.0009858196135610342 2023-01-22 21:48:40.981060: step: 886/470, loss: 1.2100362255296204e-05 2023-01-22 21:48:41.778294: step: 888/470, loss: 0.001151207135990262 2023-01-22 21:48:42.537269: step: 890/470, loss: 0.002776085864752531 2023-01-22 21:48:43.270610: step: 892/470, loss: 0.0019681635312736034 2023-01-22 21:48:44.141893: step: 894/470, loss: 0.0007172105833888054 2023-01-22 21:48:44.773439: step: 896/470, loss: 0.010990173555910587 2023-01-22 21:48:45.515151: step: 898/470, loss: 0.01894933171570301 2023-01-22 21:48:46.227500: step: 900/470, loss: 0.003088920610025525 2023-01-22 21:48:46.915695: step: 902/470, loss: 0.026566803455352783 2023-01-22 21:48:47.608890: step: 904/470, loss: 0.0029794382862746716 2023-01-22 21:48:48.318237: step: 906/470, loss: 0.0001242105645360425 2023-01-22 21:48:49.121886: step: 908/470, loss: 0.10085117816925049 2023-01-22 21:48:49.789711: step: 910/470, loss: 0.00012268772115930915 2023-01-22 21:48:50.497669: step: 912/470, loss: 0.004678426310420036 2023-01-22 21:48:51.265128: step: 914/470, loss: 0.0005872580222785473 2023-01-22 21:48:51.948668: step: 916/470, loss: 0.00040301651461049914 2023-01-22 21:48:52.746124: step: 918/470, loss: 0.0017456887289881706 2023-01-22 21:48:53.456588: step: 920/470, loss: 0.07405664026737213 2023-01-22 21:48:54.138328: step: 922/470, loss: 0.0002474442298989743 2023-01-22 21:48:54.886827: step: 924/470, loss: 0.019610069692134857 2023-01-22 21:48:55.597795: step: 926/470, loss: 0.00045439210953190923 2023-01-22 21:48:56.376193: step: 928/470, loss: 0.004804328549653292 2023-01-22 21:48:57.080220: step: 930/470, loss: 0.02111213654279709 2023-01-22 21:48:57.846527: step: 932/470, loss: 0.04191237688064575 2023-01-22 21:48:58.565039: step: 934/470, loss: 0.007126152515411377 2023-01-22 21:48:59.289881: step: 936/470, loss: 0.0033556444104760885 2023-01-22 21:48:59.971131: step: 938/470, loss: 0.0007148180739022791 2023-01-22 21:49:00.737728: step: 940/470, loss: 0.12642525136470795 2023-01-22 21:49:01.319190: step: 942/470, loss: 0.00044497830094769597 ================================================== Loss: 0.036 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30282611771363893, 'r': 0.3384527197975965, 'f1': 0.3196497909199522}, 'combined': 0.23553142488838583, 'epoch': 37} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3440310092967713, 'r': 0.3638789521408158, 'f1': 0.353676738529391}, 'combined': 0.24634200693589425, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2931962461359044, 'r': 0.3332534182835042, 'f1': 0.31194414109308477}, 'combined': 0.22985357764753614, 'epoch': 37} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3486073107962775, 'r': 0.3667080750106996, 'f1': 0.3574286766739715}, 'combined': 0.2489552971858508, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2836884955703637, 'r': 0.34290241305184377, 'f1': 0.31049754583904066}, 'combined': 0.22878766535508258, 'epoch': 37} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33661146480964615, 'r': 0.37674590869079627, 'f1': 0.35554967789331055}, 'combined': 0.24764654181623622, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23317307692307693, 'r': 0.3464285714285714, 'f1': 0.278735632183908}, 'combined': 0.185823754789272, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2875, 'r': 0.5, 'f1': 0.36507936507936506}, 'combined': 0.18253968253968253, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3645833333333333, 'r': 0.3017241379310345, 'f1': 0.33018867924528306}, 'combined': 0.22012578616352202, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 21:51:38.911228: step: 2/470, loss: 0.003493683412671089 2023-01-22 21:51:39.669376: step: 4/470, loss: 3.1483668863074854e-05 2023-01-22 21:51:40.395561: step: 6/470, loss: 0.0010329708456993103 2023-01-22 21:51:41.109174: step: 8/470, loss: 0.015900876373052597 2023-01-22 21:51:41.846558: step: 10/470, loss: 0.0006670716102235019 2023-01-22 21:51:42.630157: step: 12/470, loss: 0.03588513284921646 2023-01-22 21:51:43.273655: step: 14/470, loss: 0.018479470163583755 2023-01-22 21:51:43.956531: step: 16/470, loss: 0.0064428700134158134 2023-01-22 21:51:44.677381: step: 18/470, loss: 0.008134379982948303 2023-01-22 21:51:45.427582: step: 20/470, loss: 0.04875548183917999 2023-01-22 21:51:46.255746: step: 22/470, loss: 0.007432709448039532 2023-01-22 21:51:47.007184: step: 24/470, loss: 0.015830012038350105 2023-01-22 21:51:47.691597: step: 26/470, loss: 0.0009923202451318502 2023-01-22 21:51:48.449194: step: 28/470, loss: 0.02561793476343155 2023-01-22 21:51:49.195634: step: 30/470, loss: 0.06104350462555885 2023-01-22 21:51:49.909565: step: 32/470, loss: 0.00034792270162142813 2023-01-22 21:51:50.746576: step: 34/470, loss: 0.0635775625705719 2023-01-22 21:51:51.460686: step: 36/470, loss: 0.0034135098103433847 2023-01-22 21:51:52.113252: step: 38/470, loss: 0.0001381459878757596 2023-01-22 21:51:52.854242: step: 40/470, loss: 0.005255894735455513 2023-01-22 21:51:53.642791: step: 42/470, loss: 0.006958745885640383 2023-01-22 21:51:54.295582: step: 44/470, loss: 2.5377163183293305e-05 2023-01-22 21:51:54.996765: step: 46/470, loss: 0.0034183606039732695 2023-01-22 21:51:55.799573: step: 48/470, loss: 0.011098065413534641 2023-01-22 21:51:56.567315: step: 50/470, loss: 0.03030436858534813 2023-01-22 21:51:57.262336: step: 52/470, loss: 0.001068002893589437 2023-01-22 21:51:58.003353: step: 54/470, loss: 0.0028049203101545572 2023-01-22 21:51:58.834759: step: 56/470, loss: 0.0009381828713230789 2023-01-22 21:51:59.554924: step: 58/470, loss: 0.00014905152784194797 2023-01-22 21:52:00.391549: step: 60/470, loss: 0.013133973814547062 2023-01-22 21:52:01.104716: step: 62/470, loss: 0.0014203452738001943 2023-01-22 21:52:01.948960: step: 64/470, loss: 0.0008770119166001678 2023-01-22 21:52:02.767286: step: 66/470, loss: 0.018221968784928322 2023-01-22 21:52:03.476940: step: 68/470, loss: 0.00032925268169492483 2023-01-22 21:52:04.104157: step: 70/470, loss: 8.492947381455451e-05 2023-01-22 21:52:05.042783: step: 72/470, loss: 0.01569739170372486 2023-01-22 21:52:05.796014: step: 74/470, loss: 0.011922664940357208 2023-01-22 21:52:06.501639: step: 76/470, loss: 0.00024971627863124013 2023-01-22 21:52:07.229578: step: 78/470, loss: 0.012681872583925724 2023-01-22 21:52:07.913282: step: 80/470, loss: 0.03299639746546745 2023-01-22 21:52:08.712842: step: 82/470, loss: 0.3435828983783722 2023-01-22 21:52:09.403367: step: 84/470, loss: 0.0480882003903389 2023-01-22 21:52:10.092071: step: 86/470, loss: 0.006340092979371548 2023-01-22 21:52:10.835928: step: 88/470, loss: 0.018705761060118675 2023-01-22 21:52:11.563746: step: 90/470, loss: 0.0008621179731562734 2023-01-22 21:52:12.243179: step: 92/470, loss: 0.022489935159683228 2023-01-22 21:52:12.952815: step: 94/470, loss: 0.008963331580162048 2023-01-22 21:52:13.725223: step: 96/470, loss: 0.04265124723315239 2023-01-22 21:52:14.456095: step: 98/470, loss: 0.022511204704642296 2023-01-22 21:52:15.199279: step: 100/470, loss: 0.002531637204810977 2023-01-22 21:52:15.910372: step: 102/470, loss: 0.0007971610175445676 2023-01-22 21:52:16.562900: step: 104/470, loss: 0.01751614920794964 2023-01-22 21:52:17.325584: step: 106/470, loss: 0.022801943123340607 2023-01-22 21:52:18.084046: step: 108/470, loss: 7.938418275443837e-05 2023-01-22 21:52:18.766490: step: 110/470, loss: 0.0036345114931464195 2023-01-22 21:52:19.546076: step: 112/470, loss: 0.0004885205999016762 2023-01-22 21:52:20.274182: step: 114/470, loss: 0.02976686879992485 2023-01-22 21:52:20.975836: step: 116/470, loss: 0.0012918829452246428 2023-01-22 21:52:21.686956: step: 118/470, loss: 0.030225861817598343 2023-01-22 21:52:22.430386: step: 120/470, loss: 0.0013654425274580717 2023-01-22 21:52:23.165766: step: 122/470, loss: 0.049725260585546494 2023-01-22 21:52:23.906816: step: 124/470, loss: 0.0041059935465455055 2023-01-22 21:52:24.588233: step: 126/470, loss: 0.0016458419850096107 2023-01-22 21:52:25.288079: step: 128/470, loss: 0.0037780962884426117 2023-01-22 21:52:25.985918: step: 130/470, loss: 0.00395290507003665 2023-01-22 21:52:26.736356: step: 132/470, loss: 0.01029009185731411 2023-01-22 21:52:27.441248: step: 134/470, loss: 0.0011309736873954535 2023-01-22 21:52:28.203113: step: 136/470, loss: 0.00018435719539411366 2023-01-22 21:52:28.902905: step: 138/470, loss: 0.0008806980913504958 2023-01-22 21:52:29.614940: step: 140/470, loss: 0.0079522505402565 2023-01-22 21:52:30.424112: step: 142/470, loss: 3.3286640245933086e-05 2023-01-22 21:52:31.181286: step: 144/470, loss: 0.0006921407766640186 2023-01-22 21:52:31.918468: step: 146/470, loss: 0.2176320105791092 2023-01-22 21:52:32.722679: step: 148/470, loss: 0.011884159408509731 2023-01-22 21:52:33.443237: step: 150/470, loss: 0.0032580445986241102 2023-01-22 21:52:34.222709: step: 152/470, loss: 0.0007281180587597191 2023-01-22 21:52:35.102463: step: 154/470, loss: 0.019526991993188858 2023-01-22 21:52:35.812879: step: 156/470, loss: 0.005697279702872038 2023-01-22 21:52:36.586883: step: 158/470, loss: 0.004067837260663509 2023-01-22 21:52:37.299795: step: 160/470, loss: 5.4411604651249945e-05 2023-01-22 21:52:37.972608: step: 162/470, loss: 0.019002696499228477 2023-01-22 21:52:38.712387: step: 164/470, loss: 0.0015450211940333247 2023-01-22 21:52:39.512908: step: 166/470, loss: 0.0039725713431835175 2023-01-22 21:52:40.287326: step: 168/470, loss: 0.011467957869172096 2023-01-22 21:52:40.992245: step: 170/470, loss: 0.027040036395192146 2023-01-22 21:52:41.654898: step: 172/470, loss: 0.00044272729428485036 2023-01-22 21:52:42.443213: step: 174/470, loss: 0.0003731549368239939 2023-01-22 21:52:43.165826: step: 176/470, loss: 0.016215885058045387 2023-01-22 21:52:43.891494: step: 178/470, loss: 0.03532567247748375 2023-01-22 21:52:44.557680: step: 180/470, loss: 0.002269421936944127 2023-01-22 21:52:45.258117: step: 182/470, loss: 0.004340062849223614 2023-01-22 21:52:45.955777: step: 184/470, loss: 0.021665049716830254 2023-01-22 21:52:46.690637: step: 186/470, loss: 0.0015216005267575383 2023-01-22 21:52:47.447621: step: 188/470, loss: 0.03777991607785225 2023-01-22 21:52:48.187326: step: 190/470, loss: 0.0024247351102530956 2023-01-22 21:52:48.968012: step: 192/470, loss: 0.026008278131484985 2023-01-22 21:52:49.752325: step: 194/470, loss: 0.011309238150715828 2023-01-22 21:52:50.483052: step: 196/470, loss: 0.0033743376843631268 2023-01-22 21:52:51.237117: step: 198/470, loss: 0.022116800770163536 2023-01-22 21:52:51.970905: step: 200/470, loss: 6.394281808752567e-05 2023-01-22 21:52:52.642358: step: 202/470, loss: 0.010615077801048756 2023-01-22 21:52:53.368587: step: 204/470, loss: 0.0010532918386161327 2023-01-22 21:52:54.046509: step: 206/470, loss: 0.001243436592631042 2023-01-22 21:52:54.734415: step: 208/470, loss: 0.3090232312679291 2023-01-22 21:52:55.428556: step: 210/470, loss: 0.00770140066742897 2023-01-22 21:52:56.176874: step: 212/470, loss: 0.0004572441102936864 2023-01-22 21:52:56.865802: step: 214/470, loss: 0.0049099307507276535 2023-01-22 21:52:57.520755: step: 216/470, loss: 0.00032190539059229195 2023-01-22 21:52:58.208794: step: 218/470, loss: 0.002639106009155512 2023-01-22 21:52:58.986907: step: 220/470, loss: 0.0019580356311053038 2023-01-22 21:52:59.669234: step: 222/470, loss: 0.0008421125821769238 2023-01-22 21:53:00.417776: step: 224/470, loss: 0.0025131748989224434 2023-01-22 21:53:01.157720: step: 226/470, loss: 0.006898400839418173 2023-01-22 21:53:01.829809: step: 228/470, loss: 0.003961809910833836 2023-01-22 21:53:02.584346: step: 230/470, loss: 0.012256169691681862 2023-01-22 21:53:03.278140: step: 232/470, loss: 0.011719431728124619 2023-01-22 21:53:04.012529: step: 234/470, loss: 0.07077656686306 2023-01-22 21:53:04.779135: step: 236/470, loss: 0.0006426791660487652 2023-01-22 21:53:05.504184: step: 238/470, loss: 1.4502748854283709e-05 2023-01-22 21:53:06.128120: step: 240/470, loss: 0.00010448225657455623 2023-01-22 21:53:06.892825: step: 242/470, loss: 0.008650097995996475 2023-01-22 21:53:07.623811: step: 244/470, loss: 0.00271412986330688 2023-01-22 21:53:08.389471: step: 246/470, loss: 0.008904355578124523 2023-01-22 21:53:09.120882: step: 248/470, loss: 0.01523964386433363 2023-01-22 21:53:09.861894: step: 250/470, loss: 0.0009835069067776203 2023-01-22 21:53:10.556934: step: 252/470, loss: 0.001248181564733386 2023-01-22 21:53:11.350186: step: 254/470, loss: 0.12711653113365173 2023-01-22 21:53:12.118060: step: 256/470, loss: 0.014596930705010891 2023-01-22 21:53:12.920869: step: 258/470, loss: 0.03263501450419426 2023-01-22 21:53:13.605701: step: 260/470, loss: 0.12978902459144592 2023-01-22 21:53:14.331115: step: 262/470, loss: 0.011506453156471252 2023-01-22 21:53:15.109618: step: 264/470, loss: 0.07690394669771194 2023-01-22 21:53:15.815651: step: 266/470, loss: 0.0022593012545257807 2023-01-22 21:53:16.577633: step: 268/470, loss: 0.008098762482404709 2023-01-22 21:53:17.371762: step: 270/470, loss: 0.0018563955090939999 2023-01-22 21:53:18.157806: step: 272/470, loss: 0.0008043406414799392 2023-01-22 21:53:18.891581: step: 274/470, loss: 0.006701835431158543 2023-01-22 21:53:19.615599: step: 276/470, loss: 0.0002937865210697055 2023-01-22 21:53:20.286730: step: 278/470, loss: 0.029652804136276245 2023-01-22 21:53:21.053771: step: 280/470, loss: 0.22939205169677734 2023-01-22 21:53:21.757145: step: 282/470, loss: 0.0002202181494794786 2023-01-22 21:53:22.446454: step: 284/470, loss: 1.331815747107612e-05 2023-01-22 21:53:23.180828: step: 286/470, loss: 0.02264559268951416 2023-01-22 21:53:23.955759: step: 288/470, loss: 0.0002129612839780748 2023-01-22 21:53:24.712225: step: 290/470, loss: 0.005394687410444021 2023-01-22 21:53:25.441089: step: 292/470, loss: 0.025830525904893875 2023-01-22 21:53:26.198327: step: 294/470, loss: 0.0006529740057885647 2023-01-22 21:53:26.909532: step: 296/470, loss: 0.011604727245867252 2023-01-22 21:53:27.625941: step: 298/470, loss: 0.002371342619881034 2023-01-22 21:53:28.407296: step: 300/470, loss: 0.005713804624974728 2023-01-22 21:53:29.119935: step: 302/470, loss: 0.041894592344760895 2023-01-22 21:53:29.866930: step: 304/470, loss: 0.04704660177230835 2023-01-22 21:53:30.615025: step: 306/470, loss: 0.0020737978629767895 2023-01-22 21:53:31.382692: step: 308/470, loss: 0.006861019879579544 2023-01-22 21:53:32.031413: step: 310/470, loss: 0.25099924206733704 2023-01-22 21:53:32.691917: step: 312/470, loss: 0.00043172220466658473 2023-01-22 21:53:33.370520: step: 314/470, loss: 0.015380592085421085 2023-01-22 21:53:34.052922: step: 316/470, loss: 0.0004332458192948252 2023-01-22 21:53:34.830895: step: 318/470, loss: 0.005911845248192549 2023-01-22 21:53:35.582859: step: 320/470, loss: 0.00782099924981594 2023-01-22 21:53:36.293411: step: 322/470, loss: 0.006352236494421959 2023-01-22 21:53:37.046978: step: 324/470, loss: 0.006406526081264019 2023-01-22 21:53:37.731728: step: 326/470, loss: 0.006970448885113001 2023-01-22 21:53:38.447532: step: 328/470, loss: 0.004823725204914808 2023-01-22 21:53:39.206406: step: 330/470, loss: 0.021097727119922638 2023-01-22 21:53:39.953053: step: 332/470, loss: 0.014263940043747425 2023-01-22 21:53:40.664947: step: 334/470, loss: 0.005755257327109575 2023-01-22 21:53:41.369206: step: 336/470, loss: 0.0009758573723956943 2023-01-22 21:53:42.153162: step: 338/470, loss: 5.3987059800419956e-05 2023-01-22 21:53:42.859045: step: 340/470, loss: 0.001099450164474547 2023-01-22 21:53:43.574824: step: 342/470, loss: 0.0010999426012858748 2023-01-22 21:53:44.335088: step: 344/470, loss: 0.02734139934182167 2023-01-22 21:53:45.089363: step: 346/470, loss: 0.005323213990777731 2023-01-22 21:53:45.835941: step: 348/470, loss: 0.0010934954043477774 2023-01-22 21:53:46.559500: step: 350/470, loss: 0.02636098861694336 2023-01-22 21:53:47.326731: step: 352/470, loss: 0.29431140422821045 2023-01-22 21:53:48.115419: step: 354/470, loss: 1.4190295587468427e-06 2023-01-22 21:53:48.803375: step: 356/470, loss: 0.00703160697594285 2023-01-22 21:53:49.502960: step: 358/470, loss: 4.769103725266177e-06 2023-01-22 21:53:50.279965: step: 360/470, loss: 0.008066543377935886 2023-01-22 21:53:51.053292: step: 362/470, loss: 0.008893858641386032 2023-01-22 21:53:51.793760: step: 364/470, loss: 0.0012347385054454207 2023-01-22 21:53:52.617724: step: 366/470, loss: 0.03243786841630936 2023-01-22 21:53:53.428367: step: 368/470, loss: 0.05025627464056015 2023-01-22 21:53:54.202681: step: 370/470, loss: 0.0002960547572001815 2023-01-22 21:53:54.956561: step: 372/470, loss: 0.01112099178135395 2023-01-22 21:53:55.726530: step: 374/470, loss: 5.0827387894969434e-05 2023-01-22 21:53:56.494212: step: 376/470, loss: 0.0254096370190382 2023-01-22 21:53:57.197010: step: 378/470, loss: 1.0762357711791992 2023-01-22 21:53:58.025888: step: 380/470, loss: 0.09362594038248062 2023-01-22 21:53:58.710672: step: 382/470, loss: 0.08472180366516113 2023-01-22 21:53:59.413402: step: 384/470, loss: 5.627515201922506e-05 2023-01-22 21:54:00.201270: step: 386/470, loss: 0.04056701436638832 2023-01-22 21:54:00.983100: step: 388/470, loss: 0.0017334421863779426 2023-01-22 21:54:01.735708: step: 390/470, loss: 0.013753866776823997 2023-01-22 21:54:02.508259: step: 392/470, loss: 0.029546622186899185 2023-01-22 21:54:03.289213: step: 394/470, loss: 0.008471227250993252 2023-01-22 21:54:04.047150: step: 396/470, loss: 0.05549805611371994 2023-01-22 21:54:04.780808: step: 398/470, loss: 4.936423465551343e-06 2023-01-22 21:54:05.603211: step: 400/470, loss: 0.022593067958950996 2023-01-22 21:54:06.394891: step: 402/470, loss: 0.011362415738403797 2023-01-22 21:54:07.088336: step: 404/470, loss: 0.007395919878035784 2023-01-22 21:54:07.731265: step: 406/470, loss: 0.0004373751871753484 2023-01-22 21:54:08.458117: step: 408/470, loss: 0.0032540217507630587 2023-01-22 21:54:09.209324: step: 410/470, loss: 6.207002297742292e-05 2023-01-22 21:54:09.995482: step: 412/470, loss: 0.012555564753711224 2023-01-22 21:54:10.761751: step: 414/470, loss: 0.005541603546589613 2023-01-22 21:54:11.526065: step: 416/470, loss: 1.1564688682556152 2023-01-22 21:54:12.254200: step: 418/470, loss: 0.011560517363250256 2023-01-22 21:54:12.999474: step: 420/470, loss: 0.13285844027996063 2023-01-22 21:54:13.746058: step: 422/470, loss: 0.02132769674062729 2023-01-22 21:54:14.539545: step: 424/470, loss: 3.81058816856239e-05 2023-01-22 21:54:15.291716: step: 426/470, loss: 0.005625903606414795 2023-01-22 21:54:15.996742: step: 428/470, loss: 3.352219209773466e-05 2023-01-22 21:54:16.766084: step: 430/470, loss: 0.0006965019856579602 2023-01-22 21:54:17.476442: step: 432/470, loss: 0.00841616839170456 2023-01-22 21:54:18.161106: step: 434/470, loss: 0.0019366976339370012 2023-01-22 21:54:19.059505: step: 436/470, loss: 0.0008078304235823452 2023-01-22 21:54:19.818974: step: 438/470, loss: 0.008926390670239925 2023-01-22 21:54:20.602385: step: 440/470, loss: 0.012859417125582695 2023-01-22 21:54:21.371353: step: 442/470, loss: 0.00021163160272408277 2023-01-22 21:54:22.063757: step: 444/470, loss: 0.0004545637348201126 2023-01-22 21:54:22.728085: step: 446/470, loss: 4.504845492192544e-06 2023-01-22 21:54:23.540747: step: 448/470, loss: 0.04429243505001068 2023-01-22 21:54:24.220200: step: 450/470, loss: 0.013035489246249199 2023-01-22 21:54:25.002014: step: 452/470, loss: 0.01099762599915266 2023-01-22 21:54:25.772472: step: 454/470, loss: 0.0008165753679350019 2023-01-22 21:54:26.457675: step: 456/470, loss: 0.00030910957138985395 2023-01-22 21:54:27.258430: step: 458/470, loss: 0.004646346438676119 2023-01-22 21:54:28.044526: step: 460/470, loss: 1.028315782546997 2023-01-22 21:54:28.753832: step: 462/470, loss: 0.0014696192229166627 2023-01-22 21:54:29.518330: step: 464/470, loss: 0.05205130949616432 2023-01-22 21:54:30.267090: step: 466/470, loss: 2.820938971126452e-05 2023-01-22 21:54:31.069398: step: 468/470, loss: 0.00012028154014842585 2023-01-22 21:54:31.688785: step: 470/470, loss: 6.529298116220161e-05 2023-01-22 21:54:32.419617: step: 472/470, loss: 0.0006013525417074561 2023-01-22 21:54:33.144668: step: 474/470, loss: 3.899422154063359e-05 2023-01-22 21:54:33.999942: step: 476/470, loss: 0.0051171015948057175 2023-01-22 21:54:34.739519: step: 478/470, loss: 0.0003485481138341129 2023-01-22 21:54:35.434264: step: 480/470, loss: 0.0009719117661006749 2023-01-22 21:54:36.185367: step: 482/470, loss: 0.0005722529022023082 2023-01-22 21:54:36.905351: step: 484/470, loss: 0.0002770486462395638 2023-01-22 21:54:37.603681: step: 486/470, loss: 0.004022694192826748 2023-01-22 21:54:38.331056: step: 488/470, loss: 0.007053688168525696 2023-01-22 21:54:39.038390: step: 490/470, loss: 0.05090930685400963 2023-01-22 21:54:39.716232: step: 492/470, loss: 0.0005236592842265964 2023-01-22 21:54:40.503786: step: 494/470, loss: 0.03422538563609123 2023-01-22 21:54:41.237913: step: 496/470, loss: 0.011507346294820309 2023-01-22 21:54:41.943485: step: 498/470, loss: 0.14850161969661713 2023-01-22 21:54:42.592756: step: 500/470, loss: 0.02185884863138199 2023-01-22 21:54:43.295185: step: 502/470, loss: 0.39024344086647034 2023-01-22 21:54:44.050115: step: 504/470, loss: 5.338906339602545e-05 2023-01-22 21:54:44.742747: step: 506/470, loss: 0.008324535563588142 2023-01-22 21:54:45.463471: step: 508/470, loss: 0.9969122409820557 2023-01-22 21:54:46.202456: step: 510/470, loss: 0.005451219622045755 2023-01-22 21:54:46.840652: step: 512/470, loss: 0.00021868124895263463 2023-01-22 21:54:47.559079: step: 514/470, loss: 0.009522140957415104 2023-01-22 21:54:48.324216: step: 516/470, loss: 0.053997401148080826 2023-01-22 21:54:49.071428: step: 518/470, loss: 0.07220856100320816 2023-01-22 21:54:49.827899: step: 520/470, loss: 0.19266104698181152 2023-01-22 21:54:50.508431: step: 522/470, loss: 0.0018507946515455842 2023-01-22 21:54:51.153663: step: 524/470, loss: 0.01633111573755741 2023-01-22 21:54:52.030913: step: 526/470, loss: 0.5133503079414368 2023-01-22 21:54:52.736961: step: 528/470, loss: 0.2512091100215912 2023-01-22 21:54:53.490495: step: 530/470, loss: 0.00509637501090765 2023-01-22 21:54:54.454425: step: 532/470, loss: 0.05830131098628044 2023-01-22 21:54:55.109508: step: 534/470, loss: 0.002167430240660906 2023-01-22 21:54:55.797053: step: 536/470, loss: 0.00017176716937683523 2023-01-22 21:54:56.566911: step: 538/470, loss: 0.018888715654611588 2023-01-22 21:54:57.282852: step: 540/470, loss: 0.0069089666940271854 2023-01-22 21:54:58.104290: step: 542/470, loss: 0.002944743959233165 2023-01-22 21:54:58.817362: step: 544/470, loss: 0.012334275059401989 2023-01-22 21:54:59.530372: step: 546/470, loss: 0.03784177079796791 2023-01-22 21:55:00.313799: step: 548/470, loss: 0.005058033857494593 2023-01-22 21:55:01.138733: step: 550/470, loss: 0.007854700088500977 2023-01-22 21:55:01.779858: step: 552/470, loss: 3.625124918471556e-06 2023-01-22 21:55:02.549760: step: 554/470, loss: 0.011495105922222137 2023-01-22 21:55:03.222838: step: 556/470, loss: 0.0006342419073916972 2023-01-22 21:55:03.966600: step: 558/470, loss: 0.0004920351784676313 2023-01-22 21:55:04.747717: step: 560/470, loss: 0.0354156494140625 2023-01-22 21:55:05.604375: step: 562/470, loss: 0.03654221445322037 2023-01-22 21:55:06.309130: step: 564/470, loss: 0.005547667853534222 2023-01-22 21:55:06.955488: step: 566/470, loss: 0.005937586072832346 2023-01-22 21:55:07.654566: step: 568/470, loss: 0.0038067607674747705 2023-01-22 21:55:08.360381: step: 570/470, loss: 0.0002508886100258678 2023-01-22 21:55:09.106097: step: 572/470, loss: 0.021944653242826462 2023-01-22 21:55:09.811119: step: 574/470, loss: 3.4421158488839865e-05 2023-01-22 21:55:10.461561: step: 576/470, loss: 0.005076797213405371 2023-01-22 21:55:11.216405: step: 578/470, loss: 0.026614626869559288 2023-01-22 21:55:11.928419: step: 580/470, loss: 0.0064454590901732445 2023-01-22 21:55:12.684791: step: 582/470, loss: 0.009054692462086678 2023-01-22 21:55:13.454068: step: 584/470, loss: 0.0006180154159665108 2023-01-22 21:55:14.141639: step: 586/470, loss: 0.03550654277205467 2023-01-22 21:55:14.945516: step: 588/470, loss: 0.006163349840790033 2023-01-22 21:55:15.664957: step: 590/470, loss: 0.019109375774860382 2023-01-22 21:55:16.353126: step: 592/470, loss: 0.03721586987376213 2023-01-22 21:55:17.012063: step: 594/470, loss: 3.0975337722338736e-05 2023-01-22 21:55:17.812317: step: 596/470, loss: 0.0007861484191380441 2023-01-22 21:55:18.523482: step: 598/470, loss: 0.009015318937599659 2023-01-22 21:55:19.282542: step: 600/470, loss: 0.0021047855261713266 2023-01-22 21:55:19.980263: step: 602/470, loss: 4.80856433568988e-05 2023-01-22 21:55:20.716811: step: 604/470, loss: 0.00718892365694046 2023-01-22 21:55:21.420594: step: 606/470, loss: 0.05739133059978485 2023-01-22 21:55:22.170555: step: 608/470, loss: 0.00620870478451252 2023-01-22 21:55:22.907029: step: 610/470, loss: 0.01584588550031185 2023-01-22 21:55:23.681742: step: 612/470, loss: 0.028577158227562904 2023-01-22 21:55:24.401039: step: 614/470, loss: 0.00660181138664484 2023-01-22 21:55:25.105451: step: 616/470, loss: 0.01830855756998062 2023-01-22 21:55:25.780244: step: 618/470, loss: 0.010931288823485374 2023-01-22 21:55:26.555305: step: 620/470, loss: 0.20352265238761902 2023-01-22 21:55:27.308996: step: 622/470, loss: 0.01606188900768757 2023-01-22 21:55:28.058813: step: 624/470, loss: 0.0026829990092664957 2023-01-22 21:55:28.824321: step: 626/470, loss: 0.0006494335830211639 2023-01-22 21:55:29.526157: step: 628/470, loss: 0.019799262285232544 2023-01-22 21:55:30.331505: step: 630/470, loss: 0.06131910905241966 2023-01-22 21:55:30.949101: step: 632/470, loss: 6.267506978474557e-05 2023-01-22 21:55:31.686216: step: 634/470, loss: 0.012804257683455944 2023-01-22 21:55:32.380100: step: 636/470, loss: 0.0005007492727600038 2023-01-22 21:55:33.125093: step: 638/470, loss: 0.001479864353314042 2023-01-22 21:55:33.760337: step: 640/470, loss: 0.0061265756376087666 2023-01-22 21:55:34.408883: step: 642/470, loss: 0.0012702817330136895 2023-01-22 21:55:35.191491: step: 644/470, loss: 0.00014075188664719462 2023-01-22 21:55:35.882214: step: 646/470, loss: 0.0016734458040446043 2023-01-22 21:55:36.622496: step: 648/470, loss: 0.010227406397461891 2023-01-22 21:55:37.273872: step: 650/470, loss: 0.029626229777932167 2023-01-22 21:55:38.082511: step: 652/470, loss: 0.022710563614964485 2023-01-22 21:55:38.889076: step: 654/470, loss: 0.009011475369334221 2023-01-22 21:55:39.633883: step: 656/470, loss: 0.004524306394159794 2023-01-22 21:55:40.365690: step: 658/470, loss: 0.007571091875433922 2023-01-22 21:55:41.094814: step: 660/470, loss: 0.032529208809137344 2023-01-22 21:55:41.817429: step: 662/470, loss: 0.01610148325562477 2023-01-22 21:55:42.519043: step: 664/470, loss: 0.02389807626605034 2023-01-22 21:55:43.303783: step: 666/470, loss: 0.02644765004515648 2023-01-22 21:55:43.998485: step: 668/470, loss: 0.008968241512775421 2023-01-22 21:55:44.743704: step: 670/470, loss: 0.005838526878505945 2023-01-22 21:55:45.546739: step: 672/470, loss: 0.000868885894306004 2023-01-22 21:55:46.318169: step: 674/470, loss: 0.015672659501433372 2023-01-22 21:55:47.128093: step: 676/470, loss: 0.005507215391844511 2023-01-22 21:55:47.837176: step: 678/470, loss: 0.04494674503803253 2023-01-22 21:55:48.585540: step: 680/470, loss: 0.027755441144108772 2023-01-22 21:55:49.339918: step: 682/470, loss: 0.003982035908848047 2023-01-22 21:55:50.030465: step: 684/470, loss: 0.0009073261171579361 2023-01-22 21:55:50.779454: step: 686/470, loss: 0.004845160525292158 2023-01-22 21:55:51.482292: step: 688/470, loss: 0.0002821074740495533 2023-01-22 21:55:52.342447: step: 690/470, loss: 0.007127921562641859 2023-01-22 21:55:53.063089: step: 692/470, loss: 0.0001395836443407461 2023-01-22 21:55:53.775931: step: 694/470, loss: 0.0343923382461071 2023-01-22 21:55:54.522906: step: 696/470, loss: 0.06720244139432907 2023-01-22 21:55:55.257177: step: 698/470, loss: 0.02562553994357586 2023-01-22 21:55:55.993767: step: 700/470, loss: 0.07589274644851685 2023-01-22 21:55:56.770266: step: 702/470, loss: 0.03103337623178959 2023-01-22 21:55:57.432278: step: 704/470, loss: 0.1309605836868286 2023-01-22 21:55:58.154422: step: 706/470, loss: 0.04467151314020157 2023-01-22 21:55:58.886667: step: 708/470, loss: 0.0040990193374454975 2023-01-22 21:55:59.577541: step: 710/470, loss: 0.004827653989195824 2023-01-22 21:56:00.314242: step: 712/470, loss: 0.18793350458145142 2023-01-22 21:56:01.062004: step: 714/470, loss: 0.0711977481842041 2023-01-22 21:56:01.745037: step: 716/470, loss: 0.0011849101865664124 2023-01-22 21:56:02.424575: step: 718/470, loss: 0.038444213569164276 2023-01-22 21:56:03.149732: step: 720/470, loss: 0.01529417559504509 2023-01-22 21:56:03.894281: step: 722/470, loss: 0.02028188854455948 2023-01-22 21:56:04.647683: step: 724/470, loss: 0.0011991052888333797 2023-01-22 21:56:05.456064: step: 726/470, loss: 0.007202590350061655 2023-01-22 21:56:06.217545: step: 728/470, loss: 0.03086160123348236 2023-01-22 21:56:06.932005: step: 730/470, loss: 0.005256262607872486 2023-01-22 21:56:07.703934: step: 732/470, loss: 0.07266030460596085 2023-01-22 21:56:08.484986: step: 734/470, loss: 0.003284410573542118 2023-01-22 21:56:09.224297: step: 736/470, loss: 0.018563855439424515 2023-01-22 21:56:09.915822: step: 738/470, loss: 2.060541373793967e-05 2023-01-22 21:56:10.743512: step: 740/470, loss: 0.004683589097112417 2023-01-22 21:56:11.371584: step: 742/470, loss: 0.00043677486246451735 2023-01-22 21:56:12.018726: step: 744/470, loss: 0.0007822015904821455 2023-01-22 21:56:12.787662: step: 746/470, loss: 0.0006057433784008026 2023-01-22 21:56:13.550581: step: 748/470, loss: 0.053732726722955704 2023-01-22 21:56:14.222392: step: 750/470, loss: 0.009779625572264194 2023-01-22 21:56:14.954040: step: 752/470, loss: 0.0024724539835006 2023-01-22 21:56:15.655540: step: 754/470, loss: 0.8649208545684814 2023-01-22 21:56:16.397858: step: 756/470, loss: 0.005285963881760836 2023-01-22 21:56:17.144216: step: 758/470, loss: 0.01545005477964878 2023-01-22 21:56:17.818967: step: 760/470, loss: 0.011997690424323082 2023-01-22 21:56:18.574939: step: 762/470, loss: 0.0027697875630110502 2023-01-22 21:56:19.297277: step: 764/470, loss: 0.000867619295604527 2023-01-22 21:56:19.995699: step: 766/470, loss: 0.0008559745037928224 2023-01-22 21:56:20.682399: step: 768/470, loss: 0.002753552980720997 2023-01-22 21:56:21.449774: step: 770/470, loss: 0.08743966370820999 2023-01-22 21:56:22.148159: step: 772/470, loss: 0.8679112792015076 2023-01-22 21:56:22.913752: step: 774/470, loss: 0.0008498340612277389 2023-01-22 21:56:23.646245: step: 776/470, loss: 0.012493046931922436 2023-01-22 21:56:24.338053: step: 778/470, loss: 0.0022574099712073803 2023-01-22 21:56:25.104638: step: 780/470, loss: 0.09348601847887039 2023-01-22 21:56:25.866844: step: 782/470, loss: 0.09230636805295944 2023-01-22 21:56:26.545692: step: 784/470, loss: 0.0021880273707211018 2023-01-22 21:56:27.319535: step: 786/470, loss: 0.003932863939553499 2023-01-22 21:56:28.081280: step: 788/470, loss: 0.009727765806019306 2023-01-22 21:56:28.871406: step: 790/470, loss: 0.002854890888556838 2023-01-22 21:56:29.744796: step: 792/470, loss: 0.028947800397872925 2023-01-22 21:56:30.523999: step: 794/470, loss: 0.004353455267846584 2023-01-22 21:56:31.232557: step: 796/470, loss: 0.016604196280241013 2023-01-22 21:56:31.959312: step: 798/470, loss: 0.0022141074296087027 2023-01-22 21:56:32.689443: step: 800/470, loss: 0.0014762524515390396 2023-01-22 21:56:33.437953: step: 802/470, loss: 0.008055765181779861 2023-01-22 21:56:34.137605: step: 804/470, loss: 0.0005244429339654744 2023-01-22 21:56:34.848745: step: 806/470, loss: 0.01423501968383789 2023-01-22 21:56:35.486158: step: 808/470, loss: 0.0004736521514132619 2023-01-22 21:56:36.198846: step: 810/470, loss: 0.0029225496109575033 2023-01-22 21:56:36.927151: step: 812/470, loss: 0.006569644436240196 2023-01-22 21:56:37.683044: step: 814/470, loss: 0.003673528553918004 2023-01-22 21:56:38.441243: step: 816/470, loss: 0.0015710997395217419 2023-01-22 21:56:39.250658: step: 818/470, loss: 0.006185244768857956 2023-01-22 21:56:40.030961: step: 820/470, loss: 0.014171771705150604 2023-01-22 21:56:40.719322: step: 822/470, loss: 0.006676795426756144 2023-01-22 21:56:41.435347: step: 824/470, loss: 0.000639638863503933 2023-01-22 21:56:42.113966: step: 826/470, loss: 0.036543309688568115 2023-01-22 21:56:42.800240: step: 828/470, loss: 0.00471093412488699 2023-01-22 21:56:43.466357: step: 830/470, loss: 0.0005620458978228271 2023-01-22 21:56:44.154734: step: 832/470, loss: 0.04118078574538231 2023-01-22 21:56:44.911684: step: 834/470, loss: 0.0009206313407048583 2023-01-22 21:56:45.707428: step: 836/470, loss: 0.006762698758393526 2023-01-22 21:56:46.433841: step: 838/470, loss: 0.01615045592188835 2023-01-22 21:56:47.208025: step: 840/470, loss: 0.00715272594243288 2023-01-22 21:56:47.945025: step: 842/470, loss: 0.028812158852815628 2023-01-22 21:56:48.706745: step: 844/470, loss: 0.0012455545365810394 2023-01-22 21:56:49.433331: step: 846/470, loss: 0.027298571541905403 2023-01-22 21:56:50.113051: step: 848/470, loss: 0.017294684424996376 2023-01-22 21:56:50.772617: step: 850/470, loss: 0.0035474118776619434 2023-01-22 21:56:51.503519: step: 852/470, loss: 0.005111002828925848 2023-01-22 21:56:52.239443: step: 854/470, loss: 0.0006961169419810176 2023-01-22 21:56:52.991284: step: 856/470, loss: 0.036905642598867416 2023-01-22 21:56:53.762747: step: 858/470, loss: 0.0012904554605484009 2023-01-22 21:56:54.508587: step: 860/470, loss: 0.058939360082149506 2023-01-22 21:56:55.250069: step: 862/470, loss: 0.056269776076078415 2023-01-22 21:56:55.940649: step: 864/470, loss: 0.0005217547295615077 2023-01-22 21:56:56.619654: step: 866/470, loss: 0.0017588756745681167 2023-01-22 21:56:57.363998: step: 868/470, loss: 0.0008544818265363574 2023-01-22 21:56:58.046323: step: 870/470, loss: 0.0433480478823185 2023-01-22 21:56:58.800504: step: 872/470, loss: 0.030004724860191345 2023-01-22 21:56:59.527329: step: 874/470, loss: 0.002005601767450571 2023-01-22 21:57:00.165938: step: 876/470, loss: 0.00046739837853237987 2023-01-22 21:57:00.867227: step: 878/470, loss: 0.00015845979214645922 2023-01-22 21:57:01.584776: step: 880/470, loss: 0.0015988233499228954 2023-01-22 21:57:02.281306: step: 882/470, loss: 0.0655452087521553 2023-01-22 21:57:03.073258: step: 884/470, loss: 0.08574583381414413 2023-01-22 21:57:03.854589: step: 886/470, loss: 1.4678356647491455 2023-01-22 21:57:04.576451: step: 888/470, loss: 0.00045837866491638124 2023-01-22 21:57:05.322901: step: 890/470, loss: 0.18071959912776947 2023-01-22 21:57:06.039960: step: 892/470, loss: 0.016763942316174507 2023-01-22 21:57:06.797639: step: 894/470, loss: 0.01770934835076332 2023-01-22 21:57:07.517882: step: 896/470, loss: 0.01703697070479393 2023-01-22 21:57:08.247689: step: 898/470, loss: 0.003155779093503952 2023-01-22 21:57:08.946570: step: 900/470, loss: 0.01867401972413063 2023-01-22 21:57:09.742347: step: 902/470, loss: 0.08417651057243347 2023-01-22 21:57:10.518454: step: 904/470, loss: 0.00018019463459495455 2023-01-22 21:57:11.257166: step: 906/470, loss: 0.016756407916545868 2023-01-22 21:57:12.014022: step: 908/470, loss: 0.02904907800257206 2023-01-22 21:57:12.687323: step: 910/470, loss: 0.0007967533310875297 2023-01-22 21:57:13.392739: step: 912/470, loss: 5.6903561926446855e-05 2023-01-22 21:57:14.124651: step: 914/470, loss: 0.0034690299071371555 2023-01-22 21:57:14.853765: step: 916/470, loss: 0.005849133711308241 2023-01-22 21:57:15.693255: step: 918/470, loss: 0.004884378984570503 2023-01-22 21:57:16.345965: step: 920/470, loss: 0.00029265874763950706 2023-01-22 21:57:17.007985: step: 922/470, loss: 0.0014200083678588271 2023-01-22 21:57:17.730703: step: 924/470, loss: 0.004363223910331726 2023-01-22 21:57:18.519965: step: 926/470, loss: 0.04603644460439682 2023-01-22 21:57:19.264110: step: 928/470, loss: 0.012013577856123447 2023-01-22 21:57:19.979505: step: 930/470, loss: 0.011867745779454708 2023-01-22 21:57:20.666978: step: 932/470, loss: 0.014655977487564087 2023-01-22 21:57:21.369598: step: 934/470, loss: 0.0016832905821502209 2023-01-22 21:57:22.091188: step: 936/470, loss: 0.003247485961765051 2023-01-22 21:57:22.813387: step: 938/470, loss: 0.015403217636048794 2023-01-22 21:57:23.539242: step: 940/470, loss: 0.00028605852276086807 2023-01-22 21:57:24.210924: step: 942/470, loss: 0.0061555225402116776 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2909310605087793, 'r': 0.3516567088123196, 'f1': 0.3184245456084063}, 'combined': 0.23462861255356252, 'epoch': 38} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33380113463939504, 'r': 0.3588362197373497, 'f1': 0.34586623589142135}, 'combined': 0.2409018558447711, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28547473267732565, 'r': 0.3472282801635023, 'f1': 0.3133378487091879}, 'combined': 0.23088052010150686, 'epoch': 38} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.33948870723145785, 'r': 0.3639710659260342, 'f1': 0.3513038594552905}, 'combined': 0.24468925534199343, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27717695074709464, 'r': 0.36553696540651004, 'f1': 0.31528311091527134}, 'combined': 0.23231387120072625, 'epoch': 38} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3286611580684856, 'r': 0.3744841079914956, 'f1': 0.3500795256729487}, 'combined': 0.24383648554334736, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20491803278688525, 'r': 0.35714285714285715, 'f1': 0.2604166666666667}, 'combined': 0.1736111111111111, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2875, 'r': 0.5, 'f1': 0.36507936507936506}, 'combined': 0.18253968253968253, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4605263157894737, 'r': 0.3017241379310345, 'f1': 0.3645833333333333}, 'combined': 0.24305555555555552, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 22:00:02.170564: step: 2/470, loss: 0.0007795958081260324 2023-01-22 22:00:02.924691: step: 4/470, loss: 0.008938697166740894 2023-01-22 22:00:03.716948: step: 6/470, loss: 0.004093769006431103 2023-01-22 22:00:04.448214: step: 8/470, loss: 0.012828747741878033 2023-01-22 22:00:05.152375: step: 10/470, loss: 0.00028150121215730906 2023-01-22 22:00:05.844181: step: 12/470, loss: 4.195710062049329e-05 2023-01-22 22:00:06.613932: step: 14/470, loss: 0.3589991629123688 2023-01-22 22:00:07.297588: step: 16/470, loss: 0.00017055154603440315 2023-01-22 22:00:08.062835: step: 18/470, loss: 0.0029144282452762127 2023-01-22 22:00:08.771215: step: 20/470, loss: 0.007652122061699629 2023-01-22 22:00:09.490520: step: 22/470, loss: 0.03987063094973564 2023-01-22 22:00:10.226542: step: 24/470, loss: 0.032152093946933746 2023-01-22 22:00:10.938125: step: 26/470, loss: 0.20022591948509216 2023-01-22 22:00:11.728484: step: 28/470, loss: 0.17966990172863007 2023-01-22 22:00:12.474296: step: 30/470, loss: 0.001606591627933085 2023-01-22 22:00:13.235828: step: 32/470, loss: 0.012557115405797958 2023-01-22 22:00:13.885164: step: 34/470, loss: 0.003112519159913063 2023-01-22 22:00:14.686980: step: 36/470, loss: 0.08968937397003174 2023-01-22 22:00:15.447113: step: 38/470, loss: 5.763117587775923e-05 2023-01-22 22:00:16.231122: step: 40/470, loss: 0.02070966176688671 2023-01-22 22:00:16.937822: step: 42/470, loss: 0.000932761759031564 2023-01-22 22:00:17.635029: step: 44/470, loss: 0.020521540194749832 2023-01-22 22:00:18.324156: step: 46/470, loss: 0.004408894572407007 2023-01-22 22:00:19.089662: step: 48/470, loss: 0.6617658734321594 2023-01-22 22:00:19.783013: step: 50/470, loss: 0.00338426954112947 2023-01-22 22:00:20.555596: step: 52/470, loss: 0.09528716653585434 2023-01-22 22:00:21.279509: step: 54/470, loss: 0.002500073052942753 2023-01-22 22:00:22.031483: step: 56/470, loss: 0.023632550612092018 2023-01-22 22:00:22.751460: step: 58/470, loss: 0.02701537311077118 2023-01-22 22:00:23.447770: step: 60/470, loss: 0.0013945092214271426 2023-01-22 22:00:24.216676: step: 62/470, loss: 0.0031489867251366377 2023-01-22 22:00:24.890074: step: 64/470, loss: 0.0029364058282226324 2023-01-22 22:00:25.588302: step: 66/470, loss: 9.198818588629365e-05 2023-01-22 22:00:26.273905: step: 68/470, loss: 0.011578256264328957 2023-01-22 22:00:26.982105: step: 70/470, loss: 1.6287378912238637e-06 2023-01-22 22:00:27.675176: step: 72/470, loss: 0.013432585634291172 2023-01-22 22:00:28.382671: step: 74/470, loss: 4.559730768960435e-06 2023-01-22 22:00:29.187089: step: 76/470, loss: 0.01460875291377306 2023-01-22 22:00:29.883840: step: 78/470, loss: 0.007790104486048222 2023-01-22 22:00:30.694968: step: 80/470, loss: 0.001107382820919156 2023-01-22 22:00:31.540504: step: 82/470, loss: 0.15270131826400757 2023-01-22 22:00:32.263290: step: 84/470, loss: 0.0016121534863486886 2023-01-22 22:00:32.966763: step: 86/470, loss: 0.013319252990186214 2023-01-22 22:00:33.715288: step: 88/470, loss: 0.001663623726926744 2023-01-22 22:00:34.591531: step: 90/470, loss: 5.101099668536335e-05 2023-01-22 22:00:35.354372: step: 92/470, loss: 0.3214491605758667 2023-01-22 22:00:36.028841: step: 94/470, loss: 0.0008662448963150382 2023-01-22 22:00:36.790747: step: 96/470, loss: 0.002882634988054633 2023-01-22 22:00:37.561905: step: 98/470, loss: 0.041424382477998734 2023-01-22 22:00:38.310408: step: 100/470, loss: 0.00402632774785161 2023-01-22 22:00:39.078962: step: 102/470, loss: 0.018145432695746422 2023-01-22 22:00:39.780840: step: 104/470, loss: 0.0006512062391266227 2023-01-22 22:00:40.482965: step: 106/470, loss: 0.0001297965063713491 2023-01-22 22:00:41.268774: step: 108/470, loss: 0.13866417109966278 2023-01-22 22:00:41.979447: step: 110/470, loss: 0.0011843107640743256 2023-01-22 22:00:42.833997: step: 112/470, loss: 0.0004619006940629333 2023-01-22 22:00:43.594514: step: 114/470, loss: 0.03009113296866417 2023-01-22 22:00:44.294888: step: 116/470, loss: 0.0007518218480981886 2023-01-22 22:00:44.988660: step: 118/470, loss: 0.00045790887088514864 2023-01-22 22:00:45.683622: step: 120/470, loss: 0.005708106327801943 2023-01-22 22:00:46.443968: step: 122/470, loss: 0.00396349374204874 2023-01-22 22:00:47.121502: step: 124/470, loss: 0.0009232903830707073 2023-01-22 22:00:47.937584: step: 126/470, loss: 0.012349043041467667 2023-01-22 22:00:48.710187: step: 128/470, loss: 0.01509046833962202 2023-01-22 22:00:49.455803: step: 130/470, loss: 0.0012966676149517298 2023-01-22 22:00:50.157470: step: 132/470, loss: 0.0010874420404434204 2023-01-22 22:00:50.895428: step: 134/470, loss: 0.001199746155180037 2023-01-22 22:00:51.519119: step: 136/470, loss: 0.04231145977973938 2023-01-22 22:00:52.243225: step: 138/470, loss: 0.0131779033690691 2023-01-22 22:00:52.945915: step: 140/470, loss: 0.7903496623039246 2023-01-22 22:00:53.770506: step: 142/470, loss: 0.00014183785242494196 2023-01-22 22:00:54.525435: step: 144/470, loss: 0.00482182577252388 2023-01-22 22:00:55.246605: step: 146/470, loss: 0.026487575843930244 2023-01-22 22:00:56.044798: step: 148/470, loss: 0.0030447212047874928 2023-01-22 22:00:56.788713: step: 150/470, loss: 6.557068263646215e-05 2023-01-22 22:00:57.506523: step: 152/470, loss: 0.0004728731291834265 2023-01-22 22:00:58.309975: step: 154/470, loss: 0.0145382359623909 2023-01-22 22:00:58.987801: step: 156/470, loss: 0.0035765781067311764 2023-01-22 22:00:59.720960: step: 158/470, loss: 0.0564517006278038 2023-01-22 22:01:00.426918: step: 160/470, loss: 0.0001131351527874358 2023-01-22 22:01:01.137432: step: 162/470, loss: 0.005984265822917223 2023-01-22 22:01:01.811937: step: 164/470, loss: 0.012996729463338852 2023-01-22 22:01:02.480242: step: 166/470, loss: 0.03707212582230568 2023-01-22 22:01:03.313460: step: 168/470, loss: 0.08814679831266403 2023-01-22 22:01:04.105662: step: 170/470, loss: 0.01129702664911747 2023-01-22 22:01:04.788412: step: 172/470, loss: 4.848052776651457e-05 2023-01-22 22:01:05.597758: step: 174/470, loss: 0.08441205322742462 2023-01-22 22:01:06.339949: step: 176/470, loss: 0.005867713131010532 2023-01-22 22:01:07.046306: step: 178/470, loss: 0.00030190395773388445 2023-01-22 22:01:07.780889: step: 180/470, loss: 0.009309839457273483 2023-01-22 22:01:08.512500: step: 182/470, loss: 0.005093792919069529 2023-01-22 22:01:09.271950: step: 184/470, loss: 0.0002611110976431519 2023-01-22 22:01:10.003478: step: 186/470, loss: 0.0002226200158474967 2023-01-22 22:01:10.773650: step: 188/470, loss: 0.015231126919388771 2023-01-22 22:01:11.439522: step: 190/470, loss: 0.045069869607686996 2023-01-22 22:01:12.111897: step: 192/470, loss: 0.15897539258003235 2023-01-22 22:01:12.823671: step: 194/470, loss: 0.003946701996028423 2023-01-22 22:01:13.509233: step: 196/470, loss: 0.0025108088739216328 2023-01-22 22:01:14.250436: step: 198/470, loss: 0.0012698525097221136 2023-01-22 22:01:15.043645: step: 200/470, loss: 0.6876391172409058 2023-01-22 22:01:15.782238: step: 202/470, loss: 0.004398363176733255 2023-01-22 22:01:16.623269: step: 204/470, loss: 0.01833234541118145 2023-01-22 22:01:17.283386: step: 206/470, loss: 0.0011467249132692814 2023-01-22 22:01:17.984406: step: 208/470, loss: 0.00026933234767057 2023-01-22 22:01:18.740055: step: 210/470, loss: 0.00012024387979181483 2023-01-22 22:01:19.447819: step: 212/470, loss: 0.024397404864430428 2023-01-22 22:01:20.188744: step: 214/470, loss: 0.0003621695504989475 2023-01-22 22:01:20.885656: step: 216/470, loss: 0.0002858054649550468 2023-01-22 22:01:21.657124: step: 218/470, loss: 1.0421897172927856 2023-01-22 22:01:22.426520: step: 220/470, loss: 0.007457996252924204 2023-01-22 22:01:23.132436: step: 222/470, loss: 0.004006984643638134 2023-01-22 22:01:23.782790: step: 224/470, loss: 0.0020408506970852613 2023-01-22 22:01:24.448469: step: 226/470, loss: 0.007179014850407839 2023-01-22 22:01:25.189041: step: 228/470, loss: 0.0010738419368863106 2023-01-22 22:01:25.914782: step: 230/470, loss: 0.0004274783132132143 2023-01-22 22:01:26.621736: step: 232/470, loss: 0.0001489683927502483 2023-01-22 22:01:27.390587: step: 234/470, loss: 0.00039236308657564223 2023-01-22 22:01:28.093013: step: 236/470, loss: 0.0004236626555211842 2023-01-22 22:01:28.887699: step: 238/470, loss: 0.003484722226858139 2023-01-22 22:01:29.605803: step: 240/470, loss: 0.3052813410758972 2023-01-22 22:01:30.297787: step: 242/470, loss: 0.0012881134171038866 2023-01-22 22:01:30.961675: step: 244/470, loss: 0.0002472828491590917 2023-01-22 22:01:31.625399: step: 246/470, loss: 0.002726994687691331 2023-01-22 22:01:32.384814: step: 248/470, loss: 0.06096314638853073 2023-01-22 22:01:33.095343: step: 250/470, loss: 0.006045083049684763 2023-01-22 22:01:33.739465: step: 252/470, loss: 0.003386293537914753 2023-01-22 22:01:34.481238: step: 254/470, loss: 0.0014597978442907333 2023-01-22 22:01:35.205672: step: 256/470, loss: 0.022637123242020607 2023-01-22 22:01:36.070340: step: 258/470, loss: 0.0008558848057873547 2023-01-22 22:01:36.839482: step: 260/470, loss: 0.0035284487530589104 2023-01-22 22:01:37.608818: step: 262/470, loss: 4.839608664042316e-06 2023-01-22 22:01:38.367438: step: 264/470, loss: 0.0002396140480414033 2023-01-22 22:01:39.094061: step: 266/470, loss: 0.0001242422586074099 2023-01-22 22:01:39.840889: step: 268/470, loss: 0.006238142028450966 2023-01-22 22:01:40.542807: step: 270/470, loss: 0.001572537119500339 2023-01-22 22:01:41.202440: step: 272/470, loss: 0.0011656886199489236 2023-01-22 22:01:42.021767: step: 274/470, loss: 0.047970063984394073 2023-01-22 22:01:42.813731: step: 276/470, loss: 0.017633995041251183 2023-01-22 22:01:43.560590: step: 278/470, loss: 0.00019742750737350434 2023-01-22 22:01:44.274600: step: 280/470, loss: 0.12787386775016785 2023-01-22 22:01:45.040104: step: 282/470, loss: 0.009434567764401436 2023-01-22 22:01:45.762479: step: 284/470, loss: 0.013597944751381874 2023-01-22 22:01:46.504270: step: 286/470, loss: 0.00370029010809958 2023-01-22 22:01:47.211741: step: 288/470, loss: 0.04034204035997391 2023-01-22 22:01:47.966986: step: 290/470, loss: 0.0010682768188416958 2023-01-22 22:01:48.698679: step: 292/470, loss: 0.003589589847251773 2023-01-22 22:01:49.450876: step: 294/470, loss: 0.0023420064244419336 2023-01-22 22:01:50.211573: step: 296/470, loss: 0.2466408759355545 2023-01-22 22:01:50.990834: step: 298/470, loss: 0.028866639360785484 2023-01-22 22:01:51.703947: step: 300/470, loss: 0.002564261667430401 2023-01-22 22:01:52.434061: step: 302/470, loss: 0.02908160910010338 2023-01-22 22:01:53.230780: step: 304/470, loss: 0.01281247939914465 2023-01-22 22:01:53.933148: step: 306/470, loss: 0.0089055011048913 2023-01-22 22:01:54.669295: step: 308/470, loss: 0.0027301160153001547 2023-01-22 22:01:55.387698: step: 310/470, loss: 0.0009317622752860188 2023-01-22 22:01:56.065523: step: 312/470, loss: 0.0025892360135912895 2023-01-22 22:01:56.840575: step: 314/470, loss: 0.00038833668804727495 2023-01-22 22:01:57.580348: step: 316/470, loss: 0.011556989513337612 2023-01-22 22:01:58.282204: step: 318/470, loss: 0.0006725000566802919 2023-01-22 22:01:59.008002: step: 320/470, loss: 0.005721176974475384 2023-01-22 22:01:59.721268: step: 322/470, loss: 0.000432559143519029 2023-01-22 22:02:00.506663: step: 324/470, loss: 0.026360701769590378 2023-01-22 22:02:01.314776: step: 326/470, loss: 0.002206456381827593 2023-01-22 22:02:02.019973: step: 328/470, loss: 0.0021658348850905895 2023-01-22 22:02:02.737916: step: 330/470, loss: 0.0009932523826137185 2023-01-22 22:02:03.470752: step: 332/470, loss: 0.01891978457570076 2023-01-22 22:02:04.229211: step: 334/470, loss: 0.0009264256223104894 2023-01-22 22:02:05.021984: step: 336/470, loss: 0.03548232465982437 2023-01-22 22:02:05.737688: step: 338/470, loss: 0.0002263460773974657 2023-01-22 22:02:06.374875: step: 340/470, loss: 0.0013739075511693954 2023-01-22 22:02:07.110865: step: 342/470, loss: 0.29935917258262634 2023-01-22 22:02:07.899470: step: 344/470, loss: 0.007482586428523064 2023-01-22 22:02:08.548093: step: 346/470, loss: 0.012100168503820896 2023-01-22 22:02:09.258593: step: 348/470, loss: 0.00046900202869437635 2023-01-22 22:02:10.028544: step: 350/470, loss: 0.0212935209274292 2023-01-22 22:02:10.771977: step: 352/470, loss: 0.000681842677295208 2023-01-22 22:02:11.541005: step: 354/470, loss: 0.006671892944723368 2023-01-22 22:02:12.332705: step: 356/470, loss: 0.024509701877832413 2023-01-22 22:02:13.038769: step: 358/470, loss: 0.0007479641353711486 2023-01-22 22:02:13.783258: step: 360/470, loss: 0.06951668113470078 2023-01-22 22:02:14.578008: step: 362/470, loss: 0.02821964956820011 2023-01-22 22:02:15.281928: step: 364/470, loss: 0.005078599322587252 2023-01-22 22:02:16.057398: step: 366/470, loss: 0.021827857941389084 2023-01-22 22:02:16.818154: step: 368/470, loss: 0.0009424221352674067 2023-01-22 22:02:17.536744: step: 370/470, loss: 0.38306307792663574 2023-01-22 22:02:18.285474: step: 372/470, loss: 0.0023595362436026335 2023-01-22 22:02:19.031388: step: 374/470, loss: 0.01760093681514263 2023-01-22 22:02:19.715810: step: 376/470, loss: 0.0019117107149213552 2023-01-22 22:02:20.418627: step: 378/470, loss: 0.010603736154735088 2023-01-22 22:02:21.131628: step: 380/470, loss: 0.0070642814971506596 2023-01-22 22:02:21.900623: step: 382/470, loss: 0.0003894304682034999 2023-01-22 22:02:22.635137: step: 384/470, loss: 0.003691543824970722 2023-01-22 22:02:23.370036: step: 386/470, loss: 0.07347030937671661 2023-01-22 22:02:24.155481: step: 388/470, loss: 0.0005422882386483252 2023-01-22 22:02:24.864133: step: 390/470, loss: 0.10034418106079102 2023-01-22 22:02:25.490777: step: 392/470, loss: 0.0036059198901057243 2023-01-22 22:02:26.288284: step: 394/470, loss: 0.000504388939589262 2023-01-22 22:02:26.952919: step: 396/470, loss: 0.01450659055262804 2023-01-22 22:02:27.649306: step: 398/470, loss: 0.0029942444525659084 2023-01-22 22:02:28.330824: step: 400/470, loss: 0.0008541368297301233 2023-01-22 22:02:29.042456: step: 402/470, loss: 0.017634334042668343 2023-01-22 22:02:29.727727: step: 404/470, loss: 0.0022887280210852623 2023-01-22 22:02:30.462524: step: 406/470, loss: 0.03916192799806595 2023-01-22 22:02:31.183468: step: 408/470, loss: 0.16810278594493866 2023-01-22 22:02:31.910639: step: 410/470, loss: 0.0020672930404543877 2023-01-22 22:02:32.665183: step: 412/470, loss: 0.01781904324889183 2023-01-22 22:02:33.330443: step: 414/470, loss: 0.001842794707044959 2023-01-22 22:02:34.071183: step: 416/470, loss: 0.021762700751423836 2023-01-22 22:02:34.792531: step: 418/470, loss: 0.013885277323424816 2023-01-22 22:02:35.437098: step: 420/470, loss: 0.0006842563161626458 2023-01-22 22:02:36.144736: step: 422/470, loss: 0.0057321698404848576 2023-01-22 22:02:36.814809: step: 424/470, loss: 0.009932457469403744 2023-01-22 22:02:37.612663: step: 426/470, loss: 0.008049868047237396 2023-01-22 22:02:38.316979: step: 428/470, loss: 0.005516226403415203 2023-01-22 22:02:39.080591: step: 430/470, loss: 0.0049128164537250996 2023-01-22 22:02:39.878891: step: 432/470, loss: 0.1927299201488495 2023-01-22 22:02:40.561079: step: 434/470, loss: 0.007491968106478453 2023-01-22 22:02:41.251625: step: 436/470, loss: 0.009216003119945526 2023-01-22 22:02:41.906167: step: 438/470, loss: 0.00041842067730613053 2023-01-22 22:02:42.599334: step: 440/470, loss: 0.0009216439793817699 2023-01-22 22:02:43.306067: step: 442/470, loss: 0.14297710359096527 2023-01-22 22:02:44.043940: step: 444/470, loss: 0.04162221401929855 2023-01-22 22:02:44.778024: step: 446/470, loss: 0.01259060762822628 2023-01-22 22:02:45.523003: step: 448/470, loss: 0.02452508918941021 2023-01-22 22:02:46.220244: step: 450/470, loss: 0.008215454407036304 2023-01-22 22:02:46.963830: step: 452/470, loss: 9.857612894847989e-05 2023-01-22 22:02:47.686421: step: 454/470, loss: 0.004926904104650021 2023-01-22 22:02:48.381583: step: 456/470, loss: 0.00019339239224791527 2023-01-22 22:02:49.037490: step: 458/470, loss: 0.0010940470965579152 2023-01-22 22:02:49.795433: step: 460/470, loss: 0.0007743262685835361 2023-01-22 22:02:50.610041: step: 462/470, loss: 0.00038364637293852866 2023-01-22 22:02:51.380370: step: 464/470, loss: 0.001179278246127069 2023-01-22 22:02:52.120934: step: 466/470, loss: 0.007702074479311705 2023-01-22 22:02:52.873219: step: 468/470, loss: 0.00043093261774629354 2023-01-22 22:02:53.606244: step: 470/470, loss: 0.05112699419260025 2023-01-22 22:02:54.405744: step: 472/470, loss: 0.015505004674196243 2023-01-22 22:02:55.116260: step: 474/470, loss: 0.0007698743138462305 2023-01-22 22:02:55.867858: step: 476/470, loss: 0.15226824581623077 2023-01-22 22:02:56.566376: step: 478/470, loss: 0.00011416709457989782 2023-01-22 22:02:57.251945: step: 480/470, loss: 0.009174869395792484 2023-01-22 22:02:58.020254: step: 482/470, loss: 0.06859564036130905 2023-01-22 22:02:58.821885: step: 484/470, loss: 0.0012576787266880274 2023-01-22 22:02:59.507872: step: 486/470, loss: 0.0073244026862084866 2023-01-22 22:03:00.245641: step: 488/470, loss: 0.0008550825295969844 2023-01-22 22:03:00.959699: step: 490/470, loss: 0.05312467738986015 2023-01-22 22:03:01.672035: step: 492/470, loss: 0.022805843502283096 2023-01-22 22:03:02.431854: step: 494/470, loss: 0.005543670151382685 2023-01-22 22:03:03.137982: step: 496/470, loss: 0.0007484982488676906 2023-01-22 22:03:03.835700: step: 498/470, loss: 0.06718970835208893 2023-01-22 22:03:04.565433: step: 500/470, loss: 0.013428159058094025 2023-01-22 22:03:05.283364: step: 502/470, loss: 0.027854073792696 2023-01-22 22:03:05.992032: step: 504/470, loss: 0.010355038568377495 2023-01-22 22:03:06.750007: step: 506/470, loss: 0.02420848049223423 2023-01-22 22:03:07.440858: step: 508/470, loss: 2.1866453607799485e-05 2023-01-22 22:03:08.149294: step: 510/470, loss: 0.010722950100898743 2023-01-22 22:03:08.842212: step: 512/470, loss: 0.0001193592615891248 2023-01-22 22:03:09.589088: step: 514/470, loss: 0.4225482940673828 2023-01-22 22:03:10.339435: step: 516/470, loss: 0.015295075252652168 2023-01-22 22:03:11.091770: step: 518/470, loss: 0.32571691274642944 2023-01-22 22:03:11.805280: step: 520/470, loss: 0.0004058307677041739 2023-01-22 22:03:12.560079: step: 522/470, loss: 0.019950976595282555 2023-01-22 22:03:13.302175: step: 524/470, loss: 0.014127479866147041 2023-01-22 22:03:14.074235: step: 526/470, loss: 0.000844079302623868 2023-01-22 22:03:14.730573: step: 528/470, loss: 0.0007464765221811831 2023-01-22 22:03:15.477265: step: 530/470, loss: 0.0009180636261589825 2023-01-22 22:03:16.178540: step: 532/470, loss: 0.0004960569203831255 2023-01-22 22:03:16.866535: step: 534/470, loss: 0.0015324270352721214 2023-01-22 22:03:17.687335: step: 536/470, loss: 0.74675053358078 2023-01-22 22:03:18.328575: step: 538/470, loss: 0.0016997962957248092 2023-01-22 22:03:19.061102: step: 540/470, loss: 0.03810073807835579 2023-01-22 22:03:19.768842: step: 542/470, loss: 0.02312638983130455 2023-01-22 22:03:20.557793: step: 544/470, loss: 0.20076841115951538 2023-01-22 22:03:21.272500: step: 546/470, loss: 0.044742703437805176 2023-01-22 22:03:21.977044: step: 548/470, loss: 0.033179204910993576 2023-01-22 22:03:22.723439: step: 550/470, loss: 0.048377711325883865 2023-01-22 22:03:23.503274: step: 552/470, loss: 0.006665708031505346 2023-01-22 22:03:24.287346: step: 554/470, loss: 0.011248448863625526 2023-01-22 22:03:24.985515: step: 556/470, loss: 0.010977053083479404 2023-01-22 22:03:25.761243: step: 558/470, loss: 0.007646726910024881 2023-01-22 22:03:26.502475: step: 560/470, loss: 0.0020836268085986376 2023-01-22 22:03:27.201371: step: 562/470, loss: 0.00017479869711678475 2023-01-22 22:03:27.963155: step: 564/470, loss: 0.002762383548542857 2023-01-22 22:03:28.684164: step: 566/470, loss: 0.0010429834946990013 2023-01-22 22:03:29.395264: step: 568/470, loss: 0.02210627682507038 2023-01-22 22:03:30.120481: step: 570/470, loss: 3.689844015752897e-05 2023-01-22 22:03:30.812333: step: 572/470, loss: 0.06625575572252274 2023-01-22 22:03:31.468021: step: 574/470, loss: 0.006409522611647844 2023-01-22 22:03:32.294933: step: 576/470, loss: 0.0005805494729429483 2023-01-22 22:03:33.065958: step: 578/470, loss: 0.012947368435561657 2023-01-22 22:03:33.815609: step: 580/470, loss: 0.00042535990360192955 2023-01-22 22:03:34.476409: step: 582/470, loss: 0.00019787903875112534 2023-01-22 22:03:35.147179: step: 584/470, loss: 0.14290878176689148 2023-01-22 22:03:35.948090: step: 586/470, loss: 0.013853715732693672 2023-01-22 22:03:36.673709: step: 588/470, loss: 0.030745120719075203 2023-01-22 22:03:37.333900: step: 590/470, loss: 0.0006633042357861996 2023-01-22 22:03:38.051024: step: 592/470, loss: 0.022054284811019897 2023-01-22 22:03:38.856596: step: 594/470, loss: 0.0035873970482498407 2023-01-22 22:03:39.629520: step: 596/470, loss: 0.001877595204859972 2023-01-22 22:03:40.377224: step: 598/470, loss: 0.021728595718741417 2023-01-22 22:03:41.131033: step: 600/470, loss: 0.0009449566714465618 2023-01-22 22:03:41.880589: step: 602/470, loss: 0.012191184796392918 2023-01-22 22:03:42.592592: step: 604/470, loss: 0.21409592032432556 2023-01-22 22:03:43.254506: step: 606/470, loss: 0.0021795283537358046 2023-01-22 22:03:44.012869: step: 608/470, loss: 0.08947306126356125 2023-01-22 22:03:44.731618: step: 610/470, loss: 0.027726903557777405 2023-01-22 22:03:45.466367: step: 612/470, loss: 0.004544347990304232 2023-01-22 22:03:46.168244: step: 614/470, loss: 0.006356291007250547 2023-01-22 22:03:46.958246: step: 616/470, loss: 0.0020529376342892647 2023-01-22 22:03:47.714919: step: 618/470, loss: 0.0441039502620697 2023-01-22 22:03:48.389219: step: 620/470, loss: 0.05852342024445534 2023-01-22 22:03:49.107819: step: 622/470, loss: 0.0003269554581493139 2023-01-22 22:03:49.821607: step: 624/470, loss: 0.015510090626776218 2023-01-22 22:03:50.497408: step: 626/470, loss: 0.00037635324406437576 2023-01-22 22:03:51.226941: step: 628/470, loss: 0.002281700260937214 2023-01-22 22:03:51.911950: step: 630/470, loss: 0.010925859212875366 2023-01-22 22:03:52.647400: step: 632/470, loss: 0.003865706268697977 2023-01-22 22:03:53.367895: step: 634/470, loss: 0.0007639245595782995 2023-01-22 22:03:54.140384: step: 636/470, loss: 0.04122069478034973 2023-01-22 22:03:54.844898: step: 638/470, loss: 0.013298324309289455 2023-01-22 22:03:55.551052: step: 640/470, loss: 0.007208535913378 2023-01-22 22:03:56.355144: step: 642/470, loss: 0.5793889760971069 2023-01-22 22:03:57.156238: step: 644/470, loss: 0.010782578960061073 2023-01-22 22:03:57.881404: step: 646/470, loss: 0.003287682542577386 2023-01-22 22:03:58.588397: step: 648/470, loss: 0.005844367202371359 2023-01-22 22:03:59.281404: step: 650/470, loss: 0.00159078452270478 2023-01-22 22:03:59.977561: step: 652/470, loss: 0.007373278960585594 2023-01-22 22:04:00.630071: step: 654/470, loss: 0.0001302184391533956 2023-01-22 22:04:01.328028: step: 656/470, loss: 0.002802535193040967 2023-01-22 22:04:02.063225: step: 658/470, loss: 0.028815504163503647 2023-01-22 22:04:02.835474: step: 660/470, loss: 0.0023840556386858225 2023-01-22 22:04:03.567024: step: 662/470, loss: 0.00024311590823344886 2023-01-22 22:04:04.243621: step: 664/470, loss: 1.6967294868663885e-05 2023-01-22 22:04:04.904737: step: 666/470, loss: 0.00038249947829172015 2023-01-22 22:04:05.665276: step: 668/470, loss: 0.0011958489194512367 2023-01-22 22:04:06.471863: step: 670/470, loss: 0.0026525375433266163 2023-01-22 22:04:07.134482: step: 672/470, loss: 0.0011103339493274689 2023-01-22 22:04:07.845628: step: 674/470, loss: 7.855349394958466e-05 2023-01-22 22:04:08.622023: step: 676/470, loss: 0.036018408834934235 2023-01-22 22:04:09.404404: step: 678/470, loss: 0.03961295261979103 2023-01-22 22:04:10.083143: step: 680/470, loss: 0.009320229291915894 2023-01-22 22:04:10.761108: step: 682/470, loss: 0.0022059644106775522 2023-01-22 22:04:11.537967: step: 684/470, loss: 0.07972223311662674 2023-01-22 22:04:12.279666: step: 686/470, loss: 0.003704060334712267 2023-01-22 22:04:13.057668: step: 688/470, loss: 0.5761838555335999 2023-01-22 22:04:13.773090: step: 690/470, loss: 0.002108166925609112 2023-01-22 22:04:14.523749: step: 692/470, loss: 0.001077714143320918 2023-01-22 22:04:15.265369: step: 694/470, loss: 0.03051694482564926 2023-01-22 22:04:15.974596: step: 696/470, loss: 2.4262013539555483e-05 2023-01-22 22:04:16.688050: step: 698/470, loss: 0.009325804188847542 2023-01-22 22:04:17.459623: step: 700/470, loss: 0.0015849667834118009 2023-01-22 22:04:18.168599: step: 702/470, loss: 0.005560775753110647 2023-01-22 22:04:18.969699: step: 704/470, loss: 0.05311994254589081 2023-01-22 22:04:19.738299: step: 706/470, loss: 0.00885532982647419 2023-01-22 22:04:20.468018: step: 708/470, loss: 0.025782722979784012 2023-01-22 22:04:21.142568: step: 710/470, loss: 0.00032764862407930195 2023-01-22 22:04:21.833043: step: 712/470, loss: 0.0006452351808547974 2023-01-22 22:04:22.579535: step: 714/470, loss: 0.018562477082014084 2023-01-22 22:04:23.332219: step: 716/470, loss: 0.004105889238417149 2023-01-22 22:04:24.079485: step: 718/470, loss: 0.004994503688067198 2023-01-22 22:04:24.808029: step: 720/470, loss: 0.015740545466542244 2023-01-22 22:04:25.555337: step: 722/470, loss: 0.002721569035202265 2023-01-22 22:04:26.329005: step: 724/470, loss: 0.0018496755510568619 2023-01-22 22:04:27.030764: step: 726/470, loss: 0.029428904876112938 2023-01-22 22:04:27.833401: step: 728/470, loss: 0.0025027492083609104 2023-01-22 22:04:28.540286: step: 730/470, loss: 0.0005519500700756907 2023-01-22 22:04:29.253840: step: 732/470, loss: 0.00124834175221622 2023-01-22 22:04:29.992308: step: 734/470, loss: 0.025663498789072037 2023-01-22 22:04:30.716290: step: 736/470, loss: 0.00022738105326425284 2023-01-22 22:04:31.580799: step: 738/470, loss: 0.04520634189248085 2023-01-22 22:04:32.300666: step: 740/470, loss: 0.00048320789937861264 2023-01-22 22:04:33.099205: step: 742/470, loss: 0.001786353881470859 2023-01-22 22:04:33.796373: step: 744/470, loss: 0.011439421214163303 2023-01-22 22:04:34.546646: step: 746/470, loss: 0.035814620554447174 2023-01-22 22:04:35.233769: step: 748/470, loss: 0.46565985679626465 2023-01-22 22:04:36.166932: step: 750/470, loss: 0.0016404861817136407 2023-01-22 22:04:36.946146: step: 752/470, loss: 0.006969146430492401 2023-01-22 22:04:37.721366: step: 754/470, loss: 0.011821585707366467 2023-01-22 22:04:38.515851: step: 756/470, loss: 0.0448044054210186 2023-01-22 22:04:39.284567: step: 758/470, loss: 0.0014806183753535151 2023-01-22 22:04:40.010232: step: 760/470, loss: 0.25201913714408875 2023-01-22 22:04:40.686280: step: 762/470, loss: 0.026176057755947113 2023-01-22 22:04:41.392248: step: 764/470, loss: 0.0018919870490208268 2023-01-22 22:04:42.120249: step: 766/470, loss: 6.11542709521018e-05 2023-01-22 22:04:42.952254: step: 768/470, loss: 0.06037821248173714 2023-01-22 22:04:43.680161: step: 770/470, loss: 0.005792189389467239 2023-01-22 22:04:44.402600: step: 772/470, loss: 0.00010853780258912593 2023-01-22 22:04:45.086936: step: 774/470, loss: 3.3160622479044832e-06 2023-01-22 22:04:45.872517: step: 776/470, loss: 0.048592712730169296 2023-01-22 22:04:46.610963: step: 778/470, loss: 0.0152989961206913 2023-01-22 22:04:47.327292: step: 780/470, loss: 0.027926130220294 2023-01-22 22:04:48.020189: step: 782/470, loss: 0.0008958295802585781 2023-01-22 22:04:48.636177: step: 784/470, loss: 0.00017820145876612514 2023-01-22 22:04:49.365395: step: 786/470, loss: 0.028859004378318787 2023-01-22 22:04:50.043968: step: 788/470, loss: 0.009325975552201271 2023-01-22 22:04:50.687819: step: 790/470, loss: 0.018157802522182465 2023-01-22 22:04:51.390814: step: 792/470, loss: 5.18089764227625e-05 2023-01-22 22:04:52.152726: step: 794/470, loss: 0.0016927659744396806 2023-01-22 22:04:52.863655: step: 796/470, loss: 0.004147370811551809 2023-01-22 22:04:53.550834: step: 798/470, loss: 0.002366115804761648 2023-01-22 22:04:54.295721: step: 800/470, loss: 0.08526662737131119 2023-01-22 22:04:54.971690: step: 802/470, loss: 0.02207419089972973 2023-01-22 22:04:55.805657: step: 804/470, loss: 0.00026135783991776407 2023-01-22 22:04:56.483226: step: 806/470, loss: 0.017718037590384483 2023-01-22 22:04:57.256671: step: 808/470, loss: 0.009163436479866505 2023-01-22 22:04:57.966641: step: 810/470, loss: 0.049685999751091 2023-01-22 22:04:58.744821: step: 812/470, loss: 0.004708106629550457 2023-01-22 22:04:59.426470: step: 814/470, loss: 0.01260793674737215 2023-01-22 22:05:00.115675: step: 816/470, loss: 0.7914705276489258 2023-01-22 22:05:00.868279: step: 818/470, loss: 0.007497282233089209 2023-01-22 22:05:01.644645: step: 820/470, loss: 0.0009469312499277294 2023-01-22 22:05:02.400587: step: 822/470, loss: 0.001793618779629469 2023-01-22 22:05:03.101639: step: 824/470, loss: 0.025681249797344208 2023-01-22 22:05:03.837690: step: 826/470, loss: 0.037340566515922546 2023-01-22 22:05:04.576750: step: 828/470, loss: 0.00744800828397274 2023-01-22 22:05:05.341530: step: 830/470, loss: 1.3749153367825784e-05 2023-01-22 22:05:06.027300: step: 832/470, loss: 0.014599119313061237 2023-01-22 22:05:06.810867: step: 834/470, loss: 0.0006064804038032889 2023-01-22 22:05:07.531485: step: 836/470, loss: 0.0008935255464166403 2023-01-22 22:05:08.265002: step: 838/470, loss: 0.003527364693582058 2023-01-22 22:05:09.065349: step: 840/470, loss: 0.0005473060882650316 2023-01-22 22:05:09.887098: step: 842/470, loss: 0.10252714157104492 2023-01-22 22:05:10.614277: step: 844/470, loss: 0.004621810279786587 2023-01-22 22:05:11.473738: step: 846/470, loss: 0.0056939744390547276 2023-01-22 22:05:12.162384: step: 848/470, loss: 0.021790560334920883 2023-01-22 22:05:12.887249: step: 850/470, loss: 0.011583208106458187 2023-01-22 22:05:13.578872: step: 852/470, loss: 0.002285032533109188 2023-01-22 22:05:14.300107: step: 854/470, loss: 0.0009343082783743739 2023-01-22 22:05:15.061367: step: 856/470, loss: 0.006179484073072672 2023-01-22 22:05:15.754981: step: 858/470, loss: 0.007022823207080364 2023-01-22 22:05:16.516628: step: 860/470, loss: 0.01619824767112732 2023-01-22 22:05:17.236056: step: 862/470, loss: 0.0002929774345830083 2023-01-22 22:05:17.963895: step: 864/470, loss: 0.01771281473338604 2023-01-22 22:05:18.658431: step: 866/470, loss: 1.254774360859301e-05 2023-01-22 22:05:19.451764: step: 868/470, loss: 0.001473193638958037 2023-01-22 22:05:20.194322: step: 870/470, loss: 0.001364637166261673 2023-01-22 22:05:20.872058: step: 872/470, loss: 0.0011659307638183236 2023-01-22 22:05:21.560118: step: 874/470, loss: 0.0024868298787623644 2023-01-22 22:05:22.179440: step: 876/470, loss: 0.0011807549744844437 2023-01-22 22:05:22.821182: step: 878/470, loss: 0.00360662373714149 2023-01-22 22:05:23.578028: step: 880/470, loss: 0.0015713890315964818 2023-01-22 22:05:24.255559: step: 882/470, loss: 6.543661584146321e-05 2023-01-22 22:05:24.965016: step: 884/470, loss: 0.004186991136521101 2023-01-22 22:05:25.692151: step: 886/470, loss: 0.0009998814202845097 2023-01-22 22:05:26.419809: step: 888/470, loss: 0.0374150276184082 2023-01-22 22:05:27.271773: step: 890/470, loss: 0.01954301819205284 2023-01-22 22:05:28.001337: step: 892/470, loss: 0.0002561210421845317 2023-01-22 22:05:28.758930: step: 894/470, loss: 0.0005132320802658796 2023-01-22 22:05:29.495350: step: 896/470, loss: 0.07816541939973831 2023-01-22 22:05:30.267169: step: 898/470, loss: 0.014139272272586823 2023-01-22 22:05:30.991710: step: 900/470, loss: 0.00017748454411048442 2023-01-22 22:05:31.792533: step: 902/470, loss: 0.0016783780883997679 2023-01-22 22:05:32.565434: step: 904/470, loss: 0.022887928411364555 2023-01-22 22:05:33.277998: step: 906/470, loss: 0.0025164291728287935 2023-01-22 22:05:33.979530: step: 908/470, loss: 0.0015736209461465478 2023-01-22 22:05:34.672663: step: 910/470, loss: 5.445835540740518e-06 2023-01-22 22:05:35.432646: step: 912/470, loss: 0.003006444312632084 2023-01-22 22:05:36.200308: step: 914/470, loss: 0.05807241424918175 2023-01-22 22:05:37.036823: step: 916/470, loss: 0.008495689369738102 2023-01-22 22:05:37.708568: step: 918/470, loss: 0.0016545577673241496 2023-01-22 22:05:38.431325: step: 920/470, loss: 0.0005608369829133153 2023-01-22 22:05:39.178700: step: 922/470, loss: 0.1146390438079834 2023-01-22 22:05:39.928345: step: 924/470, loss: 0.00022846035426482558 2023-01-22 22:05:40.592273: step: 926/470, loss: 0.14736011624336243 2023-01-22 22:05:41.249122: step: 928/470, loss: 0.00010952012962661684 2023-01-22 22:05:42.043777: step: 930/470, loss: 0.3251962661743164 2023-01-22 22:05:42.858949: step: 932/470, loss: 0.005167566705495119 2023-01-22 22:05:43.590108: step: 934/470, loss: 0.03677722439169884 2023-01-22 22:05:44.456533: step: 936/470, loss: 0.00461982935667038 2023-01-22 22:05:45.236959: step: 938/470, loss: 7.254021329572424e-05 2023-01-22 22:05:45.991813: step: 940/470, loss: 0.014099986292421818 2023-01-22 22:05:46.637525: step: 942/470, loss: 0.00016049730766098946 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.290494644497469, 'r': 0.3428608517598211, 'f1': 0.3145129136247619}, 'combined': 0.2317463574077193, 'epoch': 39} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35179975638777394, 'r': 0.3676983992245291, 'f1': 0.359573422842981}, 'combined': 0.25044915023889225, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2817969659652227, 'r': 0.34596325802561495, 'f1': 0.31060074442844815}, 'combined': 0.2288637064209618, 'epoch': 39} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35111378537428956, 'r': 0.36293011468977043, 'f1': 0.35692417898568446}, 'combined': 0.24860390576117328, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2717336703062838, 'r': 0.36042094031137073, 'f1': 0.3098561754389762}, 'combined': 0.2283150766392456, 'epoch': 39} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.34380852167795695, 'r': 0.3795117143137448, 'f1': 0.3607789605907629}, 'combined': 0.2512888282721732, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20161290322580644, 'r': 0.35714285714285715, 'f1': 0.25773195876288657}, 'combined': 0.17182130584192437, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26973684210526316, 'r': 0.44565217391304346, 'f1': 0.3360655737704918}, 'combined': 0.1680327868852459, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2923636226642556, 'r': 0.3067876344086022, 'f1': 0.2994020061728395}, 'combined': 0.22061200454840804, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3454664828223071, 'r': 0.36074673110098604, 'f1': 0.352941298537183}, 'combined': 0.24582976017515235, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.375, 'f1': 0.31626506024096385}, 'combined': 0.2108433734939759, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551046597601705, 'r': 0.32638025112807895, 'f1': 0.3156007198981608}, 'combined': 0.232547898872329, 'epoch': 18} Test for Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.35064055717249865, 'r': 0.36344011641606727, 'f1': 0.3569256237633264}, 'combined': 0.2486049120739587, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.5, 'f1': 0.3770491803278689}, 'combined': 0.18852459016393444, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2827471864951769, 'r': 0.3337167931688805, 'f1': 0.3061248912097476}, 'combined': 0.2255657093124456, 'epoch': 17} Test for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5511811023622047, 'f1': 0.6965174129353234}, 'slot': {'p': 0.3364419482005735, 'r': 0.34355898941250873, 'f1': 0.33996322453759187}, 'combined': 0.23679030564807396, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4673913043478261, 'r': 0.3706896551724138, 'f1': 0.4134615384615385}, 'combined': 0.27564102564102566, 'epoch': 17}