Command that produces this log: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024])
>>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0 through xlmr.encoder.layer.23 (each of the 24 encoder layers has the same 16 tensors):
        attention.self.query.weight: torch.Size([1024, 1024])
        attention.self.query.bias: torch.Size([1024])
        attention.self.key.weight: torch.Size([1024, 1024])
        attention.self.key.bias: torch.Size([1024])
        attention.self.value.weight: torch.Size([1024, 1024])
        attention.self.value.bias: torch.Size([1024])
        attention.output.dense.weight: torch.Size([1024, 1024])
        attention.output.dense.bias: torch.Size([1024])
        attention.output.LayerNorm.weight: torch.Size([1024])
        attention.output.LayerNorm.bias: torch.Size([1024])
        intermediate.dense.weight: torch.Size([4096, 1024])
        intermediate.dense.bias: torch.Size([4096])
        output.dense.weight: torch.Size([1024, 4096])
        output.dense.bias: torch.Size([1024])
        output.LayerNorm.weight: torch.Size([1024])
        output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024])
>>> xlmr.pooler.dense.bias: torch.Size([1024])
>>> type_embedding.weight: torch.Size([123, 100])
>>> trans_rep.weight: torch.Size([1024, 1124])
>>> trans_rep.bias: torch.Size([1024])
>>> coref_type_ffn.weight: torch.Size([3, 4096])
>>> coref_type_ffn.bias: torch.Size([3])
n_trainable_params: 561067023, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
******************************
Epoch: 0
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 14:36:30.071973: step: 4/531, loss: 0.018170563504099846 2023-01-22 14:36:31.112334: step: 8/531, loss: 0.011322838254272938 2023-01-22 14:36:32.169224: step: 12/531, loss: 0.04829345643520355 2023-01-22 14:36:33.211655: step: 16/531, loss: 0.009348283521831036 2023-01-22 14:36:34.264381: step: 20/531, loss: 0.017068643122911453 2023-01-22 14:36:35.313784: step: 24/531, loss: 0.018864022567868233 2023-01-22 14:36:36.368959: step: 28/531, loss: 0.017118431627750397 2023-01-22 14:36:37.426109: step: 32/531, loss: 0.01312822476029396 2023-01-22 14:36:38.474480: step: 36/531, loss: 0.016625747084617615 2023-01-22 14:36:39.528695: step: 40/531, loss: 0.01186138391494751 2023-01-22 14:36:40.572857: step: 44/531, loss: 0.018956823274493217 2023-01-22 14:36:41.618573: step: 48/531, loss: 0.014355204999446869 2023-01-22 14:36:42.682509: step: 52/531, loss: 0.013917816802859306 2023-01-22 14:36:43.743892: step: 56/531, loss: 0.02647165022790432 2023-01-22 14:36:44.793150: step: 60/531, loss: 0.061029136180877686 2023-01-22 14:36:45.838321: step: 64/531, loss: 0.0702832043170929 2023-01-22 14:36:46.896188: step: 68/531, loss: 0.023450976237654686 2023-01-22 14:36:47.939813: step: 72/531, loss: 0.049824126064777374 2023-01-22 14:36:48.995963: step: 76/531, loss: 0.03386794403195381 2023-01-22 14:36:50.054920: step: 80/531, loss: 0.020017804577946663 2023-01-22 14:36:51.109980: step: 84/531, loss: 0.019348086789250374 2023-01-22 14:36:52.148182: step: 88/531, loss: 0.05341392382979393 2023-01-22 14:36:53.216071: step: 92/531, loss: 0.058558326214551926 2023-01-22 14:36:54.264778: step: 96/531, loss: 0.010691803880035877 2023-01-22 14:36:55.333411: step: 100/531, loss: 0.01755356788635254 2023-01-22 14:36:56.388033: step: 104/531, loss: 0.015221161767840385 2023-01-22 14:36:57.431099: step: 108/531, loss: 0.060155000537633896 2023-01-22 14:36:58.475445: step: 112/531, loss: 0.07014718651771545 2023-01-22 14:36:59.522122: step: 116/531, loss: 0.01248928252607584 2023-01-22 14:37:00.581478: step: 120/531, loss: 
0.012406772002577782 2023-01-22 14:37:01.637203: step: 124/531, loss: 0.02380838617682457 2023-01-22 14:37:02.695166: step: 128/531, loss: 0.015290040522813797 2023-01-22 14:37:03.745655: step: 132/531, loss: 0.008468336425721645 2023-01-22 14:37:04.795522: step: 136/531, loss: 0.007525733672082424 2023-01-22 14:37:05.851127: step: 140/531, loss: 0.04454159736633301 2023-01-22 14:37:06.900794: step: 144/531, loss: 0.02104450948536396 2023-01-22 14:37:07.930891: step: 148/531, loss: 0.03128483146429062 2023-01-22 14:37:08.992856: step: 152/531, loss: 0.01681424491107464 2023-01-22 14:37:10.056809: step: 156/531, loss: 0.05191762372851372 2023-01-22 14:37:11.118200: step: 160/531, loss: 0.02295805886387825 2023-01-22 14:37:12.177551: step: 164/531, loss: 0.010234040208160877 2023-01-22 14:37:13.245498: step: 168/531, loss: 0.04013200104236603 2023-01-22 14:37:14.295484: step: 172/531, loss: 0.021994352340698242 2023-01-22 14:37:15.358519: step: 176/531, loss: 0.0261736661195755 2023-01-22 14:37:16.427320: step: 180/531, loss: 0.054567862302064896 2023-01-22 14:37:17.487131: step: 184/531, loss: 0.05303144082427025 2023-01-22 14:37:18.550080: step: 188/531, loss: 0.015433679334819317 2023-01-22 14:37:19.599631: step: 192/531, loss: 0.01450360007584095 2023-01-22 14:37:20.654173: step: 196/531, loss: 0.009794311597943306 2023-01-22 14:37:21.721495: step: 200/531, loss: 0.016226863488554955 2023-01-22 14:37:22.780684: step: 204/531, loss: 0.020626744255423546 2023-01-22 14:37:23.846060: step: 208/531, loss: 0.012945247814059258 2023-01-22 14:37:24.902814: step: 212/531, loss: 0.055919088423252106 2023-01-22 14:37:25.969012: step: 216/531, loss: 0.03969443589448929 2023-01-22 14:37:27.014455: step: 220/531, loss: 0.00956704281270504 2023-01-22 14:37:28.063085: step: 224/531, loss: 0.019948000088334084 2023-01-22 14:37:29.131952: step: 228/531, loss: 0.07200264930725098 2023-01-22 14:37:30.193582: step: 232/531, loss: 0.0961732566356659 2023-01-22 14:37:31.249752: step: 236/531, loss: 0.0125066377222538 2023-01-22 14:37:32.323815: step: 240/531, loss: 0.01298561692237854 2023-01-22 14:37:33.389971: step: 244/531, loss: 0.021817568689584732 2023-01-22 14:37:34.452233: step: 248/531, loss: 0.008433319628238678 2023-01-22 14:37:35.510944: step: 252/531, loss: 0.018323224037885666 2023-01-22 14:37:36.578211: step: 256/531, loss: 0.04815671965479851 2023-01-22 14:37:37.651328: step: 260/531, loss: 0.04839369282126427 2023-01-22 14:37:38.705392: step: 264/531, loss: 0.04580888897180557 2023-01-22 14:37:39.754097: step: 268/531, loss: 0.06339695304632187 2023-01-22 14:37:40.814882: step: 272/531, loss: 0.009873722679913044 2023-01-22 14:37:41.889818: step: 276/531, loss: 0.01474162470549345 2023-01-22 14:37:42.951265: step: 280/531, loss: 0.041152819991111755 2023-01-22 14:37:43.995216: step: 284/531, loss: 0.05454022437334061 2023-01-22 14:37:45.056762: step: 288/531, loss: 0.01857876218855381 2023-01-22 14:37:46.114997: step: 292/531, loss: 0.05805511772632599 2023-01-22 14:37:47.189407: step: 296/531, loss: 0.0394238606095314 2023-01-22 14:37:48.247194: step: 300/531, loss: 0.0279531329870224 2023-01-22 14:37:49.308507: step: 304/531, loss: 0.023521119728684425 2023-01-22 14:37:50.361418: step: 308/531, loss: 0.016286799684166908 2023-01-22 14:37:51.412147: step: 312/531, loss: 0.018223265185952187 2023-01-22 14:37:52.466827: step: 316/531, loss: 0.006818976253271103 2023-01-22 14:37:53.535230: step: 320/531, loss: 0.013323552906513214 2023-01-22 14:37:54.584613: step: 324/531, loss: 
0.026609499007463455 2023-01-22 14:37:55.640544: step: 328/531, loss: 0.04978005960583687 2023-01-22 14:37:56.702363: step: 332/531, loss: 0.07152272015810013 2023-01-22 14:37:57.757023: step: 336/531, loss: 0.0075768763199448586 2023-01-22 14:37:58.811213: step: 340/531, loss: 0.003764066379517317 2023-01-22 14:37:59.882392: step: 344/531, loss: 0.008797116577625275 2023-01-22 14:38:00.942724: step: 348/531, loss: 0.0457511767745018 2023-01-22 14:38:02.011554: step: 352/531, loss: 0.05321735143661499 2023-01-22 14:38:03.079062: step: 356/531, loss: 0.012059643864631653 2023-01-22 14:38:04.142661: step: 360/531, loss: 0.01047996524721384 2023-01-22 14:38:05.198864: step: 364/531, loss: 0.013448943383991718 2023-01-22 14:38:06.241177: step: 368/531, loss: 0.024902738630771637 2023-01-22 14:38:07.297910: step: 372/531, loss: 0.011134122498333454 2023-01-22 14:38:08.351749: step: 376/531, loss: 0.01941181905567646 2023-01-22 14:38:09.416215: step: 380/531, loss: 0.01484807301312685 2023-01-22 14:38:10.522016: step: 384/531, loss: 0.010247286409139633 2023-01-22 14:38:11.584021: step: 388/531, loss: 0.045143526047468185 2023-01-22 14:38:12.654422: step: 392/531, loss: 0.017212165519595146 2023-01-22 14:38:13.726016: step: 396/531, loss: 0.045472994446754456 2023-01-22 14:38:14.818990: step: 400/531, loss: 0.010158528573811054 2023-01-22 14:38:15.896032: step: 404/531, loss: 0.00854258332401514 2023-01-22 14:38:16.955874: step: 408/531, loss: 0.05470995604991913 2023-01-22 14:38:18.022026: step: 412/531, loss: 0.01939086616039276 2023-01-22 14:38:19.069785: step: 416/531, loss: 0.014499673619866371 2023-01-22 14:38:20.117374: step: 420/531, loss: 0.03453696519136429 2023-01-22 14:38:21.177337: step: 424/531, loss: 0.0500594787299633 2023-01-22 14:38:22.254650: step: 428/531, loss: 0.029794123023748398 2023-01-22 14:38:23.340564: step: 432/531, loss: 0.005801026243716478 2023-01-22 14:38:24.398934: step: 436/531, loss: 0.013934549875557423 2023-01-22 14:38:25.473359: step: 440/531, loss: 0.01165790669620037 2023-01-22 14:38:26.533033: step: 444/531, loss: 0.014572062529623508 2023-01-22 14:38:27.608986: step: 448/531, loss: 0.008337599225342274 2023-01-22 14:38:28.681349: step: 452/531, loss: 0.04914865270256996 2023-01-22 14:38:29.752103: step: 456/531, loss: 0.048679422587156296 2023-01-22 14:38:30.823454: step: 460/531, loss: 0.01309958379715681 2023-01-22 14:38:31.884734: step: 464/531, loss: 0.039141640067100525 2023-01-22 14:38:32.926705: step: 468/531, loss: 0.0 2023-01-22 14:38:33.980716: step: 472/531, loss: 0.008508128114044666 2023-01-22 14:38:35.034111: step: 476/531, loss: 0.0803237333893776 2023-01-22 14:38:36.091344: step: 480/531, loss: 0.011697777546942234 2023-01-22 14:38:37.144143: step: 484/531, loss: 0.011993582360446453 2023-01-22 14:38:38.204934: step: 488/531, loss: 0.013080984354019165 2023-01-22 14:38:39.259730: step: 492/531, loss: 0.010660339146852493 2023-01-22 14:38:40.322493: step: 496/531, loss: 0.01299708615988493 2023-01-22 14:38:41.375476: step: 500/531, loss: 0.009807550348341465 2023-01-22 14:38:42.433143: step: 504/531, loss: 0.04132939875125885 2023-01-22 14:38:43.497284: step: 508/531, loss: 0.07086385786533356 2023-01-22 14:38:44.563760: step: 512/531, loss: 0.02512393519282341 2023-01-22 14:38:45.622747: step: 516/531, loss: 0.008981358259916306 2023-01-22 14:38:46.670741: step: 520/531, loss: 0.00837255734950304 2023-01-22 14:38:47.736146: step: 524/531, loss: 0.04507027193903923 2023-01-22 14:38:48.790480: step: 528/531, loss: 0.009422453120350838 
2023-01-22 14:38:49.861982: step: 532/531, loss: 0.012063867412507534 2023-01-22 14:38:50.926269: step: 536/531, loss: 0.00793278869241476 2023-01-22 14:38:51.995574: step: 540/531, loss: 0.026150500401854515 2023-01-22 14:38:53.068852: step: 544/531, loss: 0.031051211059093475 2023-01-22 14:38:54.131124: step: 548/531, loss: 0.04840414226055145 2023-01-22 14:38:55.199139: step: 552/531, loss: 0.013596313074231148 2023-01-22 14:38:56.255300: step: 556/531, loss: 0.03785036876797676 2023-01-22 14:38:57.316893: step: 560/531, loss: 0.029163893312215805 2023-01-22 14:38:58.365027: step: 564/531, loss: 0.01968100108206272 2023-01-22 14:38:59.431389: step: 568/531, loss: 0.07464582473039627 2023-01-22 14:39:00.489964: step: 572/531, loss: 0.011769238859415054 2023-01-22 14:39:01.566215: step: 576/531, loss: 0.015067631378769875 2023-01-22 14:39:02.619813: step: 580/531, loss: 0.01259372103959322 2023-01-22 14:39:03.678133: step: 584/531, loss: 0.04508479684591293 2023-01-22 14:39:04.757198: step: 588/531, loss: 0.033124689012765884 2023-01-22 14:39:05.844993: step: 592/531, loss: 0.006049429532140493 2023-01-22 14:39:06.901085: step: 596/531, loss: 0.0 2023-01-22 14:39:07.966775: step: 600/531, loss: 0.04306354746222496 2023-01-22 14:39:09.031400: step: 604/531, loss: 0.019644686952233315 2023-01-22 14:39:10.103131: step: 608/531, loss: 0.03895169869065285 2023-01-22 14:39:11.172659: step: 612/531, loss: 0.050115134567022324 2023-01-22 14:39:12.226788: step: 616/531, loss: 0.010712578892707825 2023-01-22 14:39:13.307222: step: 620/531, loss: 0.033733267337083817 2023-01-22 14:39:14.376689: step: 624/531, loss: 0.06120038032531738 2023-01-22 14:39:15.441960: step: 628/531, loss: 0.012908191420137882 2023-01-22 14:39:16.508659: step: 632/531, loss: 0.017876828089356422 2023-01-22 14:39:17.577655: step: 636/531, loss: 0.05685174837708473 2023-01-22 14:39:18.634443: step: 640/531, loss: 0.012657851912081242 2023-01-22 14:39:19.704251: step: 644/531, loss: 0.039190035313367844 2023-01-22 14:39:20.786478: step: 648/531, loss: 0.016411198303103447 2023-01-22 14:39:21.856422: step: 652/531, loss: 0.028136972337961197 2023-01-22 14:39:22.929746: step: 656/531, loss: 0.02555316872894764 2023-01-22 14:39:24.016662: step: 660/531, loss: 0.018969589844346046 2023-01-22 14:39:25.077256: step: 664/531, loss: 0.018380412831902504 2023-01-22 14:39:26.124678: step: 668/531, loss: 0.013217329978942871 2023-01-22 14:39:27.194021: step: 672/531, loss: 0.00837804563343525 2023-01-22 14:39:28.246829: step: 676/531, loss: 0.008425845764577389 2023-01-22 14:39:29.313878: step: 680/531, loss: 0.01829485408961773 2023-01-22 14:39:30.381309: step: 684/531, loss: 0.01057519018650055 2023-01-22 14:39:31.450232: step: 688/531, loss: 0.06050803139805794 2023-01-22 14:39:32.517736: step: 692/531, loss: 0.011691092513501644 2023-01-22 14:39:33.595820: step: 696/531, loss: 0.009698997251689434 2023-01-22 14:39:34.656089: step: 700/531, loss: 0.05816865712404251 2023-01-22 14:39:35.725522: step: 704/531, loss: 0.01664169318974018 2023-01-22 14:39:36.787599: step: 708/531, loss: 0.023120898753404617 2023-01-22 14:39:37.848881: step: 712/531, loss: 0.00801254715770483 2023-01-22 14:39:38.920790: step: 716/531, loss: 0.0381682813167572 2023-01-22 14:39:39.984657: step: 720/531, loss: 0.0072555020451545715 2023-01-22 14:39:41.061587: step: 724/531, loss: 0.018962437286973 2023-01-22 14:39:42.123367: step: 728/531, loss: 0.03683837875723839 2023-01-22 14:39:43.206001: step: 732/531, loss: 0.038544539362192154 2023-01-22 
14:39:44.277362: step: 736/531, loss: 0.04627222940325737 2023-01-22 14:39:45.337222: step: 740/531, loss: 0.03517960384488106 2023-01-22 14:39:46.388515: step: 744/531, loss: 0.007916511967778206 2023-01-22 14:39:47.455031: step: 748/531, loss: 0.009745625779032707 2023-01-22 14:39:48.551828: step: 752/531, loss: 0.006276762578636408 2023-01-22 14:39:49.638354: step: 756/531, loss: 0.009843764826655388 2023-01-22 14:39:50.690308: step: 760/531, loss: 0.028286047279834747 2023-01-22 14:39:51.749216: step: 764/531, loss: 0.06176306679844856 2023-01-22 14:39:52.831308: step: 768/531, loss: 0.016211532056331635 2023-01-22 14:39:53.892277: step: 772/531, loss: 0.007534511387348175 2023-01-22 14:39:54.957676: step: 776/531, loss: 0.007740554865449667 2023-01-22 14:39:56.024968: step: 780/531, loss: 0.02157668210566044 2023-01-22 14:39:57.101591: step: 784/531, loss: 0.007612189278006554 2023-01-22 14:39:58.166966: step: 788/531, loss: 0.04780685901641846 2023-01-22 14:39:59.230058: step: 792/531, loss: 0.046151190996170044 2023-01-22 14:40:00.294720: step: 796/531, loss: 0.03277471661567688 2023-01-22 14:40:01.349307: step: 800/531, loss: 0.01489497534930706 2023-01-22 14:40:02.420671: step: 804/531, loss: 0.010438865050673485 2023-01-22 14:40:03.501564: step: 808/531, loss: 0.0464506521821022 2023-01-22 14:40:04.580006: step: 812/531, loss: 0.018300771713256836 2023-01-22 14:40:05.652992: step: 816/531, loss: 0.00840457621961832 2023-01-22 14:40:06.708309: step: 820/531, loss: 0.023203309625387192 2023-01-22 14:40:07.769961: step: 824/531, loss: 0.013294472359120846 2023-01-22 14:40:08.832946: step: 828/531, loss: 0.01593204215168953 2023-01-22 14:40:09.917164: step: 832/531, loss: 0.014876758679747581 2023-01-22 14:40:10.985137: step: 836/531, loss: 0.07697822898626328 2023-01-22 14:40:12.095102: step: 840/531, loss: 0.03857125714421272 2023-01-22 14:40:13.163974: step: 844/531, loss: 0.029041605070233345 2023-01-22 14:40:14.232557: step: 848/531, loss: 0.009344058111310005 2023-01-22 14:40:15.292306: step: 852/531, loss: 0.02397291362285614 2023-01-22 14:40:16.346343: step: 856/531, loss: 0.004911182913929224 2023-01-22 14:40:17.398848: step: 860/531, loss: 0.04191805049777031 2023-01-22 14:40:18.483426: step: 864/531, loss: 0.011695814318954945 2023-01-22 14:40:19.572517: step: 868/531, loss: 0.020490050315856934 2023-01-22 14:40:20.622540: step: 872/531, loss: 0.012526093982160091 2023-01-22 14:40:21.683637: step: 876/531, loss: 0.02372817136347294 2023-01-22 14:40:22.737816: step: 880/531, loss: 0.023975122720003128 2023-01-22 14:40:23.804625: step: 884/531, loss: 0.05118924006819725 2023-01-22 14:40:24.863570: step: 888/531, loss: 0.06693841516971588 2023-01-22 14:40:25.926139: step: 892/531, loss: 0.010523651726543903 2023-01-22 14:40:26.999259: step: 896/531, loss: 0.006021940149366856 2023-01-22 14:40:28.054791: step: 900/531, loss: 0.01172979548573494 2023-01-22 14:40:29.114135: step: 904/531, loss: 0.04168125241994858 2023-01-22 14:40:30.180251: step: 908/531, loss: 0.012182427570223808 2023-01-22 14:40:31.252367: step: 912/531, loss: 0.05263920873403549 2023-01-22 14:40:32.308016: step: 916/531, loss: 0.011832731775939465 2023-01-22 14:40:33.384269: step: 920/531, loss: 0.018344487994909286 2023-01-22 14:40:34.457206: step: 924/531, loss: 0.017274517565965652 2023-01-22 14:40:35.513346: step: 928/531, loss: 0.03351505100727081 2023-01-22 14:40:36.569883: step: 932/531, loss: 0.04269670322537422 2023-01-22 14:40:37.634900: step: 936/531, loss: 0.012370468117296696 2023-01-22 
14:40:38.701966: step: 940/531, loss: 0.014154774136841297 2023-01-22 14:40:39.773783: step: 944/531, loss: 0.006835263222455978 2023-01-22 14:40:40.836389: step: 948/531, loss: 0.06959729641675949 2023-01-22 14:40:41.936613: step: 952/531, loss: 0.0188874714076519 2023-01-22 14:40:42.994833: step: 956/531, loss: 0.0070149400271475315 2023-01-22 14:40:44.068761: step: 960/531, loss: 0.0069689759984612465 2023-01-22 14:40:45.129915: step: 964/531, loss: 0.008546366356313229 2023-01-22 14:40:46.201360: step: 968/531, loss: 0.008212319575250149 2023-01-22 14:40:47.273189: step: 972/531, loss: 0.006533846724778414 2023-01-22 14:40:48.350314: step: 976/531, loss: 0.01114641409367323 2023-01-22 14:40:49.420917: step: 980/531, loss: 0.012465793639421463 2023-01-22 14:40:50.488745: step: 984/531, loss: 0.018711285665631294 2023-01-22 14:40:51.545026: step: 988/531, loss: 0.010686461813747883 2023-01-22 14:40:52.598408: step: 992/531, loss: 0.009988091886043549 2023-01-22 14:40:53.640585: step: 996/531, loss: 0.011504275724291801 2023-01-22 14:40:54.689905: step: 1000/531, loss: 0.03524003177881241 2023-01-22 14:40:55.764876: step: 1004/531, loss: 0.002620745450258255 2023-01-22 14:40:56.831070: step: 1008/531, loss: 0.020677056163549423 2023-01-22 14:40:57.882411: step: 1012/531, loss: 0.010407635010778904 2023-01-22 14:40:58.960194: step: 1016/531, loss: 0.054459091275930405 2023-01-22 14:41:00.000342: step: 1020/531, loss: 0.004039744380861521 2023-01-22 14:41:01.067614: step: 1024/531, loss: 0.008886104449629784 2023-01-22 14:41:02.143577: step: 1028/531, loss: 0.04599504545331001 2023-01-22 14:41:03.207450: step: 1032/531, loss: 0.013574999757111073 2023-01-22 14:41:04.268603: step: 1036/531, loss: 0.008643172681331635 2023-01-22 14:41:05.357867: step: 1040/531, loss: 0.012747419998049736 2023-01-22 14:41:06.435531: step: 1044/531, loss: 0.017689671367406845 2023-01-22 14:41:07.486912: step: 1048/531, loss: 0.012442556209862232 2023-01-22 14:41:08.549435: step: 1052/531, loss: 0.01307733729481697 2023-01-22 14:41:09.632215: step: 1056/531, loss: 0.02727523446083069 2023-01-22 14:41:10.693375: step: 1060/531, loss: 0.07458508014678955 2023-01-22 14:41:11.774359: step: 1064/531, loss: 0.006109400186687708 2023-01-22 14:41:12.843471: step: 1068/531, loss: 0.012386331334710121 2023-01-22 14:41:13.915306: step: 1072/531, loss: 0.009489276446402073 2023-01-22 14:41:14.962089: step: 1076/531, loss: 0.01200362853705883 2023-01-22 14:41:16.021993: step: 1080/531, loss: 0.025578133761882782 2023-01-22 14:41:17.080510: step: 1084/531, loss: 0.038195353001356125 2023-01-22 14:41:18.143812: step: 1088/531, loss: 0.020360831171274185 2023-01-22 14:41:19.215271: step: 1092/531, loss: 0.019183607771992683 2023-01-22 14:41:20.288170: step: 1096/531, loss: 0.05920606106519699 2023-01-22 14:41:21.348094: step: 1100/531, loss: 0.018724963068962097 2023-01-22 14:41:22.405171: step: 1104/531, loss: 0.043291497975587845 2023-01-22 14:41:23.476441: step: 1108/531, loss: 0.01643366925418377 2023-01-22 14:41:24.555130: step: 1112/531, loss: 0.033016398549079895 2023-01-22 14:41:25.625562: step: 1116/531, loss: 0.015161125920712948 2023-01-22 14:41:26.711092: step: 1120/531, loss: 0.007837921380996704 2023-01-22 14:41:27.771344: step: 1124/531, loss: 0.015348638407886028 2023-01-22 14:41:28.842016: step: 1128/531, loss: 0.0284282099455595 2023-01-22 14:41:29.915385: step: 1132/531, loss: 0.03275051712989807 2023-01-22 14:41:30.986296: step: 1136/531, loss: 0.016015522181987762 2023-01-22 14:41:32.034432: step: 1140/531, 
loss: 0.005391702521592379 2023-01-22 14:41:33.108886: step: 1144/531, loss: 0.006712976843118668 2023-01-22 14:41:34.166997: step: 1148/531, loss: 0.0070574008859694 2023-01-22 14:41:35.222885: step: 1152/531, loss: 0.005472483579069376 2023-01-22 14:41:36.283330: step: 1156/531, loss: 0.04423312470316887 2023-01-22 14:41:37.348510: step: 1160/531, loss: 0.0072450158186256886 2023-01-22 14:41:38.416288: step: 1164/531, loss: 0.01512818317860365 2023-01-22 14:41:39.479284: step: 1168/531, loss: 0.009336884133517742 2023-01-22 14:41:40.553987: step: 1172/531, loss: 0.018048180267214775 2023-01-22 14:41:41.618571: step: 1176/531, loss: 0.03046495094895363 2023-01-22 14:41:42.681665: step: 1180/531, loss: 0.01297029945999384 2023-01-22 14:41:43.761422: step: 1184/531, loss: 0.0062399026937782764 2023-01-22 14:41:44.830753: step: 1188/531, loss: 0.010551639832556248 2023-01-22 14:41:45.906922: step: 1192/531, loss: 0.010108751244843006 2023-01-22 14:41:46.982346: step: 1196/531, loss: 0.015173581428825855 2023-01-22 14:41:48.060521: step: 1200/531, loss: 0.008416966535151005 2023-01-22 14:41:49.122008: step: 1204/531, loss: 0.04106985405087471 2023-01-22 14:41:50.204155: step: 1208/531, loss: 0.007257617078721523 2023-01-22 14:41:51.283310: step: 1212/531, loss: 0.010450026951730251 2023-01-22 14:41:52.346417: step: 1216/531, loss: 0.04543720930814743 2023-01-22 14:41:53.416098: step: 1220/531, loss: 0.010831048712134361 2023-01-22 14:41:54.485550: step: 1224/531, loss: 0.05159803107380867 2023-01-22 14:41:55.552291: step: 1228/531, loss: 0.016074061393737793 2023-01-22 14:41:56.618130: step: 1232/531, loss: 0.0320110097527504 2023-01-22 14:41:57.675023: step: 1236/531, loss: 0.025435796007514 2023-01-22 14:41:58.726323: step: 1240/531, loss: 0.007534940727055073 2023-01-22 14:41:59.783913: step: 1244/531, loss: 0.012176496908068657 2023-01-22 14:42:00.843073: step: 1248/531, loss: 0.05254765599966049 2023-01-22 14:42:01.906536: step: 1252/531, loss: 0.010222867131233215 2023-01-22 14:42:02.980219: step: 1256/531, loss: 0.028361370787024498 2023-01-22 14:42:04.044891: step: 1260/531, loss: 0.0041364035569131374 2023-01-22 14:42:05.140468: step: 1264/531, loss: 0.04036647826433182 2023-01-22 14:42:06.209893: step: 1268/531, loss: 0.04490387439727783 2023-01-22 14:42:07.276151: step: 1272/531, loss: 0.031922683119773865 2023-01-22 14:42:08.330794: step: 1276/531, loss: 0.013720821589231491 2023-01-22 14:42:09.388147: step: 1280/531, loss: 0.015734847635030746 2023-01-22 14:42:10.432504: step: 1284/531, loss: 0.010630029253661633 2023-01-22 14:42:11.512428: step: 1288/531, loss: 0.011186394840478897 2023-01-22 14:42:12.591774: step: 1292/531, loss: 0.04478336125612259 2023-01-22 14:42:13.672210: step: 1296/531, loss: 0.011511689983308315 2023-01-22 14:42:14.746462: step: 1300/531, loss: 0.024349957704544067 2023-01-22 14:42:15.797958: step: 1304/531, loss: 0.027117760851979256 2023-01-22 14:42:16.878552: step: 1308/531, loss: 0.002493282314389944 2023-01-22 14:42:17.945383: step: 1312/531, loss: 0.02797669731080532 2023-01-22 14:42:18.999848: step: 1316/531, loss: 0.01398746483027935 2023-01-22 14:42:20.057514: step: 1320/531, loss: 0.01459596399217844 2023-01-22 14:42:21.127676: step: 1324/531, loss: 0.0104651665315032 2023-01-22 14:42:22.183385: step: 1328/531, loss: 0.01089832466095686 2023-01-22 14:42:23.239828: step: 1332/531, loss: 0.011678804643452168 2023-01-22 14:42:24.301372: step: 1336/531, loss: 0.020437197759747505 2023-01-22 14:42:25.369680: step: 1340/531, loss: 
0.006575725506991148 2023-01-22 14:42:26.443102: step: 1344/531, loss: 0.012712560594081879 2023-01-22 14:42:27.529690: step: 1348/531, loss: 0.00724458135664463 2023-01-22 14:42:28.603021: step: 1352/531, loss: 0.058703940361738205 2023-01-22 14:42:29.680613: step: 1356/531, loss: 0.006272586528211832 2023-01-22 14:42:30.739841: step: 1360/531, loss: 0.0061026751063764095 2023-01-22 14:42:31.790920: step: 1364/531, loss: 0.007148159202188253 2023-01-22 14:42:32.869314: step: 1368/531, loss: 0.040638335049152374 2023-01-22 14:42:33.922673: step: 1372/531, loss: 0.03636328503489494 2023-01-22 14:42:34.978371: step: 1376/531, loss: 0.007570963818579912 2023-01-22 14:42:36.060376: step: 1380/531, loss: 0.016862431541085243 2023-01-22 14:42:37.148386: step: 1384/531, loss: 0.009899328462779522 2023-01-22 14:42:38.218025: step: 1388/531, loss: 0.012377752922475338 2023-01-22 14:42:39.259290: step: 1392/531, loss: 0.049109891057014465 2023-01-22 14:42:40.320100: step: 1396/531, loss: 0.005925807170569897 2023-01-22 14:42:41.378906: step: 1400/531, loss: 0.009235509671270847 2023-01-22 14:42:42.444348: step: 1404/531, loss: 0.011829568073153496 2023-01-22 14:42:43.518780: step: 1408/531, loss: 0.006862274371087551 2023-01-22 14:42:44.566493: step: 1412/531, loss: 0.05027484893798828 2023-01-22 14:42:45.635552: step: 1416/531, loss: 0.033290036022663116 2023-01-22 14:42:46.712033: step: 1420/531, loss: 0.006521621719002724 2023-01-22 14:42:47.770336: step: 1424/531, loss: 0.03325602412223816 2023-01-22 14:42:48.826346: step: 1428/531, loss: 0.010387971065938473 2023-01-22 14:42:49.881591: step: 1432/531, loss: 0.0127054862678051 2023-01-22 14:42:50.937963: step: 1436/531, loss: 0.011370796710252762 2023-01-22 14:42:52.005295: step: 1440/531, loss: 0.03601215407252312 2023-01-22 14:42:53.054751: step: 1444/531, loss: 0.03154682368040085 2023-01-22 14:42:54.135753: step: 1448/531, loss: 0.005829459987580776 2023-01-22 14:42:55.183559: step: 1452/531, loss: 0.07624151557683945 2023-01-22 14:42:56.252447: step: 1456/531, loss: 0.01008535921573639 2023-01-22 14:42:57.310092: step: 1460/531, loss: 0.007337359711527824 2023-01-22 14:42:58.363997: step: 1464/531, loss: 0.0547441802918911 2023-01-22 14:42:59.425398: step: 1468/531, loss: 0.008347827941179276 2023-01-22 14:43:00.480875: step: 1472/531, loss: 0.005240162368863821 2023-01-22 14:43:01.552707: step: 1476/531, loss: 0.02310614287853241 2023-01-22 14:43:02.623949: step: 1480/531, loss: 0.004760191310197115 2023-01-22 14:43:03.694944: step: 1484/531, loss: 0.008953658863902092 2023-01-22 14:43:04.765134: step: 1488/531, loss: 0.025950385257601738 2023-01-22 14:43:05.829368: step: 1492/531, loss: 0.007313200272619724 2023-01-22 14:43:06.906824: step: 1496/531, loss: 0.007858707569539547 2023-01-22 14:43:07.975114: step: 1500/531, loss: 0.037678711116313934 2023-01-22 14:43:09.023124: step: 1504/531, loss: 0.008985237218439579 2023-01-22 14:43:10.102522: step: 1508/531, loss: 0.034513480961322784 2023-01-22 14:43:11.174770: step: 1512/531, loss: 0.008745499886572361 2023-01-22 14:43:12.241492: step: 1516/531, loss: 0.010591990314424038 2023-01-22 14:43:13.308295: step: 1520/531, loss: 0.008706007152795792 2023-01-22 14:43:14.365819: step: 1524/531, loss: 0.018371237441897392 2023-01-22 14:43:15.425374: step: 1528/531, loss: 0.012669135816395283 2023-01-22 14:43:16.483321: step: 1532/531, loss: 0.0063010770827531815 2023-01-22 14:43:17.550785: step: 1536/531, loss: 0.043510716408491135 2023-01-22 14:43:18.615931: step: 1540/531, loss: 
0.08390334248542786 2023-01-22 14:43:19.669924: step: 1544/531, loss: 0.01373217161744833 2023-01-22 14:43:20.716923: step: 1548/531, loss: 7.701742288190871e-05 2023-01-22 14:43:21.776097: step: 1552/531, loss: 0.008855289779603481 2023-01-22 14:43:22.830379: step: 1556/531, loss: 0.009088132530450821 2023-01-22 14:43:23.903587: step: 1560/531, loss: 0.010419655591249466 2023-01-22 14:43:24.982476: step: 1564/531, loss: 0.012826536782085896 2023-01-22 14:43:26.022644: step: 1568/531, loss: 0.01423854473978281 2023-01-22 14:43:27.069954: step: 1572/531, loss: 0.011211882345378399 2023-01-22 14:43:28.129684: step: 1576/531, loss: 0.024514595046639442 2023-01-22 14:43:29.190306: step: 1580/531, loss: 0.004648515954613686 2023-01-22 14:43:30.226698: step: 1584/531, loss: 0.03818988427519798 2023-01-22 14:43:31.293642: step: 1588/531, loss: 0.03093668259680271 2023-01-22 14:43:32.365506: step: 1592/531, loss: 0.009916197508573532 2023-01-22 14:43:33.433867: step: 1596/531, loss: 0.04565809667110443 2023-01-22 14:43:34.482883: step: 1600/531, loss: 0.009167956188321114 2023-01-22 14:43:35.535552: step: 1604/531, loss: 0.013539629988372326 2023-01-22 14:43:36.599898: step: 1608/531, loss: 0.02205510064959526 2023-01-22 14:43:37.687788: step: 1612/531, loss: 0.007033591158688068 2023-01-22 14:43:38.735215: step: 1616/531, loss: 0.005123315379023552 2023-01-22 14:43:39.801318: step: 1620/531, loss: 0.03132849931716919 2023-01-22 14:43:40.865432: step: 1624/531, loss: 0.007058056071400642 2023-01-22 14:43:41.938756: step: 1628/531, loss: 0.012796028517186642 2023-01-22 14:43:43.000697: step: 1632/531, loss: 0.009384261444211006 2023-01-22 14:43:44.063971: step: 1636/531, loss: 0.04952043294906616 2023-01-22 14:43:45.120050: step: 1640/531, loss: 0.05762968584895134 2023-01-22 14:43:46.183892: step: 1644/531, loss: 0.04548857361078262 2023-01-22 14:43:47.233862: step: 1648/531, loss: 0.010962730273604393 2023-01-22 14:43:48.292618: step: 1652/531, loss: 0.006005888804793358 2023-01-22 14:43:49.345898: step: 1656/531, loss: 0.0018209691625088453 2023-01-22 14:43:50.414308: step: 1660/531, loss: 0.013071589171886444 2023-01-22 14:43:51.465457: step: 1664/531, loss: 0.015822121873497963 2023-01-22 14:43:52.518429: step: 1668/531, loss: 0.006638388615101576 2023-01-22 14:43:53.579163: step: 1672/531, loss: 0.01161070354282856 2023-01-22 14:43:54.620216: step: 1676/531, loss: 0.0159497931599617 2023-01-22 14:43:55.680255: step: 1680/531, loss: 0.012878895737230778 2023-01-22 14:43:56.740042: step: 1684/531, loss: 0.014363139867782593 2023-01-22 14:43:57.807228: step: 1688/531, loss: 0.007820959202945232 2023-01-22 14:43:58.890188: step: 1692/531, loss: 0.012973114848136902 2023-01-22 14:43:59.973714: step: 1696/531, loss: 0.041471242904663086 2023-01-22 14:44:01.033597: step: 1700/531, loss: 0.023479929193854332 2023-01-22 14:44:02.098148: step: 1704/531, loss: 0.007520216982811689 2023-01-22 14:44:03.168394: step: 1708/531, loss: 0.001234938157722354 2023-01-22 14:44:04.219362: step: 1712/531, loss: 0.0073923030868172646 2023-01-22 14:44:05.267119: step: 1716/531, loss: 0.006445819977670908 2023-01-22 14:44:06.330637: step: 1720/531, loss: 0.01131470873951912 2023-01-22 14:44:07.397504: step: 1724/531, loss: 0.028294507414102554 2023-01-22 14:44:08.438889: step: 1728/531, loss: 0.006392807699739933 2023-01-22 14:44:09.497643: step: 1732/531, loss: 0.003950975835323334 2023-01-22 14:44:10.557442: step: 1736/531, loss: 0.008274729363620281 2023-01-22 14:44:11.623328: step: 1740/531, loss: 
0.07794395089149475 2023-01-22 14:44:12.686291: step: 1744/531, loss: 0.005518303252756596 2023-01-22 14:44:13.744250: step: 1748/531, loss: 0.031895626336336136 2023-01-22 14:44:14.843849: step: 1752/531, loss: 0.006316723767668009 2023-01-22 14:44:15.898998: step: 1756/531, loss: 0.007330481894314289 2023-01-22 14:44:16.969290: step: 1760/531, loss: 0.034529659897089005 2023-01-22 14:44:18.042612: step: 1764/531, loss: 0.0036982884630560875 2023-01-22 14:44:19.099898: step: 1768/531, loss: 0.0050541311502456665 2023-01-22 14:44:20.145275: step: 1772/531, loss: 0.007315453141927719 2023-01-22 14:44:21.206459: step: 1776/531, loss: 0.023043112829327583 2023-01-22 14:44:22.254537: step: 1780/531, loss: 0.014776111580431461 2023-01-22 14:44:23.333915: step: 1784/531, loss: 0.012547774240374565 2023-01-22 14:44:24.398333: step: 1788/531, loss: 0.04544464498758316 2023-01-22 14:44:25.448396: step: 1792/531, loss: 0.008483555167913437 2023-01-22 14:44:26.510914: step: 1796/531, loss: 0.01034250296652317 2023-01-22 14:44:27.580474: step: 1800/531, loss: 0.0055771032348275185 2023-01-22 14:44:28.656571: step: 1804/531, loss: 0.033846691250801086 2023-01-22 14:44:29.710682: step: 1808/531, loss: 0.007275803480297327 2023-01-22 14:44:30.770629: step: 1812/531, loss: 0.013520680367946625 2023-01-22 14:44:31.843035: step: 1816/531, loss: 0.016886606812477112 2023-01-22 14:44:32.907565: step: 1820/531, loss: 0.041414789855480194 2023-01-22 14:44:33.963414: step: 1824/531, loss: 0.046005189418792725 2023-01-22 14:44:35.017161: step: 1828/531, loss: 0.00952172838151455 2023-01-22 14:44:36.080506: step: 1832/531, loss: 0.035623155534267426 2023-01-22 14:44:37.155182: step: 1836/531, loss: 0.0112422164529562 2023-01-22 14:44:38.232452: step: 1840/531, loss: 0.006985026877373457 2023-01-22 14:44:39.289605: step: 1844/531, loss: 0.04322997108101845 2023-01-22 14:44:40.333126: step: 1848/531, loss: 0.009377659298479557 2023-01-22 14:44:41.387164: step: 1852/531, loss: 0.011422567069530487 2023-01-22 14:44:42.457659: step: 1856/531, loss: 0.043231479823589325 2023-01-22 14:44:43.509315: step: 1860/531, loss: 0.01586301252245903 2023-01-22 14:44:44.574717: step: 1864/531, loss: 0.009389316663146019 2023-01-22 14:44:45.647914: step: 1868/531, loss: 0.008753849193453789 2023-01-22 14:44:46.709025: step: 1872/531, loss: 0.009196775034070015 2023-01-22 14:44:47.772118: step: 1876/531, loss: 0.020064661279320717 2023-01-22 14:44:48.817643: step: 1880/531, loss: 0.01364896260201931 2023-01-22 14:44:49.872899: step: 1884/531, loss: 0.009153665974736214 2023-01-22 14:44:50.932247: step: 1888/531, loss: 0.005034239497035742 2023-01-22 14:44:51.988580: step: 1892/531, loss: 0.005006608087569475 2023-01-22 14:44:53.065866: step: 1896/531, loss: 0.008844244293868542 2023-01-22 14:44:54.129414: step: 1900/531, loss: 0.011331385001540184 2023-01-22 14:44:55.199638: step: 1904/531, loss: 0.038331806659698486 2023-01-22 14:44:56.276854: step: 1908/531, loss: 0.005373641848564148 2023-01-22 14:44:57.342299: step: 1912/531, loss: 0.007340161595493555 2023-01-22 14:44:58.402212: step: 1916/531, loss: 0.03102702647447586 2023-01-22 14:44:59.457221: step: 1920/531, loss: 0.05088759586215019 2023-01-22 14:45:00.516061: step: 1924/531, loss: 0.006846578326076269 2023-01-22 14:45:01.581746: step: 1928/531, loss: 0.011807342059910297 2023-01-22 14:45:02.656869: step: 1932/531, loss: 0.030191076919436455 2023-01-22 14:45:03.724925: step: 1936/531, loss: 0.007880610413849354 2023-01-22 14:45:04.791007: step: 1940/531, loss: 
0.007350956555455923 2023-01-22 14:45:05.865337: step: 1944/531, loss: 0.031131573021411896 2023-01-22 14:45:06.933830: step: 1948/531, loss: 0.07715418934822083 2023-01-22 14:45:07.989405: step: 1952/531, loss: 0.03203938901424408 2023-01-22 14:45:09.064773: step: 1956/531, loss: 0.007324943318963051 2023-01-22 14:45:10.130776: step: 1960/531, loss: 0.010838370770215988 2023-01-22 14:45:11.197803: step: 1964/531, loss: 0.03717128932476044 2023-01-22 14:45:12.272907: step: 1968/531, loss: 0.0065204789862036705 2023-01-22 14:45:13.336138: step: 1972/531, loss: 0.012113319709897041 2023-01-22 14:45:14.398625: step: 1976/531, loss: 0.02180514857172966 2023-01-22 14:45:15.463463: step: 1980/531, loss: 0.005178001243621111 2023-01-22 14:45:16.519271: step: 1984/531, loss: 0.010195231065154076 2023-01-22 14:45:17.577083: step: 1988/531, loss: 0.007839050143957138 2023-01-22 14:45:18.668955: step: 1992/531, loss: 0.009247769601643085 2023-01-22 14:45:19.734008: step: 1996/531, loss: 0.012342852540314198 2023-01-22 14:45:20.788971: step: 2000/531, loss: 0.009161161258816719 2023-01-22 14:45:21.876849: step: 2004/531, loss: 0.0052819461561739445 2023-01-22 14:45:22.949943: step: 2008/531, loss: 0.007459281012415886 2023-01-22 14:45:24.010363: step: 2012/531, loss: 0.011270435526967049 2023-01-22 14:45:25.066679: step: 2016/531, loss: 0.030428921803832054 2023-01-22 14:45:26.136924: step: 2020/531, loss: 0.014814364723861217 2023-01-22 14:45:27.208770: step: 2024/531, loss: 0.027404872700572014 2023-01-22 14:45:28.271046: step: 2028/531, loss: 0.01005913782864809 2023-01-22 14:45:29.342182: step: 2032/531, loss: 0.03193280100822449 2023-01-22 14:45:30.412059: step: 2036/531, loss: 0.018321385607123375 2023-01-22 14:45:31.472374: step: 2040/531, loss: 0.019345005974173546 2023-01-22 14:45:32.535125: step: 2044/531, loss: 5.868716471013613e-05 2023-01-22 14:45:33.589226: step: 2048/531, loss: 0.00970078818500042 2023-01-22 14:45:34.661139: step: 2052/531, loss: 0.011111809872090816 2023-01-22 14:45:35.721472: step: 2056/531, loss: 0.03218020871281624 2023-01-22 14:45:36.775396: step: 2060/531, loss: 0.021760791540145874 2023-01-22 14:45:37.824770: step: 2064/531, loss: 0.04120469093322754 2023-01-22 14:45:38.893739: step: 2068/531, loss: 0.010430751368403435 2023-01-22 14:45:39.940606: step: 2072/531, loss: 0.026210544630885124 2023-01-22 14:45:41.017571: step: 2076/531, loss: 0.007317809853702784 2023-01-22 14:45:42.110542: step: 2080/531, loss: 0.015261203050613403 2023-01-22 14:45:43.181217: step: 2084/531, loss: 0.013149858452379704 2023-01-22 14:45:44.237322: step: 2088/531, loss: 0.0021271780133247375 2023-01-22 14:45:45.308458: step: 2092/531, loss: 0.009499236941337585 2023-01-22 14:45:46.358221: step: 2096/531, loss: 0.007625204510986805 2023-01-22 14:45:47.443762: step: 2100/531, loss: 0.013872173614799976 2023-01-22 14:45:48.498525: step: 2104/531, loss: 0.006319490727037191 2023-01-22 14:45:49.588932: step: 2108/531, loss: 0.005811006762087345 2023-01-22 14:45:50.672347: step: 2112/531, loss: 0.019926337525248528 2023-01-22 14:45:51.726037: step: 2116/531, loss: 0.007904676720499992 2023-01-22 14:45:52.780467: step: 2120/531, loss: 0.008954621851444244 2023-01-22 14:45:53.858900: step: 2124/531, loss: 0.034259598702192307 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34594836400818, 'r': 0.3203953598484849, 'f1': 0.3326819075712881}, 'combined': 
0.24513403715779125, 'stategy': 1, 'epoch': 0}
Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3254065792700504, 'r': 0.2763606417807919, 'f1': 0.2988849107790312}, 'combined': 0.18715223385229057, 'stategy': 1, 'epoch': 0}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3222139154704944, 'r': 0.348504614455753, 'f1': 0.3348439960222093}, 'combined': 0.24672715496373318, 'stategy': 1, 'epoch': 0}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3480621490304492, 'r': 0.30067045876570814, 'f1': 0.3226352576402229}, 'combined': 0.19994297656577192, 'stategy': 1, 'epoch': 0}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.295523598820059, 'r': 0.3168327008222644, 'f1': 0.3058073870573871}, 'combined': 0.22533175888439047, 'stategy': 1, 'epoch': 0}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3484385152540001, 'r': 0.2857447860175209, 'f1': 0.3139927802636643}, 'combined': 0.20829224037292582, 'stategy': 1, 'epoch': 0}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 0}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37037037037037035, 'r': 0.43478260869565216, 'f1': 0.39999999999999997}, 'combined': 0.19999999999999998, 'stategy': 1, 'epoch': 0}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 0}
New best chinese model...
New best korean model...
New best russian model...
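A note for readers scanning these evaluation records: in every record above, the logged 'combined' value is consistent with being the product of the template F1 and the slot F1 (for Dev Chinese, 0.7368421052631579 × 0.3326819075712881 ≈ 0.2451340372; for Test Chinese, 0.6261682242990655 × 0.2988849107790312 ≈ 0.1871522339). The short sketch below shows that reading; the f1() helper and the combined_score() name are illustrative assumptions, not code taken from train.py.

# Sketch only: assumes 'combined' = template F1 * slot F1, which matches the numbers logged above.
def f1(p: float, r: float) -> float:
    """Ordinary F1: harmonic mean of precision and recall (0.0 when both are zero)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(record: dict) -> float:
    """Hypothetical helper: product of template F1 and slot F1 from one logged record."""
    return record['template']['f1'] * record['slot']['f1']

dev_chinese = {
    'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
    'slot': {'p': 0.34594836400818, 'r': 0.3203953598484849, 'f1': 0.3326819075712881},
}
assert abs(f1(1.0, 0.5833333333333334) - 0.7368421052631579) < 1e-9  # the logged f1 is a plain F1 of p and r
print(combined_score(dev_chinese))  # ≈ 0.245134..., the 'combined' value logged for Dev Chinese above

Under that reading, 'combined' can be compared across epochs as slot F1 scaled by template F1.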
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34594836400818, 'r': 0.3203953598484849, 'f1': 0.3326819075712881}, 'combined': 0.24513403715779125, 'stategy': 1, 'epoch': 0} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3254065792700504, 'r': 0.2763606417807919, 'f1': 0.2988849107790312}, 'combined': 0.18715223385229057, 'stategy': 1, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3222139154704944, 'r': 0.348504614455753, 'f1': 0.3348439960222093}, 'combined': 0.24672715496373318, 'stategy': 1, 'epoch': 0} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3480621490304492, 'r': 0.30067045876570814, 'f1': 0.3226352576402229}, 'combined': 0.19994297656577192, 'stategy': 1, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37037037037037035, 'r': 0.43478260869565216, 'f1': 0.39999999999999997}, 'combined': 0.19999999999999998, 'stategy': 1, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.295523598820059, 'r': 0.3168327008222644, 'f1': 0.3058073870573871}, 'combined': 0.22533175888439047, 'stategy': 1, 'epoch': 0} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3484385152540001, 'r': 0.2857447860175209, 'f1': 0.3139927802636643}, 'combined': 0.20829224037292582, 'stategy': 1, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 14:48:49.340330: step: 4/531, loss: 0.09178076684474945 2023-01-22 14:48:50.388775: step: 8/531, loss: 0.009556343778967857 2023-01-22 14:48:51.429313: step: 12/531, loss: 0.0329352468252182 2023-01-22 14:48:52.498034: step: 16/531, loss: 0.019852932542562485 2023-01-22 14:48:53.548339: step: 20/531, loss: 0.023449081927537918 2023-01-22 14:48:54.603571: step: 24/531, loss: 0.018652426078915596 2023-01-22 14:48:55.667632: step: 28/531, loss: 0.014825774356722832 2023-01-22 14:48:56.724086: step: 32/531, loss: 0.013731390237808228 2023-01-22 14:48:57.783459: step: 36/531, loss: 0.023465080186724663 2023-01-22 14:48:58.823991: step: 40/531, loss: 0.0033914686646312475 2023-01-22 14:48:59.867810: step: 44/531, loss: 0.03890390321612358 2023-01-22 14:49:00.916437: step: 48/531, loss: 0.012838237918913364 2023-01-22 14:49:01.977731: step: 52/531, loss: 0.026646077632904053 2023-01-22 14:49:03.024010: step: 56/531, loss: 0.012675140984356403 2023-01-22 14:49:04.098628: step: 60/531, loss: 0.007770244963467121 2023-01-22 14:49:05.147814: step: 64/531, loss: 0.017953475937247276 
2023-01-22 14:49:06.186236: step: 68/531, loss: 0.011134296655654907 2023-01-22 14:49:07.230694: step: 72/531, loss: 0.00889762956649065 2023-01-22 14:49:08.303134: step: 76/531, loss: 0.0445646308362484 2023-01-22 14:49:09.341755: step: 80/531, loss: 0.00789334811270237 2023-01-22 14:49:10.402839: step: 84/531, loss: 0.005396140739321709 2023-01-22 14:49:11.454623: step: 88/531, loss: 0.009716407395899296 2023-01-22 14:49:12.504987: step: 92/531, loss: 0.010318266227841377 2023-01-22 14:49:13.561479: step: 96/531, loss: 0.05564114451408386 2023-01-22 14:49:14.619468: step: 100/531, loss: 0.014927938580513 2023-01-22 14:49:15.681676: step: 104/531, loss: 0.03708529844880104 2023-01-22 14:49:16.739097: step: 108/531, loss: 0.015157019719481468 2023-01-22 14:49:17.796228: step: 112/531, loss: 0.01258289534598589 2023-01-22 14:49:18.860433: step: 116/531, loss: 0.018509941175580025 2023-01-22 14:49:19.924297: step: 120/531, loss: 0.07344067841768265 2023-01-22 14:49:20.966879: step: 124/531, loss: 0.005568209104239941 2023-01-22 14:49:22.049638: step: 128/531, loss: 0.026432767510414124 2023-01-22 14:49:23.116468: step: 132/531, loss: 0.02401437796652317 2023-01-22 14:49:24.184117: step: 136/531, loss: 0.009684343822300434 2023-01-22 14:49:25.239871: step: 140/531, loss: 0.012389306910336018 2023-01-22 14:49:26.286576: step: 144/531, loss: 0.010250166058540344 2023-01-22 14:49:27.345576: step: 148/531, loss: 0.04310257360339165 2023-01-22 14:49:28.406910: step: 152/531, loss: 0.021831609308719635 2023-01-22 14:49:29.453363: step: 156/531, loss: 0.009862443432211876 2023-01-22 14:49:30.521145: step: 160/531, loss: 0.027956031262874603 2023-01-22 14:49:31.574050: step: 164/531, loss: 0.048135023564100266 2023-01-22 14:49:32.628638: step: 168/531, loss: 0.019551318138837814 2023-01-22 14:49:33.683796: step: 172/531, loss: 0.01896512135863304 2023-01-22 14:49:34.735379: step: 176/531, loss: 0.013906910084187984 2023-01-22 14:49:35.771624: step: 180/531, loss: 0.022282781079411507 2023-01-22 14:49:36.832632: step: 184/531, loss: 0.008715187199413776 2023-01-22 14:49:37.884149: step: 188/531, loss: 0.0077690239995718 2023-01-22 14:49:38.929833: step: 192/531, loss: 0.004787336103618145 2023-01-22 14:49:39.987054: step: 196/531, loss: 0.041909441351890564 2023-01-22 14:49:41.043411: step: 200/531, loss: 0.009868372231721878 2023-01-22 14:49:42.111092: step: 204/531, loss: 0.011924352496862411 2023-01-22 14:49:43.172968: step: 208/531, loss: 0.003855706425383687 2023-01-22 14:49:44.229924: step: 212/531, loss: 0.009761754423379898 2023-01-22 14:49:45.302114: step: 216/531, loss: 0.0067886305041611195 2023-01-22 14:49:46.363985: step: 220/531, loss: 0.007311253808438778 2023-01-22 14:49:47.429200: step: 224/531, loss: 0.0353132002055645 2023-01-22 14:49:48.491059: step: 228/531, loss: 0.0034550772979855537 2023-01-22 14:49:49.552515: step: 232/531, loss: 0.032790686935186386 2023-01-22 14:49:50.609267: step: 236/531, loss: 0.005856727249920368 2023-01-22 14:49:51.659826: step: 240/531, loss: 0.0110018290579319 2023-01-22 14:49:52.711941: step: 244/531, loss: 0.024778563529253006 2023-01-22 14:49:53.779128: step: 248/531, loss: 0.008650518953800201 2023-01-22 14:49:54.825419: step: 252/531, loss: 0.004926738329231739 2023-01-22 14:49:55.881013: step: 256/531, loss: 0.012716108001768589 2023-01-22 14:49:56.936543: step: 260/531, loss: 0.006069564260542393 2023-01-22 14:49:58.006431: step: 264/531, loss: 0.005527488887310028 2023-01-22 14:49:59.080370: step: 268/531, loss: 0.008695040829479694 2023-01-22 
14:50:00.135285: step: 272/531, loss: 0.0069482941180467606 2023-01-22 14:50:01.211863: step: 276/531, loss: 0.011258721351623535 2023-01-22 14:50:02.288932: step: 280/531, loss: 0.0829072967171669 2023-01-22 14:50:03.373972: step: 284/531, loss: 0.008146488107740879 2023-01-22 14:50:04.435469: step: 288/531, loss: 0.007282691076397896 2023-01-22 14:50:05.504130: step: 292/531, loss: 0.016103364527225494 2023-01-22 14:50:06.591277: step: 296/531, loss: 0.005875722970813513 2023-01-22 14:50:07.650601: step: 300/531, loss: 0.0035357805900275707 2023-01-22 14:50:08.708300: step: 304/531, loss: 0.0034886773210018873 2023-01-22 14:50:09.761275: step: 308/531, loss: 0.016442179679870605 2023-01-22 14:50:10.811775: step: 312/531, loss: 0.019149813801050186 2023-01-22 14:50:11.889141: step: 316/531, loss: 0.022424906492233276 2023-01-22 14:50:12.940834: step: 320/531, loss: 0.010963410139083862 2023-01-22 14:50:14.007291: step: 324/531, loss: 0.006567399483174086 2023-01-22 14:50:15.058115: step: 328/531, loss: 0.0008040042594075203 2023-01-22 14:50:16.112374: step: 332/531, loss: 0.01717044971883297 2023-01-22 14:50:17.184173: step: 336/531, loss: 0.012676534242928028 2023-01-22 14:50:18.230757: step: 340/531, loss: 0.006378055550158024 2023-01-22 14:50:19.274700: step: 344/531, loss: 0.012150856666266918 2023-01-22 14:50:20.321754: step: 348/531, loss: 0.06427229940891266 2023-01-22 14:50:21.390371: step: 352/531, loss: 0.01812794804573059 2023-01-22 14:50:22.450183: step: 356/531, loss: 0.012500600889325142 2023-01-22 14:50:23.491639: step: 360/531, loss: 0.0054733785800635815 2023-01-22 14:50:24.552828: step: 364/531, loss: 0.0012258852366358042 2023-01-22 14:50:25.615565: step: 368/531, loss: 0.0026976759545505047 2023-01-22 14:50:26.678624: step: 372/531, loss: 0.020970840007066727 2023-01-22 14:50:27.732670: step: 376/531, loss: 0.02774849906563759 2023-01-22 14:50:28.787876: step: 380/531, loss: 0.03277068957686424 2023-01-22 14:50:29.843137: step: 384/531, loss: 0.01633204147219658 2023-01-22 14:50:30.887366: step: 388/531, loss: 0.007651320658624172 2023-01-22 14:50:31.969403: step: 392/531, loss: 0.009806823916733265 2023-01-22 14:50:33.033353: step: 396/531, loss: 0.006372191943228245 2023-01-22 14:50:34.086646: step: 400/531, loss: 0.015485931187868118 2023-01-22 14:50:35.149412: step: 404/531, loss: 0.005884003359824419 2023-01-22 14:50:36.220328: step: 408/531, loss: 0.004996773321181536 2023-01-22 14:50:37.278618: step: 412/531, loss: 0.03665619716048241 2023-01-22 14:50:38.336081: step: 416/531, loss: 0.0 2023-01-22 14:50:39.375344: step: 420/531, loss: 0.01944882422685623 2023-01-22 14:50:40.435405: step: 424/531, loss: 0.0429818257689476 2023-01-22 14:50:41.488129: step: 428/531, loss: 0.02300003357231617 2023-01-22 14:50:42.589565: step: 432/531, loss: 0.007280625402927399 2023-01-22 14:50:43.639699: step: 436/531, loss: 0.007473290432244539 2023-01-22 14:50:44.694730: step: 440/531, loss: 0.007171986158937216 2023-01-22 14:50:45.753898: step: 444/531, loss: 0.04028509929776192 2023-01-22 14:50:46.814930: step: 448/531, loss: 0.011428830213844776 2023-01-22 14:50:47.865494: step: 452/531, loss: 0.01570049859583378 2023-01-22 14:50:48.925922: step: 456/531, loss: 0.012460127472877502 2023-01-22 14:50:49.972396: step: 460/531, loss: 0.0266293715685606 2023-01-22 14:50:51.037626: step: 464/531, loss: 0.008816512301564217 2023-01-22 14:50:52.084350: step: 468/531, loss: 0.006196875590831041 2023-01-22 14:50:53.140967: step: 472/531, loss: 0.006693840026855469 2023-01-22 
14:50:54.235970: step: 476/531, loss: 0.006907900795340538 2023-01-22 14:50:55.295224: step: 480/531, loss: 0.013373376801609993 2023-01-22 14:50:56.356709: step: 484/531, loss: 0.009013530798256397 2023-01-22 14:50:57.401941: step: 488/531, loss: 0.00850666407495737 2023-01-22 14:50:58.447056: step: 492/531, loss: 0.001762406900525093 2023-01-22 14:50:59.521638: step: 496/531, loss: 0.03610870614647865 2023-01-22 14:51:00.580812: step: 500/531, loss: 0.007061814423650503 2023-01-22 14:51:01.653334: step: 504/531, loss: 0.006155211012810469 2023-01-22 14:51:02.726487: step: 508/531, loss: 0.01041685976088047 2023-01-22 14:51:03.773213: step: 512/531, loss: 0.011248735710978508 2023-01-22 14:51:04.837886: step: 516/531, loss: 0.02531476691365242 2023-01-22 14:51:05.914470: step: 520/531, loss: 0.01878204382956028 2023-01-22 14:51:06.994965: step: 524/531, loss: 0.007710628677159548 2023-01-22 14:51:08.053970: step: 528/531, loss: 0.014674684964120388 2023-01-22 14:51:09.113897: step: 532/531, loss: 0.02699761837720871 2023-01-22 14:51:10.162194: step: 536/531, loss: 0.0042434511706233025 2023-01-22 14:51:11.222323: step: 540/531, loss: 0.02224177122116089 2023-01-22 14:51:12.285556: step: 544/531, loss: 0.005734087899327278 2023-01-22 14:51:13.354223: step: 548/531, loss: 0.049987345933914185 2023-01-22 14:51:14.411386: step: 552/531, loss: 0.013814532198011875 2023-01-22 14:51:15.476708: step: 556/531, loss: 0.009399447590112686 2023-01-22 14:51:16.537096: step: 560/531, loss: 0.05782898887991905 2023-01-22 14:51:17.594862: step: 564/531, loss: 0.005294196307659149 2023-01-22 14:51:18.653719: step: 568/531, loss: 0.010895602405071259 2023-01-22 14:51:19.692324: step: 572/531, loss: 0.012185837142169476 2023-01-22 14:51:20.743128: step: 576/531, loss: 0.010583250783383846 2023-01-22 14:51:21.813310: step: 580/531, loss: 0.014942733570933342 2023-01-22 14:51:22.868743: step: 584/531, loss: 0.0027758022770285606 2023-01-22 14:51:23.915373: step: 588/531, loss: 0.012569941580295563 2023-01-22 14:51:24.981353: step: 592/531, loss: 0.03456798195838928 2023-01-22 14:51:26.042951: step: 596/531, loss: 0.02051612176001072 2023-01-22 14:51:27.105039: step: 600/531, loss: 0.009913983754813671 2023-01-22 14:51:28.165357: step: 604/531, loss: 0.011612809263169765 2023-01-22 14:51:29.232210: step: 608/531, loss: 0.005555942188948393 2023-01-22 14:51:30.304946: step: 612/531, loss: 0.010517431423068047 2023-01-22 14:51:31.368884: step: 616/531, loss: 0.0173844862729311 2023-01-22 14:51:32.428151: step: 620/531, loss: 0.0014835285255685449 2023-01-22 14:51:33.495093: step: 624/531, loss: 0.058384351432323456 2023-01-22 14:51:34.566820: step: 628/531, loss: 0.010259066708385944 2023-01-22 14:51:35.622522: step: 632/531, loss: 0.010645410977303982 2023-01-22 14:51:36.677912: step: 636/531, loss: 0.008815240114927292 2023-01-22 14:51:37.769938: step: 640/531, loss: 0.007068410515785217 2023-01-22 14:51:38.834874: step: 644/531, loss: 0.03986091539263725 2023-01-22 14:51:39.878920: step: 648/531, loss: 0.013599403202533722 2023-01-22 14:51:40.955102: step: 652/531, loss: 0.0025150806177407503 2023-01-22 14:51:42.028039: step: 656/531, loss: 0.010195252485573292 2023-01-22 14:51:43.097235: step: 660/531, loss: 0.016416076570749283 2023-01-22 14:51:44.165531: step: 664/531, loss: 0.006640026345849037 2023-01-22 14:51:45.217849: step: 668/531, loss: 0.050393760204315186 2023-01-22 14:51:46.275679: step: 672/531, loss: 0.009499446488916874 2023-01-22 14:51:47.344388: step: 676/531, loss: 0.051284126937389374 
2023-01-22 14:51:48.390601: step: 680/531, loss: 0.023495344445109367 2023-01-22 14:51:49.460604: step: 684/531, loss: 0.0166424959897995 2023-01-22 14:51:50.512080: step: 688/531, loss: 0.002596989506855607 2023-01-22 14:51:51.581820: step: 692/531, loss: 0.03550850600004196 2023-01-22 14:51:52.658034: step: 696/531, loss: 0.004854866769164801 2023-01-22 14:51:53.716451: step: 700/531, loss: 0.021808704361319542 2023-01-22 14:51:54.775389: step: 704/531, loss: 0.013941183686256409 2023-01-22 14:51:55.832097: step: 708/531, loss: 0.09654286503791809 2023-01-22 14:51:56.897522: step: 712/531, loss: 0.009889055974781513 2023-01-22 14:51:57.935831: step: 716/531, loss: 0.0017241982277482748 2023-01-22 14:51:59.004264: step: 720/531, loss: 0.026191428303718567 2023-01-22 14:52:00.057938: step: 724/531, loss: 0.010890468955039978 2023-01-22 14:52:01.136264: step: 728/531, loss: 0.009511811658740044 2023-01-22 14:52:02.198747: step: 732/531, loss: 0.012885295785963535 2023-01-22 14:52:03.249315: step: 736/531, loss: 0.012562588788568974 2023-01-22 14:52:04.340357: step: 740/531, loss: 0.07264727354049683 2023-01-22 14:52:05.407869: step: 744/531, loss: 0.00932356994599104 2023-01-22 14:52:06.462356: step: 748/531, loss: 0.03984125331044197 2023-01-22 14:52:07.526604: step: 752/531, loss: 0.004348627291619778 2023-01-22 14:52:08.588545: step: 756/531, loss: 0.014298013411462307 2023-01-22 14:52:09.640118: step: 760/531, loss: 0.006649958435446024 2023-01-22 14:52:10.703211: step: 764/531, loss: 0.008323272690176964 2023-01-22 14:52:11.767359: step: 768/531, loss: 0.07428687810897827 2023-01-22 14:52:12.850252: step: 772/531, loss: 0.01691368781030178 2023-01-22 14:52:13.904051: step: 776/531, loss: 0.006733793765306473 2023-01-22 14:52:14.940860: step: 780/531, loss: 0.026237420737743378 2023-01-22 14:52:16.025572: step: 784/531, loss: 0.014186098240315914 2023-01-22 14:52:17.094753: step: 788/531, loss: 0.0085311159491539 2023-01-22 14:52:18.171614: step: 792/531, loss: 0.0057366034016013145 2023-01-22 14:52:19.227515: step: 796/531, loss: 0.0037600744981318712 2023-01-22 14:52:20.294283: step: 800/531, loss: 0.0023868680000305176 2023-01-22 14:52:21.338392: step: 804/531, loss: 0.047112684696912766 2023-01-22 14:52:22.405707: step: 808/531, loss: 0.00708191841840744 2023-01-22 14:52:23.467881: step: 812/531, loss: 0.013263104483485222 2023-01-22 14:52:24.527904: step: 816/531, loss: 0.030224021524190903 2023-01-22 14:52:25.570830: step: 820/531, loss: 0.00970078818500042 2023-01-22 14:52:26.621743: step: 824/531, loss: 0.009935368783771992 2023-01-22 14:52:27.689050: step: 828/531, loss: 0.01689828559756279 2023-01-22 14:52:28.756789: step: 832/531, loss: 0.014442265033721924 2023-01-22 14:52:29.818769: step: 836/531, loss: 0.011026089079678059 2023-01-22 14:52:30.885276: step: 840/531, loss: 0.014645881950855255 2023-01-22 14:52:31.938230: step: 844/531, loss: 0.006513155531138182 2023-01-22 14:52:32.995890: step: 848/531, loss: 0.010485423728823662 2023-01-22 14:52:34.058646: step: 852/531, loss: 0.007206744514405727 2023-01-22 14:52:35.121785: step: 856/531, loss: 0.005383828654885292 2023-01-22 14:52:36.191681: step: 860/531, loss: 0.051911529153585434 2023-01-22 14:52:37.256478: step: 864/531, loss: 0.005123678129166365 2023-01-22 14:52:38.306134: step: 868/531, loss: 0.010245547629892826 2023-01-22 14:52:39.361776: step: 872/531, loss: 0.006416060961782932 2023-01-22 14:52:40.425963: step: 876/531, loss: 0.01878443732857704 2023-01-22 14:52:41.472768: step: 880/531, loss: 
0.012331430800259113 2023-01-22 14:52:42.543418: step: 884/531, loss: 0.0038081659004092216 2023-01-22 14:52:43.600223: step: 888/531, loss: 0.014018531888723373 2023-01-22 14:52:44.655627: step: 892/531, loss: 0.009234867990016937 2023-01-22 14:52:45.697661: step: 896/531, loss: 0.015506122261285782 2023-01-22 14:52:46.758548: step: 900/531, loss: 0.002232337836176157 2023-01-22 14:52:47.807164: step: 904/531, loss: 0.018577953800559044 2023-01-22 14:52:48.881283: step: 908/531, loss: 0.007903103716671467 2023-01-22 14:52:49.928755: step: 912/531, loss: 0.019865937530994415 2023-01-22 14:52:50.987238: step: 916/531, loss: 0.005732994992285967 2023-01-22 14:52:52.043939: step: 920/531, loss: 0.010244255885481834 2023-01-22 14:52:53.090232: step: 924/531, loss: 0.008030731230974197 2023-01-22 14:52:54.181743: step: 928/531, loss: 0.009935649111866951 2023-01-22 14:52:55.242380: step: 932/531, loss: 0.010212778113782406 2023-01-22 14:52:56.293808: step: 936/531, loss: 0.04242117702960968 2023-01-22 14:52:57.362582: step: 940/531, loss: 0.0030285066459327936 2023-01-22 14:52:58.436386: step: 944/531, loss: 0.03561278432607651 2023-01-22 14:52:59.509352: step: 948/531, loss: 0.02422773838043213 2023-01-22 14:53:00.577301: step: 952/531, loss: 0.005665940698236227 2023-01-22 14:53:01.636902: step: 956/531, loss: 0.02517211064696312 2023-01-22 14:53:02.704230: step: 960/531, loss: 0.012842093594372272 2023-01-22 14:53:03.756563: step: 964/531, loss: 0.07806464284658432 2023-01-22 14:53:04.816618: step: 968/531, loss: 0.00721712177619338 2023-01-22 14:53:05.869946: step: 972/531, loss: 0.00022359513968694955 2023-01-22 14:53:06.942657: step: 976/531, loss: 0.005565779749304056 2023-01-22 14:53:08.009418: step: 980/531, loss: 0.0502593107521534 2023-01-22 14:53:09.062819: step: 984/531, loss: 0.006101043429225683 2023-01-22 14:53:10.112963: step: 988/531, loss: 0.028726443648338318 2023-01-22 14:53:11.173901: step: 992/531, loss: 0.010455455631017685 2023-01-22 14:53:12.236262: step: 996/531, loss: 0.0028423184994608164 2023-01-22 14:53:13.292453: step: 1000/531, loss: 0.005535279866307974 2023-01-22 14:53:14.336817: step: 1004/531, loss: 0.007476668804883957 2023-01-22 14:53:15.382000: step: 1008/531, loss: 0.03956976160407066 2023-01-22 14:53:16.471903: step: 1012/531, loss: 0.01567956618964672 2023-01-22 14:53:17.530434: step: 1016/531, loss: 0.03967675939202309 2023-01-22 14:53:18.582995: step: 1020/531, loss: 0.006127448752522469 2023-01-22 14:53:19.657386: step: 1024/531, loss: 0.017828097566962242 2023-01-22 14:53:20.720159: step: 1028/531, loss: 0.006519016344100237 2023-01-22 14:53:21.779433: step: 1032/531, loss: 0.005050727631896734 2023-01-22 14:53:22.844426: step: 1036/531, loss: 0.005482138134539127 2023-01-22 14:53:23.904040: step: 1040/531, loss: 0.01060046162456274 2023-01-22 14:53:24.959286: step: 1044/531, loss: 0.003469890682026744 2023-01-22 14:53:26.027660: step: 1048/531, loss: 0.0057162027806043625 2023-01-22 14:53:27.083753: step: 1052/531, loss: 0.005085076671093702 2023-01-22 14:53:28.154678: step: 1056/531, loss: 0.010341119021177292 2023-01-22 14:53:29.208895: step: 1060/531, loss: 0.04512256011366844 2023-01-22 14:53:30.263072: step: 1064/531, loss: 0.027469689026474953 2023-01-22 14:53:31.352923: step: 1068/531, loss: 0.010330287739634514 2023-01-22 14:53:32.423898: step: 1072/531, loss: 0.004143772181123495 2023-01-22 14:53:33.481069: step: 1076/531, loss: 0.06753170490264893 2023-01-22 14:53:34.554315: step: 1080/531, loss: 0.0033784378319978714 2023-01-22 
14:53:35.617447: step: 1084/531, loss: 0.004873470403254032 2023-01-22 14:53:36.672660: step: 1088/531, loss: 0.005720699671655893 2023-01-22 14:53:37.733103: step: 1092/531, loss: 0.0030614014249294996 2023-01-22 14:53:38.786794: step: 1096/531, loss: 0.028639938682317734 2023-01-22 14:53:39.845979: step: 1100/531, loss: 0.0073258415795862675 2023-01-22 14:53:40.922448: step: 1104/531, loss: 0.027203531935811043 2023-01-22 14:53:41.976842: step: 1108/531, loss: 0.014489049091935158 2023-01-22 14:53:43.051262: step: 1112/531, loss: 0.00576788792386651 2023-01-22 14:53:44.118673: step: 1116/531, loss: 0.009144771844148636 2023-01-22 14:53:45.175583: step: 1120/531, loss: 0.03240538015961647 2023-01-22 14:53:46.239080: step: 1124/531, loss: 0.023965157568454742 2023-01-22 14:53:47.304670: step: 1128/531, loss: 0.005787264090031385 2023-01-22 14:53:48.379013: step: 1132/531, loss: 0.008953070268034935 2023-01-22 14:53:49.458376: step: 1136/531, loss: 0.01851697452366352 2023-01-22 14:53:50.510965: step: 1140/531, loss: 0.013876278884708881 2023-01-22 14:53:51.595194: step: 1144/531, loss: 0.0022372829262167215 2023-01-22 14:53:52.662177: step: 1148/531, loss: 0.0014857390196993947 2023-01-22 14:53:53.718942: step: 1152/531, loss: 0.007147029507905245 2023-01-22 14:53:54.779392: step: 1156/531, loss: 0.0025060686748474836 2023-01-22 14:53:55.839687: step: 1160/531, loss: 0.013159961439669132 2023-01-22 14:53:56.898598: step: 1164/531, loss: 0.006286369636654854 2023-01-22 14:53:57.973335: step: 1168/531, loss: 0.016939081251621246 2023-01-22 14:53:59.031944: step: 1172/531, loss: 0.0035575907677412033 2023-01-22 14:54:00.099759: step: 1176/531, loss: 0.00465867156162858 2023-01-22 14:54:01.164884: step: 1180/531, loss: 0.005843263119459152 2023-01-22 14:54:02.228087: step: 1184/531, loss: 0.003680992405861616 2023-01-22 14:54:03.282796: step: 1188/531, loss: 0.0006899041472934186 2023-01-22 14:54:04.353671: step: 1192/531, loss: 0.0076921251602470875 2023-01-22 14:54:05.400009: step: 1196/531, loss: 0.0034816963598132133 2023-01-22 14:54:06.474130: step: 1200/531, loss: 0.01774633675813675 2023-01-22 14:54:07.534309: step: 1204/531, loss: 0.00540650449693203 2023-01-22 14:54:08.597603: step: 1208/531, loss: 0.012637587264180183 2023-01-22 14:54:09.641521: step: 1212/531, loss: 0.0011300697224214673 2023-01-22 14:54:10.693982: step: 1216/531, loss: 0.0027908890042454004 2023-01-22 14:54:11.763220: step: 1220/531, loss: 0.009684571996331215 2023-01-22 14:54:12.855571: step: 1224/531, loss: 0.02658742293715477 2023-01-22 14:54:13.905424: step: 1228/531, loss: 0.007562708109617233 2023-01-22 14:54:14.980618: step: 1232/531, loss: 0.012605461291968822 2023-01-22 14:54:16.034142: step: 1236/531, loss: 0.006218646187335253 2023-01-22 14:54:17.093809: step: 1240/531, loss: 0.0062874602153897285 2023-01-22 14:54:18.162858: step: 1244/531, loss: 0.0032117431983351707 2023-01-22 14:54:19.226910: step: 1248/531, loss: 0.017528463155031204 2023-01-22 14:54:20.300442: step: 1252/531, loss: 0.015302884392440319 2023-01-22 14:54:21.356648: step: 1256/531, loss: 0.014818764291703701 2023-01-22 14:54:22.422893: step: 1260/531, loss: 0.007478457409888506 2023-01-22 14:54:23.465281: step: 1264/531, loss: 0.007360471412539482 2023-01-22 14:54:24.520227: step: 1268/531, loss: 0.031239191070199013 2023-01-22 14:54:25.579482: step: 1272/531, loss: 0.005239270161837339 2023-01-22 14:54:26.639131: step: 1276/531, loss: 0.014330082572996616 2023-01-22 14:54:27.715075: step: 1280/531, loss: 0.03908878192305565 2023-01-22 
14:54:28.773692: step: 1284/531, loss: 0.01593167893588543 2023-01-22 14:54:29.841235: step: 1288/531, loss: 0.0050966511480510235 2023-01-22 14:54:30.906249: step: 1292/531, loss: 0.011381132528185844 2023-01-22 14:54:31.974137: step: 1296/531, loss: 0.005269972607493401 2023-01-22 14:54:33.041214: step: 1300/531, loss: 0.0067209466360509396 2023-01-22 14:54:34.088944: step: 1304/531, loss: 0.008411402814090252 2023-01-22 14:54:35.142234: step: 1308/531, loss: 0.010598246939480305 2023-01-22 14:54:36.198117: step: 1312/531, loss: 0.0056084562093019485 2023-01-22 14:54:37.268793: step: 1316/531, loss: 0.002204101299867034 2023-01-22 14:54:38.354053: step: 1320/531, loss: 0.006579904817044735 2023-01-22 14:54:39.414762: step: 1324/531, loss: 0.010172891430556774 2023-01-22 14:54:40.478513: step: 1328/531, loss: 0.023190589621663094 2023-01-22 14:54:41.531706: step: 1332/531, loss: 0.01168964896351099 2023-01-22 14:54:42.603849: step: 1336/531, loss: 0.010293787345290184 2023-01-22 14:54:43.664420: step: 1340/531, loss: 0.008583199232816696 2023-01-22 14:54:44.728651: step: 1344/531, loss: 0.0009360854746773839 2023-01-22 14:54:45.806950: step: 1348/531, loss: 0.009190468117594719 2023-01-22 14:54:46.868885: step: 1352/531, loss: 0.034403879195451736 2023-01-22 14:54:47.934790: step: 1356/531, loss: 0.009490725584328175 2023-01-22 14:54:49.005080: step: 1360/531, loss: 0.004836771637201309 2023-01-22 14:54:50.074273: step: 1364/531, loss: 0.006449365522712469 2023-01-22 14:54:51.140453: step: 1368/531, loss: 0.002050477545708418 2023-01-22 14:54:52.191476: step: 1372/531, loss: 0.024750513955950737 2023-01-22 14:54:53.243418: step: 1376/531, loss: 0.0036240650806576014 2023-01-22 14:54:54.314196: step: 1380/531, loss: 0.0056877294555306435 2023-01-22 14:54:55.367006: step: 1384/531, loss: 0.008128160610795021 2023-01-22 14:54:56.430717: step: 1388/531, loss: 0.007719332817941904 2023-01-22 14:54:57.506763: step: 1392/531, loss: 0.0063159409910440445 2023-01-22 14:54:58.559072: step: 1396/531, loss: 0.020592328161001205 2023-01-22 14:54:59.618140: step: 1400/531, loss: 0.003002229379490018 2023-01-22 14:55:00.688640: step: 1404/531, loss: 0.018590757623314857 2023-01-22 14:55:01.760237: step: 1408/531, loss: 0.019040344282984734 2023-01-22 14:55:02.838308: step: 1412/531, loss: 0.023021532222628593 2023-01-22 14:55:03.891351: step: 1416/531, loss: 0.003237620461732149 2023-01-22 14:55:04.934599: step: 1420/531, loss: 0.017588037997484207 2023-01-22 14:55:06.020012: step: 1424/531, loss: 0.012304337695240974 2023-01-22 14:55:07.104505: step: 1428/531, loss: 0.003331138053908944 2023-01-22 14:55:08.173365: step: 1432/531, loss: 0.02969825640320778 2023-01-22 14:55:09.237039: step: 1436/531, loss: 0.006468979641795158 2023-01-22 14:55:10.305964: step: 1440/531, loss: 0.008388368412852287 2023-01-22 14:55:11.372925: step: 1444/531, loss: 0.019970454275608063 2023-01-22 14:55:12.443471: step: 1448/531, loss: 0.00469807768240571 2023-01-22 14:55:13.522474: step: 1452/531, loss: 0.002759417751803994 2023-01-22 14:55:14.592944: step: 1456/531, loss: 0.019516875967383385 2023-01-22 14:55:15.648866: step: 1460/531, loss: 0.005682252813130617 2023-01-22 14:55:16.700062: step: 1464/531, loss: 0.001431136392056942 2023-01-22 14:55:17.757461: step: 1468/531, loss: 0.0031800181604921818 2023-01-22 14:55:18.822100: step: 1472/531, loss: 0.023356551304459572 2023-01-22 14:55:19.898655: step: 1476/531, loss: 0.004850280005484819 2023-01-22 14:55:20.954278: step: 1480/531, loss: 0.011429929174482822 2023-01-22 
14:55:22.003696: step: 1484/531, loss: 0.003373160259798169 2023-01-22 14:55:23.059566: step: 1488/531, loss: 0.0005578648997470737 2023-01-22 14:55:24.122422: step: 1492/531, loss: 0.005483557935804129 2023-01-22 14:55:25.187680: step: 1496/531, loss: 0.005559155717492104 2023-01-22 14:55:26.244784: step: 1500/531, loss: 0.006850066129118204 2023-01-22 14:55:27.335626: step: 1504/531, loss: 0.0032734123524278402 2023-01-22 14:55:28.423418: step: 1508/531, loss: 0.013615076430141926 2023-01-22 14:55:29.481226: step: 1512/531, loss: 0.004723368678241968 2023-01-22 14:55:30.570751: step: 1516/531, loss: 0.013113846071064472 2023-01-22 14:55:31.645379: step: 1520/531, loss: 0.005154415033757687 2023-01-22 14:55:32.709017: step: 1524/531, loss: 0.007819097489118576 2023-01-22 14:55:33.770738: step: 1528/531, loss: 0.007447626441717148 2023-01-22 14:55:34.818405: step: 1532/531, loss: 0.0016658528475090861 2023-01-22 14:55:35.885729: step: 1536/531, loss: 0.008843150921165943 2023-01-22 14:55:36.948417: step: 1540/531, loss: 0.0042173066176474094 2023-01-22 14:55:38.014374: step: 1544/531, loss: 0.0062405942007899284 2023-01-22 14:55:39.067514: step: 1548/531, loss: 0.005881103221327066 2023-01-22 14:55:40.146638: step: 1552/531, loss: 0.0062689525075256824 2023-01-22 14:55:41.218538: step: 1556/531, loss: 0.006860203109681606 2023-01-22 14:55:42.281897: step: 1560/531, loss: 0.005478202365338802 2023-01-22 14:55:43.327638: step: 1564/531, loss: 0.03872603923082352 2023-01-22 14:55:44.400977: step: 1568/531, loss: 0.00893507618457079 2023-01-22 14:55:45.467252: step: 1572/531, loss: 0.025152452290058136 2023-01-22 14:55:46.541585: step: 1576/531, loss: 0.013910738751292229 2023-01-22 14:55:47.621542: step: 1580/531, loss: 0.026610050350427628 2023-01-22 14:55:48.702949: step: 1584/531, loss: 0.006829569116234779 2023-01-22 14:55:49.773142: step: 1588/531, loss: 0.01031615398824215 2023-01-22 14:55:50.849044: step: 1592/531, loss: 0.0015475634718313813 2023-01-22 14:55:51.943586: step: 1596/531, loss: 0.019924283027648926 2023-01-22 14:55:53.018468: step: 1600/531, loss: 0.012449763715267181 2023-01-22 14:55:54.089110: step: 1604/531, loss: 0.009813033975660801 2023-01-22 14:55:55.149246: step: 1608/531, loss: 0.013363712467253208 2023-01-22 14:55:56.220571: step: 1612/531, loss: 0.0031043514609336853 2023-01-22 14:55:57.298685: step: 1616/531, loss: 0.009060146287083626 2023-01-22 14:55:58.358101: step: 1620/531, loss: 0.055927474051713943 2023-01-22 14:55:59.437771: step: 1624/531, loss: 0.008789319545030594 2023-01-22 14:56:00.497595: step: 1628/531, loss: 0.00854311604052782 2023-01-22 14:56:01.556528: step: 1632/531, loss: 0.01279479917138815 2023-01-22 14:56:02.629295: step: 1636/531, loss: 0.026169409975409508 2023-01-22 14:56:03.696120: step: 1640/531, loss: 0.01975897140800953 2023-01-22 14:56:04.798376: step: 1644/531, loss: 0.013997703790664673 2023-01-22 14:56:05.865188: step: 1648/531, loss: 0.038259588181972504 2023-01-22 14:56:06.949660: step: 1652/531, loss: 0.004551553633064032 2023-01-22 14:56:08.024050: step: 1656/531, loss: 0.005316711030900478 2023-01-22 14:56:09.094895: step: 1660/531, loss: 0.014941259287297726 2023-01-22 14:56:10.179310: step: 1664/531, loss: 0.0044372365809977055 2023-01-22 14:56:11.238385: step: 1668/531, loss: 0.031872279942035675 2023-01-22 14:56:12.341324: step: 1672/531, loss: 0.033701952546834946 2023-01-22 14:56:13.403249: step: 1676/531, loss: 0.024039575830101967 2023-01-22 14:56:14.458775: step: 1680/531, loss: 0.008942786604166031 2023-01-22 
14:56:15.544596: step: 1684/531, loss: 0.009668517857789993 2023-01-22 14:56:16.606352: step: 1688/531, loss: 0.006605146918445826 2023-01-22 14:56:17.665276: step: 1692/531, loss: 0.004481645300984383 2023-01-22 14:56:18.730276: step: 1696/531, loss: 0.014689686708152294 2023-01-22 14:56:19.796384: step: 1700/531, loss: 0.011518360115587711 2023-01-22 14:56:20.859266: step: 1704/531, loss: 0.011369490064680576 2023-01-22 14:56:21.939451: step: 1708/531, loss: 0.009161842986941338 2023-01-22 14:56:23.017960: step: 1712/531, loss: 0.00941532664000988 2023-01-22 14:56:24.066719: step: 1716/531, loss: 0.027204032987356186 2023-01-22 14:56:25.127556: step: 1720/531, loss: 0.020403021946549416 2023-01-22 14:56:26.209744: step: 1724/531, loss: 0.026714880019426346 2023-01-22 14:56:27.286291: step: 1728/531, loss: 0.005099593661725521 2023-01-22 14:56:28.351804: step: 1732/531, loss: 0.010438046418130398 2023-01-22 14:56:29.419456: step: 1736/531, loss: 0.0015087584033608437 2023-01-22 14:56:30.491106: step: 1740/531, loss: 0.0062045142985880375 2023-01-22 14:56:31.542905: step: 1744/531, loss: 0.021082807332277298 2023-01-22 14:56:32.606996: step: 1748/531, loss: 0.02364220656454563 2023-01-22 14:56:33.668241: step: 1752/531, loss: 0.006717804353684187 2023-01-22 14:56:34.722784: step: 1756/531, loss: 0.006931005045771599 2023-01-22 14:56:35.796224: step: 1760/531, loss: 0.0064237043261528015 2023-01-22 14:56:36.878511: step: 1764/531, loss: 0.010398545302450657 2023-01-22 14:56:37.947338: step: 1768/531, loss: 0.01305343583226204 2023-01-22 14:56:38.999510: step: 1772/531, loss: 0.04983716085553169 2023-01-22 14:56:40.055353: step: 1776/531, loss: 0.014482763595879078 2023-01-22 14:56:41.121855: step: 1780/531, loss: 0.0020984080620110035 2023-01-22 14:56:42.183010: step: 1784/531, loss: 0.0026128175668418407 2023-01-22 14:56:43.236531: step: 1788/531, loss: 0.004091400187462568 2023-01-22 14:56:44.296441: step: 1792/531, loss: 0.0024972951505333185 2023-01-22 14:56:45.375312: step: 1796/531, loss: 0.014961661770939827 2023-01-22 14:56:46.435256: step: 1800/531, loss: 0.005145871080458164 2023-01-22 14:56:47.495088: step: 1804/531, loss: 0.008562278002500534 2023-01-22 14:56:48.542562: step: 1808/531, loss: 0.0042311218567192554 2023-01-22 14:56:49.610381: step: 1812/531, loss: 0.006016520783305168 2023-01-22 14:56:50.664638: step: 1816/531, loss: 0.006790841463953257 2023-01-22 14:56:51.727617: step: 1820/531, loss: 0.019874434918165207 2023-01-22 14:56:52.797121: step: 1824/531, loss: 0.007352512329816818 2023-01-22 14:56:53.855307: step: 1828/531, loss: 0.01314636506140232 2023-01-22 14:56:54.926972: step: 1832/531, loss: 0.002267833100631833 2023-01-22 14:56:56.001401: step: 1836/531, loss: 0.013022135011851788 2023-01-22 14:56:57.088057: step: 1840/531, loss: 0.024273375049233437 2023-01-22 14:56:58.146794: step: 1844/531, loss: 0.007241162937134504 2023-01-22 14:56:59.211186: step: 1848/531, loss: 0.016550511121749878 2023-01-22 14:57:00.261089: step: 1852/531, loss: 0.005948533769696951 2023-01-22 14:57:01.324782: step: 1856/531, loss: 0.028029173612594604 2023-01-22 14:57:02.384302: step: 1860/531, loss: 0.0076446011662483215 2023-01-22 14:57:03.450525: step: 1864/531, loss: 0.0064603546634316444 2023-01-22 14:57:04.521152: step: 1868/531, loss: 0.010256613604724407 2023-01-22 14:57:05.590230: step: 1872/531, loss: 0.013309633359313011 2023-01-22 14:57:06.676853: step: 1876/531, loss: 0.009671253152191639 2023-01-22 14:57:07.749047: step: 1880/531, loss: 0.005127544980496168 2023-01-22 
14:57:08.803032: step: 1884/531, loss: 0.010823805816471577 2023-01-22 14:57:09.879027: step: 1888/531, loss: 0.07204632461071014 2023-01-22 14:57:10.950160: step: 1892/531, loss: 0.040515247732400894 2023-01-22 14:57:11.999121: step: 1896/531, loss: 0.004581084940582514 2023-01-22 14:57:13.045549: step: 1900/531, loss: 0.0043451376259326935 2023-01-22 14:57:14.105398: step: 1904/531, loss: 0.007818354293704033 2023-01-22 14:57:15.171353: step: 1908/531, loss: 0.02696968801319599 2023-01-22 14:57:16.237204: step: 1912/531, loss: 0.005032163579016924 2023-01-22 14:57:17.293447: step: 1916/531, loss: 0.034199975430965424 2023-01-22 14:57:18.356676: step: 1920/531, loss: 0.039390791207551956 2023-01-22 14:57:19.417734: step: 1924/531, loss: 0.04767834395170212 2023-01-22 14:57:20.470151: step: 1928/531, loss: 0.003919246606528759 2023-01-22 14:57:21.540009: step: 1932/531, loss: 0.013910716399550438 2023-01-22 14:57:22.587904: step: 1936/531, loss: 0.007001328747719526 2023-01-22 14:57:23.651019: step: 1940/531, loss: 0.025540199130773544 2023-01-22 14:57:24.700241: step: 1944/531, loss: 0.008113562129437923 2023-01-22 14:57:25.782775: step: 1948/531, loss: 0.01224514003843069 2023-01-22 14:57:26.827574: step: 1952/531, loss: 0.0009273162577301264 2023-01-22 14:57:27.872868: step: 1956/531, loss: 0.015027081593871117 2023-01-22 14:57:28.926915: step: 1960/531, loss: 0.011257095262408257 2023-01-22 14:57:29.986740: step: 1964/531, loss: 0.03339969739317894 2023-01-22 14:57:31.042261: step: 1968/531, loss: 0.0031417880672961473 2023-01-22 14:57:32.108145: step: 1972/531, loss: 4.752674431074411e-05 2023-01-22 14:57:33.182899: step: 1976/531, loss: 0.004710976500064135 2023-01-22 14:57:34.241050: step: 1980/531, loss: 0.030392993241548538 2023-01-22 14:57:35.304784: step: 1984/531, loss: 0.008365673013031483 2023-01-22 14:57:36.351939: step: 1988/531, loss: 0.011345232836902142 2023-01-22 14:57:37.409732: step: 1992/531, loss: 0.011520115658640862 2023-01-22 14:57:38.493163: step: 1996/531, loss: 0.023927411064505577 2023-01-22 14:57:39.551441: step: 2000/531, loss: 0.015948159620165825 2023-01-22 14:57:40.623110: step: 2004/531, loss: 0.008474358357489109 2023-01-22 14:57:41.689918: step: 2008/531, loss: 0.002990583423525095 2023-01-22 14:57:42.763353: step: 2012/531, loss: 0.005668531637638807 2023-01-22 14:57:43.830068: step: 2016/531, loss: 0.006443630438297987 2023-01-22 14:57:44.892133: step: 2020/531, loss: 0.00948173739016056 2023-01-22 14:57:45.957888: step: 2024/531, loss: 0.011518003419041634 2023-01-22 14:57:47.012413: step: 2028/531, loss: 0.0038602864369750023 2023-01-22 14:57:48.072524: step: 2032/531, loss: 0.002907469402998686 2023-01-22 14:57:49.141789: step: 2036/531, loss: 0.00720258941873908 2023-01-22 14:57:50.198868: step: 2040/531, loss: 0.004965411499142647 2023-01-22 14:57:51.263381: step: 2044/531, loss: 0.015387766994535923 2023-01-22 14:57:52.305104: step: 2048/531, loss: 0.004561169072985649 2023-01-22 14:57:53.360308: step: 2052/531, loss: 0.008874557912349701 2023-01-22 14:57:54.437572: step: 2056/531, loss: 0.023467542603611946 2023-01-22 14:57:55.497708: step: 2060/531, loss: 0.0 2023-01-22 14:57:56.561528: step: 2064/531, loss: 0.009165942668914795 2023-01-22 14:57:57.631083: step: 2068/531, loss: 0.022655600681900978 2023-01-22 14:57:58.690815: step: 2072/531, loss: 0.0127024557441473 2023-01-22 14:57:59.758904: step: 2076/531, loss: 0.03347679600119591 2023-01-22 14:58:00.819643: step: 2080/531, loss: 0.005705432966351509 2023-01-22 14:58:01.871209: step: 
2084/531, loss: 0.009377938695251942 2023-01-22 14:58:02.921821: step: 2088/531, loss: 0.0043668863363564014 2023-01-22 14:58:03.982824: step: 2092/531, loss: 0.049331605434417725 2023-01-22 14:58:05.028921: step: 2096/531, loss: 0.005443839356303215 2023-01-22 14:58:06.082703: step: 2100/531, loss: 0.028085576370358467 2023-01-22 14:58:07.147116: step: 2104/531, loss: 0.005784222856163979 2023-01-22 14:58:08.196974: step: 2108/531, loss: 0.02353052981197834 2023-01-22 14:58:09.250471: step: 2112/531, loss: 0.009859143756330013 2023-01-22 14:58:10.322140: step: 2116/531, loss: 0.008600117638707161 2023-01-22 14:58:11.399920: step: 2120/531, loss: 0.04422273114323616 2023-01-22 14:58:12.465253: step: 2124/531, loss: 0.013776259496808052
==================================================
Loss: 0.015
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355702906162465, 'r': 0.3206715593434344, 'f1': 0.3372800464807437}, 'combined': 0.24852213951212693, 'stategy': 1, 'epoch': 1}
Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3363021832161375, 'r': 0.27681190531869804, 'f1': 0.3036708822704133}, 'combined': 0.19014905712259525, 'stategy': 1, 'epoch': 1}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3309092967720685, 'r': 0.34660708124892187, 'f1': 0.338576333305249}, 'combined': 0.2494772982249203, 'stategy': 1, 'epoch': 1}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36014499323688415, 'r': 0.29909246056913463, 'f1': 0.3267916638139609}, 'combined': 0.20251877757484898, 'stategy': 1, 'epoch': 1}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31311162079510707, 'r': 0.3238061353573688, 'f1': 0.318369092039801}, 'combined': 0.23458775202932705, 'stategy': 1, 'epoch': 1}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3595025465625748, 'r': 0.28506666666851543, 'f1': 0.31798661960199504}, 'combined': 0.2109416189438977, 'stategy': 1, 'epoch': 1}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 1}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35344827586206895, 'r': 0.44565217391304346, 'f1': 0.3942307692307692}, 'combined': 0.1971153846153846, 'stategy': 1, 'epoch': 1}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 1}
New best chinese model...
New best russian model...
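One pattern in the step lines is worth spelling out: within each epoch the counter advances by 4 per logged entry and ends at 2124, which is 4 × 531, so the numbers line up with --accumulate_step 4 from the command if the /531 denominator counts optimizer updates per epoch. Below is a minimal, self-contained gradient-accumulation sketch of that pattern; the dummy model, data, and variable names are illustrative assumptions and not taken from train.py (only accumulate_step=4, the batch size of 16, and the 9e-4 learning rate mirror the command line).

# Gradient-accumulation sketch with a dummy model and random data (illustration only):
# one optimizer update per accumulate_step micro-batches.
import torch
from torch import nn

accumulate_step = 4   # mirrors --accumulate_step 4
micro_batches = 20    # small stand-in for the 2124 micro-batches per epoch in this log

model = nn.Linear(8, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=9e-4)

optimizer.zero_grad()
for i in range(1, micro_batches + 1):
    x, y = torch.randn(16, 8), torch.randn(16, 1)   # batch_size 16, as in the command
    loss = nn.functional.mse_loss(model(x), y)
    (loss / accumulate_step).backward()             # scale so the accumulated gradient is an average
    if i % accumulate_step == 0:                    # step the optimizer every 4 micro-batches
        optimizer.step()
        optimizer.zero_grad()
        print(f"step: {i}/{micro_batches // accumulate_step}, loss: {loss.item():.6f}")

In the sketch, dividing each micro-batch loss by accumulate_step before backward() makes the update equivalent to one batch of 4 × 16 = 64 examples, and the printed counter advances 4, 8, ... over a denominator of optimizer updates, the same shape as the step: 4/531 ... 2124/531 lines above.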
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355702906162465, 'r': 0.3206715593434344, 'f1': 0.3372800464807437}, 'combined': 0.24852213951212693, 'stategy': 1, 'epoch': 1} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3363021832161375, 'r': 0.27681190531869804, 'f1': 0.3036708822704133}, 'combined': 0.19014905712259525, 'stategy': 1, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3222139154704944, 'r': 0.348504614455753, 'f1': 0.3348439960222093}, 'combined': 0.24672715496373318, 'stategy': 1, 'epoch': 0} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3480621490304492, 'r': 0.30067045876570814, 'f1': 0.3226352576402229}, 'combined': 0.19994297656577192, 'stategy': 1, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37037037037037035, 'r': 0.43478260869565216, 'f1': 0.39999999999999997}, 'combined': 0.19999999999999998, 'stategy': 1, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31311162079510707, 'r': 0.3238061353573688, 'f1': 0.318369092039801}, 'combined': 0.23458775202932705, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3595025465625748, 'r': 0.28506666666851543, 'f1': 0.31798661960199504}, 'combined': 0.2109416189438977, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 15:00:46.578135: step: 4/531, loss: 0.004352105315774679 2023-01-22 15:00:47.637289: step: 8/531, loss: 0.020459897816181183 2023-01-22 15:00:48.679028: step: 12/531, loss: 0.021195387467741966 2023-01-22 15:00:49.734903: step: 16/531, loss: 0.009708487428724766 2023-01-22 15:00:50.779072: step: 20/531, loss: 0.004925751127302647 2023-01-22 15:00:51.830689: step: 24/531, loss: 0.012010580860078335 2023-01-22 15:00:52.878122: step: 28/531, loss: 0.004298270680010319 2023-01-22 15:00:53.925809: step: 32/531, loss: 0.008473596535623074 2023-01-22 15:00:54.983047: step: 36/531, loss: 0.007687244098633528 2023-01-22 15:00:56.046655: step: 40/531, loss: 0.04451196640729904 2023-01-22 15:00:57.109977: step: 44/531, loss: 0.005121026653796434 2023-01-22 15:00:58.146655: step: 48/531, loss: 0.006576779764145613 2023-01-22 15:00:59.206746: step: 52/531, loss: 0.017142634838819504 2023-01-22 15:01:00.271580: step: 56/531, loss: 0.011228079907596111 2023-01-22 15:01:01.334576: step: 60/531, loss: 0.029795169830322266 2023-01-22 15:01:02.420953: step: 64/531, loss: 0.010979712940752506 
2023-01-22 15:01:03.492916: step: 68/531, loss: 0.03241435065865517 2023-01-22 15:01:04.546700: step: 72/531, loss: 0.00888033863157034 2023-01-22 15:01:05.603179: step: 76/531, loss: 0.007152865175157785 2023-01-22 15:01:06.647898: step: 80/531, loss: 0.0 2023-01-22 15:01:07.705654: step: 84/531, loss: 0.004070541355758905 2023-01-22 15:01:08.756473: step: 88/531, loss: 0.014607944525778294 2023-01-22 15:01:09.816702: step: 92/531, loss: 0.006742457393556833 2023-01-22 15:01:10.877408: step: 96/531, loss: 0.026470575481653214 2023-01-22 15:01:11.938643: step: 100/531, loss: 0.005333580542355776 2023-01-22 15:01:12.995630: step: 104/531, loss: 0.018963797017931938 2023-01-22 15:01:14.051657: step: 108/531, loss: 0.008935569785535336 2023-01-22 15:01:15.117971: step: 112/531, loss: 0.03659486025571823 2023-01-22 15:01:16.181385: step: 116/531, loss: 0.014554736204445362 2023-01-22 15:01:17.234730: step: 120/531, loss: 0.022502126172184944 2023-01-22 15:01:18.338177: step: 124/531, loss: 0.02685648202896118 2023-01-22 15:01:19.381830: step: 128/531, loss: 0.001173300319351256 2023-01-22 15:01:20.442280: step: 132/531, loss: 0.0037559354677796364 2023-01-22 15:01:21.504574: step: 136/531, loss: 0.0026571196503937244 2023-01-22 15:01:22.565564: step: 140/531, loss: 0.009270715527236462 2023-01-22 15:01:23.652408: step: 144/531, loss: 0.02681458368897438 2023-01-22 15:01:24.696915: step: 148/531, loss: 0.008141659200191498 2023-01-22 15:01:25.758939: step: 152/531, loss: 0.015150204300880432 2023-01-22 15:01:26.828213: step: 156/531, loss: 0.0036456130910664797 2023-01-22 15:01:27.905363: step: 160/531, loss: 0.008028483018279076 2023-01-22 15:01:28.984613: step: 164/531, loss: 0.0028695957735180855 2023-01-22 15:01:30.031637: step: 168/531, loss: 0.0019893269054591656 2023-01-22 15:01:31.100701: step: 172/531, loss: 0.025172429159283638 2023-01-22 15:01:32.155698: step: 176/531, loss: 0.003383524715900421 2023-01-22 15:01:33.237252: step: 180/531, loss: 0.037243448197841644 2023-01-22 15:01:34.305470: step: 184/531, loss: 0.011588050983846188 2023-01-22 15:01:35.379418: step: 188/531, loss: 0.01626310497522354 2023-01-22 15:01:36.445023: step: 192/531, loss: 0.00913391262292862 2023-01-22 15:01:37.503989: step: 196/531, loss: 0.003052431158721447 2023-01-22 15:01:38.586787: step: 200/531, loss: 0.00968985352665186 2023-01-22 15:01:39.653251: step: 204/531, loss: 0.00568684097379446 2023-01-22 15:01:40.705112: step: 208/531, loss: 0.007495521102100611 2023-01-22 15:01:41.762164: step: 212/531, loss: 0.017931997776031494 2023-01-22 15:01:42.853712: step: 216/531, loss: 0.023087874054908752 2023-01-22 15:01:43.913966: step: 220/531, loss: 0.006206677295267582 2023-01-22 15:01:44.980421: step: 224/531, loss: 0.006671375595033169 2023-01-22 15:01:46.055307: step: 228/531, loss: 0.022651249542832375 2023-01-22 15:01:47.154263: step: 232/531, loss: 0.008202160708606243 2023-01-22 15:01:48.224920: step: 236/531, loss: 0.0020127848256379366 2023-01-22 15:01:49.286262: step: 240/531, loss: 0.01109483651816845 2023-01-22 15:01:50.344096: step: 244/531, loss: 0.013756894506514072 2023-01-22 15:01:51.421006: step: 248/531, loss: 0.027989234775304794 2023-01-22 15:01:52.476470: step: 252/531, loss: 0.0030352952890098095 2023-01-22 15:01:53.538912: step: 256/531, loss: 0.02312830090522766 2023-01-22 15:01:54.581706: step: 260/531, loss: 0.023030975833535194 2023-01-22 15:01:55.643867: step: 264/531, loss: 0.007313702255487442 2023-01-22 15:01:56.695019: step: 268/531, loss: 0.005014278460294008 2023-01-22 
15:01:57.753707: step: 272/531, loss: 0.0010467886459082365 2023-01-22 15:01:58.809154: step: 276/531, loss: 0.001209757407195866 2023-01-22 15:01:59.869591: step: 280/531, loss: 0.004092218354344368 2023-01-22 15:02:00.945976: step: 284/531, loss: 0.00844376441091299 2023-01-22 15:02:02.017594: step: 288/531, loss: 0.023990551009774208 2023-01-22 15:02:03.071000: step: 292/531, loss: 0.029529767110943794 2023-01-22 15:02:04.138702: step: 296/531, loss: 0.018035391345620155 2023-01-22 15:02:05.193318: step: 300/531, loss: 0.0033457730896770954 2023-01-22 15:02:06.261115: step: 304/531, loss: 0.005013692192733288 2023-01-22 15:02:07.321708: step: 308/531, loss: 0.02712704800069332 2023-01-22 15:02:08.394252: step: 312/531, loss: 0.01133128721266985 2023-01-22 15:02:09.450703: step: 316/531, loss: 0.026250425726175308 2023-01-22 15:02:10.518650: step: 320/531, loss: 0.01038841437548399 2023-01-22 15:02:11.585273: step: 324/531, loss: 0.0007836610311642289 2023-01-22 15:02:12.674759: step: 328/531, loss: 0.007466156501322985 2023-01-22 15:02:13.747086: step: 332/531, loss: 0.004719925113022327 2023-01-22 15:02:14.816837: step: 336/531, loss: 0.016059324145317078 2023-01-22 15:02:15.871690: step: 340/531, loss: 0.010760081000626087 2023-01-22 15:02:16.913347: step: 344/531, loss: 0.03876057639718056 2023-01-22 15:02:17.962228: step: 348/531, loss: 0.0002530314086470753 2023-01-22 15:02:19.013189: step: 352/531, loss: 0.0013005572836846113 2023-01-22 15:02:20.079220: step: 356/531, loss: 0.003125865710899234 2023-01-22 15:02:21.118736: step: 360/531, loss: 0.003142973640933633 2023-01-22 15:02:22.197513: step: 364/531, loss: 0.007031532004475594 2023-01-22 15:02:23.260939: step: 368/531, loss: 0.003911362960934639 2023-01-22 15:02:24.324525: step: 372/531, loss: 0.004267843905836344 2023-01-22 15:02:25.402133: step: 376/531, loss: 0.04243801161646843 2023-01-22 15:02:26.473875: step: 380/531, loss: 0.001584008801728487 2023-01-22 15:02:27.546497: step: 384/531, loss: 0.010753756389021873 2023-01-22 15:02:28.598465: step: 388/531, loss: 0.005048241000622511 2023-01-22 15:02:29.666533: step: 392/531, loss: 0.0042828405275940895 2023-01-22 15:02:30.723465: step: 396/531, loss: 0.046355344355106354 2023-01-22 15:02:31.783698: step: 400/531, loss: 0.020720630884170532 2023-01-22 15:02:32.847086: step: 404/531, loss: 0.00245657074265182 2023-01-22 15:02:33.941437: step: 408/531, loss: 0.020238470286130905 2023-01-22 15:02:35.001164: step: 412/531, loss: 0.00804706010967493 2023-01-22 15:02:36.058983: step: 416/531, loss: 0.004534791223704815 2023-01-22 15:02:37.130791: step: 420/531, loss: 0.015211626887321472 2023-01-22 15:02:38.195322: step: 424/531, loss: 0.0082132238894701 2023-01-22 15:02:39.249025: step: 428/531, loss: 0.009714786894619465 2023-01-22 15:02:40.319991: step: 432/531, loss: 0.004712847527116537 2023-01-22 15:02:41.371994: step: 436/531, loss: 0.006697678007185459 2023-01-22 15:02:42.445546: step: 440/531, loss: 0.036334652453660965 2023-01-22 15:02:43.496603: step: 444/531, loss: 0.005509565118700266 2023-01-22 15:02:44.563010: step: 448/531, loss: 0.006770780775696039 2023-01-22 15:02:45.645403: step: 452/531, loss: 0.009797475300729275 2023-01-22 15:02:46.717344: step: 456/531, loss: 0.0005074595101177692 2023-01-22 15:02:47.780884: step: 460/531, loss: 0.019728800281882286 2023-01-22 15:02:48.846442: step: 464/531, loss: 0.005373081658035517 2023-01-22 15:02:49.908670: step: 468/531, loss: 0.01225414127111435 2023-01-22 15:02:50.973414: step: 472/531, loss: 0.007407640106976032 
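The command line above specifies two learning rates, --xlmr_learning_rate 2e-5 and --learning_rate 9e-4, the common pattern of a small rate for the pretrained XLM-R encoder and a larger one for the task-specific layers. A hedged sketch of how such a split is typically passed to the optimizer; the AdamW choice, the dummy model, and the "xlmr" name prefix used for the split are assumptions for illustration, not the actual train.py:

from torch import nn
from torch.optim import AdamW

# Hypothetical two-group optimizer matching the two learning rates on the command line.
class DummyCorefModel(nn.Module):              # stand-in; the real model wraps XLM-R-large
    def __init__(self):
        super().__init__()
        self.xlmr = nn.Linear(1024, 1024)      # pretend encoder, registered under "xlmr."
        self.event_head = nn.Linear(1024, 500) # pretend task head (--event_hidden_num 500)

model = DummyCorefModel()
xlmr_params  = [p for n, p in model.named_parameters() if n.startswith("xlmr")]
other_params = [p for n, p in model.named_parameters() if not n.startswith("xlmr")]

optimizer = AdamW([
    {"params": xlmr_params,  "lr": 2e-5},   # --xlmr_learning_rate
    {"params": other_params, "lr": 9e-4},   # --learning_rate
])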
2023-01-22 15:02:52.033464: step: 476/531, loss: 0.012558969669044018 2023-01-22 15:02:53.111698: step: 480/531, loss: 0.014754174277186394 2023-01-22 15:02:54.194322: step: 484/531, loss: 0.042245738208293915 2023-01-22 15:02:55.253910: step: 488/531, loss: 0.0210590697824955 2023-01-22 15:02:56.301585: step: 492/531, loss: 0.008470935747027397 2023-01-22 15:02:57.364992: step: 496/531, loss: 0.008551366627216339 2023-01-22 15:02:58.429075: step: 500/531, loss: 0.044560711830854416 2023-01-22 15:02:59.486787: step: 504/531, loss: 0.014161897823214531 2023-01-22 15:03:00.552751: step: 508/531, loss: 0.0011446214048191905 2023-01-22 15:03:01.625504: step: 512/531, loss: 0.08650229871273041 2023-01-22 15:03:02.676649: step: 516/531, loss: 0.007837348617613316 2023-01-22 15:03:03.739015: step: 520/531, loss: 0.026229260489344597 2023-01-22 15:03:04.794788: step: 524/531, loss: 0.03698166459798813 2023-01-22 15:03:05.861159: step: 528/531, loss: 0.038229942321777344 2023-01-22 15:03:06.913523: step: 532/531, loss: 0.005690035875886679 2023-01-22 15:03:07.994993: step: 536/531, loss: 0.013920952565968037 2023-01-22 15:03:09.055252: step: 540/531, loss: 0.0009368452592752874 2023-01-22 15:03:10.130013: step: 544/531, loss: 0.013223019428551197 2023-01-22 15:03:11.183546: step: 548/531, loss: 0.009844765067100525 2023-01-22 15:03:12.242090: step: 552/531, loss: 0.013350303284823895 2023-01-22 15:03:13.307138: step: 556/531, loss: 0.01223684847354889 2023-01-22 15:03:14.371827: step: 560/531, loss: 0.00041542091639712453 2023-01-22 15:03:15.435958: step: 564/531, loss: 0.0035067263524979353 2023-01-22 15:03:16.496167: step: 568/531, loss: 0.021740669384598732 2023-01-22 15:03:17.570016: step: 572/531, loss: 0.012412325479090214 2023-01-22 15:03:18.636441: step: 576/531, loss: 0.03687620908021927 2023-01-22 15:03:19.704027: step: 580/531, loss: 0.002590457210317254 2023-01-22 15:03:20.762775: step: 584/531, loss: 0.0006319463718682528 2023-01-22 15:03:21.820141: step: 588/531, loss: 0.007740527391433716 2023-01-22 15:03:22.883636: step: 592/531, loss: 0.008442549034953117 2023-01-22 15:03:23.949170: step: 596/531, loss: 0.0035422963555902243 2023-01-22 15:03:24.999140: step: 600/531, loss: 0.0006683246465399861 2023-01-22 15:03:26.063528: step: 604/531, loss: 0.0024830906186252832 2023-01-22 15:03:27.120535: step: 608/531, loss: 0.015179877169430256 2023-01-22 15:03:28.187111: step: 612/531, loss: 0.0008795327157713473 2023-01-22 15:03:29.246868: step: 616/531, loss: 0.002481618197634816 2023-01-22 15:03:30.309645: step: 620/531, loss: 0.008597446605563164 2023-01-22 15:03:31.375378: step: 624/531, loss: 0.041076984256505966 2023-01-22 15:03:32.426120: step: 628/531, loss: 0.015597502700984478 2023-01-22 15:03:33.490170: step: 632/531, loss: 0.01000105682760477 2023-01-22 15:03:34.544742: step: 636/531, loss: 0.0031480668112635612 2023-01-22 15:03:35.603188: step: 640/531, loss: 0.010408508591353893 2023-01-22 15:03:36.672596: step: 644/531, loss: 0.016908518970012665 2023-01-22 15:03:37.728183: step: 648/531, loss: 0.015883702784776688 2023-01-22 15:03:38.784751: step: 652/531, loss: 0.001235972042195499 2023-01-22 15:03:39.849127: step: 656/531, loss: 0.019087497144937515 2023-01-22 15:03:40.917989: step: 660/531, loss: 0.006723023485392332 2023-01-22 15:03:41.969170: step: 664/531, loss: 0.01384618878364563 2023-01-22 15:03:43.020064: step: 668/531, loss: 0.005318854469805956 2023-01-22 15:03:44.072544: step: 672/531, loss: 0.0025831512175500393 2023-01-22 15:03:45.141880: step: 676/531, loss: 
0.029259277507662773 2023-01-22 15:03:46.205280: step: 680/531, loss: 0.004867771174758673 2023-01-22 15:03:47.258159: step: 684/531, loss: 0.016316745430231094 2023-01-22 15:03:48.319306: step: 688/531, loss: 0.00856808666139841 2023-01-22 15:03:49.379714: step: 692/531, loss: 0.002886097179725766 2023-01-22 15:03:50.433324: step: 696/531, loss: 0.0007334217661991715 2023-01-22 15:03:51.485541: step: 700/531, loss: 0.03867122158408165 2023-01-22 15:03:52.558285: step: 704/531, loss: 0.015843059867620468 2023-01-22 15:03:53.615886: step: 708/531, loss: 0.02910970151424408 2023-01-22 15:03:54.659533: step: 712/531, loss: 0.002809828845784068 2023-01-22 15:03:55.727574: step: 716/531, loss: 0.013181174173951149 2023-01-22 15:03:56.784877: step: 720/531, loss: 0.03666000813245773 2023-01-22 15:03:57.841030: step: 724/531, loss: 0.010472246445715427 2023-01-22 15:03:58.909824: step: 728/531, loss: 0.0021222438663244247 2023-01-22 15:03:59.984529: step: 732/531, loss: 0.036330971866846085 2023-01-22 15:04:01.055823: step: 736/531, loss: 0.00701176980510354 2023-01-22 15:04:02.109725: step: 740/531, loss: 0.006832615938037634 2023-01-22 15:04:03.169538: step: 744/531, loss: 0.09322182089090347 2023-01-22 15:04:04.208825: step: 748/531, loss: 0.004076693672686815 2023-01-22 15:04:05.270849: step: 752/531, loss: 0.014712278731167316 2023-01-22 15:04:06.322008: step: 756/531, loss: 0.00699743814766407 2023-01-22 15:04:07.392321: step: 760/531, loss: 0.056770358234643936 2023-01-22 15:04:08.453104: step: 764/531, loss: 0.019629813730716705 2023-01-22 15:04:09.519290: step: 768/531, loss: 0.04331810027360916 2023-01-22 15:04:10.573834: step: 772/531, loss: 0.007678089197725058 2023-01-22 15:04:11.627240: step: 776/531, loss: 0.007802879437804222 2023-01-22 15:04:12.676299: step: 780/531, loss: 0.0015147406375035644 2023-01-22 15:04:13.748775: step: 784/531, loss: 0.00765644945204258 2023-01-22 15:04:14.815584: step: 788/531, loss: 0.004759211093187332 2023-01-22 15:04:15.885375: step: 792/531, loss: 0.0031675910577178 2023-01-22 15:04:16.957803: step: 796/531, loss: 0.025086427107453346 2023-01-22 15:04:18.018116: step: 800/531, loss: 0.13266576826572418 2023-01-22 15:04:19.088749: step: 804/531, loss: 0.001716657541692257 2023-01-22 15:04:20.147043: step: 808/531, loss: 0.0027550519444048405 2023-01-22 15:04:21.245010: step: 812/531, loss: 0.00742874201387167 2023-01-22 15:04:22.298489: step: 816/531, loss: 0.006402923259884119 2023-01-22 15:04:23.368763: step: 820/531, loss: 0.013202822767198086 2023-01-22 15:04:24.431461: step: 824/531, loss: 0.011958223767578602 2023-01-22 15:04:25.484634: step: 828/531, loss: 0.006566676776856184 2023-01-22 15:04:26.550934: step: 832/531, loss: 0.009989522397518158 2023-01-22 15:04:27.637553: step: 836/531, loss: 0.036954306066036224 2023-01-22 15:04:28.703804: step: 840/531, loss: 0.0322166383266449 2023-01-22 15:04:29.778775: step: 844/531, loss: 0.006569726392626762 2023-01-22 15:04:30.837461: step: 848/531, loss: 0.0056249904446303844 2023-01-22 15:04:31.907901: step: 852/531, loss: 0.018429214134812355 2023-01-22 15:04:32.981833: step: 856/531, loss: 0.03290853649377823 2023-01-22 15:04:34.032972: step: 860/531, loss: 0.009376497007906437 2023-01-22 15:04:35.100842: step: 864/531, loss: 0.016479624435305595 2023-01-22 15:04:36.167428: step: 868/531, loss: 0.0211174376308918 2023-01-22 15:04:37.228174: step: 872/531, loss: 0.010659064166247845 2023-01-22 15:04:38.279085: step: 876/531, loss: 0.005403154995292425 2023-01-22 15:04:39.331234: step: 880/531, 
loss: 0.0004183748096693307 2023-01-22 15:04:40.400805: step: 884/531, loss: 0.003261387115344405 2023-01-22 15:04:41.451506: step: 888/531, loss: 0.0 2023-01-22 15:04:42.507986: step: 892/531, loss: 0.03475382179021835 2023-01-22 15:04:43.576993: step: 896/531, loss: 0.005472506862133741 2023-01-22 15:04:44.663577: step: 900/531, loss: 0.01338645163923502 2023-01-22 15:04:45.725753: step: 904/531, loss: 0.003423280082643032 2023-01-22 15:04:46.780719: step: 908/531, loss: 0.010540076531469822 2023-01-22 15:04:47.848770: step: 912/531, loss: 0.02655048854649067 2023-01-22 15:04:48.897877: step: 916/531, loss: 0.00658982340246439 2023-01-22 15:04:49.977327: step: 920/531, loss: 0.002910940907895565 2023-01-22 15:04:51.059991: step: 924/531, loss: 0.01712654158473015 2023-01-22 15:04:52.122166: step: 928/531, loss: 0.0039005933795124292 2023-01-22 15:04:53.169446: step: 932/531, loss: 0.007449942175298929 2023-01-22 15:04:54.237508: step: 936/531, loss: 0.014374330639839172 2023-01-22 15:04:55.292625: step: 940/531, loss: 0.007312045432627201 2023-01-22 15:04:56.347993: step: 944/531, loss: 0.01999361254274845 2023-01-22 15:04:57.399595: step: 948/531, loss: 0.015330715104937553 2023-01-22 15:04:58.469123: step: 952/531, loss: 0.012714835815131664 2023-01-22 15:04:59.546744: step: 956/531, loss: 0.005335532128810883 2023-01-22 15:05:00.605350: step: 960/531, loss: 0.00808714609593153 2023-01-22 15:05:01.658737: step: 964/531, loss: 0.0012076643761247396 2023-01-22 15:05:02.728713: step: 968/531, loss: 0.009486301802098751 2023-01-22 15:05:03.797704: step: 972/531, loss: 0.005028801504522562 2023-01-22 15:05:04.851780: step: 976/531, loss: 0.0016789609799161553 2023-01-22 15:05:05.904537: step: 980/531, loss: 0.007988802157342434 2023-01-22 15:05:06.978386: step: 984/531, loss: 0.0043768612667918205 2023-01-22 15:05:08.047020: step: 988/531, loss: 0.0677265077829361 2023-01-22 15:05:09.111628: step: 992/531, loss: 0.016451023519039154 2023-01-22 15:05:10.190185: step: 996/531, loss: 0.0394602008163929 2023-01-22 15:05:11.270043: step: 1000/531, loss: 0.003941897302865982 2023-01-22 15:05:12.338134: step: 1004/531, loss: 0.013630902394652367 2023-01-22 15:05:13.397164: step: 1008/531, loss: 0.007742106914520264 2023-01-22 15:05:14.451159: step: 1012/531, loss: 0.015339868143200874 2023-01-22 15:05:15.516817: step: 1016/531, loss: 0.005221690982580185 2023-01-22 15:05:16.586200: step: 1020/531, loss: 0.026504477486014366 2023-01-22 15:05:17.647899: step: 1024/531, loss: 0.029230261221528053 2023-01-22 15:05:18.706224: step: 1028/531, loss: 0.012360453605651855 2023-01-22 15:05:19.761120: step: 1032/531, loss: 0.0041351765394210815 2023-01-22 15:05:20.832341: step: 1036/531, loss: 0.014223077334463596 2023-01-22 15:05:21.887310: step: 1040/531, loss: 0.012814600951969624 2023-01-22 15:05:22.941084: step: 1044/531, loss: 0.001860265270806849 2023-01-22 15:05:24.008544: step: 1048/531, loss: 0.0050408197566866875 2023-01-22 15:05:25.064836: step: 1052/531, loss: 0.008755365386605263 2023-01-22 15:05:26.139112: step: 1056/531, loss: 0.005666117649525404 2023-01-22 15:05:27.186237: step: 1060/531, loss: 0.01227036863565445 2023-01-22 15:05:28.276568: step: 1064/531, loss: 0.013285154476761818 2023-01-22 15:05:29.371358: step: 1068/531, loss: 0.007334236986935139 2023-01-22 15:05:30.418760: step: 1072/531, loss: 0.0016570896841585636 2023-01-22 15:05:31.480414: step: 1076/531, loss: 0.002204014454036951 2023-01-22 15:05:32.552766: step: 1080/531, loss: 0.008553000167012215 2023-01-22 15:05:33.614729: 
step: 1084/531, loss: 0.01245174277573824 2023-01-22 15:05:34.682534: step: 1088/531, loss: 0.02858390472829342 2023-01-22 15:05:35.733823: step: 1092/531, loss: 0.0021017941180616617 2023-01-22 15:05:36.796926: step: 1096/531, loss: 0.010990865528583527 2023-01-22 15:05:37.847263: step: 1100/531, loss: 0.009551011957228184 2023-01-22 15:05:38.903439: step: 1104/531, loss: 0.0021572967525571585 2023-01-22 15:05:39.961397: step: 1108/531, loss: 0.0015618226025253534 2023-01-22 15:05:41.007249: step: 1112/531, loss: 0.03426532447338104 2023-01-22 15:05:42.098825: step: 1116/531, loss: 0.015635645017027855 2023-01-22 15:05:43.151287: step: 1120/531, loss: 0.005257135722786188 2023-01-22 15:05:44.206813: step: 1124/531, loss: 0.03241654112935066 2023-01-22 15:05:45.290014: step: 1128/531, loss: 0.03126508370041847 2023-01-22 15:05:46.338739: step: 1132/531, loss: 0.009504013694822788 2023-01-22 15:05:47.385871: step: 1136/531, loss: 0.007109604775905609 2023-01-22 15:05:48.457827: step: 1140/531, loss: 0.008919750340282917 2023-01-22 15:05:49.522086: step: 1144/531, loss: 0.00458704587072134 2023-01-22 15:05:50.582005: step: 1148/531, loss: 0.011064345017075539 2023-01-22 15:05:51.655908: step: 1152/531, loss: 0.011693078093230724 2023-01-22 15:05:52.734014: step: 1156/531, loss: 0.06505129486322403 2023-01-22 15:05:53.793586: step: 1160/531, loss: 0.020284447818994522 2023-01-22 15:05:54.853590: step: 1164/531, loss: 0.005057459231466055 2023-01-22 15:05:55.925323: step: 1168/531, loss: 0.003141334280371666 2023-01-22 15:05:57.008042: step: 1172/531, loss: 0.0020014727488160133 2023-01-22 15:05:58.084384: step: 1176/531, loss: 0.015610367991030216 2023-01-22 15:05:59.162117: step: 1180/531, loss: 0.009590407833456993 2023-01-22 15:06:00.221429: step: 1184/531, loss: 0.003493065247312188 2023-01-22 15:06:01.276310: step: 1188/531, loss: 0.03922054171562195 2023-01-22 15:06:02.337546: step: 1192/531, loss: 0.001920498558320105 2023-01-22 15:06:03.412345: step: 1196/531, loss: 0.009542138315737247 2023-01-22 15:06:04.497652: step: 1200/531, loss: 0.015098577365279198 2023-01-22 15:06:05.550816: step: 1204/531, loss: 0.015390695072710514 2023-01-22 15:06:06.635777: step: 1208/531, loss: 0.009725487791001797 2023-01-22 15:06:07.719815: step: 1212/531, loss: 0.01185739878565073 2023-01-22 15:06:08.796503: step: 1216/531, loss: 0.005489910487085581 2023-01-22 15:06:09.836450: step: 1220/531, loss: 0.006963996682316065 2023-01-22 15:06:10.908743: step: 1224/531, loss: 0.003871776396408677 2023-01-22 15:06:11.959770: step: 1228/531, loss: 0.02884555049240589 2023-01-22 15:06:13.009788: step: 1232/531, loss: 0.000818113679997623 2023-01-22 15:06:14.086708: step: 1236/531, loss: 0.027231259271502495 2023-01-22 15:06:15.156758: step: 1240/531, loss: 0.005196020472794771 2023-01-22 15:06:16.214757: step: 1244/531, loss: 0.008681959472596645 2023-01-22 15:06:17.280073: step: 1248/531, loss: 0.08371572196483612 2023-01-22 15:06:18.346335: step: 1252/531, loss: 0.006729732733219862 2023-01-22 15:06:19.401909: step: 1256/531, loss: 0.0025454936549067497 2023-01-22 15:06:20.473004: step: 1260/531, loss: 0.010033482685685158 2023-01-22 15:06:21.524691: step: 1264/531, loss: 0.0065170456655323505 2023-01-22 15:06:22.602867: step: 1268/531, loss: 0.01327525731176138 2023-01-22 15:06:23.676124: step: 1272/531, loss: 0.01201469823718071 2023-01-22 15:06:24.728160: step: 1276/531, loss: 0.005637824535369873 2023-01-22 15:06:25.784452: step: 1280/531, loss: 0.04774932190775871 2023-01-22 15:06:26.860475: step: 
1284/531, loss: 0.005011477507650852 2023-01-22 15:06:27.923877: step: 1288/531, loss: 0.030396079644560814 2023-01-22 15:06:28.989669: step: 1292/531, loss: 0.008106202818453312 2023-01-22 15:06:30.052077: step: 1296/531, loss: 0.006353373173624277 2023-01-22 15:06:31.124964: step: 1300/531, loss: 0.002120911842212081 2023-01-22 15:06:32.202952: step: 1304/531, loss: 0.03550753369927406 2023-01-22 15:06:33.261503: step: 1308/531, loss: 0.014169967733323574 2023-01-22 15:06:34.321651: step: 1312/531, loss: 0.01643649861216545 2023-01-22 15:06:35.382357: step: 1316/531, loss: 0.006041550077497959 2023-01-22 15:06:36.462616: step: 1320/531, loss: 0.009202200919389725 2023-01-22 15:06:37.521766: step: 1324/531, loss: 0.01462903618812561 2023-01-22 15:06:38.582785: step: 1328/531, loss: 0.01487416960299015 2023-01-22 15:06:39.653147: step: 1332/531, loss: 0.005591053050011396 2023-01-22 15:06:40.712143: step: 1336/531, loss: 0.010400021448731422 2023-01-22 15:06:41.766431: step: 1340/531, loss: 0.0030778103973716497 2023-01-22 15:06:42.821769: step: 1344/531, loss: 0.006464879959821701 2023-01-22 15:06:43.895326: step: 1348/531, loss: 0.06205100938677788 2023-01-22 15:06:44.973675: step: 1352/531, loss: 0.003671812592074275 2023-01-22 15:06:46.036787: step: 1356/531, loss: 0.01701735332608223 2023-01-22 15:06:47.106272: step: 1360/531, loss: 0.04350874572992325 2023-01-22 15:06:48.159275: step: 1364/531, loss: 0.0027278142515569925 2023-01-22 15:06:49.207505: step: 1368/531, loss: 0.004778644070029259 2023-01-22 15:06:50.262931: step: 1372/531, loss: 0.004517232533544302 2023-01-22 15:06:51.325082: step: 1376/531, loss: 0.006217169109731913 2023-01-22 15:06:52.387099: step: 1380/531, loss: 0.015240363776683807 2023-01-22 15:06:53.447649: step: 1384/531, loss: 0.007383220829069614 2023-01-22 15:06:54.502144: step: 1388/531, loss: 0.022683104500174522 2023-01-22 15:06:55.566958: step: 1392/531, loss: 0.022249735891819 2023-01-22 15:06:56.623760: step: 1396/531, loss: 0.00836427416652441 2023-01-22 15:06:57.687699: step: 1400/531, loss: 0.01059095561504364 2023-01-22 15:06:58.730864: step: 1404/531, loss: 0.006974156480282545 2023-01-22 15:06:59.792748: step: 1408/531, loss: 0.007008974906057119 2023-01-22 15:07:00.837249: step: 1412/531, loss: 0.0055645788088440895 2023-01-22 15:07:01.904403: step: 1416/531, loss: 0.019400261342525482 2023-01-22 15:07:02.989923: step: 1420/531, loss: 0.0061435881070792675 2023-01-22 15:07:04.064544: step: 1424/531, loss: 0.006364084780216217 2023-01-22 15:07:05.119330: step: 1428/531, loss: 0.001846448052674532 2023-01-22 15:07:06.166115: step: 1432/531, loss: 0.008257667534053326 2023-01-22 15:07:07.238708: step: 1436/531, loss: 0.002634771866723895 2023-01-22 15:07:08.302671: step: 1440/531, loss: 0.01020835805684328 2023-01-22 15:07:09.373741: step: 1444/531, loss: 0.009831872768700123 2023-01-22 15:07:10.438276: step: 1448/531, loss: 0.016211209818720818 2023-01-22 15:07:11.492672: step: 1452/531, loss: 0.004206764977425337 2023-01-22 15:07:12.555858: step: 1456/531, loss: 0.010705050081014633 2023-01-22 15:07:13.628299: step: 1460/531, loss: 0.0038526919670403004 2023-01-22 15:07:14.690840: step: 1464/531, loss: 0.0077238441444933414 2023-01-22 15:07:15.741841: step: 1468/531, loss: 0.027157988399267197 2023-01-22 15:07:16.795596: step: 1472/531, loss: 0.0005434079794213176 2023-01-22 15:07:17.866535: step: 1476/531, loss: 0.0026184748858213425 2023-01-22 15:07:18.922013: step: 1480/531, loss: 0.0012519037118181586 2023-01-22 15:07:19.987349: step: 
1484/531, loss: 0.0050787245854735374 2023-01-22 15:07:21.049680: step: 1488/531, loss: 0.004459341522306204 2023-01-22 15:07:22.120126: step: 1492/531, loss: 0.011001690290868282 2023-01-22 15:07:23.193114: step: 1496/531, loss: 0.005979818757623434 2023-01-22 15:07:24.260083: step: 1500/531, loss: 0.009303625673055649 2023-01-22 15:07:25.320681: step: 1504/531, loss: 0.00011039365926990286 2023-01-22 15:07:26.384183: step: 1508/531, loss: 0.006546668708324432 2023-01-22 15:07:27.451846: step: 1512/531, loss: 0.0367443710565567 2023-01-22 15:07:28.517286: step: 1516/531, loss: 0.003502284875139594 2023-01-22 15:07:29.576609: step: 1520/531, loss: 0.017570864409208298 2023-01-22 15:07:30.647821: step: 1524/531, loss: 0.017676059156656265 2023-01-22 15:07:31.719288: step: 1528/531, loss: 0.005596524570137262 2023-01-22 15:07:32.783057: step: 1532/531, loss: 0.004737562034279108 2023-01-22 15:07:33.849891: step: 1536/531, loss: 0.009755619801580906 2023-01-22 15:07:34.911248: step: 1540/531, loss: 0.0008651097887195647 2023-01-22 15:07:35.977935: step: 1544/531, loss: 0.015758806839585304 2023-01-22 15:07:37.054771: step: 1548/531, loss: 0.004234983120113611 2023-01-22 15:07:38.126585: step: 1552/531, loss: 0.030790746212005615 2023-01-22 15:07:39.191524: step: 1556/531, loss: 0.017823224887251854 2023-01-22 15:07:40.257055: step: 1560/531, loss: 0.020318850874900818 2023-01-22 15:07:41.330792: step: 1564/531, loss: 0.01772797480225563 2023-01-22 15:07:42.419607: step: 1568/531, loss: 0.039910510182380676 2023-01-22 15:07:43.481625: step: 1572/531, loss: 0.002660271944478154 2023-01-22 15:07:44.550908: step: 1576/531, loss: 0.0031544482335448265 2023-01-22 15:07:45.616396: step: 1580/531, loss: 0.023564768955111504 2023-01-22 15:07:46.683587: step: 1584/531, loss: 0.009568865410983562 2023-01-22 15:07:47.761360: step: 1588/531, loss: 0.0046332525089383125 2023-01-22 15:07:48.836129: step: 1592/531, loss: 0.04241786152124405 2023-01-22 15:07:49.907512: step: 1596/531, loss: 0.025244561955332756 2023-01-22 15:07:50.995681: step: 1600/531, loss: 0.026974640786647797 2023-01-22 15:07:52.059919: step: 1604/531, loss: 0.017429644241929054 2023-01-22 15:07:53.118753: step: 1608/531, loss: 0.006756743881851435 2023-01-22 15:07:54.187619: step: 1612/531, loss: 0.016141850501298904 2023-01-22 15:07:55.245823: step: 1616/531, loss: 0.012421050108969212 2023-01-22 15:07:56.303498: step: 1620/531, loss: 0.014212023466825485 2023-01-22 15:07:57.365233: step: 1624/531, loss: 0.014323304407298565 2023-01-22 15:07:58.435247: step: 1628/531, loss: 0.0065786573104560375 2023-01-22 15:07:59.520007: step: 1632/531, loss: 0.003649013116955757 2023-01-22 15:08:00.587720: step: 1636/531, loss: 0.016441455110907555 2023-01-22 15:08:01.643959: step: 1640/531, loss: 0.018609926104545593 2023-01-22 15:08:02.723070: step: 1644/531, loss: 0.0028549593407660723 2023-01-22 15:08:03.806979: step: 1648/531, loss: 0.023130150511860847 2023-01-22 15:08:04.883364: step: 1652/531, loss: 0.009245655499398708 2023-01-22 15:08:05.959334: step: 1656/531, loss: 0.03684879466891289 2023-01-22 15:08:07.024584: step: 1660/531, loss: 0.003761161118745804 2023-01-22 15:08:08.113938: step: 1664/531, loss: 0.005097277462482452 2023-01-22 15:08:09.182862: step: 1668/531, loss: 0.016136791557073593 2023-01-22 15:08:10.257267: step: 1672/531, loss: 0.030105091631412506 2023-01-22 15:08:11.331038: step: 1676/531, loss: 0.012599162757396698 2023-01-22 15:08:12.419017: step: 1680/531, loss: 0.014272150583565235 2023-01-22 15:08:13.470747: step: 
1684/531, loss: 0.0019857652951031923 2023-01-22 15:08:14.559288: step: 1688/531, loss: 0.0028475599829107523 2023-01-22 15:08:15.634433: step: 1692/531, loss: 0.01421818695962429 2023-01-22 15:08:16.696142: step: 1696/531, loss: 0.011855151504278183 2023-01-22 15:08:17.757329: step: 1700/531, loss: 0.003681303234770894 2023-01-22 15:08:18.830446: step: 1704/531, loss: 0.004155156668275595 2023-01-22 15:08:19.894519: step: 1708/531, loss: 0.025328440591692924 2023-01-22 15:08:20.992809: step: 1712/531, loss: 0.005948621779680252 2023-01-22 15:08:22.087098: step: 1716/531, loss: 0.001444079214707017 2023-01-22 15:08:23.177038: step: 1720/531, loss: 0.0019061742350459099 2023-01-22 15:08:24.239560: step: 1724/531, loss: 0.007866954430937767 2023-01-22 15:08:25.324807: step: 1728/531, loss: 0.016262635588645935 2023-01-22 15:08:26.406118: step: 1732/531, loss: 0.01545544620603323 2023-01-22 15:08:27.473131: step: 1736/531, loss: 0.033637918531894684 2023-01-22 15:08:28.542861: step: 1740/531, loss: 0.006914136465638876 2023-01-22 15:08:29.610322: step: 1744/531, loss: 0.040340542793273926 2023-01-22 15:08:30.684858: step: 1748/531, loss: 0.025750571861863136 2023-01-22 15:08:31.746499: step: 1752/531, loss: 0.010608994401991367 2023-01-22 15:08:32.833051: step: 1756/531, loss: 0.007926186546683311 2023-01-22 15:08:33.901843: step: 1760/531, loss: 0.005685245618224144 2023-01-22 15:08:34.990948: step: 1764/531, loss: 0.018118424341082573 2023-01-22 15:08:36.072490: step: 1768/531, loss: 0.011907443404197693 2023-01-22 15:08:37.136538: step: 1772/531, loss: 0.002592187374830246 2023-01-22 15:08:38.198086: step: 1776/531, loss: 0.00498719047755003 2023-01-22 15:08:39.273206: step: 1780/531, loss: 0.007208333350718021 2023-01-22 15:08:40.334941: step: 1784/531, loss: 0.04381411522626877 2023-01-22 15:08:41.397442: step: 1788/531, loss: 0.004342720843851566 2023-01-22 15:08:42.468247: step: 1792/531, loss: 0.006631031166762114 2023-01-22 15:08:43.529199: step: 1796/531, loss: 0.001173530239611864 2023-01-22 15:08:44.612986: step: 1800/531, loss: 0.001868226332589984 2023-01-22 15:08:45.697743: step: 1804/531, loss: 0.00784171000123024 2023-01-22 15:08:46.768596: step: 1808/531, loss: 0.0069846780970692635 2023-01-22 15:08:47.835255: step: 1812/531, loss: 0.0037238539662212133 2023-01-22 15:08:48.898334: step: 1816/531, loss: 0.0016817169962450862 2023-01-22 15:08:49.957040: step: 1820/531, loss: 0.0042967661283910275 2023-01-22 15:08:51.022694: step: 1824/531, loss: 0.005480621941387653 2023-01-22 15:08:52.081867: step: 1828/531, loss: 0.02603236399590969 2023-01-22 15:08:53.142165: step: 1832/531, loss: 0.020367998629808426 2023-01-22 15:08:54.198092: step: 1836/531, loss: 0.007561712525784969 2023-01-22 15:08:55.272980: step: 1840/531, loss: 0.010053700767457485 2023-01-22 15:08:56.354030: step: 1844/531, loss: 0.012591834180057049 2023-01-22 15:08:57.406425: step: 1848/531, loss: 0.005055839661508799 2023-01-22 15:08:58.471281: step: 1852/531, loss: 0.0024400800466537476 2023-01-22 15:08:59.524429: step: 1856/531, loss: 0.010241357609629631 2023-01-22 15:09:00.579296: step: 1860/531, loss: 0.08464954793453217 2023-01-22 15:09:01.662158: step: 1864/531, loss: 0.006787307560443878 2023-01-22 15:09:02.735742: step: 1868/531, loss: 0.010291000828146935 2023-01-22 15:09:03.818760: step: 1872/531, loss: 0.008114447817206383 2023-01-22 15:09:04.883827: step: 1876/531, loss: 0.0033625962678343058 2023-01-22 15:09:05.959813: step: 1880/531, loss: 0.007749938406050205 2023-01-22 15:09:07.032548: step: 
1884/531, loss: 0.00843559019267559 2023-01-22 15:09:08.099569: step: 1888/531, loss: 0.008046545088291168 2023-01-22 15:09:09.156500: step: 1892/531, loss: 0.023566262796521187 2023-01-22 15:09:10.232459: step: 1896/531, loss: 0.012293393723666668 2023-01-22 15:09:11.312081: step: 1900/531, loss: 0.05550937354564667 2023-01-22 15:09:12.383522: step: 1904/531, loss: 0.025415586307644844 2023-01-22 15:09:13.463190: step: 1908/531, loss: 0.016372740268707275 2023-01-22 15:09:14.544262: step: 1912/531, loss: 0.04654484987258911 2023-01-22 15:09:15.604324: step: 1916/531, loss: 0.010204870253801346 2023-01-22 15:09:16.659987: step: 1920/531, loss: 0.016956163570284843 2023-01-22 15:09:17.744843: step: 1924/531, loss: 0.003951470833271742 2023-01-22 15:09:18.802291: step: 1928/531, loss: 0.00043705650023184717 2023-01-22 15:09:19.853865: step: 1932/531, loss: 0.0102516645565629 2023-01-22 15:09:20.909816: step: 1936/531, loss: 0.004810890648514032 2023-01-22 15:09:21.979884: step: 1940/531, loss: 0.01888265833258629 2023-01-22 15:09:23.048747: step: 1944/531, loss: 0.035354502499103546 2023-01-22 15:09:24.119616: step: 1948/531, loss: 0.004907671827822924 2023-01-22 15:09:25.176862: step: 1952/531, loss: 0.0027734641917049885 2023-01-22 15:09:26.260686: step: 1956/531, loss: 0.0067260474897921085 2023-01-22 15:09:27.327248: step: 1960/531, loss: 0.008661641739308834 2023-01-22 15:09:28.408527: step: 1964/531, loss: 0.00519506074488163 2023-01-22 15:09:29.498387: step: 1968/531, loss: 0.005057011265307665 2023-01-22 15:09:30.579425: step: 1972/531, loss: 0.012670012190937996 2023-01-22 15:09:31.637652: step: 1976/531, loss: 0.005739648826420307 2023-01-22 15:09:32.705681: step: 1980/531, loss: 0.00497743533924222 2023-01-22 15:09:33.776193: step: 1984/531, loss: 0.016153978183865547 2023-01-22 15:09:34.837214: step: 1988/531, loss: 0.005767592694610357 2023-01-22 15:09:35.906748: step: 1992/531, loss: 0.006481880787760019 2023-01-22 15:09:36.967653: step: 1996/531, loss: 0.005682860501110554 2023-01-22 15:09:38.012817: step: 2000/531, loss: 0.004004604183137417 2023-01-22 15:09:39.073757: step: 2004/531, loss: 0.013545628637075424 2023-01-22 15:09:40.133756: step: 2008/531, loss: 0.014110635034739971 2023-01-22 15:09:41.201677: step: 2012/531, loss: 0.010650896467268467 2023-01-22 15:09:42.259332: step: 2016/531, loss: 0.006431364454329014 2023-01-22 15:09:43.315680: step: 2020/531, loss: 0.0027899337001144886 2023-01-22 15:09:44.383903: step: 2024/531, loss: 0.01540421787649393 2023-01-22 15:09:45.426387: step: 2028/531, loss: 0.004104196093976498 2023-01-22 15:09:46.487168: step: 2032/531, loss: 0.015195207670331001 2023-01-22 15:09:47.543215: step: 2036/531, loss: 0.016384121030569077 2023-01-22 15:09:48.586531: step: 2040/531, loss: 0.012861286289989948 2023-01-22 15:09:49.649649: step: 2044/531, loss: 0.004025546833872795 2023-01-22 15:09:50.715752: step: 2048/531, loss: 0.0530475378036499 2023-01-22 15:09:51.767736: step: 2052/531, loss: 0.0006397791439667344 2023-01-22 15:09:52.841352: step: 2056/531, loss: 0.015077264979481697 2023-01-22 15:09:53.905471: step: 2060/531, loss: 0.026661042124032974 2023-01-22 15:09:54.968055: step: 2064/531, loss: 0.005175887607038021 2023-01-22 15:09:56.018406: step: 2068/531, loss: 0.008792277425527573 2023-01-22 15:09:57.064591: step: 2072/531, loss: 0.028944917023181915 2023-01-22 15:09:58.127313: step: 2076/531, loss: 0.0038539026863873005 2023-01-22 15:09:59.201022: step: 2080/531, loss: 0.015821902081370354 2023-01-22 15:10:00.244933: step: 
2084/531, loss: 0.013511608354747295 2023-01-22 15:10:01.316291: step: 2088/531, loss: 0.012183400802314281 2023-01-22 15:10:02.392896: step: 2092/531, loss: 0.010933508165180683 2023-01-22 15:10:03.463794: step: 2096/531, loss: 0.00789438746869564 2023-01-22 15:10:04.531779: step: 2100/531, loss: 0.01371679361909628 2023-01-22 15:10:05.590877: step: 2104/531, loss: 0.007995257154107094 2023-01-22 15:10:06.657571: step: 2108/531, loss: 0.0035291395615786314 2023-01-22 15:10:07.715686: step: 2112/531, loss: 0.0014713897835463285 2023-01-22 15:10:08.787450: step: 2116/531, loss: 0.0042847092263400555 2023-01-22 15:10:09.858571: step: 2120/531, loss: 0.015639590099453926 2023-01-22 15:10:10.918515: step: 2124/531, loss: 0.004624858032912016 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3502458592132506, 'r': 0.3203953598484849, 'f1': 0.33465628090999017}, 'combined': 0.2465888385652559, 'stategy': 1, 'epoch': 2} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33264796395741225, 'r': 0.2741043240912612, 'f1': 0.30055179722228337}, 'combined': 0.18819598517657, 'stategy': 1, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32676553098064726, 'r': 0.34660708124892187, 'f1': 0.3363939812489536}, 'combined': 0.2478692493413342, 'stategy': 1, 'epoch': 2} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3577720373081227, 'r': 0.30067045876570814, 'f1': 0.3267452609222639}, 'combined': 0.20249002085323395, 'stategy': 1, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3056536835748792, 'r': 0.3201533839342188, 'f1': 0.31273555761507565}, 'combined': 0.23043672666373993, 'stategy': 1, 'epoch': 2} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.35859833711494116, 'r': 0.2859708258005227, 'f1': 0.31819289067945483}, 'combined': 0.2110784522329057, 'stategy': 1, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34285714285714286, 'r': 0.34285714285714286, 'f1': 0.34285714285714286}, 'combined': 0.22857142857142856, 'stategy': 1, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35344827586206895, 'r': 0.44565217391304346, 'f1': 0.3942307692307692}, 'combined': 0.1971153846153846, 'stategy': 1, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 2} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355702906162465, 'r': 0.3206715593434344, 'f1': 0.3372800464807437}, 'combined': 0.24852213951212693, 'stategy': 1, 'epoch': 1} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3363021832161375, 'r': 0.27681190531869804, 'f1': 0.3036708822704133}, 'combined': 0.19014905712259525, 'stategy': 1, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 
0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3222139154704944, 'r': 0.348504614455753, 'f1': 0.3348439960222093}, 'combined': 0.24672715496373318, 'stategy': 1, 'epoch': 0} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3480621490304492, 'r': 0.30067045876570814, 'f1': 0.3226352576402229}, 'combined': 0.19994297656577192, 'stategy': 1, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37037037037037035, 'r': 0.43478260869565216, 'f1': 0.39999999999999997}, 'combined': 0.19999999999999998, 'stategy': 1, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31311162079510707, 'r': 0.3238061353573688, 'f1': 0.318369092039801}, 'combined': 0.23458775202932705, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3595025465625748, 'r': 0.28506666666851543, 'f1': 0.31798661960199504}, 'combined': 0.2109416189438977, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 15:12:41.032942: step: 4/531, loss: 0.00825596135109663 2023-01-22 15:12:42.094551: step: 8/531, loss: 0.00016586575657129288 2023-01-22 15:12:43.138847: step: 12/531, loss: 0.0028286681044846773 2023-01-22 15:12:44.193305: step: 16/531, loss: 0.006053767167031765 2023-01-22 15:12:45.267033: step: 20/531, loss: 0.004816325847059488 2023-01-22 15:12:46.326931: step: 24/531, loss: 0.018235409632325172 2023-01-22 15:12:47.375485: step: 28/531, loss: 0.008318692445755005 2023-01-22 15:12:48.438614: step: 32/531, loss: 0.012458117678761482 2023-01-22 15:12:49.497429: step: 36/531, loss: 0.042028073221445084 2023-01-22 15:12:50.554292: step: 40/531, loss: 0.007052010390907526 2023-01-22 15:12:51.597370: step: 44/531, loss: 0.013949177227914333 2023-01-22 15:12:52.660962: step: 48/531, loss: 0.00538051500916481 2023-01-22 15:12:53.725992: step: 52/531, loss: 0.001497541437856853 2023-01-22 15:12:54.766045: step: 56/531, loss: 0.014978907071053982 2023-01-22 15:12:55.820640: step: 60/531, loss: 0.0023460162337869406 2023-01-22 15:12:56.867302: step: 64/531, loss: 0.04558098316192627 2023-01-22 15:12:57.936851: step: 68/531, loss: 0.007001228164881468 2023-01-22 15:12:58.991915: step: 72/531, loss: 0.028760366141796112 2023-01-22 15:13:00.043604: step: 76/531, loss: 0.005202973261475563 2023-01-22 15:13:01.095871: step: 80/531, loss: 0.019886594265699387 2023-01-22 15:13:02.159098: step: 84/531, loss: 0.0015385076403617859 2023-01-22 15:13:03.224785: step: 88/531, loss: 0.013544277288019657 2023-01-22 15:13:04.278300: step: 92/531, loss: 0.020996497943997383 2023-01-22 15:13:05.335969: step: 96/531, loss: 0.002939745783805847 2023-01-22 15:13:06.385950: step: 100/531, loss: 0.0149089265614748 2023-01-22 15:13:07.446425: 
step: 104/531, loss: 0.013207102194428444 2023-01-22 15:13:08.514983: step: 108/531, loss: 0.005114658270031214 2023-01-22 15:13:09.566187: step: 112/531, loss: 0.003013624344021082 2023-01-22 15:13:10.640568: step: 116/531, loss: 0.008817988447844982 2023-01-22 15:13:11.708618: step: 120/531, loss: 0.008939876221120358 2023-01-22 15:13:12.770969: step: 124/531, loss: 0.07155707478523254 2023-01-22 15:13:13.828120: step: 128/531, loss: 0.013058925047516823 2023-01-22 15:13:14.877151: step: 132/531, loss: 0.007229271810501814 2023-01-22 15:13:15.913199: step: 136/531, loss: 0.0019203925039619207 2023-01-22 15:13:16.970486: step: 140/531, loss: 0.005608052015304565 2023-01-22 15:13:18.029767: step: 144/531, loss: 0.016013264656066895 2023-01-22 15:13:19.082868: step: 148/531, loss: 0.0034265010617673397 2023-01-22 15:13:20.148318: step: 152/531, loss: 0.021052032709121704 2023-01-22 15:13:21.205609: step: 156/531, loss: 0.008227755315601826 2023-01-22 15:13:22.273095: step: 160/531, loss: 0.003688282798975706 2023-01-22 15:13:23.335377: step: 164/531, loss: 0.012636126950383186 2023-01-22 15:13:24.382477: step: 168/531, loss: 0.005776895675808191 2023-01-22 15:13:25.457244: step: 172/531, loss: 0.003211360424757004 2023-01-22 15:13:26.507025: step: 176/531, loss: 0.004356752149760723 2023-01-22 15:13:27.541611: step: 180/531, loss: 0.02256453037261963 2023-01-22 15:13:28.602620: step: 184/531, loss: 0.009278696961700916 2023-01-22 15:13:29.650681: step: 188/531, loss: 0.02773536741733551 2023-01-22 15:13:30.713288: step: 192/531, loss: 0.017448367550969124 2023-01-22 15:13:31.764347: step: 196/531, loss: 0.007279939018189907 2023-01-22 15:13:32.836538: step: 200/531, loss: 0.011650837026536465 2023-01-22 15:13:33.883559: step: 204/531, loss: 0.010631701909005642 2023-01-22 15:13:34.943174: step: 208/531, loss: 0.004068745765835047 2023-01-22 15:13:35.990602: step: 212/531, loss: 0.00975571945309639 2023-01-22 15:13:37.054707: step: 216/531, loss: 0.005036220885813236 2023-01-22 15:13:38.112119: step: 220/531, loss: 0.003934045787900686 2023-01-22 15:13:39.178078: step: 224/531, loss: 0.05864235386252403 2023-01-22 15:13:40.235529: step: 228/531, loss: 0.004538682289421558 2023-01-22 15:13:41.287538: step: 232/531, loss: 0.0034913637209683657 2023-01-22 15:13:42.345465: step: 236/531, loss: 0.019596682861447334 2023-01-22 15:13:43.399851: step: 240/531, loss: 0.0006120089092291892 2023-01-22 15:13:44.466257: step: 244/531, loss: 0.007744806818664074 2023-01-22 15:13:45.546313: step: 248/531, loss: 0.002766442485153675 2023-01-22 15:13:46.601790: step: 252/531, loss: 0.029749469831585884 2023-01-22 15:13:47.649783: step: 256/531, loss: 0.009306740947067738 2023-01-22 15:13:48.701442: step: 260/531, loss: 0.01456803735345602 2023-01-22 15:13:49.757768: step: 264/531, loss: 0.0026536439545452595 2023-01-22 15:13:50.807189: step: 268/531, loss: 0.008977441117167473 2023-01-22 15:13:51.879406: step: 272/531, loss: 0.03587059676647186 2023-01-22 15:13:52.947811: step: 276/531, loss: 0.0036901943385601044 2023-01-22 15:13:54.015369: step: 280/531, loss: 0.0026352743152529 2023-01-22 15:13:55.076996: step: 284/531, loss: 0.011832822114229202 2023-01-22 15:13:56.150906: step: 288/531, loss: 0.011179967783391476 2023-01-22 15:13:57.211217: step: 292/531, loss: 0.00022906227968633175 2023-01-22 15:13:58.265617: step: 296/531, loss: 0.003962282091379166 2023-01-22 15:13:59.329116: step: 300/531, loss: 0.006789751350879669 2023-01-22 15:14:00.388507: step: 304/531, loss: 0.006102881394326687 2023-01-22 
15:14:01.452317: step: 308/531, loss: 0.008739740587770939 2023-01-22 15:14:02.520662: step: 312/531, loss: 0.00865511316806078 2023-01-22 15:14:03.593545: step: 316/531, loss: 0.010070760734379292 2023-01-22 15:14:04.651245: step: 320/531, loss: 0.004473298322409391 2023-01-22 15:14:05.697263: step: 324/531, loss: 0.009891677647829056 2023-01-22 15:14:06.760750: step: 328/531, loss: 0.004360807593911886 2023-01-22 15:14:07.814500: step: 332/531, loss: 0.00655454583466053 2023-01-22 15:14:08.872531: step: 336/531, loss: 0.0032483020331710577 2023-01-22 15:14:09.934201: step: 340/531, loss: 0.005746086128056049 2023-01-22 15:14:11.008981: step: 344/531, loss: 0.012817678041756153 2023-01-22 15:14:12.099805: step: 348/531, loss: 0.011524232104420662 2023-01-22 15:14:13.182761: step: 352/531, loss: 0.013928496278822422 2023-01-22 15:14:14.247100: step: 356/531, loss: 0.07018346339464188 2023-01-22 15:14:15.302223: step: 360/531, loss: 0.00577493105083704 2023-01-22 15:14:16.376197: step: 364/531, loss: 0.0031595155596733093 2023-01-22 15:14:17.454618: step: 368/531, loss: 0.012158900499343872 2023-01-22 15:14:18.543793: step: 372/531, loss: 0.005312105640769005 2023-01-22 15:14:19.631637: step: 376/531, loss: 0.02854202874004841 2023-01-22 15:14:20.694263: step: 380/531, loss: 0.009361553005874157 2023-01-22 15:14:21.768370: step: 384/531, loss: 0.004679825156927109 2023-01-22 15:14:22.828256: step: 388/531, loss: 0.004918469116091728 2023-01-22 15:14:23.895835: step: 392/531, loss: 0.01258725393563509 2023-01-22 15:14:24.961348: step: 396/531, loss: 0.0035387177485972643 2023-01-22 15:14:26.018113: step: 400/531, loss: 0.003993109799921513 2023-01-22 15:14:27.081863: step: 404/531, loss: 0.0033950379583984613 2023-01-22 15:14:28.136201: step: 408/531, loss: 0.00237878761254251 2023-01-22 15:14:29.198109: step: 412/531, loss: 0.0074381413869559765 2023-01-22 15:14:30.254446: step: 416/531, loss: 0.005622901953756809 2023-01-22 15:14:31.304285: step: 420/531, loss: 0.011522011831402779 2023-01-22 15:14:32.362588: step: 424/531, loss: 0.006392822600901127 2023-01-22 15:14:33.423943: step: 428/531, loss: 0.0032358590979129076 2023-01-22 15:14:34.490252: step: 432/531, loss: 0.007227728608995676 2023-01-22 15:14:35.548826: step: 436/531, loss: 0.013248465023934841 2023-01-22 15:14:36.602669: step: 440/531, loss: 0.03283291310071945 2023-01-22 15:14:37.653921: step: 444/531, loss: 0.0037193107418715954 2023-01-22 15:14:38.730614: step: 448/531, loss: 0.0038427719846367836 2023-01-22 15:14:39.799618: step: 452/531, loss: 0.013870516791939735 2023-01-22 15:14:40.866296: step: 456/531, loss: 0.0075324522331357 2023-01-22 15:14:41.943433: step: 460/531, loss: 0.014308858662843704 2023-01-22 15:14:43.010210: step: 464/531, loss: 0.009213636629283428 2023-01-22 15:14:44.071846: step: 468/531, loss: 0.010558145120739937 2023-01-22 15:14:45.131070: step: 472/531, loss: 0.0036829900927841663 2023-01-22 15:14:46.213027: step: 476/531, loss: 0.02445288933813572 2023-01-22 15:14:47.273182: step: 480/531, loss: 0.008990545757114887 2023-01-22 15:14:48.340878: step: 484/531, loss: 0.018219618126749992 2023-01-22 15:14:49.428553: step: 488/531, loss: 0.013771738857030869 2023-01-22 15:14:50.476099: step: 492/531, loss: 0.018816614523530006 2023-01-22 15:14:51.536694: step: 496/531, loss: 0.0018912581726908684 2023-01-22 15:14:52.612579: step: 500/531, loss: 0.011184596456587315 2023-01-22 15:14:53.674943: step: 504/531, loss: 0.004793441854417324 2023-01-22 15:14:54.732029: step: 508/531, loss: 
0.0027120495215058327 2023-01-22 15:14:55.800441: step: 512/531, loss: 0.002141448436304927 2023-01-22 15:14:56.870350: step: 516/531, loss: 0.008437187410891056 2023-01-22 15:14:57.940551: step: 520/531, loss: 0.010813063010573387 2023-01-22 15:14:59.029119: step: 524/531, loss: 0.03009651042521 2023-01-22 15:15:00.111411: step: 528/531, loss: 0.0066567291505634785 2023-01-22 15:15:01.175938: step: 532/531, loss: 0.005642846692353487 2023-01-22 15:15:02.235550: step: 536/531, loss: 0.002437244402244687 2023-01-22 15:15:03.299377: step: 540/531, loss: 0.00498404074460268 2023-01-22 15:15:04.368948: step: 544/531, loss: 0.006974169984459877 2023-01-22 15:15:05.427929: step: 548/531, loss: 0.0032037836499512196 2023-01-22 15:15:06.472393: step: 552/531, loss: 0.0017853636527433991 2023-01-22 15:15:07.527977: step: 556/531, loss: 0.0012945530470460653 2023-01-22 15:15:08.586882: step: 560/531, loss: 0.040665559470653534 2023-01-22 15:15:09.667101: step: 564/531, loss: 0.0016492478316649795 2023-01-22 15:15:10.714457: step: 568/531, loss: 0.004183882847428322 2023-01-22 15:15:11.778045: step: 572/531, loss: 0.005279974080622196 2023-01-22 15:15:12.842424: step: 576/531, loss: 0.0058014593087136745 2023-01-22 15:15:13.920287: step: 580/531, loss: 0.004608124028891325 2023-01-22 15:15:14.995471: step: 584/531, loss: 0.006839042529463768 2023-01-22 15:15:16.062703: step: 588/531, loss: 0.00027707641129381955 2023-01-22 15:15:17.147244: step: 592/531, loss: 0.005235550459474325 2023-01-22 15:15:18.206888: step: 596/531, loss: 0.005289706401526928 2023-01-22 15:15:19.287116: step: 600/531, loss: 0.011757896281778812 2023-01-22 15:15:20.352023: step: 604/531, loss: 0.006067200098186731 2023-01-22 15:15:21.403322: step: 608/531, loss: 0.005957695655524731 2023-01-22 15:15:22.468335: step: 612/531, loss: 0.013614512048661709 2023-01-22 15:15:23.531956: step: 616/531, loss: 0.008196470327675343 2023-01-22 15:15:24.595780: step: 620/531, loss: 0.010286089964210987 2023-01-22 15:15:25.665908: step: 624/531, loss: 0.005689904093742371 2023-01-22 15:15:26.734242: step: 628/531, loss: 0.0031526912935078144 2023-01-22 15:15:27.804690: step: 632/531, loss: 0.0054020872339606285 2023-01-22 15:15:28.859871: step: 636/531, loss: 0.009811890311539173 2023-01-22 15:15:29.916249: step: 640/531, loss: 0.011979300528764725 2023-01-22 15:15:30.982649: step: 644/531, loss: 0.011244582012295723 2023-01-22 15:15:32.031123: step: 648/531, loss: 0.008071884512901306 2023-01-22 15:15:33.099997: step: 652/531, loss: 0.0025572855956852436 2023-01-22 15:15:34.153578: step: 656/531, loss: 0.0036156997084617615 2023-01-22 15:15:35.208923: step: 660/531, loss: 0.003823567647486925 2023-01-22 15:15:36.252239: step: 664/531, loss: 0.019515687599778175 2023-01-22 15:15:37.342605: step: 668/531, loss: 0.012334228493273258 2023-01-22 15:15:38.409013: step: 672/531, loss: 0.006226486060768366 2023-01-22 15:15:39.465851: step: 676/531, loss: 0.000510255282279104 2023-01-22 15:15:40.527208: step: 680/531, loss: 0.01388457790017128 2023-01-22 15:15:41.575990: step: 684/531, loss: 0.009974336251616478 2023-01-22 15:15:42.671343: step: 688/531, loss: 0.008342131972312927 2023-01-22 15:15:43.738598: step: 692/531, loss: 0.0007959392969496548 2023-01-22 15:15:44.804152: step: 696/531, loss: 0.0037402233574539423 2023-01-22 15:15:45.861837: step: 700/531, loss: 0.009263568557798862 2023-01-22 15:15:46.929061: step: 704/531, loss: 0.010787513107061386 2023-01-22 15:15:48.005466: step: 708/531, loss: 0.014701229520142078 2023-01-22 
15:15:49.079681: step: 712/531, loss: 0.04599161818623543 2023-01-22 15:15:50.141916: step: 716/531, loss: 0.006098523736000061 2023-01-22 15:15:51.215630: step: 720/531, loss: 0.07113492488861084 2023-01-22 15:15:52.263312: step: 724/531, loss: 0.008178084157407284 2023-01-22 15:15:53.328647: step: 728/531, loss: 0.007214087061583996 2023-01-22 15:15:54.372137: step: 732/531, loss: 6.0081314586568624e-05 2023-01-22 15:15:55.448795: step: 736/531, loss: 0.021966680884361267 2023-01-22 15:15:56.507654: step: 740/531, loss: 0.006906636990606785 2023-01-22 15:15:57.572822: step: 744/531, loss: 0.005075166001915932 2023-01-22 15:15:58.626031: step: 748/531, loss: 0.003017052076756954 2023-01-22 15:15:59.684776: step: 752/531, loss: 0.0024422996211797 2023-01-22 15:16:00.747902: step: 756/531, loss: 0.004597730003297329 2023-01-22 15:16:01.810706: step: 760/531, loss: 0.026027733460068703 2023-01-22 15:16:02.904096: step: 764/531, loss: 0.0026450571604073048 2023-01-22 15:16:03.982967: step: 768/531, loss: 0.0023178867995738983 2023-01-22 15:16:05.063699: step: 772/531, loss: 0.009700399823486805 2023-01-22 15:16:06.112886: step: 776/531, loss: 0.01435423269867897 2023-01-22 15:16:07.188865: step: 780/531, loss: 0.006480752024799585 2023-01-22 15:16:08.247873: step: 784/531, loss: 0.031863827258348465 2023-01-22 15:16:09.306462: step: 788/531, loss: 0.01961096003651619 2023-01-22 15:16:10.367101: step: 792/531, loss: 0.003960778936743736 2023-01-22 15:16:11.428546: step: 796/531, loss: 0.006863596383482218 2023-01-22 15:16:12.501114: step: 800/531, loss: 0.03473423048853874 2023-01-22 15:16:13.570842: step: 804/531, loss: 0.005358964204788208 2023-01-22 15:16:14.647366: step: 808/531, loss: 0.04858779534697533 2023-01-22 15:16:15.715943: step: 812/531, loss: 0.02484353445470333 2023-01-22 15:16:16.775420: step: 816/531, loss: 0.0008629474905319512 2023-01-22 15:16:17.839943: step: 820/531, loss: 0.005493585020303726 2023-01-22 15:16:18.908528: step: 824/531, loss: 0.005706422496587038 2023-01-22 15:16:19.966749: step: 828/531, loss: 0.003967179451137781 2023-01-22 15:16:21.023283: step: 832/531, loss: 0.0035745592322200537 2023-01-22 15:16:22.084959: step: 836/531, loss: 0.03399597853422165 2023-01-22 15:16:23.137615: step: 840/531, loss: 0.0048324232921004295 2023-01-22 15:16:24.200026: step: 844/531, loss: 0.015490568242967129 2023-01-22 15:16:25.253706: step: 848/531, loss: 0.009137268178164959 2023-01-22 15:16:26.331307: step: 852/531, loss: 0.008090930059552193 2023-01-22 15:16:27.391845: step: 856/531, loss: 0.03114093653857708 2023-01-22 15:16:28.448845: step: 860/531, loss: 0.004642469808459282 2023-01-22 15:16:29.543917: step: 864/531, loss: 0.06422934681177139 2023-01-22 15:16:30.589712: step: 868/531, loss: 0.021638648584485054 2023-01-22 15:16:31.657476: step: 872/531, loss: 0.03170209378004074 2023-01-22 15:16:32.728347: step: 876/531, loss: 0.006259063258767128 2023-01-22 15:16:33.802785: step: 880/531, loss: 0.02374625764787197 2023-01-22 15:16:34.873857: step: 884/531, loss: 0.08532796055078506 2023-01-22 15:16:35.927149: step: 888/531, loss: 0.00865882821381092 2023-01-22 15:16:36.996881: step: 892/531, loss: 0.010816564783453941 2023-01-22 15:16:38.070613: step: 896/531, loss: 0.007841667160391808 2023-01-22 15:16:39.132327: step: 900/531, loss: 0.049881499260663986 2023-01-22 15:16:40.195586: step: 904/531, loss: 0.0368400476872921 2023-01-22 15:16:41.263599: step: 908/531, loss: 0.0008017414365895092 2023-01-22 15:16:42.346273: step: 912/531, loss: 0.00277475593611598 
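On the evaluation blocks printed at the end of each epoch above: the f1 values are the usual harmonic mean of p and r, and every 'combined' value matches the product of the template f1 and the slot f1 (e.g. 0.7368421 x 0.3346563 ≈ 0.2465888 for the epoch-2 Dev Chinese entry). This is an observation from the logged numbers, not the repository's scoring code; a small sketch that reproduces it:

# Inferred relation, checked against the logged numbers; not taken from the scoring code.
def f1(p: float, r: float) -> float:
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    return template_f1 * slot_f1

# Epoch-2 Dev Chinese figures from the block above:
template = f1(1.0, 0.5833333333333334)                  # -> 0.7368421052631579
slot     = f1(0.3502458592132506, 0.3203953598484849)   # -> ~0.33465628090999017
print(combined(template, slot))                         # -> ~0.2465888385652559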
2023-01-22 15:16:43.418270: step: 916/531, loss: 0.018239369615912437 2023-01-22 15:16:44.482257: step: 920/531, loss: 0.023079991340637207 2023-01-22 15:16:45.532150: step: 924/531, loss: 0.004089293535798788 2023-01-22 15:16:46.597459: step: 928/531, loss: 0.007062810938805342 2023-01-22 15:16:47.645545: step: 932/531, loss: 0.0007188616436906159 2023-01-22 15:16:48.712117: step: 936/531, loss: 0.007054132409393787 2023-01-22 15:16:49.776670: step: 940/531, loss: 0.0025027741212397814 2023-01-22 15:16:50.859360: step: 944/531, loss: 0.008826656267046928 2023-01-22 15:16:51.950929: step: 948/531, loss: 0.04039265215396881 2023-01-22 15:16:53.014262: step: 952/531, loss: 0.01714899018406868 2023-01-22 15:16:54.075036: step: 956/531, loss: 0.011195934377610683 2023-01-22 15:16:55.159765: step: 960/531, loss: 0.026175515726208687 2023-01-22 15:16:56.222989: step: 964/531, loss: 0.005881982855498791 2023-01-22 15:16:57.270657: step: 968/531, loss: 0.0 2023-01-22 15:16:58.329047: step: 972/531, loss: 0.009446099400520325 2023-01-22 15:16:59.379598: step: 976/531, loss: 0.014840062707662582 2023-01-22 15:17:00.453894: step: 980/531, loss: 0.013021034188568592 2023-01-22 15:17:01.521797: step: 984/531, loss: 0.0033347129356116056 2023-01-22 15:17:02.587074: step: 988/531, loss: 0.043203387409448624 2023-01-22 15:17:03.656706: step: 992/531, loss: 0.008585507981479168 2023-01-22 15:17:04.722164: step: 996/531, loss: 0.005980364978313446 2023-01-22 15:17:05.772881: step: 1000/531, loss: 0.011566465720534325 2023-01-22 15:17:06.844309: step: 1004/531, loss: 0.015538278967142105 2023-01-22 15:17:07.905846: step: 1008/531, loss: 0.012091074138879776 2023-01-22 15:17:08.962055: step: 1012/531, loss: 0.0037062913179397583 2023-01-22 15:17:10.022914: step: 1016/531, loss: 0.02916717156767845 2023-01-22 15:17:11.084839: step: 1020/531, loss: 0.011735990643501282 2023-01-22 15:17:12.139867: step: 1024/531, loss: 0.010441160760819912 2023-01-22 15:17:13.205981: step: 1028/531, loss: 0.005095531232655048 2023-01-22 15:17:14.264526: step: 1032/531, loss: 0.00971211213618517 2023-01-22 15:17:15.336966: step: 1036/531, loss: 0.061905305832624435 2023-01-22 15:17:16.388699: step: 1040/531, loss: 0.0012857085093855858 2023-01-22 15:17:17.443776: step: 1044/531, loss: 0.0010416122386232018 2023-01-22 15:17:18.513322: step: 1048/531, loss: 0.002953298855572939 2023-01-22 15:17:19.582673: step: 1052/531, loss: 0.00984971597790718 2023-01-22 15:17:20.659473: step: 1056/531, loss: 0.021457791328430176 2023-01-22 15:17:21.733585: step: 1060/531, loss: 0.005005578976124525 2023-01-22 15:17:22.804421: step: 1064/531, loss: 0.011042384430766106 2023-01-22 15:17:23.869875: step: 1068/531, loss: 0.025549301877617836 2023-01-22 15:17:24.934785: step: 1072/531, loss: 0.005598848219960928 2023-01-22 15:17:26.005907: step: 1076/531, loss: 0.0011638473952189088 2023-01-22 15:17:27.078577: step: 1080/531, loss: 0.003443461610004306 2023-01-22 15:17:28.159039: step: 1084/531, loss: 0.028383811935782433 2023-01-22 15:17:29.242909: step: 1088/531, loss: 0.01226129475980997 2023-01-22 15:17:30.289596: step: 1092/531, loss: 0.003556832205504179 2023-01-22 15:17:31.345878: step: 1096/531, loss: 0.004501263611018658 2023-01-22 15:17:32.407744: step: 1100/531, loss: 0.020327776670455933 2023-01-22 15:17:33.472534: step: 1104/531, loss: 0.09774454683065414 2023-01-22 15:17:34.534608: step: 1108/531, loss: 0.03078611008822918 2023-01-22 15:17:35.576530: step: 1112/531, loss: 0.010324189439415932 2023-01-22 15:17:36.654670: step: 1116/531, 
loss: 0.022571392357349396 2023-01-22 15:17:37.733930: step: 1120/531, loss: 0.005407420452684164 2023-01-22 15:17:38.782398: step: 1124/531, loss: 0.013136330991983414 2023-01-22 15:17:39.848483: step: 1128/531, loss: 0.012684522196650505 2023-01-22 15:17:40.917923: step: 1132/531, loss: 0.0024550834205001593 2023-01-22 15:17:41.989762: step: 1136/531, loss: 0.010017668828368187 2023-01-22 15:17:43.064964: step: 1140/531, loss: 0.011186819523572922 2023-01-22 15:17:44.131346: step: 1144/531, loss: 0.007019475102424622 2023-01-22 15:17:45.211935: step: 1148/531, loss: 0.013907302170991898 2023-01-22 15:17:46.275859: step: 1152/531, loss: 0.00404173880815506 2023-01-22 15:17:47.356459: step: 1156/531, loss: 0.006247961428016424 2023-01-22 15:17:48.416735: step: 1160/531, loss: 0.003992138896137476 2023-01-22 15:17:49.482125: step: 1164/531, loss: 0.008561491966247559 2023-01-22 15:17:50.540336: step: 1168/531, loss: 0.006891845725476742 2023-01-22 15:17:51.584959: step: 1172/531, loss: 0.020960526540875435 2023-01-22 15:17:52.649700: step: 1176/531, loss: 0.008791033178567886 2023-01-22 15:17:53.706996: step: 1180/531, loss: 0.0019249562174081802 2023-01-22 15:17:54.764242: step: 1184/531, loss: 0.003304139245301485 2023-01-22 15:17:55.819156: step: 1188/531, loss: 0.005203144624829292 2023-01-22 15:17:56.883455: step: 1192/531, loss: 0.0030283795204013586 2023-01-22 15:17:57.937013: step: 1196/531, loss: 0.059227969497442245 2023-01-22 15:17:59.005213: step: 1200/531, loss: 0.011674697510898113 2023-01-22 15:18:00.076384: step: 1204/531, loss: 0.007917428389191628 2023-01-22 15:18:01.160790: step: 1208/531, loss: 0.010782918892800808 2023-01-22 15:18:02.213905: step: 1212/531, loss: 0.014027918688952923 2023-01-22 15:18:03.276686: step: 1216/531, loss: 0.011139859445393085 2023-01-22 15:18:04.337963: step: 1220/531, loss: 0.006109717767685652 2023-01-22 15:18:05.397659: step: 1224/531, loss: 0.011085920967161655 2023-01-22 15:18:06.472565: step: 1228/531, loss: 0.11575441807508469 2023-01-22 15:18:07.528883: step: 1232/531, loss: 0.028393127024173737 2023-01-22 15:18:08.597230: step: 1236/531, loss: 0.005467280279844999 2023-01-22 15:18:09.653936: step: 1240/531, loss: 0.006713276728987694 2023-01-22 15:18:10.731688: step: 1244/531, loss: 0.004648793023079634 2023-01-22 15:18:11.788592: step: 1248/531, loss: 0.00685318885371089 2023-01-22 15:18:12.857705: step: 1252/531, loss: 0.03564877063035965 2023-01-22 15:18:13.924428: step: 1256/531, loss: 0.041564859449863434 2023-01-22 15:18:14.992124: step: 1260/531, loss: 0.013147922232747078 2023-01-22 15:18:16.049647: step: 1264/531, loss: 0.0014401249354705215 2023-01-22 15:18:17.096626: step: 1268/531, loss: 0.0027441091369837523 2023-01-22 15:18:18.157943: step: 1272/531, loss: 0.011722305789589882 2023-01-22 15:18:19.226737: step: 1276/531, loss: 0.08332924544811249 2023-01-22 15:18:20.282990: step: 1280/531, loss: 0.002597358077764511 2023-01-22 15:18:21.343419: step: 1284/531, loss: 0.01555523369461298 2023-01-22 15:18:22.413094: step: 1288/531, loss: 0.13061170279979706 2023-01-22 15:18:23.491535: step: 1292/531, loss: 0.005295668262988329 2023-01-22 15:18:24.550383: step: 1296/531, loss: 0.017770804464817047 2023-01-22 15:18:25.605952: step: 1300/531, loss: 0.007504474371671677 2023-01-22 15:18:26.646366: step: 1304/531, loss: 0.02351203002035618 2023-01-22 15:18:27.710338: step: 1308/531, loss: 0.010521844029426575 2023-01-22 15:18:28.773087: step: 1312/531, loss: 0.027280917391180992 2023-01-22 15:18:29.849838: step: 1316/531, loss: 
0.006498619448393583 2023-01-22 15:18:30.900950: step: 1320/531, loss: 0.003986257128417492 2023-01-22 15:18:31.991400: step: 1324/531, loss: 0.0130449328571558 2023-01-22 15:18:33.049993: step: 1328/531, loss: 0.011105556041002274 2023-01-22 15:18:34.109818: step: 1332/531, loss: 0.0 2023-01-22 15:18:35.188314: step: 1336/531, loss: 0.008886473253369331 2023-01-22 15:18:36.238674: step: 1340/531, loss: 0.01760837994515896 2023-01-22 15:18:37.305670: step: 1344/531, loss: 0.005355107598006725 2023-01-22 15:18:38.367537: step: 1348/531, loss: 0.02342238463461399 2023-01-22 15:18:39.424052: step: 1352/531, loss: 0.015532005578279495 2023-01-22 15:18:40.502372: step: 1356/531, loss: 0.004329567309468985 2023-01-22 15:18:41.564174: step: 1360/531, loss: 0.0007388498052023351 2023-01-22 15:18:42.623395: step: 1364/531, loss: 0.001713779172860086 2023-01-22 15:18:43.683973: step: 1368/531, loss: 0.014009689912199974 2023-01-22 15:18:44.752749: step: 1372/531, loss: 0.06963585317134857 2023-01-22 15:18:45.812210: step: 1376/531, loss: 0.011355608701705933 2023-01-22 15:18:46.877619: step: 1380/531, loss: 0.05988581106066704 2023-01-22 15:18:47.936534: step: 1384/531, loss: 0.0018392998026683927 2023-01-22 15:18:49.001589: step: 1388/531, loss: 0.0037168115377426147 2023-01-22 15:18:50.085626: step: 1392/531, loss: 0.0038279080763459206 2023-01-22 15:18:51.141703: step: 1396/531, loss: 0.009530803188681602 2023-01-22 15:18:52.195561: step: 1400/531, loss: 0.012957570143043995 2023-01-22 15:18:53.281032: step: 1404/531, loss: 0.0163667444139719 2023-01-22 15:18:54.350131: step: 1408/531, loss: 0.006851600483059883 2023-01-22 15:18:55.419482: step: 1412/531, loss: 0.005970964208245277 2023-01-22 15:18:56.482824: step: 1416/531, loss: 0.0053473771549761295 2023-01-22 15:18:57.535433: step: 1420/531, loss: 0.0025720957200974226 2023-01-22 15:18:58.591993: step: 1424/531, loss: 0.004987241700291634 2023-01-22 15:18:59.648453: step: 1428/531, loss: 0.024758026003837585 2023-01-22 15:19:00.712489: step: 1432/531, loss: 0.0020685719791799784 2023-01-22 15:19:01.775258: step: 1436/531, loss: 0.008989634923636913 2023-01-22 15:19:02.832389: step: 1440/531, loss: 0.006896218750625849 2023-01-22 15:19:03.922977: step: 1444/531, loss: 0.02935216762125492 2023-01-22 15:19:04.985127: step: 1448/531, loss: 0.011283285915851593 2023-01-22 15:19:06.042369: step: 1452/531, loss: 0.02889883704483509 2023-01-22 15:19:07.122745: step: 1456/531, loss: 0.005969729274511337 2023-01-22 15:19:08.183712: step: 1460/531, loss: 0.002971237525343895 2023-01-22 15:19:09.249092: step: 1464/531, loss: 0.025799859315156937 2023-01-22 15:19:10.301004: step: 1468/531, loss: 0.03327492997050285 2023-01-22 15:19:11.377868: step: 1472/531, loss: 0.061219293624162674 2023-01-22 15:19:12.452663: step: 1476/531, loss: 0.00037485663779079914 2023-01-22 15:19:13.521095: step: 1480/531, loss: 0.004634445998817682 2023-01-22 15:19:14.579870: step: 1484/531, loss: 0.04018624126911163 2023-01-22 15:19:15.645419: step: 1488/531, loss: 0.027372492477297783 2023-01-22 15:19:16.708501: step: 1492/531, loss: 0.013875926844775677 2023-01-22 15:19:17.785219: step: 1496/531, loss: 0.00806163065135479 2023-01-22 15:19:18.872270: step: 1500/531, loss: 0.02455878257751465 2023-01-22 15:19:19.939580: step: 1504/531, loss: 0.010958276689052582 2023-01-22 15:19:21.004684: step: 1508/531, loss: 0.014744852669537067 2023-01-22 15:19:22.063312: step: 1512/531, loss: 0.013093473389744759 2023-01-22 15:19:23.116022: step: 1516/531, loss: 0.00933352392166853 
2023-01-22 15:19:24.182596: step: 1520/531, loss: 0.01852133683860302 2023-01-22 15:19:25.247314: step: 1524/531, loss: 0.015952784568071365 2023-01-22 15:19:26.323482: step: 1528/531, loss: 0.007352899760007858 2023-01-22 15:19:27.387745: step: 1532/531, loss: 0.02387125976383686 2023-01-22 15:19:28.445580: step: 1536/531, loss: 0.01753857173025608 2023-01-22 15:19:29.512238: step: 1540/531, loss: 0.010952083393931389 2023-01-22 15:19:30.574315: step: 1544/531, loss: 0.04563755542039871 2023-01-22 15:19:31.635367: step: 1548/531, loss: 0.002622702158987522 2023-01-22 15:19:32.727893: step: 1552/531, loss: 0.010987264104187489 2023-01-22 15:19:33.815223: step: 1556/531, loss: 0.02206575870513916 2023-01-22 15:19:34.875259: step: 1560/531, loss: 0.02936229109764099 2023-01-22 15:19:35.949313: step: 1564/531, loss: 0.005610931199043989 2023-01-22 15:19:37.014202: step: 1568/531, loss: 0.004258530680090189 2023-01-22 15:19:38.090871: step: 1572/531, loss: 0.007061867509037256 2023-01-22 15:19:39.176520: step: 1576/531, loss: 0.0035791262052953243 2023-01-22 15:19:40.246347: step: 1580/531, loss: 0.009710179641842842 2023-01-22 15:19:41.289207: step: 1584/531, loss: 0.001007150742225349 2023-01-22 15:19:42.384177: step: 1588/531, loss: 0.010055597871541977 2023-01-22 15:19:43.450505: step: 1592/531, loss: 0.003203935921192169 2023-01-22 15:19:44.522236: step: 1596/531, loss: 0.033856719732284546 2023-01-22 15:19:45.580153: step: 1600/531, loss: 0.026200085878372192 2023-01-22 15:19:46.646906: step: 1604/531, loss: 0.008753352798521519 2023-01-22 15:19:47.732047: step: 1608/531, loss: 0.0017513601342216134 2023-01-22 15:19:48.775404: step: 1612/531, loss: 0.006941612344235182 2023-01-22 15:19:49.846927: step: 1616/531, loss: 0.01039678230881691 2023-01-22 15:19:50.889573: step: 1620/531, loss: 0.008160735480487347 2023-01-22 15:19:51.954786: step: 1624/531, loss: 0.0023922729305922985 2023-01-22 15:19:53.011303: step: 1628/531, loss: 0.001966872252523899 2023-01-22 15:19:54.069187: step: 1632/531, loss: 0.010224957019090652 2023-01-22 15:19:55.130049: step: 1636/531, loss: 0.002609137911349535 2023-01-22 15:19:56.187184: step: 1640/531, loss: 0.03572692349553108 2023-01-22 15:19:57.272471: step: 1644/531, loss: 0.006409843917936087 2023-01-22 15:19:58.334006: step: 1648/531, loss: 0.0025181996170431376 2023-01-22 15:19:59.408271: step: 1652/531, loss: 0.03474220633506775 2023-01-22 15:20:00.477526: step: 1656/531, loss: 0.00776472594588995 2023-01-22 15:20:01.551308: step: 1660/531, loss: 0.006455665919929743 2023-01-22 15:20:02.626657: step: 1664/531, loss: 0.03143719583749771 2023-01-22 15:20:03.691710: step: 1668/531, loss: 0.011104064993560314 2023-01-22 15:20:04.754181: step: 1672/531, loss: 0.02775968797504902 2023-01-22 15:20:05.813450: step: 1676/531, loss: 0.008741592988371849 2023-01-22 15:20:06.882956: step: 1680/531, loss: 0.01810801774263382 2023-01-22 15:20:07.949314: step: 1684/531, loss: 0.004843638278543949 2023-01-22 15:20:09.012682: step: 1688/531, loss: 0.004471385385841131 2023-01-22 15:20:10.076184: step: 1692/531, loss: 0.0061780172400176525 2023-01-22 15:20:11.150491: step: 1696/531, loss: 0.010288001969456673 2023-01-22 15:20:12.238589: step: 1700/531, loss: 0.005826051812618971 2023-01-22 15:20:13.323992: step: 1704/531, loss: 0.0075319078750908375 2023-01-22 15:20:14.391748: step: 1708/531, loss: 0.0026481272652745247 2023-01-22 15:20:15.453652: step: 1712/531, loss: 0.04316351190209389 2023-01-22 15:20:16.512023: step: 1716/531, loss: 0.002765804994851351 2023-01-22 
15:20:17.565022: step: 1720/531, loss: 0.0032780934125185013 2023-01-22 15:20:18.628243: step: 1724/531, loss: 0.014150029979646206 2023-01-22 15:20:19.696242: step: 1728/531, loss: 0.06964344531297684 2023-01-22 15:20:20.755670: step: 1732/531, loss: 0.00605577789247036 2023-01-22 15:20:21.816783: step: 1736/531, loss: 0.0044052437879145145 2023-01-22 15:20:22.892765: step: 1740/531, loss: 0.07521738111972809 2023-01-22 15:20:23.967235: step: 1744/531, loss: 0.012526576407253742 2023-01-22 15:20:25.039335: step: 1748/531, loss: 0.0169774629175663 2023-01-22 15:20:26.098593: step: 1752/531, loss: 0.014141344465315342 2023-01-22 15:20:27.184119: step: 1756/531, loss: 0.008866876363754272 2023-01-22 15:20:28.245510: step: 1760/531, loss: 0.013693507760763168 2023-01-22 15:20:29.333795: step: 1764/531, loss: 0.025218477472662926 2023-01-22 15:20:30.403993: step: 1768/531, loss: 0.009164059534668922 2023-01-22 15:20:31.474660: step: 1772/531, loss: 0.032739702612161636 2023-01-22 15:20:32.542960: step: 1776/531, loss: 0.01979782059788704 2023-01-22 15:20:33.609006: step: 1780/531, loss: 0.005625186022371054 2023-01-22 15:20:34.683566: step: 1784/531, loss: 0.006696365773677826 2023-01-22 15:20:35.760042: step: 1788/531, loss: 0.004048663657158613 2023-01-22 15:20:36.838931: step: 1792/531, loss: 0.02188773639500141 2023-01-22 15:20:37.905370: step: 1796/531, loss: 0.0060102916322648525 2023-01-22 15:20:38.984175: step: 1800/531, loss: 0.030482010915875435 2023-01-22 15:20:40.056232: step: 1804/531, loss: 0.007428075652569532 2023-01-22 15:20:41.114780: step: 1808/531, loss: 0.005936458706855774 2023-01-22 15:20:42.189344: step: 1812/531, loss: 0.01320312637835741 2023-01-22 15:20:43.253600: step: 1816/531, loss: 0.0038099256344139576 2023-01-22 15:20:44.314783: step: 1820/531, loss: 0.002713568275794387 2023-01-22 15:20:45.396906: step: 1824/531, loss: 0.04737890884280205 2023-01-22 15:20:46.466058: step: 1828/531, loss: 0.0026436031330376863 2023-01-22 15:20:47.561232: step: 1832/531, loss: 0.012270888313651085 2023-01-22 15:20:48.647794: step: 1836/531, loss: 0.009706312790513039 2023-01-22 15:20:49.724347: step: 1840/531, loss: 0.0022018705494701862 2023-01-22 15:20:50.801708: step: 1844/531, loss: 0.0074734459631145 2023-01-22 15:20:51.870323: step: 1848/531, loss: 0.009813284501433372 2023-01-22 15:20:52.929949: step: 1852/531, loss: 0.05924934148788452 2023-01-22 15:20:54.001825: step: 1856/531, loss: 0.007057646289467812 2023-01-22 15:20:55.073263: step: 1860/531, loss: 0.005799010396003723 2023-01-22 15:20:56.139839: step: 1864/531, loss: 0.0604633167386055 2023-01-22 15:20:57.210083: step: 1868/531, loss: 0.029298024252057076 2023-01-22 15:20:58.279669: step: 1872/531, loss: 0.020175062119960785 2023-01-22 15:20:59.362708: step: 1876/531, loss: 0.03403453156352043 2023-01-22 15:21:00.436890: step: 1880/531, loss: 0.007818075828254223 2023-01-22 15:21:01.508375: step: 1884/531, loss: 0.003131083445623517 2023-01-22 15:21:02.581241: step: 1888/531, loss: 0.02331097237765789 2023-01-22 15:21:03.646362: step: 1892/531, loss: 0.0286044180393219 2023-01-22 15:21:04.694324: step: 1896/531, loss: 0.005853778682649136 2023-01-22 15:21:05.766651: step: 1900/531, loss: 0.0035308748483657837 2023-01-22 15:21:06.852719: step: 1904/531, loss: 0.064869225025177 2023-01-22 15:21:07.916200: step: 1908/531, loss: 0.0072122192941606045 2023-01-22 15:21:08.973680: step: 1912/531, loss: 0.004317569546401501 2023-01-22 15:21:10.030199: step: 1916/531, loss: 0.005695443134754896 2023-01-22 15:21:11.090769: 
step: 1920/531, loss: 0.010903386399149895 2023-01-22 15:21:12.162357: step: 1924/531, loss: 0.007791449781507254 2023-01-22 15:21:13.227832: step: 1928/531, loss: 0.011957583948969841 2023-01-22 15:21:14.282329: step: 1932/531, loss: 0.008363187313079834 2023-01-22 15:21:15.361227: step: 1936/531, loss: 0.01102373655885458 2023-01-22 15:21:16.419172: step: 1940/531, loss: 0.00917668454349041 2023-01-22 15:21:17.470227: step: 1944/531, loss: 0.0030903760343790054 2023-01-22 15:21:18.534943: step: 1948/531, loss: 0.00785362534224987 2023-01-22 15:21:19.618955: step: 1952/531, loss: 0.006935576908290386 2023-01-22 15:21:20.691220: step: 1956/531, loss: 0.004695732146501541 2023-01-22 15:21:21.747238: step: 1960/531, loss: 0.004890380892902613 2023-01-22 15:21:22.820149: step: 1964/531, loss: 0.008526074700057507 2023-01-22 15:21:23.894976: step: 1968/531, loss: 0.002123031532391906 2023-01-22 15:21:24.960106: step: 1972/531, loss: 0.005248913075774908 2023-01-22 15:21:26.012372: step: 1976/531, loss: 0.008927969262003899 2023-01-22 15:21:27.061383: step: 1980/531, loss: 0.0113055519759655 2023-01-22 15:21:28.122072: step: 1984/531, loss: 0.0013906665844842792 2023-01-22 15:21:29.179899: step: 1988/531, loss: 0.00942879356443882 2023-01-22 15:21:30.239122: step: 1992/531, loss: 0.005479065235704184 2023-01-22 15:21:31.298672: step: 1996/531, loss: 0.007529011927545071 2023-01-22 15:21:32.361342: step: 2000/531, loss: 0.00459635816514492 2023-01-22 15:21:33.415996: step: 2004/531, loss: 0.03991734981536865 2023-01-22 15:21:34.479700: step: 2008/531, loss: 0.004686987493187189 2023-01-22 15:21:35.529820: step: 2012/531, loss: 0.013272380456328392 2023-01-22 15:21:36.582281: step: 2016/531, loss: 0.020952170714735985 2023-01-22 15:21:37.654359: step: 2020/531, loss: 0.013088252395391464 2023-01-22 15:21:38.715130: step: 2024/531, loss: 0.013740804977715015 2023-01-22 15:21:39.767522: step: 2028/531, loss: 0.003718385472893715 2023-01-22 15:21:40.808221: step: 2032/531, loss: 0.011931989341974258 2023-01-22 15:21:41.890863: step: 2036/531, loss: 0.02632828615605831 2023-01-22 15:21:42.936659: step: 2040/531, loss: 0.010957547463476658 2023-01-22 15:21:43.982941: step: 2044/531, loss: 0.013814541511237621 2023-01-22 15:21:45.041322: step: 2048/531, loss: 0.0016523165395483375 2023-01-22 15:21:46.096336: step: 2052/531, loss: 0.011547659523785114 2023-01-22 15:21:47.155383: step: 2056/531, loss: 0.007431823294609785 2023-01-22 15:21:48.203755: step: 2060/531, loss: 0.0039128796197474 2023-01-22 15:21:49.288575: step: 2064/531, loss: 0.00915682502090931 2023-01-22 15:21:50.365089: step: 2068/531, loss: 0.0029002767987549305 2023-01-22 15:21:51.419835: step: 2072/531, loss: 0.0037574335001409054 2023-01-22 15:21:52.452683: step: 2076/531, loss: 0.006329825613647699 2023-01-22 15:21:53.517561: step: 2080/531, loss: 0.03108215145766735 2023-01-22 15:21:54.568725: step: 2084/531, loss: 0.0042252857238054276 2023-01-22 15:21:55.618424: step: 2088/531, loss: 0.00210560648702085 2023-01-22 15:21:56.675302: step: 2092/531, loss: 0.009277300909161568 2023-01-22 15:21:57.733361: step: 2096/531, loss: 0.02244584821164608 2023-01-22 15:21:58.803179: step: 2100/531, loss: 0.018372735008597374 2023-01-22 15:21:59.854327: step: 2104/531, loss: 0.008814483880996704 2023-01-22 15:22:00.925668: step: 2108/531, loss: 0.03223353624343872 2023-01-22 15:22:01.982469: step: 2112/531, loss: 0.009882899932563305 2023-01-22 15:22:03.042633: step: 2116/531, loss: 0.013947169296443462 2023-01-22 15:22:04.103526: step: 
2120/531, loss: 0.003022989258170128
2023-01-22 15:22:05.177809: step: 2124/531, loss: 0.013685203157365322
==================================================
Loss: 0.014
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3479726239669422, 'r': 0.31897490530303035, 'f1': 0.3328433794466404}, 'combined': 0.2452530164343666, 'stategy': 1, 'epoch': 3}
Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3297350722329152, 'r': 0.27229926993963666, 'f1': 0.2982774009818265}, 'combined': 0.18677183052133062, 'stategy': 1, 'epoch': 3}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279677353896104, 'r': 0.348504614455753, 'f1': 0.3379244375679519}, 'combined': 0.2489969539974382, 'stategy': 1, 'epoch': 3}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3596686905634274, 'r': 0.2999941738243195, 'f1': 0.32713228984382525}, 'combined': 0.20272986976237056, 'stategy': 1, 'epoch': 3}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3086900425015179, 'r': 0.3215765338393422, 'f1': 0.31500154894671617}, 'combined': 0.232106404487054, 'stategy': 1, 'epoch': 3}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.35705972065161734, 'r': 0.28480915387116357, 'f1': 0.3168680737038975}, 'combined': 0.21019961324912015, 'stategy': 1, 'epoch': 3}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34285714285714286, 'r': 0.34285714285714286, 'f1': 0.34285714285714286}, 'combined': 0.22857142857142856, 'stategy': 1, 'epoch': 3}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35344827586206895, 'r': 0.44565217391304346, 'f1': 0.3942307692307692}, 'combined': 0.1971153846153846, 'stategy': 1, 'epoch': 3}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.27586206896551724, 'f1': 0.3404255319148936}, 'combined': 0.22695035460992907, 'stategy': 1, 'epoch': 3}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355702906162465, 'r': 0.3206715593434344, 'f1': 0.3372800464807437}, 'combined': 0.24852213951212693, 'stategy': 1, 'epoch': 1}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3363021832161375, 'r': 0.27681190531869804, 'f1': 0.3036708822704133}, 'combined': 0.19014905712259525, 'stategy': 1, 'epoch': 1}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 1}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3222139154704944, 'r': 0.348504614455753, 'f1': 0.3348439960222093}, 'combined': 0.24672715496373318, 'stategy': 1, 'epoch': 0}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3480621490304492, 'r': 0.30067045876570814, 'f1': 0.3226352576402229}, 'combined': 0.19994297656577192, 'stategy': 1, 'epoch': 0}
Korean: {'template': {'p':
0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37037037037037035, 'r': 0.43478260869565216, 'f1': 0.39999999999999997}, 'combined': 0.19999999999999998, 'stategy': 1, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31311162079510707, 'r': 0.3238061353573688, 'f1': 0.318369092039801}, 'combined': 0.23458775202932705, 'stategy': 1, 'epoch': 1} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3595025465625748, 'r': 0.28506666666851543, 'f1': 0.31798661960199504}, 'combined': 0.2109416189438977, 'stategy': 1, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 1} ****************************** Epoch: 4 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 15:24:35.226741: step: 4/531, loss: 0.014120172709226608 2023-01-22 15:24:36.296129: step: 8/531, loss: 0.006682697683572769 2023-01-22 15:24:37.357801: step: 12/531, loss: 0.03515120968222618 2023-01-22 15:24:38.405160: step: 16/531, loss: 0.010853380896151066 2023-01-22 15:24:39.460965: step: 20/531, loss: 0.006817764136940241 2023-01-22 15:24:40.519021: step: 24/531, loss: 0.0014552063075825572 2023-01-22 15:24:41.582137: step: 28/531, loss: 0.004277610685676336 2023-01-22 15:24:42.636010: step: 32/531, loss: 0.01216868031769991 2023-01-22 15:24:43.671552: step: 36/531, loss: 0.019867248833179474 2023-01-22 15:24:44.723477: step: 40/531, loss: 0.029024865478277206 2023-01-22 15:24:45.785358: step: 44/531, loss: 0.006300168111920357 2023-01-22 15:24:46.832768: step: 48/531, loss: 0.02416304126381874 2023-01-22 15:24:47.870341: step: 52/531, loss: 0.0017094590002670884 2023-01-22 15:24:48.914346: step: 56/531, loss: 0.0013462232891470194 2023-01-22 15:24:49.972697: step: 60/531, loss: 0.0010470687411725521 2023-01-22 15:24:51.025709: step: 64/531, loss: 0.0035332604311406612 2023-01-22 15:24:52.084409: step: 68/531, loss: 0.021271180361509323 2023-01-22 15:24:53.144783: step: 72/531, loss: 1.2698243153863586e-05 2023-01-22 15:24:54.232577: step: 76/531, loss: 0.0031765408348292112 2023-01-22 15:24:55.284083: step: 80/531, loss: 0.006587965413928032 2023-01-22 15:24:56.340580: step: 84/531, loss: 0.02233354188501835 2023-01-22 15:24:57.393562: step: 88/531, loss: 0.006880105473101139 2023-01-22 15:24:58.446461: step: 92/531, loss: 0.012796667404472828 2023-01-22 15:24:59.504347: step: 96/531, loss: 0.00949548464268446 2023-01-22 15:25:00.559416: step: 100/531, loss: 0.02511078305542469 2023-01-22 15:25:01.627596: step: 104/531, loss: 0.004062130115926266 2023-01-22 15:25:02.688076: step: 108/531, loss: 0.0035931048914790154 2023-01-22 15:25:03.733512: step: 112/531, loss: 0.004986644256860018 2023-01-22 15:25:04.794950: step: 116/531, loss: 0.003980125300586224 2023-01-22 15:25:05.839586: step: 120/531, loss: 0.0037292789202183485 2023-01-22 15:25:06.898573: step: 124/531, loss: 0.04006205126643181 2023-01-22 15:25:07.943813: step: 128/531, loss: 0.010462186299264431 2023-01-22 15:25:08.992321: step: 132/531, loss: 0.0034327919129282236 2023-01-22 15:25:10.071205: step: 136/531, loss: 0.005827185697853565 2023-01-22 15:25:11.107291: step: 140/531, 
loss: 0.0007558600045740604 2023-01-22 15:25:12.155836: step: 144/531, loss: 0.00017142956494353712 2023-01-22 15:25:13.211038: step: 148/531, loss: 0.013463253155350685 2023-01-22 15:25:14.269914: step: 152/531, loss: 0.007346061989665031 2023-01-22 15:25:15.316730: step: 156/531, loss: 0.013847418129444122 2023-01-22 15:25:16.389922: step: 160/531, loss: 0.061364609748125076 2023-01-22 15:25:17.440688: step: 164/531, loss: 0.003914410714060068 2023-01-22 15:25:18.501097: step: 168/531, loss: 0.006762303877621889 2023-01-22 15:25:19.563929: step: 172/531, loss: 0.012773388996720314 2023-01-22 15:25:20.628326: step: 176/531, loss: 0.007753476966172457 2023-01-22 15:25:21.675875: step: 180/531, loss: 0.014776489697396755 2023-01-22 15:25:22.744860: step: 184/531, loss: 0.02107352949678898 2023-01-22 15:25:23.808020: step: 188/531, loss: 0.02246982231736183 2023-01-22 15:25:24.852622: step: 192/531, loss: 0.001362648094072938 2023-01-22 15:25:25.918631: step: 196/531, loss: 0.027102597057819366 2023-01-22 15:25:26.979286: step: 200/531, loss: 0.011291301809251308 2023-01-22 15:25:28.032046: step: 204/531, loss: 0.004192367196083069 2023-01-22 15:25:29.099295: step: 208/531, loss: 0.005737635772675276 2023-01-22 15:25:30.155183: step: 212/531, loss: 0.005328624043613672 2023-01-22 15:25:31.199542: step: 216/531, loss: 0.004522640723735094 2023-01-22 15:25:32.244780: step: 220/531, loss: 0.007462525740265846 2023-01-22 15:25:33.304709: step: 224/531, loss: 0.018044428899884224 2023-01-22 15:25:34.371766: step: 228/531, loss: 0.00784721877425909 2023-01-22 15:25:35.429296: step: 232/531, loss: 0.0 2023-01-22 15:25:36.489929: step: 236/531, loss: 0.013877315446734428 2023-01-22 15:25:37.541361: step: 240/531, loss: 0.00955837219953537 2023-01-22 15:25:38.606586: step: 244/531, loss: 0.004485964775085449 2023-01-22 15:25:39.649822: step: 248/531, loss: 0.006767854560166597 2023-01-22 15:25:40.712410: step: 252/531, loss: 0.003219024511054158 2023-01-22 15:25:41.759502: step: 256/531, loss: 0.003234386444091797 2023-01-22 15:25:42.853174: step: 260/531, loss: 0.011280063539743423 2023-01-22 15:25:43.899485: step: 264/531, loss: 0.0037553971633315086 2023-01-22 15:25:44.964420: step: 268/531, loss: 0.003862376557663083 2023-01-22 15:25:46.018573: step: 272/531, loss: 0.0017365460516884923 2023-01-22 15:25:47.091176: step: 276/531, loss: 0.007787508424371481 2023-01-22 15:25:48.148586: step: 280/531, loss: 0.005531442351639271 2023-01-22 15:25:49.208877: step: 284/531, loss: 0.0012803171994164586 2023-01-22 15:25:50.262498: step: 288/531, loss: 0.006415815558284521 2023-01-22 15:25:51.329510: step: 292/531, loss: 0.0071218619123101234 2023-01-22 15:25:52.414663: step: 296/531, loss: 0.00494378712028265 2023-01-22 15:25:53.463618: step: 300/531, loss: 0.005453706718981266 2023-01-22 15:25:54.512389: step: 304/531, loss: 0.0021511497907340527 2023-01-22 15:25:55.578860: step: 308/531, loss: 0.0004934875178150833 2023-01-22 15:25:56.650419: step: 312/531, loss: 0.00821372028440237 2023-01-22 15:25:57.692137: step: 316/531, loss: 0.0048161339946091175 2023-01-22 15:25:58.737496: step: 320/531, loss: 0.00028853537514805794 2023-01-22 15:25:59.791183: step: 324/531, loss: 0.0336274728178978 2023-01-22 15:26:00.850595: step: 328/531, loss: 0.0052524167113006115 2023-01-22 15:26:01.902613: step: 332/531, loss: 0.0100155770778656 2023-01-22 15:26:02.954757: step: 336/531, loss: 0.0189670417457819 2023-01-22 15:26:04.018958: step: 340/531, loss: 0.009143245406448841 2023-01-22 15:26:05.071092: step: 344/531, 
loss: 0.00248920195735991 2023-01-22 15:26:06.148612: step: 348/531, loss: 0.011447952128946781 2023-01-22 15:26:07.214774: step: 352/531, loss: 0.06651657074689865 2023-01-22 15:26:08.260744: step: 356/531, loss: 0.000792564416769892 2023-01-22 15:26:09.326488: step: 360/531, loss: 0.024494869634509087 2023-01-22 15:26:10.394498: step: 364/531, loss: 0.020984619855880737 2023-01-22 15:26:11.448474: step: 368/531, loss: 0.0056723980233073235 2023-01-22 15:26:12.513362: step: 372/531, loss: 0.03303275629878044 2023-01-22 15:26:13.557379: step: 376/531, loss: 0.005866799037903547 2023-01-22 15:26:14.600543: step: 380/531, loss: 0.00024266091350000352 2023-01-22 15:26:15.653212: step: 384/531, loss: 0.022709475830197334 2023-01-22 15:26:16.727193: step: 388/531, loss: 0.006276153493672609 2023-01-22 15:26:17.792229: step: 392/531, loss: 0.007548362948000431 2023-01-22 15:26:18.833231: step: 396/531, loss: 0.0013162402901798487 2023-01-22 15:26:19.897159: step: 400/531, loss: 0.0038812006823718548 2023-01-22 15:26:20.970160: step: 404/531, loss: 0.0014815202448517084 2023-01-22 15:26:22.021430: step: 408/531, loss: 0.008196880109608173 2023-01-22 15:26:23.085938: step: 412/531, loss: 0.008656901307404041 2023-01-22 15:26:24.140523: step: 416/531, loss: 0.003748697927221656 2023-01-22 15:26:25.191131: step: 420/531, loss: 0.04771991819143295 2023-01-22 15:26:26.254730: step: 424/531, loss: 0.004933672491461039 2023-01-22 15:26:27.311585: step: 428/531, loss: 0.008803177624940872 2023-01-22 15:26:28.372881: step: 432/531, loss: 0.020003728568553925 2023-01-22 15:26:29.443824: step: 436/531, loss: 0.026127735152840614 2023-01-22 15:26:30.494203: step: 440/531, loss: 0.00811250600963831 2023-01-22 15:26:31.562552: step: 444/531, loss: 0.005144232884049416 2023-01-22 15:26:32.630407: step: 448/531, loss: 0.006486075464636087 2023-01-22 15:26:33.685609: step: 452/531, loss: 0.008398662321269512 2023-01-22 15:26:34.742749: step: 456/531, loss: 0.02692290022969246 2023-01-22 15:26:35.799164: step: 460/531, loss: 0.007317574229091406 2023-01-22 15:26:36.870757: step: 464/531, loss: 0.007980293594300747 2023-01-22 15:26:37.940115: step: 468/531, loss: 0.008779807016253471 2023-01-22 15:26:38.992473: step: 472/531, loss: 0.010540878400206566 2023-01-22 15:26:40.059088: step: 476/531, loss: 0.006640647072345018 2023-01-22 15:26:41.113818: step: 480/531, loss: 0.00310403760522604 2023-01-22 15:26:42.183710: step: 484/531, loss: 0.004137186799198389 2023-01-22 15:26:43.243508: step: 488/531, loss: 0.011220235377550125 2023-01-22 15:26:44.298683: step: 492/531, loss: 0.01986110955476761 2023-01-22 15:26:45.349908: step: 496/531, loss: 0.010194278322160244 2023-01-22 15:26:46.418977: step: 500/531, loss: 0.005563562270253897 2023-01-22 15:26:47.476268: step: 504/531, loss: 0.008323823101818562 2023-01-22 15:26:48.534319: step: 508/531, loss: 0.005014132708311081 2023-01-22 15:26:49.591299: step: 512/531, loss: 0.024302873760461807 2023-01-22 15:26:50.662977: step: 516/531, loss: 0.024076519533991814 2023-01-22 15:26:51.719504: step: 520/531, loss: 0.004532541148364544 2023-01-22 15:26:52.785916: step: 524/531, loss: 0.0010067735565826297 2023-01-22 15:26:53.837011: step: 528/531, loss: 0.004437810275703669 2023-01-22 15:26:54.904157: step: 532/531, loss: 0.04998788982629776 2023-01-22 15:26:55.952151: step: 536/531, loss: 0.029736055061221123 2023-01-22 15:26:57.015719: step: 540/531, loss: 0.012387125752866268 2023-01-22 15:26:58.072876: step: 544/531, loss: 0.0019971667788922787 2023-01-22 15:26:59.132111: 
step: 548/531, loss: 0.013324547559022903 2023-01-22 15:27:00.186157: step: 552/531, loss: 0.025667695328593254 2023-01-22 15:27:01.240005: step: 556/531, loss: 0.004306916147470474 2023-01-22 15:27:02.294032: step: 560/531, loss: 0.010714216157793999 2023-01-22 15:27:03.362358: step: 564/531, loss: 0.021912487223744392 2023-01-22 15:27:04.417941: step: 568/531, loss: 0.05018653720617294 2023-01-22 15:27:05.483970: step: 572/531, loss: 0.024516025558114052 2023-01-22 15:27:06.548856: step: 576/531, loss: 0.009168602526187897 2023-01-22 15:27:07.597974: step: 580/531, loss: 0.007532237563282251 2023-01-22 15:27:08.655299: step: 584/531, loss: 0.0003346972807776183 2023-01-22 15:27:09.719651: step: 588/531, loss: 0.0348026379942894 2023-01-22 15:27:10.786657: step: 592/531, loss: 0.03567736968398094 2023-01-22 15:27:11.860834: step: 596/531, loss: 0.002214094391092658 2023-01-22 15:27:12.928541: step: 600/531, loss: 0.007236414588987827 2023-01-22 15:27:13.998129: step: 604/531, loss: 0.02221815660595894 2023-01-22 15:27:15.054738: step: 608/531, loss: 0.003051872132346034 2023-01-22 15:27:16.109833: step: 612/531, loss: 0.02217867225408554 2023-01-22 15:27:17.165611: step: 616/531, loss: 0.0036869077011942863 2023-01-22 15:27:18.230764: step: 620/531, loss: 0.013273851945996284 2023-01-22 15:27:19.309147: step: 624/531, loss: 0.009029224514961243 2023-01-22 15:27:20.358106: step: 628/531, loss: 0.0034077484160661697 2023-01-22 15:27:21.441914: step: 632/531, loss: 0.021823812276124954 2023-01-22 15:27:22.515852: step: 636/531, loss: 0.006052076350897551 2023-01-22 15:27:23.584011: step: 640/531, loss: 0.004911745898425579 2023-01-22 15:27:24.652573: step: 644/531, loss: 0.05342480167746544 2023-01-22 15:27:25.743076: step: 648/531, loss: 0.005484255030751228 2023-01-22 15:27:26.804475: step: 652/531, loss: 0.012694372795522213 2023-01-22 15:27:27.866586: step: 656/531, loss: 0.006429442670196295 2023-01-22 15:27:28.941429: step: 660/531, loss: 0.0031178209464997053 2023-01-22 15:27:30.010829: step: 664/531, loss: 0.0016089307609945536 2023-01-22 15:27:31.068366: step: 668/531, loss: 0.011093459092080593 2023-01-22 15:27:32.122833: step: 672/531, loss: 0.0052119772881269455 2023-01-22 15:27:33.191766: step: 676/531, loss: 0.027570489794015884 2023-01-22 15:27:34.260699: step: 680/531, loss: 0.001186838955618441 2023-01-22 15:27:35.324199: step: 684/531, loss: 0.006844093091785908 2023-01-22 15:27:36.369546: step: 688/531, loss: 0.004935556091368198 2023-01-22 15:27:37.425321: step: 692/531, loss: 0.007439350243657827 2023-01-22 15:27:38.488247: step: 696/531, loss: 0.0049890936352312565 2023-01-22 15:27:39.544025: step: 700/531, loss: 0.0013611697359010577 2023-01-22 15:27:40.613255: step: 704/531, loss: 0.004644644912332296 2023-01-22 15:27:41.664368: step: 708/531, loss: 0.003395052859559655 2023-01-22 15:27:42.716940: step: 712/531, loss: 0.009862705133855343 2023-01-22 15:27:43.779716: step: 716/531, loss: 0.009629306383430958 2023-01-22 15:27:44.833233: step: 720/531, loss: 0.006141656078398228 2023-01-22 15:27:45.889563: step: 724/531, loss: 0.00485787820070982 2023-01-22 15:27:46.951223: step: 728/531, loss: 0.020004672929644585 2023-01-22 15:27:48.008756: step: 732/531, loss: 0.011888944543898106 2023-01-22 15:27:49.065346: step: 736/531, loss: 0.0018903245218098164 2023-01-22 15:27:50.120154: step: 740/531, loss: 0.002657040487974882 2023-01-22 15:27:51.184535: step: 744/531, loss: 0.0006607944378629327 2023-01-22 15:27:52.253222: step: 748/531, loss: 0.0035916180349886417 
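Note on the step counter above: the logged index runs well past its own denominator (e.g. step: 2124/531) and only ever advances in multiples of 4. This is consistent with the --accumulate_step 4 setting in the command: the counter appears to track micro-batches, while 531 is the number of optimizer updates per epoch (2124 / 4 = 531), with one log line per update. A minimal sketch of such a loop, assuming a standard PyTorch setup; model, optimizer, and dataloader are hypothetical stand-ins, not the actual objects in train.py:

    import datetime

    def train_one_epoch(model, optimizer, dataloader, accumulate_step=4):
        # Sketch only: gradient accumulation as implied by --accumulate_step 4.
        # 2124 micro-batches // 4 == 531 optimizer updates, hence "step: 2124/531".
        updates_per_epoch = len(dataloader) // accumulate_step
        optimizer.zero_grad()
        for micro_step, batch in enumerate(dataloader, start=1):
            loss = model(batch) / accumulate_step   # scale so accumulated grads match one large batch
            loss.backward()
            if micro_step % accumulate_step == 0:   # update and log once every 4 micro-batches
                optimizer.step()
                optimizer.zero_grad()
                print(f"{datetime.datetime.now()}: step: {micro_step}/{updates_per_epoch}, loss: {loss.item()}")

The timestamp format in the log (e.g. 2023-01-22 15:27:53.310590) matches Python's default rendering of datetime.datetime.now().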
2023-01-22 15:27:53.310590: step: 752/531, loss: 0.007719813380390406 2023-01-22 15:27:54.366158: step: 756/531, loss: 0.007262494880706072 2023-01-22 15:27:55.440868: step: 760/531, loss: 0.019486352801322937 2023-01-22 15:27:56.477060: step: 764/531, loss: 0.01112589705735445 2023-01-22 15:27:57.535088: step: 768/531, loss: 0.014657550491392612 2023-01-22 15:27:58.592298: step: 772/531, loss: 0.02967098169028759 2023-01-22 15:27:59.672500: step: 776/531, loss: 0.008942109532654285 2023-01-22 15:28:00.744865: step: 780/531, loss: 0.015748485922813416 2023-01-22 15:28:01.809792: step: 784/531, loss: 0.0030344678089022636 2023-01-22 15:28:02.854485: step: 788/531, loss: 0.008891900070011616 2023-01-22 15:28:03.921421: step: 792/531, loss: 0.040598366409540176 2023-01-22 15:28:04.975853: step: 796/531, loss: 0.0029410021379590034 2023-01-22 15:28:06.029737: step: 800/531, loss: 0.017152665182948112 2023-01-22 15:28:07.089951: step: 804/531, loss: 0.0126335509121418 2023-01-22 15:28:08.142411: step: 808/531, loss: 0.010139510966837406 2023-01-22 15:28:09.193704: step: 812/531, loss: 0.0028377585113048553 2023-01-22 15:28:10.262817: step: 816/531, loss: 0.003249727189540863 2023-01-22 15:28:11.326764: step: 820/531, loss: 0.017950786277651787 2023-01-22 15:28:12.384554: step: 824/531, loss: 0.005333798937499523 2023-01-22 15:28:13.447359: step: 828/531, loss: 0.0035397931933403015 2023-01-22 15:28:14.502507: step: 832/531, loss: 0.0462794229388237 2023-01-22 15:28:15.562712: step: 836/531, loss: 0.007998448796570301 2023-01-22 15:28:16.644403: step: 840/531, loss: 0.02705160714685917 2023-01-22 15:28:17.714307: step: 844/531, loss: 0.005735460203140974 2023-01-22 15:28:18.781689: step: 848/531, loss: 0.0031818770803511143 2023-01-22 15:28:19.829366: step: 852/531, loss: 0.005705671384930611 2023-01-22 15:28:20.888518: step: 856/531, loss: 0.008555103093385696 2023-01-22 15:28:21.948842: step: 860/531, loss: 0.0024066423065960407 2023-01-22 15:28:22.993677: step: 864/531, loss: 0.004721895791590214 2023-01-22 15:28:24.073187: step: 868/531, loss: 0.005090405698865652 2023-01-22 15:28:25.120889: step: 872/531, loss: 0.0061364793218672276 2023-01-22 15:28:26.190850: step: 876/531, loss: 0.04104631394147873 2023-01-22 15:28:27.250046: step: 880/531, loss: 0.007381265051662922 2023-01-22 15:28:28.300268: step: 884/531, loss: 0.0043432763777673244 2023-01-22 15:28:29.365672: step: 888/531, loss: 0.005296186078339815 2023-01-22 15:28:30.432866: step: 892/531, loss: 0.002196338027715683 2023-01-22 15:28:31.495322: step: 896/531, loss: 0.006520502734929323 2023-01-22 15:28:32.536668: step: 900/531, loss: 0.011645560152828693 2023-01-22 15:28:33.609166: step: 904/531, loss: 0.009051233530044556 2023-01-22 15:28:34.675451: step: 908/531, loss: 0.016671858727931976 2023-01-22 15:28:35.738495: step: 912/531, loss: 0.005036613438278437 2023-01-22 15:28:36.805217: step: 916/531, loss: 0.014427910558879375 2023-01-22 15:28:37.871453: step: 920/531, loss: 0.00046623675734736025 2023-01-22 15:28:38.936951: step: 924/531, loss: 0.013886132277548313 2023-01-22 15:28:39.994240: step: 928/531, loss: 0.01004299707710743 2023-01-22 15:28:41.045439: step: 932/531, loss: 0.009398017078638077 2023-01-22 15:28:42.105979: step: 936/531, loss: 0.0025876956060528755 2023-01-22 15:28:43.178256: step: 940/531, loss: 0.041548389941453934 2023-01-22 15:28:44.244443: step: 944/531, loss: 0.004219766240566969 2023-01-22 15:28:45.321094: step: 948/531, loss: 0.0017521681729704142 2023-01-22 15:28:46.378377: step: 952/531, loss: 
0.014079599641263485 2023-01-22 15:28:47.433430: step: 956/531, loss: 0.011452076025307178 2023-01-22 15:28:48.496157: step: 960/531, loss: 0.0038475249893963337 2023-01-22 15:28:49.550033: step: 964/531, loss: 0.0054289293475449085 2023-01-22 15:28:50.624404: step: 968/531, loss: 0.004413502290844917 2023-01-22 15:28:51.703935: step: 972/531, loss: 0.0005040338728576899 2023-01-22 15:28:52.775334: step: 976/531, loss: 0.005536057520657778 2023-01-22 15:28:53.842386: step: 980/531, loss: 0.0059566805139184 2023-01-22 15:28:54.900775: step: 984/531, loss: 0.004091221839189529 2023-01-22 15:28:55.959979: step: 988/531, loss: 0.004754670429974794 2023-01-22 15:28:57.016222: step: 992/531, loss: 0.021801957860589027 2023-01-22 15:28:58.074917: step: 996/531, loss: 0.04336067661643028 2023-01-22 15:28:59.169850: step: 1000/531, loss: 0.020230498164892197 2023-01-22 15:29:00.223635: step: 1004/531, loss: 0.005798004567623138 2023-01-22 15:29:01.277799: step: 1008/531, loss: 0.005172763951122761 2023-01-22 15:29:02.354986: step: 1012/531, loss: 0.00398161681368947 2023-01-22 15:29:03.416891: step: 1016/531, loss: 0.020105313509702682 2023-01-22 15:29:04.482983: step: 1020/531, loss: 0.0057420809753239155 2023-01-22 15:29:05.548152: step: 1024/531, loss: 0.006394962780177593 2023-01-22 15:29:06.631531: step: 1028/531, loss: 0.001249514752998948 2023-01-22 15:29:07.691278: step: 1032/531, loss: 0.008573153987526894 2023-01-22 15:29:08.740686: step: 1036/531, loss: 0.0055114817805588245 2023-01-22 15:29:09.800173: step: 1040/531, loss: 0.02221653051674366 2023-01-22 15:29:10.858736: step: 1044/531, loss: 0.0014722991036251187 2023-01-22 15:29:11.935260: step: 1048/531, loss: 0.0003363671712577343 2023-01-22 15:29:12.986245: step: 1052/531, loss: 0.004248330835253 2023-01-22 15:29:14.053210: step: 1056/531, loss: 0.009905761107802391 2023-01-22 15:29:15.118343: step: 1060/531, loss: 0.0020095587242394686 2023-01-22 15:29:16.175358: step: 1064/531, loss: 0.0077373310923576355 2023-01-22 15:29:17.242189: step: 1068/531, loss: 0.025465836748480797 2023-01-22 15:29:18.305638: step: 1072/531, loss: 0.01543425302952528 2023-01-22 15:29:19.352610: step: 1076/531, loss: 0.009915855713188648 2023-01-22 15:29:20.405805: step: 1080/531, loss: 0.013025938533246517 2023-01-22 15:29:21.459026: step: 1084/531, loss: 0.0030364685226231813 2023-01-22 15:29:22.504733: step: 1088/531, loss: 0.030855901539325714 2023-01-22 15:29:23.568089: step: 1092/531, loss: 0.05402104929089546 2023-01-22 15:29:24.617137: step: 1096/531, loss: 0.006830158643424511 2023-01-22 15:29:25.681614: step: 1100/531, loss: 0.011064563877880573 2023-01-22 15:29:26.737419: step: 1104/531, loss: 0.021480709314346313 2023-01-22 15:29:27.792033: step: 1108/531, loss: 0.01907600276172161 2023-01-22 15:29:28.860580: step: 1112/531, loss: 0.004825084004551172 2023-01-22 15:29:29.918286: step: 1116/531, loss: 0.03258570656180382 2023-01-22 15:29:30.965960: step: 1120/531, loss: 0.022793063893914223 2023-01-22 15:29:32.023013: step: 1124/531, loss: 0.005661248695105314 2023-01-22 15:29:33.072083: step: 1128/531, loss: 0.0022720012348145247 2023-01-22 15:29:34.127079: step: 1132/531, loss: 0.004178924486041069 2023-01-22 15:29:35.198696: step: 1136/531, loss: 0.07149509340524673 2023-01-22 15:29:36.231831: step: 1140/531, loss: 0.017420422285795212 2023-01-22 15:29:37.294666: step: 1144/531, loss: 0.010559949092566967 2023-01-22 15:29:38.356895: step: 1148/531, loss: 0.011860010214149952 2023-01-22 15:29:39.430804: step: 1152/531, loss: 
0.06689035147428513 2023-01-22 15:29:40.476746: step: 1156/531, loss: 0.046632975339889526 2023-01-22 15:29:41.540717: step: 1160/531, loss: 0.004091491922736168 2023-01-22 15:29:42.590344: step: 1164/531, loss: 0.0022138371132314205 2023-01-22 15:29:43.656578: step: 1168/531, loss: 0.01418247353285551 2023-01-22 15:29:44.707224: step: 1172/531, loss: 0.007050564978271723 2023-01-22 15:29:45.774034: step: 1176/531, loss: 0.07845360040664673 2023-01-22 15:29:46.838528: step: 1180/531, loss: 0.008402055129408836 2023-01-22 15:29:47.904172: step: 1184/531, loss: 0.005471793003380299 2023-01-22 15:29:48.948284: step: 1188/531, loss: 0.00032339635072275996 2023-01-22 15:29:50.016632: step: 1192/531, loss: 0.008736653253436089 2023-01-22 15:29:51.081659: step: 1196/531, loss: 0.0049553243443369865 2023-01-22 15:29:52.146737: step: 1200/531, loss: 0.007759691681712866 2023-01-22 15:29:53.206676: step: 1204/531, loss: 0.015211337246000767 2023-01-22 15:29:54.257279: step: 1208/531, loss: 0.01538966316729784 2023-01-22 15:29:55.314984: step: 1212/531, loss: 0.021568182855844498 2023-01-22 15:29:56.346848: step: 1216/531, loss: 0.0006008119089528918 2023-01-22 15:29:57.396325: step: 1220/531, loss: 0.012032121419906616 2023-01-22 15:29:58.463353: step: 1224/531, loss: 0.036595966666936874 2023-01-22 15:29:59.515258: step: 1228/531, loss: 0.007924004457890987 2023-01-22 15:30:00.599724: step: 1232/531, loss: 0.04528406634926796 2023-01-22 15:30:01.653270: step: 1236/531, loss: 0.013258915394544601 2023-01-22 15:30:02.710497: step: 1240/531, loss: 0.0007331136730499566 2023-01-22 15:30:03.781980: step: 1244/531, loss: 0.019347058609128 2023-01-22 15:30:04.839279: step: 1248/531, loss: 0.0061698839999735355 2023-01-22 15:30:05.899359: step: 1252/531, loss: 0.03367088362574577 2023-01-22 15:30:06.954803: step: 1256/531, loss: 0.0052800364792346954 2023-01-22 15:30:08.017204: step: 1260/531, loss: 0.0017504931893199682 2023-01-22 15:30:09.065363: step: 1264/531, loss: 0.02444339171051979 2023-01-22 15:30:10.148317: step: 1268/531, loss: 0.034685395658016205 2023-01-22 15:30:11.211887: step: 1272/531, loss: 0.005013342946767807 2023-01-22 15:30:12.263636: step: 1276/531, loss: 0.0036082046572118998 2023-01-22 15:30:13.315522: step: 1280/531, loss: 0.017291255295276642 2023-01-22 15:30:14.387295: step: 1284/531, loss: 0.004581100307404995 2023-01-22 15:30:15.428898: step: 1288/531, loss: 0.000507197342813015 2023-01-22 15:30:16.499503: step: 1292/531, loss: 0.012628301978111267 2023-01-22 15:30:17.571017: step: 1296/531, loss: 0.002073760610073805 2023-01-22 15:30:18.629246: step: 1300/531, loss: 0.0270913727581501 2023-01-22 15:30:19.712908: step: 1304/531, loss: 0.015117453411221504 2023-01-22 15:30:20.781436: step: 1308/531, loss: 0.022538896650075912 2023-01-22 15:30:21.869240: step: 1312/531, loss: 0.03624674305319786 2023-01-22 15:30:22.936435: step: 1316/531, loss: 0.004522361326962709 2023-01-22 15:30:23.997527: step: 1320/531, loss: 0.010240288451313972 2023-01-22 15:30:25.076049: step: 1324/531, loss: 0.009906929917633533 2023-01-22 15:30:26.135092: step: 1328/531, loss: 0.025363946333527565 2023-01-22 15:30:27.206286: step: 1332/531, loss: 0.0018918426940217614 2023-01-22 15:30:28.251136: step: 1336/531, loss: 0.0035500042140483856 2023-01-22 15:30:29.310635: step: 1340/531, loss: 0.019974039867520332 2023-01-22 15:30:30.363276: step: 1344/531, loss: 0.0022247035522013903 2023-01-22 15:30:31.434917: step: 1348/531, loss: 0.0040542082861065865 2023-01-22 15:30:32.480430: step: 1352/531, loss: 
0.006103890016674995 2023-01-22 15:30:33.537988: step: 1356/531, loss: 0.013787849806249142 2023-01-22 15:30:34.590314: step: 1360/531, loss: 0.021495060995221138 2023-01-22 15:30:35.642321: step: 1364/531, loss: 0.009032532572746277 2023-01-22 15:30:36.709642: step: 1368/531, loss: 0.012110460549592972 2023-01-22 15:30:37.760190: step: 1372/531, loss: 0.005954327993094921 2023-01-22 15:30:38.810048: step: 1376/531, loss: 0.014109624549746513 2023-01-22 15:30:39.875559: step: 1380/531, loss: 0.04883275181055069 2023-01-22 15:30:40.932846: step: 1384/531, loss: 0.01282959058880806 2023-01-22 15:30:41.999869: step: 1388/531, loss: 0.012233157642185688 2023-01-22 15:30:43.086733: step: 1392/531, loss: 0.022701958194375038 2023-01-22 15:30:44.151162: step: 1396/531, loss: 0.006441434845328331 2023-01-22 15:30:45.221149: step: 1400/531, loss: 0.012959196232259274 2023-01-22 15:30:46.278710: step: 1404/531, loss: 0.007666775956749916 2023-01-22 15:30:47.324748: step: 1408/531, loss: 0.0009315578499808908 2023-01-22 15:30:48.390961: step: 1412/531, loss: 0.0037649234291166067 2023-01-22 15:30:49.441853: step: 1416/531, loss: 0.014111046679317951 2023-01-22 15:30:50.499433: step: 1420/531, loss: 0.0038568233139812946 2023-01-22 15:30:51.559651: step: 1424/531, loss: 0.009174461476504803 2023-01-22 15:30:52.624685: step: 1428/531, loss: 0.0057424819096922874 2023-01-22 15:30:53.680044: step: 1432/531, loss: 0.005351158324629068 2023-01-22 15:30:54.755760: step: 1436/531, loss: 0.02396395057439804 2023-01-22 15:30:55.817694: step: 1440/531, loss: 0.009414681233465672 2023-01-22 15:30:56.874595: step: 1444/531, loss: 0.01584302820265293 2023-01-22 15:30:57.938431: step: 1448/531, loss: 0.004913279786705971 2023-01-22 15:30:58.990255: step: 1452/531, loss: 0.007930240593850613 2023-01-22 15:31:00.053937: step: 1456/531, loss: 0.006487489677965641 2023-01-22 15:31:01.132611: step: 1460/531, loss: 0.012037992477416992 2023-01-22 15:31:02.187466: step: 1464/531, loss: 0.006566865835338831 2023-01-22 15:31:03.235457: step: 1468/531, loss: 0.005106267519295216 2023-01-22 15:31:04.284418: step: 1472/531, loss: 0.003189919050782919 2023-01-22 15:31:05.340486: step: 1476/531, loss: 0.013816825114190578 2023-01-22 15:31:06.390270: step: 1480/531, loss: 0.015051505528390408 2023-01-22 15:31:07.447066: step: 1484/531, loss: 0.016562610864639282 2023-01-22 15:31:08.505060: step: 1488/531, loss: 0.03791069984436035 2023-01-22 15:31:09.562569: step: 1492/531, loss: 0.004593304358422756 2023-01-22 15:31:10.630981: step: 1496/531, loss: 0.0009665197576396167 2023-01-22 15:31:11.700761: step: 1500/531, loss: 0.043097857385873795 2023-01-22 15:31:12.758796: step: 1504/531, loss: 0.013915913179516792 2023-01-22 15:31:13.811832: step: 1508/531, loss: 0.039936330169439316 2023-01-22 15:31:14.874045: step: 1512/531, loss: 0.005395339801907539 2023-01-22 15:31:15.952984: step: 1516/531, loss: 0.04015607386827469 2023-01-22 15:31:17.006233: step: 1520/531, loss: 0.00961624551564455 2023-01-22 15:31:18.078620: step: 1524/531, loss: 0.005523694679141045 2023-01-22 15:31:19.128175: step: 1528/531, loss: 0.016392536461353302 2023-01-22 15:31:20.197673: step: 1532/531, loss: 0.017963485792279243 2023-01-22 15:31:21.252230: step: 1536/531, loss: 0.004716671071946621 2023-01-22 15:31:22.318672: step: 1540/531, loss: 0.009314429946243763 2023-01-22 15:31:23.382204: step: 1544/531, loss: 0.014859539456665516 2023-01-22 15:31:24.445833: step: 1548/531, loss: 0.00373009592294693 2023-01-22 15:31:25.511925: step: 1552/531, loss: 
0.01349884457886219 2023-01-22 15:31:26.580102: step: 1556/531, loss: 0.0012248989660292864 2023-01-22 15:31:27.654771: step: 1560/531, loss: 0.0167396143078804 2023-01-22 15:31:28.728264: step: 1564/531, loss: 0.005124368704855442 2023-01-22 15:31:29.792878: step: 1568/531, loss: 0.00532143609598279 2023-01-22 15:31:30.853489: step: 1572/531, loss: 0.002435114234685898 2023-01-22 15:31:31.914546: step: 1576/531, loss: 0.007091684732586145 2023-01-22 15:31:32.959748: step: 1580/531, loss: 0.04177471250295639 2023-01-22 15:31:34.021591: step: 1584/531, loss: 0.009794454090297222 2023-01-22 15:31:35.085393: step: 1588/531, loss: 0.01756526343524456 2023-01-22 15:31:36.139389: step: 1592/531, loss: 0.008132767863571644 2023-01-22 15:31:37.199776: step: 1596/531, loss: 0.0005605625919997692 2023-01-22 15:31:38.261268: step: 1600/531, loss: 0.014891646802425385 2023-01-22 15:31:39.323954: step: 1604/531, loss: 0.017712175846099854 2023-01-22 15:31:40.380619: step: 1608/531, loss: 0.00015844375593587756 2023-01-22 15:31:41.428746: step: 1612/531, loss: 0.008950797840952873 2023-01-22 15:31:42.493708: step: 1616/531, loss: 0.0005535169038921595 2023-01-22 15:31:43.543630: step: 1620/531, loss: 0.00734550878405571 2023-01-22 15:31:44.596292: step: 1624/531, loss: 0.013187505304813385 2023-01-22 15:31:45.655353: step: 1628/531, loss: 0.01382160373032093 2023-01-22 15:31:46.717047: step: 1632/531, loss: 0.0131465969607234 2023-01-22 15:31:47.782683: step: 1636/531, loss: 0.10788968950510025 2023-01-22 15:31:48.838081: step: 1640/531, loss: 0.009063305333256721 2023-01-22 15:31:49.909741: step: 1644/531, loss: 0.008790520951151848 2023-01-22 15:31:50.974351: step: 1648/531, loss: 0.021610645577311516 2023-01-22 15:31:52.032267: step: 1652/531, loss: 0.008326945826411247 2023-01-22 15:31:53.081997: step: 1656/531, loss: 0.03982025384902954 2023-01-22 15:31:54.150143: step: 1660/531, loss: 0.004880597349256277 2023-01-22 15:31:55.194522: step: 1664/531, loss: 0.004995083436369896 2023-01-22 15:31:56.254575: step: 1668/531, loss: 0.00897503923624754 2023-01-22 15:31:57.304256: step: 1672/531, loss: 0.0036237251479178667 2023-01-22 15:31:58.367836: step: 1676/531, loss: 0.005051128100603819 2023-01-22 15:31:59.437092: step: 1680/531, loss: 0.15174442529678345 2023-01-22 15:32:00.514371: step: 1684/531, loss: 0.0 2023-01-22 15:32:01.583575: step: 1688/531, loss: 0.033902134746313095 2023-01-22 15:32:02.642817: step: 1692/531, loss: 0.07192021608352661 2023-01-22 15:32:03.701160: step: 1696/531, loss: 0.007757317740470171 2023-01-22 15:32:04.771031: step: 1700/531, loss: 0.029810024425387383 2023-01-22 15:32:05.835636: step: 1704/531, loss: 0.0022699744440615177 2023-01-22 15:32:06.889811: step: 1708/531, loss: 0.004880097229033709 2023-01-22 15:32:07.953606: step: 1712/531, loss: 0.0071748364716768265 2023-01-22 15:32:09.013029: step: 1716/531, loss: 0.009971429593861103 2023-01-22 15:32:10.059802: step: 1720/531, loss: 0.004291251301765442 2023-01-22 15:32:11.110196: step: 1724/531, loss: 0.013477936387062073 2023-01-22 15:32:12.184053: step: 1728/531, loss: 0.009166092611849308 2023-01-22 15:32:13.242081: step: 1732/531, loss: 0.02056244947016239 2023-01-22 15:32:14.286734: step: 1736/531, loss: 0.029907966032624245 2023-01-22 15:32:15.342545: step: 1740/531, loss: 0.0038931923918426037 2023-01-22 15:32:16.396152: step: 1744/531, loss: 0.00536212557926774 2023-01-22 15:32:17.445216: step: 1748/531, loss: 0.004581668879836798 2023-01-22 15:32:18.518977: step: 1752/531, loss: 0.005966234020888805 
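Each epoch summary (the "Loss: 0.014" line in the epoch-3 block above and the "Loss: 0.013" line in the epoch-4 block below) reports a single figure per epoch, presumably the mean of that epoch's per-step losses rounded to three decimals; the aggregation itself is not shown in this log. A small sketch under that assumption, with a hypothetical helper name:

    def print_epoch_footer(step_losses):
        # Sketch: emit the "=====" / "Loss: x.xxx" / "-----" footer seen after each epoch,
        # assuming the reported number is the average per-step loss.
        mean_loss = sum(step_losses) / max(len(step_losses), 1)
        print("=" * 50)
        print(f"Loss: {mean_loss:.3f}")
        print("-" * 20)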
2023-01-22 15:32:19.570538: step: 1756/531, loss: 0.010447864420711994 2023-01-22 15:32:20.630392: step: 1760/531, loss: 0.003937452100217342 2023-01-22 15:32:21.688687: step: 1764/531, loss: 0.005030252505093813 2023-01-22 15:32:22.737198: step: 1768/531, loss: 0.014101854525506496 2023-01-22 15:32:23.794646: step: 1772/531, loss: 0.0017251266399398446 2023-01-22 15:32:24.843007: step: 1776/531, loss: 0.004788325168192387 2023-01-22 15:32:25.900114: step: 1780/531, loss: 0.012858579866588116 2023-01-22 15:32:26.954466: step: 1784/531, loss: 0.009258554317057133 2023-01-22 15:32:27.991174: step: 1788/531, loss: 0.005546551663428545 2023-01-22 15:32:29.041845: step: 1792/531, loss: 0.004637312144041061 2023-01-22 15:32:30.093560: step: 1796/531, loss: 0.011219508945941925 2023-01-22 15:32:31.164897: step: 1800/531, loss: 0.003312710439786315 2023-01-22 15:32:32.215956: step: 1804/531, loss: 0.025780469179153442 2023-01-22 15:32:33.271784: step: 1808/531, loss: 0.010041517205536366 2023-01-22 15:32:34.332485: step: 1812/531, loss: 0.01294084545224905 2023-01-22 15:32:35.393154: step: 1816/531, loss: 0.0037586225662380457 2023-01-22 15:32:36.455563: step: 1820/531, loss: 0.003309978637844324 2023-01-22 15:32:37.512437: step: 1824/531, loss: 0.01272408664226532 2023-01-22 15:32:38.581558: step: 1828/531, loss: 0.01631883718073368 2023-01-22 15:32:39.649032: step: 1832/531, loss: 0.003025906393304467 2023-01-22 15:32:40.717377: step: 1836/531, loss: 0.026338353753089905 2023-01-22 15:32:41.793299: step: 1840/531, loss: 0.008698285557329655 2023-01-22 15:32:42.860928: step: 1844/531, loss: 0.013285747729241848 2023-01-22 15:32:43.926656: step: 1848/531, loss: 0.007151308935135603 2023-01-22 15:32:44.999267: step: 1852/531, loss: 0.003446970833465457 2023-01-22 15:32:46.049468: step: 1856/531, loss: 0.016193140298128128 2023-01-22 15:32:47.115247: step: 1860/531, loss: 0.03714349865913391 2023-01-22 15:32:48.170241: step: 1864/531, loss: 0.008802087977528572 2023-01-22 15:32:49.241179: step: 1868/531, loss: 0.00472974730655551 2023-01-22 15:32:50.312210: step: 1872/531, loss: 0.004969058092683554 2023-01-22 15:32:51.381664: step: 1876/531, loss: 0.01655971072614193 2023-01-22 15:32:52.431881: step: 1880/531, loss: 0.005845074076205492 2023-01-22 15:32:53.498871: step: 1884/531, loss: 0.0037426832132041454 2023-01-22 15:32:54.556306: step: 1888/531, loss: 0.021836236119270325 2023-01-22 15:32:55.612533: step: 1892/531, loss: 0.028230121359229088 2023-01-22 15:32:56.652834: step: 1896/531, loss: 0.0045180413872003555 2023-01-22 15:32:57.713109: step: 1900/531, loss: 0.029489697888493538 2023-01-22 15:32:58.771525: step: 1904/531, loss: 0.01240628957748413 2023-01-22 15:32:59.834375: step: 1908/531, loss: 0.004909764509648085 2023-01-22 15:33:00.898120: step: 1912/531, loss: 0.002719014184549451 2023-01-22 15:33:01.973136: step: 1916/531, loss: 0.005891120061278343 2023-01-22 15:33:03.021202: step: 1920/531, loss: 0.0052268970757722855 2023-01-22 15:33:04.101673: step: 1924/531, loss: 0.004456925205886364 2023-01-22 15:33:05.181092: step: 1928/531, loss: 0.045633673667907715 2023-01-22 15:33:06.240603: step: 1932/531, loss: 0.0038677516859024763 2023-01-22 15:33:07.297679: step: 1936/531, loss: 0.017424726858735085 2023-01-22 15:33:08.353855: step: 1940/531, loss: 0.002469978528097272 2023-01-22 15:33:09.410179: step: 1944/531, loss: 0.006199637893587351 2023-01-22 15:33:10.458975: step: 1948/531, loss: 0.013204146176576614 2023-01-22 15:33:11.526097: step: 1952/531, loss: 0.0021460703574121 
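In every evaluation dict in this log (the epoch-3 block above and the epoch-4 block that follows), the 'combined' value equals the product of the template F1 and the slot F1, e.g. 0.7368421052631579 * 0.3328433794466404 = 0.2452530164343666 for Dev Chinese at epoch 3. A sketch of that relationship; combined_score is a hypothetical helper, and the numbers are copied from the log rather than recomputed:

    def combined_score(result):
        # 'combined' = template F1 * slot F1, as the logged numbers imply.
        return result['template']['f1'] * result['slot']['f1']

    dev_chinese_epoch3 = {
        'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
        'slot': {'p': 0.3479726239669422, 'r': 0.31897490530303035, 'f1': 0.3328433794466404},
    }
    print(combined_score(dev_chinese_epoch3))  # ~0.2452530164343666, matching the logged 'combined'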
2023-01-22 15:33:12.594926: step: 1956/531, loss: 0.008032361045479774 2023-01-22 15:33:13.645250: step: 1960/531, loss: 0.003598395036533475 2023-01-22 15:33:14.725959: step: 1964/531, loss: 0.008926734328269958 2023-01-22 15:33:15.773213: step: 1968/531, loss: 0.005528079811483622 2023-01-22 15:33:16.831834: step: 1972/531, loss: 0.003637881251052022 2023-01-22 15:33:17.890765: step: 1976/531, loss: 0.008379240520298481 2023-01-22 15:33:18.947780: step: 1980/531, loss: 0.009176112711429596 2023-01-22 15:33:19.998337: step: 1984/531, loss: 0.0045841108076274395 2023-01-22 15:33:21.053934: step: 1988/531, loss: 0.0014704858185723424 2023-01-22 15:33:22.117996: step: 1992/531, loss: 0.03910716995596886 2023-01-22 15:33:23.185117: step: 1996/531, loss: 0.0025522089563310146 2023-01-22 15:33:24.256105: step: 2000/531, loss: 0.0012237310875207186 2023-01-22 15:33:25.312993: step: 2004/531, loss: 0.006343057844787836 2023-01-22 15:33:26.380337: step: 2008/531, loss: 0.04061000794172287 2023-01-22 15:33:27.427175: step: 2012/531, loss: 0.0002299383922945708 2023-01-22 15:33:28.496657: step: 2016/531, loss: 0.009326566942036152 2023-01-22 15:33:29.562431: step: 2020/531, loss: 0.0038306904025375843 2023-01-22 15:33:30.623771: step: 2024/531, loss: 0.002030216855928302 2023-01-22 15:33:31.680067: step: 2028/531, loss: 0.00933896191418171 2023-01-22 15:33:32.746648: step: 2032/531, loss: 0.003491660114377737 2023-01-22 15:33:33.806132: step: 2036/531, loss: 0.0031041449401527643 2023-01-22 15:33:34.870207: step: 2040/531, loss: 0.006671515293419361 2023-01-22 15:33:35.928687: step: 2044/531, loss: 0.00508086895570159 2023-01-22 15:33:36.981172: step: 2048/531, loss: 0.015601389110088348 2023-01-22 15:33:38.043161: step: 2052/531, loss: 0.008544765412807465 2023-01-22 15:33:39.113027: step: 2056/531, loss: 0.02071394957602024 2023-01-22 15:33:40.177567: step: 2060/531, loss: 0.003219618694856763 2023-01-22 15:33:41.232493: step: 2064/531, loss: 0.007551091257482767 2023-01-22 15:33:42.291462: step: 2068/531, loss: 0.00708481902256608 2023-01-22 15:33:43.344140: step: 2072/531, loss: 0.009560951963067055 2023-01-22 15:33:44.408764: step: 2076/531, loss: 0.010458333417773247 2023-01-22 15:33:45.470464: step: 2080/531, loss: 0.037046853452920914 2023-01-22 15:33:46.540376: step: 2084/531, loss: 0.07459976524114609 2023-01-22 15:33:47.609838: step: 2088/531, loss: 0.007718859240412712 2023-01-22 15:33:48.659187: step: 2092/531, loss: 0.008536077104508877 2023-01-22 15:33:49.726260: step: 2096/531, loss: 0.013174931518733501 2023-01-22 15:33:50.803842: step: 2100/531, loss: 0.04114936292171478 2023-01-22 15:33:51.863679: step: 2104/531, loss: 0.003819637931883335 2023-01-22 15:33:52.935105: step: 2108/531, loss: 0.019232796505093575 2023-01-22 15:33:54.000788: step: 2112/531, loss: 0.004383544437587261 2023-01-22 15:33:55.056508: step: 2116/531, loss: 0.02925959974527359 2023-01-22 15:33:56.130567: step: 2120/531, loss: 0.006749176885932684 2023-01-22 15:33:57.192179: step: 2124/531, loss: 0.0028425422497093678 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507714435146444, 'r': 0.31755445075757577, 'f1': 0.3333374751491054}, 'combined': 0.24561708695197237, 'stategy': 1, 'epoch': 4} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3352056919880673, 'r': 0.27590937824288575, 'f1': 0.3026807832605123}, 'combined': 
0.18952908858368528, 'stategy': 1, 'epoch': 4}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33228508345486957, 'r': 0.35372283077453853, 'f1': 0.3426689923128342}, 'combined': 0.2524929417041936, 'stategy': 1, 'epoch': 4}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36266832661399173, 'r': 0.3031501702174665, 'f1': 0.3302490557673579}, 'combined': 0.20466138667272882, 'stategy': 1, 'epoch': 4}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31600152905198775, 'r': 0.32679475015812776, 'f1': 0.3213075248756219}, 'combined': 0.2367529130662477, 'stategy': 1, 'epoch': 4}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3617224895021513, 'r': 0.2852608250545421, 'f1': 0.3189734680155334}, 'combined': 0.2115962609607994, 'stategy': 1, 'epoch': 4}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.34285714285714286, 'f1': 0.3529411764705882}, 'combined': 0.2352941176470588, 'stategy': 1, 'epoch': 4}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 4}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 4}
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355702906162465, 'r': 0.3206715593434344, 'f1': 0.3372800464807437}, 'combined': 0.24852213951212693, 'stategy': 1, 'epoch': 1}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3363021832161375, 'r': 0.27681190531869804, 'f1': 0.3036708822704133}, 'combined': 0.19014905712259525, 'stategy': 1, 'epoch': 1}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 1}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33228508345486957, 'r': 0.35372283077453853, 'f1': 0.3426689923128342}, 'combined': 0.2524929417041936, 'stategy': 1, 'epoch': 4}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36266832661399173, 'r': 0.3031501702174665, 'f1': 0.3302490557673579}, 'combined': 0.20466138667272882, 'stategy': 1, 'epoch': 4}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 4}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31600152905198775, 'r': 0.32679475015812776, 'f1': 0.3213075248756219}, 'combined': 0.2367529130662477, 'stategy': 1, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p':
0.3617224895021513, 'r': 0.2852608250545421, 'f1': 0.3189734680155334}, 'combined': 0.2115962609607994, 'stategy': 1, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 15:36:43.510136: step: 4/531, loss: 0.005807214882224798 2023-01-22 15:36:44.572344: step: 8/531, loss: 0.0022022947669029236 2023-01-22 15:36:45.627735: step: 12/531, loss: 0.02152836136519909 2023-01-22 15:36:46.678066: step: 16/531, loss: 0.006170021835714579 2023-01-22 15:36:47.729342: step: 20/531, loss: 0.01623407192528248 2023-01-22 15:36:48.795115: step: 24/531, loss: 0.003679434070363641 2023-01-22 15:36:49.855508: step: 28/531, loss: 0.0026208723429590464 2023-01-22 15:36:50.899810: step: 32/531, loss: 0.0031494239810854197 2023-01-22 15:36:51.936573: step: 36/531, loss: 0.0015068423235788941 2023-01-22 15:36:52.999124: step: 40/531, loss: 0.0038974073249846697 2023-01-22 15:36:54.034511: step: 44/531, loss: 0.003259575692936778 2023-01-22 15:36:55.073048: step: 48/531, loss: 0.002108826534822583 2023-01-22 15:36:56.126917: step: 52/531, loss: 0.0013047748943790793 2023-01-22 15:36:57.176183: step: 56/531, loss: 0.0007550541777163744 2023-01-22 15:36:58.228659: step: 60/531, loss: 0.002022378146648407 2023-01-22 15:36:59.300511: step: 64/531, loss: 0.005456749815493822 2023-01-22 15:37:00.376436: step: 68/531, loss: 0.007847040891647339 2023-01-22 15:37:01.452657: step: 72/531, loss: 0.004182835575193167 2023-01-22 15:37:02.499782: step: 76/531, loss: 0.0049982499331235886 2023-01-22 15:37:03.577682: step: 80/531, loss: 0.044010329991579056 2023-01-22 15:37:04.636091: step: 84/531, loss: 0.004086959175765514 2023-01-22 15:37:05.697402: step: 88/531, loss: 0.002345503307878971 2023-01-22 15:37:06.767300: step: 92/531, loss: 0.0042015486396849155 2023-01-22 15:37:07.824511: step: 96/531, loss: 0.00812936294823885 2023-01-22 15:37:08.882786: step: 100/531, loss: 0.0044873724691569805 2023-01-22 15:37:09.939627: step: 104/531, loss: 0.015516269020736217 2023-01-22 15:37:10.987259: step: 108/531, loss: 0.004173475783318281 2023-01-22 15:37:12.045473: step: 112/531, loss: 0.016641296446323395 2023-01-22 15:37:13.094561: step: 116/531, loss: 0.0057066092267632484 2023-01-22 15:37:14.159473: step: 120/531, loss: 0.007502554450184107 2023-01-22 15:37:15.242996: step: 124/531, loss: 0.0028000548481941223 2023-01-22 15:37:16.295074: step: 128/531, loss: 0.005122318398207426 2023-01-22 15:37:17.350237: step: 132/531, loss: 0.007589933928102255 2023-01-22 15:37:18.406228: step: 136/531, loss: 0.0038941216189414263 2023-01-22 15:37:19.481562: step: 140/531, loss: 0.02303730510175228 2023-01-22 15:37:20.524478: step: 144/531, loss: 0.02759641967713833 2023-01-22 15:37:21.597125: step: 148/531, loss: 0.004355369135737419 2023-01-22 15:37:22.657906: step: 152/531, loss: 0.0025970609858632088 2023-01-22 15:37:23.726447: step: 156/531, loss: 0.014739280566573143 2023-01-22 15:37:24.783144: step: 160/531, loss: 0.0026813806034624577 2023-01-22 15:37:25.853095: step: 164/531, loss: 0.004748336039483547 2023-01-22 15:37:26.911312: step: 168/531, loss: 0.005711060017347336 2023-01-22 
15:37:27.982761: step: 172/531, loss: 0.018474796786904335 2023-01-22 15:37:29.036765: step: 176/531, loss: 0.002739003859460354 2023-01-22 15:37:30.114954: step: 180/531, loss: 0.011312433518469334 2023-01-22 15:37:31.179300: step: 184/531, loss: 0.023797253146767616 2023-01-22 15:37:32.233949: step: 188/531, loss: 0.011803213506937027 2023-01-22 15:37:33.292797: step: 192/531, loss: 0.012820323929190636 2023-01-22 15:37:34.347089: step: 196/531, loss: 3.698200453072786e-05 2023-01-22 15:37:35.408016: step: 200/531, loss: 0.003794708289206028 2023-01-22 15:37:36.483237: step: 204/531, loss: 0.03602186590433121 2023-01-22 15:37:37.558935: step: 208/531, loss: 0.0006447615451179445 2023-01-22 15:37:38.611038: step: 212/531, loss: 0.002043553162366152 2023-01-22 15:37:39.669718: step: 216/531, loss: 0.0020245842169970274 2023-01-22 15:37:40.746309: step: 220/531, loss: 0.013785269111394882 2023-01-22 15:37:41.820230: step: 224/531, loss: 0.04330361634492874 2023-01-22 15:37:42.883768: step: 228/531, loss: 0.009757502004504204 2023-01-22 15:37:43.955240: step: 232/531, loss: 0.01938987523317337 2023-01-22 15:37:45.019746: step: 236/531, loss: 0.0051905191503465176 2023-01-22 15:37:46.074844: step: 240/531, loss: 0.017227424308657646 2023-01-22 15:37:47.140983: step: 244/531, loss: 0.004427309613674879 2023-01-22 15:37:48.197238: step: 248/531, loss: 0.007141686510294676 2023-01-22 15:37:49.252804: step: 252/531, loss: 0.0038854489102959633 2023-01-22 15:37:50.316459: step: 256/531, loss: 0.0016804260667413473 2023-01-22 15:37:51.384959: step: 260/531, loss: 0.008154327981173992 2023-01-22 15:37:52.433354: step: 264/531, loss: 0.0 2023-01-22 15:37:53.503120: step: 268/531, loss: 0.0018283461686223745 2023-01-22 15:37:54.562909: step: 272/531, loss: 0.010594193823635578 2023-01-22 15:37:55.629979: step: 276/531, loss: 0.00935339368879795 2023-01-22 15:37:56.676462: step: 280/531, loss: 0.009318103082478046 2023-01-22 15:37:57.746425: step: 284/531, loss: 0.009640650823712349 2023-01-22 15:37:58.800203: step: 288/531, loss: 0.004278747830539942 2023-01-22 15:37:59.877280: step: 292/531, loss: 0.007965181954205036 2023-01-22 15:38:00.936468: step: 296/531, loss: 0.02535293437540531 2023-01-22 15:38:01.988547: step: 300/531, loss: 0.013964627869427204 2023-01-22 15:38:03.050631: step: 304/531, loss: 0.005327991209924221 2023-01-22 15:38:04.112071: step: 308/531, loss: 0.00338742695748806 2023-01-22 15:38:05.164457: step: 312/531, loss: 0.011840930208563805 2023-01-22 15:38:06.225790: step: 316/531, loss: 0.0028024010825902224 2023-01-22 15:38:07.280345: step: 320/531, loss: 0.004474756307899952 2023-01-22 15:38:08.340857: step: 324/531, loss: 0.00753122428432107 2023-01-22 15:38:09.420912: step: 328/531, loss: 0.0027843203861266375 2023-01-22 15:38:10.475938: step: 332/531, loss: 0.006605905015021563 2023-01-22 15:38:11.543040: step: 336/531, loss: 0.00018224156519863755 2023-01-22 15:38:12.607002: step: 340/531, loss: 0.0032902921084314585 2023-01-22 15:38:13.683099: step: 344/531, loss: 0.0013544083340093493 2023-01-22 15:38:14.738760: step: 348/531, loss: 6.108375964686275e-05 2023-01-22 15:38:15.799903: step: 352/531, loss: 0.0034663656260818243 2023-01-22 15:38:16.855248: step: 356/531, loss: 0.0003987389791291207 2023-01-22 15:38:17.904677: step: 360/531, loss: 0.06145963445305824 2023-01-22 15:38:18.971534: step: 364/531, loss: 0.000662085774820298 2023-01-22 15:38:20.036608: step: 368/531, loss: 0.04449881613254547 2023-01-22 15:38:21.109340: step: 372/531, loss: 0.0163415614515543 
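A note on the evaluation blocks logged at the end of each epoch (such as the epoch-4 block above): the reported 'combined' value is consistent with the product of the template F1 and the slot F1, each of which is the usual harmonic mean of the logged precision and recall. The sketch below only illustrates that relationship on the logged Dev Chinese numbers for epoch 4; the helper names are hypothetical and this is not code from train.py.

```python
# Illustration only (hypothetical helpers, not the repository's train.py):
# the logged 'combined' score appears to equal template_f1 * slot_f1.
def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0.0 if both are zero)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Dev Chinese, epoch 4, as logged above.
dev_chinese_epoch4 = {
    'template': {'p': 1.0, 'r': 0.5833333333333334},
    'slot': {'p': 0.3507714435146444, 'r': 0.31755445075757577},
}
print(combined_score(**dev_chinese_epoch4))  # ~0.24562, matching the logged 'combined': 0.24561708695197237
```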
2023-01-22 15:38:22.175316: step: 376/531, loss: 0.0033831375185400248 2023-01-22 15:38:23.239972: step: 380/531, loss: 0.0019204376731067896 2023-01-22 15:38:24.304239: step: 384/531, loss: 0.028147753328084946 2023-01-22 15:38:25.362725: step: 388/531, loss: 0.016038352623581886 2023-01-22 15:38:26.416574: step: 392/531, loss: 0.005063127260655165 2023-01-22 15:38:27.478549: step: 396/531, loss: 0.004314687568694353 2023-01-22 15:38:28.549340: step: 400/531, loss: 0.007408762816339731 2023-01-22 15:38:29.628767: step: 404/531, loss: 0.013506321236491203 2023-01-22 15:38:30.684469: step: 408/531, loss: 0.0043339235708117485 2023-01-22 15:38:31.743550: step: 412/531, loss: 0.00619980925694108 2023-01-22 15:38:32.816661: step: 416/531, loss: 0.014608023688197136 2023-01-22 15:38:33.880310: step: 420/531, loss: 0.002072980161756277 2023-01-22 15:38:34.952679: step: 424/531, loss: 0.010320314206182957 2023-01-22 15:38:36.037723: step: 428/531, loss: 0.011293102987110615 2023-01-22 15:38:37.110658: step: 432/531, loss: 0.007146671414375305 2023-01-22 15:38:38.183352: step: 436/531, loss: 0.004076071549206972 2023-01-22 15:38:39.246696: step: 440/531, loss: 0.01607760228216648 2023-01-22 15:38:40.296832: step: 444/531, loss: 0.0052183824591338634 2023-01-22 15:38:41.353589: step: 448/531, loss: 0.015845775604248047 2023-01-22 15:38:42.426482: step: 452/531, loss: 0.004702863749116659 2023-01-22 15:38:43.486654: step: 456/531, loss: 0.009268103167414665 2023-01-22 15:38:44.547031: step: 460/531, loss: 0.0185063686221838 2023-01-22 15:38:45.620149: step: 464/531, loss: 0.0049374946393072605 2023-01-22 15:38:46.689649: step: 468/531, loss: 0.008612715639173985 2023-01-22 15:38:47.743046: step: 472/531, loss: 0.002266595372930169 2023-01-22 15:38:48.803436: step: 476/531, loss: 0.006317255087196827 2023-01-22 15:38:49.862622: step: 480/531, loss: 0.029082240536808968 2023-01-22 15:38:50.920651: step: 484/531, loss: 0.013400383293628693 2023-01-22 15:38:52.014200: step: 488/531, loss: 0.003226094413548708 2023-01-22 15:38:53.071545: step: 492/531, loss: 0.001526180887594819 2023-01-22 15:38:54.161311: step: 496/531, loss: 0.003652245504781604 2023-01-22 15:38:55.226485: step: 500/531, loss: 0.010590340942144394 2023-01-22 15:38:56.290839: step: 504/531, loss: 0.014829112216830254 2023-01-22 15:38:57.363838: step: 508/531, loss: 0.0018887472106143832 2023-01-22 15:38:58.419971: step: 512/531, loss: 0.0031903600320219994 2023-01-22 15:38:59.473910: step: 516/531, loss: 0.0077584185637533665 2023-01-22 15:39:00.531031: step: 520/531, loss: 0.010188636370003223 2023-01-22 15:39:01.606365: step: 524/531, loss: 0.019377458840608597 2023-01-22 15:39:02.657270: step: 528/531, loss: 0.0036466431338340044 2023-01-22 15:39:03.708888: step: 532/531, loss: 0.004038535058498383 2023-01-22 15:39:04.764859: step: 536/531, loss: 0.010000564157962799 2023-01-22 15:39:05.829429: step: 540/531, loss: 0.01448139175772667 2023-01-22 15:39:06.898078: step: 544/531, loss: 0.004286718089133501 2023-01-22 15:39:07.950061: step: 548/531, loss: 0.013287190347909927 2023-01-22 15:39:09.012864: step: 552/531, loss: 0.0548272468149662 2023-01-22 15:39:10.076576: step: 556/531, loss: 0.03567224740982056 2023-01-22 15:39:11.143565: step: 560/531, loss: 0.007338162511587143 2023-01-22 15:39:12.216247: step: 564/531, loss: 0.0010257135145366192 2023-01-22 15:39:13.278510: step: 568/531, loss: 0.0068540871143341064 2023-01-22 15:39:14.340486: step: 572/531, loss: 0.004826688207685947 2023-01-22 15:39:15.394090: step: 576/531, loss: 
0.0063118766993284225 2023-01-22 15:39:16.479202: step: 580/531, loss: 0.04523809626698494 2023-01-22 15:39:17.549966: step: 584/531, loss: 0.008123817853629589 2023-01-22 15:39:18.622220: step: 588/531, loss: 0.004273754078894854 2023-01-22 15:39:19.717326: step: 592/531, loss: 0.040251363068819046 2023-01-22 15:39:20.779715: step: 596/531, loss: 0.009955678135156631 2023-01-22 15:39:21.839733: step: 600/531, loss: 0.004804521799087524 2023-01-22 15:39:22.907100: step: 604/531, loss: 0.028351513668894768 2023-01-22 15:39:23.956108: step: 608/531, loss: 0.15413495898246765 2023-01-22 15:39:25.029228: step: 612/531, loss: 0.006048975978046656 2023-01-22 15:39:26.080826: step: 616/531, loss: 0.021297432482242584 2023-01-22 15:39:27.131099: step: 620/531, loss: 0.011791897006332874 2023-01-22 15:39:28.196644: step: 624/531, loss: 0.016374753788113594 2023-01-22 15:39:29.253799: step: 628/531, loss: 0.0036152920220047235 2023-01-22 15:39:30.308977: step: 632/531, loss: 0.002369828289374709 2023-01-22 15:39:31.363823: step: 636/531, loss: 0.0028354148380458355 2023-01-22 15:39:32.413088: step: 640/531, loss: 0.0034248242154717445 2023-01-22 15:39:33.477174: step: 644/531, loss: 0.014805841259658337 2023-01-22 15:39:34.531608: step: 648/531, loss: 0.00849098339676857 2023-01-22 15:39:35.594246: step: 652/531, loss: 0.008847140707075596 2023-01-22 15:39:36.671505: step: 656/531, loss: 0.008732152171432972 2023-01-22 15:39:37.725577: step: 660/531, loss: 0.008818008005619049 2023-01-22 15:39:38.788704: step: 664/531, loss: 0.00877432618290186 2023-01-22 15:39:39.838259: step: 668/531, loss: 0.013980763033032417 2023-01-22 15:39:40.898162: step: 672/531, loss: 0.011354943737387657 2023-01-22 15:39:41.958137: step: 676/531, loss: 0.0053780353628098965 2023-01-22 15:39:43.027274: step: 680/531, loss: 0.0056334990076720715 2023-01-22 15:39:44.090740: step: 684/531, loss: 0.0019074814626947045 2023-01-22 15:39:45.149694: step: 688/531, loss: 0.0024077247362583876 2023-01-22 15:39:46.204601: step: 692/531, loss: 0.0013195687206462026 2023-01-22 15:39:47.264407: step: 696/531, loss: 0.025060025975108147 2023-01-22 15:39:48.309188: step: 700/531, loss: 0.009929392486810684 2023-01-22 15:39:49.385265: step: 704/531, loss: 0.00851244106888771 2023-01-22 15:39:50.447227: step: 708/531, loss: 0.01609143242239952 2023-01-22 15:39:51.518831: step: 712/531, loss: 0.02244430035352707 2023-01-22 15:39:52.574773: step: 716/531, loss: 0.007732793223112822 2023-01-22 15:39:53.637369: step: 720/531, loss: 0.00012667883129324764 2023-01-22 15:39:54.698734: step: 724/531, loss: 0.028393354266881943 2023-01-22 15:39:55.780478: step: 728/531, loss: 0.003548459615558386 2023-01-22 15:39:56.835248: step: 732/531, loss: 0.01752273552119732 2023-01-22 15:39:57.899955: step: 736/531, loss: 0.003290896536782384 2023-01-22 15:39:58.960054: step: 740/531, loss: 0.024473173543810844 2023-01-22 15:40:00.010845: step: 744/531, loss: 0.0007964319665916264 2023-01-22 15:40:01.089196: step: 748/531, loss: 0.004943354520946741 2023-01-22 15:40:02.147301: step: 752/531, loss: 0.027291763573884964 2023-01-22 15:40:03.224219: step: 756/531, loss: 0.03941899538040161 2023-01-22 15:40:04.285502: step: 760/531, loss: 0.00375573942437768 2023-01-22 15:40:05.359832: step: 764/531, loss: 0.008461283519864082 2023-01-22 15:40:06.428323: step: 768/531, loss: 0.0088475551456213 2023-01-22 15:40:07.482829: step: 772/531, loss: 0.0025104815140366554 2023-01-22 15:40:08.550447: step: 776/531, loss: 0.004422907251864672 2023-01-22 15:40:09.618494: 
step: 780/531, loss: 0.009042094461619854 2023-01-22 15:40:10.677951: step: 784/531, loss: 0.00742373988032341 2023-01-22 15:40:11.740425: step: 788/531, loss: 0.003960083704441786 2023-01-22 15:40:12.817656: step: 792/531, loss: 0.005032602231949568 2023-01-22 15:40:13.877416: step: 796/531, loss: 0.005871370434761047 2023-01-22 15:40:14.939162: step: 800/531, loss: 0.006120866630226374 2023-01-22 15:40:16.000813: step: 804/531, loss: 0.022875437512993813 2023-01-22 15:40:17.042514: step: 808/531, loss: 0.00894143432378769 2023-01-22 15:40:18.108849: step: 812/531, loss: 0.02909945510327816 2023-01-22 15:40:19.158108: step: 816/531, loss: 0.007247535511851311 2023-01-22 15:40:20.220275: step: 820/531, loss: 0.07022277265787125 2023-01-22 15:40:21.295590: step: 824/531, loss: 0.011695099994540215 2023-01-22 15:40:22.357017: step: 828/531, loss: 0.006673064548522234 2023-01-22 15:40:23.424612: step: 832/531, loss: 0.0058763278648257256 2023-01-22 15:40:24.470417: step: 836/531, loss: 0.004524358082562685 2023-01-22 15:40:25.515356: step: 840/531, loss: 0.01430712454020977 2023-01-22 15:40:26.568771: step: 844/531, loss: 0.004708596970885992 2023-01-22 15:40:27.630216: step: 848/531, loss: 0.001690601697191596 2023-01-22 15:40:28.684602: step: 852/531, loss: 0.015059088356792927 2023-01-22 15:40:29.747781: step: 856/531, loss: 0.006663259584456682 2023-01-22 15:40:30.805545: step: 860/531, loss: 0.003261934733018279 2023-01-22 15:40:31.851308: step: 864/531, loss: 0.009079797193408012 2023-01-22 15:40:32.918670: step: 868/531, loss: 0.008918702602386475 2023-01-22 15:40:33.971506: step: 872/531, loss: 0.0006527914083562791 2023-01-22 15:40:35.009032: step: 876/531, loss: 0.0022630339954048395 2023-01-22 15:40:36.082643: step: 880/531, loss: 0.03832429647445679 2023-01-22 15:40:37.158485: step: 884/531, loss: 0.012818587943911552 2023-01-22 15:40:38.210620: step: 888/531, loss: 0.011210795491933823 2023-01-22 15:40:39.269085: step: 892/531, loss: 0.0023321902845054865 2023-01-22 15:40:40.322511: step: 896/531, loss: 0.0075229620561003685 2023-01-22 15:40:41.383533: step: 900/531, loss: 0.016809718683362007 2023-01-22 15:40:42.444002: step: 904/531, loss: 0.008349324576556683 2023-01-22 15:40:43.505809: step: 908/531, loss: 0.008199167437851429 2023-01-22 15:40:44.558952: step: 912/531, loss: 0.003793461015447974 2023-01-22 15:40:45.616771: step: 916/531, loss: 0.04661436751484871 2023-01-22 15:40:46.662874: step: 920/531, loss: 0.006356748752295971 2023-01-22 15:40:47.754800: step: 924/531, loss: 0.007706102915108204 2023-01-22 15:40:48.814655: step: 928/531, loss: 0.008557467721402645 2023-01-22 15:40:49.881878: step: 932/531, loss: 0.009810470044612885 2023-01-22 15:40:50.947678: step: 936/531, loss: 0.005701807793229818 2023-01-22 15:40:51.999862: step: 940/531, loss: 0.0023188721388578415 2023-01-22 15:40:53.062055: step: 944/531, loss: 0.016089465469121933 2023-01-22 15:40:54.104077: step: 948/531, loss: 0.008515138179063797 2023-01-22 15:40:55.171482: step: 952/531, loss: 0.01171775534749031 2023-01-22 15:40:56.226481: step: 956/531, loss: 0.020042473450303078 2023-01-22 15:40:57.272901: step: 960/531, loss: 0.009194825775921345 2023-01-22 15:40:58.310404: step: 964/531, loss: 0.02214464731514454 2023-01-22 15:40:59.361547: step: 968/531, loss: 0.0253550224006176 2023-01-22 15:41:00.421421: step: 972/531, loss: 0.027331119403243065 2023-01-22 15:41:01.472005: step: 976/531, loss: 0.0048026093281805515 2023-01-22 15:41:02.525262: step: 980/531, loss: 0.012424202635884285 2023-01-22 
15:41:03.593133: step: 984/531, loss: 0.006939048878848553 2023-01-22 15:41:04.660890: step: 988/531, loss: 0.0032061359379440546 2023-01-22 15:41:05.709788: step: 992/531, loss: 0.006910219322890043 2023-01-22 15:41:06.764093: step: 996/531, loss: 0.0018666409887373447 2023-01-22 15:41:07.830596: step: 1000/531, loss: 0.0029800981283187866 2023-01-22 15:41:08.891888: step: 1004/531, loss: 0.0013396149734035134 2023-01-22 15:41:09.944279: step: 1008/531, loss: 0.003700954606756568 2023-01-22 15:41:11.010563: step: 1012/531, loss: 0.0013811460230499506 2023-01-22 15:41:12.093478: step: 1016/531, loss: 0.0015336914220824838 2023-01-22 15:41:13.159028: step: 1020/531, loss: 0.0021230322308838367 2023-01-22 15:41:14.248277: step: 1024/531, loss: 0.012486407533288002 2023-01-22 15:41:15.311792: step: 1028/531, loss: 0.015820514410734177 2023-01-22 15:41:16.365719: step: 1032/531, loss: 0.017405198886990547 2023-01-22 15:41:17.423073: step: 1036/531, loss: 0.0032268869690597057 2023-01-22 15:41:18.491624: step: 1040/531, loss: 0.00020192879310343415 2023-01-22 15:41:19.577972: step: 1044/531, loss: 0.003607572056353092 2023-01-22 15:41:20.642688: step: 1048/531, loss: 0.0029978856910020113 2023-01-22 15:41:21.699084: step: 1052/531, loss: 0.0069878557696938515 2023-01-22 15:41:22.761406: step: 1056/531, loss: 0.004222525283694267 2023-01-22 15:41:23.825397: step: 1060/531, loss: 0.006350035313516855 2023-01-22 15:41:24.884605: step: 1064/531, loss: 0.036838605999946594 2023-01-22 15:41:25.924887: step: 1068/531, loss: 0.0007744743488729 2023-01-22 15:41:26.981629: step: 1072/531, loss: 0.004105206113308668 2023-01-22 15:41:28.031278: step: 1076/531, loss: 0.0029686882626265287 2023-01-22 15:41:29.097052: step: 1080/531, loss: 0.005077075678855181 2023-01-22 15:41:30.155223: step: 1084/531, loss: 0.011331991292536259 2023-01-22 15:41:31.210883: step: 1088/531, loss: 0.003255445510149002 2023-01-22 15:41:32.270087: step: 1092/531, loss: 0.002899549901485443 2023-01-22 15:41:33.328195: step: 1096/531, loss: 0.0 2023-01-22 15:41:34.397121: step: 1100/531, loss: 0.036151282489299774 2023-01-22 15:41:35.449236: step: 1104/531, loss: 0.018611082807183266 2023-01-22 15:41:36.506397: step: 1108/531, loss: 0.0024892115034163 2023-01-22 15:41:37.562026: step: 1112/531, loss: 0.00424707867205143 2023-01-22 15:41:38.607943: step: 1116/531, loss: 0.0056391311809420586 2023-01-22 15:41:39.659356: step: 1120/531, loss: 0.005849056877195835 2023-01-22 15:41:40.733237: step: 1124/531, loss: 0.012019841000437737 2023-01-22 15:41:41.795441: step: 1128/531, loss: 0.004478851333260536 2023-01-22 15:41:42.850879: step: 1132/531, loss: 0.011968092992901802 2023-01-22 15:41:43.901276: step: 1136/531, loss: 0.007573192939162254 2023-01-22 15:41:44.955185: step: 1140/531, loss: 0.007035081274807453 2023-01-22 15:41:46.026193: step: 1144/531, loss: 0.0040196990594267845 2023-01-22 15:41:47.083128: step: 1148/531, loss: 0.004365410190075636 2023-01-22 15:41:48.147826: step: 1152/531, loss: 0.007847755216062069 2023-01-22 15:41:49.190942: step: 1156/531, loss: 0.009195538237690926 2023-01-22 15:41:50.255277: step: 1160/531, loss: 0.008788717910647392 2023-01-22 15:41:51.316661: step: 1164/531, loss: 0.03694355860352516 2023-01-22 15:41:52.361820: step: 1168/531, loss: 0.0006039888830855489 2023-01-22 15:41:53.430242: step: 1172/531, loss: 0.0034986878745257854 2023-01-22 15:41:54.481255: step: 1176/531, loss: 0.009941834956407547 2023-01-22 15:41:55.518275: step: 1180/531, loss: 0.006046846974641085 2023-01-22 
15:41:56.593370: step: 1184/531, loss: 0.00839924719184637 2023-01-22 15:41:57.646340: step: 1188/531, loss: 0.0026115509681403637 2023-01-22 15:41:58.710468: step: 1192/531, loss: 0.030738770961761475 2023-01-22 15:41:59.762342: step: 1196/531, loss: 0.004127085208892822 2023-01-22 15:42:00.824545: step: 1200/531, loss: 0.0032245481852442026 2023-01-22 15:42:01.880289: step: 1204/531, loss: 0.0173778235912323 2023-01-22 15:42:02.947069: step: 1208/531, loss: 0.0036086291074752808 2023-01-22 15:42:04.017171: step: 1212/531, loss: 0.011766073293983936 2023-01-22 15:42:05.100259: step: 1216/531, loss: 0.0042497157119214535 2023-01-22 15:42:06.158819: step: 1220/531, loss: 0.007871869951486588 2023-01-22 15:42:07.211334: step: 1224/531, loss: 0.005298707168549299 2023-01-22 15:42:08.264652: step: 1228/531, loss: 0.0012293810723349452 2023-01-22 15:42:09.317426: step: 1232/531, loss: 0.014326884411275387 2023-01-22 15:42:10.360818: step: 1236/531, loss: 0.0021404060535132885 2023-01-22 15:42:11.420914: step: 1240/531, loss: 0.0051089320331811905 2023-01-22 15:42:12.481930: step: 1244/531, loss: 0.003313001012429595 2023-01-22 15:42:13.568137: step: 1248/531, loss: 0.005388742778450251 2023-01-22 15:42:14.633049: step: 1252/531, loss: 0.020495640113949776 2023-01-22 15:42:15.696093: step: 1256/531, loss: 0.005792425014078617 2023-01-22 15:42:16.763135: step: 1260/531, loss: 0.004559899214655161 2023-01-22 15:42:17.828217: step: 1264/531, loss: 0.007380574010312557 2023-01-22 15:42:18.886980: step: 1268/531, loss: 0.00351132033392787 2023-01-22 15:42:19.955585: step: 1272/531, loss: 0.009252430871129036 2023-01-22 15:42:21.019805: step: 1276/531, loss: 0.0005795079632662237 2023-01-22 15:42:22.076372: step: 1280/531, loss: 0.0027042715810239315 2023-01-22 15:42:23.137563: step: 1284/531, loss: 0.005632023327052593 2023-01-22 15:42:24.189093: step: 1288/531, loss: 0.008819757960736752 2023-01-22 15:42:25.251897: step: 1292/531, loss: 0.0069810510613024235 2023-01-22 15:42:26.319322: step: 1296/531, loss: 0.011823880486190319 2023-01-22 15:42:27.370592: step: 1300/531, loss: 0.003263703780248761 2023-01-22 15:42:28.427414: step: 1304/531, loss: 0.002039700048044324 2023-01-22 15:42:29.490391: step: 1308/531, loss: 0.004265934694558382 2023-01-22 15:42:30.541493: step: 1312/531, loss: 0.015694238245487213 2023-01-22 15:42:31.592027: step: 1316/531, loss: 0.00418870011344552 2023-01-22 15:42:32.645564: step: 1320/531, loss: 0.00011992788495263085 2023-01-22 15:42:33.738982: step: 1324/531, loss: 0.007078006863594055 2023-01-22 15:42:34.799687: step: 1328/531, loss: 0.05498579889535904 2023-01-22 15:42:35.845537: step: 1332/531, loss: 0.015058860182762146 2023-01-22 15:42:36.901322: step: 1336/531, loss: 0.029800251126289368 2023-01-22 15:42:37.961223: step: 1340/531, loss: 0.010312286205589771 2023-01-22 15:42:39.020072: step: 1344/531, loss: 0.004535887856036425 2023-01-22 15:42:40.080660: step: 1348/531, loss: 0.025628194212913513 2023-01-22 15:42:41.139235: step: 1352/531, loss: 0.01830103062093258 2023-01-22 15:42:42.200627: step: 1356/531, loss: 0.02631601318717003 2023-01-22 15:42:43.280254: step: 1360/531, loss: 0.004949101246893406 2023-01-22 15:42:44.345117: step: 1364/531, loss: 0.02413923293352127 2023-01-22 15:42:45.403633: step: 1368/531, loss: 0.0020132192876189947 2023-01-22 15:42:46.474201: step: 1372/531, loss: 0.004399430006742477 2023-01-22 15:42:47.525974: step: 1376/531, loss: 0.004157388582825661 2023-01-22 15:42:48.591805: step: 1380/531, loss: 0.013954339548945427 2023-01-22 
15:42:49.654849: step: 1384/531, loss: 0.020859219133853912 2023-01-22 15:42:50.713314: step: 1388/531, loss: 0.007197873666882515 2023-01-22 15:42:51.770121: step: 1392/531, loss: 0.00767285143956542 2023-01-22 15:42:52.837560: step: 1396/531, loss: 0.000982769881375134 2023-01-22 15:42:53.897056: step: 1400/531, loss: 0.015042808838188648 2023-01-22 15:42:54.956257: step: 1404/531, loss: 0.006504209712147713 2023-01-22 15:42:55.996338: step: 1408/531, loss: 0.0017136555397883058 2023-01-22 15:42:57.060178: step: 1412/531, loss: 0.010274010710418224 2023-01-22 15:42:58.114297: step: 1416/531, loss: 0.003196256933733821 2023-01-22 15:42:59.183809: step: 1420/531, loss: 0.017529016360640526 2023-01-22 15:43:00.228976: step: 1424/531, loss: 0.004103458486497402 2023-01-22 15:43:01.294404: step: 1428/531, loss: 0.006388582289218903 2023-01-22 15:43:02.350762: step: 1432/531, loss: 0.014448893256485462 2023-01-22 15:43:03.402886: step: 1436/531, loss: 0.009344315156340599 2023-01-22 15:43:04.474174: step: 1440/531, loss: 0.014278499409556389 2023-01-22 15:43:05.532278: step: 1444/531, loss: 0.0011535405647009611 2023-01-22 15:43:06.593003: step: 1448/531, loss: 0.0016149998409673572 2023-01-22 15:43:07.660939: step: 1452/531, loss: 0.011326124891638756 2023-01-22 15:43:08.722008: step: 1456/531, loss: 0.0021855556406080723 2023-01-22 15:43:09.780474: step: 1460/531, loss: 0.018211137503385544 2023-01-22 15:43:10.858441: step: 1464/531, loss: 0.005283666774630547 2023-01-22 15:43:11.934756: step: 1468/531, loss: 0.026966135948896408 2023-01-22 15:43:12.989100: step: 1472/531, loss: 0.003373767016455531 2023-01-22 15:43:14.050022: step: 1476/531, loss: 0.00674789072945714 2023-01-22 15:43:15.116970: step: 1480/531, loss: 0.040469296276569366 2023-01-22 15:43:16.182312: step: 1484/531, loss: 0.03187422454357147 2023-01-22 15:43:17.236005: step: 1488/531, loss: 0.00416473438963294 2023-01-22 15:43:18.300325: step: 1492/531, loss: 0.012027925811707973 2023-01-22 15:43:19.356050: step: 1496/531, loss: 0.030837608501315117 2023-01-22 15:43:20.405692: step: 1500/531, loss: 0.007405830081552267 2023-01-22 15:43:21.439976: step: 1504/531, loss: 0.004102893639355898 2023-01-22 15:43:22.533509: step: 1508/531, loss: 0.021001551300287247 2023-01-22 15:43:23.598463: step: 1512/531, loss: 0.015309328213334084 2023-01-22 15:43:24.667938: step: 1516/531, loss: 0.03283904865384102 2023-01-22 15:43:25.720123: step: 1520/531, loss: 0.019317546859383583 2023-01-22 15:43:26.766160: step: 1524/531, loss: 0.008958989754319191 2023-01-22 15:43:27.826826: step: 1528/531, loss: 0.0031436244025826454 2023-01-22 15:43:28.880496: step: 1532/531, loss: 0.0053840335458517075 2023-01-22 15:43:29.933713: step: 1536/531, loss: 0.010995203629136086 2023-01-22 15:43:31.005357: step: 1540/531, loss: 0.0065671843476593494 2023-01-22 15:43:32.066697: step: 1544/531, loss: 0.002719355747103691 2023-01-22 15:43:33.118768: step: 1548/531, loss: 0.01125891599804163 2023-01-22 15:43:34.171572: step: 1552/531, loss: 0.016793832182884216 2023-01-22 15:43:35.225814: step: 1556/531, loss: 0.004078240599483252 2023-01-22 15:43:36.295171: step: 1560/531, loss: 0.0009361952543258667 2023-01-22 15:43:37.353456: step: 1564/531, loss: 0.0048470571637153625 2023-01-22 15:43:38.403124: step: 1568/531, loss: 0.0046816421672701836 2023-01-22 15:43:39.483244: step: 1572/531, loss: 0.004210076294839382 2023-01-22 15:43:40.544611: step: 1576/531, loss: 0.002407207153737545 2023-01-22 15:43:41.619115: step: 1580/531, loss: 0.004963562358170748 2023-01-22 
15:43:42.681870: step: 1584/531, loss: 0.015713829547166824 2023-01-22 15:43:43.749445: step: 1588/531, loss: 0.007654715795069933 2023-01-22 15:43:44.804802: step: 1592/531, loss: 0.005019447300583124 2023-01-22 15:43:45.850847: step: 1596/531, loss: 0.003619999159127474 2023-01-22 15:43:46.916767: step: 1600/531, loss: 0.0058888718485832214 2023-01-22 15:43:47.977439: step: 1604/531, loss: 0.006739064119756222 2023-01-22 15:43:49.033409: step: 1608/531, loss: 0.01315231528133154 2023-01-22 15:43:50.095805: step: 1612/531, loss: 0.004951538518071175 2023-01-22 15:43:51.152168: step: 1616/531, loss: 0.0008648558869026601 2023-01-22 15:43:52.206547: step: 1620/531, loss: 0.01770302839577198 2023-01-22 15:43:53.259517: step: 1624/531, loss: 0.0030953509267419577 2023-01-22 15:43:54.339801: step: 1628/531, loss: 0.01373161282390356 2023-01-22 15:43:55.402180: step: 1632/531, loss: 0.01755007915198803 2023-01-22 15:43:56.476208: step: 1636/531, loss: 0.010792574845254421 2023-01-22 15:43:57.533754: step: 1640/531, loss: 0.0038749745581299067 2023-01-22 15:43:58.594700: step: 1644/531, loss: 0.006427703890949488 2023-01-22 15:43:59.656706: step: 1648/531, loss: 0.005678446032106876 2023-01-22 15:44:00.741089: step: 1652/531, loss: 0.009267380461096764 2023-01-22 15:44:01.801061: step: 1656/531, loss: 0.006418990902602673 2023-01-22 15:44:02.858301: step: 1660/531, loss: 0.006146763917058706 2023-01-22 15:44:03.917963: step: 1664/531, loss: 0.00021611290867440403 2023-01-22 15:44:04.976102: step: 1668/531, loss: 0.013222035951912403 2023-01-22 15:44:06.044460: step: 1672/531, loss: 0.026508115231990814 2023-01-22 15:44:07.108005: step: 1676/531, loss: 0.001554515096358955 2023-01-22 15:44:08.184417: step: 1680/531, loss: 0.006360173225402832 2023-01-22 15:44:09.250057: step: 1684/531, loss: 0.0021642367355525494 2023-01-22 15:44:10.316074: step: 1688/531, loss: 0.0018947167554870248 2023-01-22 15:44:11.381978: step: 1692/531, loss: 0.002895057899877429 2023-01-22 15:44:12.437204: step: 1696/531, loss: 0.0023906559217721224 2023-01-22 15:44:13.507527: step: 1700/531, loss: 0.004442200995981693 2023-01-22 15:44:14.559978: step: 1704/531, loss: 0.0034768462646752596 2023-01-22 15:44:15.619891: step: 1708/531, loss: 0.0036459180992096663 2023-01-22 15:44:16.681055: step: 1712/531, loss: 0.003067202167585492 2023-01-22 15:44:17.739212: step: 1716/531, loss: 0.00449244724586606 2023-01-22 15:44:18.792986: step: 1720/531, loss: 0.019509945064783096 2023-01-22 15:44:19.847545: step: 1724/531, loss: 0.0059991031885147095 2023-01-22 15:44:20.907934: step: 1728/531, loss: 0.007486745715141296 2023-01-22 15:44:21.945786: step: 1732/531, loss: 0.005369197111576796 2023-01-22 15:44:23.022841: step: 1736/531, loss: 0.003810570575296879 2023-01-22 15:44:24.071877: step: 1740/531, loss: 0.004245352931320667 2023-01-22 15:44:25.133029: step: 1744/531, loss: 0.009846829809248447 2023-01-22 15:44:26.211216: step: 1748/531, loss: 0.0033084116876125336 2023-01-22 15:44:27.276530: step: 1752/531, loss: 0.0009926391066983342 2023-01-22 15:44:28.352434: step: 1756/531, loss: 0.0061272066086530685 2023-01-22 15:44:29.421466: step: 1760/531, loss: 0.0005276908632367849 2023-01-22 15:44:30.485382: step: 1764/531, loss: 0.004491967614740133 2023-01-22 15:44:31.528948: step: 1768/531, loss: 0.00498414458706975 2023-01-22 15:44:32.590462: step: 1772/531, loss: 0.005609001498669386 2023-01-22 15:44:33.644898: step: 1776/531, loss: 0.005922937300056219 2023-01-22 15:44:34.717301: step: 1780/531, loss: 0.010900807566940784 
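The step counter in these lines reaches 2124/531 at the end of every epoch, i.e. 531 × 4, which lines up with --batch_size 16 --accumulate_step 4 in the command above: assuming accumulate_step has its usual meaning, gradients from four batches of 16 are summed before each optimizer update, giving 531 updates per epoch at an effective batch size of 64. A minimal, self-contained sketch of that pattern (dummy model and data, not the actual train.py loop):

```python
import torch
from torch import nn

# Gradient-accumulation sketch: one optimizer update per accumulate_step micro-batches,
# so 531 updates correspond to 531 * 4 = 2124 logged steps per epoch.
batch_size, accumulate_step, updates_per_epoch = 16, 4, 531
model = nn.Linear(8, 1)                               # dummy stand-in for the coref model
optimizer = torch.optim.AdamW(model.parameters(), lr=9e-4)

optimizer.zero_grad()
for step in range(1, updates_per_epoch * accumulate_step + 1):
    x = torch.randn(batch_size, 8)                    # stand-in for a real batch of 16 examples
    loss = model(x).pow(2).mean() / accumulate_step   # scale so summed gradients average over 64 examples
    loss.backward()
    if step % accumulate_step == 0:                   # every 4th micro-batch -> one parameter update
        optimizer.step()
        optimizer.zero_grad()
```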
2023-01-22 15:44:35.772306: step: 1784/531, loss: 0.03198820725083351 2023-01-22 15:44:36.827443: step: 1788/531, loss: 0.014963364228606224 2023-01-22 15:44:37.888944: step: 1792/531, loss: 0.009051097556948662 2023-01-22 15:44:38.964649: step: 1796/531, loss: 0.09195971488952637 2023-01-22 15:44:40.028563: step: 1800/531, loss: 0.011376633308827877 2023-01-22 15:44:41.078602: step: 1804/531, loss: 0.029947912320494652 2023-01-22 15:44:42.137364: step: 1808/531, loss: 0.022808456793427467 2023-01-22 15:44:43.203466: step: 1812/531, loss: 0.0034042480401694775 2023-01-22 15:44:44.262079: step: 1816/531, loss: 0.003545522689819336 2023-01-22 15:44:45.312924: step: 1820/531, loss: 0.01330691296607256 2023-01-22 15:44:46.357945: step: 1824/531, loss: 0.004876063670963049 2023-01-22 15:44:47.409549: step: 1828/531, loss: 0.0007383336196653545 2023-01-22 15:44:48.463575: step: 1832/531, loss: 0.06327845901250839 2023-01-22 15:44:49.525103: step: 1836/531, loss: 0.10722129791975021 2023-01-22 15:44:50.584922: step: 1840/531, loss: 0.038653433322906494 2023-01-22 15:44:51.634867: step: 1844/531, loss: 0.007030686363577843 2023-01-22 15:44:52.699627: step: 1848/531, loss: 0.008413450792431831 2023-01-22 15:44:53.770575: step: 1852/531, loss: 0.019486885517835617 2023-01-22 15:44:54.815597: step: 1856/531, loss: 0.013278994709253311 2023-01-22 15:44:55.856307: step: 1860/531, loss: 0.01258185226470232 2023-01-22 15:44:56.917538: step: 1864/531, loss: 0.0066006239503622055 2023-01-22 15:44:57.979282: step: 1868/531, loss: 0.0067676883190870285 2023-01-22 15:44:59.023597: step: 1872/531, loss: 0.00558567326515913 2023-01-22 15:45:00.091871: step: 1876/531, loss: 0.032826460897922516 2023-01-22 15:45:01.164108: step: 1880/531, loss: 0.007169494871050119 2023-01-22 15:45:02.235073: step: 1884/531, loss: 0.005921301431953907 2023-01-22 15:45:03.310652: step: 1888/531, loss: 0.0006818806868977845 2023-01-22 15:45:04.382894: step: 1892/531, loss: 0.00084651232464239 2023-01-22 15:45:05.439166: step: 1896/531, loss: 0.03537513688206673 2023-01-22 15:45:06.496472: step: 1900/531, loss: 0.007354011293500662 2023-01-22 15:45:07.544670: step: 1904/531, loss: 0.013846050947904587 2023-01-22 15:45:08.615729: step: 1908/531, loss: 0.0018523391336202621 2023-01-22 15:45:09.685674: step: 1912/531, loss: 0.01390829123556614 2023-01-22 15:45:10.772798: step: 1916/531, loss: 0.01004581619054079 2023-01-22 15:45:11.856356: step: 1920/531, loss: 0.0026891790330410004 2023-01-22 15:45:12.927061: step: 1924/531, loss: 0.023877356201410294 2023-01-22 15:45:13.976476: step: 1928/531, loss: 0.017211874946951866 2023-01-22 15:45:15.039107: step: 1932/531, loss: 0.007620025891810656 2023-01-22 15:45:16.132790: step: 1936/531, loss: 0.005514797288924456 2023-01-22 15:45:17.196219: step: 1940/531, loss: 0.0031915786676108837 2023-01-22 15:45:18.259259: step: 1944/531, loss: 0.0018358406377956271 2023-01-22 15:45:19.315898: step: 1948/531, loss: 0.006972788833081722 2023-01-22 15:45:20.371388: step: 1952/531, loss: 0.00932492595165968 2023-01-22 15:45:21.434274: step: 1956/531, loss: 0.002349860966205597 2023-01-22 15:45:22.478144: step: 1960/531, loss: 0.02525022253394127 2023-01-22 15:45:23.546333: step: 1964/531, loss: 0.029251618310809135 2023-01-22 15:45:24.586274: step: 1968/531, loss: 0.006897643208503723 2023-01-22 15:45:25.644065: step: 1972/531, loss: 0.023039130493998528 2023-01-22 15:45:26.711569: step: 1976/531, loss: 0.008329298347234726 2023-01-22 15:45:27.782467: step: 1980/531, loss: 0.004423499573022127 
2023-01-22 15:45:28.839067: step: 1984/531, loss: 0.007672346197068691 2023-01-22 15:45:29.909645: step: 1988/531, loss: 0.007056684233248234 2023-01-22 15:45:30.966022: step: 1992/531, loss: 0.015146947465837002 2023-01-22 15:45:32.015051: step: 1996/531, loss: 0.0074618845246732235 2023-01-22 15:45:33.063446: step: 2000/531, loss: 0.007594224065542221 2023-01-22 15:45:34.112120: step: 2004/531, loss: 0.014378862455487251 2023-01-22 15:45:35.181525: step: 2008/531, loss: 0.01346305850893259 2023-01-22 15:45:36.236514: step: 2012/531, loss: 0.0124990688636899 2023-01-22 15:45:37.284809: step: 2016/531, loss: 0.002079632366076112 2023-01-22 15:45:38.341900: step: 2020/531, loss: 0.003100641770288348 2023-01-22 15:45:39.415191: step: 2024/531, loss: 0.0057336376048624516 2023-01-22 15:45:40.466327: step: 2028/531, loss: 0.0375279076397419 2023-01-22 15:45:41.557597: step: 2032/531, loss: 0.04547116532921791 2023-01-22 15:45:42.612887: step: 2036/531, loss: 0.0145768653601408 2023-01-22 15:45:43.675848: step: 2040/531, loss: 0.0009005660540424287 2023-01-22 15:45:44.728402: step: 2044/531, loss: 0.004415468312799931 2023-01-22 15:45:45.775729: step: 2048/531, loss: 0.0009496543207205832 2023-01-22 15:45:46.815753: step: 2052/531, loss: 0.0001494246971560642 2023-01-22 15:45:47.890319: step: 2056/531, loss: 0.008947949856519699 2023-01-22 15:45:48.961128: step: 2060/531, loss: 0.005062913987785578 2023-01-22 15:45:50.031425: step: 2064/531, loss: 0.011062598787248135 2023-01-22 15:45:51.081310: step: 2068/531, loss: 0.0024848771281540394 2023-01-22 15:45:52.157286: step: 2072/531, loss: 0.002251523081213236 2023-01-22 15:45:53.214924: step: 2076/531, loss: 0.010579647496342659 2023-01-22 15:45:54.276179: step: 2080/531, loss: 0.009761269204318523 2023-01-22 15:45:55.329962: step: 2084/531, loss: 0.008730175904929638 2023-01-22 15:45:56.387349: step: 2088/531, loss: 0.004113091621547937 2023-01-22 15:45:57.453042: step: 2092/531, loss: 0.013936447910964489 2023-01-22 15:45:58.502712: step: 2096/531, loss: 0.0006778360111638904 2023-01-22 15:45:59.555828: step: 2100/531, loss: 0.0076556517742574215 2023-01-22 15:46:00.617506: step: 2104/531, loss: 0.009191824123263359 2023-01-22 15:46:01.662522: step: 2108/531, loss: 0.013487404212355614 2023-01-22 15:46:02.727582: step: 2112/531, loss: 0.008738575503230095 2023-01-22 15:46:03.800236: step: 2116/531, loss: 0.018730249255895615 2023-01-22 15:46:04.849955: step: 2120/531, loss: 0.004661012906581163 2023-01-22 15:46:05.899460: step: 2124/531, loss: 0.007966527715325356 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35203092243186584, 'r': 0.31802793560606063, 'f1': 0.33416666666666667}, 'combined': 0.24622807017543857, 'stategy': 1, 'epoch': 5} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33712455163719013, 'r': 0.2774888006255572, 'f1': 0.304413456527839}, 'combined': 0.19061403352677772, 'stategy': 1, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 
0.2051158103396977, 'stategy': 1, 'epoch': 5}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 5}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5}
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355702906162465, 'r': 0.3206715593434344, 'f1': 0.3372800464807437}, 'combined': 0.24852213951212693, 'stategy': 1, 'epoch': 1}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3363021832161375, 'r': 0.27681190531869804, 'f1': 0.3036708822704133}, 'combined': 0.19014905712259525, 'stategy': 1, 'epoch': 1}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 1}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5}
******************************
Epoch: 6 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 
4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 15:48:46.948786: step: 4/531, loss: 0.0029432920273393393 2023-01-22 15:48:48.019250: step: 8/531, loss: 0.012537912465631962 2023-01-22 15:48:49.080158: step: 12/531, loss: 0.006358463317155838 2023-01-22 15:48:50.131855: step: 16/531, loss: 0.01048597227782011 2023-01-22 15:48:51.190447: step: 20/531, loss: 0.03826751559972763 2023-01-22 15:48:52.253180: step: 24/531, loss: 0.007358907721936703 2023-01-22 15:48:53.331290: step: 28/531, loss: 0.0007023790967650712 2023-01-22 15:48:54.390087: step: 32/531, loss: 0.010311347432434559 2023-01-22 15:48:55.442493: step: 36/531, loss: 0.009779251180589199 2023-01-22 15:48:56.506906: step: 40/531, loss: 0.00765445688739419 2023-01-22 15:48:57.567474: step: 44/531, loss: 0.007590987719595432 2023-01-22 15:48:58.635772: step: 48/531, loss: 0.004070092923939228 2023-01-22 15:48:59.704444: step: 52/531, loss: 0.013553265482187271 2023-01-22 15:49:00.760358: step: 56/531, loss: 0.00460100919008255 2023-01-22 15:49:01.823551: step: 60/531, loss: 0.0018755365163087845 2023-01-22 15:49:02.892521: step: 64/531, loss: 0.006594268139451742 2023-01-22 15:49:03.966736: step: 68/531, loss: 0.021202603355050087 2023-01-22 15:49:05.025561: step: 72/531, loss: 0.0051115211099386215 2023-01-22 15:49:06.106263: step: 76/531, loss: 0.011831862851977348 2023-01-22 15:49:07.203578: step: 80/531, loss: 0.00372308655641973 2023-01-22 15:49:08.277634: step: 84/531, loss: 0.0059432159177958965 2023-01-22 15:49:09.352018: step: 88/531, loss: 0.002925391076132655 2023-01-22 15:49:10.417868: step: 92/531, loss: 0.007892461493611336 2023-01-22 15:49:11.491192: step: 96/531, loss: 0.004295353312045336 2023-01-22 15:49:12.576288: step: 100/531, loss: 0.003130609868094325 2023-01-22 15:49:13.635675: step: 104/531, loss: 0.003559037111699581 2023-01-22 15:49:14.698241: step: 108/531, loss: 0.03169618546962738 2023-01-22 15:49:15.771024: step: 112/531, loss: 0.0016914688749238849 2023-01-22 15:49:16.820066: step: 116/531, loss: 0.00453194510191679 2023-01-22 15:49:17.881010: step: 120/531, loss: 0.004311460070312023 2023-01-22 15:49:18.950361: step: 124/531, loss: 0.00015067994536366314 2023-01-22 15:49:20.014228: step: 128/531, loss: 0.01180365588515997 2023-01-22 15:49:21.077824: step: 132/531, loss: 0.0068669915199279785 2023-01-22 15:49:22.136924: step: 136/531, loss: 0.014179384335875511 2023-01-22 15:49:23.222458: step: 140/531, loss: 0.009227036498486996 2023-01-22 15:49:24.271133: step: 144/531, loss: 0.0031901486217975616 2023-01-22 15:49:25.324758: step: 148/531, loss: 0.010394065640866756 2023-01-22 15:49:26.374894: step: 152/531, loss: 0.005526988301426172 2023-01-22 15:49:27.437297: step: 156/531, loss: 0.007694893982261419 2023-01-22 15:49:28.504824: step: 160/531, loss: 0.018761100247502327 2023-01-22 15:49:29.560682: step: 164/531, loss: 0.003636488225311041 2023-01-22 15:49:30.609835: step: 168/531, loss: 0.007499323692172766 2023-01-22 15:49:31.673133: step: 172/531, loss: 0.0012549867387861013 2023-01-22 15:49:32.734600: step: 176/531, loss: 0.017907671630382538 2023-01-22 15:49:33.816195: step: 180/531, loss: 0.004827454686164856 2023-01-22 15:49:34.881304: step: 184/531, loss: 0.011534781195223331 2023-01-22 15:49:35.936416: step: 188/531, loss: 0.010364646092057228 2023-01-22 15:49:37.010453: step: 192/531, loss: 0.02631274238228798 2023-01-22 15:49:38.077316: step: 196/531, loss: 0.05170430988073349 2023-01-22 15:49:39.142503: step: 200/531, loss: 
0.009858191013336182 2023-01-22 15:49:40.207712: step: 204/531, loss: 0.002230084501206875 2023-01-22 15:49:41.266008: step: 208/531, loss: 0.03495972976088524 2023-01-22 15:49:42.343777: step: 212/531, loss: 0.006210052873939276 2023-01-22 15:49:43.403809: step: 216/531, loss: 0.0016872802516445518 2023-01-22 15:49:44.466824: step: 220/531, loss: 0.006086660083383322 2023-01-22 15:49:45.538121: step: 224/531, loss: 0.006006134673953056 2023-01-22 15:49:46.614844: step: 228/531, loss: 0.011503309942781925 2023-01-22 15:49:47.689171: step: 232/531, loss: 0.004908869042992592 2023-01-22 15:49:48.752146: step: 236/531, loss: 0.030399464070796967 2023-01-22 15:49:49.815266: step: 240/531, loss: 0.01036855112761259 2023-01-22 15:49:50.892685: step: 244/531, loss: 0.005475116427987814 2023-01-22 15:49:51.967002: step: 248/531, loss: 0.005370229482650757 2023-01-22 15:49:53.028204: step: 252/531, loss: 0.08954307436943054 2023-01-22 15:49:54.078803: step: 256/531, loss: 0.0033486774191260338 2023-01-22 15:49:55.143688: step: 260/531, loss: 0.009520561434328556 2023-01-22 15:49:56.199009: step: 264/531, loss: 0.009070155210793018 2023-01-22 15:49:57.278414: step: 268/531, loss: 0.005462727043777704 2023-01-22 15:49:58.337455: step: 272/531, loss: 0.003876342670992017 2023-01-22 15:49:59.405275: step: 276/531, loss: 0.0036967338528484106 2023-01-22 15:50:00.455464: step: 280/531, loss: 0.016257284209132195 2023-01-22 15:50:01.522039: step: 284/531, loss: 0.015393643639981747 2023-01-22 15:50:02.584577: step: 288/531, loss: 0.017000624909996986 2023-01-22 15:50:03.646585: step: 292/531, loss: 0.0014995795208960772 2023-01-22 15:50:04.716878: step: 296/531, loss: 0.0019912030547857285 2023-01-22 15:50:05.772672: step: 300/531, loss: 0.018623339012265205 2023-01-22 15:50:06.839116: step: 304/531, loss: 0.0032100719399750233 2023-01-22 15:50:07.922821: step: 308/531, loss: 0.008174534887075424 2023-01-22 15:50:08.978557: step: 312/531, loss: 0.006355820689350367 2023-01-22 15:50:10.041566: step: 316/531, loss: 0.004291565623134375 2023-01-22 15:50:11.097528: step: 320/531, loss: 0.01585438847541809 2023-01-22 15:50:12.181372: step: 324/531, loss: 0.008459637872874737 2023-01-22 15:50:13.260972: step: 328/531, loss: 0.0025221228133887053 2023-01-22 15:50:14.337495: step: 332/531, loss: 0.006429034750908613 2023-01-22 15:50:15.401359: step: 336/531, loss: 0.01835118792951107 2023-01-22 15:50:16.463030: step: 340/531, loss: 0.0032088810112327337 2023-01-22 15:50:17.529743: step: 344/531, loss: 0.008852764032781124 2023-01-22 15:50:18.602096: step: 348/531, loss: 0.0006368711474351585 2023-01-22 15:50:19.690656: step: 352/531, loss: 0.0020705857314169407 2023-01-22 15:50:20.748982: step: 356/531, loss: 0.0012071322416886687 2023-01-22 15:50:21.829769: step: 360/531, loss: 0.06297539174556732 2023-01-22 15:50:22.902043: step: 364/531, loss: 0.0025182832032442093 2023-01-22 15:50:23.969396: step: 368/531, loss: 0.003892142791301012 2023-01-22 15:50:25.025345: step: 372/531, loss: 0.0023428681306540966 2023-01-22 15:50:26.079875: step: 376/531, loss: 0.011905311606824398 2023-01-22 15:50:27.134200: step: 380/531, loss: 0.00553668849170208 2023-01-22 15:50:28.195687: step: 384/531, loss: 0.0015418021939694881 2023-01-22 15:50:29.249999: step: 388/531, loss: 0.00022266483574640006 2023-01-22 15:50:30.308242: step: 392/531, loss: 0.07920946180820465 2023-01-22 15:50:31.371560: step: 396/531, loss: 0.039017874747514725 2023-01-22 15:50:32.427959: step: 400/531, loss: 0.036102745682001114 2023-01-22 
15:50:33.500057: step: 404/531, loss: 0.005013606045395136 2023-01-22 15:50:34.541547: step: 408/531, loss: 0.0035922450479120016 2023-01-22 15:50:35.632430: step: 412/531, loss: 0.005755963735282421 2023-01-22 15:50:36.680486: step: 416/531, loss: 0.058614909648895264 2023-01-22 15:50:37.765388: step: 420/531, loss: 0.010722002945840359 2023-01-22 15:50:38.820657: step: 424/531, loss: 0.010336421430110931 2023-01-22 15:50:39.903747: step: 428/531, loss: 0.010240410454571247 2023-01-22 15:50:40.971416: step: 432/531, loss: 0.0035464453976601362 2023-01-22 15:50:42.031796: step: 436/531, loss: 0.004397819750010967 2023-01-22 15:50:43.091733: step: 440/531, loss: 0.0069640446454286575 2023-01-22 15:50:44.174312: step: 444/531, loss: 0.008963567204773426 2023-01-22 15:50:45.241320: step: 448/531, loss: 0.010772636160254478 2023-01-22 15:50:46.298101: step: 452/531, loss: 0.02769404649734497 2023-01-22 15:50:47.376875: step: 456/531, loss: 0.002520160283893347 2023-01-22 15:50:48.446821: step: 460/531, loss: 0.008938014507293701 2023-01-22 15:50:49.524288: step: 464/531, loss: 0.007377485744655132 2023-01-22 15:50:50.572971: step: 468/531, loss: 0.0018793304916471243 2023-01-22 15:50:51.632976: step: 472/531, loss: 0.005979171488434076 2023-01-22 15:50:52.713528: step: 476/531, loss: 0.008090565912425518 2023-01-22 15:50:53.760510: step: 480/531, loss: 0.00870831310749054 2023-01-22 15:50:54.821450: step: 484/531, loss: 0.0295771025121212 2023-01-22 15:50:55.889432: step: 488/531, loss: 0.010176768526434898 2023-01-22 15:50:56.939625: step: 492/531, loss: 0.005269124638289213 2023-01-22 15:50:58.012265: step: 496/531, loss: 0.012513717636466026 2023-01-22 15:50:59.077366: step: 500/531, loss: 0.017855126410722733 2023-01-22 15:51:00.140412: step: 504/531, loss: 0.003243731101974845 2023-01-22 15:51:01.209736: step: 508/531, loss: 0.0022735833190381527 2023-01-22 15:51:02.276278: step: 512/531, loss: 0.01889975368976593 2023-01-22 15:51:03.351889: step: 516/531, loss: 0.013020745478570461 2023-01-22 15:51:04.409414: step: 520/531, loss: 0.002150821965187788 2023-01-22 15:51:05.458732: step: 524/531, loss: 0.0028804310131818056 2023-01-22 15:51:06.502849: step: 528/531, loss: 0.003170869778841734 2023-01-22 15:51:07.566810: step: 532/531, loss: 0.0013524453388527036 2023-01-22 15:51:08.626796: step: 536/531, loss: 0.00864172913134098 2023-01-22 15:51:09.685810: step: 540/531, loss: 0.001284754485823214 2023-01-22 15:51:10.752709: step: 544/531, loss: 0.025572624057531357 2023-01-22 15:51:11.818805: step: 548/531, loss: 0.006978447083383799 2023-01-22 15:51:12.878667: step: 552/531, loss: 0.00270474492572248 2023-01-22 15:51:13.938990: step: 556/531, loss: 0.03037181869149208 2023-01-22 15:51:15.007792: step: 560/531, loss: 0.010270398110151291 2023-01-22 15:51:16.057202: step: 564/531, loss: 0.007949589751660824 2023-01-22 15:51:17.125793: step: 568/531, loss: 0.004779871553182602 2023-01-22 15:51:18.178789: step: 572/531, loss: 0.0014209687942638993 2023-01-22 15:51:19.240996: step: 576/531, loss: 0.002962718019261956 2023-01-22 15:51:20.280928: step: 580/531, loss: 0.015369495376944542 2023-01-22 15:51:21.354829: step: 584/531, loss: 0.01894855871796608 2023-01-22 15:51:22.402111: step: 588/531, loss: 0.006767974700778723 2023-01-22 15:51:23.465364: step: 592/531, loss: 0.003008423373103142 2023-01-22 15:51:24.533605: step: 596/531, loss: 0.01145913079380989 2023-01-22 15:51:25.581495: step: 600/531, loss: 0.0060500348918139935 2023-01-22 15:51:26.651995: step: 604/531, loss: 
0.021979445591568947 2023-01-22 15:51:27.722933: step: 608/531, loss: 0.008840704336762428 2023-01-22 15:51:28.778150: step: 612/531, loss: 0.023319141939282417 2023-01-22 15:51:29.848177: step: 616/531, loss: 0.05718563497066498 2023-01-22 15:51:30.936692: step: 620/531, loss: 0.030173836275935173 2023-01-22 15:51:32.007942: step: 624/531, loss: 0.0003511547693051398 2023-01-22 15:51:33.073638: step: 628/531, loss: 0.006536046974360943 2023-01-22 15:51:34.124997: step: 632/531, loss: 0.0019321806030347943 2023-01-22 15:51:35.195501: step: 636/531, loss: 0.010432318784296513 2023-01-22 15:51:36.255790: step: 640/531, loss: 0.0026999544352293015 2023-01-22 15:51:37.317685: step: 644/531, loss: 0.0157073475420475 2023-01-22 15:51:38.380887: step: 648/531, loss: 0.05477079749107361 2023-01-22 15:51:39.434240: step: 652/531, loss: 0.00508402194827795 2023-01-22 15:51:40.485949: step: 656/531, loss: 0.0050207399763166904 2023-01-22 15:51:41.540170: step: 660/531, loss: 0.007853449322283268 2023-01-22 15:51:42.605396: step: 664/531, loss: 0.008380183018743992 2023-01-22 15:51:43.653944: step: 668/531, loss: 0.0026519836392253637 2023-01-22 15:51:44.722723: step: 672/531, loss: 0.005142910405993462 2023-01-22 15:51:45.783552: step: 676/531, loss: 0.024934709072113037 2023-01-22 15:51:46.838710: step: 680/531, loss: 0.0005833875620737672 2023-01-22 15:51:47.916708: step: 684/531, loss: 0.005784018896520138 2023-01-22 15:51:48.970428: step: 688/531, loss: 0.009918668307363987 2023-01-22 15:51:50.039125: step: 692/531, loss: 0.009790316224098206 2023-01-22 15:51:51.099867: step: 696/531, loss: 0.0015267275739461184 2023-01-22 15:51:52.189951: step: 700/531, loss: 0.004760469309985638 2023-01-22 15:51:53.250343: step: 704/531, loss: 0.011497597210109234 2023-01-22 15:51:54.321037: step: 708/531, loss: 0.0032411532010883093 2023-01-22 15:51:55.397340: step: 712/531, loss: 0.005637328140437603 2023-01-22 15:51:56.446934: step: 716/531, loss: 0.007290184032171965 2023-01-22 15:51:57.508384: step: 720/531, loss: 0.0424954816699028 2023-01-22 15:51:58.572086: step: 724/531, loss: 0.005525291431695223 2023-01-22 15:51:59.635805: step: 728/531, loss: 0.005197733640670776 2023-01-22 15:52:00.708840: step: 732/531, loss: 0.03285115957260132 2023-01-22 15:52:01.778412: step: 736/531, loss: 0.009032647125422955 2023-01-22 15:52:02.822458: step: 740/531, loss: 0.0115432720631361 2023-01-22 15:52:03.867021: step: 744/531, loss: 0.009898380376398563 2023-01-22 15:52:04.915814: step: 748/531, loss: 0.012690548785030842 2023-01-22 15:52:05.976112: step: 752/531, loss: 0.007415212690830231 2023-01-22 15:52:07.023361: step: 756/531, loss: 0.0053363386541605 2023-01-22 15:52:08.080421: step: 760/531, loss: 0.005677635315805674 2023-01-22 15:52:09.159842: step: 764/531, loss: 0.006649449467658997 2023-01-22 15:52:10.241661: step: 768/531, loss: 0.011402852833271027 2023-01-22 15:52:11.317085: step: 772/531, loss: 0.00688968924805522 2023-01-22 15:52:12.367422: step: 776/531, loss: 0.0009790461044758558 2023-01-22 15:52:13.408963: step: 780/531, loss: 0.0030372862238436937 2023-01-22 15:52:14.464361: step: 784/531, loss: 0.008241291157901287 2023-01-22 15:52:15.530420: step: 788/531, loss: 0.004959088284522295 2023-01-22 15:52:16.588539: step: 792/531, loss: 0.006605707574635744 2023-01-22 15:52:17.653123: step: 796/531, loss: 0.006869078613817692 2023-01-22 15:52:18.702636: step: 800/531, loss: 0.0013255409430712461 2023-01-22 15:52:19.756173: step: 804/531, loss: 0.0011628136271610856 2023-01-22 15:52:20.812750: step: 
808/531, loss: 0.01237429492175579 2023-01-22 15:52:21.880941: step: 812/531, loss: 0.014288225211203098 2023-01-22 15:52:22.955784: step: 816/531, loss: 0.020358677953481674 2023-01-22 15:52:24.034979: step: 820/531, loss: 0.0016593632753938437 2023-01-22 15:52:25.097985: step: 824/531, loss: 0.0037502467166632414 2023-01-22 15:52:26.152682: step: 828/531, loss: 0.004868919961154461 2023-01-22 15:52:27.205237: step: 832/531, loss: 0.00874305795878172 2023-01-22 15:52:28.259802: step: 836/531, loss: 0.004308534786105156 2023-01-22 15:52:29.334590: step: 840/531, loss: 0.05860290303826332 2023-01-22 15:52:30.390041: step: 844/531, loss: 0.0038430599961429834 2023-01-22 15:52:31.443607: step: 848/531, loss: 0.03443102166056633 2023-01-22 15:52:32.516225: step: 852/531, loss: 0.0041647967882454395 2023-01-22 15:52:33.551621: step: 856/531, loss: 0.0 2023-01-22 15:52:34.600390: step: 860/531, loss: 0.005874386988580227 2023-01-22 15:52:35.666165: step: 864/531, loss: 0.00691419979557395 2023-01-22 15:52:36.746008: step: 868/531, loss: 0.018089568242430687 2023-01-22 15:52:37.805949: step: 872/531, loss: 0.002000002423301339 2023-01-22 15:52:38.865182: step: 876/531, loss: 0.0031377498526126146 2023-01-22 15:52:39.939833: step: 880/531, loss: 0.006632352247834206 2023-01-22 15:52:40.995003: step: 884/531, loss: 0.012486872263252735 2023-01-22 15:52:42.096590: step: 888/531, loss: 0.007046861108392477 2023-01-22 15:52:43.168162: step: 892/531, loss: 0.008797496557235718 2023-01-22 15:52:44.237148: step: 896/531, loss: 0.008230539038777351 2023-01-22 15:52:45.297281: step: 900/531, loss: 0.005878815893083811 2023-01-22 15:52:46.361381: step: 904/531, loss: 0.00798221305012703 2023-01-22 15:52:47.422012: step: 908/531, loss: 0.0037675644271075726 2023-01-22 15:52:48.501365: step: 912/531, loss: 0.007415076717734337 2023-01-22 15:52:49.558855: step: 916/531, loss: 0.00133984733838588 2023-01-22 15:52:50.614417: step: 920/531, loss: 0.002230678219348192 2023-01-22 15:52:51.672231: step: 924/531, loss: 0.0013091347645968199 2023-01-22 15:52:52.732183: step: 928/531, loss: 0.0016474085859954357 2023-01-22 15:52:53.794429: step: 932/531, loss: 0.008072270080447197 2023-01-22 15:52:54.863443: step: 936/531, loss: 0.004681349731981754 2023-01-22 15:52:55.906984: step: 940/531, loss: 0.0372842475771904 2023-01-22 15:52:56.954196: step: 944/531, loss: 0.015247981064021587 2023-01-22 15:52:58.018971: step: 948/531, loss: 0.011911117471754551 2023-01-22 15:52:59.095326: step: 952/531, loss: 0.007198999170213938 2023-01-22 15:53:00.160912: step: 956/531, loss: 0.0014013596810400486 2023-01-22 15:53:01.220704: step: 960/531, loss: 0.004519191104918718 2023-01-22 15:53:02.284127: step: 964/531, loss: 0.008875181898474693 2023-01-22 15:53:03.351869: step: 968/531, loss: 0.0036492496728897095 2023-01-22 15:53:04.420939: step: 972/531, loss: 0.02182733826339245 2023-01-22 15:53:05.487711: step: 976/531, loss: 0.04539189487695694 2023-01-22 15:53:06.547730: step: 980/531, loss: 0.0011102509452030063 2023-01-22 15:53:07.588230: step: 984/531, loss: 0.006845785304903984 2023-01-22 15:53:08.627587: step: 988/531, loss: 0.0005421403329819441 2023-01-22 15:53:09.699773: step: 992/531, loss: 0.010717685334384441 2023-01-22 15:53:10.770121: step: 996/531, loss: 0.02985331416130066 2023-01-22 15:53:11.829031: step: 1000/531, loss: 0.008929370902478695 2023-01-22 15:53:12.905502: step: 1004/531, loss: 0.015887390822172165 2023-01-22 15:53:13.969748: step: 1008/531, loss: 0.012111171148717403 2023-01-22 15:53:15.018329: 
step: 1012/531, loss: 0.004509125370532274 2023-01-22 15:53:16.079211: step: 1016/531, loss: 0.013637019321322441 2023-01-22 15:53:17.132667: step: 1020/531, loss: 6.853634840808809e-05 2023-01-22 15:53:18.187390: step: 1024/531, loss: 0.002534861909225583 2023-01-22 15:53:19.254539: step: 1028/531, loss: 0.004997155163437128 2023-01-22 15:53:20.306595: step: 1032/531, loss: 0.05634595453739166 2023-01-22 15:53:21.360527: step: 1036/531, loss: 0.0011073792120441794 2023-01-22 15:53:22.414715: step: 1040/531, loss: 0.010222180746495724 2023-01-22 15:53:23.460610: step: 1044/531, loss: 0.0005028155283071101 2023-01-22 15:53:24.514276: step: 1048/531, loss: 0.0023263171315193176 2023-01-22 15:53:25.571211: step: 1052/531, loss: 0.005292803514748812 2023-01-22 15:53:26.630102: step: 1056/531, loss: 0.005772221367806196 2023-01-22 15:53:27.683737: step: 1060/531, loss: 0.00011989741324214265 2023-01-22 15:53:28.735229: step: 1064/531, loss: 0.005717512220144272 2023-01-22 15:53:29.776598: step: 1068/531, loss: 0.010426831431686878 2023-01-22 15:53:30.835507: step: 1072/531, loss: 0.003298128955066204 2023-01-22 15:53:31.888298: step: 1076/531, loss: 0.0006716709467582405 2023-01-22 15:53:32.947925: step: 1080/531, loss: 0.0008013807237148285 2023-01-22 15:53:34.016499: step: 1084/531, loss: 0.009770435281097889 2023-01-22 15:53:35.075900: step: 1088/531, loss: 0.012029794976115227 2023-01-22 15:53:36.155583: step: 1092/531, loss: 0.0029329024255275726 2023-01-22 15:53:37.211198: step: 1096/531, loss: 0.015537690371274948 2023-01-22 15:53:38.267854: step: 1100/531, loss: 0.002954992698505521 2023-01-22 15:53:39.336604: step: 1104/531, loss: 0.03443381190299988 2023-01-22 15:53:40.395229: step: 1108/531, loss: 0.009190636686980724 2023-01-22 15:53:41.455504: step: 1112/531, loss: 0.020895270630717278 2023-01-22 15:53:42.520041: step: 1116/531, loss: 0.0015741956885904074 2023-01-22 15:53:43.595281: step: 1120/531, loss: 0.020179197192192078 2023-01-22 15:53:44.684380: step: 1124/531, loss: 0.0016432267148047686 2023-01-22 15:53:45.732900: step: 1128/531, loss: 0.0007776243728585541 2023-01-22 15:53:46.809450: step: 1132/531, loss: 0.04637470841407776 2023-01-22 15:53:47.868915: step: 1136/531, loss: 0.023987144231796265 2023-01-22 15:53:48.931987: step: 1140/531, loss: 0.005930606741458178 2023-01-22 15:53:49.985408: step: 1144/531, loss: 0.010379788465797901 2023-01-22 15:53:51.063531: step: 1148/531, loss: 0.020205894485116005 2023-01-22 15:53:52.133752: step: 1152/531, loss: 0.004121109377592802 2023-01-22 15:53:53.197585: step: 1156/531, loss: 0.013121345080435276 2023-01-22 15:53:54.273283: step: 1160/531, loss: 0.010387592017650604 2023-01-22 15:53:55.346739: step: 1164/531, loss: 0.03616507723927498 2023-01-22 15:53:56.398849: step: 1168/531, loss: 0.005236451979726553 2023-01-22 15:53:57.462409: step: 1172/531, loss: 0.01286311261355877 2023-01-22 15:53:58.506935: step: 1176/531, loss: 0.0005282892961986363 2023-01-22 15:53:59.559602: step: 1180/531, loss: 0.015461313538253307 2023-01-22 15:54:00.624931: step: 1184/531, loss: 0.011073824018239975 2023-01-22 15:54:01.677469: step: 1188/531, loss: 0.00697501003742218 2023-01-22 15:54:02.757239: step: 1192/531, loss: 0.03540768846869469 2023-01-22 15:54:03.828405: step: 1196/531, loss: 0.014474504627287388 2023-01-22 15:54:04.882390: step: 1200/531, loss: 0.005541430786252022 2023-01-22 15:54:05.938337: step: 1204/531, loss: 0.011974726803600788 2023-01-22 15:54:06.994898: step: 1208/531, loss: 0.022920407354831696 2023-01-22 
15:54:08.079413: step: 1212/531, loss: 0.0017761550843715668 2023-01-22 15:54:09.145889: step: 1216/531, loss: 0.005945018958300352 2023-01-22 15:54:10.224234: step: 1220/531, loss: 0.0001790232490748167 2023-01-22 15:54:11.279693: step: 1224/531, loss: 0.015340182930231094 2023-01-22 15:54:12.359556: step: 1228/531, loss: 0.007377743721008301 2023-01-22 15:54:13.429202: step: 1232/531, loss: 0.005427806172519922 2023-01-22 15:54:14.477117: step: 1236/531, loss: 0.0010519563220441341 2023-01-22 15:54:15.544499: step: 1240/531, loss: 0.010295179672539234 2023-01-22 15:54:16.601653: step: 1244/531, loss: 0.008899634703993797 2023-01-22 15:54:17.660248: step: 1248/531, loss: 0.005573725793510675 2023-01-22 15:54:18.712613: step: 1252/531, loss: 0.036438584327697754 2023-01-22 15:54:19.796818: step: 1256/531, loss: 0.0173336174339056 2023-01-22 15:54:20.877222: step: 1260/531, loss: 0.009335625916719437 2023-01-22 15:54:21.933699: step: 1264/531, loss: 0.020228074863553047 2023-01-22 15:54:22.983080: step: 1268/531, loss: 0.0166328102350235 2023-01-22 15:54:24.056020: step: 1272/531, loss: 0.0017933398485183716 2023-01-22 15:54:25.104482: step: 1276/531, loss: 0.0047768172807991505 2023-01-22 15:54:26.166088: step: 1280/531, loss: 0.00873319711536169 2023-01-22 15:54:27.212304: step: 1284/531, loss: 0.00023820690694265068 2023-01-22 15:54:28.269323: step: 1288/531, loss: 0.00501972297206521 2023-01-22 15:54:29.339723: step: 1292/531, loss: 0.017059117555618286 2023-01-22 15:54:30.394487: step: 1296/531, loss: 0.0006950476672500372 2023-01-22 15:54:31.474697: step: 1300/531, loss: 0.004926077090203762 2023-01-22 15:54:32.532945: step: 1304/531, loss: 0.003382546827197075 2023-01-22 15:54:33.602051: step: 1308/531, loss: 0.007411657832562923 2023-01-22 15:54:34.660048: step: 1312/531, loss: 0.004177900031208992 2023-01-22 15:54:35.733713: step: 1316/531, loss: 0.018221421167254448 2023-01-22 15:54:36.783977: step: 1320/531, loss: 0.0035860363859683275 2023-01-22 15:54:37.839158: step: 1324/531, loss: 0.0016093968879431486 2023-01-22 15:54:38.915834: step: 1328/531, loss: 0.0009316093055531383 2023-01-22 15:54:39.983549: step: 1332/531, loss: 0.0011106191668659449 2023-01-22 15:54:41.047645: step: 1336/531, loss: 0.013964518904685974 2023-01-22 15:54:42.108034: step: 1340/531, loss: 0.005351893603801727 2023-01-22 15:54:43.182649: step: 1344/531, loss: 0.022416943684220314 2023-01-22 15:54:44.236009: step: 1348/531, loss: 0.06408470869064331 2023-01-22 15:54:45.307303: step: 1352/531, loss: 0.006485450081527233 2023-01-22 15:54:46.376941: step: 1356/531, loss: 0.0038483969401568174 2023-01-22 15:54:47.441081: step: 1360/531, loss: 0.006142089143395424 2023-01-22 15:54:48.511124: step: 1364/531, loss: 0.009657652117311954 2023-01-22 15:54:49.555295: step: 1368/531, loss: 0.015305178239941597 2023-01-22 15:54:50.613281: step: 1372/531, loss: 0.037024226039648056 2023-01-22 15:54:51.675112: step: 1376/531, loss: 0.0028578753117471933 2023-01-22 15:54:52.720130: step: 1380/531, loss: 4.95872154715471e-05 2023-01-22 15:54:53.793010: step: 1384/531, loss: 0.0007027056417427957 2023-01-22 15:54:54.870685: step: 1388/531, loss: 0.006710399873554707 2023-01-22 15:54:55.922757: step: 1392/531, loss: 0.016674794256687164 2023-01-22 15:54:56.969738: step: 1396/531, loss: 0.001589001971296966 2023-01-22 15:54:58.031198: step: 1400/531, loss: 0.0016361505258828402 2023-01-22 15:54:59.072329: step: 1404/531, loss: 0.00785883516073227 2023-01-22 15:55:00.129647: step: 1408/531, loss: 0.0003021680167876184 
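
A note on the step numbering in the lines above: within each epoch the counter runs 4, 8, ..., 2124 while every entry is printed as "step: N/531". With --accumulate_step 4 this is consistent with N counting mini-batches and the denominator counting optimizer updates per epoch (2124 / 4 = 531), i.e. one logged line per parameter update. Below is only a minimal sketch of a loop that produces lines in this shape; the stand-in model, data, and loss are illustrative assumptions, not the repo's train.py.

# Illustrative gradient-accumulation loop, not the coref model from train.py.
# N in "step: N/531" counts mini-batches; 531 counts optimizer updates.
import datetime
import torch

ACCUMULATE_STEP = 4                      # matches --accumulate_step 4
NUM_BATCHES = 2124                       # 2124 / 4 = 531 updates, as in the log
UPDATES_PER_EPOCH = NUM_BATCHES // ACCUMULATE_STEP

model = torch.nn.Linear(8, 1)            # placeholder for the real model
optimizer = torch.optim.AdamW(model.parameters(), lr=9e-4)

for batch_idx in range(1, NUM_BATCHES + 1):
    x, y = torch.randn(16, 8), torch.randn(16, 1)    # placeholder batch of 16
    loss = torch.nn.functional.mse_loss(model(x), y) / ACCUMULATE_STEP
    loss.backward()                      # gradients accumulate across 4 batches
    if batch_idx % ACCUMULATE_STEP == 0:
        optimizer.step()
        optimizer.zero_grad()
        print(f"{datetime.datetime.now()}: step: {batch_idx}/{UPDATES_PER_EPOCH}, "
              f"loss: {loss.item() * ACCUMULATE_STEP}")
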
2023-01-22 15:55:01.192884: step: 1412/531, loss: 0.003640773007646203 2023-01-22 15:55:02.262982: step: 1416/531, loss: 0.01461334340274334 2023-01-22 15:55:03.340511: step: 1420/531, loss: 0.002424241742119193 2023-01-22 15:55:04.384943: step: 1424/531, loss: 0.002955395495519042 2023-01-22 15:55:05.471860: step: 1428/531, loss: 0.008063364773988724 2023-01-22 15:55:06.537019: step: 1432/531, loss: 0.006800973787903786 2023-01-22 15:55:07.600743: step: 1436/531, loss: 0.004543921444565058 2023-01-22 15:55:08.655049: step: 1440/531, loss: 0.00531755993142724 2023-01-22 15:55:09.717587: step: 1444/531, loss: 0.0048471237532794476 2023-01-22 15:55:10.778812: step: 1448/531, loss: 0.01994478330016136 2023-01-22 15:55:11.873157: step: 1452/531, loss: 0.044987574219703674 2023-01-22 15:55:12.936640: step: 1456/531, loss: 0.0003491649404168129 2023-01-22 15:55:13.995133: step: 1460/531, loss: 0.0002789389109238982 2023-01-22 15:55:15.053884: step: 1464/531, loss: 1.3910464986111037e-05 2023-01-22 15:55:16.112263: step: 1468/531, loss: 0.014729684218764305 2023-01-22 15:55:17.197516: step: 1472/531, loss: 0.00895836390554905 2023-01-22 15:55:18.254148: step: 1476/531, loss: 0.007623247802257538 2023-01-22 15:55:19.307537: step: 1480/531, loss: 0.007720944471657276 2023-01-22 15:55:20.385423: step: 1484/531, loss: 0.008904880844056606 2023-01-22 15:55:21.459298: step: 1488/531, loss: 0.009866662323474884 2023-01-22 15:55:22.512203: step: 1492/531, loss: 0.008230365812778473 2023-01-22 15:55:23.575319: step: 1496/531, loss: 0.0062467847019433975 2023-01-22 15:55:24.632995: step: 1500/531, loss: 0.0013609403977170587 2023-01-22 15:55:25.709520: step: 1504/531, loss: 0.003021181095391512 2023-01-22 15:55:26.783657: step: 1508/531, loss: 0.007688530720770359 2023-01-22 15:55:27.841251: step: 1512/531, loss: 0.006852554157376289 2023-01-22 15:55:28.897535: step: 1516/531, loss: 0.014278341084718704 2023-01-22 15:55:29.986497: step: 1520/531, loss: 0.007408963516354561 2023-01-22 15:55:31.044758: step: 1524/531, loss: 0.0034462958574295044 2023-01-22 15:55:32.098375: step: 1528/531, loss: 0.0025516273453831673 2023-01-22 15:55:33.163530: step: 1532/531, loss: 0.003779664635658264 2023-01-22 15:55:34.223596: step: 1536/531, loss: 0.0048885946162045 2023-01-22 15:55:35.271604: step: 1540/531, loss: 0.004900121130049229 2023-01-22 15:55:36.350752: step: 1544/531, loss: 0.005288383457809687 2023-01-22 15:55:37.417217: step: 1548/531, loss: 0.018736552447080612 2023-01-22 15:55:38.476714: step: 1552/531, loss: 1.226474250870524e-05 2023-01-22 15:55:39.532620: step: 1556/531, loss: 0.006348559632897377 2023-01-22 15:55:40.612989: step: 1560/531, loss: 0.03728722035884857 2023-01-22 15:55:41.665302: step: 1564/531, loss: 0.0047976309433579445 2023-01-22 15:55:42.720545: step: 1568/531, loss: 0.0014296979643404484 2023-01-22 15:55:43.793015: step: 1572/531, loss: 0.01720673032104969 2023-01-22 15:55:44.870427: step: 1576/531, loss: 0.0019312668591737747 2023-01-22 15:55:45.942144: step: 1580/531, loss: 0.002072556409984827 2023-01-22 15:55:47.006795: step: 1584/531, loss: 0.0003468520299065858 2023-01-22 15:55:48.058076: step: 1588/531, loss: 0.0031809844076633453 2023-01-22 15:55:49.144188: step: 1592/531, loss: 0.011697826907038689 2023-01-22 15:55:50.207738: step: 1596/531, loss: 0.004813006613403559 2023-01-22 15:55:51.276711: step: 1600/531, loss: 0.0033183274790644646 2023-01-22 15:55:52.330866: step: 1604/531, loss: 0.004599857144057751 2023-01-22 15:55:53.395686: step: 1608/531, loss: 
0.01199669111520052 2023-01-22 15:55:54.459353: step: 1612/531, loss: 0.005202004685997963 2023-01-22 15:55:55.528920: step: 1616/531, loss: 0.003957950510084629 2023-01-22 15:55:56.590037: step: 1620/531, loss: 0.0011726460652425885 2023-01-22 15:55:57.640765: step: 1624/531, loss: 0.004059431608766317 2023-01-22 15:55:58.699550: step: 1628/531, loss: 0.003595364047214389 2023-01-22 15:55:59.764462: step: 1632/531, loss: 0.007409157231450081 2023-01-22 15:56:00.835963: step: 1636/531, loss: 0.0019780935253947973 2023-01-22 15:56:01.901734: step: 1640/531, loss: 0.022702112793922424 2023-01-22 15:56:02.948621: step: 1644/531, loss: 0.005747990682721138 2023-01-22 15:56:03.996615: step: 1648/531, loss: 0.005032900255173445 2023-01-22 15:56:05.066140: step: 1652/531, loss: 0.0028443720657378435 2023-01-22 15:56:06.125251: step: 1656/531, loss: 0.0035552673507481813 2023-01-22 15:56:07.181440: step: 1660/531, loss: 0.015025815926492214 2023-01-22 15:56:08.228103: step: 1664/531, loss: 0.00045435811625793576 2023-01-22 15:56:09.281998: step: 1668/531, loss: 0.005025635939091444 2023-01-22 15:56:10.337329: step: 1672/531, loss: 0.03417355194687843 2023-01-22 15:56:11.400040: step: 1676/531, loss: 0.011585216037929058 2023-01-22 15:56:12.456198: step: 1680/531, loss: 0.003591161919757724 2023-01-22 15:56:13.514662: step: 1684/531, loss: 0.01113806664943695 2023-01-22 15:56:14.564473: step: 1688/531, loss: 0.004196211230009794 2023-01-22 15:56:15.634628: step: 1692/531, loss: 0.00629101088270545 2023-01-22 15:56:16.690118: step: 1696/531, loss: 0.014905724674463272 2023-01-22 15:56:17.744011: step: 1700/531, loss: 0.0029332919511944056 2023-01-22 15:56:18.819411: step: 1704/531, loss: 0.004238472320139408 2023-01-22 15:56:19.872334: step: 1708/531, loss: 0.000372401176718995 2023-01-22 15:56:20.936305: step: 1712/531, loss: 0.01268011610955 2023-01-22 15:56:22.004331: step: 1716/531, loss: 0.011812311597168446 2023-01-22 15:56:23.054034: step: 1720/531, loss: 0.003110135206952691 2023-01-22 15:56:24.126698: step: 1724/531, loss: 0.008479684591293335 2023-01-22 15:56:25.186102: step: 1728/531, loss: 0.004277018364518881 2023-01-22 15:56:26.235270: step: 1732/531, loss: 0.006617109291255474 2023-01-22 15:56:27.289534: step: 1736/531, loss: 0.0044241719879209995 2023-01-22 15:56:28.344502: step: 1740/531, loss: 0.0026026498526334763 2023-01-22 15:56:29.394169: step: 1744/531, loss: 0.007485625799745321 2023-01-22 15:56:30.452879: step: 1748/531, loss: 0.007189452648162842 2023-01-22 15:56:31.510005: step: 1752/531, loss: 0.0045541455037891865 2023-01-22 15:56:32.561082: step: 1756/531, loss: 0.008894850499927998 2023-01-22 15:56:33.650880: step: 1760/531, loss: 0.005660593509674072 2023-01-22 15:56:34.702472: step: 1764/531, loss: 0.0019180066883563995 2023-01-22 15:56:35.767231: step: 1768/531, loss: 0.0009436768596060574 2023-01-22 15:56:36.834131: step: 1772/531, loss: 0.005720451939851046 2023-01-22 15:56:37.901616: step: 1776/531, loss: 0.0025520373601466417 2023-01-22 15:56:38.968663: step: 1780/531, loss: 0.0015464320313185453 2023-01-22 15:56:40.022202: step: 1784/531, loss: 0.004384071100503206 2023-01-22 15:56:41.087245: step: 1788/531, loss: 0.006013082806020975 2023-01-22 15:56:42.143691: step: 1792/531, loss: 0.006493647117167711 2023-01-22 15:56:43.205908: step: 1796/531, loss: 0.00790315866470337 2023-01-22 15:56:44.274382: step: 1800/531, loss: 0.0090031111612916 2023-01-22 15:56:45.325226: step: 1804/531, loss: 0.000918390229344368 2023-01-22 15:56:46.375399: step: 1808/531, loss: 
0.0007245594169944525 2023-01-22 15:56:47.435284: step: 1812/531, loss: 0.00401635468006134 2023-01-22 15:56:48.492198: step: 1816/531, loss: 0.002673220820724964 2023-01-22 15:56:49.541312: step: 1820/531, loss: 0.005069272127002478 2023-01-22 15:56:50.605414: step: 1824/531, loss: 0.00564933568239212 2023-01-22 15:56:51.672685: step: 1828/531, loss: 0.0047026448883116245 2023-01-22 15:56:52.752467: step: 1832/531, loss: 0.005653322674334049 2023-01-22 15:56:53.801255: step: 1836/531, loss: 0.002209238475188613 2023-01-22 15:56:54.866052: step: 1840/531, loss: 0.0026490730233490467 2023-01-22 15:56:55.918107: step: 1844/531, loss: 0.011918609961867332 2023-01-22 15:56:56.982568: step: 1848/531, loss: 0.0036911170464009047 2023-01-22 15:56:58.041191: step: 1852/531, loss: 0.005991524085402489 2023-01-22 15:56:59.116155: step: 1856/531, loss: 0.003986523021012545 2023-01-22 15:57:00.162681: step: 1860/531, loss: 0.027299998328089714 2023-01-22 15:57:01.209443: step: 1864/531, loss: 0.01316267903894186 2023-01-22 15:57:02.255278: step: 1868/531, loss: 0.00766478106379509 2023-01-22 15:57:03.330292: step: 1872/531, loss: 0.008670568466186523 2023-01-22 15:57:04.393925: step: 1876/531, loss: 0.02351980097591877 2023-01-22 15:57:05.441482: step: 1880/531, loss: 0.02516782097518444 2023-01-22 15:57:06.508165: step: 1884/531, loss: 0.002396475290879607 2023-01-22 15:57:07.594448: step: 1888/531, loss: 0.0023003334645181894 2023-01-22 15:57:08.664452: step: 1892/531, loss: 0.008029518648982048 2023-01-22 15:57:09.724818: step: 1896/531, loss: 0.009424656629562378 2023-01-22 15:57:10.799550: step: 1900/531, loss: 0.0029585405718535185 2023-01-22 15:57:11.870770: step: 1904/531, loss: 0.008412801660597324 2023-01-22 15:57:12.919655: step: 1908/531, loss: 0.008342405781149864 2023-01-22 15:57:13.966224: step: 1912/531, loss: 0.01795872487127781 2023-01-22 15:57:15.019165: step: 1916/531, loss: 0.00738177727907896 2023-01-22 15:57:16.075889: step: 1920/531, loss: 0.011627256870269775 2023-01-22 15:57:17.131422: step: 1924/531, loss: 0.009259622544050217 2023-01-22 15:57:18.177045: step: 1928/531, loss: 0.007299355231225491 2023-01-22 15:57:19.222675: step: 1932/531, loss: 0.004032579716295004 2023-01-22 15:57:20.291895: step: 1936/531, loss: 0.0019013237906619906 2023-01-22 15:57:21.354737: step: 1940/531, loss: 0.01666168123483658 2023-01-22 15:57:22.415250: step: 1944/531, loss: 0.013499433174729347 2023-01-22 15:57:23.474895: step: 1948/531, loss: 0.022215723991394043 2023-01-22 15:57:24.540009: step: 1952/531, loss: 0.014236253686249256 2023-01-22 15:57:25.600028: step: 1956/531, loss: 0.004504339769482613 2023-01-22 15:57:26.669487: step: 1960/531, loss: 0.01728799007833004 2023-01-22 15:57:27.726587: step: 1964/531, loss: 0.002598227234557271 2023-01-22 15:57:28.794665: step: 1968/531, loss: 0.019606834277510643 2023-01-22 15:57:29.848821: step: 1972/531, loss: 0.0017232568934559822 2023-01-22 15:57:30.896957: step: 1976/531, loss: 0.003615857334807515 2023-01-22 15:57:31.963195: step: 1980/531, loss: 0.00292869727127254 2023-01-22 15:57:33.030887: step: 1984/531, loss: 0.0055353702045977116 2023-01-22 15:57:34.110245: step: 1988/531, loss: 0.008058445528149605 2023-01-22 15:57:35.172596: step: 1992/531, loss: 0.007985231466591358 2023-01-22 15:57:36.246221: step: 1996/531, loss: 0.02180594578385353 2023-01-22 15:57:37.288653: step: 2000/531, loss: 0.010600738227367401 2023-01-22 15:57:38.356275: step: 2004/531, loss: 0.04641266167163849 2023-01-22 15:57:39.411356: step: 2008/531, loss: 
0.028026631101965904 2023-01-22 15:57:40.467959: step: 2012/531, loss: 0.00044320797314867377 2023-01-22 15:57:41.526980: step: 2016/531, loss: 0.011095143854618073 2023-01-22 15:57:42.600308: step: 2020/531, loss: 0.029895756393671036 2023-01-22 15:57:43.655874: step: 2024/531, loss: 0.0033620460890233517 2023-01-22 15:57:44.714815: step: 2028/531, loss: 0.01604628562927246 2023-01-22 15:57:45.773665: step: 2032/531, loss: 0.0037713171914219856 2023-01-22 15:57:46.825852: step: 2036/531, loss: 0.02835371159017086 2023-01-22 15:57:47.903324: step: 2040/531, loss: 0.004161869175732136 2023-01-22 15:57:48.951697: step: 2044/531, loss: 0.003946738317608833 2023-01-22 15:57:50.014435: step: 2048/531, loss: 0.008512686006724834 2023-01-22 15:57:51.066417: step: 2052/531, loss: 0.0004516572807915509 2023-01-22 15:57:52.126825: step: 2056/531, loss: 0.06959584355354309 2023-01-22 15:57:53.183715: step: 2060/531, loss: 0.0 2023-01-22 15:57:54.227808: step: 2064/531, loss: 0.020264845341444016 2023-01-22 15:57:55.265504: step: 2068/531, loss: 0.011989165097475052 2023-01-22 15:57:56.323235: step: 2072/531, loss: 0.0026472690515220165 2023-01-22 15:57:57.390139: step: 2076/531, loss: 0.025359176099300385 2023-01-22 15:57:58.447392: step: 2080/531, loss: 0.0020903984550386667 2023-01-22 15:57:59.502581: step: 2084/531, loss: 0.001333598862402141 2023-01-22 15:58:00.570629: step: 2088/531, loss: 0.02223386988043785 2023-01-22 15:58:01.626712: step: 2092/531, loss: 0.01397033128887415 2023-01-22 15:58:02.669033: step: 2096/531, loss: 0.013732771389186382 2023-01-22 15:58:03.738581: step: 2100/531, loss: 0.006502912845462561 2023-01-22 15:58:04.825540: step: 2104/531, loss: 0.005976259242743254 2023-01-22 15:58:05.881908: step: 2108/531, loss: 0.0008377428166568279 2023-01-22 15:58:06.927567: step: 2112/531, loss: 0.0009003969607874751 2023-01-22 15:58:07.965059: step: 2116/531, loss: 0.004043653141707182 2023-01-22 15:58:09.031357: step: 2120/531, loss: 0.01152876392006874 2023-01-22 15:58:10.101081: step: 2124/531, loss: 0.008723646402359009 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32957104832104833, 'r': 0.3470814645506296, 'f1': 0.3380996891278777}, 'combined': 0.2491260867258046, 'stategy': 1, 'epoch': 6} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36135167257735906, 'r': 0.2997687455105233, 'f1': 0.32769200470297716}, 'combined': 0.20307673530888723, 'stategy': 1, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3170756172839506, 'r': 0.32489721695129664, 'f1': 0.3209387691346454}, 'combined': 0.2364811983097387, 'stategy': 1, 'epoch': 6} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3647788824686452, 'r': 0.28503498946285283, 'f1': 0.32001392833202646}, 'combined': 0.2122864673093641, 'stategy': 1, 
'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'stategy': 1, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 6} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:00:44.200017: step: 4/531, loss: 0.0006492187967523932 2023-01-22 16:00:45.240128: step: 8/531, loss: 0.004802904091775417 2023-01-22 16:00:46.298436: step: 12/531, loss: 0.015752611681818962 2023-01-22 16:00:47.355653: step: 16/531, loss: 0.012046156451106071 2023-01-22 16:00:48.405601: step: 20/531, loss: 0.002270354190841317 2023-01-22 16:00:49.442977: step: 24/531, loss: 0.021567337214946747 2023-01-22 
16:00:50.506356: step: 28/531, loss: 0.0036491076461970806 2023-01-22 16:00:51.573139: step: 32/531, loss: 0.0017398595809936523 2023-01-22 16:00:52.628547: step: 36/531, loss: 0.0005531217902898788 2023-01-22 16:00:53.687558: step: 40/531, loss: 0.001784879365004599 2023-01-22 16:00:54.750743: step: 44/531, loss: 7.12331939212163e-06 2023-01-22 16:00:55.798169: step: 48/531, loss: 0.0028284811414778233 2023-01-22 16:00:56.844486: step: 52/531, loss: 0.0004425981314852834 2023-01-22 16:00:57.900966: step: 56/531, loss: 0.004374339245259762 2023-01-22 16:00:58.962239: step: 60/531, loss: 0.0028135827742516994 2023-01-22 16:01:00.014848: step: 64/531, loss: 0.012824036180973053 2023-01-22 16:01:01.085839: step: 68/531, loss: 0.0335075743496418 2023-01-22 16:01:02.143449: step: 72/531, loss: 0.0053871809504926205 2023-01-22 16:01:03.217071: step: 76/531, loss: 0.013369427993893623 2023-01-22 16:01:04.270821: step: 80/531, loss: 0.04436447098851204 2023-01-22 16:01:05.346372: step: 84/531, loss: 0.002875553211197257 2023-01-22 16:01:06.415654: step: 88/531, loss: 0.007801753934472799 2023-01-22 16:01:07.484321: step: 92/531, loss: 0.0037565003149211407 2023-01-22 16:01:08.544515: step: 96/531, loss: 0.004316174425184727 2023-01-22 16:01:09.600246: step: 100/531, loss: 0.005416641477495432 2023-01-22 16:01:10.659206: step: 104/531, loss: 0.0036287778057157993 2023-01-22 16:01:11.721166: step: 108/531, loss: 0.004348293412476778 2023-01-22 16:01:12.794579: step: 112/531, loss: 0.014397756196558475 2023-01-22 16:01:13.841041: step: 116/531, loss: 0.00114683888386935 2023-01-22 16:01:14.898682: step: 120/531, loss: 0.005130800884217024 2023-01-22 16:01:15.950798: step: 124/531, loss: 0.007498049642890692 2023-01-22 16:01:17.003663: step: 128/531, loss: 0.0005397589993663132 2023-01-22 16:01:18.085126: step: 132/531, loss: 0.06988916546106339 2023-01-22 16:01:19.139946: step: 136/531, loss: 0.002622071420773864 2023-01-22 16:01:20.203523: step: 140/531, loss: 0.005704334005713463 2023-01-22 16:01:21.272817: step: 144/531, loss: 0.0005702752387151122 2023-01-22 16:01:22.320445: step: 148/531, loss: 0.0010824378114193678 2023-01-22 16:01:23.375863: step: 152/531, loss: 0.00779822887852788 2023-01-22 16:01:24.430092: step: 156/531, loss: 0.014901806600391865 2023-01-22 16:01:25.500057: step: 160/531, loss: 0.004425419960170984 2023-01-22 16:01:26.556710: step: 164/531, loss: 0.0002514993539080024 2023-01-22 16:01:27.619845: step: 168/531, loss: 0.01510639488697052 2023-01-22 16:01:28.668872: step: 172/531, loss: 0.006646621040999889 2023-01-22 16:01:29.741581: step: 176/531, loss: 0.01198204793035984 2023-01-22 16:01:30.803675: step: 180/531, loss: 0.0022676994558423758 2023-01-22 16:01:31.874927: step: 184/531, loss: 0.0023348811082541943 2023-01-22 16:01:32.952148: step: 188/531, loss: 0.021751388907432556 2023-01-22 16:01:34.028703: step: 192/531, loss: 0.019914787262678146 2023-01-22 16:01:35.094407: step: 196/531, loss: 0.00429602200165391 2023-01-22 16:01:36.168639: step: 200/531, loss: 0.00019583826360758394 2023-01-22 16:01:37.231699: step: 204/531, loss: 0.0012761821271851659 2023-01-22 16:01:38.286035: step: 208/531, loss: 0.011430647224187851 2023-01-22 16:01:39.346072: step: 212/531, loss: 0.0006558317691087723 2023-01-22 16:01:40.408024: step: 216/531, loss: 0.009576068259775639 2023-01-22 16:01:41.475735: step: 220/531, loss: 0.0027853159699589014 2023-01-22 16:01:42.526476: step: 224/531, loss: 0.006347036920487881 2023-01-22 16:01:43.583394: step: 228/531, loss: 0.0005861844401806593 
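
The per-epoch evaluation block above (epoch 6) reports, for each language, a template score and a slot score as precision/recall/F1 dicts plus a single 'combined' number. Throughout the log, 'combined' equals the product of the template F1 and the slot F1; for example, Dev Chinese at epoch 6 gives 0.7368421052631579 * 0.33999750249750255 = 0.25052447552447554. The small check below uses the standard F1 formula and values copied from the log; the helper itself is illustrative, not code from the repo.

# Reproduce the epoch-6 Dev Chinese 'combined' score from the logged values.
def f1(p: float, r: float) -> float:
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579, as logged
slot_f1 = 0.33999750249750255               # slot 'f1' taken from the log
print(template_f1 * slot_f1)                # ~0.2505244755..., the logged 'combined'
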
2023-01-22 16:01:44.652638: step: 232/531, loss: 0.06506709009408951 2023-01-22 16:01:45.702639: step: 236/531, loss: 0.0032848953269422054 2023-01-22 16:01:46.752054: step: 240/531, loss: 0.0025946497917175293 2023-01-22 16:01:47.806633: step: 244/531, loss: 0.003949759993702173 2023-01-22 16:01:48.869115: step: 248/531, loss: 0.010272135026752949 2023-01-22 16:01:49.932481: step: 252/531, loss: 0.006210848689079285 2023-01-22 16:01:51.025913: step: 256/531, loss: 0.007650288287550211 2023-01-22 16:01:52.090151: step: 260/531, loss: 0.007213053293526173 2023-01-22 16:01:53.150971: step: 264/531, loss: 0.00025169874425046146 2023-01-22 16:01:54.226489: step: 268/531, loss: 0.0022797819692641497 2023-01-22 16:01:55.281619: step: 272/531, loss: 0.004989979322999716 2023-01-22 16:01:56.354690: step: 276/531, loss: 0.002507602795958519 2023-01-22 16:01:57.412213: step: 280/531, loss: 0.0033433998469263315 2023-01-22 16:01:58.476666: step: 284/531, loss: 0.0029465025290846825 2023-01-22 16:01:59.536304: step: 288/531, loss: 0.006215470377355814 2023-01-22 16:02:00.599842: step: 292/531, loss: 0.0010657849488779902 2023-01-22 16:02:01.685091: step: 296/531, loss: 0.007482234388589859 2023-01-22 16:02:02.740885: step: 300/531, loss: 0.006049760617315769 2023-01-22 16:02:03.820427: step: 304/531, loss: 0.0049827806651592255 2023-01-22 16:02:04.874911: step: 308/531, loss: 0.0010435294825583696 2023-01-22 16:02:05.941841: step: 312/531, loss: 0.0007052760920487344 2023-01-22 16:02:07.008151: step: 316/531, loss: 0.014551017433404922 2023-01-22 16:02:08.085538: step: 320/531, loss: 0.044681794941425323 2023-01-22 16:02:09.139159: step: 324/531, loss: 0.009216065518558025 2023-01-22 16:02:10.202369: step: 328/531, loss: 0.013036536984145641 2023-01-22 16:02:11.268431: step: 332/531, loss: 0.00922372005879879 2023-01-22 16:02:12.339958: step: 336/531, loss: 0.008950003422796726 2023-01-22 16:02:13.407701: step: 340/531, loss: 0.00798529852181673 2023-01-22 16:02:14.487643: step: 344/531, loss: 0.006593076977878809 2023-01-22 16:02:15.541606: step: 348/531, loss: 0.002142342971637845 2023-01-22 16:02:16.610721: step: 352/531, loss: 0.0032343610655516386 2023-01-22 16:02:17.667310: step: 356/531, loss: 0.014155490323901176 2023-01-22 16:02:18.726132: step: 360/531, loss: 0.0016211258480325341 2023-01-22 16:02:19.796797: step: 364/531, loss: 0.0027973006945103407 2023-01-22 16:02:20.856855: step: 368/531, loss: 0.006330488715320826 2023-01-22 16:02:21.926325: step: 372/531, loss: 0.007343151606619358 2023-01-22 16:02:23.009427: step: 376/531, loss: 0.010084627196192741 2023-01-22 16:02:24.069552: step: 380/531, loss: 0.0008924533030949533 2023-01-22 16:02:25.126608: step: 384/531, loss: 0.01889900490641594 2023-01-22 16:02:26.192622: step: 388/531, loss: 0.004234483931213617 2023-01-22 16:02:27.285697: step: 392/531, loss: 0.0062158117070794106 2023-01-22 16:02:28.346263: step: 396/531, loss: 0.009244952350854874 2023-01-22 16:02:29.419070: step: 400/531, loss: 0.009495830163359642 2023-01-22 16:02:30.476355: step: 404/531, loss: 0.0019206582801416516 2023-01-22 16:02:31.550865: step: 408/531, loss: 0.002965971827507019 2023-01-22 16:02:32.620810: step: 412/531, loss: 0.0062917061150074005 2023-01-22 16:02:33.686967: step: 416/531, loss: 0.0047087762504816055 2023-01-22 16:02:34.745015: step: 420/531, loss: 0.0052788592875003815 2023-01-22 16:02:35.804333: step: 424/531, loss: 0.012063873000442982 2023-01-22 16:02:36.859893: step: 428/531, loss: 0.007540481630712748 2023-01-22 16:02:37.919373: step: 
432/531, loss: 0.0027695889584720135 2023-01-22 16:02:38.989219: step: 436/531, loss: 0.021550847217440605 2023-01-22 16:02:40.041317: step: 440/531, loss: 0.005915469024330378 2023-01-22 16:02:41.116839: step: 444/531, loss: 0.004781430587172508 2023-01-22 16:02:42.190429: step: 448/531, loss: 0.006881205830723047 2023-01-22 16:02:43.255827: step: 452/531, loss: 0.007237443700432777 2023-01-22 16:02:44.313253: step: 456/531, loss: 0.007598720956593752 2023-01-22 16:02:45.375946: step: 460/531, loss: 0.0075926268473267555 2023-01-22 16:02:46.441510: step: 464/531, loss: 0.0001259603159269318 2023-01-22 16:02:47.525005: step: 468/531, loss: 0.007884755730628967 2023-01-22 16:02:48.579284: step: 472/531, loss: 0.0008542384603060782 2023-01-22 16:02:49.650468: step: 476/531, loss: 0.0012061416637152433 2023-01-22 16:02:50.714404: step: 480/531, loss: 0.0015838273102417588 2023-01-22 16:02:51.763321: step: 484/531, loss: 0.014944592490792274 2023-01-22 16:02:52.835783: step: 488/531, loss: 0.004819272551685572 2023-01-22 16:02:53.914891: step: 492/531, loss: 0.008972696959972382 2023-01-22 16:02:54.980580: step: 496/531, loss: 0.0012989470269531012 2023-01-22 16:02:56.039924: step: 500/531, loss: 0.007035675924271345 2023-01-22 16:02:57.113780: step: 504/531, loss: 0.004334037192165852 2023-01-22 16:02:58.183227: step: 508/531, loss: 0.02392931655049324 2023-01-22 16:02:59.247914: step: 512/531, loss: 0.015061544254422188 2023-01-22 16:03:00.307680: step: 516/531, loss: 0.008190691471099854 2023-01-22 16:03:01.383500: step: 520/531, loss: 0.005081430077552795 2023-01-22 16:03:02.437009: step: 524/531, loss: 0.017981668934226036 2023-01-22 16:03:03.488656: step: 528/531, loss: 0.0005370138096623123 2023-01-22 16:03:04.553067: step: 532/531, loss: 0.003851471468806267 2023-01-22 16:03:05.615362: step: 536/531, loss: 0.0025094801094383 2023-01-22 16:03:06.675001: step: 540/531, loss: 0.003738607745617628 2023-01-22 16:03:07.748563: step: 544/531, loss: 0.013799403794109821 2023-01-22 16:03:08.814276: step: 548/531, loss: 0.032146699726581573 2023-01-22 16:03:09.869282: step: 552/531, loss: 0.00934658758342266 2023-01-22 16:03:10.926443: step: 556/531, loss: 0.012887738645076752 2023-01-22 16:03:11.993800: step: 560/531, loss: 0.014045830816030502 2023-01-22 16:03:13.066678: step: 564/531, loss: 0.019588304683566093 2023-01-22 16:03:14.116480: step: 568/531, loss: 0.0014023719122633338 2023-01-22 16:03:15.178659: step: 572/531, loss: 0.061655573546886444 2023-01-22 16:03:16.220259: step: 576/531, loss: 0.011357041075825691 2023-01-22 16:03:17.273119: step: 580/531, loss: 0.008658317849040031 2023-01-22 16:03:18.350024: step: 584/531, loss: 0.005844382103532553 2023-01-22 16:03:19.412045: step: 588/531, loss: 0.0033900365233421326 2023-01-22 16:03:20.470046: step: 592/531, loss: 0.010819818824529648 2023-01-22 16:03:21.529780: step: 596/531, loss: 0.005710499361157417 2023-01-22 16:03:22.621386: step: 600/531, loss: 0.01022400613874197 2023-01-22 16:03:23.680118: step: 604/531, loss: 0.011874786578118801 2023-01-22 16:03:24.740173: step: 608/531, loss: 0.002632047748193145 2023-01-22 16:03:25.814945: step: 612/531, loss: 0.0004185358702670783 2023-01-22 16:03:26.881820: step: 616/531, loss: 0.0032836003229022026 2023-01-22 16:03:27.919640: step: 620/531, loss: 0.00014435005141422153 2023-01-22 16:03:28.973493: step: 624/531, loss: 0.00794036965817213 2023-01-22 16:03:30.056180: step: 628/531, loss: 0.003446119837462902 2023-01-22 16:03:31.096293: step: 632/531, loss: 0.0014714625431224704 
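
The "New best chinese model..." line and the "Current best result" block above suggest that the best checkpoint is tracked separately per language and keyed on the Dev 'combined' score: Chinese moves to epoch 6, while Korean and Russian keep their epoch-5 entries (the epoch-6 Dev scores 0.2491... and 0.2364... are below the stored 0.2536... and 0.2370...). A hypothetical sketch of that bookkeeping follows; the function and variable names are assumptions, not taken from the repo.

# Hypothetical per-language best-result tracking consistent with the log.
best_results = {}   # language -> {"dev": ..., "test": ..., "epoch": ...}

def maybe_update_best(language, epoch, dev_scores, test_scores):
    current = best_results.get(language)
    if current is None or dev_scores["combined"] > current["dev"]["combined"]:
        best_results[language] = {"dev": dev_scores, "test": test_scores, "epoch": epoch}
        print(f"New best {language.lower()} model...")

# Example with the epoch-6 Chinese numbers from the log:
maybe_update_best("Chinese", 6,
                  {"combined": 0.25052447552447554},
                  {"combined": 0.18869463849538157})
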
2023-01-22 16:03:32.134149: step: 636/531, loss: 0.00029268290381878614 2023-01-22 16:03:33.201762: step: 640/531, loss: 0.018005361780524254 2023-01-22 16:03:34.260392: step: 644/531, loss: 0.005744707304984331 2023-01-22 16:03:35.329793: step: 648/531, loss: 0.03598678484559059 2023-01-22 16:03:36.405477: step: 652/531, loss: 0.020917510613799095 2023-01-22 16:03:37.475653: step: 656/531, loss: 0.000643776380456984 2023-01-22 16:03:38.548852: step: 660/531, loss: 0.003372986800968647 2023-01-22 16:03:39.602877: step: 664/531, loss: 0.004733528476208448 2023-01-22 16:03:40.665518: step: 668/531, loss: 0.006730652879923582 2023-01-22 16:03:41.731097: step: 672/531, loss: 0.007533761207014322 2023-01-22 16:03:42.781422: step: 676/531, loss: 0.012822278775274754 2023-01-22 16:03:43.841217: step: 680/531, loss: 0.00032225617906078696 2023-01-22 16:03:44.914459: step: 684/531, loss: 0.005407245829701424 2023-01-22 16:03:45.975830: step: 688/531, loss: 0.0015551522374153137 2023-01-22 16:03:47.035590: step: 692/531, loss: 0.009238948114216328 2023-01-22 16:03:48.096459: step: 696/531, loss: 0.002054974203929305 2023-01-22 16:03:49.186360: step: 700/531, loss: 0.0019452502019703388 2023-01-22 16:03:50.238188: step: 704/531, loss: 0.0022884036879986525 2023-01-22 16:03:51.293969: step: 708/531, loss: 0.004203475546091795 2023-01-22 16:03:52.383206: step: 712/531, loss: 0.005513494368642569 2023-01-22 16:03:53.439874: step: 716/531, loss: 0.001011253334581852 2023-01-22 16:03:54.497920: step: 720/531, loss: 0.009160853922367096 2023-01-22 16:03:55.558940: step: 724/531, loss: 0.003776432015001774 2023-01-22 16:03:56.632146: step: 728/531, loss: 0.006054773926734924 2023-01-22 16:03:57.698108: step: 732/531, loss: 0.004920446313917637 2023-01-22 16:03:58.749166: step: 736/531, loss: 0.0011047249427065253 2023-01-22 16:03:59.816753: step: 740/531, loss: 0.0015072107780724764 2023-01-22 16:04:00.868423: step: 744/531, loss: 0.0011187985073775053 2023-01-22 16:04:01.948837: step: 748/531, loss: 0.013454984873533249 2023-01-22 16:04:03.010388: step: 752/531, loss: 0.013698996044695377 2023-01-22 16:04:04.071605: step: 756/531, loss: 0.006787206511944532 2023-01-22 16:04:05.135825: step: 760/531, loss: 0.0010444317013025284 2023-01-22 16:04:06.207279: step: 764/531, loss: 0.011175837367773056 2023-01-22 16:04:07.269127: step: 768/531, loss: 8.234030974563211e-05 2023-01-22 16:04:08.333612: step: 772/531, loss: 0.005729043390601873 2023-01-22 16:04:09.372450: step: 776/531, loss: 0.003512623952701688 2023-01-22 16:04:10.439308: step: 780/531, loss: 0.011913996189832687 2023-01-22 16:04:11.496024: step: 784/531, loss: 0.0031369433272629976 2023-01-22 16:04:12.533657: step: 788/531, loss: 0.01617421954870224 2023-01-22 16:04:13.585961: step: 792/531, loss: 0.010591531172394753 2023-01-22 16:04:14.642978: step: 796/531, loss: 0.006628000643104315 2023-01-22 16:04:15.696069: step: 800/531, loss: 0.037484798580408096 2023-01-22 16:04:16.751403: step: 804/531, loss: 0.027918970212340355 2023-01-22 16:04:17.791761: step: 808/531, loss: 0.0006715216441079974 2023-01-22 16:04:18.847220: step: 812/531, loss: 0.025618532672524452 2023-01-22 16:04:19.902966: step: 816/531, loss: 0.034035008400678635 2023-01-22 16:04:20.949764: step: 820/531, loss: 0.011319356970489025 2023-01-22 16:04:22.027381: step: 824/531, loss: 0.021084044128656387 2023-01-22 16:04:23.057471: step: 828/531, loss: 0.000995192094705999 2023-01-22 16:04:24.108077: step: 832/531, loss: 0.0007222912972792983 2023-01-22 16:04:25.169699: step: 
836/531, loss: 0.004113358911126852 2023-01-22 16:04:26.219865: step: 840/531, loss: 0.019447915256023407 2023-01-22 16:04:27.273950: step: 844/531, loss: 0.016118677332997322 2023-01-22 16:04:28.349814: step: 848/531, loss: 0.013019963167607784 2023-01-22 16:04:29.417669: step: 852/531, loss: 0.005787972826510668 2023-01-22 16:04:30.488486: step: 856/531, loss: 0.0003658224013634026 2023-01-22 16:04:31.555364: step: 860/531, loss: 0.007707090582698584 2023-01-22 16:04:32.628526: step: 864/531, loss: 0.0012676366604864597 2023-01-22 16:04:33.676972: step: 868/531, loss: 0.004137119743973017 2023-01-22 16:04:34.733312: step: 872/531, loss: 0.0014692473923787475 2023-01-22 16:04:35.791712: step: 876/531, loss: 0.0034385465551167727 2023-01-22 16:04:36.839936: step: 880/531, loss: 0.002908271737396717 2023-01-22 16:04:37.890690: step: 884/531, loss: 0.011820798739790916 2023-01-22 16:04:38.941624: step: 888/531, loss: 0.003473530290648341 2023-01-22 16:04:40.004009: step: 892/531, loss: 0.008599039167165756 2023-01-22 16:04:41.045409: step: 896/531, loss: 0.0031041253823786974 2023-01-22 16:04:42.110068: step: 900/531, loss: 0.008347421884536743 2023-01-22 16:04:43.182171: step: 904/531, loss: 0.007552129682153463 2023-01-22 16:04:44.234643: step: 908/531, loss: 0.002162358956411481 2023-01-22 16:04:45.313504: step: 912/531, loss: 0.0024050199426710606 2023-01-22 16:04:46.362925: step: 916/531, loss: 0.023277850821614265 2023-01-22 16:04:47.416437: step: 920/531, loss: 0.002533361315727234 2023-01-22 16:04:48.484935: step: 924/531, loss: 0.010061078704893589 2023-01-22 16:04:49.551769: step: 928/531, loss: 0.0019515901803970337 2023-01-22 16:04:50.605125: step: 932/531, loss: 0.007701380643993616 2023-01-22 16:04:51.669253: step: 936/531, loss: 0.01530991867184639 2023-01-22 16:04:52.723046: step: 940/531, loss: 0.0031159475911408663 2023-01-22 16:04:53.774022: step: 944/531, loss: 0.0007430640398524702 2023-01-22 16:04:54.840876: step: 948/531, loss: 0.005443783476948738 2023-01-22 16:04:55.906328: step: 952/531, loss: 0.006688730791211128 2023-01-22 16:04:56.953140: step: 956/531, loss: 0.00014675510465167463 2023-01-22 16:04:58.007132: step: 960/531, loss: 0.006066479254513979 2023-01-22 16:04:59.054215: step: 964/531, loss: 0.004118728917092085 2023-01-22 16:05:00.112797: step: 968/531, loss: 0.012234709225594997 2023-01-22 16:05:01.170063: step: 972/531, loss: 0.015265420079231262 2023-01-22 16:05:02.236104: step: 976/531, loss: 0.0037207254208624363 2023-01-22 16:05:03.300913: step: 980/531, loss: 0.006244618911296129 2023-01-22 16:05:04.352577: step: 984/531, loss: 0.005643168464303017 2023-01-22 16:05:05.422932: step: 988/531, loss: 0.0017455547349527478 2023-01-22 16:05:06.472030: step: 992/531, loss: 0.014706265181303024 2023-01-22 16:05:07.554006: step: 996/531, loss: 0.0038010096177458763 2023-01-22 16:05:08.623217: step: 1000/531, loss: 0.0011887227883562446 2023-01-22 16:05:09.704702: step: 1004/531, loss: 0.011953913606703281 2023-01-22 16:05:10.750819: step: 1008/531, loss: 0.000768556201364845 2023-01-22 16:05:11.814358: step: 1012/531, loss: 0.008452901616692543 2023-01-22 16:05:12.874749: step: 1016/531, loss: 0.008053474128246307 2023-01-22 16:05:13.930673: step: 1020/531, loss: 0.007288651540875435 2023-01-22 16:05:14.997221: step: 1024/531, loss: 0.004656703677028418 2023-01-22 16:05:16.052478: step: 1028/531, loss: 0.0014144114684313536 2023-01-22 16:05:17.114912: step: 1032/531, loss: 0.007407342549413443 2023-01-22 16:05:18.158916: step: 1036/531, loss: 
0.003363720141351223 2023-01-22 16:05:19.218947: step: 1040/531, loss: 1.0100815416080877e-05 2023-01-22 16:05:20.277062: step: 1044/531, loss: 0.008445918560028076 2023-01-22 16:05:21.329370: step: 1048/531, loss: 0.00014292298874352127 2023-01-22 16:05:22.380597: step: 1052/531, loss: 0.007359790150076151 2023-01-22 16:05:23.448685: step: 1056/531, loss: 0.017630619928240776 2023-01-22 16:05:24.502285: step: 1060/531, loss: 0.0054226163774728775 2023-01-22 16:05:25.526415: step: 1064/531, loss: 0.005007212050259113 2023-01-22 16:05:26.582623: step: 1068/531, loss: 6.087717702030204e-05 2023-01-22 16:05:27.639671: step: 1072/531, loss: 0.0036626989021897316 2023-01-22 16:05:28.705208: step: 1076/531, loss: 0.007369166240096092 2023-01-22 16:05:29.772692: step: 1080/531, loss: 0.0074149626307189465 2023-01-22 16:05:30.833787: step: 1084/531, loss: 0.0033081865403801203 2023-01-22 16:05:31.889628: step: 1088/531, loss: 0.0065081436187028885 2023-01-22 16:05:32.962450: step: 1092/531, loss: 0.0027745591942220926 2023-01-22 16:05:34.018353: step: 1096/531, loss: 0.004033735487610102 2023-01-22 16:05:35.064086: step: 1100/531, loss: 0.004824902396649122 2023-01-22 16:05:36.133445: step: 1104/531, loss: 0.0027580666355788708 2023-01-22 16:05:37.180660: step: 1108/531, loss: 0.004225380718708038 2023-01-22 16:05:38.225391: step: 1112/531, loss: 0.0014375685714185238 2023-01-22 16:05:39.291149: step: 1116/531, loss: 0.0006143644568510354 2023-01-22 16:05:40.337842: step: 1120/531, loss: 0.015592087991535664 2023-01-22 16:05:41.399953: step: 1124/531, loss: 0.007434329017996788 2023-01-22 16:05:42.477402: step: 1128/531, loss: 0.002444205339998007 2023-01-22 16:05:43.522946: step: 1132/531, loss: 0.018612677231431007 2023-01-22 16:05:44.579010: step: 1136/531, loss: 0.007736400701105595 2023-01-22 16:05:45.629488: step: 1140/531, loss: 0.004145750775933266 2023-01-22 16:05:46.687612: step: 1144/531, loss: 0.012243203818798065 2023-01-22 16:05:47.751656: step: 1148/531, loss: 0.015129510313272476 2023-01-22 16:05:48.823186: step: 1152/531, loss: 0.020059751346707344 2023-01-22 16:05:49.852664: step: 1156/531, loss: 0.00022447883384302258 2023-01-22 16:05:50.927758: step: 1160/531, loss: 0.0020700094755738974 2023-01-22 16:05:51.976497: step: 1164/531, loss: 0.01687202975153923 2023-01-22 16:05:53.034917: step: 1168/531, loss: 0.006159815937280655 2023-01-22 16:05:54.080830: step: 1172/531, loss: 0.010063309222459793 2023-01-22 16:05:55.123070: step: 1176/531, loss: 0.002743379445746541 2023-01-22 16:05:56.187716: step: 1180/531, loss: 0.0033418331295251846 2023-01-22 16:05:57.261585: step: 1184/531, loss: 0.025549838319420815 2023-01-22 16:05:58.323046: step: 1188/531, loss: 0.003771526040509343 2023-01-22 16:05:59.389758: step: 1192/531, loss: 0.00991787388920784 2023-01-22 16:06:00.473290: step: 1196/531, loss: 0.015510828234255314 2023-01-22 16:06:01.535486: step: 1200/531, loss: 0.011565836146473885 2023-01-22 16:06:02.606050: step: 1204/531, loss: 0.04709074646234512 2023-01-22 16:06:03.656319: step: 1208/531, loss: 0.018286455422639847 2023-01-22 16:06:04.726073: step: 1212/531, loss: 0.0026013979222625494 2023-01-22 16:06:05.788164: step: 1216/531, loss: 0.0042064860463142395 2023-01-22 16:06:06.850414: step: 1220/531, loss: 0.006525625474750996 2023-01-22 16:06:07.919336: step: 1224/531, loss: 0.00807811040431261 2023-01-22 16:06:08.968040: step: 1228/531, loss: 0.004882653709501028 2023-01-22 16:06:10.038906: step: 1232/531, loss: 0.019261857494711876 2023-01-22 16:06:11.091949: step: 
1236/531, loss: 0.004634142387658358 2023-01-22 16:06:12.179716: step: 1240/531, loss: 0.0034124834928661585 2023-01-22 16:06:13.233188: step: 1244/531, loss: 0.0036466929595917463 2023-01-22 16:06:14.292402: step: 1248/531, loss: 0.004509231075644493 2023-01-22 16:06:15.344877: step: 1252/531, loss: 0.000947600114159286 2023-01-22 16:06:16.397130: step: 1256/531, loss: 0.009373374283313751 2023-01-22 16:06:17.454340: step: 1260/531, loss: 0.021008070558309555 2023-01-22 16:06:18.516843: step: 1264/531, loss: 0.010760831646621227 2023-01-22 16:06:19.610452: step: 1268/531, loss: 0.004743773024529219 2023-01-22 16:06:20.658846: step: 1272/531, loss: 0.021553313359618187 2023-01-22 16:06:21.728706: step: 1276/531, loss: 0.003004447789862752 2023-01-22 16:06:22.767616: step: 1280/531, loss: 0.00231836992315948 2023-01-22 16:06:23.811637: step: 1284/531, loss: 0.029413830488920212 2023-01-22 16:06:24.878291: step: 1288/531, loss: 0.008015123195946217 2023-01-22 16:06:25.948572: step: 1292/531, loss: 0.006133352406322956 2023-01-22 16:06:27.003919: step: 1296/531, loss: 0.004206741228699684 2023-01-22 16:06:28.069195: step: 1300/531, loss: 0.0022905052173882723 2023-01-22 16:06:29.119230: step: 1304/531, loss: 0.004863007925450802 2023-01-22 16:06:30.164925: step: 1308/531, loss: 0.008591294288635254 2023-01-22 16:06:31.221870: step: 1312/531, loss: 0.008686983026564121 2023-01-22 16:06:32.274025: step: 1316/531, loss: 0.0014601402217522264 2023-01-22 16:06:33.335614: step: 1320/531, loss: 0.009601959027349949 2023-01-22 16:06:34.389338: step: 1324/531, loss: 0.016543062403798103 2023-01-22 16:06:35.442354: step: 1328/531, loss: 0.006538198329508305 2023-01-22 16:06:36.504811: step: 1332/531, loss: 0.010706229135394096 2023-01-22 16:06:37.548117: step: 1336/531, loss: 0.007833709940314293 2023-01-22 16:06:38.595276: step: 1340/531, loss: 0.014195882715284824 2023-01-22 16:06:39.651636: step: 1344/531, loss: 0.0029244590550661087 2023-01-22 16:06:40.709454: step: 1348/531, loss: 0.0006867393385618925 2023-01-22 16:06:41.777021: step: 1352/531, loss: 0.009999522008001804 2023-01-22 16:06:42.843015: step: 1356/531, loss: 0.006914444267749786 2023-01-22 16:06:43.907060: step: 1360/531, loss: 0.001022397424094379 2023-01-22 16:06:44.965947: step: 1364/531, loss: 0.011720074340701103 2023-01-22 16:06:46.005631: step: 1368/531, loss: 0.005520156119018793 2023-01-22 16:06:47.057583: step: 1372/531, loss: 0.0028696891386061907 2023-01-22 16:06:48.126612: step: 1376/531, loss: 0.016635971143841743 2023-01-22 16:06:49.169684: step: 1380/531, loss: 0.0003744933637790382 2023-01-22 16:06:50.231129: step: 1384/531, loss: 0.0008608332718722522 2023-01-22 16:06:51.288929: step: 1388/531, loss: 0.012566376477479935 2023-01-22 16:06:52.359099: step: 1392/531, loss: 0.020176827907562256 2023-01-22 16:06:53.415265: step: 1396/531, loss: 0.002106319647282362 2023-01-22 16:06:54.467923: step: 1400/531, loss: 0.033433765172958374 2023-01-22 16:06:55.535344: step: 1404/531, loss: 0.009769782423973083 2023-01-22 16:06:56.598031: step: 1408/531, loss: 0.0005209156661294401 2023-01-22 16:06:57.650469: step: 1412/531, loss: 0.0008644047193229198 2023-01-22 16:06:58.700186: step: 1416/531, loss: 0.004266010597348213 2023-01-22 16:06:59.754360: step: 1420/531, loss: 0.003032218897715211 2023-01-22 16:07:00.813407: step: 1424/531, loss: 0.0023113335482776165 2023-01-22 16:07:01.866365: step: 1428/531, loss: 0.0018706824630498886 2023-01-22 16:07:02.918401: step: 1432/531, loss: 0.008413641713559628 2023-01-22 
16:07:03.961780: step: 1436/531, loss: 0.015643928200006485 2023-01-22 16:07:05.037278: step: 1440/531, loss: 0.01871102675795555 2023-01-22 16:07:06.101951: step: 1444/531, loss: 0.000810525962151587 2023-01-22 16:07:07.158840: step: 1448/531, loss: 0.026164840906858444 2023-01-22 16:07:08.214768: step: 1452/531, loss: 0.0009098420850932598 2023-01-22 16:07:09.269863: step: 1456/531, loss: 0.01916046440601349 2023-01-22 16:07:10.327920: step: 1460/531, loss: 0.0034798732958734035 2023-01-22 16:07:11.400663: step: 1464/531, loss: 0.0021816177759319544 2023-01-22 16:07:12.454456: step: 1468/531, loss: 0.007542390376329422 2023-01-22 16:07:13.525126: step: 1472/531, loss: 0.004269434604793787 2023-01-22 16:07:14.584558: step: 1476/531, loss: 0.008925436064600945 2023-01-22 16:07:15.628071: step: 1480/531, loss: 0.0013633263297379017 2023-01-22 16:07:16.679636: step: 1484/531, loss: 0.0023207608610391617 2023-01-22 16:07:17.754154: step: 1488/531, loss: 0.004495031666010618 2023-01-22 16:07:18.805191: step: 1492/531, loss: 0.0 2023-01-22 16:07:19.877120: step: 1496/531, loss: 0.016254903748631477 2023-01-22 16:07:20.952260: step: 1500/531, loss: 0.003299233503639698 2023-01-22 16:07:22.003211: step: 1504/531, loss: 0.0007362092728726566 2023-01-22 16:07:23.060613: step: 1508/531, loss: 0.0020259853918105364 2023-01-22 16:07:24.120607: step: 1512/531, loss: 0.0040224818512797356 2023-01-22 16:07:25.179967: step: 1516/531, loss: 0.003701281500980258 2023-01-22 16:07:26.240325: step: 1520/531, loss: 0.004383988678455353 2023-01-22 16:07:27.284832: step: 1524/531, loss: 9.619694901630282e-05 2023-01-22 16:07:28.333926: step: 1528/531, loss: 0.025479143485426903 2023-01-22 16:07:29.391432: step: 1532/531, loss: 0.0044145225547254086 2023-01-22 16:07:30.446703: step: 1536/531, loss: 0.01800454407930374 2023-01-22 16:07:31.512187: step: 1540/531, loss: 0.01497863419353962 2023-01-22 16:07:32.573223: step: 1544/531, loss: 0.0042779576033353806 2023-01-22 16:07:33.647291: step: 1548/531, loss: 0.008412308990955353 2023-01-22 16:07:34.696730: step: 1552/531, loss: 0.010228274390101433 2023-01-22 16:07:35.769920: step: 1556/531, loss: 0.0026888977736234665 2023-01-22 16:07:36.814928: step: 1560/531, loss: 0.016876718029379845 2023-01-22 16:07:37.864670: step: 1564/531, loss: 7.888342952355742e-05 2023-01-22 16:07:38.922625: step: 1568/531, loss: 0.07939287275075912 2023-01-22 16:07:39.985108: step: 1572/531, loss: 0.00013619325181934983 2023-01-22 16:07:41.039562: step: 1576/531, loss: 0.0012743815314024687 2023-01-22 16:07:42.095711: step: 1580/531, loss: 0.000544706650543958 2023-01-22 16:07:43.157382: step: 1584/531, loss: 0.02795686200261116 2023-01-22 16:07:44.217743: step: 1588/531, loss: 0.005173315294086933 2023-01-22 16:07:45.282939: step: 1592/531, loss: 0.0015534008853137493 2023-01-22 16:07:46.337215: step: 1596/531, loss: 0.00313403713516891 2023-01-22 16:07:47.393963: step: 1600/531, loss: 0.009784866124391556 2023-01-22 16:07:48.475792: step: 1604/531, loss: 0.0021912362426519394 2023-01-22 16:07:49.526826: step: 1608/531, loss: 0.006951576564460993 2023-01-22 16:07:50.580845: step: 1612/531, loss: 0.0006216104375198483 2023-01-22 16:07:51.651624: step: 1616/531, loss: 0.008630983531475067 2023-01-22 16:07:52.718155: step: 1620/531, loss: 0.0025050377007573843 2023-01-22 16:07:53.790220: step: 1624/531, loss: 0.001553927781060338 2023-01-22 16:07:54.833295: step: 1628/531, loss: 0.001910782535560429 2023-01-22 16:07:55.897849: step: 1632/531, loss: 0.0025655110366642475 2023-01-22 
16:07:56.968003: step: 1636/531, loss: 0.004358029458671808 2023-01-22 16:07:58.032547: step: 1640/531, loss: 0.001964786322787404 2023-01-22 16:07:59.088627: step: 1644/531, loss: 0.002564901951700449 2023-01-22 16:08:00.140796: step: 1648/531, loss: 0.0026567408349364996 2023-01-22 16:08:01.200235: step: 1652/531, loss: 2.052164745691698e-05 2023-01-22 16:08:02.264005: step: 1656/531, loss: 0.008762887679040432 2023-01-22 16:08:03.330491: step: 1660/531, loss: 0.001838785712607205 2023-01-22 16:08:04.399036: step: 1664/531, loss: 0.02431187778711319 2023-01-22 16:08:05.462411: step: 1668/531, loss: 0.018949678167700768 2023-01-22 16:08:06.533905: step: 1672/531, loss: 0.006652952171862125 2023-01-22 16:08:07.578009: step: 1676/531, loss: 3.775200457312167e-05 2023-01-22 16:08:08.630460: step: 1680/531, loss: 0.004154319409281015 2023-01-22 16:08:09.691159: step: 1684/531, loss: 0.018371930345892906 2023-01-22 16:08:10.735125: step: 1688/531, loss: 0.006730475928634405 2023-01-22 16:08:11.805457: step: 1692/531, loss: 0.010968293994665146 2023-01-22 16:08:12.854628: step: 1696/531, loss: 0.0025148936547338963 2023-01-22 16:08:13.915174: step: 1700/531, loss: 0.005015175323933363 2023-01-22 16:08:14.966835: step: 1704/531, loss: 0.004874736536294222 2023-01-22 16:08:16.019770: step: 1708/531, loss: 0.009839463979005814 2023-01-22 16:08:17.072633: step: 1712/531, loss: 0.0034756127279251814 2023-01-22 16:08:18.133997: step: 1716/531, loss: 0.032976653426885605 2023-01-22 16:08:19.191289: step: 1720/531, loss: 0.0012503552716225386 2023-01-22 16:08:20.262340: step: 1724/531, loss: 0.003754228353500366 2023-01-22 16:08:21.323604: step: 1728/531, loss: 0.004127139691263437 2023-01-22 16:08:22.386241: step: 1732/531, loss: 0.03128005936741829 2023-01-22 16:08:23.433240: step: 1736/531, loss: 0.003958693705499172 2023-01-22 16:08:24.491177: step: 1740/531, loss: 0.001204545609652996 2023-01-22 16:08:25.554164: step: 1744/531, loss: 0.034654177725315094 2023-01-22 16:08:26.614467: step: 1748/531, loss: 0.013512332923710346 2023-01-22 16:08:27.687383: step: 1752/531, loss: 0.006131541449576616 2023-01-22 16:08:28.756448: step: 1756/531, loss: 0.0015703821554780006 2023-01-22 16:08:29.802037: step: 1760/531, loss: 0.00391714833676815 2023-01-22 16:08:30.840097: step: 1764/531, loss: 0.0015651213470846415 2023-01-22 16:08:31.891871: step: 1768/531, loss: 4.7727189667057246e-05 2023-01-22 16:08:32.996597: step: 1772/531, loss: 0.0055036842823028564 2023-01-22 16:08:34.053781: step: 1776/531, loss: 0.011021219193935394 2023-01-22 16:08:35.121804: step: 1780/531, loss: 0.0022489370312541723 2023-01-22 16:08:36.178849: step: 1784/531, loss: 0.00528279785066843 2023-01-22 16:08:37.239742: step: 1788/531, loss: 0.006164574529975653 2023-01-22 16:08:38.294444: step: 1792/531, loss: 0.019493382424116135 2023-01-22 16:08:39.340935: step: 1796/531, loss: 0.0005644686170853674 2023-01-22 16:08:40.411413: step: 1800/531, loss: 0.0072881258092820644 2023-01-22 16:08:41.483043: step: 1804/531, loss: 0.004288215655833483 2023-01-22 16:08:42.571144: step: 1808/531, loss: 0.010092239826917648 2023-01-22 16:08:43.628582: step: 1812/531, loss: 0.004266197327524424 2023-01-22 16:08:44.684505: step: 1816/531, loss: 0.0014405458932742476 2023-01-22 16:08:45.748201: step: 1820/531, loss: 0.003679243614897132 2023-01-22 16:08:46.818941: step: 1824/531, loss: 0.015139496885240078 2023-01-22 16:08:47.877506: step: 1828/531, loss: 0.024759916588664055 2023-01-22 16:08:48.948224: step: 1832/531, loss: 0.021077243611216545 
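
Each epoch's step log closes with a single summary figure ("Loss: 0.010" for epoch 6 above, "Loss: 0.008" for epoch 7 below), presumably the mean of the per-step losses printed during that epoch. The repo-independent helper below pulls the loss values out of lines in this format and averages them; the two example records are copied from the log above.

# Parse "step: N/531, loss: X" records and return the mean loss.
import re

STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9eE.+-]+)")

def mean_step_loss(log_text: str) -> float:
    losses = [float(m.group(1)) for m in STEP_RE.finditer(log_text)]
    return sum(losses) / len(losses)

example = ("2023-01-22 16:08:47.877506: step: 1828/531, loss: 0.024759916588664055 "
           "2023-01-22 16:08:48.948224: step: 1832/531, loss: 0.021077243611216545")
print(mean_step_loss(example))   # mean of the two example losses
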
2023-01-22 16:08:50.004798: step: 1836/531, loss: 0.026106897741556168 2023-01-22 16:08:51.066101: step: 1840/531, loss: 0.05264068394899368 2023-01-22 16:08:52.127946: step: 1844/531, loss: 0.005715729668736458 2023-01-22 16:08:53.190534: step: 1848/531, loss: 0.0052014864049851894 2023-01-22 16:08:54.256994: step: 1852/531, loss: 0.00589185394346714 2023-01-22 16:08:55.321494: step: 1856/531, loss: 0.003483076114207506 2023-01-22 16:08:56.395248: step: 1860/531, loss: 0.0023939392995089293 2023-01-22 16:08:57.460324: step: 1864/531, loss: 0.008688930422067642 2023-01-22 16:08:58.514102: step: 1868/531, loss: 0.0013840004103258252 2023-01-22 16:08:59.581124: step: 1872/531, loss: 0.001034295535646379 2023-01-22 16:09:00.636553: step: 1876/531, loss: 0.011744841001927853 2023-01-22 16:09:01.707176: step: 1880/531, loss: 0.0059185572899878025 2023-01-22 16:09:02.780816: step: 1884/531, loss: 0.025036606937646866 2023-01-22 16:09:03.831555: step: 1888/531, loss: 0.007557482458651066 2023-01-22 16:09:04.888467: step: 1892/531, loss: 0.0047930465079844 2023-01-22 16:09:05.951947: step: 1896/531, loss: 0.00020706787472590804 2023-01-22 16:09:07.012063: step: 1900/531, loss: 0.0030501841101795435 2023-01-22 16:09:08.088280: step: 1904/531, loss: 0.004785385448485613 2023-01-22 16:09:09.169085: step: 1908/531, loss: 0.03510251268744469 2023-01-22 16:09:10.249089: step: 1912/531, loss: 0.061462949961423874 2023-01-22 16:09:11.323966: step: 1916/531, loss: 0.0007456626044586301 2023-01-22 16:09:12.392832: step: 1920/531, loss: 0.001428638701327145 2023-01-22 16:09:13.450316: step: 1924/531, loss: 0.0060381353832781315 2023-01-22 16:09:14.536492: step: 1928/531, loss: 0.0038770250976085663 2023-01-22 16:09:15.584525: step: 1932/531, loss: 0.004950044676661491 2023-01-22 16:09:16.647203: step: 1936/531, loss: 0.008088234812021255 2023-01-22 16:09:17.713544: step: 1940/531, loss: 0.00382617418654263 2023-01-22 16:09:18.785715: step: 1944/531, loss: 0.002881772117689252 2023-01-22 16:09:19.842904: step: 1948/531, loss: 0.01000647060573101 2023-01-22 16:09:20.900425: step: 1952/531, loss: 0.007367494981735945 2023-01-22 16:09:21.967824: step: 1956/531, loss: 0.005698645021766424 2023-01-22 16:09:23.024415: step: 1960/531, loss: 0.00498863123357296 2023-01-22 16:09:24.088392: step: 1964/531, loss: 0.00729562621563673 2023-01-22 16:09:25.154113: step: 1968/531, loss: 0.004644978325814009 2023-01-22 16:09:26.221596: step: 1972/531, loss: 0.0009126242948696017 2023-01-22 16:09:27.296244: step: 1976/531, loss: 0.04344611242413521 2023-01-22 16:09:28.359532: step: 1980/531, loss: 0.008268143981695175 2023-01-22 16:09:29.406308: step: 1984/531, loss: 0.004788004327565432 2023-01-22 16:09:30.470597: step: 1988/531, loss: 0.0017148415790870786 2023-01-22 16:09:31.523134: step: 1992/531, loss: 0.00936845038086176 2023-01-22 16:09:32.588165: step: 1996/531, loss: 0.008106195367872715 2023-01-22 16:09:33.648621: step: 2000/531, loss: 0.0025294064544141293 2023-01-22 16:09:34.722816: step: 2004/531, loss: 0.005026867613196373 2023-01-22 16:09:35.788731: step: 2008/531, loss: 0.0017298514721915126 2023-01-22 16:09:36.856999: step: 2012/531, loss: 0.008364005014300346 2023-01-22 16:09:37.931256: step: 2016/531, loss: 0.002123839920386672 2023-01-22 16:09:38.991649: step: 2020/531, loss: 0.015202515758574009 2023-01-22 16:09:40.051529: step: 2024/531, loss: 1.3222117559053004e-05 2023-01-22 16:09:41.122275: step: 2028/531, loss: 0.004475335590541363 2023-01-22 16:09:42.198368: step: 2032/531, loss: 
0.00017878945800475776 2023-01-22 16:09:43.270123: step: 2036/531, loss: 0.027553226798772812 2023-01-22 16:09:44.332536: step: 2040/531, loss: 0.002366541652008891 2023-01-22 16:09:45.385052: step: 2044/531, loss: 0.01303250901401043 2023-01-22 16:09:46.437885: step: 2048/531, loss: 0.004782837349921465 2023-01-22 16:09:47.503064: step: 2052/531, loss: 0.03559749200940132 2023-01-22 16:09:48.573424: step: 2056/531, loss: 0.02289102040231228 2023-01-22 16:09:49.641710: step: 2060/531, loss: 0.016103940084576607 2023-01-22 16:09:50.706459: step: 2064/531, loss: 0.005509399808943272 2023-01-22 16:09:51.775198: step: 2068/531, loss: 0.005110845435410738 2023-01-22 16:09:52.833958: step: 2072/531, loss: 0.0029792129062116146 2023-01-22 16:09:53.880378: step: 2076/531, loss: 0.0003720921231433749 2023-01-22 16:09:54.940931: step: 2080/531, loss: 0.020529035478830338 2023-01-22 16:09:56.011810: step: 2084/531, loss: 0.002365376567468047 2023-01-22 16:09:57.071802: step: 2088/531, loss: 0.007025447674095631 2023-01-22 16:09:58.126945: step: 2092/531, loss: 0.003251168876886368 2023-01-22 16:09:59.181443: step: 2096/531, loss: 0.0023603267036378384 2023-01-22 16:10:00.267039: step: 2100/531, loss: 0.007606369908899069 2023-01-22 16:10:01.312771: step: 2104/531, loss: 0.00882585160434246 2023-01-22 16:10:02.375001: step: 2108/531, loss: 0.0031348522752523422 2023-01-22 16:10:03.433436: step: 2112/531, loss: 0.0017807622207328677 2023-01-22 16:10:04.495027: step: 2116/531, loss: 0.030992040410637856 2023-01-22 16:10:05.567305: step: 2120/531, loss: 0.008543238043785095 2023-01-22 16:10:06.615598: step: 2124/531, loss: 0.0154934236779809 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35395084566596197, 'r': 0.31708096590909096, 'f1': 0.33450299700299707}, 'combined': 0.24647589252852414, 'stategy': 1, 'epoch': 7} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33639691667740657, 'r': 0.27658627354974497, 'f1': 0.30357364149887806}, 'combined': 0.19008816804135356, 'stategy': 1, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33272089097496704, 'r': 0.348504614455753, 'f1': 0.34042990142387736}, 'combined': 0.2508430852596991, 'stategy': 1, 'epoch': 7} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3635255856208373, 'r': 0.301572172020893, 'f1': 0.3296634191928737}, 'combined': 0.20429845696459775, 'stategy': 1, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31453686493184635, 'r': 0.3211021505376344, 'f1': 0.31778560250391236}, 'combined': 0.23415781237130384, 'stategy': 1, 'epoch': 7} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36542334481673716, 'r': 0.2875191809714346, 'f1': 0.32182379508127207}, 'combined': 0.21348707198460623, 'stategy': 1, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'stategy': 
1, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:12:34.883572: step: 4/531, loss: 0.009521507658064365 2023-01-22 16:12:35.957644: step: 8/531, loss: 0.0045917765237390995 2023-01-22 16:12:37.030745: step: 12/531, loss: 0.0001824085775297135 2023-01-22 16:12:38.085042: step: 16/531, loss: 0.001161676598712802 2023-01-22 16:12:39.140619: step: 20/531, loss: 0.00173962174449116 2023-01-22 16:12:40.196027: step: 24/531, loss: 0.004030345939099789 2023-01-22 16:12:41.240263: step: 28/531, loss: 0.005910784471780062 2023-01-22 16:12:42.297900: step: 32/531, loss: 0.0022268928587436676 2023-01-22 16:12:43.368434: step: 36/531, loss: 0.001641519833356142 2023-01-22 16:12:44.435276: step: 40/531, loss: 0.005174714606255293 2023-01-22 16:12:45.476181: step: 44/531, loss: 0.00026218785205855966 2023-01-22 16:12:46.537823: step: 48/531, loss: 0.005118938162922859 2023-01-22 16:12:47.601008: step: 52/531, 
loss: 0.0077238744124770164 2023-01-22 16:12:48.650658: step: 56/531, loss: 0.0056335655972361565 2023-01-22 16:12:49.702918: step: 60/531, loss: 0.01059556845575571 2023-01-22 16:12:50.771655: step: 64/531, loss: 0.01633615233004093 2023-01-22 16:12:51.819390: step: 68/531, loss: 0.00037927928497083485 2023-01-22 16:12:52.866075: step: 72/531, loss: 0.011826412752270699 2023-01-22 16:12:53.919445: step: 76/531, loss: 0.0026891108136624098 2023-01-22 16:12:54.969354: step: 80/531, loss: 0.02325444109737873 2023-01-22 16:12:56.013115: step: 84/531, loss: 0.0018955161795020103 2023-01-22 16:12:57.071142: step: 88/531, loss: 0.004414505325257778 2023-01-22 16:12:58.119990: step: 92/531, loss: 0.028098415583372116 2023-01-22 16:12:59.180115: step: 96/531, loss: 0.016535473987460136 2023-01-22 16:13:00.246716: step: 100/531, loss: 0.016983775421977043 2023-01-22 16:13:01.294090: step: 104/531, loss: 0.005316626280546188 2023-01-22 16:13:02.357651: step: 108/531, loss: 0.008518573828041553 2023-01-22 16:13:03.434705: step: 112/531, loss: 0.0006991970585659146 2023-01-22 16:13:04.498473: step: 116/531, loss: 0.012890145182609558 2023-01-22 16:13:05.559554: step: 120/531, loss: 0.008623485453426838 2023-01-22 16:13:06.625467: step: 124/531, loss: 0.002367323264479637 2023-01-22 16:13:07.671446: step: 128/531, loss: 0.00504923053085804 2023-01-22 16:13:08.753734: step: 132/531, loss: 0.003036619396880269 2023-01-22 16:13:09.804354: step: 136/531, loss: 0.00011985273158643395 2023-01-22 16:13:10.857857: step: 140/531, loss: 0.02278294786810875 2023-01-22 16:13:11.931531: step: 144/531, loss: 0.0015168596291914582 2023-01-22 16:13:12.993340: step: 148/531, loss: 0.004891501739621162 2023-01-22 16:13:14.034226: step: 152/531, loss: 0.0023071442265063524 2023-01-22 16:13:15.071730: step: 156/531, loss: 0.0030005318112671375 2023-01-22 16:13:16.125033: step: 160/531, loss: 0.0012034763349220157 2023-01-22 16:13:17.190695: step: 164/531, loss: 0.0049870070070028305 2023-01-22 16:13:18.253787: step: 168/531, loss: 0.018340349197387695 2023-01-22 16:13:19.331556: step: 172/531, loss: 0.009047003462910652 2023-01-22 16:13:20.375399: step: 176/531, loss: 0.006398437079042196 2023-01-22 16:13:21.431162: step: 180/531, loss: 0.017008837312459946 2023-01-22 16:13:22.478534: step: 184/531, loss: 0.002017815364524722 2023-01-22 16:13:23.530654: step: 188/531, loss: 0.004317536484450102 2023-01-22 16:13:24.604722: step: 192/531, loss: 0.0027937027625739574 2023-01-22 16:13:25.660987: step: 196/531, loss: 0.011573059484362602 2023-01-22 16:13:26.735498: step: 200/531, loss: 0.003688855329528451 2023-01-22 16:13:27.810100: step: 204/531, loss: 0.008166870102286339 2023-01-22 16:13:28.874342: step: 208/531, loss: 0.0034504185896366835 2023-01-22 16:13:29.944928: step: 212/531, loss: 0.009649948216974735 2023-01-22 16:13:31.007701: step: 216/531, loss: 0.003355876076966524 2023-01-22 16:13:32.111313: step: 220/531, loss: 0.0016142718959599733 2023-01-22 16:13:33.203673: step: 224/531, loss: 0.006700103171169758 2023-01-22 16:13:34.252979: step: 228/531, loss: 0.0058255912736058235 2023-01-22 16:13:35.297006: step: 232/531, loss: 0.004777774214744568 2023-01-22 16:13:36.359386: step: 236/531, loss: 0.003245204919949174 2023-01-22 16:13:37.430427: step: 240/531, loss: 0.0020778810139745474 2023-01-22 16:13:38.479445: step: 244/531, loss: 0.005692709237337112 2023-01-22 16:13:39.554436: step: 248/531, loss: 0.005422898568212986 2023-01-22 16:13:40.619575: step: 252/531, loss: 0.004670081194490194 2023-01-22 
16:13:41.669580: step: 256/531, loss: 0.006144367158412933 2023-01-22 16:13:42.722106: step: 260/531, loss: 0.005465567111968994 2023-01-22 16:13:43.787336: step: 264/531, loss: 0.010556402616202831 2023-01-22 16:13:44.852947: step: 268/531, loss: 0.007777459919452667 2023-01-22 16:13:45.897946: step: 272/531, loss: 0.004604372661560774 2023-01-22 16:13:46.955166: step: 276/531, loss: 0.000592091353610158 2023-01-22 16:13:48.057389: step: 280/531, loss: 0.011012891307473183 2023-01-22 16:13:49.107792: step: 284/531, loss: 0.00016225418949034065 2023-01-22 16:13:50.154458: step: 288/531, loss: 0.006222032476216555 2023-01-22 16:13:51.212318: step: 292/531, loss: 0.0032932369504123926 2023-01-22 16:13:52.261790: step: 296/531, loss: 0.004250827711075544 2023-01-22 16:13:53.339758: step: 300/531, loss: 0.0007221480482257903 2023-01-22 16:13:54.397581: step: 304/531, loss: 0.003463632892817259 2023-01-22 16:13:55.457458: step: 308/531, loss: 0.00922374613583088 2023-01-22 16:13:56.505403: step: 312/531, loss: 0.0022472532000392675 2023-01-22 16:13:57.604110: step: 316/531, loss: 0.0005551144131459296 2023-01-22 16:13:58.648419: step: 320/531, loss: 0.0010310213547199965 2023-01-22 16:13:59.719960: step: 324/531, loss: 0.005658258683979511 2023-01-22 16:14:00.769167: step: 328/531, loss: 0.0003289622545707971 2023-01-22 16:14:01.816044: step: 332/531, loss: 0.00023402618535328656 2023-01-22 16:14:02.853871: step: 336/531, loss: 0.0010350487427785993 2023-01-22 16:14:03.923698: step: 340/531, loss: 0.010129990056157112 2023-01-22 16:14:04.975356: step: 344/531, loss: 0.0036504019517451525 2023-01-22 16:14:06.035444: step: 348/531, loss: 0.0005748554249294102 2023-01-22 16:14:07.093832: step: 352/531, loss: 0.009512398391962051 2023-01-22 16:14:08.152165: step: 356/531, loss: 0.0035774048883467913 2023-01-22 16:14:09.220397: step: 360/531, loss: 0.003359885886311531 2023-01-22 16:14:10.273338: step: 364/531, loss: 0.023506442084908485 2023-01-22 16:14:11.329940: step: 368/531, loss: 0.031302519142627716 2023-01-22 16:14:12.426360: step: 372/531, loss: 0.024131527170538902 2023-01-22 16:14:13.483222: step: 376/531, loss: 0.0021976816933602095 2023-01-22 16:14:14.551656: step: 380/531, loss: 0.01051371544599533 2023-01-22 16:14:15.615416: step: 384/531, loss: 0.004568392410874367 2023-01-22 16:14:16.667063: step: 388/531, loss: 0.0056373607367277145 2023-01-22 16:14:17.725069: step: 392/531, loss: 0.0024717007763683796 2023-01-22 16:14:18.792758: step: 396/531, loss: 0.004057064652442932 2023-01-22 16:14:19.855316: step: 400/531, loss: 0.0025357746053487062 2023-01-22 16:14:20.917104: step: 404/531, loss: 0.0021160433534532785 2023-01-22 16:14:21.985117: step: 408/531, loss: 0.0037641029339283705 2023-01-22 16:14:23.061256: step: 412/531, loss: 0.000673118163831532 2023-01-22 16:14:24.117699: step: 416/531, loss: 0.0012082151370123029 2023-01-22 16:14:25.181311: step: 420/531, loss: 0.01828766241669655 2023-01-22 16:14:26.250271: step: 424/531, loss: 0.018649084493517876 2023-01-22 16:14:27.315915: step: 428/531, loss: 0.005770507734268904 2023-01-22 16:14:28.383240: step: 432/531, loss: 0.003924884833395481 2023-01-22 16:14:29.440101: step: 436/531, loss: 0.012615293264389038 2023-01-22 16:14:30.508467: step: 440/531, loss: 0.001242022030055523 2023-01-22 16:14:31.556620: step: 444/531, loss: 0.004470647778362036 2023-01-22 16:14:32.625199: step: 448/531, loss: 0.012171771377325058 2023-01-22 16:14:33.693239: step: 452/531, loss: 0.013862748630344868 2023-01-22 16:14:34.736532: step: 456/531, 
loss: 0.004594526719301939 2023-01-22 16:14:35.801384: step: 460/531, loss: 0.009186458773911 2023-01-22 16:14:36.869375: step: 464/531, loss: 0.003315070876851678 2023-01-22 16:14:37.935960: step: 468/531, loss: 0.00016763914027251303 2023-01-22 16:14:38.992512: step: 472/531, loss: 0.00032415706664323807 2023-01-22 16:14:40.044033: step: 476/531, loss: 0.017326390370726585 2023-01-22 16:14:41.111240: step: 480/531, loss: 0.000792271108366549 2023-01-22 16:14:42.182464: step: 484/531, loss: 0.0032066642306745052 2023-01-22 16:14:43.249306: step: 488/531, loss: 0.021497756242752075 2023-01-22 16:14:44.298391: step: 492/531, loss: 0.01887635327875614 2023-01-22 16:14:45.374936: step: 496/531, loss: 0.0015345726860687137 2023-01-22 16:14:46.433537: step: 500/531, loss: 0.005101568531244993 2023-01-22 16:14:47.482582: step: 504/531, loss: 0.0013782402966171503 2023-01-22 16:14:48.538046: step: 508/531, loss: 0.0014437205391004682 2023-01-22 16:14:49.600854: step: 512/531, loss: 0.005455211736261845 2023-01-22 16:14:50.648104: step: 516/531, loss: 7.479720807168633e-05 2023-01-22 16:14:51.715112: step: 520/531, loss: 0.025287700816988945 2023-01-22 16:14:52.768861: step: 524/531, loss: 0.006370967254042625 2023-01-22 16:14:53.831742: step: 528/531, loss: 0.00978098250925541 2023-01-22 16:14:54.906932: step: 532/531, loss: 0.005064946133643389 2023-01-22 16:14:55.985143: step: 536/531, loss: 0.023861998692154884 2023-01-22 16:14:57.055121: step: 540/531, loss: 0.002950523514300585 2023-01-22 16:14:58.116832: step: 544/531, loss: 0.0010207381565123796 2023-01-22 16:14:59.179157: step: 548/531, loss: 0.002542046597227454 2023-01-22 16:15:00.232090: step: 552/531, loss: 0.002155070658773184 2023-01-22 16:15:01.310183: step: 556/531, loss: 0.005741284228861332 2023-01-22 16:15:02.357292: step: 560/531, loss: 0.0009831854840740561 2023-01-22 16:15:03.423102: step: 564/531, loss: 0.0025570113211870193 2023-01-22 16:15:04.481384: step: 568/531, loss: 0.016676031053066254 2023-01-22 16:15:05.539381: step: 572/531, loss: 3.8245536416070536e-05 2023-01-22 16:15:06.595295: step: 576/531, loss: 0.005851565394550562 2023-01-22 16:15:07.666635: step: 580/531, loss: 0.0 2023-01-22 16:15:08.721523: step: 584/531, loss: 0.0017882848624140024 2023-01-22 16:15:09.789184: step: 588/531, loss: 0.002803139155730605 2023-01-22 16:15:10.844898: step: 592/531, loss: 0.004985233768820763 2023-01-22 16:15:11.926424: step: 596/531, loss: 0.00047307752538472414 2023-01-22 16:15:12.970913: step: 600/531, loss: 0.0016902529168874025 2023-01-22 16:15:14.031813: step: 604/531, loss: 0.004461311735212803 2023-01-22 16:15:15.096103: step: 608/531, loss: 0.0012405706802383065 2023-01-22 16:15:16.148004: step: 612/531, loss: 0.0016074622981250286 2023-01-22 16:15:17.195868: step: 616/531, loss: 0.0021988446824252605 2023-01-22 16:15:18.250288: step: 620/531, loss: 0.005559419747442007 2023-01-22 16:15:19.313800: step: 624/531, loss: 0.040141861885786057 2023-01-22 16:15:20.362239: step: 628/531, loss: 0.004170118365436792 2023-01-22 16:15:21.410306: step: 632/531, loss: 0.015231350436806679 2023-01-22 16:15:22.472057: step: 636/531, loss: 0.02916671894490719 2023-01-22 16:15:23.530171: step: 640/531, loss: 0.0022286614403128624 2023-01-22 16:15:24.578616: step: 644/531, loss: 0.0061494517140090466 2023-01-22 16:15:25.644135: step: 648/531, loss: 0.025374583899974823 2023-01-22 16:15:26.726370: step: 652/531, loss: 0.020446576178073883 2023-01-22 16:15:27.777447: step: 656/531, loss: 0.002724332269281149 2023-01-22 16:15:28.852987: 
step: 660/531, loss: 0.012123017571866512 2023-01-22 16:15:29.918450: step: 664/531, loss: 0.0036326858680695295 2023-01-22 16:15:30.977290: step: 668/531, loss: 0.0161232091486454 2023-01-22 16:15:32.026403: step: 672/531, loss: 0.0040760040283203125 2023-01-22 16:15:33.085354: step: 676/531, loss: 0.01164824515581131 2023-01-22 16:15:34.138272: step: 680/531, loss: 0.002962257247418165 2023-01-22 16:15:35.209162: step: 684/531, loss: 0.00103163777384907 2023-01-22 16:15:36.259054: step: 688/531, loss: 0.01721273548901081 2023-01-22 16:15:37.327250: step: 692/531, loss: 0.0025461604818701744 2023-01-22 16:15:38.401649: step: 696/531, loss: 0.0033750180155038834 2023-01-22 16:15:39.451371: step: 700/531, loss: 0.02103145234286785 2023-01-22 16:15:40.498402: step: 704/531, loss: 0.03886880353093147 2023-01-22 16:15:41.556943: step: 708/531, loss: 0.01326089445501566 2023-01-22 16:15:42.605138: step: 712/531, loss: 0.009282470680773258 2023-01-22 16:15:43.664324: step: 716/531, loss: 0.0036380321253091097 2023-01-22 16:15:44.714633: step: 720/531, loss: 0.002317324047908187 2023-01-22 16:15:45.782157: step: 724/531, loss: 0.005068523343652487 2023-01-22 16:15:46.861001: step: 728/531, loss: 0.0076700844801962376 2023-01-22 16:15:47.910385: step: 732/531, loss: 0.0019287178292870522 2023-01-22 16:15:48.980896: step: 736/531, loss: 0.011293146759271622 2023-01-22 16:15:50.049065: step: 740/531, loss: 0.001963953720405698 2023-01-22 16:15:51.112338: step: 744/531, loss: 0.006708445493131876 2023-01-22 16:15:52.167117: step: 748/531, loss: 0.014490853995084763 2023-01-22 16:15:53.222109: step: 752/531, loss: 0.002180551178753376 2023-01-22 16:15:54.278584: step: 756/531, loss: 0.0038238507695496082 2023-01-22 16:15:55.325907: step: 760/531, loss: 0.006020710337907076 2023-01-22 16:15:56.370331: step: 764/531, loss: 0.006316723767668009 2023-01-22 16:15:57.431739: step: 768/531, loss: 0.0029893338214606047 2023-01-22 16:15:58.476265: step: 772/531, loss: 0.0025441646575927734 2023-01-22 16:15:59.531539: step: 776/531, loss: 0.0030968119390308857 2023-01-22 16:16:00.591443: step: 780/531, loss: 0.00013579557707998902 2023-01-22 16:16:01.645983: step: 784/531, loss: 0.00590282678604126 2023-01-22 16:16:02.682671: step: 788/531, loss: 0.001438887557014823 2023-01-22 16:16:03.749201: step: 792/531, loss: 0.011630845256149769 2023-01-22 16:16:04.810619: step: 796/531, loss: 0.0077319061383605 2023-01-22 16:16:05.873904: step: 800/531, loss: 0.0018211868591606617 2023-01-22 16:16:06.926463: step: 804/531, loss: 0.008590525016188622 2023-01-22 16:16:07.997345: step: 808/531, loss: 0.006511078681796789 2023-01-22 16:16:09.064370: step: 812/531, loss: 0.004509621299803257 2023-01-22 16:16:10.122899: step: 816/531, loss: 0.01483891811221838 2023-01-22 16:16:11.186129: step: 820/531, loss: 0.010829690843820572 2023-01-22 16:16:12.242223: step: 824/531, loss: 0.016312792897224426 2023-01-22 16:16:13.310683: step: 828/531, loss: 0.03890813887119293 2023-01-22 16:16:14.365164: step: 832/531, loss: 0.009767335839569569 2023-01-22 16:16:15.417215: step: 836/531, loss: 0.00046701834071427584 2023-01-22 16:16:16.483049: step: 840/531, loss: 0.0004780926101375371 2023-01-22 16:16:17.549108: step: 844/531, loss: 0.0051103741861879826 2023-01-22 16:16:18.607325: step: 848/531, loss: 0.02015470713376999 2023-01-22 16:16:19.670248: step: 852/531, loss: 0.007281284313648939 2023-01-22 16:16:20.749832: step: 856/531, loss: 0.0012055073166266084 2023-01-22 16:16:21.796374: step: 860/531, loss: 0.008240344934165478 
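
For reference, the numbers in the per-epoch summaries above fit the usual precision/recall/F1 pattern, and each 'combined' value matches the product of the template F1 and the slot F1 (e.g. Dev Chinese at epoch 7: 0.7368421052631579 * 0.33450299700299707 ≈ 0.2464758925). A small sketch reproducing that arithmetic; the function below is illustrative only and is not the scorer in train.py:

# Sketch: f1 as the harmonic mean of p and r, and combined = template_f1 * slot_f1,
# checked against the Dev Chinese scores reported for epoch 7 above.
def f1(p, r):
    return 2 * p * r / (p + r) if (p + r) else 0.0

template_f1 = f1(1.0, 0.5833333333333334)                 # ~0.7368421052631579
slot_f1 = f1(0.35395084566596197, 0.31708096590909096)    # ~0.334503
combined = template_f1 * slot_f1                          # ~0.2464758925, matching the reported 'combined'
print(template_f1, slot_f1, combined)
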
2023-01-22 16:16:22.859303: step: 864/531, loss: 0.0038327702786773443 2023-01-22 16:16:23.931251: step: 868/531, loss: 0.006365139968693256 2023-01-22 16:16:25.001960: step: 872/531, loss: 0.0024804798886179924 2023-01-22 16:16:26.057816: step: 876/531, loss: 0.025120964273810387 2023-01-22 16:16:27.113218: step: 880/531, loss: 0.0003910895611625165 2023-01-22 16:16:28.179008: step: 884/531, loss: 0.0007088647107593715 2023-01-22 16:16:29.223257: step: 888/531, loss: 0.002923545427620411 2023-01-22 16:16:30.272979: step: 892/531, loss: 0.010894962586462498 2023-01-22 16:16:31.348007: step: 896/531, loss: 0.004530869424343109 2023-01-22 16:16:32.391333: step: 900/531, loss: 0.002915431745350361 2023-01-22 16:16:33.481113: step: 904/531, loss: 0.008574807085096836 2023-01-22 16:16:34.563893: step: 908/531, loss: 0.018006768077611923 2023-01-22 16:16:35.635490: step: 912/531, loss: 0.00028963852673768997 2023-01-22 16:16:36.687921: step: 916/531, loss: 0.0025813868269324303 2023-01-22 16:16:37.750383: step: 920/531, loss: 2.9397575417533517e-05 2023-01-22 16:16:38.802058: step: 924/531, loss: 0.0037239028606563807 2023-01-22 16:16:39.871358: step: 928/531, loss: 0.004326937720179558 2023-01-22 16:16:40.949149: step: 932/531, loss: 0.01605810597538948 2023-01-22 16:16:41.997406: step: 936/531, loss: 0.0006219294155016541 2023-01-22 16:16:43.056954: step: 940/531, loss: 0.000443142227595672 2023-01-22 16:16:44.120194: step: 944/531, loss: 0.007307771127671003 2023-01-22 16:16:45.174546: step: 948/531, loss: 0.0455024316906929 2023-01-22 16:16:46.237670: step: 952/531, loss: 0.008575189858675003 2023-01-22 16:16:47.292992: step: 956/531, loss: 0.002232529688626528 2023-01-22 16:16:48.352120: step: 960/531, loss: 0.002812477992847562 2023-01-22 16:16:49.407549: step: 964/531, loss: 0.004008966963738203 2023-01-22 16:16:50.480487: step: 968/531, loss: 0.0011554103111848235 2023-01-22 16:16:51.557956: step: 972/531, loss: 0.005167889408767223 2023-01-22 16:16:52.613094: step: 976/531, loss: 0.0034117435570806265 2023-01-22 16:16:53.682928: step: 980/531, loss: 0.00020782227511517704 2023-01-22 16:16:54.742838: step: 984/531, loss: 0.0015388152096420527 2023-01-22 16:16:55.814381: step: 988/531, loss: 0.00014759301848243922 2023-01-22 16:16:56.885753: step: 992/531, loss: 0.006717701908200979 2023-01-22 16:16:57.942832: step: 996/531, loss: 0.0024136644788086414 2023-01-22 16:16:58.993186: step: 1000/531, loss: 0.003548719920217991 2023-01-22 16:17:00.048152: step: 1004/531, loss: 0.0 2023-01-22 16:17:01.100500: step: 1008/531, loss: 0.0002607418573461473 2023-01-22 16:17:02.176938: step: 1012/531, loss: 0.004946514964103699 2023-01-22 16:17:03.252794: step: 1016/531, loss: 0.02123146876692772 2023-01-22 16:17:04.310241: step: 1020/531, loss: 0.008447559550404549 2023-01-22 16:17:05.370714: step: 1024/531, loss: 0.0013551246374845505 2023-01-22 16:17:06.423118: step: 1028/531, loss: 0.00220544938929379 2023-01-22 16:17:07.473344: step: 1032/531, loss: 0.012969838455319405 2023-01-22 16:17:08.540721: step: 1036/531, loss: 0.00014873032341711223 2023-01-22 16:17:09.581526: step: 1040/531, loss: 0.0033487624023109674 2023-01-22 16:17:10.644812: step: 1044/531, loss: 0.002069843700155616 2023-01-22 16:17:11.694084: step: 1048/531, loss: 0.01372519787400961 2023-01-22 16:17:12.749663: step: 1052/531, loss: 0.00833092164248228 2023-01-22 16:17:13.814547: step: 1056/531, loss: 0.0053888894617557526 2023-01-22 16:17:14.857365: step: 1060/531, loss: 0.0023009374272078276 2023-01-22 16:17:15.892972: step: 
1064/531, loss: 0.002385929226875305 2023-01-22 16:17:16.937857: step: 1068/531, loss: 0.008888494223356247 2023-01-22 16:17:18.009221: step: 1072/531, loss: 0.007335087284445763 2023-01-22 16:17:19.061786: step: 1076/531, loss: 0.0067205182276666164 2023-01-22 16:17:20.113741: step: 1080/531, loss: 0.0003820423735305667 2023-01-22 16:17:21.179689: step: 1084/531, loss: 0.0018620449118316174 2023-01-22 16:17:22.222438: step: 1088/531, loss: 0.007995960302650928 2023-01-22 16:17:23.294539: step: 1092/531, loss: 0.004356453660875559 2023-01-22 16:17:24.337529: step: 1096/531, loss: 0.0026766823139041662 2023-01-22 16:17:25.407546: step: 1100/531, loss: 0.006368543487042189 2023-01-22 16:17:26.466366: step: 1104/531, loss: 0.010825267061591148 2023-01-22 16:17:27.527562: step: 1108/531, loss: 0.03677953779697418 2023-01-22 16:17:28.606791: step: 1112/531, loss: 0.001485364744439721 2023-01-22 16:17:29.669719: step: 1116/531, loss: 0.0058395215310156345 2023-01-22 16:17:30.727788: step: 1120/531, loss: 0.005955967120826244 2023-01-22 16:17:31.808621: step: 1124/531, loss: 0.001750392373651266 2023-01-22 16:17:32.865009: step: 1128/531, loss: 0.006681247614324093 2023-01-22 16:17:33.918567: step: 1132/531, loss: 0.017130382359027863 2023-01-22 16:17:34.978474: step: 1136/531, loss: 0.013268603011965752 2023-01-22 16:17:36.044108: step: 1140/531, loss: 0.004405970685184002 2023-01-22 16:17:37.098118: step: 1144/531, loss: 0.0038946911226958036 2023-01-22 16:17:38.161092: step: 1148/531, loss: 0.0018913538660854101 2023-01-22 16:17:39.218599: step: 1152/531, loss: 0.0033937504049390554 2023-01-22 16:17:40.276525: step: 1156/531, loss: 0.012840493582189083 2023-01-22 16:17:41.327542: step: 1160/531, loss: 0.0030658477917313576 2023-01-22 16:17:42.391957: step: 1164/531, loss: 0.0026989688631147146 2023-01-22 16:17:43.439191: step: 1168/531, loss: 0.00987061858177185 2023-01-22 16:17:44.502896: step: 1172/531, loss: 0.025835705921053886 2023-01-22 16:17:45.565832: step: 1176/531, loss: 0.009692615829408169 2023-01-22 16:17:46.633151: step: 1180/531, loss: 0.007611141540110111 2023-01-22 16:17:47.693654: step: 1184/531, loss: 0.0038731752429157495 2023-01-22 16:17:48.749657: step: 1188/531, loss: 0.01868722401559353 2023-01-22 16:17:49.806913: step: 1192/531, loss: 0.001892442349344492 2023-01-22 16:17:50.863685: step: 1196/531, loss: 0.0034807517658919096 2023-01-22 16:17:51.938932: step: 1200/531, loss: 0.003272327361628413 2023-01-22 16:17:52.991985: step: 1204/531, loss: 0.003184059401974082 2023-01-22 16:17:54.054551: step: 1208/531, loss: 0.003688983153551817 2023-01-22 16:17:55.099331: step: 1212/531, loss: 0.0048958249390125275 2023-01-22 16:17:56.165946: step: 1216/531, loss: 0.0030868016183376312 2023-01-22 16:17:57.267128: step: 1220/531, loss: 0.002337475074455142 2023-01-22 16:17:58.335453: step: 1224/531, loss: 0.0032325468491762877 2023-01-22 16:17:59.400471: step: 1228/531, loss: 0.014581906609237194 2023-01-22 16:18:00.457444: step: 1232/531, loss: 0.0040698787197470665 2023-01-22 16:18:01.503256: step: 1236/531, loss: 0.00026136069209314883 2023-01-22 16:18:02.564766: step: 1240/531, loss: 0.005507575813680887 2023-01-22 16:18:03.637694: step: 1244/531, loss: 0.0006998664466664195 2023-01-22 16:18:04.697274: step: 1248/531, loss: 0.0016541146906092763 2023-01-22 16:18:05.748690: step: 1252/531, loss: 0.005082534160465002 2023-01-22 16:18:06.834174: step: 1256/531, loss: 0.019193334504961967 2023-01-22 16:18:07.886105: step: 1260/531, loss: 0.0267738439142704 2023-01-22 
16:18:08.949773: step: 1264/531, loss: 0.011469540186226368 2023-01-22 16:18:10.010831: step: 1268/531, loss: 0.0029621445573866367 2023-01-22 16:18:11.083155: step: 1272/531, loss: 0.011390729807317257 2023-01-22 16:18:12.134979: step: 1276/531, loss: 0.0069556450471282005 2023-01-22 16:18:13.175671: step: 1280/531, loss: 0.004577385261654854 2023-01-22 16:18:14.236872: step: 1284/531, loss: 0.007107224781066179 2023-01-22 16:18:15.287430: step: 1288/531, loss: 0.01946289837360382 2023-01-22 16:18:16.363188: step: 1292/531, loss: 0.03458670526742935 2023-01-22 16:18:17.419250: step: 1296/531, loss: 0.007475607097148895 2023-01-22 16:18:18.467862: step: 1300/531, loss: 0.005438089836388826 2023-01-22 16:18:19.533641: step: 1304/531, loss: 0.006899349391460419 2023-01-22 16:18:20.595319: step: 1308/531, loss: 0.00033680512569844723 2023-01-22 16:18:21.661516: step: 1312/531, loss: 0.0024557586293667555 2023-01-22 16:18:22.739629: step: 1316/531, loss: 0.011073829606175423 2023-01-22 16:18:23.811624: step: 1320/531, loss: 0.0035494016483426094 2023-01-22 16:18:24.907309: step: 1324/531, loss: 0.021071037277579308 2023-01-22 16:18:25.974519: step: 1328/531, loss: 0.0029742741025984287 2023-01-22 16:18:27.033195: step: 1332/531, loss: 0.009426775388419628 2023-01-22 16:18:28.104129: step: 1336/531, loss: 0.003817416960373521 2023-01-22 16:18:29.173981: step: 1340/531, loss: 0.015146479941904545 2023-01-22 16:18:30.217120: step: 1344/531, loss: 0.011023357510566711 2023-01-22 16:18:31.272302: step: 1348/531, loss: 0.007813742384314537 2023-01-22 16:18:32.331450: step: 1352/531, loss: 0.021830998361110687 2023-01-22 16:18:33.388675: step: 1356/531, loss: 0.003205236978828907 2023-01-22 16:18:34.441679: step: 1360/531, loss: 0.002000129083171487 2023-01-22 16:18:35.503015: step: 1364/531, loss: 0.014800283126533031 2023-01-22 16:18:36.565685: step: 1368/531, loss: 0.0016528251580893993 2023-01-22 16:18:37.622317: step: 1372/531, loss: 0.001199369435198605 2023-01-22 16:18:38.720920: step: 1376/531, loss: 0.005130136851221323 2023-01-22 16:18:39.785414: step: 1380/531, loss: 0.005333214066922665 2023-01-22 16:18:40.837241: step: 1384/531, loss: 0.005962173920124769 2023-01-22 16:18:41.892782: step: 1388/531, loss: 0.0022245629224926233 2023-01-22 16:18:42.948982: step: 1392/531, loss: 0.0001227810571435839 2023-01-22 16:18:44.027304: step: 1396/531, loss: 0.00589818274602294 2023-01-22 16:18:45.094313: step: 1400/531, loss: 0.03538191691040993 2023-01-22 16:18:46.129428: step: 1404/531, loss: 0.00947636365890503 2023-01-22 16:18:47.207766: step: 1408/531, loss: 0.0036297296173870564 2023-01-22 16:18:48.290238: step: 1412/531, loss: 0.08291570097208023 2023-01-22 16:18:49.342812: step: 1416/531, loss: 0.0008545141899958253 2023-01-22 16:18:50.395221: step: 1420/531, loss: 7.632618093111887e-08 2023-01-22 16:18:51.451696: step: 1424/531, loss: 0.024561110883951187 2023-01-22 16:18:52.519091: step: 1428/531, loss: 0.013079265132546425 2023-01-22 16:18:53.566907: step: 1432/531, loss: 0.006209118757396936 2023-01-22 16:18:54.621688: step: 1436/531, loss: 0.01702086441218853 2023-01-22 16:18:55.684191: step: 1440/531, loss: 0.008531196974217892 2023-01-22 16:18:56.751821: step: 1444/531, loss: 0.008175347000360489 2023-01-22 16:18:57.815633: step: 1448/531, loss: 0.004992689471691847 2023-01-22 16:18:58.880052: step: 1452/531, loss: 0.008593486621975899 2023-01-22 16:18:59.936049: step: 1456/531, loss: 0.010351836681365967 2023-01-22 16:19:00.997697: step: 1460/531, loss: 0.006545158103108406 
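
The command repeated in each epoch header lists every hyperparameter of the run. A hypothetical argparse block that accepts exactly those flags (argument types are inferred from the logged values; this is an assumption, not the actual parser in train.py):

import argparse

# Hypothetical parser mirroring the flags shown in the "command:" lines; types inferred from the values.
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", default="coref")
parser.add_argument("--xlmr_model_name", default="xlm-roberta-large")
parser.add_argument("--batch_size", type=int, default=16)
parser.add_argument("--xlmr_learning_rate", type=float, default=2e-5)
parser.add_argument("--accumulate_step", type=int, default=4)
parser.add_argument("--max_epoch", type=int, default=20)
parser.add_argument("--event_hidden_num", type=int, default=500)
parser.add_argument("--p1_data_weight", type=float, default=0.2)
parser.add_argument("--learning_rate", type=float, default=9e-4)
args = parser.parse_args()
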
2023-01-22 16:19:02.046189: step: 1464/531, loss: 0.008038087747991085 2023-01-22 16:19:03.108315: step: 1468/531, loss: 0.008359107188880444 2023-01-22 16:19:04.158781: step: 1472/531, loss: 0.007803000044077635 2023-01-22 16:19:05.229348: step: 1476/531, loss: 0.0004924036911688745 2023-01-22 16:19:06.273171: step: 1480/531, loss: 0.007833710871636868 2023-01-22 16:19:07.349857: step: 1484/531, loss: 0.007910453714430332 2023-01-22 16:19:08.421902: step: 1488/531, loss: 0.015578282997012138 2023-01-22 16:19:09.501143: step: 1492/531, loss: 0.008035898208618164 2023-01-22 16:19:10.562675: step: 1496/531, loss: 0.002181046176701784 2023-01-22 16:19:11.625347: step: 1500/531, loss: 0.015502078458666801 2023-01-22 16:19:12.674607: step: 1504/531, loss: 0.007511697243899107 2023-01-22 16:19:13.733910: step: 1508/531, loss: 0.008183242753148079 2023-01-22 16:19:14.811755: step: 1512/531, loss: 0.026776069775223732 2023-01-22 16:19:15.871205: step: 1516/531, loss: 0.0009291375172324479 2023-01-22 16:19:16.938339: step: 1520/531, loss: 0.0 2023-01-22 16:19:17.995409: step: 1524/531, loss: 0.0004272660880815238 2023-01-22 16:19:19.051491: step: 1528/531, loss: 0.007856117561459541 2023-01-22 16:19:20.103729: step: 1532/531, loss: 0.002330540679395199 2023-01-22 16:19:21.159166: step: 1536/531, loss: 0.0075758653692901134 2023-01-22 16:19:22.195232: step: 1540/531, loss: 0.00020464049885049462 2023-01-22 16:19:23.254758: step: 1544/531, loss: 0.00659991754218936 2023-01-22 16:19:24.301493: step: 1548/531, loss: 2.73139539785916e-05 2023-01-22 16:19:25.370017: step: 1552/531, loss: 0.022007448598742485 2023-01-22 16:19:26.433948: step: 1556/531, loss: 0.0026621902361512184 2023-01-22 16:19:27.490035: step: 1560/531, loss: 0.0071321697905659676 2023-01-22 16:19:28.548610: step: 1564/531, loss: 0.021041875705122948 2023-01-22 16:19:29.599594: step: 1568/531, loss: 0.005351006053388119 2023-01-22 16:19:30.665701: step: 1572/531, loss: 0.005273071583360434 2023-01-22 16:19:31.727204: step: 1576/531, loss: 0.009908037260174751 2023-01-22 16:19:32.779633: step: 1580/531, loss: 0.01015748456120491 2023-01-22 16:19:33.839301: step: 1584/531, loss: 0.0028091552667319775 2023-01-22 16:19:34.886115: step: 1588/531, loss: 0.00613460224121809 2023-01-22 16:19:35.945809: step: 1592/531, loss: 0.0069671026431024075 2023-01-22 16:19:37.016166: step: 1596/531, loss: 0.007132793311029673 2023-01-22 16:19:38.062624: step: 1600/531, loss: 0.006399608217179775 2023-01-22 16:19:39.117897: step: 1604/531, loss: 0.004066131543368101 2023-01-22 16:19:40.180125: step: 1608/531, loss: 0.010241586714982986 2023-01-22 16:19:41.247453: step: 1612/531, loss: 0.007992560043931007 2023-01-22 16:19:42.287610: step: 1616/531, loss: 0.005091615952551365 2023-01-22 16:19:43.339207: step: 1620/531, loss: 0.0009947161888703704 2023-01-22 16:19:44.411231: step: 1624/531, loss: 0.0016462111379951239 2023-01-22 16:19:45.467978: step: 1628/531, loss: 0.024003515020012856 2023-01-22 16:19:46.518271: step: 1632/531, loss: 0.027133911848068237 2023-01-22 16:19:47.567889: step: 1636/531, loss: 0.007679780479520559 2023-01-22 16:19:48.619810: step: 1640/531, loss: 0.013520422391593456 2023-01-22 16:19:49.662959: step: 1644/531, loss: 0.0014681483153253794 2023-01-22 16:19:50.722950: step: 1648/531, loss: 0.01458329614251852 2023-01-22 16:19:51.785874: step: 1652/531, loss: 0.007456892170011997 2023-01-22 16:19:52.852207: step: 1656/531, loss: 0.00390252354554832 2023-01-22 16:19:53.906103: step: 1660/531, loss: 0.002072854433208704 2023-01-22 
16:19:54.964940: step: 1664/531, loss: 0.006439339369535446 2023-01-22 16:19:56.012814: step: 1668/531, loss: 0.0026333676651120186 2023-01-22 16:19:57.074209: step: 1672/531, loss: 0.006465183570981026 2023-01-22 16:19:58.164581: step: 1676/531, loss: 0.01602604240179062 2023-01-22 16:19:59.223962: step: 1680/531, loss: 0.003123921575024724 2023-01-22 16:20:00.279354: step: 1684/531, loss: 0.0013871487462893128 2023-01-22 16:20:01.353329: step: 1688/531, loss: 0.0002768167178146541 2023-01-22 16:20:02.398687: step: 1692/531, loss: 0.00234323856420815 2023-01-22 16:20:03.459135: step: 1696/531, loss: 0.00960514321923256 2023-01-22 16:20:04.516612: step: 1700/531, loss: 0.0038568656891584396 2023-01-22 16:20:05.570334: step: 1704/531, loss: 0.0007602032856084406 2023-01-22 16:20:06.637555: step: 1708/531, loss: 5.5687709391349927e-05 2023-01-22 16:20:07.702220: step: 1712/531, loss: 0.005803669802844524 2023-01-22 16:20:08.770862: step: 1716/531, loss: 0.005684552248567343 2023-01-22 16:20:09.835489: step: 1720/531, loss: 0.01094534620642662 2023-01-22 16:20:10.886792: step: 1724/531, loss: 0.012037607841193676 2023-01-22 16:20:11.943415: step: 1728/531, loss: 0.003143709385767579 2023-01-22 16:20:13.008162: step: 1732/531, loss: 0.014864440076053143 2023-01-22 16:20:14.081086: step: 1736/531, loss: 0.05536574870347977 2023-01-22 16:20:15.147143: step: 1740/531, loss: 0.021446529775857925 2023-01-22 16:20:16.201021: step: 1744/531, loss: 0.005113128572702408 2023-01-22 16:20:17.264021: step: 1748/531, loss: 0.00308169680647552 2023-01-22 16:20:18.318509: step: 1752/531, loss: 0.00468352809548378 2023-01-22 16:20:19.365382: step: 1756/531, loss: 0.007143042050302029 2023-01-22 16:20:20.414128: step: 1760/531, loss: 0.005902677774429321 2023-01-22 16:20:21.474047: step: 1764/531, loss: 0.0017894305055961013 2023-01-22 16:20:22.545502: step: 1768/531, loss: 0.008010032586753368 2023-01-22 16:20:23.602228: step: 1772/531, loss: 0.007526349276304245 2023-01-22 16:20:24.660299: step: 1776/531, loss: 0.016422927379608154 2023-01-22 16:20:25.707021: step: 1780/531, loss: 0.00418756902217865 2023-01-22 16:20:26.770173: step: 1784/531, loss: 2.3179160280051292e-07 2023-01-22 16:20:27.838097: step: 1788/531, loss: 0.005281936842948198 2023-01-22 16:20:28.903539: step: 1792/531, loss: 0.007767172995954752 2023-01-22 16:20:29.963865: step: 1796/531, loss: 0.0027408848982304335 2023-01-22 16:20:31.016195: step: 1800/531, loss: 0.005599344149231911 2023-01-22 16:20:32.078996: step: 1804/531, loss: 0.001434585195966065 2023-01-22 16:20:33.143517: step: 1808/531, loss: 0.0006198306800797582 2023-01-22 16:20:34.198798: step: 1812/531, loss: 0.0013011071132496 2023-01-22 16:20:35.250526: step: 1816/531, loss: 0.027643442153930664 2023-01-22 16:20:36.297028: step: 1820/531, loss: 0.00298634497448802 2023-01-22 16:20:37.357564: step: 1824/531, loss: 0.0025018230080604553 2023-01-22 16:20:38.436628: step: 1828/531, loss: 0.006105093751102686 2023-01-22 16:20:39.494917: step: 1832/531, loss: 0.0026465593837201595 2023-01-22 16:20:40.593897: step: 1836/531, loss: 0.005341792479157448 2023-01-22 16:20:41.669025: step: 1840/531, loss: 0.003560730954632163 2023-01-22 16:20:42.733439: step: 1844/531, loss: 0.02786068059504032 2023-01-22 16:20:43.793886: step: 1848/531, loss: 0.0012372505152598023 2023-01-22 16:20:44.858318: step: 1852/531, loss: 0.021328700706362724 2023-01-22 16:20:45.938528: step: 1856/531, loss: 0.01656363159418106 2023-01-22 16:20:46.999228: step: 1860/531, loss: 2.979422060889192e-05 2023-01-22 
16:20:48.075288: step: 1864/531, loss: 0.011781489476561546 2023-01-22 16:20:49.152279: step: 1868/531, loss: 0.0005675645661540329 2023-01-22 16:20:50.222401: step: 1872/531, loss: 0.005878841038793325 2023-01-22 16:20:51.276795: step: 1876/531, loss: 0.004570318385958672 2023-01-22 16:20:52.326690: step: 1880/531, loss: 0.002639268757775426 2023-01-22 16:20:53.387273: step: 1884/531, loss: 0.0026401374489068985 2023-01-22 16:20:54.452981: step: 1888/531, loss: 0.05612551420927048 2023-01-22 16:20:55.512109: step: 1892/531, loss: 0.0007068126578815281 2023-01-22 16:20:56.582308: step: 1896/531, loss: 0.02459676004946232 2023-01-22 16:20:57.640644: step: 1900/531, loss: 0.00029960571555420756 2023-01-22 16:20:58.709022: step: 1904/531, loss: 0.0004264643357601017 2023-01-22 16:20:59.772460: step: 1908/531, loss: 0.004157466348260641 2023-01-22 16:21:00.833105: step: 1912/531, loss: 0.013789691962301731 2023-01-22 16:21:01.891859: step: 1916/531, loss: 0.012317962013185024 2023-01-22 16:21:02.955962: step: 1920/531, loss: 0.0021516093984246254 2023-01-22 16:21:04.008885: step: 1924/531, loss: 0.0036408354062587023 2023-01-22 16:21:05.063449: step: 1928/531, loss: 0.0025644563138484955 2023-01-22 16:21:06.125866: step: 1932/531, loss: 0.0035383193753659725 2023-01-22 16:21:07.190259: step: 1936/531, loss: 0.02960287220776081 2023-01-22 16:21:08.248889: step: 1940/531, loss: 0.0026895683258771896 2023-01-22 16:21:09.310931: step: 1944/531, loss: 0.0009147656382992864 2023-01-22 16:21:10.418904: step: 1948/531, loss: 0.006670397240668535 2023-01-22 16:21:11.487542: step: 1952/531, loss: 0.0013399462914094329 2023-01-22 16:21:12.565649: step: 1956/531, loss: 0.007294829934835434 2023-01-22 16:21:13.633325: step: 1960/531, loss: 0.004420367069542408 2023-01-22 16:21:14.701483: step: 1964/531, loss: 0.006349206902086735 2023-01-22 16:21:15.755861: step: 1968/531, loss: 0.003116267267614603 2023-01-22 16:21:16.823877: step: 1972/531, loss: 0.013571003451943398 2023-01-22 16:21:17.880521: step: 1976/531, loss: 0.03366992622613907 2023-01-22 16:21:18.948003: step: 1980/531, loss: 0.01436995156109333 2023-01-22 16:21:20.023453: step: 1984/531, loss: 0.007501256186515093 2023-01-22 16:21:21.086927: step: 1988/531, loss: 0.020963972434401512 2023-01-22 16:21:22.149887: step: 1992/531, loss: 0.0036808261647820473 2023-01-22 16:21:23.206532: step: 1996/531, loss: 0.001825462793931365 2023-01-22 16:21:24.273577: step: 2000/531, loss: 0.00017323480278719217 2023-01-22 16:21:25.337182: step: 2004/531, loss: 0.010004017502069473 2023-01-22 16:21:26.383853: step: 2008/531, loss: 0.011605517007410526 2023-01-22 16:21:27.448407: step: 2012/531, loss: 0.017539387568831444 2023-01-22 16:21:28.505843: step: 2016/531, loss: 0.002200875896960497 2023-01-22 16:21:29.561697: step: 2020/531, loss: 0.005723350681364536 2023-01-22 16:21:30.629718: step: 2024/531, loss: 0.009227910079061985 2023-01-22 16:21:31.686395: step: 2028/531, loss: 0.0007814702694304287 2023-01-22 16:21:32.746949: step: 2032/531, loss: 0.004175608512014151 2023-01-22 16:21:33.811773: step: 2036/531, loss: 0.0009639888303354383 2023-01-22 16:21:34.870161: step: 2040/531, loss: 0.02446260116994381 2023-01-22 16:21:35.942641: step: 2044/531, loss: 0.01093518827110529 2023-01-22 16:21:37.005019: step: 2048/531, loss: 0.0005230515380389988 2023-01-22 16:21:38.062443: step: 2052/531, loss: 0.006759479641914368 2023-01-22 16:21:39.132673: step: 2056/531, loss: 0.0037046505603939295 2023-01-22 16:21:40.196198: step: 2060/531, loss: 0.004195843357592821 
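
The step/loss stream is easier to inspect (e.g. for plotting the loss curve) once the entries are extracted from the raw text. A small, hypothetical helper for that; the regular expression and function name are assumptions made here, not part of train.py:

import re

# Hypothetical parser for the "<timestamp>: step: <n>/531, loss: <value>" entries in this log.
STEP_ENTRY = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/\d+, loss: ([0-9.eE+-]+)"
)

def parse_steps(log_text):
    # Yield (timestamp, step, loss) for every well-formed step entry found in log_text.
    for timestamp, step, loss in STEP_ENTRY.findall(log_text):
        yield timestamp, int(step), float(loss)
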
2023-01-22 16:21:41.256436: step: 2064/531, loss: 0.0038369738031178713 2023-01-22 16:21:42.346817: step: 2068/531, loss: 0.000492674414999783 2023-01-22 16:21:43.392448: step: 2072/531, loss: 0.008961460553109646 2023-01-22 16:21:44.449763: step: 2076/531, loss: 0.02713868021965027 2023-01-22 16:21:45.507408: step: 2080/531, loss: 0.015142214484512806 2023-01-22 16:21:46.568211: step: 2084/531, loss: 0.020338667556643486 2023-01-22 16:21:47.639558: step: 2088/531, loss: 0.008478760719299316 2023-01-22 16:21:48.689300: step: 2092/531, loss: 0.012183763086795807 2023-01-22 16:21:49.730154: step: 2096/531, loss: 0.0 2023-01-22 16:21:50.803252: step: 2100/531, loss: 0.021184327080845833 2023-01-22 16:21:51.858632: step: 2104/531, loss: 0.012286979705095291 2023-01-22 16:21:52.904454: step: 2108/531, loss: 0.008115244098007679 2023-01-22 16:21:53.965401: step: 2112/531, loss: 0.0009436632390134037 2023-01-22 16:21:55.034842: step: 2116/531, loss: 0.004173213616013527 2023-01-22 16:21:56.104190: step: 2120/531, loss: 0.0041024102829396725 2023-01-22 16:21:57.162177: step: 2124/531, loss: 0.013826792128384113 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355453821656051, 'r': 0.31708096590909096, 'f1': 0.3351726726726727}, 'combined': 0.24696933775881144, 'stategy': 1, 'epoch': 8} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3405369945959443, 'r': 0.2790682230082287, 'f1': 0.30675356259237835}, 'combined': 0.19207933358588178, 'stategy': 1, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33226217088922005, 'r': 0.3461326979472141, 'f1': 0.33905563534978034}, 'combined': 0.2498304681524697, 'stategy': 1, 'epoch': 8} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3647148732510036, 'r': 0.301572172020893, 'f1': 0.3301515683822017}, 'combined': 0.20460097195516724, 'stategy': 1, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3163956009913259, 'r': 0.3229996837444655, 'f1': 0.31966353677621284}, 'combined': 0.23554155341405156, 'stategy': 1, 'epoch': 8} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36770280015578605, 'r': 0.28864835892988083, 'f1': 0.32341470985362153}, 'combined': 0.21454243128903608, 'stategy': 1, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.34285714285714286, 'f1': 0.3529411764705882}, 'combined': 0.2352941176470588, 'stategy': 1, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 
0.25052447552447554, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:24:21.699107: step: 4/531, loss: 0.0045977430418133736 2023-01-22 16:24:22.750890: step: 8/531, loss: 0.0019700611010193825 2023-01-22 16:24:23.806024: step: 12/531, loss: 0.004937261343002319 2023-01-22 16:24:24.845635: step: 16/531, loss: 0.009785373695194721 2023-01-22 16:24:25.901568: step: 20/531, loss: 0.0010665488662198186 2023-01-22 16:24:26.959702: step: 24/531, loss: 0.0018849570769816637 2023-01-22 16:24:28.014604: step: 28/531, loss: 0.0027515084948390722 2023-01-22 16:24:29.073464: step: 32/531, loss: 0.0047698556445539 2023-01-22 16:24:30.118768: step: 36/531, loss: 0.005599712487310171 2023-01-22 16:24:31.174527: step: 40/531, loss: 0.003969782497733831 2023-01-22 16:24:32.231491: step: 44/531, loss: 0.0006599472835659981 2023-01-22 16:24:33.289176: step: 48/531, loss: 0.005097010172903538 2023-01-22 16:24:34.343088: step: 52/531, loss: 0.014123380184173584 2023-01-22 16:24:35.401426: step: 56/531, loss: 0.005724338814616203 2023-01-22 16:24:36.438804: step: 60/531, loss: 0.00408971169963479 2023-01-22 16:24:37.499672: step: 64/531, loss: 0.045634925365448 2023-01-22 16:24:38.563812: step: 68/531, loss: 0.007729522883892059 2023-01-22 16:24:39.626566: step: 72/531, loss: 0.006360651925206184 2023-01-22 16:24:40.672134: step: 76/531, loss: 0.0007551007438451052 2023-01-22 16:24:41.725717: step: 80/531, loss: 0.004799914080649614 2023-01-22 
16:24:42.792526: step: 84/531, loss: 0.0015599695034325123 2023-01-22 16:24:43.854357: step: 88/531, loss: 0.011800430715084076 2023-01-22 16:24:44.906171: step: 92/531, loss: 0.004497630521655083 2023-01-22 16:24:45.961816: step: 96/531, loss: 0.003674152074381709 2023-01-22 16:24:47.015636: step: 100/531, loss: 0.0010398485464975238 2023-01-22 16:24:48.069238: step: 104/531, loss: 0.017408128827810287 2023-01-22 16:24:49.116485: step: 108/531, loss: 0.0031853914260864258 2023-01-22 16:24:50.157973: step: 112/531, loss: 0.007440201472491026 2023-01-22 16:24:51.208319: step: 116/531, loss: 0.0024703936651349068 2023-01-22 16:24:52.266553: step: 120/531, loss: 0.0033811787143349648 2023-01-22 16:24:53.337278: step: 124/531, loss: 0.006839707959443331 2023-01-22 16:24:54.402128: step: 128/531, loss: 0.00014722137711942196 2023-01-22 16:24:55.459143: step: 132/531, loss: 0.004453862085938454 2023-01-22 16:24:56.505044: step: 136/531, loss: 0.018303485587239265 2023-01-22 16:24:57.564594: step: 140/531, loss: 0.010282585397362709 2023-01-22 16:24:58.618049: step: 144/531, loss: 0.006527125369757414 2023-01-22 16:24:59.673585: step: 148/531, loss: 0.0034645788837224245 2023-01-22 16:25:00.729378: step: 152/531, loss: 0.007936164736747742 2023-01-22 16:25:01.794796: step: 156/531, loss: 0.005556902848184109 2023-01-22 16:25:02.843936: step: 160/531, loss: 0.0 2023-01-22 16:25:03.894857: step: 164/531, loss: 0.001379739143885672 2023-01-22 16:25:04.961699: step: 168/531, loss: 0.016546497121453285 2023-01-22 16:25:06.023249: step: 172/531, loss: 0.006233325693756342 2023-01-22 16:25:07.079552: step: 176/531, loss: 0.008840410970151424 2023-01-22 16:25:08.161975: step: 180/531, loss: 0.002961039077490568 2023-01-22 16:25:09.238483: step: 184/531, loss: 0.007977454923093319 2023-01-22 16:25:10.292879: step: 188/531, loss: 0.00157838873565197 2023-01-22 16:25:11.357923: step: 192/531, loss: 0.004022036213427782 2023-01-22 16:25:12.439265: step: 196/531, loss: 0.005652822088450193 2023-01-22 16:25:13.507203: step: 200/531, loss: 0.000407386920414865 2023-01-22 16:25:14.561273: step: 204/531, loss: 0.0012129707029089332 2023-01-22 16:25:15.624107: step: 208/531, loss: 0.0182546004652977 2023-01-22 16:25:16.697097: step: 212/531, loss: 0.005494026001542807 2023-01-22 16:25:17.769222: step: 216/531, loss: 0.004386740271002054 2023-01-22 16:25:18.834693: step: 220/531, loss: 0.003960524220019579 2023-01-22 16:25:19.905788: step: 224/531, loss: 0.004750370979309082 2023-01-22 16:25:20.956737: step: 228/531, loss: 0.013265627436339855 2023-01-22 16:25:22.013067: step: 232/531, loss: 0.03604407608509064 2023-01-22 16:25:23.081697: step: 236/531, loss: 0.01644900068640709 2023-01-22 16:25:24.141940: step: 240/531, loss: 0.012413191609084606 2023-01-22 16:25:25.199042: step: 244/531, loss: 0.0004996042116545141 2023-01-22 16:25:26.266353: step: 248/531, loss: 0.006077242083847523 2023-01-22 16:25:27.330707: step: 252/531, loss: 0.016008995473384857 2023-01-22 16:25:28.386591: step: 256/531, loss: 9.779685933608562e-05 2023-01-22 16:25:29.458151: step: 260/531, loss: 0.015536663122475147 2023-01-22 16:25:30.520288: step: 264/531, loss: 0.003452009754255414 2023-01-22 16:25:31.592512: step: 268/531, loss: 0.0022483840584754944 2023-01-22 16:25:32.646046: step: 272/531, loss: 0.0020265274215489626 2023-01-22 16:25:33.697301: step: 276/531, loss: 0.006405857857316732 2023-01-22 16:25:34.769620: step: 280/531, loss: 0.008755036629736423 2023-01-22 16:25:35.819073: step: 284/531, loss: 0.004601696971803904 
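
The 'Current best result' blocks above appear to keep, per language, the epoch with the highest Dev 'combined' score: Chinese is still epoch 6 and Korean/Russian epoch 5, consistent with the lower Dev scores reported for epochs 7 and 8. A minimal sketch of that selection logic, assuming this is how the block is maintained (the actual bookkeeping in train.py is not shown in the log):

# Assumed best-result bookkeeping: keep, per language, the epoch whose Dev "combined" score is highest.
best_by_language = {}  # language -> score dict for the best epoch so far

def update_best(language, dev_scores):
    current = best_by_language.get(language)
    if current is None or dev_scores["combined"] > current["combined"]:
        best_by_language[language] = dev_scores

# e.g. update_best("Chinese", {"combined": 0.25052447552447554, "epoch": 6})
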
2023-01-22 16:25:36.893324: step: 288/531, loss: 0.008911800570786 2023-01-22 16:25:37.967450: step: 292/531, loss: 0.003905730089172721 2023-01-22 16:25:39.040201: step: 296/531, loss: 0.006910170894116163 2023-01-22 16:25:40.119452: step: 300/531, loss: 0.001313588465563953 2023-01-22 16:25:41.172377: step: 304/531, loss: 0.0063367681577801704 2023-01-22 16:25:42.232812: step: 308/531, loss: 0.0018200651975348592 2023-01-22 16:25:43.301034: step: 312/531, loss: 0.004841177258640528 2023-01-22 16:25:44.367886: step: 316/531, loss: 0.014197521843016148 2023-01-22 16:25:45.440211: step: 320/531, loss: 0.0008779449854046106 2023-01-22 16:25:46.507434: step: 324/531, loss: 0.007762029767036438 2023-01-22 16:25:47.565121: step: 328/531, loss: 0.001322629046626389 2023-01-22 16:25:48.625219: step: 332/531, loss: 0.001630575628951192 2023-01-22 16:25:49.684817: step: 336/531, loss: 0.0014119212282821536 2023-01-22 16:25:50.750700: step: 340/531, loss: 0.0012350730830803514 2023-01-22 16:25:51.820214: step: 344/531, loss: 4.487645492190495e-05 2023-01-22 16:25:52.880070: step: 348/531, loss: 0.003320972668007016 2023-01-22 16:25:53.958513: step: 352/531, loss: 0.0013794652186334133 2023-01-22 16:25:55.015519: step: 356/531, loss: 4.466444079298526e-05 2023-01-22 16:25:56.075521: step: 360/531, loss: 0.007342898286879063 2023-01-22 16:25:57.145695: step: 364/531, loss: 0.0064662303775548935 2023-01-22 16:25:58.210872: step: 368/531, loss: 0.0018413165817037225 2023-01-22 16:25:59.268384: step: 372/531, loss: 0.007778462953865528 2023-01-22 16:26:00.332595: step: 376/531, loss: 0.01393235195428133 2023-01-22 16:26:01.396299: step: 380/531, loss: 0.0008352399454452097 2023-01-22 16:26:02.475850: step: 384/531, loss: 0.0014705639332532883 2023-01-22 16:26:03.530548: step: 388/531, loss: 0.005661927629262209 2023-01-22 16:26:04.602591: step: 392/531, loss: 0.008845703676342964 2023-01-22 16:26:05.655562: step: 396/531, loss: 0.0009504136396571994 2023-01-22 16:26:06.709271: step: 400/531, loss: 0.0010107960551977158 2023-01-22 16:26:07.771901: step: 404/531, loss: 0.00014433078467845917 2023-01-22 16:26:08.844992: step: 408/531, loss: 0.016325997188687325 2023-01-22 16:26:09.925511: step: 412/531, loss: 0.0003459411673247814 2023-01-22 16:26:10.985370: step: 416/531, loss: 0.015770798549056053 2023-01-22 16:26:12.090879: step: 420/531, loss: 0.004629632458090782 2023-01-22 16:26:13.152132: step: 424/531, loss: 0.007492108270525932 2023-01-22 16:26:14.211159: step: 428/531, loss: 0.008259224705398083 2023-01-22 16:26:15.270689: step: 432/531, loss: 0.01000315323472023 2023-01-22 16:26:16.343809: step: 436/531, loss: 0.005067561287432909 2023-01-22 16:26:17.411219: step: 440/531, loss: 0.001194012351334095 2023-01-22 16:26:18.464126: step: 444/531, loss: 0.00036043146974407136 2023-01-22 16:26:19.525676: step: 448/531, loss: 0.0033841647673398256 2023-01-22 16:26:20.584119: step: 452/531, loss: 0.0037216609343886375 2023-01-22 16:26:21.647809: step: 456/531, loss: 0.00327302236109972 2023-01-22 16:26:22.718827: step: 460/531, loss: 0.020945213735103607 2023-01-22 16:26:23.789556: step: 464/531, loss: 0.006891566328704357 2023-01-22 16:26:24.834660: step: 468/531, loss: 0.0023995935916900635 2023-01-22 16:26:25.908299: step: 472/531, loss: 0.00436417106539011 2023-01-22 16:26:26.984455: step: 476/531, loss: 0.002857574727386236 2023-01-22 16:26:28.040241: step: 480/531, loss: 0.00039393914630636573 2023-01-22 16:26:29.108394: step: 484/531, loss: 0.00477828923612833 2023-01-22 16:26:30.165475: step: 
488/531, loss: 0.005784063134342432 2023-01-22 16:26:31.214842: step: 492/531, loss: 0.00048021567636169493 2023-01-22 16:26:32.289859: step: 496/531, loss: 0.001890623476356268 2023-01-22 16:26:33.357819: step: 500/531, loss: 0.018695522099733353 2023-01-22 16:26:34.426907: step: 504/531, loss: 0.01640419103205204 2023-01-22 16:26:35.516436: step: 508/531, loss: 0.00582819664850831 2023-01-22 16:26:36.577040: step: 512/531, loss: 0.02799372561275959 2023-01-22 16:26:37.640813: step: 516/531, loss: 0.008583822287619114 2023-01-22 16:26:38.684145: step: 520/531, loss: 0.003239756915718317 2023-01-22 16:26:39.733233: step: 524/531, loss: 0.0003791235794778913 2023-01-22 16:26:40.792625: step: 528/531, loss: 0.005838289391249418 2023-01-22 16:26:41.879756: step: 532/531, loss: 0.009548705071210861 2023-01-22 16:26:42.925590: step: 536/531, loss: 0.0013132026651874185 2023-01-22 16:26:43.996569: step: 540/531, loss: 0.008961766958236694 2023-01-22 16:26:45.061023: step: 544/531, loss: 0.00017841016233433038 2023-01-22 16:26:46.124880: step: 548/531, loss: 0.0027805950958281755 2023-01-22 16:26:47.202212: step: 552/531, loss: 0.0026308472733944654 2023-01-22 16:26:48.281795: step: 556/531, loss: 0.0038388827815651894 2023-01-22 16:26:49.355481: step: 560/531, loss: 0.002630452858284116 2023-01-22 16:26:50.420348: step: 564/531, loss: 0.05558782443404198 2023-01-22 16:26:51.483043: step: 568/531, loss: 0.0007264415617100894 2023-01-22 16:26:52.534238: step: 572/531, loss: 0.01898963749408722 2023-01-22 16:26:53.598601: step: 576/531, loss: 0.00010552137246122584 2023-01-22 16:26:54.655280: step: 580/531, loss: 0.0050046988762915134 2023-01-22 16:26:55.717791: step: 584/531, loss: 0.0026430857833474874 2023-01-22 16:26:56.766783: step: 588/531, loss: 0.001241020392626524 2023-01-22 16:26:57.836696: step: 592/531, loss: 0.0036607852671295404 2023-01-22 16:26:58.892053: step: 596/531, loss: 0.00029355636797845364 2023-01-22 16:26:59.959637: step: 600/531, loss: 0.006995843257755041 2023-01-22 16:27:01.031504: step: 604/531, loss: 0.002957818331196904 2023-01-22 16:27:02.087923: step: 608/531, loss: 0.0019812150858342648 2023-01-22 16:27:03.155690: step: 612/531, loss: 9.81592747848481e-05 2023-01-22 16:27:04.207819: step: 616/531, loss: 0.0014182009035721421 2023-01-22 16:27:05.285648: step: 620/531, loss: 0.0018387639429420233 2023-01-22 16:27:06.335581: step: 624/531, loss: 0.0020353938452899456 2023-01-22 16:27:07.407024: step: 628/531, loss: 0.01270974613726139 2023-01-22 16:27:08.480398: step: 632/531, loss: 0.005841148551553488 2023-01-22 16:27:09.530842: step: 636/531, loss: 0.03105478733778 2023-01-22 16:27:10.591308: step: 640/531, loss: 0.0036587135400623083 2023-01-22 16:27:11.656897: step: 644/531, loss: 0.0027956501580774784 2023-01-22 16:27:12.710265: step: 648/531, loss: 0.005313706584274769 2023-01-22 16:27:13.786921: step: 652/531, loss: 0.0005047524464316666 2023-01-22 16:27:14.855762: step: 656/531, loss: 0.00048412979231216013 2023-01-22 16:27:15.909910: step: 660/531, loss: 0.00015525547496508807 2023-01-22 16:27:16.987509: step: 664/531, loss: 0.007092516403645277 2023-01-22 16:27:18.054203: step: 668/531, loss: 0.0033150548115372658 2023-01-22 16:27:19.110401: step: 672/531, loss: 0.004298761487007141 2023-01-22 16:27:20.169928: step: 676/531, loss: 0.004011280369013548 2023-01-22 16:27:21.243745: step: 680/531, loss: 0.008520894683897495 2023-01-22 16:27:22.300849: step: 684/531, loss: 0.0005329372943378985 2023-01-22 16:27:23.376968: step: 688/531, loss: 
0.005702773109078407 2023-01-22 16:27:24.451279: step: 692/531, loss: 0.005568182095885277 2023-01-22 16:27:25.504301: step: 696/531, loss: 0.00956930872052908 2023-01-22 16:27:26.576889: step: 700/531, loss: 0.007446709088981152 2023-01-22 16:27:27.650425: step: 704/531, loss: 0.0028796715196222067 2023-01-22 16:27:28.712560: step: 708/531, loss: 0.005106172524392605 2023-01-22 16:27:29.759680: step: 712/531, loss: 0.000268861826043576 2023-01-22 16:27:30.826069: step: 716/531, loss: 0.0019048639805987477 2023-01-22 16:27:31.897267: step: 720/531, loss: 0.00028441695030778646 2023-01-22 16:27:32.946085: step: 724/531, loss: 0.005149377975612879 2023-01-22 16:27:34.023184: step: 728/531, loss: 0.003986825235188007 2023-01-22 16:27:35.069055: step: 732/531, loss: 0.002429863205179572 2023-01-22 16:27:36.138163: step: 736/531, loss: 0.001819361699745059 2023-01-22 16:27:37.199004: step: 740/531, loss: 0.0029367515817284584 2023-01-22 16:27:38.265751: step: 744/531, loss: 0.003790078451856971 2023-01-22 16:27:39.330328: step: 748/531, loss: 0.0033502851147204638 2023-01-22 16:27:40.393609: step: 752/531, loss: 0.0004885253147222102 2023-01-22 16:27:41.468675: step: 756/531, loss: 0.004335005301982164 2023-01-22 16:27:42.510141: step: 760/531, loss: 0.0011945945443585515 2023-01-22 16:27:43.567632: step: 764/531, loss: 0.0023913851473480463 2023-01-22 16:27:44.633130: step: 768/531, loss: 0.003030638676136732 2023-01-22 16:27:45.692739: step: 772/531, loss: 0.0011016554199159145 2023-01-22 16:27:46.746085: step: 776/531, loss: 0.007391211111098528 2023-01-22 16:27:47.808004: step: 780/531, loss: 0.019452929496765137 2023-01-22 16:27:48.866363: step: 784/531, loss: 0.0054628984071314335 2023-01-22 16:27:49.927783: step: 788/531, loss: 0.012637363746762276 2023-01-22 16:27:50.991851: step: 792/531, loss: 0.0011910557514056563 2023-01-22 16:27:52.038113: step: 796/531, loss: 0.0026493645273149014 2023-01-22 16:27:53.092354: step: 800/531, loss: 0.011939937248826027 2023-01-22 16:27:54.145367: step: 804/531, loss: 0.007114273961633444 2023-01-22 16:27:55.202996: step: 808/531, loss: 0.014530682936310768 2023-01-22 16:27:56.258200: step: 812/531, loss: 0.002293882193043828 2023-01-22 16:27:57.309136: step: 816/531, loss: 0.03261454403400421 2023-01-22 16:27:58.372513: step: 820/531, loss: 0.0034278477542102337 2023-01-22 16:27:59.431519: step: 824/531, loss: 0.004947775974869728 2023-01-22 16:28:00.486750: step: 828/531, loss: 0.0030227298848330975 2023-01-22 16:28:01.532023: step: 832/531, loss: 0.0017091204645112157 2023-01-22 16:28:02.593102: step: 836/531, loss: 0.0064543127082288265 2023-01-22 16:28:03.640604: step: 840/531, loss: 0.004850310739129782 2023-01-22 16:28:04.693007: step: 844/531, loss: 2.369673347857315e-05 2023-01-22 16:28:05.743869: step: 848/531, loss: 0.03181464970111847 2023-01-22 16:28:06.818524: step: 852/531, loss: 0.0031605076510459185 2023-01-22 16:28:07.882214: step: 856/531, loss: 0.014925448223948479 2023-01-22 16:28:08.946977: step: 860/531, loss: 0.0030504364985972643 2023-01-22 16:28:09.993346: step: 864/531, loss: 0.004174187779426575 2023-01-22 16:28:11.061574: step: 868/531, loss: 0.0032774428837001324 2023-01-22 16:28:12.111537: step: 872/531, loss: 0.005438651889562607 2023-01-22 16:28:13.164025: step: 876/531, loss: 0.0029399788472801447 2023-01-22 16:28:14.226939: step: 880/531, loss: 0.0025096528697758913 2023-01-22 16:28:15.268743: step: 884/531, loss: 0.0014572727959603071 2023-01-22 16:28:16.340426: step: 888/531, loss: 0.004770008847117424 2023-01-22 
16:28:17.402438: step: 892/531, loss: 0.0010061761131510139 2023-01-22 16:28:18.453815: step: 896/531, loss: 0.0014356044121086597 2023-01-22 16:28:19.500516: step: 900/531, loss: 0.0035231199581176043 2023-01-22 16:28:20.569479: step: 904/531, loss: 0.002293233759701252 2023-01-22 16:28:21.641748: step: 908/531, loss: 0.0072817932814359665 2023-01-22 16:28:22.703149: step: 912/531, loss: 0.01366877555847168 2023-01-22 16:28:23.773877: step: 916/531, loss: 0.005684357602149248 2023-01-22 16:28:24.831452: step: 920/531, loss: 0.013977249152958393 2023-01-22 16:28:25.902099: step: 924/531, loss: 0.005502911750227213 2023-01-22 16:28:26.968891: step: 928/531, loss: 0.0019891508854925632 2023-01-22 16:28:28.020178: step: 932/531, loss: 0.007536507211625576 2023-01-22 16:28:29.089606: step: 936/531, loss: 0.0005093825166113675 2023-01-22 16:28:30.150550: step: 940/531, loss: 0.0012100131716579199 2023-01-22 16:28:31.237879: step: 944/531, loss: 0.00581153342500329 2023-01-22 16:28:32.303195: step: 948/531, loss: 0.013841337524354458 2023-01-22 16:28:33.368301: step: 952/531, loss: 0.006892412900924683 2023-01-22 16:28:34.433584: step: 956/531, loss: 0.00490581663325429 2023-01-22 16:28:35.496433: step: 960/531, loss: 0.012574831023812294 2023-01-22 16:28:36.562761: step: 964/531, loss: 0.02010909467935562 2023-01-22 16:28:37.625439: step: 968/531, loss: 0.0024519346188753843 2023-01-22 16:28:38.680644: step: 972/531, loss: 0.0006077011348679662 2023-01-22 16:28:39.748194: step: 976/531, loss: 0.01896348036825657 2023-01-22 16:28:40.816529: step: 980/531, loss: 0.0003838891861960292 2023-01-22 16:28:41.890446: step: 984/531, loss: 0.001048270845785737 2023-01-22 16:28:42.930051: step: 988/531, loss: 0.004417904652655125 2023-01-22 16:28:43.989838: step: 992/531, loss: 0.0009408965706825256 2023-01-22 16:28:45.048501: step: 996/531, loss: 0.002442322438582778 2023-01-22 16:28:46.120059: step: 1000/531, loss: 0.0022212981712073088 2023-01-22 16:28:47.195302: step: 1004/531, loss: 0.0013069476699456573 2023-01-22 16:28:48.250260: step: 1008/531, loss: 0.009917059913277626 2023-01-22 16:28:49.314315: step: 1012/531, loss: 0.0041924575343728065 2023-01-22 16:28:50.361428: step: 1016/531, loss: 6.13931697444059e-05 2023-01-22 16:28:51.420366: step: 1020/531, loss: 0.002931284951046109 2023-01-22 16:28:52.476029: step: 1024/531, loss: 0.01705293543636799 2023-01-22 16:28:53.535823: step: 1028/531, loss: 0.003357037901878357 2023-01-22 16:28:54.610219: step: 1032/531, loss: 0.0018999928142875433 2023-01-22 16:28:55.670673: step: 1036/531, loss: 0.015623767860233784 2023-01-22 16:28:56.723592: step: 1040/531, loss: 0.0006367540336214006 2023-01-22 16:28:57.790617: step: 1044/531, loss: 0.0007088141865096986 2023-01-22 16:28:58.858227: step: 1048/531, loss: 0.00343990046530962 2023-01-22 16:28:59.943396: step: 1052/531, loss: 0.0006036301492713392 2023-01-22 16:29:00.987873: step: 1056/531, loss: 0.00012817702372558415 2023-01-22 16:29:02.049482: step: 1060/531, loss: 0.004272814840078354 2023-01-22 16:29:03.101867: step: 1064/531, loss: 0.0025128498673439026 2023-01-22 16:29:04.160461: step: 1068/531, loss: 0.0032080861274152994 2023-01-22 16:29:05.203734: step: 1072/531, loss: 0.0037822870071977377 2023-01-22 16:29:06.272154: step: 1076/531, loss: 0.0013688289327546954 2023-01-22 16:29:07.351389: step: 1080/531, loss: 0.009208443574607372 2023-01-22 16:29:08.413692: step: 1084/531, loss: 0.010907547548413277 2023-01-22 16:29:09.465588: step: 1088/531, loss: 0.006185346283018589 2023-01-22 
16:29:10.535277: step: 1092/531, loss: 0.004072161391377449 2023-01-22 16:29:11.585608: step: 1096/531, loss: 0.007457377854734659 2023-01-22 16:29:12.672945: step: 1100/531, loss: 0.004470342304557562 2023-01-22 16:29:13.725842: step: 1104/531, loss: 0.007369750179350376 2023-01-22 16:29:14.767880: step: 1108/531, loss: 0.033470671623945236 2023-01-22 16:29:15.825736: step: 1112/531, loss: 0.03634115681052208 2023-01-22 16:29:16.875949: step: 1116/531, loss: 0.024756332859396935 2023-01-22 16:29:17.946243: step: 1120/531, loss: 0.0061282324604690075 2023-01-22 16:29:18.981862: step: 1124/531, loss: 0.003763258457183838 2023-01-22 16:29:20.025967: step: 1128/531, loss: 0.03824980929493904 2023-01-22 16:29:21.100712: step: 1132/531, loss: 0.005244008265435696 2023-01-22 16:29:22.143058: step: 1136/531, loss: 0.00044720651931129396 2023-01-22 16:29:23.199101: step: 1140/531, loss: 0.01317501813173294 2023-01-22 16:29:24.250244: step: 1144/531, loss: 0.004085793625563383 2023-01-22 16:29:25.309635: step: 1148/531, loss: 0.001619831658899784 2023-01-22 16:29:26.384739: step: 1152/531, loss: 0.014504484832286835 2023-01-22 16:29:27.440157: step: 1156/531, loss: 0.010084259323775768 2023-01-22 16:29:28.501082: step: 1160/531, loss: 0.0070733968168497086 2023-01-22 16:29:29.552850: step: 1164/531, loss: 0.01693405769765377 2023-01-22 16:29:30.612892: step: 1168/531, loss: 0.0057841334491968155 2023-01-22 16:29:31.657746: step: 1172/531, loss: 0.0027460299897938967 2023-01-22 16:29:32.713888: step: 1176/531, loss: 0.0005352850421331823 2023-01-22 16:29:33.760500: step: 1180/531, loss: 0.004516599699854851 2023-01-22 16:29:34.805507: step: 1184/531, loss: 0.00194433459546417 2023-01-22 16:29:35.857621: step: 1188/531, loss: 0.006917648948729038 2023-01-22 16:29:36.909343: step: 1192/531, loss: 0.0025990032590925694 2023-01-22 16:29:37.980915: step: 1196/531, loss: 0.0003694795595947653 2023-01-22 16:29:39.047099: step: 1200/531, loss: 0.0022541554644703865 2023-01-22 16:29:40.107308: step: 1204/531, loss: 0.005560600198805332 2023-01-22 16:29:41.165023: step: 1208/531, loss: 0.0009630077984184027 2023-01-22 16:29:42.221973: step: 1212/531, loss: 0.0018164411885663867 2023-01-22 16:29:43.280486: step: 1216/531, loss: 0.005099601577967405 2023-01-22 16:29:44.322457: step: 1220/531, loss: 0.0021063892636448145 2023-01-22 16:29:45.398590: step: 1224/531, loss: 0.009221532382071018 2023-01-22 16:29:46.461686: step: 1228/531, loss: 0.0001460358325857669 2023-01-22 16:29:47.528035: step: 1232/531, loss: 0.00019809798686765134 2023-01-22 16:29:48.600362: step: 1236/531, loss: 0.005001719109714031 2023-01-22 16:29:49.688311: step: 1240/531, loss: 0.005442372057586908 2023-01-22 16:29:50.751980: step: 1244/531, loss: 0.01572977565228939 2023-01-22 16:29:51.810799: step: 1248/531, loss: 0.009500561282038689 2023-01-22 16:29:52.876211: step: 1252/531, loss: 0.006423449609428644 2023-01-22 16:29:53.931975: step: 1256/531, loss: 0.0029891899321228266 2023-01-22 16:29:54.991594: step: 1260/531, loss: 0.006739405449479818 2023-01-22 16:29:56.052883: step: 1264/531, loss: 0.0031463869381695986 2023-01-22 16:29:57.089980: step: 1268/531, loss: 0.0010002534836530685 2023-01-22 16:29:58.145081: step: 1272/531, loss: 0.0011597184929996729 2023-01-22 16:29:59.204090: step: 1276/531, loss: 0.010666647925972939 2023-01-22 16:30:00.258795: step: 1280/531, loss: 0.0024663519579917192 2023-01-22 16:30:01.322525: step: 1284/531, loss: 0.0009161880007013679 2023-01-22 16:30:02.377785: step: 1288/531, loss: 0.004265964962542057 
2023-01-22 16:30:03.436686: step: 1292/531, loss: 0.002798403613269329 2023-01-22 16:30:04.493842: step: 1296/531, loss: 0.007940325886011124 2023-01-22 16:30:05.590465: step: 1300/531, loss: 0.013691847212612629 2023-01-22 16:30:06.633988: step: 1304/531, loss: 0.044071342796087265 2023-01-22 16:30:07.694125: step: 1308/531, loss: 1.2964428606210276e-05 2023-01-22 16:30:08.764487: step: 1312/531, loss: 0.004941326100379229 2023-01-22 16:30:09.826934: step: 1316/531, loss: 0.007497509941458702 2023-01-22 16:30:10.876339: step: 1320/531, loss: 0.0141603359952569 2023-01-22 16:30:11.942293: step: 1324/531, loss: 0.0019523242954164743 2023-01-22 16:30:13.000372: step: 1328/531, loss: 0.008256577886641026 2023-01-22 16:30:14.067234: step: 1332/531, loss: 0.018632415682077408 2023-01-22 16:30:15.131452: step: 1336/531, loss: 0.0066160825081169605 2023-01-22 16:30:16.181139: step: 1340/531, loss: 0.004395787138491869 2023-01-22 16:30:17.249562: step: 1344/531, loss: 0.004893674980849028 2023-01-22 16:30:18.308540: step: 1348/531, loss: 0.0042001004330813885 2023-01-22 16:30:19.374697: step: 1352/531, loss: 0.007403684314340353 2023-01-22 16:30:20.411818: step: 1356/531, loss: 0.0024068010970950127 2023-01-22 16:30:21.478879: step: 1360/531, loss: 0.009022404439747334 2023-01-22 16:30:22.549203: step: 1364/531, loss: 0.010823088698089123 2023-01-22 16:30:23.605841: step: 1368/531, loss: 0.003000014927238226 2023-01-22 16:30:24.664145: step: 1372/531, loss: 0.007877732627093792 2023-01-22 16:30:25.722392: step: 1376/531, loss: 0.006350968964397907 2023-01-22 16:30:26.766266: step: 1380/531, loss: 0.001364083494991064 2023-01-22 16:30:27.838946: step: 1384/531, loss: 0.001567261409945786 2023-01-22 16:30:28.887320: step: 1388/531, loss: 0.0011952656786888838 2023-01-22 16:30:29.938816: step: 1392/531, loss: 0.004942044615745544 2023-01-22 16:30:30.999959: step: 1396/531, loss: 0.010176066309213638 2023-01-22 16:30:32.072642: step: 1400/531, loss: 0.009766532108187675 2023-01-22 16:30:33.127864: step: 1404/531, loss: 0.008324784226715565 2023-01-22 16:30:34.213467: step: 1408/531, loss: 0.004485917277634144 2023-01-22 16:30:35.264044: step: 1412/531, loss: 0.08397059887647629 2023-01-22 16:30:36.323588: step: 1416/531, loss: 0.004498843569308519 2023-01-22 16:30:37.390693: step: 1420/531, loss: 0.00676374789327383 2023-01-22 16:30:38.447438: step: 1424/531, loss: 0.002184188924729824 2023-01-22 16:30:39.509241: step: 1428/531, loss: 0.00883243978023529 2023-01-22 16:30:40.570161: step: 1432/531, loss: 0.004732491914182901 2023-01-22 16:30:41.632302: step: 1436/531, loss: 0.002859783126041293 2023-01-22 16:30:42.687735: step: 1440/531, loss: 0.009899810887873173 2023-01-22 16:30:43.736631: step: 1444/531, loss: 0.0027839664835482836 2023-01-22 16:30:44.798803: step: 1448/531, loss: 0.0035953286569565535 2023-01-22 16:30:45.873660: step: 1452/531, loss: 0.001807543565519154 2023-01-22 16:30:46.921773: step: 1456/531, loss: 0.003441819455474615 2023-01-22 16:30:47.975515: step: 1460/531, loss: 0.002082986058667302 2023-01-22 16:30:49.051801: step: 1464/531, loss: 0.014478602446615696 2023-01-22 16:30:50.095638: step: 1468/531, loss: 0.0002972553775180131 2023-01-22 16:30:51.151495: step: 1472/531, loss: 0.006524278782308102 2023-01-22 16:30:52.219290: step: 1476/531, loss: 0.0018274827161803842 2023-01-22 16:30:53.274394: step: 1480/531, loss: 0.0033217170275747776 2023-01-22 16:30:54.323664: step: 1484/531, loss: 0.004770943894982338 2023-01-22 16:30:55.404123: step: 1488/531, loss: 
0.009360947646200657 2023-01-22 16:30:56.458771: step: 1492/531, loss: 0.004306859336793423 2023-01-22 16:30:57.513010: step: 1496/531, loss: 0.0028718560934066772 2023-01-22 16:30:58.589183: step: 1500/531, loss: 0.010401396080851555 2023-01-22 16:30:59.653855: step: 1504/531, loss: 0.008091055788099766 2023-01-22 16:31:00.693411: step: 1508/531, loss: 8.021388566703536e-06 2023-01-22 16:31:01.782755: step: 1512/531, loss: 0.012760911136865616 2023-01-22 16:31:02.839337: step: 1516/531, loss: 0.00579597894102335 2023-01-22 16:31:03.917381: step: 1520/531, loss: 0.004161902703344822 2023-01-22 16:31:04.977456: step: 1524/531, loss: 0.0007724956376478076 2023-01-22 16:31:06.027295: step: 1528/531, loss: 0.0061883945018053055 2023-01-22 16:31:07.092158: step: 1532/531, loss: 0.020957650616765022 2023-01-22 16:31:08.142876: step: 1536/531, loss: 0.003814868163317442 2023-01-22 16:31:09.201805: step: 1540/531, loss: 0.0034995765890926123 2023-01-22 16:31:10.258831: step: 1544/531, loss: 0.021796464920043945 2023-01-22 16:31:11.321510: step: 1548/531, loss: 0.01099161896854639 2023-01-22 16:31:12.389412: step: 1552/531, loss: 0.008265999145805836 2023-01-22 16:31:13.450756: step: 1556/531, loss: 0.0015358910895884037 2023-01-22 16:31:14.510808: step: 1560/531, loss: 0.01249379850924015 2023-01-22 16:31:15.555481: step: 1564/531, loss: 0.00026867687120102346 2023-01-22 16:31:16.620043: step: 1568/531, loss: 0.0012288582511246204 2023-01-22 16:31:17.670733: step: 1572/531, loss: 0.015754522755742073 2023-01-22 16:31:18.732116: step: 1576/531, loss: 0.0016865389188751578 2023-01-22 16:31:19.794046: step: 1580/531, loss: 0.006365284323692322 2023-01-22 16:31:20.850162: step: 1584/531, loss: 0.004033660050481558 2023-01-22 16:31:21.910306: step: 1588/531, loss: 0.009530124254524708 2023-01-22 16:31:22.980720: step: 1592/531, loss: 0.00394415482878685 2023-01-22 16:31:24.026259: step: 1596/531, loss: 0.0014219009317457676 2023-01-22 16:31:25.082170: step: 1600/531, loss: 0.0018334005726501346 2023-01-22 16:31:26.123620: step: 1604/531, loss: 0.0004036286554764956 2023-01-22 16:31:27.175971: step: 1608/531, loss: 0.0014138143742457032 2023-01-22 16:31:28.240229: step: 1612/531, loss: 0.003865512553602457 2023-01-22 16:31:29.285390: step: 1616/531, loss: 0.012560317292809486 2023-01-22 16:31:30.346682: step: 1620/531, loss: 0.00033325847471132874 2023-01-22 16:31:31.392797: step: 1624/531, loss: 0.018526321277022362 2023-01-22 16:31:32.470267: step: 1628/531, loss: 0.0010144388070330024 2023-01-22 16:31:33.532876: step: 1632/531, loss: 0.006329218856990337 2023-01-22 16:31:34.581469: step: 1636/531, loss: 0.0025783791206777096 2023-01-22 16:31:35.625431: step: 1640/531, loss: 0.0029158187098801136 2023-01-22 16:31:36.685659: step: 1644/531, loss: 0.05281605198979378 2023-01-22 16:31:37.740233: step: 1648/531, loss: 0.008556989952921867 2023-01-22 16:31:38.792674: step: 1652/531, loss: 0.0036575677804648876 2023-01-22 16:31:39.869174: step: 1656/531, loss: 0.0026574493385851383 2023-01-22 16:31:40.915798: step: 1660/531, loss: 0.0007102875388227403 2023-01-22 16:31:41.965795: step: 1664/531, loss: 0.005531547125428915 2023-01-22 16:31:43.024604: step: 1668/531, loss: 0.004560297355055809 2023-01-22 16:31:44.084235: step: 1672/531, loss: 0.004554799757897854 2023-01-22 16:31:45.131830: step: 1676/531, loss: 0.008683348074555397 2023-01-22 16:31:46.216682: step: 1680/531, loss: 0.008254681713879108 2023-01-22 16:31:47.265309: step: 1684/531, loss: 0.0007623700075782835 2023-01-22 16:31:48.317426: step: 
1688/531, loss: 0.005907068960368633 2023-01-22 16:31:49.379317: step: 1692/531, loss: 0.005418321117758751 2023-01-22 16:31:50.448460: step: 1696/531, loss: 0.011914653703570366 2023-01-22 16:31:51.496117: step: 1700/531, loss: 0.0062050847336649895 2023-01-22 16:31:52.560688: step: 1704/531, loss: 0.015705464407801628 2023-01-22 16:31:53.620730: step: 1708/531, loss: 0.01217713300138712 2023-01-22 16:31:54.659500: step: 1712/531, loss: 0.007883260026574135 2023-01-22 16:31:55.702613: step: 1716/531, loss: 0.008228284306824207 2023-01-22 16:31:56.769207: step: 1720/531, loss: 0.0046814861707389355 2023-01-22 16:31:57.836508: step: 1724/531, loss: 0.0026429970748722553 2023-01-22 16:31:58.896891: step: 1728/531, loss: 0.007962407544255257 2023-01-22 16:31:59.963824: step: 1732/531, loss: 0.018482469022274017 2023-01-22 16:32:01.016137: step: 1736/531, loss: 0.00960539560765028 2023-01-22 16:32:02.072528: step: 1740/531, loss: 0.012688994407653809 2023-01-22 16:32:03.121120: step: 1744/531, loss: 0.0030719093047082424 2023-01-22 16:32:04.174256: step: 1748/531, loss: 0.003626542165875435 2023-01-22 16:32:05.229019: step: 1752/531, loss: 4.641379291570047e-06 2023-01-22 16:32:06.296884: step: 1756/531, loss: 0.007028732914477587 2023-01-22 16:32:07.360838: step: 1760/531, loss: 0.006028780713677406 2023-01-22 16:32:08.420495: step: 1764/531, loss: 0.005071580875664949 2023-01-22 16:32:09.478978: step: 1768/531, loss: 0.0031488838139921427 2023-01-22 16:32:10.523493: step: 1772/531, loss: 0.005551172886043787 2023-01-22 16:32:11.584509: step: 1776/531, loss: 0.009225967340171337 2023-01-22 16:32:12.679370: step: 1780/531, loss: 0.005321824923157692 2023-01-22 16:32:13.729290: step: 1784/531, loss: 0.002587475348263979 2023-01-22 16:32:14.787064: step: 1788/531, loss: 0.01316310465335846 2023-01-22 16:32:15.840260: step: 1792/531, loss: 0.006016949657350779 2023-01-22 16:32:16.901198: step: 1796/531, loss: 0.014933210797607899 2023-01-22 16:32:17.958263: step: 1800/531, loss: 0.0023555923253297806 2023-01-22 16:32:19.010409: step: 1804/531, loss: 0.002614242024719715 2023-01-22 16:32:20.054056: step: 1808/531, loss: 0.005970026832073927 2023-01-22 16:32:21.104123: step: 1812/531, loss: 8.773949230089784e-05 2023-01-22 16:32:22.163095: step: 1816/531, loss: 0.003064475255087018 2023-01-22 16:32:23.230207: step: 1820/531, loss: 0.0032090472523123026 2023-01-22 16:32:24.284464: step: 1824/531, loss: 0.00393712380900979 2023-01-22 16:32:25.343743: step: 1828/531, loss: 0.005815640091896057 2023-01-22 16:32:26.397063: step: 1832/531, loss: 0.003421928035095334 2023-01-22 16:32:27.453714: step: 1836/531, loss: 0.0014354335144162178 2023-01-22 16:32:28.519109: step: 1840/531, loss: 0.00011879876547027379 2023-01-22 16:32:29.584183: step: 1844/531, loss: 0.0009502311004325747 2023-01-22 16:32:30.657786: step: 1848/531, loss: 0.011249667964875698 2023-01-22 16:32:31.719269: step: 1852/531, loss: 0.0050295148976147175 2023-01-22 16:32:32.774155: step: 1856/531, loss: 0.0072916485369205475 2023-01-22 16:32:33.823281: step: 1860/531, loss: 0.004858419299125671 2023-01-22 16:32:34.898930: step: 1864/531, loss: 0.0014829982537776232 2023-01-22 16:32:35.972453: step: 1868/531, loss: 0.001284286379814148 2023-01-22 16:32:37.022905: step: 1872/531, loss: 0.002504470758140087 2023-01-22 16:32:38.080650: step: 1876/531, loss: 0.00563031667843461 2023-01-22 16:32:39.123080: step: 1880/531, loss: 0.0021439294796437025 2023-01-22 16:32:40.211194: step: 1884/531, loss: 0.004883362911641598 2023-01-22 
16:32:41.286905: step: 1888/531, loss: 0.001417040009982884 2023-01-22 16:32:42.351746: step: 1892/531, loss: 0.0026358189061284065 2023-01-22 16:32:43.412652: step: 1896/531, loss: 0.009136012755334377 2023-01-22 16:32:44.454699: step: 1900/531, loss: 0.004267186392098665 2023-01-22 16:32:45.506743: step: 1904/531, loss: 0.004240268841385841 2023-01-22 16:32:46.555833: step: 1908/531, loss: 0.0009395118686370552 2023-01-22 16:32:47.613623: step: 1912/531, loss: 0.0044632768258452415 2023-01-22 16:32:48.686884: step: 1916/531, loss: 0.0037036878056824207 2023-01-22 16:32:49.752859: step: 1920/531, loss: 0.0024403089191764593 2023-01-22 16:32:50.810531: step: 1924/531, loss: 0.004854206927120686 2023-01-22 16:32:51.871008: step: 1928/531, loss: 0.009533275850117207 2023-01-22 16:32:52.937605: step: 1932/531, loss: 0.0066087255254387856 2023-01-22 16:32:53.987854: step: 1936/531, loss: 0.0033751921728253365 2023-01-22 16:32:55.038348: step: 1940/531, loss: 0.0025438310112804174 2023-01-22 16:32:56.087669: step: 1944/531, loss: 0.0038192011415958405 2023-01-22 16:32:57.167819: step: 1948/531, loss: 0.004002876114100218 2023-01-22 16:32:58.229702: step: 1952/531, loss: 0.0013789072399958968 2023-01-22 16:32:59.281880: step: 1956/531, loss: 0.0002060772239929065 2023-01-22 16:33:00.323171: step: 1960/531, loss: 1.8445278328727e-05 2023-01-22 16:33:01.375936: step: 1964/531, loss: 0.001845311839133501 2023-01-22 16:33:02.433756: step: 1968/531, loss: 0.01027022022753954 2023-01-22 16:33:03.488832: step: 1972/531, loss: 0.003569865133613348 2023-01-22 16:33:04.550186: step: 1976/531, loss: 0.011889354325830936 2023-01-22 16:33:05.619103: step: 1980/531, loss: 0.0036696013994514942 2023-01-22 16:33:06.681290: step: 1984/531, loss: 0.01345662958920002 2023-01-22 16:33:07.764144: step: 1988/531, loss: 0.03222336247563362 2023-01-22 16:33:08.806823: step: 1992/531, loss: 0.00046090842806734145 2023-01-22 16:33:09.887826: step: 1996/531, loss: 0.0018972171237692237 2023-01-22 16:33:10.943298: step: 2000/531, loss: 0.007250071503221989 2023-01-22 16:33:11.986064: step: 2004/531, loss: 0.0006911639939062297 2023-01-22 16:33:13.055075: step: 2008/531, loss: 0.0030516444239765406 2023-01-22 16:33:14.117762: step: 2012/531, loss: 0.005031300708651543 2023-01-22 16:33:15.173425: step: 2016/531, loss: 0.008610888384282589 2023-01-22 16:33:16.242917: step: 2020/531, loss: 0.008901862427592278 2023-01-22 16:33:17.314502: step: 2024/531, loss: 0.004255966283380985 2023-01-22 16:33:18.383619: step: 2028/531, loss: 0.007193244062364101 2023-01-22 16:33:19.449761: step: 2032/531, loss: 0.010513965040445328 2023-01-22 16:33:20.495730: step: 2036/531, loss: 0.0065001631155610085 2023-01-22 16:33:21.562033: step: 2040/531, loss: 0.002157973125576973 2023-01-22 16:33:22.637631: step: 2044/531, loss: 0.006816069129854441 2023-01-22 16:33:23.690637: step: 2048/531, loss: 0.0003723324625752866 2023-01-22 16:33:24.756125: step: 2052/531, loss: 0.0013117629569023848 2023-01-22 16:33:25.819606: step: 2056/531, loss: 0.007126954849809408 2023-01-22 16:33:26.885717: step: 2060/531, loss: 0.01046606432646513 2023-01-22 16:33:27.942830: step: 2064/531, loss: 0.0014329601544886827 2023-01-22 16:33:29.001654: step: 2068/531, loss: 0.0025488664396107197 2023-01-22 16:33:30.053575: step: 2072/531, loss: 0.0019703141879290342 2023-01-22 16:33:31.126489: step: 2076/531, loss: 0.001737391809001565 2023-01-22 16:33:32.193402: step: 2080/531, loss: 0.002448421437293291 2023-01-22 16:33:33.231015: step: 2084/531, loss: 
0.004302576184272766 2023-01-22 16:33:34.299251: step: 2088/531, loss: 0.0016425788635388017 2023-01-22 16:33:35.356220: step: 2092/531, loss: 0.004586624912917614 2023-01-22 16:33:36.417149: step: 2096/531, loss: 0.010497825220227242 2023-01-22 16:33:37.476382: step: 2100/531, loss: 0.015880558639764786 2023-01-22 16:33:38.550265: step: 2104/531, loss: 0.02986776828765869 2023-01-22 16:33:39.623137: step: 2108/531, loss: 0.004116294905543327 2023-01-22 16:33:40.692472: step: 2112/531, loss: 0.02415524609386921 2023-01-22 16:33:41.746798: step: 2116/531, loss: 0.024909794330596924 2023-01-22 16:33:42.854208: step: 2120/531, loss: 0.005354198161512613 2023-01-22 16:33:43.917549: step: 2124/531, loss: 0.0008390581351704895
==================================================
Loss: 0.006
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35255503144654093, 'r': 0.3185014204545455, 'f1': 0.33466417910447765}, 'combined': 0.24659465828750984, 'stategy': 1, 'epoch': 9}
Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33530571706290463, 'r': 0.2747812193981204, 'f1': 0.3020412610050768}, 'combined': 0.1891286400685995, 'stategy': 1, 'epoch': 9}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3312489890003235, 'r': 0.35324844747283074, 'f1': 0.34189519158527426}, 'combined': 0.25192277274704417, 'stategy': 1, 'epoch': 9}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3628330128086728, 'r': 0.30067045876570814, 'f1': 0.32883978182561174}, 'combined': 0.20378803380742136, 'stategy': 1, 'epoch': 9}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3127905198776758, 'r': 0.3234740670461733, 'f1': 0.31804259950248753}, 'combined': 0.23434717858078027, 'stategy': 1, 'epoch': 9}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3656459344917666, 'r': 0.2857124962379206, 'f1': 0.3207745774192476}, 'combined': 0.21279105630781772, 'stategy': 1, 'epoch': 9}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 9}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 9}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 9}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5}
******************************
Epoch: 10
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 16:36:11.327578: step: 4/531, loss: 0.04511779546737671 2023-01-22 16:36:12.378575: step: 8/531, loss: 0.00462351692840457 2023-01-22 16:36:13.441736: step: 12/531, loss: 0.007168647833168507 2023-01-22 16:36:14.490009: step: 16/531, loss: 0.003161326050758362 2023-01-22 16:36:15.548427: step: 20/531, loss: 0.0013177969958633184 2023-01-22 16:36:16.607642: step: 24/531, loss: 0.002089598448947072 2023-01-22 16:36:17.658136: step: 28/531, loss: 0.0008811427978798747 2023-01-22 16:36:18.706769: step: 32/531, loss: 0.013425305485725403 2023-01-22 16:36:19.769941: step: 36/531, loss: 0.004978492856025696 2023-01-22 16:36:20.817953: step: 40/531, loss: 0.0021314946934580803 2023-01-22 16:36:21.868656: step: 44/531, loss: 0.007731348741799593 2023-01-22 16:36:22.924262: step: 48/531, loss: 0.008214164525270462 2023-01-22 16:36:23.979637: step: 52/531, loss: 0.006958740763366222 2023-01-22 16:36:25.040189: step: 56/531, loss: 0.004736525937914848 2023-01-22 16:36:26.097222: step: 60/531, loss: 0.001544374506920576 2023-01-22 16:36:27.141684: step: 64/531, loss: 0.0007114785257726908 2023-01-22 16:36:28.205765: step: 68/531, loss: 0.00037321209674701095 2023-01-22 16:36:29.273282: step: 72/531, loss: 5.124994004290784e-06 2023-01-22 16:36:30.345150: step: 76/531, loss: 0.0020870547741651535 2023-01-22 16:36:31.389034: step: 80/531, loss: 0.005033628083765507 2023-01-22 16:36:32.469095: step: 84/531, loss: 0.003659485839307308 2023-01-22 16:36:33.520713: step: 88/531, loss: 0.0016210979083552957 2023-01-22 16:36:34.570350: step: 92/531, loss: 0.001539427787065506 2023-01-22 16:36:35.614079: step: 96/531, loss: 0.00696446280926466 2023-01-22 16:36:36.670646: step: 100/531, loss: 0.007940429262816906 2023-01-22 16:36:37.735467: step: 104/531, loss:
0.002993629314005375 2023-01-22 16:36:38.776005: step: 108/531, loss: 0.0011190499644726515 2023-01-22 16:36:39.832837: step: 112/531, loss: 0.0016277168178930879 2023-01-22 16:36:40.885380: step: 116/531, loss: 0.009492279961705208 2023-01-22 16:36:41.945423: step: 120/531, loss: 0.005150929559022188 2023-01-22 16:36:43.013998: step: 124/531, loss: 0.02994631417095661 2023-01-22 16:36:44.070592: step: 128/531, loss: 0.00018145870126318187 2023-01-22 16:36:45.134241: step: 132/531, loss: 0.006231280975043774 2023-01-22 16:36:46.175712: step: 136/531, loss: 0.006994050461798906 2023-01-22 16:36:47.241840: step: 140/531, loss: 0.0008030076860450208 2023-01-22 16:36:48.301174: step: 144/531, loss: 0.01632116734981537 2023-01-22 16:36:49.362202: step: 148/531, loss: 0.006124191451817751 2023-01-22 16:36:50.418846: step: 152/531, loss: 0.001828584005124867 2023-01-22 16:36:51.468364: step: 156/531, loss: 0.0076834834180772305 2023-01-22 16:36:52.524716: step: 160/531, loss: 0.0002667790395207703 2023-01-22 16:36:53.599897: step: 164/531, loss: 0.0019487914396449924 2023-01-22 16:36:54.650550: step: 168/531, loss: 0.0012811741326004267 2023-01-22 16:36:55.700571: step: 172/531, loss: 0.006654617376625538 2023-01-22 16:36:56.789208: step: 176/531, loss: 5.1039300160482526e-05 2023-01-22 16:36:57.848290: step: 180/531, loss: 0.006273975595831871 2023-01-22 16:36:58.901833: step: 184/531, loss: 0.004562840797007084 2023-01-22 16:36:59.979569: step: 188/531, loss: 0.0028800014406442642 2023-01-22 16:37:01.072172: step: 192/531, loss: 0.008555044420063496 2023-01-22 16:37:02.140568: step: 196/531, loss: 0.021640609949827194 2023-01-22 16:37:03.208110: step: 200/531, loss: 0.01305987499654293 2023-01-22 16:37:04.271262: step: 204/531, loss: 0.004544206894934177 2023-01-22 16:37:05.327600: step: 208/531, loss: 0.0005355750909075141 2023-01-22 16:37:06.400675: step: 212/531, loss: 0.003395135747268796 2023-01-22 16:37:07.467642: step: 216/531, loss: 0.01401414256542921 2023-01-22 16:37:08.532715: step: 220/531, loss: 0.0013866559602320194 2023-01-22 16:37:09.607474: step: 224/531, loss: 0.0034855192061513662 2023-01-22 16:37:10.685692: step: 228/531, loss: 0.00919613242149353 2023-01-22 16:37:11.732156: step: 232/531, loss: 0.0018149090465158224 2023-01-22 16:37:12.802903: step: 236/531, loss: 0.005378360860049725 2023-01-22 16:37:13.856050: step: 240/531, loss: 0.010230228304862976 2023-01-22 16:37:14.922528: step: 244/531, loss: 0.006599671207368374 2023-01-22 16:37:15.966577: step: 248/531, loss: 0.0020888138096779585 2023-01-22 16:37:17.014072: step: 252/531, loss: 0.00295943277888 2023-01-22 16:37:18.082491: step: 256/531, loss: 0.014759765937924385 2023-01-22 16:37:19.149694: step: 260/531, loss: 0.0006802362040616572 2023-01-22 16:37:20.215839: step: 264/531, loss: 0.010016325861215591 2023-01-22 16:37:21.277536: step: 268/531, loss: 0.002727009356021881 2023-01-22 16:37:22.343537: step: 272/531, loss: 0.0032889549620449543 2023-01-22 16:37:23.407699: step: 276/531, loss: 0.009291118010878563 2023-01-22 16:37:24.467479: step: 280/531, loss: 0.00533568300306797 2023-01-22 16:37:25.522274: step: 284/531, loss: 0.005063887219876051 2023-01-22 16:37:26.594806: step: 288/531, loss: 0.018546774983406067 2023-01-22 16:37:27.666519: step: 292/531, loss: 0.00047081513912416995 2023-01-22 16:37:28.737319: step: 296/531, loss: 0.003874396439641714 2023-01-22 16:37:29.803979: step: 300/531, loss: 0.006063390057533979 2023-01-22 16:37:30.876050: step: 304/531, loss: 0.0004326277121435851 2023-01-22 
16:37:31.943766: step: 308/531, loss: 0.00873729307204485 2023-01-22 16:37:32.999391: step: 312/531, loss: 0.0025322730652987957 2023-01-22 16:37:34.042915: step: 316/531, loss: 0.0004271742363926023 2023-01-22 16:37:35.107599: step: 320/531, loss: 0.0015197329921647906 2023-01-22 16:37:36.170873: step: 324/531, loss: 0.00015667501429561526 2023-01-22 16:37:37.242895: step: 328/531, loss: 0.002340910490602255 2023-01-22 16:37:38.307997: step: 332/531, loss: 0.0005036396323703229 2023-01-22 16:37:39.371463: step: 336/531, loss: 0.00492580933496356 2023-01-22 16:37:40.434575: step: 340/531, loss: 0.0008552538929507136 2023-01-22 16:37:41.506809: step: 344/531, loss: 0.000658549543004483 2023-01-22 16:37:42.601381: step: 348/531, loss: 0.00240096403285861 2023-01-22 16:37:43.650856: step: 352/531, loss: 0.004501339979469776 2023-01-22 16:37:44.702587: step: 356/531, loss: 0.006458848714828491 2023-01-22 16:37:45.754250: step: 360/531, loss: 0.0025434524286538363 2023-01-22 16:37:46.837868: step: 364/531, loss: 0.005225899629294872 2023-01-22 16:37:47.904559: step: 368/531, loss: 0.0015345087740570307 2023-01-22 16:37:48.965398: step: 372/531, loss: 4.23461060563568e-05 2023-01-22 16:37:50.030147: step: 376/531, loss: 0.004633399657905102 2023-01-22 16:37:51.089091: step: 380/531, loss: 0.00588305713608861 2023-01-22 16:37:52.157586: step: 384/531, loss: 0.0019457533489912748 2023-01-22 16:37:53.221862: step: 388/531, loss: 0.011172914877533913 2023-01-22 16:37:54.288567: step: 392/531, loss: 0.0016959008062258363 2023-01-22 16:37:55.351290: step: 396/531, loss: 0.009033746086061 2023-01-22 16:37:56.431247: step: 400/531, loss: 0.0021470552310347557 2023-01-22 16:37:57.499805: step: 404/531, loss: 0.008604781702160835 2023-01-22 16:37:58.560829: step: 408/531, loss: 0.00966779887676239 2023-01-22 16:37:59.640155: step: 412/531, loss: 0.004850557539612055 2023-01-22 16:38:00.711072: step: 416/531, loss: 0.0023350140545517206 2023-01-22 16:38:01.764456: step: 420/531, loss: 0.0017042026156559587 2023-01-22 16:38:02.819408: step: 424/531, loss: 0.015522753819823265 2023-01-22 16:38:03.901031: step: 428/531, loss: 6.732891779392958e-05 2023-01-22 16:38:04.968759: step: 432/531, loss: 0.002678492572158575 2023-01-22 16:38:06.031028: step: 436/531, loss: 0.0007440093904733658 2023-01-22 16:38:07.096662: step: 440/531, loss: 0.003997982479631901 2023-01-22 16:38:08.163347: step: 444/531, loss: 0.006845226977020502 2023-01-22 16:38:09.225277: step: 448/531, loss: 0.0161073449999094 2023-01-22 16:38:10.272445: step: 452/531, loss: 0.00021637490135617554 2023-01-22 16:38:11.323002: step: 456/531, loss: 0.0006897651473991573 2023-01-22 16:38:12.370779: step: 460/531, loss: 0.0058429124765098095 2023-01-22 16:38:13.432237: step: 464/531, loss: 0.0022120114881545305 2023-01-22 16:38:14.489442: step: 468/531, loss: 0.005331730004400015 2023-01-22 16:38:15.568267: step: 472/531, loss: 0.01432296447455883 2023-01-22 16:38:16.644851: step: 476/531, loss: 0.0017438482027500868 2023-01-22 16:38:17.710365: step: 480/531, loss: 0.0020928301382809877 2023-01-22 16:38:18.779397: step: 484/531, loss: 0.008921593427658081 2023-01-22 16:38:19.851463: step: 488/531, loss: 0.002374213421717286 2023-01-22 16:38:20.935567: step: 492/531, loss: 0.009918469935655594 2023-01-22 16:38:21.995855: step: 496/531, loss: 0.004704870283603668 2023-01-22 16:38:23.064516: step: 500/531, loss: 0.02259114384651184 2023-01-22 16:38:24.146711: step: 504/531, loss: 0.009441405534744263 2023-01-22 16:38:25.219801: step: 508/531, loss: 
0.004769668448716402 2023-01-22 16:38:26.286359: step: 512/531, loss: 0.002169100334867835 2023-01-22 16:38:27.358766: step: 516/531, loss: 0.008042097091674805 2023-01-22 16:38:28.433619: step: 520/531, loss: 5.85186826356221e-05 2023-01-22 16:38:29.498058: step: 524/531, loss: 0.00021469821513164788 2023-01-22 16:38:30.568106: step: 528/531, loss: 0.001111433026380837 2023-01-22 16:38:31.620376: step: 532/531, loss: 0.002982824109494686 2023-01-22 16:38:32.671184: step: 536/531, loss: 0.00036540350993163884 2023-01-22 16:38:33.725162: step: 540/531, loss: 0.0024884394370019436 2023-01-22 16:38:34.781826: step: 544/531, loss: 0.0012482332531362772 2023-01-22 16:38:35.855444: step: 548/531, loss: 0.013080939650535583 2023-01-22 16:38:36.912188: step: 552/531, loss: 0.005708321928977966 2023-01-22 16:38:37.997764: step: 556/531, loss: 0.005130017176270485 2023-01-22 16:38:39.082981: step: 560/531, loss: 0.00036316082696430385 2023-01-22 16:38:40.158383: step: 564/531, loss: 0.0035906070843338966 2023-01-22 16:38:41.232557: step: 568/531, loss: 0.009966195560991764 2023-01-22 16:38:42.335946: step: 572/531, loss: 0.0015761046670377254 2023-01-22 16:38:43.386091: step: 576/531, loss: 0.0016983483219519258 2023-01-22 16:38:44.442994: step: 580/531, loss: 0.005861691664904356 2023-01-22 16:38:45.523779: step: 584/531, loss: 0.0020601532887667418 2023-01-22 16:38:46.585167: step: 588/531, loss: 0.005512963514775038 2023-01-22 16:38:47.653374: step: 592/531, loss: 0.002283709356561303 2023-01-22 16:38:48.726277: step: 596/531, loss: 0.010348460637032986 2023-01-22 16:38:49.801133: step: 600/531, loss: 0.002217413391917944 2023-01-22 16:38:50.874358: step: 604/531, loss: 0.017472414299845695 2023-01-22 16:38:51.933646: step: 608/531, loss: 0.0005667863879352808 2023-01-22 16:38:52.998717: step: 612/531, loss: 0.005727748386561871 2023-01-22 16:38:54.042583: step: 616/531, loss: 0.0021054695826023817 2023-01-22 16:38:55.094213: step: 620/531, loss: 0.005775059573352337 2023-01-22 16:38:56.146641: step: 624/531, loss: 0.0025534944143146276 2023-01-22 16:38:57.215810: step: 628/531, loss: 0.029104895889759064 2023-01-22 16:38:58.281073: step: 632/531, loss: 0.0024954474065452814 2023-01-22 16:38:59.355708: step: 636/531, loss: 0.008792751468718052 2023-01-22 16:39:00.432614: step: 640/531, loss: 0.007368545047938824 2023-01-22 16:39:01.484830: step: 644/531, loss: 0.004871091805398464 2023-01-22 16:39:02.564951: step: 648/531, loss: 0.006081071682274342 2023-01-22 16:39:03.641401: step: 652/531, loss: 0.0033418633975088596 2023-01-22 16:39:04.708465: step: 656/531, loss: 0.00267624668776989 2023-01-22 16:39:05.754998: step: 660/531, loss: 0.0 2023-01-22 16:39:06.815983: step: 664/531, loss: 0.006120113655924797 2023-01-22 16:39:07.879973: step: 668/531, loss: 0.008462866768240929 2023-01-22 16:39:08.931248: step: 672/531, loss: 0.0036988535430282354 2023-01-22 16:39:10.004776: step: 676/531, loss: 0.01724778302013874 2023-01-22 16:39:11.062668: step: 680/531, loss: 0.0038293784018605947 2023-01-22 16:39:12.120989: step: 684/531, loss: 0.00032143128919415176 2023-01-22 16:39:13.205080: step: 688/531, loss: 0.0030675882007926702 2023-01-22 16:39:14.248582: step: 692/531, loss: 0.006303122267127037 2023-01-22 16:39:15.304414: step: 696/531, loss: 0.00011737759632524103 2023-01-22 16:39:16.353406: step: 700/531, loss: 0.008816881105303764 2023-01-22 16:39:17.413076: step: 704/531, loss: 0.0017857117345556617 2023-01-22 16:39:18.480827: step: 708/531, loss: 0.0015491386875510216 2023-01-22 
16:39:19.543383: step: 712/531, loss: 5.8598543546395376e-05 2023-01-22 16:39:20.612284: step: 716/531, loss: 0.002664199797436595 2023-01-22 16:39:21.661443: step: 720/531, loss: 0.0030972673557698727 2023-01-22 16:39:22.725050: step: 724/531, loss: 0.0014562207506969571 2023-01-22 16:39:23.793424: step: 728/531, loss: 0.009958475828170776 2023-01-22 16:39:24.861270: step: 732/531, loss: 0.005514010787010193 2023-01-22 16:39:25.929355: step: 736/531, loss: 0.00345690012909472 2023-01-22 16:39:26.995988: step: 740/531, loss: 0.0007793250260874629 2023-01-22 16:39:28.053535: step: 744/531, loss: 0.00010515483882045373 2023-01-22 16:39:29.125615: step: 748/531, loss: 0.003168870694935322 2023-01-22 16:39:30.186245: step: 752/531, loss: 0.005417739972472191 2023-01-22 16:39:31.240465: step: 756/531, loss: 1.2698379805442528e-06 2023-01-22 16:39:32.311766: step: 760/531, loss: 0.0014340360648930073 2023-01-22 16:39:33.386181: step: 764/531, loss: 0.05637550354003906 2023-01-22 16:39:34.464426: step: 768/531, loss: 0.003015951719135046 2023-01-22 16:39:35.525673: step: 772/531, loss: 0.004437098279595375 2023-01-22 16:39:36.596650: step: 776/531, loss: 0.00028024762286804616 2023-01-22 16:39:37.650566: step: 780/531, loss: 0.00016136518388520926 2023-01-22 16:39:38.706135: step: 784/531, loss: 0.025463992729783058 2023-01-22 16:39:39.782196: step: 788/531, loss: 0.003796741832047701 2023-01-22 16:39:40.835135: step: 792/531, loss: 0.0009928278159350157 2023-01-22 16:39:41.930318: step: 796/531, loss: 0.0020523546263575554 2023-01-22 16:39:43.001783: step: 800/531, loss: 0.0010502723744139075 2023-01-22 16:39:44.080696: step: 804/531, loss: 0.0008379457285627723 2023-01-22 16:39:45.150167: step: 808/531, loss: 0.058690670877695084 2023-01-22 16:39:46.196520: step: 812/531, loss: 0.0033038626424968243 2023-01-22 16:39:47.249724: step: 816/531, loss: 0.000611974741332233 2023-01-22 16:39:48.302288: step: 820/531, loss: 0.008932569995522499 2023-01-22 16:39:49.369753: step: 824/531, loss: 0.0007279608398675919 2023-01-22 16:39:50.424304: step: 828/531, loss: 3.9632912375964224e-05 2023-01-22 16:39:51.475758: step: 832/531, loss: 0.005446029826998711 2023-01-22 16:39:52.538365: step: 836/531, loss: 0.007040816824883223 2023-01-22 16:39:53.591309: step: 840/531, loss: 0.002783367410302162 2023-01-22 16:39:54.669635: step: 844/531, loss: 0.010686303488910198 2023-01-22 16:39:55.729609: step: 848/531, loss: 0.0016936537576839328 2023-01-22 16:39:56.803261: step: 852/531, loss: 0.005468764342367649 2023-01-22 16:39:57.856920: step: 856/531, loss: 0.0017886536661535501 2023-01-22 16:39:58.914204: step: 860/531, loss: 0.018694577738642693 2023-01-22 16:39:59.970706: step: 864/531, loss: 0.0005616036360152066 2023-01-22 16:40:01.021985: step: 868/531, loss: 0.0026409996207803488 2023-01-22 16:40:02.090268: step: 872/531, loss: 0.01968526467680931 2023-01-22 16:40:03.168050: step: 876/531, loss: 0.0026152569334954023 2023-01-22 16:40:04.220421: step: 880/531, loss: 0.008349597454071045 2023-01-22 16:40:05.298433: step: 884/531, loss: 0.004530239850282669 2023-01-22 16:40:06.353528: step: 888/531, loss: 0.002871243515983224 2023-01-22 16:40:07.422287: step: 892/531, loss: 0.00841600727289915 2023-01-22 16:40:08.484661: step: 896/531, loss: 0.0026405765675008297 2023-01-22 16:40:09.549143: step: 900/531, loss: 0.009743747301399708 2023-01-22 16:40:10.590735: step: 904/531, loss: 0.004232747945934534 2023-01-22 16:40:11.645388: step: 908/531, loss: 0.02207007445394993 2023-01-22 16:40:12.716763: step: 912/531, 
loss: 0.0035033232998102903 2023-01-22 16:40:13.767839: step: 916/531, loss: 0.005719034466892481 2023-01-22 16:40:14.815595: step: 920/531, loss: 0.001169433118775487 2023-01-22 16:40:15.851953: step: 924/531, loss: 0.009069438092410564 2023-01-22 16:40:16.905441: step: 928/531, loss: 0.008030245080590248 2023-01-22 16:40:17.961082: step: 932/531, loss: 0.0021121983882039785 2023-01-22 16:40:19.020811: step: 936/531, loss: 0.004578235559165478 2023-01-22 16:40:20.069377: step: 940/531, loss: 0.0004743619356304407 2023-01-22 16:40:21.123261: step: 944/531, loss: 0.004270483274012804 2023-01-22 16:40:22.189871: step: 948/531, loss: 0.01703280583024025 2023-01-22 16:40:23.246327: step: 952/531, loss: 0.006143835838884115 2023-01-22 16:40:24.314510: step: 956/531, loss: 0.00525349285453558 2023-01-22 16:40:25.355108: step: 960/531, loss: 0.004162059165537357 2023-01-22 16:40:26.421691: step: 964/531, loss: 0.003167388029396534 2023-01-22 16:40:27.491384: step: 968/531, loss: 0.047397419810295105 2023-01-22 16:40:28.553538: step: 972/531, loss: 0.004545622505247593 2023-01-22 16:40:29.616670: step: 976/531, loss: 0.004120222758501768 2023-01-22 16:40:30.661911: step: 980/531, loss: 0.0027510409709066153 2023-01-22 16:40:31.712769: step: 984/531, loss: 0.012611347250640392 2023-01-22 16:40:32.776929: step: 988/531, loss: 0.00018220402125734836 2023-01-22 16:40:33.835823: step: 992/531, loss: 0.005045078694820404 2023-01-22 16:40:34.900093: step: 996/531, loss: 0.00630967877805233 2023-01-22 16:40:35.965444: step: 1000/531, loss: 0.008546345867216587 2023-01-22 16:40:37.033909: step: 1004/531, loss: 7.393538544420153e-05 2023-01-22 16:40:38.116734: step: 1008/531, loss: 0.010151691734790802 2023-01-22 16:40:39.181461: step: 1012/531, loss: 0.0058904788456857204 2023-01-22 16:40:40.228008: step: 1016/531, loss: 0.0030716576147824526 2023-01-22 16:40:41.280572: step: 1020/531, loss: 0.00032007074332796037 2023-01-22 16:40:42.350663: step: 1024/531, loss: 0.0026038538198918104 2023-01-22 16:40:43.438294: step: 1028/531, loss: 0.0004469689156394452 2023-01-22 16:40:44.515165: step: 1032/531, loss: 0.005721640307456255 2023-01-22 16:40:45.573301: step: 1036/531, loss: 0.011646556667983532 2023-01-22 16:40:46.629646: step: 1040/531, loss: 0.0005736255552619696 2023-01-22 16:40:47.679199: step: 1044/531, loss: 0.00048291642451658845 2023-01-22 16:40:48.723731: step: 1048/531, loss: 0.006947695277631283 2023-01-22 16:40:49.777362: step: 1052/531, loss: 0.002741861157119274 2023-01-22 16:40:50.834148: step: 1056/531, loss: 0.002739276271313429 2023-01-22 16:40:51.886644: step: 1060/531, loss: 0.003842536825686693 2023-01-22 16:40:52.929740: step: 1064/531, loss: 0.009659718722105026 2023-01-22 16:40:53.983566: step: 1068/531, loss: 0.00011684564378811046 2023-01-22 16:40:55.033452: step: 1072/531, loss: 0.0014100978150963783 2023-01-22 16:40:56.096399: step: 1076/531, loss: 0.014100932516157627 2023-01-22 16:40:57.147069: step: 1080/531, loss: 0.018242180347442627 2023-01-22 16:40:58.222010: step: 1084/531, loss: 0.008263876661658287 2023-01-22 16:40:59.280907: step: 1088/531, loss: 0.004137961193919182 2023-01-22 16:41:00.332178: step: 1092/531, loss: 0.004951735492795706 2023-01-22 16:41:01.387229: step: 1096/531, loss: 0.008860207162797451 2023-01-22 16:41:02.438676: step: 1100/531, loss: 0.0010698740370571613 2023-01-22 16:41:03.485462: step: 1104/531, loss: 0.005388192366808653 2023-01-22 16:41:04.534598: step: 1108/531, loss: 0.00560379633679986 2023-01-22 16:41:05.577582: step: 1112/531, loss: 
0.0033469530753791332 2023-01-22 16:41:06.635504: step: 1116/531, loss: 0.007595570757985115 2023-01-22 16:41:07.682620: step: 1120/531, loss: 0.00023423753737006336 2023-01-22 16:41:08.743477: step: 1124/531, loss: 0.004273217637091875 2023-01-22 16:41:09.804364: step: 1128/531, loss: 0.002667166292667389 2023-01-22 16:41:10.872593: step: 1132/531, loss: 0.0007323683821596205 2023-01-22 16:41:11.940004: step: 1136/531, loss: 0.0010395990684628487 2023-01-22 16:41:13.001794: step: 1140/531, loss: 0.0021878022234886885 2023-01-22 16:41:14.050979: step: 1144/531, loss: 0.0020483252592384815 2023-01-22 16:41:15.093625: step: 1148/531, loss: 0.0047643911093473434 2023-01-22 16:41:16.153368: step: 1152/531, loss: 0.0013432919513434172 2023-01-22 16:41:17.228870: step: 1156/531, loss: 6.042409222573042e-05 2023-01-22 16:41:18.281413: step: 1160/531, loss: 0.033806730061769485 2023-01-22 16:41:19.340894: step: 1164/531, loss: 0.007924630306661129 2023-01-22 16:41:20.377700: step: 1168/531, loss: 0.005530583672225475 2023-01-22 16:41:21.439440: step: 1172/531, loss: 0.004673686809837818 2023-01-22 16:41:22.507613: step: 1176/531, loss: 0.01001940667629242 2023-01-22 16:41:23.568657: step: 1180/531, loss: 0.004419660195708275 2023-01-22 16:41:24.620369: step: 1184/531, loss: 0.004453849513083696 2023-01-22 16:41:25.672775: step: 1188/531, loss: 0.00010570708400337026 2023-01-22 16:41:26.746146: step: 1192/531, loss: 0.015451865270733833 2023-01-22 16:41:27.806244: step: 1196/531, loss: 0.006219562143087387 2023-01-22 16:41:28.854481: step: 1200/531, loss: 0.006642982363700867 2023-01-22 16:41:29.945627: step: 1204/531, loss: 0.03245781734585762 2023-01-22 16:41:31.006577: step: 1208/531, loss: 0.012482921592891216 2023-01-22 16:41:32.073506: step: 1212/531, loss: 0.0158814899623394 2023-01-22 16:41:33.128786: step: 1216/531, loss: 0.00147052644751966 2023-01-22 16:41:34.171159: step: 1220/531, loss: 0.011913049034774303 2023-01-22 16:41:35.251392: step: 1224/531, loss: 0.0028967829421162605 2023-01-22 16:41:36.311631: step: 1228/531, loss: 0.016320789232850075 2023-01-22 16:41:37.385199: step: 1232/531, loss: 0.0002468716411385685 2023-01-22 16:41:38.460009: step: 1236/531, loss: 0.013635863550007343 2023-01-22 16:41:39.514139: step: 1240/531, loss: 0.00041916221380233765 2023-01-22 16:41:40.590402: step: 1244/531, loss: 0.0034596596378833055 2023-01-22 16:41:41.639621: step: 1248/531, loss: 0.004369057714939117 2023-01-22 16:41:42.693950: step: 1252/531, loss: 1.7405774997314438e-05 2023-01-22 16:41:43.763770: step: 1256/531, loss: 0.01018279604613781 2023-01-22 16:41:44.824719: step: 1260/531, loss: 0.0038037945050746202 2023-01-22 16:41:45.894734: step: 1264/531, loss: 0.022700605913996696 2023-01-22 16:41:46.950502: step: 1268/531, loss: 0.003217222634702921 2023-01-22 16:41:47.996448: step: 1272/531, loss: 0.0008606911869719625 2023-01-22 16:41:49.068043: step: 1276/531, loss: 0.0039140949957072735 2023-01-22 16:41:50.145889: step: 1280/531, loss: 0.031579140573740005 2023-01-22 16:41:51.189220: step: 1284/531, loss: 9.058567957254127e-05 2023-01-22 16:41:52.256923: step: 1288/531, loss: 0.007591401692479849 2023-01-22 16:41:53.314150: step: 1292/531, loss: 0.007560128811746836 2023-01-22 16:41:54.366891: step: 1296/531, loss: 0.0005940622068010271 2023-01-22 16:41:55.427530: step: 1300/531, loss: 0.006782899145036936 2023-01-22 16:41:56.485319: step: 1304/531, loss: 0.0010394651908427477 2023-01-22 16:41:57.580122: step: 1308/531, loss: 0.013745302334427834 2023-01-22 16:41:58.644254: step: 
1312/531, loss: 0.004006646573543549 2023-01-22 16:41:59.712791: step: 1316/531, loss: 0.012290666811168194 2023-01-22 16:42:00.756554: step: 1320/531, loss: 0.002449002582579851 2023-01-22 16:42:01.795306: step: 1324/531, loss: 0.0008735805167816579 2023-01-22 16:42:02.857368: step: 1328/531, loss: 0.0008889613091014326 2023-01-22 16:42:03.920129: step: 1332/531, loss: 0.0036207358352839947 2023-01-22 16:42:04.957140: step: 1336/531, loss: 0.00499630207195878 2023-01-22 16:42:06.008505: step: 1340/531, loss: 0.0027833532076328993 2023-01-22 16:42:07.086047: step: 1344/531, loss: 0.0018036727560684085 2023-01-22 16:42:08.146886: step: 1348/531, loss: 0.0016338448040187359 2023-01-22 16:42:09.205027: step: 1352/531, loss: 0.005640815943479538 2023-01-22 16:42:10.268198: step: 1356/531, loss: 0.002958085620775819 2023-01-22 16:42:11.330286: step: 1360/531, loss: 0.025196939706802368 2023-01-22 16:42:12.405582: step: 1364/531, loss: 0.0037646417040377855 2023-01-22 16:42:13.471487: step: 1368/531, loss: 0.004507889039814472 2023-01-22 16:42:14.531873: step: 1372/531, loss: 0.00010726918844738975 2023-01-22 16:42:15.594891: step: 1376/531, loss: 0.0027078590355813503 2023-01-22 16:42:16.643992: step: 1380/531, loss: 0.0038744057528674603 2023-01-22 16:42:17.688399: step: 1384/531, loss: 0.0016367561183869839 2023-01-22 16:42:18.761695: step: 1388/531, loss: 0.0031484398059546947 2023-01-22 16:42:19.823459: step: 1392/531, loss: 0.0027061980217695236 2023-01-22 16:42:20.880968: step: 1396/531, loss: 0.007732085883617401 2023-01-22 16:42:21.941414: step: 1400/531, loss: 0.003513761330395937 2023-01-22 16:42:22.992188: step: 1404/531, loss: 0.03504456579685211 2023-01-22 16:42:24.056984: step: 1408/531, loss: 0.00013752601807937026 2023-01-22 16:42:25.116494: step: 1412/531, loss: 0.007317660842090845 2023-01-22 16:42:26.175086: step: 1416/531, loss: 0.0006424398743547499 2023-01-22 16:42:27.249369: step: 1420/531, loss: 0.0016001794720068574 2023-01-22 16:42:28.307654: step: 1424/531, loss: 0.1215495616197586 2023-01-22 16:42:29.359810: step: 1428/531, loss: 0.014077279716730118 2023-01-22 16:42:30.406633: step: 1432/531, loss: 0.0013367494102567434 2023-01-22 16:42:31.475958: step: 1436/531, loss: 0.009776687249541283 2023-01-22 16:42:32.530321: step: 1440/531, loss: 0.0032634963281452656 2023-01-22 16:42:33.588001: step: 1444/531, loss: 0.005308025516569614 2023-01-22 16:42:34.642517: step: 1448/531, loss: 0.050706665962934494 2023-01-22 16:42:35.690878: step: 1452/531, loss: 0.004951969254761934 2023-01-22 16:42:36.747817: step: 1456/531, loss: 0.002046182518824935 2023-01-22 16:42:37.803774: step: 1460/531, loss: 0.0020493913907557726 2023-01-22 16:42:38.850400: step: 1464/531, loss: 0.00031897457665763795 2023-01-22 16:42:39.912804: step: 1468/531, loss: 0.00032071577152237296 2023-01-22 16:42:40.983412: step: 1472/531, loss: 0.00449588056653738 2023-01-22 16:42:42.037023: step: 1476/531, loss: 0.015204568393528461 2023-01-22 16:42:43.083098: step: 1480/531, loss: 0.018791966140270233 2023-01-22 16:42:44.130904: step: 1484/531, loss: 0.0008019257802516222 2023-01-22 16:42:45.202594: step: 1488/531, loss: 0.004643011372536421 2023-01-22 16:42:46.259405: step: 1492/531, loss: 0.00012099656305508688 2023-01-22 16:42:47.305067: step: 1496/531, loss: 0.0012349931057542562 2023-01-22 16:42:48.357836: step: 1500/531, loss: 0.0036675555165857077 2023-01-22 16:42:49.407395: step: 1504/531, loss: 0.006605407223105431 2023-01-22 16:42:50.470038: step: 1508/531, loss: 0.007399989757686853 2023-01-22 
16:42:51.526324: step: 1512/531, loss: 0.0051831589080393314 2023-01-22 16:42:52.601925: step: 1516/531, loss: 0.005010760389268398 2023-01-22 16:42:53.686268: step: 1520/531, loss: 0.0036288737319409847 2023-01-22 16:42:54.732901: step: 1524/531, loss: 0.008250990882515907 2023-01-22 16:42:55.785708: step: 1528/531, loss: 0.004854382947087288 2023-01-22 16:42:56.837507: step: 1532/531, loss: 0.0020668278448283672 2023-01-22 16:42:57.909471: step: 1536/531, loss: 0.00876717921346426 2023-01-22 16:42:58.961770: step: 1540/531, loss: 0.009252313524484634 2023-01-22 16:43:00.025016: step: 1544/531, loss: 0.0042928964830935 2023-01-22 16:43:01.089371: step: 1548/531, loss: 0.006253547966480255 2023-01-22 16:43:02.144238: step: 1552/531, loss: 0.005105405114591122 2023-01-22 16:43:03.214719: step: 1556/531, loss: 0.00194579700473696 2023-01-22 16:43:04.275595: step: 1560/531, loss: 0.013849948532879353 2023-01-22 16:43:05.341291: step: 1564/531, loss: 0.005193711258471012 2023-01-22 16:43:06.393568: step: 1568/531, loss: 0.004304729402065277 2023-01-22 16:43:07.463401: step: 1572/531, loss: 0.002057864563539624 2023-01-22 16:43:08.537276: step: 1576/531, loss: 0.004682786297053099 2023-01-22 16:43:09.594883: step: 1580/531, loss: 0.009445921517908573 2023-01-22 16:43:10.655917: step: 1584/531, loss: 0.0007583817932754755 2023-01-22 16:43:11.709540: step: 1588/531, loss: 0.0024650886189192533 2023-01-22 16:43:12.761392: step: 1592/531, loss: 0.004287093412131071 2023-01-22 16:43:13.823935: step: 1596/531, loss: 0.004548561293631792 2023-01-22 16:43:14.892904: step: 1600/531, loss: 0.00031103467335924506 2023-01-22 16:43:15.958604: step: 1604/531, loss: 0.004302352201193571 2023-01-22 16:43:17.008844: step: 1608/531, loss: 0.014272102154791355 2023-01-22 16:43:18.064941: step: 1612/531, loss: 0.0028163467068225145 2023-01-22 16:43:19.131621: step: 1616/531, loss: 0.005952394567430019 2023-01-22 16:43:20.185213: step: 1620/531, loss: 0.004385761916637421 2023-01-22 16:43:21.263885: step: 1624/531, loss: 0.030985429883003235 2023-01-22 16:43:22.328942: step: 1628/531, loss: 0.008499976247549057 2023-01-22 16:43:23.390463: step: 1632/531, loss: 0.006252588704228401 2023-01-22 16:43:24.457138: step: 1636/531, loss: 0.0013014872092753649 2023-01-22 16:43:25.530846: step: 1640/531, loss: 0.009132741019129753 2023-01-22 16:43:26.623031: step: 1644/531, loss: 8.013339538592845e-05 2023-01-22 16:43:27.686933: step: 1648/531, loss: 0.002650429494678974 2023-01-22 16:43:28.765352: step: 1652/531, loss: 0.0024341337848454714 2023-01-22 16:43:29.812943: step: 1656/531, loss: 0.003223975421860814 2023-01-22 16:43:30.866083: step: 1660/531, loss: 0.0006092719850130379 2023-01-22 16:43:31.917639: step: 1664/531, loss: 0.0014739006292074919 2023-01-22 16:43:32.966353: step: 1668/531, loss: 0.0009018494747579098 2023-01-22 16:43:34.018316: step: 1672/531, loss: 0.005439567845314741 2023-01-22 16:43:35.088731: step: 1676/531, loss: 0.003644815878942609 2023-01-22 16:43:36.157164: step: 1680/531, loss: 0.0072359307669103146 2023-01-22 16:43:37.222521: step: 1684/531, loss: 0.0013825768837705255 2023-01-22 16:43:38.261715: step: 1688/531, loss: 0.0012426445027813315 2023-01-22 16:43:39.329540: step: 1692/531, loss: 0.025410452857613564 2023-01-22 16:43:40.383096: step: 1696/531, loss: 0.011287734843790531 2023-01-22 16:43:41.438635: step: 1700/531, loss: 0.007642117794603109 2023-01-22 16:43:42.494688: step: 1704/531, loss: 0.0025312243960797787 2023-01-22 16:43:43.563572: step: 1708/531, loss: 0.009996318258345127 
2023-01-22 16:43:44.608959: step: 1712/531, loss: 0.0017377582844346762 2023-01-22 16:43:45.688347: step: 1716/531, loss: 0.008395014330744743 2023-01-22 16:43:46.739259: step: 1720/531, loss: 3.8053538446547464e-05 2023-01-22 16:43:47.830261: step: 1724/531, loss: 0.006723566446453333 2023-01-22 16:43:48.886980: step: 1728/531, loss: 0.004380987025797367 2023-01-22 16:43:49.949307: step: 1732/531, loss: 0.0042249285615980625 2023-01-22 16:43:51.011973: step: 1736/531, loss: 0.002711418317630887 2023-01-22 16:43:52.081519: step: 1740/531, loss: 0.00038981231045909226 2023-01-22 16:43:53.129784: step: 1744/531, loss: 0.003361661918461323 2023-01-22 16:43:54.208173: step: 1748/531, loss: 0.002664417028427124 2023-01-22 16:43:55.265736: step: 1752/531, loss: 0.0004814085550606251 2023-01-22 16:43:56.327856: step: 1756/531, loss: 0.005907909013330936 2023-01-22 16:43:57.402459: step: 1760/531, loss: 0.00395190017297864 2023-01-22 16:43:58.464086: step: 1764/531, loss: 0.0037833317182958126 2023-01-22 16:43:59.527335: step: 1768/531, loss: 0.01780518889427185 2023-01-22 16:44:00.592228: step: 1772/531, loss: 0.0016816816059872508 2023-01-22 16:44:01.653391: step: 1776/531, loss: 0.0006575814331881702 2023-01-22 16:44:02.695723: step: 1780/531, loss: 9.967300229618559e-07 2023-01-22 16:44:03.755054: step: 1784/531, loss: 0.00028070222469978034 2023-01-22 16:44:04.851064: step: 1788/531, loss: 0.02735465206205845 2023-01-22 16:44:05.935381: step: 1792/531, loss: 0.0011749131372198462 2023-01-22 16:44:06.998868: step: 1796/531, loss: 0.012724286876618862 2023-01-22 16:44:08.059193: step: 1800/531, loss: 0.0013588605215772986 2023-01-22 16:44:09.132565: step: 1804/531, loss: 0.007292529102414846 2023-01-22 16:44:10.200354: step: 1808/531, loss: 0.004301643464714289 2023-01-22 16:44:11.260512: step: 1812/531, loss: 0.006009149830788374 2023-01-22 16:44:12.351718: step: 1816/531, loss: 0.0023827701807022095 2023-01-22 16:44:13.406409: step: 1820/531, loss: 0.002875205362215638 2023-01-22 16:44:14.464220: step: 1824/531, loss: 0.01805732026696205 2023-01-22 16:44:15.513384: step: 1828/531, loss: 0.0004128643777221441 2023-01-22 16:44:16.572807: step: 1832/531, loss: 0.0007312208763323724 2023-01-22 16:44:17.618978: step: 1836/531, loss: 0.0012952744727954268 2023-01-22 16:44:18.682353: step: 1840/531, loss: 0.0067427270114421844 2023-01-22 16:44:19.765445: step: 1844/531, loss: 0.009244879707694054 2023-01-22 16:44:20.830437: step: 1848/531, loss: 0.003126917639747262 2023-01-22 16:44:21.874568: step: 1852/531, loss: 0.003114111954346299 2023-01-22 16:44:22.940475: step: 1856/531, loss: 0.004199311137199402 2023-01-22 16:44:23.998083: step: 1860/531, loss: 0.006388251204043627 2023-01-22 16:44:25.070388: step: 1864/531, loss: 0.023254306986927986 2023-01-22 16:44:26.128013: step: 1868/531, loss: 0.0038735060952603817 2023-01-22 16:44:27.187617: step: 1872/531, loss: 0.0016032494604587555 2023-01-22 16:44:28.268519: step: 1876/531, loss: 0.027199311181902885 2023-01-22 16:44:29.327181: step: 1880/531, loss: 0.004051702097058296 2023-01-22 16:44:30.388248: step: 1884/531, loss: 0.020427949726581573 2023-01-22 16:44:31.451943: step: 1888/531, loss: 0.03202217444777489 2023-01-22 16:44:32.518294: step: 1892/531, loss: 0.003984553273767233 2023-01-22 16:44:33.594917: step: 1896/531, loss: 0.009155458770692348 2023-01-22 16:44:34.662306: step: 1900/531, loss: 0.025644440203905106 2023-01-22 16:44:35.716954: step: 1904/531, loss: 0.007649907376617193 2023-01-22 16:44:36.761839: step: 1908/531, loss: 
0.00028209033189341426 2023-01-22 16:44:37.828053: step: 1912/531, loss: 0.0008907606243155897 2023-01-22 16:44:38.897963: step: 1916/531, loss: 0.01240821834653616 2023-01-22 16:44:39.976735: step: 1920/531, loss: 0.00012892595259472728 2023-01-22 16:44:41.037712: step: 1924/531, loss: 0.0014187126653268933 2023-01-22 16:44:42.101823: step: 1928/531, loss: 0.010740848258137703 2023-01-22 16:44:43.168912: step: 1932/531, loss: 0.0031338625121861696 2023-01-22 16:44:44.225547: step: 1936/531, loss: 0.0023126755841076374 2023-01-22 16:44:45.268139: step: 1940/531, loss: 0.0002548302581999451 2023-01-22 16:44:46.322555: step: 1944/531, loss: 0.0006706409621983767 2023-01-22 16:44:47.380263: step: 1948/531, loss: 0.00010857357119675726 2023-01-22 16:44:48.436872: step: 1952/531, loss: 0.002403313061222434 2023-01-22 16:44:49.491093: step: 1956/531, loss: 0.03757341951131821 2023-01-22 16:44:50.554223: step: 1960/531, loss: 0.005053660366684198 2023-01-22 16:44:51.627635: step: 1964/531, loss: 0.006158989388495684 2023-01-22 16:44:52.677853: step: 1968/531, loss: 0.00041146628791466355 2023-01-22 16:44:53.737610: step: 1972/531, loss: 0.0026964074932038784 2023-01-22 16:44:54.799996: step: 1976/531, loss: 0.003737696446478367 2023-01-22 16:44:55.864013: step: 1980/531, loss: 0.004402313847094774 2023-01-22 16:44:56.911686: step: 1984/531, loss: 0.016468999907374382 2023-01-22 16:44:57.958650: step: 1988/531, loss: 0.007842383347451687 2023-01-22 16:44:59.035515: step: 1992/531, loss: 0.011507249437272549 2023-01-22 16:45:00.095118: step: 1996/531, loss: 0.0001733368553686887 2023-01-22 16:45:01.143298: step: 2000/531, loss: 0.012670803815126419 2023-01-22 16:45:02.205474: step: 2004/531, loss: 0.006167001090943813 2023-01-22 16:45:03.263143: step: 2008/531, loss: 0.005690338090062141 2023-01-22 16:45:04.323868: step: 2012/531, loss: 0.004677900578826666 2023-01-22 16:45:05.386567: step: 2016/531, loss: 0.018740715458989143 2023-01-22 16:45:06.438164: step: 2020/531, loss: 0.0238112211227417 2023-01-22 16:45:07.506737: step: 2024/531, loss: 0.0008958554244600236 2023-01-22 16:45:08.560244: step: 2028/531, loss: 0.009894277900457382 2023-01-22 16:45:09.615154: step: 2032/531, loss: 0.002333128359168768 2023-01-22 16:45:10.672108: step: 2036/531, loss: 0.0021440156269818544 2023-01-22 16:45:11.720999: step: 2040/531, loss: 0.005679186899214983 2023-01-22 16:45:12.790154: step: 2044/531, loss: 0.00939517468214035 2023-01-22 16:45:13.845963: step: 2048/531, loss: 0.008401891216635704 2023-01-22 16:45:14.912840: step: 2052/531, loss: 0.0008273826097138226 2023-01-22 16:45:15.962462: step: 2056/531, loss: 0.03998059406876564 2023-01-22 16:45:17.027075: step: 2060/531, loss: 0.006508908700197935 2023-01-22 16:45:18.094695: step: 2064/531, loss: 0.0081448620185256 2023-01-22 16:45:19.161015: step: 2068/531, loss: 0.0030839676037430763 2023-01-22 16:45:20.233766: step: 2072/531, loss: 0.0016284179873764515 2023-01-22 16:45:21.292244: step: 2076/531, loss: 0.006517891772091389 2023-01-22 16:45:22.357077: step: 2080/531, loss: 0.0023467401042580605 2023-01-22 16:45:23.417099: step: 2084/531, loss: 0.0006494150729849935 2023-01-22 16:45:24.497591: step: 2088/531, loss: 0.006998279597610235 2023-01-22 16:45:25.567545: step: 2092/531, loss: 0.0007310719229280949 2023-01-22 16:45:26.605928: step: 2096/531, loss: 0.0005386866978369653 2023-01-22 16:45:27.664567: step: 2100/531, loss: 0.002123769372701645 2023-01-22 16:45:28.702058: step: 2104/531, loss: 0.002239992842078209 2023-01-22 16:45:29.767071: step: 
2108/531, loss: 0.00496734119951725 2023-01-22 16:45:30.814966: step: 2112/531, loss: 0.03868440166115761 2023-01-22 16:45:31.877308: step: 2116/531, loss: 0.00293820071965456 2023-01-22 16:45:32.928731: step: 2120/531, loss: 0.001741659129038453 2023-01-22 16:45:33.981921: step: 2124/531, loss: 0.019097991287708282 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561447368421053, 'r': 0.3203953598484849, 'f1': 0.3373255234297109}, 'combined': 0.24855564884294484, 'stategy': 1, 'epoch': 10} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33613170825233196, 'r': 0.2754581147049796, 'f1': 0.3027853086241244}, 'combined': 0.1895945390450125, 'stategy': 1, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.329694976076555, 'r': 0.34470954804209075, 'f1': 0.33703512396694213}, 'combined': 0.24834167029143103, 'stategy': 1, 'epoch': 10} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36204510181451716, 'r': 0.30067045876570814, 'f1': 0.3285158017449954}, 'combined': 0.20358725741943376, 'stategy': 1, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31833795440542206, 'r': 0.32679475015812776, 'f1': 0.32251092384519353}, 'combined': 0.2376396280964584, 'stategy': 1, 'epoch': 10} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36651298651488795, 'r': 0.28639000301298834, 'f1': 0.32153522650646865}, 'combined': 0.2132956453062713, 'stategy': 1, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.34285714285714286, 'f1': 0.3529411764705882}, 'combined': 0.2352941176470588, 'stategy': 1, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 
0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 11 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:47:59.652475: step: 4/531, loss: 0.011861962266266346 2023-01-22 16:48:00.701210: step: 8/531, loss: 0.008071650750935078 2023-01-22 16:48:01.752016: step: 12/531, loss: 0.000208403987926431 2023-01-22 16:48:02.806668: step: 16/531, loss: 0.0010404442436993122 2023-01-22 16:48:03.863167: step: 20/531, loss: 0.001116069033741951 2023-01-22 16:48:04.908843: step: 24/531, loss: 0.003468763316050172 2023-01-22 16:48:05.963512: step: 28/531, loss: 0.004135690163820982 2023-01-22 16:48:07.028514: step: 32/531, loss: 0.0073848385363817215 2023-01-22 16:48:08.069026: step: 36/531, loss: 0.0020591611973941326 2023-01-22 16:48:09.128515: step: 40/531, loss: 0.0019269976764917374 2023-01-22 16:48:10.172122: step: 44/531, loss: 0.002490139799192548 2023-01-22 16:48:11.225625: step: 48/531, loss: 0.005024838726967573 2023-01-22 16:48:12.281377: step: 52/531, loss: 0.004694833420217037 2023-01-22 16:48:13.330599: step: 56/531, loss: 0.00994531624019146 2023-01-22 16:48:14.383593: step: 60/531, loss: 0.007700231857597828 2023-01-22 16:48:15.424615: step: 64/531, loss: 0.013530361466109753 2023-01-22 16:48:16.472024: step: 68/531, loss: 0.002030949341133237 2023-01-22 16:48:17.532055: step: 72/531, loss: 0.0021307142451405525 2023-01-22 16:48:18.592929: step: 76/531, loss: 0.0007005725055932999 2023-01-22 16:48:19.651599: step: 80/531, loss: 3.269667649874464e-05 2023-01-22 16:48:20.715405: step: 84/531, loss: 0.0011059649987146258 2023-01-22 16:48:21.765859: step: 88/531, loss: 0.0036076586693525314 2023-01-22 16:48:22.828744: step: 92/531, loss: 0.0035521909594535828 2023-01-22 16:48:23.888405: step: 96/531, loss: 0.03489205986261368 2023-01-22 16:48:24.946277: step: 100/531, loss: 0.014840464107692242 2023-01-22 16:48:26.006571: step: 104/531, loss: 0.0013690049527212977 2023-01-22 16:48:27.061592: step: 108/531, loss: 0.0031027987133711576 2023-01-22 16:48:28.113701: step: 112/531, loss: 0.00198413897305727 2023-01-22 16:48:29.164782: step: 116/531, loss: 0.004999811295419931 2023-01-22 16:48:30.219658: step: 120/531, loss: 0.0020942334085702896 2023-01-22 16:48:31.265129: step: 124/531, loss: 0.002167933154851198 2023-01-22 
16:48:32.313154: step: 128/531, loss: 0.011389507912099361 2023-01-22 16:48:33.356018: step: 132/531, loss: 0.0023518609814345837 2023-01-22 16:48:34.421129: step: 136/531, loss: 0.0008302110363729298 2023-01-22 16:48:35.492372: step: 140/531, loss: 0.005180789157748222 2023-01-22 16:48:36.538045: step: 144/531, loss: 0.0030223741196095943 2023-01-22 16:48:37.593575: step: 148/531, loss: 0.0053098308853805065 2023-01-22 16:48:38.667451: step: 152/531, loss: 0.0034682273399084806 2023-01-22 16:48:39.736368: step: 156/531, loss: 0.0056068180128932 2023-01-22 16:48:40.792944: step: 160/531, loss: 0.000880257342942059 2023-01-22 16:48:41.888952: step: 164/531, loss: 0.006435590796172619 2023-01-22 16:48:42.958631: step: 168/531, loss: 0.004628789145499468 2023-01-22 16:48:44.022779: step: 172/531, loss: 0.0018600308103486896 2023-01-22 16:48:45.085683: step: 176/531, loss: 0.0026068449951708317 2023-01-22 16:48:46.142193: step: 180/531, loss: 0.008805465884506702 2023-01-22 16:48:47.216481: step: 184/531, loss: 0.005547242239117622 2023-01-22 16:48:48.262121: step: 188/531, loss: 6.172151915961877e-05 2023-01-22 16:48:49.316560: step: 192/531, loss: 0.0009066284401342273 2023-01-22 16:48:50.374126: step: 196/531, loss: 0.001958414213731885 2023-01-22 16:48:51.433338: step: 200/531, loss: 0.0004091927839908749 2023-01-22 16:48:52.493475: step: 204/531, loss: 0.0162381362169981 2023-01-22 16:48:53.561387: step: 208/531, loss: 0.006360786035656929 2023-01-22 16:48:54.612486: step: 212/531, loss: 0.00134243443608284 2023-01-22 16:48:55.658873: step: 216/531, loss: 0.004472144413739443 2023-01-22 16:48:56.718770: step: 220/531, loss: 0.0001199832622660324 2023-01-22 16:48:57.802759: step: 224/531, loss: 0.00017228329670615494 2023-01-22 16:48:58.880781: step: 228/531, loss: 0.001089850440621376 2023-01-22 16:48:59.954812: step: 232/531, loss: 0.005808225367218256 2023-01-22 16:49:01.001396: step: 236/531, loss: 0.006756265182048082 2023-01-22 16:49:02.064112: step: 240/531, loss: 0.00015489471843466163 2023-01-22 16:49:03.105759: step: 244/531, loss: 0.006117692217230797 2023-01-22 16:49:04.169170: step: 248/531, loss: 0.0012868092162534595 2023-01-22 16:49:05.216610: step: 252/531, loss: 0.0060088420286774635 2023-01-22 16:49:06.273194: step: 256/531, loss: 0.003042251104488969 2023-01-22 16:49:07.331889: step: 260/531, loss: 0.004750113468617201 2023-01-22 16:49:08.412968: step: 264/531, loss: 0.00598478876054287 2023-01-22 16:49:09.457871: step: 268/531, loss: 0.020888064056634903 2023-01-22 16:49:10.521967: step: 272/531, loss: 0.000384991493774578 2023-01-22 16:49:11.577156: step: 276/531, loss: 0.003254319541156292 2023-01-22 16:49:12.650135: step: 280/531, loss: 0.0015920934965834022 2023-01-22 16:49:13.696069: step: 284/531, loss: 0.0006789285107515752 2023-01-22 16:49:14.767561: step: 288/531, loss: 0.0016608184669166803 2023-01-22 16:49:15.838381: step: 292/531, loss: 0.001358446548692882 2023-01-22 16:49:16.894183: step: 296/531, loss: 0.0007131583988666534 2023-01-22 16:49:17.953714: step: 300/531, loss: 0.003415829036384821 2023-01-22 16:49:19.023660: step: 304/531, loss: 0.008375131525099277 2023-01-22 16:49:20.079346: step: 308/531, loss: 0.0009640548378229141 2023-01-22 16:49:21.144235: step: 312/531, loss: 0.0015861615538597107 2023-01-22 16:49:22.201316: step: 316/531, loss: 0.0004318073915783316 2023-01-22 16:49:23.254781: step: 320/531, loss: 0.00015259617066476494 2023-01-22 16:49:24.328613: step: 324/531, loss: 0.0024037668481469154 2023-01-22 16:49:25.401846: step: 328/531, 
loss: 0.009311018511652946 2023-01-22 16:49:26.492446: step: 332/531, loss: 4.776411515194923e-05 2023-01-22 16:49:27.560503: step: 336/531, loss: 0.023765331134200096 2023-01-22 16:49:28.618189: step: 340/531, loss: 0.0034515359438955784 2023-01-22 16:49:29.671458: step: 344/531, loss: 0.001172275166027248 2023-01-22 16:49:30.727158: step: 348/531, loss: 0.0033460904378443956 2023-01-22 16:49:31.786587: step: 352/531, loss: 0.007876559160649776 2023-01-22 16:49:32.838457: step: 356/531, loss: 0.002973369788378477 2023-01-22 16:49:33.885349: step: 360/531, loss: 0.003678396809846163 2023-01-22 16:49:34.938443: step: 364/531, loss: 0.006506223697215319 2023-01-22 16:49:35.999528: step: 368/531, loss: 0.0004361586179584265 2023-01-22 16:49:37.048218: step: 372/531, loss: 0.008504385128617287 2023-01-22 16:49:38.093731: step: 376/531, loss: 0.0019203261472284794 2023-01-22 16:49:39.139775: step: 380/531, loss: 0.00046843045856803656 2023-01-22 16:49:40.197304: step: 384/531, loss: 0.008011464960873127 2023-01-22 16:49:41.265563: step: 388/531, loss: 0.003691725432872772 2023-01-22 16:49:42.355455: step: 392/531, loss: 0.0017186313634738326 2023-01-22 16:49:43.415485: step: 396/531, loss: 0.00033388304291293025 2023-01-22 16:49:44.467024: step: 400/531, loss: 0.004575843922793865 2023-01-22 16:49:45.539598: step: 404/531, loss: 0.0027084494940936565 2023-01-22 16:49:46.589791: step: 408/531, loss: 0.0009295142372138798 2023-01-22 16:49:47.660050: step: 412/531, loss: 0.0016048046527430415 2023-01-22 16:49:48.711209: step: 416/531, loss: 0.0029492757748812437 2023-01-22 16:49:49.772343: step: 420/531, loss: 0.0004289450007490814 2023-01-22 16:49:50.836101: step: 424/531, loss: 0.001646324060857296 2023-01-22 16:49:51.899050: step: 428/531, loss: 0.010851244442164898 2023-01-22 16:49:52.976663: step: 432/531, loss: 0.010935558937489986 2023-01-22 16:49:54.043896: step: 436/531, loss: 0.005383491516113281 2023-01-22 16:49:55.094658: step: 440/531, loss: 0.0010768651263788342 2023-01-22 16:49:56.167572: step: 444/531, loss: 0.0045637465082108974 2023-01-22 16:49:57.235530: step: 448/531, loss: 0.00131141091696918 2023-01-22 16:49:58.296612: step: 452/531, loss: 0.0002298193285241723 2023-01-22 16:49:59.359998: step: 456/531, loss: 0.006291334982961416 2023-01-22 16:50:00.421438: step: 460/531, loss: 0.001991485944017768 2023-01-22 16:50:01.502202: step: 464/531, loss: 0.009577264077961445 2023-01-22 16:50:02.552835: step: 468/531, loss: 0.0003144587972201407 2023-01-22 16:50:03.620405: step: 472/531, loss: 0.024402670562267303 2023-01-22 16:50:04.683792: step: 476/531, loss: 0.005171215161681175 2023-01-22 16:50:05.744393: step: 480/531, loss: 0.0002673995040822774 2023-01-22 16:50:06.805613: step: 484/531, loss: 0.00021483120508491993 2023-01-22 16:50:07.859232: step: 488/531, loss: 0.0002423171536065638 2023-01-22 16:50:08.918694: step: 492/531, loss: 0.005877661518752575 2023-01-22 16:50:09.977399: step: 496/531, loss: 0.000291046395432204 2023-01-22 16:50:11.043368: step: 500/531, loss: 0.007739312946796417 2023-01-22 16:50:12.103190: step: 504/531, loss: 0.0016455594450235367 2023-01-22 16:50:13.168974: step: 508/531, loss: 0.011581001803278923 2023-01-22 16:50:14.213173: step: 512/531, loss: 0.0035536608193069696 2023-01-22 16:50:15.277944: step: 516/531, loss: 0.006567489821463823 2023-01-22 16:50:16.334242: step: 520/531, loss: 3.331424159114249e-05 2023-01-22 16:50:17.400094: step: 524/531, loss: 0.0029319655150175095 2023-01-22 16:50:18.455715: step: 528/531, loss: 0.00599403353407979 
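The timestamps make it easy to estimate throughput: consecutive logged steps are roughly 1.05 to 1.08 seconds apart throughout this run. Below is a minimal sketch for measuring that from the log text, assuming only the "timestamp: step: N/531, loss: L" format shown here; the three sample entries are copied from this epoch.

# Estimate wall-clock time per logged step from the timestamps.
import re
from datetime import datetime

sample = (
    "2023-01-22 16:49:26.492446: step: 332/531, loss: 4.776411515194923e-05 "
    "2023-01-22 16:49:27.560503: step: 336/531, loss: 0.023765331134200096 "
    "2023-01-22 16:49:28.618189: step: 340/531, loss: 0.0034515359438955784"
)
stamps = [
    datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S.%f")
    for m in re.finditer(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step:", sample)
]
deltas = [(b - a).total_seconds() for a, b in zip(stamps, stamps[1:])]
print(f"mean seconds per logged step: {sum(deltas) / len(deltas):.3f}")   # ~1.06 s for this sample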
2023-01-22 16:50:19.509953: step: 532/531, loss: 0.005029898602515459 2023-01-22 16:50:20.570453: step: 536/531, loss: 0.00017079082317650318 2023-01-22 16:50:21.629289: step: 540/531, loss: 0.0016179722733795643 2023-01-22 16:50:22.695829: step: 544/531, loss: 0.003598906099796295 2023-01-22 16:50:23.754677: step: 548/531, loss: 0.002266326919198036 2023-01-22 16:50:24.812799: step: 552/531, loss: 0.0008765868842601776 2023-01-22 16:50:25.879225: step: 556/531, loss: 0.014094655402004719 2023-01-22 16:50:26.945022: step: 560/531, loss: 0.0075484528206288815 2023-01-22 16:50:28.003502: step: 564/531, loss: 0.0013430730905383825 2023-01-22 16:50:29.075027: step: 568/531, loss: 0.004203127231448889 2023-01-22 16:50:30.137633: step: 572/531, loss: 0.0026134364306926727 2023-01-22 16:50:31.224602: step: 576/531, loss: 0.0031323174480348825 2023-01-22 16:50:32.299731: step: 580/531, loss: 0.0009437850094400346 2023-01-22 16:50:33.366307: step: 584/531, loss: 0.0060317725874483585 2023-01-22 16:50:34.437923: step: 588/531, loss: 0.0018124451162293553 2023-01-22 16:50:35.489069: step: 592/531, loss: 0.002542851259931922 2023-01-22 16:50:36.550334: step: 596/531, loss: 0.00011189384531462565 2023-01-22 16:50:37.603995: step: 600/531, loss: 0.004575440660119057 2023-01-22 16:50:38.662923: step: 604/531, loss: 0.002222835086286068 2023-01-22 16:50:39.734419: step: 608/531, loss: 0.005140097811818123 2023-01-22 16:50:40.788615: step: 612/531, loss: 0.0018736954079940915 2023-01-22 16:50:41.872533: step: 616/531, loss: 0.0010442807106301188 2023-01-22 16:50:42.913897: step: 620/531, loss: 0.0028386476915329695 2023-01-22 16:50:43.987120: step: 624/531, loss: 0.00496304128319025 2023-01-22 16:50:45.061604: step: 628/531, loss: 0.006199537310749292 2023-01-22 16:50:46.121405: step: 632/531, loss: 0.01989562436938286 2023-01-22 16:50:47.168063: step: 636/531, loss: 0.0026654258836060762 2023-01-22 16:50:48.254019: step: 640/531, loss: 0.002410410437732935 2023-01-22 16:50:49.315862: step: 644/531, loss: 0.0010149793233722448 2023-01-22 16:50:50.385249: step: 648/531, loss: 0.0023655917029827833 2023-01-22 16:50:51.461362: step: 652/531, loss: 0.0022076650056988 2023-01-22 16:50:52.530983: step: 656/531, loss: 0.006200449541211128 2023-01-22 16:50:53.586821: step: 660/531, loss: 0.008652614429593086 2023-01-22 16:50:54.648089: step: 664/531, loss: 0.0015065692132338881 2023-01-22 16:50:55.703484: step: 668/531, loss: 0.007710055448114872 2023-01-22 16:50:56.747017: step: 672/531, loss: 0.0002910518378484994 2023-01-22 16:50:57.797023: step: 676/531, loss: 0.0032941356766968966 2023-01-22 16:50:58.853952: step: 680/531, loss: 0.002095744013786316 2023-01-22 16:50:59.927410: step: 684/531, loss: 0.0012469409266486764 2023-01-22 16:51:00.986152: step: 688/531, loss: 0.006371789611876011 2023-01-22 16:51:02.049286: step: 692/531, loss: 0.001873451634310186 2023-01-22 16:51:03.113632: step: 696/531, loss: 0.004255469888448715 2023-01-22 16:51:04.186298: step: 700/531, loss: 0.007635138928890228 2023-01-22 16:51:05.251349: step: 704/531, loss: 0.008111944422125816 2023-01-22 16:51:06.308464: step: 708/531, loss: 0.009344195015728474 2023-01-22 16:51:07.394953: step: 712/531, loss: 0.0024300895165652037 2023-01-22 16:51:08.450724: step: 716/531, loss: 0.0007058361079543829 2023-01-22 16:51:09.504997: step: 720/531, loss: 0.005931665189564228 2023-01-22 16:51:10.605361: step: 724/531, loss: 0.00538041302934289 2023-01-22 16:51:11.660730: step: 728/531, loss: 0.004413405433297157 2023-01-22 16:51:12.726285: step: 
732/531, loss: 0.0033413004130125046 2023-01-22 16:51:13.810058: step: 736/531, loss: 0.004877195693552494 2023-01-22 16:51:14.866169: step: 740/531, loss: 0.006328440736979246 2023-01-22 16:51:15.928138: step: 744/531, loss: 0.006791654042899609 2023-01-22 16:51:16.990345: step: 748/531, loss: 0.0010770582593977451 2023-01-22 16:51:18.055824: step: 752/531, loss: 0.005755329038947821 2023-01-22 16:51:19.128228: step: 756/531, loss: 0.0015215236926451325 2023-01-22 16:51:20.193288: step: 760/531, loss: 0.001223857863806188 2023-01-22 16:51:21.255030: step: 764/531, loss: 0.0122085465118289 2023-01-22 16:51:22.325431: step: 768/531, loss: 0.0005444795824587345 2023-01-22 16:51:23.401661: step: 772/531, loss: 0.004318851046264172 2023-01-22 16:51:24.465403: step: 776/531, loss: 0.0014233270194381475 2023-01-22 16:51:25.522924: step: 780/531, loss: 0.0001222977152792737 2023-01-22 16:51:26.579098: step: 784/531, loss: 0.03975486382842064 2023-01-22 16:51:27.644886: step: 788/531, loss: 0.005028843879699707 2023-01-22 16:51:28.700235: step: 792/531, loss: 0.004637574311345816 2023-01-22 16:51:29.764829: step: 796/531, loss: 0.004428816493600607 2023-01-22 16:51:30.833844: step: 800/531, loss: 0.00016019698523450643 2023-01-22 16:51:31.891687: step: 804/531, loss: 0.003329712199047208 2023-01-22 16:51:32.947905: step: 808/531, loss: 0.02217811346054077 2023-01-22 16:51:34.008703: step: 812/531, loss: 0.0012782919220626354 2023-01-22 16:51:35.070509: step: 816/531, loss: 0.0018862895667552948 2023-01-22 16:51:36.141869: step: 820/531, loss: 0.0014390208525583148 2023-01-22 16:51:37.197030: step: 824/531, loss: 0.011796059086918831 2023-01-22 16:51:38.255205: step: 828/531, loss: 0.008138417266309261 2023-01-22 16:51:39.300893: step: 832/531, loss: 0.000262885179836303 2023-01-22 16:51:40.359138: step: 836/531, loss: 0.006853947415947914 2023-01-22 16:51:41.410578: step: 840/531, loss: 0.0007535576587542892 2023-01-22 16:51:42.470006: step: 844/531, loss: 0.0002806924458127469 2023-01-22 16:51:43.529548: step: 848/531, loss: 0.004743541125208139 2023-01-22 16:51:44.588109: step: 852/531, loss: 0.000423107179813087 2023-01-22 16:51:45.635880: step: 856/531, loss: 0.004514685831964016 2023-01-22 16:51:46.691927: step: 860/531, loss: 0.0019779985304921865 2023-01-22 16:51:47.756846: step: 864/531, loss: 0.0015256067272275686 2023-01-22 16:51:48.797245: step: 868/531, loss: 0.000194802982150577 2023-01-22 16:51:49.882559: step: 872/531, loss: 0.012310339137911797 2023-01-22 16:51:50.932306: step: 876/531, loss: 0.0005365722463466227 2023-01-22 16:51:51.988104: step: 880/531, loss: 0.0036379857920110226 2023-01-22 16:51:53.032776: step: 884/531, loss: 0.01085724774748087 2023-01-22 16:51:54.098687: step: 888/531, loss: 0.0005317054456099868 2023-01-22 16:51:55.141282: step: 892/531, loss: 0.00018345253192819655 2023-01-22 16:51:56.202503: step: 896/531, loss: 0.0030171286780387163 2023-01-22 16:51:57.269605: step: 900/531, loss: 0.003309423802420497 2023-01-22 16:51:58.330499: step: 904/531, loss: 0.001217101002112031 2023-01-22 16:51:59.397150: step: 908/531, loss: 0.03060407191514969 2023-01-22 16:52:00.453773: step: 912/531, loss: 0.0113700395449996 2023-01-22 16:52:01.518731: step: 916/531, loss: 0.004854146391153336 2023-01-22 16:52:02.571553: step: 920/531, loss: 0.006441024597734213 2023-01-22 16:52:03.636330: step: 924/531, loss: 0.004410286899656057 2023-01-22 16:52:04.687965: step: 928/531, loss: 0.006873035803437233 2023-01-22 16:52:05.746227: step: 932/531, loss: 0.0002730699779931456 
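The "Current best result" blocks repeated after each epoch keep the strongest dev score seen so far for each language separately, which is why they still cite epoch 6 (Chinese) and epoch 5 (Korean, Russian) while training is at epoch 11. The sketch below shows that kind of per-language bookkeeping keyed on the dev 'combined' value; it is an inference from the log, not the repository's code.

# Minimal per-language best-result tracking.
best = {}  # language -> (combined score, epoch, full metrics dict)

def update_best(language, epoch, dev_metrics):
    """dev_metrics is a dict shaped like the ones logged above, with a 'combined' float."""
    score = dev_metrics["combined"]
    if language not in best or score > best[language][0]:
        best[language] = (score, epoch, dev_metrics)

# With the dev 'combined' values printed in this log, epoch 6 stays the Chinese best:
update_best("Chinese", 6, {"combined": 0.25052447552447554})
update_best("Chinese", 10, {"combined": 0.24855564884294484})
print(best["Chinese"][1])  # -> 6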
2023-01-22 16:52:06.814980: step: 936/531, loss: 0.0018945703050121665 2023-01-22 16:52:07.879273: step: 940/531, loss: 0.006529218517243862 2023-01-22 16:52:08.950362: step: 944/531, loss: 0.0006231256993487477 2023-01-22 16:52:10.035046: step: 948/531, loss: 0.0005243791965767741 2023-01-22 16:52:11.091421: step: 952/531, loss: 0.0015809608157724142 2023-01-22 16:52:12.174383: step: 956/531, loss: 0.008266216143965721 2023-01-22 16:52:13.223856: step: 960/531, loss: 0.018888840451836586 2023-01-22 16:52:14.299687: step: 964/531, loss: 0.00015002106374595314 2023-01-22 16:52:15.362009: step: 968/531, loss: 0.0064233760349452496 2023-01-22 16:52:16.419894: step: 972/531, loss: 0.0033490073401480913 2023-01-22 16:52:17.473719: step: 976/531, loss: 0.0016718388069421053 2023-01-22 16:52:18.529500: step: 980/531, loss: 0.00017033734184224159 2023-01-22 16:52:19.574698: step: 984/531, loss: 0.006633893586695194 2023-01-22 16:52:20.628800: step: 988/531, loss: 0.004494899418205023 2023-01-22 16:52:21.682292: step: 992/531, loss: 0.0020726737566292286 2023-01-22 16:52:22.744816: step: 996/531, loss: 0.0019445770885795355 2023-01-22 16:52:23.814191: step: 1000/531, loss: 0.00526324100792408 2023-01-22 16:52:24.852455: step: 1004/531, loss: 0.0008864232804626226 2023-01-22 16:52:25.905221: step: 1008/531, loss: 0.004418803378939629 2023-01-22 16:52:26.957663: step: 1012/531, loss: 0.003179551102221012 2023-01-22 16:52:28.005682: step: 1016/531, loss: 0.0024454572703689337 2023-01-22 16:52:29.074552: step: 1020/531, loss: 0.04245481267571449 2023-01-22 16:52:30.135805: step: 1024/531, loss: 0.012318273074924946 2023-01-22 16:52:31.187608: step: 1028/531, loss: 0.0044145057909190655 2023-01-22 16:52:32.239503: step: 1032/531, loss: 6.116233271313831e-05 2023-01-22 16:52:33.287630: step: 1036/531, loss: 0.0011430742451921105 2023-01-22 16:52:34.337174: step: 1040/531, loss: 0.0008967426256276667 2023-01-22 16:52:35.389791: step: 1044/531, loss: 0.011159449815750122 2023-01-22 16:52:36.441533: step: 1048/531, loss: 0.006936488673090935 2023-01-22 16:52:37.506684: step: 1052/531, loss: 0.0007150927558541298 2023-01-22 16:52:38.576034: step: 1056/531, loss: 0.010731692425906658 2023-01-22 16:52:39.640816: step: 1060/531, loss: 0.0037799340207129717 2023-01-22 16:52:40.700882: step: 1064/531, loss: 0.011266198940575123 2023-01-22 16:52:41.752122: step: 1068/531, loss: 0.0002607348724268377 2023-01-22 16:52:42.846988: step: 1072/531, loss: 0.003033361630514264 2023-01-22 16:52:43.915194: step: 1076/531, loss: 0.005947592202574015 2023-01-22 16:52:44.968347: step: 1080/531, loss: 0.007470968645066023 2023-01-22 16:52:46.023686: step: 1084/531, loss: 0.0019138501957058907 2023-01-22 16:52:47.067105: step: 1088/531, loss: 0.0011556856334209442 2023-01-22 16:52:48.134523: step: 1092/531, loss: 0.0005223070620559156 2023-01-22 16:52:49.195161: step: 1096/531, loss: 0.011568109504878521 2023-01-22 16:52:50.245453: step: 1100/531, loss: 0.006213732063770294 2023-01-22 16:52:51.305719: step: 1104/531, loss: 0.0002279826730955392 2023-01-22 16:52:52.362358: step: 1108/531, loss: 0.00017191852384712547 2023-01-22 16:52:53.415073: step: 1112/531, loss: 0.0026087078731507063 2023-01-22 16:52:54.469925: step: 1116/531, loss: 0.002258676802739501 2023-01-22 16:52:55.540351: step: 1120/531, loss: 0.008678958751261234 2023-01-22 16:52:56.592194: step: 1124/531, loss: 0.004187881946563721 2023-01-22 16:52:57.651924: step: 1128/531, loss: 0.00678448798134923 2023-01-22 16:52:58.715068: step: 1132/531, loss: 
0.003109800163656473 2023-01-22 16:52:59.748810: step: 1136/531, loss: 3.5098997614113614e-05 2023-01-22 16:53:00.808050: step: 1140/531, loss: 0.00300983595661819 2023-01-22 16:53:01.852142: step: 1144/531, loss: 0.002147710183635354 2023-01-22 16:53:02.894812: step: 1148/531, loss: 4.352720861788839e-05 2023-01-22 16:53:03.959703: step: 1152/531, loss: 0.005084862466901541 2023-01-22 16:53:05.031876: step: 1156/531, loss: 0.0027062869630753994 2023-01-22 16:53:06.087023: step: 1160/531, loss: 0.0027608387172222137 2023-01-22 16:53:07.143905: step: 1164/531, loss: 0.012221050448715687 2023-01-22 16:53:08.204514: step: 1168/531, loss: 0.008218341507017612 2023-01-22 16:53:09.257170: step: 1172/531, loss: 0.0064306179992854595 2023-01-22 16:53:10.321501: step: 1176/531, loss: 0.004282574634999037 2023-01-22 16:53:11.378633: step: 1180/531, loss: 0.0003131578559987247 2023-01-22 16:53:12.435996: step: 1184/531, loss: 0.0055084070190787315 2023-01-22 16:53:13.501413: step: 1188/531, loss: 0.005028588231652975 2023-01-22 16:53:14.564907: step: 1192/531, loss: 0.0023935751523822546 2023-01-22 16:53:15.626178: step: 1196/531, loss: 0.007222407963126898 2023-01-22 16:53:16.673611: step: 1200/531, loss: 0.0009791728807613254 2023-01-22 16:53:17.740806: step: 1204/531, loss: 0.0015960520831868052 2023-01-22 16:53:18.801329: step: 1208/531, loss: 0.0008987372275441885 2023-01-22 16:53:19.864155: step: 1212/531, loss: 0.0024682246148586273 2023-01-22 16:53:20.925686: step: 1216/531, loss: 8.78802893566899e-05 2023-01-22 16:53:21.988211: step: 1220/531, loss: 0.002148613566532731 2023-01-22 16:53:23.048378: step: 1224/531, loss: 0.0058380537666380405 2023-01-22 16:53:24.114593: step: 1228/531, loss: 0.0004793701518792659 2023-01-22 16:53:25.162594: step: 1232/531, loss: 0.011534301564097404 2023-01-22 16:53:26.238344: step: 1236/531, loss: 0.0043892525136470795 2023-01-22 16:53:27.304012: step: 1240/531, loss: 0.007320867385715246 2023-01-22 16:53:28.365262: step: 1244/531, loss: 0.009166529402136803 2023-01-22 16:53:29.430750: step: 1248/531, loss: 0.003661456750705838 2023-01-22 16:53:30.502856: step: 1252/531, loss: 0.013029376044869423 2023-01-22 16:53:31.579158: step: 1256/531, loss: 0.0006287398864515126 2023-01-22 16:53:32.627054: step: 1260/531, loss: 0.005346239078789949 2023-01-22 16:53:33.695988: step: 1264/531, loss: 0.000639710167888552 2023-01-22 16:53:34.754365: step: 1268/531, loss: 4.5079395931679755e-05 2023-01-22 16:53:35.814301: step: 1272/531, loss: 0.04524929076433182 2023-01-22 16:53:36.881135: step: 1276/531, loss: 0.005343164317309856 2023-01-22 16:53:37.953667: step: 1280/531, loss: 0.0042571756057441235 2023-01-22 16:53:39.029700: step: 1284/531, loss: 0.005458963569253683 2023-01-22 16:53:40.110180: step: 1288/531, loss: 0.0143412034958601 2023-01-22 16:53:41.160257: step: 1292/531, loss: 0.005807945504784584 2023-01-22 16:53:42.237741: step: 1296/531, loss: 0.005546705797314644 2023-01-22 16:53:43.282225: step: 1300/531, loss: 0.0067531997337937355 2023-01-22 16:53:44.331327: step: 1304/531, loss: 0.0008774721645750105 2023-01-22 16:53:45.391381: step: 1308/531, loss: 0.007062950171530247 2023-01-22 16:53:46.437070: step: 1312/531, loss: 0.0005221646279096603 2023-01-22 16:53:47.530372: step: 1316/531, loss: 0.0013187529984861612 2023-01-22 16:53:48.594572: step: 1320/531, loss: 0.0004419395700097084 2023-01-22 16:53:49.646113: step: 1324/531, loss: 0.00824517011642456 2023-01-22 16:53:50.696115: step: 1328/531, loss: 0.0015413709916174412 2023-01-22 16:53:51.754820: step: 
1332/531, loss: 0.0033878982067108154 2023-01-22 16:53:52.813409: step: 1336/531, loss: 0.004665473010390997 2023-01-22 16:53:53.857253: step: 1340/531, loss: 0.0006703593535348773 2023-01-22 16:53:54.893961: step: 1344/531, loss: 0.0004064817912876606 2023-01-22 16:53:55.951576: step: 1348/531, loss: 0.0032921815291047096 2023-01-22 16:53:57.022785: step: 1352/531, loss: 0.015185847878456116 2023-01-22 16:53:58.073114: step: 1356/531, loss: 0.0003153661382384598 2023-01-22 16:53:59.127807: step: 1360/531, loss: 0.002001093467697501 2023-01-22 16:54:00.192310: step: 1364/531, loss: 0.012593758292496204 2023-01-22 16:54:01.254229: step: 1368/531, loss: 0.0006679664365947247 2023-01-22 16:54:02.294821: step: 1372/531, loss: 0.0009427554905414581 2023-01-22 16:54:03.349406: step: 1376/531, loss: 0.00018571940017864108 2023-01-22 16:54:04.403213: step: 1380/531, loss: 0.007450035307556391 2023-01-22 16:54:05.470976: step: 1384/531, loss: 0.003342832438647747 2023-01-22 16:54:06.526538: step: 1388/531, loss: 0.005212183576077223 2023-01-22 16:54:07.585375: step: 1392/531, loss: 0.005624871701002121 2023-01-22 16:54:08.640773: step: 1396/531, loss: 0.0007276016985997558 2023-01-22 16:54:09.716288: step: 1400/531, loss: 0.010481802746653557 2023-01-22 16:54:10.771867: step: 1404/531, loss: 0.000984206679277122 2023-01-22 16:54:11.829015: step: 1408/531, loss: 0.018762333318591118 2023-01-22 16:54:12.911051: step: 1412/531, loss: 0.0007749480428174138 2023-01-22 16:54:13.980556: step: 1416/531, loss: 0.0029891724698245525 2023-01-22 16:54:15.031014: step: 1420/531, loss: 3.6878678656648844e-05 2023-01-22 16:54:16.097038: step: 1424/531, loss: 0.004841763060539961 2023-01-22 16:54:17.156795: step: 1428/531, loss: 0.008448080159723759 2023-01-22 16:54:18.227246: step: 1432/531, loss: 0.0005592944798991084 2023-01-22 16:54:19.283583: step: 1436/531, loss: 0.017689159139990807 2023-01-22 16:54:20.338537: step: 1440/531, loss: 0.005695062223821878 2023-01-22 16:54:21.401987: step: 1444/531, loss: 0.004672298673540354 2023-01-22 16:54:22.459430: step: 1448/531, loss: 0.000152892607729882 2023-01-22 16:54:23.519811: step: 1452/531, loss: 0.002773034619167447 2023-01-22 16:54:24.574964: step: 1456/531, loss: 0.00044494381290860474 2023-01-22 16:54:25.627696: step: 1460/531, loss: 0.005455221980810165 2023-01-22 16:54:26.704012: step: 1464/531, loss: 0.0016655612271279097 2023-01-22 16:54:27.750235: step: 1468/531, loss: 0.001345763448625803 2023-01-22 16:54:28.809470: step: 1472/531, loss: 0.002285619266331196 2023-01-22 16:54:29.889410: step: 1476/531, loss: 0.005338137503713369 2023-01-22 16:54:30.952983: step: 1480/531, loss: 0.0026537994854152203 2023-01-22 16:54:31.992674: step: 1484/531, loss: 0.002621342660859227 2023-01-22 16:54:33.040836: step: 1488/531, loss: 0.0003916116838809103 2023-01-22 16:54:34.100717: step: 1492/531, loss: 0.0029015252366662025 2023-01-22 16:54:35.144517: step: 1496/531, loss: 0.0026797123719006777 2023-01-22 16:54:36.200947: step: 1500/531, loss: 0.008366267196834087 2023-01-22 16:54:37.279911: step: 1504/531, loss: 0.008602991700172424 2023-01-22 16:54:38.338586: step: 1508/531, loss: 0.015988850966095924 2023-01-22 16:54:39.395285: step: 1512/531, loss: 0.013631724752485752 2023-01-22 16:54:40.460591: step: 1516/531, loss: 0.0019024478970095515 2023-01-22 16:54:41.517855: step: 1520/531, loss: 0.0041855210438370705 2023-01-22 16:54:42.595660: step: 1524/531, loss: 0.004925829824060202 2023-01-22 16:54:43.678264: step: 1528/531, loss: 0.005109453573822975 2023-01-22 
16:54:44.746076: step: 1532/531, loss: 0.0015046736225485802 2023-01-22 16:54:45.793433: step: 1536/531, loss: 0.0020050164312124252 2023-01-22 16:54:46.853296: step: 1540/531, loss: 0.006838127505034208 2023-01-22 16:54:47.935632: step: 1544/531, loss: 0.0013855180004611611 2023-01-22 16:54:48.994890: step: 1548/531, loss: 0.013926796615123749 2023-01-22 16:54:50.058479: step: 1552/531, loss: 0.016245339065790176 2023-01-22 16:54:51.139456: step: 1556/531, loss: 0.009078939445316792 2023-01-22 16:54:52.208914: step: 1560/531, loss: 0.011057503521442413 2023-01-22 16:54:53.275162: step: 1564/531, loss: 7.659125549253076e-05 2023-01-22 16:54:54.325847: step: 1568/531, loss: 0.0021128018852323294 2023-01-22 16:54:55.407568: step: 1572/531, loss: 0.01326420996338129 2023-01-22 16:54:56.458038: step: 1576/531, loss: 0.001199234277009964 2023-01-22 16:54:57.525230: step: 1580/531, loss: 0.008252283558249474 2023-01-22 16:54:58.601202: step: 1584/531, loss: 0.01246541179716587 2023-01-22 16:54:59.676692: step: 1588/531, loss: 0.011930056847631931 2023-01-22 16:55:00.732198: step: 1592/531, loss: 0.0006496492424048483 2023-01-22 16:55:01.804119: step: 1596/531, loss: 0.0011638659052550793 2023-01-22 16:55:02.858259: step: 1600/531, loss: 0.05825250223278999 2023-01-22 16:55:03.911285: step: 1604/531, loss: 0.008243819698691368 2023-01-22 16:55:04.970015: step: 1608/531, loss: 0.0035970723256468773 2023-01-22 16:55:06.018102: step: 1612/531, loss: 0.00021067557099740952 2023-01-22 16:55:07.077867: step: 1616/531, loss: 0.02393924444913864 2023-01-22 16:55:08.133597: step: 1620/531, loss: 0.020270323380827904 2023-01-22 16:55:09.198690: step: 1624/531, loss: 0.0032589409966021776 2023-01-22 16:55:10.254770: step: 1628/531, loss: 0.006127768196165562 2023-01-22 16:55:11.333614: step: 1632/531, loss: 0.0024526531342417 2023-01-22 16:55:12.426160: step: 1636/531, loss: 0.00407453766092658 2023-01-22 16:55:13.465411: step: 1640/531, loss: 0.00470855925232172 2023-01-22 16:55:14.526498: step: 1644/531, loss: 0.02908954583108425 2023-01-22 16:55:15.579052: step: 1648/531, loss: 0.007423473987728357 2023-01-22 16:55:16.636363: step: 1652/531, loss: 4.4258504203753546e-05 2023-01-22 16:55:17.698812: step: 1656/531, loss: 0.002961240243166685 2023-01-22 16:55:18.763932: step: 1660/531, loss: 0.0008582502487115562 2023-01-22 16:55:19.816118: step: 1664/531, loss: 0.003365323878824711 2023-01-22 16:55:20.886606: step: 1668/531, loss: 0.008368290960788727 2023-01-22 16:55:21.932281: step: 1672/531, loss: 0.005154167301952839 2023-01-22 16:55:22.984892: step: 1676/531, loss: 0.004117444157600403 2023-01-22 16:55:24.089388: step: 1680/531, loss: 0.0056172749027609825 2023-01-22 16:55:25.144314: step: 1684/531, loss: 0.00018345407443121076 2023-01-22 16:55:26.204914: step: 1688/531, loss: 0.001721491222269833 2023-01-22 16:55:27.268237: step: 1692/531, loss: 0.0002251827681902796 2023-01-22 16:55:28.317799: step: 1696/531, loss: 0.003302122699096799 2023-01-22 16:55:29.379578: step: 1700/531, loss: 0.002727292710915208 2023-01-22 16:55:30.436943: step: 1704/531, loss: 0.012648818083107471 2023-01-22 16:55:31.492494: step: 1708/531, loss: 0.004212263040244579 2023-01-22 16:55:32.542520: step: 1712/531, loss: 0.0012800705153495073 2023-01-22 16:55:33.622149: step: 1716/531, loss: 0.01647569239139557 2023-01-22 16:55:34.673023: step: 1720/531, loss: 0.0014455660711973906 2023-01-22 16:55:35.719264: step: 1724/531, loss: 0.0008796030306257308 2023-01-22 16:55:36.774799: step: 1728/531, loss: 0.005203488282859325 
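The single "Loss: 0.006" line printed in each epoch summary is consistent with a plain average of the per-step losses above. A minimal running-mean sketch of that aggregation follows; this is an assumption about train.py's bookkeeping, with three step losses copied from the entries just above.

# Running mean over per-step losses.
class RunningMean:
    def __init__(self):
        self.total, self.count = 0.0, 0

    def update(self, value):
        self.total += value
        self.count += 1

    @property
    def mean(self):
        return self.total / max(self.count, 1)

epoch_loss = RunningMean()
for loss in (0.0015046736225485802, 0.0020050164312124252, 0.006838127505034208):
    epoch_loss.update(loss)
print(f"Loss: {epoch_loss.mean:.3f}")   # same format as the epoch summary line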
2023-01-22 16:55:37.834307: step: 1732/531, loss: 0.01926231011748314 2023-01-22 16:55:38.893254: step: 1736/531, loss: 0.00021135433053132147 2023-01-22 16:55:39.965314: step: 1740/531, loss: 0.006085301749408245 2023-01-22 16:55:41.016812: step: 1744/531, loss: 0.005453317426145077 2023-01-22 16:55:42.097615: step: 1748/531, loss: 0.0015991569962352514 2023-01-22 16:55:43.150913: step: 1752/531, loss: 9.101376053877175e-05 2023-01-22 16:55:44.205912: step: 1756/531, loss: 0.0008252952829934657 2023-01-22 16:55:45.278329: step: 1760/531, loss: 0.0009875706164166331 2023-01-22 16:55:46.342284: step: 1764/531, loss: 0.005662148352712393 2023-01-22 16:55:47.406997: step: 1768/531, loss: 0.00747600058093667 2023-01-22 16:55:48.455122: step: 1772/531, loss: 0.0009594178991392255 2023-01-22 16:55:49.525840: step: 1776/531, loss: 0.012910320423543453 2023-01-22 16:55:50.575026: step: 1780/531, loss: 0.0069103236310184 2023-01-22 16:55:51.641886: step: 1784/531, loss: 0.006927257403731346 2023-01-22 16:55:52.697560: step: 1788/531, loss: 0.0002609694784041494 2023-01-22 16:55:53.771845: step: 1792/531, loss: 0.04665395990014076 2023-01-22 16:55:54.832184: step: 1796/531, loss: 0.02080320194363594 2023-01-22 16:55:55.896031: step: 1800/531, loss: 0.0005638275179080665 2023-01-22 16:55:56.961997: step: 1804/531, loss: 0.0009399615810252726 2023-01-22 16:55:58.022006: step: 1808/531, loss: 0.010329034179449081 2023-01-22 16:55:59.094528: step: 1812/531, loss: 0.0024414118379354477 2023-01-22 16:56:00.145379: step: 1816/531, loss: 0.0004456086317077279 2023-01-22 16:56:01.219913: step: 1820/531, loss: 0.0027660930063575506 2023-01-22 16:56:02.282625: step: 1824/531, loss: 0.0038704487960785627 2023-01-22 16:56:03.343294: step: 1828/531, loss: 0.004264619667083025 2023-01-22 16:56:04.419343: step: 1832/531, loss: 0.013584275729954243 2023-01-22 16:56:05.474571: step: 1836/531, loss: 0.016197444871068 2023-01-22 16:56:06.526900: step: 1840/531, loss: 0.011844533495604992 2023-01-22 16:56:07.574753: step: 1844/531, loss: 0.002995867980644107 2023-01-22 16:56:08.647611: step: 1848/531, loss: 0.00017257625586353242 2023-01-22 16:56:09.698619: step: 1852/531, loss: 0.0005876217037439346 2023-01-22 16:56:10.749270: step: 1856/531, loss: 0.007146322168409824 2023-01-22 16:56:11.791194: step: 1860/531, loss: 0.00023343191423919052 2023-01-22 16:56:12.844870: step: 1864/531, loss: 0.0018423054134473205 2023-01-22 16:56:13.908345: step: 1868/531, loss: 0.014122666791081429 2023-01-22 16:56:14.973856: step: 1872/531, loss: 0.001166050205938518 2023-01-22 16:56:16.033893: step: 1876/531, loss: 0.008618973195552826 2023-01-22 16:56:17.096293: step: 1880/531, loss: 0.0017856524791568518 2023-01-22 16:56:18.177551: step: 1884/531, loss: 0.04140400514006615 2023-01-22 16:56:19.240906: step: 1888/531, loss: 0.007130743470042944 2023-01-22 16:56:20.304182: step: 1892/531, loss: 0.04196929559111595 2023-01-22 16:56:21.369228: step: 1896/531, loss: 0.0016301539726555347 2023-01-22 16:56:22.426695: step: 1900/531, loss: 0.009510564617812634 2023-01-22 16:56:23.481294: step: 1904/531, loss: 0.00044240913121029735 2023-01-22 16:56:24.542011: step: 1908/531, loss: 0.012046189978718758 2023-01-22 16:56:25.588095: step: 1912/531, loss: 0.00041518075158819556 2023-01-22 16:56:26.658064: step: 1916/531, loss: 0.027137255296111107 2023-01-22 16:56:27.694385: step: 1920/531, loss: 0.0004925797111354768 2023-01-22 16:56:28.745502: step: 1924/531, loss: 0.012253421358764172 2023-01-22 16:56:29.814279: step: 1928/531, loss: 
0.02932673692703247 2023-01-22 16:56:30.860043: step: 1932/531, loss: 0.010897035710513592 2023-01-22 16:56:31.914026: step: 1936/531, loss: 0.0011757295578718185 2023-01-22 16:56:32.970637: step: 1940/531, loss: 0.0018360865069553256 2023-01-22 16:56:34.046934: step: 1944/531, loss: 0.007614095229655504 2023-01-22 16:56:35.118246: step: 1948/531, loss: 0.005120331887155771 2023-01-22 16:56:36.175847: step: 1952/531, loss: 0.0036779800429940224 2023-01-22 16:56:37.245525: step: 1956/531, loss: 0.003120720386505127 2023-01-22 16:56:38.295519: step: 1960/531, loss: 0.008584629744291306 2023-01-22 16:56:39.365711: step: 1964/531, loss: 0.005383949726819992 2023-01-22 16:56:40.424909: step: 1968/531, loss: 0.00524586858227849 2023-01-22 16:56:41.512122: step: 1972/531, loss: 0.015556536614894867 2023-01-22 16:56:42.590531: step: 1976/531, loss: 0.013057351112365723 2023-01-22 16:56:43.638518: step: 1980/531, loss: 0.021872470155358315 2023-01-22 16:56:44.705412: step: 1984/531, loss: 2.665207466634456e-05 2023-01-22 16:56:45.761881: step: 1988/531, loss: 0.000882698455825448 2023-01-22 16:56:46.820841: step: 1992/531, loss: 0.003634906839579344 2023-01-22 16:56:47.887796: step: 1996/531, loss: 0.004949311725795269 2023-01-22 16:56:48.942020: step: 2000/531, loss: 6.262212991714478e-05 2023-01-22 16:56:50.026062: step: 2004/531, loss: 0.09322883188724518 2023-01-22 16:56:51.102304: step: 2008/531, loss: 0.021210629492998123 2023-01-22 16:56:52.172129: step: 2012/531, loss: 0.006638068240135908 2023-01-22 16:56:53.227696: step: 2016/531, loss: 0.0001774926349753514 2023-01-22 16:56:54.276706: step: 2020/531, loss: 0.0022074251901358366 2023-01-22 16:56:55.342140: step: 2024/531, loss: 0.006805712357163429 2023-01-22 16:56:56.395839: step: 2028/531, loss: 0.005032597575336695 2023-01-22 16:56:57.454821: step: 2032/531, loss: 0.004065856337547302 2023-01-22 16:56:58.503023: step: 2036/531, loss: 0.0032912169117480516 2023-01-22 16:56:59.541883: step: 2040/531, loss: 0.002695470117032528 2023-01-22 16:57:00.598462: step: 2044/531, loss: 0.0031310562044382095 2023-01-22 16:57:01.654303: step: 2048/531, loss: 0.0027956843841820955 2023-01-22 16:57:02.715477: step: 2052/531, loss: 0.0020208486821502447 2023-01-22 16:57:03.762460: step: 2056/531, loss: 0.006803087890148163 2023-01-22 16:57:04.819440: step: 2060/531, loss: 0.005171677563339472 2023-01-22 16:57:05.863664: step: 2064/531, loss: 0.0025616742204874754 2023-01-22 16:57:06.917292: step: 2068/531, loss: 9.408283949596807e-05 2023-01-22 16:57:07.972622: step: 2072/531, loss: 0.0010668785544112325 2023-01-22 16:57:09.037393: step: 2076/531, loss: 0.022057106718420982 2023-01-22 16:57:10.108112: step: 2080/531, loss: 0.008383555337786674 2023-01-22 16:57:11.161449: step: 2084/531, loss: 0.0007936846814118326 2023-01-22 16:57:12.216052: step: 2088/531, loss: 1.5653886293875985e-05 2023-01-22 16:57:13.271443: step: 2092/531, loss: 0.009281976148486137 2023-01-22 16:57:14.344645: step: 2096/531, loss: 0.0028985398821532726 2023-01-22 16:57:15.398746: step: 2100/531, loss: 0.001012779655866325 2023-01-22 16:57:16.450117: step: 2104/531, loss: 0.013688012957572937 2023-01-22 16:57:17.498158: step: 2108/531, loss: 0.0008754542795941234 2023-01-22 16:57:18.555468: step: 2112/531, loss: 0.010695521719753742 2023-01-22 16:57:19.615376: step: 2116/531, loss: 0.005161258392035961 2023-01-22 16:57:20.675909: step: 2120/531, loss: 0.002305034315213561 2023-01-22 16:57:21.747789: step: 2124/531, loss: 0.03891216963529587 
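In the evaluation summary that follows, every 'combined' value equals the template F1 multiplied by the slot F1, and each F1 is the usual harmonic mean 2pr/(p+r) of the printed precision and recall. This is inferred from the printed numbers rather than taken from the scoring code; the check below uses the epoch-11 Dev Chinese figures from the summary.

# Verify the apparent scoring relationship on one logged entry.
import math

def f1(p, r):
    return 2 * p * r / (p + r)

template = {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579}
slot = {"p": 0.35720376230661044, "r": 0.3206715593434344, "f1": 0.33795326014637395}
combined = 0.24901819168680184

assert math.isclose(f1(template["p"], template["r"]), template["f1"])
assert math.isclose(f1(slot["p"], slot["r"]), slot["f1"])
assert math.isclose(template["f1"] * slot["f1"], combined)
print("combined = template_f1 * slot_f1 holds for this entry")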
================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35720376230661044, 'r': 0.3206715593434344, 'f1': 0.33795326014637395}, 'combined': 0.24901819168680184, 'stategy': 1, 'epoch': 11} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33822104286526034, 'r': 0.2783913277013695, 'f1': 0.30540355553774007}, 'combined': 0.19123400206568772, 'stategy': 1, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3333246930988719, 'r': 0.34660708124892187, 'f1': 0.3398361522198732}, 'combined': 0.2504055858462223, 'stategy': 1, 'epoch': 11} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36285943406207416, 'r': 0.3013467437070968, 'f1': 0.329254718001153}, 'combined': 0.2040451773528272, 'stategy': 1, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3204383519206939, 'r': 0.32712681846932323, 'f1': 0.32374804381846634}, 'combined': 0.2385511901820278, 'stategy': 1, 'epoch': 11} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36344070256057054, 'r': 0.2866158386046776, 'f1': 0.32048861953068497}, 'combined': 0.21260136147085043, 'stategy': 1, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.27586206896551724, 'f1': 0.3404255319148936}, 'combined': 0.22695035460992907, 'stategy': 1, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 
0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 12 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 16:59:48.005073: step: 4/531, loss: 0.0008389207650907338 2023-01-22 16:59:49.049124: step: 8/531, loss: 0.00010559849033597857 2023-01-22 16:59:50.084643: step: 12/531, loss: 0.0018369851168245077 2023-01-22 16:59:51.127246: step: 16/531, loss: 0.0007721724105067551 2023-01-22 16:59:52.198784: step: 20/531, loss: 0.0032571502961218357 2023-01-22 16:59:53.263300: step: 24/531, loss: 0.0036437639500945807 2023-01-22 16:59:54.308953: step: 28/531, loss: 2.4471442884532735e-05 2023-01-22 16:59:55.360178: step: 32/531, loss: 0.0033021809067577124 2023-01-22 16:59:56.425882: step: 36/531, loss: 0.004632710013538599 2023-01-22 16:59:57.470759: step: 40/531, loss: 0.005841181147843599 2023-01-22 16:59:58.528704: step: 44/531, loss: 5.735109152738005e-05 2023-01-22 16:59:59.592338: step: 48/531, loss: 0.024581611156463623 2023-01-22 17:00:00.646316: step: 52/531, loss: 0.0019269189797341824 2023-01-22 17:00:01.713086: step: 56/531, loss: 0.00784609466791153 2023-01-22 17:00:02.788011: step: 60/531, loss: 0.00300803454592824 2023-01-22 17:00:03.852696: step: 64/531, loss: 0.0009494698606431484 2023-01-22 17:00:04.901549: step: 68/531, loss: 9.220935316989198e-05 2023-01-22 17:00:05.977059: step: 72/531, loss: 0.003247142769396305 2023-01-22 17:00:07.024416: step: 76/531, loss: 2.4122944523696788e-05 2023-01-22 17:00:08.096768: step: 80/531, loss: 0.0016064064111560583 2023-01-22 17:00:09.157821: step: 84/531, loss: 0.0005471711046993732 2023-01-22 17:00:10.201389: step: 88/531, loss: 0.0022514851298183203 2023-01-22 17:00:11.255288: step: 92/531, loss: 0.009884671308100224 2023-01-22 17:00:12.338282: step: 96/531, loss: 0.00525251030921936 2023-01-22 17:00:13.393669: step: 100/531, loss: 0.0037255228962749243 2023-01-22 17:00:14.444142: step: 104/531, loss: 0.005992396734654903 2023-01-22 17:00:15.484233: step: 108/531, loss: 0.000234859500778839 2023-01-22 17:00:16.527124: step: 112/531, loss: 9.715954365674406e-05 2023-01-22 17:00:17.577589: step: 116/531, loss: 0.00032209724304266274 2023-01-22 17:00:18.625102: step: 120/531, loss: 0.008206389844417572 2023-01-22 17:00:19.675047: step: 124/531, loss: 0.0003823053266387433 2023-01-22 17:00:20.735249: step: 128/531, loss: 0.0051974281668663025 2023-01-22 17:00:21.795942: step: 132/531, loss: 0.004671670496463776 2023-01-22 17:00:22.861556: step: 136/531, loss: 0.004954180214554071 2023-01-22 17:00:23.920506: step: 140/531, loss: 0.004117222502827644 2023-01-22 17:00:24.982215: step: 144/531, loss: 
0.005710092838853598 2023-01-22 17:00:26.037104: step: 148/531, loss: 0.007301541045308113 2023-01-22 17:00:27.099155: step: 152/531, loss: 0.007987217977643013 2023-01-22 17:00:28.160888: step: 156/531, loss: 0.0036173320841044188 2023-01-22 17:00:29.220800: step: 160/531, loss: 0.030461503192782402 2023-01-22 17:00:30.268368: step: 164/531, loss: 0.009336454793810844 2023-01-22 17:00:31.313370: step: 168/531, loss: 0.0008688546367920935 2023-01-22 17:00:32.350549: step: 172/531, loss: 0.00035778165329247713 2023-01-22 17:00:33.412277: step: 176/531, loss: 0.0039122882299125195 2023-01-22 17:00:34.467402: step: 180/531, loss: 0.010806133970618248 2023-01-22 17:00:35.508617: step: 184/531, loss: 0.00023036330821923912 2023-01-22 17:00:36.560807: step: 188/531, loss: 0.03970678895711899 2023-01-22 17:00:37.631331: step: 192/531, loss: 0.008071407675743103 2023-01-22 17:00:38.687232: step: 196/531, loss: 0.002907214453443885 2023-01-22 17:00:39.735088: step: 200/531, loss: 0.03283369168639183 2023-01-22 17:00:40.803001: step: 204/531, loss: 0.00859660841524601 2023-01-22 17:00:41.878787: step: 208/531, loss: 0.0019820162560790777 2023-01-22 17:00:42.924937: step: 212/531, loss: 0.004488769453018904 2023-01-22 17:00:43.986012: step: 216/531, loss: 0.003945598378777504 2023-01-22 17:00:45.022291: step: 220/531, loss: 0.0001054470703820698 2023-01-22 17:00:46.081566: step: 224/531, loss: 0.001361457398161292 2023-01-22 17:00:47.151706: step: 228/531, loss: 0.0018045916222035885 2023-01-22 17:00:48.215643: step: 232/531, loss: 3.198215927113779e-05 2023-01-22 17:00:49.276634: step: 236/531, loss: 0.0011877354700118303 2023-01-22 17:00:50.337006: step: 240/531, loss: 0.0027856742963194847 2023-01-22 17:00:51.383038: step: 244/531, loss: 0.001992375124245882 2023-01-22 17:00:52.436129: step: 248/531, loss: 0.007944031618535519 2023-01-22 17:00:53.491968: step: 252/531, loss: 0.006351663265377283 2023-01-22 17:00:54.547849: step: 256/531, loss: 0.015617218799889088 2023-01-22 17:00:55.633118: step: 260/531, loss: 6.317496445262805e-05 2023-01-22 17:00:56.685088: step: 264/531, loss: 0.001909082755446434 2023-01-22 17:00:57.732305: step: 268/531, loss: 0.0045850686728954315 2023-01-22 17:00:58.800648: step: 272/531, loss: 0.00044367933878675103 2023-01-22 17:00:59.846482: step: 276/531, loss: 0.008906870149075985 2023-01-22 17:01:00.909007: step: 280/531, loss: 0.003220190526917577 2023-01-22 17:01:01.972742: step: 284/531, loss: 0.0033500271383672953 2023-01-22 17:01:03.039582: step: 288/531, loss: 0.0001335518463747576 2023-01-22 17:01:04.103123: step: 292/531, loss: 0.004304062575101852 2023-01-22 17:01:05.155841: step: 296/531, loss: 0.008509303443133831 2023-01-22 17:01:06.214401: step: 300/531, loss: 0.016502907499670982 2023-01-22 17:01:07.283856: step: 304/531, loss: 0.0036851379554718733 2023-01-22 17:01:08.353092: step: 308/531, loss: 0.0066000730730593204 2023-01-22 17:01:09.431702: step: 312/531, loss: 0.006111239083111286 2023-01-22 17:01:10.497268: step: 316/531, loss: 0.03327450156211853 2023-01-22 17:01:11.547494: step: 320/531, loss: 0.006460524629801512 2023-01-22 17:01:12.640143: step: 324/531, loss: 0.0033953215461224318 2023-01-22 17:01:13.716080: step: 328/531, loss: 0.01261853240430355 2023-01-22 17:01:14.792563: step: 332/531, loss: 0.00012264428369235247 2023-01-22 17:01:15.857872: step: 336/531, loss: 0.002265886403620243 2023-01-22 17:01:16.913089: step: 340/531, loss: 0.007583374623209238 2023-01-22 17:01:17.974335: step: 344/531, loss: 0.006667454726994038 2023-01-22 
17:01:19.044433: step: 348/531, loss: 0.004515505861490965 2023-01-22 17:01:20.104192: step: 352/531, loss: 0.005921052768826485 2023-01-22 17:01:21.174751: step: 356/531, loss: 0.002086668973788619 2023-01-22 17:01:22.229092: step: 360/531, loss: 0.00901003647595644 2023-01-22 17:01:23.292438: step: 364/531, loss: 0.004519919864833355 2023-01-22 17:01:24.345286: step: 368/531, loss: 0.004887619987130165 2023-01-22 17:01:25.406739: step: 372/531, loss: 0.002692396519705653 2023-01-22 17:01:26.461919: step: 376/531, loss: 0.0013424503849819303 2023-01-22 17:01:27.512559: step: 380/531, loss: 0.0012260869843885303 2023-01-22 17:01:28.579210: step: 384/531, loss: 0.008952656760811806 2023-01-22 17:01:29.656376: step: 388/531, loss: 0.006157420109957457 2023-01-22 17:01:30.717836: step: 392/531, loss: 0.00423513213172555 2023-01-22 17:01:31.786541: step: 396/531, loss: 0.003380093490704894 2023-01-22 17:01:32.844319: step: 400/531, loss: 0.004202052019536495 2023-01-22 17:01:33.893921: step: 404/531, loss: 0.0025893016718328 2023-01-22 17:01:34.941663: step: 408/531, loss: 0.0006840350106358528 2023-01-22 17:01:36.017066: step: 412/531, loss: 0.007730729412287474 2023-01-22 17:01:37.064692: step: 416/531, loss: 0.00012970188981853426 2023-01-22 17:01:38.134008: step: 420/531, loss: 0.00041116910870186985 2023-01-22 17:01:39.192863: step: 424/531, loss: 0.03476627543568611 2023-01-22 17:01:40.282435: step: 428/531, loss: 3.2617645047139376e-05 2023-01-22 17:01:41.357142: step: 432/531, loss: 0.0032686570193618536 2023-01-22 17:01:42.429234: step: 436/531, loss: 0.009400025941431522 2023-01-22 17:01:43.476600: step: 440/531, loss: 0.0042421636171638966 2023-01-22 17:01:44.549033: step: 444/531, loss: 0.004297575913369656 2023-01-22 17:01:45.615051: step: 448/531, loss: 0.001137967687100172 2023-01-22 17:01:46.677737: step: 452/531, loss: 0.004794553853571415 2023-01-22 17:01:47.739605: step: 456/531, loss: 0.005618246737867594 2023-01-22 17:01:48.802180: step: 460/531, loss: 0.010654338635504246 2023-01-22 17:01:49.870948: step: 464/531, loss: 4.0400300349574536e-05 2023-01-22 17:01:50.933545: step: 468/531, loss: 0.005780503153800964 2023-01-22 17:01:52.008849: step: 472/531, loss: 0.001814171439036727 2023-01-22 17:01:53.081263: step: 476/531, loss: 0.003135963575914502 2023-01-22 17:01:54.139208: step: 480/531, loss: 0.00317192985676229 2023-01-22 17:01:55.218949: step: 484/531, loss: 0.0002823463291861117 2023-01-22 17:01:56.272484: step: 488/531, loss: 0.0007620081305503845 2023-01-22 17:01:57.332188: step: 492/531, loss: 0.0005408728611655533 2023-01-22 17:01:58.392064: step: 496/531, loss: 0.001419361331500113 2023-01-22 17:01:59.476307: step: 500/531, loss: 0.018071437254548073 2023-01-22 17:02:00.553345: step: 504/531, loss: 0.0003890968509949744 2023-01-22 17:02:01.601961: step: 508/531, loss: 0.0028337924741208553 2023-01-22 17:02:02.669165: step: 512/531, loss: 0.0005585855687968433 2023-01-22 17:02:03.734981: step: 516/531, loss: 0.001072737155482173 2023-01-22 17:02:04.798482: step: 520/531, loss: 0.004423412028700113 2023-01-22 17:02:05.854522: step: 524/531, loss: 0.009205316193401814 2023-01-22 17:02:06.917043: step: 528/531, loss: 0.00017672343528829515 2023-01-22 17:02:07.987108: step: 532/531, loss: 0.025930123403668404 2023-01-22 17:02:09.045050: step: 536/531, loss: 0.02773277275264263 2023-01-22 17:02:10.112063: step: 540/531, loss: 0.0035834801383316517 2023-01-22 17:02:11.184726: step: 544/531, loss: 0.014484859071671963 2023-01-22 17:02:12.258645: step: 548/531, loss: 
0.01031828485429287 2023-01-22 17:02:13.325895: step: 552/531, loss: 0.034110959619283676 2023-01-22 17:02:14.382611: step: 556/531, loss: 0.004532115533947945 2023-01-22 17:02:15.444607: step: 560/531, loss: 6.901539018144831e-05 2023-01-22 17:02:16.524567: step: 564/531, loss: 0.0022416478022933006 2023-01-22 17:02:17.591689: step: 568/531, loss: 0.00440360838547349 2023-01-22 17:02:18.657256: step: 572/531, loss: 0.012465021573007107 2023-01-22 17:02:19.720799: step: 576/531, loss: 0.0024797343648970127 2023-01-22 17:02:20.799904: step: 580/531, loss: 0.0025166908744722605 2023-01-22 17:02:21.877773: step: 584/531, loss: 0.0018173493444919586 2023-01-22 17:02:22.947173: step: 588/531, loss: 0.003276263130828738 2023-01-22 17:02:24.029443: step: 592/531, loss: 0.010924028232693672 2023-01-22 17:02:25.085185: step: 596/531, loss: 0.0006664815009571612 2023-01-22 17:02:26.146659: step: 600/531, loss: 0.0035071498714387417 2023-01-22 17:02:27.206609: step: 604/531, loss: 0.0025345301255583763 2023-01-22 17:02:28.277993: step: 608/531, loss: 0.006584506947547197 2023-01-22 17:02:29.341233: step: 612/531, loss: 0.002991733141243458 2023-01-22 17:02:30.410840: step: 616/531, loss: 0.0026069479063153267 2023-01-22 17:02:31.466998: step: 620/531, loss: 0.006710594519972801 2023-01-22 17:02:32.531447: step: 624/531, loss: 0.002223886549472809 2023-01-22 17:02:33.616079: step: 628/531, loss: 6.38153069303371e-05 2023-01-22 17:02:34.690463: step: 632/531, loss: 0.0061606005765497684 2023-01-22 17:02:35.760829: step: 636/531, loss: 0.007872034795582294 2023-01-22 17:02:36.830373: step: 640/531, loss: 0.002794067608192563 2023-01-22 17:02:37.893393: step: 644/531, loss: 0.0005641243769787252 2023-01-22 17:02:38.952635: step: 648/531, loss: 0.003788859350606799 2023-01-22 17:02:40.026170: step: 652/531, loss: 0.003150087548419833 2023-01-22 17:02:41.107298: step: 656/531, loss: 0.0016968734562397003 2023-01-22 17:02:42.189898: step: 660/531, loss: 0.003051474690437317 2023-01-22 17:02:43.255929: step: 664/531, loss: 0.0004801126488018781 2023-01-22 17:02:44.313463: step: 668/531, loss: 0.00017927706358022988 2023-01-22 17:02:45.385692: step: 672/531, loss: 0.0018251375295221806 2023-01-22 17:02:46.459554: step: 676/531, loss: 0.008766816928982735 2023-01-22 17:02:47.511309: step: 680/531, loss: 0.010923833586275578 2023-01-22 17:02:48.582057: step: 684/531, loss: 0.0032276224810630083 2023-01-22 17:02:49.634975: step: 688/531, loss: 0.0002460060059092939 2023-01-22 17:02:50.709523: step: 692/531, loss: 0.0022312605287879705 2023-01-22 17:02:51.787973: step: 696/531, loss: 0.0036463169381022453 2023-01-22 17:02:52.845461: step: 700/531, loss: 0.02077023684978485 2023-01-22 17:02:53.917938: step: 704/531, loss: 0.0012957289582118392 2023-01-22 17:02:54.991562: step: 708/531, loss: 0.00034273412893526256 2023-01-22 17:02:56.060764: step: 712/531, loss: 0.002293934812769294 2023-01-22 17:02:57.119044: step: 716/531, loss: 0.007038906216621399 2023-01-22 17:02:58.169472: step: 720/531, loss: 0.008487860672175884 2023-01-22 17:02:59.216169: step: 724/531, loss: 0.004350061994045973 2023-01-22 17:03:00.287643: step: 728/531, loss: 0.0016962428344413638 2023-01-22 17:03:01.359077: step: 732/531, loss: 0.0033008086029440165 2023-01-22 17:03:02.424197: step: 736/531, loss: 0.004522221628576517 2023-01-22 17:03:03.478545: step: 740/531, loss: 0.003127848030999303 2023-01-22 17:03:04.542166: step: 744/531, loss: 0.0016203239792957902 2023-01-22 17:03:05.605643: step: 748/531, loss: 0.006861977744847536 2023-01-22 
17:03:06.665508: step: 752/531, loss: 0.0016648167511448264 2023-01-22 17:03:07.736336: step: 756/531, loss: 0.0 2023-01-22 17:03:08.810449: step: 760/531, loss: 0.0020345740485936403 2023-01-22 17:03:09.912291: step: 764/531, loss: 0.0018157161539420485 2023-01-22 17:03:10.965553: step: 768/531, loss: 0.0 2023-01-22 17:03:12.022445: step: 772/531, loss: 0.000396099901990965 2023-01-22 17:03:13.086841: step: 776/531, loss: 0.003001901088282466 2023-01-22 17:03:14.153671: step: 780/531, loss: 0.001074808300472796 2023-01-22 17:03:15.219846: step: 784/531, loss: 0.002999604679644108 2023-01-22 17:03:16.286945: step: 788/531, loss: 0.004084710497409105 2023-01-22 17:03:17.348961: step: 792/531, loss: 0.002436148002743721 2023-01-22 17:03:18.409465: step: 796/531, loss: 0.0019201913382858038 2023-01-22 17:03:19.476605: step: 800/531, loss: 0.0017997156828641891 2023-01-22 17:03:20.539066: step: 804/531, loss: 0.0024946166668087244 2023-01-22 17:03:21.615102: step: 808/531, loss: 0.0011476814979687333 2023-01-22 17:03:22.691388: step: 812/531, loss: 0.0011169894132763147 2023-01-22 17:03:23.744683: step: 816/531, loss: 0.006832829676568508 2023-01-22 17:03:24.805990: step: 820/531, loss: 0.013472024351358414 2023-01-22 17:03:25.865853: step: 824/531, loss: 0.0006732032052241266 2023-01-22 17:03:26.946094: step: 828/531, loss: 0.008710733614861965 2023-01-22 17:03:28.011995: step: 832/531, loss: 0.0006531117833219469 2023-01-22 17:03:29.080188: step: 836/531, loss: 0.0008852792088873684 2023-01-22 17:03:30.158631: step: 840/531, loss: 0.0002344071981497109 2023-01-22 17:03:31.205095: step: 844/531, loss: 0.004317728336900473 2023-01-22 17:03:32.275276: step: 848/531, loss: 0.0032241877634078264 2023-01-22 17:03:33.326185: step: 852/531, loss: 0.0013686273014172912 2023-01-22 17:03:34.393647: step: 856/531, loss: 0.002086965600028634 2023-01-22 17:03:35.453704: step: 860/531, loss: 0.0011500888504087925 2023-01-22 17:03:36.510695: step: 864/531, loss: 0.000752498337533325 2023-01-22 17:03:37.580364: step: 868/531, loss: 0.004230258986353874 2023-01-22 17:03:38.632720: step: 872/531, loss: 0.0018112084362655878 2023-01-22 17:03:39.702535: step: 876/531, loss: 0.001974749844521284 2023-01-22 17:03:40.757858: step: 880/531, loss: 0.0009569660178385675 2023-01-22 17:03:41.828708: step: 884/531, loss: 0.005202190019190311 2023-01-22 17:03:42.907336: step: 888/531, loss: 0.008720196783542633 2023-01-22 17:03:43.965619: step: 892/531, loss: 0.0006743905250914395 2023-01-22 17:03:45.026064: step: 896/531, loss: 0.0031865478958934546 2023-01-22 17:03:46.078233: step: 900/531, loss: 0.0004073894815519452 2023-01-22 17:03:47.128722: step: 904/531, loss: 0.003885938785970211 2023-01-22 17:03:48.167749: step: 908/531, loss: 0.004631747957319021 2023-01-22 17:03:49.217771: step: 912/531, loss: 0.004460339434444904 2023-01-22 17:03:50.280043: step: 916/531, loss: 0.0013200478861108422 2023-01-22 17:03:51.331874: step: 920/531, loss: 0.00246457033790648 2023-01-22 17:03:52.408676: step: 924/531, loss: 5.09754208906088e-05 2023-01-22 17:03:53.467664: step: 928/531, loss: 0.0022536313626915216 2023-01-22 17:03:54.530748: step: 932/531, loss: 0.007142882794141769 2023-01-22 17:03:55.572802: step: 936/531, loss: 0.0013322465820237994 2023-01-22 17:03:56.644555: step: 940/531, loss: 0.0016735511599108577 2023-01-22 17:03:57.704655: step: 944/531, loss: 0.00809379480779171 2023-01-22 17:03:58.766216: step: 948/531, loss: 0.002075192518532276 2023-01-22 17:03:59.816588: step: 952/531, loss: 1.3628292435896583e-05 
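Note on the per-step entries in this stream: each has the form "<timestamp>: step: <k>/531, loss: <value>", and the single "Loss: 0.005" line printed after step 2124/531 below is presumably an aggregate of these values. A minimal parsing sketch (illustrative only, not part of train.py; the regex and function names are assumptions):

import re
from statistics import mean

# Each training entry in this log has the form
#   "<timestamp>: step: <k>/531, loss: <value>"
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.eE+-]+)")

def step_losses(log_text):
    """Extract every per-step loss value from a chunk of log text."""
    return [float(loss) for _, loss in STEP_RE.findall(log_text)]

# Two entries copied verbatim from the stream above:
sample = ("2023-01-22 17:03:58.766216: step: 948/531, loss: 0.002075192518532276 "
          "2023-01-22 17:03:59.816588: step: 952/531, loss: 1.3628292435896583e-05")
losses = step_losses(sample)
print(len(losses), mean(losses))  # 2 values and their mean; the epoch-level "Loss" is presumably such a mean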
2023-01-22 17:04:00.859172: step: 956/531, loss: 0.0 2023-01-22 17:04:01.911065: step: 960/531, loss: 0.0025110712740570307 2023-01-22 17:04:02.964078: step: 964/531, loss: 0.0028623335529118776 2023-01-22 17:04:04.021039: step: 968/531, loss: 0.0001212401402881369 2023-01-22 17:04:05.083829: step: 972/531, loss: 0.006787054240703583 2023-01-22 17:04:06.174868: step: 976/531, loss: 0.004579642787575722 2023-01-22 17:04:07.250811: step: 980/531, loss: 0.003262066747993231 2023-01-22 17:04:08.303221: step: 984/531, loss: 0.0024431669153273106 2023-01-22 17:04:09.369242: step: 988/531, loss: 3.8667440094286576e-05 2023-01-22 17:04:10.427989: step: 992/531, loss: 0.004330071620643139 2023-01-22 17:04:11.486817: step: 996/531, loss: 0.0027935367543250322 2023-01-22 17:04:12.585203: step: 1000/531, loss: 0.0020482842810451984 2023-01-22 17:04:13.642241: step: 1004/531, loss: 0.004700318910181522 2023-01-22 17:04:14.709057: step: 1008/531, loss: 0.016613643616437912 2023-01-22 17:04:15.768500: step: 1012/531, loss: 0.0017231876263394952 2023-01-22 17:04:16.830900: step: 1016/531, loss: 0.003956877160817385 2023-01-22 17:04:17.890446: step: 1020/531, loss: 0.0001247296022484079 2023-01-22 17:04:18.971342: step: 1024/531, loss: 0.009423875249922276 2023-01-22 17:04:20.035374: step: 1028/531, loss: 0.00022268889006227255 2023-01-22 17:04:21.096771: step: 1032/531, loss: 0.00044816909939981997 2023-01-22 17:04:22.195100: step: 1036/531, loss: 0.0003109208191744983 2023-01-22 17:04:23.252793: step: 1040/531, loss: 0.0025388181675225496 2023-01-22 17:04:24.319216: step: 1044/531, loss: 0.0005637683789245784 2023-01-22 17:04:25.390816: step: 1048/531, loss: 0.005272428505122662 2023-01-22 17:04:26.446124: step: 1052/531, loss: 0.005151810590177774 2023-01-22 17:04:27.510171: step: 1056/531, loss: 0.003676653141155839 2023-01-22 17:04:28.567596: step: 1060/531, loss: 0.001526866341009736 2023-01-22 17:04:29.620189: step: 1064/531, loss: 0.018764158710837364 2023-01-22 17:04:30.700617: step: 1068/531, loss: 0.0035187965258955956 2023-01-22 17:04:31.766620: step: 1072/531, loss: 0.0025187255814671516 2023-01-22 17:04:32.823695: step: 1076/531, loss: 0.0004637420061044395 2023-01-22 17:04:33.876223: step: 1080/531, loss: 0.009744417853653431 2023-01-22 17:04:34.958972: step: 1084/531, loss: 0.023237161338329315 2023-01-22 17:04:36.019754: step: 1088/531, loss: 0.0015469287754967809 2023-01-22 17:04:37.089975: step: 1092/531, loss: 0.002548233373090625 2023-01-22 17:04:38.143515: step: 1096/531, loss: 0.0011249580420553684 2023-01-22 17:04:39.192909: step: 1100/531, loss: 0.21845102310180664 2023-01-22 17:04:40.260250: step: 1104/531, loss: 0.00193855632096529 2023-01-22 17:04:41.314353: step: 1108/531, loss: 0.0017726629739627242 2023-01-22 17:04:42.371616: step: 1112/531, loss: 0.0059312013909220695 2023-01-22 17:04:43.423451: step: 1116/531, loss: 0.005972353741526604 2023-01-22 17:04:44.481208: step: 1120/531, loss: 0.008372385054826736 2023-01-22 17:04:45.552619: step: 1124/531, loss: 0.006263169925659895 2023-01-22 17:04:46.615156: step: 1128/531, loss: 0.006859087385237217 2023-01-22 17:04:47.662082: step: 1132/531, loss: 0.0002497813547961414 2023-01-22 17:04:48.715745: step: 1136/531, loss: 0.0027852808125317097 2023-01-22 17:04:49.769699: step: 1140/531, loss: 0.004294695798307657 2023-01-22 17:04:50.826047: step: 1144/531, loss: 0.008168441243469715 2023-01-22 17:04:51.880684: step: 1148/531, loss: 0.0003930634702555835 2023-01-22 17:04:52.957881: step: 1152/531, loss: 0.0011404338292777538 
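The step counter above advances by 4 per logged entry and reaches 2124 in this epoch, while the denominator stays at 531 = 2124 / 4; together with --batch_size 16 --accumulate_step 4 in the command, this is consistent with one optimizer update (and one log entry) per group of 4 micro-batches, i.e. 531 updates per epoch. A generic gradient-accumulation loop of that shape, shown only as an illustration (this is not the project's train.py; model, optimizer and loader are placeholders):

ACCUMULATE_STEP = 4  # matches --accumulate_step 4 in the command above

def train_one_epoch(model, optimizer, loader):
    # Assumes len(loader) is a multiple of ACCUMULATE_STEP (2124 = 531 * 4 here).
    optimizer.zero_grad()
    for k, batch in enumerate(loader, start=1):    # k counts micro-batches of --batch_size 16
        loss = model(batch) / ACCUMULATE_STEP      # scale so the accumulated gradient is an average
        loss.backward()
        if k % ACCUMULATE_STEP == 0:               # one optimizer update per 4 micro-batches
            optimizer.step()
            optimizer.zero_grad()
            # a "step: {k}/531, loss: ..." line would be emitted at this point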
2023-01-22 17:04:54.017746: step: 1156/531, loss: 0.002245817333459854 2023-01-22 17:04:55.092670: step: 1160/531, loss: 0.007948026061058044 2023-01-22 17:04:56.159932: step: 1164/531, loss: 0.00342952786013484 2023-01-22 17:04:57.210872: step: 1168/531, loss: 0.0018263505771756172 2023-01-22 17:04:58.258570: step: 1172/531, loss: 0.002272512298077345 2023-01-22 17:04:59.313710: step: 1176/531, loss: 0.004708748310804367 2023-01-22 17:05:00.369148: step: 1180/531, loss: 0.0013651384506374598 2023-01-22 17:05:01.426968: step: 1184/531, loss: 0.014489850960671902 2023-01-22 17:05:02.476856: step: 1188/531, loss: 0.007809903007000685 2023-01-22 17:05:03.538050: step: 1192/531, loss: 0.006919751409441233 2023-01-22 17:05:04.600402: step: 1196/531, loss: 0.0029774876311421394 2023-01-22 17:05:05.668860: step: 1200/531, loss: 0.0022975176107138395 2023-01-22 17:05:06.732193: step: 1204/531, loss: 0.0176263228058815 2023-01-22 17:05:07.799402: step: 1208/531, loss: 0.005035478621721268 2023-01-22 17:05:08.855248: step: 1212/531, loss: 0.0012818221002817154 2023-01-22 17:05:09.914221: step: 1216/531, loss: 0.002564843511208892 2023-01-22 17:05:10.969070: step: 1220/531, loss: 0.0016376414569094777 2023-01-22 17:05:12.022568: step: 1224/531, loss: 0.003441035747528076 2023-01-22 17:05:13.087143: step: 1228/531, loss: 0.008165335282683372 2023-01-22 17:05:14.143293: step: 1232/531, loss: 0.004367683548480272 2023-01-22 17:05:15.203414: step: 1236/531, loss: 0.0035028522834181786 2023-01-22 17:05:16.250465: step: 1240/531, loss: 0.0060541522689163685 2023-01-22 17:05:17.307271: step: 1244/531, loss: 0.0040772161446511745 2023-01-22 17:05:18.362195: step: 1248/531, loss: 0.0019783724565058947 2023-01-22 17:05:19.407845: step: 1252/531, loss: 0.0003578769392333925 2023-01-22 17:05:20.460339: step: 1256/531, loss: 0.013649380765855312 2023-01-22 17:05:21.516319: step: 1260/531, loss: 0.0005524872685782611 2023-01-22 17:05:22.568713: step: 1264/531, loss: 0.003236167598515749 2023-01-22 17:05:23.638146: step: 1268/531, loss: 0.012535768561065197 2023-01-22 17:05:24.698994: step: 1272/531, loss: 0.003931407816708088 2023-01-22 17:05:25.756815: step: 1276/531, loss: 0.0002936648088507354 2023-01-22 17:05:26.811485: step: 1280/531, loss: 3.521692269714549e-05 2023-01-22 17:05:27.865565: step: 1284/531, loss: 0.0012437583645805717 2023-01-22 17:05:28.912181: step: 1288/531, loss: 0.001574097783304751 2023-01-22 17:05:29.967898: step: 1292/531, loss: 0.0031973973382264376 2023-01-22 17:05:31.030863: step: 1296/531, loss: 0.0008955801604315639 2023-01-22 17:05:32.077325: step: 1300/531, loss: 0.003448003903031349 2023-01-22 17:05:33.139069: step: 1304/531, loss: 0.0018009128980338573 2023-01-22 17:05:34.185698: step: 1308/531, loss: 0.0025702917482703924 2023-01-22 17:05:35.232154: step: 1312/531, loss: 0.0035841939970850945 2023-01-22 17:05:36.281482: step: 1316/531, loss: 0.000988248037174344 2023-01-22 17:05:37.330738: step: 1320/531, loss: 0.0007358400616794825 2023-01-22 17:05:38.387411: step: 1324/531, loss: 0.00043001319863833487 2023-01-22 17:05:39.450053: step: 1328/531, loss: 0.002343202941119671 2023-01-22 17:05:40.501335: step: 1332/531, loss: 0.003015042282640934 2023-01-22 17:05:41.566147: step: 1336/531, loss: 0.0067059556022286415 2023-01-22 17:05:42.643137: step: 1340/531, loss: 0.0016357195563614368 2023-01-22 17:05:43.692888: step: 1344/531, loss: 0.0002781250514090061 2023-01-22 17:05:44.746136: step: 1348/531, loss: 0.010305825620889664 2023-01-22 17:05:45.816756: step: 1352/531, loss: 
0.008523844182491302 2023-01-22 17:05:46.895322: step: 1356/531, loss: 0.0016000947216525674 2023-01-22 17:05:47.951326: step: 1360/531, loss: 0.0007851168629713356 2023-01-22 17:05:49.013244: step: 1364/531, loss: 0.0037974438164383173 2023-01-22 17:05:50.059342: step: 1368/531, loss: 0.01731755957007408 2023-01-22 17:05:51.133772: step: 1372/531, loss: 0.004646297078579664 2023-01-22 17:05:52.188286: step: 1376/531, loss: 0.04282676428556442 2023-01-22 17:05:53.260932: step: 1380/531, loss: 0.002410672605037689 2023-01-22 17:05:54.321255: step: 1384/531, loss: 0.008816739544272423 2023-01-22 17:05:55.395110: step: 1388/531, loss: 0.006698832847177982 2023-01-22 17:05:56.461987: step: 1392/531, loss: 0.007065494079142809 2023-01-22 17:05:57.520349: step: 1396/531, loss: 0.0043277801014482975 2023-01-22 17:05:58.579647: step: 1400/531, loss: 0.009176273830235004 2023-01-22 17:05:59.624268: step: 1404/531, loss: 0.0016439496539533138 2023-01-22 17:06:00.674287: step: 1408/531, loss: 5.334922661859309e-06 2023-01-22 17:06:01.732090: step: 1412/531, loss: 0.0016743950545787811 2023-01-22 17:06:02.782760: step: 1416/531, loss: 0.009108916856348515 2023-01-22 17:06:03.846564: step: 1420/531, loss: 0.0007434654980897903 2023-01-22 17:06:04.899734: step: 1424/531, loss: 0.0014274800196290016 2023-01-22 17:06:05.949088: step: 1428/531, loss: 0.0015580813633278012 2023-01-22 17:06:07.004725: step: 1432/531, loss: 0.002763007767498493 2023-01-22 17:06:08.056804: step: 1436/531, loss: 0.0046341298148036 2023-01-22 17:06:09.103669: step: 1440/531, loss: 0.009754196740686893 2023-01-22 17:06:10.174084: step: 1444/531, loss: 0.005951341707259417 2023-01-22 17:06:11.220059: step: 1448/531, loss: 0.0016500568017363548 2023-01-22 17:06:12.297554: step: 1452/531, loss: 0.032500818371772766 2023-01-22 17:06:13.348846: step: 1456/531, loss: 0.0002141181903425604 2023-01-22 17:06:14.397595: step: 1460/531, loss: 0.003507954766973853 2023-01-22 17:06:15.455775: step: 1464/531, loss: 0.016222190111875534 2023-01-22 17:06:16.502248: step: 1468/531, loss: 0.0016991709126159549 2023-01-22 17:06:17.570309: step: 1472/531, loss: 0.0007061606738716364 2023-01-22 17:06:18.633686: step: 1476/531, loss: 0.010753964073956013 2023-01-22 17:06:19.686637: step: 1480/531, loss: 0.0027052199002355337 2023-01-22 17:06:20.740171: step: 1484/531, loss: 0.0032335391733795404 2023-01-22 17:06:21.783722: step: 1488/531, loss: 0.0012604587245732546 2023-01-22 17:06:22.851136: step: 1492/531, loss: 0.0024454377125948668 2023-01-22 17:06:23.908096: step: 1496/531, loss: 0.0001314998953603208 2023-01-22 17:06:24.971869: step: 1500/531, loss: 0.0002466514997649938 2023-01-22 17:06:26.039735: step: 1504/531, loss: 0.025304365903139114 2023-01-22 17:06:27.115779: step: 1508/531, loss: 0.007147661410272121 2023-01-22 17:06:28.175181: step: 1512/531, loss: 0.002998407930135727 2023-01-22 17:06:29.228770: step: 1516/531, loss: 0.0029829959385097027 2023-01-22 17:06:30.299205: step: 1520/531, loss: 0.0007618461386300623 2023-01-22 17:06:31.360706: step: 1524/531, loss: 0.0012937746942043304 2023-01-22 17:06:32.412733: step: 1528/531, loss: 0.010385893285274506 2023-01-22 17:06:33.470647: step: 1532/531, loss: 0.0027797003276646137 2023-01-22 17:06:34.542574: step: 1536/531, loss: 7.323131285374984e-05 2023-01-22 17:06:35.598788: step: 1540/531, loss: 0.013378430157899857 2023-01-22 17:06:36.661781: step: 1544/531, loss: 0.007806778885424137 2023-01-22 17:06:37.701105: step: 1548/531, loss: 0.0008553255465812981 2023-01-22 17:06:38.748123: 
step: 1552/531, loss: 0.0055023678578436375 2023-01-22 17:06:39.798852: step: 1556/531, loss: 0.0005368198035284877 2023-01-22 17:06:40.854140: step: 1560/531, loss: 0.0013809683732688427 2023-01-22 17:06:41.934923: step: 1564/531, loss: 0.036582279950380325 2023-01-22 17:06:42.999707: step: 1568/531, loss: 0.00589761883020401 2023-01-22 17:06:44.068161: step: 1572/531, loss: 0.00021944929903838784 2023-01-22 17:06:45.113597: step: 1576/531, loss: 0.00018480616563465446 2023-01-22 17:06:46.177377: step: 1580/531, loss: 0.003983218688517809 2023-01-22 17:06:47.216386: step: 1584/531, loss: 9.13636467885226e-05 2023-01-22 17:06:48.267399: step: 1588/531, loss: 0.0002771069994196296 2023-01-22 17:06:49.336470: step: 1592/531, loss: 0.005496619734913111 2023-01-22 17:06:50.400956: step: 1596/531, loss: 0.004821850918233395 2023-01-22 17:06:51.470352: step: 1600/531, loss: 0.0012605211231857538 2023-01-22 17:06:52.521003: step: 1604/531, loss: 0.002098711906000972 2023-01-22 17:06:53.571642: step: 1608/531, loss: 0.003607384394854307 2023-01-22 17:06:54.617050: step: 1612/531, loss: 0.001945766736753285 2023-01-22 17:06:55.672663: step: 1616/531, loss: 0.009840852580964565 2023-01-22 17:06:56.751009: step: 1620/531, loss: 0.006122017744928598 2023-01-22 17:06:57.816114: step: 1624/531, loss: 0.005689968355000019 2023-01-22 17:06:58.867834: step: 1628/531, loss: 0.017472121864557266 2023-01-22 17:06:59.957832: step: 1632/531, loss: 0.026586653664708138 2023-01-22 17:07:01.006566: step: 1636/531, loss: 0.0009049536311067641 2023-01-22 17:07:02.065474: step: 1640/531, loss: 0.004547577351331711 2023-01-22 17:07:03.118355: step: 1644/531, loss: 0.0004844518261961639 2023-01-22 17:07:04.172711: step: 1648/531, loss: 0.01065845601260662 2023-01-22 17:07:05.231018: step: 1652/531, loss: 0.0036239465698599815 2023-01-22 17:07:06.291128: step: 1656/531, loss: 0.0030450609046965837 2023-01-22 17:07:07.347625: step: 1660/531, loss: 0.001044715172611177 2023-01-22 17:07:08.420162: step: 1664/531, loss: 0.0013334108516573906 2023-01-22 17:07:09.469243: step: 1668/531, loss: 0.006684655789285898 2023-01-22 17:07:10.542192: step: 1672/531, loss: 0.004914723336696625 2023-01-22 17:07:11.595344: step: 1676/531, loss: 0.0013926565879955888 2023-01-22 17:07:12.673951: step: 1680/531, loss: 0.033084314316511154 2023-01-22 17:07:13.734563: step: 1684/531, loss: 0.009538733400404453 2023-01-22 17:07:14.784234: step: 1688/531, loss: 9.470694931223989e-05 2023-01-22 17:07:15.822915: step: 1692/531, loss: 0.003146019298583269 2023-01-22 17:07:16.895121: step: 1696/531, loss: 0.003303182777017355 2023-01-22 17:07:17.955471: step: 1700/531, loss: 0.001183894113637507 2023-01-22 17:07:19.022084: step: 1704/531, loss: 0.004848706070333719 2023-01-22 17:07:20.080367: step: 1708/531, loss: 0.07924825698137283 2023-01-22 17:07:21.153234: step: 1712/531, loss: 0.005933256819844246 2023-01-22 17:07:22.219406: step: 1716/531, loss: 0.003230552189052105 2023-01-22 17:07:23.296425: step: 1720/531, loss: 0.0013347519561648369 2023-01-22 17:07:24.337348: step: 1724/531, loss: 0.000424595782533288 2023-01-22 17:07:25.392971: step: 1728/531, loss: 0.0002757837646640837 2023-01-22 17:07:26.475030: step: 1732/531, loss: 0.018341783434152603 2023-01-22 17:07:27.529467: step: 1736/531, loss: 0.007935325615108013 2023-01-22 17:07:28.574628: step: 1740/531, loss: 0.009496736340224743 2023-01-22 17:07:29.619467: step: 1744/531, loss: 0.003631721716374159 2023-01-22 17:07:30.670787: step: 1748/531, loss: 0.004995123017579317 2023-01-22 
17:07:31.730549: step: 1752/531, loss: 0.002949915360659361 2023-01-22 17:07:32.806375: step: 1756/531, loss: 0.002674936316907406 2023-01-22 17:07:33.864232: step: 1760/531, loss: 0.002306596841663122 2023-01-22 17:07:34.914810: step: 1764/531, loss: 0.0006740200333297253 2023-01-22 17:07:35.960303: step: 1768/531, loss: 0.000765511707868427 2023-01-22 17:07:37.010356: step: 1772/531, loss: 0.0007559226942248642 2023-01-22 17:07:38.058144: step: 1776/531, loss: 0.008602048270404339 2023-01-22 17:07:39.106924: step: 1780/531, loss: 0.0010769534856081009 2023-01-22 17:07:40.183153: step: 1784/531, loss: 0.013192391023039818 2023-01-22 17:07:41.244794: step: 1788/531, loss: 0.0029705294873565435 2023-01-22 17:07:42.336802: step: 1792/531, loss: 0.003922355826944113 2023-01-22 17:07:43.395599: step: 1796/531, loss: 0.00020109360048081726 2023-01-22 17:07:44.463207: step: 1800/531, loss: 0.009328101761639118 2023-01-22 17:07:45.523601: step: 1804/531, loss: 0.000808170938398689 2023-01-22 17:07:46.594611: step: 1808/531, loss: 0.006595465820282698 2023-01-22 17:07:47.647226: step: 1812/531, loss: 0.00468902662396431 2023-01-22 17:07:48.690683: step: 1816/531, loss: 0.00102903856895864 2023-01-22 17:07:49.753133: step: 1820/531, loss: 0.00034604378743097186 2023-01-22 17:07:50.816592: step: 1824/531, loss: 0.0037786783650517464 2023-01-22 17:07:51.888777: step: 1828/531, loss: 0.002887751441448927 2023-01-22 17:07:52.947708: step: 1832/531, loss: 0.0006598402396775782 2023-01-22 17:07:54.009475: step: 1836/531, loss: 0.027080411091446877 2023-01-22 17:07:55.074977: step: 1840/531, loss: 0.00883109588176012 2023-01-22 17:07:56.121702: step: 1844/531, loss: 0.0030366857536137104 2023-01-22 17:07:57.163904: step: 1848/531, loss: 4.7267636546166614e-05 2023-01-22 17:07:58.216975: step: 1852/531, loss: 0.00014116977399680763 2023-01-22 17:07:59.286312: step: 1856/531, loss: 0.003972511272877455 2023-01-22 17:08:00.336279: step: 1860/531, loss: 0.00021782699332106858 2023-01-22 17:08:01.404115: step: 1864/531, loss: 0.004587695002555847 2023-01-22 17:08:02.454090: step: 1868/531, loss: 0.0024384784046560526 2023-01-22 17:08:03.510445: step: 1872/531, loss: 0.0008537122630514205 2023-01-22 17:08:04.560877: step: 1876/531, loss: 0.0025096184108406305 2023-01-22 17:08:05.631262: step: 1880/531, loss: 0.0006997943273745477 2023-01-22 17:08:06.683690: step: 1884/531, loss: 0.0035982350818812847 2023-01-22 17:08:07.736371: step: 1888/531, loss: 0.03471110761165619 2023-01-22 17:08:08.802279: step: 1892/531, loss: 0.0034948561806231737 2023-01-22 17:08:09.861234: step: 1896/531, loss: 0.010049194097518921 2023-01-22 17:08:10.935276: step: 1900/531, loss: 0.00663770642131567 2023-01-22 17:08:11.990077: step: 1904/531, loss: 0.0009335716022178531 2023-01-22 17:08:13.055974: step: 1908/531, loss: 0.0035138465464115143 2023-01-22 17:08:14.114833: step: 1912/531, loss: 0.0020982068963348866 2023-01-22 17:08:15.189243: step: 1916/531, loss: 0.002704428741708398 2023-01-22 17:08:16.258700: step: 1920/531, loss: 0.0017310682451352477 2023-01-22 17:08:17.311632: step: 1924/531, loss: 0.006131255999207497 2023-01-22 17:08:18.376054: step: 1928/531, loss: 1.450459649277036e-06 2023-01-22 17:08:19.419872: step: 1932/531, loss: 0.0010957487393170595 2023-01-22 17:08:20.479584: step: 1936/531, loss: 0.0002672454575076699 2023-01-22 17:08:21.545705: step: 1940/531, loss: 0.002202295698225498 2023-01-22 17:08:22.616292: step: 1944/531, loss: 0.0030842034611850977 2023-01-22 17:08:23.695238: step: 1948/531, loss: 
0.0033153227996081114 2023-01-22 17:08:24.773830: step: 1952/531, loss: 0.003676784224808216 2023-01-22 17:08:25.821531: step: 1956/531, loss: 0.0023453200701624155 2023-01-22 17:08:26.887125: step: 1960/531, loss: 0.013029161840677261 2023-01-22 17:08:27.938437: step: 1964/531, loss: 0.006316058337688446 2023-01-22 17:08:28.999145: step: 1968/531, loss: 0.01072138175368309 2023-01-22 17:08:30.067131: step: 1972/531, loss: 0.006450321059674025 2023-01-22 17:08:31.134137: step: 1976/531, loss: 0.005556017160415649 2023-01-22 17:08:32.193359: step: 1980/531, loss: 0.0004038731858599931 2023-01-22 17:08:33.258426: step: 1984/531, loss: 0.005801858380436897 2023-01-22 17:08:34.312612: step: 1988/531, loss: 0.024494579061865807 2023-01-22 17:08:35.369962: step: 1992/531, loss: 0.005121651571244001 2023-01-22 17:08:36.430473: step: 1996/531, loss: 0.000211178237805143 2023-01-22 17:08:37.474060: step: 2000/531, loss: 0.0012726139975711703 2023-01-22 17:08:38.545994: step: 2004/531, loss: 0.01274011842906475 2023-01-22 17:08:39.617323: step: 2008/531, loss: 0.0037234495393931866 2023-01-22 17:08:40.672512: step: 2012/531, loss: 0.0001431824202882126 2023-01-22 17:08:41.753297: step: 2016/531, loss: 0.006955960299819708 2023-01-22 17:08:42.827527: step: 2020/531, loss: 0.0070723253302276134 2023-01-22 17:08:43.879540: step: 2024/531, loss: 0.0011288494570180774 2023-01-22 17:08:44.942553: step: 2028/531, loss: 0.0011688803788274527 2023-01-22 17:08:46.011144: step: 2032/531, loss: 0.004531980957835913 2023-01-22 17:08:47.066834: step: 2036/531, loss: 0.0015466450713574886 2023-01-22 17:08:48.120935: step: 2040/531, loss: 0.0032513076439499855 2023-01-22 17:08:49.172411: step: 2044/531, loss: 0.004711893852800131 2023-01-22 17:08:50.221968: step: 2048/531, loss: 0.02056264318525791 2023-01-22 17:08:51.270868: step: 2052/531, loss: 0.019057223573327065 2023-01-22 17:08:52.319981: step: 2056/531, loss: 0.00039366702549159527 2023-01-22 17:08:53.374123: step: 2060/531, loss: 0.006560617592185736 2023-01-22 17:08:54.435990: step: 2064/531, loss: 0.000678473967127502 2023-01-22 17:08:55.481555: step: 2068/531, loss: 0.0009545246139168739 2023-01-22 17:08:56.551812: step: 2072/531, loss: 0.0034764143638312817 2023-01-22 17:08:57.624414: step: 2076/531, loss: 0.002918965183198452 2023-01-22 17:08:58.678592: step: 2080/531, loss: 0.0026115260552614927 2023-01-22 17:08:59.727837: step: 2084/531, loss: 0.004642174579203129 2023-01-22 17:09:00.799492: step: 2088/531, loss: 0.009831012226641178 2023-01-22 17:09:01.863447: step: 2092/531, loss: 0.004661223851144314 2023-01-22 17:09:02.940687: step: 2096/531, loss: 0.003432834055274725 2023-01-22 17:09:04.008198: step: 2100/531, loss: 0.00026433152379468083 2023-01-22 17:09:05.067121: step: 2104/531, loss: 0.011200176551938057 2023-01-22 17:09:06.114484: step: 2108/531, loss: 0.003517532255500555 2023-01-22 17:09:07.196409: step: 2112/531, loss: 0.00347131141461432 2023-01-22 17:09:08.255001: step: 2116/531, loss: 0.0021918402053415775 2023-01-22 17:09:09.322103: step: 2120/531, loss: 0.005421316716820002 2023-01-22 17:09:10.367952: step: 2124/531, loss: 0.0059256297536194324 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35606501057082457, 'r': 0.31897490530303035, 'f1': 0.33650099900099906}, 'combined': 0.24794810452705193, 'stategy': 1, 'epoch': 12} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 
0.6261682242990655}, 'slot': {'p': 0.33611333817627725, 'r': 0.2772631688566041, 'f1': 0.3038650752652002}, 'combined': 0.19027065460531228, 'stategy': 1, 'epoch': 12}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3333247866754184, 'r': 0.3504021476625841, 'f1': 0.34165019762845844}, 'combined': 0.25174225088412727, 'stategy': 1, 'epoch': 12}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3646350745891218, 'r': 0.3031501702174665, 'f1': 0.3310620765841165}, 'combined': 0.20516523055917077, 'stategy': 1, 'epoch': 12}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165824142156863, 'r': 0.32679475015812776, 'f1': 0.3216075319016496}, 'combined': 0.2369739708748997, 'stategy': 1, 'epoch': 12}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3670294431754898, 'r': 0.2900395418945552, 'f1': 0.3240239730660384}, 'combined': 0.2149465959943027, 'stategy': 1, 'epoch': 12}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 12}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'stategy': 1, 'epoch': 12}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 12}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 
0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 13 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 17:11:39.944946: step: 4/531, loss: 0.0015524713089689612 2023-01-22 17:11:41.002794: step: 8/531, loss: 0.01499737799167633 2023-01-22 17:11:42.074626: step: 12/531, loss: 0.009471848607063293 2023-01-22 17:11:43.114958: step: 16/531, loss: 0.0030436008237302303 2023-01-22 17:11:44.164644: step: 20/531, loss: 0.00910823792219162 2023-01-22 17:11:45.211972: step: 24/531, loss: 0.0075300512835383415 2023-01-22 17:11:46.256505: step: 28/531, loss: 4.1462368244538084e-05 2023-01-22 17:11:47.305577: step: 32/531, loss: 0.026324231177568436 2023-01-22 17:11:48.355986: step: 36/531, loss: 0.0018935796106234193 2023-01-22 17:11:49.395920: step: 40/531, loss: 8.813742169877514e-05 2023-01-22 17:11:50.456704: step: 44/531, loss: 0.00015770709433127195 2023-01-22 17:11:51.514924: step: 48/531, loss: 0.0022419379092752934 2023-01-22 17:11:52.588675: step: 52/531, loss: 0.0032461993396282196 2023-01-22 17:11:53.658600: step: 56/531, loss: 4.696365522249835e-06 2023-01-22 17:11:54.716701: step: 60/531, loss: 0.00043738234671764076 2023-01-22 17:11:55.756497: step: 64/531, loss: 0.0004448320541996509 2023-01-22 17:11:56.823552: step: 68/531, loss: 0.0012952450197190046 2023-01-22 17:11:57.891939: step: 72/531, loss: 0.003757863538339734 2023-01-22 17:11:58.956232: step: 76/531, loss: 0.001192079158499837 2023-01-22 17:12:00.007122: step: 80/531, loss: 0.0009926131460815668 2023-01-22 17:12:01.058645: step: 84/531, loss: 0.007381749805063009 2023-01-22 17:12:02.113836: step: 88/531, loss: 0.0005179202416911721 2023-01-22 17:12:03.170188: step: 92/531, loss: 0.0003311268810648471 2023-01-22 17:12:04.230805: step: 96/531, loss: 0.024976368993520737 2023-01-22 17:12:05.297231: step: 100/531, loss: 0.0017767202807590365 2023-01-22 17:12:06.361371: step: 104/531, loss: 0.003380588488653302 2023-01-22 17:12:07.419209: step: 108/531, loss: 0.00627892604097724 2023-01-22 17:12:08.479493: step: 112/531, loss: 0.0040617273189127445 2023-01-22 17:12:09.551247: step: 116/531, loss: 0.009586027823388577 2023-01-22 17:12:10.618543: step: 120/531, loss: 0.0023350792471319437 2023-01-22 17:12:11.663667: step: 124/531, loss: 0.0012568285455927253 2023-01-22 17:12:12.732841: step: 128/531, loss: 0.003740791231393814 2023-01-22 17:12:13.798312: step: 132/531, loss: 0.002929637674242258 2023-01-22 17:12:14.865257: step: 136/531, loss: 0.007804454304277897 2023-01-22 17:12:15.924025: step: 140/531, loss: 0.00035720583400689065 2023-01-22 17:12:16.996779: step: 144/531, loss: 0.0015390024054795504 2023-01-22 17:12:18.052683: step: 148/531, loss: 0.00013543761451728642 2023-01-22 17:12:19.119344: step: 152/531, loss: 0.005867954809218645 2023-01-22 17:12:20.200320: step: 156/531, loss: 0.0020808361005038023 2023-01-22 17:12:21.268061: step: 160/531, loss: 0.001825898652896285 2023-01-22 17:12:22.325413: step: 164/531, loss: 0.011046471074223518 2023-01-22 17:12:23.394425: step: 
168/531, loss: 0.0024260778445750475 2023-01-22 17:12:24.462827: step: 172/531, loss: 0.0035714274272322655 2023-01-22 17:12:25.522862: step: 176/531, loss: 0.0005712392157875001 2023-01-22 17:12:26.591832: step: 180/531, loss: 0.00995740294456482 2023-01-22 17:12:27.636546: step: 184/531, loss: 0.002682530088350177 2023-01-22 17:12:28.680614: step: 188/531, loss: 0.001276618568226695 2023-01-22 17:12:29.733716: step: 192/531, loss: 0.0022227729205042124 2023-01-22 17:12:30.797054: step: 196/531, loss: 0.00020986668823752552 2023-01-22 17:12:31.857925: step: 200/531, loss: 0.005629145074635744 2023-01-22 17:12:32.910103: step: 204/531, loss: 0.00013388696243055165 2023-01-22 17:12:33.975245: step: 208/531, loss: 0.0024597765877842903 2023-01-22 17:12:35.027371: step: 212/531, loss: 0.0035304490011185408 2023-01-22 17:12:36.100872: step: 216/531, loss: 0.0005620679003186524 2023-01-22 17:12:37.154318: step: 220/531, loss: 0.0004899389459751546 2023-01-22 17:12:38.206394: step: 224/531, loss: 0.0001721788285067305 2023-01-22 17:12:39.271405: step: 228/531, loss: 1.9541201254469343e-05 2023-01-22 17:12:40.335608: step: 232/531, loss: 0.0029747115913778543 2023-01-22 17:12:41.399790: step: 236/531, loss: 0.0004925038083456457 2023-01-22 17:12:42.463975: step: 240/531, loss: 0.002626052824780345 2023-01-22 17:12:43.534476: step: 244/531, loss: 0.004584931768476963 2023-01-22 17:12:44.578419: step: 248/531, loss: 0.00014285094221122563 2023-01-22 17:12:45.643986: step: 252/531, loss: 0.0025896625593304634 2023-01-22 17:12:46.706572: step: 256/531, loss: 0.0011560394195839763 2023-01-22 17:12:47.756717: step: 260/531, loss: 0.0002148125640815124 2023-01-22 17:12:48.846190: step: 264/531, loss: 1.130525106418645e-05 2023-01-22 17:12:49.909316: step: 268/531, loss: 0.00018567038932815194 2023-01-22 17:12:50.976081: step: 272/531, loss: 0.005343783181160688 2023-01-22 17:12:52.029026: step: 276/531, loss: 2.7842357667395845e-05 2023-01-22 17:12:53.098233: step: 280/531, loss: 0.0005529717309400439 2023-01-22 17:12:54.162911: step: 284/531, loss: 0.015849506482481956 2023-01-22 17:12:55.213137: step: 288/531, loss: 3.4106898283425835e-07 2023-01-22 17:12:56.278887: step: 292/531, loss: 0.0022414561826735735 2023-01-22 17:12:57.340914: step: 296/531, loss: 0.0049082934856414795 2023-01-22 17:12:58.403739: step: 300/531, loss: 0.004191167186945677 2023-01-22 17:12:59.453948: step: 304/531, loss: 0.004176117479801178 2023-01-22 17:13:00.506006: step: 308/531, loss: 0.0010842847405001521 2023-01-22 17:13:01.547212: step: 312/531, loss: 0.002646995708346367 2023-01-22 17:13:02.608539: step: 316/531, loss: 0.0012743031838908792 2023-01-22 17:13:03.692163: step: 320/531, loss: 0.014964173547923565 2023-01-22 17:13:04.759527: step: 324/531, loss: 0.0018465067259967327 2023-01-22 17:13:05.811562: step: 328/531, loss: 0.00042861539986915886 2023-01-22 17:13:06.868966: step: 332/531, loss: 0.0006880436558276415 2023-01-22 17:13:07.930359: step: 336/531, loss: 0.010439034551382065 2023-01-22 17:13:08.986075: step: 340/531, loss: 0.0018505294574424624 2023-01-22 17:13:10.040753: step: 344/531, loss: 0.015337217599153519 2023-01-22 17:13:11.104581: step: 348/531, loss: 0.001650273334234953 2023-01-22 17:13:12.183115: step: 352/531, loss: 0.0044954800978302956 2023-01-22 17:13:13.256755: step: 356/531, loss: 0.0008822939125820994 2023-01-22 17:13:14.310314: step: 360/531, loss: 0.013922404497861862 2023-01-22 17:13:15.376812: step: 364/531, loss: 0.0009341145632788539 2023-01-22 17:13:16.436056: step: 368/531, 
loss: 0.005473637022078037 2023-01-22 17:13:17.505070: step: 372/531, loss: 0.00021152434055693448 2023-01-22 17:13:18.578667: step: 376/531, loss: 0.0008005455019883811 2023-01-22 17:13:19.631851: step: 380/531, loss: 0.016201358288526535 2023-01-22 17:13:20.733147: step: 384/531, loss: 0.0024131005629897118 2023-01-22 17:13:21.794693: step: 388/531, loss: 0.0035366006195545197 2023-01-22 17:13:22.843565: step: 392/531, loss: 0.0006364466971717775 2023-01-22 17:13:23.902028: step: 396/531, loss: 0.0036750156432390213 2023-01-22 17:13:24.954408: step: 400/531, loss: 0.010198943316936493 2023-01-22 17:13:26.018280: step: 404/531, loss: 0.0046410756185650826 2023-01-22 17:13:27.075568: step: 408/531, loss: 0.0012746219290420413 2023-01-22 17:13:28.151342: step: 412/531, loss: 0.004716424737125635 2023-01-22 17:13:29.212346: step: 416/531, loss: 0.0008412728784605861 2023-01-22 17:13:30.287496: step: 420/531, loss: 0.0016794379334896803 2023-01-22 17:13:31.364915: step: 424/531, loss: 0.00033171536051668227 2023-01-22 17:13:32.446266: step: 428/531, loss: 0.000693941256031394 2023-01-22 17:13:33.521914: step: 432/531, loss: 0.0008372759912163019 2023-01-22 17:13:34.589464: step: 436/531, loss: 0.002383533166721463 2023-01-22 17:13:35.650051: step: 440/531, loss: 0.00023720662284176797 2023-01-22 17:13:36.744767: step: 444/531, loss: 6.657603080384433e-06 2023-01-22 17:13:37.827839: step: 448/531, loss: 0.00045885637518949807 2023-01-22 17:13:38.905237: step: 452/531, loss: 0.0025257784873247147 2023-01-22 17:13:39.977720: step: 456/531, loss: 0.00575175741687417 2023-01-22 17:13:41.031130: step: 460/531, loss: 0.00470378901809454 2023-01-22 17:13:42.106843: step: 464/531, loss: 0.001136065344326198 2023-01-22 17:13:43.162183: step: 468/531, loss: 0.004996010102331638 2023-01-22 17:13:44.229035: step: 472/531, loss: 0.0033562418539077044 2023-01-22 17:13:45.310565: step: 476/531, loss: 0.00822437833994627 2023-01-22 17:13:46.388229: step: 480/531, loss: 0.02308301255106926 2023-01-22 17:13:47.444492: step: 484/531, loss: 0.007869339548051357 2023-01-22 17:13:48.510161: step: 488/531, loss: 0.00032674174872227013 2023-01-22 17:13:49.568570: step: 492/531, loss: 0.0028445899952203035 2023-01-22 17:13:50.641584: step: 496/531, loss: 0.00431124959141016 2023-01-22 17:13:51.709093: step: 500/531, loss: 0.0010724717285484076 2023-01-22 17:13:52.791208: step: 504/531, loss: 0.010192510671913624 2023-01-22 17:13:53.855197: step: 508/531, loss: 0.003565502353012562 2023-01-22 17:13:54.923835: step: 512/531, loss: 0.003772348864004016 2023-01-22 17:13:55.988883: step: 516/531, loss: 0.0031863315962255 2023-01-22 17:13:57.047659: step: 520/531, loss: 0.0030434601940214634 2023-01-22 17:13:58.110873: step: 524/531, loss: 0.00029302132315933704 2023-01-22 17:13:59.191869: step: 528/531, loss: 0.0015231528086587787 2023-01-22 17:14:00.254656: step: 532/531, loss: 0.0023154811933636665 2023-01-22 17:14:01.308913: step: 536/531, loss: 0.0013210881734266877 2023-01-22 17:14:02.367896: step: 540/531, loss: 0.0014938501408323646 2023-01-22 17:14:03.445003: step: 544/531, loss: 0.0035288657527416945 2023-01-22 17:14:04.523610: step: 548/531, loss: 0.0019634172786027193 2023-01-22 17:14:05.586490: step: 552/531, loss: 6.07566016697092e-06 2023-01-22 17:14:06.679638: step: 556/531, loss: 0.0037721225526183844 2023-01-22 17:14:07.758024: step: 560/531, loss: 0.0002379878278588876 2023-01-22 17:14:08.827043: step: 564/531, loss: 0.012581178918480873 2023-01-22 17:14:09.882338: step: 568/531, loss: 
0.00033885397715494037 2023-01-22 17:14:10.952089: step: 572/531, loss: 0.0029925929848104715 2023-01-22 17:14:12.009801: step: 576/531, loss: 0.002095448086038232 2023-01-22 17:14:13.066884: step: 580/531, loss: 5.0195154472021386e-05 2023-01-22 17:14:14.128811: step: 584/531, loss: 0.00031893744016997516 2023-01-22 17:14:15.206239: step: 588/531, loss: 0.010178529657423496 2023-01-22 17:14:16.261239: step: 592/531, loss: 0.0002997489646077156 2023-01-22 17:14:17.327603: step: 596/531, loss: 0.00027759699150919914 2023-01-22 17:14:18.401708: step: 600/531, loss: 0.0005925332079641521 2023-01-22 17:14:19.478645: step: 604/531, loss: 0.0025195046328008175 2023-01-22 17:14:20.558432: step: 608/531, loss: 0.0005067794118076563 2023-01-22 17:14:21.630825: step: 612/531, loss: 0.001997646875679493 2023-01-22 17:14:22.707039: step: 616/531, loss: 0.0013828689698129892 2023-01-22 17:14:23.761423: step: 620/531, loss: 0.0013104738900437951 2023-01-22 17:14:24.824163: step: 624/531, loss: 0.0012215077877044678 2023-01-22 17:14:25.892170: step: 628/531, loss: 0.0014896633801981807 2023-01-22 17:14:26.960270: step: 632/531, loss: 0.0002536572574172169 2023-01-22 17:14:28.016158: step: 636/531, loss: 0.0032729008235037327 2023-01-22 17:14:29.076208: step: 640/531, loss: 0.005137813743203878 2023-01-22 17:14:30.136764: step: 644/531, loss: 0.005632741842418909 2023-01-22 17:14:31.210826: step: 648/531, loss: 0.0017411921871826053 2023-01-22 17:14:32.276405: step: 652/531, loss: 0.005432858597487211 2023-01-22 17:14:33.335590: step: 656/531, loss: 0.007814447395503521 2023-01-22 17:14:34.388898: step: 660/531, loss: 0.0018666234100237489 2023-01-22 17:14:35.454732: step: 664/531, loss: 0.004743272438645363 2023-01-22 17:14:36.536056: step: 668/531, loss: 0.0026119311805814505 2023-01-22 17:14:37.597953: step: 672/531, loss: 0.005119148641824722 2023-01-22 17:14:38.661940: step: 676/531, loss: 0.0009645094978623092 2023-01-22 17:14:39.719815: step: 680/531, loss: 0.002783802803605795 2023-01-22 17:14:40.786836: step: 684/531, loss: 0.0031408776994794607 2023-01-22 17:14:41.875499: step: 688/531, loss: 8.268315286841244e-05 2023-01-22 17:14:42.955518: step: 692/531, loss: 0.00182149198371917 2023-01-22 17:14:44.013598: step: 696/531, loss: 0.0017728491220623255 2023-01-22 17:14:45.083321: step: 700/531, loss: 0.005128629505634308 2023-01-22 17:14:46.140697: step: 704/531, loss: 0.02272040955722332 2023-01-22 17:14:47.194660: step: 708/531, loss: 0.0009333566995337605 2023-01-22 17:14:48.255101: step: 712/531, loss: 0.0015171220293268561 2023-01-22 17:14:49.325049: step: 716/531, loss: 0.005915905814617872 2023-01-22 17:14:50.376500: step: 720/531, loss: 8.707794768270105e-05 2023-01-22 17:14:51.448178: step: 724/531, loss: 0.0015791140031069517 2023-01-22 17:14:52.507322: step: 728/531, loss: 0.00224265456199646 2023-01-22 17:14:53.581848: step: 732/531, loss: 0.014692768454551697 2023-01-22 17:14:54.659767: step: 736/531, loss: 0.0011341115459799767 2023-01-22 17:14:55.707038: step: 740/531, loss: 0.00010615502105792984 2023-01-22 17:14:56.772127: step: 744/531, loss: 0.0009733618353493512 2023-01-22 17:14:57.830255: step: 748/531, loss: 0.000630239665042609 2023-01-22 17:14:58.887084: step: 752/531, loss: 6.670024595223367e-05 2023-01-22 17:14:59.942991: step: 756/531, loss: 0.002582815708592534 2023-01-22 17:15:01.014669: step: 760/531, loss: 0.0065236459486186504 2023-01-22 17:15:02.068869: step: 764/531, loss: 0.0005398447392508388 2023-01-22 17:15:03.153407: step: 768/531, loss: 0.002886428963392973 
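On the evaluation dicts printed at each epoch boundary (e.g. the epoch-12 block above): the scorer itself does not appear in this log, but the logged numbers are consistent with a harmonic-mean F1 for 'template' and 'slot' and a 'combined' score equal to the product of the two F1s. A quick check against the epoch-12 Dev Chinese figures:

def f1(p, r):
    """Harmonic mean of precision and recall."""
    return 2 * p * r / (p + r) if p + r else 0.0

# Dev Chinese, epoch 12 (precision/recall copied from the block above):
template_f1 = f1(1.0, 0.5833333333333334)               # 0.7368421052631579, as logged
slot_f1 = f1(0.35606501057082457, 0.31897490530303035)  # ~0.3365009990, as logged
combined = template_f1 * slot_f1                        # ~0.2479481045, matches the logged 'combined'
print(template_f1, slot_f1, combined)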
2023-01-22 17:15:04.209396: step: 772/531, loss: 1.2386779417283833e-05 2023-01-22 17:15:05.278052: step: 776/531, loss: 0.005482846405357122 2023-01-22 17:15:06.346648: step: 780/531, loss: 0.007490086369216442 2023-01-22 17:15:07.422044: step: 784/531, loss: 0.000914071686565876 2023-01-22 17:15:08.492607: step: 788/531, loss: 0.008129005320370197 2023-01-22 17:15:09.559932: step: 792/531, loss: 0.008872500620782375 2023-01-22 17:15:10.615828: step: 796/531, loss: 4.6977449528640136e-05 2023-01-22 17:15:11.677548: step: 800/531, loss: 0.009049315005540848 2023-01-22 17:15:12.743765: step: 804/531, loss: 0.007938825525343418 2023-01-22 17:15:13.810676: step: 808/531, loss: 0.004597396589815617 2023-01-22 17:15:14.878521: step: 812/531, loss: 2.0720905013149604e-05 2023-01-22 17:15:15.938160: step: 816/531, loss: 0.0005779388593509793 2023-01-22 17:15:17.003194: step: 820/531, loss: 0.0012223701924085617 2023-01-22 17:15:18.059868: step: 824/531, loss: 0.0022516080643981695 2023-01-22 17:15:19.116643: step: 828/531, loss: 0.00019816841813735664 2023-01-22 17:15:20.178048: step: 832/531, loss: 0.005344463977962732 2023-01-22 17:15:21.241618: step: 836/531, loss: 0.004119896795600653 2023-01-22 17:15:22.301510: step: 840/531, loss: 0.007639960385859013 2023-01-22 17:15:23.349862: step: 844/531, loss: 0.0061889891512691975 2023-01-22 17:15:24.409557: step: 848/531, loss: 0.014041555114090443 2023-01-22 17:15:25.484157: step: 852/531, loss: 0.01117026899009943 2023-01-22 17:15:26.542087: step: 856/531, loss: 0.0016999348299577832 2023-01-22 17:15:27.599223: step: 860/531, loss: 0.0023541594855487347 2023-01-22 17:15:28.667978: step: 864/531, loss: 0.006213717628270388 2023-01-22 17:15:29.727080: step: 868/531, loss: 0.025150950998067856 2023-01-22 17:15:30.814450: step: 872/531, loss: 0.0012401898857206106 2023-01-22 17:15:31.861553: step: 876/531, loss: 9.335210779681802e-05 2023-01-22 17:15:32.925515: step: 880/531, loss: 0.00022841035388410091 2023-01-22 17:15:33.997031: step: 884/531, loss: 0.017027227208018303 2023-01-22 17:15:35.067997: step: 888/531, loss: 0.00010077827755594626 2023-01-22 17:15:36.129429: step: 892/531, loss: 0.0006659696809947491 2023-01-22 17:15:37.176651: step: 896/531, loss: 0.0004178892995696515 2023-01-22 17:15:38.231794: step: 900/531, loss: 0.0016962476074695587 2023-01-22 17:15:39.279459: step: 904/531, loss: 0.008167213760316372 2023-01-22 17:15:40.357737: step: 908/531, loss: 0.002180703915655613 2023-01-22 17:15:41.412694: step: 912/531, loss: 0.007386069279164076 2023-01-22 17:15:42.498040: step: 916/531, loss: 0.009456770494580269 2023-01-22 17:15:43.561926: step: 920/531, loss: 0.005559196230024099 2023-01-22 17:15:44.643625: step: 924/531, loss: 0.006130869034677744 2023-01-22 17:15:45.703925: step: 928/531, loss: 0.0005443996051326394 2023-01-22 17:15:46.763679: step: 932/531, loss: 0.0017750472761690617 2023-01-22 17:15:47.834978: step: 936/531, loss: 0.0008147003827616572 2023-01-22 17:15:48.891792: step: 940/531, loss: 0.0028524252120405436 2023-01-22 17:15:49.946681: step: 944/531, loss: 0.005025567952543497 2023-01-22 17:15:51.022290: step: 948/531, loss: 0.011067330837249756 2023-01-22 17:15:52.076779: step: 952/531, loss: 0.005371290259063244 2023-01-22 17:15:53.144198: step: 956/531, loss: 0.008980665355920792 2023-01-22 17:15:54.217476: step: 960/531, loss: 0.0003326531150378287 2023-01-22 17:15:55.272337: step: 964/531, loss: 0.0022596947383135557 2023-01-22 17:15:56.336908: step: 968/531, loss: 4.021526183350943e-05 2023-01-22 
17:15:57.400324: step: 972/531, loss: 0.011506552807986736 2023-01-22 17:15:58.483258: step: 976/531, loss: 0.014786058105528355 2023-01-22 17:15:59.562749: step: 980/531, loss: 0.063012033700943 2023-01-22 17:16:00.617733: step: 984/531, loss: 0.0027725694235414267 2023-01-22 17:16:01.684127: step: 988/531, loss: 0.0001401176123181358 2023-01-22 17:16:02.742652: step: 992/531, loss: 0.014190798625349998 2023-01-22 17:16:03.816438: step: 996/531, loss: 0.022968707606196404 2023-01-22 17:16:04.883375: step: 1000/531, loss: 0.011638056486845016 2023-01-22 17:16:05.935097: step: 1004/531, loss: 5.398088978836313e-05 2023-01-22 17:16:07.002524: step: 1008/531, loss: 0.0005842193495482206 2023-01-22 17:16:08.057512: step: 1012/531, loss: 0.006642464082688093 2023-01-22 17:16:09.127010: step: 1016/531, loss: 0.01608196832239628 2023-01-22 17:16:10.187716: step: 1020/531, loss: 0.007150702644139528 2023-01-22 17:16:11.232254: step: 1024/531, loss: 0.008679691702127457 2023-01-22 17:16:12.270100: step: 1028/531, loss: 0.05388197675347328 2023-01-22 17:16:13.335475: step: 1032/531, loss: 0.061284396797418594 2023-01-22 17:16:14.391818: step: 1036/531, loss: 0.0010705149034038186 2023-01-22 17:16:15.457218: step: 1040/531, loss: 0.0030790152959525585 2023-01-22 17:16:16.523738: step: 1044/531, loss: 0.004421367309987545 2023-01-22 17:16:17.600222: step: 1048/531, loss: 0.0009097373113036156 2023-01-22 17:16:18.647156: step: 1052/531, loss: 0.0060294982977211475 2023-01-22 17:16:19.693356: step: 1056/531, loss: 0.008174785412847996 2023-01-22 17:16:20.758312: step: 1060/531, loss: 0.012843639589846134 2023-01-22 17:16:21.816267: step: 1064/531, loss: 0.0005802420200780034 2023-01-22 17:16:22.906796: step: 1068/531, loss: 0.007690838538110256 2023-01-22 17:16:23.957725: step: 1072/531, loss: 0.0034123535733669996 2023-01-22 17:16:25.019867: step: 1076/531, loss: 0.05621039494872093 2023-01-22 17:16:26.080834: step: 1080/531, loss: 0.005726216826587915 2023-01-22 17:16:27.136272: step: 1084/531, loss: 0.0012716427445411682 2023-01-22 17:16:28.209548: step: 1088/531, loss: 0.028594590723514557 2023-01-22 17:16:29.253715: step: 1092/531, loss: 0.00024010120250750333 2023-01-22 17:16:30.315725: step: 1096/531, loss: 0.0016107282135635614 2023-01-22 17:16:31.364185: step: 1100/531, loss: 0.0010938129853457212 2023-01-22 17:16:32.404768: step: 1104/531, loss: 0.004322534892708063 2023-01-22 17:16:33.463085: step: 1108/531, loss: 0.0023237746208906174 2023-01-22 17:16:34.525074: step: 1112/531, loss: 0.00018691914738155901 2023-01-22 17:16:35.610168: step: 1116/531, loss: 0.0003504491178318858 2023-01-22 17:16:36.659893: step: 1120/531, loss: 0.0017954764189198613 2023-01-22 17:16:37.727098: step: 1124/531, loss: 0.0003262172103859484 2023-01-22 17:16:38.762130: step: 1128/531, loss: 0.0044793072156608105 2023-01-22 17:16:39.820373: step: 1132/531, loss: 0.004968823865056038 2023-01-22 17:16:40.859134: step: 1136/531, loss: 6.309113814495504e-05 2023-01-22 17:16:41.936184: step: 1140/531, loss: 0.0012590914266183972 2023-01-22 17:16:42.980888: step: 1144/531, loss: 8.805541438050568e-05 2023-01-22 17:16:44.040558: step: 1148/531, loss: 0.009042898193001747 2023-01-22 17:16:45.090718: step: 1152/531, loss: 0.001615240704268217 2023-01-22 17:16:46.168952: step: 1156/531, loss: 0.026992496103048325 2023-01-22 17:16:47.225999: step: 1160/531, loss: 0.0008137584081850946 2023-01-22 17:16:48.286590: step: 1164/531, loss: 0.011020206846296787 2023-01-22 17:16:49.346253: step: 1168/531, loss: 0.0008785320678725839 
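The "Current best result" block above retains, for each language, the Dev/Test/Sample dicts from different epochs (Chinese from epoch 6, Korean and Russian from epoch 5), which suggests a per-language best-so-far selection keyed on the dev 'combined' score. A hypothetical sketch of such tracking (assumed, not taken from train.py):

best_results = {}  # language -> {'dev': ..., 'test': ..., 'sample': ..., 'epoch': ...}

def update_best(language, epoch, dev, test, sample):
    """Keep, per language, the epoch with the highest dev 'combined' score."""
    current = best_results.get(language)
    if current is None or dev['combined'] > current['dev']['combined']:
        best_results[language] = {'dev': dev, 'test': test, 'sample': sample, 'epoch': epoch}

# For epoch 12 the dev 'combined' scores (about 0.2479 zh, 0.2517 ko, 0.2370 ru) are all below
# the stored bests (0.2505, 0.2536, 0.2371), so the best-result block above is left unchanged.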
2023-01-22 17:16:50.386050: step: 1172/531, loss: 0.004559738095849752 2023-01-22 17:16:51.450820: step: 1176/531, loss: 0.013241405598819256 2023-01-22 17:16:52.493036: step: 1180/531, loss: 1.5530036762356758e-05 2023-01-22 17:16:53.541692: step: 1184/531, loss: 0.025860823690891266 2023-01-22 17:16:54.606905: step: 1188/531, loss: 0.005090369377285242 2023-01-22 17:16:55.668292: step: 1192/531, loss: 0.0032372602727264166 2023-01-22 17:16:56.727869: step: 1196/531, loss: 0.0011953068897128105 2023-01-22 17:16:57.788084: step: 1200/531, loss: 0.0022225056309252977 2023-01-22 17:16:58.841213: step: 1204/531, loss: 0.0013108529383316636 2023-01-22 17:16:59.896869: step: 1208/531, loss: 8.956688543548808e-05 2023-01-22 17:17:00.949890: step: 1212/531, loss: 0.001609743689186871 2023-01-22 17:17:01.998211: step: 1216/531, loss: 0.0003628864069469273 2023-01-22 17:17:03.066949: step: 1220/531, loss: 0.0010906127281486988 2023-01-22 17:17:04.110446: step: 1224/531, loss: 0.0004920852370560169 2023-01-22 17:17:05.155742: step: 1228/531, loss: 0.003378136781975627 2023-01-22 17:17:06.200687: step: 1232/531, loss: 0.002010734286159277 2023-01-22 17:17:07.261420: step: 1236/531, loss: 0.002170839812606573 2023-01-22 17:17:08.319740: step: 1240/531, loss: 0.0006582196219824255 2023-01-22 17:17:09.371549: step: 1244/531, loss: 0.04095841571688652 2023-01-22 17:17:10.435269: step: 1248/531, loss: 0.015030969865620136 2023-01-22 17:17:11.491452: step: 1252/531, loss: 0.00022219220409169793 2023-01-22 17:17:12.574498: step: 1256/531, loss: 7.857872697059065e-05 2023-01-22 17:17:13.638022: step: 1260/531, loss: 0.0014182198792696 2023-01-22 17:17:14.710361: step: 1264/531, loss: 0.004063778556883335 2023-01-22 17:17:15.756543: step: 1268/531, loss: 0.0011769463308155537 2023-01-22 17:17:16.804352: step: 1272/531, loss: 0.010867233388125896 2023-01-22 17:17:17.880899: step: 1276/531, loss: 0.0038230204954743385 2023-01-22 17:17:18.947381: step: 1280/531, loss: 0.000911044713575393 2023-01-22 17:17:20.004331: step: 1284/531, loss: 0.001992700854316354 2023-01-22 17:17:21.084129: step: 1288/531, loss: 0.005457703024148941 2023-01-22 17:17:22.148937: step: 1292/531, loss: 0.0018652963917702436 2023-01-22 17:17:23.218556: step: 1296/531, loss: 0.003989304415881634 2023-01-22 17:17:24.273011: step: 1300/531, loss: 0.01085844449698925 2023-01-22 17:17:25.335502: step: 1304/531, loss: 0.001219116966240108 2023-01-22 17:17:26.394884: step: 1308/531, loss: 0.006250276230275631 2023-01-22 17:17:27.454278: step: 1312/531, loss: 0.00668021384626627 2023-01-22 17:17:28.510897: step: 1316/531, loss: 0.004537404049187899 2023-01-22 17:17:29.575342: step: 1320/531, loss: 0.002917621284723282 2023-01-22 17:17:30.618922: step: 1324/531, loss: 3.054946500924416e-05 2023-01-22 17:17:31.671118: step: 1328/531, loss: 0.00030456160311587155 2023-01-22 17:17:32.722089: step: 1332/531, loss: 0.001132350880652666 2023-01-22 17:17:33.779682: step: 1336/531, loss: 0.00498964125290513 2023-01-22 17:17:34.834115: step: 1340/531, loss: 0.006298901978880167 2023-01-22 17:17:35.907228: step: 1344/531, loss: 0.0023895029444247484 2023-01-22 17:17:36.964527: step: 1348/531, loss: 0.0038918203208595514 2023-01-22 17:17:38.021638: step: 1352/531, loss: 3.296085196780041e-05 2023-01-22 17:17:39.074769: step: 1356/531, loss: 0.0006127421511337161 2023-01-22 17:17:40.159211: step: 1360/531, loss: 0.011149992235004902 2023-01-22 17:17:41.207655: step: 1364/531, loss: 0.0036587396170943975 2023-01-22 17:17:42.263844: step: 1368/531, loss: 
0.0017438203794881701 2023-01-22 17:17:43.324855: step: 1372/531, loss: 0.02992326393723488 2023-01-22 17:17:44.379607: step: 1376/531, loss: 8.607034396845847e-05 2023-01-22 17:17:45.441643: step: 1380/531, loss: 0.0045074219815433025 2023-01-22 17:17:46.504857: step: 1384/531, loss: 0.0012903253082185984 2023-01-22 17:17:47.557061: step: 1388/531, loss: 5.810592119814828e-05 2023-01-22 17:17:48.642710: step: 1392/531, loss: 0.0022700822446495295 2023-01-22 17:17:49.717079: step: 1396/531, loss: 0.000807271571829915 2023-01-22 17:17:50.790494: step: 1400/531, loss: 1.979072840185836e-05 2023-01-22 17:17:51.835632: step: 1404/531, loss: 0.00010419355385238305 2023-01-22 17:17:52.886076: step: 1408/531, loss: 0.0023636610712856054 2023-01-22 17:17:53.944074: step: 1412/531, loss: 0.00010474493319634348 2023-01-22 17:17:54.991981: step: 1416/531, loss: 0.0027683116495609283 2023-01-22 17:17:56.056628: step: 1420/531, loss: 0.004748487379401922 2023-01-22 17:17:57.117517: step: 1424/531, loss: 0.00281822239048779 2023-01-22 17:17:58.156739: step: 1428/531, loss: 0.0010070235002785921 2023-01-22 17:17:59.217174: step: 1432/531, loss: 3.932854724553181e-06 2023-01-22 17:18:00.286504: step: 1436/531, loss: 0.0021961256861686707 2023-01-22 17:18:01.340308: step: 1440/531, loss: 0.0008509191102348268 2023-01-22 17:18:02.405489: step: 1444/531, loss: 0.0008358809282071888 2023-01-22 17:18:03.469596: step: 1448/531, loss: 2.6334613721701317e-05 2023-01-22 17:18:04.536059: step: 1452/531, loss: 0.004515047185122967 2023-01-22 17:18:05.610029: step: 1456/531, loss: 0.003837031312286854 2023-01-22 17:18:06.672466: step: 1460/531, loss: 5.697361848433502e-05 2023-01-22 17:18:07.771902: step: 1464/531, loss: 0.0019774767570197582 2023-01-22 17:18:08.831339: step: 1468/531, loss: 4.4358290324453264e-05 2023-01-22 17:18:09.889531: step: 1472/531, loss: 0.0006438334239646792 2023-01-22 17:18:10.950331: step: 1476/531, loss: 0.0032409466803073883 2023-01-22 17:18:12.014704: step: 1480/531, loss: 0.0015242878580465913 2023-01-22 17:18:13.076832: step: 1484/531, loss: 0.003153785364702344 2023-01-22 17:18:14.136506: step: 1488/531, loss: 1.4236917195376009e-05 2023-01-22 17:18:15.212305: step: 1492/531, loss: 0.0005675765569321811 2023-01-22 17:18:16.289323: step: 1496/531, loss: 2.7047299226978794e-05 2023-01-22 17:18:17.346613: step: 1500/531, loss: 8.26976029202342e-05 2023-01-22 17:18:18.390992: step: 1504/531, loss: 0.0006109775858931243 2023-01-22 17:18:19.455070: step: 1508/531, loss: 0.0001951136946445331 2023-01-22 17:18:20.527324: step: 1512/531, loss: 0.0008975151577033103 2023-01-22 17:18:21.575768: step: 1516/531, loss: 0.0005838876240886748 2023-01-22 17:18:22.632024: step: 1520/531, loss: 0.0011557629331946373 2023-01-22 17:18:23.701015: step: 1524/531, loss: 0.011381728574633598 2023-01-22 17:18:24.764347: step: 1528/531, loss: 0.005961467511951923 2023-01-22 17:18:25.823582: step: 1532/531, loss: 0.04712522029876709 2023-01-22 17:18:26.879812: step: 1536/531, loss: 0.0025206315331161022 2023-01-22 17:18:27.937962: step: 1540/531, loss: 0.0037165971007198095 2023-01-22 17:18:28.991933: step: 1544/531, loss: 0.008586629293859005 2023-01-22 17:18:30.052312: step: 1548/531, loss: 0.007212300319224596 2023-01-22 17:18:31.118124: step: 1552/531, loss: 0.004146702587604523 2023-01-22 17:18:32.182149: step: 1556/531, loss: 0.004820911213755608 2023-01-22 17:18:33.238666: step: 1560/531, loss: 0.004934458062052727 2023-01-22 17:18:34.296569: step: 1564/531, loss: 0.007060043513774872 2023-01-22 
17:18:35.344470: step: 1568/531, loss: 0.0037182329688221216 2023-01-22 17:18:36.411054: step: 1572/531, loss: 0.00265847472473979 2023-01-22 17:18:37.456138: step: 1576/531, loss: 0.00320924399420619 2023-01-22 17:18:38.506149: step: 1580/531, loss: 0.005504476837813854 2023-01-22 17:18:39.586483: step: 1584/531, loss: 0.000665211642626673 2023-01-22 17:18:40.637097: step: 1588/531, loss: 0.006778986193239689 2023-01-22 17:18:41.685081: step: 1592/531, loss: 0.0018619614420458674 2023-01-22 17:18:42.750993: step: 1596/531, loss: 0.0029642395675182343 2023-01-22 17:18:43.810412: step: 1600/531, loss: 0.003254387527704239 2023-01-22 17:18:44.876516: step: 1604/531, loss: 0.003264126367866993 2023-01-22 17:18:45.923492: step: 1608/531, loss: 0.0033200099132955074 2023-01-22 17:18:46.980123: step: 1612/531, loss: 0.002769629703834653 2023-01-22 17:18:48.034181: step: 1616/531, loss: 0.0011690377723425627 2023-01-22 17:18:49.097523: step: 1620/531, loss: 0.001325818127952516 2023-01-22 17:18:50.144732: step: 1624/531, loss: 0.0032912404276430607 2023-01-22 17:18:51.199265: step: 1628/531, loss: 0.03286954015493393 2023-01-22 17:18:52.258100: step: 1632/531, loss: 0.0005078306421637535 2023-01-22 17:18:53.316761: step: 1636/531, loss: 6.243363895919174e-05 2023-01-22 17:18:54.358468: step: 1640/531, loss: 0.0021527670323848724 2023-01-22 17:18:55.412051: step: 1644/531, loss: 0.00846614595502615 2023-01-22 17:18:56.477899: step: 1648/531, loss: 0.01128337625414133 2023-01-22 17:18:57.545758: step: 1652/531, loss: 0.0024253749288618565 2023-01-22 17:18:58.602179: step: 1656/531, loss: 0.0011595949763432145 2023-01-22 17:18:59.655225: step: 1660/531, loss: 0.0006072914693504572 2023-01-22 17:19:00.716812: step: 1664/531, loss: 0.004316008649766445 2023-01-22 17:19:01.787097: step: 1668/531, loss: 0.061830636113882065 2023-01-22 17:19:02.837351: step: 1672/531, loss: 0.004680373705923557 2023-01-22 17:19:03.877841: step: 1676/531, loss: 0.002400621073320508 2023-01-22 17:19:04.919962: step: 1680/531, loss: 0.00011722392810042948 2023-01-22 17:19:05.984270: step: 1684/531, loss: 0.002592526376247406 2023-01-22 17:19:07.057917: step: 1688/531, loss: 0.0010730191133916378 2023-01-22 17:19:08.130270: step: 1692/531, loss: 0.002417347626760602 2023-01-22 17:19:09.185238: step: 1696/531, loss: 0.014336168766021729 2023-01-22 17:19:10.243836: step: 1700/531, loss: 0.0013070888817310333 2023-01-22 17:19:11.287204: step: 1704/531, loss: 0.027085356414318085 2023-01-22 17:19:12.338497: step: 1708/531, loss: 0.00013016282173339278 2023-01-22 17:19:13.383878: step: 1712/531, loss: 0.003856057533994317 2023-01-22 17:19:14.448591: step: 1716/531, loss: 0.009995688684284687 2023-01-22 17:19:15.512390: step: 1720/531, loss: 0.00039512524381279945 2023-01-22 17:19:16.571150: step: 1724/531, loss: 0.003914263565093279 2023-01-22 17:19:17.625846: step: 1728/531, loss: 0.005618093069642782 2023-01-22 17:19:18.684328: step: 1732/531, loss: 0.006893915589898825 2023-01-22 17:19:19.735575: step: 1736/531, loss: 0.014279298484325409 2023-01-22 17:19:20.800936: step: 1740/531, loss: 0.005142610520124435 2023-01-22 17:19:21.844160: step: 1744/531, loss: 7.414120773319155e-05 2023-01-22 17:19:22.900812: step: 1748/531, loss: 0.002108305459842086 2023-01-22 17:19:23.969615: step: 1752/531, loss: 0.0018001499120146036 2023-01-22 17:19:25.023907: step: 1756/531, loss: 0.00030126600177027285 2023-01-22 17:19:26.103277: step: 1760/531, loss: 0.0054490105248987675 2023-01-22 17:19:27.161120: step: 1764/531, loss: 
0.004551001824438572 2023-01-22 17:19:28.217558: step: 1768/531, loss: 8.754232112551108e-05 2023-01-22 17:19:29.267062: step: 1772/531, loss: 0.010184419341385365 2023-01-22 17:19:30.338170: step: 1776/531, loss: 0.005537926685065031 2023-01-22 17:19:31.401039: step: 1780/531, loss: 0.002223237883299589 2023-01-22 17:19:32.456242: step: 1784/531, loss: 0.0013415184803307056 2023-01-22 17:19:33.514774: step: 1788/531, loss: 0.004140784032642841 2023-01-22 17:19:34.569035: step: 1792/531, loss: 0.0009523855405859649 2023-01-22 17:19:35.621127: step: 1796/531, loss: 0.004413597751408815 2023-01-22 17:19:36.666672: step: 1800/531, loss: 0.003502572188153863 2023-01-22 17:19:37.718317: step: 1804/531, loss: 0.00018110548262484372 2023-01-22 17:19:38.783342: step: 1808/531, loss: 0.0012267277343198657 2023-01-22 17:19:39.826353: step: 1812/531, loss: 0.0004821026523131877 2023-01-22 17:19:40.883388: step: 1816/531, loss: 0.0026140213012695312 2023-01-22 17:19:41.944450: step: 1820/531, loss: 0.019991111010313034 2023-01-22 17:19:43.003767: step: 1824/531, loss: 0.005648982711136341 2023-01-22 17:19:44.083698: step: 1828/531, loss: 0.0006146921659819782 2023-01-22 17:19:45.150403: step: 1832/531, loss: 0.0017123919678851962 2023-01-22 17:19:46.215005: step: 1836/531, loss: 0.0022584530524909496 2023-01-22 17:19:47.265540: step: 1840/531, loss: 0.0035019302740693092 2023-01-22 17:19:48.315004: step: 1844/531, loss: 0.0008214695262722671 2023-01-22 17:19:49.372119: step: 1848/531, loss: 0.0020750330295413733 2023-01-22 17:19:50.414650: step: 1852/531, loss: 0.001352264080196619 2023-01-22 17:19:51.496190: step: 1856/531, loss: 0.0014432482421398163 2023-01-22 17:19:52.548073: step: 1860/531, loss: 0.00410476652905345 2023-01-22 17:19:53.597316: step: 1864/531, loss: 5.1833023462677374e-05 2023-01-22 17:19:54.665428: step: 1868/531, loss: 0.00209336681291461 2023-01-22 17:19:55.724841: step: 1872/531, loss: 0.00021056714467704296 2023-01-22 17:19:56.804912: step: 1876/531, loss: 0.002319770399481058 2023-01-22 17:19:57.859120: step: 1880/531, loss: 7.407455268548802e-05 2023-01-22 17:19:58.899289: step: 1884/531, loss: 0.009329630061984062 2023-01-22 17:19:59.967285: step: 1888/531, loss: 0.00022655384964309633 2023-01-22 17:20:01.035389: step: 1892/531, loss: 0.0006434321985580027 2023-01-22 17:20:02.101354: step: 1896/531, loss: 0.008055443875491619 2023-01-22 17:20:03.170643: step: 1900/531, loss: 0.0006588028045371175 2023-01-22 17:20:04.222298: step: 1904/531, loss: 0.0009856290416792035 2023-01-22 17:20:05.281277: step: 1908/531, loss: 0.0006299313972704113 2023-01-22 17:20:06.331514: step: 1912/531, loss: 0.0004923464148305357 2023-01-22 17:20:07.375993: step: 1916/531, loss: 0.0007975624175742269 2023-01-22 17:20:08.429777: step: 1920/531, loss: 0.013214414939284325 2023-01-22 17:20:09.496080: step: 1924/531, loss: 0.003228053916245699 2023-01-22 17:20:10.557431: step: 1928/531, loss: 0.005804707296192646 2023-01-22 17:20:11.613512: step: 1932/531, loss: 0.0049368226900696754 2023-01-22 17:20:12.676013: step: 1936/531, loss: 0.004499434493482113 2023-01-22 17:20:13.734714: step: 1940/531, loss: 0.0029881522059440613 2023-01-22 17:20:14.797364: step: 1944/531, loss: 0.002854348160326481 2023-01-22 17:20:15.856594: step: 1948/531, loss: 0.002133850008249283 2023-01-22 17:20:16.915980: step: 1952/531, loss: 0.0019627660512924194 2023-01-22 17:20:17.985991: step: 1956/531, loss: 0.001569436746649444 2023-01-22 17:20:19.026075: step: 1960/531, loss: 0.011065283790230751 2023-01-22 
17:20:20.081173: step: 1964/531, loss: 0.0012104656780138612 2023-01-22 17:20:21.132177: step: 1968/531, loss: 0.0035701077431440353 2023-01-22 17:20:22.187142: step: 1972/531, loss: 0.00444983784109354 2023-01-22 17:20:23.268394: step: 1976/531, loss: 0.0026652247179299593 2023-01-22 17:20:24.305401: step: 1980/531, loss: 0.0016057912725955248 2023-01-22 17:20:25.360626: step: 1984/531, loss: 0.0067677260376513 2023-01-22 17:20:26.415959: step: 1988/531, loss: 0.01013968512415886 2023-01-22 17:20:27.486740: step: 1992/531, loss: 0.0029363830108195543 2023-01-22 17:20:28.548070: step: 1996/531, loss: 0.010073035955429077 2023-01-22 17:20:29.609815: step: 2000/531, loss: 0.0294170044362545 2023-01-22 17:20:30.644913: step: 2004/531, loss: 2.329733433725778e-05 2023-01-22 17:20:31.699913: step: 2008/531, loss: 0.005676647182554007 2023-01-22 17:20:32.758310: step: 2012/531, loss: 0.00281923683360219 2023-01-22 17:20:33.826099: step: 2016/531, loss: 0.0018013858934864402 2023-01-22 17:20:34.893634: step: 2020/531, loss: 0.0018166168592870235 2023-01-22 17:20:35.959185: step: 2024/531, loss: 0.009231380186975002 2023-01-22 17:20:37.026223: step: 2028/531, loss: 0.007757980842143297 2023-01-22 17:20:38.082212: step: 2032/531, loss: 0.00017122748249676079 2023-01-22 17:20:39.138673: step: 2036/531, loss: 0.00026397412875667214 2023-01-22 17:20:40.190796: step: 2040/531, loss: 0.000343112536938861 2023-01-22 17:20:41.249884: step: 2044/531, loss: 0.0049992213025689125 2023-01-22 17:20:42.347840: step: 2048/531, loss: 0.014012685976922512 2023-01-22 17:20:43.409633: step: 2052/531, loss: 0.005404432769864798 2023-01-22 17:20:44.476904: step: 2056/531, loss: 0.000970169494394213 2023-01-22 17:20:45.545300: step: 2060/531, loss: 0.001486293156631291 2023-01-22 17:20:46.605939: step: 2064/531, loss: 0.0044876281172037125 2023-01-22 17:20:47.656636: step: 2068/531, loss: 0.004209108650684357 2023-01-22 17:20:48.716804: step: 2072/531, loss: 0.001256016199477017 2023-01-22 17:20:49.774910: step: 2076/531, loss: 0.002157731680199504 2023-01-22 17:20:50.846924: step: 2080/531, loss: 0.006548232864588499 2023-01-22 17:20:51.899821: step: 2084/531, loss: 5.276700539980084e-05 2023-01-22 17:20:52.944983: step: 2088/531, loss: 0.009501063264906406 2023-01-22 17:20:54.015597: step: 2092/531, loss: 0.007542643696069717 2023-01-22 17:20:55.068271: step: 2096/531, loss: 0.00042067599133588374 2023-01-22 17:20:56.126702: step: 2100/531, loss: 0.0009010783978737891 2023-01-22 17:20:57.190809: step: 2104/531, loss: 0.017282428219914436 2023-01-22 17:20:58.224309: step: 2108/531, loss: 0.00039278127951547503 2023-01-22 17:20:59.268914: step: 2112/531, loss: 0.0002482626005075872 2023-01-22 17:21:00.326589: step: 2116/531, loss: 0.0001620492694200948 2023-01-22 17:21:01.393010: step: 2120/531, loss: 0.004453693982213736 2023-01-22 17:21:02.469427: step: 2124/531, loss: 0.0007471845019608736 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3584874911909796, 'r': 0.32114504419191925, 'f1': 0.33879037629037634}, 'combined': 0.24963501410869834, 'stategy': 1, 'epoch': 13} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3376048413375054, 'r': 0.2763606417807919, 'f1': 0.3039281301172381}, 'combined': 0.19031013755004633, 'stategy': 1, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 
'slot': {'p': 0.33211922571099783, 'r': 0.348504614455753, 'f1': 0.34011468855218857}, 'combined': 0.25061082314371785, 'stategy': 1, 'epoch': 13} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3613812784936771, 'r': 0.3004450304519119, 'f1': 0.3281078668352243}, 'combined': 0.20333445268661787, 'stategy': 1, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3192515432098766, 'r': 0.32712681846932323, 'f1': 0.3231412058731647}, 'combined': 0.23810404643285818, 'stategy': 1, 'epoch': 13} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3653791618631812, 'r': 0.2861641674212991, 'f1': 0.32095616346036276}, 'combined': 0.2129115143746961, 'stategy': 1, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'stategy': 1, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 14 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 17:23:29.556033: step: 4/531, loss: 0.0016820986056700349 2023-01-22 17:23:30.597610: step: 8/531, loss: 0.001272919587790966 2023-01-22 17:23:31.649247: step: 12/531, loss: 0.0018223518272861838 2023-01-22 17:23:32.693307: step: 16/531, loss: 0.0001720621803542599 2023-01-22 17:23:33.745045: step: 20/531, loss: 0.0011702225310727954 2023-01-22 17:23:34.809978: step: 24/531, loss: 0.0004568614240270108 2023-01-22 17:23:35.842821: step: 28/531, loss: 0.00287062325514853 2023-01-22 17:23:36.901645: step: 32/531, loss: 0.00017823060625232756 2023-01-22 17:23:37.938219: step: 36/531, loss: 0.0004975678748451173 2023-01-22 17:23:38.986953: step: 40/531, loss: 0.0011281631886959076 2023-01-22 17:23:40.041873: step: 44/531, loss: 0.021383967250585556 2023-01-22 17:23:41.104192: step: 48/531, loss: 0.0015692919259890914 2023-01-22 17:23:42.181085: step: 52/531, loss: 0.00544044841080904 2023-01-22 17:23:43.224537: step: 56/531, loss: 0.0009331905166618526 2023-01-22 17:23:44.274447: step: 60/531, loss: 0.003196435747668147 2023-01-22 17:23:45.347961: step: 64/531, loss: 0.009559637866914272 2023-01-22 17:23:46.430416: step: 68/531, loss: 0.00292372633703053 2023-01-22 17:23:47.470652: step: 72/531, loss: 1.689396049187053e-05 2023-01-22 17:23:48.532419: step: 76/531, loss: 0.000737437978386879 2023-01-22 17:23:49.588041: step: 80/531, loss: 0.0008886161958798766 2023-01-22 17:23:50.647389: step: 84/531, loss: 0.0014787687687203288 2023-01-22 17:23:51.708137: step: 88/531, loss: 0.001064131036400795 2023-01-22 17:23:52.770796: step: 92/531, loss: 0.011055948212742805 2023-01-22 17:23:53.846463: step: 96/531, loss: 0.004343170206993818 2023-01-22 17:23:54.899003: step: 100/531, loss: 0.0023446830455213785 2023-01-22 17:23:55.969100: step: 104/531, loss: 0.002073544543236494 2023-01-22 17:23:57.044480: step: 108/531, loss: 0.00457299267873168 2023-01-22 17:23:58.102229: step: 112/531, loss: 0.0011097511742264032 2023-01-22 17:23:59.147419: step: 116/531, loss: 0.0035200065467506647 2023-01-22 17:24:00.214850: step: 120/531, loss: 0.0012597950408235192 2023-01-22 17:24:01.266578: step: 124/531, loss: 0.004782854579389095 2023-01-22 17:24:02.318862: step: 128/531, loss: 0.00341703649610281 2023-01-22 17:24:03.375163: step: 132/531, loss: 0.0008929745526984334 2023-01-22 17:24:04.420227: step: 136/531, loss: 0.003075639484450221 2023-01-22 17:24:05.476806: step: 140/531, loss: 0.003302244935184717 2023-01-22 17:24:06.529600: step: 144/531, loss: 0.004704549442976713 2023-01-22 17:24:07.595507: step: 148/531, loss: 0.001426379894837737 2023-01-22 17:24:08.651760: step: 152/531, loss: 0.007419176399707794 2023-01-22 17:24:09.708674: step: 156/531, loss: 0.0029818553011864424 2023-01-22 17:24:10.764944: step: 160/531, loss: 0.00545874796807766 2023-01-22 17:24:11.832695: step: 164/531, loss: 0.006517227739095688 2023-01-22 17:24:12.903520: step: 168/531, loss: 0.0009801655542105436 2023-01-22 17:24:13.963526: step: 172/531, loss: 0.001989149022847414 2023-01-22 17:24:15.032066: step: 176/531, loss: 0.016178661957383156 2023-01-22 17:24:16.098141: step: 180/531, loss: 
0.0005514121730811894 2023-01-22 17:24:17.149978: step: 184/531, loss: 0.001059800386428833 2023-01-22 17:24:18.206617: step: 188/531, loss: 0.0003515127464197576 2023-01-22 17:24:19.277756: step: 192/531, loss: 0.011831310577690601 2023-01-22 17:24:20.329619: step: 196/531, loss: 0.0006817436078563333 2023-01-22 17:24:21.392391: step: 200/531, loss: 0.004688303917646408 2023-01-22 17:24:22.456369: step: 204/531, loss: 0.0029748762026429176 2023-01-22 17:24:23.518942: step: 208/531, loss: 0.0007723732851445675 2023-01-22 17:24:24.586079: step: 212/531, loss: 0.0005731121054850519 2023-01-22 17:24:25.650535: step: 216/531, loss: 0.003231952665373683 2023-01-22 17:24:26.703428: step: 220/531, loss: 0.003479874227195978 2023-01-22 17:24:27.765190: step: 224/531, loss: 0.012836527079343796 2023-01-22 17:24:28.826963: step: 228/531, loss: 0.0012740707024931908 2023-01-22 17:24:29.882422: step: 232/531, loss: 0.003075475338846445 2023-01-22 17:24:30.942659: step: 236/531, loss: 7.526024273829535e-05 2023-01-22 17:24:31.997493: step: 240/531, loss: 0.0032913503237068653 2023-01-22 17:24:33.071678: step: 244/531, loss: 0.02079927735030651 2023-01-22 17:24:34.145651: step: 248/531, loss: 0.006989686284214258 2023-01-22 17:24:35.213620: step: 252/531, loss: 0.00024235071032308042 2023-01-22 17:24:36.265647: step: 256/531, loss: 0.0024690113496035337 2023-01-22 17:24:37.319977: step: 260/531, loss: 5.9587280702544376e-05 2023-01-22 17:24:38.371694: step: 264/531, loss: 0.0003184451488777995 2023-01-22 17:24:39.437230: step: 268/531, loss: 0.0011628486681729555 2023-01-22 17:24:40.492287: step: 272/531, loss: 1.5757747462430416e-07 2023-01-22 17:24:41.567695: step: 276/531, loss: 0.0033106112387031317 2023-01-22 17:24:42.629294: step: 280/531, loss: 9.4774310127832e-05 2023-01-22 17:24:43.690696: step: 284/531, loss: 0.012757186777889729 2023-01-22 17:24:44.751540: step: 288/531, loss: 0.0007110279984772205 2023-01-22 17:24:45.816043: step: 292/531, loss: 0.00045626662904396653 2023-01-22 17:24:46.866011: step: 296/531, loss: 0.0016196728684008121 2023-01-22 17:24:47.929355: step: 300/531, loss: 0.00012583131319843233 2023-01-22 17:24:48.991812: step: 304/531, loss: 0.0010653295321390033 2023-01-22 17:24:50.074616: step: 308/531, loss: 0.009608013555407524 2023-01-22 17:24:51.153097: step: 312/531, loss: 0.007180084008723497 2023-01-22 17:24:52.221014: step: 316/531, loss: 0.0002772776933852583 2023-01-22 17:24:53.275340: step: 320/531, loss: 0.0010277617257088423 2023-01-22 17:24:54.330682: step: 324/531, loss: 0.008284168317914009 2023-01-22 17:24:55.390258: step: 328/531, loss: 2.0305313228163868e-05 2023-01-22 17:24:56.444010: step: 332/531, loss: 0.0028432130347937346 2023-01-22 17:24:57.535515: step: 336/531, loss: 0.004729862324893475 2023-01-22 17:24:58.603820: step: 340/531, loss: 0.0015953927068039775 2023-01-22 17:24:59.667967: step: 344/531, loss: 0.004616578575223684 2023-01-22 17:25:00.747970: step: 348/531, loss: 9.849398338701576e-05 2023-01-22 17:25:01.798010: step: 352/531, loss: 0.00017300553736276925 2023-01-22 17:25:02.855175: step: 356/531, loss: 0.00025847461074590683 2023-01-22 17:25:03.902034: step: 360/531, loss: 0.0011382269440218806 2023-01-22 17:25:04.964844: step: 364/531, loss: 0.003926119767129421 2023-01-22 17:25:06.024315: step: 368/531, loss: 5.382801191444742e-07 2023-01-22 17:25:07.079093: step: 372/531, loss: 0.004052409902215004 2023-01-22 17:25:08.145526: step: 376/531, loss: 0.0041965399868786335 2023-01-22 17:25:09.211873: step: 380/531, loss: 
0.00027606557705439627 2023-01-22 17:25:10.259795: step: 384/531, loss: 0.008244985714554787 2023-01-22 17:25:11.325878: step: 388/531, loss: 0.023369159549474716 2023-01-22 17:25:12.406650: step: 392/531, loss: 0.008395473472774029 2023-01-22 17:25:13.446208: step: 396/531, loss: 0.00015345678548328578 2023-01-22 17:25:14.491788: step: 400/531, loss: 0.0058576627634465694 2023-01-22 17:25:15.558083: step: 404/531, loss: 0.023645292967557907 2023-01-22 17:25:16.613151: step: 408/531, loss: 0.005871656816452742 2023-01-22 17:25:17.679129: step: 412/531, loss: 0.0 2023-01-22 17:25:18.748881: step: 416/531, loss: 0.009561366401612759 2023-01-22 17:25:19.807903: step: 420/531, loss: 0.003358026035130024 2023-01-22 17:25:20.869229: step: 424/531, loss: 2.7298731311020674e-06 2023-01-22 17:25:21.914789: step: 428/531, loss: 0.000304831366520375 2023-01-22 17:25:22.974895: step: 432/531, loss: 0.0005358936614356935 2023-01-22 17:25:24.041193: step: 436/531, loss: 0.004430905915796757 2023-01-22 17:25:25.098200: step: 440/531, loss: 0.009700379334390163 2023-01-22 17:25:26.184167: step: 444/531, loss: 0.01859997771680355 2023-01-22 17:25:27.262931: step: 448/531, loss: 0.005416507832705975 2023-01-22 17:25:28.320071: step: 452/531, loss: 0.001897883485071361 2023-01-22 17:25:29.375736: step: 456/531, loss: 0.005075945984572172 2023-01-22 17:25:30.442169: step: 460/531, loss: 0.002688433276489377 2023-01-22 17:25:31.502737: step: 464/531, loss: 0.0011071580229327083 2023-01-22 17:25:32.551325: step: 468/531, loss: 0.0009355830843560398 2023-01-22 17:25:33.621828: step: 472/531, loss: 0.022723183035850525 2023-01-22 17:25:34.678378: step: 476/531, loss: 0.0018428267212584615 2023-01-22 17:25:35.726438: step: 480/531, loss: 0.0005078070098534226 2023-01-22 17:25:36.777616: step: 484/531, loss: 0.011524239555001259 2023-01-22 17:25:37.851260: step: 488/531, loss: 0.022834865376353264 2023-01-22 17:25:38.915997: step: 492/531, loss: 0.0035942893009632826 2023-01-22 17:25:39.982880: step: 496/531, loss: 0.011420325376093388 2023-01-22 17:25:41.039248: step: 500/531, loss: 0.00018307343998458236 2023-01-22 17:25:42.099355: step: 504/531, loss: 0.005503448192030191 2023-01-22 17:25:43.167365: step: 508/531, loss: 0.0003262669197283685 2023-01-22 17:25:44.236109: step: 512/531, loss: 0.001203661086037755 2023-01-22 17:25:45.318467: step: 516/531, loss: 0.002987103769555688 2023-01-22 17:25:46.385370: step: 520/531, loss: 0.0018034788081422448 2023-01-22 17:25:47.459021: step: 524/531, loss: 0.00021906476467847824 2023-01-22 17:25:48.509694: step: 528/531, loss: 0.004053742159157991 2023-01-22 17:25:49.556672: step: 532/531, loss: 0.001761258696205914 2023-01-22 17:25:50.608712: step: 536/531, loss: 0.0007478193147107959 2023-01-22 17:25:51.670252: step: 540/531, loss: 0.0016021106857806444 2023-01-22 17:25:52.750385: step: 544/531, loss: 0.003069615922868252 2023-01-22 17:25:53.816806: step: 548/531, loss: 0.006721076089888811 2023-01-22 17:25:54.884421: step: 552/531, loss: 0.00018410134362056851 2023-01-22 17:25:55.949379: step: 556/531, loss: 0.005953546613454819 2023-01-22 17:25:57.012800: step: 560/531, loss: 0.00891153048723936 2023-01-22 17:25:58.065481: step: 564/531, loss: 0.0005984383169561625 2023-01-22 17:25:59.132110: step: 568/531, loss: 0.002336231991648674 2023-01-22 17:26:00.206192: step: 572/531, loss: 0.004336831625550985 2023-01-22 17:26:01.283023: step: 576/531, loss: 0.002549282740801573 2023-01-22 17:26:02.350201: step: 580/531, loss: 0.004837400745600462 2023-01-22 17:26:03.410616: 
step: 584/531, loss: 0.00420438963919878 2023-01-22 17:26:04.464729: step: 588/531, loss: 0.00033191306283697486 2023-01-22 17:26:05.526439: step: 592/531, loss: 0.006384607870131731 2023-01-22 17:26:06.596448: step: 596/531, loss: 0.04607657343149185 2023-01-22 17:26:07.655450: step: 600/531, loss: 0.0156918466091156 2023-01-22 17:26:08.705456: step: 604/531, loss: 0.004715701565146446 2023-01-22 17:26:09.791723: step: 608/531, loss: 0.0021382300183176994 2023-01-22 17:26:10.855078: step: 612/531, loss: 6.838714762125164e-05 2023-01-22 17:26:11.940358: step: 616/531, loss: 0.0020097082015126944 2023-01-22 17:26:12.991242: step: 620/531, loss: 0.001160504063591361 2023-01-22 17:26:14.057438: step: 624/531, loss: 0.01615995727479458 2023-01-22 17:26:15.124909: step: 628/531, loss: 0.0004716401163022965 2023-01-22 17:26:16.200735: step: 632/531, loss: 0.00023386311659123749 2023-01-22 17:26:17.275407: step: 636/531, loss: 0.0010904439259320498 2023-01-22 17:26:18.334794: step: 640/531, loss: 0.03402023762464523 2023-01-22 17:26:19.407600: step: 644/531, loss: 0.008198312483727932 2023-01-22 17:26:20.479415: step: 648/531, loss: 0.005444493610411882 2023-01-22 17:26:21.549890: step: 652/531, loss: 0.0010515376925468445 2023-01-22 17:26:22.607342: step: 656/531, loss: 0.001187400775961578 2023-01-22 17:26:23.686621: step: 660/531, loss: 0.0029704223852604628 2023-01-22 17:26:24.742648: step: 664/531, loss: 0.005244607571512461 2023-01-22 17:26:25.795291: step: 668/531, loss: 0.00024652108550071716 2023-01-22 17:26:26.830454: step: 672/531, loss: 4.3499308958416805e-05 2023-01-22 17:26:27.869178: step: 676/531, loss: 0.0010115631157532334 2023-01-22 17:26:28.925324: step: 680/531, loss: 6.89786538714543e-05 2023-01-22 17:26:29.979354: step: 684/531, loss: 0.011706339195370674 2023-01-22 17:26:31.023981: step: 688/531, loss: 0.00024610068066976964 2023-01-22 17:26:32.098820: step: 692/531, loss: 0.00041355937719345093 2023-01-22 17:26:33.154377: step: 696/531, loss: 0.003793865442276001 2023-01-22 17:26:34.207479: step: 700/531, loss: 0.0017515491927042603 2023-01-22 17:26:35.269121: step: 704/531, loss: 0.0004933085292577744 2023-01-22 17:26:36.330665: step: 708/531, loss: 0.003368173725903034 2023-01-22 17:26:37.383721: step: 712/531, loss: 0.0026110573671758175 2023-01-22 17:26:38.456808: step: 716/531, loss: 0.010060235857963562 2023-01-22 17:26:39.498510: step: 720/531, loss: 0.0033873047214001417 2023-01-22 17:26:40.560906: step: 724/531, loss: 0.006929227150976658 2023-01-22 17:26:41.622161: step: 728/531, loss: 0.0001884236407931894 2023-01-22 17:26:42.679430: step: 732/531, loss: 0.0020551488269120455 2023-01-22 17:26:43.746024: step: 736/531, loss: 0.0011208693031221628 2023-01-22 17:26:44.815912: step: 740/531, loss: 0.0025508366525173187 2023-01-22 17:26:45.893723: step: 744/531, loss: 0.003605595324188471 2023-01-22 17:26:46.963008: step: 748/531, loss: 0.0031855276320129633 2023-01-22 17:26:48.027889: step: 752/531, loss: 0.0013335483381524682 2023-01-22 17:26:49.085233: step: 756/531, loss: 0.004736605100333691 2023-01-22 17:26:50.150007: step: 760/531, loss: 0.002244482282549143 2023-01-22 17:26:51.210172: step: 764/531, loss: 0.006045165471732616 2023-01-22 17:26:52.266852: step: 768/531, loss: 0.0025767809711396694 2023-01-22 17:26:53.332244: step: 772/531, loss: 0.0023516854271292686 2023-01-22 17:26:54.395644: step: 776/531, loss: 0.025837058201432228 2023-01-22 17:26:55.470032: step: 780/531, loss: 0.0033677786123007536 2023-01-22 17:26:56.533022: step: 784/531, loss: 
0.003299022326245904 2023-01-22 17:26:57.596005: step: 788/531, loss: 0.004824823699891567 2023-01-22 17:26:58.649432: step: 792/531, loss: 0.000424450496211648 2023-01-22 17:26:59.705746: step: 796/531, loss: 0.003976419102400541 2023-01-22 17:27:00.773809: step: 800/531, loss: 0.0009315311908721924 2023-01-22 17:27:01.827868: step: 804/531, loss: 0.001829913118854165 2023-01-22 17:27:02.900876: step: 808/531, loss: 0.0006813353975303471 2023-01-22 17:27:03.968489: step: 812/531, loss: 0.0003635450266301632 2023-01-22 17:27:05.025880: step: 816/531, loss: 0.0028247255831956863 2023-01-22 17:27:06.076203: step: 820/531, loss: 0.0003783183346968144 2023-01-22 17:27:07.143861: step: 824/531, loss: 0.02247655764222145 2023-01-22 17:27:08.195524: step: 828/531, loss: 0.006990055087953806 2023-01-22 17:27:09.246664: step: 832/531, loss: 0.003279736964032054 2023-01-22 17:27:10.317867: step: 836/531, loss: 0.00020956936350557953 2023-01-22 17:27:11.375233: step: 840/531, loss: 0.006325643975287676 2023-01-22 17:27:12.465620: step: 844/531, loss: 0.0015139882452785969 2023-01-22 17:27:13.539318: step: 848/531, loss: 0.003643469652161002 2023-01-22 17:27:14.605296: step: 852/531, loss: 0.0006772595806978643 2023-01-22 17:27:15.669981: step: 856/531, loss: 0.012736771255731583 2023-01-22 17:27:16.732004: step: 860/531, loss: 0.00013063388178125024 2023-01-22 17:27:17.799301: step: 864/531, loss: 0.016379380598664284 2023-01-22 17:27:18.866941: step: 868/531, loss: 0.0027588270604610443 2023-01-22 17:27:19.921482: step: 872/531, loss: 0.002072672825306654 2023-01-22 17:27:20.977311: step: 876/531, loss: 0.0073729208670556545 2023-01-22 17:27:22.062601: step: 880/531, loss: 0.004386465065181255 2023-01-22 17:27:23.108876: step: 884/531, loss: 0.00037287399754859507 2023-01-22 17:27:24.155140: step: 888/531, loss: 0.025077518075704575 2023-01-22 17:27:25.215419: step: 892/531, loss: 0.0010974209289997816 2023-01-22 17:27:26.268384: step: 896/531, loss: 0.00032600853592157364 2023-01-22 17:27:27.321663: step: 900/531, loss: 0.0027068040799349546 2023-01-22 17:27:28.376209: step: 904/531, loss: 0.008336818777024746 2023-01-22 17:27:29.439539: step: 908/531, loss: 0.023365305736660957 2023-01-22 17:27:30.490627: step: 912/531, loss: 0.02693640999495983 2023-01-22 17:27:31.541348: step: 916/531, loss: 0.001894272631034255 2023-01-22 17:27:32.611569: step: 920/531, loss: 0.0035084369592368603 2023-01-22 17:27:33.680640: step: 924/531, loss: 6.535274587804452e-05 2023-01-22 17:27:34.733334: step: 928/531, loss: 0.01578643172979355 2023-01-22 17:27:35.786635: step: 932/531, loss: 0.001120746717788279 2023-01-22 17:27:36.834684: step: 936/531, loss: 0.00036900717532262206 2023-01-22 17:27:37.918010: step: 940/531, loss: 0.003949716221541166 2023-01-22 17:27:38.972338: step: 944/531, loss: 0.013492215424776077 2023-01-22 17:27:40.023266: step: 948/531, loss: 0.0011108984472230077 2023-01-22 17:27:41.082452: step: 952/531, loss: 1.3997655514685903e-06 2023-01-22 17:27:42.131138: step: 956/531, loss: 0.006160073913633823 2023-01-22 17:27:43.183531: step: 960/531, loss: 0.0008319893968291581 2023-01-22 17:27:44.257594: step: 964/531, loss: 0.004093868657946587 2023-01-22 17:27:45.328535: step: 968/531, loss: 0.0016748070484027267 2023-01-22 17:27:46.386380: step: 972/531, loss: 0.014420202933251858 2023-01-22 17:27:47.448465: step: 976/531, loss: 0.044114142656326294 2023-01-22 17:27:48.508597: step: 980/531, loss: 0.002088951412588358 2023-01-22 17:27:49.562065: step: 984/531, loss: 0.0008230320527218282 
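In the per-epoch summaries (e.g., the epoch-13 block above), the reported 'combined' value appears to be the product of the template F1 and the slot F1: for Dev Chinese at epoch 13, 0.7368421052631579 × 0.33879037629037634 ≈ 0.24963501410869834, and Sample Korean gives 0.5 × 0.37755102040816324 = 0.18877551020408162. A small sanity check of that assumed relationship, with values copied from the summary above (the helper name is illustrative):

import math

def combined_score(template_f1, slot_f1):
    # Assumed relationship observed in the summaries: combined = template F1 * slot F1.
    return template_f1 * slot_f1

# Dev Chinese and Sample Korean, epoch 13 (values copied from the log above).
assert math.isclose(combined_score(0.7368421052631579, 0.33879037629037634),
                    0.24963501410869834, rel_tol=1e-6)
assert math.isclose(combined_score(0.5, 0.37755102040816324),
                    0.18877551020408162, rel_tol=1e-6)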
2023-01-22 17:27:50.621108: step: 988/531, loss: 2.7117554054711945e-05 2023-01-22 17:27:51.685926: step: 992/531, loss: 0.005365328397601843 2023-01-22 17:27:52.760754: step: 996/531, loss: 0.00030452950159087777 2023-01-22 17:27:53.821732: step: 1000/531, loss: 0.008147473447024822 2023-01-22 17:27:54.892638: step: 1004/531, loss: 0.0017271735705435276 2023-01-22 17:27:55.955390: step: 1008/531, loss: 0.019129585474729538 2023-01-22 17:27:57.023413: step: 1012/531, loss: 0.00033819032250903547 2023-01-22 17:27:58.075708: step: 1016/531, loss: 0.0006485722842626274 2023-01-22 17:27:59.123290: step: 1020/531, loss: 0.005802926141768694 2023-01-22 17:28:00.172415: step: 1024/531, loss: 7.30617975932546e-05 2023-01-22 17:28:01.230463: step: 1028/531, loss: 0.002927525667473674 2023-01-22 17:28:02.290847: step: 1032/531, loss: 0.001537336385808885 2023-01-22 17:28:03.342787: step: 1036/531, loss: 0.003212809097021818 2023-01-22 17:28:04.392024: step: 1040/531, loss: 0.007474812678992748 2023-01-22 17:28:05.448330: step: 1044/531, loss: 0.004082385450601578 2023-01-22 17:28:06.507600: step: 1048/531, loss: 0.004068862646818161 2023-01-22 17:28:07.566638: step: 1052/531, loss: 0.002151491353288293 2023-01-22 17:28:08.626553: step: 1056/531, loss: 0.00022242763952817768 2023-01-22 17:28:09.696157: step: 1060/531, loss: 0.0005699098692275584 2023-01-22 17:28:10.754200: step: 1064/531, loss: 0.0054177348501980305 2023-01-22 17:28:11.821966: step: 1068/531, loss: 0.0013560166116803885 2023-01-22 17:28:12.879335: step: 1072/531, loss: 0.0010135923512279987 2023-01-22 17:28:13.952885: step: 1076/531, loss: 0.011238436214625835 2023-01-22 17:28:15.004555: step: 1080/531, loss: 0.009857100434601307 2023-01-22 17:28:16.059518: step: 1084/531, loss: 0.006238971371203661 2023-01-22 17:28:17.136104: step: 1088/531, loss: 0.0022449446842074394 2023-01-22 17:28:18.202582: step: 1092/531, loss: 0.004349490161985159 2023-01-22 17:28:19.264144: step: 1096/531, loss: 0.001649242709390819 2023-01-22 17:28:20.323012: step: 1100/531, loss: 0.004435345530509949 2023-01-22 17:28:21.388656: step: 1104/531, loss: 0.0015254599275067449 2023-01-22 17:28:22.447451: step: 1108/531, loss: 0.0020954047795385122 2023-01-22 17:28:23.512084: step: 1112/531, loss: 0.002117619151249528 2023-01-22 17:28:24.581564: step: 1116/531, loss: 0.007768052630126476 2023-01-22 17:28:25.645344: step: 1120/531, loss: 0.0008308215183205903 2023-01-22 17:28:26.701035: step: 1124/531, loss: 0.014101465232670307 2023-01-22 17:28:27.759888: step: 1128/531, loss: 0.000940972997341305 2023-01-22 17:28:28.813339: step: 1132/531, loss: 0.011788940988481045 2023-01-22 17:28:29.876294: step: 1136/531, loss: 0.01554956566542387 2023-01-22 17:28:30.947850: step: 1140/531, loss: 0.002533720573410392 2023-01-22 17:28:32.002362: step: 1144/531, loss: 0.0010297263506799936 2023-01-22 17:28:33.070810: step: 1148/531, loss: 0.0009417362161912024 2023-01-22 17:28:34.148482: step: 1152/531, loss: 0.0034285648725926876 2023-01-22 17:28:35.203762: step: 1156/531, loss: 0.015229434706270695 2023-01-22 17:28:36.267577: step: 1160/531, loss: 0.0016063969815149903 2023-01-22 17:28:37.325878: step: 1164/531, loss: 0.002477684523910284 2023-01-22 17:28:38.386436: step: 1168/531, loss: 0.00459054671227932 2023-01-22 17:28:39.449499: step: 1172/531, loss: 0.0009257200290448964 2023-01-22 17:28:40.508121: step: 1176/531, loss: 0.0042646173387765884 2023-01-22 17:28:41.566147: step: 1180/531, loss: 0.0031062662601470947 2023-01-22 17:28:42.637833: step: 1184/531, loss: 
0.003644076641649008 2023-01-22 17:28:43.688828: step: 1188/531, loss: 0.014481316320598125 2023-01-22 17:28:44.759388: step: 1192/531, loss: 0.005696759559214115 2023-01-22 17:28:45.826556: step: 1196/531, loss: 0.018747806549072266 2023-01-22 17:28:46.892592: step: 1200/531, loss: 0.0006471088854596019 2023-01-22 17:28:47.942500: step: 1204/531, loss: 0.002548839198425412 2023-01-22 17:28:48.987861: step: 1208/531, loss: 0.002331779571250081 2023-01-22 17:28:50.048159: step: 1212/531, loss: 0.0007410774705931544 2023-01-22 17:28:51.106470: step: 1216/531, loss: 0.003397623309865594 2023-01-22 17:28:52.159270: step: 1220/531, loss: 0.004738804418593645 2023-01-22 17:28:53.221375: step: 1224/531, loss: 0.01094214990735054 2023-01-22 17:28:54.276911: step: 1228/531, loss: 0.004732155241072178 2023-01-22 17:28:55.342085: step: 1232/531, loss: 0.0016288203187286854 2023-01-22 17:28:56.387583: step: 1236/531, loss: 0.0003689306031446904 2023-01-22 17:28:57.430584: step: 1240/531, loss: 0.01876705326139927 2023-01-22 17:28:58.520997: step: 1244/531, loss: 0.0051356167532503605 2023-01-22 17:28:59.581040: step: 1248/531, loss: 0.000179226859472692 2023-01-22 17:29:00.647084: step: 1252/531, loss: 0.0016030054539442062 2023-01-22 17:29:01.695787: step: 1256/531, loss: 5.2526374361150374e-08 2023-01-22 17:29:02.747641: step: 1260/531, loss: 0.017035307362675667 2023-01-22 17:29:03.805451: step: 1264/531, loss: 0.0010174678172916174 2023-01-22 17:29:04.861681: step: 1268/531, loss: 0.005724236369132996 2023-01-22 17:29:05.920924: step: 1272/531, loss: 0.011316437274217606 2023-01-22 17:29:06.982346: step: 1276/531, loss: 6.072134510759497e-07 2023-01-22 17:29:08.028644: step: 1280/531, loss: 0.00015859492123126984 2023-01-22 17:29:09.081326: step: 1284/531, loss: 0.07415708154439926 2023-01-22 17:29:10.138289: step: 1288/531, loss: 9.049095388036221e-05 2023-01-22 17:29:11.214844: step: 1292/531, loss: 0.00020989171753171831 2023-01-22 17:29:12.286807: step: 1296/531, loss: 6.227486301213503e-05 2023-01-22 17:29:13.358799: step: 1300/531, loss: 0.009107462130486965 2023-01-22 17:29:14.437580: step: 1304/531, loss: 0.00450220936909318 2023-01-22 17:29:15.490974: step: 1308/531, loss: 0.004006167873740196 2023-01-22 17:29:16.535254: step: 1312/531, loss: 0.0016605237033218145 2023-01-22 17:29:17.588974: step: 1316/531, loss: 0.05278365686535835 2023-01-22 17:29:18.645719: step: 1320/531, loss: 0.0002809948055073619 2023-01-22 17:29:19.697439: step: 1324/531, loss: 1.4901159195446212e-09 2023-01-22 17:29:20.756520: step: 1328/531, loss: 0.026465419679880142 2023-01-22 17:29:21.805283: step: 1332/531, loss: 0.00578249990940094 2023-01-22 17:29:22.858792: step: 1336/531, loss: 0.003918411210179329 2023-01-22 17:29:23.908044: step: 1340/531, loss: 0.00020096925436519086 2023-01-22 17:29:24.982807: step: 1344/531, loss: 0.005762449465692043 2023-01-22 17:29:26.033549: step: 1348/531, loss: 5.289510227157734e-05 2023-01-22 17:29:27.115786: step: 1352/531, loss: 0.009238005615770817 2023-01-22 17:29:28.157043: step: 1356/531, loss: 0.05169999971985817 2023-01-22 17:29:29.218235: step: 1360/531, loss: 2.4488930648658425e-05 2023-01-22 17:29:30.283506: step: 1364/531, loss: 0.003602051641792059 2023-01-22 17:29:31.350275: step: 1368/531, loss: 0.009253833442926407 2023-01-22 17:29:32.411791: step: 1372/531, loss: 0.006664591375738382 2023-01-22 17:29:33.475195: step: 1376/531, loss: 0.0019129817374050617 2023-01-22 17:29:34.531973: step: 1380/531, loss: 0.005156231112778187 2023-01-22 17:29:35.581911: step: 
1384/531, loss: 0.006752627901732922 2023-01-22 17:29:36.634231: step: 1388/531, loss: 0.010048375464975834 2023-01-22 17:29:37.700475: step: 1392/531, loss: 0.005329609848558903 2023-01-22 17:29:38.754634: step: 1396/531, loss: 0.0031355631072074175 2023-01-22 17:29:39.813200: step: 1400/531, loss: 0.008318467997014523 2023-01-22 17:29:40.871252: step: 1404/531, loss: 0.0032343617640435696 2023-01-22 17:29:41.935456: step: 1408/531, loss: 0.007278535980731249 2023-01-22 17:29:42.998851: step: 1412/531, loss: 0.0010737443808466196 2023-01-22 17:29:44.064458: step: 1416/531, loss: 0.007663228083401918 2023-01-22 17:29:45.118761: step: 1420/531, loss: 0.0006519157323054969 2023-01-22 17:29:46.176748: step: 1424/531, loss: 0.0005340309580788016 2023-01-22 17:29:47.237385: step: 1428/531, loss: 6.025416496413527e-06 2023-01-22 17:29:48.297898: step: 1432/531, loss: 0.012702541425824165 2023-01-22 17:29:49.363892: step: 1436/531, loss: 0.0022467621602118015 2023-01-22 17:29:50.410446: step: 1440/531, loss: 0.00802487600594759 2023-01-22 17:29:51.465012: step: 1444/531, loss: 0.002211732091382146 2023-01-22 17:29:52.536178: step: 1448/531, loss: 0.0014593214727938175 2023-01-22 17:29:53.583160: step: 1452/531, loss: 0.004380774684250355 2023-01-22 17:29:54.647290: step: 1456/531, loss: 0.011990535072982311 2023-01-22 17:29:55.701783: step: 1460/531, loss: 0.00494889821857214 2023-01-22 17:29:56.753540: step: 1464/531, loss: 0.0019130029249936342 2023-01-22 17:29:57.809712: step: 1468/531, loss: 0.0017892494797706604 2023-01-22 17:29:58.885131: step: 1472/531, loss: 0.00023916776990517974 2023-01-22 17:29:59.950469: step: 1476/531, loss: 0.006511999294161797 2023-01-22 17:30:01.003637: step: 1480/531, loss: 5.554518065764569e-05 2023-01-22 17:30:02.056693: step: 1484/531, loss: 0.0010838648304343224 2023-01-22 17:30:03.124591: step: 1488/531, loss: 0.0025906478986144066 2023-01-22 17:30:04.179909: step: 1492/531, loss: 0.005643096286803484 2023-01-22 17:30:05.248920: step: 1496/531, loss: 7.056837057461962e-05 2023-01-22 17:30:06.316595: step: 1500/531, loss: 0.005247182212769985 2023-01-22 17:30:07.361684: step: 1504/531, loss: 0.0010700220009312034 2023-01-22 17:30:08.427599: step: 1508/531, loss: 0.009434818290174007 2023-01-22 17:30:09.492910: step: 1512/531, loss: 0.008374935016036034 2023-01-22 17:30:10.559622: step: 1516/531, loss: 0.0043015568517148495 2023-01-22 17:30:11.634827: step: 1520/531, loss: 0.002789322752505541 2023-01-22 17:30:12.686597: step: 1524/531, loss: 0.009308841079473495 2023-01-22 17:30:13.755652: step: 1528/531, loss: 0.003999396227300167 2023-01-22 17:30:14.818038: step: 1532/531, loss: 0.005346197169274092 2023-01-22 17:30:15.876349: step: 1536/531, loss: 0.010844535194337368 2023-01-22 17:30:16.937763: step: 1540/531, loss: 0.0010235788067802787 2023-01-22 17:30:17.987512: step: 1544/531, loss: 0.015632014721632004 2023-01-22 17:30:19.033604: step: 1548/531, loss: 6.488789222203195e-05 2023-01-22 17:30:20.090969: step: 1552/531, loss: 0.003997104242444038 2023-01-22 17:30:21.157506: step: 1556/531, loss: 0.0002263225323986262 2023-01-22 17:30:22.193193: step: 1560/531, loss: 9.856142241915222e-06 2023-01-22 17:30:23.244279: step: 1564/531, loss: 0.004197990987449884 2023-01-22 17:30:24.302167: step: 1568/531, loss: 0.0035436716862022877 2023-01-22 17:30:25.349794: step: 1572/531, loss: 0.02010076493024826 2023-01-22 17:30:26.423282: step: 1576/531, loss: 0.0050505381077528 2023-01-22 17:30:27.473940: step: 1580/531, loss: 0.003970159683376551 2023-01-22 
17:30:28.515609: step: 1584/531, loss: 3.6895977245876566e-05 2023-01-22 17:30:29.564603: step: 1588/531, loss: 0.0018596414010971785 2023-01-22 17:30:30.630118: step: 1592/531, loss: 0.0009473281679674983 2023-01-22 17:30:31.686525: step: 1596/531, loss: 0.006821515038609505 2023-01-22 17:30:32.755008: step: 1600/531, loss: 0.007906089536845684 2023-01-22 17:30:33.802490: step: 1604/531, loss: 0.0043446654453873634 2023-01-22 17:30:34.876768: step: 1608/531, loss: 0.0051284958608448505 2023-01-22 17:30:35.948629: step: 1612/531, loss: 0.0002567243645898998 2023-01-22 17:30:37.001627: step: 1616/531, loss: 0.003474753350019455 2023-01-22 17:30:38.049345: step: 1620/531, loss: 0.0014915207866579294 2023-01-22 17:30:39.099935: step: 1624/531, loss: 0.004405052401125431 2023-01-22 17:30:40.171949: step: 1628/531, loss: 0.0037583820521831512 2023-01-22 17:30:41.218722: step: 1632/531, loss: 3.7574278394458815e-05 2023-01-22 17:30:42.276808: step: 1636/531, loss: 0.0027674599550664425 2023-01-22 17:30:43.332003: step: 1640/531, loss: 0.007190498989075422 2023-01-22 17:30:44.389904: step: 1644/531, loss: 0.0006815463420934975 2023-01-22 17:30:45.448614: step: 1648/531, loss: 0.0009214354795403779 2023-01-22 17:30:46.512707: step: 1652/531, loss: 0.0030990124214440584 2023-01-22 17:30:47.561269: step: 1656/531, loss: 0.0011164223542436957 2023-01-22 17:30:48.649434: step: 1660/531, loss: 0.0017048048321157694 2023-01-22 17:30:49.701116: step: 1664/531, loss: 0.0002919072285294533 2023-01-22 17:30:50.755311: step: 1668/531, loss: 0.0024997543077915907 2023-01-22 17:30:51.834962: step: 1672/531, loss: 0.00972164049744606 2023-01-22 17:30:52.905186: step: 1676/531, loss: 0.0343078188598156 2023-01-22 17:30:53.968558: step: 1680/531, loss: 0.0010097825434058905 2023-01-22 17:30:55.020836: step: 1684/531, loss: 0.00311590195633471 2023-01-22 17:30:56.066326: step: 1688/531, loss: 0.0029082675464451313 2023-01-22 17:30:57.123287: step: 1692/531, loss: 0.008551392704248428 2023-01-22 17:30:58.192091: step: 1696/531, loss: 0.0001803740597097203 2023-01-22 17:30:59.261240: step: 1700/531, loss: 0.009989561513066292 2023-01-22 17:31:00.315076: step: 1704/531, loss: 0.002462916076183319 2023-01-22 17:31:01.374407: step: 1708/531, loss: 0.0035486482083797455 2023-01-22 17:31:02.432520: step: 1712/531, loss: 0.001967664808034897 2023-01-22 17:31:03.481626: step: 1716/531, loss: 0.002778839087113738 2023-01-22 17:31:04.537350: step: 1720/531, loss: 0.001200348138809204 2023-01-22 17:31:05.592862: step: 1724/531, loss: 0.0009527311194688082 2023-01-22 17:31:06.634637: step: 1728/531, loss: 0.0022733428049832582 2023-01-22 17:31:07.703639: step: 1732/531, loss: 0.004268552642315626 2023-01-22 17:31:08.758389: step: 1736/531, loss: 0.0017586820758879185 2023-01-22 17:31:09.807159: step: 1740/531, loss: 0.0017639162251725793 2023-01-22 17:31:10.878533: step: 1744/531, loss: 0.007432482670992613 2023-01-22 17:31:11.967531: step: 1748/531, loss: 0.0008550300844945014 2023-01-22 17:31:13.064966: step: 1752/531, loss: 0.00046894658589735627 2023-01-22 17:31:14.113011: step: 1756/531, loss: 0.0005495856166817248 2023-01-22 17:31:15.164663: step: 1760/531, loss: 0.00046782713616266847 2023-01-22 17:31:16.229619: step: 1764/531, loss: 0.0016932528233155608 2023-01-22 17:31:17.283680: step: 1768/531, loss: 0.002158620161935687 2023-01-22 17:31:18.332404: step: 1772/531, loss: 4.678270488511771e-05 2023-01-22 17:31:19.391287: step: 1776/531, loss: 0.005452624522149563 2023-01-22 17:31:20.447269: step: 1780/531, loss: 
0.0002606469497550279 2023-01-22 17:31:21.524719: step: 1784/531, loss: 0.006427340675145388 2023-01-22 17:31:22.579805: step: 1788/531, loss: 0.00634990306571126 2023-01-22 17:31:23.647658: step: 1792/531, loss: 0.003668583231046796 2023-01-22 17:31:24.693485: step: 1796/531, loss: 0.0021028018090873957 2023-01-22 17:31:25.756370: step: 1800/531, loss: 0.002138777868822217 2023-01-22 17:31:26.808147: step: 1804/531, loss: 0.0001514388422947377 2023-01-22 17:31:27.866335: step: 1808/531, loss: 0.00022652155894320458 2023-01-22 17:31:28.907599: step: 1812/531, loss: 0.0002831830643117428 2023-01-22 17:31:29.957546: step: 1816/531, loss: 9.081160533241928e-05 2023-01-22 17:31:31.010895: step: 1820/531, loss: 0.0007973187603056431 2023-01-22 17:31:32.085347: step: 1824/531, loss: 0.00013473823491949588 2023-01-22 17:31:33.143478: step: 1828/531, loss: 0.0061875288374722 2023-01-22 17:31:34.206919: step: 1832/531, loss: 0.004659847356379032 2023-01-22 17:31:35.257337: step: 1836/531, loss: 0.00021924739121459424 2023-01-22 17:31:36.311562: step: 1840/531, loss: 0.0023256917484104633 2023-01-22 17:31:37.370212: step: 1844/531, loss: 0.006041111424565315 2023-01-22 17:31:38.423543: step: 1848/531, loss: 2.155197216779925e-05 2023-01-22 17:31:39.477611: step: 1852/531, loss: 0.0015061178710311651 2023-01-22 17:31:40.534140: step: 1856/531, loss: 0.003322010859847069 2023-01-22 17:31:41.602273: step: 1860/531, loss: 0.006429298315197229 2023-01-22 17:31:42.666900: step: 1864/531, loss: 0.001559158437885344 2023-01-22 17:31:43.721398: step: 1868/531, loss: 0.010159683413803577 2023-01-22 17:31:44.783420: step: 1872/531, loss: 0.004556985571980476 2023-01-22 17:31:45.831911: step: 1876/531, loss: 0.003635360626503825 2023-01-22 17:31:46.896404: step: 1880/531, loss: 0.004577002488076687 2023-01-22 17:31:47.948251: step: 1884/531, loss: 0.002632185583934188 2023-01-22 17:31:48.996235: step: 1888/531, loss: 0.010948077775537968 2023-01-22 17:31:50.066734: step: 1892/531, loss: 0.0030329760629683733 2023-01-22 17:31:51.118275: step: 1896/531, loss: 0.0044677406549453735 2023-01-22 17:31:52.178401: step: 1900/531, loss: 0.000990628032013774 2023-01-22 17:31:53.237532: step: 1904/531, loss: 0.0005836610216647387 2023-01-22 17:31:54.295152: step: 1908/531, loss: 0.0011377936461940408 2023-01-22 17:31:55.352868: step: 1912/531, loss: 0.00619694497436285 2023-01-22 17:31:56.409644: step: 1916/531, loss: 0.003997748252004385 2023-01-22 17:31:57.475792: step: 1920/531, loss: 0.003310997737571597 2023-01-22 17:31:58.541867: step: 1924/531, loss: 0.007806388661265373 2023-01-22 17:31:59.593842: step: 1928/531, loss: 4.578873631544411e-05 2023-01-22 17:32:00.652066: step: 1932/531, loss: 0.006815907079726458 2023-01-22 17:32:01.698591: step: 1936/531, loss: 4.5634459411303396e-07 2023-01-22 17:32:02.744237: step: 1940/531, loss: 0.001159795792773366 2023-01-22 17:32:03.801411: step: 1944/531, loss: 0.0026013364549726248 2023-01-22 17:32:04.869906: step: 1948/531, loss: 0.0038500570226460695 2023-01-22 17:32:05.961591: step: 1952/531, loss: 0.008455782197415829 2023-01-22 17:32:07.012500: step: 1956/531, loss: 0.0002958730619866401 2023-01-22 17:32:08.071930: step: 1960/531, loss: 0.005648695398122072 2023-01-22 17:32:09.137253: step: 1964/531, loss: 0.006919756531715393 2023-01-22 17:32:10.184484: step: 1968/531, loss: 0.0026051849126815796 2023-01-22 17:32:11.233624: step: 1972/531, loss: 0.0015414806548506021 2023-01-22 17:32:12.284374: step: 1976/531, loss: 0.005913617089390755 2023-01-22 17:32:13.366527: 
step: 1980/531, loss: 0.004916145000606775 2023-01-22 17:32:14.418355: step: 1984/531, loss: 0.0015078928554430604 2023-01-22 17:32:15.490810: step: 1988/531, loss: 0.0071089365519583225 2023-01-22 17:32:16.541856: step: 1992/531, loss: 0.008130467496812344 2023-01-22 17:32:17.596371: step: 1996/531, loss: 0.0010581787209957838 2023-01-22 17:32:18.658989: step: 2000/531, loss: 0.001217597397044301 2023-01-22 17:32:19.728677: step: 2004/531, loss: 0.0007773605175316334 2023-01-22 17:32:20.813554: step: 2008/531, loss: 0.0024157122243195772 2023-01-22 17:32:21.883019: step: 2012/531, loss: 0.004002046305686235 2023-01-22 17:32:22.942069: step: 2016/531, loss: 0.005973339080810547 2023-01-22 17:32:24.003880: step: 2020/531, loss: 0.0012962199980393052 2023-01-22 17:32:25.070418: step: 2024/531, loss: 0.00107801822014153 2023-01-22 17:32:26.140940: step: 2028/531, loss: 0.0035686420742422342 2023-01-22 17:32:27.212886: step: 2032/531, loss: 0.005178095772862434 2023-01-22 17:32:28.267723: step: 2036/531, loss: 0.004839688073843718 2023-01-22 17:32:29.333898: step: 2040/531, loss: 0.003223894629627466 2023-01-22 17:32:30.396823: step: 2044/531, loss: 0.0009847404435276985 2023-01-22 17:32:31.469988: step: 2048/531, loss: 0.003674639854580164 2023-01-22 17:32:32.545215: step: 2052/531, loss: 0.0009432816877961159 2023-01-22 17:32:33.600767: step: 2056/531, loss: 0.004605493508279324 2023-01-22 17:32:34.660549: step: 2060/531, loss: 0.003991587553173304 2023-01-22 17:32:35.727425: step: 2064/531, loss: 0.0018774409545585513 2023-01-22 17:32:36.786413: step: 2068/531, loss: 0.00480128126218915 2023-01-22 17:32:37.833994: step: 2072/531, loss: 0.024160893633961678 2023-01-22 17:32:38.903801: step: 2076/531, loss: 0.00102907489053905 2023-01-22 17:32:39.958858: step: 2080/531, loss: 0.00034973936271853745 2023-01-22 17:32:41.011809: step: 2084/531, loss: 0.006875805556774139 2023-01-22 17:32:42.102904: step: 2088/531, loss: 0.0020749513059854507 2023-01-22 17:32:43.158709: step: 2092/531, loss: 0.0017216912237927318 2023-01-22 17:32:44.222448: step: 2096/531, loss: 0.002929131733253598 2023-01-22 17:32:45.288176: step: 2100/531, loss: 0.016796234995126724 2023-01-22 17:32:46.341053: step: 2104/531, loss: 0.0022440662141889334 2023-01-22 17:32:47.399361: step: 2108/531, loss: 0.0030159587040543556 2023-01-22 17:32:48.453869: step: 2112/531, loss: 0.0002595247933641076 2023-01-22 17:32:49.509564: step: 2116/531, loss: 0.005056208930909634 2023-01-22 17:32:50.571548: step: 2120/531, loss: 0.011324580758810043 2023-01-22 17:32:51.625191: step: 2124/531, loss: 0.00010594310879241675 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3527675438596492, 'r': 0.31735716540404046, 'f1': 0.3341267863077435}, 'combined': 0.246198684647811, 'stategy': 1, 'epoch': 14} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33602806040912, 'r': 0.27658627354974497, 'f1': 0.30342335751793803}, 'combined': 0.18999406498786775, 'stategy': 1, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33272089097496704, 'r': 0.348504614455753, 'f1': 0.34042990142387736}, 'combined': 0.2508430852596991, 'stategy': 1, 'epoch': 14} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3620731730998595, 'r': 0.3013467437070968, 
'f1': 0.3289306483968212}, 'combined': 0.20384434548535396, 'stategy': 1, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.315646101903008, 'r': 0.3252292852624921, 'f1': 0.3203660436137072}, 'combined': 0.23605919003115264, 'stategy': 1, 'epoch': 14} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3648725467759199, 'r': 0.28774501656312385, 'f1': 0.3217512457933112}, 'combined': 0.21343894522922627, 'stategy': 1, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35976479915433407, 'r': 0.3222892992424243, 'f1': 0.33999750249750255}, 'combined': 0.25052447552447554, 'stategy': 1, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33494215352441215, 'r': 0.2738786923223081, 'f1': 0.3013481540150123}, 'combined': 0.18869463849538157, 'stategy': 1, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 15 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 
--accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 17:35:19.219777: step: 4/531, loss: 0.0009297167416661978 2023-01-22 17:35:20.274974: step: 8/531, loss: 0.004007861949503422 2023-01-22 17:35:21.330382: step: 12/531, loss: 0.004778177477419376 2023-01-22 17:35:22.389208: step: 16/531, loss: 0.002197671914473176 2023-01-22 17:35:23.462321: step: 20/531, loss: 0.004840637091547251 2023-01-22 17:35:24.514766: step: 24/531, loss: 0.002825033850967884 2023-01-22 17:35:25.572220: step: 28/531, loss: 0.006522003095597029 2023-01-22 17:35:26.627895: step: 32/531, loss: 0.0005030282191000879 2023-01-22 17:35:27.676133: step: 36/531, loss: 3.879536234308034e-05 2023-01-22 17:35:28.726883: step: 40/531, loss: 0.0027950697112828493 2023-01-22 17:35:29.786028: step: 44/531, loss: 0.000885081070009619 2023-01-22 17:35:30.834158: step: 48/531, loss: 0.0019474371802061796 2023-01-22 17:35:31.884123: step: 52/531, loss: 0.0018467579502612352 2023-01-22 17:35:32.931931: step: 56/531, loss: 0.0018239349592477083 2023-01-22 17:35:33.981299: step: 60/531, loss: 0.0021507146302610636 2023-01-22 17:35:35.044258: step: 64/531, loss: 0.002219590824097395 2023-01-22 17:35:36.105671: step: 68/531, loss: 0.008323084563016891 2023-01-22 17:35:37.178290: step: 72/531, loss: 0.0005446018767543137 2023-01-22 17:35:38.220597: step: 76/531, loss: 0.0010872746352106333 2023-01-22 17:35:39.272108: step: 80/531, loss: 0.0016193060437217355 2023-01-22 17:35:40.323749: step: 84/531, loss: 8.282660564873368e-05 2023-01-22 17:35:41.367636: step: 88/531, loss: 0.0004665288724936545 2023-01-22 17:35:42.441957: step: 92/531, loss: 0.0012503565521910787 2023-01-22 17:35:43.493016: step: 96/531, loss: 0.000962749938480556 2023-01-22 17:35:44.547866: step: 100/531, loss: 0.0021599980536848307 2023-01-22 17:35:45.621345: step: 104/531, loss: 0.003258185926824808 2023-01-22 17:35:46.675786: step: 108/531, loss: 0.0023698259610682726 2023-01-22 17:35:47.731713: step: 112/531, loss: 0.0032050651498138905 2023-01-22 17:35:48.780019: step: 116/531, loss: 0.00022550602443516254 2023-01-22 17:35:49.840937: step: 120/531, loss: 0.007348128594458103 2023-01-22 17:35:50.911868: step: 124/531, loss: 0.005411579739302397 2023-01-22 17:35:51.963829: step: 128/531, loss: 9.814813529374078e-05 2023-01-22 17:35:53.013932: step: 132/531, loss: 0.005580283235758543 2023-01-22 17:35:54.075672: step: 136/531, loss: 0.004964708350598812 2023-01-22 17:35:55.116969: step: 140/531, loss: 0.0017173548694700003 2023-01-22 17:35:56.187579: step: 144/531, loss: 0.0005969404010102153 2023-01-22 17:35:57.241283: step: 148/531, loss: 0.000544672948308289 2023-01-22 17:35:58.291183: step: 152/531, loss: 6.936961290193722e-05 2023-01-22 17:35:59.349997: step: 156/531, loss: 0.0046898918226361275 2023-01-22 17:36:00.408739: step: 160/531, loss: 0.0001894142769742757 2023-01-22 17:36:01.455559: step: 164/531, loss: 0.0027475047390908003 2023-01-22 17:36:02.516826: step: 168/531, loss: 0.000921240309253335 2023-01-22 17:36:03.568646: step: 172/531, loss: 0.00041818353929556906 2023-01-22 17:36:04.641456: step: 176/531, loss: 0.003889592830091715 2023-01-22 17:36:05.688951: step: 180/531, loss: 0.0002383182873018086 2023-01-22 17:36:06.754739: step: 184/531, loss: 0.0033205451909452677 2023-01-22 17:36:07.821838: step: 188/531, loss: 0.002614720957353711 2023-01-22 17:36:08.888384: step: 192/531, loss: 0.0038635912351310253 2023-01-22 17:36:09.952394: step: 196/531, loss: 0.006494181230664253 2023-01-22 
17:36:11.023813: step: 200/531, loss: 0.00048694873112253845 2023-01-22 17:36:12.099887: step: 204/531, loss: 0.010614367201924324 2023-01-22 17:36:13.156233: step: 208/531, loss: 0.010754323564469814 2023-01-22 17:36:14.213822: step: 212/531, loss: 0.0004258018161635846 2023-01-22 17:36:15.261812: step: 216/531, loss: 0.0008007609867490828 2023-01-22 17:36:16.318659: step: 220/531, loss: 0.0032074851915240288 2023-01-22 17:36:17.370337: step: 224/531, loss: 0.0036868557799607515 2023-01-22 17:36:18.430658: step: 228/531, loss: 0.0023249229416251183 2023-01-22 17:36:19.479515: step: 232/531, loss: 0.00012762272672262043 2023-01-22 17:36:20.537519: step: 236/531, loss: 0.00022608712606597692 2023-01-22 17:36:21.606207: step: 240/531, loss: 0.00021699524950236082 2023-01-22 17:36:22.676049: step: 244/531, loss: 9.294210758525878e-05 2023-01-22 17:36:23.724914: step: 248/531, loss: 0.004078640136867762 2023-01-22 17:36:24.781974: step: 252/531, loss: 0.0005958827678114176 2023-01-22 17:36:25.847117: step: 256/531, loss: 0.0031321807764470577 2023-01-22 17:36:26.911741: step: 260/531, loss: 0.0004113212926313281 2023-01-22 17:36:27.947491: step: 264/531, loss: 0.004436914809048176 2023-01-22 17:36:29.009006: step: 268/531, loss: 0.0007022920181043446 2023-01-22 17:36:30.065289: step: 272/531, loss: 0.0003805515298154205 2023-01-22 17:36:31.128011: step: 276/531, loss: 0.0008302227943204343 2023-01-22 17:36:32.194955: step: 280/531, loss: 0.0009930954547598958 2023-01-22 17:36:33.258505: step: 284/531, loss: 0.0004300784203223884 2023-01-22 17:36:34.328314: step: 288/531, loss: 1.254850303666899e-05 2023-01-22 17:36:35.382690: step: 292/531, loss: 0.00011757396714529023 2023-01-22 17:36:36.446279: step: 296/531, loss: 0.0046015409752726555 2023-01-22 17:36:37.499478: step: 300/531, loss: 0.000182485266122967 2023-01-22 17:36:38.564425: step: 304/531, loss: 0.002524326788261533 2023-01-22 17:36:39.612796: step: 308/531, loss: 0.0001551653112983331 2023-01-22 17:36:40.658176: step: 312/531, loss: 0.0005007855361327529 2023-01-22 17:36:41.725744: step: 316/531, loss: 0.001926211640238762 2023-01-22 17:36:42.774713: step: 320/531, loss: 0.0016906842356547713 2023-01-22 17:36:43.835412: step: 324/531, loss: 0.002724287798628211 2023-01-22 17:36:44.886321: step: 328/531, loss: 0.002014667959883809 2023-01-22 17:36:45.969487: step: 332/531, loss: 0.00012962426990270615 2023-01-22 17:36:47.023164: step: 336/531, loss: 0.001721011707559228 2023-01-22 17:36:48.087901: step: 340/531, loss: 0.002679077908396721 2023-01-22 17:36:49.139461: step: 344/531, loss: 0.002680771518498659 2023-01-22 17:36:50.200502: step: 348/531, loss: 0.0009493392426520586 2023-01-22 17:36:51.257264: step: 352/531, loss: 0.002221663948148489 2023-01-22 17:36:52.330619: step: 356/531, loss: 0.000615772558376193 2023-01-22 17:36:53.383376: step: 360/531, loss: 0.0007418880704790354 2023-01-22 17:36:54.437806: step: 364/531, loss: 1.619071190361865e-05 2023-01-22 17:36:55.482806: step: 368/531, loss: 0.000788150995504111 2023-01-22 17:36:56.541227: step: 372/531, loss: 0.0005295132868923247 2023-01-22 17:36:57.606183: step: 376/531, loss: 0.0009582198108546436 2023-01-22 17:36:58.676036: step: 380/531, loss: 0.0006560353212989867 2023-01-22 17:36:59.739533: step: 384/531, loss: 0.002854851307347417 2023-01-22 17:37:00.821532: step: 388/531, loss: 0.0036674593575298786 2023-01-22 17:37:01.893106: step: 392/531, loss: 1.1197299500054214e-05 2023-01-22 17:37:02.954330: step: 396/531, loss: 0.0025105879176408052 2023-01-22 
17:37:04.020312: step: 400/531, loss: 0.0021822794806212187 2023-01-22 17:37:05.091471: step: 404/531, loss: 0.0012240527430549264 2023-01-22 17:37:06.150489: step: 408/531, loss: 0.00037952701677568257 2023-01-22 17:37:07.218785: step: 412/531, loss: 0.0005294446018524468 2023-01-22 17:37:08.275186: step: 416/531, loss: 0.00405338266864419 2023-01-22 17:37:09.340797: step: 420/531, loss: 0.00394094455987215 2023-01-22 17:37:10.430568: step: 424/531, loss: 0.01870819739997387 2023-01-22 17:37:11.485814: step: 428/531, loss: 0.004751021973788738 2023-01-22 17:37:12.573795: step: 432/531, loss: 0.0018602223135530949 2023-01-22 17:37:13.630723: step: 436/531, loss: 0.0002575826656538993 2023-01-22 17:37:14.700199: step: 440/531, loss: 0.005963757634162903 2023-01-22 17:37:15.753422: step: 444/531, loss: 0.021844692528247833 2023-01-22 17:37:16.820297: step: 448/531, loss: 0.01680346205830574 2023-01-22 17:37:17.881523: step: 452/531, loss: 0.0038715405389666557 2023-01-22 17:37:18.942520: step: 456/531, loss: 0.004481372889131308 2023-01-22 17:37:19.997937: step: 460/531, loss: 9.810111805563793e-06 2023-01-22 17:37:21.069359: step: 464/531, loss: 0.0007966441917233169 2023-01-22 17:37:22.139055: step: 468/531, loss: 0.013006845489144325 2023-01-22 17:37:23.197583: step: 472/531, loss: 0.0003879364812746644 2023-01-22 17:37:24.247180: step: 476/531, loss: 3.171792923239991e-05 2023-01-22 17:37:25.305141: step: 480/531, loss: 0.0013922168873250484 2023-01-22 17:37:26.359941: step: 484/531, loss: 0.0011973888613283634 2023-01-22 17:37:27.422588: step: 488/531, loss: 0.002654570620507002 2023-01-22 17:37:28.481150: step: 492/531, loss: 0.0008288152748718858 2023-01-22 17:37:29.546470: step: 496/531, loss: 0.0024223164655268192 2023-01-22 17:37:30.608186: step: 500/531, loss: 0.0008029621676541865 2023-01-22 17:37:31.667972: step: 504/531, loss: 0.0003220121143385768 2023-01-22 17:37:32.735285: step: 508/531, loss: 0.0008974025840871036 2023-01-22 17:37:33.814067: step: 512/531, loss: 0.014584644697606564 2023-01-22 17:37:34.874648: step: 516/531, loss: 0.0003352530184201896 2023-01-22 17:37:35.957559: step: 520/531, loss: 0.00376947782933712 2023-01-22 17:37:37.027711: step: 524/531, loss: 8.404223626712337e-05 2023-01-22 17:37:38.081658: step: 528/531, loss: 0.004354581236839294 2023-01-22 17:37:39.173530: step: 532/531, loss: 0.005631724372506142 2023-01-22 17:37:40.238394: step: 536/531, loss: 0.007826856337487698 2023-01-22 17:37:41.291844: step: 540/531, loss: 8.72770615387708e-05 2023-01-22 17:37:42.357596: step: 544/531, loss: 0.00010538606147747487 2023-01-22 17:37:43.431964: step: 548/531, loss: 0.0004370012029539794 2023-01-22 17:37:44.516066: step: 552/531, loss: 0.004729630891233683 2023-01-22 17:37:45.577941: step: 556/531, loss: 0.00027069816133007407 2023-01-22 17:37:46.635188: step: 560/531, loss: 0.003029781859368086 2023-01-22 17:37:47.707448: step: 564/531, loss: 0.009513920173048973 2023-01-22 17:37:48.789421: step: 568/531, loss: 0.003292496083304286 2023-01-22 17:37:49.846741: step: 572/531, loss: 0.0024838829413056374 2023-01-22 17:37:50.919232: step: 576/531, loss: 0.002788601443171501 2023-01-22 17:37:51.987705: step: 580/531, loss: 0.002021592576056719 2023-01-22 17:37:53.057120: step: 584/531, loss: 0.0010590646415948868 2023-01-22 17:37:54.116000: step: 588/531, loss: 0.0007489988929592073 2023-01-22 17:37:55.194736: step: 592/531, loss: 0.00048802714445628226 2023-01-22 17:37:56.264845: step: 596/531, loss: 0.005037723574787378 2023-01-22 17:37:57.348849: step: 
600/531, loss: 0.00013229298929218203 2023-01-22 17:37:58.404747: step: 604/531, loss: 0.019517280161380768 2023-01-22 17:37:59.490819: step: 608/531, loss: 5.1459428505040705e-05 2023-01-22 17:38:00.550196: step: 612/531, loss: 0.004906481597572565 2023-01-22 17:38:01.626752: step: 616/531, loss: 0.002516153734177351 2023-01-22 17:38:02.682651: step: 620/531, loss: 0.0012465064646676183 2023-01-22 17:38:03.760372: step: 624/531, loss: 0.001101003959774971 2023-01-22 17:38:04.825791: step: 628/531, loss: 2.837063766492065e-05 2023-01-22 17:38:05.887422: step: 632/531, loss: 0.0006293684127740562 2023-01-22 17:38:06.963887: step: 636/531, loss: 0.012196602299809456 2023-01-22 17:38:08.020329: step: 640/531, loss: 2.4855626179487444e-05 2023-01-22 17:38:09.078364: step: 644/531, loss: 0.000763328280299902 2023-01-22 17:38:10.165969: step: 648/531, loss: 0.00030824964051134884 2023-01-22 17:38:11.225897: step: 652/531, loss: 0.0005002337857149541 2023-01-22 17:38:12.290657: step: 656/531, loss: 0.00023411422444041818 2023-01-22 17:38:13.359454: step: 660/531, loss: 0.0048622870817780495 2023-01-22 17:38:14.418844: step: 664/531, loss: 0.0003881608135998249 2023-01-22 17:38:15.492823: step: 668/531, loss: 0.00048383790999650955 2023-01-22 17:38:16.543620: step: 672/531, loss: 0.0025997376069426537 2023-01-22 17:38:17.609177: step: 676/531, loss: 5.2105431677773595e-05 2023-01-22 17:38:18.673274: step: 680/531, loss: 0.030486680567264557 2023-01-22 17:38:19.739371: step: 684/531, loss: 8.691824041306973e-05 2023-01-22 17:38:20.786692: step: 688/531, loss: 0.0 2023-01-22 17:38:21.846563: step: 692/531, loss: 0.001845174003392458 2023-01-22 17:38:22.907137: step: 696/531, loss: 0.002244679955765605 2023-01-22 17:38:23.971833: step: 700/531, loss: 0.0006919368170201778 2023-01-22 17:38:25.026890: step: 704/531, loss: 0.0034909069072455168 2023-01-22 17:38:26.086163: step: 708/531, loss: 0.001032219617627561 2023-01-22 17:38:27.148992: step: 712/531, loss: 0.00033255619928240776 2023-01-22 17:38:28.218985: step: 716/531, loss: 0.004948308225721121 2023-01-22 17:38:29.268816: step: 720/531, loss: 0.0004958026693202555 2023-01-22 17:38:30.344637: step: 724/531, loss: 0.0008576534455642104 2023-01-22 17:38:31.382852: step: 728/531, loss: 0.0032093883492052555 2023-01-22 17:38:32.451404: step: 732/531, loss: 0.008547630161046982 2023-01-22 17:38:33.508355: step: 736/531, loss: 0.007780618034303188 2023-01-22 17:38:34.551883: step: 740/531, loss: 0.000961107958573848 2023-01-22 17:38:35.637774: step: 744/531, loss: 0.004089743364602327 2023-01-22 17:38:36.702710: step: 748/531, loss: 2.2777938283979893e-05 2023-01-22 17:38:37.758985: step: 752/531, loss: 0.001483023981563747 2023-01-22 17:38:38.814366: step: 756/531, loss: 0.0013040174962952733 2023-01-22 17:38:39.887720: step: 760/531, loss: 0.0026340496260672808 2023-01-22 17:38:40.959856: step: 764/531, loss: 0.0024289642460644245 2023-01-22 17:38:42.015025: step: 768/531, loss: 0.00034977204632014036 2023-01-22 17:38:43.079782: step: 772/531, loss: 0.013295880518853664 2023-01-22 17:38:44.156476: step: 776/531, loss: 0.004345230292528868 2023-01-22 17:38:45.208450: step: 780/531, loss: 0.0022396044805645943 2023-01-22 17:38:46.281833: step: 784/531, loss: 3.582090585041442e-06 2023-01-22 17:38:47.329458: step: 788/531, loss: 0.005299786105751991 2023-01-22 17:38:48.414850: step: 792/531, loss: 0.0018520280718803406 2023-01-22 17:38:49.475861: step: 796/531, loss: 0.004250641446560621 2023-01-22 17:38:50.532526: step: 800/531, loss: 
0.0013073586160317063 2023-01-22 17:38:51.605726: step: 804/531, loss: 0.0007779623265378177 2023-01-22 17:38:52.665703: step: 808/531, loss: 0.0021871502976864576 2023-01-22 17:38:53.728722: step: 812/531, loss: 3.1937543099047616e-05 2023-01-22 17:38:54.778171: step: 816/531, loss: 0.004651687573641539 2023-01-22 17:38:55.850073: step: 820/531, loss: 0.002168322214856744 2023-01-22 17:38:56.911700: step: 824/531, loss: 0.009237860329449177 2023-01-22 17:38:57.970209: step: 828/531, loss: 0.0015819964464753866 2023-01-22 17:38:59.036800: step: 832/531, loss: 0.00013506108371075243 2023-01-22 17:39:00.099947: step: 836/531, loss: 0.008326171897351742 2023-01-22 17:39:01.156995: step: 840/531, loss: 6.934761768206954e-05 2023-01-22 17:39:02.208216: step: 844/531, loss: 0.00123911676928401 2023-01-22 17:39:03.278023: step: 848/531, loss: 0.0034209522418677807 2023-01-22 17:39:04.336517: step: 852/531, loss: 0.009107130579650402 2023-01-22 17:39:05.406273: step: 856/531, loss: 0.00020019787189085037 2023-01-22 17:39:06.481096: step: 860/531, loss: 0.0019052329007536173 2023-01-22 17:39:07.537733: step: 864/531, loss: 0.0017082573613151908 2023-01-22 17:39:08.580725: step: 868/531, loss: 0.0 2023-01-22 17:39:09.656574: step: 872/531, loss: 0.004345685709267855 2023-01-22 17:39:10.722593: step: 876/531, loss: 3.483918044366874e-05 2023-01-22 17:39:11.785475: step: 880/531, loss: 0.002432444831356406 2023-01-22 17:39:12.840668: step: 884/531, loss: 0.0001277826086152345 2023-01-22 17:39:13.916199: step: 888/531, loss: 0.00638930406421423 2023-01-22 17:39:14.985850: step: 892/531, loss: 0.002860849956050515 2023-01-22 17:39:16.045112: step: 896/531, loss: 0.00019754651293624192 2023-01-22 17:39:17.112842: step: 900/531, loss: 0.0030481130816042423 2023-01-22 17:39:18.175594: step: 904/531, loss: 0.004139148164540529 2023-01-22 17:39:19.226846: step: 908/531, loss: 0.002282257191836834 2023-01-22 17:39:20.285589: step: 912/531, loss: 0.0007151679019443691 2023-01-22 17:39:21.363402: step: 916/531, loss: 0.0011508659226819873 2023-01-22 17:39:22.424683: step: 920/531, loss: 0.004929671995341778 2023-01-22 17:39:23.501854: step: 924/531, loss: 0.0008378620259463787 2023-01-22 17:39:24.549660: step: 928/531, loss: 0.0016921816859394312 2023-01-22 17:39:25.599008: step: 932/531, loss: 0.002038485137745738 2023-01-22 17:39:26.662308: step: 936/531, loss: 0.0005120376590639353 2023-01-22 17:39:27.714309: step: 940/531, loss: 0.0001441959320800379 2023-01-22 17:39:28.783519: step: 944/531, loss: 0.0030919346027076244 2023-01-22 17:39:29.849392: step: 948/531, loss: 0.00015885457105468959 2023-01-22 17:39:30.911237: step: 952/531, loss: 0.005923484917730093 2023-01-22 17:39:31.967999: step: 956/531, loss: 0.0008517189417034388 2023-01-22 17:39:33.049724: step: 960/531, loss: 0.00018739393271971494 2023-01-22 17:39:34.118184: step: 964/531, loss: 0.0008119182311929762 2023-01-22 17:39:35.170738: step: 968/531, loss: 1.7786125681595877e-05 2023-01-22 17:39:36.234309: step: 972/531, loss: 0.00018600131443236023 2023-01-22 17:39:37.278197: step: 976/531, loss: 0.0020022897515445948 2023-01-22 17:39:38.341611: step: 980/531, loss: 0.0031820288859307766 2023-01-22 17:39:39.406983: step: 984/531, loss: 0.004812317434698343 2023-01-22 17:39:40.466537: step: 988/531, loss: 3.905444827978499e-05 2023-01-22 17:39:41.522322: step: 992/531, loss: 0.00034579497878439724 2023-01-22 17:39:42.590699: step: 996/531, loss: 0.000839526648633182 2023-01-22 17:39:43.652799: step: 1000/531, loss: 0.0013043514918535948 
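A note on reading the step lines: the batch index advances by 4 per entry while the denominator stays at 531, which matches --accumulate_step 4 (2124 mini-batches per epoch, 2124 / 4 = 531 optimizer updates), and the single "Loss: ..." value printed at the end of each epoch looks like an average over the per-step losses. The sketch below only illustrates that reading; none of the names are taken from train.py.

```python
# Illustrative reading of the step lines above, not code from train.py:
# the batch index rises by 4 per logged entry while the denominator stays
# at 531, consistent with --accumulate_step 4 (2124 / 4 = 531 updates).
from datetime import datetime

def train_one_epoch(model, optimizer, batches, accumulate_step=4):
    """Hypothetical accumulate-and-log loop matching the log format above."""
    optimizer.zero_grad()
    losses = []
    for i, batch in enumerate(batches, start=1):
        loss = model(batch)                      # forward pass returning a scalar loss
        (loss / accumulate_step).backward()      # scale so accumulated grads average out
        losses.append(loss.item())
        if i % accumulate_step == 0:             # one optimizer update per 4 mini-batches
            optimizer.step()
            optimizer.zero_grad()
            print(f"{datetime.now()}: step: {i}/{len(batches) // accumulate_step}, "
                  f"loss: {loss.item()}")
    return sum(losses) / len(losses)             # plausibly the per-epoch "Loss: ..." line
```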
2023-01-22 17:39:44.718144: step: 1004/531, loss: 0.00033112673554569483 2023-01-22 17:39:45.806711: step: 1008/531, loss: 0.007101436611264944 2023-01-22 17:39:46.877495: step: 1012/531, loss: 0.0014476361684501171 2023-01-22 17:39:47.935099: step: 1016/531, loss: 0.0008238592999987304 2023-01-22 17:39:49.017006: step: 1020/531, loss: 0.005778813734650612 2023-01-22 17:39:50.088838: step: 1024/531, loss: 0.00389322848059237 2023-01-22 17:39:51.149686: step: 1028/531, loss: 6.392520299414173e-05 2023-01-22 17:39:52.226637: step: 1032/531, loss: 0.0004985540872439742 2023-01-22 17:39:53.285190: step: 1036/531, loss: 0.001746293157339096 2023-01-22 17:39:54.346697: step: 1040/531, loss: 0.024079257622361183 2023-01-22 17:39:55.394861: step: 1044/531, loss: 1.2902031812700443e-05 2023-01-22 17:39:56.462969: step: 1048/531, loss: 6.419776764232665e-05 2023-01-22 17:39:57.539490: step: 1052/531, loss: 0.0012342811096459627 2023-01-22 17:39:58.624179: step: 1056/531, loss: 0.0042419228702783585 2023-01-22 17:39:59.679383: step: 1060/531, loss: 0.0031465382780879736 2023-01-22 17:40:00.739720: step: 1064/531, loss: 4.572844773065299e-05 2023-01-22 17:40:01.817577: step: 1068/531, loss: 0.006197073496878147 2023-01-22 17:40:02.863433: step: 1072/531, loss: 0.005649218335747719 2023-01-22 17:40:03.928475: step: 1076/531, loss: 0.0013822930632159114 2023-01-22 17:40:04.983828: step: 1080/531, loss: 0.001131804776377976 2023-01-22 17:40:06.055372: step: 1084/531, loss: 0.0023004761897027493 2023-01-22 17:40:07.113129: step: 1088/531, loss: 0.006593323778361082 2023-01-22 17:40:08.172377: step: 1092/531, loss: 0.0007122933166101575 2023-01-22 17:40:09.229478: step: 1096/531, loss: 0.00011971169442404062 2023-01-22 17:40:10.273250: step: 1100/531, loss: 0.01289062574505806 2023-01-22 17:40:11.321233: step: 1104/531, loss: 0.004245121031999588 2023-01-22 17:40:12.387140: step: 1108/531, loss: 0.0026767197996377945 2023-01-22 17:40:13.439908: step: 1112/531, loss: 0.002532575512304902 2023-01-22 17:40:14.513901: step: 1116/531, loss: 0.0016375520499423146 2023-01-22 17:40:15.586029: step: 1120/531, loss: 0.0037415369879454374 2023-01-22 17:40:16.643635: step: 1124/531, loss: 0.0019043168285861611 2023-01-22 17:40:17.702828: step: 1128/531, loss: 0.0016788356006145477 2023-01-22 17:40:18.765624: step: 1132/531, loss: 0.002324876841157675 2023-01-22 17:40:19.823046: step: 1136/531, loss: 0.0012641472276300192 2023-01-22 17:40:20.882434: step: 1140/531, loss: 0.0009165317169390619 2023-01-22 17:40:21.941992: step: 1144/531, loss: 0.0005250711692497134 2023-01-22 17:40:23.007164: step: 1148/531, loss: 0.0003882689052261412 2023-01-22 17:40:24.064130: step: 1152/531, loss: 3.588005347410217e-05 2023-01-22 17:40:25.130623: step: 1156/531, loss: 0.002411850495263934 2023-01-22 17:40:26.183971: step: 1160/531, loss: 0.0016653139609843493 2023-01-22 17:40:27.245434: step: 1164/531, loss: 1.6790027075330727e-06 2023-01-22 17:40:28.309366: step: 1168/531, loss: 0.010914490558207035 2023-01-22 17:40:29.400292: step: 1172/531, loss: 0.0013401506002992392 2023-01-22 17:40:30.450608: step: 1176/531, loss: 0.03709438443183899 2023-01-22 17:40:31.494601: step: 1180/531, loss: 0.0002670694375410676 2023-01-22 17:40:32.541390: step: 1184/531, loss: 0.0013875156873837113 2023-01-22 17:40:33.602923: step: 1188/531, loss: 0.0006196255562826991 2023-01-22 17:40:34.657372: step: 1192/531, loss: 2.9499486117856577e-05 2023-01-22 17:40:35.693836: step: 1196/531, loss: 0.0024369647726416588 2023-01-22 17:40:36.754649: step: 
1200/531, loss: 0.0012887604534626007 2023-01-22 17:40:37.803258: step: 1204/531, loss: 0.0015059924917295575 2023-01-22 17:40:38.864188: step: 1208/531, loss: 0.004952121526002884 2023-01-22 17:40:39.911281: step: 1212/531, loss: 0.009264972060918808 2023-01-22 17:40:40.951139: step: 1216/531, loss: 6.048934665159322e-06 2023-01-22 17:40:42.018229: step: 1220/531, loss: 0.0032116060610860586 2023-01-22 17:40:43.082333: step: 1224/531, loss: 0.010375224985182285 2023-01-22 17:40:44.133482: step: 1228/531, loss: 0.0013669985346496105 2023-01-22 17:40:45.181536: step: 1232/531, loss: 0.0011358282063156366 2023-01-22 17:40:46.250660: step: 1236/531, loss: 0.018549779430031776 2023-01-22 17:40:47.318678: step: 1240/531, loss: 0.000828936230391264 2023-01-22 17:40:48.362999: step: 1244/531, loss: 0.0020182894077152014 2023-01-22 17:40:49.435595: step: 1248/531, loss: 0.0015858432743698359 2023-01-22 17:40:50.493937: step: 1252/531, loss: 0.0011689248494803905 2023-01-22 17:40:51.529512: step: 1256/531, loss: 0.0009232672746293247 2023-01-22 17:40:52.570522: step: 1260/531, loss: 0.00344842835329473 2023-01-22 17:40:53.632995: step: 1264/531, loss: 0.0015458631096407771 2023-01-22 17:40:54.696780: step: 1268/531, loss: 0.017227625474333763 2023-01-22 17:40:55.758257: step: 1272/531, loss: 0.00433390261605382 2023-01-22 17:40:56.824792: step: 1276/531, loss: 0.0013303110608831048 2023-01-22 17:40:57.879278: step: 1280/531, loss: 0.0010876747546717525 2023-01-22 17:40:58.933501: step: 1284/531, loss: 0.00026305578649044037 2023-01-22 17:40:59.987632: step: 1288/531, loss: 0.0013360129669308662 2023-01-22 17:41:01.046532: step: 1292/531, loss: 0.0021273477468639612 2023-01-22 17:41:02.126270: step: 1296/531, loss: 0.0028546222019940615 2023-01-22 17:41:03.199303: step: 1300/531, loss: 0.008761582896113396 2023-01-22 17:41:04.259250: step: 1304/531, loss: 0.003074493957683444 2023-01-22 17:41:05.311181: step: 1308/531, loss: 0.0020554193761199713 2023-01-22 17:41:06.361046: step: 1312/531, loss: 0.0032944732811301947 2023-01-22 17:41:07.412337: step: 1316/531, loss: 0.043767448514699936 2023-01-22 17:41:08.486523: step: 1320/531, loss: 0.0016127859707921743 2023-01-22 17:41:09.562846: step: 1324/531, loss: 0.0005246309447102249 2023-01-22 17:41:10.600582: step: 1328/531, loss: 0.003447155933827162 2023-01-22 17:41:11.661067: step: 1332/531, loss: 0.0014250022359192371 2023-01-22 17:41:12.721871: step: 1336/531, loss: 0.015000371262431145 2023-01-22 17:41:13.787868: step: 1340/531, loss: 0.004626358859241009 2023-01-22 17:41:14.850808: step: 1344/531, loss: 0.00035782423219643533 2023-01-22 17:41:15.926092: step: 1348/531, loss: 0.0028104258235543966 2023-01-22 17:41:16.979186: step: 1352/531, loss: 4.902098567072244e-07 2023-01-22 17:41:18.041211: step: 1356/531, loss: 0.02652476169168949 2023-01-22 17:41:19.121555: step: 1360/531, loss: 0.0018253581365570426 2023-01-22 17:41:20.160251: step: 1364/531, loss: 0.0028708456084132195 2023-01-22 17:41:21.198316: step: 1368/531, loss: 0.005636297166347504 2023-01-22 17:41:22.253813: step: 1372/531, loss: 2.0378316548885778e-05 2023-01-22 17:41:23.312513: step: 1376/531, loss: 9.27389701246284e-05 2023-01-22 17:41:24.390130: step: 1380/531, loss: 0.008336848579347134 2023-01-22 17:41:25.446585: step: 1384/531, loss: 0.00011499337415443733 2023-01-22 17:41:26.496110: step: 1388/531, loss: 0.006575698498636484 2023-01-22 17:41:27.549779: step: 1392/531, loss: 0.000831614772323519 2023-01-22 17:41:28.598122: step: 1396/531, loss: 0.007550542708486319 
2023-01-22 17:41:29.660907: step: 1400/531, loss: 0.009045243263244629 2023-01-22 17:41:30.706247: step: 1404/531, loss: 0.0002583989698905498 2023-01-22 17:41:31.787913: step: 1408/531, loss: 0.001176288235001266 2023-01-22 17:41:32.839960: step: 1412/531, loss: 0.002563477959483862 2023-01-22 17:41:33.889209: step: 1416/531, loss: 0.02445443719625473 2023-01-22 17:41:34.951335: step: 1420/531, loss: 0.0010914438171312213 2023-01-22 17:41:36.008625: step: 1424/531, loss: 0.0022718708496540785 2023-01-22 17:41:37.069344: step: 1428/531, loss: 0.008199960924685001 2023-01-22 17:41:38.134766: step: 1432/531, loss: 0.002697072457522154 2023-01-22 17:41:39.178801: step: 1436/531, loss: 0.0013049154076725245 2023-01-22 17:41:40.241238: step: 1440/531, loss: 0.004307322204113007 2023-01-22 17:41:41.294863: step: 1444/531, loss: 0.004063761793076992 2023-01-22 17:41:42.357841: step: 1448/531, loss: 1.4264049241319299e-05 2023-01-22 17:41:43.423691: step: 1452/531, loss: 0.005914001259952784 2023-01-22 17:41:44.488292: step: 1456/531, loss: 0.0013912719441577792 2023-01-22 17:41:45.540730: step: 1460/531, loss: 0.004360991530120373 2023-01-22 17:41:46.600970: step: 1464/531, loss: 0.0030729311984032393 2023-01-22 17:41:47.666820: step: 1468/531, loss: 5.357481131795794e-05 2023-01-22 17:41:48.710883: step: 1472/531, loss: 0.0003682511451188475 2023-01-22 17:41:49.792572: step: 1476/531, loss: 0.004276358988136053 2023-01-22 17:41:50.859005: step: 1480/531, loss: 0.0024351440370082855 2023-01-22 17:41:51.912559: step: 1484/531, loss: 0.0012374743819236755 2023-01-22 17:41:52.993134: step: 1488/531, loss: 0.002166086109355092 2023-01-22 17:41:54.054751: step: 1492/531, loss: 0.002553847385570407 2023-01-22 17:41:55.128895: step: 1496/531, loss: 0.0008582479786127806 2023-01-22 17:41:56.188755: step: 1500/531, loss: 0.002674586372449994 2023-01-22 17:41:57.249176: step: 1504/531, loss: 0.005065123084932566 2023-01-22 17:41:58.290560: step: 1508/531, loss: 0.0002147833292838186 2023-01-22 17:41:59.354103: step: 1512/531, loss: 0.0048254686407744884 2023-01-22 17:42:00.412415: step: 1516/531, loss: 9.325707651441917e-05 2023-01-22 17:42:01.474532: step: 1520/531, loss: 0.0014961569104343653 2023-01-22 17:42:02.523112: step: 1524/531, loss: 0.0012830272316932678 2023-01-22 17:42:03.568984: step: 1528/531, loss: 0.0017140316776931286 2023-01-22 17:42:04.617159: step: 1532/531, loss: 0.0018837180687114596 2023-01-22 17:42:05.679819: step: 1536/531, loss: 0.003946154844015837 2023-01-22 17:42:06.730695: step: 1540/531, loss: 2.889134339056909e-05 2023-01-22 17:42:07.778451: step: 1544/531, loss: 0.0016391181852668524 2023-01-22 17:42:08.845870: step: 1548/531, loss: 0.003246506443247199 2023-01-22 17:42:09.899890: step: 1552/531, loss: 0.0006468216306529939 2023-01-22 17:42:10.957913: step: 1556/531, loss: 0.002711770124733448 2023-01-22 17:42:12.017168: step: 1560/531, loss: 0.00037209983565844595 2023-01-22 17:42:13.065630: step: 1564/531, loss: 0.0011038295924663544 2023-01-22 17:42:14.121605: step: 1568/531, loss: 0.0003347354067955166 2023-01-22 17:42:15.178440: step: 1572/531, loss: 0.008154347538948059 2023-01-22 17:42:16.232237: step: 1576/531, loss: 0.00012576636800076813 2023-01-22 17:42:17.295017: step: 1580/531, loss: 0.0009588545653969049 2023-01-22 17:42:18.338065: step: 1584/531, loss: 0.00022881808399688452 2023-01-22 17:42:19.382355: step: 1588/531, loss: 0.0004036377649754286 2023-01-22 17:42:20.440465: step: 1592/531, loss: 0.00029302609618753195 2023-01-22 17:42:21.473918: step: 
1596/531, loss: 0.00017839540669228882 2023-01-22 17:42:22.526072: step: 1600/531, loss: 0.0002494643849786371 2023-01-22 17:42:23.585898: step: 1604/531, loss: 0.0016358079155907035 2023-01-22 17:42:24.660173: step: 1608/531, loss: 0.003934149164706469 2023-01-22 17:42:25.727194: step: 1612/531, loss: 0.00014277591253630817 2023-01-22 17:42:26.773549: step: 1616/531, loss: 0.00753205269575119 2023-01-22 17:42:27.825674: step: 1620/531, loss: 0.0015130650717765093 2023-01-22 17:42:28.877025: step: 1624/531, loss: 0.006078492850065231 2023-01-22 17:42:29.929064: step: 1628/531, loss: 0.0001622473355382681 2023-01-22 17:42:31.001041: step: 1632/531, loss: 0.008209086023271084 2023-01-22 17:42:32.072359: step: 1636/531, loss: 0.0006903487374074757 2023-01-22 17:42:33.130897: step: 1640/531, loss: 0.008148431777954102 2023-01-22 17:42:34.175509: step: 1644/531, loss: 0.0013487264513969421 2023-01-22 17:42:35.247882: step: 1648/531, loss: 0.0016750216018408537 2023-01-22 17:42:36.305085: step: 1652/531, loss: 0.0033348274882882833 2023-01-22 17:42:37.366115: step: 1656/531, loss: 0.0019874819554388523 2023-01-22 17:42:38.419017: step: 1660/531, loss: 0.0032966621220111847 2023-01-22 17:42:39.494077: step: 1664/531, loss: 0.00629988219588995 2023-01-22 17:42:40.533393: step: 1668/531, loss: 0.000295562349492684 2023-01-22 17:42:41.605104: step: 1672/531, loss: 0.0009927835781127214 2023-01-22 17:42:42.684001: step: 1676/531, loss: 0.004141892772167921 2023-01-22 17:42:43.742784: step: 1680/531, loss: 0.00021202464995440096 2023-01-22 17:42:44.808270: step: 1684/531, loss: 7.637318776687607e-05 2023-01-22 17:42:45.874941: step: 1688/531, loss: 0.008229069411754608 2023-01-22 17:42:46.940204: step: 1692/531, loss: 0.0006660353974439204 2023-01-22 17:42:47.989468: step: 1696/531, loss: 0.0034808157943189144 2023-01-22 17:42:49.041045: step: 1700/531, loss: 0.0025462347548455 2023-01-22 17:42:50.088458: step: 1704/531, loss: 0.00018691897275857627 2023-01-22 17:42:51.158121: step: 1708/531, loss: 0.0020961440168321133 2023-01-22 17:42:52.220636: step: 1712/531, loss: 0.019499309360980988 2023-01-22 17:42:53.297325: step: 1716/531, loss: 0.00430101016536355 2023-01-22 17:42:54.353812: step: 1720/531, loss: 0.016481703147292137 2023-01-22 17:42:55.402600: step: 1724/531, loss: 0.0006929804803803563 2023-01-22 17:42:56.459417: step: 1728/531, loss: 0.0023284987546503544 2023-01-22 17:42:57.499324: step: 1732/531, loss: 0.012157940305769444 2023-01-22 17:42:58.561930: step: 1736/531, loss: 0.002034611301496625 2023-01-22 17:42:59.611244: step: 1740/531, loss: 0.0021266622934490442 2023-01-22 17:43:00.669311: step: 1744/531, loss: 0.0005496070371009409 2023-01-22 17:43:01.717114: step: 1748/531, loss: 0.0015330406604334712 2023-01-22 17:43:02.769391: step: 1752/531, loss: 0.0012892597587779164 2023-01-22 17:43:03.814867: step: 1756/531, loss: 3.597663817345165e-05 2023-01-22 17:43:04.884687: step: 1760/531, loss: 0.00023635837715119123 2023-01-22 17:43:05.955241: step: 1764/531, loss: 0.0013389786472544074 2023-01-22 17:43:07.006564: step: 1768/531, loss: 0.001086128642782569 2023-01-22 17:43:08.059912: step: 1772/531, loss: 0.0038151314947754145 2023-01-22 17:43:09.133106: step: 1776/531, loss: 0.0001457268081139773 2023-01-22 17:43:10.189219: step: 1780/531, loss: 0.0012750650057569146 2023-01-22 17:43:11.250063: step: 1784/531, loss: 0.0002461817057337612 2023-01-22 17:43:12.347740: step: 1788/531, loss: 0.00013186557043809444 2023-01-22 17:43:13.418794: step: 1792/531, loss: 0.0011999428970739245 
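In the evaluation summaries printed at the end of each epoch (e.g. the epoch-14 block above), the numbers are consistent with f1 being the usual harmonic mean of p and r and with 'combined' being the product of the template f1 and the slot f1. The snippet below reproduces the epoch-14 Dev Chinese line under that assumption; it is inferred from the logged values, not taken from the repository.

```python
# Check of the metric arithmetic, using the epoch-14 "Dev Chinese" line above.
# Assumption inferred from the logged values: f1 = 2pr / (p + r) and
# 'combined' = template f1 * slot f1.
def f1(p, r):
    return 2 * p * r / (p + r) if p + r else 0.0

template_f1 = f1(1.0, 0.5833333333333334)               # ~0.7368421052631579
slot_f1 = f1(0.3527675438596492, 0.31735716540404046)   # ~0.3341267863077435
combined = template_f1 * slot_f1                        # ~0.246198684647811, as logged
print(template_f1, slot_f1, combined)
```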
2023-01-22 17:43:14.470699: step: 1796/531, loss: 0.016539234668016434 2023-01-22 17:43:15.525579: step: 1800/531, loss: 0.0010777073912322521 2023-01-22 17:43:16.588065: step: 1804/531, loss: 0.002102690516039729 2023-01-22 17:43:17.660835: step: 1808/531, loss: 0.004626779817044735 2023-01-22 17:43:18.730734: step: 1812/531, loss: 8.001690730452538e-06 2023-01-22 17:43:19.782551: step: 1816/531, loss: 0.0020071801263839006 2023-01-22 17:43:20.839817: step: 1820/531, loss: 9.671814950706903e-06 2023-01-22 17:43:21.887852: step: 1824/531, loss: 0.002047412097454071 2023-01-22 17:43:22.947561: step: 1828/531, loss: 0.008023356087505817 2023-01-22 17:43:23.984826: step: 1832/531, loss: 0.0034277657978236675 2023-01-22 17:43:25.038717: step: 1836/531, loss: 0.0013587395660579205 2023-01-22 17:43:26.094180: step: 1840/531, loss: 0.02174326404929161 2023-01-22 17:43:27.155071: step: 1844/531, loss: 0.005105058662593365 2023-01-22 17:43:28.232976: step: 1848/531, loss: 0.007215899880975485 2023-01-22 17:43:29.281201: step: 1852/531, loss: 0.0003426890471018851 2023-01-22 17:43:30.346837: step: 1856/531, loss: 0.0002772504521999508 2023-01-22 17:43:31.394007: step: 1860/531, loss: 0.006160797085613012 2023-01-22 17:43:32.455322: step: 1864/531, loss: 0.0005491849151439965 2023-01-22 17:43:33.499564: step: 1868/531, loss: 6.927702634129673e-05 2023-01-22 17:43:34.552318: step: 1872/531, loss: 0.005631587002426386 2023-01-22 17:43:35.618946: step: 1876/531, loss: 0.0033361671958118677 2023-01-22 17:43:36.667611: step: 1880/531, loss: 0.0011379916686564684 2023-01-22 17:43:37.767467: step: 1884/531, loss: 0.0013854247517883778 2023-01-22 17:43:38.813058: step: 1888/531, loss: 0.002004228299483657 2023-01-22 17:43:39.872468: step: 1892/531, loss: 0.00758488941937685 2023-01-22 17:43:40.918674: step: 1896/531, loss: 0.1323963701725006 2023-01-22 17:43:41.989668: step: 1900/531, loss: 0.029628774151206017 2023-01-22 17:43:43.052937: step: 1904/531, loss: 0.005650073755532503 2023-01-22 17:43:44.103325: step: 1908/531, loss: 0.0007673946092836559 2023-01-22 17:43:45.152208: step: 1912/531, loss: 0.002863846020773053 2023-01-22 17:43:46.207983: step: 1916/531, loss: 0.00010875792213482782 2023-01-22 17:43:47.274024: step: 1920/531, loss: 0.002233806299045682 2023-01-22 17:43:48.324858: step: 1924/531, loss: 0.02473893202841282 2023-01-22 17:43:49.399469: step: 1928/531, loss: 0.002922331215813756 2023-01-22 17:43:50.464352: step: 1932/531, loss: 0.00017819192726165056 2023-01-22 17:43:51.508703: step: 1936/531, loss: 0.0016617338405922055 2023-01-22 17:43:52.563308: step: 1940/531, loss: 0.00122277089394629 2023-01-22 17:43:53.599377: step: 1944/531, loss: 0.0016748837660998106 2023-01-22 17:43:54.645282: step: 1948/531, loss: 0.0011753502767533064 2023-01-22 17:43:55.687248: step: 1952/531, loss: 0.00028088665567338467 2023-01-22 17:43:56.754934: step: 1956/531, loss: 0.0017002267995849252 2023-01-22 17:43:57.815519: step: 1960/531, loss: 0.0036748351994901896 2023-01-22 17:43:58.872772: step: 1964/531, loss: 0.0024912068620324135 2023-01-22 17:43:59.940371: step: 1968/531, loss: 0.0002578684943728149 2023-01-22 17:44:00.981010: step: 1972/531, loss: 0.0007715074461884797 2023-01-22 17:44:02.048952: step: 1976/531, loss: 0.00542663736268878 2023-01-22 17:44:03.107426: step: 1980/531, loss: 0.0023416990879923105 2023-01-22 17:44:04.152334: step: 1984/531, loss: 0.0020016503985971212 2023-01-22 17:44:05.224499: step: 1988/531, loss: 0.0010499984491616488 2023-01-22 17:44:06.281222: step: 1992/531, loss: 
0.0030251534190028906 2023-01-22 17:44:07.335615: step: 1996/531, loss: 0.0028106558602303267 2023-01-22 17:44:08.393236: step: 2000/531, loss: 0.00534806540235877 2023-01-22 17:44:09.427274: step: 2004/531, loss: 0.002174492459744215 2023-01-22 17:44:10.489184: step: 2008/531, loss: 0.0005388790741562843 2023-01-22 17:44:11.560504: step: 2012/531, loss: 0.008345023728907108 2023-01-22 17:44:12.624847: step: 2016/531, loss: 0.00013704894809052348 2023-01-22 17:44:13.689759: step: 2020/531, loss: 0.008596867322921753 2023-01-22 17:44:14.749274: step: 2024/531, loss: 0.001306700985878706 2023-01-22 17:44:15.799687: step: 2028/531, loss: 0.005681769922375679 2023-01-22 17:44:16.868133: step: 2032/531, loss: 0.001276426948606968 2023-01-22 17:44:17.939515: step: 2036/531, loss: 0.014172968454658985 2023-01-22 17:44:18.999913: step: 2040/531, loss: 0.0029230588115751743 2023-01-22 17:44:20.071584: step: 2044/531, loss: 0.0062430864199995995 2023-01-22 17:44:21.135704: step: 2048/531, loss: 0.014193709008395672 2023-01-22 17:44:22.199796: step: 2052/531, loss: 0.001921636750921607 2023-01-22 17:44:23.247167: step: 2056/531, loss: 0.0035454973112791777 2023-01-22 17:44:24.290445: step: 2060/531, loss: 0.0003642788506112993 2023-01-22 17:44:25.338741: step: 2064/531, loss: 0.004448044579476118 2023-01-22 17:44:26.394434: step: 2068/531, loss: 0.0008169467910192907 2023-01-22 17:44:27.448769: step: 2072/531, loss: 0.010781528428196907 2023-01-22 17:44:28.503248: step: 2076/531, loss: 0.003003212623298168 2023-01-22 17:44:29.577723: step: 2080/531, loss: 3.2854015444172546e-05 2023-01-22 17:44:30.635966: step: 2084/531, loss: 0.00021568017837125808 2023-01-22 17:44:31.692329: step: 2088/531, loss: 0.0014874560292810202 2023-01-22 17:44:32.746638: step: 2092/531, loss: 0.005862790159881115 2023-01-22 17:44:33.793610: step: 2096/531, loss: 0.017239782959222794 2023-01-22 17:44:34.853228: step: 2100/531, loss: 0.005743935238569975 2023-01-22 17:44:35.896328: step: 2104/531, loss: 0.0008272667764686048 2023-01-22 17:44:36.963715: step: 2108/531, loss: 0.0052041723392903805 2023-01-22 17:44:38.016268: step: 2112/531, loss: 0.0012248418061062694 2023-01-22 17:44:39.068618: step: 2116/531, loss: 0.012106272391974926 2023-01-22 17:44:40.130053: step: 2120/531, loss: 0.0015891635557636619 2023-01-22 17:44:41.192821: step: 2124/531, loss: 0.002304482040926814 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35765048712595693, 'r': 0.32445943813131317, 'f1': 0.34024743462429663}, 'combined': 0.2507086360389554, 'stategy': 1, 'epoch': 15} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33492632906351627, 'r': 0.27658627354974497, 'f1': 0.3029733970273034}, 'combined': 0.18971231402644234, 'stategy': 1, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32987757521255723, 'r': 0.3480302311540452, 'f1': 0.33871086208343826}, 'combined': 0.24957642469305977, 'stategy': 1, 'epoch': 15} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36098974948122464, 'r': 0.3004450304519119, 'f1': 0.32794639642831724}, 'combined': 0.20323438651895714, 'stategy': 1, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31632262996941896, 'r': 
0.32712681846932323, 'f1': 0.32163401741293535}, 'combined': 0.23699348651479446, 'stategy': 1, 'epoch': 15}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36284010715887266, 'r': 0.28548666064623135, 'f1': 0.31954876980321345}, 'combined': 0.2119778968001515, 'stategy': 1, 'epoch': 15}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 15}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 15}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 15}
New best chinese model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35765048712595693, 'r': 0.32445943813131317, 'f1': 0.34024743462429663}, 'combined': 0.2507086360389554, 'stategy': 1, 'epoch': 15}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33492632906351627, 'r': 0.27658627354974497, 'f1': 0.3029733970273034}, 'combined': 0.18971231402644234, 'stategy': 1, 'epoch': 15}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 15}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5}
******************************
Epoch: 16 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 17:47:14.228096: step: 4/531, loss: 0.0005486905574798584 2023-01-22
17:47:15.271465: step: 8/531, loss: 0.005638486705720425 2023-01-22 17:47:16.323206: step: 12/531, loss: 0.0019356182310730219 2023-01-22 17:47:17.378746: step: 16/531, loss: 0.003297555260360241 2023-01-22 17:47:18.429769: step: 20/531, loss: 0.0020506056025624275 2023-01-22 17:47:19.489427: step: 24/531, loss: 0.005351112224161625 2023-01-22 17:47:20.546104: step: 28/531, loss: 0.011799738742411137 2023-01-22 17:47:21.608785: step: 32/531, loss: 1.9336966943228617e-05 2023-01-22 17:47:22.676617: step: 36/531, loss: 0.007591160945594311 2023-01-22 17:47:23.722097: step: 40/531, loss: 0.003377538872882724 2023-01-22 17:47:24.788250: step: 44/531, loss: 0.0005813426105305552 2023-01-22 17:47:25.848143: step: 48/531, loss: 0.004989310633391142 2023-01-22 17:47:26.886870: step: 52/531, loss: 0.0014595228713005781 2023-01-22 17:47:27.951640: step: 56/531, loss: 0.00645263958722353 2023-01-22 17:47:29.007543: step: 60/531, loss: 0.004616355057805777 2023-01-22 17:47:30.058469: step: 64/531, loss: 0.0040242960676550865 2023-01-22 17:47:31.102052: step: 68/531, loss: 0.011794381774961948 2023-01-22 17:47:32.141295: step: 72/531, loss: 8.037840598262846e-05 2023-01-22 17:47:33.193619: step: 76/531, loss: 0.002860124222934246 2023-01-22 17:47:34.235423: step: 80/531, loss: 0.007969512604176998 2023-01-22 17:47:35.288295: step: 84/531, loss: 4.2295665480196476e-05 2023-01-22 17:47:36.352938: step: 88/531, loss: 0.0042200167663395405 2023-01-22 17:47:37.413661: step: 92/531, loss: 0.004397066310048103 2023-01-22 17:47:38.491411: step: 96/531, loss: 0.001455863704904914 2023-01-22 17:47:39.548299: step: 100/531, loss: 0.0003081799077335745 2023-01-22 17:47:40.646310: step: 104/531, loss: 0.0035187399480491877 2023-01-22 17:47:41.710457: step: 108/531, loss: 2.927088871729211e-06 2023-01-22 17:47:42.752860: step: 112/531, loss: 2.8914173526572995e-05 2023-01-22 17:47:43.799681: step: 116/531, loss: 0.008407571353018284 2023-01-22 17:47:44.878955: step: 120/531, loss: 0.004076279234141111 2023-01-22 17:47:45.939229: step: 124/531, loss: 0.0014197694836184382 2023-01-22 17:47:47.007572: step: 128/531, loss: 0.0036911575589329004 2023-01-22 17:47:48.056994: step: 132/531, loss: 0.0006807747413404286 2023-01-22 17:47:49.092303: step: 136/531, loss: 6.875905091874301e-05 2023-01-22 17:47:50.146223: step: 140/531, loss: 0.004404854960739613 2023-01-22 17:47:51.190381: step: 144/531, loss: 0.002807085169479251 2023-01-22 17:47:52.249124: step: 148/531, loss: 0.0030035311356186867 2023-01-22 17:47:53.316817: step: 152/531, loss: 0.0018101237947121263 2023-01-22 17:47:54.364626: step: 156/531, loss: 0.0006155849550850689 2023-01-22 17:47:55.406943: step: 160/531, loss: 0.0007294066599570215 2023-01-22 17:47:56.464807: step: 164/531, loss: 0.0005816432530991733 2023-01-22 17:47:57.519346: step: 168/531, loss: 0.0020621127914637327 2023-01-22 17:47:58.590761: step: 172/531, loss: 0.00011609279317781329 2023-01-22 17:47:59.663795: step: 176/531, loss: 0.0008085482404567301 2023-01-22 17:48:00.717826: step: 180/531, loss: 2.0397019397933036e-05 2023-01-22 17:48:01.781752: step: 184/531, loss: 0.007287472486495972 2023-01-22 17:48:02.823903: step: 188/531, loss: 0.0009562839986756444 2023-01-22 17:48:03.898476: step: 192/531, loss: 0.007725608069449663 2023-01-22 17:48:04.959913: step: 196/531, loss: 0.0031432481482625008 2023-01-22 17:48:06.011912: step: 200/531, loss: 0.004705763887614012 2023-01-22 17:48:07.084261: step: 204/531, loss: 0.0012705445988103747 2023-01-22 17:48:08.143943: step: 208/531, loss: 
0.006196799222379923 2023-01-22 17:48:09.210916: step: 212/531, loss: 0.0006535562570206821 2023-01-22 17:48:10.287274: step: 216/531, loss: 0.002844964386895299 2023-01-22 17:48:11.353648: step: 220/531, loss: 0.00399673031643033 2023-01-22 17:48:12.444152: step: 224/531, loss: 0.0028555591125041246 2023-01-22 17:48:13.502684: step: 228/531, loss: 0.0039131478406488895 2023-01-22 17:48:14.569373: step: 232/531, loss: 0.007284544408321381 2023-01-22 17:48:15.617198: step: 236/531, loss: 0.00017736232257448137 2023-01-22 17:48:16.687220: step: 240/531, loss: 0.004970039241015911 2023-01-22 17:48:17.740632: step: 244/531, loss: 1.9991690351162106e-05 2023-01-22 17:48:18.803885: step: 248/531, loss: 0.0011893160408362746 2023-01-22 17:48:19.867060: step: 252/531, loss: 0.005878967698663473 2023-01-22 17:48:20.922226: step: 256/531, loss: 4.5313376176636666e-05 2023-01-22 17:48:21.967174: step: 260/531, loss: 0.002199020469561219 2023-01-22 17:48:23.029370: step: 264/531, loss: 0.00015328492736443877 2023-01-22 17:48:24.083014: step: 268/531, loss: 0.0014381208457052708 2023-01-22 17:48:25.125663: step: 272/531, loss: 0.0003298294323030859 2023-01-22 17:48:26.184750: step: 276/531, loss: 0.0010655870428308845 2023-01-22 17:48:27.244934: step: 280/531, loss: 0.00034697697265073657 2023-01-22 17:48:28.309732: step: 284/531, loss: 0.006925155874341726 2023-01-22 17:48:29.367247: step: 288/531, loss: 2.0201870938763022e-05 2023-01-22 17:48:30.410037: step: 292/531, loss: 0.0007099260110408068 2023-01-22 17:48:31.476385: step: 296/531, loss: 0.011019090190529823 2023-01-22 17:48:32.533358: step: 300/531, loss: 4.568853000819217e-06 2023-01-22 17:48:33.594872: step: 304/531, loss: 0.0041047679260373116 2023-01-22 17:48:34.675326: step: 308/531, loss: 0.0038848845288157463 2023-01-22 17:48:35.745249: step: 312/531, loss: 0.003538146847859025 2023-01-22 17:48:36.813534: step: 316/531, loss: 6.563659553648904e-05 2023-01-22 17:48:37.892887: step: 320/531, loss: 0.0021672204602509737 2023-01-22 17:48:38.948618: step: 324/531, loss: 0.0007981749949976802 2023-01-22 17:48:39.994313: step: 328/531, loss: 1.4987713257141877e-05 2023-01-22 17:48:41.068241: step: 332/531, loss: 0.006025921553373337 2023-01-22 17:48:42.132574: step: 336/531, loss: 0.003645022166892886 2023-01-22 17:48:43.204375: step: 340/531, loss: 0.00034223950933665037 2023-01-22 17:48:44.258718: step: 344/531, loss: 0.007994323968887329 2023-01-22 17:48:45.312607: step: 348/531, loss: 0.002756382804363966 2023-01-22 17:48:46.360995: step: 352/531, loss: 0.00021866396127734333 2023-01-22 17:48:47.422930: step: 356/531, loss: 0.0001236679236171767 2023-01-22 17:48:48.478427: step: 360/531, loss: 0.0001943993556778878 2023-01-22 17:48:49.561797: step: 364/531, loss: 0.00147626840043813 2023-01-22 17:48:50.622364: step: 368/531, loss: 0.018230203539133072 2023-01-22 17:48:51.675032: step: 372/531, loss: 0.00027743951068259776 2023-01-22 17:48:52.726590: step: 376/531, loss: 0.005304703023284674 2023-01-22 17:48:53.796044: step: 380/531, loss: 0.001575262169353664 2023-01-22 17:48:54.860338: step: 384/531, loss: 0.011680803261697292 2023-01-22 17:48:55.912523: step: 388/531, loss: 1.245699604623951e-05 2023-01-22 17:48:56.972050: step: 392/531, loss: 0.00036558666033670306 2023-01-22 17:48:58.038477: step: 396/531, loss: 0.007009987719357014 2023-01-22 17:48:59.097608: step: 400/531, loss: 7.078037356222922e-08 2023-01-22 17:49:00.150689: step: 404/531, loss: 0.002747965743765235 2023-01-22 17:49:01.211630: step: 408/531, loss: 
0.00023298650921788067 2023-01-22 17:49:02.263766: step: 412/531, loss: 0.0026268947403877974 2023-01-22 17:49:03.320636: step: 416/531, loss: 0.006747996900230646 2023-01-22 17:49:04.371492: step: 420/531, loss: 0.0002006733266171068 2023-01-22 17:49:05.451330: step: 424/531, loss: 0.00010924031084869057 2023-01-22 17:49:06.525308: step: 428/531, loss: 0.005553253460675478 2023-01-22 17:49:07.595803: step: 432/531, loss: 0.03208472952246666 2023-01-22 17:49:08.654078: step: 436/531, loss: 0.004296464845538139 2023-01-22 17:49:09.712922: step: 440/531, loss: 0.0015038911951705813 2023-01-22 17:49:10.776250: step: 444/531, loss: 0.002741214819252491 2023-01-22 17:49:11.868031: step: 448/531, loss: 0.002793479012325406 2023-01-22 17:49:12.927433: step: 452/531, loss: 3.7675938074244186e-05 2023-01-22 17:49:14.003767: step: 456/531, loss: 0.0013523348607122898 2023-01-22 17:49:15.061666: step: 460/531, loss: 0.0007990815793164074 2023-01-22 17:49:16.129945: step: 464/531, loss: 0.002101711928844452 2023-01-22 17:49:17.182323: step: 468/531, loss: 0.0011780555360019207 2023-01-22 17:49:18.245874: step: 472/531, loss: 0.001475114026106894 2023-01-22 17:49:19.310799: step: 476/531, loss: 0.0001634666114114225 2023-01-22 17:49:20.355858: step: 480/531, loss: 0.002623925218358636 2023-01-22 17:49:21.426865: step: 484/531, loss: 0.0044868397526443005 2023-01-22 17:49:22.475403: step: 488/531, loss: 0.0016982014058157802 2023-01-22 17:49:23.523924: step: 492/531, loss: 0.0001340064191026613 2023-01-22 17:49:24.586021: step: 496/531, loss: 0.0022903243079781532 2023-01-22 17:49:25.642610: step: 500/531, loss: 0.001770670060068369 2023-01-22 17:49:26.710442: step: 504/531, loss: 0.003249990288168192 2023-01-22 17:49:27.760862: step: 508/531, loss: 0.0 2023-01-22 17:49:28.815390: step: 512/531, loss: 0.0007118220091797411 2023-01-22 17:49:29.887873: step: 516/531, loss: 0.0058363983407616615 2023-01-22 17:49:30.954805: step: 520/531, loss: 0.0025534082669764757 2023-01-22 17:49:31.995777: step: 524/531, loss: 0.003192349337041378 2023-01-22 17:49:33.061812: step: 528/531, loss: 7.978378562256694e-05 2023-01-22 17:49:34.135350: step: 532/531, loss: 0.0013387470971792936 2023-01-22 17:49:35.186444: step: 536/531, loss: 0.0005626198835670948 2023-01-22 17:49:36.270026: step: 540/531, loss: 0.0035505532287061214 2023-01-22 17:49:37.323386: step: 544/531, loss: 0.0013098145136609674 2023-01-22 17:49:38.381963: step: 548/531, loss: 0.007388577796518803 2023-01-22 17:49:39.439851: step: 552/531, loss: 0.00047912332229316235 2023-01-22 17:49:40.500162: step: 556/531, loss: 6.0840258811367676e-05 2023-01-22 17:49:41.564866: step: 560/531, loss: 0.001054595224559307 2023-01-22 17:49:42.624151: step: 564/531, loss: 0.0003234370960853994 2023-01-22 17:49:43.670335: step: 568/531, loss: 0.0013571546878665686 2023-01-22 17:49:44.726648: step: 572/531, loss: 2.2032678316463716e-05 2023-01-22 17:49:45.775497: step: 576/531, loss: 2.442975073790876e-06 2023-01-22 17:49:46.833735: step: 580/531, loss: 0.00010165247658733279 2023-01-22 17:49:47.898042: step: 584/531, loss: 1.2875897482444998e-05 2023-01-22 17:49:48.962879: step: 588/531, loss: 0.002972032641991973 2023-01-22 17:49:50.027417: step: 592/531, loss: 0.0001249411579919979 2023-01-22 17:49:51.088538: step: 596/531, loss: 0.00010246341116726398 2023-01-22 17:49:52.129400: step: 600/531, loss: 0.00042192815453745425 2023-01-22 17:49:53.189599: step: 604/531, loss: 0.00083645258564502 2023-01-22 17:49:54.247517: step: 608/531, loss: 0.0016103017842397094 
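The "New best chinese model..." message in the epoch-15 summary above, together with a "Current best result" block in which Chinese now points to epoch 15 while Korean and Russian still point to epoch 5, suggests the best dev 'combined' score is tracked separately per language. The sketch below is a guess at that bookkeeping, with illustrative names only.

```python
# Hypothetical per-language bookkeeping suggested by "New best chinese model..."
# and the "Current best result" block (Chinese now at epoch 15, Korean and
# Russian still at epoch 5). All names here are illustrative.
best = {}  # language -> {"dev": ..., "test": ..., "epoch": ...}

def update_best(language, dev_scores, test_scores, epoch):
    prev = best.get(language)
    if prev is None or dev_scores["combined"] > prev["dev"]["combined"]:
        best[language] = {"dev": dev_scores, "test": test_scores, "epoch": epoch}
        print(f"New best {language} model...")
        # a checkpoint for this language would presumably also be saved here
    return best[language]
```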
2023-01-22 17:49:55.314345: step: 612/531, loss: 0.002996583469212055 2023-01-22 17:49:56.378539: step: 616/531, loss: 0.008293939754366875 2023-01-22 17:49:57.443508: step: 620/531, loss: 0.002016602549701929 2023-01-22 17:49:58.489832: step: 624/531, loss: 0.0004442233475856483 2023-01-22 17:49:59.571097: step: 628/531, loss: 0.000340130616677925 2023-01-22 17:50:00.653623: step: 632/531, loss: 0.0031298520043492317 2023-01-22 17:50:01.716119: step: 636/531, loss: 0.005849834531545639 2023-01-22 17:50:02.797390: step: 640/531, loss: 0.0013425523648038507 2023-01-22 17:50:03.856195: step: 644/531, loss: 0.0077392528764903545 2023-01-22 17:50:04.917753: step: 648/531, loss: 0.003035782603546977 2023-01-22 17:50:05.986959: step: 652/531, loss: 0.0012241639196872711 2023-01-22 17:50:07.036440: step: 656/531, loss: 0.004161634482443333 2023-01-22 17:50:08.124847: step: 660/531, loss: 0.011733267456293106 2023-01-22 17:50:09.196990: step: 664/531, loss: 0.008866648189723492 2023-01-22 17:50:10.254056: step: 668/531, loss: 5.774093096988508e-07 2023-01-22 17:50:11.325025: step: 672/531, loss: 0.0003667854762170464 2023-01-22 17:50:12.418576: step: 676/531, loss: 0.018139883875846863 2023-01-22 17:50:13.483086: step: 680/531, loss: 0.0014334052102640271 2023-01-22 17:50:14.546795: step: 684/531, loss: 0.0009828147012740374 2023-01-22 17:50:15.610067: step: 688/531, loss: 0.0482136495411396 2023-01-22 17:50:16.689305: step: 692/531, loss: 0.0044499170035123825 2023-01-22 17:50:17.761976: step: 696/531, loss: 0.006069484166800976 2023-01-22 17:50:18.826273: step: 700/531, loss: 0.00397529499605298 2023-01-22 17:50:19.880383: step: 704/531, loss: 0.0021991494577378035 2023-01-22 17:50:20.941470: step: 708/531, loss: 0.007443928625434637 2023-01-22 17:50:22.010886: step: 712/531, loss: 0.024295277893543243 2023-01-22 17:50:23.078347: step: 716/531, loss: 0.002493762644007802 2023-01-22 17:50:24.152634: step: 720/531, loss: 0.0006320299580693245 2023-01-22 17:50:25.209804: step: 724/531, loss: 0.00021273297898005694 2023-01-22 17:50:26.273859: step: 728/531, loss: 0.0007978095673024654 2023-01-22 17:50:27.343153: step: 732/531, loss: 0.000337799807311967 2023-01-22 17:50:28.412672: step: 736/531, loss: 0.0007277352269738913 2023-01-22 17:50:29.481049: step: 740/531, loss: 0.007081305608153343 2023-01-22 17:50:30.538909: step: 744/531, loss: 8.452231122646481e-05 2023-01-22 17:50:31.603002: step: 748/531, loss: 0.004326251335442066 2023-01-22 17:50:32.666632: step: 752/531, loss: 0.0015107349026948214 2023-01-22 17:50:33.735445: step: 756/531, loss: 0.0003000946016982198 2023-01-22 17:50:34.804178: step: 760/531, loss: 0.02435571886599064 2023-01-22 17:50:35.847935: step: 764/531, loss: 0.000554512080270797 2023-01-22 17:50:36.919230: step: 768/531, loss: 0.0031434923876076937 2023-01-22 17:50:37.985805: step: 772/531, loss: 0.03658808395266533 2023-01-22 17:50:39.044419: step: 776/531, loss: 0.009474306367337704 2023-01-22 17:50:40.119174: step: 780/531, loss: 0.0006337225786410272 2023-01-22 17:50:41.174077: step: 784/531, loss: 0.0006190314306877553 2023-01-22 17:50:42.229900: step: 788/531, loss: 0.00016941303329076618 2023-01-22 17:50:43.290181: step: 792/531, loss: 0.0034792874939739704 2023-01-22 17:50:44.370805: step: 796/531, loss: 0.0026638987474143505 2023-01-22 17:50:45.422072: step: 800/531, loss: 0.004257877357304096 2023-01-22 17:50:46.498959: step: 804/531, loss: 0.0016678273677825928 2023-01-22 17:50:47.557640: step: 808/531, loss: 0.0013899136101827025 2023-01-22 17:50:48.624413: 
step: 812/531, loss: 0.00044038414489477873 2023-01-22 17:50:49.695279: step: 816/531, loss: 0.004343985579907894 2023-01-22 17:50:50.757438: step: 820/531, loss: 0.004557658452540636 2023-01-22 17:50:51.822146: step: 824/531, loss: 0.000536191975697875 2023-01-22 17:50:52.896061: step: 828/531, loss: 0.001975310267880559 2023-01-22 17:50:53.943599: step: 832/531, loss: 0.002083404455333948 2023-01-22 17:50:55.011556: step: 836/531, loss: 3.190386632923037e-05 2023-01-22 17:50:56.058472: step: 840/531, loss: 1.8812079360941425e-05 2023-01-22 17:50:57.102612: step: 844/531, loss: 0.00013241350825410336 2023-01-22 17:50:58.173910: step: 848/531, loss: 0.000764001626521349 2023-01-22 17:50:59.221070: step: 852/531, loss: 0.005922616459429264 2023-01-22 17:51:00.273580: step: 856/531, loss: 0.00026294938288629055 2023-01-22 17:51:01.325177: step: 860/531, loss: 6.724869308527559e-05 2023-01-22 17:51:02.375332: step: 864/531, loss: 0.004805354867130518 2023-01-22 17:51:03.437548: step: 868/531, loss: 0.00019580681691877544 2023-01-22 17:51:04.515520: step: 872/531, loss: 0.0022269911132752895 2023-01-22 17:51:05.564105: step: 876/531, loss: 0.005511918570846319 2023-01-22 17:51:06.638246: step: 880/531, loss: 0.0003908055368810892 2023-01-22 17:51:07.709212: step: 884/531, loss: 0.0038577395025640726 2023-01-22 17:51:08.755814: step: 888/531, loss: 0.00012229409185238183 2023-01-22 17:51:09.809411: step: 892/531, loss: 0.022004032507538795 2023-01-22 17:51:10.853227: step: 896/531, loss: 0.0011355261085554957 2023-01-22 17:51:11.935506: step: 900/531, loss: 0.00016177931684069335 2023-01-22 17:51:12.989687: step: 904/531, loss: 9.462114576308522e-07 2023-01-22 17:51:14.042399: step: 908/531, loss: 0.0003072930558118969 2023-01-22 17:51:15.096194: step: 912/531, loss: 0.0010208121966570616 2023-01-22 17:51:16.150735: step: 916/531, loss: 0.005024034529924393 2023-01-22 17:51:17.213785: step: 920/531, loss: 2.779700707833399e-06 2023-01-22 17:51:18.263376: step: 924/531, loss: 0.0005459256353788078 2023-01-22 17:51:19.325951: step: 928/531, loss: 2.352625051571522e-05 2023-01-22 17:51:20.386512: step: 932/531, loss: 0.005092419683933258 2023-01-22 17:51:21.452771: step: 936/531, loss: 0.004188242368400097 2023-01-22 17:51:22.529047: step: 940/531, loss: 0.0005123792798258364 2023-01-22 17:51:23.597120: step: 944/531, loss: 0.002709169639274478 2023-01-22 17:51:24.654661: step: 948/531, loss: 0.004825504496693611 2023-01-22 17:51:25.706450: step: 952/531, loss: 0.003973186481744051 2023-01-22 17:51:26.771518: step: 956/531, loss: 0.0006743451813235879 2023-01-22 17:51:27.816887: step: 960/531, loss: 0.004166416823863983 2023-01-22 17:51:28.871288: step: 964/531, loss: 0.00047372953849844635 2023-01-22 17:51:29.949504: step: 968/531, loss: 0.004287892021238804 2023-01-22 17:51:31.035316: step: 972/531, loss: 0.007905044592916965 2023-01-22 17:51:32.087199: step: 976/531, loss: 0.004012254998087883 2023-01-22 17:51:33.139357: step: 980/531, loss: 0.0025107422843575478 2023-01-22 17:51:34.202001: step: 984/531, loss: 0.0005221758619882166 2023-01-22 17:51:35.266951: step: 988/531, loss: 0.0023451775778084993 2023-01-22 17:51:36.319288: step: 992/531, loss: 0.0024254019372165203 2023-01-22 17:51:37.373478: step: 996/531, loss: 0.0005585875478573143 2023-01-22 17:51:38.442979: step: 1000/531, loss: 0.007971425540745258 2023-01-22 17:51:39.510158: step: 1004/531, loss: 0.004571865312755108 2023-01-22 17:51:40.584166: step: 1008/531, loss: 0.00048525878810323775 2023-01-22 17:51:41.633491: step: 
1012/531, loss: 0.0015203282237052917 2023-01-22 17:51:42.687191: step: 1016/531, loss: 0.010365449823439121 2023-01-22 17:51:43.746379: step: 1020/531, loss: 0.0049848249182105064 2023-01-22 17:51:44.811330: step: 1024/531, loss: 0.0037505249492824078 2023-01-22 17:51:45.868392: step: 1028/531, loss: 0.005364949814975262 2023-01-22 17:51:46.946643: step: 1032/531, loss: 0.004655045457184315 2023-01-22 17:51:47.993796: step: 1036/531, loss: 0.0004250466590747237 2023-01-22 17:51:49.057972: step: 1040/531, loss: 0.004402277525514364 2023-01-22 17:51:50.120010: step: 1044/531, loss: 0.008860880509018898 2023-01-22 17:51:51.179318: step: 1048/531, loss: 0.0011058483505621552 2023-01-22 17:51:52.251161: step: 1052/531, loss: 0.0010410579852759838 2023-01-22 17:51:53.296300: step: 1056/531, loss: 1.4615292457165197e-05 2023-01-22 17:51:54.351454: step: 1060/531, loss: 0.0011917271185666323 2023-01-22 17:51:55.411626: step: 1064/531, loss: 0.0007558545912615955 2023-01-22 17:51:56.467390: step: 1068/531, loss: 0.005323739722371101 2023-01-22 17:51:57.526424: step: 1072/531, loss: 0.00017955050861928612 2023-01-22 17:51:58.587061: step: 1076/531, loss: 0.0021870082709938288 2023-01-22 17:51:59.641575: step: 1080/531, loss: 4.3273194023640826e-05 2023-01-22 17:52:00.707018: step: 1084/531, loss: 0.0040289005264639854 2023-01-22 17:52:01.797589: step: 1088/531, loss: 0.00014256784925237298 2023-01-22 17:52:02.853689: step: 1092/531, loss: 0.00023438378411810845 2023-01-22 17:52:03.911104: step: 1096/531, loss: 1.7452583051635884e-05 2023-01-22 17:52:04.995967: step: 1100/531, loss: 0.002010050928220153 2023-01-22 17:52:06.069047: step: 1104/531, loss: 0.0023867525160312653 2023-01-22 17:52:07.121059: step: 1108/531, loss: 0.007288050372153521 2023-01-22 17:52:08.182435: step: 1112/531, loss: 0.0014383370289579034 2023-01-22 17:52:09.265402: step: 1116/531, loss: 0.003016107017174363 2023-01-22 17:52:10.327294: step: 1120/531, loss: 0.003575441427528858 2023-01-22 17:52:11.388287: step: 1124/531, loss: 0.00020924244017805904 2023-01-22 17:52:12.464923: step: 1128/531, loss: 0.0010998067446053028 2023-01-22 17:52:13.540663: step: 1132/531, loss: 0.0027836367953568697 2023-01-22 17:52:14.618053: step: 1136/531, loss: 0.000872039410751313 2023-01-22 17:52:15.669603: step: 1140/531, loss: 6.790413317503408e-05 2023-01-22 17:52:16.754323: step: 1144/531, loss: 0.0010864792857319117 2023-01-22 17:52:17.817309: step: 1148/531, loss: 2.113144319082494e-06 2023-01-22 17:52:18.884143: step: 1152/531, loss: 0.004047530237585306 2023-01-22 17:52:19.951030: step: 1156/531, loss: 0.04118496924638748 2023-01-22 17:52:21.008224: step: 1160/531, loss: 0.019365420565009117 2023-01-22 17:52:22.073730: step: 1164/531, loss: 0.0003123456845059991 2023-01-22 17:52:23.148140: step: 1168/531, loss: 0.0051879254169762135 2023-01-22 17:52:24.214217: step: 1172/531, loss: 0.0005348093691281974 2023-01-22 17:52:25.277673: step: 1176/531, loss: 0.0034417458809912205 2023-01-22 17:52:26.323796: step: 1180/531, loss: 0.0005080850096419454 2023-01-22 17:52:27.372540: step: 1184/531, loss: 7.316900882869959e-05 2023-01-22 17:52:28.434866: step: 1188/531, loss: 0.003836761461570859 2023-01-22 17:52:29.493426: step: 1192/531, loss: 0.016399923712015152 2023-01-22 17:52:30.557713: step: 1196/531, loss: 0.00011520372208906338 2023-01-22 17:52:31.628818: step: 1200/531, loss: 0.016874657943844795 2023-01-22 17:52:32.680853: step: 1204/531, loss: 0.006478848867118359 2023-01-22 17:52:33.760748: step: 1208/531, loss: 0.006651771254837513 
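(Note on the step counter in the surrounding lines: the numerator runs past the 531 denominator and every logged step is a multiple of 4. Since the command line uses --accumulate_step 4 and the last step of each epoch is 2124 = 4 x 531, the counter appears to track micro-batches while 531 is the number of optimizer updates per epoch. The following is only a minimal sketch of that bookkeeping under those assumptions; it is not taken from the project's train.py.)

# Hypothetical sketch of the step bookkeeping implied by this log; the two
# constants are read off the command line and the log, everything else is assumed.
ACCUMULATE_STEP = 4        # from --accumulate_step 4
UPDATES_PER_EPOCH = 531    # the "/531" denominator printed in every step line

micro_batches_per_epoch = ACCUMULATE_STEP * UPDATES_PER_EPOCH  # 2124, the last step logged per epoch

for step in range(1, micro_batches_per_epoch + 1):
    # forward/backward on one micro-batch would go here, with the loss
    # scaled by 1 / ACCUMULATE_STEP before backward()
    if step % ACCUMULATE_STEP == 0:
        # optimizer.step(); optimizer.zero_grad(); then log, which would match
        # the 4, 8, ..., 2124 cadence seen in this file
        print(f"step: {step}/{UPDATES_PER_EPOCH}")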
2023-01-22 17:52:34.846319: step: 1212/531, loss: 0.005560880061239004 2023-01-22 17:52:35.914535: step: 1216/531, loss: 0.011105512268841267 2023-01-22 17:52:36.975626: step: 1220/531, loss: 0.004349041264504194 2023-01-22 17:52:38.044190: step: 1224/531, loss: 0.007353505585342646 2023-01-22 17:52:39.109954: step: 1228/531, loss: 0.0016315426910296082 2023-01-22 17:52:40.172909: step: 1232/531, loss: 0.0010192908812314272 2023-01-22 17:52:41.231024: step: 1236/531, loss: 0.00018461354193277657 2023-01-22 17:52:42.289964: step: 1240/531, loss: 0.0029581880662590265 2023-01-22 17:52:43.354716: step: 1244/531, loss: 0.005374578293412924 2023-01-22 17:52:44.426610: step: 1248/531, loss: 0.005349393002688885 2023-01-22 17:52:45.482054: step: 1252/531, loss: 0.0012194185983389616 2023-01-22 17:52:46.529590: step: 1256/531, loss: 0.0021546054631471634 2023-01-22 17:52:47.582674: step: 1260/531, loss: 0.0004597770457621664 2023-01-22 17:52:48.663899: step: 1264/531, loss: 0.00028122885851189494 2023-01-22 17:52:49.726434: step: 1268/531, loss: 0.0009860617574304342 2023-01-22 17:52:50.789522: step: 1272/531, loss: 0.00726823415607214 2023-01-22 17:52:51.840860: step: 1276/531, loss: 0.00618370296433568 2023-01-22 17:52:52.910381: step: 1280/531, loss: 0.0008198546129278839 2023-01-22 17:52:53.975944: step: 1284/531, loss: 0.0016563403187319636 2023-01-22 17:52:55.022049: step: 1288/531, loss: 0.003673788858577609 2023-01-22 17:52:56.080004: step: 1292/531, loss: 0.00038287806091830134 2023-01-22 17:52:57.122538: step: 1296/531, loss: 0.0017629192443564534 2023-01-22 17:52:58.177419: step: 1300/531, loss: 0.0026671725790947676 2023-01-22 17:52:59.230096: step: 1304/531, loss: 0.0030565953347831964 2023-01-22 17:53:00.288742: step: 1308/531, loss: 0.0050751385278999805 2023-01-22 17:53:01.338608: step: 1312/531, loss: 0.0004592680197674781 2023-01-22 17:53:02.409738: step: 1316/531, loss: 0.006077755708247423 2023-01-22 17:53:03.459070: step: 1320/531, loss: 0.002504387404769659 2023-01-22 17:53:04.515230: step: 1324/531, loss: 0.0016804387560114264 2023-01-22 17:53:05.561694: step: 1328/531, loss: 0.03159349039196968 2023-01-22 17:53:06.611348: step: 1332/531, loss: 0.002411656081676483 2023-01-22 17:53:07.658141: step: 1336/531, loss: 0.009108101017773151 2023-01-22 17:53:08.735844: step: 1340/531, loss: 0.030691813677549362 2023-01-22 17:53:09.793956: step: 1344/531, loss: 0.005158753599971533 2023-01-22 17:53:10.848783: step: 1348/531, loss: 2.4898179617593996e-05 2023-01-22 17:53:11.917218: step: 1352/531, loss: 0.013886045664548874 2023-01-22 17:53:12.983409: step: 1356/531, loss: 0.0026678945869207382 2023-01-22 17:53:14.038281: step: 1360/531, loss: 0.0014832192100584507 2023-01-22 17:53:15.093866: step: 1364/531, loss: 0.00021716530318371952 2023-01-22 17:53:16.149911: step: 1368/531, loss: 3.186364438079181e-06 2023-01-22 17:53:17.206921: step: 1372/531, loss: 0.0014044985873624682 2023-01-22 17:53:18.269095: step: 1376/531, loss: 0.0037600467912852764 2023-01-22 17:53:19.327809: step: 1380/531, loss: 0.002768822479993105 2023-01-22 17:53:20.372392: step: 1384/531, loss: 0.005670212674885988 2023-01-22 17:53:21.433424: step: 1388/531, loss: 0.023402299731969833 2023-01-22 17:53:22.482873: step: 1392/531, loss: 3.9808048313716426e-05 2023-01-22 17:53:23.527355: step: 1396/531, loss: 6.373807264026254e-05 2023-01-22 17:53:24.576232: step: 1400/531, loss: 0.004025139845907688 2023-01-22 17:53:25.641387: step: 1404/531, loss: 0.009786794893443584 2023-01-22 17:53:26.700476: step: 1408/531, 
loss: 0.0009095058194361627 2023-01-22 17:53:27.777427: step: 1412/531, loss: 0.011140435934066772 2023-01-22 17:53:28.845632: step: 1416/531, loss: 0.0006607322138734162 2023-01-22 17:53:29.905080: step: 1420/531, loss: 0.0004952255985699594 2023-01-22 17:53:30.959934: step: 1424/531, loss: 0.006792471744120121 2023-01-22 17:53:31.996496: step: 1428/531, loss: 0.002992016961798072 2023-01-22 17:53:33.064442: step: 1432/531, loss: 0.0036824692506343126 2023-01-22 17:53:34.104931: step: 1436/531, loss: 0.012425734661519527 2023-01-22 17:53:35.158231: step: 1440/531, loss: 0.00041032061562873423 2023-01-22 17:53:36.203248: step: 1444/531, loss: 0.011098210699856281 2023-01-22 17:53:37.261177: step: 1448/531, loss: 0.0030635774601250887 2023-01-22 17:53:38.308142: step: 1452/531, loss: 0.0018457588739693165 2023-01-22 17:53:39.370911: step: 1456/531, loss: 0.00670445803552866 2023-01-22 17:53:40.422493: step: 1460/531, loss: 0.004242204129695892 2023-01-22 17:53:41.492284: step: 1464/531, loss: 0.0001496480399509892 2023-01-22 17:53:42.564246: step: 1468/531, loss: 0.07136724889278412 2023-01-22 17:53:43.624368: step: 1472/531, loss: 2.9415205062832683e-05 2023-01-22 17:53:44.661272: step: 1476/531, loss: 0.0011069440515711904 2023-01-22 17:53:45.731305: step: 1480/531, loss: 0.0001400729815941304 2023-01-22 17:53:46.775878: step: 1484/531, loss: 0.0003343082789797336 2023-01-22 17:53:47.834536: step: 1488/531, loss: 8.178819552995265e-05 2023-01-22 17:53:48.868276: step: 1492/531, loss: 0.001840598531998694 2023-01-22 17:53:49.951789: step: 1496/531, loss: 0.01013681385666132 2023-01-22 17:53:51.005674: step: 1500/531, loss: 0.0025778785347938538 2023-01-22 17:53:52.061800: step: 1504/531, loss: 0.0002579967840574682 2023-01-22 17:53:53.126843: step: 1508/531, loss: 0.04207774996757507 2023-01-22 17:53:54.191712: step: 1512/531, loss: 2.6987532919520163e-07 2023-01-22 17:53:55.259456: step: 1516/531, loss: 2.7331512683304027e-05 2023-01-22 17:53:56.322020: step: 1520/531, loss: 0.0013419572496786714 2023-01-22 17:53:57.373969: step: 1524/531, loss: 0.0042828964069485664 2023-01-22 17:53:58.452001: step: 1528/531, loss: 0.08026225864887238 2023-01-22 17:53:59.511315: step: 1532/531, loss: 0.029859913513064384 2023-01-22 17:54:00.561236: step: 1536/531, loss: 0.0049073961563408375 2023-01-22 17:54:01.617991: step: 1540/531, loss: 0.0012522776378318667 2023-01-22 17:54:02.663560: step: 1544/531, loss: 0.006776969879865646 2023-01-22 17:54:03.735103: step: 1548/531, loss: 0.004690579604357481 2023-01-22 17:54:04.804734: step: 1552/531, loss: 0.00415599113330245 2023-01-22 17:54:05.865911: step: 1556/531, loss: 0.002957819029688835 2023-01-22 17:54:06.942052: step: 1560/531, loss: 0.000746549223549664 2023-01-22 17:54:08.001235: step: 1564/531, loss: 0.013988284394145012 2023-01-22 17:54:09.069455: step: 1568/531, loss: 0.0005988162592984736 2023-01-22 17:54:10.139090: step: 1572/531, loss: 0.004227219615131617 2023-01-22 17:54:11.209619: step: 1576/531, loss: 0.001184638123959303 2023-01-22 17:54:12.287925: step: 1580/531, loss: 0.00997968390583992 2023-01-22 17:54:13.351489: step: 1584/531, loss: 0.006052677053958178 2023-01-22 17:54:14.395937: step: 1588/531, loss: 2.45750488829799e-05 2023-01-22 17:54:15.452039: step: 1592/531, loss: 0.015461661852896214 2023-01-22 17:54:16.504140: step: 1596/531, loss: 0.006575525738298893 2023-01-22 17:54:17.555832: step: 1600/531, loss: 0.002303633140400052 2023-01-22 17:54:18.626394: step: 1604/531, loss: 0.001096038380637765 2023-01-22 17:54:19.677903: 
step: 1608/531, loss: 0.00010347751958761364 2023-01-22 17:54:20.723586: step: 1612/531, loss: 0.0005093302461318672 2023-01-22 17:54:21.789084: step: 1616/531, loss: 0.008888202719390392 2023-01-22 17:54:22.852528: step: 1620/531, loss: 0.005364787764847279 2023-01-22 17:54:23.916028: step: 1624/531, loss: 0.005350317806005478 2023-01-22 17:54:24.964574: step: 1628/531, loss: 0.0008648353395983577 2023-01-22 17:54:26.025202: step: 1632/531, loss: 0.0006377737736329436 2023-01-22 17:54:27.076537: step: 1636/531, loss: 0.0010672896169126034 2023-01-22 17:54:28.142960: step: 1640/531, loss: 0.004219456110149622 2023-01-22 17:54:29.201360: step: 1644/531, loss: 0.003980959299951792 2023-01-22 17:54:30.250065: step: 1648/531, loss: 0.0028800093568861485 2023-01-22 17:54:31.304876: step: 1652/531, loss: 5.2026498451596126e-05 2023-01-22 17:54:32.368015: step: 1656/531, loss: 0.005431903060525656 2023-01-22 17:54:33.431615: step: 1660/531, loss: 0.0066912793554365635 2023-01-22 17:54:34.505554: step: 1664/531, loss: 0.006996945012360811 2023-01-22 17:54:35.567809: step: 1668/531, loss: 0.004999788478016853 2023-01-22 17:54:36.625313: step: 1672/531, loss: 0.0009596017189323902 2023-01-22 17:54:37.673948: step: 1676/531, loss: 0.0007917137118056417 2023-01-22 17:54:38.715471: step: 1680/531, loss: 0.001373159233480692 2023-01-22 17:54:39.781361: step: 1684/531, loss: 0.00048797804629430175 2023-01-22 17:54:40.833790: step: 1688/531, loss: 0.000255568913416937 2023-01-22 17:54:41.894982: step: 1692/531, loss: 0.00018379483663011342 2023-01-22 17:54:42.959219: step: 1696/531, loss: 0.0030112704262137413 2023-01-22 17:54:44.020841: step: 1700/531, loss: 0.00014690367970615625 2023-01-22 17:54:45.084248: step: 1704/531, loss: 0.00441074138507247 2023-01-22 17:54:46.136630: step: 1708/531, loss: 0.0007090168655849993 2023-01-22 17:54:47.199103: step: 1712/531, loss: 0.005016597453504801 2023-01-22 17:54:48.246148: step: 1716/531, loss: 0.003971371799707413 2023-01-22 17:54:49.310704: step: 1720/531, loss: 0.0005290894187055528 2023-01-22 17:54:50.355319: step: 1724/531, loss: 0.003302742727100849 2023-01-22 17:54:51.415456: step: 1728/531, loss: 0.003913096617907286 2023-01-22 17:54:52.492612: step: 1732/531, loss: 0.0011660200543701649 2023-01-22 17:54:53.548335: step: 1736/531, loss: 0.0008671550313010812 2023-01-22 17:54:54.602233: step: 1740/531, loss: 0.00204609171487391 2023-01-22 17:54:55.679064: step: 1744/531, loss: 0.00020118526299484074 2023-01-22 17:54:56.718999: step: 1748/531, loss: 0.006373061332851648 2023-01-22 17:54:57.788183: step: 1752/531, loss: 0.0031337805557996035 2023-01-22 17:54:58.831643: step: 1756/531, loss: 5.2260995289543644e-05 2023-01-22 17:54:59.884246: step: 1760/531, loss: 0.0007659198599867523 2023-01-22 17:55:00.942330: step: 1764/531, loss: 4.83723524666857e-05 2023-01-22 17:55:02.006622: step: 1768/531, loss: 0.0017845489783212543 2023-01-22 17:55:03.074304: step: 1772/531, loss: 6.029046744515654e-06 2023-01-22 17:55:04.121047: step: 1776/531, loss: 0.008418998681008816 2023-01-22 17:55:05.181291: step: 1780/531, loss: 0.006016191560775042 2023-01-22 17:55:06.242968: step: 1784/531, loss: 8.963803338701837e-06 2023-01-22 17:55:07.291171: step: 1788/531, loss: 0.003902255091816187 2023-01-22 17:55:08.338619: step: 1792/531, loss: 0.00015019219426903874 2023-01-22 17:55:09.403861: step: 1796/531, loss: 0.00860699824988842 2023-01-22 17:55:10.496437: step: 1800/531, loss: 0.0018698297208175063 2023-01-22 17:55:11.548982: step: 1804/531, loss: 0.003223733976483345 
2023-01-22 17:55:12.620804: step: 1808/531, loss: 0.01708688586950302 2023-01-22 17:55:13.686266: step: 1812/531, loss: 0.005929495207965374 2023-01-22 17:55:14.759889: step: 1816/531, loss: 0.00023376382887363434 2023-01-22 17:55:15.832344: step: 1820/531, loss: 0.020254552364349365 2023-01-22 17:55:16.925244: step: 1824/531, loss: 0.0026953064370900393 2023-01-22 17:55:17.980181: step: 1828/531, loss: 0.00672599533572793 2023-01-22 17:55:19.071957: step: 1832/531, loss: 0.006798403803259134 2023-01-22 17:55:20.128991: step: 1836/531, loss: 0.003984320443123579 2023-01-22 17:55:21.189745: step: 1840/531, loss: 0.003288338892161846 2023-01-22 17:55:22.237949: step: 1844/531, loss: 0.003636469366028905 2023-01-22 17:55:23.329321: step: 1848/531, loss: 0.00671724509447813 2023-01-22 17:55:24.387608: step: 1852/531, loss: 0.00036437358357943594 2023-01-22 17:55:25.437035: step: 1856/531, loss: 1.495458036515629e-05 2023-01-22 17:55:26.492403: step: 1860/531, loss: 0.00010102773376274854 2023-01-22 17:55:27.560076: step: 1864/531, loss: 0.0037178967613726854 2023-01-22 17:55:28.604126: step: 1868/531, loss: 0.005754513666033745 2023-01-22 17:55:29.647899: step: 1872/531, loss: 0.03755314275622368 2023-01-22 17:55:30.710388: step: 1876/531, loss: 0.0035454800818115473 2023-01-22 17:55:31.758859: step: 1880/531, loss: 0.005830982234328985 2023-01-22 17:55:32.805611: step: 1884/531, loss: 0.008748526684939861 2023-01-22 17:55:33.860298: step: 1888/531, loss: 0.002525359159335494 2023-01-22 17:55:34.908596: step: 1892/531, loss: 0.008077842183411121 2023-01-22 17:55:35.977300: step: 1896/531, loss: 6.24095082457643e-06 2023-01-22 17:55:37.044649: step: 1900/531, loss: 0.0019070765702053905 2023-01-22 17:55:38.098641: step: 1904/531, loss: 3.677227482512535e-07 2023-01-22 17:55:39.163408: step: 1908/531, loss: 0.0010879384353756905 2023-01-22 17:55:40.219926: step: 1912/531, loss: 0.007038034964352846 2023-01-22 17:55:41.273328: step: 1916/531, loss: 0.002885445486754179 2023-01-22 17:55:42.358687: step: 1920/531, loss: 0.0034756995737552643 2023-01-22 17:55:43.430019: step: 1924/531, loss: 0.00026293564587831497 2023-01-22 17:55:44.482652: step: 1928/531, loss: 0.002439383650198579 2023-01-22 17:55:45.543978: step: 1932/531, loss: 2.792055056488607e-05 2023-01-22 17:55:46.589331: step: 1936/531, loss: 7.744190224912018e-05 2023-01-22 17:55:47.662530: step: 1940/531, loss: 0.002347267000004649 2023-01-22 17:55:48.757606: step: 1944/531, loss: 0.002145696897059679 2023-01-22 17:55:49.848689: step: 1948/531, loss: 0.007006294559687376 2023-01-22 17:55:50.906392: step: 1952/531, loss: 0.0007536351913586259 2023-01-22 17:55:51.969085: step: 1956/531, loss: 0.0014195183757692575 2023-01-22 17:55:53.038649: step: 1960/531, loss: 0.0005646995850838721 2023-01-22 17:55:54.091912: step: 1964/531, loss: 0.0011817436898127198 2023-01-22 17:55:55.147927: step: 1968/531, loss: 0.0009935875423252583 2023-01-22 17:55:56.191913: step: 1972/531, loss: 0.0005446636932902038 2023-01-22 17:55:57.258558: step: 1976/531, loss: 0.0005326094687916338 2023-01-22 17:55:58.312990: step: 1980/531, loss: 6.404858140740544e-05 2023-01-22 17:55:59.405668: step: 1984/531, loss: 0.000803306931629777 2023-01-22 17:56:00.458086: step: 1988/531, loss: 0.004085606895387173 2023-01-22 17:56:01.524206: step: 1992/531, loss: 0.0005840605590492487 2023-01-22 17:56:02.593945: step: 1996/531, loss: 0.0078003243543207645 2023-01-22 17:56:03.637940: step: 2000/531, loss: 0.001434163423255086 2023-01-22 17:56:04.678595: step: 2004/531, loss: 
0.0016677728854119778 2023-01-22 17:56:05.742532: step: 2008/531, loss: 0.0044198608957231045 2023-01-22 17:56:06.800893: step: 2012/531, loss: 0.003359207883477211 2023-01-22 17:56:07.845811: step: 2016/531, loss: 0.004193820990622044 2023-01-22 17:56:08.906935: step: 2020/531, loss: 0.0036032337229698896 2023-01-22 17:56:09.973324: step: 2024/531, loss: 0.0016510685672983527 2023-01-22 17:56:11.032836: step: 2028/531, loss: 0.003847442101687193 2023-01-22 17:56:12.096250: step: 2032/531, loss: 0.004811007529497147 2023-01-22 17:56:13.152357: step: 2036/531, loss: 0.00699926121160388 2023-01-22 17:56:14.220791: step: 2040/531, loss: 0.0004596064973156899 2023-01-22 17:56:15.263236: step: 2044/531, loss: 0.0027884547598659992 2023-01-22 17:56:16.308391: step: 2048/531, loss: 0.00021353489137254655 2023-01-22 17:56:17.353057: step: 2052/531, loss: 0.0006405066815204918 2023-01-22 17:56:18.413864: step: 2056/531, loss: 0.0036231086123734713 2023-01-22 17:56:19.469871: step: 2060/531, loss: 0.0025239442475140095 2023-01-22 17:56:20.538451: step: 2064/531, loss: 0.01017787680029869 2023-01-22 17:56:21.584411: step: 2068/531, loss: 0.0019906111992895603 2023-01-22 17:56:22.653872: step: 2072/531, loss: 0.000426443584728986 2023-01-22 17:56:23.719708: step: 2076/531, loss: 0.0021929400973021984 2023-01-22 17:56:24.765939: step: 2080/531, loss: 0.0 2023-01-22 17:56:25.843198: step: 2084/531, loss: 0.004055523779243231 2023-01-22 17:56:26.904878: step: 2088/531, loss: 0.0006808657199144363 2023-01-22 17:56:27.968216: step: 2092/531, loss: 0.0009017421980388463 2023-01-22 17:56:29.023040: step: 2096/531, loss: 1.2260234143468551e-05 2023-01-22 17:56:30.079790: step: 2100/531, loss: 1.0708765330491588e-05 2023-01-22 17:56:31.130416: step: 2104/531, loss: 0.0004936104523949325 2023-01-22 17:56:32.177013: step: 2108/531, loss: 0.012104598805308342 2023-01-22 17:56:33.237846: step: 2112/531, loss: 0.0006459844880737364 2023-01-22 17:56:34.302320: step: 2116/531, loss: 0.005747909191995859 2023-01-22 17:56:35.350676: step: 2120/531, loss: 0.004473009146749973 2023-01-22 17:56:36.410108: step: 2124/531, loss: 0.0013793382095173001
==================================================
Loss: 0.004
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35395084566596197, 'r': 0.31708096590909096, 'f1': 0.33450299700299707}, 'combined': 0.24647589252852414, 'stategy': 1, 'epoch': 16}
Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3359597246605281, 'r': 0.2741043240912612, 'f1': 0.3018962138102559}, 'combined': 0.18903781612417894, 'stategy': 1, 'epoch': 16}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33272089097496704, 'r': 0.348504614455753, 'f1': 0.34042990142387736}, 'combined': 0.2508430852596991, 'stategy': 1, 'epoch': 16}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36098974948122464, 'r': 0.3004450304519119, 'f1': 0.32794639642831724}, 'combined': 0.20323438651895714, 'stategy': 1, 'epoch': 16}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3176638837353123, 'r': 0.32489721695129664, 'f1': 0.321239837398374}, 'combined': 0.23670303808301238, 'stategy': 1, 'epoch': 16}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36637844496002087, 'r': 0.2866158386046776, 'f1': 0.32162568001558856}, 'combined': 0.2133556491192518, 'stategy': 1, 'epoch': 16}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 16}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 16}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 16}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35765048712595693, 'r': 0.32445943813131317, 'f1': 0.34024743462429663}, 'combined': 0.2507086360389554, 'stategy': 1, 'epoch': 15}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33492632906351627, 'r': 0.27658627354974497, 'f1': 0.3029733970273034}, 'combined': 0.18971231402644234, 'stategy': 1, 'epoch': 15}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 15}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5}
******************************
Epoch: 17
command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4
2023-01-22 17:59:03.878833: step: 4/531, loss: 0.0010591301834210753 2023-01-22 17:59:04.924404: step: 8/531, loss: 0.014781218022108078 2023-01-22 17:59:05.962857: step: 12/531, loss: 0.0037061702460050583 2023-01-22 17:59:07.012917: step: 16/531, loss: 0.00033428677124902606 2023-01-22 17:59:08.066819: step: 20/531, loss: 8.860175148583949e-05
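(Note on the metric dictionaries in the epoch summaries above: each printed 'f1' is the usual harmonic mean of the printed 'p' and 'r', and each 'combined' value matches the product of the template f1 and the slot f1; for Dev Chinese at epoch 16, 0.7368421052631579 * 0.33450299700299707 = 0.24647589252852414. The snippet below only rechecks that relationship from the printed numbers; the formulas are inferred from the log, not taken from the project's scorer.)

# Recompute one summary entry (Dev Chinese, epoch 16) from the values above.
def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; 0.0 when both are 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)               # matches the printed 0.7368421052631579
slot_f1 = f1(0.35395084566596197, 0.31708096590909096)  # matches the printed 0.33450299700299707
combined = template_f1 * slot_f1                        # matches the printed 0.24647589252852414
print(template_f1, slot_f1, combined)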
2023-01-22 17:59:09.123500: step: 24/531, loss: 0.006528640631586313 2023-01-22 17:59:10.185443: step: 28/531, loss: 0.005082056857645512 2023-01-22 17:59:11.235942: step: 32/531, loss: 0.0007815062417648733 2023-01-22 17:59:12.315957: step: 36/531, loss: 0.001364995026960969 2023-01-22 17:59:13.368678: step: 40/531, loss: 0.00395982526242733 2023-01-22 17:59:14.410945: step: 44/531, loss: 0.013364103622734547 2023-01-22 17:59:15.474233: step: 48/531, loss: 0.0017849599244073033 2023-01-22 17:59:16.534547: step: 52/531, loss: 0.0034796802792698145 2023-01-22 17:59:17.561279: step: 56/531, loss: 0.005245598498731852 2023-01-22 17:59:18.601485: step: 60/531, loss: 6.323427805909887e-05 2023-01-22 17:59:19.639678: step: 64/531, loss: 0.00051159190479666 2023-01-22 17:59:20.700151: step: 68/531, loss: 0.003531547263264656 2023-01-22 17:59:21.755669: step: 72/531, loss: 0.0010178668890148401 2023-01-22 17:59:22.798042: step: 76/531, loss: 0.002235204679891467 2023-01-22 17:59:23.863088: step: 80/531, loss: 0.0058624702505767345 2023-01-22 17:59:24.916133: step: 84/531, loss: 0.00028773743542842567 2023-01-22 17:59:25.955388: step: 88/531, loss: 0.00022078095935285091 2023-01-22 17:59:27.012779: step: 92/531, loss: 7.382411422440782e-05 2023-01-22 17:59:28.059996: step: 96/531, loss: 2.302437678736169e-05 2023-01-22 17:59:29.111665: step: 100/531, loss: 0.0005729347467422485 2023-01-22 17:59:30.162221: step: 104/531, loss: 0.0008002286776900291 2023-01-22 17:59:31.230870: step: 108/531, loss: 0.00018516951240599155 2023-01-22 17:59:32.287261: step: 112/531, loss: 2.2955859094508924e-05 2023-01-22 17:59:33.345838: step: 116/531, loss: 0.00016484715160913765 2023-01-22 17:59:34.412615: step: 120/531, loss: 0.005383912939578295 2023-01-22 17:59:35.486968: step: 124/531, loss: 0.007578098215162754 2023-01-22 17:59:36.536699: step: 128/531, loss: 0.005913260392844677 2023-01-22 17:59:37.603043: step: 132/531, loss: 0.0038971109315752983 2023-01-22 17:59:38.666198: step: 136/531, loss: 1.3455038242682349e-05 2023-01-22 17:59:39.714155: step: 140/531, loss: 0.005872782319784164 2023-01-22 17:59:40.769674: step: 144/531, loss: 0.0017661303281784058 2023-01-22 17:59:41.831830: step: 148/531, loss: 0.008567708544433117 2023-01-22 17:59:42.886157: step: 152/531, loss: 0.0009409493650309741 2023-01-22 17:59:43.951589: step: 156/531, loss: 0.00018545141210779548 2023-01-22 17:59:45.002660: step: 160/531, loss: 3.97282638004981e-05 2023-01-22 17:59:46.078180: step: 164/531, loss: 0.001125269802287221 2023-01-22 17:59:47.138536: step: 168/531, loss: 2.6217414870188804e-06 2023-01-22 17:59:48.181967: step: 172/531, loss: 0.0012865333119407296 2023-01-22 17:59:49.234553: step: 176/531, loss: 5.930763654760085e-05 2023-01-22 17:59:50.291369: step: 180/531, loss: 0.004098004661500454 2023-01-22 17:59:51.347903: step: 184/531, loss: 0.0023150916676968336 2023-01-22 17:59:52.391328: step: 188/531, loss: 0.002951332600787282 2023-01-22 17:59:53.455776: step: 192/531, loss: 0.002031816868111491 2023-01-22 17:59:54.511825: step: 196/531, loss: 0.0015997339505702257 2023-01-22 17:59:55.572648: step: 200/531, loss: 5.237978712102631e-06 2023-01-22 17:59:56.626894: step: 204/531, loss: 0.006756736896932125 2023-01-22 17:59:57.678994: step: 208/531, loss: 6.461319117079256e-06 2023-01-22 17:59:58.752474: step: 212/531, loss: 0.000697409559506923 2023-01-22 17:59:59.811192: step: 216/531, loss: 0.00254813302308321 2023-01-22 18:00:00.866778: step: 220/531, loss: 0.004089620895683765 2023-01-22 18:00:01.926571: step: 
224/531, loss: 0.002317257924005389 2023-01-22 18:00:02.986260: step: 228/531, loss: 0.00019191388855688274 2023-01-22 18:00:04.038452: step: 232/531, loss: 0.00041288428474217653 2023-01-22 18:00:05.100855: step: 236/531, loss: 0.0018394957296550274 2023-01-22 18:00:06.149716: step: 240/531, loss: 0.00012980592146050185 2023-01-22 18:00:07.207219: step: 244/531, loss: 0.00034726655576378107 2023-01-22 18:00:08.253424: step: 248/531, loss: 5.330373460310511e-05 2023-01-22 18:00:09.302946: step: 252/531, loss: 4.238435394654516e-06 2023-01-22 18:00:10.360192: step: 256/531, loss: 0.00018782711413223296 2023-01-22 18:00:11.420706: step: 260/531, loss: 0.0013384897029027343 2023-01-22 18:00:12.475150: step: 264/531, loss: 0.0008462652913294733 2023-01-22 18:00:13.526079: step: 268/531, loss: 0.0019726683385670185 2023-01-22 18:00:14.583426: step: 272/531, loss: 0.001999210799112916 2023-01-22 18:00:15.644187: step: 276/531, loss: 0.00041788246016949415 2023-01-22 18:00:16.717515: step: 280/531, loss: 0.0046173399314284325 2023-01-22 18:00:17.782752: step: 284/531, loss: 0.0006525160279124975 2023-01-22 18:00:18.833671: step: 288/531, loss: 7.261510472744703e-05 2023-01-22 18:00:19.883207: step: 292/531, loss: 0.0013442516792565584 2023-01-22 18:00:20.946628: step: 296/531, loss: 0.008767567574977875 2023-01-22 18:00:22.001090: step: 300/531, loss: 0.005095435306429863 2023-01-22 18:00:23.056898: step: 304/531, loss: 0.0008047689334489405 2023-01-22 18:00:24.109989: step: 308/531, loss: 0.0030603145714849234 2023-01-22 18:00:25.164263: step: 312/531, loss: 0.0006080297753214836 2023-01-22 18:00:26.229095: step: 316/531, loss: 0.0040301172994077206 2023-01-22 18:00:27.308427: step: 320/531, loss: 2.628640459079179e-06 2023-01-22 18:00:28.379519: step: 324/531, loss: 0.012205506674945354 2023-01-22 18:00:29.450475: step: 328/531, loss: 0.003325662575662136 2023-01-22 18:00:30.507097: step: 332/531, loss: 0.004738849587738514 2023-01-22 18:00:31.551339: step: 336/531, loss: 0.0007261703722178936 2023-01-22 18:00:32.601069: step: 340/531, loss: 0.00019172353495378047 2023-01-22 18:00:33.662272: step: 344/531, loss: 0.0014166636392474174 2023-01-22 18:00:34.720102: step: 348/531, loss: 0.0003064592892769724 2023-01-22 18:00:35.772481: step: 352/531, loss: 0.0018100740853697062 2023-01-22 18:00:36.836852: step: 356/531, loss: 0.00027037030667997897 2023-01-22 18:00:37.903138: step: 360/531, loss: 0.00015691015869379044 2023-01-22 18:00:38.965736: step: 364/531, loss: 0.0006983047351241112 2023-01-22 18:00:40.017481: step: 368/531, loss: 0.0022426953073590994 2023-01-22 18:00:41.073399: step: 372/531, loss: 0.0006167655810713768 2023-01-22 18:00:42.128244: step: 376/531, loss: 0.0004734009853564203 2023-01-22 18:00:43.198198: step: 380/531, loss: 0.0001547702559037134 2023-01-22 18:00:44.270256: step: 384/531, loss: 0.00038646493339911103 2023-01-22 18:00:45.333137: step: 388/531, loss: 0.002265315502882004 2023-01-22 18:00:46.396184: step: 392/531, loss: 0.002514406805858016 2023-01-22 18:00:47.444814: step: 396/531, loss: 0.0005918876267969608 2023-01-22 18:00:48.493820: step: 400/531, loss: 0.006264612544327974 2023-01-22 18:00:49.559230: step: 404/531, loss: 0.00033855895162560046 2023-01-22 18:00:50.614798: step: 408/531, loss: 9.501646854914725e-05 2023-01-22 18:00:51.663893: step: 412/531, loss: 0.003078275825828314 2023-01-22 18:00:52.718418: step: 416/531, loss: 7.965124677866697e-05 2023-01-22 18:00:53.771171: step: 420/531, loss: 0.0005357344634830952 2023-01-22 18:00:54.832202: step: 
424/531, loss: 0.0023287509102374315 2023-01-22 18:00:55.903046: step: 428/531, loss: 0.0021544082555919886 2023-01-22 18:00:56.969782: step: 432/531, loss: 0.004816059954464436 2023-01-22 18:00:58.026723: step: 436/531, loss: 0.00020005705300718546 2023-01-22 18:00:59.081641: step: 440/531, loss: 0.00018254034512210637 2023-01-22 18:01:00.157047: step: 444/531, loss: 0.000611867755651474 2023-01-22 18:01:01.224557: step: 448/531, loss: 7.335914415307343e-05 2023-01-22 18:01:02.295300: step: 452/531, loss: 0.004518484231084585 2023-01-22 18:01:03.362426: step: 456/531, loss: 0.005140627268701792 2023-01-22 18:01:04.420664: step: 460/531, loss: 0.0006301365210674703 2023-01-22 18:01:05.482528: step: 464/531, loss: 0.0021027002949267626 2023-01-22 18:01:06.547386: step: 468/531, loss: 5.8715468185255304e-05 2023-01-22 18:01:07.613317: step: 472/531, loss: 0.003164167981594801 2023-01-22 18:01:08.666089: step: 476/531, loss: 0.009007076732814312 2023-01-22 18:01:09.717540: step: 480/531, loss: 0.005488893948495388 2023-01-22 18:01:10.780487: step: 484/531, loss: 0.0013162342365831137 2023-01-22 18:01:11.825816: step: 488/531, loss: 0.004377539269626141 2023-01-22 18:01:12.915938: step: 492/531, loss: 0.0013336186530068517 2023-01-22 18:01:13.988492: step: 496/531, loss: 8.922337656258605e-06 2023-01-22 18:01:15.073713: step: 500/531, loss: 0.006440530996769667 2023-01-22 18:01:16.136072: step: 504/531, loss: 0.002337603596970439 2023-01-22 18:01:17.197833: step: 508/531, loss: 0.004704826977103949 2023-01-22 18:01:18.254473: step: 512/531, loss: 2.3259861336555332e-05 2023-01-22 18:01:19.309317: step: 516/531, loss: 0.0009793529752641916 2023-01-22 18:01:20.363487: step: 520/531, loss: 0.006845055613666773 2023-01-22 18:01:21.423734: step: 524/531, loss: 0.0041955867782235146 2023-01-22 18:01:22.479812: step: 528/531, loss: 0.0019427043152973056 2023-01-22 18:01:23.536208: step: 532/531, loss: 0.002804335905238986 2023-01-22 18:01:24.601488: step: 536/531, loss: 1.0511784239497501e-05 2023-01-22 18:01:25.668256: step: 540/531, loss: 0.001268138294108212 2023-01-22 18:01:26.734749: step: 544/531, loss: 0.001591818523593247 2023-01-22 18:01:27.798941: step: 548/531, loss: 0.00017964364087674767 2023-01-22 18:01:28.874951: step: 552/531, loss: 0.007876533083617687 2023-01-22 18:01:29.951203: step: 556/531, loss: 0.001596052898094058 2023-01-22 18:01:31.023674: step: 560/531, loss: 0.006762867793440819 2023-01-22 18:01:32.066993: step: 564/531, loss: 0.00028747261967509985 2023-01-22 18:01:33.139829: step: 568/531, loss: 0.003805815242230892 2023-01-22 18:01:34.192815: step: 572/531, loss: 2.2555337636731565e-05 2023-01-22 18:01:35.250636: step: 576/531, loss: 0.0032091541215777397 2023-01-22 18:01:36.331324: step: 580/531, loss: 0.0006547679658979177 2023-01-22 18:01:37.402116: step: 584/531, loss: 0.005057461094111204 2023-01-22 18:01:38.450972: step: 588/531, loss: 0.0010434415889903903 2023-01-22 18:01:39.514695: step: 592/531, loss: 1.3075495530756598e-07 2023-01-22 18:01:40.577739: step: 596/531, loss: 0.0004937549238093197 2023-01-22 18:01:41.640623: step: 600/531, loss: 0.007920471951365471 2023-01-22 18:01:42.690415: step: 604/531, loss: 0.0018424766603857279 2023-01-22 18:01:43.739141: step: 608/531, loss: 0.00020057539222761989 2023-01-22 18:01:44.832365: step: 612/531, loss: 0.006189912557601929 2023-01-22 18:01:45.898425: step: 616/531, loss: 0.000951491529121995 2023-01-22 18:01:46.965275: step: 620/531, loss: 0.00013815052807331085 2023-01-22 18:01:48.025385: step: 624/531, loss: 
0.012013589963316917 2023-01-22 18:01:49.078200: step: 628/531, loss: 0.000238620035815984 2023-01-22 18:01:50.159050: step: 632/531, loss: 0.001472893520258367 2023-01-22 18:01:51.222681: step: 636/531, loss: 3.199533966835588e-05 2023-01-22 18:01:52.283660: step: 640/531, loss: 0.003311782842501998 2023-01-22 18:01:53.344085: step: 644/531, loss: 1.7413856767234392e-05 2023-01-22 18:01:54.396370: step: 648/531, loss: 0.0 2023-01-22 18:01:55.472286: step: 652/531, loss: 0.0005648055230267346 2023-01-22 18:01:56.544490: step: 656/531, loss: 0.00041577249066904187 2023-01-22 18:01:57.591247: step: 660/531, loss: 0.0005932781496085227 2023-01-22 18:01:58.644211: step: 664/531, loss: 3.1131337891565636e-05 2023-01-22 18:01:59.704558: step: 668/531, loss: 0.0016683556605130434 2023-01-22 18:02:00.768043: step: 672/531, loss: 0.0008324781083501875 2023-01-22 18:02:01.823396: step: 676/531, loss: 0.0015998296439647675 2023-01-22 18:02:02.902862: step: 680/531, loss: 0.0010401420295238495 2023-01-22 18:02:03.978376: step: 684/531, loss: 0.00021410842600744218 2023-01-22 18:02:05.054266: step: 688/531, loss: 0.00986777525395155 2023-01-22 18:02:06.108823: step: 692/531, loss: 0.005428651813417673 2023-01-22 18:02:07.166123: step: 696/531, loss: 0.0026745384093374014 2023-01-22 18:02:08.231086: step: 700/531, loss: 8.789300773059949e-05 2023-01-22 18:02:09.290499: step: 704/531, loss: 0.0013627734733745456 2023-01-22 18:02:10.374309: step: 708/531, loss: 0.006983757484704256 2023-01-22 18:02:11.431795: step: 712/531, loss: 0.00024586988729424775 2023-01-22 18:02:12.515078: step: 716/531, loss: 0.006869173143059015 2023-01-22 18:02:13.583440: step: 720/531, loss: 0.00016961278743110597 2023-01-22 18:02:14.642626: step: 724/531, loss: 0.007143969181925058 2023-01-22 18:02:15.709888: step: 728/531, loss: 0.011537830345332623 2023-01-22 18:02:16.773286: step: 732/531, loss: 0.0002902477281168103 2023-01-22 18:02:17.853237: step: 736/531, loss: 0.006546034477651119 2023-01-22 18:02:18.920641: step: 740/531, loss: 6.851474608993158e-05 2023-01-22 18:02:19.983130: step: 744/531, loss: 0.004820783622562885 2023-01-22 18:02:21.034336: step: 748/531, loss: 4.584113048622385e-05 2023-01-22 18:02:22.076682: step: 752/531, loss: 0.003378402441740036 2023-01-22 18:02:23.140068: step: 756/531, loss: 0.011286232620477676 2023-01-22 18:02:24.199011: step: 760/531, loss: 0.002090710448101163 2023-01-22 18:02:25.238964: step: 764/531, loss: 3.3444015571149066e-05 2023-01-22 18:02:26.319579: step: 768/531, loss: 0.0006884770118631423 2023-01-22 18:02:27.380250: step: 772/531, loss: 0.00045069732004776597 2023-01-22 18:02:28.423342: step: 776/531, loss: 0.0007072472362779081 2023-01-22 18:02:29.498193: step: 780/531, loss: 0.0020578026305884123 2023-01-22 18:02:30.560157: step: 784/531, loss: 0.0006573524442501366 2023-01-22 18:02:31.630419: step: 788/531, loss: 0.023261716589331627 2023-01-22 18:02:32.690125: step: 792/531, loss: 1.0141192433366086e-05 2023-01-22 18:02:33.756615: step: 796/531, loss: 0.002168754581362009 2023-01-22 18:02:34.825829: step: 800/531, loss: 0.001569385640323162 2023-01-22 18:02:35.896112: step: 804/531, loss: 3.9136025407060515e-06 2023-01-22 18:02:36.950772: step: 808/531, loss: 0.0002535523963160813 2023-01-22 18:02:38.018018: step: 812/531, loss: 2.527751712477766e-05 2023-01-22 18:02:39.084690: step: 816/531, loss: 0.0003287234576418996 2023-01-22 18:02:40.144760: step: 820/531, loss: 0.0002072100032819435 2023-01-22 18:02:41.199782: step: 824/531, loss: 0.00023947506269905716 
2023-01-22 18:02:42.256332: step: 828/531, loss: 0.0010242847492918372 2023-01-22 18:02:43.325770: step: 832/531, loss: 0.0020955665968358517 2023-01-22 18:02:44.388734: step: 836/531, loss: 0.0007789822411723435 2023-01-22 18:02:45.469063: step: 840/531, loss: 0.0031239890959113836 2023-01-22 18:02:46.529407: step: 844/531, loss: 0.002139681950211525 2023-01-22 18:02:47.585757: step: 848/531, loss: 0.00010103691602125764 2023-01-22 18:02:48.643164: step: 852/531, loss: 0.0023629390634596348 2023-01-22 18:02:49.713303: step: 856/531, loss: 0.003942996263504028 2023-01-22 18:02:50.777881: step: 860/531, loss: 0.0017932187765836716 2023-01-22 18:02:51.852908: step: 864/531, loss: 0.010287689976394176 2023-01-22 18:02:52.910230: step: 868/531, loss: 0.00614097248762846 2023-01-22 18:02:53.976294: step: 872/531, loss: 0.0024153664708137512 2023-01-22 18:02:55.046492: step: 876/531, loss: 0.002685698913410306 2023-01-22 18:02:56.105350: step: 880/531, loss: 0.004279584623873234 2023-01-22 18:02:57.159757: step: 884/531, loss: 0.0036086956970393658 2023-01-22 18:02:58.232619: step: 888/531, loss: 0.0007581334211863577 2023-01-22 18:02:59.302749: step: 892/531, loss: 0.008575083687901497 2023-01-22 18:03:00.369251: step: 896/531, loss: 0.005085669457912445 2023-01-22 18:03:01.426794: step: 900/531, loss: 0.00022220025130081922 2023-01-22 18:03:02.498517: step: 904/531, loss: 0.00014551023195963353 2023-01-22 18:03:03.549566: step: 908/531, loss: 0.0007867612293921411 2023-01-22 18:03:04.621845: step: 912/531, loss: 0.0005422435933724046 2023-01-22 18:03:05.685060: step: 916/531, loss: 0.006346079055219889 2023-01-22 18:03:06.742385: step: 920/531, loss: 0.0014782777288928628 2023-01-22 18:03:07.806686: step: 924/531, loss: 0.0008234384004026651 2023-01-22 18:03:08.854945: step: 928/531, loss: 5.163700507182512e-07 2023-01-22 18:03:09.922929: step: 932/531, loss: 0.0005923594580963254 2023-01-22 18:03:10.985331: step: 936/531, loss: 0.0009426764445379376 2023-01-22 18:03:12.025484: step: 940/531, loss: 9.509312076261267e-05 2023-01-22 18:03:13.084566: step: 944/531, loss: 0.003369776299223304 2023-01-22 18:03:14.159302: step: 948/531, loss: 0.0003580296761356294 2023-01-22 18:03:15.206712: step: 952/531, loss: 6.050045703887008e-05 2023-01-22 18:03:16.279114: step: 956/531, loss: 0.004939751233905554 2023-01-22 18:03:17.344073: step: 960/531, loss: 0.00016296881949529052 2023-01-22 18:03:18.405016: step: 964/531, loss: 0.010895533487200737 2023-01-22 18:03:19.476371: step: 968/531, loss: 0.009111028164625168 2023-01-22 18:03:20.538167: step: 972/531, loss: 0.003915261011570692 2023-01-22 18:03:21.608187: step: 976/531, loss: 0.0006893750978633761 2023-01-22 18:03:22.664830: step: 980/531, loss: 0.0008727729436941445 2023-01-22 18:03:23.719961: step: 984/531, loss: 0.002139232587069273 2023-01-22 18:03:24.778667: step: 988/531, loss: 0.0003349109028931707 2023-01-22 18:03:25.835266: step: 992/531, loss: 0.0036862508859485388 2023-01-22 18:03:26.905730: step: 996/531, loss: 0.0015776100335642695 2023-01-22 18:03:27.961298: step: 1000/531, loss: 0.0007878416799940169 2023-01-22 18:03:29.002136: step: 1004/531, loss: 7.109108992153779e-05 2023-01-22 18:03:30.053049: step: 1008/531, loss: 0.0038698548451066017 2023-01-22 18:03:31.096665: step: 1012/531, loss: 0.014222804456949234 2023-01-22 18:03:32.163110: step: 1016/531, loss: 0.017534222453832626 2023-01-22 18:03:33.224056: step: 1020/531, loss: 0.0025508874095976353 2023-01-22 18:03:34.286852: step: 1024/531, loss: 0.004525311756879091 2023-01-22 
18:03:35.356277: step: 1028/531, loss: 0.01276471372693777 2023-01-22 18:03:36.410343: step: 1032/531, loss: 0.00013596868666354567 2023-01-22 18:03:37.451048: step: 1036/531, loss: 0.0001314611581619829 2023-01-22 18:03:38.512566: step: 1040/531, loss: 0.0049752178601920605 2023-01-22 18:03:39.586210: step: 1044/531, loss: 0.0010042430367320776 2023-01-22 18:03:40.662125: step: 1048/531, loss: 0.011717608198523521 2023-01-22 18:03:41.721036: step: 1052/531, loss: 0.0063424925319850445 2023-01-22 18:03:42.785266: step: 1056/531, loss: 0.0028261584229767323 2023-01-22 18:03:43.847604: step: 1060/531, loss: 0.0006312905461527407 2023-01-22 18:03:44.908752: step: 1064/531, loss: 0.0006985447253100574 2023-01-22 18:03:45.965083: step: 1068/531, loss: 8.053990313783288e-06 2023-01-22 18:03:47.025508: step: 1072/531, loss: 0.00010158088116440922 2023-01-22 18:03:48.097566: step: 1076/531, loss: 0.0014032371109351516 2023-01-22 18:03:49.153479: step: 1080/531, loss: 0.0017378615448251367 2023-01-22 18:03:50.216366: step: 1084/531, loss: 0.001124097965657711 2023-01-22 18:03:51.266160: step: 1088/531, loss: 4.155989245191449e-06 2023-01-22 18:03:52.338334: step: 1092/531, loss: 0.004896112252026796 2023-01-22 18:03:53.406109: step: 1096/531, loss: 0.0030176760628819466 2023-01-22 18:03:54.493828: step: 1100/531, loss: 9.037006384460256e-05 2023-01-22 18:03:55.554256: step: 1104/531, loss: 0.0010507587576285005 2023-01-22 18:03:56.624547: step: 1108/531, loss: 0.0016560732619836926 2023-01-22 18:03:57.684735: step: 1112/531, loss: 4.4967466237721965e-05 2023-01-22 18:03:58.740657: step: 1116/531, loss: 0.015512155368924141 2023-01-22 18:03:59.805378: step: 1120/531, loss: 0.013124290853738785 2023-01-22 18:04:00.855766: step: 1124/531, loss: 0.0028140032663941383 2023-01-22 18:04:01.917251: step: 1128/531, loss: 0.003043188713490963 2023-01-22 18:04:02.987161: step: 1132/531, loss: 0.009081898257136345 2023-01-22 18:04:04.048014: step: 1136/531, loss: 3.626557008828968e-05 2023-01-22 18:04:05.109526: step: 1140/531, loss: 0.011086744256317616 2023-01-22 18:04:06.171756: step: 1144/531, loss: 0.002954406663775444 2023-01-22 18:04:07.226725: step: 1148/531, loss: 2.356210461584851e-05 2023-01-22 18:04:08.292576: step: 1152/531, loss: 0.004788017366081476 2023-01-22 18:04:09.353734: step: 1156/531, loss: 0.006939814891666174 2023-01-22 18:04:10.423254: step: 1160/531, loss: 0.001125428476370871 2023-01-22 18:04:11.496847: step: 1164/531, loss: 0.004942075349390507 2023-01-22 18:04:12.581081: step: 1168/531, loss: 0.0008772382861934602 2023-01-22 18:04:13.643241: step: 1172/531, loss: 0.0013258844846859574 2023-01-22 18:04:14.717296: step: 1176/531, loss: 0.00739274499937892 2023-01-22 18:04:15.788458: step: 1180/531, loss: 0.0014582430012524128 2023-01-22 18:04:16.844201: step: 1184/531, loss: 0.0005501639097929001 2023-01-22 18:04:17.902384: step: 1188/531, loss: 0.0016470944974571466 2023-01-22 18:04:18.966044: step: 1192/531, loss: 0.0017556428210809827 2023-01-22 18:04:20.024680: step: 1196/531, loss: 0.004391124937683344 2023-01-22 18:04:21.080352: step: 1200/531, loss: 0.0006489954539574683 2023-01-22 18:04:22.156106: step: 1204/531, loss: 0.0007281933794729412 2023-01-22 18:04:23.206880: step: 1208/531, loss: 0.0011890002060681581 2023-01-22 18:04:24.269010: step: 1212/531, loss: 0.0006047792849130929 2023-01-22 18:04:25.329928: step: 1216/531, loss: 0.004985279403626919 2023-01-22 18:04:26.398671: step: 1220/531, loss: 0.0004808290395885706 2023-01-22 18:04:27.476749: step: 1224/531, loss: 
0.006486736703664064 2023-01-22 18:04:28.542075: step: 1228/531, loss: 0.001231358852237463 2023-01-22 18:04:29.623068: step: 1232/531, loss: 0.011270023882389069 2023-01-22 18:04:30.667423: step: 1236/531, loss: 0.0008208649232983589 2023-01-22 18:04:31.739230: step: 1240/531, loss: 0.004045217763632536 2023-01-22 18:04:32.792170: step: 1244/531, loss: 5.2749368478544056e-05 2023-01-22 18:04:33.866415: step: 1248/531, loss: 0.005218219477683306 2023-01-22 18:04:34.927327: step: 1252/531, loss: 0.002251754282042384 2023-01-22 18:04:35.983189: step: 1256/531, loss: 9.591809794073924e-05 2023-01-22 18:04:37.022645: step: 1260/531, loss: 7.123234809114365e-06 2023-01-22 18:04:38.086144: step: 1264/531, loss: 0.004032783675938845 2023-01-22 18:04:39.156797: step: 1268/531, loss: 0.000742900709155947 2023-01-22 18:04:40.212950: step: 1272/531, loss: 0.0011782868532463908 2023-01-22 18:04:41.261626: step: 1276/531, loss: 6.809475507907337e-07 2023-01-22 18:04:42.336562: step: 1280/531, loss: 7.996735803317279e-05 2023-01-22 18:04:43.380266: step: 1284/531, loss: 0.0011381471995264292 2023-01-22 18:04:44.447786: step: 1288/531, loss: 0.00548606738448143 2023-01-22 18:04:45.511498: step: 1292/531, loss: 0.0008059622487053275 2023-01-22 18:04:46.559670: step: 1296/531, loss: 0.00030760097433812916 2023-01-22 18:04:47.621981: step: 1300/531, loss: 0.004079130478203297 2023-01-22 18:04:48.672474: step: 1304/531, loss: 0.006860645953565836 2023-01-22 18:04:49.732000: step: 1308/531, loss: 9.480576181886136e-07 2023-01-22 18:04:50.793168: step: 1312/531, loss: 0.0019376041600480676 2023-01-22 18:04:51.848803: step: 1316/531, loss: 0.003845769912004471 2023-01-22 18:04:52.906670: step: 1320/531, loss: 5.221631727181375e-05 2023-01-22 18:04:53.967765: step: 1324/531, loss: 0.003528336761519313 2023-01-22 18:04:55.024300: step: 1328/531, loss: 0.00637925835326314 2023-01-22 18:04:56.087525: step: 1332/531, loss: 0.00010631334589561448 2023-01-22 18:04:57.161350: step: 1336/531, loss: 0.006522983778268099 2023-01-22 18:04:58.241133: step: 1340/531, loss: 0.00011496706429170445 2023-01-22 18:04:59.321565: step: 1344/531, loss: 0.0033556539565324783 2023-01-22 18:05:00.374683: step: 1348/531, loss: 0.00019041269843000919 2023-01-22 18:05:01.426441: step: 1352/531, loss: 0.0016024350188672543 2023-01-22 18:05:02.481413: step: 1356/531, loss: 0.0007284569437615573 2023-01-22 18:05:03.534439: step: 1360/531, loss: 0.0043895915150642395 2023-01-22 18:05:04.586426: step: 1364/531, loss: 0.0031304971780627966 2023-01-22 18:05:05.645044: step: 1368/531, loss: 0.0016167134745046496 2023-01-22 18:05:06.693004: step: 1372/531, loss: 0.0015101308235898614 2023-01-22 18:05:07.753221: step: 1376/531, loss: 0.007826688699424267 2023-01-22 18:05:08.799753: step: 1380/531, loss: 0.0007461021887138486 2023-01-22 18:05:09.871492: step: 1384/531, loss: 0.0014523325953632593 2023-01-22 18:05:10.910180: step: 1388/531, loss: 4.959692159900442e-05 2023-01-22 18:05:11.976060: step: 1392/531, loss: 0.0001512253365945071 2023-01-22 18:05:13.033377: step: 1396/531, loss: 0.0012374120997264981 2023-01-22 18:05:14.087020: step: 1400/531, loss: 0.01050270814448595 2023-01-22 18:05:15.147380: step: 1404/531, loss: 0.0031221939716488123 2023-01-22 18:05:16.206945: step: 1408/531, loss: 0.0016900835325941443 2023-01-22 18:05:17.265718: step: 1412/531, loss: 0.003941865637898445 2023-01-22 18:05:18.341072: step: 1416/531, loss: 0.00013313518138602376 2023-01-22 18:05:19.413477: step: 1420/531, loss: 0.005387207958847284 2023-01-22 
18:05:20.473585: step: 1424/531, loss: 0.00913289189338684 2023-01-22 18:05:21.517905: step: 1428/531, loss: 0.060010168701410294 2023-01-22 18:05:22.600132: step: 1432/531, loss: 0.002390248468145728 2023-01-22 18:05:23.673587: step: 1436/531, loss: 4.347315552877262e-05 2023-01-22 18:05:24.736089: step: 1440/531, loss: 0.0025861372705549 2023-01-22 18:05:25.808916: step: 1444/531, loss: 0.003920883871614933 2023-01-22 18:05:26.864480: step: 1448/531, loss: 1.5943909659199562e-07 2023-01-22 18:05:27.927380: step: 1452/531, loss: 0.0035977144725620747 2023-01-22 18:05:29.000414: step: 1456/531, loss: 4.213129432173446e-05 2023-01-22 18:05:30.073706: step: 1460/531, loss: 0.0005724129732698202 2023-01-22 18:05:31.123274: step: 1464/531, loss: 0.00028577656485140324 2023-01-22 18:05:32.200718: step: 1468/531, loss: 0.0008289706311188638 2023-01-22 18:05:33.260758: step: 1472/531, loss: 0.0010570675367489457 2023-01-22 18:05:34.323645: step: 1476/531, loss: 0.0035019528586417437 2023-01-22 18:05:35.383872: step: 1480/531, loss: 0.00019357156998012215 2023-01-22 18:05:36.444465: step: 1484/531, loss: 4.4373598939273506e-05 2023-01-22 18:05:37.516669: step: 1488/531, loss: 0.0016001868061721325 2023-01-22 18:05:38.580874: step: 1492/531, loss: 0.00736919604241848 2023-01-22 18:05:39.637609: step: 1496/531, loss: 0.00026503155822865665 2023-01-22 18:05:40.704416: step: 1500/531, loss: 0.0032672889064997435 2023-01-22 18:05:41.741368: step: 1504/531, loss: 0.00042652423144318163 2023-01-22 18:05:42.836359: step: 1508/531, loss: 0.00013084665988571942 2023-01-22 18:05:43.898665: step: 1512/531, loss: 0.0016687301686033607 2023-01-22 18:05:44.955112: step: 1516/531, loss: 0.0005247325170785189 2023-01-22 18:05:46.024214: step: 1520/531, loss: 0.0005717718740925193 2023-01-22 18:05:47.062354: step: 1524/531, loss: 0.0003974206920247525 2023-01-22 18:05:48.143181: step: 1528/531, loss: 0.010677206330001354 2023-01-22 18:05:49.209593: step: 1532/531, loss: 0.0005944393342360854 2023-01-22 18:05:50.251373: step: 1536/531, loss: 0.00022962574439588934 2023-01-22 18:05:51.298605: step: 1540/531, loss: 4.392466144054197e-05 2023-01-22 18:05:52.366465: step: 1544/531, loss: 0.012663129717111588 2023-01-22 18:05:53.412325: step: 1548/531, loss: 0.0001365838834317401 2023-01-22 18:05:54.478006: step: 1552/531, loss: 0.000226149961235933 2023-01-22 18:05:55.544601: step: 1556/531, loss: 0.004185952711850405 2023-01-22 18:05:56.600950: step: 1560/531, loss: 0.0013196436921134591 2023-01-22 18:05:57.665418: step: 1564/531, loss: 0.018030652776360512 2023-01-22 18:05:58.738421: step: 1568/531, loss: 0.0003566597297322005 2023-01-22 18:05:59.796649: step: 1572/531, loss: 0.0013386617647483945 2023-01-22 18:06:00.852687: step: 1576/531, loss: 9.93379217106849e-05 2023-01-22 18:06:01.929036: step: 1580/531, loss: 0.009382509626448154 2023-01-22 18:06:02.996881: step: 1584/531, loss: 0.0003657048800960183 2023-01-22 18:06:04.052271: step: 1588/531, loss: 0.03480656072497368 2023-01-22 18:06:05.112898: step: 1592/531, loss: 0.0008531854837201536 2023-01-22 18:06:06.181319: step: 1596/531, loss: 0.00010890130943153054 2023-01-22 18:06:07.234082: step: 1600/531, loss: 0.004324667621403933 2023-01-22 18:06:08.288146: step: 1604/531, loss: 0.0058302986435592175 2023-01-22 18:06:09.355732: step: 1608/531, loss: 0.0008046038565225899 2023-01-22 18:06:10.424957: step: 1612/531, loss: 4.122814971196931e-06 2023-01-22 18:06:11.498514: step: 1616/531, loss: 0.0021914870012551546 2023-01-22 18:06:12.593641: step: 1620/531, 
loss: 0.005860176868736744 2023-01-22 18:06:13.650764: step: 1624/531, loss: 7.749860742478631e-06 2023-01-22 18:06:14.709078: step: 1628/531, loss: 0.00022379022266250104 2023-01-22 18:06:15.758695: step: 1632/531, loss: 0.00016036475426517427 2023-01-22 18:06:16.821559: step: 1636/531, loss: 0.002189396182075143 2023-01-22 18:06:17.881049: step: 1640/531, loss: 0.0010613832855597138 2023-01-22 18:06:18.969003: step: 1644/531, loss: 0.0018733479082584381 2023-01-22 18:06:20.030536: step: 1648/531, loss: 0.002693586517125368 2023-01-22 18:06:21.094423: step: 1652/531, loss: 0.024549435824155807 2023-01-22 18:06:22.163445: step: 1656/531, loss: 0.00792443286627531 2023-01-22 18:06:23.223660: step: 1660/531, loss: 0.0005029304884374142 2023-01-22 18:06:24.287040: step: 1664/531, loss: 6.837258297309745e-06 2023-01-22 18:06:25.355984: step: 1668/531, loss: 0.007883038371801376 2023-01-22 18:06:26.404796: step: 1672/531, loss: 6.306728027993813e-05 2023-01-22 18:06:27.461111: step: 1676/531, loss: 0.0035531676840037107 2023-01-22 18:06:28.529957: step: 1680/531, loss: 0.004806555807590485 2023-01-22 18:06:29.582914: step: 1684/531, loss: 0.0003428776399232447 2023-01-22 18:06:30.638249: step: 1688/531, loss: 0.0005172319361008704 2023-01-22 18:06:31.700423: step: 1692/531, loss: 0.010027148760855198 2023-01-22 18:06:32.757504: step: 1696/531, loss: 0.002648507244884968 2023-01-22 18:06:33.820308: step: 1700/531, loss: 0.0002148015919374302 2023-01-22 18:06:34.874257: step: 1704/531, loss: 0.0025486743543297052 2023-01-22 18:06:35.934538: step: 1708/531, loss: 0.0046396031975746155 2023-01-22 18:06:36.989115: step: 1712/531, loss: 0.012598443776369095 2023-01-22 18:06:38.035170: step: 1716/531, loss: 4.2714276560218423e-07 2023-01-22 18:06:39.092022: step: 1720/531, loss: 0.0010781065793707967 2023-01-22 18:06:40.168779: step: 1724/531, loss: 0.0017423108220100403 2023-01-22 18:06:41.220656: step: 1728/531, loss: 0.004140028264373541 2023-01-22 18:06:42.275889: step: 1732/531, loss: 0.0035397133324295282 2023-01-22 18:06:43.327737: step: 1736/531, loss: 0.00980967003852129 2023-01-22 18:06:44.397712: step: 1740/531, loss: 0.0005589200300164521 2023-01-22 18:06:45.474026: step: 1744/531, loss: 0.002013190882280469 2023-01-22 18:06:46.523271: step: 1748/531, loss: 0.010056786239147186 2023-01-22 18:06:47.574959: step: 1752/531, loss: 0.006478422787040472 2023-01-22 18:06:48.621648: step: 1756/531, loss: 0.002787259640172124 2023-01-22 18:06:49.681245: step: 1760/531, loss: 0.00417192792519927 2023-01-22 18:06:50.738148: step: 1764/531, loss: 0.00402164738625288 2023-01-22 18:06:51.794030: step: 1768/531, loss: 0.0067492881789803505 2023-01-22 18:06:52.862566: step: 1772/531, loss: 0.0014811003347858787 2023-01-22 18:06:53.924624: step: 1776/531, loss: 0.004483489785343409 2023-01-22 18:06:54.998202: step: 1780/531, loss: 0.0004699307319242507 2023-01-22 18:06:56.060225: step: 1784/531, loss: 0.0035550526808947325 2023-01-22 18:06:57.124817: step: 1788/531, loss: 0.007740124128758907 2023-01-22 18:06:58.189247: step: 1792/531, loss: 0.0007996691856533289 2023-01-22 18:06:59.248217: step: 1796/531, loss: 0.001542242942377925 2023-01-22 18:07:00.307247: step: 1800/531, loss: 0.0018089384539052844 2023-01-22 18:07:01.362792: step: 1804/531, loss: 0.005222752224653959 2023-01-22 18:07:02.411965: step: 1808/531, loss: 0.0006715737981721759 2023-01-22 18:07:03.477417: step: 1812/531, loss: 0.00043306872248649597 2023-01-22 18:07:04.527380: step: 1816/531, loss: 0.015228058211505413 2023-01-22 
18:07:05.604464: step: 1820/531, loss: 0.05320816859602928 2023-01-22 18:07:06.653721: step: 1824/531, loss: 0.0073781306855380535 2023-01-22 18:07:07.723403: step: 1828/531, loss: 0.001989522948861122 2023-01-22 18:07:08.779625: step: 1832/531, loss: 0.004696752410382032 2023-01-22 18:07:09.864196: step: 1836/531, loss: 0.007823620922863483 2023-01-22 18:07:10.927699: step: 1840/531, loss: 0.015773268416523933 2023-01-22 18:07:11.987204: step: 1844/531, loss: 0.007586033083498478 2023-01-22 18:07:13.050688: step: 1848/531, loss: 1.4352395737660117e-05 2023-01-22 18:07:14.101242: step: 1852/531, loss: 0.0043116421438753605 2023-01-22 18:07:15.168986: step: 1856/531, loss: 0.00011105309386039153 2023-01-22 18:07:16.229302: step: 1860/531, loss: 0.0035492845345288515 2023-01-22 18:07:17.298531: step: 1864/531, loss: 0.0015730762388557196 2023-01-22 18:07:18.349667: step: 1868/531, loss: 0.0020741065964102745 2023-01-22 18:07:19.408179: step: 1872/531, loss: 0.0028160004876554012 2023-01-22 18:07:20.471923: step: 1876/531, loss: 0.0015461607836186886 2023-01-22 18:07:21.535127: step: 1880/531, loss: 0.0020947048906236887 2023-01-22 18:07:22.609612: step: 1884/531, loss: 0.0017362883081659675 2023-01-22 18:07:23.675756: step: 1888/531, loss: 0.004290349315851927 2023-01-22 18:07:24.726224: step: 1892/531, loss: 0.0025052495766431093 2023-01-22 18:07:25.785943: step: 1896/531, loss: 0.000587494345381856 2023-01-22 18:07:26.838339: step: 1900/531, loss: 0.0010572696337476373 2023-01-22 18:07:27.892535: step: 1904/531, loss: 0.0003671610029414296 2023-01-22 18:07:28.954496: step: 1908/531, loss: 0.0027554002590477467 2023-01-22 18:07:30.007610: step: 1912/531, loss: 1.1026335414499044e-05 2023-01-22 18:07:31.063138: step: 1916/531, loss: 0.0021668020635843277 2023-01-22 18:07:32.155127: step: 1920/531, loss: 0.008678543381392956 2023-01-22 18:07:33.227124: step: 1924/531, loss: 0.021237801760435104 2023-01-22 18:07:34.305843: step: 1928/531, loss: 0.024778995662927628 2023-01-22 18:07:35.359612: step: 1932/531, loss: 0.008778535760939121 2023-01-22 18:07:36.435161: step: 1936/531, loss: 0.011332480236887932 2023-01-22 18:07:37.525092: step: 1940/531, loss: 0.010019216686487198 2023-01-22 18:07:38.576236: step: 1944/531, loss: 0.0024461084976792336 2023-01-22 18:07:39.631054: step: 1948/531, loss: 0.001441691885702312 2023-01-22 18:07:40.690174: step: 1952/531, loss: 0.008496014401316643 2023-01-22 18:07:41.750241: step: 1956/531, loss: 0.05084388330578804 2023-01-22 18:07:42.837157: step: 1960/531, loss: 0.00012086767674190924 2023-01-22 18:07:43.885381: step: 1964/531, loss: 2.970803325297311e-05 2023-01-22 18:07:44.950672: step: 1968/531, loss: 0.0028667813166975975 2023-01-22 18:07:46.013724: step: 1972/531, loss: 0.0031741890124976635 2023-01-22 18:07:47.081367: step: 1976/531, loss: 0.0019283414585515857 2023-01-22 18:07:48.134997: step: 1980/531, loss: 0.003965300507843494 2023-01-22 18:07:49.194960: step: 1984/531, loss: 0.012069007381796837 2023-01-22 18:07:50.253645: step: 1988/531, loss: 0.0005573823000304401 2023-01-22 18:07:51.306897: step: 1992/531, loss: 0.005476301070302725 2023-01-22 18:07:52.379950: step: 1996/531, loss: 0.0022730515338480473 2023-01-22 18:07:53.435034: step: 2000/531, loss: 0.0007104698452167213 2023-01-22 18:07:54.500442: step: 2004/531, loss: 0.0003665283729787916 2023-01-22 18:07:55.566555: step: 2008/531, loss: 0.0004408737877383828 2023-01-22 18:07:56.615320: step: 2012/531, loss: 0.0044172462075948715 2023-01-22 18:07:57.677641: step: 2016/531, loss: 
0.002731681102886796 2023-01-22 18:07:58.739214: step: 2020/531, loss: 0.0013712862273678184 2023-01-22 18:07:59.788478: step: 2024/531, loss: 0.0017018537037074566 2023-01-22 18:08:00.846828: step: 2028/531, loss: 0.017987927421927452 2023-01-22 18:08:01.914662: step: 2032/531, loss: 0.0010171117028221488 2023-01-22 18:08:02.980060: step: 2036/531, loss: 2.4533940177207114e-06 2023-01-22 18:08:04.037278: step: 2040/531, loss: 0.0029998093377798796 2023-01-22 18:08:05.118006: step: 2044/531, loss: 0.0025358027778565884 2023-01-22 18:08:06.167782: step: 2048/531, loss: 0.0006947174551896751 2023-01-22 18:08:07.226717: step: 2052/531, loss: 0.0024698807392269373 2023-01-22 18:08:08.286939: step: 2056/531, loss: 0.0033368999138474464 2023-01-22 18:08:09.353291: step: 2060/531, loss: 0.005847891326993704 2023-01-22 18:08:10.421072: step: 2064/531, loss: 0.002655237214639783 2023-01-22 18:08:11.494998: step: 2068/531, loss: 1.0428493624203838e-05 2023-01-22 18:08:12.610462: step: 2072/531, loss: 0.0006189014529809356 2023-01-22 18:08:13.684537: step: 2076/531, loss: 0.008097605779767036 2023-01-22 18:08:14.748527: step: 2080/531, loss: 0.0005701733171008527 2023-01-22 18:08:15.810599: step: 2084/531, loss: 0.0013097748160362244 2023-01-22 18:08:16.862425: step: 2088/531, loss: 0.003954808693379164 2023-01-22 18:08:17.930397: step: 2092/531, loss: 0.0138529809191823 2023-01-22 18:08:18.988235: step: 2096/531, loss: 7.44784192647785e-05 2023-01-22 18:08:20.037171: step: 2100/531, loss: 0.008731680922210217 2023-01-22 18:08:21.118024: step: 2104/531, loss: 0.004643022548407316 2023-01-22 18:08:22.171136: step: 2108/531, loss: 0.0021834643557667732 2023-01-22 18:08:23.225785: step: 2112/531, loss: 0.0004960486548952758 2023-01-22 18:08:24.306859: step: 2116/531, loss: 0.0011990171624347568 2023-01-22 18:08:25.361530: step: 2120/531, loss: 0.0029714652337133884 2023-01-22 18:08:26.410513: step: 2124/531, loss: 0.004600778222084045 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3539095188284519, 'r': 0.3203953598484849, 'f1': 0.33631958250497024}, 'combined': 0.2478144292141886, 'stategy': 1, 'epoch': 17} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33641722772720156, 'r': 0.2747812193981204, 'f1': 0.3024913970125359}, 'combined': 0.18941050093308326, 'stategy': 1, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3291432469859889, 'r': 0.348504614455753, 'f1': 0.33854733975701723}, 'combined': 0.24945593455780216, 'stategy': 1, 'epoch': 17} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36038469942455964, 'r': 0.2986416039415422, 'f1': 0.32662084691436916}, 'combined': 0.2024129192145386, 'stategy': 1, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31474417892156864, 'r': 0.32489721695129664, 'f1': 0.31974011826953}, 'combined': 0.23559798188281159, 'stategy': 1, 'epoch': 17} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36425055619721297, 'r': 0.2859383318296098, 'f1': 0.32037827260665797}, 'combined': 0.21252816103609984, 'stategy': 1, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 
'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35765048712595693, 'r': 0.32445943813131317, 'f1': 0.34024743462429663}, 'combined': 0.2507086360389554, 'stategy': 1, 'epoch': 15} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33492632906351627, 'r': 0.27658627354974497, 'f1': 0.3029733970273034}, 'combined': 0.18971231402644234, 'stategy': 1, 'epoch': 15} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 15} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 18 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 18:10:56.375948: step: 4/531, loss: 0.016485828906297684 2023-01-22 18:10:57.437796: step: 8/531, loss: 0.0001644348376430571 2023-01-22 18:10:58.488849: step: 12/531, loss: 0.0032943799160420895 2023-01-22 18:10:59.559499: step: 16/531, loss: 0.0008822003728710115 2023-01-22 18:11:00.600244: step: 20/531, loss: 0.0029280143789947033 2023-01-22 18:11:01.646418: step: 24/531, loss: 0.00039174058474600315 2023-01-22 18:11:02.709966: step: 28/531, loss: 0.0027231110725551844 2023-01-22 18:11:03.759073: step: 32/531, loss: 0.0023588051553815603 
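Note: the evaluation dictionaries printed in these summaries follow a consistent arithmetic: each 'f1' is the harmonic mean of 'p' and 'r', and 'combined' equals the template f1 multiplied by the slot f1. The scoring code itself is not part of this log, so the short Python sketch below is only an inference from the logged numbers; it reproduces the epoch-17 Dev Chinese entry shown above.

    # Inferred from the logged numbers, not taken from train.py.
    def f1(p, r):
        # harmonic mean of precision and recall
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    # Epoch-17 Dev Chinese values copied from the summary above.
    template_f1 = f1(1.0, 0.5833333333333334)             # -> 0.7368421052631579
    slot_f1 = f1(0.3539095188284519, 0.3203953598484849)  # -> ~0.3363195825
    combined = template_f1 * slot_f1                      # -> ~0.2478144292, as logged
    print(template_f1, slot_f1, combined)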
2023-01-22 18:11:04.817577: step: 36/531, loss: 0.006672924850136042 2023-01-22 18:11:05.904633: step: 40/531, loss: 0.0027809622697532177 2023-01-22 18:11:06.987029: step: 44/531, loss: 0.0018094608094543219 2023-01-22 18:11:08.034800: step: 48/531, loss: 0.001995962345972657 2023-01-22 18:11:09.094000: step: 52/531, loss: 0.005635008215904236 2023-01-22 18:11:10.157082: step: 56/531, loss: 0.00018772840849123895 2023-01-22 18:11:11.220446: step: 60/531, loss: 0.008437195792794228 2023-01-22 18:11:12.282924: step: 64/531, loss: 0.001004083314910531 2023-01-22 18:11:13.348675: step: 68/531, loss: 0.003171943360939622 2023-01-22 18:11:14.410236: step: 72/531, loss: 0.0030991339590400457 2023-01-22 18:11:15.454884: step: 76/531, loss: 0.0003899155417457223 2023-01-22 18:11:16.515862: step: 80/531, loss: 0.002102409955114126 2023-01-22 18:11:17.591079: step: 84/531, loss: 0.0008900128304958344 2023-01-22 18:11:18.644909: step: 88/531, loss: 0.002701554913073778 2023-01-22 18:11:19.708920: step: 92/531, loss: 0.0028115096502006054 2023-01-22 18:11:20.755384: step: 96/531, loss: 0.0022069918923079967 2023-01-22 18:11:21.822281: step: 100/531, loss: 0.00035266514169052243 2023-01-22 18:11:22.867672: step: 104/531, loss: 0.0002775506000034511 2023-01-22 18:11:23.921508: step: 108/531, loss: 0.014359908178448677 2023-01-22 18:11:24.982795: step: 112/531, loss: 0.000952446716837585 2023-01-22 18:11:26.042045: step: 116/531, loss: 0.0006368002505041659 2023-01-22 18:11:27.100113: step: 120/531, loss: 0.002198499860242009 2023-01-22 18:11:28.145339: step: 124/531, loss: 0.002797940978780389 2023-01-22 18:11:29.197444: step: 128/531, loss: 0.0030710017308592796 2023-01-22 18:11:30.254160: step: 132/531, loss: 4.771336080011679e-06 2023-01-22 18:11:31.311524: step: 136/531, loss: 0.00029019967769272625 2023-01-22 18:11:32.379424: step: 140/531, loss: 0.0019449201645329595 2023-01-22 18:11:33.439655: step: 144/531, loss: 0.006696322467178106 2023-01-22 18:11:34.512861: step: 148/531, loss: 0.003324989229440689 2023-01-22 18:11:35.584160: step: 152/531, loss: 0.0005718033644370735 2023-01-22 18:11:36.657491: step: 156/531, loss: 0.0011478849919512868 2023-01-22 18:11:37.740221: step: 160/531, loss: 0.002606028225272894 2023-01-22 18:11:38.791015: step: 164/531, loss: 4.181836629868485e-05 2023-01-22 18:11:39.843691: step: 168/531, loss: 1.4137232938082889e-05 2023-01-22 18:11:40.896114: step: 172/531, loss: 1.2221642009535572e-06 2023-01-22 18:11:41.961105: step: 176/531, loss: 0.0018177279271185398 2023-01-22 18:11:43.020580: step: 180/531, loss: 2.3236299966811202e-05 2023-01-22 18:11:44.082318: step: 184/531, loss: 0.0013298456324264407 2023-01-22 18:11:45.146409: step: 188/531, loss: 0.000456444249721244 2023-01-22 18:11:46.206491: step: 192/531, loss: 0.00041851933929137886 2023-01-22 18:11:47.287494: step: 196/531, loss: 0.0007842280901968479 2023-01-22 18:11:48.343207: step: 200/531, loss: 0.0040849014185369015 2023-01-22 18:11:49.415288: step: 204/531, loss: 0.0035691324155777693 2023-01-22 18:11:50.479588: step: 208/531, loss: 0.004006324801594019 2023-01-22 18:11:51.519833: step: 212/531, loss: 0.00240842392668128 2023-01-22 18:11:52.575410: step: 216/531, loss: 0.0002463726559653878 2023-01-22 18:11:53.627178: step: 220/531, loss: 0.041387658566236496 2023-01-22 18:11:54.675022: step: 224/531, loss: 0.0015840086853131652 2023-01-22 18:11:55.732336: step: 228/531, loss: 0.0018198407487943769 2023-01-22 18:11:56.773532: step: 232/531, loss: 6.201847281772643e-05 2023-01-22 18:11:57.824538: step: 
236/531, loss: 0.004232801962643862 2023-01-22 18:11:58.872562: step: 240/531, loss: 0.00021786605066154152 2023-01-22 18:11:59.909706: step: 244/531, loss: 0.00037951269769109786 2023-01-22 18:12:00.974972: step: 248/531, loss: 0.0005722696660086513 2023-01-22 18:12:02.045346: step: 252/531, loss: 0.0009998573223128915 2023-01-22 18:12:03.110051: step: 256/531, loss: 0.0014447090215981007 2023-01-22 18:12:04.176606: step: 260/531, loss: 0.0121343694627285 2023-01-22 18:12:05.251668: step: 264/531, loss: 0.0067059798166155815 2023-01-22 18:12:06.306690: step: 268/531, loss: 0.003779573366045952 2023-01-22 18:12:07.368772: step: 272/531, loss: 0.0005448832525871694 2023-01-22 18:12:08.417050: step: 276/531, loss: 0.0028100148774683475 2023-01-22 18:12:09.469130: step: 280/531, loss: 0.0024854757357388735 2023-01-22 18:12:10.518316: step: 284/531, loss: 0.0022600931115448475 2023-01-22 18:12:11.572620: step: 288/531, loss: 6.12479925621301e-05 2023-01-22 18:12:12.625739: step: 292/531, loss: 5.4878051741980016e-05 2023-01-22 18:12:13.676253: step: 296/531, loss: 8.964359949459322e-06 2023-01-22 18:12:14.743669: step: 300/531, loss: 5.137475091032684e-05 2023-01-22 18:12:15.793071: step: 304/531, loss: 0.0005710149416700006 2023-01-22 18:12:16.853601: step: 308/531, loss: 0.0034550626296550035 2023-01-22 18:12:17.899153: step: 312/531, loss: 0.004638838581740856 2023-01-22 18:12:18.954046: step: 316/531, loss: 0.00041261533624492586 2023-01-22 18:12:20.010872: step: 320/531, loss: 0.0012044655159115791 2023-01-22 18:12:21.060870: step: 324/531, loss: 0.00028150342404842377 2023-01-22 18:12:22.131546: step: 328/531, loss: 0.003794601419940591 2023-01-22 18:12:23.187887: step: 332/531, loss: 6.865726027172059e-05 2023-01-22 18:12:24.239953: step: 336/531, loss: 0.0026643630117177963 2023-01-22 18:12:25.291313: step: 340/531, loss: 0.00249934708699584 2023-01-22 18:12:26.337656: step: 344/531, loss: 0.0005734206060878932 2023-01-22 18:12:27.367909: step: 348/531, loss: 0.0014442644314840436 2023-01-22 18:12:28.419437: step: 352/531, loss: 0.015320328064262867 2023-01-22 18:12:29.475055: step: 356/531, loss: 0.006696663331240416 2023-01-22 18:12:30.547225: step: 360/531, loss: 0.00047120271483436227 2023-01-22 18:12:31.617360: step: 364/531, loss: 0.033359427005052567 2023-01-22 18:12:32.688171: step: 368/531, loss: 0.0034414585679769516 2023-01-22 18:12:33.752467: step: 372/531, loss: 0.0010318740969523787 2023-01-22 18:12:34.824196: step: 376/531, loss: 0.0027982026804238558 2023-01-22 18:12:35.883122: step: 380/531, loss: 0.0008664476918056607 2023-01-22 18:12:36.947860: step: 384/531, loss: 1.4043931741980487e-06 2023-01-22 18:12:38.011549: step: 388/531, loss: 0.0019351415103301406 2023-01-22 18:12:39.070412: step: 392/531, loss: 0.0014457189245149493 2023-01-22 18:12:40.128217: step: 396/531, loss: 0.002615431323647499 2023-01-22 18:12:41.194689: step: 400/531, loss: 0.0014914002967998385 2023-01-22 18:12:42.250226: step: 404/531, loss: 0.0005335441092029214 2023-01-22 18:12:43.305931: step: 408/531, loss: 0.00021663724328391254 2023-01-22 18:12:44.355699: step: 412/531, loss: 0.0002839644730556756 2023-01-22 18:12:45.423934: step: 416/531, loss: 0.0017663523321971297 2023-01-22 18:12:46.473302: step: 420/531, loss: 2.42879286815878e-05 2023-01-22 18:12:47.524152: step: 424/531, loss: 0.00043031765380874276 2023-01-22 18:12:48.579165: step: 428/531, loss: 0.005618552211672068 2023-01-22 18:12:49.635221: step: 432/531, loss: 0.0016697110841050744 2023-01-22 18:12:50.703783: step: 436/531, 
loss: 0.0019035993609577417 2023-01-22 18:12:51.745353: step: 440/531, loss: 0.0006512982654385269 2023-01-22 18:12:52.796165: step: 444/531, loss: 0.0023728464730083942 2023-01-22 18:12:53.838414: step: 448/531, loss: 1.1175869119028903e-08 2023-01-22 18:12:54.898472: step: 452/531, loss: 0.009548933245241642 2023-01-22 18:12:55.952438: step: 456/531, loss: 0.015602135099470615 2023-01-22 18:12:56.999590: step: 460/531, loss: 0.0004088141140528023 2023-01-22 18:12:58.056530: step: 464/531, loss: 0.000493998930323869 2023-01-22 18:12:59.149460: step: 468/531, loss: 0.0002824018301907927 2023-01-22 18:13:00.215170: step: 472/531, loss: 0.0005933383945375681 2023-01-22 18:13:01.261143: step: 476/531, loss: 0.00015543719928245991 2023-01-22 18:13:02.317433: step: 480/531, loss: 0.002169729443266988 2023-01-22 18:13:03.388942: step: 484/531, loss: 0.0018769189482554793 2023-01-22 18:13:04.430968: step: 488/531, loss: 2.4835267176115394e-09 2023-01-22 18:13:05.482609: step: 492/531, loss: 0.0009975541615858674 2023-01-22 18:13:06.535551: step: 496/531, loss: 0.0008550878264941275 2023-01-22 18:13:07.581140: step: 500/531, loss: 3.923714757547714e-05 2023-01-22 18:13:08.631232: step: 504/531, loss: 7.415201707772212e-06 2023-01-22 18:13:09.670725: step: 508/531, loss: 0.0014785407111048698 2023-01-22 18:13:10.723543: step: 512/531, loss: 0.0010507397819310427 2023-01-22 18:13:11.799893: step: 516/531, loss: 0.005640383344143629 2023-01-22 18:13:12.866452: step: 520/531, loss: 0.004614170640707016 2023-01-22 18:13:13.932246: step: 524/531, loss: 0.0008273420971818268 2023-01-22 18:13:15.000892: step: 528/531, loss: 0.0031886552460491657 2023-01-22 18:13:16.063402: step: 532/531, loss: 0.0012883199378848076 2023-01-22 18:13:17.113173: step: 536/531, loss: 0.0026274370029568672 2023-01-22 18:13:18.153608: step: 540/531, loss: 0.00024105983902700245 2023-01-22 18:13:19.229396: step: 544/531, loss: 0.0007367110229097307 2023-01-22 18:13:20.282286: step: 548/531, loss: 0.00029140946571715176 2023-01-22 18:13:21.337799: step: 552/531, loss: 0.001001829863525927 2023-01-22 18:13:22.391135: step: 556/531, loss: 0.00143907917663455 2023-01-22 18:13:23.453194: step: 560/531, loss: 2.2807764253229834e-05 2023-01-22 18:13:24.517796: step: 564/531, loss: 0.005439003463834524 2023-01-22 18:13:25.571099: step: 568/531, loss: 0.002165445825085044 2023-01-22 18:13:26.626385: step: 572/531, loss: 0.00012455484829843044 2023-01-22 18:13:27.679611: step: 576/531, loss: 2.535478870413499e-06 2023-01-22 18:13:28.744892: step: 580/531, loss: 0.006315668113529682 2023-01-22 18:13:29.813133: step: 584/531, loss: 6.806410056015011e-06 2023-01-22 18:13:30.877706: step: 588/531, loss: 0.0012539697345346212 2023-01-22 18:13:31.941329: step: 592/531, loss: 0.0012168773682788014 2023-01-22 18:13:32.994313: step: 596/531, loss: 0.00023015595797915012 2023-01-22 18:13:34.031247: step: 600/531, loss: 3.5704444599105045e-05 2023-01-22 18:13:35.103252: step: 604/531, loss: 0.00023498994414694607 2023-01-22 18:13:36.146980: step: 608/531, loss: 0.0012686975533142686 2023-01-22 18:13:37.203342: step: 612/531, loss: 0.0016248218016698956 2023-01-22 18:13:38.258254: step: 616/531, loss: 0.00021879211999475956 2023-01-22 18:13:39.325116: step: 620/531, loss: 0.0003992653510067612 2023-01-22 18:13:40.373355: step: 624/531, loss: 0.0003639210481196642 2023-01-22 18:13:41.413859: step: 628/531, loss: 0.0015600253827869892 2023-01-22 18:13:42.469050: step: 632/531, loss: 1.0926994036708493e-05 2023-01-22 18:13:43.538366: step: 636/531, 
loss: 0.01515612006187439 2023-01-22 18:13:44.620385: step: 640/531, loss: 0.005097487010061741 2023-01-22 18:13:45.688887: step: 644/531, loss: 0.007839183323085308 2023-01-22 18:13:46.754441: step: 648/531, loss: 0.004948712885379791 2023-01-22 18:13:47.817642: step: 652/531, loss: 0.0019577748607844114 2023-01-22 18:13:48.886455: step: 656/531, loss: 0.013227894902229309 2023-01-22 18:13:49.956300: step: 660/531, loss: 1.710490687401034e-05 2023-01-22 18:13:51.021652: step: 664/531, loss: 0.00035234802635386586 2023-01-22 18:13:52.084831: step: 668/531, loss: 0.0025198820512741804 2023-01-22 18:13:53.160249: step: 672/531, loss: 0.0035180984996259212 2023-01-22 18:13:54.243976: step: 676/531, loss: 0.0017515599029138684 2023-01-22 18:13:55.299938: step: 680/531, loss: 0.00039060713606886566 2023-01-22 18:13:56.353318: step: 684/531, loss: 0.0002828391152434051 2023-01-22 18:13:57.415635: step: 688/531, loss: 0.0008307491079904139 2023-01-22 18:13:58.483343: step: 692/531, loss: 0.006182726472616196 2023-01-22 18:13:59.546799: step: 696/531, loss: 0.00020765890076290816 2023-01-22 18:14:00.598864: step: 700/531, loss: 0.004026591777801514 2023-01-22 18:14:01.653352: step: 704/531, loss: 0.004802176728844643 2023-01-22 18:14:02.717216: step: 708/531, loss: 0.004436550661921501 2023-01-22 18:14:03.770917: step: 712/531, loss: 0.007426182273775339 2023-01-22 18:14:04.828111: step: 716/531, loss: 0.002403817605227232 2023-01-22 18:14:05.904479: step: 720/531, loss: 0.004813516512513161 2023-01-22 18:14:06.970828: step: 724/531, loss: 0.001845165272243321 2023-01-22 18:14:08.030381: step: 728/531, loss: 0.003007540013641119 2023-01-22 18:14:09.080012: step: 732/531, loss: 0.0023156744427978992 2023-01-22 18:14:10.135959: step: 736/531, loss: 3.6970041037420742e-06 2023-01-22 18:14:11.192532: step: 740/531, loss: 0.00021665760141331702 2023-01-22 18:14:12.253169: step: 744/531, loss: 0.0005345075041987002 2023-01-22 18:14:13.316628: step: 748/531, loss: 0.0135061489418149 2023-01-22 18:14:14.379094: step: 752/531, loss: 0.00031829887302592397 2023-01-22 18:14:15.431337: step: 756/531, loss: 0.00023274790146388113 2023-01-22 18:14:16.477933: step: 760/531, loss: 0.0020559036638587713 2023-01-22 18:14:17.549185: step: 764/531, loss: 0.004413450602442026 2023-01-22 18:14:18.629963: step: 768/531, loss: 0.0019967348780483007 2023-01-22 18:14:19.693439: step: 772/531, loss: 0.006783493794500828 2023-01-22 18:14:20.767785: step: 776/531, loss: 7.250224734889343e-05 2023-01-22 18:14:21.813398: step: 780/531, loss: 1.827925007091835e-05 2023-01-22 18:14:22.867133: step: 784/531, loss: 2.1787996956845745e-05 2023-01-22 18:14:23.932410: step: 788/531, loss: 0.006557614076882601 2023-01-22 18:14:25.001935: step: 792/531, loss: 0.00016529702406842262 2023-01-22 18:14:26.055678: step: 796/531, loss: 0.0001591194304637611 2023-01-22 18:14:27.114212: step: 800/531, loss: 0.00013501947978511453 2023-01-22 18:14:28.157840: step: 804/531, loss: 0.0037079069297760725 2023-01-22 18:14:29.206466: step: 808/531, loss: 0.0014649454969912767 2023-01-22 18:14:30.267529: step: 812/531, loss: 0.0010747892083600163 2023-01-22 18:14:31.337327: step: 816/531, loss: 0.0015955495182424784 2023-01-22 18:14:32.386170: step: 820/531, loss: 0.0004603270790539682 2023-01-22 18:14:33.445586: step: 824/531, loss: 0.0008048953604884446 2023-01-22 18:14:34.491991: step: 828/531, loss: 0.004007001873105764 2023-01-22 18:14:35.550062: step: 832/531, loss: 0.00034805593895725906 2023-01-22 18:14:36.610251: step: 836/531, loss: 
6.917065911693498e-05 2023-01-22 18:14:37.662942: step: 840/531, loss: 0.0018708569696173072 2023-01-22 18:14:38.736207: step: 844/531, loss: 0.0006993492715992033 2023-01-22 18:14:39.784264: step: 848/531, loss: 0.000982363591901958 2023-01-22 18:14:40.873829: step: 852/531, loss: 9.765825780050363e-06 2023-01-22 18:14:41.938840: step: 856/531, loss: 0.0003236057236790657 2023-01-22 18:14:42.995540: step: 860/531, loss: 0.0009666095720604062 2023-01-22 18:14:44.053581: step: 864/531, loss: 0.00017323960491921753 2023-01-22 18:14:45.106475: step: 868/531, loss: 0.00047029071720317006 2023-01-22 18:14:46.168074: step: 872/531, loss: 0.00013420723553281277 2023-01-22 18:14:47.237752: step: 876/531, loss: 0.0011368688428774476 2023-01-22 18:14:48.280136: step: 880/531, loss: 0.003005849663168192 2023-01-22 18:14:49.342347: step: 884/531, loss: 0.004738184157758951 2023-01-22 18:14:50.417925: step: 888/531, loss: 0.0026863841339945793 2023-01-22 18:14:51.470922: step: 892/531, loss: 0.0027179387398064137 2023-01-22 18:14:52.540271: step: 896/531, loss: 0.0030021655838936567 2023-01-22 18:14:53.610952: step: 900/531, loss: 0.0075303916819393635 2023-01-22 18:14:54.679565: step: 904/531, loss: 0.005289142020046711 2023-01-22 18:14:55.736017: step: 908/531, loss: 0.002549083437770605 2023-01-22 18:14:56.791155: step: 912/531, loss: 0.000973164162132889 2023-01-22 18:14:57.845020: step: 916/531, loss: 0.00010077129991259426 2023-01-22 18:14:58.932192: step: 920/531, loss: 0.009604857303202152 2023-01-22 18:14:59.997739: step: 924/531, loss: 0.004010828677564859 2023-01-22 18:15:01.050247: step: 928/531, loss: 0.0017199756111949682 2023-01-22 18:15:02.125645: step: 932/531, loss: 0.008867351338267326 2023-01-22 18:15:03.175620: step: 936/531, loss: 0.00048212322872132063 2023-01-22 18:15:04.232875: step: 940/531, loss: 0.0010061763459816575 2023-01-22 18:15:05.288991: step: 944/531, loss: 0.00029186729807406664 2023-01-22 18:15:06.353210: step: 948/531, loss: 0.0033926840405911207 2023-01-22 18:15:07.411369: step: 952/531, loss: 0.002526672091335058 2023-01-22 18:15:08.473907: step: 956/531, loss: 3.5623535950435326e-05 2023-01-22 18:15:09.520658: step: 960/531, loss: 9.588974353391677e-05 2023-01-22 18:15:10.579980: step: 964/531, loss: 0.001444108784198761 2023-01-22 18:15:11.640657: step: 968/531, loss: 0.00039235668373294175 2023-01-22 18:15:12.697265: step: 972/531, loss: 0.002232030965387821 2023-01-22 18:15:13.781989: step: 976/531, loss: 0.0013621806865558028 2023-01-22 18:15:14.866112: step: 980/531, loss: 0.0010406405199319124 2023-01-22 18:15:15.934589: step: 984/531, loss: 5.313460496836342e-05 2023-01-22 18:15:16.989067: step: 988/531, loss: 0.0002594345423858613 2023-01-22 18:15:18.063209: step: 992/531, loss: 0.002299090614542365 2023-01-22 18:15:19.109604: step: 996/531, loss: 0.00027974246768280864 2023-01-22 18:15:20.168521: step: 1000/531, loss: 0.0873853862285614 2023-01-22 18:15:21.221251: step: 1004/531, loss: 0.0024309372529387474 2023-01-22 18:15:22.269527: step: 1008/531, loss: 0.008802449330687523 2023-01-22 18:15:23.320186: step: 1012/531, loss: 0.0009594323928467929 2023-01-22 18:15:24.390368: step: 1016/531, loss: 0.0005408779834397137 2023-01-22 18:15:25.446071: step: 1020/531, loss: 0.00037356792017817497 2023-01-22 18:15:26.510816: step: 1024/531, loss: 0.011383525095880032 2023-01-22 18:15:27.549965: step: 1028/531, loss: 8.98589423741214e-05 2023-01-22 18:15:28.612429: step: 1032/531, loss: 0.0006295870989561081 2023-01-22 18:15:29.659029: step: 1036/531, loss: 
0.002829010831192136 2023-01-22 18:15:30.722493: step: 1040/531, loss: 0.0002396887430222705 2023-01-22 18:15:31.788667: step: 1044/531, loss: 5.0104642923543e-07 2023-01-22 18:15:32.848842: step: 1048/531, loss: 0.0002615940466057509 2023-01-22 18:15:33.908138: step: 1052/531, loss: 0.0038523327093571424 2023-01-22 18:15:34.954067: step: 1056/531, loss: 0.000883518485352397 2023-01-22 18:15:36.018213: step: 1060/531, loss: 0.0021896460093557835 2023-01-22 18:15:37.059163: step: 1064/531, loss: 0.0002651022805366665 2023-01-22 18:15:38.131742: step: 1068/531, loss: 0.004354636184871197 2023-01-22 18:15:39.191600: step: 1072/531, loss: 0.003977876156568527 2023-01-22 18:15:40.244093: step: 1076/531, loss: 0.008849016390740871 2023-01-22 18:15:41.311336: step: 1080/531, loss: 3.6134490073891357e-05 2023-01-22 18:15:42.384802: step: 1084/531, loss: 0.013320890255272388 2023-01-22 18:15:43.453635: step: 1088/531, loss: 7.336511043831706e-05 2023-01-22 18:15:44.519476: step: 1092/531, loss: 0.011483129113912582 2023-01-22 18:15:45.579034: step: 1096/531, loss: 0.00011668517254292965 2023-01-22 18:15:46.643472: step: 1100/531, loss: 0.00870268139988184 2023-01-22 18:15:47.699796: step: 1104/531, loss: 0.0010282648727297783 2023-01-22 18:15:48.760599: step: 1108/531, loss: 0.003967622295022011 2023-01-22 18:15:49.814807: step: 1112/531, loss: 0.002501889830455184 2023-01-22 18:15:50.888859: step: 1116/531, loss: 0.0002148712082998827 2023-01-22 18:15:51.947484: step: 1120/531, loss: 0.0019938137847930193 2023-01-22 18:15:52.999650: step: 1124/531, loss: 0.0022507677786052227 2023-01-22 18:15:54.055237: step: 1128/531, loss: 0.00032830797135829926 2023-01-22 18:15:55.122648: step: 1132/531, loss: 0.002065314445644617 2023-01-22 18:15:56.179602: step: 1136/531, loss: 3.743823981494643e-05 2023-01-22 18:15:57.238791: step: 1140/531, loss: 0.00012083940237062052 2023-01-22 18:15:58.292542: step: 1144/531, loss: 0.00525860209017992 2023-01-22 18:15:59.364718: step: 1148/531, loss: 0.0003096633590757847 2023-01-22 18:16:00.421374: step: 1152/531, loss: 0.008722452446818352 2023-01-22 18:16:01.486692: step: 1156/531, loss: 0.01146688312292099 2023-01-22 18:16:02.545732: step: 1160/531, loss: 0.0019660552497953176 2023-01-22 18:16:03.584963: step: 1164/531, loss: 0.0015725501580163836 2023-01-22 18:16:04.640999: step: 1168/531, loss: 0.0017212495440617204 2023-01-22 18:16:05.703910: step: 1172/531, loss: 0.00023194462119136006 2023-01-22 18:16:06.754210: step: 1176/531, loss: 0.0006005600444041193 2023-01-22 18:16:07.818815: step: 1180/531, loss: 0.0063990936614573 2023-01-22 18:16:08.882609: step: 1184/531, loss: 0.0007296130643226206 2023-01-22 18:16:09.950393: step: 1188/531, loss: 0.0036682954523712397 2023-01-22 18:16:11.001131: step: 1192/531, loss: 0.0004935808246955276 2023-01-22 18:16:12.074315: step: 1196/531, loss: 0.0010374593548476696 2023-01-22 18:16:13.126999: step: 1200/531, loss: 0.008196872659027576 2023-01-22 18:16:14.175176: step: 1204/531, loss: 6.592134013772011e-05 2023-01-22 18:16:15.233188: step: 1208/531, loss: 5.240000518824672e-06 2023-01-22 18:16:16.287120: step: 1212/531, loss: 0.0015342289116233587 2023-01-22 18:16:17.343445: step: 1216/531, loss: 0.0013343931641429663 2023-01-22 18:16:18.404292: step: 1220/531, loss: 5.063352728029713e-05 2023-01-22 18:16:19.463639: step: 1224/531, loss: 0.0011145316530019045 2023-01-22 18:16:20.544424: step: 1228/531, loss: 0.002456244546920061 2023-01-22 18:16:21.594343: step: 1232/531, loss: 0.002808689372614026 2023-01-22 
18:16:22.666080: step: 1236/531, loss: 0.0017257626168429852 2023-01-22 18:16:23.712886: step: 1240/531, loss: 0.0009658159106038511 2023-01-22 18:16:24.776685: step: 1244/531, loss: 0.0006192835862748325 2023-01-22 18:16:25.848612: step: 1248/531, loss: 0.002016267506405711 2023-01-22 18:16:26.911190: step: 1252/531, loss: 7.521335646742955e-05 2023-01-22 18:16:27.962643: step: 1256/531, loss: 0.002600456355139613 2023-01-22 18:16:29.014970: step: 1260/531, loss: 8.527469617547467e-06 2023-01-22 18:16:30.071052: step: 1264/531, loss: 0.0032685298938304186 2023-01-22 18:16:31.133684: step: 1268/531, loss: 0.001603559241630137 2023-01-22 18:16:32.196988: step: 1272/531, loss: 0.0025893133133649826 2023-01-22 18:16:33.268159: step: 1276/531, loss: 0.00023609522031620145 2023-01-22 18:16:34.327125: step: 1280/531, loss: 4.347735739429481e-05 2023-01-22 18:16:35.379936: step: 1284/531, loss: 0.00042860786197707057 2023-01-22 18:16:36.430311: step: 1288/531, loss: 0.003873482346534729 2023-01-22 18:16:37.492459: step: 1292/531, loss: 0.00044308914220891893 2023-01-22 18:16:38.551237: step: 1296/531, loss: 2.2038011593394913e-05 2023-01-22 18:16:39.600379: step: 1300/531, loss: 0.038779083639383316 2023-01-22 18:16:40.663396: step: 1304/531, loss: 0.0006104726926423609 2023-01-22 18:16:41.716865: step: 1308/531, loss: 0.00022003143385518342 2023-01-22 18:16:42.777227: step: 1312/531, loss: 0.0012571928091347218 2023-01-22 18:16:43.835251: step: 1316/531, loss: 0.0028441783506423235 2023-01-22 18:16:44.919393: step: 1320/531, loss: 0.004914427176117897 2023-01-22 18:16:45.982647: step: 1324/531, loss: 0.00031373955425806344 2023-01-22 18:16:47.049068: step: 1328/531, loss: 1.7868629583972506e-05 2023-01-22 18:16:48.112231: step: 1332/531, loss: 0.0057790507562458515 2023-01-22 18:16:49.174336: step: 1336/531, loss: 0.0037652403116226196 2023-01-22 18:16:50.262341: step: 1340/531, loss: 0.0009412994841113687 2023-01-22 18:16:51.324436: step: 1344/531, loss: 0.0018789108144119382 2023-01-22 18:16:52.377732: step: 1348/531, loss: 0.00906267762184143 2023-01-22 18:16:53.433066: step: 1352/531, loss: 0.0016206526197493076 2023-01-22 18:16:54.495399: step: 1356/531, loss: 0.001545802573673427 2023-01-22 18:16:55.586877: step: 1360/531, loss: 0.0008290863479487598 2023-01-22 18:16:56.640474: step: 1364/531, loss: 0.0019554882310330868 2023-01-22 18:16:57.709670: step: 1368/531, loss: 0.0003496269346214831 2023-01-22 18:16:58.770904: step: 1372/531, loss: 0.0005644718185067177 2023-01-22 18:16:59.832095: step: 1376/531, loss: 0.004910139366984367 2023-01-22 18:17:00.897735: step: 1380/531, loss: 0.0023308389354497194 2023-01-22 18:17:01.956762: step: 1384/531, loss: 0.0035187278408557177 2023-01-22 18:17:03.025630: step: 1388/531, loss: 0.00235362839885056 2023-01-22 18:17:04.080321: step: 1392/531, loss: 0.004601230379194021 2023-01-22 18:17:05.144559: step: 1396/531, loss: 0.00044968537986278534 2023-01-22 18:17:06.190044: step: 1400/531, loss: 0.17044170200824738 2023-01-22 18:17:07.238502: step: 1404/531, loss: 1.6982727174763568e-05 2023-01-22 18:17:08.294829: step: 1408/531, loss: 0.006890931632369757 2023-01-22 18:17:09.355353: step: 1412/531, loss: 4.8774720198707655e-05 2023-01-22 18:17:10.411150: step: 1416/531, loss: 0.0038381507620215416 2023-01-22 18:17:11.467849: step: 1420/531, loss: 0.00964371208101511 2023-01-22 18:17:12.538428: step: 1424/531, loss: 0.003110338933765888 2023-01-22 18:17:13.600630: step: 1428/531, loss: 0.0013871254632249475 2023-01-22 18:17:14.676872: step: 1432/531, 
loss: 0.014119843952357769 2023-01-22 18:17:15.746193: step: 1436/531, loss: 0.03157404065132141 2023-01-22 18:17:16.801449: step: 1440/531, loss: 0.0006457576528191566 2023-01-22 18:17:17.880553: step: 1444/531, loss: 0.0011518874671310186 2023-01-22 18:17:18.934863: step: 1448/531, loss: 0.0004716934054158628 2023-01-22 18:17:19.995721: step: 1452/531, loss: 0.005320834927260876 2023-01-22 18:17:21.066953: step: 1456/531, loss: 0.03181644156575203 2023-01-22 18:17:22.112460: step: 1460/531, loss: 0.022640204057097435 2023-01-22 18:17:23.164381: step: 1464/531, loss: 0.00043951653060503304 2023-01-22 18:17:24.224325: step: 1468/531, loss: 0.0020284373313188553 2023-01-22 18:17:25.282948: step: 1472/531, loss: 0.00926901400089264 2023-01-22 18:17:26.337152: step: 1476/531, loss: 0.003973019775003195 2023-01-22 18:17:27.401706: step: 1480/531, loss: 0.006028393749147654 2023-01-22 18:17:28.463841: step: 1484/531, loss: 0.00011165729665663093 2023-01-22 18:17:29.509515: step: 1488/531, loss: 0.00041287802741862833 2023-01-22 18:17:30.551960: step: 1492/531, loss: 0.005346434656530619 2023-01-22 18:17:31.632051: step: 1496/531, loss: 0.006351563148200512 2023-01-22 18:17:32.680837: step: 1500/531, loss: 1.475368389947107e-05 2023-01-22 18:17:33.732408: step: 1504/531, loss: 0.00901619903743267 2023-01-22 18:17:34.789927: step: 1508/531, loss: 3.715350976563059e-05 2023-01-22 18:17:35.842734: step: 1512/531, loss: 0.00014359848864842206 2023-01-22 18:17:36.901783: step: 1516/531, loss: 9.431096259504557e-07 2023-01-22 18:17:37.962931: step: 1520/531, loss: 9.744471753947437e-07 2023-01-22 18:17:39.032695: step: 1524/531, loss: 1.4067901247472037e-05 2023-01-22 18:17:40.095448: step: 1528/531, loss: 0.004908103961497545 2023-01-22 18:17:41.157258: step: 1532/531, loss: 0.009209039621055126 2023-01-22 18:17:42.212084: step: 1536/531, loss: 0.001192554016597569 2023-01-22 18:17:43.270066: step: 1540/531, loss: 0.004860010463744402 2023-01-22 18:17:44.327245: step: 1544/531, loss: 0.011957105249166489 2023-01-22 18:17:45.401404: step: 1548/531, loss: 0.0024243956431746483 2023-01-22 18:17:46.455834: step: 1552/531, loss: 0.015127007849514484 2023-01-22 18:17:47.517054: step: 1556/531, loss: 0.0013533026212826371 2023-01-22 18:17:48.574776: step: 1560/531, loss: 0.006612077355384827 2023-01-22 18:17:49.620940: step: 1564/531, loss: 0.0011698710732161999 2023-01-22 18:17:50.692457: step: 1568/531, loss: 0.00013252052303869277 2023-01-22 18:17:51.751779: step: 1572/531, loss: 0.00015672024164814502 2023-01-22 18:17:52.800772: step: 1576/531, loss: 0.0015872985823079944 2023-01-22 18:17:53.849797: step: 1580/531, loss: 0.00015963762416504323 2023-01-22 18:17:54.894796: step: 1584/531, loss: 2.595854675746523e-05 2023-01-22 18:17:55.946957: step: 1588/531, loss: 0.0 2023-01-22 18:17:56.996691: step: 1592/531, loss: 0.0006789904437027872 2023-01-22 18:17:58.062637: step: 1596/531, loss: 0.0053655230440199375 2023-01-22 18:17:59.107676: step: 1600/531, loss: 0.0007486220565624535 2023-01-22 18:18:00.158060: step: 1604/531, loss: 0.0024495527613908052 2023-01-22 18:18:01.201248: step: 1608/531, loss: 0.0003433394304011017 2023-01-22 18:18:02.256271: step: 1612/531, loss: 0.0029550609178841114 2023-01-22 18:18:03.324428: step: 1616/531, loss: 0.007177832070738077 2023-01-22 18:18:04.377152: step: 1620/531, loss: 0.0010881096823140979 2023-01-22 18:18:05.456566: step: 1624/531, loss: 0.00016112506273202598 2023-01-22 18:18:06.525526: step: 1628/531, loss: 0.0012078933650627732 2023-01-22 18:18:07.589624: 
step: 1632/531, loss: 0.024158179759979248 2023-01-22 18:18:08.638627: step: 1636/531, loss: 0.000262209156062454 2023-01-22 18:18:09.722600: step: 1640/531, loss: 0.007524306420236826 2023-01-22 18:18:10.793973: step: 1644/531, loss: 0.06044727563858032 2023-01-22 18:18:11.888432: step: 1648/531, loss: 0.0034228444565087557 2023-01-22 18:18:12.933406: step: 1652/531, loss: 0.0034735058434307575 2023-01-22 18:18:13.990450: step: 1656/531, loss: 0.0004593534686136991 2023-01-22 18:18:15.046084: step: 1660/531, loss: 3.261943857069127e-05 2023-01-22 18:18:16.098648: step: 1664/531, loss: 0.0009421770228073001 2023-01-22 18:18:17.185423: step: 1668/531, loss: 0.0011454899795353413 2023-01-22 18:18:18.262245: step: 1672/531, loss: 0.006851317826658487 2023-01-22 18:18:19.316847: step: 1676/531, loss: 0.0009444098686799407 2023-01-22 18:18:20.400006: step: 1680/531, loss: 0.022287234663963318 2023-01-22 18:18:21.451892: step: 1684/531, loss: 0.0001457652688259259 2023-01-22 18:18:22.522133: step: 1688/531, loss: 0.00026868312852457166 2023-01-22 18:18:23.585612: step: 1692/531, loss: 0.0021390998736023903 2023-01-22 18:18:24.646082: step: 1696/531, loss: 0.0011894813505932689 2023-01-22 18:18:25.687510: step: 1700/531, loss: 1.503433031757595e-05 2023-01-22 18:18:26.750563: step: 1704/531, loss: 0.0037068442907184362 2023-01-22 18:18:27.799195: step: 1708/531, loss: 0.00041447763214819133 2023-01-22 18:18:28.872508: step: 1712/531, loss: 0.0014347850810736418 2023-01-22 18:18:29.944064: step: 1716/531, loss: 0.0007021583151072264 2023-01-22 18:18:31.016586: step: 1720/531, loss: 0.0033520509023219347 2023-01-22 18:18:32.080206: step: 1724/531, loss: 0.00010703740554163232 2023-01-22 18:18:33.148769: step: 1728/531, loss: 0.00018746900605037808 2023-01-22 18:18:34.213575: step: 1732/531, loss: 0.002802737755700946 2023-01-22 18:18:35.285408: step: 1736/531, loss: 0.0030879105906933546 2023-01-22 18:18:36.355258: step: 1740/531, loss: 0.0016189685557037592 2023-01-22 18:18:37.407458: step: 1744/531, loss: 0.0009682712843641639 2023-01-22 18:18:38.473401: step: 1748/531, loss: 0.0008436330244876444 2023-01-22 18:18:39.526286: step: 1752/531, loss: 0.020546063780784607 2023-01-22 18:18:40.587858: step: 1756/531, loss: 0.0002602914464659989 2023-01-22 18:18:41.648592: step: 1760/531, loss: 0.0007306385668925941 2023-01-22 18:18:42.709884: step: 1764/531, loss: 0.0066851102747023106 2023-01-22 18:18:43.756378: step: 1768/531, loss: 0.00256662187166512 2023-01-22 18:18:44.819300: step: 1772/531, loss: 0.003725983202457428 2023-01-22 18:18:45.881769: step: 1776/531, loss: 0.004710950888693333 2023-01-22 18:18:46.934567: step: 1780/531, loss: 0.0037541964557021856 2023-01-22 18:18:47.992000: step: 1784/531, loss: 0.0006544382777065039 2023-01-22 18:18:49.046966: step: 1788/531, loss: 0.0043398612178862095 2023-01-22 18:18:50.106357: step: 1792/531, loss: 1.502430336586258e-06 2023-01-22 18:18:51.175895: step: 1796/531, loss: 0.0007283294689841568 2023-01-22 18:18:52.226777: step: 1800/531, loss: 1.8998946416104445e-08 2023-01-22 18:18:53.287368: step: 1804/531, loss: 0.0033150820527225733 2023-01-22 18:18:54.342006: step: 1808/531, loss: 1.0320477485947777e-06 2023-01-22 18:18:55.395205: step: 1812/531, loss: 0.0021151944529265165 2023-01-22 18:18:56.448132: step: 1816/531, loss: 0.005310488399118185 2023-01-22 18:18:57.509741: step: 1820/531, loss: 0.0023086806759238243 2023-01-22 18:18:58.558328: step: 1824/531, loss: 3.3071394227590645e-06 2023-01-22 18:18:59.629230: step: 1828/531, loss: 
0.0031708471942692995 2023-01-22 18:19:00.693352: step: 1832/531, loss: 0.0005807864363305271 2023-01-22 18:19:01.751710: step: 1836/531, loss: 1.5153469576034695e-05 2023-01-22 18:19:02.807290: step: 1840/531, loss: 0.003144286572933197 2023-01-22 18:19:03.863703: step: 1844/531, loss: 0.0006509244558401406 2023-01-22 18:19:04.919274: step: 1848/531, loss: 0.0 2023-01-22 18:19:05.961056: step: 1852/531, loss: 0.0006537417066283524 2023-01-22 18:19:07.006285: step: 1856/531, loss: 0.003799894591793418 2023-01-22 18:19:08.066198: step: 1860/531, loss: 0.00012065597547916695 2023-01-22 18:19:09.115317: step: 1864/531, loss: 0.000554021040443331 2023-01-22 18:19:10.171677: step: 1868/531, loss: 0.004134547431021929 2023-01-22 18:19:11.230389: step: 1872/531, loss: 0.0014604346361011267 2023-01-22 18:19:12.319338: step: 1876/531, loss: 0.0024224831722676754 2023-01-22 18:19:13.397070: step: 1880/531, loss: 0.0009388086036778986 2023-01-22 18:19:14.470133: step: 1884/531, loss: 0.004616179969161749 2023-01-22 18:19:15.524476: step: 1888/531, loss: 0.00034893781412392855 2023-01-22 18:19:16.579460: step: 1892/531, loss: 0.011060983873903751 2023-01-22 18:19:17.643354: step: 1896/531, loss: 0.0013159917434677482 2023-01-22 18:19:18.691299: step: 1900/531, loss: 2.4216780730057508e-05 2023-01-22 18:19:19.752548: step: 1904/531, loss: 3.7480374885490164e-05 2023-01-22 18:19:20.847072: step: 1908/531, loss: 0.00861275102943182 2023-01-22 18:19:21.906760: step: 1912/531, loss: 0.0002775720495264977 2023-01-22 18:19:22.989698: step: 1916/531, loss: 0.026195937767624855 2023-01-22 18:19:24.060906: step: 1920/531, loss: 0.0028177814092487097 2023-01-22 18:19:25.110468: step: 1924/531, loss: 0.004467234015464783 2023-01-22 18:19:26.185084: step: 1928/531, loss: 0.00026399921625852585 2023-01-22 18:19:27.249930: step: 1932/531, loss: 0.0006418237462639809 2023-01-22 18:19:28.321585: step: 1936/531, loss: 0.00297364080324769 2023-01-22 18:19:29.371661: step: 1940/531, loss: 0.0022940761409699917 2023-01-22 18:19:30.429086: step: 1944/531, loss: 0.0006617771578021348 2023-01-22 18:19:31.494237: step: 1948/531, loss: 0.0051666004583239555 2023-01-22 18:19:32.557648: step: 1952/531, loss: 0.0004947419511154294 2023-01-22 18:19:33.610680: step: 1956/531, loss: 0.0025995911564677954 2023-01-22 18:19:34.667933: step: 1960/531, loss: 0.011489641852676868 2023-01-22 18:19:35.727207: step: 1964/531, loss: 0.0007339761359617114 2023-01-22 18:19:36.782988: step: 1968/531, loss: 0.0013429466634988785 2023-01-22 18:19:37.841745: step: 1972/531, loss: 0.004915457218885422 2023-01-22 18:19:38.905538: step: 1976/531, loss: 0.0003365647862665355 2023-01-22 18:19:39.964134: step: 1980/531, loss: 1.1311198250041343e-05 2023-01-22 18:19:41.024542: step: 1984/531, loss: 0.004728915635496378 2023-01-22 18:19:42.108745: step: 1988/531, loss: 0.0021634509321302176 2023-01-22 18:19:43.153283: step: 1992/531, loss: 0.005010695196688175 2023-01-22 18:19:44.212867: step: 1996/531, loss: 0.005788684822618961 2023-01-22 18:19:45.272035: step: 2000/531, loss: 0.0042273816652596 2023-01-22 18:19:46.331703: step: 2004/531, loss: 0.0013140605296939611 2023-01-22 18:19:47.402663: step: 2008/531, loss: 0.0004994259215891361 2023-01-22 18:19:48.466082: step: 2012/531, loss: 6.0525995650095865e-05 2023-01-22 18:19:49.533494: step: 2016/531, loss: 0.0117364302277565 2023-01-22 18:19:50.591682: step: 2020/531, loss: 0.0005778932245448232 2023-01-22 18:19:51.650239: step: 2024/531, loss: 0.0004402291961014271 2023-01-22 18:19:52.708642: step: 
2028/531, loss: 0.00029284090851433575 2023-01-22 18:19:53.771118: step: 2032/531, loss: 1.7201027731061913e-05 2023-01-22 18:19:54.846393: step: 2036/531, loss: 0.0019599662628024817 2023-01-22 18:19:55.915710: step: 2040/531, loss: 0.0002874326892197132 2023-01-22 18:19:56.971967: step: 2044/531, loss: 0.006079535000026226 2023-01-22 18:19:58.032775: step: 2048/531, loss: 0.006589184049516916 2023-01-22 18:19:59.089252: step: 2052/531, loss: 0.004900340922176838 2023-01-22 18:20:00.144281: step: 2056/531, loss: 0.00044026729301549494 2023-01-22 18:20:01.191866: step: 2060/531, loss: 0.0005237417062744498 2023-01-22 18:20:02.241912: step: 2064/531, loss: 5.508607500814833e-05 2023-01-22 18:20:03.316153: step: 2068/531, loss: 0.0041692522354424 2023-01-22 18:20:04.384720: step: 2072/531, loss: 0.0020148782059550285 2023-01-22 18:20:05.446956: step: 2076/531, loss: 3.255229239584878e-05 2023-01-22 18:20:06.492558: step: 2080/531, loss: 0.003427467541769147 2023-01-22 18:20:07.573532: step: 2084/531, loss: 0.006046326365321875 2023-01-22 18:20:08.634841: step: 2088/531, loss: 0.015670448541641235 2023-01-22 18:20:09.698242: step: 2092/531, loss: 0.0004098574281670153 2023-01-22 18:20:10.757257: step: 2096/531, loss: 0.0025626695714890957 2023-01-22 18:20:11.828797: step: 2100/531, loss: 0.005880072712898254 2023-01-22 18:20:12.964395: step: 2104/531, loss: 0.0007337437709793448 2023-01-22 18:20:14.012267: step: 2108/531, loss: 0.003602172713726759 2023-01-22 18:20:15.070611: step: 2112/531, loss: 0.000552355544641614 2023-01-22 18:20:16.123101: step: 2116/531, loss: 0.0052804420702159405 2023-01-22 18:20:17.184383: step: 2120/531, loss: 0.0012563823256641626 2023-01-22 18:20:18.234544: step: 2124/531, loss: 0.0076697091571986675 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.359904586834734, 'r': 0.32445943813131317, 'f1': 0.34126411022576364}, 'combined': 0.25145776542951004, 'stategy': 1, 'epoch': 18} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33720743007999715, 'r': 0.2781656959324164, 'f1': 0.3048541949486819}, 'combined': 0.19089000992113728, 'stategy': 1, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345324851778656, 'r': 0.3504021476625841, 'f1': 0.34228346954250566}, 'combined': 0.25220887229447786, 'stategy': 1, 'epoch': 18} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36640686757439905, 'r': 0.3049535967278362, 'f1': 0.3328676562708369}, 'combined': 0.20628418135094118, 'stategy': 1, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31357825015403573, 'r': 0.3219086021505376, 'f1': 0.3176888264669163}, 'combined': 0.23408650371246464, 'stategy': 1, 'epoch': 18} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3646099809547178, 'r': 0.2881966877465023, 'f1': 0.3219311133555782}, 'combined': 0.21355826331508657, 'stategy': 1, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 
0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 18} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.359904586834734, 'r': 0.32445943813131317, 'f1': 0.34126411022576364}, 'combined': 0.25145776542951004, 'stategy': 1, 'epoch': 18} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33720743007999715, 'r': 0.2781656959324164, 'f1': 0.3048541949486819}, 'combined': 0.19089000992113728, 'stategy': 1, 'epoch': 18} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 18} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5} ****************************** Epoch: 19 command: python train.py --model_name coref --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --accumulate_step 4 --max_epoch 20 --event_hidden_num 500 --p1_data_weight 0.2 --learning_rate 9e-4 2023-01-22 18:22:56.111701: step: 4/531, loss: 0.006720223464071751 2023-01-22 18:22:57.178087: step: 8/531, loss: 0.0059721143916249275 2023-01-22 18:22:58.235525: step: 12/531, loss: 0.0017975919181481004 2023-01-22 18:22:59.285834: step: 16/531, loss: 0.014228818006813526 2023-01-22 18:23:00.336507: step: 20/531, loss: 0.004030162934213877 2023-01-22 18:23:01.380223: step: 24/531, loss: 2.4266817490570247e-05 2023-01-22 18:23:02.431740: step: 28/531, loss: 0.0005736157763749361 2023-01-22 18:23:03.479575: step: 32/531, loss: 3.7223713889034116e-07 2023-01-22 18:23:04.533193: step: 36/531, loss: 5.96017598581966e-05 2023-01-22 18:23:05.575735: step: 40/531, loss: 0.0005894675850868225 2023-01-22 18:23:06.643294: 
step: 44/531, loss: 6.176848273753421e-06 2023-01-22 18:23:07.677928: step: 48/531, loss: 0.0005704265204258263 2023-01-22 18:23:08.739001: step: 52/531, loss: 0.0005432062316685915 2023-01-22 18:23:09.803082: step: 56/531, loss: 0.0004931489238515496 2023-01-22 18:23:10.855685: step: 60/531, loss: 0.0002893197815865278 2023-01-22 18:23:11.928521: step: 64/531, loss: 4.104947493033251e-06 2023-01-22 18:23:12.992200: step: 68/531, loss: 0.0009174162405543029 2023-01-22 18:23:14.037881: step: 72/531, loss: 6.550588295795023e-05 2023-01-22 18:23:15.089944: step: 76/531, loss: 0.0011277872836217284 2023-01-22 18:23:16.164849: step: 80/531, loss: 0.000191867962712422 2023-01-22 18:23:17.236884: step: 84/531, loss: 0.0001913226442411542 2023-01-22 18:23:18.281929: step: 88/531, loss: 0.002417304553091526 2023-01-22 18:23:19.328182: step: 92/531, loss: 6.102600764279487e-06 2023-01-22 18:23:20.388180: step: 96/531, loss: 0.0005290848203003407 2023-01-22 18:23:21.445053: step: 100/531, loss: 0.003285954473540187 2023-01-22 18:23:22.499363: step: 104/531, loss: 0.001333567313849926 2023-01-22 18:23:23.556538: step: 108/531, loss: 0.0017132742796093225 2023-01-22 18:23:24.614227: step: 112/531, loss: 0.0012153297429904342 2023-01-22 18:23:25.668508: step: 116/531, loss: 5.01338081448921e-06 2023-01-22 18:23:26.734266: step: 120/531, loss: 4.283738962840289e-05 2023-01-22 18:23:27.792170: step: 124/531, loss: 0.015718642622232437 2023-01-22 18:23:28.835699: step: 128/531, loss: 0.003705643117427826 2023-01-22 18:23:29.886512: step: 132/531, loss: 0.006282275076955557 2023-01-22 18:23:30.955949: step: 136/531, loss: 0.003376021981239319 2023-01-22 18:23:32.012375: step: 140/531, loss: 0.019055599346756935 2023-01-22 18:23:33.073511: step: 144/531, loss: 0.00339406356215477 2023-01-22 18:23:34.117554: step: 148/531, loss: 0.0013628635788336396 2023-01-22 18:23:35.174000: step: 152/531, loss: 0.00034405544283799827 2023-01-22 18:23:36.247937: step: 156/531, loss: 0.05066709220409393 2023-01-22 18:23:37.293428: step: 160/531, loss: 3.862890309846989e-07 2023-01-22 18:23:38.351624: step: 164/531, loss: 0.00048249500105157495 2023-01-22 18:23:39.408112: step: 168/531, loss: 0.0006777556263841689 2023-01-22 18:23:40.471884: step: 172/531, loss: 0.005137070547789335 2023-01-22 18:23:41.524385: step: 176/531, loss: 0.002855887869372964 2023-01-22 18:23:42.580494: step: 180/531, loss: 0.003073526080697775 2023-01-22 18:23:43.640896: step: 184/531, loss: 0.005185392219573259 2023-01-22 18:23:44.685609: step: 188/531, loss: 0.015075264498591423 2023-01-22 18:23:45.754463: step: 192/531, loss: 0.028791431337594986 2023-01-22 18:23:46.814884: step: 196/531, loss: 0.0004028986149933189 2023-01-22 18:23:47.875351: step: 200/531, loss: 0.0001295555121032521 2023-01-22 18:23:48.960288: step: 204/531, loss: 2.0119448890909553e-05 2023-01-22 18:23:50.011824: step: 208/531, loss: 0.008962638676166534 2023-01-22 18:23:51.062417: step: 212/531, loss: 0.00038685271283611655 2023-01-22 18:23:52.120164: step: 216/531, loss: 0.0024822228588163853 2023-01-22 18:23:53.181095: step: 220/531, loss: 0.00011196519335499033 2023-01-22 18:23:54.244569: step: 224/531, loss: 0.003326332662254572 2023-01-22 18:23:55.290073: step: 228/531, loss: 0.00796507578343153 2023-01-22 18:23:56.354798: step: 232/531, loss: 0.0005102449795231223 2023-01-22 18:23:57.417905: step: 236/531, loss: 0.00311223603785038 2023-01-22 18:23:58.469442: step: 240/531, loss: 2.756703487705181e-08 2023-01-22 18:23:59.543641: step: 244/531, loss: 
0.02590913325548172 2023-01-22 18:24:00.596254: step: 248/531, loss: 2.674696588655934e-07 2023-01-22 18:24:01.647153: step: 252/531, loss: 0.0026235536206513643 2023-01-22 18:24:02.712195: step: 256/531, loss: 0.0032511844765394926 2023-01-22 18:24:03.767568: step: 260/531, loss: 0.003585663391277194 2023-01-22 18:24:04.832532: step: 264/531, loss: 0.0010120905935764313 2023-01-22 18:24:05.888191: step: 268/531, loss: 0.009389442391693592 2023-01-22 18:24:06.942892: step: 272/531, loss: 0.005816968157887459 2023-01-22 18:24:08.018341: step: 276/531, loss: 5.3155639761826023e-05 2023-01-22 18:24:09.085159: step: 280/531, loss: 0.06498151272535324 2023-01-22 18:24:10.146851: step: 284/531, loss: 0.0005738885374739766 2023-01-22 18:24:11.209383: step: 288/531, loss: 0.001633531996048987 2023-01-22 18:24:12.262967: step: 292/531, loss: 0.0004886495298705995 2023-01-22 18:24:13.328871: step: 296/531, loss: 0.0012550371466204524 2023-01-22 18:24:14.394241: step: 300/531, loss: 0.0018726540729403496 2023-01-22 18:24:15.453852: step: 304/531, loss: 0.02041521854698658 2023-01-22 18:24:16.518435: step: 308/531, loss: 0.0008746792445890605 2023-01-22 18:24:17.563798: step: 312/531, loss: 9.644900273997337e-05 2023-01-22 18:24:18.612601: step: 316/531, loss: 2.1154372007003985e-05 2023-01-22 18:24:19.671302: step: 320/531, loss: 0.0011407661950215697 2023-01-22 18:24:20.756733: step: 324/531, loss: 3.353166175656952e-05 2023-01-22 18:24:21.819590: step: 328/531, loss: 0.0018475366523489356 2023-01-22 18:24:22.887035: step: 332/531, loss: 0.0031339076813310385 2023-01-22 18:24:23.949090: step: 336/531, loss: 0.0033028284087777138 2023-01-22 18:24:24.989934: step: 340/531, loss: 0.0006987291853874922 2023-01-22 18:24:26.058419: step: 344/531, loss: 0.0011328631080687046 2023-01-22 18:24:27.108378: step: 348/531, loss: 0.002451628213748336 2023-01-22 18:24:28.189033: step: 352/531, loss: 0.0006278029759414494 2023-01-22 18:24:29.250525: step: 356/531, loss: 0.0001840236218413338 2023-01-22 18:24:30.307192: step: 360/531, loss: 0.0023397752083837986 2023-01-22 18:24:31.376709: step: 364/531, loss: 6.0871720052091405e-05 2023-01-22 18:24:32.426059: step: 368/531, loss: 0.003471350995823741 2023-01-22 18:24:33.479295: step: 372/531, loss: 1.0894418664975092e-05 2023-01-22 18:24:34.535875: step: 376/531, loss: 0.004536926280707121 2023-01-22 18:24:35.587205: step: 380/531, loss: 0.02280188724398613 2023-01-22 18:24:36.655372: step: 384/531, loss: 0.008394991047680378 2023-01-22 18:24:37.711018: step: 388/531, loss: 0.0010285151656717062 2023-01-22 18:24:38.765019: step: 392/531, loss: 0.0001164698987849988 2023-01-22 18:24:39.805772: step: 396/531, loss: 0.0024222531355917454 2023-01-22 18:24:40.867020: step: 400/531, loss: 0.007004153914749622 2023-01-22 18:24:41.935351: step: 404/531, loss: 0.005112409126013517 2023-01-22 18:24:43.012403: step: 408/531, loss: 0.002449022140353918 2023-01-22 18:24:44.072736: step: 412/531, loss: 0.0012433696538209915 2023-01-22 18:24:45.161568: step: 416/531, loss: 0.0034635982010513544 2023-01-22 18:24:46.216621: step: 420/531, loss: 8.951136987889186e-05 2023-01-22 18:24:47.268035: step: 424/531, loss: 6.893683166708797e-05 2023-01-22 18:24:48.316818: step: 428/531, loss: 0.0013753804378211498 2023-01-22 18:24:49.372318: step: 432/531, loss: 0.0008506132289767265 2023-01-22 18:24:50.425557: step: 436/531, loss: 0.006601061671972275 2023-01-22 18:24:51.484129: step: 440/531, loss: 0.0018112276447936893 2023-01-22 18:24:52.547680: step: 444/531, loss: 
0.0013045002706348896 2023-01-22 18:24:53.599962: step: 448/531, loss: 0.02691497839987278 2023-01-22 18:24:54.659067: step: 452/531, loss: 0.0008497920935042202 2023-01-22 18:24:55.718738: step: 456/531, loss: 0.02311461977660656 2023-01-22 18:24:56.773993: step: 460/531, loss: 0.005335524678230286 2023-01-22 18:24:57.836398: step: 464/531, loss: 0.007260491140186787 2023-01-22 18:24:58.896768: step: 468/531, loss: 1.8389975593890995e-05 2023-01-22 18:24:59.949782: step: 472/531, loss: 0.0013945504324510694 2023-01-22 18:25:01.020556: step: 476/531, loss: 0.0002992226800415665 2023-01-22 18:25:02.096414: step: 480/531, loss: 0.0002105611638398841 2023-01-22 18:25:03.176951: step: 484/531, loss: 0.0013609671732410789 2023-01-22 18:25:04.231819: step: 488/531, loss: 0.0008427142747677863 2023-01-22 18:25:05.288813: step: 492/531, loss: 0.0007751102675683796 2023-01-22 18:25:06.352167: step: 496/531, loss: 0.0017394828610122204 2023-01-22 18:25:07.397595: step: 500/531, loss: 0.00013782230962533504 2023-01-22 18:25:08.470121: step: 504/531, loss: 0.002614665310829878 2023-01-22 18:25:09.529320: step: 508/531, loss: 8.849025471135974e-05 2023-01-22 18:25:10.587820: step: 512/531, loss: 0.0023913762997835875 2023-01-22 18:25:11.651796: step: 516/531, loss: 0.010207696817815304 2023-01-22 18:25:12.694612: step: 520/531, loss: 5.8562844060361385e-05 2023-01-22 18:25:13.748554: step: 524/531, loss: 0.002058700891211629 2023-01-22 18:25:14.815943: step: 528/531, loss: 0.009050062857568264 2023-01-22 18:25:15.882722: step: 532/531, loss: 0.0024040727876126766 2023-01-22 18:25:16.939303: step: 536/531, loss: 0.001865816069766879 2023-01-22 18:25:18.008757: step: 540/531, loss: 0.0001078991626854986 2023-01-22 18:25:19.065707: step: 544/531, loss: 0.010045819915831089 2023-01-22 18:25:20.127261: step: 548/531, loss: 0.009184224531054497 2023-01-22 18:25:21.200626: step: 552/531, loss: 0.0005415479536168277 2023-01-22 18:25:22.248148: step: 556/531, loss: 0.002615584060549736 2023-01-22 18:25:23.323596: step: 560/531, loss: 0.0029433947056531906 2023-01-22 18:25:24.365875: step: 564/531, loss: 0.0005840560188516974 2023-01-22 18:25:25.429098: step: 568/531, loss: 0.00246348581276834 2023-01-22 18:25:26.479045: step: 572/531, loss: 0.009433802217245102 2023-01-22 18:25:27.563662: step: 576/531, loss: 0.00531425466760993 2023-01-22 18:25:28.613047: step: 580/531, loss: 0.0005168463685549796 2023-01-22 18:25:29.702425: step: 584/531, loss: 3.3897554203576874e-06 2023-01-22 18:25:30.766067: step: 588/531, loss: 0.002574296435341239 2023-01-22 18:25:31.828764: step: 592/531, loss: 0.00025103468215093017 2023-01-22 18:25:32.890993: step: 596/531, loss: 0.001993694109842181 2023-01-22 18:25:33.930757: step: 600/531, loss: 0.002243879484012723 2023-01-22 18:25:34.978579: step: 604/531, loss: 0.0013005051296204329 2023-01-22 18:25:36.029402: step: 608/531, loss: 0.010430709458887577 2023-01-22 18:25:37.087820: step: 612/531, loss: 0.0028112977743148804 2023-01-22 18:25:38.126618: step: 616/531, loss: 0.004245677497237921 2023-01-22 18:25:39.208978: step: 620/531, loss: 5.233210322330706e-05 2023-01-22 18:25:40.271988: step: 624/531, loss: 0.006031180266290903 2023-01-22 18:25:41.341803: step: 628/531, loss: 0.00013853635755367577 2023-01-22 18:25:42.427972: step: 632/531, loss: 0.0033995884004980326 2023-01-22 18:25:43.481630: step: 636/531, loss: 0.001307119382545352 2023-01-22 18:25:44.538096: step: 640/531, loss: 0.008930394425988197 2023-01-22 18:25:45.599474: step: 644/531, loss: 0.0031377198174595833 
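Note: the 'Current best result' blocks keep one best entry per language, and the 'New best chinese model...' line in epoch 18 coincides with the Dev Chinese 'combined' score (0.25145...) overtaking the previous epoch-15 best (0.25070...), while the Korean and Russian bests stay at epoch 5. The selection rule is therefore inferred from the log; the sketch below is illustrative only, and its names do not come from train.py.

    # Inferred per-language best tracking; identifiers here are illustrative, not from train.py.
    best = {}  # language -> (dev_combined, epoch, dev_scores, test_scores)

    def update_best(language, epoch, dev_scores, test_scores):
        dev_combined = dev_scores['combined']
        if language not in best or dev_combined > best[language][0]:
            best[language] = (dev_combined, epoch, dev_scores, test_scores)
            print(f"New best {language} model...")

    # At epoch 18 the Dev Chinese combined 0.2514... beats the stored epoch-15 value 0.2507...,
    # so only the Chinese entry is replaced; Korean and Russian keep their epoch-5 entries.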
2023-01-22 18:25:46.671670: step: 648/531, loss: 0.0035512649919837713 2023-01-22 18:25:47.716088: step: 652/531, loss: 0.00042117098928429186 2023-01-22 18:25:48.766918: step: 656/531, loss: 4.277531843399629e-05 2023-01-22 18:25:49.819619: step: 660/531, loss: 0.001540513476356864 2023-01-22 18:25:50.880577: step: 664/531, loss: 2.4710610887268558e-05 2023-01-22 18:25:51.951704: step: 668/531, loss: 0.00015206642274279147 2023-01-22 18:25:53.015006: step: 672/531, loss: 4.5043583668302745e-05 2023-01-22 18:25:54.079941: step: 676/531, loss: 0.00015633433940820396 2023-01-22 18:25:55.170268: step: 680/531, loss: 0.0017591443611308932 2023-01-22 18:25:56.225980: step: 684/531, loss: 0.0011856822529807687 2023-01-22 18:25:57.288363: step: 688/531, loss: 0.0006078141741454601 2023-01-22 18:25:58.361278: step: 692/531, loss: 0.0031036208383738995 2023-01-22 18:25:59.405704: step: 696/531, loss: 0.0007459915359504521 2023-01-22 18:26:00.465588: step: 700/531, loss: 0.0021458277478814125 2023-01-22 18:26:01.525579: step: 704/531, loss: 0.0023647055495530367 2023-01-22 18:26:02.583586: step: 708/531, loss: 0.000632931652944535 2023-01-22 18:26:03.647120: step: 712/531, loss: 0.0003922057512681931 2023-01-22 18:26:04.713827: step: 716/531, loss: 0.00011879483645316213 2023-01-22 18:26:05.779474: step: 720/531, loss: 0.0027899756096303463 2023-01-22 18:26:06.856339: step: 724/531, loss: 0.004695042502135038 2023-01-22 18:26:07.926009: step: 728/531, loss: 0.005567705258727074 2023-01-22 18:26:08.957597: step: 732/531, loss: 1.5464847820112482e-05 2023-01-22 18:26:10.033004: step: 736/531, loss: 0.006585231516510248 2023-01-22 18:26:11.088206: step: 740/531, loss: 0.0009290744201280177 2023-01-22 18:26:12.168510: step: 744/531, loss: 0.00033880904084071517 2023-01-22 18:26:13.214213: step: 748/531, loss: 0.009083887562155724 2023-01-22 18:26:14.287114: step: 752/531, loss: 0.000487333454657346 2023-01-22 18:26:15.358040: step: 756/531, loss: 0.001678626169450581 2023-01-22 18:26:16.416286: step: 760/531, loss: 0.0068448614329099655 2023-01-22 18:26:17.479370: step: 764/531, loss: 9.049963409779593e-05 2023-01-22 18:26:18.546449: step: 768/531, loss: 0.001556702540256083 2023-01-22 18:26:19.599153: step: 772/531, loss: 8.323080692207441e-05 2023-01-22 18:26:20.662509: step: 776/531, loss: 0.003015171969309449 2023-01-22 18:26:21.739124: step: 780/531, loss: 5.8750854805111885e-05 2023-01-22 18:26:22.804881: step: 784/531, loss: 0.004141016397625208 2023-01-22 18:26:23.859853: step: 788/531, loss: 0.0003056746209040284 2023-01-22 18:26:24.945471: step: 792/531, loss: 0.002190647181123495 2023-01-22 18:26:26.010158: step: 796/531, loss: 0.008782587945461273 2023-01-22 18:26:27.058895: step: 800/531, loss: 9.042941383086145e-05 2023-01-22 18:26:28.116268: step: 804/531, loss: 2.3222064555739053e-05 2023-01-22 18:26:29.190691: step: 808/531, loss: 0.002266150899231434 2023-01-22 18:26:30.269806: step: 812/531, loss: 0.0021656586322933435 2023-01-22 18:26:31.328538: step: 816/531, loss: 0.0013136636698618531 2023-01-22 18:26:32.399372: step: 820/531, loss: 0.011663348414003849 2023-01-22 18:26:33.471692: step: 824/531, loss: 0.0018752312753349543 2023-01-22 18:26:34.538052: step: 828/531, loss: 0.00010874243162106723 2023-01-22 18:26:35.608284: step: 832/531, loss: 0.005236182827502489 2023-01-22 18:26:36.668096: step: 836/531, loss: 0.0011878821533173323 2023-01-22 18:26:37.731159: step: 840/531, loss: 0.00010246843885397539 2023-01-22 18:26:38.783741: step: 844/531, loss: 0.0002533915394451469 
2023-01-22 18:26:39.848869: step: 848/531, loss: 0.0018395517254248261 2023-01-22 18:26:40.915237: step: 852/531, loss: 0.0007976075867190957 2023-01-22 18:26:41.983189: step: 856/531, loss: 0.0007676354143768549 2023-01-22 18:26:43.027401: step: 860/531, loss: 5.7634802942629904e-05 2023-01-22 18:26:44.097422: step: 864/531, loss: 0.00012060361768817529 2023-01-22 18:26:45.157532: step: 868/531, loss: 2.1103191102156416e-05 2023-01-22 18:26:46.235659: step: 872/531, loss: 0.002773697255179286 2023-01-22 18:26:47.301654: step: 876/531, loss: 0.012475637719035149 2023-01-22 18:26:48.366042: step: 880/531, loss: 0.0020901283714920282 2023-01-22 18:26:49.427103: step: 884/531, loss: 0.01291695050895214 2023-01-22 18:26:50.482948: step: 888/531, loss: 0.0031730765476822853 2023-01-22 18:26:51.564437: step: 892/531, loss: 0.0004845313960686326 2023-01-22 18:26:52.621298: step: 896/531, loss: 0.008706407621502876 2023-01-22 18:26:53.676771: step: 900/531, loss: 0.003484827233478427 2023-01-22 18:26:54.719613: step: 904/531, loss: 3.822789039986674e-06 2023-01-22 18:26:55.778972: step: 908/531, loss: 3.733766789082438e-05 2023-01-22 18:26:56.854070: step: 912/531, loss: 0.0067475964315235615 2023-01-22 18:26:57.928655: step: 916/531, loss: 0.00026243983302265406 2023-01-22 18:26:58.990503: step: 920/531, loss: 0.0004947097040712833 2023-01-22 18:27:00.054404: step: 924/531, loss: 0.005439384374767542 2023-01-22 18:27:01.132560: step: 928/531, loss: 0.0027062678709626198 2023-01-22 18:27:02.211633: step: 932/531, loss: 0.002772000851109624 2023-01-22 18:27:03.274806: step: 936/531, loss: 0.007011901121586561 2023-01-22 18:27:04.350147: step: 940/531, loss: 4.114015246159397e-05 2023-01-22 18:27:05.414921: step: 944/531, loss: 0.0011345319217070937 2023-01-22 18:27:06.488820: step: 948/531, loss: 0.0015942688332870603 2023-01-22 18:27:07.548264: step: 952/531, loss: 0.0003534366551321 2023-01-22 18:27:08.612481: step: 956/531, loss: 0.0025519435293972492 2023-01-22 18:27:09.668096: step: 960/531, loss: 0.00010970090079354122 2023-01-22 18:27:10.744649: step: 964/531, loss: 0.01113598607480526 2023-01-22 18:27:11.804793: step: 968/531, loss: 0.00017252523684874177 2023-01-22 18:27:12.871406: step: 972/531, loss: 1.4288384591054637e-06 2023-01-22 18:27:13.937368: step: 976/531, loss: 0.00038461951771751046 2023-01-22 18:27:14.986325: step: 980/531, loss: 0.00841082725673914 2023-01-22 18:27:16.057146: step: 984/531, loss: 0.007877668365836143 2023-01-22 18:27:17.134370: step: 988/531, loss: 0.0018787410808727145 2023-01-22 18:27:18.209986: step: 992/531, loss: 0.0022774848621338606 2023-01-22 18:27:19.285243: step: 996/531, loss: 0.0013377065770328045 2023-01-22 18:27:20.354463: step: 1000/531, loss: 0.009514780715107918 2023-01-22 18:27:21.415000: step: 1004/531, loss: 0.0007354762055911124 2023-01-22 18:27:22.476089: step: 1008/531, loss: 0.004189879167824984 2023-01-22 18:27:23.543767: step: 1012/531, loss: 0.0005681710899807513 2023-01-22 18:27:24.605481: step: 1016/531, loss: 0.0024610937107354403 2023-01-22 18:27:25.681713: step: 1020/531, loss: 3.1502804631600156e-05 2023-01-22 18:27:26.747518: step: 1024/531, loss: 0.008294281549751759 2023-01-22 18:27:27.812649: step: 1028/531, loss: 0.0022488790564239025 2023-01-22 18:27:28.890797: step: 1032/531, loss: 0.0004743848112411797 2023-01-22 18:27:29.944671: step: 1036/531, loss: 0.0001802453916752711 2023-01-22 18:27:31.012372: step: 1040/531, loss: 0.005130332428961992 2023-01-22 18:27:32.091603: step: 1044/531, loss: 0.003923649899661541 
2023-01-22 18:27:33.159592: step: 1048/531, loss: 0.005003295838832855 2023-01-22 18:27:34.236519: step: 1052/531, loss: 0.0006852123187854886 2023-01-22 18:27:35.300033: step: 1056/531, loss: 0.0021669233683496714 2023-01-22 18:27:36.353355: step: 1060/531, loss: 0.00403800792992115 2023-01-22 18:27:37.415228: step: 1064/531, loss: 0.0019535867031663656 2023-01-22 18:27:38.470416: step: 1068/531, loss: 0.0006539862952195108 2023-01-22 18:27:39.533004: step: 1072/531, loss: 0.0026660149451345205 2023-01-22 18:27:40.607591: step: 1076/531, loss: 0.000419384625274688 2023-01-22 18:27:41.666274: step: 1080/531, loss: 0.0021252762526273727 2023-01-22 18:27:42.726396: step: 1084/531, loss: 0.0013478387845680118 2023-01-22 18:27:43.794124: step: 1088/531, loss: 0.0012972408439964056 2023-01-22 18:27:44.854614: step: 1092/531, loss: 0.0012924638576805592 2023-01-22 18:27:45.927576: step: 1096/531, loss: 0.008009380660951138 2023-01-22 18:27:46.985431: step: 1100/531, loss: 0.006106253247708082 2023-01-22 18:27:48.055441: step: 1104/531, loss: 0.00010837097943294793 2023-01-22 18:27:49.110391: step: 1108/531, loss: 0.0015323911793529987 2023-01-22 18:27:50.182604: step: 1112/531, loss: 0.0028600089717656374 2023-01-22 18:27:51.243734: step: 1116/531, loss: 0.0012562470510601997 2023-01-22 18:27:52.318707: step: 1120/531, loss: 0.002116288524121046 2023-01-22 18:27:53.386339: step: 1124/531, loss: 0.00265909219160676 2023-01-22 18:27:54.451126: step: 1128/531, loss: 0.0002130574284819886 2023-01-22 18:27:55.526544: step: 1132/531, loss: 0.012329761870205402 2023-01-22 18:27:56.587740: step: 1136/531, loss: 1.3047178072156385e-05 2023-01-22 18:27:57.670905: step: 1140/531, loss: 0.002820062916725874 2023-01-22 18:27:58.747401: step: 1144/531, loss: 0.008172815665602684 2023-01-22 18:27:59.808138: step: 1148/531, loss: 0.0018082704627886415 2023-01-22 18:28:00.855886: step: 1152/531, loss: 0.001671980251558125 2023-01-22 18:28:01.915630: step: 1156/531, loss: 0.0006852815859019756 2023-01-22 18:28:02.980666: step: 1160/531, loss: 0.0011829659342765808 2023-01-22 18:28:04.038323: step: 1164/531, loss: 0.0006814971566200256 2023-01-22 18:28:05.106586: step: 1168/531, loss: 0.00020453993056435138 2023-01-22 18:28:06.160058: step: 1172/531, loss: 0.0004027107497677207 2023-01-22 18:28:07.221394: step: 1176/531, loss: 0.0015277061611413956 2023-01-22 18:28:08.281632: step: 1180/531, loss: 0.0008155119721777737 2023-01-22 18:28:09.364005: step: 1184/531, loss: 0.0005716923042200506 2023-01-22 18:28:10.433274: step: 1188/531, loss: 0.0046284436248242855 2023-01-22 18:28:11.509668: step: 1192/531, loss: 2.9455501135089435e-05 2023-01-22 18:28:12.608179: step: 1196/531, loss: 0.00408748397603631 2023-01-22 18:28:13.693876: step: 1200/531, loss: 0.008765937760472298 2023-01-22 18:28:14.738747: step: 1204/531, loss: 0.0019150781445205212 2023-01-22 18:28:15.836283: step: 1208/531, loss: 0.0024314725305885077 2023-01-22 18:28:16.899037: step: 1212/531, loss: 0.0006124668288975954 2023-01-22 18:28:17.964745: step: 1216/531, loss: 0.0008364720270037651 2023-01-22 18:28:19.021968: step: 1220/531, loss: 0.0021727036219090223 2023-01-22 18:28:20.080642: step: 1224/531, loss: 0.006255180574953556 2023-01-22 18:28:21.145444: step: 1228/531, loss: 0.003896774258464575 2023-01-22 18:28:22.211753: step: 1232/531, loss: 0.0027974562253803015 2023-01-22 18:28:23.279301: step: 1236/531, loss: 0.00645783357322216 2023-01-22 18:28:24.337894: step: 1240/531, loss: 0.010734925977885723 2023-01-22 18:28:25.395398: step: 1244/531, 
loss: 0.012926846742630005 2023-01-22 18:28:26.451324: step: 1248/531, loss: 0.00047558307414874434 2023-01-22 18:28:27.509800: step: 1252/531, loss: 2.039467290160246e-06 2023-01-22 18:28:28.557000: step: 1256/531, loss: 7.810291208443232e-06 2023-01-22 18:28:29.619669: step: 1260/531, loss: 0.0014118080725893378 2023-01-22 18:28:30.686825: step: 1264/531, loss: 0.0031001614406704903 2023-01-22 18:28:31.740854: step: 1268/531, loss: 8.220275776693597e-05 2023-01-22 18:28:32.816046: step: 1272/531, loss: 0.010749181732535362 2023-01-22 18:28:33.877685: step: 1276/531, loss: 0.011678296141326427 2023-01-22 18:28:34.945944: step: 1280/531, loss: 0.0004900435451418161 2023-01-22 18:28:35.994581: step: 1284/531, loss: 0.0007815133430995047 2023-01-22 18:28:37.053599: step: 1288/531, loss: 0.0022290016058832407 2023-01-22 18:28:38.124566: step: 1292/531, loss: 0.0035079398658126593 2023-01-22 18:28:39.177083: step: 1296/531, loss: 0.0026895743794739246 2023-01-22 18:28:40.224638: step: 1300/531, loss: 6.101379403844476e-05 2023-01-22 18:28:41.277281: step: 1304/531, loss: 0.00020624278113245964 2023-01-22 18:28:42.355671: step: 1308/531, loss: 0.008379470556974411 2023-01-22 18:28:43.406085: step: 1312/531, loss: 0.0021498200949281454 2023-01-22 18:28:44.465604: step: 1316/531, loss: 0.0001793982955859974 2023-01-22 18:28:45.517216: step: 1320/531, loss: 0.0012288064463064075 2023-01-22 18:28:46.568303: step: 1324/531, loss: 0.00017002425738610327 2023-01-22 18:28:47.621253: step: 1328/531, loss: 0.0027397775556892157 2023-01-22 18:28:48.690267: step: 1332/531, loss: 0.005439895670861006 2023-01-22 18:28:49.742058: step: 1336/531, loss: 0.00027735059848055243 2023-01-22 18:28:50.796002: step: 1340/531, loss: 0.010006377473473549 2023-01-22 18:28:51.868718: step: 1344/531, loss: 0.002805290976539254 2023-01-22 18:28:52.925878: step: 1348/531, loss: 0.007355965208262205 2023-01-22 18:28:53.964607: step: 1352/531, loss: 0.007107268553227186 2023-01-22 18:28:55.015484: step: 1356/531, loss: 0.00012731105380225927 2023-01-22 18:28:56.087922: step: 1360/531, loss: 0.0011664126068353653 2023-01-22 18:28:57.167918: step: 1364/531, loss: 0.0142782311886549 2023-01-22 18:28:58.222550: step: 1368/531, loss: 6.176657620926562e-07 2023-01-22 18:28:59.294674: step: 1372/531, loss: 0.0015357902739197016 2023-01-22 18:29:00.356283: step: 1376/531, loss: 0.0008353438461199403 2023-01-22 18:29:01.413793: step: 1380/531, loss: 1.4515704606310464e-05 2023-01-22 18:29:02.466700: step: 1384/531, loss: 0.00016583810793235898 2023-01-22 18:29:03.516982: step: 1388/531, loss: 0.0017000462394207716 2023-01-22 18:29:04.576018: step: 1392/531, loss: 7.207140151876956e-05 2023-01-22 18:29:05.627618: step: 1396/531, loss: 0.00355811626650393 2023-01-22 18:29:06.685360: step: 1400/531, loss: 0.008616171777248383 2023-01-22 18:29:07.761860: step: 1404/531, loss: 0.00039798705256544054 2023-01-22 18:29:08.826717: step: 1408/531, loss: 0.003896415699273348 2023-01-22 18:29:09.888931: step: 1412/531, loss: 0.0013024717336520553 2023-01-22 18:29:10.943725: step: 1416/531, loss: 0.0007592260953970253 2023-01-22 18:29:11.999666: step: 1420/531, loss: 0.002464998047798872 2023-01-22 18:29:13.051109: step: 1424/531, loss: 0.00017223262693732977 2023-01-22 18:29:14.116865: step: 1428/531, loss: 0.004593865945935249 2023-01-22 18:29:15.163834: step: 1432/531, loss: 0.0008349682902917266 2023-01-22 18:29:16.215412: step: 1436/531, loss: 0.0006441331934183836 2023-01-22 18:29:17.273240: step: 1440/531, loss: 0.0006603756919503212 
2023-01-22 18:29:18.331736: step: 1444/531, loss: 0.002461852738633752 2023-01-22 18:29:19.403310: step: 1448/531, loss: 0.0050451443530619144 2023-01-22 18:29:20.465569: step: 1452/531, loss: 0.0010527916019782424 2023-01-22 18:29:21.519958: step: 1456/531, loss: 0.0010714300442487001 2023-01-22 18:29:22.570995: step: 1460/531, loss: 0.00045495573431253433 2023-01-22 18:29:23.626776: step: 1464/531, loss: 0.00023459554358851165 2023-01-22 18:29:24.696808: step: 1468/531, loss: 0.002179292496293783 2023-01-22 18:29:25.753392: step: 1472/531, loss: 7.718374399701133e-05 2023-01-22 18:29:26.809124: step: 1476/531, loss: 0.004777817986905575 2023-01-22 18:29:27.859547: step: 1480/531, loss: 0.0001367541990475729 2023-01-22 18:29:28.919920: step: 1484/531, loss: 0.0012707796413451433 2023-01-22 18:29:29.972083: step: 1488/531, loss: 0.0007572412141598761 2023-01-22 18:29:31.013414: step: 1492/531, loss: 0.0033923774026334286 2023-01-22 18:29:32.059929: step: 1496/531, loss: 0.00013735836546402425 2023-01-22 18:29:33.121186: step: 1500/531, loss: 0.006993074435740709 2023-01-22 18:29:34.176723: step: 1504/531, loss: 0.010927842929959297 2023-01-22 18:29:35.229265: step: 1508/531, loss: 3.1120273433771217e-06 2023-01-22 18:29:36.273150: step: 1512/531, loss: 0.0022322058212012053 2023-01-22 18:29:37.312836: step: 1516/531, loss: 0.00124601018615067 2023-01-22 18:29:38.364202: step: 1520/531, loss: 0.0005221243482083082 2023-01-22 18:29:39.433720: step: 1524/531, loss: 0.0013709780760109425 2023-01-22 18:29:40.493169: step: 1528/531, loss: 0.00789638515561819 2023-01-22 18:29:41.558144: step: 1532/531, loss: 0.0008394402102567255 2023-01-22 18:29:42.610996: step: 1536/531, loss: 0.004712972324341536 2023-01-22 18:29:43.668474: step: 1540/531, loss: 0.005781758576631546 2023-01-22 18:29:44.731859: step: 1544/531, loss: 0.00011162083683302626 2023-01-22 18:29:45.792361: step: 1548/531, loss: 0.0014038147637620568 2023-01-22 18:29:46.843289: step: 1552/531, loss: 0.0007645799778401852 2023-01-22 18:29:47.896100: step: 1556/531, loss: 0.0005982242291793227 2023-01-22 18:29:48.932335: step: 1560/531, loss: 0.00030596094438806176 2023-01-22 18:29:49.977095: step: 1564/531, loss: 0.002186855534091592 2023-01-22 18:29:51.030782: step: 1568/531, loss: 0.0036535398103296757 2023-01-22 18:29:52.089243: step: 1572/531, loss: 0.014048433862626553 2023-01-22 18:29:53.133798: step: 1576/531, loss: 0.00051969412015751 2023-01-22 18:29:54.207948: step: 1580/531, loss: 0.007731867954134941 2023-01-22 18:29:55.269079: step: 1584/531, loss: 0.0045402473770082 2023-01-22 18:29:56.318569: step: 1588/531, loss: 1.7715603462420404e-05 2023-01-22 18:29:57.379486: step: 1592/531, loss: 0.006822424009442329 2023-01-22 18:29:58.428465: step: 1596/531, loss: 0.00033602677285671234 2023-01-22 18:29:59.499809: step: 1600/531, loss: 0.002748908242210746 2023-01-22 18:30:00.600904: step: 1604/531, loss: 0.00048170171794481575 2023-01-22 18:30:01.647144: step: 1608/531, loss: 0.0011635019909590483 2023-01-22 18:30:02.695731: step: 1612/531, loss: 0.000495315354783088 2023-01-22 18:30:03.770956: step: 1616/531, loss: 0.0033146708738058805 2023-01-22 18:30:04.855982: step: 1620/531, loss: 0.0024260953068733215 2023-01-22 18:30:05.918939: step: 1624/531, loss: 0.008035477250814438 2023-01-22 18:30:06.978027: step: 1628/531, loss: 6.469494837801903e-05 2023-01-22 18:30:08.029116: step: 1632/531, loss: 0.0003750807954929769 2023-01-22 18:30:09.079942: step: 1636/531, loss: 0.003918210975825787 2023-01-22 18:30:10.135042: step: 
1640/531, loss: 0.0013503653462976217 2023-01-22 18:30:11.174483: step: 1644/531, loss: 0.006134611554443836 2023-01-22 18:30:12.240097: step: 1648/531, loss: 0.0006110779359005392 2023-01-22 18:30:13.309810: step: 1652/531, loss: 2.8610174922505394e-05 2023-01-22 18:30:14.367539: step: 1656/531, loss: 0.00261300103738904 2023-01-22 18:30:15.424291: step: 1660/531, loss: 0.0012822758872061968 2023-01-22 18:30:16.472709: step: 1664/531, loss: 0.006871582940220833 2023-01-22 18:30:17.531210: step: 1668/531, loss: 0.008112279698252678 2023-01-22 18:30:18.577982: step: 1672/531, loss: 0.0027954818215221167 2023-01-22 18:30:19.644874: step: 1676/531, loss: 0.0009296463103964925 2023-01-22 18:30:20.708655: step: 1680/531, loss: 0.007145033683627844 2023-01-22 18:30:21.783190: step: 1684/531, loss: 0.010795000940561295 2023-01-22 18:30:22.828321: step: 1688/531, loss: 0.0046673426404595375 2023-01-22 18:30:23.883265: step: 1692/531, loss: 8.890879854561717e-08 2023-01-22 18:30:24.941994: step: 1696/531, loss: 6.748306986992247e-06 2023-01-22 18:30:25.998441: step: 1700/531, loss: 0.003095329040661454 2023-01-22 18:30:27.063326: step: 1704/531, loss: 0.0003643136296886951 2023-01-22 18:30:28.124736: step: 1708/531, loss: 0.009540271945297718 2023-01-22 18:30:29.178900: step: 1712/531, loss: 0.026444517076015472 2023-01-22 18:30:30.252720: step: 1716/531, loss: 0.005372142884880304 2023-01-22 18:30:31.310055: step: 1720/531, loss: 0.007801800966262817 2023-01-22 18:30:32.356891: step: 1724/531, loss: 0.001963839866220951 2023-01-22 18:30:33.409189: step: 1728/531, loss: 0.00029295426793396473 2023-01-22 18:30:34.479996: step: 1732/531, loss: 0.013960192911326885 2023-01-22 18:30:35.559810: step: 1736/531, loss: 0.006792435888200998 2023-01-22 18:30:36.605802: step: 1740/531, loss: 5.365608103602426e-06 2023-01-22 18:30:37.659816: step: 1744/531, loss: 0.0011303460923954844 2023-01-22 18:30:38.714927: step: 1748/531, loss: 0.0009954527486115694 2023-01-22 18:30:39.753778: step: 1752/531, loss: 0.002465495839715004 2023-01-22 18:30:40.815631: step: 1756/531, loss: 0.008292421698570251 2023-01-22 18:30:41.892841: step: 1760/531, loss: 0.008431659080088139 2023-01-22 18:30:42.949637: step: 1764/531, loss: 0.002968426328152418 2023-01-22 18:30:44.019103: step: 1768/531, loss: 0.005722769536077976 2023-01-22 18:30:45.081088: step: 1772/531, loss: 0.0002745292440522462 2023-01-22 18:30:46.138442: step: 1776/531, loss: 0.00303266872651875 2023-01-22 18:30:47.187887: step: 1780/531, loss: 0.0003002272278536111 2023-01-22 18:30:48.239298: step: 1784/531, loss: 0.04699743539094925 2023-01-22 18:30:49.296042: step: 1788/531, loss: 0.0046489122323691845 2023-01-22 18:30:50.368758: step: 1792/531, loss: 0.0056006391532719135 2023-01-22 18:30:51.421439: step: 1796/531, loss: 0.0013820345047861338 2023-01-22 18:30:52.476634: step: 1800/531, loss: 0.00031669222516939044 2023-01-22 18:30:53.532126: step: 1804/531, loss: 0.008193948306143284 2023-01-22 18:30:54.584417: step: 1808/531, loss: 8.414102194365114e-05 2023-01-22 18:30:55.642554: step: 1812/531, loss: 0.0031748320907354355 2023-01-22 18:30:56.687330: step: 1816/531, loss: 0.002375259529799223 2023-01-22 18:30:57.746443: step: 1820/531, loss: 0.0014962980058044195 2023-01-22 18:30:58.797565: step: 1824/531, loss: 5.357582267606631e-05 2023-01-22 18:30:59.851252: step: 1828/531, loss: 0.0003404039307497442 2023-01-22 18:31:00.932568: step: 1832/531, loss: 0.0028575779870152473 2023-01-22 18:31:01.988853: step: 1836/531, loss: 7.783420733176172e-05 2023-01-22 
18:31:03.053737: step: 1840/531, loss: 0.000720977084711194 2023-01-22 18:31:04.113814: step: 1844/531, loss: 0.00033143951441161335 2023-01-22 18:31:05.183335: step: 1848/531, loss: 0.004121869802474976 2023-01-22 18:31:06.249637: step: 1852/531, loss: 0.002924948697909713 2023-01-22 18:31:07.332380: step: 1856/531, loss: 0.009038897231221199 2023-01-22 18:31:08.378588: step: 1860/531, loss: 0.006168980151414871 2023-01-22 18:31:09.437437: step: 1864/531, loss: 0.0015159776667132974 2023-01-22 18:31:10.494331: step: 1868/531, loss: 1.2370785952953156e-05 2023-01-22 18:31:11.551514: step: 1872/531, loss: 0.010927345603704453 2023-01-22 18:31:12.601886: step: 1876/531, loss: 0.00023917449289001524 2023-01-22 18:31:13.668178: step: 1880/531, loss: 0.0007985814590938389 2023-01-22 18:31:14.715815: step: 1884/531, loss: 0.027138935402035713 2023-01-22 18:31:15.781571: step: 1888/531, loss: 0.006283156108111143 2023-01-22 18:31:16.841104: step: 1892/531, loss: 0.00152781349606812 2023-01-22 18:31:17.901394: step: 1896/531, loss: 0.008456099778413773 2023-01-22 18:31:18.947200: step: 1900/531, loss: 0.0011861893581226468 2023-01-22 18:31:20.020803: step: 1904/531, loss: 0.014940268360078335 2023-01-22 18:31:21.114652: step: 1908/531, loss: 3.7218098441371694e-05 2023-01-22 18:31:22.181497: step: 1912/531, loss: 0.003133331658318639 2023-01-22 18:31:23.258967: step: 1916/531, loss: 0.0023243161849677563 2023-01-22 18:31:24.308645: step: 1920/531, loss: 0.004547064192593098 2023-01-22 18:31:25.369340: step: 1924/531, loss: 3.5978862342744833e-06 2023-01-22 18:31:26.428183: step: 1928/531, loss: 1.7694386770017445e-05 2023-01-22 18:31:27.484423: step: 1932/531, loss: 0.0010641299886628985 2023-01-22 18:31:28.532220: step: 1936/531, loss: 0.0022202807012945414 2023-01-22 18:31:29.584558: step: 1940/531, loss: 6.821251736255363e-05 2023-01-22 18:31:30.645448: step: 1944/531, loss: 0.004387903492897749 2023-01-22 18:31:31.695978: step: 1948/531, loss: 0.002930429996922612 2023-01-22 18:31:32.762844: step: 1952/531, loss: 0.01775323413312435 2023-01-22 18:31:33.822302: step: 1956/531, loss: 0.008541782386600971 2023-01-22 18:31:34.880358: step: 1960/531, loss: 0.00044180723489262164 2023-01-22 18:31:35.938656: step: 1964/531, loss: 0.0003124934737570584 2023-01-22 18:31:37.018164: step: 1968/531, loss: 0.0374315045773983 2023-01-22 18:31:38.098469: step: 1972/531, loss: 0.0010741597507148981 2023-01-22 18:31:39.144323: step: 1976/531, loss: 3.249061410315335e-05 2023-01-22 18:31:40.216893: step: 1980/531, loss: 0.017730847001075745 2023-01-22 18:31:41.279218: step: 1984/531, loss: 0.00331980362534523 2023-01-22 18:31:42.354000: step: 1988/531, loss: 6.857659172965214e-05 2023-01-22 18:31:43.454955: step: 1992/531, loss: 0.005316200200468302 2023-01-22 18:31:44.525034: step: 1996/531, loss: 0.003829688997939229 2023-01-22 18:31:45.594381: step: 2000/531, loss: 0.004486995283514261 2023-01-22 18:31:46.642054: step: 2004/531, loss: 0.00022211871691979468 2023-01-22 18:31:47.717333: step: 2008/531, loss: 0.0024807052686810493 2023-01-22 18:31:48.777553: step: 2012/531, loss: 0.00013469728583004326 2023-01-22 18:31:49.849665: step: 2016/531, loss: 0.0018274181056767702 2023-01-22 18:31:50.907104: step: 2020/531, loss: 0.0056982324458658695 2023-01-22 18:31:51.974654: step: 2024/531, loss: 7.212285709101707e-05 2023-01-22 18:31:53.029264: step: 2028/531, loss: 0.0008153259404934943 2023-01-22 18:31:54.084670: step: 2032/531, loss: 0.002765039447695017 2023-01-22 18:31:55.143829: step: 2036/531, loss: 
0.011484814807772636 2023-01-22 18:31:56.203405: step: 2040/531, loss: 0.00012637648615054786 2023-01-22 18:31:57.258259: step: 2044/531, loss: 0.010979079641401768 2023-01-22 18:31:58.341439: step: 2048/531, loss: 0.009268800728023052 2023-01-22 18:31:59.409655: step: 2052/531, loss: 0.0038686192128807306 2023-01-22 18:32:00.501251: step: 2056/531, loss: 0.0002875272184610367 2023-01-22 18:32:01.580307: step: 2060/531, loss: 0.0007710916106589139 2023-01-22 18:32:02.645025: step: 2064/531, loss: 2.3162293416589819e-07 2023-01-22 18:32:03.689620: step: 2068/531, loss: 1.2665693247981835e-05 2023-01-22 18:32:04.756276: step: 2072/531, loss: 0.00014252541586756706 2023-01-22 18:32:05.810992: step: 2076/531, loss: 0.0003996905288659036 2023-01-22 18:32:06.871665: step: 2080/531, loss: 0.0008978333789855242 2023-01-22 18:32:07.962823: step: 2084/531, loss: 0.0007441990892402828 2023-01-22 18:32:09.026611: step: 2088/531, loss: 0.00553080765530467 2023-01-22 18:32:10.089955: step: 2092/531, loss: 0.004521744791418314 2023-01-22 18:32:11.150148: step: 2096/531, loss: 0.0038140560500323772 2023-01-22 18:32:12.206560: step: 2100/531, loss: 0.00023906734713818878 2023-01-22 18:32:13.269136: step: 2104/531, loss: 0.006527534686028957 2023-01-22 18:32:14.336052: step: 2108/531, loss: 2.842466074071126e-06 2023-01-22 18:32:15.397353: step: 2112/531, loss: 6.46032058284618e-05 2023-01-22 18:32:16.456667: step: 2116/531, loss: 0.0012776412768289447 2023-01-22 18:32:17.517222: step: 2120/531, loss: 7.714742969255894e-05 2023-01-22 18:32:18.593809: step: 2124/531, loss: 0.0001456171303289011
==================================================
Loss: 0.004
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3527675438596492, 'r': 0.31735716540404046, 'f1': 0.3341267863077435}, 'combined': 0.246198684647811, 'stategy': 1, 'epoch': 19}
Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33611333817627725, 'r': 0.2772631688566041, 'f1': 0.3038650752652002}, 'combined': 0.19027065460531228, 'stategy': 1, 'epoch': 19}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33272420147420145, 'r': 0.3504021476625841, 'f1': 0.34133443958998483}, 'combined': 0.2515095870663046, 'stategy': 1, 'epoch': 19}
Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3618592651253186, 'r': 0.3024738852760778, 'f1': 0.329512317064018}, 'combined': 0.2042048162086872, 'stategy': 1, 'epoch': 19}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3143834563345634, 'r': 0.323331752055661, 'f1': 0.31879482382288743}, 'combined': 0.23490144913265387, 'stategy': 1, 'epoch': 19}
Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36357079510937856, 'r': 0.28639000301298834, 'f1': 0.32039791140513196}, 'combined': 0.21254118875389943, 'stategy': 1, 'epoch': 19}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.34285714285714286, 'f1': 0.3529411764705882}, 'combined': 0.2352941176470588, 'stategy': 1, 'epoch': 19}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'stategy': 1, 'epoch': 19}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47058823529411764, 'r': 0.27586206896551724, 'f1': 0.34782608695652173}, 'combined': 0.23188405797101447, 'stategy': 1, 'epoch': 19}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.359904586834734, 'r': 0.32445943813131317, 'f1': 0.34126411022576364}, 'combined': 0.25145776542951004, 'stategy': 1, 'epoch': 18}
Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33720743007999715, 'r': 0.2781656959324164, 'f1': 0.3048541949486819}, 'combined': 0.19089000992113728, 'stategy': 1, 'epoch': 18}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.34285714285714286, 'f1': 0.3582089552238806}, 'combined': 0.2388059701492537, 'stategy': 1, 'epoch': 18}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33436839323467227, 'r': 0.3546715973779541, 'f1': 0.34422086891009546}, 'combined': 0.2536364297232282, 'stategy': 1, 'epoch': 5}
Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36411638395148305, 'r': 0.30337559853126267, 'f1': 0.3309823303208759}, 'combined': 0.2051158103396977, 'stategy': 1, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'stategy': 1, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316460244648318, 'r': 0.32726913345983555, 'f1': 0.32177394278606963}, 'combined': 0.23709658942131445, 'stategy': 1, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3637270714036404, 'r': 0.28684167419636686, 'f1': 0.32074114478321014}, 'combined': 0.21276887822252555, 'stategy': 1, 'epoch': 5}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.27586206896551724, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'stategy': 1, 'epoch': 5}
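
Editor's note on reading the score dictionaries above: 'template' and 'slot' each carry precision/recall/F1, and the 'combined' field matches the product of the two F1 values (e.g. 0.7368421 * 0.3341268 ~= 0.2462 for Dev Chinese at epoch 19). The snippet below is a minimal sketch, not taken from train.py, that recomputes the scores under that assumption; the function and variable names are illustrative.

# Minimal sketch (not the training code): recompute the logged scores, assuming
# f1 = 2*p*r/(p+r) and 'combined' = template_f1 * slot_f1, which the numbers
# printed above are consistent with.

def f1(p: float, r: float) -> float:
    return 0.0 if p + r == 0.0 else 2.0 * p * r / (p + r)

def combined_score(template: dict, slot: dict) -> float:
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

# Dev Chinese, epoch 19 (precision/recall copied from the log above):
dev_zh_template = {"p": 1.0, "r": 0.5833333333333334}
dev_zh_slot = {"p": 0.3527675438596492, "r": 0.31735716540404046}
print(combined_score(dev_zh_template, dev_zh_slot))  # ~0.24620, matching 'combined': 0.2461986...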
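
The "Current best result" block keeps a separate best epoch per language (Chinese from epoch 18, Korean and Russian from epoch 5), which is consistent with retaining, for each language, the epoch whose dev 'combined' score is highest and reporting the matching test/sample results next to it. A hedged sketch of that bookkeeping follows; the names are illustrative and not taken from train.py.

# Hedged sketch of per-language best-result tracking as implied by the log:
# keep the epoch with the highest dev 'combined' score for each language.
best = {}  # language -> {"epoch": int, "dev": dict, "test": dict}

def update_best(language: str, epoch: int, dev_scores: dict, test_scores: dict) -> None:
    entry = best.get(language)
    if entry is None or dev_scores["combined"] > entry["dev"]["combined"]:
        best[language] = {"epoch": epoch, "dev": dev_scores, "test": test_scores}

# At epoch 19 the Chinese dev 'combined' (~0.2462) does not beat the stored
# epoch-18 value (~0.2515), so the Chinese entry stays at epoch 18.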
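
A final note on the step counter: the numerator runs past the denominator (the last entry reads step 2124/531), and 2124 is exactly 4 * 531. This would be consistent with the numerator counting micro-batches while the denominator counts optimizer updates per epoch under 4-step gradient accumulation; the arithmetic, with assumed variable names, is sketched below.

# Assumed interpretation of the "step: N/531" counter (not taken from train.py):
# N counts micro-batches, 531 counts optimizer updates per epoch, and gradients
# are accumulated over 4 micro-batches per update.
accumulation_factor = 4          # assumed from the 2124/531 ratio
updates_per_epoch = 531          # denominator printed in every step line
micro_batches_per_epoch = updates_per_epoch * accumulation_factor
assert micro_batches_per_epoch == 2124  # matches the final "step: 2124/531" entry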